#########################################################################################################
## Function 'extract_features_with_minOverlap'
#########################################################################################################
## Extracts all pairs of features from 'tab2' and 'tab1' that overlap each other
## 	by at least a fraction 'mo' (default = 0.5) of the shorter feature of the pair.
##
## Arguments:
##   tab1, tab2  tables containing two sets of features (e.g. ChIP-seq peaks)
##               with: col1 = chromosome, col2 = feature start, col3 = feature end.
##               All columns of a selected row are copied to the output.
##   mo          minimum overlap, expressed as a fraction of the width of the
##               shorter feature of each pair.
##
## Value:
##   A data frame with one row per qualifying pair: the columns of the 'tab2'
##   feature followed by the columns of the 'tab1' feature. It has zero rows when
##   no pair qualifies or when the two tables share no chromosome.
##
## Uses IRanges(), findOverlaps(), width(), start() and end() from the 'IRanges'
## package, which is attached with library() when there is work to do.
#########################################################################################################

extract_features_with_minOverlap <- function(tab1, tab2, mo=0.5){

       ## Only chromosomes present in both tables can contribute pairs
       chr <- sort(intersect(tab2[,1], tab1[,1]))

       ## Nothing in common: return an empty table in the output layout.
       ## (A loop over 1:length(chr) would run for i = 1 and i = 0 here.)
       if (length(chr) == 0) {
            return(cbind(tab2[0, , drop=FALSE], tab1[0, , drop=FALSE]))
       }

       library(IRanges)

       ## One result table per chromosome; they are combined once after the loop
       hits <- vector("list", length(chr))

       for(i in seq_along(chr)){

            ## Get all feature (peak) coordinates on chr i from table 1.
            ## which() skips rows whose chromosome is NA instead of turning
            ## them into all-NA rows.
            co1 <- tab1[which(tab1[,1]==chr[i]), , drop=FALSE]
            rownames(co1) <- NULL
            ## Transfer to IRanges format
            co1.Ranges <- IRanges(start=co1[,2], end=co1[,3])

            ## Get all feature (peak) coordinates on chr i from table 2
            co2 <- tab2[which(tab2[,1]==chr[i]), , drop=FALSE]
            rownames(co2) <- NULL
            ## Transfer to IRanges format
            co2.Ranges <- IRanges(start=co2[,2], end=co2[,3])

            ## Find overlaps between features (peaks):
            ## column 1 indexes rows of co2 (query), column 2 rows of co1 (subject)
            overlap <- as.matrix(findOverlaps(co2.Ranges, co1.Ranges, select="all"))
            idx2 <- overlap[,1]
            idx1 <- overlap[,2]

            ## Length of the shorter feature (peak) of each pair
            min.feat <- pmin(width(co2.Ranges)[idx2], width(co1.Ranges)[idx1])

            ## Length of the overlap of each pair: from the later start to the
            ## earlier end, both ends inclusive
            max.start <- pmax(start(co2.Ranges)[idx2], start(co1.Ranges)[idx1])
            min.end <- pmin(end(co2.Ranges)[idx2], end(co1.Ranges)[idx1])
            ol.len <- min.end - max.start + 1L

            ## Pairs whose overlap covers at least fraction 'mo' of the shorter
            ## feature; which() also discards NaN ratios (0/0)
            keep <- which(ol.len/min.feat >= mo)

            ## Table of qualifying pairs on this chromosome. drop=FALSE keeps the
            ## table shape whether zero, one or many pairs are selected.
            hits[[i]] <- cbind(co2[idx2[keep], , drop=FALSE],
                               co1[idx1[keep], , drop=FALSE])
       }

       do.call(rbind, hits)
}

