# Script version; it is embedded in the names of every output file.
$VERSION = 10;

# ---------------------------------------------------------------------------
# Interactive configuration.
# Every setting is stored in a package global because the rest of the script
# (including pairwise_compare) reads these variables directly.
# ---------------------------------------------------------------------------

# Primary GFF file: its regions are binned by score and compared against
# every GFF file found in the compare directory.
print "Enter gff-file name and press ENTER:\n";
$gff_file=<STDIN>;
chomp($gff_file);

# 'y' selects bins holding an equal share of the rows (percentiles); any
# other answer selects equal-width score intervals (cut-offs).
print "Bin by percentiles (y/n):\n";
$percentile_bins=<STDIN>;
chomp($percentile_bins);

# The bin count lives in the same variable in both modes; only the prompt
# text differs.
if($percentile_bins eq 'y'){
    print "Set number percentiles:\n";
}
else{
    print "Set number of Cut-Offs:\n";
}
$nr_cut=<STDIN>;
$nr_cut='' unless defined $nr_cut;    # EOF on STDIN
$nr_cut=~s/^\s+|\s+$//g;              # drops the newline and a stray "\r" too
# The bin count is used as a divisor and as a loop bound later on, so reject
# anything that is not a positive whole number right away instead of failing
# with "Illegal division by zero" after the input file has been read.
die "Number of bins must be a positive integer, got '$nr_cut'\n"
    unless $nr_cut=~/^[0-9]+$/ && $nr_cut>0;
$nr_cut+=0;                           # normalise e.g. "010" to 10

# 'y' writes one value per bin, 'n' writes each bin merged with all higher
# bins; any other answer produces no summary table at all.
print "Keep all cut-off bins separate (y/n):\n";
$sep_bins=<STDIN>;
chomp($sep_bins);

#Compare directory
$comp_dir="./compare";

# Load the primary GFF file. Every data line becomes one row of @gff_arr1:
#   [0] field 1 (sequence / chromosome name)
#   [1] field 4 (region start)
#   [2] field 5 (region end)
#   [3] field 6 (score; this is the value the bins are built from)
#   [4] row number in load order (percentile mode later stores the bin
#       index here instead)
# Three-argument open with a lexical handle: the name comes straight from the
# user, so it must never be interpreted as an open mode or a pipe.
open my $gff_fh, '<', $gff_file or die "Can't open file '$gff_file': $!\n";
@gff_arr1=();
$nr_row1=0;
$tot_length1=0;
while(my $line=<$gff_fh>){
    chomp($line);
    # Blank lines and GFF comment/directive lines ("##gff-version 3", ...)
    # are not regions; counting them would add rows with an empty score and
    # so distort $min_val and the bin sizes.
    next if $line=~/^\s*$/ || $line=~/^#/;
    my @fields=split/\t/, $line;
    $gff_arr1[$nr_row1]=[ @fields[0,3,4,5], $nr_row1 ];
    # Length is accumulated as end-start, the same way pairwise_compare
    # does it for the compare files.
    $tot_length1+=$fields[4]-$fields[3];
    $nr_row1++;
}
close($gff_fh);

# Without regions there is nothing to bin, and the average below would
# divide by zero.
die "No regions found in '$gff_file'\n" if $nr_row1==0;
$av_len1=int($tot_length1/$nr_row1);

# Score-ordered view of the same rows (the elements are references to the
# very same row arrays), used for the score range and for percentile bins.
@sort_gff_arr1 = sort {$a->[3] <=> $b->[3]} @gff_arr1;
$min_val=$sort_gff_arr1[0][3];
$max_val=$sort_gff_arr1[$nr_row1-1][3];

# Collect the files to compare against: every entry of $comp_dir whose name
# contains ".gff" or ".GFF" (anywhere in the name, not only as the suffix).
# readdir returns entries in an unspecified, file-system dependent order, so
# the list is sorted to make the row order of the output tables reproducible.
opendir(my $dir_handle, $comp_dir) or die "can't open directory '$comp_dir': $!";
@file_list = sort
             grep { index($_, '.gff')>-1 || index($_, '.GFF')>-1 }
             readdir($dir_handle);
closedir($dir_handle);
$nr_files = scalar(@file_list);

# Zero the two per-file result tables (one row per compare file, one column
# per bin) before any comparison runs.
for (my $file_idx = 0; $file_idx < $nr_files; $file_idx++) {
    for (my $bin = 0; $bin < $nr_cut; $bin++) {
        $overlap_arr[$file_idx][$bin]       = 0;
        $overlap_merge_arr[$file_idx][$bin] = 0;
    }
}

# Zero the per-bin count of primary regions.
for (my $bin = 0; $bin < $nr_cut; $bin++) {
    $tot_sum_cut_group[$bin] = 0;
}

# Assign every primary region to one of $nr_cut score bins and count the
# regions per bin in @tot_sum_cut_group.

# $nr_cut is a divisor in both modes; stop with a clear message instead of
# "Illegal division by zero".
die "Number of bins must be greater than zero, got '$nr_cut'\n" unless $nr_cut>0;

if($percentile_bins eq 'y'){
    # Percentile mode: walk the rows in score order and give each bin the
    # same share of rows. $tmp_val is the (possibly fractional) number of
    # rows per bin; row $y belongs to bin $x when
    # $x*$tmp_val <= $y < ($x+1)*$tmp_val.
    $tmp_val=$nr_row1/$nr_cut;
    for($x=0; $x<$nr_cut; $x++){
        for($y=0; $y<$nr_row1; $y++){
            if($y>=$x*$tmp_val && $y<($x+1)*$tmp_val){
                # Slot [4] now holds the bin index; pairwise_compare reads
                # it from there in percentile mode.
                $sort_gff_arr1[$y][4]=$x;
                $tot_sum_cut_group[$x]++;
            }
        }
    }
    # Put the (now bin-tagged) rows back into @gff_arr1, ordered by sequence
    # name (string order) and then by start.
    @gff_arr1 = sort {$a->[0] cmp $b->[0] || $a->[1] <=> $b->[1]} @sort_gff_arr1;
}
else{
    # Cut-off mode: split the score range [$min_val, $max_val] into $nr_cut
    # intervals of equal width.
    my $bin_width=($max_val-$min_val)/$nr_cut;
    for($x=0; $x<$nr_row1; $x++){
        # When every region has the same score (for example a file whose
        # score column is "." throughout) the width is 0; all regions then
        # belong to bin 0 and the division must be skipped.
        $index = $bin_width==0
               ? 0
               : int(($gff_arr1[$x][3]-$min_val)/$bin_width);
        # The maximum score lands exactly on the upper edge; fold it into
        # the last bin.
        if($index==$nr_cut){
            $index=$nr_cut-1;
        }
        $tot_sum_cut_group[$index]++;
    }
}


#for($y=0; $y<$nr_cut; $y++){
#        print "$tot_sum_cut_group[$y]\t";
#    }
#print "\n",scalar(@val_arr1),"\n";

# Detail report: pairwise_compare() prints two summaries per compare file to
# the OUT handle opened here.
open(OUT, ">AllOverlapDetailsOut_new_v$VERSION\.txt") || die "Can't open outfile!\n";

# Compare the primary file with each collected file in turn; the position in
# @file_list is passed along as the row index for the result tables.
$p=0;
while($p<$nr_files){
    pairwise_compare($file_list[$p], $p);
    $p++;
}

# pairwise_compare($file2, $row)
#
# Compares the primary regions in @gff_arr1 with the regions of one file from
# the compare directory and records how many regions overlap.
#
# Parameters:
#   $file2 - name of a file inside $comp_dir
#   $row   - row index for this file in the result tables
#
# Reads the globals $comp_dir, $gff_file, @gff_arr1, $nr_row1, $av_len1,
# $nr_cut, $percentile_bins, $min_val, $max_val and @tot_sum_cut_group.
#
# Writes (all package globals, as before):
#   $length_arr[$row]            - summed length (end-start) of the regions in $file2
#   $overlap_arr[$row][$bin]     - fraction of the primary regions of a bin
#                                  that overlap a region of $file2
#   $overlap_merge_arr[$row][$bin] - the same fraction for the bin merged
#                                  with all higher bins
#   $overlap12, $overlap21, @sum_cut_group, $nr_row2, $tot_length2, $av_len2,
#   @chrom_arr2, @start_arr2, @stop_arr2, $precent_ol - working values of
#                                  the last call
# Prints two summaries to the OUT handle and progress lines to STDOUT.
# Dies when $file2 cannot be opened.
#
# NOTE: the overlap search assumes that, within one sequence/chromosome, the
# regions of both files are ordered by start position.
sub pairwise_compare {
    ($file2, $row) = @_;

    # --- load the compare file ---------------------------------------------
    open my $in_fh, '<', "$comp_dir/$file2" or die "can't open $file2: $!";
    $tot_length2=0;
    $nr_row2=0;
    @chrom_arr2=();
    @start_arr2=();
    @stop_arr2=();
    while(my $line=<$in_fh>){
        chomp($line);
        # Blank lines and "#" comment/directive lines are not regions.
        next if $line=~/^\s*$/ || $line=~/^#/;
        my @fields=split/\t/, $line;
        push @chrom_arr2, $fields[0];
        push @start_arr2, $fields[3];
        push @stop_arr2, $fields[4];
        $tot_length2+=$fields[4]-$fields[3];
        $nr_row2++;
    }
    close($in_fh);
    # An empty compare file must not abort the whole run.
    $av_len2=$nr_row2 ? int($tot_length2/$nr_row2) : 0;
    $length_arr[$row]=$tot_length2;

    for(my $bin=0; $bin<$nr_cut; $bin++){
        $sum_cut_group[$bin]=0;
    }

    # --- count overlaps in both directions ---------------------------------
    my @rows1 =@gff_arr1[0 .. $nr_row1-1];
    my @chrom1=map { $_->[0] } @rows1;
    my @start1=map { $_->[1] } @rows1;
    my @stop1 =map { $_->[2] } @rows1;

    # Primary regions that overlap at least one region of $file2; every hit
    # is also counted in the score bin of the primary region.
    $overlap12=_count_overlapping_regions(
        \@chrom1, \@start1, \@stop1,
        \@chrom_arr2, \@start_arr2, \@stop_arr2,
        sub { $sum_cut_group[ _score_bin_index($rows1[$_[0]]) ]++ },
    );

    # Regions of $file2 that overlap at least one primary region. This value
    # is printed in the second summary but used to be left uncomputed.
    $overlap21=_count_overlapping_regions(
        \@chrom_arr2, \@start_arr2, \@stop_arr2,
        \@chrom1, \@start1, \@stop1,
    );

    # --- summary: primary file vs compare file -----------------------------
    # The primary file name is held in $gff_file.
    print OUT "$gff_file vs $file2\nFile1Regions\tFile2Regions\tNr1overlap2\tPercent1Overlap2\tAverageRegionLength1\n";
    $precent_ol=$nr_row1 ? ($overlap12/$nr_row1)*100 : 0;
    print OUT "$nr_row1\t$nr_row2\t$overlap12\t$precent_ol\t$av_len1\n\n";

    # --- per-bin overlap fractions -----------------------------------------
    for(my $bin=0; $bin<$nr_cut; $bin++){
        # Totals for this bin together with all higher bins.
        my $group_sum=0;
        my $cut_sum=0;
        for(my $higher=$bin; $higher<$nr_cut; $higher++){
            $group_sum+=$tot_sum_cut_group[$higher];
            $cut_sum+=$sum_cut_group[$higher];
        }
        # Empty bins report 0 instead of dividing by zero.
        $overlap_merge_arr[$row][$bin]=$group_sum==0 ? 0 : $cut_sum/$group_sum;
        $overlap_arr[$row][$bin]=$tot_sum_cut_group[$bin]==0
                                ? 0
                                : $sum_cut_group[$bin]/$tot_sum_cut_group[$bin];
        print "$sum_cut_group[$bin]\t";
    }
    print "\n";

    # --- summary: compare file vs primary file -----------------------------
    print OUT "$file2 vs $gff_file\nFile2Regions\tFile1Regions\tNr2overlap1\tPercent2Overlap1\tAverageRegionLength2\n";
    $precent_ol=$nr_row2 ? ($overlap21/$nr_row2)*100 : 0;
    print OUT "$nr_row2\t$nr_row1\t$overlap21\t$precent_ol\t$av_len2\n\n";

    print "\nRow $row done\n";
}

# Returns the score bin of one @gff_arr1 row: in percentile mode the index
# stored in slot [4], otherwise the equal-width interval that contains the
# score in slot [3].
sub _score_bin_index {
    my ($region)=@_;
    return $region->[4] if $percentile_bins eq 'y';

    my $bin_width=$nr_cut ? ($max_val-$min_val)/$nr_cut : 0;
    # All scores equal: there is only one meaningful bin.
    return 0 if $bin_width==0;

    my $bin=int(($region->[3]-$min_val)/$bin_width);
    # The maximum score sits on the upper edge; it belongs to the last bin.
    return $bin==$nr_cut ? $nr_cut-1 : $bin;
}

# True when region b (start_b..stop_b) touches region a (start_a..stop_a):
# b starts inside a, b ends inside a, b spans a, or b lies inside a.
sub _regions_overlap {
    my ($start_a, $stop_a, $start_b, $stop_b)=@_;
    return ($start_b>=$start_a && $start_b<=$stop_a)
        || ($stop_b >=$start_a && $stop_b <=$stop_a)
        || ($stop_b >=$stop_a  && $start_b<=$start_a)
        || ($stop_b <=$stop_a  && $start_b>=$start_a);
}

# Counts the regions of set A that overlap at least one region of set B.
# Each set is given as three parallel array references (sequence names,
# starts, ends). $on_hit, when given, is called with the index of every A
# region that has an overlap.
#
# The scan of B resumes, separately for every sequence name, at the B region
# that produced the previous hit (initially the first B region with that
# name). Keeping one resume position per name means the two sets do not have
# to list their sequences in the same order.
sub _count_overlapping_regions {
    my ($chrom_a, $start_a, $stop_a, $chrom_b, $start_b, $stop_b, $on_hit)=@_;

    # First B index of every sequence name (filled back to front so that the
    # lowest index wins).
    my %resume_at;
    for(my $j=$#{$chrom_b}; $j>=0; $j--){
        $resume_at{$chrom_b->[$j]}=$j;
    }

    my $hits=0;
    my $last_b=$#{$chrom_b};
    for(my $i=0; $i<@{$chrom_a}; $i++){
        my $chrom=$chrom_a->[$i];
        next unless exists $resume_at{$chrom};    # B has nothing on this sequence
        for(my $j=$resume_at{$chrom}; $j<=$last_b; $j++){
            next unless $chrom_b->[$j] eq $chrom;
            # B is ordered by start: nothing further on can reach region A.
            last if $start_b->[$j]>$stop_a->[$i];
            next unless _regions_overlap($start_a->[$i], $stop_a->[$i],
                                         $start_b->[$j], $stop_b->[$j]);
            $hits++;
            $resume_at{$chrom}=$j;
            $on_hit->($i) if $on_hit;
            last;                                 # one hit per A region is enough
        }
    }
    return $hits;
}

# Write the summary table: one row per compare file, one column per bin.
# 'y' reports every bin on its own, 'n' reports every bin merged with all
# higher bins; any other answer writes no table.
if($sep_bins eq 'y'){
    _write_overlap_summary("OverlapAllvs1_increaseCutOffOutSepBins_v$VERSION\.txt", \@overlap_arr, 0);
}

if($sep_bins eq 'n'){
    _write_overlap_summary("OverlapAllvs1_increaseCutOffOutMergeHigherBins_v$VERSION\.txt", \@overlap_merge_arr, 1);
}

# _write_overlap_summary($out_name, $fractions, $merge_higher)
#
# Writes one summary table to the file $out_name.
#   $fractions    - reference to the table to print ([file index][bin])
#   $merge_higher - true: the region-count header shows, per bin, the number
#                   of primary regions in that bin and all higher bins;
#                   false: the number of regions in that bin only
# Layout: a title, a header row with the lowest score of every bin, a header
# row with the region counts, then one line per compare file with its
# per-bin values and the summed region length of that file.
# Dies when the file cannot be opened or not completely written.
sub _write_overlap_summary {
    my ($out_name, $fractions, $merge_higher)=@_;

    open my $out_fh, '>', $out_name or die "Can't open outfile '$out_name': $!\n";
    print {$out_fh} "OverlapArray\n\n";

    # Header row 1: lowest score of every bin.
    if($percentile_bins eq 'y'){
        # Rows are in score order and tagged with their bin in slot [4]; the
        # first row of each bin carries that bin's lowest score.
        my $current_bin=-1;
        for(my $r=0; $r<$nr_row1; $r++){
            if($sort_gff_arr1[$r][4]!=$current_bin){
                print {$out_fh} "\t", $sort_gff_arr1[$r][3];
                $current_bin=$sort_gff_arr1[$r][4];
            }
        }
    }
    else{
        # Equal-width bins: lower edge of each interval.
        for(my $bin=0; $bin<$nr_cut; $bin++){
            print {$out_fh} "\t", $min_val+($bin*(($max_val-$min_val)/$nr_cut));
        }
    }
    print {$out_fh} "\n";

    # Header row 2: number of primary regions behind every column.
    for(my $bin=0; $bin<$nr_cut; $bin++){
        my $region_count;
        if($merge_higher){
            $region_count=0;
            for(my $higher=$bin; $higher<$nr_cut; $higher++){
                $region_count+=$tot_sum_cut_group[$higher];
            }
        }
        else{
            $region_count=$tot_sum_cut_group[$bin];
        }
        print {$out_fh} "\t$region_count";
    }
    print {$out_fh} "\tTotalLength\n";

    # One line per compare file.
    for(my $file_idx=0; $file_idx<$nr_files; $file_idx++){
        print {$out_fh} "$file_list[$file_idx]";
        for(my $bin=0; $bin<$nr_cut; $bin++){
            print {$out_fh} "\t$fractions->[$file_idx][$bin]";
        }
        print {$out_fh} "\t$length_arr[$file_idx]\n";
    }

    # Buffered write errors (full disk, ...) only surface at close.
    close($out_fh) or die "Can't finish writing '$out_name': $!\n";
}

