#!/usr/bin/perl -w
use strict;

# To use this script, you must have run the script called "step_1_in_custom_quantification_of_bwa_aligned_reads_121222_1.pl".  That ...
# ... script will have generated a file with a name like this:  
# readIDs_151bpto200bp_TLENs_FLAGs_99_163_355_419_CIGAR50M_reads.txt
# The file above would have been the result of asking for quantification of library fragments that are ...
# ... from 151-200 base pairs long in a paired-end sam file called "reads.sam" with 50 base pair reads.  In order ...
# to process that file with the current script, you would enter this:
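# perl step_2_if_quantifying_midpoints_of_library_inserts_121222_1.pl readIDs_151bpto200bp_TLENs_FLAGs_99_163_355_419_CIGAR50M_reads.txt
# NOTE:  The script stops with an error unless the name of the input file begins with "extended_", so the ...
# ... example file name above would be rejected as written.  (TODO:  confirm the exact file name that step 1 produces.)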

# The file called "sacCer3.chrom.sizes" must be available in the directory in which you run this script.  That file ...
# ... is available from the UCSC Genome Browser site.  It is also pasted directly below.  (Note that there is no header row.)

# chrX	745751
# chrM	85779
# chrI	230218
# chrII	813184
# chrIII	316620
# chrIV	1531933
# chrIX	439888
# chrV	576874
# chrVI	270161
# chrVII	1090940
# chrVIII	562643
# chrXI	666816
# chrXII	1078177
# chrXIII	924431
# chrXIV	784333
# chrXV	1091291
# chrXVI	948066

# Input file: the first command-line argument.  Only names that begin with
# "extended_" are accepted (see the second check below).
my $extendedfile = $ARGV[0];
unless (defined $extendedfile) {
    # Without this check a missing argument produces "uninitialized value"
    # warnings under -w followed by an error message with an empty file name.
    die "usage: perl $0 extended_<input file name>\n";
}
unless ($extendedfile =~ /^extended_/) {
    die "the file doesn't start with extended:  $extendedfile\n";
}

# Tab-delimited table of chromosome names and lengths (no header row).  It is
# opened by this relative name, i.e. looked up in the current directory.
my $file = "sacCer3.chrom.sizes";

# Chromosomes to report, in the order in which they are processed.
my @chrs = qw(
    chrX
    chrM
    chrI
    chrII
    chrIII
    chrIV
    chrIX
    chrV
    chrVI
    chrVII
    chrVIII
    chrXI
    chrXII
    chrXIII
    chrXIV
    chrXV
    chrXVI
    );

# Chromosome name => length, as read from the chromosome sizes file.
my %chr_length = ();

open (my $sizes_fh, '<', $file) or die "cannot open $file for reading: $!\n";
while (my $line = <$sizes_fh>) {
    chomp $line;
    # A carriage return left over after chomp means the file does not use
    # plain "\n" line endings; the last field would be corrupted.
    if ($line =~ /\r/) {
        die "$file, line $.: carriage return found (convert the file to Unix line endings)\n";
    }
    # Lines containing only whitespace are ignored.
    if ($line =~ /\S/) {
        my @array = split /\t/, $line;
        unless ($#array == 1) {
            die "$file, line $.: expected 2 tab-separated fields but found " . ($#array + 1) . "\n";
        }
        $chr_length{$array[0]} = $array[1];
    }
}
close $sizes_fh;

# Every chromosome in @chrs needs a length.  Without one, the per-position
# loops later in the script would emit warnings and silently leave that
# chromosome out of the output.
foreach my $wanted (@chrs) {
    unless (defined $chr_length{$wanted}) {
        die "$file has no length for $wanted\n";
    }
}

# From here on $file names the per-fragment input file rather than the
# chromosome sizes table.
$file = $extendedfile;

# File-scope working variables shared with the sections that follow.
# ($depth is declared here but not used in the visible part of the script.)
my $chr;
my $length;
my $pos;
my $depth;

# Midpoint tally:  $chr_pos_depth{chromosome}{position} => count, with
# positions numbered from 1.
my %chr_pos_depth = ();

# Give every position of every listed chromosome a count of zero, so that
# positions that never receive a midpoint still have a defined value.
for my $name (@chrs) {
    $length = $chr_length{$name};
    for ($pos = 1; $pos <= $length; $pos++) {
        $chr_pos_depth{$name}{$pos} = 0;
    }
}

# Chromosomes whose names have already been printed to STDOUT.
my %chr_seen = ();

# Number of input lines whose midpoint is not one of the positions that were
# initialised to zero (unlisted chromosome, or position outside 1..length).
my $skipped = 0;

# Each non-blank line must have exactly 6 tab-separated fields.  Only three are
# used here:  field 3 (chromosome), field 4 (start position) and field 6,
# which is treated as the fragment length.
# NOTE(review): field 6 is presumably the SAM TLEN value -- confirm against
# the script that produces the input file.
open (my $reads_fh, '<', $file) or die "cannot open $file for reading: $!\n";
while (my $line = <$reads_fh>) {
    chomp $line;
    # A carriage return left over after chomp means the file does not use
    # plain "\n" line endings; the last field would be corrupted.
    if ($line =~ /\r/) {
        die "$file, line $.: carriage return found (convert the file to Unix line endings)\n";
    }
    # Lines containing only whitespace are ignored.
    if ($line =~ /\S/) {
        my @array = split /\t/, $line;
        unless ($#array == 5) {
            die "$file, line $.: expected 6 tab-separated fields but found " . ($#array + 1) . "\n";
        }
        $chr = $array[2];
        $pos = $array[3];
        # Progress report:  print each chromosome name the first time it is seen.
        unless ($chr_seen{$chr}) {
            print "$chr\n";
            $chr_seen{$chr} = 1;
        }
        # Last position covered by the fragment, then the midpoint of
        # start..end (int() truncates when the sum is odd).
        my $end = $pos + ($array[5] - 1);
        my $midpos = int(($pos + $end) / 2);
        # Count only positions that will be written out.  Incrementing any
        # other key would create an entry that is never reported, hiding the
        # problem.
        if (exists $chr_pos_depth{$chr} and exists $chr_pos_depth{$chr}{$midpos}) {
            $chr_pos_depth{$chr}{$midpos}++;
        }
        else {
            $skipped++;
        }
    }
}
close $reads_fh;

if ($skipped) {
    warn "warning: $skipped line(s) of $file had a midpoint outside the reported chromosome positions and were not counted\n";
}


# The output file is named after the input file with a fixed prefix added.
my $out = "center_spot_manual_genomecov_$file";
# Defensive check against overwriting the input.  As $out is built here it is
# always longer than $file, so this cannot fire.
if ($file eq $out) {
    die "output file name $out is the same as the input file name\n";
}

# Write one line per position of every listed chromosome:
# chromosome <TAB> position <TAB> number of midpoints at that position.
open (my $out_fh, '>', $out) or die "cannot open $out for writing: $!\n";
foreach my $name (@chrs) {
    my $last_pos = $chr_length{$name};
    for (my $p = 1; $p <= $last_pos; $p++) {
        print {$out_fh} "$name\t$p\t$chr_pos_depth{$name}{$p}\n";
    }
}
# Buffered write errors (for example a full disk) are only reported at close.
close $out_fh or die "error while writing $out: $!\n";

print "done!\n";
