#!/usr/bin/perl
use strict;
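# Script to extract the coverage of a reference set of contigs (SAGs) by metagenomes, using nucmer alignments
# Argument 0 : Reference FASTA file of the SAG
# Argument 1 : Directory containing the .coords files (generated by nucmer), written with its trailing "/"
# Argument 2 : Output file
# Also reads Depth_file_contig_translated, Bp_number_by_dataset.tab and Saanich_metadata.tab from the current directory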
# Usage guard: print the help text on STDOUT and abort when fewer than three
# arguments were given, or when the first argument is one of the help flags.
if (!defined($ARGV[2]) || scalar(grep { $_ eq $ARGV[0] } ("-h", "--h", "-help", "--help")))
{
	print <<'USAGE';
# Script to extract the coverage of a Reference set of contigs (SAGs) by a metagenomes with nucmer
# Argument 0 : Ref file of the SAG
# Argument 1 : Directory of the .coords file (generated by nucmer)
# Argument 2 : Out file

USAGE
	# die with a bare newline: only an empty line reaches STDERR and the
	# script exits with a non-zero status.
	die("\n");
}

# Positional arguments: reference FASTA, location of the .coords files, output file.
my ($fasta_in,$cover_in,$out_file)=@ARGV;
# A .coords row is counted only when its column 9 is >= this value
# (presumably nucmer's percent identity -- confirm against the show-coords options used).
my $th_id=95;
# A dataset is written to the output only when its normalised ratio is >= this value.
my $th_ratio=0;

# Get ref data
# Reads the reference FASTA given as argument 0 and fills:
#   %length   : number of sequence characters per header line (text after ">")
#   $total_bp : number of sequence characters over the whole file
# Sequence lines found before the first header are accumulated under the empty id "".
my %length;
my $id_c="";
my $total_bp=0;
# Three-argument open: the user-supplied path is never interpreted as an open mode.
open(my $fasta_fh,"<",$fasta_in) or die ("pblm opening file $fasta_in: $!");
while (my $line=<$fasta_fh>){
	# Strip both LF and CR so that files with Windows line endings do not
	# count one extra character per sequence line.
	$line=~s/[\r\n]+\z//;
	if ($line=~/^>(.*)/){
		$id_c=$1;
	}
	else{
		my $n_bases=length($line);
		$length{$id_c}+=$n_bases;
		$total_bp+=$n_bases;
	}
}
close $fasta_fh;

# Get information about the reads associated to each contig.
# Each whitespace-separated row of the depth file contributes
#   $depth{<field 1>} = <field 2>      (fields counted from 0)
# The value is later used as a multiplicative weight for the hits of the
# matching "<dataset id>_<contig>" key; keys that are absent get a weight of 1.
my $depth_file_2="Depth_file_contig_translated";
my %depth;
open(my $depth_fh,"<",$depth_file_2) or die ("pblm opening file $depth_file_2: $!\n");
while(my $line=<$depth_fh>){
	chomp($line);
	my @tab=split(" ",$line);
	# Ignore blank or truncated rows instead of storing an undefined weight
	# (which could also overwrite a valid value read earlier for the same key).
	next if (@tab<3);
	$depth{$tab[1]}=$tab[2];
}
close $depth_fh;

# Total number of bp in the different metagenomes.
# Each tab-separated row contributes
#   $total_bp_dataset{<field 1>} = <field 2>      (fields counted from 0)
# The key is matched against the basename of each .coords file and the value
# is used as a divisor when the coverage ratio is normalised.
my $total_read_by_dataset="Bp_number_by_dataset.tab";
my %total_bp_dataset;
open(my $total_fh,"<",$total_read_by_dataset) or die ("pblm opening file $total_read_by_dataset: $!");
while (my $line=<$total_fh>){
	chomp($line);
	my @tab=split("\t",$line);
	# Ignore blank or truncated rows: they carry no usable total.
	next if (@tab<3);
	$total_bp_dataset{$tab[1]}=$tab[2];
}
close $total_fh;

# Get metadata for the metagenomes
# Each tab-separated row is: sample id, date, depth, depth_bis, year, month.
# The fields are stored under $sample_metadata{<sample id>}{<field name>};
# columns missing from a row are stored as undef.
my $saanich_metadata="Saanich_metadata.tab";
my %sample_metadata;
my @metadata_fields=("date","depth","depth_bis","year","month");
open(my $metadata_fh,"<",$saanich_metadata) or die ("pblm opening file $saanich_metadata: $!");
while (my $line=<$metadata_fh>){
	chomp($line);
	my @tab=split("\t",$line);
	# A blank line yields no field at all: skip it rather than registering
	# a sample with an empty id.
	next unless (defined($tab[0]));
	# Hash slice: assign columns 1..5 to the five named fields in one go.
	@{$sample_metadata{$tab[0]}}{@metadata_fields}=@tab[1..5];
}
close $metadata_fh;
# NOTE(review): the two lists below are not referenced anywhere in the visible
# part of the script; kept in case code outside this view relies on them.
my @liste_date=("08/01/09","11/01/09","02/01/10","07/01/10","08/01/10","01/01/11","02/01/11","08/01/11");
my @liste_sample_depth=("10m","100m","120m","135m","150m","200m");

# Main pass: for every "<dataset>.coords" file found under $cover_in, sum the
# weighted hit lengths and write one CSV row per dataset to $out_file:
#   dataset id, year, month, depth_bis, SAG id, ratio
# where ratio = sum(column 6 * weight) / (dataset bp * reference bp) * 1e6
# (the original comment labels this scaling "per Mb").
# A dataset is processed only if it has a positive bp total and a metadata entry.
# NOTE(review): the column numbers (6, 9, 15) depend on the show-coords options
# used to produce the files -- presumably [LEN 1], [% IDY] and the query tag; confirm.

# Core module: bsd_glob() does not split its pattern on whitespace, unlike <...>.
use File::Glob qw(bsd_glob);

# Three-argument open: the user-supplied path is never interpreted as an open mode.
open(my $out_fh,">",$out_file) or die ("pblm opening file $out_file: $!\n");

# Accept the directory with or without its trailing "/". A value that is not an
# existing directory is left untouched, so it still acts as a plain path prefix.
my $cover_dir=$cover_in;
if (-d $cover_dir && $cover_dir!~m{/\z}){
	$cover_dir.="/";
}

# SAG id = what follows "Recruit_" in the path, up to the last "/".
# The "Recruit_" component may also start the path (relative directory).
# Stays empty when the path has no such component.
my $id_sag="";
if ($cover_dir=~m{(?:.*/|\A)Recruit_(.*)/}){
	$id_sag=$1;
}

my @liste_cover=bsd_glob($cover_dir."*.coords");
DATASET: foreach my $file (@liste_cover){
	# Dataset id = file name without directory and without the ".coords" suffix.
	next DATASET unless ($file=~m{(?:.*/)?(.*)\.coords\z});
	my $id=$1;

	my $dataset_bp=$total_bp_dataset{$id};
	next DATASET unless (defined($dataset_bp) && $dataset_bp>0 && defined($sample_metadata{$id}));

	# The reference size is a divisor below: fail with a clear message
	# instead of "Illegal division by zero".
	if ($total_bp==0){
		die ("no sequence found in $fasta_in: cannot normalise the coverage\n");
	}

	my $count=0;
	open(my $coords_fh,"<",$file) or die ("pblm opening file $file: $!\n");
	ROW: while(my $line=<$coords_fh>){
		chomp($line);
		my @tab=split(" ",$line);
		# Alignment rows have "|" as third field; the header row starts with "[S1]".
		next ROW unless (defined($tab[2]) && $tab[2] eq "|" && $tab[0] ne "[S1]");
		next ROW unless ($tab[9]>=$th_id);
		# Weight of the hit: value from the depth table when the
		# "<dataset>_<column 15>" key is known, 1 otherwise.
		my $query=defined($tab[15]) ? $tab[15] : "";
		my $match=$id."_".$query;
		my $weight=defined($depth{$match}) ? $depth{$match} : 1;
		$count+=$tab[6]*$weight;
	}
	close $coords_fh;

	my $ratio=$count/($dataset_bp*$total_bp)*1000000; ## PER MB;
	if ($ratio>=$th_ratio){
		print {$out_fh} join(",",$id,$sample_metadata{$id}{"year"},$sample_metadata{$id}{"month"},$sample_metadata{$id}{"depth_bis"},$id_sag,$ratio)."\n";
	}
}
# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close($out_fh) or die ("pblm closing file $out_file: $!\n");