#!/usr/bin/perl -w


#This takes three input files. The first is a lepmap2 input file, i.e., makeped format.
#The second is lepmap output file, including the phased output (outputPhasedData=1). 
#The third is a SNPsInTheMap file, which has the original SNP number in column 16 and the lm3 chromosome in col 17.
#This script will generate an Rqtl .csv input file for each of the four families in the pedigree. 

############
############
#collect individial id numbers from lepmap input file
#This is the makeped format file that contains the original genotypes.
open (GENO, $ARGV[0]) || die("No genotype file!\nCheck the path to the file.\n");
	while (<GENO>){	
		chomp($line = $_);
		if($line !~ /java/){
		push @data, "$line";		
		}
	}
	close (GENO);
	

@sequenceoffamilies = qw(5 2 3 4 6);
for($famseq = 1; $famseq <5; $famseq ++){


$familyofinterest = $famseq;

$wouldbemom = 2000 + $sequenceoffamilies[$familyofinterest];

	@wormline = ();
	@IDsequence = ();
	@rawgenoarray = ();

for($worm = 0; $worm < $#data+1; $worm ++){
	@wormline = split("\t", $data[$worm]);
	@rawgenos = ();
	$famID = $wormline[0];
	$indID = $wormline[1];
	$momID = $wormline[3];

	if($famID == $sequenceoffamilies[$familyofinterest] && $momID == $wouldbemom){
	push @IDsequence,  $indID;

	for($w =6; $w < $#wormline+1; $w++){
		if($wormline[$w] eq "0 0"){
		push @rawgenos, "NA"
		}
		elsif($wormline[$w] eq "1 1"){
		push @rawgenos, "1"
		}
		elsif($wormline[$w] eq "1 2"){
		push @rawgenos, "2"		
		}
		elsif($wormline[$w] eq "2 2"){
		push @rawgenos, "3"		
		}
		else{die ("The genotype is weird at wormline $w\n");
		}
		
	}
	
	push @rawgenoarray, [@rawgenos];
	$phenonumber = $#rawgenos;

	}
	
}



#	@IDsequence is an array that contains the individual ID for each worm in the family specified by the $familyofinterest
#	@rawgenoarray is an array of arrays; the subarrays contain the sequence of raw genotypes for each individual. These are for mapping genotypes onto phased haplotypes

#Next, load the phase calls output by lepmap, e.g., lepmap3/IPIR.3
	@markerdata = ();		
	@markernum = ();
	@geneticpos = ();
	@phasecalls = ();
	@haplocalls = ();
	@justhaps = ();
	
	open (PHASES, $ARGV[1]) || die("Where's the lepmap phase data?\n");
	while(<PHASES>){
	if($_ !~ /^#/){		#Remove the lepmap2 headers
		chomp($markerline = $_);
		@phasecalls = ();
		@markerdata = split("\t", $markerline);
		
		push @markernum, $markerdata[0];
		push @geneticpos, $markerdata[1];
		@justhaps = split(" ", $markerdata[$familyofinterest + 4]);
		@markerphases = split("",$justhaps[0]);	
		$lastpos = $#markerphases;
		$nworms = ($lastpos +1)/2;

	
		for($j=0; $j<($lastpos)/2; $j+=1){
		if("$markerphases[$j]$markerphases[$j+$nworms]" eq "00"){
			push @phasecalls, "1";
			print "1";
		}
		elsif("$markerphases[$j]$markerphases[$j+$nworms]" eq "01"){
			push @phasecalls,  "3";
			print "3";
		}elsif("$markerphases[$j]$markerphases[$j+$nworms]" eq "10"){
			push @phasecalls,  "2";
			print "2";
		}elsif("$markerphases[$j]$markerphases[$j+$nworms]" eq "11"){
			push @phasecalls,  "4";
			print "4";
		}else{die("what's the genotype?");}		
		}		
			push @haplocalls, [@phasecalls];	
			print "\n";
		
	}	
}	
close(PHASES);


#	@markernum is an array containing the names of the SNP markers, as their numbers from the input file (should be 1:1389 for full snp input file)
#	@geneticpos is an array containing the genetic positions for those markers, from the lepmap2 "male position" column


open (SNP, $ARGV[2]) || die("No SNPinfo file!\nCheck the path to the file.\n");
	while (<SNP>){	
		chomp($line = $_);
		if($line !~ /SNP/){
		@snpinfo = split("\t",$line);
		push @SNPno1389, "$snpinfo[15]";
		}
	}
	close (SNP);
	






#The only thing left is to print this out in the rqtl format


$printout = 1;
if($printout == 1){
open QTLFILE, "> Fam$sequenceoffamilies[$familyofinterest].csv";
select QTLFILE;

#Now, print the marker-phenotype and marker names:
print  "strain";	#Because the strain name will end up in the first column, as in rqtl infiles
	for($p =0; $p < $phenonumber+1; $p++){
	$phenum = $p+1;
	print  ","."M"."$phenum";
	}

for($q = 0; $q < $#markernum+1; $q++){
	print  ","."SNP"."$SNPno1389[$markernum[$q] -1]";	
	}
	print  "\n";

#Print the chromosome number for rqtl infile
for($p =0; $p < $phenonumber+1; $p++){
	print  ","
	}
for($q = 0; $q < $#markernum+1; $q++){
	print  ",1";	#$namecount\n";
	}
	print  "\n";

#Print the marker positions
for($p =0; $p < $phenonumber+1; $p++){
	print  ","
}
for($q = 0; $q < $#geneticpos +1; $q++){
print  ",$geneticpos[$q]"; 	
}
print  "\n";


#print the genotypes
for($eachworm= 0; $eachworm < $#IDsequence+1; $eachworm ++){

	print  "$IDsequence[$eachworm]";

	for($ph = 0; $ph < $phenonumber+1; $ph ++){
		print  ",$rawgenoarray[$eachworm][$ph]";
		}
	for($q = 0; $q < $#geneticpos +1; $q++){
		print  ",$haplocalls[$q][$eachworm]";
		}
	print  "\n";
}

select STDOUT;
close QTLFILE;

}



}




