setwd("~/Dropbox/GeneticMapR/ZakasEtAlSupplementalFiles/FileS3.MapConstruction")
haps <- read.csv(file = "Sb702ParentalHaplotypeAssignment.csv") #founder genotype phases#
rcs <- read.table(file = "PoolSeqReadCounts.txt", head = F) # poolseq SNP allele read counts
HapLogLs <- data.frame(nrow = 10, ncol = 7)#
#
# Calculate haplotype assignment likelihoods #
#
for(j in 1:10){ # for each autosome #
	chr <- haps[haps$snpID %in% rcs[rcs$V1 %in% c(haps[haps$Sb271LG ==j,1]),1],] #Extract the intercross SNPs for that chromosome#
	rc <- rcs[rcs$V1 %in% c(haps[haps$Sb271LG ==j,1]),]	# Extract the poolseq read counts for those SNPs#
	rc[,c(4:7)] <- rc[,c(4:7)] +1 # Add one to each read count to deal with zeros. #
	###### Estimate allele frequencies for each SNP for each population #
	probrefbay <- NULL;#
	probrefLB <- NULL#
	for(i in 1:dim(chr)[1]){#
		probrefbay[i] <- (rc[which(rc$V1 == chr$snpID[i]),4]) / ((rc[which(rc$V1 == chr$snpID[i]),4]) + rc[which(rc$V1 == chr$snpID[i]),5])#
		probrefLB[i] <-  (rc[which(rc$V1 == chr$snpID[i]),6]) / ((rc[which(rc$V1 == chr$snpID[i]),6]) + rc[which(rc$V1 == chr$snpID[i]),7])#
	}#
	probaltbay <- 1-probrefbay#
	probaltLB <- 1-probrefLB#
#
	probmat <- data.frame(probrefbay, probaltbay, probrefLB, probaltLB)#
	probmat <- probmat^2 # Square each allele frequency to get the estimated homozygote frequency at HWE.#
#
	##### Make vectors of genotype probabilities for each of the founder assignment possibilities#
#
	bayvec1 <- NULL#
	lbvec1 <- NULL#
	for(i in 1:dim(chr)[1]){#
		bayvec1[i] <- probmat[i,chr$BayHapSb702[i]+1]#
		lbvec1[i] <- probmat[i, chr$BayHapSb702[i]+3]#
	}#
#
	bayvec2 <- NULL#
	lbvec2 <- NULL#
	for(i in 1:dim(chr)[1]){#
		bayvec2[i] <- probmat[i,chr$LBHapSb702[i]+1]#
		lbvec2[i] <- probmat[i, chr$LBHapSb702[i]+3]#
	}#
#
	##### Next, calculate the log likelihood of each assignment#
	HapLogLs[j, 1] <- sum(log(bayvec1)) # This is the log likelihood that the hypothesized Bayonne chromosome derives from the Bayonne population#
	HapLogLs[j, 2] <- sum(log(lbvec1))	# This is the log likelihood that the hypothesized Bayonne chromosome derives from the Long Beach population#
	HapLogLs[j, 3] <- sum(log(bayvec2)) # This is the log likelihood that the hypothesized Long Beach chromosome derives from the Bayonne population#
	HapLogLs[j, 4] <- sum(log(lbvec2))	# This is the log likelihood that the hypothesized Long Beach chromosome derives from the Long Beach population#
}#
#
	# Sum the log likelihoods to get the log likelihoods for the hypothesized arrangement and the alternative arrangment#
HapLogLs[,5] <- apply(HapLogLs[,c(1,4)], 1, sum)#
HapLogLs[,6] <- apply(HapLogLs[,c(2,3)], 1, sum)#
#
	# Take the log-10 ratio of the likelihoods as a test statistic.#
HapLogLs[,7] <-log(exp(HapLogLs[,5]) / exp(HapLogLs[,6]),10)#
#
names(HapLogLs) <-c ("logLHap1Bay", "logLHap1LB", "logLHap2Bay", "logLHap2LB", "logL12BayLB", "logL12LBBay", "log10LR.BayLBvsLBBay")#
#
HapLogLs
log(-14+36, 10)
HapLogLs[,7]
j
bayvec2
bayvec1
prod(bayvec1)
prod(bayvec2)
log(prod(bayvec1))
setwd("~/Dropbox/GeneticMapR/ZakasEtAlSupplementalFiles 3/FileS3.MapConstruction")
haps <- read.csv(file = "Sb702ParentalHaplotypeAssignment.csv") #founder genotype phases for SNPs infered (Step 1) to have intercross segregation pattern (AAxBB).#
#
rcs <- read.table(file = "PoolSeqReadCounts.txt", head = F) # poolseq SNP allele read counts. Columns are SNP ID, contig, position, count of reference allele reads from Bayonne pools, count of alternative allele reads from Bayonne pools, count of reference allele reads from Long Beach pools, count of alternative allele reads from Long Beach pools. #
#
HapLogLs <- data.frame(nrow = 10, ncol = 7)
for(j in 1:10){ # for each autosome #
	chr <- haps[haps$snpID %in% rcs[rcs$V1 %in% c(haps[haps$Sb271LG ==j,1]),1],] #Extract the intercross SNPs for that chromosome#
	rc <- rcs[rcs$V1 %in% c(haps[haps$Sb271LG ==j,1]),]	# Extract the poolseq read counts for those SNPs#
	rc[,c(4:7)] <- rc[,c(4:7)] +1 # Add one to each read count to deal with zeros. #
	###### Estimate allele frequencies for each SNP for each population#
	probrefbay <- NULL;#
	probrefLB <- NULL#
	for(i in 1:dim(chr)[1]){#
		probrefbay[i] <- (rc[which(rc$V1 == chr$snpID[i]),4]) / ((rc[which(rc$V1 == chr$snpID[i]),4]) + rc[which(rc$V1 == chr$snpID[i]),5])#
		probrefLB[i] <-  (rc[which(rc$V1 == chr$snpID[i]),6]) / ((rc[which(rc$V1 == chr$snpID[i]),6]) + rc[which(rc$V1 == chr$snpID[i]),7])#
	}#
	probaltbay <- 1-probrefbay#
	probaltLB <- 1-probrefLB#
#
	probmat <- data.frame(probrefbay, probaltbay, probrefLB, probaltLB)#
	probmat <- probmat^2 # Square each allele frequency to get the estimated homozygote frequency at Hardy-Weinberg Equilibrium.#
#
	##### Make vectors of genotype probabilities for each of the founder assignment possibilities#
#
	bayvec1 <- NULL#
	lbvec1 <- NULL#
	for(i in 1:dim(chr)[1]){#
		bayvec1[i] <- probmat[i,chr$BayHapSb702[i]+1]#
		lbvec1[i] <- probmat[i, chr$BayHapSb702[i]+3]#
	}#
#
	bayvec2 <- NULL#
	lbvec2 <- NULL#
	for(i in 1:dim(chr)[1]){#
		bayvec2[i] <- probmat[i,chr$LBHapSb702[i]+1]#
		lbvec2[i] <- probmat[i, chr$LBHapSb702[i]+3]#
	}#
#
	##### Next, calculate the log likelihood of each assignment#
#
	HapLogLs[j, 1] <- sum(log(bayvec1)) # This is the log likelihood that the first chromosome derives from the Bayonne population#
	HapLogLs[j, 2] <- sum(log(lbvec1))	# This is the log likelihood that the first chromosome derives from the Long Beach population#
	HapLogLs[j, 3] <- sum(log(bayvec2)) # This is the log likelihood that the second chromosome derives from the Bayonne population#
	HapLogLs[j, 4] <- sum(log(lbvec2))	# This is the log likelihood that the second chromosome derives from the Long Beach population#
}#
#
	# Sum the log likelihoods to get the log likelihoods for the hypothesized arrangement and the alternative arrangment#
HapLogLs[,5] <- apply(HapLogLs[,c(1,4)], 1, sum)#
HapLogLs[,6] <- apply(HapLogLs[,c(2,3)], 1, sum)#
#
	# Take the log-10 ratio of the likelihoods as an easy-to-interpret test statistic.#
HapLogLs[,7] <-log(exp(HapLogLs[,5]) / exp(HapLogLs[,6]),10)#
#
names(HapLogLs) <-c ("logLHap1Bay", "logLHap1LB", "logLHap2Bay", "logLHap2LB", "logL12BayLB", "logL12LBBay", "log10LR.BayLBvsLBBay")#
#
# Report the results:#
#
HapLogLs
for(j in 1:10){ # for each autosome #
	chr <- haps[haps$snpID %in% rcs[rcs$V1 %in% c(haps[haps$Sb271LG ==j,1]),1],] #Extract the intercross SNPs for that chromosome#
	rc <- rcs[rcs$V1 %in% c(haps[haps$Sb271LG ==j,1]),]	# Extract the poolseq read counts for those SNPs#
	rc[,c(4:7)] <- rc[,c(4:7)] +1 # Add one to each read count to deal with zeros. #
	###### Estimate allele frequencies for each SNP for each population#
	probrefbay <- NULL;#
	probrefLB <- NULL#
	for(i in 1:dim(chr)[1]){#
		probrefbay[i] <- (rc[which(rc$V1 == chr$snpID[i]),4]) / ((rc[which(rc$V1 == chr$snpID[i]),4]) + rc[which(rc$V1 == chr$snpID[i]),5])#
		probrefLB[i] <-  (rc[which(rc$V1 == chr$snpID[i]),6]) / ((rc[which(rc$V1 == chr$snpID[i]),6]) + rc[which(rc$V1 == chr$snpID[i]),7])#
	}#
	probaltbay <- 1-probrefbay#
	probaltLB <- 1-probrefLB#
#
	probmat <- data.frame(probrefbay, probaltbay, probrefLB, probaltLB)#
	probmat <- probmat^2 # Square each allele frequency to get the estimated homozygote frequency at Hardy-Weinberg Equilibrium.#
#
	##### Make vectors of genotype probabilities for each of the founder assignment possibilities#
#
	bayvec1 <- NULL#
	lbvec1 <- NULL#
	for(i in 1:dim(chr)[1]){#
		bayvec1[i] <- probmat[i,chr$Hap1[i]+1]#
		lbvec1[i] <- probmat[i, chr$Hap1[i]+3]#
	}#
#
	bayvec2 <- NULL#
	lbvec2 <- NULL#
	for(i in 1:dim(chr)[1]){#
		bayvec2[i] <- probmat[i,chr$Hap2[i]+1]#
		lbvec2[i] <- probmat[i, chr$Hap2[i]+3]#
	}#
#
	##### Next, calculate the log likelihood of each assignment#
#
	HapLogLs[j, 1] <- sum(log(bayvec1)) # This is the log likelihood that the first chromosome derives from the Bayonne population#
	HapLogLs[j, 2] <- sum(log(lbvec1))	# This is the log likelihood that the first chromosome derives from the Long Beach population#
	HapLogLs[j, 3] <- sum(log(bayvec2)) # This is the log likelihood that the second chromosome derives from the Bayonne population#
	HapLogLs[j, 4] <- sum(log(lbvec2))	# This is the log likelihood that the second chromosome derives from the Long Beach population#
}#
#
	# Sum the log likelihoods to get the log likelihoods for the hypothesized arrangement and the alternative arrangment#
HapLogLs[,5] <- apply(HapLogLs[,c(1,4)], 1, sum)#
HapLogLs[,6] <- apply(HapLogLs[,c(2,3)], 1, sum)#
#
	# Take the log-10 ratio of the likelihoods as an easy-to-interpret test statistic.#
HapLogLs[,7] <-log(exp(HapLogLs[,5]) / exp(HapLogLs[,6]),10)#
#
names(HapLogLs) <-c ("logLHap1Bay", "logLHap1LB", "logLHap2Bay", "logLHap2LB", "logL12BayLB", "logL12LBBay", "log10LR.BayLBvsLBBay")#
#
# Report the results:#
#
HapLogLs
10^2.55
10^29
10^26
q()
