# Decoupled Maternal and Zygotic Genetic Effects Shape the Evolution of Development
# Zakas, Deutscher, Kay, Rockman

###########################################
# The following files should be in the working directory:
# FileS1.SbenedictiGeneticMaps.R
# FileS2.BackcrossLarvae.csv

# The following R packages are required for analyses:
# R/qtl (we used version 1.41-6)
library(qtl)
# lme4 (we used version 1.1-13)
library(lme4)

# load Rqtl cross objects 
load("FileS1.SbenedictiGeneticMaps.R")
# Sb702 is final map. 

###########################################
## Functions for QTL mapping with family-structured G2 data
## structscan() partitions individuals into the four G2 families, A, C, F, and H, and then performs interval mapping in each separately before summing the lod scores. 
## structperm() performs permutations within families and then interval mapping via structscan. 
###########################################

structscan <- function(x, pheno= pheno, model = "normal"){
	famA <- subset(x, ind = x$pheno$Family == "A")
	famC <- subset(x, ind = x$pheno$Family == "C")
	famF <- subset(x, ind = x$pheno$Family == "F")
	famH <- subset(x, ind = x$pheno$Family == "H")
	scanA <- scanone(famA, pheno = pheno, model = model)
	scanC <- scanone(famC, pheno = pheno, model = model)
	scanF <- scanone(famF, pheno = pheno, model = model)
	scanH <- scanone(famH, pheno = pheno, model = model)	
	scanA+scanC+scanF+scanH		
}

structperm <- function(x, pheno, model = "normal", n.perm = 100){
	perm.out <- NULL
	for(i in 1:n.perm){	
		temp <- x$pheno[,pheno]
		temp[which(x$pheno$Family == "A")] <- sample(temp[which(x$pheno$Family == "A")])
		temp[which(x$pheno$Family == "C")] <- sample(temp[which(x$pheno$Family == "C")])
		temp[which(x$pheno$Family == "F")] <- sample(temp[which(x$pheno$Family == "F")])
		temp[which(x$pheno$Family == "H")] <- sample(temp[which(x$pheno$Family == "H")])
		temp.cross <- x
		temp.cross$pheno[,pheno] <- temp
		max(structscan(temp.cross, pheno, model))$lod -> perm.out[i]
	}
	perm.out
}	

###########################################
# Find and plot QTL
###########################################

fun <- calc.genoprob(Sb702, step=1) #fun is a cross object for interval mapping

# Zygotic-effect mapping
nchaetaescan <- structscan(fun, pheno = "nchaetae", model = "normal") #Number of chaetae
maxchaetaescan <- structscan(fun, pheno = "maxchaetlength", model = "normal")	#Maximum chaetae length
cirriscan<- structscan(fun, pheno = "cirri.binary", model = "binary")	#presence of anal cirri. Binary trait
G2areascan<- structscan(fun, pheno = "G2.area", model = "normal") #G2 area

# Maternal-effect mapping
# Subset cross to females only
xx <-subset(fun, ind=fun$pheno$Sex=="F")
G3areascan<- structscan(xx, pheno = "G3.area", model = "normal")

#Structured permutations to establish thresholds 
nchaetaeperms <- structperm(fun,pheno="nchaetae", n.perm = 1000)
maxchaetaeperms <- structperm(fun,pheno="maxchaetlength", n.perm = 1000)
cirriperms <- structperm(fun,pheno="cirri.binary", n.perm = 1000, model = "binary") 
G2areaperms <- structperm(fun,pheno="G2.area", n.perm = 1000)
G3areaperms <- structperm(xx,pheno="G3.area", n.perm = 1000) #Females only

# Plot the lod scores (Figure 2E)
ymx=max(c(G3areascan[,3],nchaetaescan[,3],maxchaetaescan[,3],cirriscan[,3]), na.rm=TRUE) # set axes
plot(G3areascan, ylim=c(0,ymx), col="darkgreen")
plot(nchaetaescan, add=TRUE, col="blue")
plot(maxchaetaescan, add=TRUE, col="red")
plot(cirriscan, add=TRUE, col="purple")
abline(h = quantile(nchaetaeperms, 0.95), col = "blue")
abline(h = quantile(maxchaetaeperms, 0.95), col = "red")
abline(h = quantile(G3areaperms, 0.95), col = "darkgreen")
abline(h = quantile(cirriperms, 0.95), col = "purple")

#####################################
# Area Correlations
# G2 larval size shows no zygotic genetic effects
#####################################

# No evidence for linkage between G2 genotype and G2 larval size
plot(G2areascan,  col="black")
abline(h = quantile(G2areaeperms, 0.95), col = "black")

# G2 area and G3 area are not correlated. 
G2.z <- (xx$pheno$G2.area - mean(xx$pheno$G2.area, na.rm =T)) /  sd(xx$pheno$G2.area, na.rm = T)
G3.z <- (xx$pheno$G3.area - mean(xx$pheno$G3.area, na.rm =T)) /  sd(xx$pheno$G3.area, na.rm = T)
cor.test(G3.z, G2.z)

# Figure 2A:
plot(G3.z ~ G2.z, pch =19, cex = 0.5, xlab = "G2 larval size", ylab = "G3 brood mean larval size")

#####################################
# Tests for Family and Interaction Effects and Estimation of Effect Sizes
#####################################

#####################################
# Single phenotype Family effects
# Does G2 family affect phenotype? Here compare models with and without family effects using ANOVA.
#####################################

anova(lm(fun$pheno$nchaetae  ~ 1), lm(fun$pheno$nchaetae  ~ fun$pheno$Family)) #Number of chaetae

anova(lm(fun$pheno$maxchaetlength  ~ 1), lm(fun$pheno$maxchaetlength  ~ fun$pheno$Family)) #Max chaetae length

anova(lm(xx$pheno$G3.area  ~ 1), lm(xx$pheno$G3.area  ~ xx$pheno$Family)) #G3 offspring area

anova(glm(fun$pheno$cirri.binary  ~ 1, family = "binomial"),glm(fun$pheno$cirri.binary  ~ fun$pheno$Family, family = "binomial"), test = "LRT") #presence of anal cirri

anova(lm(fun$pheno$G2.area  ~ 1), lm(fun$pheno$G2.area  ~ fun$pheno$Family)) #G2 offspring size

# We find that G2 larval size and chaetae length show effects of G2 family (i.e., the identity of the F1 mother). These results suggest that these traits are affected by one or more of 1) environmental maternal effects, 2) maternal genetic effects that segregate within Bayonne or Long Beach populations, or 3) zygotic genetic effects that segregate within Bayonne or Long Beach populations, or 4) zygotic genetic effects that segregate between (not within) Bayonne or Long Beach and that are differentially represented among the G2s from different mothers as a result of differential segregation distortion. 

############################
# Non-Additive Effects
# Do we find evidence for epistasis among the QTLs detected by their marginal effects? 
# These results are reported in Table S2
############################

############################
# Procedure: 
# Step 1. Make QTL set for each phenotype
# Step 2. Make Family covariate numeric
# Step 3a. Comparing models with and without a QTL interaction effect as a variable using ANOVA. 
# Step 3b. Comparing models with and without Family as a variable using ANOVA. 
############################

############################
# Step1. Find the marker for each significant QTl. Then make the QTL for each phenotype a set. 
############################ 

# Number Chaetae 	("SNP223"=LG3@2.5, "SNP231"= LG8@1.66)
# Chaetae length 	("SNP1289"=LG3@3.6, "SNP363"=LG9@1.5)
# G3 Area 			("SNP999"=LG6@17.9, "SNP281"=LG7@2.1)
# Cirri				("SNP86"=LG5@5.14)

# Make and fit QTL models for each phenotype
qtlnchaetae <- makeqtl(fun, c(3,8), c(2.5,1.66), what = "prob")
lodnchaetae <- fitqtl(fun, pheno= "nchaetae", qtlnchaetae, method = "hk")

qtlmaxchaetae= makeqtl(fun, c(3,9), c(3.56,1.46), what = "prob")
lodmaxchaetae <- fitqtl(fun, pheno="maxchaetlength", qtlmaxchaetae, method = "hk")

qtlcirri= makeqtl(fun, 5, 5.14, what = "prob")
lodcirri <- fitqtl(fun, pheno= "cirri.binary", qtlcirri, method = "hk", model="binary")

qtlG3= makeqtl(xx, c(6,7), c(17.9,2.1),what = "prob")	#Female G2s only
lodG3area <- fitqtl(xx, pheno="G3.area", qtlG3, method = "hk")

############################ 
# Step 2. Recode the family structure as multiple numeric covariates for the full dataset and the female-only (xx) dataset
############################

matrix(nrow=dim(Sb702$pheno)[1], ncol = 4, data =0) -> FamilyCovariate
FamilyCovariate <- as.data.frame(FamilyCovariate)
names(FamilyCovariate) <- c("A","C","F","H")
FamilyCovariate$A[which(Sb702$pheno$Family == "A")] <- 1
FamilyCovariate$C[which(Sb702$pheno$Family == "C")] <- 1
FamilyCovariate$F[which(Sb702$pheno$Family == "F")] <- 1
FamilyCovariate$H[which(Sb702$pheno$Family == "H")] <- 1

matrix(nrow=dim(xx$pheno)[1], ncol = 4, data =0) -> xxFamilyCovariate
xxFamilyCovariate <- as.data.frame(xxFamilyCovariate)
names(xxFamilyCovariate) <- c("A","C","F","H")
xxFamilyCovariate$A[which(xx$pheno$Family == "A")] <- 1
xxFamilyCovariate$C[which(xx$pheno$Family == "C")] <- 1
xxFamilyCovariate$F[which(xx$pheno$Family == "F")] <- 1
xxFamilyCovariate$H[which(xx$pheno$Family == "H")] <- 1

############################ 
# Step 3. Check for interactions and family effects for QTL of each phenotype
############################ 

############################ 
### Chaetae Number effect size

# Checking for QTL interaction effect
lodnchaetae1 <- fitqtl(fun, pheno="nchaetae", qtlnchaetae, method = "hk", formula=y~Q1+Q2, cov= FamilyCovariate, get.ests = T)
lodnchaetae2 <- fitqtl(fun, pheno="nchaetae", qtlnchaetae, method = "hk", formula=y~Q1*Q2, cov= FamilyCovariate, get.ests = T)
summary(lodnchaetae2)
# Interaction term is not significant

# Model incorporating family effect
lodnchaetaeFam1 <- fitqtl(fun, pheno="nchaetae", qtlnchaetae, method = "hk", formula=y~Q1+Q2+ A+C+H, cov= FamilyCovariate, get.ests = T)

# Statistical test if adding Family improves the model
# Twice the difference in lods is chi-square distributed under the null with df = difference in the number of parameters between the models. In this case, family adds three parameters.) 
test.statistic <- 2* (lodnchaetaeFam1$lod - lodnchaetae1$lod)
pchisq(test.statistic, df = 3, lower = F)
# No support for family in the model. 
 
# We estimate effects from model lodnchaetae1, with no family effect and no epistasis

summary(lodnchaetae1)

############################ 
### Max Chaetae Length effect size

#Checking for QTL interaction effect
lodmaxchaetae1 <- fitqtl(fun, pheno="maxchaetlength", qtlmaxchaetae, method = "hk", formula=y~Q1+Q2, cov= FamilyCovariate, get.ests = T)
lodmaxchaetae2 <- fitqtl(fun, pheno= "maxchaetlength", qtlmaxchaetae, method = "hk", formula=y~Q1*Q2, cov= FamilyCovariate, get.ests = T)
summary(lodmaxchaetae2)
# Interaction term is not significant

# Model incorporating family effect
lodmaxchaetaeFam <- fitqtl(fun, pheno="maxchaetlength", qtlmaxchaetae, method = "hk", formula=y~Q1+Q2+ A+C+H, cov= FamilyCovariate, get.ests = T)

# Statistical test if adding Family improves the model
test.statistic <- 2* (lodmaxchaetaeFam$lod - lodmaxchaetae1$lod)
pchisq(test.statistic, df = 3, lower = F)
# p = 0.0187 for family effect

#Drop family to estimate the variance it explains:
lodmaxchaetaeFam$result.full[1,"%var"] - lodmaxchaetae$result.full[1,"%var"]
# 5.7% of the total variance.

# We estimate effects from model lodmaxchaetaefam, with a family effect and no epistasis 

summary(lodmaxchaetaeFam)

############################ 
### G3 Area effect size
### This analysis uses G2 females only (cross object xx, and xxFamilyCovariate) because this is a maternal trait

#Checking for QTL interaction effect
lodG3area1 <- fitqtl(xx, pheno="G3.area", qtlG3, method = "hk", formula=y~Q1+Q2, cov= xxFamilyCovariate, get.ests = T)
lodG3area2 <- fitqtl(xx, pheno="G3.area", qtlG3, method = "hk", formula=y~Q1*Q2, cov= xxFamilyCovariate, get.ests = T)
summary(lodG3area2)
# Interaction term (i.e., epistasis between LG6 and LG7) is significant at p = 0.0075. 

#Checking for QTL interaction effect with Family effect (intereaction is significant so included here)
lodG3areaFam <- fitqtl(xx, pheno="G3.area", qtlG3, method = "hk", formula=y~Q1*Q2+ A+C+H, cov= xxFamilyCovariate, get.ests = T)

#Test for family improving the model
test.statistic2 <- 2* (lodG3areaFam$lod - lodG3area2$lod)
pchisq(test.statistic2, df = 3, lower = F)
# No support for family in the model. 

# We estimate effects from model lodG3area2, with epistasis and no family effect.

summary(lodG3area2)

############################ 
### Cirri effect size
# There is only one QTL to test here- no interaction. 

lodcirri1 <- fitqtl(fun, pheno="cirri.binary", qtlcirri, method = "hk", formula=y~Q1, cov= FamilyCovariate, get.ests = T, model="binary")
lodcirriFam <- fitqtl(fun, pheno="cirri.binary", qtlcirri, method = "hk", formula=y~Q1+A+C+H, cov= FamilyCovariate, get.ests = T, model="binary")

test.statistic3 <- 2* (lodcirriFam$lod - lodcirri1$lod)
pchisq(test.statistic3, df = 3, lower = F)
# No support for family in the model. 

summary(lodcirri1)

#####################################
# Conclusions:
# Family has an effect on chaetae length when looking at 'phenotypes only' model or when looking at QTL (fitqtl). 
# There is significant interaction of QTLs for G3area only.
#####################################

#####################################
## Figure 3A: Plots of phenotype distributions as a function of QTL genotypes
#####################################

## Number of chaetae
cf=plotPXG(fun, c("SNP223", "SNP231"), pheno="nchaetae")  #shows PXG plot

## Max chaetae length
cg=plotPXG(fun, c("SNP1289", "SNP363"), pheno="maxchaetlength")	

## Offspring G3 area 
ch=plotPXG(xx, c("SNP999", "SNP281"), pheno="G3.area") 

# Plot 1 Chaetae Number
old.par <- par(mfrow=c(3,1)) #mai(Bottom, Left, Top, Right), T+B and L=R must be equal #mpg(titles,label,line)
boxplot(cf[,3]~cf[,1]*cf[,2], main="Chaetae Number",  ylab="Number", cex.lab= 1.5, cex.main=1.8, cex.axis=1.2, col=c("palegreen", "palegreen3","burlywood","palegreen3","burlywood", "tan1","burlywood", "tan1","darkorange1" ), at=c(1,2,3, 5,6,7, 9,10,11), names=c("PP\nPP", "PP\nPL", "PP\nLL", "PL\nPP", "PL\nPL", "PL\nLL", "LL\nPP", "LL\nPL", "LL\nLL"), par(mai=c(0.4,0.6,0.2,0.2)),par(mgp=c(3,1.8,0)))

# Plot 2 Chaetae Length
boxplot(cg[,3]~cg[,2]*cg[,1], main="Chaetae Length",  ylab="Microns",cex.lab= 1.5, cex.main=1.8, cex.axis=1.2, col=c("palegreen", "palegreen3","burlywood","palegreen3","burlywood", "tan1","burlywood", "tan1","darkorange1"), at=c(1,2,3, 5,6,7, 9,10,11), names=c("PP\nPP", "PP\nPL", "PP\nLL", "PL\nPP", "PL\nPL", "PL\nLL", "LL\nPP", "LL\nPL", "LL\nLL"), par(mai=c(0.4,0.6,0.2,0.2)),par(mgp=c(3,1.8,0)))

# Plot 3 G3 Area
boxplot(ch[,3]~ch[,1]*ch[,2], main=expression(bold("G"[3]*" Offspring Size")), ylab="Square Microns",cex.lab= 1.5, cex.main=1.8, cex.axis=1.2, names=c("PP\nPP", "PP\nPL", "PP\nLL", "PL\nPP", "PL\nPL", "PL\nLL", "LL\nPP", "LL\nPL", "LL\nLL"),col=c("palegreen", "palegreen3","burlywood","palegreen3","burlywood", "tan1","burlywood", "tan1","darkorange1"), at=c(1,2,3, 5,6,7, 9,10,11), par(mai=c(0.4,0.6,0.2,0.2)),par(mgp=c(3,1.8,0)))

######################################
# Cheatae length and number are corrolated. 
# Does the shared QTL on chromosome 3 explain this correlation?
######################################

# Plot of Chaetate length vs Number
plot(fun$pheno$maxchaetlength~fun$pheno$nchaetae, xlab="chaetae number", ylab="chaetae length", main="number vs length") # correlation of chaetae length and number
abline(lm(fun$pheno$maxchaetlength~fun$pheno$nchaetae))

summary(lm(fun$pheno$maxchaetlength~fun$pheno$nchaetae))
cor.test(fun$pheno$maxchaetlength,fun$pheno$nchaetae)

## t = 7.6008, df = 200, p-value = 1.116e-12
# 95 percent confidence interval:
# 0.3588156 0.5739558
# sample estimates:
#     cor 
# 0.473416 


###### When using fitQTL at just the QTL on LG3, are the remaining residuls betweeen chaetae number and length still corrolated? (yes)
##### Make 1 qtl from the marker on Ch3 that is shared

# Make a subset of the data where Chaetae Length is not missing (some larave don't have chaetae)
nomissing <- subset(Sb702, ind = which(!is.na(Sb702$pheno$nchaetae)))
nomissing <- subset(nomissing, ind = which(!is.na(nomissing$pheno$maxchaetlength)))
nomissing = calc.genoprob(nomissing, step=1)

qtl3 <- makeqtl(nomissing, c(3), c(2.5), what = "prob")

# Pull the residuals from Chaetae Number and Length after fitting for QTL3
attr(fitqtl(nomissing, pheno = "nchaetae", qtl = qtl3, formula =y~Q1, method = "hk"), "residuals") -> numberresid
attr(fitqtl(nomissing, pheno = "maxchaetlength", qtl = qtl3, formula =y~Q1, method = "hk"), "residuals") -> lengthresid

# Plot Residuals of Chaetae Length vs Number
plot(lengthresid ~ numberresid, main="Residuals of number vs length when CH3 QTL is fit" )
abline(lm(lengthresid ~ numberresid))

summary(lm(lengthresid ~ numberresid))
##Multiple R-squared:  0.1086,	Adjusted R-squared:  0.1041 F-statistic: 24.36 on 1 and 200 DF,  p-value: 1.678e-06
 
### Chaetae Length and Number remain correlated when accounting for shared QTL on LG3. r-squared= 0.22*** with all data, to r-squared= 0.1*** with residuals only
######################################


######################################
# Multivariate analysis of quantitative traits 
# The anal cirri phenotype is excluded because it is discrete (presence/absence)
######################################

crossobject <- xx # Analysis includes G2 females only, to accommodate maternal-effect G3 size. 
crossobject <- sim.geno(crossobject, n.draws=1) # This step imputes missing data on the X chromosome.
crossobject <- calc.genoprob(crossobject)

# Make a scanone object to hold the results of the analysis
scan.holder <- scanone(crossobject, pheno = "nchaetae")
scan.holder$lod <- 0
scan.holder[,4:17] <- 0
names(scan.holder)[3:17] <- c("global.add.P", "global.dom.P", "coefA.t1", "coefD.t1", "coefA.t2", "coefD.t2","coefA.t3", "coefD.t3", "t1.add.P", "t1.dom.P", "t2.add.P", "t2.dom.P", "t3.add.P", "t3.dom.P", "genotype.P")

######################################
# mvscan() is a function that performs multivariate trait mapping for three traits. 
mvscan <- function(crossthing, scan.object){
	t1 <- crossthing$pheno$nchaetae
	t2 <- crossthing$pheno$maxchaetlength
	t3 <- crossthing$pheno$G3.area
	for(j in 1: nchr(crossthing)){
		chr <- names(nmar(crossthing))[j]
		global.add.P <- NULL;
		global.dom.P <- NULL;
		coefA.t1 <- NULL;
		coefD.t1 <- NULL;
		coefA.t2 <- NULL;
		coefD.t2 <- NULL;
		coefA.t3 <- NULL;
		coefD.t3 <- NULL;
		t1.add.P <- NULL;
		t1.dom.P <- NULL;
		t2.add.P <- NULL;
		t2.dom.P <- NULL;
		t3.add.P <- NULL;
		t3.dom.P <- NULL;
		genotype.P <- NULL;
		if(chr != "X"){		# Autosomal scan
			for(i in 1: (nmar(crossthing)[chr])){	
				qtlA <- crossthing$geno[[chr]]$data[,i] -2
				qtlD <- qtlA
				qtlD[qtlA == 0] <- 1/2
				qtlD[qtlA != 0] <- -1/2
				nl <- manova(cbind(t1,t2,t3) ~ qtlA + qtlD + crossthing$pheno$Family)
				nl.null <- manova(cbind(t1,t2,t3) ~ crossthing$pheno$Family)
				global.add.P[i] <- summary(nl)$stats["qtlA", "Pr(>F)"]
				global.dom.P[i] <- summary(nl)$stats["qtlD", "Pr(>F)"]
				coefA.t1[i] <- nl$coefficients["qtlA", 1]
				coefD.t1[i] <- nl$coefficients["qtlD", 1]
				coefA.t2[i] <- nl$coefficients["qtlA", 2]
				coefD.t2[i] <- nl$coefficients["qtlD", 2]
				coefA.t3[i] <- nl$coefficients["qtlA", 3]
				coefD.t3[i] <- nl$coefficients["qtlD", 3]
				t1.add.P[i] <- summary.aov(nl)[[1]]["qtlA", "Pr(>F)"]
				t1.dom.P[i] <- summary.aov(nl)[[1]]["qtlD", "Pr(>F)"]
				t2.add.P[i] <- summary.aov(nl)[[2]]["qtlA", "Pr(>F)"]
				t2.dom.P[i] <- summary.aov(nl)[[2]]["qtlD", "Pr(>F)"]
				t3.add.P[i] <- summary.aov(nl)[[3]]["qtlA", "Pr(>F)"]
				t3.dom.P[i] <- summary.aov(nl)[[3]]["qtlD", "Pr(>F)"]
				genotype.P[i] <- anova(nl, nl.null)$"Pr(>F)"[2] #This is the test for a genotype effect
			}
		} else {			# X-chromosome scan; dominance effects missing because genotypes are PP or LP only. 
			for(i in 1: (nmar(crossthing)[chr])){	
				qtlA <- crossthing$geno[[chr]]$draws[,i,1] -2
				nl <- manova(cbind(t1,t2,t3) ~ qtlA + crossthing$pheno$Family)
				nl.null <- manova(cbind(t1,t2,t3) ~ crossthing$pheno$Family)
				global.add.P[i] <- summary(nl)$stats["qtlA", "Pr(>F)"]
				global.dom.P[i] <- NA
				coefA.t1[i] <- nl$coefficients["qtlA", 1]
				coefD.t1[i] <- NA
				coefA.t2[i] <- nl$coefficients["qtlA", 2]
				coefD.t2[i] <- NA
				coefA.t3[i] <- nl$coefficients["qtlA", 3]
				coefD.t3[i] <- NA
				t1.add.P[i] <- summary.aov(nl)[[1]]["qtlA", "Pr(>F)"]
				t1.dom.P[i] <- NA
				t2.add.P[i] <- summary.aov(nl)[[2]]["qtlA", "Pr(>F)"]
				t2.dom.P[i] <- NA
				t3.add.P[i] <- summary.aov(nl)[[3]]["qtlA", "Pr(>F)"]
				t3.dom.P[i] <- NA	
				genotype.P[i] <- anova(nl, nl.null)$"Pr(>F)"[2] #This is the test for a genotype effect
			}
		}
	# Fill in the scanone object with the results of the scan
	scan.object[which(scan.object$chr == chr),"global.add.P"] <- -log(global.add.P, 10)
	scan.object[which(scan.object$chr == chr),"global.dom.P"] <- -log(global.dom.P, 10)
	scan.object[which(scan.object$chr == chr),"coefA.t1"] <- coefA.t1
	scan.object[which(scan.object$chr == chr),"coefD.t1"] <- coefD.t1
	scan.object[which(scan.object$chr == chr),"coefA.t2"] <- coefA.t2
	scan.object[which(scan.object$chr == chr),"coefD.t2"] <- coefD.t2
	scan.object[which(scan.object$chr == chr),"coefA.t3"] <- coefA.t3
	scan.object[which(scan.object$chr == chr),"coefD.t3"] <- coefD.t3
	scan.object[which(scan.object$chr == chr),"t1.add.P"] <- -log(t1.add.P, 10)
	scan.object[which(scan.object$chr == chr),"t1.dom.P"] <- -log(t1.dom.P, 10)
	scan.object[which(scan.object$chr == chr),"t2.add.P"] <- -log(t2.add.P, 10)
	scan.object[which(scan.object$chr == chr),"t2.dom.P"] <- -log(t2.dom.P, 10)
	scan.object[which(scan.object$chr == chr),"t3.add.P"] <- -log(t3.add.P, 10)
	scan.object[which(scan.object$chr == chr),"t3.dom.P"] <- -log(t3.dom.P, 10)
	scan.object[which(scan.object$chr == chr),"genotype.P"] <- -log(genotype.P, 10)
	}
	return(scan.object)
}
######################################

######## Perform the multivariate genome scan
mvscan(crossobject, scan.holder) -> Scan3

######## Permutations within G2 families followed by mvscan to generate genome-wide null distribution of p-values
structmvperm <- function(x = crossobject, scan.holder, n.perm = 1000){
perm.out <- matrix(nrow = n.perm, ncol = 15, data = NA)
colnames(perm.out) <- c("global.add.P", "global.dom.P", "coefA.t1", "coefD.t1", "coefA.t2", "coefD.t2","coefA.t3", "coefD.t3", "t1.add.P", "t1.dom.P", "t2.add.P", "t2.dom.P", "t3.add.P", "t3.dom.P", "genotype.P")
	for(i in 1:n.perm){	
		temp <- x$pheno
		temp[which(x$pheno$Family == "A"),] <- temp[sample (which(x$pheno$Family == "A")),]
		temp[which(x$pheno$Family == "C"),] <- temp[sample (which(x$pheno$Family == "C")),]
		temp[which(x$pheno$Family == "F"),] <- temp[sample (which(x$pheno$Family == "F")),]
		temp[which(x$pheno$Family == "H"),] <- temp[sample (which(x$pheno$Family == "H")),]
		temp.cross <- x
		temp.cross$pheno <- temp
		permmvscan <- mvscan(temp.cross, scan.holder)
		perm.out[i,] <- apply(permmvscan[,3:17], 2, max, na.rm = T)
		print(i)
	}
	perm.out
}	

#This is slow, about four perms per minute on my desktop:
FullScan3Perms <- structmvperm(crossobject, n.perm = 1000)

######## Results
# The columns of Scan3 report -log10(p) values and effect coefficients from a variety of models. 
# Lod column 15 reports the -log10(p) from a general test of a genotype effect at each position. It compares a model that includes intercept and family only to one that also includes an additive effect and dominance deviation due to the focal genotype. 
summary(Scan3, lod = 15) 
#plot the -log(p) genotype effect. 
plot(Scan3, lod = c(15), ylab = "-log10(p)", lwd = 2, main="Multivariate QTL Scan")
abline(h=quantile(FullScan3Perms[,15], 0.95))

# Five loci exceed the genome-wide p-value threshold. These coincide with the QTLs detected in the univariate analyses above. 

######################################
######## Fit a QTL model

#Specify the QTL genotypes
qtl3A <- crossobject$geno[[3]]$data[,"SNP89"] -2
qtl3D <- qtl3A
qtl3D[qtl3A == 0] <- 1/2
qtl3D[qtl3A != 0] <- -1/2

qtl6A <- crossobject$geno[[6]]$data[,"SNP598"] -2
qtl6D <- qtl6A
qtl6D[qtl6A == 0] <- 1/2
qtl6D[qtl6A != 0] <- -1/2

qtl7A <- crossobject$geno[[7]]$data[,"SNP68"] -2
qtl7D <- qtl7A
qtl7D[qtl7A == 0] <- 1/2
qtl7D[qtl7A != 0] <- -1/2

qtl8A <- crossobject$geno[[8]]$data[,"SNP231"] -2
qtl8D <- qtl8A
qtl8D[qtl8A == 0] <- 1/2
qtl8D[qtl8A != 0] <- -1/2

qtl9A <- crossobject$geno[[9]]$data[,"SNP703"] -2
qtl9D <- qtl9A
qtl9D[qtl9A == 0] <- 1/2
qtl9D[qtl9A != 0] <- -1/2

# Specify the phenotypes
# Raw phenotypes:
t1 <- (crossobject$pheno$nchaetae)
t2 <- (crossobject$pheno$maxchaetlength)
t3 <- (crossobject$pheno$G3.area)

# Full model
nl.A.D <- manova(cbind(t1,t2,t3) ~ qtl3A + qtl3D + qtl6A + qtl6D + qtl7A + qtl7D+ qtl8A + qtl8D + qtl9A + qtl9D + crossobject$pheno$Family)
summary(nl.A.D)
#Some of the dominance effects are not significant

# Reduced model:
nl <- manova(cbind(t1,t2,t3) ~ qtl3A + qtl3D + qtl6A + qtl6D + qtl7A + qtl8A + qtl9A + crossobject$pheno$Family)
summary(nl)

###Test for interactions
nl.AxA <- manova(cbind(t1,t2,t3) ~ qtl3A + qtl3D + qtl6A + qtl6D + qtl7A + qtl8A + qtl9A 
									+ qtl3A:qtl6A + qtl3A:qtl7A + qtl3A:qtl8A + qtl3A:qtl9A 
									+ qtl6A:qtl7A + qtl6A:qtl8A + qtl6A:qtl9A
									+qtl7A:qtl8A + qtl7A:qtl9A + qtl8A:qtl9A
									+ crossobject$pheno$Family)
anova(nl.AxA, nl)									
#Pairwise AxA epistasis does not improve the model									

# Estimates of effects
nl$coef

#####################################
# Figure 3B: Plot the additive effect coefficients
#####################################

par(mfrow =c(1,3))
plot(nl$coef[c(2,4,6:8),c(1,2)], ylab = "Chaetae length (µm)", xlab = "Chaetae number", main = "Multivariate QTL Additive Effects")
points(0,0, cex = 2)
segments(rep(0,5), rep(0,5), nl$coef[c(2,4,6:8),1], nl$coef[c(2,4,6:8),2], col = c("black", "purple", "brown", "blue" , "red"), lwd =3)

plot(nl$coef[c(2,4,6:8),c(1,3)], ylab = "Offspring size (µm2)", xlab = "Chaetae number")
points(0,0, cex = 2)
segments(rep(0,5), rep(0,5), nl$coef[c(2,4,6:8),1], nl$coef[c(2,4,6:8),3], col = c("black", "purple", "brown", "blue" , "red"), lwd =3)

plot(nl$coef[c(2,4,6:8),c(2,3)], ylab = "Offspring size (µm2)", xlab = "Chaetae length (µm)")
points(0,0, cex = 2)
segments(rep(0,5), rep(0,5), nl$coef[c(2,4,6:8),2], nl$coef[c(2,4,6:8),3], col = c("black", "purple", "brown",  "blue", "red"), lwd =3)

######################################
# Analysis of Bayonne Backcross data
# 30 G2 males were crossed to Bayonne (planktotrophic) females.
# We then pheotyped a mean of 44 larvae per cross (range 14-50; median 50)
######################################

BClarvae <- read.csv(file = "FileS2.BackcrossLarvae.csv") 

# Each row reports three phenotypes for one backcross larva: area, maximum length of larval chaetae, and number of chaetae.
# It also reports the phenotypes of the G2 male that fathered each larva, with those phenotypes prefixed "G2."
# And it includes the genotypes of the G2 males at the peak lod markers from the analysis of G2 larval phenotypes:
# qtl3.nch = SNP223 
# qtl8.nch = SNP231 
# qtl3.chl = SNP1289
# qtl9.chl = SNP363

# Tests for penetrant effects of QTL idenified in G2 larvae

# Phenotype: Number of larval chaetae
lmer(nChaetae ~ G2.qtl3nch + G2.qtl8nch + (1|cross), data = BClarvae) -> nchfull
lmer(nChaetae ~ 1 + (1|cross), data = BClarvae) -> nchnull
anova(nchfull, nchnull)
#Significant genotype effect, p = 6e-5
summary(nchfull)
#Additive effects in expected directions. Each lecithotroph allele decreases chaetae number:
#            Estimate Std. Error t value
#(Intercept)  18.6010     1.9437   9.570
#G2.qtl3nch   -1.3703     0.6703  -2.044
#G2.qtl8nch   -3.1903     0.7478  -4.266

# Phenotype: Length of larval chaetae
# This analysis suffers from unbalanced genotype representation among the 30 G2 males. None of the males is homozygous for the planktotroph allele at the LG9 qtl. 
lmer(maxchaetlength ~ G2.qtl3chl + G2.qtl9chl + (1|cross), data = BClarvae) -> chlfull
lmer(maxchaetlength ~ 1 + (1|cross), data = BClarvae) -> chlnull
anova(chlfull, chlnull)
#Significant genotype effect, p = 1.5e-7
summary(chlfull)
#Additive effects of expected direction and magnitude for qtl3, no effect for qtl9. 
#            Estimate Std. Error t value
#(Intercept) 234.7214    17.1721  13.669
#G2.qtl3chl  -28.4416     4.3715  -6.506
#G2.qtl9chl   -0.3703     7.9316  -0.047


#####################################
# Figure 4: Plot the phenotype distributions for backcross broods as a function of G2 paternal genotype
#####################################
# requires ggplot
NCHgenotype <- factor(as.character(paste(BClarvae[,12], BClarvae[,13])))
BClarvae[,16] <- NCHgenotype
names(BClarvae)[16] <- "NCHgenotype"
ggplot(BClarvae, aes(x = nChaetae, y  = G2.Ind, group = G2.Ind, fill = NCHgenotype)) + geom_density_ridges(stat = "binline", binwidth = 1) + scale_fill_manual(values = c("palegreen", "palegreen3","burlywood","palegreen3","burlywood", "tan1","burlywood", "tan1","darkorange1"))+ theme_classic()

