##############################################################################################
################################### Supplementary File 3 #####################################
##############################################################################################
# R scripts used for the analysis of pyrosequencing and flow cytometry data to determine the #
# median expression, expression noise and fitness associated with different pTDH3 alleles.   #
##############################################################################################

# 1 - Analysis of pyrosequencing data to quantify YFP mRNA levels (Figure 1B-C).                  ===> Line 29 
# 2 - Expression: R functions used for filtering out artifacts in flow data and to transform      ===> Line 175
#     fluorescence levels into mRNA levels.
# 3 - Expression: Cleaning flow data using functions defined in 2.                                ===> Line 637
# 4 - Expression: Correction for technical variation between samples and calculation of           ===> Line 793
#     median expression and expression noise for each genotype.
# 5 - Fitness: Analysis of flow data to compute the relative fitness for the 43 pTDH3 alleles.    ===> Line 1192
# 6 - Relationship between median expression level and fitness (Figure 2C).                       ===> Line 1833
# 7 - Impact of expression noise (noise strength) on fitness (Figure 3).                          ===> Line 1985
# 8 - Impact of expression noise (SD) on fitness (Supplementary figures).                         ===> Line 2484
# 9 - Impact of expression noise (CV*) on fitness (Supplementary figures).                        ===> Line 2987
# 10 - Impact of expression noise (LogCV*) on fitness (Supplementary figures).                    ===> Line 3490
# 11 - Expression noise vs fitness: robustness to variation in 3 parameters                       ===> Line 3988
#     (Figure 3 - figure supplement 5-7).
# 12 - Fitness: Direct competition between strains with low expression noise and strains with     ===> Line 4850
#      high expression noise (Figure 4).
# 13 - Expression : Comparison of effects of 20 pTDH3 alleles at the HO locus and at the native   ===> Line 5792
#      TDH3 locus (Figure 2 - figure supplement 1A-B).
# 14 - Analysis of single cell doubling times (Figure 5 - figure supplement 1).                   ===> Line 6458


##################################################################################
# 1 - Analysis of pyrosequencing data to quantify YFP mRNA levels (Figure 1B-C). #
##################################################################################

# Load input file
parent.dir <- "/Path.to.input.files"
setwd(parent.dir)

library(mratios)

DATA <- read.table("CoExtractions_Formated.txt",header=TRUE)


#Compute allele frequencies corrected for pyrosequencing biases

# DNA control samples used as calibration points. The expected YFP allele
# frequency assigned to each set (0, 0.5, 1) follows the variable names.
ZERO <- subset(DATA, CLASS == "DNA" & STRAIN.1 == "Y978" & STRAIN.2 == "Y1182")
FIFTY <- subset(DATA, CLASS == "DNA" & STRAIN.1 == "Y2683" & STRAIN.2 == "EMPTY")
ONE <- subset(DATA, CLASS == "DNA" & STRAIN.1 == "Y1002" & STRAIN.2 == "EMPTY")

# Every calibration point must be represented, otherwise the spline below
# would be fitted on an incomplete range without any warning.
N.CONTROLS <- c(nrow(ZERO),nrow(FIFTY),nrow(ONE))
if (any(N.CONTROLS == 0)) {
  stop("At least one set of DNA control samples (0, 0.5 or 1) is missing from the input file.",call.=FALSE)
}

CONTROLS <- rbind(ZERO,FIFTY,ONE)
# Expected frequencies follow the actual number of replicates found for each
# control instead of assuming exactly three replicates per control.
CONTROLS[,"EXPECTED"] <- rep(c(0,0.5,1),times=N.CONTROLS)

# Smoothing spline mapping the measured pyrosequencing frequency (YFP.PYRO)
# to the expected frequency.
new.spline <- smooth.spline(CONTROLS$YFP.PYRO,CONTROLS$EXPECTED,df=3)

# Calibration curve evaluated on a regular grid; only used by the optional
# diagnostic plot kept below as comments.
x <- seq(0,1,by=0.001)
YFP <- predict(new.spline,x)$y

# # plot(CONTROLS$YFP.PYRO,CONTROLS$EXPECTED,pch=19,col="blue")
# abline(a=0,b=1)
# points(x,YFP,type="l",col="red")

# Apply the calibration to every sample of the data set.
DATA[,"YFP.CORRECTED"] <- predict(new.spline,DATA[,"YFP.PYRO"])$y

write.table(DATA,file="CoExtractions_Corrected.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)


#Separate allele frequencies measured in DNA extractions and RNA extractions.
DATA <- read.table("CoExtractions_Corrected.txt",header=TRUE)

DNA <- subset(DATA, CLASS == "DNA")
RNA <- subset(DATA, CLASS == "RNA")

write.table(DNA,file="CoExtractions_DNA.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)
write.table(RNA,file="CoExtractions_RNA.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)

# Only samples with MIX == 2 are kept for the aggregation across replicates.
DNA <- subset(DNA, MIX == 2)
RNA <- subset(RNA, MIX == 2)

# Grouping shared by the four aggregations below: one output row per
# combination of the right-hand side variables.
AGG.FORMULA <- YFP.CORRECTED ~ STRAIN.1 + STRAIN.2 + MIX + CLASS + YFP.COPY + MEDIAN.FLUO + MEDIAN.FLUO.SD + N.FLUO

#Aggregate DNA data across replicates
DNA.MEAN <- aggregate(AGG.FORMULA, data=DNA, FUN=mean)
DNA.SD <- aggregate(AGG.FORMULA, data=DNA, FUN=sd)

#Aggregate RNA data across replicates
RNA.MEAN <- aggregate(AGG.FORMULA, data=RNA, FUN=mean)
RNA.SD <- aggregate(AGG.FORMULA, data=RNA, FUN=sd)

# RESULT is assembled by row position, which is only meaningful when the DNA
# and RNA aggregates list the same strains in the same order. Fail loudly
# rather than silently pairing DNA and RNA values from different strains.
if (nrow(DNA.MEAN) != nrow(RNA.MEAN) ||
    !identical(as.character(DNA.MEAN[,"STRAIN.1"]),as.character(RNA.MEAN[,"STRAIN.1"]))) {
  stop("DNA and RNA aggregates do not contain the same strains in the same order.",call.=FALSE)
}

RESULT <- RNA.MEAN[,c("STRAIN.1","YFP.COPY","MEDIAN.FLUO","MEDIAN.FLUO.SD","N.FLUO")]
RESULT[,"YFP.DNA.MEAN"] <- DNA.MEAN[,"YFP.CORRECTED"]
RESULT[,"YFP.DNA.SD"] <- DNA.SD[,"YFP.CORRECTED"]
RESULT[,"YFP.RNA.MEAN"] <- RNA.MEAN[,"YFP.CORRECTED"]
RESULT[,"YFP.RNA.SD"] <- RNA.SD[,"YFP.CORRECTED"]

# Optional conversions, only needed if these columns were read as factors.
# RESULT[,"MEDIAN.FLUO"] <- as.numeric(levels(RESULT[,"MEDIAN.FLUO"])[RESULT[,"MEDIAN.FLUO"]])
# RESULT[,"MEDIAN.FLUO.SD"] <- as.numeric(levels(RESULT[,"MEDIAN.FLUO.SD"])[RESULT[,"MEDIAN.FLUO.SD"]])
# RESULT[,"N.FLUO"] <- as.numeric(levels(RESULT[,"N.FLUO"])[RESULT[,"N.FLUO"]])
# RESULT[,"YFP.COPY"] <- as.numeric(levels(RESULT[,"YFP.COPY"])[RESULT[,"YFP.COPY"]])


#CALCULATE RNA LEVEL

# Level of the reference (non-YFP) allele relative to YFP, computed from the
# YFP allele frequency measured in RNA (RNA.FREQ) and in DNA (DNA.FREQ).
# The expression is algebraically equal to
#   ((1 - RNA.FREQ) / RNA.FREQ) / ((1 - DNA.FREQ) / DNA.FREQ)
# and is kept in its original form so that results are numerically unchanged.
GFP.RELATIVE.EXPR <- function(RNA.FREQ,DNA.FREQ) {
  -(1-RNA.FREQ)*DNA.FREQ/((1-RNA.FREQ)*(1-DNA.FREQ)-(1-DNA.FREQ))
}

#CALCULATE GFP LEVEL RELATIVE TO YFP FROM DIPLOID 
DIPLOID.DNA <- subset(DATA, STRAIN.1 == "Y2683" & CLASS == "DNA")
DIPLOID.RNA <- subset(DATA, STRAIN.1 == "Y2683" & CLASS == "RNA")

# Computed for reference only: the RNA levels below use the haploid-mix estimate.
GFP.EXPR.DIPLOID <- GFP.RELATIVE.EXPR(mean(DIPLOID.RNA$YFP.CORRECTED),mean(DIPLOID.DNA$YFP.CORRECTED))


#CALCULATE GFP LEVEL RELATIVE TO YFP FROM HAPLOID MIX 
MIX.DNA <- subset(DATA, STRAIN.1 == "Y1002" & STRAIN.2 == "Y1182"  & CLASS == "DNA")
MIX.RNA <- subset(DATA, STRAIN.1 == "Y1002" & STRAIN.2 == "Y1182"  & CLASS == "RNA")

GFP.EXPR.HAPLOID <- GFP.RELATIVE.EXPR(mean(MIX.RNA$YFP.CORRECTED),mean(MIX.DNA$YFP.CORRECTED))

# GFP.EXPR.DIPLOID <- -(1-mean(DIPLOID.RNA$YFP.CORRECTED))*0.5/((1-mean(DIPLOID.RNA$YFP.CORRECTED))*0.5-0.5)

# RNA level of every strain, computed on whole columns at once. The operands
# and their order are those of the original per-row formula
#   -A*E*C*D/(E*B-B)
# with A = YFP.COPY, E = YFP.RNA.MEAN, C = GFP.EXPR.HAPLOID,
# B = YFP.DNA.MEAN and D = 1 - B.
RNA.FREQ <- RESULT[,"YFP.RNA.MEAN"]
DNA.FREQ <- RESULT[,"YFP.DNA.MEAN"]
COPY.NUMBER <- RESULT[,"YFP.COPY"]

RESULT[,"RNA.LEVEL"] <- -COPY.NUMBER*RNA.FREQ*GFP.EXPR.HAPLOID*(1-DNA.FREQ)/(RNA.FREQ*DNA.FREQ-DNA.FREQ)


# Confidence interval of the RNA/DNA ratio for each strain (mratios::t.test.ratio),
# stored as distances below and above the ratio estimate and scaled by YFP.COPY.
# seq_len() keeps the loop from running on an empty RESULT table.
for (i in seq_len(nrow(RESULT)))
{
	CUR.RNA <- subset(RNA, STRAIN.1 == RESULT[i,"STRAIN.1"])
	CUR.DNA <- subset(DNA, STRAIN.1 == RESULT[i,"STRAIN.1"])
	
	t.ratio <- t.test.ratio(CUR.RNA[,"YFP.CORRECTED"],CUR.DNA[,"YFP.CORRECTED"],alternative="two.sided",var.equal=FALSE,iterativeCI=FALSE)
	
	# estimate[3] is used as the ratio estimate here - NOTE(review): confirm
	# against the mratios documentation.
	RESULT[i,"RNA.LOW.CI"] <- (t.ratio$estimate[3] - t.ratio$conf.int[1])*RESULT[i,"YFP.COPY"]
	RESULT[i,"RNA.HIGH.CI"] <- (t.ratio$conf.int[2] - t.ratio$estimate[3])*RESULT[i,"YFP.COPY"]
}

# Express RNA levels relative to strain Y1002.
WT <- subset(RESULT,STRAIN.1 == "Y1002")[,"RNA.LEVEL"]
RESULT[,"RNA.RELATIVE"] <- RESULT[,"RNA.LEVEL"]/WT
#RESULT[1,"YFP.COPY"] <- 3

write.table(RESULT,file="RNA.FLUO.CURVE.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)


#Determine relationship between RNA level and fluorescence.

# plotCI() is not provided by the only package attached in this section
# (mratios). Attach plotrix - already used by later sections of this file -
# unless a plotCI() implementation is already available in the session.
if (!exists("plotCI")) {
  library(plotrix)
}

RESULT <- read.table("RNA.FLUO.CURVE.txt",header=TRUE)

# Strains with an RNA level of exactly 0 cannot be log-transformed.
DATA.FIT <- subset(RESULT, RNA.LEVEL != 0)
#FIT <- nls(RNA.RELATIVE~exp(a*MEDIAN.FLUO+b),data=DATA.FIT)
# Log-linear fit: log(RNA) = a * fluorescence + b. The starting values are the
# ones nls() falls back to (1 for every parameter) when 'start' is omitted;
# stating them explicitly removes the warning and prevents global objects
# named 'a' or 'b' from being mistaken for data.
FIT <- nls(log(RNA.RELATIVE)~a*MEDIAN.FLUO+b,data=DATA.FIT,start=list(a=1,b=1))

# Fitted curve evaluated over the plotted fluorescence range.
x.mid <- seq(0.45,1.1,by=0.01)
y.mid <- exp(x.mid*coef(FIT)[1]+coef(FIT)[2])

# Plot 1: RNA level on the natural scale.
pdf("RNA.vs.FLUO.NONLINEAR.pdf",useDingbats=FALSE)
plotCI(RESULT$MEDIAN.FLUO,RESULT$RNA.RELATIVE,liw=-0.1,uiw=-0.1,err="y",xlim=c(0.45,1.05),ylim=c(0,2),xlab="Fluorescence Intensity", ylab="EYFP mRNA level",gap=0,sfrac=0,pch=21,cex=2,lwd=1.6)
plotCI(RESULT$MEDIAN.FLUO,RESULT$RNA.RELATIVE,liw=RESULT$RNA.LOW.CI,uiw=RESULT$RNA.HIGH.CI,err="y",xlim=c(0.45,1.05),ylim=c(0,2),xlab="Fluorescence Intensity", ylab="EYFP mRNA level",gap=0,sfrac=0,pch=21,cex=2,add=TRUE)
# Horizontal bars: 1.96 * SD / sqrt(N) of the median fluorescence.
plotCI(RESULT$MEDIAN.FLUO,RESULT$RNA.RELATIVE,liw=1.96*RESULT$MEDIAN.FLUO.SD/sqrt(RESULT$N.FLUO),uiw=1.96*RESULT$MEDIAN.FLUO.SD/sqrt(RESULT$N.FLUO),err="x",pch=21,gap=0,add=TRUE,sfrac=0,cex=2)
points(x.mid,y.mid,type="l",col="red",lwd=1.5)
abline(h=1,lty=2)
# NOTE(review): row 8 is hard-coded; presumably the reference strain - confirm.
abline(v=RESULT[8,"MEDIAN.FLUO"],lty=2)
dev.off()

# Plot 2: same data with the RNA level log-transformed.
pdf("RNA.vs.FLUO.LINEAR.pdf",useDingbats=FALSE)
plotCI(RESULT$MEDIAN.FLUO,log(RESULT$RNA.RELATIVE),liw=-0.1,uiw=-0.1,err="y",xlim=c(0.45,1.05),ylim=c(log(0.02),log(2.2)),xlab="Log(Fluorescence Intensity)", ylab="Log(EYFP mRNA level)",gap=0,sfrac=0,pch=21,cex=2,lwd=1.6)
plotCI(RESULT$MEDIAN.FLUO,log(RESULT$RNA.RELATIVE),liw=RESULT$RNA.LOW.CI,uiw=RESULT$RNA.HIGH.CI,err="y",xlab="Fluorescence Intensity", ylab="EYFP mRNA level",gap=0,sfrac=0,pch=21,cex=2,add=TRUE)
plotCI(RESULT$MEDIAN.FLUO,log(RESULT$RNA.RELATIVE),liw=1.96*RESULT$MEDIAN.FLUO.SD/sqrt(RESULT$N.FLUO),uiw=1.96*RESULT$MEDIAN.FLUO.SD/sqrt(RESULT$N.FLUO),err="x",pch=21,gap=0,add=TRUE,sfrac=0,cex=2)
points(x.mid,log(y.mid),type="l",col="red",lwd=1.5)
abline(h=0,lty=2)
abline(v=RESULT[8,"MEDIAN.FLUO"],lty=2)
dev.off()

# Correlation between fluorescence and log(RNA level) on the fitted samples.
cor.test(DATA.FIT$MEDIAN.FLUO,log(DATA.FIT$RNA.RELATIVE))


##################################################################################################################################
# 2 - Expression: R functions used for filtering out artifacts in flow data and to transform fluorescence levels in mRNA levels. #
##################################################################################################################################

###############################
### a) ROTATION OF FCS DATA ###
###############################

# Apply a linear transformation to a set of points.
#   x        : numeric matrix with one point per column.
#   Rotation : square matrix applied to x by left-multiplication.
# Returns the matrix product Rotation %*% x.
ROT <- function(x,Rotation){
  Rotation %*% x
}

#--------------------------------------------------------------------------------------------------------------------------------#

################################
### b) HARD GATE CALIBRATION ###
################################

# Draw four diagnostic scatter plots of one FCS file with candidate gate
# limits overlaid, and return those limits.
#
# Arguments
#   x                          : path of the FCS file used for calibration.
#   logFSC.A.MIN / .MAX        : limits on log10(FSC.A).
#   logFSC.H.MIN / .MAX        : limits on log10(FSC.H).
#   FSC.A_FSC.H.MIN / .MAX     : limits on the ratio logFSC.A / logFSC.H.
#   Width.MIN / .MAX           : limits on the "Width" channel.
# The default values are the ones the function previously assigned when an
# argument was missing; they now appear in the signature so that args() and
# callers can see them.
#
# Returns a named numeric vector holding the eight limits, in the order
# logFSC.A.MIN, logFSC.A.MAX, logFSC.H.MIN, logFSC.H.MAX, FSC.A_FSC.H.MIN,
# FSC.A_FSC.H.MAX, Width.MIN, Width.MAX.
#
# Side effect: sets par(mfrow=c(2,2)) on the current graphics device and
# draws on it; the layout is not restored.
GATE.CALIB <- function(x,logFSC.A.MIN=5.1,logFSC.A.MAX=6.1,logFSC.H.MIN=5.2,logFSC.H.MAX=6.8,FSC.A_FSC.H.MIN=0.88,FSC.A_FSC.H.MAX=0.94,Width.MIN=30,Width.MAX=80) {
  
  Merge.Frame <- read.FCS(x,transformation=FALSE,alter.names=TRUE)
  
  ##############################
  ##Log transformation of data##
  ##############################
  
  logTrans <- logTransform(transformationId="log10-transformation",logbase=10,r=1,d=1)
  Merge.Frame <- transform(Merge.Frame,`logFSC.A`=logTrans(`FSC.A`))
  Merge.Frame <- transform(Merge.Frame,`logFSC.H`=logTrans(`FSC.H`))
  Merge.Frame <- transform(Merge.Frame,`logFL1.A`=logTrans(`FL1.A`))
  Merge.Frame <- transform(Merge.Frame,`logFL1.H`=logTrans(`FL1.H`))
  
  ####################################
  ##Calculate phenotypes of interest##
  ####################################
  
  Data.Fluo <- as.data.frame(exprs(Merge.Frame))
  # Exact zeros are replaced by 1 before the ratios below are computed.
  Data.Fluo[Data.Fluo == 0] <- 1
  
  # Fluorescence relative to cell size (ratio of log-transformed channels).
  Phenotype3 <- Data.Fluo[,"logFL1.A"]/Data.Fluo[,"logFSC.A"]
  Phenotype3 <- as.matrix(Phenotype3)
  colnames(Phenotype3) <- "FL1/FSC"
  Merge.Frame <- cbind2(Merge.Frame, Phenotype3)
  
  # Ratio of forward-scatter area to height (log-transformed channels).
  Phenotype4 <- (Data.Fluo[,"logFSC.A"])/(Data.Fluo[,"logFSC.H"])
  Phenotype4 <- as.matrix(Phenotype4)
  colnames(Phenotype4) <- "FSC.A/FSC.H"
  Merge.Frame <- cbind2(Merge.Frame, Phenotype4)
  
  PlotAll <- as.data.frame(exprs(Merge.Frame))
  
  
  #############################
  ###Quick plot to set gates###
  #############################
  
  # #Quick plot to set gates
  # quartz(height=14,width=14)
  par(mfrow=c(2,2))
  
  # Panel 1: Width vs logFSC.A
  plot(PlotAll[,"Width"],PlotAll[,"logFSC.A"],pch=20,cex=0.5,col="#00000022",xlim=c(20,100),ylim=c(4,7))
  abline(v=Width.MIN)
  abline(v=Width.MAX)
  abline(h=logFSC.A.MIN)
  abline(h=logFSC.A.MAX)
  
  # Panel 2: logFSC.A vs logFL1.A
  plot(PlotAll[,"logFSC.A"],PlotAll[,"logFL1.A"],pch=20,cex=0.5,col="#00000022",xlim=c(4,7),ylim=c(1.5,6.5))
  abline(v=logFSC.A.MIN)
  abline(v=logFSC.A.MAX)
  
  # Panel 3: logFSC.H vs logFSC.A
  plot(PlotAll[,"logFSC.H"],PlotAll[,"logFSC.A"],pch=20,cex=0.4,col="#00000022",xlim=c(3,8),ylim=c(4,7))
  abline(v=logFSC.H.MIN)
  abline(v=logFSC.H.MAX)
  abline(h=logFSC.A.MIN)
  abline(h=logFSC.A.MAX)
  
  # Panel 4: logFSC.A vs FSC.A/FSC.H
  plot(PlotAll[,"logFSC.A"],PlotAll[,"FSC.A/FSC.H"],pch=20,cex=0.4,col="#00000022",xlim=c(3,8))
  abline(v=logFSC.A.MIN)
  abline(v=logFSC.A.MAX)
  abline(h=FSC.A_FSC.H.MIN)
  abline(h=FSC.A_FSC.H.MAX)
  
  
  
  OUTPUT <- c(logFSC.A.MIN,logFSC.A.MAX,logFSC.H.MIN,logFSC.H.MAX,FSC.A_FSC.H.MIN,FSC.A_FSC.H.MAX,Width.MIN,Width.MAX)
  names(OUTPUT) <- c("logFSC.A.MIN","logFSC.A.MAX","logFSC.H.MIN","logFSC.H.MAX","FSC.A_FSC.H.MIN","FSC.A_FSC.H.MAX","Width.MIN","Width.MAX")
  
  return(OUTPUT)
}


#--------------------------------------------------------------------------------------------------------------------------------#

############################
### c) CLEANING FCS DATA ###
############################


# Filter one flow-cytometry sample and summarise its fluorescence.
#
# Arguments
#   x     : named vector describing one sample. The entries read here are
#           "FILENAMES" (path of the FCS file), "SKIP" (sample is processed
#           only when equal to "NO"), "COUNTER" (printed as a progress mark)
#           and "DAY", "REP.UNIQUE", "PLATE", "POSITION" (used to build the
#           name of the output file).
#   GATES : named numeric vector of hard-gate limits, as returned by
#           GATE.CALIB().
#
# Processing steps (only when the file holds more than 1500 events and
# SKIP == "NO"):
#   1. log10 transformation and computation of YFP.INITIAL (logFL1.A/logFSC.A)
#      and FSC.A/FSC.H;
#   2. rectangular hard gates;
#   3. removal of the second component of a two-component normal mixture
#      fitted on the second robust principal component of
#      (logFSC.A, FSC.A/FSC.H), followed by iterative median +/- 2 MAD
#      trimming of logFSC.A;
#   4. flowClust clustering on (logFSC.H, logFSC.A);
#   5. iterative median +/- 4 MAD trimming of YFP.INITIAL followed by
#      flowClust clustering on (logFSC.A, logFL1.A);
#   6. rotation of (logFSC.A, YFP.INITIAL) around the robust centre so that
#      the main axis of the cloud becomes horizontal (YFP.ROT);
#   7. conversion of YFP.ROT into YFP.FINAL.
#
# Returns a one-row data frame of summary statistics. When the sample is not
# processed every statistic except COUNTS.INITIAL is NA.
#
# Side effects: writes the per-event table of the retained cells (or a single
# row of NA) to "CLEAN.DATA/Day<DAY>_Rep<REP.UNIQUE>_Plate<PLATE>_Well<POSITION>.txt"
# and prints x["COUNTER"]. Relies on the global transformation object
# `logTrans` defined in section 2 e).
CLEANING <- function(x,GATES) {
  
  Merge.Frame <- read.FCS(x["FILENAMES"],transformation=FALSE,alter.names=TRUE)
  
  # Names of the summary statistics, defined once and reused for the layout
  # of the output row.
  OUTPUT.NAMES <- c("COUNTS.INITIAL", "COUNTS.GATES", "COUNTS.SINGLES", "COUNTS.FINAL", "FSC.KURTOSIS", "WIDTH", "FSC.MEDIAN.INITIAL", "FSC.MAD.INITIAL", "FL1.MEDIAN.INITIAL", "FL1.MAD.INITIAL", "YFP.MEDIAN.INITIAL", "YFP.MAD.INITIAL", "YFP.SD.INITIAL", "INTERCEPT.INITIAL", "SLOPE.INITIAL", "THETA", "YFP.MEDIAN.ROT", "YFP.MAD.ROT", "YFP.SD.ROT", "FSC.MEDIAN.FINAL", "FSC.MAD.FINAL", "YFP.MEDIAN.FINAL", "YFP.MAD.FINAL", "YFP.SD.FINAL", "log.YFP.MEDIAN", "log.YFP.MAD", "log.YFP.SD")
  
  OUTPUT <- data.frame(matrix(nrow=1,ncol=length(OUTPUT.NAMES)))
  colnames(OUTPUT) <- OUTPUT.NAMES
  
  N.INITIAL <- nrow(exprs(Merge.Frame))
  OUTPUT["COUNTS.INITIAL"] <- N.INITIAL
  
  # Scalar test, hence && rather than the element-wise &.
  if (N.INITIAL > 1500 && x["SKIP"] == "NO") {
    
    ##############################
    ##Log transformation of data##
    ##############################
    
    # FL1.A is shifted by 10 before the log transformation.
    Start.exp <- exprs(Merge.Frame)
    Start.exp[,"FL1.A"] <- Start.exp[,"FL1.A"] + 10
    
    Merge.Frame <- new("flowFrame",Start.exp)
    
    Merge.Frame <- transform(Merge.Frame,`logFSC.A`=logTrans(`FSC.A`))
    Merge.Frame <- transform(Merge.Frame,`logFSC.H`=logTrans(`FSC.H`))
    Merge.Frame <- transform(Merge.Frame,`logFL1.A`=logTrans(`FL1.A`))
    Merge.Frame <- transform(Merge.Frame,`logFL1.H`=logTrans(`FL1.H`))
    
    ####################################
    ##Calculate phenotypes of interest##
    ####################################
    
    Data.Fluo <- as.data.frame(exprs(Merge.Frame))
    # Exact zeros are replaced by 1 before the ratios below are computed.
    Data.Fluo[Data.Fluo == 0] <- 1
    
    # Fluorescence relative to cell size (ratio of log-transformed channels).
    Phenotype3 <- Data.Fluo[,"logFL1.A"]/Data.Fluo[,"logFSC.A"]
    Phenotype3 <- as.matrix(Phenotype3)
    colnames(Phenotype3) <- "YFP.INITIAL"
    Merge.Frame <- cbind2(Merge.Frame, Phenotype3)
    
    # Ratio of forward-scatter area to height (log-transformed channels).
    Phenotype4 <- (Data.Fluo[,"logFSC.A"])/(Data.Fluo[,"logFSC.H"])
    Phenotype4 <- as.matrix(Phenotype4)
    colnames(Phenotype4) <- "FSC.A/FSC.H"
    Merge.Frame <- cbind2(Merge.Frame, Phenotype4)
    
    PlotAll <- as.data.frame(exprs(Merge.Frame))
    
    ############
    #Hard Gates#
    ############
    
    # The YFP.INITIAL range spans the whole data (no filtering on that
    # channel); the lower bound of 11 on the shifted FL1.A channel excludes
    # events whose raw FL1.A was below 1.
    rectGate <- rectangleGate(filterId="Noise Removal","logFSC.A"=c(GATES["logFSC.A.MIN"],GATES["logFSC.A.MAX"]), "logFSC.H"=c(GATES["logFSC.H.MIN"],GATES["logFSC.H.MAX"]), "FSC.A/FSC.H"=c(GATES["FSC.A_FSC.H.MIN"],GATES["FSC.A_FSC.H.MAX"]),"Width"=c(GATES["Width.MIN"],GATES["Width.MAX"]),"YFP.INITIAL"=c(min(PlotAll[,"YFP.INITIAL"]),max(PlotAll[,"YFP.INITIAL"])),"FL1.A"=c(11,max(PlotAll[,"FL1.A"])))
    
    Hard.Gates <- Subset(Merge.Frame, rectGate)
    Hard.Gates.exp <- exprs(Hard.Gates)
    
    OUTPUT["COUNTS.GATES"] <- nrow(Hard.Gates.exp)
    
    ####################
    #Doublet Hard Gates#
    ####################
    
    # Robust PCA of (logFSC.A, FSC.A/FSC.H); the scores on the second
    # component are modelled as a mixture of two normal distributions.
    Doublet.Model <- PCAgrid(cbind(Hard.Gates.exp[,"logFSC.A"],Hard.Gates.exp[,"FSC.A/FSC.H"]),k=2,method="sd",scores=TRUE,center="median")
    
    Scores <- Doublet.Model$scores
    
    Distri <- normalmixEM2comp(Scores[,2],sigsqrd=c(0.0022,0.0068)^2,mu=c(-0.0013,0.0086),lambda=c(0.56,0.44))
    
    Lambda <- Distri$lambda
    Mu <- Distri$mu
    Sigma <- Distri$sigma 
    
    # Order the two components by increasing mean.
    Order <- c(which(Mu == min(Mu)),which(Mu == max(Mu)))
    
    Lambda <- Lambda[Order]
    Mu <- Mu[Order]
    Sigma <- Sigma[Order] 
    
    #Good cluster
    # Threshold = score at which the weighted densities of the two components
    # are equal, searched between Mu[1] and Mu[2] + Sigma[2].
    f <- function(x) dnorm(x,m=Mu[1],sd=Sigma[1])*Lambda[1]-dnorm(x,m=Mu[2],sd=Sigma[2])*Lambda[2]
    # NOTE(review): when uniroot() fails, try() returns a "try-error" object
    # and the comparison below is no longer numeric; an explicit fallback
    # (see the commented-out code) should be decided by the analyst.
    Threshold <- try(uniroot(f,interval=c(Mu[1],Mu[2]+Sigma[2]))$root)
    
    # if (Threshold > 0.1)
    # {
    # Threshold <- 0.075
    # }
    
    #Remove big cells based on FSC.A/FSC.H
    Position <- which(Scores[,2] < Threshold)
    Doublet.Gates.exp <- Hard.Gates.exp[Position,]
    Doublet.Gates <- new("flowFrame",Doublet.Gates.exp)
    
    #Remove cells with extreme FSC.A 
    DOUBLETS <- Doublet.Gates.exp[,"logFSC.A"]
    
    MEDIAN <- median(DOUBLETS)
    MAD <- mad(DOUBLETS)
    LOW <- MEDIAN - 2*MAD
    HIGH <- MEDIAN + 2*MAD
    
    NEW.MEDIAN <- MEDIAN
    OLD.MEDIAN <- median(DOUBLETS[which(DOUBLETS > LOW & DOUBLETS < HIGH)])
    
    # Iterate the median +/- 2 MAD window until two successive medians differ
    # by at most 0.001.
    while (abs(NEW.MEDIAN-OLD.MEDIAN) > 0.001)
    {
      NEW.DOUBLETS <- DOUBLETS[which(DOUBLETS > LOW & DOUBLETS < HIGH)]
      OLD.MEDIAN <- NEW.MEDIAN
      NEW.MEDIAN <- median(NEW.DOUBLETS)
      NEW.MAD <- mad(NEW.DOUBLETS)
      
      LOW <- NEW.MEDIAN - 2*NEW.MAD
      HIGH <- NEW.MEDIAN + 2*NEW.MAD
    }
    
    rectGate <- rectangleGate(filterId="Outliers logFSC.A","logFSC.A"=c(LOW,HIGH))
    
    Final.Doublets <- Subset(Doublet.Gates, rectGate)
    
    # Kurtosis of logFSC.A computed before the median/MAD trimming.
    OUTPUT["FSC.KURTOSIS"] <- kurtosis(DOUBLETS)
    
    #################
    #Singles Cluster#
    #################
    
    Doublet.filter <- flowClust(Final.Doublets,varNames=c("logFSC.H","logFSC.A"),K=1,B=50,min.count=1000,nu.est=2,trans=0,seed=10,z.cutoff=0,level=0.9,tol=1e-4)
    Well.pop <- split(Final.Doublets,Doublet.filter,population=list(sc1=1))
    Well.Doublet <- Well.pop$sc1
    
    Doublets.exp <- as.data.frame(exprs(Well.Doublet))
    
    OUTPUT["COUNTS.SINGLES"] <- nrow(Doublets.exp)
    
    # plot(Hard.Gates.exp[,"logFSC.A"],Hard.Gates.exp[,"logFSC.H"],pch=20,col="#00000066")
    # points(Doublets.exp[,"logFSC.A"],Doublets.exp[,"logFSC.H"],pch=20,col="#FF000099")
    
    #############
    #Fluo filter#
    #############
    
    #REMOVE OUTLIERS FL1/FSC
    FLUO <- Doublets.exp[,"YFP.INITIAL"]
    
    MEDIAN <- median(FLUO)
    MAD <- mad(FLUO)
    LOW <- MEDIAN - 4*MAD
    HIGH <- MEDIAN + 4*MAD
    
    NEW.MEDIAN <- MEDIAN
    OLD.MEDIAN <- median(FLUO[which(FLUO > LOW & FLUO < HIGH)])
    
    # Same iterative trimming as above, with a median +/- 4 MAD window.
    while (abs(NEW.MEDIAN-OLD.MEDIAN) > 0.001)
    {
      NEW.FLUO <- FLUO[which(FLUO > LOW & FLUO < HIGH)]
      OLD.MEDIAN <- NEW.MEDIAN
      NEW.MEDIAN <- median(NEW.FLUO)
      NEW.MAD <- mad(NEW.FLUO)
      
      LOW <- NEW.MEDIAN - 4*NEW.MAD
      HIGH <- NEW.MEDIAN + 4*NEW.MAD
    }
    
    rectGate <- rectangleGate(filterId="Outliers FL1/FSC Removal","YFP.INITIAL"=c(LOW,HIGH))
    
    Hard.Fluo <- Subset(Well.Doublet, rectGate)
    
    Gate.Fluo <- flowClust(Hard.Fluo, varNames=c("logFSC.A","logFL1.A"),K=1,B=50,min.count=1000,nu.est=1,trans=0,z.cutoff=0.5,seed=10,tol=1e-5,nu=1.5,level=0.98)
    Well.pop <- split(Hard.Fluo,Gate.Fluo,population=list(sc1=1))
    Well.Fluo <- Well.pop$sc1
    Fluo.exp <- as.data.frame(exprs(Well.Fluo))
    
    #SAVE DATA
    OUTPUT["COUNTS.FINAL"] <- nrow(Fluo.exp)
    OUTPUT["WIDTH"] <- median(Fluo.exp[,"Width"])
    OUTPUT["FSC.MEDIAN.INITIAL"] <- median(Fluo.exp[,"logFSC.A"])
    OUTPUT["FSC.MAD.INITIAL"] <- mad(Fluo.exp[,"logFSC.A"])
    OUTPUT["FL1.MEDIAN.INITIAL"] <- median(Fluo.exp[,"logFL1.A"])
    OUTPUT["FL1.MAD.INITIAL"] <- mad(Fluo.exp[,"logFL1.A"])
    OUTPUT["YFP.MEDIAN.INITIAL"] <- median(Fluo.exp[,"YFP.INITIAL"])
    OUTPUT["YFP.MAD.INITIAL"] <- mad(Fluo.exp[,"YFP.INITIAL"])
    OUTPUT["YFP.SD.INITIAL"] <- sd(Fluo.exp[,"YFP.INITIAL"])
    
    # plot(Doublets.exp$logFSC.A,Doublets.exp$logFL1.A,pch=20,col="#00000066")
    # points(Fluo.exp$logFSC.A,Fluo.exp$logFL1.A,pch=20,col="#FF000099")
    
    ###############################################################
    #####Remove correlation between logFSC.A and FL1.A/FSC.A#######
    ###############################################################
    
    #1-Define orthogonal regression
    Intercept <- c()
    Slope <- c()
    Theta <- c()
    
    Fluo.Model <- PCAgrid(cbind(Fluo.exp[,"logFSC.A"],Fluo.exp[,"YFP.INITIAL"]),k=2,method="sd",scores=FALSE,center="median")
    
    #2-Center of rotation
    x.center <- Fluo.Model$center[1]
    y.center <- Fluo.Model$center[2]
    
    #3-Initial Intercept and Slope
    # Slope of the first principal axis and intercept of the line of that
    # slope passing through the centre.
    Slope[1] <- Fluo.Model$loadings[2,1] / Fluo.Model$loadings[1,1]
    Intercept[1] <- Fluo.Model$center[2] - Slope[1]*Fluo.Model$center[1]
    
    #4-Calculate angle of rotation
    a <- c(x.center-0,y.center-Intercept[1]) #Vector from Intercept to Centroid
    b <- c(x.center-0,y.center-y.center) #Vector with slope 0 through Centroid
    
    Theta[1] <- acos(sum(a*b)/(sqrt(sum(a*a))*sqrt(sum(b*b)))) #Angle between 2 vectors
    
    # acos() returns an unsigned angle; its sign is taken from the slope.
    if (Slope[1] < 0)
    {
      Theta[1] <- -Theta[1]
    }
    
    #5-Define rotation matrix
    # Filled column by column: rows are (cos, sin) and (-sin, cos).
    Rotation <- matrix(c(cos(Theta[1]),-sin(Theta[1]),sin(Theta[1]),cos(Theta[1])),ncol=2,nrow=2)
    
    #6-Transform Data
    # Points are centred, rotated, then moved back to the original centre.
    Coord <- t(as.matrix(Fluo.exp[,c("logFSC.A","YFP.INITIAL")]))
    
    Coord[1,] <- Coord[1,] - x.center
    Coord[2,] <- Coord[2,] - y.center
    
    Result <- ROT(x=Coord,Rotation=Rotation)
    
    Result[1,] <- Result[1,] + x.center
    Result[2,] <- Result[2,] + y.center
    
    #7-Keep record of rotated values
    Fluo.exp[,"FSC.FINAL"] <- Result[1,]
    Fluo.exp[,"YFP.ROT"] <- Result[2,]
    
    
    ###########################################################################
    #Apply correction for linear relation between fluorescence and mRNA levels#
    ###########################################################################
    
    # NOTE(review): REF, NEG and the two numeric constants in the formula are
    # hard-coded calibration values whose origin is not visible here.
    REF <- 0.905811693
    NEG <- 0.519116913
    
    # Computed on the whole column at once (same formula as the former
    # row-by-row loop, without repeated data-frame assignments).
    Fluo.exp[,"YFP.FINAL"] <- (exp((Fluo.exp[,"YFP.ROT"]-0.905274742)*log(10)/0.294448097) - 0.05) * (REF - NEG) + NEG
    
    OUTPUT["INTERCEPT.INITIAL"] <- Intercept[1]
    OUTPUT["SLOPE.INITIAL"] <- Slope[1]
    OUTPUT["THETA"] <- Theta[1]
    OUTPUT["YFP.MEDIAN.ROT"] <- median(Fluo.exp[,"YFP.ROT"])
    OUTPUT["YFP.MAD.ROT"] <- mad(Fluo.exp[,"YFP.ROT"])
    OUTPUT["YFP.SD.ROT"] <- sd(Fluo.exp[,"YFP.ROT"])
    OUTPUT["FSC.MEDIAN.FINAL"] <- median(Fluo.exp[,"FSC.FINAL"])
    OUTPUT["FSC.MAD.FINAL"] <- mad(Fluo.exp[,"FSC.FINAL"])
    OUTPUT["YFP.MEDIAN.FINAL"] <- median(Fluo.exp[,"YFP.FINAL"])
    OUTPUT["YFP.MAD.FINAL"] <- mad(Fluo.exp[,"YFP.FINAL"])
    OUTPUT["YFP.SD.FINAL"] <- sd(Fluo.exp[,"YFP.FINAL"])
    OUTPUT["log.YFP.MEDIAN"] <- median(log(Fluo.exp[,"YFP.FINAL"]))
    OUTPUT["log.YFP.MAD"] <- mad(log(Fluo.exp[,"YFP.FINAL"]))
    OUTPUT["log.YFP.SD"] <- sd(log(Fluo.exp[,"YFP.FINAL"]))
    
    # Progress indicator.
    print(x["COUNTER"])
    
    #Save clean data with rotation correction
    Data <- Fluo.exp[,c("Width","Time","logFSC.A","logFSC.H","logFL1.A","logFL1.H","YFP.INITIAL","YFP.ROT","FSC.FINAL","YFP.FINAL")]
    write.table(Data,file=paste("CLEAN.DATA/","Day",x["DAY"],"_Rep",x["REP.UNIQUE"],"_Plate",x["PLATE"],"_Well",x["POSITION"],".txt",sep=""),row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)
    
    return(OUTPUT)
    
  } else {
    # Sample skipped or too few events: only the initial count is reported
    # and a one-row table of NA is written so that every sample has a file.
    OUTPUT[] <- NA
    OUTPUT["COUNTS.INITIAL"] <- N.INITIAL
    
    Data <- as.data.frame(matrix(data=NA,ncol=10))
    colnames(Data) <- c("Width","Time","logFSC.A","logFSC.H","logFL1.A","logFL1.H","YFP.INITIAL","YFP.ROT","FSC.FINAL","YFP.FINAL")
    write.table(Data,file=paste("CLEAN.DATA/","Day",x["DAY"],"_Rep",x["REP.UNIQUE"],"_Plate",x["PLATE"],"_Well",x["POSITION"],".txt",sep=""),row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)
    
    return(OUTPUT)
  }
  
}


#--------------------------------------------------------------------------------------------------------------------------------#

################################
### d) QUALITY CONTROL PLOTS ###
################################

# Write a four-panel quality-control PDF comparing the raw events of one FCS
# file with the events retained after cleaning.
#
# Argument
#   x : named vector with entries "INITIAL" (path of the raw FCS file),
#       "CLEAN" (path of the cleaned table written by CLEANING()), and
#       "PLATE", "REP", "POSITION", "STRAIN", "CLASS" (used to build the plot
#       title and the file name).
#
# Output: "CLEANING.PLOTS/P<PLATE>R<REP>_<POSITION>_<STRAIN>_<CLASS>.pdf".
# Returns the value of dev.off(). Relies on the global transformation object
# `logTrans` defined in section 2 e).
FCS.PLOT <- function(x) {
  
  INITIAL.DATA <- read.FCS(x["INITIAL"],transformation=FALSE,alter.names=TRUE)
  CLEAN.DATA <-  read.table(x["CLEAN"],header=TRUE,as.is=TRUE)
  
  ##############################
  ##Log transformation of data##
  ##############################
  
  Merge.Frame <- transform(INITIAL.DATA,`logFSC.A`=logTrans(`FSC.A`))
  Merge.Frame <- transform(Merge.Frame,`logFSC.H`=logTrans(`FSC.H`))
  Merge.Frame <- transform(Merge.Frame,`logFL1.A`=logTrans(`FL1.A`))
  Merge.Frame <- transform(Merge.Frame,`logFL1.H`=logTrans(`FL1.H`))
  
  ####################################
  ##Calculate phenotypes of interest##
  ####################################
  
  Data.Fluo <- as.data.frame(exprs(Merge.Frame))
  # Exact zeros are replaced by 1 before the ratios below are computed.
  Data.Fluo[Data.Fluo == 0] <- 1
  
  Phenotype3 <- Data.Fluo[,"logFL1.A"]/Data.Fluo[,"logFSC.A"]
  Phenotype3 <- as.matrix(Phenotype3)
  colnames(Phenotype3) <- "YFP.INITIAL"
  Merge.Frame <- cbind2(Merge.Frame, Phenotype3)
  
  Phenotype4 <- (Data.Fluo[,"logFSC.A"])/(Data.Fluo[,"logFSC.H"])
  Phenotype4 <- as.matrix(Phenotype4)
  colnames(Phenotype4) <- "FSC.A/FSC.H"
  Merge.Frame <- cbind2(Merge.Frame, Phenotype4)
  
  PLOT.ALL <- as.data.frame(exprs(Merge.Frame))
  
  NAME <- paste("P",x["PLATE"],"R",x["REP"],"_",x["POSITION"],"_",x["STRAIN"],"_",x["CLASS"],sep="")
  
  
  pdf(paste("CLEANING.PLOTS/",NAME,".pdf",sep=""))
  
  # Close the PDF device if a plotting call fails, so that no device is left
  # open; when the function completes normally the device has already been
  # closed by the final dev.off() and this handler does nothing.
  PDF.DEVICE <- dev.cur()
  on.exit(if (PDF.DEVICE %in% dev.list()) dev.off(PDF.DEVICE), add=TRUE)
  
  par(mfrow=c(2,2))
  
  # Panel 1: raw events (grey) and retained events (red), FSC area vs height.
  # 'main' is only passed to plot(): it is not a valid argument of points().
  plot(PLOT.ALL$logFSC.A,PLOT.ALL$logFSC.H,pch=20,cex=0.5,col="#00000033",xlim=c(4.3,6.5),ylim=c(5,7),xlab="logFSC.A",ylab="logFSC.H",main=NAME)
  points(CLEAN.DATA$logFSC.A,CLEAN.DATA$logFSC.H,pch=20,cex=0.5,col="#FF000066")
  
  # Panel 2: raw events (grey) and retained events (red), size vs fluorescence.
  plot(PLOT.ALL$logFSC.A,PLOT.ALL$logFL1.A,pch=20,cex=0.5,col="#00000033",xlim=c(4.3,6.5),ylim=c(1.5,6.5),xlab="logFSC.A",ylab="logFL1.A",main=NAME)
  points(CLEAN.DATA$logFSC.A,CLEAN.DATA$logFL1.A,pch=20,cex=0.5,col="#FF000066")
  
  # Panel 3: retained events before (red) and after (green) correction, each
  # with its least-squares regression line.
  plot(CLEAN.DATA$logFSC.A,CLEAN.DATA$YFP.INITIAL,pch=20,cex=0.5,col="#FF0000AA",xlab="logFSC.A",ylab="FL1/FSC",main=NAME)
  abline(lm(CLEAN.DATA$YFP.INITIAL~CLEAN.DATA$logFSC.A),lty=2,col="red",lwd=2.5)
  points(CLEAN.DATA$FSC.FINAL,CLEAN.DATA$YFP.FINAL,pch=20,cex=0.5,col="#00FF00AA")
  abline(lm(CLEAN.DATA$YFP.FINAL~CLEAN.DATA$FSC.FINAL),lty=2,col="green",lwd=2.5)
  
  # Panel 4: distribution of the final expression values.
  hist(CLEAN.DATA$YFP.FINAL,breaks=50,main=NAME,xlab="FL1/FSC")
  
  dev.off()
  
}

#--------------------------------------------------------------------------------------------------------------------------------#

############################################
### e) log10 transformation of flow data ###
############################################

# Shared log10 transformation object, used by CLEANING() and FCS.PLOT() to
# derive the log-transformed channels.
logTrans <- logTransform(
  transformationId = "log10-transformation",
  logbase = 10,
  r = 1,
  d = 1
)



####################################################################
# 3 - Expression: Cleaning flow data using functions defined in 2. #
####################################################################


#Clear memory
# Removes every object of the global environment. If this file is run from
# top to bottom this includes the functions of section 2, which are loaded
# again by the source() call in b) below.
rm(list=ls())
# A negative 'warn' value suppresses all warnings for the rest of the session.
options(warn=-1)

########################
# a) LOADING LIBRARIES #
########################

library(flowCore)
library(flowClust)
library(flowViz)
library(plotrix)
library(nlme)
library(MethComp)
library(outliers)
library(pcaPP)

library(reshape2)
library(MASS)
library(ggplot2)
library(Hmisc)
library(fBasics)
library(lawstat)
library(fitdistrplus)
library(mixtools)
library(vioplot)
library(gplots)
library(RColorBrewer)
library(calibrate)

# Bind the name 'box' to the graphics implementation in the global
# environment - presumably because one of the packages attached above
# defines another function called box().
box <- graphics::box


########################
# b) LOADING FUNCTIONS #
########################

#Set source directory
# NOTE(review): machine-specific absolute path; adapt before running.
source.dir <- "/Users/fduveau/Documents/Lab/Post-Doc/Wittkopp/Project/Mapping/Accuri Experiments/Templates.Analysis"
setwd(source.dir)

#File containing R functions defined in section 2.
source("Cleaning.Functions.2.R")


#################
# c) CLEAN DATA #
#################

#Set working directory
parent.dir <- "/Path.to.input.file"
setwd(parent.dir)

#Load experiment setup
SETUP <- read.table("../TEMPLATE.FIX.txt",header=TRUE,as.is=TRUE)

#Load paths of FCS files. Warning: The order of FCS files in their folder should be the same as the order of samples in "TEMPLATE.FIX.txt"
# 'pattern' is a regular expression: the dot is escaped and the expression is
# anchored so that only names ending in ".fcs" are listed.
FILENAMES <- list.files("..",pattern="\\.fcs$",recursive=TRUE,include.dirs=TRUE,full.names=TRUE)

# One FCS file is expected per row of the setup table; without this check a
# shorter list of files could be recycled silently.
if (length(FILENAMES) != nrow(SETUP)) {
  stop("Number of FCS files (",length(FILENAMES),") differs from the number of samples in TEMPLATE.FIX.txt (",nrow(SETUP),").",call.=FALSE)
}

SETUP[,"FILENAMES"] <- FILENAMES
SETUP[,"COUNTER"] <- seq_len(nrow(SETUP))

#Analyze Glucose data.
# Only the first 384 samples of the setup table are processed here.
CUR <- SETUP[1:384,]

#Determine Hard Gates
# Default gates drawn on the first FCS file, then adjusted by hand
# (addressed by name rather than by position in the vector).
GATES <- GATE.CALIB(FILENAMES[1])
GATES["logFSC.A.MAX"] <- 6.2
GATES["logFSC.A.MIN"] <- 5.2
CUR[,"SKIP"] <- "NO"

#Clean Data
# NOTE(review): apply() hands each row to CLEANING() as a character vector;
# numeric columns may be padded with spaces by the conversion, which would
# then appear in the names of the files written to CLEAN.DATA - confirm.
Output <- apply(CUR,1,CLEANING,GATES=GATES)


# One row of summary statistics per row of SETUP; rows of samples that were
# not processed stay NA.
OUTPUT <- as.data.frame(Output[[1]])
OUTPUT[seq_len(nrow(SETUP)),] <- NA

for (i in seq_len(nrow(CUR)))
{
  if (!is.null(Output[[i]]))
  {
    OUTPUT[i,] <- Output[[i]]
  }
}

write.table(OUTPUT,"Experiment.Output.txt",row.names=FALSE,sep="\t")

# The last two columns of SETUP (FILENAMES and COUNTER, added above) are left
# out of the final table.
CLEAN <- cbind.data.frame(SETUP[,1:(ncol(SETUP)-2)],OUTPUT)

write.table(CLEAN,"Clean.Data.txt",row.names=FALSE,sep="\t")

CLEAN <- read.table("Clean.Data.txt",header=TRUE,as.is=TRUE)


############################
# d) QUALITY CONTROL PLOTS #
############################

##### WARNING !!!!! Order of clean files should be the same as order of FCS files !!!! #####
# Paths of the cleaned tables, relative to the working directory.
CLEANAMES <- paste(
  "CLEAN.DATA/",
  list.files("CLEAN.DATA",pattern=".txt",recursive=TRUE,include.dirs=TRUE),
  sep=""
)


# Index (row of CLEAN / position in FILENAMES) of the sample to draw.
SAMPLE.TO.PLOT <- 2154

# Description of the sample in the form expected by FCS.PLOT(). The cleaned
# file is looked up with an offset of 6*96 positions relative to the FCS file.
x <- setNames(
  c(
    FILENAMES[SAMPLE.TO.PLOT],
    CLEANAMES[SAMPLE.TO.PLOT-6*96],
    CLEAN[SAMPLE.TO.PLOT,"PLATE"],
    CLEAN[SAMPLE.TO.PLOT,"REP"],
    CLEAN[SAMPLE.TO.PLOT,"POSITION"],
    CLEAN[SAMPLE.TO.PLOT,"STRAIN"],
    CLEAN[SAMPLE.TO.PLOT,"CLASS"]
  ),
  c("INITIAL","CLEAN","PLATE","REP","POSITION","STRAIN","CLASS")
)

FCS.PLOT(x=x)


######CORRECT WRONG SAMPLES#####

# Re-run the cleaning for three samples and substitute their rows in
# "Clean.Data.txt".
SETUP <- subset(SETUP, ID %in% c("R238","R252","R275"))

if (nrow(SETUP) == 0) {
  stop("None of the samples to correct was found in SETUP.",call.=FALSE)
}

# CLEANING() only processes samples whose "SKIP" entry equals "NO"; the
# column is created here when the setup table does not provide it.
if (!"SKIP" %in% colnames(SETUP)) {
  SETUP[,"SKIP"] <- "NO"
}

#Clean Data
Output <- apply(SETUP,1,CLEANING,GATES=GATES)


# One row of summary statistics per re-processed sample.
OUTPUT <- as.data.frame(Output[[1]])
OUTPUT[seq_len(nrow(SETUP)),] <- NA

for (i in seq_len(nrow(SETUP)))
{
  if (!is.null(Output[[i]]))
  {
    OUTPUT[i,] <- Output[[i]]
  }
}

CLEAN <- read.table("Clean.Data.txt",header=TRUE,as.is=TRUE)

# Keep exactly the columns of CLEAN, in the same order. Selecting by name
# drops the helper columns (FILENAMES, COUNTER, SKIP) whatever their
# position; a negative which() index would drop every column when the
# helper column is absent.
CLEAN.2 <- cbind.data.frame(SETUP,OUTPUT)
MISSING.COLS <- setdiff(colnames(CLEAN),colnames(CLEAN.2))
if (length(MISSING.COLS) > 0) {
  stop("Columns of Clean.Data.txt missing from the re-cleaned samples: ",paste(MISSING.COLS,collapse=", "),call.=FALSE)
}
CLEAN.2 <- CLEAN.2[,colnames(CLEAN),drop=FALSE]


# Replace each corrected sample, identified by its ID and REP values.
for (i in seq_len(nrow(CLEAN.2)))
{
  POSITION <- which(CLEAN$ID == CLEAN.2[i,"ID"] & CLEAN$REP == CLEAN.2[i,"REP"])
  CLEAN[POSITION,] <- CLEAN.2[i,]
}

write.table(CLEAN,"Clean.Data.txt",row.names=FALSE,sep="\t")


###################################################################################################################################################
# 4 - Expression: Correction for technical variation between samples and calculation of median expression and expression noise for each genotype. #
###################################################################################################################################################


########################
# a) LOADING LIBRARIES #
########################

#Clear memory
rm(list=ls())
options(warn=-1)

library(flowCore)
library(flowClust)
library(flowViz)
library(pcaPP)
library(mixtools)
library(plyr)
library(robustlmm)
library(plotrix)
library(MASS)

box <- graphics::box

########################################
# b) Load data generated in section 3. #
########################################

parent.dir <- "/Path.to.input.file"
setwd(parent.dir)

# Column classes of Clean.Data.txt, in column order:
# 20 factor columns, then 4 integer columns, then 23 numeric columns.
DATA.TYPE <- rep(c("factor", "integer", "numeric"), times = c(20, 4, 23))

### Read in the data set
DATA <- read.table("Clean.Data.txt", header = TRUE, colClasses = DATA.TYPE)

# Fluorescence levels used to rescale YFP.MEDIAN.FINAL.
# NOTE(review): presumably the reference (wild-type) and non-fluorescent
# control levels -- confirm where these constants come from.
REF <- 0.905811693
NEG <- 0.519116913

### Transformation of data on a scale appropriate for applying linear corrections.
# Median: log of the fluorescence rescaled so that NEG maps to 0 and REF to 1,
# shifted by 0.05.
YFP.RESCALED <- (DATA[["YFP.MEDIAN.FINAL"]] - NEG) / (REF - NEG)
DATA[,"log.RNA.MEDIAN"] <- log(YFP.RESCALED + 0.05)

# SD: standard deviation divided by the (unnormalised) argument of the log above.
YFP.SHIFTED <- (DATA[["YFP.MEDIAN.FINAL"]] - NEG) + (REF - NEG)*0.05
DATA[,"log.RNA.SD"] <- DATA[["YFP.SD.FINAL"]] / YFP.SHIFTED


##################################################
# c) Correction for "FLOW.RUN" and "ROW" effects #
##################################################

# Flow run and plate row are used as categorical predictors below.
DATA[c("FLOW.RUN","ROW")] <- lapply(DATA[c("FLOW.RUN","ROW")], as.factor)

# Keep the samples grown in glucose on the wheel, drop unused factor levels
# and discard rows with any missing value.
CIS.DATA <- droplevels(subset(DATA, ENVIRONMENT == "GLUCOSE" & GROWTH == "WHEEL"))
CIS.DATA <- CIS.DATA[complete.cases(CIS.DATA),]

### Isolate control samples (ID "CTRL"); FSC outliers are removed at the end of this step.
TRANS.CONTROL <- subset(CIS.DATA, ID == "CTRL")


######1-CORRECT FSC MEDIAN##############
# Step 1: robust linear fit of the FSC median on flow run, using controls only.
# With an intercept in the model the first run has effect 0.
FSC.MEDIAN.CORRECT <- rlm(FSC.MEDIAN.FINAL ~ FLOW.RUN, data = TRANS.CONTROL)
COEF.FLOW.RUN <- coefficients(FSC.MEDIAN.CORRECT)
COEF.FLOW.RUN <- c(0, COEF.FLOW.RUN[grep("FLOW.RUN", names(COEF.FLOW.RUN))])

# Effect of each sample's flow run: the integer code of a factor value is the
# position of its level, which indexes the coefficient vector.
OUT <- unname(COEF.FLOW.RUN[as.integer(CIS.DATA$FLOW.RUN)])

CIS.DATA[,"FSC.MEDIAN.CORRECT"] <- CIS.DATA$FSC.MEDIAN.FINAL - OUT

# Step 2: same procedure for the plate row, on the run-corrected values.
TRANS.CONTROL <- subset(CIS.DATA, ID == "CTRL")

FSC.MEDIAN.CORRECT <- rlm(FSC.MEDIAN.CORRECT ~ ROW, data = TRANS.CONTROL)
COEF.ROW <- coefficients(FSC.MEDIAN.CORRECT)
COEF.ROW <- c(0, COEF.ROW[grep("ROW", names(COEF.ROW))])

OUT <- unname(COEF.ROW[as.integer(CIS.DATA$ROW)])

CIS.DATA[,"FSC.MEDIAN.CORRECT"] <- CIS.DATA$FSC.MEDIAN.CORRECT - OUT

TRANS.CONTROL <- subset(CIS.DATA, ID == "CTRL")

# Optional diagnostic plots:
# plot(TRANS.CONTROL$FSC.MEDIAN.FINAL ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$ROW)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$COL)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$POSITION)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$PLATE)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$REP)
# plot(TRANS.CONTROL$FSC.MEDIAN.FINAL ~ TRANS.CONTROL$DAY)

# Remove FSC outliers: keep controls whose corrected FSC median lies within
# 4 MAD of the median of all controls. LOW and HIGH are reused further down.
FSC.CENTER <- median(TRANS.CONTROL$FSC.MEDIAN.CORRECT)
FSC.SPREAD <- mad(TRANS.CONTROL$FSC.MEDIAN.CORRECT)
LOW <- FSC.CENTER - 4*FSC.SPREAD
HIGH <- FSC.CENTER + 4*FSC.SPREAD
TRANS.CONTROL <- subset(TRANS.CONTROL, FSC.MEDIAN.CORRECT >= LOW & FSC.MEDIAN.CORRECT <= HIGH)


######2-CORRECT YFP MEDIAN##############
# Step 1: robust fit of log.RNA.MEDIAN on flow run without intercept, so there
# is one coefficient per run, estimated on the FSC-filtered controls.
YFP.MEDIAN.CORRECT <- rlm(log.RNA.MEDIAN ~ 0 + FLOW.RUN, data = TRANS.CONTROL)

COEF.FLOW.RUN <- coefficients(YFP.MEDIAN.CORRECT)
COEF.FLOW.RUN <- c(COEF.FLOW.RUN[grep("FLOW.RUN", names(COEF.FLOW.RUN))])

# Coefficient of each sample's run, looked up through the factor's integer code.
OUT <- unname(COEF.FLOW.RUN[as.integer(CIS.DATA$FLOW.RUN)])

# Subtract the run coefficient and add back the mean of the controls.
CIS.DATA[,"log.RNA.MEDIAN.CORRECT"] <- CIS.DATA$log.RNA.MEDIAN - OUT + mean(TRANS.CONTROL$log.RNA.MEDIAN)

# Refresh the control set (with the new column) and re-apply the FSC filter.
TRANS.CONTROL <- subset(CIS.DATA, ID == "CTRL")
TRANS.CONTROL <- subset(TRANS.CONTROL, FSC.MEDIAN.CORRECT >= LOW & FSC.MEDIAN.CORRECT <= HIGH)

# Step 2: same procedure for the plate row, on the run-corrected values.
YFP.MEDIAN.CORRECT <- rlm(log.RNA.MEDIAN.CORRECT ~ 0 + ROW, data = TRANS.CONTROL)
COEF.ROW <- coefficients(YFP.MEDIAN.CORRECT)
COEF.ROW <- c(COEF.ROW[grep("ROW", names(COEF.ROW))])

OUT <- unname(COEF.ROW[as.integer(CIS.DATA$ROW)])

CIS.DATA[,"log.RNA.MEDIAN.CORRECT"] <- CIS.DATA$log.RNA.MEDIAN.CORRECT - OUT + mean(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT)

# Back-transform to the fluorescence scale (inverse of the log transformation
# that produced log.RNA.MEDIAN).
CIS.DATA[,"YFP.MEDIAN.CORRECT"] <- (exp(CIS.DATA[,"log.RNA.MEDIAN.CORRECT"]) - 0.05) * (REF - NEG) + NEG

TRANS.CONTROL <- subset(CIS.DATA, ID == "CTRL")
TRANS.CONTROL <- subset(TRANS.CONTROL, FSC.MEDIAN.CORRECT >= LOW & FSC.MEDIAN.CORRECT <= HIGH)

# Optional diagnostic plots (log scale):
# plot(TRANS.CONTROL$log.RNA.MEDIAN ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$ROW)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$COL)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$POSITION)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$PLATE)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$REP)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$DAY)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$BLOCK)

# Optional diagnostic plots (fluorescence scale):
# plot(TRANS.CONTROL$YFP.MEDIAN.FINAL ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$ROW)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$COL)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$POSITION)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$PLATE)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$REP)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$DAY)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$BLOCK)


######3-CORRECT YFP STANDARD DEVIATION##############
# Step 1: robust fit of log.RNA.SD on flow run without intercept (one
# coefficient per run), estimated on the FSC-filtered controls.
YFP.MAD.CORRECT <- rlm(log.RNA.SD ~ 0 + FLOW.RUN, data = TRANS.CONTROL)
COEF.FLOW.RUN <- coefficients(YFP.MAD.CORRECT)
COEF.FLOW.RUN <- c(COEF.FLOW.RUN[grep("FLOW.RUN", names(COEF.FLOW.RUN))])

# Coefficient of each sample's run, looked up through the factor's integer code.
OUT <- unname(COEF.FLOW.RUN[as.integer(CIS.DATA$FLOW.RUN)])

# Subtract the run coefficient and add back the mean of the controls.
CIS.DATA[,"log.RNA.SD.CORRECT"] <- CIS.DATA$log.RNA.SD - OUT + mean(TRANS.CONTROL$log.RNA.SD)

# Refresh the control set (with the new column) and re-apply the FSC filter.
TRANS.CONTROL <- subset(CIS.DATA, ID == "CTRL")
TRANS.CONTROL <- subset(TRANS.CONTROL, FSC.MEDIAN.CORRECT >= LOW & FSC.MEDIAN.CORRECT <= HIGH)

# Step 2: same procedure for the plate row, on the run-corrected values.
YFP.MAD.CORRECT <- rlm(log.RNA.SD.CORRECT ~ 0 + ROW, data = TRANS.CONTROL)
COEF.ROW <- coefficients(YFP.MAD.CORRECT)
COEF.ROW <- c(COEF.ROW[grep("ROW", names(COEF.ROW))])

OUT <- unname(COEF.ROW[as.integer(CIS.DATA$ROW)])

CIS.DATA[,"log.RNA.SD.CORRECT"] <- CIS.DATA$log.RNA.SD.CORRECT - OUT + mean(TRANS.CONTROL$log.RNA.SD.CORRECT)

# Back to the fluorescence scale: multiply by the same shifted median term that
# log.RNA.SD was divided by, now computed from the corrected median.
CIS.DATA[,"YFP.SD.CORRECT"] <- CIS.DATA[,"log.RNA.SD.CORRECT"] * ((CIS.DATA[,"YFP.MEDIAN.CORRECT"] - NEG) + (REF - NEG)*0.05)

TRANS.CONTROL <- subset(CIS.DATA, ID == "CTRL")
TRANS.CONTROL <- subset(TRANS.CONTROL, FSC.MEDIAN.CORRECT >= LOW & FSC.MEDIAN.CORRECT <= HIGH)


# Optional diagnostic plots (log scale):
# plot(TRANS.CONTROL$log.RNA.SD ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$ROW)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$COL)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$POSITION)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$PLATE)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$REP)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$DAY)


# Optional diagnostic plots (fluorescence scale):
# plot(TRANS.CONTROL$YFP.SD.FINAL ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$ROW)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$COL)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$POSITION)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$PLATE)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$REP)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$DAY)

# Save the corrected data set.
write.table(CIS.DATA, file = "GLUCOSE.WHEEL.CORRECT.txt", sep = "\t", quote = FALSE, row.names = FALSE)

#####################################################
# d) Compute expression phenotypes for each sample. #
#####################################################

CIS.DATA <- read.table("GLUCOSE.WHEEL.CORRECT.txt", header = TRUE)

CIS.DATA <- CIS.DATA[complete.cases(CIS.DATA),]

# All phenotypes below are computed on whole columns at once. Each reference
# value (mean or median of a reference strain) is taken from the table as it
# stands just before the columns that use it are written.

# --- Strains with a single copy of pTDH3-YFP (Ura- reference) ---------------
# Computed for every row first; rows of two-copy strains are overwritten below.
# ID "3" supplies the level that is subtracted, ID "2" the level that the
# values are expressed relative to.

# Subtract the mean of the ID "3" samples.
NEG.1 <- subset(CIS.DATA, ID == "3")
CIS.DATA[,"YFP.MEDIAN.ADJUST"] <- CIS.DATA[,"YFP.MEDIAN.CORRECT"] - mean(NEG.1[,"YFP.MEDIAN.CORRECT"])
CIS.DATA[,"YFP.SD.ADJUST"] <- CIS.DATA[,"YFP.SD.CORRECT"] - mean(NEG.1[,"YFP.SD.CORRECT"])

# Express median and SD relative to the mean adjusted median of ID "2".
WT.1 <- subset(CIS.DATA, ID == "2")
WT.LEVEL <- mean(WT.1[,"YFP.MEDIAN.ADJUST"])
CIS.DATA[,"YFP.MEDIAN.RELATIVE"] <- CIS.DATA[,"YFP.MEDIAN.ADJUST"]/WT.LEVEL
CIS.DATA[,"YFP.SD.SCALED"] <- CIS.DATA[,"YFP.SD.ADJUST"]/WT.LEVEL

# Noise metrics: CV = SD/median, Fano = SD^2/median, log2 of |CV|.
CIS.DATA[,"YFP.CV"] <- CIS.DATA[,"YFP.SD.SCALED"]/CIS.DATA[,"YFP.MEDIAN.RELATIVE"]
CIS.DATA[,"YFP.FANO"] <- CIS.DATA[,"YFP.SD.SCALED"]^2/CIS.DATA[,"YFP.MEDIAN.RELATIVE"]
CIS.DATA[,"YFP.LOG.CV"] <- log2(abs(CIS.DATA[,"YFP.CV"]))

# Same metrics with the SD expressed relative to the mean scaled SD of ID "2".
WT.1 <- subset(CIS.DATA, ID == "2")
CIS.DATA[,"YFP.SD.RELATIVE"] <- CIS.DATA[,"YFP.SD.SCALED"]/mean(WT.1[,"YFP.SD.SCALED"])
CIS.DATA[,"YFP.CV.RELATIVE"] <- CIS.DATA[,"YFP.SD.RELATIVE"]/CIS.DATA[,"YFP.MEDIAN.RELATIVE"]
CIS.DATA[,"YFP.FANO.RELATIVE"] <- CIS.DATA[,"YFP.SD.RELATIVE"]^2/CIS.DATA[,"YFP.MEDIAN.RELATIVE"]
CIS.DATA[,"YFP.LOG.CV.RELATIVE"] <- log2(abs(CIS.DATA[,"YFP.CV.RELATIVE"]))

# --- Strains with two copies of pTDH3-YFP (Ura+ reference) ------------------
# Rows with YFP.CONSTRUCT "DOUBLE" and the reference IDs "51"/"52" are
# recomputed against IDs "51"/"52", using medians instead of means.
NEG.1 <- subset(CIS.DATA, ID == "3")

I <- which(CIS.DATA$YFP.CONSTRUCT == "DOUBLE" | CIS.DATA$ID %in% c("51","52"))

CIS.DATA[I,"YFP.MEDIAN.ADJUST"] <- CIS.DATA[I,"YFP.MEDIAN.CORRECT"] - median(NEG.1[,"YFP.MEDIAN.CORRECT"])
CIS.DATA[I,"YFP.SD.ADJUST"] <- CIS.DATA[I,"YFP.SD.CORRECT"] - median(NEG.1[,"YFP.SD.CORRECT"])

WT.1 <- subset(CIS.DATA, ID %in% c("51","52"))
WT.LEVEL <- median(WT.1[,"YFP.MEDIAN.ADJUST"])
CIS.DATA[I,"YFP.MEDIAN.RELATIVE"] <- CIS.DATA[I,"YFP.MEDIAN.ADJUST"]/WT.LEVEL
CIS.DATA[I,"YFP.SD.SCALED"] <- CIS.DATA[I,"YFP.SD.ADJUST"]/WT.LEVEL

CIS.DATA[I,"YFP.CV"] <- CIS.DATA[I,"YFP.SD.SCALED"]/CIS.DATA[I,"YFP.MEDIAN.RELATIVE"]
CIS.DATA[I,"YFP.FANO"] <- CIS.DATA[I,"YFP.SD.SCALED"]^2/CIS.DATA[I,"YFP.MEDIAN.RELATIVE"]
CIS.DATA[I,"YFP.LOG.CV"] <- log2(abs(CIS.DATA[I,"YFP.CV"]))

# NOTE(review): this reference is a mean although the rest of the two-copy
# branch uses medians -- confirm that this is intended.
WT.1 <- subset(CIS.DATA, ID %in% c("51","52"))
NEG.1 <- subset(CIS.DATA, ID == "3")
CIS.DATA[I,"YFP.SD.RELATIVE"] <- CIS.DATA[I,"YFP.SD.SCALED"]/mean(WT.1[,"YFP.SD.SCALED"])
CIS.DATA[I,"YFP.CV.RELATIVE"] <- CIS.DATA[I,"YFP.SD.RELATIVE"]/CIS.DATA[I,"YFP.MEDIAN.RELATIVE"]
CIS.DATA[I,"YFP.FANO.RELATIVE"] <- CIS.DATA[I,"YFP.SD.RELATIVE"]^2/CIS.DATA[I,"YFP.MEDIAN.RELATIVE"]
CIS.DATA[I,"YFP.LOG.CV.RELATIVE"] <- log2(abs(CIS.DATA[I,"YFP.CV.RELATIVE"]))


# Add NA for samples where CV or FANO are undefined, i.e. where the adjusted
# median or the adjusted SD is negative.
UNDEFINED <- CIS.DATA[,"YFP.MEDIAN.ADJUST"] < 0 | CIS.DATA[,"YFP.SD.ADJUST"] < 0
for (NOISE.COLUMN in c("YFP.CV","YFP.FANO","YFP.LOG.CV","YFP.CV.RELATIVE","YFP.FANO.RELATIVE","YFP.LOG.CV.RELATIVE"))
{
  CIS.DATA[[NOISE.COLUMN]][UNDEFINED] <- NA
}

# Write processed data to file
write.table(CIS.DATA, file = "GLUCOSE.WHEEL.DATA.txt", sep = "\t", quote = FALSE, row.names = FALSE)


######################
# e) Remove outliers #
######################

# A sample is flagged "YES" when its value is not strictly within 5 MAD of the
# median of all samples that share its ID, and "NO" otherwise. The flags are
# computed once per ID, first for the relative median and then for the
# relative SD.
OUTLIER.CHECKS <- list(
  c("YFP.MEDIAN.RELATIVE", "YFP.MEDIAN.OUTLIER"),  # based on the YFP median
  c("YFP.SD.RELATIVE", "YFP.SD.OUTLIER")           # based on the YFP SD
)

for (CHECK in OUTLIER.CHECKS)
{
  for (CUR.ID in as.character(unique(CIS.DATA$ID)))
  {
    ROWS <- which(CIS.DATA$ID == CUR.ID)
    CUR <- CIS.DATA[ROWS, CHECK[1]]

    LOW <- median(CUR) - 5*mad(CUR)
    HIGH <- median(CUR) + 5*mad(CUR)

    CIS.DATA[ROWS, CHECK[2]] <- ifelse(CUR > LOW & CUR < HIGH, "NO", "YES")
  }
}

# Create file corresponding to Supplementary File 1 - Dataset 2.
write.table(CIS.DATA, file = "GLUCOSE.WHEEL.DATA.txt", sep = "\t", quote = FALSE, row.names = FALSE)


######################################################################
# f) Calculate average phenotypes across replicates for each strain. #
######################################################################

ALL.DATA <- read.table("GLUCOSE.WHEEL.DATA.txt",header=TRUE)

# Keep only the samples that were not flagged as outliers for either phenotype.
FILTER.DATA <- subset(ALL.DATA, YFP.MEDIAN.OUTLIER == "NO" & YFP.SD.OUTLIER == "NO")

write.table(FILTER.DATA,"FILTER.DATA.txt",sep="\t",quote=FALSE,row.names=FALSE)


# Phenotypes summarised per strain and the columns that define a strain.
# The aggregation formula and the output column names are both built from
# these two vectors so that they cannot get out of step. (The first call used
# to carry a trailing comma inside cbind(), an empty argument that makes the
# call fail.)
PHENOTYPES <- c("YFP.MEDIAN.FINAL","YFP.SD.FINAL","YFP.MEDIAN.ADJUST","YFP.SD.ADJUST","YFP.SD.SCALED","YFP.CV","YFP.FANO","YFP.LOG.CV","YFP.MEDIAN.RELATIVE","YFP.SD.RELATIVE","YFP.CV.RELATIVE","YFP.FANO.RELATIVE","YFP.LOG.CV.RELATIVE")
GROUPS <- c("ID","STRAIN","MUTATION","YFP.CONSTRUCT")

SUMMARY.FORMULA <- as.formula(paste0("cbind(",paste(PHENOTYPES,collapse=","),") ~ ",paste(GROUPS,collapse=" + ")))

# Mean, standard deviation and number of replicates for each strain.
# na.action=NULL keeps rows with missing values in the model frame; mean() and
# sd() then ignore the NAs, while length() counts every replicate.
TRANS.MEDIAN <- aggregate(SUMMARY.FORMULA, data=FILTER.DATA, FUN = mean, na.rm=TRUE, na.action=NULL)
TRANS.SD <- aggregate(SUMMARY.FORMULA, data=FILTER.DATA, FUN = sd, na.rm=TRUE, na.action=NULL)
TRANS.N <- aggregate(SUMMARY.FORMULA, data=FILTER.DATA, na.action=NULL, FUN = length)

# Keep strains with at least 3 replicates. Column 9 of TRANS.N is the count
# for the fifth phenotype; because length() counts NAs, every count column of
# a given row holds the same number.
ENOUGH <- which(TRANS.N[,9] >= 3)
TRANS.MEDIAN <- TRANS.MEDIAN[ENOUGH,]
TRANS.SD <- TRANS.SD[ENOUGH,]
TRANS.N <- TRANS.N[ENOUGH,]

# Grouping columns and means, then the SDs (columns 5-17), then N.
TRANS <- cbind.data.frame(TRANS.MEDIAN,TRANS.SD[,5:17],TRANS.N[,17])

colnames(TRANS) <- c(GROUPS,paste0(PHENOTYPES,".MEAN"),paste0(PHENOTYPES,".SD"),"N")

# The control samples are not part of the summary.
TRANS <- subset(TRANS, ID != "CTRL")

CLASSES <- read.table("../CLASSES.txt",header=TRUE)

# Look up the class of each strain in CLASSES.
# NOTE(review): this assumes every strain occurs exactly once in CLASSES.txt.
for (i in seq_len(nrow(TRANS)))
{
  TRANS[i,"CLASS"] <- CLASSES[which(CLASSES$STRAIN == TRANS[i,"STRAIN"]),"CLASS"]
}

#Data used for the "Expression" part of Figure 1-3 - Source Data 1.
write.table(TRANS,"SUMMARY.DATA.EXPRESSION.txt",sep="\t",quote=FALSE,row.names=FALSE)


################################################################################################
# 5 - Fitness: Analysis of flow data to compute the relative fitness for the 43 pTDH3 alleles. #
################################################################################################

#Clear memory
rm(list=ls())
options(warn=-1)

#Necessary libraries
library(flowCore)
library(flowClust)
library(mixtools)
library(mratios)
library(gplots)
library(fitdistrplus)
library(RColorBrewer)
library(pcaPP)

#######################
#Set working directory#
#######################

parent.dir <- "/Path.to.input.file"
setwd(parent.dir)

# Experimental design table (as.is = TRUE keeps text columns as character).
Design.xp <- read.table("TEMPLATE.FITNESS.txt", header = TRUE, as.is = TRUE)


##################################
#Create list of all fcs filenames#
##################################

# All FCS files found recursively below the working directory.
FILENAMES <- list.files(path = ".", pattern = ".fcs", recursive = TRUE, include.dirs = TRUE)

# The output table starts as a copy of the design; result columns are added to it.
Output.data <- Design.xp

# 96 distinct indices drawn from 1..6144: the samples whose gating is plotted.
RANDOM <- sample.int(6144, size = 96)

# Open the pdf that receives the gating plots.
pdf(file = "Plot_YFP.GFP_v2.pdf", width = 14, height = 12)

###################
##Data processing##
###################

# Process the samples one by one: read the FCS file, remove artifacts with hard
# gates, separate the two fluorescent populations on a robust PCA of the two
# fluorescence channels, and store event counts and frequencies in row i of
# Output.data. For the samples listed in RANDOM the final gating is drawn on
# the open pdf device.
# NOTE: FILENAMES[i] is matched to row i of Output.data by position only.
for(i in 1:nrow(Output.data))
{
  Merge.Frame <- read.FCS(as.character(FILENAMES[i]),transformation=FALSE,alter.names=TRUE)
  
  Output.data[i,"COUNTS.INITIAL"] <- nrow(exprs(Merge.Frame))
  
  
  if (is.na(Output.data[i,"COUNTS.INITIAL"]))
  {Output.data[i,"COUNTS.INITIAL"] <- 0}
  
  
  # Samples with fewer than 3000 events are skipped: only COUNTS.INITIAL is
  # filled in for them.
  if (Output.data[i,"COUNTS.INITIAL"] >= 3000)
  {
    
    ##############################
    ##Log transformation of data##
    ##############################
    
    # Zeros are turned into NA so that events with a zero in any of the four
    # channels used below are dropped before taking logs.
    Data.Fluo <- exprs(Merge.Frame)
    Data.Fluo[Data.Fluo == 0] <- NA
    Data.Fluo <- Data.Fluo[complete.cases(Data.Fluo[,c("FSC.A","FSC.H","FL1.H","FL2.H")]),]
    Merge.Frame <- new("flowFrame",Data.Fluo)
    
    logTrans <- logTransform(transformationId="log10-transformation",logbase=10,r=1,d=1)
    Merge.Frame <- transform(Merge.Frame,`logFSC.A`=logTrans(`FSC.A`))
    Merge.Frame <- transform(Merge.Frame,`logFSC.H`=logTrans(`FSC.H`))
    Merge.Frame <- transform(Merge.Frame,`logFL1.A`=logTrans(`FL1.A`))
    Merge.Frame <- transform(Merge.Frame,`logFL1.H`=logTrans(`FL1.H`))
    Merge.Frame <- transform(Merge.Frame,`logFL2.A`=logTrans(`FL2.A`))
    Merge.Frame <- transform(Merge.Frame,`logFL2.H`=logTrans(`FL2.H`))
    
    ####################################
    ##Calculate phenotypes of interest##
    ####################################
    
    Data.Fluo <- exprs(Merge.Frame)
    
    # Fluorescence of each channel scaled by forward scatter.
    Phenotype1 <- Data.Fluo[,"logFL1.H"]^2/Data.Fluo[,"logFSC.A"]^3
    Phenotype1 <- as.matrix(Phenotype1)
    colnames(Phenotype1) <- "FL1^2/FSC^3"
    Merge.Frame <- cbind2(Merge.Frame, Phenotype1)
    
    Phenotype2 <- (Data.Fluo[,"logFL2.H"])^2/Data.Fluo[,"logFSC.A"]^3
    Phenotype2 <- as.matrix(Phenotype2)
    colnames(Phenotype2) <- "FL2^2/FSC^3"
    Merge.Frame <- cbind2(Merge.Frame, Phenotype2)
    
    # Ratio of the two scaled fluorescence values.
    Phenotype3 <- Phenotype2/Phenotype1
    Phenotype3 <- as.matrix(Phenotype3)
    colnames(Phenotype3) <- "FL2/FL1"
    Merge.Frame <- cbind2(Merge.Frame, Phenotype3)
    
    # Area-to-height relationship of forward scatter, used by the hard gate.
    Phenotype4 <- (Data.Fluo[,"logFSC.A"]^8)/(Data.Fluo[,"logFSC.H"]^9)
    Phenotype4 <- as.matrix(Phenotype4)
    colnames(Phenotype4) <- "FSC.A/FSC.H"
    Merge.Frame <- cbind2(Merge.Frame, Phenotype4)
    
    PlotAll <- exprs(Merge.Frame)
    
    Output.data[i,"COUNTS.COMPLETE"] <- nrow(PlotAll)
    
    
    #Remove artifacts: keep events inside fixed ranges of size, FSC.A/FSC.H and both fluorescence channels
    rectGate <- rectangleGate(filterId="Noise Removal","logFSC.A"=c(4.6,6.7),"FSC.A/FSC.H"=c(0.066,0.17), "logFL2.H"=c(2.7,5.6), "logFL1.H"=c(3.3,6.2))
    
    Hard.Gates <- Subset(Merge.Frame, rectGate)
    Hard.Gates.exp <- exprs(Hard.Gates)
    
    
    ##############################################
    #Select fluorescent clusters based on FL2/FL1#
    ##############################################
    
    # Remove correlation between FL1.H and FL2.H with a robust two-component PCA
    Fluo.Model <- PCAgrid(cbind(Hard.Gates.exp[,"logFL1.H"],Hard.Gates.exp[,"logFL2.H"]),k=2,scale="sd",method="sd",scores=TRUE,center="median")
    
    Scores <- Fluo.Model$scores
    
    # plot(Scores[,1],Scores[,2],pch=20,cex=0.3,col="#00000044")
    
    # The sign of a principal component is arbitrary: orient PC2 so that its
    # loading on the second variable (logFL2.H) is positive.
    if (Fluo.Model$loadings[2,2] < 0)
    {
      Scores[,2] <- -1 * Scores[,2]
    }
    
    # Find Modes of Two Populations: local maxima of the PC2 density that
    # exceed 0.3. The first and the last maximum found are used.
    # HIST <- hist(Scores[,2],breaks=100)
    DENSITY <- density(Scores[,2])
    
    MODES <- c(0,0)
    COUNT <- 1
    
    for (j in 2:(length(DENSITY$y)-1))
    {
      if (DENSITY$y[j] > DENSITY$y[j-1] & DENSITY$y[j] > DENSITY$y[j+1] & DENSITY$y[j] > 0.3)
      {
        MODES[COUNT] <- DENSITY$x[j]
        COUNT <- 1 + COUNT
      }
    }
    
    MODES <- MODES[c(1,length(MODES))]
    
    
    #Find upper value for GFP population
    # Between the first mode and the midpoint of the two modes, compare the
    # density at each grid point with the mean density of the next 20 grid
    # points. The boundary is the first point where the difference is below 0.025.
    POSITIONS <- which(DENSITY$x > MODES[1] & DENSITY$x < mean(MODES))
    DENSITIES <- DENSITY$y[POSITIONS]
    
    OFFSET <- c()
    
    for (j in 1:(length(DENSITY$y)-20))
    {
      OFFSET[j] <- mean(DENSITY$y[j+c(1:20)])
    }
    
    CUR <- OFFSET[POSITIONS]
    DIFF <- DENSITIES-CUR
    POS <- min(which(DIFF < 0.025))
    
    GFP.HIGH <- DENSITY$x[POSITIONS[POS]]
    
    
    #Find lower value for YFP population
    # Mirror image of the step above: between the midpoint and the second
    # mode, compare the density with the mean of the previous 20 grid points
    # and take the last point where the difference is below 0.025.
    POSITIONS <- which(DENSITY$x < MODES[2] & DENSITY$x > mean(MODES))
    DENSITIES <- DENSITY$y[POSITIONS]
    
    OFFSET <- c()
    
    for (j in (1+20):length(DENSITY$y))
    {
      OFFSET[j] <- mean(DENSITY$y[j-c(1:20)])
    }
    
    CUR <- OFFSET[POSITIONS]
    DIFF <- DENSITIES-CUR
    POS <- max(which(DIFF < 0.025))
    
    YFP.LOW <- DENSITY$x[POSITIONS[POS]]
    
    
    # hist(Scores[,2],breaks=200,freq=FALSE)
    # abline(h=0.3)
    # points(DENSITY$x,DENSITY$y,type="l",col="red")
    # abline(v=YFP.LOW)
    # abline(v=GFP.HIGH)
    # abline(v=MODES,col="red")
    
    # plot(1:length(CUR),DENSITIES-CUR,type="b")
    
    # Bounds on PC1 at 3 MAD around the median. Only the lower bound is used
    # for gating; HIGH appears only in the commented diagnostic plot below.
    LOW <- median(Scores[,1]) - 3*mad(Scores[,1])
    HIGH <- median(Scores[,1]) + 3*mad(Scores[,1])
    
    # hist(Scores[,1],breaks=200,freq=FALSE)
    # abline(v=median(Scores[,1]))
    # abline(v=LOW)
    # abline(v=HIGH)
    
    #Select fluorescent clusters (T.YFP.GFP = doublets with one GFP cell and one YFP cell, T.YFP = YFP events, T.GFP = GFP events)
    # drop=FALSE keeps a matrix even when a population contains a single event,
    # so that nrow() and column indexing below keep working.
    T.YFP.GFP <- Hard.Gates.exp[which(Scores[,2] < YFP.LOW & Scores[,2] > GFP.HIGH & Scores[,1] > LOW),,drop=FALSE]
    T.YFP <- Hard.Gates.exp[which(Scores[,2] > YFP.LOW & Scores[,1] > LOW),,drop=FALSE]
    T.GFP <- Hard.Gates.exp[which(Scores[,2] < GFP.HIGH & Scores[,1] > LOW),,drop=FALSE]
    
    # plot(Scores[,1],Scores[,2],pch=20,cex=0.3,col="#00000044")
    # abline(h=GFP.HIGH)
    # abline(h=YFP.LOW)
    # abline(v=LOW)
    
    #########################################
    ##Calculate number of YFP and GFP cells##
    #########################################
    
    #Calculation is explained in Methods.
    N.YFP <- nrow(T.YFP) + nrow(T.YFP.GFP)/(2*sqrt(nrow(T.GFP)/nrow(T.YFP))) + nrow(T.YFP.GFP)
    N.GFP <- nrow(T.GFP) + nrow(T.YFP.GFP)/(2*sqrt(nrow(T.YFP)/nrow(T.GFP))) + nrow(T.YFP.GFP)
    # Events that were not assigned to any of the three populations.
    N.Debris <- nrow(PlotAll) - nrow(T.YFP.GFP) - nrow(T.YFP) - nrow(T.GFP)
    
    ##############################
    ###Add data in output table###
    ##############################
    
    # "*.events" columns are raw event counts and frequencies; N.* and Freq.YFP /
    # Freq.GFP / Freq.noFP are based on the cell numbers computed above.
    Output.data[i,"Filtered.events"] <- nrow(T.YFP.GFP) + nrow(T.YFP) + nrow(T.GFP)
    Output.data[i,"YFP.events"] <- nrow(T.YFP)
    Output.data[i,"GFP.events"] <- nrow(T.GFP)
    Output.data[i,"YGFP.events"] <- nrow(T.YFP.GFP)
    Output.data[i,"N.YFP"] <- N.YFP
    Output.data[i,"N.GFP"] <- N.GFP
    Output.data[i,"N.noFP"] <- N.Debris
    Output.data[i,"Freq.YFP.events"] <- nrow(T.YFP)/(nrow(T.YFP)+nrow(T.GFP))
    Output.data[i,"Freq.GFP.events"] <- nrow(T.GFP)/(nrow(T.YFP)+nrow(T.GFP))
    Output.data[i,"Freq.YGFP.events"] <- nrow(T.YFP.GFP)/(nrow(T.YFP)+nrow(T.GFP)+nrow(T.YFP.GFP))
    Output.data[i,"Freq.YFP"] <- N.YFP/(N.YFP+N.GFP)
    Output.data[i,"Freq.GFP"] <- N.GFP/(N.YFP+N.GFP)
    Output.data[i,"Freq.noFP"] <- N.Debris/(N.YFP+N.GFP+N.Debris)
    
    ##############################
    ###Plot Final Gating in pdf###
    ##############################
    
    if (i %in% RANDOM)
    {
      #quartz(height=12,width=14)
      par(mfrow=c(2,2))
      
      # Panel 1: forward scatter area vs height; vertical lines mark the size gate.
      plot(PlotAll[,"logFSC.A"],PlotAll[,"logFSC.H"],pch=20,cex=0.3,col="#00000044",main=paste(Output.data[i,"STRAIN"],"_",Output.data[i,"MUTATION"],"_","P",Output.data[i,"PLATE"],Output.data[i,"POSITION"],"_","T",Output.data[i,"TIME.POINT"],sep=""),cex.main=2,xlab="logFSC.A",ylab="logFSC.H")
      points(T.YFP[,"logFSC.A"],T.YFP[,"logFSC.H"],col="#FF990044",pch=20,cex=0.3)
      points(T.YFP.GFP[,"logFSC.A"],T.YFP.GFP[,"logFSC.H"],col="#DD336644",pch=20,cex=0.3)
      points(T.GFP[,"logFSC.A"],T.GFP[,"logFSC.H"],col="#22CC2244",pch=20,cex=0.3)
      abline(v=4.6)
      abline(v=6.7)
      
      # Panel 2: fluorescence ratio vs size. No horizontal threshold lines are
      # drawn here: the populations are separated on the PCA scores (panel 4),
      # and High.GFP, Low.GFP, High.YFP and Low.YFP are not defined anywhere
      # after the workspace is cleared at the start of this section.
      plot(PlotAll[,"logFSC.A"],PlotAll[,"FL2/FL1"],pch=20,cex=0.3,col="#00000044",ylim=c(0.5,1.3),xlab="logFSC.A",ylab="YFP/GFP")
      points(T.YFP[,"logFSC.A"],T.YFP[,"FL2/FL1"],col="#FF990044",pch=20,cex=0.3)
      points(T.YFP.GFP[,"logFSC.A"],T.YFP.GFP[,"FL2/FL1"],col="#DD336644",pch=20,cex=0.3)
      points(T.GFP[,"logFSC.A"],T.GFP[,"FL2/FL1"],col="#22CC2244",pch=20,cex=0.3)
      abline(v=4.6)
      abline(v=6.7)
      
      # Panel 3: the two fluorescence channels against each other.
      plot(PlotAll[,"logFL2.H"],PlotAll[,"logFL1.H"],pch=20,cex=0.3,col="#00000044",xlim=c(2.9,5),ylim=c(3.5,5.6),xlab="logYFP.H",ylab="logGFP.H")
      points(T.YFP[,"logFL2.H"],T.YFP[,"logFL1.H"],col="#FF990044",pch=20,cex=0.3,xlim=c(3,5),ylim=c(3.5,5.5))
      points(T.YFP.GFP[,"logFL2.H"],T.YFP.GFP[,"logFL1.H"],col="#DD336644",pch=20,cex=0.3,xlim=c(3,5),ylim=c(3.5,5.5))
      points(T.GFP[,"logFL2.H"],T.GFP[,"logFL1.H"],col="#22CC2244",pch=20,cex=0.3)
      
      # Panel 4: PCA scores with the three thresholds used for gating.
      plot(Scores[,1],Scores[,2],pch=20,cex=0.3,col="#00000044",xlab="PC.1",ylab="PC.2")
      abline(h=GFP.HIGH)
      abline(h=YFP.LOW)
      abline(v=LOW)
    }
  }
  
  cat(i," of ",nrow(Design.xp)," is done\n")
}

# Close the pdf that received the gating plots.
dev.off()

# Save the per-sample results and read them back in.
write.table(Output.data, file = "Experiment_Output.txt", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
Output.data <- read.table("Experiment_Output.txt", header = TRUE, as.is = TRUE)


###############################################################
##LOOK FOR MISTAKES IN WELL SEPARATION OR WHEN SAVING DATA!!!##
###############################################################

# For every sample, count the samples (itself included) that have the same
# initial and the same complete event counts. Comparisons involving a missing
# count do not match, so samples without COUNTS.COMPLETE get 0.
COUNTS <- vapply(
  seq_len(nrow(Output.data)),
  function(k) {
    SAME <- Output.data$COUNTS.INITIAL == Output.data[k,"COUNTS.INITIAL"] &
      Output.data$COUNTS.COMPLETE == Output.data[k,"COUNTS.COMPLETE"]
    as.numeric(sum(SAME, na.rm = TRUE))
  },
  numeric(1)
)

# Samples whose pair of counts occurs exactly twice.
which(COUNTS == 2)

# Number of distinct initial event counts.
length(unique(Output.data[,"COUNTS.INITIAL"]))


#################################################################
##Sort Data by Environment, Plate, plateposition and Time.point##
#################################################################

# Row order by environment, then plate, then well position, then time point,
# so that the time points of one sample end up on consecutive rows.
SORT.ORDER <- with(Output.data, order(ENVIRONMENT, PLATE, POSITION, TIME.POINT))
Sorted.data <- Output.data[SORT.ORDER,]

write.table(Sorted.data, file = "Experiment_Output_Sorted.txt", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)


###########################################################################################################################
#####Reformat table to move the different time points of the same sample in different columns instead of different rows####
###########################################################################################################################

# The sorted table has one row per sample and time point, with the time points
# of a sample on consecutive rows. It is reshaped to one row per sample, each
# measurement column X becoming X_1 ... X_<N.points>.
Data.preformat <- read.table("Experiment_Output_Sorted.txt",header=TRUE,as.is=TRUE)

# Number of time points per sample.
N.points <- 4

# The reshaping below takes every N.points-th row as the start of a sample.
stopifnot(nrow(Data.preformat) %% N.points == 0)

# Columns that describe the sample and are the same at every time point.
ID.COLUMNS <- c("ID","STRAIN","MUTATION","YFP.CONSTRUCT","ENVIRONMENT","POSITION","COLUMN","ROW","PLATE")

# Measurement columns start at column 13 of the sorted table.
MEASURE.NAMES <- colnames(Data.preformat)[13:ncol(Data.preformat)]

# One output column per measurement and time point: X_1, X_2, ...
Raw.names <- rep(MEASURE.NAMES,each=N.points)
Numbers <- rep(c(1:N.points),length(MEASURE.NAMES))
Full.names <- paste(Raw.names,Numbers,sep="_")

# Sample description taken from the first time point of every sample.
Data.format <- Data.preformat[seq(1,nrow(Data.preformat),by=N.points),ID.COLUMNS]

for (i in Full.names)
{
  Data.format[,i] <- NA
}

###Automatic filling of all columns

Data <- Data.preformat

for (j in 13:ncol(Data))
{
  # Filling by row puts the N.points consecutive values of a sample on one row.
  Current.matrix <- matrix(Data[,j],ncol=N.points,nrow=nrow(Data)/N.points,byrow=TRUE)
  # Target columns are addressed by name.
  TARGET.NAMES <- paste(colnames(Data)[j],c(1:N.points),sep="_")
  Data.format[,TARGET.NAMES] <- as.data.frame(Current.matrix)
}


write.table(Data.format,file="Experiment_Output_Formatted.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)


######################################
####Compute selection coefficients####
######################################

Data <- read.table("Experiment_Output_Formatted.txt", header = TRUE, as.is = TRUE)
Time <- read.table("../TIME.POINTS.txt", header = TRUE, as.is = TRUE)
Dilution <- read.table("../DILUTION.txt", header = TRUE, as.is = TRUE)

# The three intervals between consecutive time points: column names used in the
# Time and Dilution tables, and the suffix used for the columns created here.
INTERVALS <- c("T1T2","T2T3","T3T4")
SUFFIXES <- c("1.2","2.3","3.4")

#1-Compute number of generations for each sample
# log2 of (density at the later time point x dilution factor / density at the
# earlier time point), with the dilution factor of the sample's plate.
for (i in 1:nrow(Data))
{
  CUR.DILUTION <- subset(Dilution, ENVIRONMENT == Data[i,"ENVIRONMENT"] & PLATE == Data[i,"PLATE"])
  for (k in 1:3)
  {
    BEFORE <- Data[i,paste0("DENSITY_",k)]
    AFTER <- Data[i,paste0("DENSITY_",k+1)]
    Data[i,paste0("G_",SUFFIXES[k])] <- log(AFTER*CUR.DILUTION[1,INTERVALS[k]]/BEFORE)/log(2)
  }
}

#2-Compute number of generations for each plate
# Median over the samples of the same plate whose frequency of YFP events
# changed by less than 0.05 between the first and the last time point.
NO.COST <- subset(Data, abs(Freq.YFP.events_1 - Freq.YFP.events_4) < 0.05)

for (i in 1:nrow(Data))
{
  CUR.PLATE <- subset(NO.COST, ENVIRONMENT == Data[i,"ENVIRONMENT"] & PLATE == Data[i,"PLATE"])
  for (k in 1:3)
  {
    Data[i,paste0("GP_",SUFFIXES[k])] <- median(CUR.PLATE[,paste0("G_",SUFFIXES[k])])
  }
}

#3-Compute doubling time for each plate
# Duration of the interval divided by the number of generations of the plate.
for (i in 1:nrow(Data))
{
  CUR.TIME <- subset(Time, ENVIRONMENT == Data[i,"ENVIRONMENT"] & PLATE == Data[i,"PLATE"])
  for (k in 1:3)
  {
    Data[i,paste0("DT_",SUFFIXES[k])] <- CUR.TIME[1,INTERVALS[k]]/Data[i,paste0("GP_",SUFFIXES[k])]
  }
}


Data.Mix <- Data

#4-Compute the logarithm of allele frequency at each time point
# log.ratio_k uses the raw numbers of YFP and GFP events, log.N_k the numbers
# of YFP and GFP cells.
for (k in 1:4)
{
  Data.Mix[,paste0("log.ratio_",k)] <- log(Data.Mix[,paste0("YFP.events_",k)]/Data.Mix[,paste0("GFP.events_",k)])
}
for (k in 1:4)
{
  Data.Mix[,paste0("log.N_",k)] <- log(Data.Mix[,paste0("N.YFP_",k)]/Data.Mix[,paste0("N.GFP_",k)])
}


####################
##COMPUTE FITNESS###
####################

# For every sample the log ratio of YFP to GFP is regressed on the cumulative
# number of generations of the plate. The fitness estimate is exp(slope); the
# p-value of the F test for the slope is stored next to it.
# Two estimates are computed:
#   w.estimate   / F.test   - from the numbers of YFP and GFP flow events
#                             (without taking into account cell doublets);
#   w.estimate.2 / F.test.2 - from the numbers of YFP and GFP cells
#                             (taking into account cell doublets).
FITNESS.INPUTS <- list(
  list(RATIOS=c("log.ratio_1","log.ratio_2","log.ratio_3","log.ratio_4"), W="w.estimate", P="F.test"),
  list(RATIOS=c("log.N_1","log.N_2","log.N_3","log.N_4"), W="w.estimate.2", P="F.test.2")
)

for (INPUT in FITNESS.INPUTS)
{
  for (i in seq_len(nrow(Data.Mix)))
  {
    log.ratio <- unname(unlist(Data.Mix[i,INPUT$RATIOS]))
    # Generations elapsed at each time point: 0 at the first one, then the
    # running sum of the per-interval plate values.
    G <- c(0,unlist(Data.Mix[i,c("GP_1.2","GP_2.3","GP_3.4")]))
    Generation <- c(G[1],sum(G[1:2]),sum(G[1:3]),sum(G[1:4]))
    DataFrame <- data.frame(cbind(log.ratio,Generation))
    colnames(DataFrame) <- c("log.ratio","Generation")
    Model <- lm(log.ratio~Generation,data=DataFrame,na.action=na.exclude)
    Data.Mix[i,INPUT$W] <- exp(coefficients(Model)[2])
    F.test <- anova(Model, test="F")
    # Row 1, column 5 of the anova table is the p-value of the first term.
    # Assigning the whole column 5 stored the same value, but only because the
    # extra row was dropped with a warning that options(warn=-1) hides.
    Data.Mix[i,INPUT$P] <- F.test[1,5]
  }
}

write.table(Data.Mix,file="Experiment_s.estimates.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)

#####################
#Filter out outliers#
#####################

Data <- read.table("Experiment_s.estimates.txt",header=TRUE,as.is=TRUE)

#Flag, for every row of DATA, whether the value in column W.COL lies more than 3 median absolute
#deviations away from the median of all rows sharing the same ENVIRONMENT and STRAIN.
#  DATA  : data frame with columns ENVIRONMENT, STRAIN and W.COL.
#  W.COL : name of the fitness column to screen.
#Returns a character vector with one element per row: "YES", "NO", or NA when the value is missing.
FLAG.OUTLIERS <- function(DATA,W.COL)
{
  FLAG <- rep(NA_character_,nrow(DATA))
  for (i in seq_len(nrow(DATA)))
  {
    W <- DATA[i,W.COL]
    if (is.na(W))
    {
      next
    }
    SAME <- which(DATA$ENVIRONMENT == DATA[i,"ENVIRONMENT"] & DATA$STRAIN == DATA[i,"STRAIN"])
    CENTER <- median(DATA[SAME,W.COL],na.rm=TRUE)
    SPREAD <- 3*mad(DATA[SAME,W.COL],na.rm=TRUE)
    if (W < CENTER - SPREAD || W > CENTER + SPREAD)
    {
      FLAG[i] <- "YES"
    } else {
      FLAG[i] <- "NO"
    }
  }
  FLAG
}

#FLAG OUTLIERS FOR W.ESTIMATE
Data[,"OUTLIER"] <- FLAG.OUTLIERS(Data,"w.estimate")

#FLAG OUTLIERS FOR W.ESTIMATE.2
#The missing-value test now looks at w.estimate.2 itself; the previous code tested w.estimate here.
Data[,"OUTLIER.2"] <- FLAG.OUTLIERS(Data,"w.estimate.2")

#Keep only rows whose doublet-corrected fitness estimate is not an outlier (rows flagged NA are dropped too).
Data <- subset(Data, Data[,"OUTLIER.2"] == "NO")


##################################
#Calculate Fitness Relative To WT#
##################################

#Mean fitness estimate (w.estimate.2) of the two reference strains in glucose.
#Neither value changes while the loop runs, so both are computed once.
WT.MEAN.Y1189 <- mean(subset(Data, STRAIN == "Y1189" & ENVIRONMENT == "GLUCOSE")[,"w.estimate.2"])
WT.MEAN.Y2682 <- mean(subset(Data, STRAIN == "Y2682" & ENVIRONMENT == "GLUCOSE")[,"w.estimate.2"])

for (i in 1:nrow(Data))
{
  IN.GLUCOSE <- Data[i,"ENVIRONMENT"] == "GLUCOSE"

  #Single-construct strains (other than Y2682) are expressed relative to Y1189.
  if (IN.GLUCOSE & Data[i,"YFP.CONSTRUCT"] == "SINGLE" & Data[i,"STRAIN"] != "Y2682")
  {
    Data[i,"Fitness"] <- Data[i,"w.estimate.2"]/WT.MEAN.Y1189
  }

  #Double-construct strains, and Y2682 itself, are expressed relative to Y2682.
  if (IN.GLUCOSE & (Data[i,"YFP.CONSTRUCT"] == "DOUBLE" | Data[i,"STRAIN"] == "Y2682"))
  {
    Data[i,"Fitness"] <- Data[i,"w.estimate.2"]/WT.MEAN.Y2682
  }
}

#Drop the two outlier flag columns before saving.
Data.filter <- Data[,!(names(Data) %in% c("OUTLIER","OUTLIER.2"))]

#Save file corresponding to Supplementary File 1 - Dataset 3.
write.table(Data.filter,file="Experiment_s.estimates_filtered.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)


###########################################################################
#Make summary table with average fitness across replicates for each strain#
###########################################################################

Data <- read.table("Experiment_s.estimates_filtered.txt",header=TRUE,as.is=TRUE)

#Replicates are pooled by ID, STRAIN, MUTATION, YFP.CONSTRUCT and ENVIRONMENT.
GROUPS <- list(Data$ID,Data$STRAIN,Data$MUTATION,Data$YFP.CONSTRUCT,Data$ENVIRONMENT)
#Columns summarised: column 83 to the last one.
#NOTE(review): assumes the measurement columns start at position 83 and that the first five
#columns of Data are the grouping variables listed above, in that order - confirm against the input file.
VALUES <- Data[,83:ncol(Data),drop=FALSE]

Mean.table <- aggregate(VALUES,by=GROUPS,FUN=mean)
colnames(Mean.table) <- c(colnames(Data)[1:5],paste(colnames(Mean.table)[6:ncol(Mean.table)],"mean",sep="_"))

SD.table <- aggregate(VALUES,by=GROUPS,FUN=sd)
colnames(SD.table) <- c(colnames(Data)[1:5],paste(colnames(SD.table)[6:ncol(SD.table)],"sd",sep="_"))

#Number of replicates per group. The table keeps its own column names; the previous code
#overwrote them with a single name, leaving NA for every other column.
N.table <- aggregate(VALUES,by=GROUPS,FUN=length)

#drop=FALSE keeps the "_sd" name even if only one column is summarised.
Combined <- cbind(Mean.table,SD.table[,6:ncol(SD.table),drop=FALSE],N.table[,ncol(N.table)])
colnames(Combined)[ncol(Combined)] <- "N.rep"

#Smallest difference detectable with a two-sided two-sample t test at power 0.9 and alpha 0.05.
#Groups with fewer than two replicates or without a positive finite standard deviation cannot be
#evaluated by power.t.test() and are left as NA instead of stopping the script.
Combined[,"Power"] <- NA_real_
for (i in seq_len(nrow(Combined)))
{
  N.REP <- Combined[i,"N.rep"]
  FIT.SD <- Combined[i,"Fitness_sd"]
  if (N.REP >= 2 && is.finite(FIT.SD) && FIT.SD > 0)
  {
    s.power <- power.t.test(n=N.REP,sd=FIT.SD,sig.level=0.05,power=0.9,type="two.sample",alternative="two.sided")
    Combined[i,"Power"] <- s.power$delta
  }
}

write.table(Combined,file="Summary.data.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)


#######################################################################################
#Compute P-values to compare fitness of each strain to fitness of the wild-type strain#
#######################################################################################

Data <- read.table("Experiment_s.estimates_filtered.txt",header=TRUE,as.is=TRUE)
Summary <- read.table("Summary.data.txt",header=TRUE,as.is=TRUE)

#For each summarised genotype, t test of its replicate fitness values against those of the
#reference strain measured in the same environment.
for (i in 1:nrow(Summary))
{
  CUR.ENV <- Summary[i,"ENVIRONMENT"]
  CUR <- Data[which(Data$STRAIN == Summary[i,"STRAIN"] & Data$ENVIRONMENT == CUR.ENV),]

  #Double-construct strains are compared to Y2682, all others to Y1189.
  REF.STRAIN <- if (Summary[i,"YFP.CONSTRUCT"] == "DOUBLE") "Y2682" else "Y1189"
  WT <- Data[which(Data$STRAIN == REF.STRAIN & Data$ENVIRONMENT == CUR.ENV),]

  Summary[i,"P.VAL.FITNESS"] <- t.test(CUR$Fitness,WT$Fitness)$p.value
}

write.table(Summary,file="Summary.Data.Pval.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)



#####################
###Fitness boxplot###
#####################

# Data <- read.table("Summary.data.txt",header=TRUE,as.is=TRUE)

# Data[,"ENVIRONMENT"] <- factor(Data[,"ENVIRONMENT"],levels=c("GLUCOSE","GALACTOSE","GLYCEROL","ETHANOL"))
# Data[,"STRAIN"] <- factor(Data[,"STRAIN"],levels=STRAINS$STRAIN)


# pdf("Fitness_Boxplot.pdf",height=9,width=15)
# #quartz(height=9,width=15)
# par(mar=c(4,9,3,1)+0.1,mgp=c(7,1,0))
# boxplot(s.estimate.2~ENVIRONMENT+STRAIN,data=Data,col=c(1:4),ylab="Selection Coefficient")
# abline(v=seq(0.5,45.5,by=5),lty=2)
# abline(h=0,col="#00000077")
# box(lwd=3)
# dev.off()

# pairwise.t.test(Data$s,Data$Mix,p.adjust.method="bonferroni")


#########################
###Power analysis plot###
#########################

#Detectable difference (delta at power 0.9, alpha 0.05) against mean relative fitness, one point per
#genotype and environment. Points and legend share one colour table so they cannot disagree;
#the legend previously listed GALACTOSE as "green" while the points were drawn in "darkgreen".
ENV.COLORS <- c(GLUCOSE="red",GALACTOSE="darkgreen",GLYCEROL="blue",ETHANOL="orange")
POINT.ENV <- as.character(Combined$ENVIRONMENT)
#Any environment other than the first three is drawn in the ETHANOL colour, as before.
POINT.COLORS <- ifelse(POINT.ENV %in% names(ENV.COLORS)[1:3],ENV.COLORS[POINT.ENV],ENV.COLORS[["ETHANOL"]])

pdf("Power.vs.Fitness.pdf",useDingbats=FALSE)
plot(abs(Combined$Fitness_mean),Combined$Power,pch=20,xlab="Relative Fitness",ylab="Delta at Power = 0.9 and Alpha = 0.05",col=POINT.COLORS)
legend("topright",names(ENV.COLORS),text.col=unname(ENV.COLORS))
dev.off()


########################################
#Compute Confidence Interval of Fitness#
########################################

Summary <- read.table("Summary.data.txt",header=TRUE,as.is=TRUE)

Summary[,"Fitness"] <- Summary[,"Fitness_mean"]

#Half-width of the 95% confidence interval of the mean (1.96 standard errors).
#Low.95 and High.95 hold the same half-width; later sections subtract one from the mean and add the other.
CI.HALF.WIDTH <- 1.96*Summary[,"Fitness_sd"]/sqrt(Summary[,"N.rep"])
Summary[,"Low.95"] <- CI.HALF.WIDTH
Summary[,"High.95"] <- CI.HALF.WIDTH

#Save file corresponding to the "Fitness" part of Figure 1-3 - Source Data 1
write.table(Summary,file="SUMMARY.DATA.FITNESS.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)


#################################
#PLOT REACTION NORMS FOR FITNESS#
#################################

#Fix the order of the environments along the x axis.
ENV.LEVELS <- c("GLUCOSE","GALACTOSE","GLYCEROL","ETHANOL")
Summary[,"ENVIRONMENT"] <- factor(Summary[,"ENVIRONMENT"],levels=ENV.LEVELS)

PLOT <- Summary[order(Summary$STRAIN,Summary$ENVIRONMENT),]

#Fitness laid out with one row per strain and one column per environment.
#NOTE(review): this layout assumes every strain has exactly four rows in PLOT, one per environment - confirm.
DATA <- matrix(PLOT$Fitness,ncol=4,byrow=TRUE)
dimnames(DATA) <- list(PLOT[seq(1,nrow(PLOT),by=4),"MUTATION"],PLOT[1:4,"ENVIRONMENT"])

#Common y range for all strains.
FIT.RANGE <- range(DATA,na.rm=TRUE)
MIN <- FIT.RANGE[1]
MAX <- FIT.RANGE[2]


pdf("FITNESS.REACTION.NORMS.pdf",useDingbats=FALSE)

#Empty frame first, then one connected line per strain.
X.POS <- 1:ncol(DATA)
plot(X.POS,DATA[1,],type="n",ylim=c(MIN,MAX),pch=20,ylab="Fitness relative to WT",xlab="",xaxt="n")
axis(1,at=c(X.POS),labels=colnames(DATA))

for (i in 1:nrow(DATA))
{
  points(X.POS,DATA[i,],type="b",ylim=c(MIN,MAX),pch=20,ylab="Fitness relative to WT",xlab="",xaxt="n")
}

dev.off()


#############################################################################
# 6 - Relationship between median expression level and fitness (Figure 2C). #
#############################################################################

#Clear memory
rm(list=ls())
options(warn=-1)

#Libraries and functions
library(plotrix)
library(Hmisc)
library(pcaPP)
library(msir)
library(quantmod)

box <- graphics::box


#Load Data
parent.dir <- "/Path.to.input.files"
setwd(parent.dir)

#Load files generated in sections 4 and 5.
Fit <- read.table("SUMMARY.DATA.FITNESS.txt",header=TRUE,as.is=TRUE)
Expression <- read.table("SUMMARY.DATA.EXPRESSION.txt",header=TRUE,as.is=TRUE)
All.Fit <- read.table("Experiment_s.estimates_filtered.txt",header=TRUE,as.is=TRUE)

#Strain Y2675 is removed from the expression table and the strain with ID 3 is treated as a single-construct strain.
Expression <- subset(Expression, STRAIN != "Y2675")
Expression[which(Expression$ID == 3),"YFP.CONSTRUCT"] <- "SINGLE" 

#Harmonise the mutation labels of the three tables.
#The relabelling is vectorised for all tables (it was already for All.Fit): which() skips rows
#with missing labels, and empty tables are handled, unlike the former row-by-row loops.
#"URA3" carried by a double construct is renamed "WT_WT".
All.Fit[which(All.Fit$MUTATION == "URA3" & All.Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Expression[which(Expression$MUTATION == "URA3" & Expression$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Fit[which(Fit$MUTATION == "URA3" & Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"

#In the expression table, "NEGATIVE" is renamed "TDH3.Deletion".
Expression[which(Expression$MUTATION == "NEGATIVE"),"MUTATION"] <- "TDH3.Deletion"

#Same ordering for both summary tables, so that rows can later be paired by position.
Expression <- Expression[order(Expression$ENVIRONMENT,Expression$MUTATION),]
Fit <- Fit[order(Fit$ENVIRONMENT, Fit$MUTATION),]

#Colors for the different categories of mutants
#Lookup table from CLASS to plotting colour; rows whose CLASS is not listed are left untouched.
CLASS.COLORS <- c(REF="black",
                  REF.2X="black",
                  TFBS="#FF0000FF",
                  TFBS.2="#FF0000FF",
                  TATA="#0000FFFF",
                  TFBS.TATA="#990099FF",
                  WT="gray",
                  TFBS.2X="green",
                  TATA.2X="green",
                  TFBS.TATA.2X="green",
                  WT.2X="green")

KNOWN.CLASS <- which(Expression[,"CLASS"] %in% names(CLASS.COLORS))
if (length(KNOWN.CLASS) > 0)
{
  Expression[KNOWN.CLASS,"COLOR"] <- unname(CLASS.COLORS[as.character(Expression[KNOWN.CLASS,"CLASS"])])
}


#######################
#FITNESS vs EXPRESSION#
#######################

#Filter out strains with more than two copies of YFP or TDH3 (as measured by pyrosequencing).
#The same exclusion list is applied to the expression and to the fitness summary. The plotting
#calls below pair GLU.EXPR and GLU.FIT by row position, so both tables must contain the same
#mutations in the same order (both were sorted by ENVIRONMENT then MUTATION above).
GLU.EXPR <- subset(Expression, ENVIRONMENT == "GLUCOSE" & MUTATION %nin% c("TATA.81_TATA.81","m75_TATA.81","m63.m90_m63.m90","TATA.129_TATA.129","TATA.42_TATA.42","WT"))
GLU.FIT <- subset(Fit, ENVIRONMENT == "GLUCOSE" & MUTATION %nin% c("TATA.81_TATA.81","m75_TATA.81","m63.m90_m63.m90","TATA.129_TATA.129","TATA.42_TATA.42","WT"))
#Replicate-level fitness values for the mutations kept in GLU.FIT.
GLU.ALL <- subset(All.Fit, ENVIRONMENT == "GLUCOSE" & MUTATION %in% GLU.FIT[,"MUTATION"])

####1. Loess fit#########

#Corresponds to Figure 2C (further formatting was done with Illustrator).
pdf("EXPR.vs.FITNESS.GLUCOSE.pdf",useDingbats=FALSE,height=5,width=6)

par(mar=c(6,6,4,1))
#First call: sets up axes, titles and points; its error bars are fully transparent (scol="#00000000").
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),li=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),err="x",col=GLU.EXPR[,"COLOR"],xlab="Expression level relative to WT",ylab="Fitness relative to WT",pch=21,sfrac=0,xlim=c(0,2.1),ylim=c(0.936,1.01),gap=0,main="Expression vs Fitness in Glucose",cex.axis=1.2,cex.lab=1.4,font.lab=2,font.axis=2,cex.main=2,cex=1.5,lwd=2.02,scol="#00000000")
#Second call (add=TRUE): horizontal bars = mean expression +/- 1.96 standard errors.
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),li=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),err="x",col=GLU.EXPR[,"COLOR"],xlab="Expression level relative to WT",ylab="Fitness relative to WT",pch=21,sfrac=0,xlim=c(0,2.1),ylim=c(0.936,1.01),gap=0,main="Expression vs Fitness in Glucose",cex.axis=1.2,cex.lab=1.4,font.lab=2,font.axis=2,cex.main=2,add=TRUE,cex=1.5,lwd=1.01)
#Third call (add=TRUE): vertical bars = fitness + High.95 / - Low.95 (interval half-widths read from the fitness summary file).
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=GLU.FIT$Fitness+GLU.FIT$High.95,li=GLU.FIT$Fitness-GLU.FIT$Low.95,err="y",col=GLU.EXPR[,"COLOR"],pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
#Dashed guides at relative expression 1 and relative fitness 1.
abline(v=1,lty=2)
abline(h=1,lty=2)
legend("bottomright",c("REF","TFBS","TATA","TATA & TFBS","2 CONSTRUCTS"),box.lwd=0,text.col=c("black","#FF0000FF","#0000FFFF","#990099FF","green"),bty="n",cex=2)

#Replicates of the strains with ID 2 and ID 51 (WT.1 and WT.2 are not referenced again within this section).
WT.1 <- subset(GLU.ALL, ID == 2)
WT.2 <- subset(GLU.ALL, ID == 51)

#Attach to every replicate the mean relative expression of its genotype
#(first row of GLU.EXPR matching both MUTATION and YFP.CONSTRUCT).
for (i in 1:nrow(GLU.ALL))
{
  EXPR <- subset(GLU.EXPR, MUTATION == GLU.ALL[i,"MUTATION"] & YFP.CONSTRUCT == GLU.ALL[i,"YFP.CONSTRUCT"])
  GLU.ALL[i,"YFP.MEDIAN"] <- EXPR[1,"YFP.MEDIAN.RELATIVE.MEAN"]
}

#Loess weights: 1 for every replicate, except ID 43 (weight 50) and ID 2 (weight 5).
#NOTE(review): the reason for up-weighting these two IDs is not visible here - confirm.
WEIGHT <- rep(1,nrow(GLU.ALL))
WEIGHT[which(GLU.ALL$ID ==43)] <- 50
WEIGHT[which(GLU.ALL$ID ==2)] <- 5

#Weighted local quadratic regression of replicate fitness on genotype expression.
GLU.MODEL <- loess(Fitness ~ YFP.MEDIAN, data = GLU.ALL, span=2/3, degree=2, weights = WEIGHT)

#Evaluate the fit on a grid (step 0.001) spanning the observed expression range.
x.mid <- seq(min(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),max(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),by=0.001)
y.mid <- predict(GLU.MODEL, x.mid, se=TRUE)

points(x.mid,y.mid$fit,type="l",col="#00000099",lty=2,lwd=1.5)

#Band of +/- 2.58 standard errors around the fitted curve (the point intervals above use 1.96).
y.err <- c(y.mid$fit + 2.58*y.mid$se.fit,rev(y.mid$fit - 2.58*y.mid$se.fit))

polygon(c(x.mid,rev(x.mid)),y.err,col="#00000055",border=NA)

dev.off()


##########################################################################
# 7 - Impact of expression noise (noise strength) on fitness (Figure 3). #
##########################################################################

#Clear memory
rm(list=ls())
options(warn=-1)

#Libraries and functions
library(plotrix)
library(Hmisc)
library(pcaPP)
library(msir)
library(quantmod)

box <- graphics::box

#Two-sided permutation test for a difference in medians between two groups.
#  GROUP.1, GROUP.2 : numeric vectors of observations.
#  N.PERM           : number of random permutations (default 100000, the value that was hard-coded before).
#Returns (b + 1)/(N.PERM + 1), where b is the number of permutations whose absolute difference in
#medians is strictly greater than the observed one. Returns NA when the observed difference cannot
#be computed (an empty group or missing values); the previous code returned 1/(N.PERM + 1) in that
#case, i.e. the most significant P value possible.
#NOTE(review): the comparison is strict (>), so permutations tying with the observed difference are
#not counted; the usual convention counts ties (>=). Kept as is to leave existing results unchanged.
PERMUTE <- function(GROUP.1,GROUP.2,N.PERM=100000) {
  OBS.DIFF <- abs(median(GROUP.1)-median(GROUP.2))

  if (length(GROUP.1) == 0 || length(GROUP.2) == 0 || is.na(OBS.DIFF))
  {
    return(NA_real_)
  }

  POOL <- c(GROUP.1,GROUP.2)
  N.POOL <- length(POOL)
  N.1 <- length(GROUP.1)

  PERM.DIFF <- rep(NA_real_,N.PERM)

  for (i in seq_len(N.PERM))
  {
    #Draw the positions assigned to the first group; the remaining positions form the second group.
    POS.1 <- sample.int(N.POOL,size=N.1,replace=FALSE)
    PERM.DIFF[i] <- abs(median(POOL[POS.1])-median(POOL[-POS.1]))
  }

  #The +1 in numerator and denominator keeps the P value strictly positive.
  (sum(PERM.DIFF > OBS.DIFF) + 1)/(N.PERM + 1)
}

#Load Data
parent.dir <- "/Path.to.input.files"
setwd(parent.dir)

Fit <- read.table("GLUCOSE.SD/SUMMARY.DATA.FITNESS.txt",header=TRUE,as.is=TRUE)
Expression <- read.table("GLUCOSE.SD/SUMMARY.DATA.EXPRESSION.txt",header=TRUE,as.is=TRUE)
All.Fit <- read.table("GLUCOSE.SD/Experiment_s.estimates_filtered.txt",header=TRUE,as.is=TRUE)

#Strain Y2675 is removed from the expression table and the strain with ID 3 is treated as a single-construct strain.
Expression <- subset(Expression, STRAIN != "Y2675")
Expression[which(Expression$ID == 3),"YFP.CONSTRUCT"] <- "SINGLE" 

#Section 5 drops the OUTLIER and OUTLIER.2 columns before writing Experiment_s.estimates_filtered.txt,
#so the outlier filter is only applied when the column is actually present in the file that was read.
if ("OUTLIER.2" %in% names(All.Fit))
{
  All.Fit <- subset(All.Fit, OUTLIER.2 == "NO")
}

#Harmonise the mutation labels of the three tables.
#The relabelling is vectorised for all tables (it was already for All.Fit): which() skips rows
#with missing labels, and empty tables are handled, unlike the former row-by-row loops.
#"URA3" carried by a double construct is renamed "WT_WT".
All.Fit[which(All.Fit$MUTATION == "URA3" & All.Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Expression[which(Expression$MUTATION == "URA3" & Expression$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Fit[which(Fit$MUTATION == "URA3" & Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"

#In the expression table, "NEGATIVE" is renamed "TDH3.Deletion".
Expression[which(Expression$MUTATION == "NEGATIVE"),"MUTATION"] <- "TDH3.Deletion"

#Same ordering for both summary tables, so that rows can later be paired by position.
Expression <- Expression[order(Expression$ENVIRONMENT,Expression$MUTATION),]
Fit <- Fit[order(Fit$ENVIRONMENT, Fit$MUTATION),]

#Colors for the different categories of mutants
#Lookup table from CLASS to plotting colour; rows whose CLASS is not listed are left untouched.
CLASS.COLORS <- c(REF="black",
                  REF.2X="black",
                  TFBS="#FF0000FF",
                  TFBS.2="#FF0000FF",
                  TATA="#0000FFFF",
                  TFBS.TATA="#990099FF",
                  WT="gray",
                  TFBS.2X="green",
                  TATA.2X="green",
                  TFBS.TATA.2X="green",
                  WT.2X="green")

KNOWN.CLASS <- which(Expression[,"CLASS"] %in% names(CLASS.COLORS))
if (length(KNOWN.CLASS) > 0)
{
  Expression[KNOWN.CLASS,"COLOR"] <- unname(CLASS.COLORS[as.character(Expression[KNOWN.CLASS,"CLASS"])])
}



#################################
#SHOW IMPACT OF NOISE ON FITNESS#
#################################

##################################
#Noise strength (Fano) in Glucose#
##################################

####1-Fit non linear regression to Expression data#####

#Glucose strains kept for the noise analysis: the listed genotypes are excluded and only strains
#with a relative median expression below 1.5 are retained.
EXCLUDED.MUTATIONS <- c("TATA.81_TATA.81","m75_TATA.81","m63.m90_m63.m90","TATA.129_TATA.129","WT","TDH3.Deletion","TATA.42_TATA.42","WT_WT")
GLU.EXPR <- Expression[which(Expression$ENVIRONMENT == "GLUCOSE" & !(Expression$MUTATION %in% EXCLUDED.MUTATIONS) & Expression$YFP.MEDIAN.RELATIVE.MEAN < 1.5),]
GLU.FIT <- Fit[which(Fit$ENVIRONMENT == "GLUCOSE" & Fit$MUTATION %in% GLU.EXPR[,"MUTATION"]),]
GLU.ALL <- All.Fit[which(All.Fit$ENVIRONMENT == "GLUCOSE" & All.Fit$MUTATION %in% GLU.FIT[,"MUTATION"]),]

#Local quadratic regression of noise strength (Fano) on median expression.
MODEL <- loess(GLU.EXPR$YFP.FANO.RELATIVE.MEAN~GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,degree=2,span=2/3)

x.mid <- seq(min(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),max(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),by=0.001)
y.mid <- predict(MODEL, x.mid, se=TRUE)

####2-Identify strains with low and high noise strength given their median expression level######

#Delta Fano: observed noise strength minus the value predicted from the median expression level.
GLU.EXPR[,"FANO.DIFF"] <- GLU.EXPR$YFP.FANO.RELATIVE.MEAN - predict(MODEL, GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, se=TRUE)$fit

#HIGH above the curve, LOW below it, UNCLEAR when the deviation is smaller than 0.01 in absolute value.
FANO.DELTA <- GLU.EXPR[,"FANO.DIFF"]
FANO.LABEL <- rep(NA_character_,length(FANO.DELTA))
FANO.LABEL[FANO.DELTA > 0] <- "HIGH"
FANO.LABEL[FANO.DELTA < 0] <- "LOW"
FANO.LABEL[abs(FANO.DELTA) < 0.01] <- "UNCLEAR"
GLU.EXPR[,"FANO.CLASS"] <- FANO.LABEL

#Copy the class to the fitness table (rows of GLU.EXPR and GLU.FIT are paired by position).
GLU.FIT[,"FANO.CLASS"] <- factor(GLU.EXPR[,"FANO.CLASS"],levels=c("LOW","HIGH","UNCLEAR"))

#Plotting colour of each noise class.
FANO.COLORS <- c(LOW="blue",HIGH="red",UNCLEAR="black")
GLU.FIT[,"COLOR"] <- unname(FANO.COLORS[as.character(GLU.FIT[,"FANO.CLASS"])])

#Corresponds to Figure 3A.
#Noise strength (Fano) against median expression, one point per strain, coloured by noise class.
pdf("FANO.CATEGORIES.GLUCOSE.pdf",useDingbats=F,height=5,width=5)
#First call: sets up axes, titles and points; its error bars are fully transparent (scol="#00000000").
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.EXPR$YFP.FANO.RELATIVE.MEAN,ui=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),li=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),err="x",col=GLU.FIT[,"COLOR"],xlab="Median Expression relative to WT",ylab="Expression Fano relative to WT",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(0,2.5),gap=0,main="Median vs Fano Expression in Glucose",cex=1.5,lwd=2.02,scol="#00000000")
#Second call (add=T): horizontal bars = median expression +/- 1.96 standard errors.
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.EXPR$YFP.FANO.RELATIVE.MEAN,ui=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),li=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),err="x",col=GLU.FIT[,"COLOR"],add=T,sfrac=0,gap=0,cex=1.5,lwd=1.01,pch=21)
#Third call (add=T): vertical bars = Fano +/- 1.96 standard errors.
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.EXPR$YFP.FANO.RELATIVE.MEAN,ui=GLU.EXPR$YFP.FANO.RELATIVE.MEAN+1.96*GLU.EXPR$YFP.FANO.RELATIVE.SD/sqrt(GLU.EXPR$N),li=GLU.EXPR$YFP.FANO.RELATIVE.MEAN-1.96*GLU.EXPR$YFP.FANO.RELATIVE.SD/sqrt(GLU.EXPR$N),err="y",col=GLU.FIT[,"COLOR"],add=T,sfrac=0,gap=0,cex=1.5,lwd=1.01,pch=21)
#Loess curve of Fano on median expression (x.mid / y.mid computed in step 1).
points(x.mid,y.mid$fit,type="l",col="#00000099",lwd=2.5)
legend("topright",c("Delta Fano > +1%","Delta Fano < -1%","|Delta Fano| < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")
#Dashed guides at relative expression 1 and relative Fano 1.
abline(v=1,lty=2)
abline(h=1,lty=2)
dev.off()

####3-Fit model to fitness data and compute expected fitness######

#Corresponds to Figure 3C.
#Fitness against median expression, coloured by noise class, with the loess curve used as the
#expected fitness for a given median expression level.
pdf("FITNESS.EXPECTATION.GLUCOSE.FANO.pdf",useDingbats=F)
par(mar=c(6,6,4,1))
#First call: sets up axes, titles and points; its error bars are fully transparent (scol="#00000000").
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),li=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),err="x",col=GLU.FIT[,"COLOR"],xlab="Median Expression relative to WT",ylab="Fitness relative to WT",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(0.936,1.01),gap=0,main="Expression vs Fitness in Glucose",cex=1.5,lwd=2.02,scol="#00000000")
#Second call (add=TRUE): horizontal bars = median expression +/- 1.96 standard errors.
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),li=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),err="x",col=GLU.FIT[,"COLOR"],pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
#Third call (add=TRUE): vertical bars = fitness + High.95 / - Low.95.
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=GLU.FIT$Fitness+GLU.FIT$High.95,li=GLU.FIT$Fitness-GLU.FIT$Low.95,err="y",col=GLU.FIT[,"COLOR"],pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
abline(v=1,lty=2)
abline(h=1,lty=2)
legend("bottomright",c("Delta Fano > +1%","Delta Fano < -1%","|Delta Fano | < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")

#Local quadratic regression of mean fitness on median expression (rows of GLU.FIT and GLU.EXPR paired by position).
MODEL.FIT <- loess(GLU.FIT$Fitness~GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,degree=2,span=2/3)

#Evaluate the fit on a grid (step 0.001) spanning the observed expression range.
x.mid <- seq(min(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),max(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),by=0.001)
y.mid <- predict(MODEL.FIT, x.mid, se=TRUE)
points(x.mid,y.mid$fit,type="l",col="#00000099",lwd=2.5)

#Highest fitted fitness, and a horizontal guide 0.005 below it.
MAX.FITNESS <- max(y.mid$fit) 
abline(h=MAX.FITNESS-0.005,lty=2)

#Expression level at which the fitted curve reaches its maximum.
OPT.EXPR <- x.mid[which(y.mid$fit == MAX.FITNESS)]

#Distance between the fitted curve and the level MAX.FITNESS - 0.005; it is smallest where the curve crosses that level.
ABS.FIT <- abs(y.mid$fit-(MAX.FITNESS - 0.005))

#Expression level(s) at the local minima of ABS.FIT; EXPR.THRESHOLD[1] is used below to separate
#strains with "suboptimal" and "optimal" median expression.
#NOTE(review): findValleys() comes from quantmod; confirm its index offset convention.
EXPR.THRESHOLD <- x.mid[findValleys(ABS.FIT)]
abline(v=EXPR.THRESHOLD,lty=2)

dev.off()

#Compute Delta Fitness.
#Observed mean fitness minus the fitness predicted by MODEL.FIT at the strain's median expression
#level (rows of GLU.FIT and GLU.EXPR are paired by position).
GLU.FIT[,"FIT.DIFF"] <- GLU.FIT$Fitness - predict(MODEL.FIT, GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, se=TRUE)$fit

#Save the per-strain deviations in noise strength and in fitness.
write.table(GLU.EXPR,"DELTA.FANO.txt",row.names=FALSE,sep="\t",quote=FALSE)
write.table(GLU.FIT,"DELTA.FITNESS.txt",row.names=FALSE,sep="\t",quote=FALSE)


####4-Plot correlation between Delta noise and Mean expression#####

#Expression tables for three sets of strains: all strains with positive median expression, strains
#below the expression threshold found in step 3, and strains above it.
MEDIAN.EXPR <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
P.EXPR.ALL <- GLU.EXPR[which(MEDIAN.EXPR > 0),]
P.EXPR.LOW <- GLU.EXPR[which(MEDIAN.EXPR < EXPR.THRESHOLD[1]),]
P.EXPR.HIGH <- GLU.EXPR[which(MEDIAN.EXPR > EXPR.THRESHOLD[1]),]

P.EXPR.ALL[,"MEDIAN.CLASS"] <- "ALL"
P.EXPR.LOW[,"MEDIAN.CLASS"] <- "LOW"
P.EXPR.HIGH[,"MEDIAN.CLASS"] <- "HIGH"

#Matching fitness tables, selected through the mutation names.
P.FIT.ALL <- GLU.FIT[GLU.FIT$MUTATION %in% P.EXPR.ALL$MUTATION,]
P.FIT.LOW <- GLU.FIT[GLU.FIT$MUTATION %in% P.EXPR.LOW$MUTATION,]
P.FIT.HIGH <- GLU.FIT[GLU.FIT$MUTATION %in% P.EXPR.HIGH$MUTATION,]

P.FIT.ALL[,"MEDIAN.CLASS"] <- "ALL"
P.FIT.LOW[,"MEDIAN.CLASS"] <- "LOW"
P.FIT.HIGH[,"MEDIAN.CLASS"] <- "HIGH"

#Stack the three sets (a strain appears once in ALL and once in LOW or HIGH).
P.FIT <- rbind(P.FIT.ALL,P.FIT.LOW,P.FIT.HIGH)
P.EXPR <- rbind(P.EXPR.ALL,P.EXPR.LOW,P.EXPR.HIGH)

#Strains without a clear noise class are left out of the comparisons.
P.FIT <- P.FIT[which(P.FIT$FANO.CLASS != "UNCLEAR"),]
P.EXPR <- P.EXPR[which(P.EXPR$FANO.CLASS != "UNCLEAR"),]

#Factor levels fix the order of the groups in the boxplots below.
P.FIT$MEDIAN.CLASS <- factor(P.FIT$MEDIAN.CLASS, levels=c("ALL","LOW","HIGH"))
P.EXPR$MEDIAN.CLASS <- factor(P.EXPR$MEDIAN.CLASS, levels=c("ALL","LOW","HIGH"))

P.FIT <- droplevels(P.FIT)
P.EXPR <- droplevels(P.EXPR)

P.FIT[,"FANO.CLASS"] <- factor(P.FIT[,"FANO.CLASS"],levels=c("LOW","HIGH"))
P.EXPR[,"FANO.CLASS"] <- factor(P.EXPR[,"FANO.CLASS"],levels=c("LOW","HIGH"))

####5-Permutation tests to detect significant impact of noise on fitness#####

#One row per tested variable, one column per set of strains.
P.VAL.GLU <- matrix(data = NA, nrow = 3, ncol = 3,
                    dimnames = list(c("Delta.Fitness","Median.Expression","Delta.FANO"),
                                    c("ALL.STRAINS","SUBOPTIMAL.MEDIAN","OPTIMAL.MEDIAN")))


#Fitness#

#Low-noise versus high-noise strains compared for Delta Fitness within each set of strains
#(ALL, then LOW, then HIGH median expression).
MEDIAN.SETS <- c("ALL","LOW","HIGH")
for (k in seq_along(MEDIAN.SETS))
{
  GROUP.1 <- P.FIT[which(P.FIT$FANO.CLASS == "LOW" & P.FIT$MEDIAN.CLASS == MEDIAN.SETS[k]),"FIT.DIFF"]
  GROUP.2 <- P.FIT[which(P.FIT$FANO.CLASS == "HIGH" & P.FIT$MEDIAN.CLASS == MEDIAN.SETS[k]),"FIT.DIFF"]
  P.VAL.GLU[1,k] <- PERMUTE(GROUP.1,GROUP.2)
}

PV <- paste("P =",format.pval(P.VAL.GLU[1,],digits=4,eps=0.0001,sep=""))

#Corresponds to Figure 3F.
pdf("FITNESS.vs.FANO.CLASS.GLUCOSE.pdf",useDingbats=FALSE,height=6,width=12)
#quartz(height=6,width=12)
boxplot(P.FIT$FIT.DIFF~P.FIT$FANO.CLASS+P.FIT$MEDIAN.CLASS,notch=TRUE,varwidth=TRUE,ylab="Delta Fitness",col=c("#0000FF99","#FF000099"),ylim=c(-0.01,0.01),at=c(1,2,3.5,4.5,6,7),xaxt="n")
abline(v=c(2.75,5.25),lty=2)
axis(1,at=c(1.5,4,6.5),labels=c("All Strains","Suboptimal Median Expression","Optimal Median Expression"),tick=FALSE,font=2)
legend("topright",c("Delta Fano < -1%","Delta Fano > +1%"),pch=22,bty="n",cex=1.2,pt.cex=2,pt.bg=c("#0000FF99","#FF000099"))
#P value printed under each pair of boxes.
PV.X <- c(1.75,4.25,6.75)
for (k in 1:3)
{
  legend(x=PV.X[k],y=-0.008,PV[k],bty="n")
}
dev.off()

#Median#
#Low-noise versus high-noise strains compared for median expression within each set of strains
#(ALL, then LOW, then HIGH median expression).
MEDIAN.SETS <- c("ALL","LOW","HIGH")
for (k in seq_along(MEDIAN.SETS))
{
  GROUP.1 <- P.EXPR[which(P.EXPR$FANO.CLASS == "LOW" & P.EXPR$MEDIAN.CLASS == MEDIAN.SETS[k]),"YFP.MEDIAN.RELATIVE.MEAN"]
  GROUP.2 <- P.EXPR[which(P.EXPR$FANO.CLASS == "HIGH" & P.EXPR$MEDIAN.CLASS == MEDIAN.SETS[k]),"YFP.MEDIAN.RELATIVE.MEAN"]
  P.VAL.GLU[2,k] <- PERMUTE(GROUP.1,GROUP.2)
}

PV <- paste("P =",format.pval(P.VAL.GLU[2,],digits=4,eps=0.0001,sep=""))

#Corresponds to Figure 3 - figure supplement 8B.
pdf("MEDIAN.vs.FANO.CLASS.GLUCOSE.pdf",useDingbats=FALSE,height=6,width=12)
#quartz(height=6,width=12)
boxplot(P.EXPR$YFP.MEDIAN.RELATIVE.MEAN~P.EXPR$FANO.CLASS+P.EXPR$MEDIAN.CLASS,notch=TRUE,varwidth=TRUE,ylab="Median Expression",col=c("#0000FF99","#FF000099"),ylim=c(0,1.5),at=c(1,2,3.5,4.5,6,7),xaxt="n")
abline(v=c(2.75,5.25),lty=2)
axis(1,at=c(1.5,4,6.5),labels=c("All Strains","Suboptimal Median Expression","Optimal Median Expression"),tick=FALSE,font=2)
legend("bottomright",c("Delta Fano < -1%","Delta Fano > +1%"),pch=22,bty="n",cex=1.2,pt.cex=2,pt.bg=c("#0000FF99","#FF000099"))
#P value printed above each pair of boxes.
PV.X <- c(0.25,2.75,5.25)
for (k in 1:3)
{
  legend(x=PV.X[k],y=1.5,PV[k],bty="n")
}
dev.off()

#Delta FANO#

#Low-noise versus high-noise strains compared for Delta Fano within each set of strains
#(ALL, then LOW, then HIGH median expression).
MEDIAN.SETS <- c("ALL","LOW","HIGH")
for (k in seq_along(MEDIAN.SETS))
{
  GROUP.1 <- P.EXPR[which(P.EXPR$FANO.CLASS == "LOW" & P.EXPR$MEDIAN.CLASS == MEDIAN.SETS[k]),"FANO.DIFF"]
  GROUP.2 <- P.EXPR[which(P.EXPR$FANO.CLASS == "HIGH" & P.EXPR$MEDIAN.CLASS == MEDIAN.SETS[k]),"FANO.DIFF"]
  P.VAL.GLU[3,k] <- PERMUTE(GROUP.1,GROUP.2)
}

PV <- paste("P =",format.pval(P.VAL.GLU[3,],digits=4,eps=0.0001,sep=""))

#Corresponds to Figure 3 - figure supplement 8C.
pdf("DELTA.FANO.vs.FANO.CLASS.GLUCOSE.pdf",useDingbats=FALSE,height=6,width=12)
#quartz(height=6,width=12)
boxplot(P.EXPR$FANO.DIFF~P.EXPR$FANO.CLASS+P.EXPR$MEDIAN.CLASS,notch=TRUE,varwidth=TRUE,ylab="Delta Fano",col=c("#0000FF99","#FF000099"),ylim=c(-1,2),at=c(1,2,3.5,4.5,6,7),xaxt="n")
abline(v=c(2.75,5.25),lty=2)
axis(1,at=c(1.5,4,6.5),labels=c("All Strains","Suboptimal Median Expression","Optimal Median Expression"),tick=FALSE,font=2)
legend("topright",c("Delta Fano < 0","Delta Fano > 0"),pch=22,bty="n",cex=1.2,pt.cex=2,pt.bg=c("#0000FF99","#FF000099"))
#P value printed above each pair of boxes.
PV.X <- c(0.25,2.75,5.25)
for (k in 1:3)
{
  legend(x=PV.X[k],y=2,PV[k],bty="n")
}
dev.off()

#Save the three rows of P values (row names are not written to the file).
write.table(P.VAL.GLU,"P.VAL.DELTA.FANO.txt",row.names=FALSE,sep="\t",quote=FALSE)


####6-Plot Fitness for two classes of Fano#####

#Corresponds to Figure 3B.
#Fitness against median expression with a separate loess curve for low-noise and high-noise strains.
pdf("FITNESS.CURVES.FANO.CATEGORIES.GLUCOSE.pdf",useDingbats=F,height=5,width=6)
#quartz(height=9,width=13)
par(mar=c(6,6,4,1))
#First call: sets up axes, titles and points; its error bars are fully transparent (scol="#00000000").
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),li=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),err="x",col=GLU.FIT$COLOR,xlab="Expression level relative to WT",ylab="Fitness relative to WT",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(0.936,1.01),gap=0,main="Expression vs Fitness in Glucose",cex=1.5,lwd=2.02,scol="#00000000")
#Second call (add=TRUE): horizontal bars = median expression +/- 1.96 standard errors.
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),li=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),err="x",col=GLU.FIT$COLOR,pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
#Third call (add=TRUE): vertical bars = fitness + High.95 / - Low.95.
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=GLU.FIT$Fitness+GLU.FIT$High.95,li=GLU.FIT$Fitness-GLU.FIT$Low.95,err="y",col=GLU.FIT$COLOR,pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
abline(v=1,lty=2)
abline(h=1,lty=2)
legend("bottomright",c("Delta Fano > +1%","Delta Fano < -1%","|Delta Fano| < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")

#Split both tables by noise class (rows of the FIT and EXPR tables stay paired by position).
LOW.FIT <- subset(GLU.FIT, FANO.CLASS == "LOW")
LOW.EXPR <- subset(GLU.EXPR, FANO.CLASS == "LOW")
HIGH.FIT <- subset(GLU.FIT, FANO.CLASS == "HIGH")
HIGH.EXPR <- subset(GLU.EXPR, FANO.CLASS == "HIGH")

#Low-noise curve: every strain has weight 1 except the strain(s) with the lowest fitness, which get weight 10.
#NOTE(review): the reason for this weighting is not visible here, and the high-noise fit below is unweighted - confirm.
WEIGHT <- rep(1,nrow(LOW.EXPR))
WEIGHT[which(LOW.FIT$Fitness == min(LOW.FIT$Fitness))] <- 10

#Loess fit for low-noise strains, drawn in blue with a band of +/- 1.96 standard errors.
MODEL.FIT <- loess(LOW.FIT$Fitness~LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN,span=2/3,weights=WEIGHT)
x.mid <- seq(min(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN),max(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN),by=0.001)
y.mid <- predict(MODEL.FIT, x.mid, se=TRUE)
points(x.mid,y.mid$fit,type="l",col="blue",lwd=1.5,lty=2)
y.err <- c(y.mid$fit + 1.96*y.mid$se.fit,rev(y.mid$fit - 1.96*y.mid$se.fit))
polygon(c(x.mid,rev(x.mid)),y.err,col="#0000FF22",border=NA)

#Loess fit for high-noise strains, drawn in red with a band of +/- 1.96 standard errors.
#MODEL.FIT, x.mid and y.mid are overwritten here: after this block they describe the high-noise fit.
MODEL.FIT <- loess(HIGH.FIT$Fitness~HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN,span=2/3)
x.mid <- seq(min(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN),max(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN),by=0.001)
y.mid <- predict(MODEL.FIT, x.mid, se=TRUE)
points(x.mid,y.mid$fit,type="l",col="red",lwd=1.5,lty=2)
y.err <- c(y.mid$fit + 1.96*y.mid$se.fit,rev(y.mid$fit - 1.96*y.mid$se.fit))
polygon(c(x.mid,rev(x.mid)),y.err,col="#FF000022",border=NA)

dev.off()


####7-Plot Delta Fitness vs Delta FANO#####

#Corresponds to Figure 3D-E.
#Two pages in one PDF: strains below the expression threshold first, then strains above it.
pdf("DELTA.FITNESS.vs.DELTA.FANO.GLUCOSE.pdf",useDingbats=F,height=9,width=9)

#Low Expression

#Strains whose median expression is below the first threshold found in step 3, and their fitness rows.
LOW.EXPR <- subset(GLU.EXPR, YFP.MEDIAN.RELATIVE.MEAN < EXPR.THRESHOLD[1])
LOW.FIT <- subset(GLU.FIT, MUTATION %in% LOW.EXPR$MUTATION)

#Point fill: blue-to-red ramp in 15 steps, indexed by the square root of the median expression.
COL <- colorRampPalette(c("blue","red"))
COLOR <- COL(15)[as.numeric(cut(sqrt(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN),breaks=15))]

#Colours of the gradient legend, built on the same square-root scale.
SEQ <- seq(sqrt(min(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN)),sqrt(max(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN)),by=0.01)
GRADIENT <- COL(15)[as.numeric(cut(SEQ,breaks=15))]
#Correlation between Delta Fano and Delta Fitness (rows of LOW.EXPR and LOW.FIT paired by position).
TEST <- cor.test(LOW.EXPR$FANO.DIFF,LOW.FIT$FIT.DIFF)
#Expression value at the middle of the square-root scale, used as the central legend label.
MID <- mean(c(sqrt(min(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN)),sqrt(max(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN))))^2

#quartz(height=9,width=9)
par(mar=c(6,6,4,1))
#Points with horizontal bars = Delta Fano +/- 1.96 standard errors of the relative Fano.
plotCI(LOW.EXPR$FANO.DIFF,LOW.FIT$FIT.DIFF,ui=LOW.EXPR$FANO.DIFF+1.96*LOW.EXPR$YFP.FANO.RELATIVE.SD/sqrt(LOW.EXPR$N),li=LOW.EXPR$FANO.DIFF-1.96*LOW.EXPR$YFP.FANO.RELATIVE.SD/sqrt(LOW.EXPR$N),err="x",xlab="Delta Fano",ylab="Delta Fitness",pch=21,sfrac=0,xlim=c(-1.5,1.5),ylim=c(-0.01,0.01),gap=0,main="Suboptimal Expression in Glucose",cex=1.6,cex.axis=1.2,cex.lab=1.4,font.lab=2,font.axis=2,cex.main=2,pt.bg=COLOR,col="#00000066",lwd=1.25)
abline(v=0,lty=2,lwd=1.25)
abline(h=0,lty=2,lwd=1.25)
#Vertical bars = Delta Fitness + High.95 / - Low.95.
plotCI(LOW.EXPR$FANO.DIFF,LOW.FIT$FIT.DIFF,ui=LOW.FIT$FIT.DIFF+LOW.FIT$High.95,li=LOW.FIT$FIT.DIFF-LOW.FIT$Low.95,err="y",pt.bg=COLOR,pch=21,cex=1.6,sfrac=0,gap=0,col="#00000066",lwd=1.25,add=TRUE)
#Least-squares line of Delta Fitness on Delta Fano, with the squared correlation and its P value.
abline(lm(LOW.FIT$FIT.DIFF~LOW.EXPR$FANO.DIFF),lwd=2,col="#00000066")
legend("topleft",c(paste("R^2 = ",round(TEST$estimate^2,2),sep=""),paste("P = ",format(TEST$p.value,digits=3),sep="")),bty="n",cex=1.3)
color.legend(0.5,-0.01,1.25,-0.0085,rect.col=GRADIENT,gradient="x",legend=c(round(min(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN),2),round(MID,2),round(max(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN),2)))
legend(0.4,-0.0063,"Median Expression",bty="n",cex=1.1)
box(lwd=2)


#High Expression

#Same figure for strains whose median expression is above the threshold.
#COL, COLOR, SEQ, GRADIENT, TEST and MID are overwritten with the values for this second set.
HIGH.EXPR <- subset(GLU.EXPR, YFP.MEDIAN.RELATIVE.MEAN > EXPR.THRESHOLD[1])
HIGH.FIT <- subset(GLU.FIT, MUTATION %in% HIGH.EXPR$MUTATION)

COL <- colorRampPalette(c("blue","red"))
COLOR <- COL(15)[as.numeric(cut(sqrt(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN),breaks=15))]

SEQ <- seq(sqrt(min(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN)),sqrt(max(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN)),by=0.01)
GRADIENT <- COL(15)[as.numeric(cut(SEQ,breaks=15))]
TEST <- cor.test(HIGH.EXPR$FANO.DIFF,HIGH.FIT$FIT.DIFF)
MID <- mean(c(sqrt(min(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN)),sqrt(max(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN))))^2

#quartz(height=9,width=9)
par(mar=c(6,6,4,1))
#Points with horizontal bars = Delta Fano +/- 1.96 standard errors of the relative Fano.
plotCI(HIGH.EXPR$FANO.DIFF, HIGH.FIT$FIT.DIFF,ui=HIGH.EXPR$FANO.DIFF+1.96*HIGH.EXPR$YFP.FANO.RELATIVE.SD/sqrt(HIGH.EXPR$N),li=HIGH.EXPR$FANO.DIFF-1.96*HIGH.EXPR$YFP.FANO.RELATIVE.SD/sqrt(HIGH.EXPR$N),err="x",xlab="Delta Fano",ylab="Delta Fitness",pch=21,sfrac=0,xlim=c(-1.5,1.5),ylim=c(-0.01,0.01),gap=0,main="Optimal Expression in Glucose",cex=1.6,cex.axis=1.2,cex.lab=1.4,font.lab=2,font.axis=2,cex.main=2,pt.bg=COLOR,col="#00000066",lwd=1.25)
abline(v=0,lty=2,lwd=1.25)
abline(h=0,lty=2,lwd=1.25)
#Vertical bars = Delta Fitness + High.95 / - Low.95.
plotCI(HIGH.EXPR$FANO.DIFF,HIGH.FIT$FIT.DIFF,ui=HIGH.FIT$FIT.DIFF+HIGH.FIT$High.95,li=HIGH.FIT$FIT.DIFF-HIGH.FIT$Low.95,err="y",pt.bg=COLOR,pch=21,cex=1.6,sfrac=0,gap=0,col="#00000066",lwd=1.25,add=TRUE)
abline(lm(HIGH.FIT$FIT.DIFF~HIGH.EXPR$FANO.DIFF),lwd=2,col="#00000066")
legend("topleft",c(paste("R^2 = ",round(TEST$estimate^2,2),sep=""),paste("P = ",format(TEST$p.value,digits=3),sep="")),bty="n",cex=1.3)
color.legend(0.5,-0.01,1.25,-0.0085,rect.col=GRADIENT,gradient="x",legend=c(round(min(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN),2),round(MID,2),round(max(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN),2)))
legend(0.4,-0.0063,"Median Expression",bty="n",cex=1.1)
box(lwd=2)

dev.off()


####7-Plot Median vs Delta FANO#####

#All Strains

# Figure 3 - figure supplement 2A: Delta Fano against median expression for
# every strain in GLU.EXPR, with a least-squares line and its R^2 / P value.
# Point colours are taken from GLU.EXPR$COLOR, which is assigned before this
# part of the script -- confirm it matches the noise-class legend drawn below.
pdf("MEDIAN.vs.DELTA.FANO.GLUCOSE.pdf", useDingbats = FALSE, height = 5, width = 5)

TEST <- cor.test(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, GLU.EXPR$FANO.DIFF)

# Coordinates and error-bar half-widths (1.96 * SD / sqrt(N)) on each axis.
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.HALF <- 1.96 * GLU.EXPR$YFP.MEDIAN.RELATIVE.SD / sqrt(GLU.EXPR$N)
FANO.Y <- GLU.EXPR$FANO.DIFF
FANO.HALF <- 1.96 * GLU.EXPR$YFP.FANO.RELATIVE.SD / sqrt(GLU.EXPR$N)

#quartz(height=9,width=9)
par(mar = c(6, 6, 4, 1))
# Layer 1: axes and thick point outlines; the bars are drawn fully transparent.
plotCI(MEDIAN.X, FANO.Y,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       xlab = "Median Expression relative to WT", ylab = "Delta Fano",
       pch = 21, sfrac = 0, xlim = c(0, 1.3), ylim = c(-1, 1.5), gap = 0,
       main = "All Strains in Glucose", cex.axis = 1.2, col = GLU.EXPR$COLOR,
       cex = 1.5, lwd = 2.02, scol = "#00000000")
# Layer 2: horizontal bars.
plotCI(MEDIAN.X, FANO.Y,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       pch = 21, sfrac = 0, gap = 0, col = GLU.EXPR$COLOR, add = TRUE,
       cex = 1.5, lwd = 1.01)
# Layer 3: vertical bars.
plotCI(MEDIAN.X, FANO.Y,
       ui = FANO.Y + FANO.HALF, li = FANO.Y - FANO.HALF, err = "y",
       pch = 21, sfrac = 0, gap = 0, col = GLU.EXPR$COLOR, add = TRUE,
       cex = 1.5, lwd = 1.01)
abline(lm(FANO.Y ~ MEDIAN.X), lwd = 2, col = "#00000066")
legend("bottomleft",
       c(paste0("R^2 = ", format(TEST$estimate^2, digits = 2)),
         paste0("P = ", format(TEST$p.value, digits = 3))),
       bty = "n")
legend("bottomright",
       c("Delta Fano > +1%", "Delta Fano < -1%", "|Delta Fano| < +1%"),
       box.lwd = 0, text.col = c("red", "blue", "black"), bty = "n")
dev.off()


####8-Plot Median vs Delta Fitness#####

#All Strains

# Figure 3 - figure supplement 3A: Delta Fitness against median expression for
# every strain, coloured by GLU.FIT$COLOR, with a least-squares line.
pdf("MEDIAN.vs.DELTA.FITNESS.GLUCOSE.FANO.pdf", useDingbats = FALSE, height = 5, width = 5)

TEST <- cor.test(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, GLU.FIT$FIT.DIFF)

# x coordinates with their 1.96 * SD / sqrt(N) half-widths; y is Delta Fitness.
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.HALF <- 1.96 * GLU.EXPR$YFP.MEDIAN.RELATIVE.SD / sqrt(GLU.EXPR$N)
FIT.Y <- GLU.FIT$FIT.DIFF

#quartz(height=9,width=9)
par(mar = c(6, 6, 4, 1))
# Layer 1: axes and thick point outlines; the bars are drawn fully transparent.
plotCI(MEDIAN.X, FIT.Y,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       xlab = "Median Expression relative to WT", ylab = "Delta Fitness",
       pch = 21, sfrac = 0, xlim = c(0, 1.3), ylim = c(-0.01, 0.01), gap = 0,
       main = "All Strains in Glucose", cex.axis = 1.2, col = GLU.FIT$COLOR,
       cex = 1.5, lwd = 2.02, scol = "#00000000")
# Layer 2: horizontal bars.
plotCI(MEDIAN.X, FIT.Y,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       pch = 21, sfrac = 0, gap = 0, col = GLU.FIT$COLOR, add = TRUE,
       cex = 1.5, lwd = 1.01)
# Layer 3: vertical bars from the High.95 / Low.95 columns.
plotCI(MEDIAN.X, FIT.Y,
       ui = FIT.Y + GLU.FIT$High.95, li = FIT.Y - GLU.FIT$Low.95, err = "y",
       pch = 21, sfrac = 0, gap = 0, col = GLU.FIT$COLOR, add = TRUE,
       cex = 1.5, lwd = 1.01)
abline(lm(FIT.Y ~ MEDIAN.X), lwd = 2, col = "#00000066")
legend("bottomleft",
       c(paste0("R^2 = ", format(TEST$estimate^2, digits = 2)),
         paste0("P = ", format(TEST$p.value, digits = 3))),
       bty = "n")
legend("bottomright",
       c("Delta Fano > +1%", "Delta Fano < -1%", "|Delta Fano| < +1%"),
       box.lwd = 0, text.col = c("red", "blue", "black"), bty = "n")
dev.off()


###########################################################################
# 8 - Impact of expression noise (SD) on fitness (Supplementary figures). #
###########################################################################

#Clear memory
# Removes every object from the global environment, so this section does not
# reuse anything computed by the previous one.
rm(list=ls())
# A negative 'warn' value makes R ignore all warnings for the rest of the session.
options(warn=-1)

#Libraries and functions
# In the code of this section: plotCI() and color.legend() are presumably from
# plotrix, %nin% from Hmisc and findValleys() from quantmod -- confirm.
library(plotrix)
library(Hmisc)
library(pcaPP)
library(msir)
library(quantmod)

# Bind 'box' to the graphics version explicitly; presumably one of the packages
# attached above masks it -- TODO confirm which.
box <- graphics::box

# Permutation test on the absolute difference between two group medians.
#
# The values of both groups are pooled and repeatedly split at random into two
# sets with the original group sizes. The returned P value is
# (number of permuted differences strictly greater than the observed one + 1)
# divided by (N.PERM + 1).
#
# Arguments:
#   GROUP.1, GROUP.2  numeric vectors of observations.
#   N.PERM            number of random permutations (default 100000, the value
#                     that was previously hard-coded).
# Value:
#   A single P value, or NA when either group is empty or the observed
#   difference is NA (e.g. a group contains NA). Previously these cases made
#   every comparison NA and silently returned 1/(N.PERM + 1), which reads as a
#   highly significant result.
PERMUTE <- function(GROUP.1, GROUP.2, N.PERM = 100000) {
  if (length(GROUP.1) == 0 || length(GROUP.2) == 0) {
    return(NA_real_)
  }

  OBS.DIFF <- abs(median(GROUP.1) - median(GROUP.2))
  if (is.na(OBS.DIFF)) {
    return(NA_real_)
  }

  POOL <- c(GROUP.1, GROUP.2)
  N.POOL <- length(POOL)
  N.1 <- length(GROUP.1)

  PERM.DIFF <- numeric(N.PERM)

  for (i in seq_len(N.PERM)) {
    # Positions assigned to the first pseudo-group; the rest form the second.
    POS.1 <- sample.int(N.POOL, size = N.1, replace = FALSE)
    PERM.DIFF[i] <- abs(median(POOL[POS.1]) - median(POOL[-POS.1]))
  }

  # Strict inequality is kept from the original analysis: permutations that tie
  # with the observed difference are not counted as "as extreme".
  (sum(PERM.DIFF > OBS.DIFF) + 1) / (N.PERM + 1)
}

#Load Data
# Set parent.dir to the folder that contains the GLUCOSE.SD directory.
parent.dir <- "/Path.to.input.files"
setwd(parent.dir)

# Three whitespace-delimited tables with a header row; as.is=TRUE keeps text
# columns as character so the string comparisons below work directly.
Fit <- read.table("GLUCOSE.SD/SUMMARY.DATA.FITNESS.txt",header=TRUE,as.is=TRUE)
Expression <- read.table("GLUCOSE.SD/SUMMARY.DATA.EXPRESSION.txt",header=TRUE,as.is=TRUE)
All.Fit <- read.table("GLUCOSE.SD/Experiment_s.estimates_filtered.txt",header=TRUE,as.is=TRUE)

# Drop strain Y2675 and flag the row(s) with ID 3 as a single-copy construct.
Expression <- subset(Expression, STRAIN != "Y2675")
Expression[which(Expression$ID == 3),"YFP.CONSTRUCT"] <- "SINGLE"

# Keep only fitness estimates not flagged as outliers.
All.Fit <- subset(All.Fit, OUTLIER.2 == "NO")

# Harmonise genotype labels across the three tables. These recodes are
# vectorised (the Expression and Fit recodes used to be row-by-row loops over
# 1:nrow(), which fail on an empty table or an NA label); which() skips rows
# whose label is NA.
#   URA3 carried on a DOUBLE construct -> "WT_WT"
#   NEGATIVE (Expression only)         -> "TDH3.Deletion"
All.Fit[which(All.Fit$MUTATION == "URA3" & All.Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Expression[which(Expression$MUTATION == "URA3" & Expression$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Expression[which(Expression$MUTATION == "NEGATIVE"),"MUTATION"] <- "TDH3.Deletion"
Fit[which(Fit$MUTATION == "URA3" & Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"

# Sort both summary tables the same way: later code pairs their rows by position.
Expression <- Expression[order(Expression$ENVIRONMENT,Expression$MUTATION),]
Fit <- Fit[order(Fit$ENVIRONMENT, Fit$MUTATION),]

#Colors for the different categories of mutants
# Lookup from the CLASS column to a plotting colour. This replaces a
# row-by-row loop of ten separate if() tests, which iterated over 1:nrow()
# (wrong for an empty table) and stopped with an error on an NA CLASS.
CLASS.COLORS <- c(
  "REF"          = "black",
  "REF.2X"       = "black",
  "TFBS"         = "#FF0000FF",
  "TFBS.2"       = "#FF0000FF",
  "TATA"         = "#0000FFFF",
  "TFBS.TATA"    = "#990099FF",
  "WT"           = "gray",
  "TFBS.2X"      = "green",
  "TATA.2X"      = "green",
  "TFBS.TATA.2X" = "green",
  "WT.2X"        = "green"
)

# Only rows whose CLASS is in the table are touched; as before, any other row
# keeps whatever COLOR it had (NA if the column is created here).
KNOWN.CLASS <- Expression$CLASS %in% names(CLASS.COLORS)
if (any(KNOWN.CLASS)) {
  Expression[KNOWN.CLASS,"COLOR"] <- unname(CLASS.COLORS[as.character(Expression$CLASS[KNOWN.CLASS])])
}


#################################################
#####SHOW IMPACT OF NOISE ON FITNESS#############
#################################################

############
#SD Glucose#
############

####1-Fit non linear regression to Expression data#####

# Genotypes left out of the glucose analysis.
EXCLUDED.MUTATIONS <- c("TATA.81_TATA.81","m75_TATA.81","m63.m90_m63.m90","TATA.129_TATA.129","WT","TATA.42_TATA.42","BARRY.DOUBLE")

# Glucose rows with relative median expression below 1.5, and the matching rows
# of the two fitness tables.
GLU.EXPR <- subset(
  Expression,
  ENVIRONMENT == "GLUCOSE" &
    !(MUTATION %in% EXCLUDED.MUTATIONS) &
    YFP.MEDIAN.RELATIVE.MEAN < 1.5
)
GLU.FIT <- subset(Fit, ENVIRONMENT == "GLUCOSE" & MUTATION %in% GLU.EXPR$MUTATION)
GLU.ALL <- subset(All.Fit, ENVIRONMENT == "GLUCOSE" & MUTATION %in% GLU.FIT$MUTATION)


# Local quadratic regression (span 2/3) of expression SD on median expression.
MODEL <- loess(GLU.EXPR$YFP.SD.RELATIVE.MEAN ~ GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, degree = 2, span = 2/3)

# Fitted curve on a 0.001-step grid covering the observed median range.
x.mid <- seq(
  min(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),
  max(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),
  by = 0.001
)
y.mid <- predict(MODEL, x.mid, se = TRUE)

####2-Identify strains with low and high SD given their median expression######

# Delta SD: observed SD minus the SD the loess curve (MODEL) predicts at the
# strain's median expression. predict() is called once on the whole column
# instead of once per row inside a 1:nrow() loop.
EXPECTED.SD <- as.numeric(predict(MODEL, GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, se=TRUE)$fit)
GLU.EXPR[,"SD.DIFF"] <- GLU.EXPR$YFP.SD.RELATIVE.MEAN - EXPECTED.SD

# Positive residual -> "HIGH", negative -> "LOW"; residuals smaller than 0.01
# in absolute value (including an exact zero) are then relabelled "UNCLEAR".
GLU.EXPR[,"SD.CLASS"] <- ifelse(GLU.EXPR$SD.DIFF > 0, "HIGH", "LOW")
GLU.EXPR[which(abs(GLU.EXPR$SD.DIFF) < 0.01),"SD.CLASS"] <- "UNCLEAR"

# The class is copied to the fitness table by row position, so the two tables
# must list the same genotypes in the same order. Fail loudly instead of
# silently attaching classes to the wrong strains.
stopifnot(identical(as.character(GLU.FIT$MUTATION), as.character(GLU.EXPR$MUTATION)))
GLU.FIT[,"SD.CLASS"] <- factor(GLU.EXPR[,"SD.CLASS"],levels=c("LOW","HIGH","UNCLEAR"))

# Plotting colour of each noise class.
SD.CLASS.COLORS <- c("LOW" = "blue", "HIGH" = "red", "UNCLEAR" = "black")
GLU.FIT[,"COLOR"] <- unname(SD.CLASS.COLORS[as.character(GLU.FIT[,"SD.CLASS"])])

# Figure 3 - figure supplement 1B: expression SD against median expression,
# coloured by noise class, with the loess curve (x.mid, y.mid) on top.
pdf("SD.CATEGORIES.GLUCOSE.pdf", useDingbats = FALSE, height = 5, width = 5)

# Coordinates and 1.96 * SD / sqrt(N) half-widths for both axes.
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.HALF <- 1.96 * GLU.EXPR$YFP.MEDIAN.RELATIVE.SD / sqrt(GLU.EXPR$N)
SD.Y <- GLU.EXPR$YFP.SD.RELATIVE.MEAN
SD.HALF <- 1.96 * GLU.EXPR$YFP.SD.RELATIVE.SD / sqrt(GLU.EXPR$N)

# Layer 1: axes and thick point outlines; the bars are drawn fully transparent.
plotCI(MEDIAN.X, SD.Y,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       col = GLU.FIT[, "COLOR"],
       xlab = "Median Expression relative to WT", ylab = "Expression SD relative to WT",
       pch = 21, sfrac = 0, xlim = c(0, 1.4), ylim = c(0, 1.6), gap = 0,
       main = "Median vs SD Expression in Glucose",
       cex = 1.5, lwd = 2.02, scol = "#00000000")
# Layer 2: horizontal bars.
plotCI(MEDIAN.X, SD.Y,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       col = GLU.FIT[, "COLOR"], add = TRUE, sfrac = 0, gap = 0,
       cex = 1.5, lwd = 1.01, pch = 21)
# Layer 3: vertical bars.
plotCI(MEDIAN.X, SD.Y,
       ui = SD.Y + SD.HALF, li = SD.Y - SD.HALF, err = "y",
       col = GLU.FIT[, "COLOR"], add = TRUE, sfrac = 0, gap = 0,
       cex = 1.5, lwd = 1.01, pch = 21)
lines(x.mid, y.mid$fit, col = "#00000099", lwd = 2.5)
legend("bottomright",
       c("Delta SD > +1%", "Delta SD < -1%", "|Delta SD| < +1%"),
       box.lwd = 0, text.col = c("red", "blue", "black"), bty = "n")
abline(v = 1, lty = 2)
abline(h = 1, lty = 2)
dev.off()


####3-Fit model to fitness data and compute expected fitness######

# Figure 3 - Figure supplement 1J: fitness against median expression with the
# loess fitness curve, the line 0.005 below its maximum, and the expression
# threshold(s) where the curve comes closest to that line.
pdf("FITNESS.EXPECTATION.GLUCOSE.pdf", useDingbats = FALSE)
par(mar = c(6, 6, 4, 1))

MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.HALF <- 1.96 * GLU.EXPR$YFP.MEDIAN.RELATIVE.SD / sqrt(GLU.EXPR$N)

# Layer 1: axes and thick point outlines; the bars are drawn fully transparent.
plotCI(MEDIAN.X, GLU.FIT$Fitness,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       col = GLU.FIT[, "COLOR"],
       xlab = "Median Expression relative to WT", ylab = "Fitness relative to WT",
       pch = 21, sfrac = 0, xlim = c(0, 1.3), ylim = c(0.936, 1.01), gap = 0,
       main = "Expression vs Fitness in Glucose",
       cex = 1.5, lwd = 2.02, scol = "#00000000")
# Layer 2: horizontal bars.
plotCI(MEDIAN.X, GLU.FIT$Fitness,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       col = GLU.FIT[, "COLOR"], pch = 21, sfrac = 0, gap = 0, add = TRUE,
       cex = 1.5, lwd = 1.01)
# Layer 3: vertical bars from the High.95 / Low.95 columns.
plotCI(MEDIAN.X, GLU.FIT$Fitness,
       ui = GLU.FIT$Fitness + GLU.FIT$High.95,
       li = GLU.FIT$Fitness - GLU.FIT$Low.95, err = "y",
       col = GLU.FIT[, "COLOR"], pch = 21, sfrac = 0, gap = 0, add = TRUE,
       cex = 1.5, lwd = 1.01)
abline(v = 1, lty = 2)
abline(h = 1, lty = 2)
legend("bottomright",
       c("Delta SD > +1%", "Delta SD < -1%", "|Delta SD| < +1%"),
       box.lwd = 0, text.col = c("red", "blue", "black"), bty = "n")

# Local quadratic regression (span 0.66) of fitness on median expression.
MODEL.FIT <- loess(GLU.FIT$Fitness ~ GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, degree = 2, span = 0.66)

x.mid <- seq(
  min(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),
  max(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),
  by = 0.001
)
y.mid <- predict(MODEL.FIT, x.mid, se = TRUE)
lines(x.mid, y.mid$fit, col = "#00000099", lwd = 2.5)

# Highest point of the fitted curve and the reference level 0.005 below it.
MAX.FITNESS <- max(y.mid$fit)
FITNESS.CUTOFF <- MAX.FITNESS - 0.005
abline(h = FITNESS.CUTOFF, lty = 2)

OPT.EXPR <- x.mid[which(y.mid$fit == MAX.FITNESS)]

# Distance of the curve from the reference level; its local minima mark where
# the curve meets that level.
# NOTE(review): quantmod documents findValleys() as returning the position just
# after each valley, so each threshold may sit one grid step (0.001) past the
# actual minimum -- confirm this is acceptable.
ABS.FIT <- abs(y.mid$fit - FITNESS.CUTOFF)

EXPR.THRESHOLD <- x.mid[findValleys(ABS.FIT)]
abline(v = EXPR.THRESHOLD, lty = 2)

dev.off()

# Delta Fitness: observed fitness minus the curve's prediction at each strain's
# median expression (rows of GLU.FIT and GLU.EXPR are paired by position).
EXPECTED.FITNESS <- vapply(
  seq_len(nrow(GLU.FIT)),
  function(ROW) predict(MODEL.FIT, GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN[ROW], se = TRUE)$fit,
  numeric(1)
)
GLU.FIT[, "FIT.DIFF"] <- GLU.FIT$Fitness - EXPECTED.FITNESS

write.table(GLU.EXPR, "DELTA.SD.txt", row.names = FALSE, sep = "\t", quote = FALSE)
write.table(GLU.FIT, "DELTA.FITNESS.txt", row.names = FALSE, sep = "\t", quote = FALSE)

####4-Plot correlation between Delta noise and Mean expression#####

# Stack three overlapping views of the data -- all strains, strains below
# EXPR.THRESHOLD[1] and strains above it -- tagged by MEDIAN.CLASS, so that the
# boxplots and permutation tests that follow can group by
# SD.CLASS x MEDIAN.CLASS.

# Expression table
P.EXPR.ALL <- subset(GLU.EXPR, YFP.MEDIAN.RELATIVE.MEAN > 0)
P.EXPR.LOW <- subset(GLU.EXPR, YFP.MEDIAN.RELATIVE.MEAN < EXPR.THRESHOLD[1])
P.EXPR.HIGH <- subset(GLU.EXPR, YFP.MEDIAN.RELATIVE.MEAN > EXPR.THRESHOLD[1])

# Fitness table, restricted to the genotypes of the matching expression subset
P.FIT.ALL <- subset(GLU.FIT, MUTATION %in% P.EXPR.ALL$MUTATION)
P.FIT.LOW <- subset(GLU.FIT, MUTATION %in% P.EXPR.LOW$MUTATION)
P.FIT.HIGH <- subset(GLU.FIT, MUTATION %in% P.EXPR.HIGH$MUTATION)

P.EXPR.ALL[, "MEDIAN.CLASS"] <- "ALL"
P.FIT.ALL[, "MEDIAN.CLASS"] <- "ALL"
P.EXPR.LOW[, "MEDIAN.CLASS"] <- "LOW"
P.FIT.LOW[, "MEDIAN.CLASS"] <- "LOW"
P.EXPR.HIGH[, "MEDIAN.CLASS"] <- "HIGH"
P.FIT.HIGH[, "MEDIAN.CLASS"] <- "HIGH"

P.FIT <- as.data.frame(rbind(P.FIT.ALL, P.FIT.LOW, P.FIT.HIGH))
P.EXPR <- as.data.frame(rbind(P.EXPR.ALL, P.EXPR.LOW, P.EXPR.HIGH))

# Strains whose noise class is "UNCLEAR" take no part in the comparisons.
P.FIT <- subset(P.FIT, SD.CLASS != "UNCLEAR")
P.EXPR <- subset(P.EXPR, SD.CLASS != "UNCLEAR")

# Fix the group order used on the boxplot axes.
P.FIT$MEDIAN.CLASS <- factor(P.FIT$MEDIAN.CLASS, levels = c("ALL", "LOW", "HIGH"))
P.EXPR$MEDIAN.CLASS <- factor(P.EXPR$MEDIAN.CLASS, levels = c("ALL", "LOW", "HIGH"))

P.FIT <- droplevels(P.FIT)
P.EXPR <- droplevels(P.EXPR)

P.FIT[, "SD.CLASS"] <- factor(P.FIT[, "SD.CLASS"], levels = c("LOW", "HIGH"))
P.EXPR[, "SD.CLASS"] <- factor(P.EXPR[, "SD.CLASS"], levels = c("LOW", "HIGH"))


####5-Permutation tests to detect significant impact of noise on fitness#####

# For each response (Delta Fitness, median expression, Delta SD) and each
# expression subset (all / below threshold / above threshold), compare the
# low-SD and high-SD strains with PERMUTE() and draw the matching boxplots.
# PERMUTE() is called in the same order as before: row by row, column by column.
P.VAL.GLU <- matrix(
  data = NA, nrow = 3, ncol = 3,
  dimnames = list(
    c("Delta.Fitness", "Median.Expression", "Delta.SD"),
    c("ALL.STRAINS", "SUBOPTIMAL.MEDIAN", "OPTIMAL.MEDIAN")
  )
)

# MEDIAN.CLASS values, in the column order of P.VAL.GLU.
MEDIAN.LEVELS <- c("ALL", "LOW", "HIGH")
BOX.COLORS <- c("#0000FF99", "#FF000099")
BOX.AT <- c(1, 2, 3.5, 4.5, 6, 7)
PANEL.LABELS <- c("All Strains", "Suboptimal Median Expression", "Optimal Median Expression")


#Fitness#

for (j in seq_along(MEDIAN.LEVELS)) {
  GROUP.1 <- P.FIT[which(P.FIT$SD.CLASS == "LOW" & P.FIT$MEDIAN.CLASS == MEDIAN.LEVELS[j]), "FIT.DIFF"]
  GROUP.2 <- P.FIT[which(P.FIT$SD.CLASS == "HIGH" & P.FIT$MEDIAN.CLASS == MEDIAN.LEVELS[j]), "FIT.DIFF"]
  P.VAL.GLU[1, j] <- PERMUTE(GROUP.1, GROUP.2)
}

# Note: sep="" sits inside the format.pval() call, not the paste() call, so
# paste() joins with its default single space.
PV <- paste("P =", format.pval(P.VAL.GLU[1, ], digits = 4, eps = 0.0001, sep = ""))

#Corresponds to Figure 3 - figure supplement 8D.
pdf("FITNESS.vs.SD.CLASS.GLUCOSE.pdf", useDingbats = FALSE, height = 6, width = 12)
#quartz(height=6,width=12)
boxplot(P.FIT$FIT.DIFF ~ P.FIT$SD.CLASS + P.FIT$MEDIAN.CLASS,
        notch = TRUE, varwidth = TRUE, ylab = "Delta Fitness", col = BOX.COLORS,
        ylim = c(-0.01, 0.01), at = BOX.AT, xaxt = "n")
abline(v = c(2.75, 5.25), lty = 2)
axis(1, at = c(1.5, 4, 6.5), labels = PANEL.LABELS, tick = FALSE, font = 2)
legend("topright", c("Delta SD < -1%", "Delta SD > +1%"),
       pch = 22, bty = "n", cex = 1.2, pt.cex = 2, pt.bg = BOX.COLORS)
PV.X <- c(1.75, 4.25, 6.75)
for (k in 1:3) {
  legend(x = PV.X[k], y = -0.008, PV[k], bty = "n")
}
dev.off()

#Median#

for (j in seq_along(MEDIAN.LEVELS)) {
  GROUP.1 <- P.EXPR[which(P.EXPR$SD.CLASS == "LOW" & P.EXPR$MEDIAN.CLASS == MEDIAN.LEVELS[j]), "YFP.MEDIAN.RELATIVE.MEAN"]
  GROUP.2 <- P.EXPR[which(P.EXPR$SD.CLASS == "HIGH" & P.EXPR$MEDIAN.CLASS == MEDIAN.LEVELS[j]), "YFP.MEDIAN.RELATIVE.MEAN"]
  P.VAL.GLU[2, j] <- PERMUTE(GROUP.1, GROUP.2)
}

PV <- paste("P =", format.pval(P.VAL.GLU[2, ], digits = 4, eps = 0.0001, sep = ""))

#Corresponds to Figure 3 - figure supplement 8E.
pdf("MEDIAN.vs.SD.CLASS.GLUCOSE.pdf", useDingbats = FALSE, height = 6, width = 12)
#quartz(height=6,width=12)
boxplot(P.EXPR$YFP.MEDIAN.RELATIVE.MEAN ~ P.EXPR$SD.CLASS + P.EXPR$MEDIAN.CLASS,
        notch = TRUE, varwidth = TRUE, ylab = "Median Expression", col = BOX.COLORS,
        ylim = c(0, 1.5), at = BOX.AT, xaxt = "n")
abline(v = c(2.75, 5.25), lty = 2)
axis(1, at = c(1.5, 4, 6.5), labels = PANEL.LABELS, tick = FALSE, font = 2)
legend("bottomright", c("Delta SD < -1%", "Delta SD > +1%"),
       pch = 22, bty = "n", cex = 1.2, pt.cex = 2, pt.bg = BOX.COLORS)
PV.X <- c(0.25, 2.75, 5.25)
for (k in 1:3) {
  legend(x = PV.X[k], y = 1.5, PV[k], bty = "n")
}
dev.off()

#Delta SD#

for (j in seq_along(MEDIAN.LEVELS)) {
  GROUP.1 <- P.EXPR[which(P.EXPR$SD.CLASS == "LOW" & P.EXPR$MEDIAN.CLASS == MEDIAN.LEVELS[j]), "SD.DIFF"]
  GROUP.2 <- P.EXPR[which(P.EXPR$SD.CLASS == "HIGH" & P.EXPR$MEDIAN.CLASS == MEDIAN.LEVELS[j]), "SD.DIFF"]
  P.VAL.GLU[3, j] <- PERMUTE(GROUP.1, GROUP.2)
}

PV <- paste("P =", format.pval(P.VAL.GLU[3, ], digits = 4, eps = 0.0001, sep = ""))

#Corresponds to Figure 3 - figure supplement 8F.
pdf("DELTA.SD.vs.SD.CLASS.GLUCOSE.pdf", useDingbats = FALSE, height = 6, width = 12)
#quartz(height=6,width=12)
boxplot(P.EXPR$SD.DIFF ~ P.EXPR$SD.CLASS + P.EXPR$MEDIAN.CLASS,
        notch = TRUE, varwidth = TRUE, ylab = "Delta SD", col = BOX.COLORS,
        ylim = c(-0.2, 0.4), at = BOX.AT, xaxt = "n")
abline(v = c(2.75, 5.25), lty = 2)
axis(1, at = c(1.5, 4, 6.5), labels = PANEL.LABELS, tick = FALSE, font = 2)
legend("topright", c("Delta SD < -1%", "Delta SD > +1%"),
       pch = 22, bty = "n", cex = 1.2, pt.cex = 2, pt.bg = BOX.COLORS)
PV.X <- c(0.25, 2.75, 5.25)
for (k in 1:3) {
  legend(x = PV.X[k], y = 0.4, PV[k], bty = "n")
}
dev.off()

# NOTE(review): row.names=FALSE means the row labels (Delta.Fitness,
# Median.Expression, Delta.SD) are not written to the file; only row order
# identifies them -- confirm that downstream readers rely on the order.
write.table(P.VAL.GLU, "P.VAL.DELTA.SD.txt", row.names = FALSE, sep = "\t", quote = FALSE)


####6-Plot Fitness for two classes of SD#####

# Figure 3 - figure supplement 1F: fitness against median expression for all
# strains, overlaid with one loess curve and +/- 1.96 SE band per noise class
# (blue = low SD, red = high SD).
pdf("FITNESS.CURVES.SD.CATEGORIES.GLUCOSE.pdf", useDingbats = FALSE, height = 5, width = 6)
#quartz(height=9,width=13)
par(mar = c(6, 6, 4, 1))

MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.HALF <- 1.96 * GLU.EXPR$YFP.MEDIAN.RELATIVE.SD / sqrt(GLU.EXPR$N)

# Layer 1: axes and thick point outlines; the bars are drawn fully transparent.
plotCI(MEDIAN.X, GLU.FIT$Fitness,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       col = GLU.FIT$COLOR,
       xlab = "Expression level relative to WT", ylab = "Fitness relative to WT",
       pch = 21, sfrac = 0, xlim = c(0, 1.3), ylim = c(0.936, 1.01), gap = 0,
       main = "Expression vs Fitness in Glucose",
       cex = 1.5, lwd = 2.02, scol = "#00000000")
# Layer 2: horizontal bars.
plotCI(MEDIAN.X, GLU.FIT$Fitness,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       col = GLU.FIT$COLOR, pch = 21, sfrac = 0, gap = 0, add = TRUE,
       cex = 1.5, lwd = 1.01)
# Layer 3: vertical bars from the High.95 / Low.95 columns.
plotCI(MEDIAN.X, GLU.FIT$Fitness,
       ui = GLU.FIT$Fitness + GLU.FIT$High.95,
       li = GLU.FIT$Fitness - GLU.FIT$Low.95, err = "y",
       col = GLU.FIT$COLOR, pch = 21, sfrac = 0, gap = 0, add = TRUE,
       cex = 1.5, lwd = 1.01)
abline(v = 1, lty = 2)
abline(h = 1, lty = 2)
legend("bottomright",
       c("Delta SD > +1%", "Delta SD < -1%", "|Delta SD| < +1%"),
       box.lwd = 0, text.col = c("red", "blue", "black"), bty = "n")

# Split both tables by noise class.
LOW.FIT <- subset(GLU.FIT, SD.CLASS == "LOW")
LOW.EXPR <- subset(GLU.EXPR, SD.CLASS == "LOW")
HIGH.FIT <- subset(GLU.FIT, SD.CLASS == "HIGH")
HIGH.EXPR <- subset(GLU.EXPR, SD.CLASS == "HIGH")

# Low-SD curve: the least-fit strain gets weight 10, every other strain weight 1.
WEIGHT <- rep(1, nrow(LOW.EXPR))
WEIGHT[which(LOW.FIT$Fitness == min(LOW.FIT$Fitness))] <- 10

MODEL.FIT <- loess(LOW.FIT$Fitness ~ LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN, span = 2/3, weights = WEIGHT)
x.mid <- seq(
  min(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN),
  max(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN),
  by = 0.001
)
y.mid <- predict(MODEL.FIT, x.mid, se = TRUE)
lines(x.mid, y.mid$fit, col = "blue", lwd = 1.5, lty = 2)
# Band outline: upper edge left to right, then lower edge right to left.
BAND.HALF <- 1.96 * y.mid$se.fit
y.err <- c(y.mid$fit + BAND.HALF, rev(y.mid$fit - BAND.HALF))
polygon(c(x.mid, rev(x.mid)), y.err, col = "#0000FF22", border = NA)

# High-SD curve: unweighted fit.
MODEL.FIT <- loess(HIGH.FIT$Fitness ~ HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN, span = 2/3)
x.mid <- seq(
  min(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN),
  max(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN),
  by = 0.001
)
y.mid <- predict(MODEL.FIT, x.mid, se = TRUE)
lines(x.mid, y.mid$fit, col = "red", lwd = 1.5, lty = 2)
BAND.HALF <- 1.96 * y.mid$se.fit
y.err <- c(y.mid$fit + BAND.HALF, rev(y.mid$fit - BAND.HALF))
polygon(c(x.mid, rev(x.mid)), y.err, col = "#FF000022", border = NA)

dev.off()


####7-Plot Delta Fitness vs Delta SD#####

# Two-page PDF (Figure 3 - figure supplement 4B & 4F). Each page plots FIT.DIFF
# against SD.DIFF for one subset of strains: first those whose median
# expression is below EXPR.THRESHOLD[1], then those above it. Points are
# filled from blue to red according to sqrt(median expression) in 15 bins.
pdf("DELTA.FITNESS.vs.DELTA.SD.GLUCOSE.pdf", useDingbats = FALSE, height = 9, width = 9)

# ---- Page 1: median expression below the threshold ----

LOW.EXPR <- subset(GLU.EXPR, YFP.MEDIAN.RELATIVE.MEAN < EXPR.THRESHOLD[1])
LOW.FIT <- subset(GLU.FIT, MUTATION %in% LOW.EXPR$MUTATION)

COL <- colorRampPalette(c("blue", "red"))
COLOR <- COL(15)[as.numeric(cut(sqrt(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN), breaks = 15))]

# Colour bar: the same 15-bin palette along an evenly spaced sqrt scale.
EXPR.RANGE <- range(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
SEQ <- seq(sqrt(EXPR.RANGE[1]), sqrt(EXPR.RANGE[2]), by = 0.01)
GRADIENT <- COL(15)[as.numeric(cut(SEQ, breaks = 15))]
TEST <- cor.test(LOW.EXPR$SD.DIFF, LOW.FIT$FIT.DIFF)
# Midpoint of the sqrt scale, squared back to expression units.
MID <- mean(sqrt(EXPR.RANGE))^2

# Horizontal bars span +/- 1.96 * SD / sqrt(N) around Delta SD.
SD.HALF <- 1.96 * LOW.EXPR$YFP.SD.RELATIVE.SD / sqrt(LOW.EXPR$N)

#quartz(height=9,width=9)
par(mar = c(6, 6, 4, 1))
plotCI(LOW.EXPR$SD.DIFF, LOW.FIT$FIT.DIFF,
       ui = LOW.EXPR$SD.DIFF + SD.HALF,
       li = LOW.EXPR$SD.DIFF - SD.HALF,
       err = "x", xlab = "Delta SD", ylab = "Delta Fitness",
       pch = 21, sfrac = 0, xlim = c(-0.4, 0.4), ylim = c(-0.01, 0.01), gap = 0,
       main = "Suboptimal Expression in Glucose",
       cex = 1.6, cex.axis = 1.2, cex.lab = 1.4, font.lab = 2, font.axis = 2,
       cex.main = 2, pt.bg = COLOR, col = "#00000066", lwd = 1.25)
abline(v = 0, lty = 2, lwd = 1.25)
abline(h = 0, lty = 2, lwd = 1.25)
# Vertical bars use the High.95 / Low.95 columns of the fitness table.
plotCI(LOW.EXPR$SD.DIFF, LOW.FIT$FIT.DIFF,
       ui = LOW.FIT$FIT.DIFF + LOW.FIT$High.95,
       li = LOW.FIT$FIT.DIFF - LOW.FIT$Low.95,
       err = "y", pt.bg = COLOR, pch = 21, cex = 1.6, sfrac = 0, gap = 0,
       col = "#00000066", lwd = 1.25, add = TRUE)
abline(lm(LOW.FIT$FIT.DIFF ~ LOW.EXPR$SD.DIFF), lwd = 2, col = "#00000066")
legend("topleft",
       c(paste0("R^2 = ", round(TEST$estimate^2, 2)),
         paste0("P = ", format(TEST$p.value, digits = 3))),
       bty = "n", cex = 1.3)
color.legend(0.13, -0.01, 0.29, -0.0085, rect.col = GRADIENT, gradient = "x",
             legend = round(c(EXPR.RANGE[1], MID, EXPR.RANGE[2]), 2))
legend(0.035, -0.0063, "Median Expression", bty = "n", cex = 1.1)
box(lwd = 2)


# ---- Page 2: median expression above the threshold ----

HIGH.EXPR <- subset(GLU.EXPR, YFP.MEDIAN.RELATIVE.MEAN > EXPR.THRESHOLD[1])
HIGH.FIT <- subset(GLU.FIT, MUTATION %in% HIGH.EXPR$MUTATION)

COL <- colorRampPalette(c("blue", "red"))
COLOR <- COL(15)[as.numeric(cut(sqrt(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN), breaks = 15))]

EXPR.RANGE <- range(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
SEQ <- seq(sqrt(EXPR.RANGE[1]), sqrt(EXPR.RANGE[2]), by = 0.01)
GRADIENT <- COL(15)[as.numeric(cut(SEQ, breaks = 15))]
TEST <- cor.test(HIGH.EXPR$SD.DIFF, HIGH.FIT$FIT.DIFF)
MID <- mean(sqrt(EXPR.RANGE))^2

SD.HALF <- 1.96 * HIGH.EXPR$YFP.SD.RELATIVE.SD / sqrt(HIGH.EXPR$N)

#quartz(height=9,width=9)
par(mar = c(6, 6, 4, 1))
plotCI(HIGH.EXPR$SD.DIFF, HIGH.FIT$FIT.DIFF,
       ui = HIGH.EXPR$SD.DIFF + SD.HALF,
       li = HIGH.EXPR$SD.DIFF - SD.HALF,
       err = "x", xlab = "Delta SD", ylab = "Delta Fitness",
       pch = 21, sfrac = 0, xlim = c(-0.4, 0.4), ylim = c(-0.01, 0.01), gap = 0,
       main = "Optimal Expression in Glucose",
       cex = 1.6, cex.axis = 1.2, cex.lab = 1.4, font.lab = 2, font.axis = 2,
       cex.main = 2, pt.bg = COLOR, col = "#00000066", lwd = 1.25)
abline(v = 0, lty = 2, lwd = 1.25)
abline(h = 0, lty = 2, lwd = 1.25)
plotCI(HIGH.EXPR$SD.DIFF, HIGH.FIT$FIT.DIFF,
       ui = HIGH.FIT$FIT.DIFF + HIGH.FIT$High.95,
       li = HIGH.FIT$FIT.DIFF - HIGH.FIT$Low.95,
       err = "y", pt.bg = COLOR, pch = 21, cex = 1.6, sfrac = 0, gap = 0,
       col = "#00000066", lwd = 1.25, add = TRUE)
abline(lm(HIGH.FIT$FIT.DIFF ~ HIGH.EXPR$SD.DIFF), lwd = 2, col = "#00000066")
legend("topleft",
       c(paste0("R^2 = ", round(TEST$estimate^2, 2)),
         paste0("P = ", format(TEST$p.value, digits = 3))),
       bty = "n", cex = 1.3)
color.legend(0.13, -0.01, 0.29, -0.0085, rect.col = GRADIENT, gradient = "x",
             legend = round(c(EXPR.RANGE[1], MID, EXPR.RANGE[2]), 2))
legend(0.035, -0.0063, "Median Expression", bty = "n", cex = 1.1)
box(lwd = 2)

dev.off()


####7-Plot Median vs Delta SD#####

#All Strains

# Figure 3 - figure supplement 2B: Delta SD against median expression for every
# strain in GLU.EXPR, with a least-squares line and its R^2 / P value.
# NOTE(review): points are coloured by GLU.EXPR$COLOR, which this section sets
# from the promoter CLASS, whereas the legend below describes the noise classes
# (stored in GLU.FIT$COLOR) -- confirm which colouring is intended.
pdf("MEDIAN.vs.DELTA.SD.GLUCOSE.pdf", useDingbats = FALSE, height = 5, width = 5)

TEST <- cor.test(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, GLU.EXPR$SD.DIFF)

# Coordinates and error-bar half-widths (1.96 * SD / sqrt(N)) on each axis.
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.HALF <- 1.96 * GLU.EXPR$YFP.MEDIAN.RELATIVE.SD / sqrt(GLU.EXPR$N)
SD.Y <- GLU.EXPR$SD.DIFF
SD.HALF <- 1.96 * GLU.EXPR$YFP.SD.RELATIVE.SD / sqrt(GLU.EXPR$N)

#quartz(height=9,width=9)
par(mar = c(6, 6, 4, 1))
# Layer 1: axes and thick point outlines; the bars are drawn fully transparent.
plotCI(MEDIAN.X, SD.Y,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       xlab = "Median Expression relative to WT", ylab = "Delta SD",
       pch = 21, sfrac = 0, xlim = c(0, 1.3), ylim = c(-0.45, 0.45), gap = 0,
       main = "All Strains in Glucose", cex.axis = 1.2, col = GLU.EXPR$COLOR,
       cex = 1.5, lwd = 2.02, scol = "#00000000")
# Layer 2: horizontal bars.
plotCI(MEDIAN.X, SD.Y,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       pch = 21, sfrac = 0, gap = 0, col = GLU.EXPR$COLOR, add = TRUE,
       cex = 1.5, lwd = 1.01)
# Layer 3: vertical bars.
plotCI(MEDIAN.X, SD.Y,
       ui = SD.Y + SD.HALF, li = SD.Y - SD.HALF, err = "y",
       pch = 21, sfrac = 0, gap = 0, col = GLU.EXPR$COLOR, add = TRUE,
       cex = 1.5, lwd = 1.01)
abline(lm(SD.Y ~ MEDIAN.X), lwd = 2, col = "#00000066")
legend("bottomleft",
       c(paste0("R^2 = ", format(TEST$estimate^2, digits = 2)),
         paste0("P = ", format(TEST$p.value, digits = 3))),
       bty = "n")
legend("bottomright",
       c("Delta SD > +1%", "Delta SD < -1%", "|Delta SD| < +1%"),
       box.lwd = 0, text.col = c("red", "blue", "black"), bty = "n")
dev.off()


####8-Plot Median vs Delta Fitness#####

#All Strains

# Figure 3 - figure supplement 3B: Delta Fitness against median expression for
# every strain, coloured by noise class (GLU.FIT$COLOR), with a least-squares
# line and its R^2 / P value.
pdf("MEDIAN.vs.DELTA.FITNESS.GLUCOSE.pdf", useDingbats = FALSE, height = 5, width = 5)

TEST <- cor.test(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, GLU.FIT$FIT.DIFF)

# x coordinates with their 1.96 * SD / sqrt(N) half-widths; y is Delta Fitness.
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.HALF <- 1.96 * GLU.EXPR$YFP.MEDIAN.RELATIVE.SD / sqrt(GLU.EXPR$N)
FIT.Y <- GLU.FIT$FIT.DIFF

#quartz(height=9,width=9)
par(mar = c(6, 6, 4, 1))
# Layer 1: axes and thick point outlines; the bars are drawn fully transparent.
plotCI(MEDIAN.X, FIT.Y,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       xlab = "Median Expression relative to WT", ylab = "Delta Fitness",
       pch = 21, sfrac = 0, xlim = c(0, 1.3), ylim = c(-0.01, 0.01), gap = 0,
       main = "All Strains in Glucose", cex.axis = 1.2, col = GLU.FIT$COLOR,
       cex = 1.5, lwd = 2.02, scol = "#00000000")
# Layer 2: horizontal bars.
plotCI(MEDIAN.X, FIT.Y,
       ui = MEDIAN.X + MEDIAN.HALF, li = MEDIAN.X - MEDIAN.HALF, err = "x",
       pch = 21, sfrac = 0, gap = 0, col = GLU.FIT$COLOR, add = TRUE,
       cex = 1.5, lwd = 1.01)
# Layer 3: vertical bars from the High.95 / Low.95 columns.
plotCI(MEDIAN.X, FIT.Y,
       ui = FIT.Y + GLU.FIT$High.95, li = FIT.Y - GLU.FIT$Low.95, err = "y",
       pch = 21, sfrac = 0, gap = 0, col = GLU.FIT$COLOR, add = TRUE,
       cex = 1.5, lwd = 1.01)
abline(lm(FIT.Y ~ MEDIAN.X), lwd = 2, col = "#00000066")
legend("bottomleft",
       c(paste0("R^2 = ", format(TEST$estimate^2, digits = 2)),
         paste0("P = ", format(TEST$p.value, digits = 3))),
       bty = "n")
legend("bottomright",
       c("Delta SD > +1%", "Delta SD < -1%", "|Delta SD| < +1%"),
       box.lwd = 0, text.col = c("red", "blue", "black"), bty = "n")
dev.off()


############################################################################
# 9 - Impact of expression noise (CV*) on fitness (Supplementary figures). #
############################################################################

#Clear memory
# Removes every object from the global environment, so this section does not
# reuse anything computed by the previous one.
rm(list=ls())
# A negative 'warn' value makes R ignore all warnings for the rest of the session.
options(warn=-1)

#Libraries and functions
# In the code of this section: %nin% is presumably from Hmisc; the other
# packages are presumably needed further down -- confirm.
library(plotrix)
library(Hmisc)
library(pcaPP)
library(msir)
library(quantmod)

# Bind 'box' to the graphics version explicitly; presumably one of the packages
# attached above masks it -- TODO confirm which.
box <- graphics::box

# Permutation test on the absolute difference between two group medians.
#
# The values of both groups are pooled and repeatedly split at random into two
# sets with the original group sizes. The returned P value is
# (number of permuted differences strictly greater than the observed one + 1)
# divided by (N.PERM + 1).
#
# Arguments:
#   GROUP.1, GROUP.2  numeric vectors of observations.
#   N.PERM            number of random permutations (default 100000, the value
#                     that was previously hard-coded).
# Value:
#   A single P value, or NA when either group is empty or the observed
#   difference is NA (e.g. a group contains NA). Previously these cases made
#   every comparison NA and silently returned 1/(N.PERM + 1), which reads as a
#   highly significant result.
PERMUTE <- function(GROUP.1, GROUP.2, N.PERM = 100000) {
  if (length(GROUP.1) == 0 || length(GROUP.2) == 0) {
    return(NA_real_)
  }

  OBS.DIFF <- abs(median(GROUP.1) - median(GROUP.2))
  if (is.na(OBS.DIFF)) {
    return(NA_real_)
  }

  POOL <- c(GROUP.1, GROUP.2)
  N.POOL <- length(POOL)
  N.1 <- length(GROUP.1)

  PERM.DIFF <- numeric(N.PERM)

  for (i in seq_len(N.PERM)) {
    # Positions assigned to the first pseudo-group; the rest form the second.
    POS.1 <- sample.int(N.POOL, size = N.1, replace = FALSE)
    PERM.DIFF[i] <- abs(median(POOL[POS.1]) - median(POOL[-POS.1]))
  }

  # Strict inequality is kept from the original analysis: permutations that tie
  # with the observed difference are not counted as "as extreme".
  (sum(PERM.DIFF > OBS.DIFF) + 1) / (N.PERM + 1)
}

#Load Data
# Set parent.dir to the folder that contains the input directory.
parent.dir <- "/Path.to.input.files"
setwd(parent.dir)

# Three whitespace-delimited tables with a header row; as.is=TRUE keeps text
# columns as character so the string comparisons below work directly.
# NOTE(review): this section analyses CV* but reads the same "GLUCOSE.SD"
# files as the SD section -- confirm this is the intended input directory.
Fit <- read.table("GLUCOSE.SD/SUMMARY.DATA.FITNESS.txt",header=TRUE,as.is=TRUE)
Expression <- read.table("GLUCOSE.SD/SUMMARY.DATA.EXPRESSION.txt",header=TRUE,as.is=TRUE)
All.Fit <- read.table("GLUCOSE.SD/Experiment_s.estimates_filtered.txt",header=TRUE,as.is=TRUE)

# Drop strain Y2675 and flag the row(s) with ID 3 as a single-copy construct.
Expression <- subset(Expression, STRAIN != "Y2675")
Expression[which(Expression$ID == 3),"YFP.CONSTRUCT"] <- "SINGLE"

# Keep only fitness estimates not flagged as outliers.
All.Fit <- subset(All.Fit, OUTLIER.2 == "NO")

# Harmonise genotype labels across the three tables. These recodes are
# vectorised (the Expression and Fit recodes used to be row-by-row loops over
# 1:nrow(), which fail on an empty table or an NA label); which() skips rows
# whose label is NA.
#   URA3 carried on a DOUBLE construct -> "WT_WT"
#   NEGATIVE (Expression only)         -> "TDH3.Deletion"
All.Fit[which(All.Fit$MUTATION == "URA3" & All.Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Expression[which(Expression$MUTATION == "URA3" & Expression$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Expression[which(Expression$MUTATION == "NEGATIVE"),"MUTATION"] <- "TDH3.Deletion"
Fit[which(Fit$MUTATION == "URA3" & Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"

# Sort both summary tables the same way: later code pairs their rows by position.
Expression <- Expression[order(Expression$ENVIRONMENT,Expression$MUTATION),]
Fit <- Fit[order(Fit$ENVIRONMENT, Fit$MUTATION),]

#Colors for the different categories of mutants
#Lookup table from mutant CLASS to plotting color: both reference classes are black,
#both TFBS classes red, TATA blue, TFBS.TATA purple, WT gray and the remaining
#*.2X classes green.
CLASS.COLORS <- c(REF="black",REF.2X="black",TFBS="#FF0000FF",TFBS.2="#FF0000FF",TATA="#0000FFFF",TFBS.TATA="#990099FF",WT="gray",TFBS.2X="green",TATA.2X="green",TFBS.TATA.2X="green",WT.2X="green")

#Only rows whose CLASS appears in the table receive a color, exactly like the former
#chain of if() tests; the lookup replaces a for (i in 1:nrow(...)) loop that failed on
#an empty table and on NA classes.
KNOWN.CLASS <- which(Expression$CLASS %in% names(CLASS.COLORS))
if (length(KNOWN.CLASS) > 0)
{
  Expression[KNOWN.CLASS,"COLOR"] <- unname(CLASS.COLORS[as.character(Expression$CLASS[KNOWN.CLASS])])
}


#############################################
#####SHOW IMPACT OF NOISE ON FITNESS#########
#############################################

############
#CV Glucose#
############

####1-Fit non linear regression to Expression data#####

#Genotypes left out of the analysis.
EXCLUDED.MUTATIONS <- c("TATA.81_TATA.81","m75_TATA.81","m63.m90_m63.m90","TATA.129_TATA.129","WT","TDH3.Deletion","TATA.42_TATA.42","BARRY.DOUBLE")

#Glucose records of the retained genotypes with a relative median expression below 1.5,
#then the matching rows of the fitness summary and of the individual fitness estimates.
GLU.EXPR <- subset(Expression, ENVIRONMENT == "GLUCOSE" & !(MUTATION %in% EXCLUDED.MUTATIONS) & YFP.MEDIAN.RELATIVE.MEAN < 1.5)
GLU.FIT <- subset(Fit, ENVIRONMENT == "GLUCOSE" & MUTATION %in% GLU.EXPR$MUTATION)
GLU.ALL <- subset(All.Fit, ENVIRONMENT == "GLUCOSE" & MUTATION %in% GLU.FIT$MUTATION)


#Local quadratic regression (span 2/3) of relative CV on relative median expression.
MODEL <- loess(GLU.EXPR$YFP.CV.RELATIVE.MEAN~GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,degree=2,span=2/3)

#Fitted curve evaluated every 0.001 over the observed range of median expression.
MEDIAN.RANGE <- range(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
x.mid <- seq(MEDIAN.RANGE[1],MEDIAN.RANGE[2],by=0.001)
y.mid <- predict(MODEL, x.mid, se=TRUE)

####2-Identify strains with low and high CV given their median expression######

#Delta CV = observed relative CV minus the value of the loess curve at the strain's own
#median expression (one predict() call per strain).
EXPECTED.CV <- vapply(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, function(MEDIAN.EXPR) {
  unname(predict(MODEL, MEDIAN.EXPR, se=TRUE)$fit)
}, numeric(1))
GLU.EXPR[,"CV.DIFF"] <- GLU.EXPR$YFP.CV.RELATIVE.MEAN - EXPECTED.CV

#The sign of Delta CV gives the class; |Delta CV| below 0.01 overrides it with "UNCLEAR".
GLU.EXPR[,"CV.CLASS"] <- NA_character_
GLU.EXPR[which(GLU.EXPR$CV.DIFF > 0),"CV.CLASS"] <- "HIGH"
GLU.EXPR[which(GLU.EXPR$CV.DIFF < 0),"CV.CLASS"] <- "LOW"
GLU.EXPR[which(abs(GLU.EXPR$CV.DIFF) < 0.01),"CV.CLASS"] <- "UNCLEAR"

#The classes are copied to GLU.FIT by row position.
#NOTE(review): this assumes GLU.EXPR and GLU.FIT list the same genotypes in the same order - confirm.
GLU.FIT[,"CV.CLASS"] <- factor(GLU.EXPR[,"CV.CLASS"],levels=c("LOW","HIGH","UNCLEAR"))

#Plotting color of each noise class.
NOISE.COLORS <- c(LOW="blue",HIGH="red",UNCLEAR="black")
GLU.FIT[,"COLOR"] <- unname(NOISE.COLORS[as.character(GLU.FIT[,"CV.CLASS"])])

#Corresponds to Figure 3 - figure supplement 1C.
pdf("CV.CATEGORIES.GLUCOSE.pdf",useDingbats=FALSE,height=5,width=5)

#Point coordinates and half-widths of the 95% confidence intervals (1.96 * SD / sqrt(N)).
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.CI <- 1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N)
NOISE.Y <- GLU.EXPR$YFP.CV.RELATIVE.MEAN
NOISE.CI <- 1.96*GLU.EXPR$YFP.CV.RELATIVE.SD/sqrt(GLU.EXPR$N)

#First call sets up the axes with fully transparent error bars; the next two calls
#overlay the horizontal and the vertical error bars.
plotCI(MEDIAN.X,NOISE.Y,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",col=GLU.FIT$COLOR,xlab="Median Expression relative to WT",ylab="Expression CV relative to WT",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(0,8),gap=0,main="Median vs CV Expression in Glucose",cex=1.5,lwd=2.02,scol="#00000000")
plotCI(MEDIAN.X,NOISE.Y,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",col=GLU.FIT$COLOR,add=TRUE,sfrac=0,gap=0,cex=1.5,lwd=1.01,pch=21)
plotCI(MEDIAN.X,NOISE.Y,ui=NOISE.Y+NOISE.CI,li=NOISE.Y-NOISE.CI,err="y",col=GLU.FIT$COLOR,add=TRUE,sfrac=0,gap=0,cex=1.5,lwd=1.01,pch=21)

#Loess curve computed in step 1.
lines(x.mid,y.mid$fit,col="#00000099",lwd=2.5)
legend("topright",c("Delta CV > +1%","Delta CV < -1%","|Delta CV| < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")
abline(v=1,lty=2)
abline(h=1,lty=2)
dev.off()


####3-Fit model to fitness data and compute expected fitness######

#Corresponds to Figure 3 - figure supplement 1K.
pdf("FITNESS.EXPECTATION.GLUCOSE.CV.pdf",useDingbats=FALSE)
par(mar=c(6,6,4,1))

#Median expression and half-width of its 95% confidence interval.
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.CI <- 1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N)

#Axes with transparent bars first, then horizontal and vertical error bars.
plotCI(MEDIAN.X,GLU.FIT$Fitness,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",col=GLU.FIT$COLOR,xlab="Median Expression relative to WT",ylab="Fitness relative to WT",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(0.936,1.01),gap=0,main="Expression vs Fitness in Glucose",cex=1.5,lwd=2.02,scol="#00000000")
plotCI(MEDIAN.X,GLU.FIT$Fitness,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",col=GLU.FIT$COLOR,pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
plotCI(MEDIAN.X,GLU.FIT$Fitness,ui=GLU.FIT$Fitness+GLU.FIT$High.95,li=GLU.FIT$Fitness-GLU.FIT$Low.95,err="y",col=GLU.FIT$COLOR,pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
abline(v=1,lty=2)
abline(h=1,lty=2)
legend("bottomright",c("Delta CV > +1%","Delta CV < -1%","|Delta CV| < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")

#Local quadratic regression (span 2/3) of fitness on median expression, drawn on a
#grid of step 0.001 over the observed expression range.
MODEL.FIT <- loess(GLU.FIT$Fitness~GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,degree=2,span=2/3)

MEDIAN.RANGE <- range(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
x.mid <- seq(MEDIAN.RANGE[1],MEDIAN.RANGE[2],by=0.001)
y.mid <- predict(MODEL.FIT, x.mid, se=TRUE)
lines(x.mid,y.mid$fit,col="#00000099",lwd=2.5)

#Horizontal guide 0.005 below the maximum of the fitted curve.
MAX.FITNESS <- max(y.mid$fit)
abline(h=MAX.FITNESS-0.005,lty=2)

#Expression level(s) at which the fitted curve reaches its maximum.
OPT.EXPR <- x.mid[which(y.mid$fit == MAX.FITNESS)]

#Expression threshold(s): local minima of the distance between the fitted curve and the
#guide line, i.e. where the curve crosses (MAX.FITNESS - 0.005).
#NOTE(review): quantmod::findValleys may return the index one step after each minimum,
#which would shift the threshold by one grid step (0.001) - confirm.
ABS.FIT <- abs(y.mid$fit-(MAX.FITNESS - 0.005))

EXPR.THRESHOLD <- x.mid[findValleys(ABS.FIT)]
abline(v=EXPR.THRESHOLD,lty=2)

dev.off()

#Delta fitness = observed fitness minus the fitness expected from the curve at the
#strain's median expression (one predict() call per strain).
EXPECTED.FITNESS <- vapply(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, function(MEDIAN.EXPR) {
  unname(predict(MODEL.FIT, MEDIAN.EXPR, se=TRUE)$fit)
}, numeric(1))
GLU.FIT[,"FIT.DIFF"] <- GLU.FIT$Fitness - EXPECTED.FITNESS

#The LogCV* section of this file also writes a table named "DELTA.FITNESS.txt" to the
#working directory, so the copy written here is replaced when that section runs.
write.table(GLU.EXPR,"DELTA.CV.txt",row.names=FALSE,sep="\t",quote=FALSE)
write.table(GLU.FIT,"DELTA.FITNESS.txt",row.names=FALSE,sep="\t",quote=FALSE)

####4-Group strains by median expression (all / below / above the expression threshold)#####

#Expression rows: every strain with positive expression, then the strains strictly below
#and strictly above the first threshold. A strain exactly at the threshold is in neither
#the LOW nor the HIGH group.
P.EXPR.ALL <- GLU.EXPR[which(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN > 0),]
P.EXPR.LOW <- GLU.EXPR[which(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN < EXPR.THRESHOLD[1]),]
P.EXPR.HIGH <- GLU.EXPR[which(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN > EXPR.THRESHOLD[1]),]

P.EXPR.ALL[,"MEDIAN.CLASS"] <- "ALL"
P.EXPR.LOW[,"MEDIAN.CLASS"] <- "LOW"
P.EXPR.HIGH[,"MEDIAN.CLASS"] <- "HIGH"

#Fitness rows of the same genotypes.
P.FIT.ALL <- GLU.FIT[which(GLU.FIT$MUTATION %in% P.EXPR.ALL$MUTATION),]
P.FIT.LOW <- GLU.FIT[which(GLU.FIT$MUTATION %in% P.EXPR.LOW$MUTATION),]
P.FIT.HIGH <- GLU.FIT[which(GLU.FIT$MUTATION %in% P.EXPR.HIGH$MUTATION),]

P.FIT.ALL[,"MEDIAN.CLASS"] <- "ALL"
P.FIT.LOW[,"MEDIAN.CLASS"] <- "LOW"
P.FIT.HIGH[,"MEDIAN.CLASS"] <- "HIGH"

#Stack the three groups (each strain appears in "ALL" and in at most one other group),
#discard the strains of unclear noise class and turn both grouping columns into factors
#with a fixed level order.
P.FIT <- rbind(P.FIT.ALL,P.FIT.LOW,P.FIT.HIGH)
P.FIT <- P.FIT[which(P.FIT$CV.CLASS != "UNCLEAR"),]
P.FIT$MEDIAN.CLASS <- factor(P.FIT$MEDIAN.CLASS, levels=c("ALL","LOW","HIGH"))
P.FIT <- droplevels(P.FIT)
P.FIT[,"CV.CLASS"] <- factor(P.FIT[,"CV.CLASS"],levels=c("LOW","HIGH"))

P.EXPR <- rbind(P.EXPR.ALL,P.EXPR.LOW,P.EXPR.HIGH)
P.EXPR <- P.EXPR[which(P.EXPR$CV.CLASS != "UNCLEAR"),]
P.EXPR$MEDIAN.CLASS <- factor(P.EXPR$MEDIAN.CLASS, levels=c("ALL","LOW","HIGH"))
P.EXPR <- droplevels(P.EXPR)
P.EXPR[,"CV.CLASS"] <- factor(P.EXPR[,"CV.CLASS"],levels=c("LOW","HIGH"))


####5-Permutation tests to detect significant impact of noise on fitness#####

#One row per compared variable, one column per group of strains.
P.VAL.GLU <- matrix(data = NA, nrow = 3, ncol = 3)
colnames(P.VAL.GLU) <- c("ALL.STRAINS","SUBOPTIMAL.MEDIAN","OPTIMAL.MEDIAN")
rownames(P.VAL.GLU) <- c("Delta.Fitness","Median.Expression","Delta.CV")

#Settings shared by the three boxplots.
MEDIAN.CLASSES <- c("ALL","LOW","HIGH")
BOX.AT <- c(1,2,3.5,4.5,6,7)
BOX.COL <- c("#0000FF99","#FF000099")
PANEL.LABELS <- c("All Strains","Suboptimal Median Expression","Optimal Median Expression")

#Writes one P-value label per panel at the given x positions and common y position.
ADD.P.VALUES <- function(X.POS,Y.POS,LABELS) {
  for (k in seq_along(LABELS))
  {
    legend(x=X.POS[k],y=Y.POS,legend=LABELS[k],bty="n")
  }
}

#Fitness#

#Low-noise vs high-noise strains, within each group of median expression.
for (k in seq_along(MEDIAN.CLASSES))
{
  GROUP.1 <- P.FIT[which(P.FIT$CV.CLASS == "LOW" & P.FIT$MEDIAN.CLASS == MEDIAN.CLASSES[k]),"FIT.DIFF"]
  GROUP.2 <- P.FIT[which(P.FIT$CV.CLASS == "HIGH" & P.FIT$MEDIAN.CLASS == MEDIAN.CLASSES[k]),"FIT.DIFF"]
  P.VAL.GLU[1,k] <- PERMUTE(GROUP.1,GROUP.2)
}

#NOTE(review): sep="" is handed to format.pval(), not to paste(); it looks like it has no effect - confirm.
PV <- paste("P =",format.pval(P.VAL.GLU[1,],digits=4,eps=0.0001,sep=""))

#Corresponds to Figure 3 - figure supplement 8G.
pdf("FITNESS.vs.CV.CLASS.GLUCOSE.pdf",useDingbats=FALSE,height=6,width=12)
#quartz(height=6,width=12)
boxplot(P.FIT$FIT.DIFF~P.FIT$CV.CLASS+P.FIT$MEDIAN.CLASS,notch=TRUE,varwidth=TRUE,ylab="Delta Fitness",col=BOX.COL,ylim=c(-0.01,0.01),at=BOX.AT,xaxt="n")
abline(v=c(2.75,5.25),lty=2)
axis(1,at=c(1.5,4,6.5),labels=PANEL.LABELS,tick=FALSE,font=2)
legend("topright",c("Delta CV < -1%","Delta CV > +1%"),pch=22,bty="n",cex=1.2,pt.cex=2,pt.bg=BOX.COL)
ADD.P.VALUES(c(1.75,4.25,6.75),-0.008,PV)
dev.off()

#Median#

for (k in seq_along(MEDIAN.CLASSES))
{
  GROUP.1 <- P.EXPR[which(P.EXPR$CV.CLASS == "LOW" & P.EXPR$MEDIAN.CLASS == MEDIAN.CLASSES[k]),"YFP.MEDIAN.RELATIVE.MEAN"]
  GROUP.2 <- P.EXPR[which(P.EXPR$CV.CLASS == "HIGH" & P.EXPR$MEDIAN.CLASS == MEDIAN.CLASSES[k]),"YFP.MEDIAN.RELATIVE.MEAN"]
  P.VAL.GLU[2,k] <- PERMUTE(GROUP.1,GROUP.2)
}

PV <- paste("P =",format.pval(P.VAL.GLU[2,],digits=4,eps=0.0001,sep=""))

#Corresponds to Figure 3 - figure supplement 8H.
pdf("MEDIAN.vs.CV.CLASS.GLUCOSE.pdf",useDingbats=FALSE,height=6,width=12)
#quartz(height=6,width=12)
boxplot(P.EXPR$YFP.MEDIAN.RELATIVE.MEAN~P.EXPR$CV.CLASS+P.EXPR$MEDIAN.CLASS,notch=TRUE,varwidth=TRUE,ylab="Median Expression",col=BOX.COL,ylim=c(0,1.5),at=BOX.AT,xaxt="n")
abline(v=c(2.75,5.25),lty=2)
axis(1,at=c(1.5,4,6.5),labels=PANEL.LABELS,tick=FALSE,font=2)
legend("bottomright",c("Delta CV < -1%","Delta CV > +1%"),pch=22,bty="n",cex=1.2,pt.cex=2,pt.bg=BOX.COL)
ADD.P.VALUES(c(0.25,2.75,5.25),1.5,PV)
dev.off()

#Delta CV#

for (k in seq_along(MEDIAN.CLASSES))
{
  GROUP.1 <- P.EXPR[which(P.EXPR$CV.CLASS == "LOW" & P.EXPR$MEDIAN.CLASS == MEDIAN.CLASSES[k]),"CV.DIFF"]
  GROUP.2 <- P.EXPR[which(P.EXPR$CV.CLASS == "HIGH" & P.EXPR$MEDIAN.CLASS == MEDIAN.CLASSES[k]),"CV.DIFF"]
  P.VAL.GLU[3,k] <- PERMUTE(GROUP.1,GROUP.2)
}

PV <- paste("P =",format.pval(P.VAL.GLU[3,],digits=4,eps=0.0001,sep=""))

#Corresponds to Figure 3 - figure supplement 8I.
pdf("DELTA.CV.vs.CV.CLASS.GLUCOSE.pdf",useDingbats=FALSE,height=6,width=12)
#quartz(height=6,width=12)
boxplot(P.EXPR$CV.DIFF~P.EXPR$CV.CLASS+P.EXPR$MEDIAN.CLASS,notch=TRUE,varwidth=TRUE,ylab="Delta CV",col=BOX.COL,ylim=c(-3,4),at=BOX.AT,xaxt="n")
abline(v=c(2.75,5.25),lty=2)
axis(1,at=c(1.5,4,6.5),labels=PANEL.LABELS,tick=FALSE,font=2)
legend("topright",c("Delta CV < 0","Delta CV > 0"),pch=22,bty="n",cex=1.2,pt.cex=2,pt.bg=BOX.COL)
ADD.P.VALUES(c(0.25,2.75,5.25),4,PV)
dev.off()

#The table is written without its row names: rows follow the order Delta.Fitness,
#Median.Expression, Delta.CV.
write.table(P.VAL.GLU,"P.VAL.DELTA.CV.txt",row.names=FALSE,sep="\t",quote=FALSE)


####6-Plot Fitness for two classes of CV#####

#Corresponds to Figure 3 - figure supplement 1G.
pdf("FITNESS.CURVES.CV.CATEGORIES.GLUCOSE.pdf",useDingbats=FALSE,height=5,width=6)
#quartz(height=9,width=13)
par(mar=c(6,6,4,1))

#Median expression and half-width of its 95% confidence interval.
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.CI <- 1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N)

#Axes with transparent bars first, then horizontal and vertical error bars.
plotCI(MEDIAN.X,GLU.FIT$Fitness,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",col=GLU.FIT$COLOR,xlab="Expression level relative to WT",ylab="Fitness relative to WT",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(0.936,1.01),gap=0,main="Expression vs Fitness in Glucose",cex=1.5,lwd=2.02,scol="#00000000")
plotCI(MEDIAN.X,GLU.FIT$Fitness,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",col=GLU.FIT$COLOR,pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
plotCI(MEDIAN.X,GLU.FIT$Fitness,ui=GLU.FIT$Fitness+GLU.FIT$High.95,li=GLU.FIT$Fitness-GLU.FIT$Low.95,err="y",col=GLU.FIT$COLOR,pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
abline(v=1,lty=2)
abline(h=1,lty=2)
legend("bottomright",c("Delta CV > +1%","Delta CV < -1%","|Delta CV| < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")

#Strains of each noise class (fitness and expression rows are paired by position).
LOW.FIT <- GLU.FIT[which(GLU.FIT$CV.CLASS == "LOW"),]
LOW.EXPR <- GLU.EXPR[which(GLU.EXPR$CV.CLASS == "LOW"),]
HIGH.FIT <- GLU.FIT[which(GLU.FIT$CV.CLASS == "HIGH"),]
HIGH.EXPR <- GLU.EXPR[which(GLU.EXPR$CV.CLASS == "HIGH"),]

#Low-noise curve: the strain(s) with the lowest fitness get a weight of 10, all others 1.
WEIGHT <- rep(1,nrow(LOW.EXPR))
WEIGHT[which(LOW.FIT$Fitness == min(LOW.FIT$Fitness))] <- 10

MODEL.FIT <- loess(LOW.FIT$Fitness~LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN,span=2/3,weights=WEIGHT)
MEDIAN.RANGE <- range(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
x.mid <- seq(MEDIAN.RANGE[1],MEDIAN.RANGE[2],by=0.001)
y.mid <- predict(MODEL.FIT, x.mid, se=TRUE)
lines(x.mid,y.mid$fit,col="blue",lwd=1.5,lty=2)
#Shaded band: fitted value +/- 1.96 standard errors (upper edge, then reversed lower edge).
y.err <- c(y.mid$fit + 1.96*y.mid$se.fit,rev(y.mid$fit - 1.96*y.mid$se.fit))
polygon(c(x.mid,rev(x.mid)),y.err,col="#0000FF22",border=NA)

#High-noise curve: unweighted fit, same band construction.
MODEL.FIT <- loess(HIGH.FIT$Fitness~HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN,span=2/3)
MEDIAN.RANGE <- range(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
x.mid <- seq(MEDIAN.RANGE[1],MEDIAN.RANGE[2],by=0.001)
y.mid <- predict(MODEL.FIT, x.mid, se=TRUE)
lines(x.mid,y.mid$fit,col="red",lwd=1.5,lty=2)
y.err <- c(y.mid$fit + 1.96*y.mid$se.fit,rev(y.mid$fit - 1.96*y.mid$se.fit))
polygon(c(x.mid,rev(x.mid)),y.err,col="#FF000022",border=NA)

dev.off()


####7-Plot Delta Fitness vs Delta CV#####

#Draws one page: Delta fitness against Delta CV for the strains in EXPR/FIT (paired by
#row position), points shaded from blue to red by sqrt(median expression), with the
#least-squares line, the squared correlation and its P-value.
PLOT.DELTA.PANEL <- function(EXPR,FIT,TITLE) {
  MEDIAN.MIN <- min(EXPR$YFP.MEDIAN.RELATIVE.MEAN)
  MEDIAN.MAX <- max(EXPR$YFP.MEDIAN.RELATIVE.MEAN)

  #15-step color scale, for the points and for the legend bar.
  COL <- colorRampPalette(c("blue","red"))
  COLOR <- COL(15)[as.numeric(cut(sqrt(EXPR$YFP.MEDIAN.RELATIVE.MEAN),breaks=15))]

  SEQ <- seq(sqrt(MEDIAN.MIN),sqrt(MEDIAN.MAX),by=0.01)
  GRADIENT <- COL(15)[as.numeric(cut(SEQ,breaks=15))]
  TEST <- cor.test(EXPR$CV.DIFF,FIT$FIT.DIFF)
  #Expression value shown at the middle of the legend bar (midpoint on the sqrt scale).
  MID <- mean(c(sqrt(MEDIAN.MIN),sqrt(MEDIAN.MAX)))^2

  #Half-width of the 95% confidence interval of the CV.
  NOISE.CI <- 1.96*EXPR$YFP.CV.RELATIVE.SD/sqrt(EXPR$N)

  #quartz(height=9,width=9)
  par(mar=c(6,6,4,1))
  plotCI(EXPR$CV.DIFF,FIT$FIT.DIFF,ui=EXPR$CV.DIFF+NOISE.CI,li=EXPR$CV.DIFF-NOISE.CI,err="x",xlab="Delta CV",ylab="Delta Fitness",pch=21,sfrac=0,xlim=c(-4,4),ylim=c(-0.01,0.01),gap=0,main=TITLE,cex=1.6,cex.axis=1.2,cex.lab=1.4,font.lab=2,font.axis=2,cex.main=2,pt.bg=COLOR,col="#00000066",lwd=1.25)
  abline(v=0,lty=2,lwd=1.25)
  abline(h=0,lty=2,lwd=1.25)
  plotCI(EXPR$CV.DIFF,FIT$FIT.DIFF,ui=FIT$FIT.DIFF+FIT$High.95,li=FIT$FIT.DIFF-FIT$Low.95,err="y",pt.bg=COLOR,pch=21,cex=1.6,sfrac=0,gap=0,col="#00000066",lwd=1.25,add=TRUE)
  abline(lm(FIT$FIT.DIFF~EXPR$CV.DIFF),lwd=2,col="#00000066")
  legend("topleft",c(paste0("R^2 = ",round(TEST$estimate^2,2)),paste0("P = ",format(TEST$p.value,digits=3))),bty="n",cex=1.3)
  color.legend(1,-0.01,3,-0.0085,rect.col=GRADIENT,gradient="x",legend=c(round(MEDIAN.MIN,2),round(MID,2),round(MEDIAN.MAX,2)))
  legend(1,-0.0063,"Median Expression",bty="n",cex=1.1)
  box(lwd=2)
}

#Two pages in one file: strains below, then strains above the first expression threshold.

#Corresponds to Figure 3 - figure supplement 4C & 4G.
pdf("DELTA.FITNESS.vs.DELTA.CV.GLUCOSE.pdf",useDingbats=FALSE,height=9,width=9)

#Low Expression

LOW.EXPR <- subset(GLU.EXPR, YFP.MEDIAN.RELATIVE.MEAN < EXPR.THRESHOLD[1])
LOW.FIT <- subset(GLU.FIT, MUTATION %in% LOW.EXPR$MUTATION)
PLOT.DELTA.PANEL(LOW.EXPR,LOW.FIT,"Suboptimal Expression in Glucose")


#High Expression

HIGH.EXPR <- subset(GLU.EXPR, YFP.MEDIAN.RELATIVE.MEAN > EXPR.THRESHOLD[1])
HIGH.FIT <- subset(GLU.FIT, MUTATION %in% HIGH.EXPR$MUTATION)
PLOT.DELTA.PANEL(HIGH.EXPR,HIGH.FIT,"Optimal Expression in Glucose")

dev.off()


####8-Plot Median vs Delta CV#####

#All Strains

#Corresponds to Figure 3 - figure supplement 2C.
pdf("MEDIAN.vs.DELTA.CV.GLUCOSE.pdf",useDingbats=FALSE,height=5,width=5)

#Coordinates and half-widths of the 95% confidence intervals.
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.CI <- 1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N)
DELTA.Y <- GLU.EXPR$CV.DIFF
DELTA.CI <- 1.96*GLU.EXPR$YFP.CV.RELATIVE.SD/sqrt(GLU.EXPR$N)

#Correlation between median expression and Delta CV.
TEST <- cor.test(MEDIAN.X,DELTA.Y)

#quartz(height=9,width=9)
par(mar=c(6,6,4,1))
#NOTE(review): the points use GLU.EXPR$COLOR (the mutant-class colors) whereas the legend
#below describes the red/blue/black noise classes - confirm which one is intended.
plotCI(MEDIAN.X,DELTA.Y,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",xlab="Median Expression relative to WT",ylab="Delta CV",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(-4,4),gap=0,main="All Strains in Glucose",cex.axis=1.2,col=GLU.EXPR$COLOR,cex=1.5,lwd=2.02,scol="#00000000")
plotCI(MEDIAN.X,DELTA.Y,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",pch=21,sfrac=0,gap=0,col=GLU.EXPR$COLOR,add=TRUE,cex=1.5,lwd=1.01)
plotCI(MEDIAN.X,DELTA.Y,ui=DELTA.Y+DELTA.CI,li=DELTA.Y-DELTA.CI,err="y",pch=21,sfrac=0,gap=0,col=GLU.EXPR$COLOR,add=TRUE,cex=1.5,lwd=1.01)
abline(lm(GLU.EXPR$CV.DIFF~GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),lwd=2,col="#00000066")
legend("bottomleft",c(paste0("R^2 = ",format(TEST$estimate^2,digits=2)),paste0("P = ",format(TEST$p.value,digits=3))),bty="n")
legend("bottomright",c("Delta CV > +1%","Delta CV < -1%","|Delta CV| < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")
dev.off()


####9-Plot Median vs Delta Fitness#####

#All Strains

#Corresponds to Figure 3 - figure supplement 3C.
pdf("MEDIAN.vs.DELTA.FITNESS.GLUCOSE.CV.pdf",useDingbats=FALSE,height=5,width=5)

#Coordinates and half-width of the 95% confidence interval of median expression.
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.CI <- 1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N)
DELTA.Y <- GLU.FIT$FIT.DIFF

#Correlation between median expression and Delta fitness.
TEST <- cor.test(MEDIAN.X,DELTA.Y)

#quartz(height=9,width=9)
par(mar=c(6,6,4,1))
#Points are colored by noise class (GLU.FIT$COLOR), as described by the legend.
plotCI(MEDIAN.X,DELTA.Y,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",xlab="Median Expression relative to WT",ylab="Delta Fitness",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(-0.01,0.01),gap=0,main="All Strains in Glucose",cex.axis=1.2,col=GLU.FIT$COLOR,cex=1.5,lwd=2.02,scol="#00000000")
plotCI(MEDIAN.X,DELTA.Y,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",pch=21,sfrac=0,gap=0,col=GLU.FIT$COLOR,add=TRUE,cex=1.5,lwd=1.01)
plotCI(MEDIAN.X,DELTA.Y,ui=DELTA.Y+GLU.FIT$High.95,li=DELTA.Y-GLU.FIT$Low.95,err="y",pch=21,sfrac=0,gap=0,col=GLU.FIT$COLOR,add=TRUE,cex=1.5,lwd=1.01)
abline(lm(GLU.FIT$FIT.DIFF~GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),lwd=2,col="#00000066")
legend("bottomleft",c(paste0("R^2 = ",format(TEST$estimate^2,digits=2)),paste0("P = ",format(TEST$p.value,digits=3))),bty="n")
legend("bottomright",c("Delta CV > +1%","Delta CV < -1%","|Delta CV| < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")
dev.off()


################################################################################
# 10 - Impact of expression noise (LogCV*) on fitness (Supplementary figures). #
################################################################################

#Clear memory
#NOTE(review): rm(list=ls()) removes every object of the global environment, including
#all results of the CV* section above; this section reloads its inputs from disk.
rm(list=ls())
#NOTE(review): warn=-1 silences every warning raised by the code below and the previous
#setting is never restored.
options(warn=-1)

#Libraries and functions
#plotCI() is presumably provided by plotrix, %nin% by Hmisc and findValleys() by
#quantmod (TODO confirm).
library(plotrix)
library(Hmisc)
library(pcaPP)
library(msir)
library(quantmod)

#Make box() resolve to the graphics implementation; presumably one of the packages
#attached above masks it (TODO confirm).
box <- graphics::box

#Two-sided permutation test on the absolute difference between two group medians.
#
#Arguments:
#  GROUP.1, GROUP.2  Numeric vectors with the observations of each group.
#  N.PERM            Number of random relabelings (default 100000, the value that
#                    used to be hard-coded).
#Value:
#  Monte-Carlo P-value (b + 1)/(N.PERM + 1), where b is the number of relabelings whose
#  |median difference| is at least as large as the observed one. NA is returned when
#  either group is empty, because no median difference can be computed.
PERMUTE <- function(GROUP.1,GROUP.2,N.PERM=100000) {
  N.1 <- length(GROUP.1)

  #Without this guard an empty group produced an NA statistic that was silently
  #counted as "never exceeded", i.e. the smallest possible P-value.
  if (N.1 == 0 || length(GROUP.2) == 0)
  {
    return(NA_real_)
  }

  OBS.DIFF <- abs(median(GROUP.1)-median(GROUP.2))

  POOL <- c(GROUP.1,GROUP.2)
  N.POOL <- length(POOL)

  #Each relabeling draws N.1 positions of the pool for group 1; the rest forms group 2.
  PERM.DIFF <- vapply(seq_len(N.PERM), function(i) {
    POS.1 <- sample.int(N.POOL,size=N.1,replace=FALSE)
    abs(median(POOL[POS.1])-median(POOL[-POS.1]))
  }, numeric(1))

  #Ties count as "at least as extreme" (>=). With a strict ">" two identical groups
  #(observed difference of 0) were reported with P = 1/(N.PERM + 1) instead of P = 1.
  P.VAL <- (sum(PERM.DIFF >= OBS.DIFF) + 1)/(N.PERM + 1)
  return(P.VAL)
}

#Load Data
#NOTE(review): parent.dir is a placeholder that has to be edited by hand. Because of
#setwd(), every relative path below (inputs and output files) is resolved inside it.
parent.dir <- "/Path.to.input.files"
setwd(parent.dir)

#as.is=TRUE keeps text columns as character vectors, which the recoding below relies on.
Fit <- read.table("GLUCOSE.SD/SUMMARY.DATA.FITNESS.txt",header=TRUE,as.is=TRUE)
Expression <- read.table("GLUCOSE.SD/SUMMARY.DATA.EXPRESSION.txt",header=TRUE,as.is=TRUE)
All.Fit <- read.table("GLUCOSE.SD/Experiment_s.estimates_filtered.txt",header=TRUE,as.is=TRUE)

#Drop strain Y2675 and set the construct type of the record(s) with ID 3 to "SINGLE".
Expression <- subset(Expression, STRAIN != "Y2675")
Expression[which(Expression$ID == 3),"YFP.CONSTRUCT"] <- "SINGLE"

#Keep only the fitness estimates that are not flagged in OUTLIER.2.
All.Fit <- subset(All.Fit, OUTLIER.2 == "NO")

#Recode genotype labels: "URA3" carried by a DOUBLE construct becomes "WT_WT" in the three
#tables and "NEGATIVE" becomes "TDH3.Deletion" in the expression table.
#The same vectorized which() idiom is used for every table; unlike the former
#for (i in 1:nrow(...)) loops it is a no-op on an empty table and skips NA entries
#instead of stopping with an error.
All.Fit[which(All.Fit$MUTATION == "URA3" & All.Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Expression[which(Expression$MUTATION == "URA3" & Expression$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Expression[which(Expression$MUTATION == "NEGATIVE"),"MUTATION"] <- "TDH3.Deletion"
Fit[which(Fit$MUTATION == "URA3" & Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"

#Same sort key for both summary tables: later steps pair their rows by position.
Expression <- Expression[order(Expression$ENVIRONMENT,Expression$MUTATION),]
Fit <- Fit[order(Fit$ENVIRONMENT, Fit$MUTATION),]

#Colors for the different categories of mutants
#Lookup table from mutant CLASS to plotting color: both reference classes are black,
#both TFBS classes red, TATA blue, TFBS.TATA purple, WT gray and the remaining
#*.2X classes green.
CLASS.COLORS <- c(REF="black",REF.2X="black",TFBS="#FF0000FF",TFBS.2="#FF0000FF",TATA="#0000FFFF",TFBS.TATA="#990099FF",WT="gray",TFBS.2X="green",TATA.2X="green",TFBS.TATA.2X="green",WT.2X="green")

#Only rows whose CLASS appears in the table receive a color, exactly like the former
#chain of if() tests; the lookup replaces a for (i in 1:nrow(...)) loop that failed on
#an empty table and on NA classes.
KNOWN.CLASS <- which(Expression$CLASS %in% names(CLASS.COLORS))
if (length(KNOWN.CLASS) > 0)
{
  Expression[KNOWN.CLASS,"COLOR"] <- unname(CLASS.COLORS[as.character(Expression$CLASS[KNOWN.CLASS])])
}


#############################################
#####SHOW IMPACT OF NOISE ON FITNESS#########
#############################################

################
#LogCV* Glucose#
################

####1-Fit non linear regression to Expression data#####

#Genotypes left out of the analysis.
EXCLUDED.MUTATIONS <- c("TATA.81_TATA.81","m75_TATA.81","m63.m90_m63.m90","TATA.129_TATA.129","WT","TDH3.Deletion","TATA.42_TATA.42","BARRY.DOUBLE")

#Glucose records of the retained genotypes with a relative median expression below 1.5,
#then the matching rows of the fitness summary and of the individual fitness estimates.
GLU.EXPR <- subset(Expression, ENVIRONMENT == "GLUCOSE" & !(MUTATION %in% EXCLUDED.MUTATIONS) & YFP.MEDIAN.RELATIVE.MEAN < 1.5)
GLU.FIT <- subset(Fit, ENVIRONMENT == "GLUCOSE" & MUTATION %in% GLU.EXPR$MUTATION)
GLU.ALL <- subset(All.Fit, ENVIRONMENT == "GLUCOSE" & MUTATION %in% GLU.FIT$MUTATION)

#Local quadratic regression (span 2/3) of relative log CV on relative median expression.
MODEL <- loess(GLU.EXPR$YFP.LOG.CV.RELATIVE.MEAN~GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,degree=2,span=2/3)

#Fitted curve evaluated every 0.001 over the observed range of median expression.
MEDIAN.RANGE <- range(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
x.mid <- seq(MEDIAN.RANGE[1],MEDIAN.RANGE[2],by=0.001)
y.mid <- predict(MODEL, x.mid, se=TRUE)

####2-Identify strains with low and high log2(CV) given their median expression######

#Delta log2(CV) = observed relative log CV minus the value of the loess curve at the
#strain's own median expression (one predict() call per strain).
EXPECTED.LOG.CV <- vapply(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, function(MEDIAN.EXPR) {
  unname(predict(MODEL, MEDIAN.EXPR, se=TRUE)$fit)
}, numeric(1))
GLU.EXPR[,"LOG.CV.DIFF"] <- GLU.EXPR$YFP.LOG.CV.RELATIVE.MEAN - EXPECTED.LOG.CV

#The sign of the difference gives the class; an absolute difference below 0.01 overrides
#it with "UNCLEAR".
GLU.EXPR[,"LOG.CV.CLASS"] <- NA_character_
GLU.EXPR[which(GLU.EXPR$LOG.CV.DIFF > 0),"LOG.CV.CLASS"] <- "HIGH"
GLU.EXPR[which(GLU.EXPR$LOG.CV.DIFF < 0),"LOG.CV.CLASS"] <- "LOW"
GLU.EXPR[which(abs(GLU.EXPR$LOG.CV.DIFF) < 0.01),"LOG.CV.CLASS"] <- "UNCLEAR"

#The classes are copied to GLU.FIT by row position.
#NOTE(review): this assumes GLU.EXPR and GLU.FIT list the same genotypes in the same order - confirm.
GLU.FIT[,"LOG.CV.CLASS"] <- factor(GLU.EXPR[,"LOG.CV.CLASS"],levels=c("LOW","HIGH","UNCLEAR"))

#Plotting color of each noise class.
NOISE.COLORS <- c(LOW="blue",HIGH="red",UNCLEAR="black")
GLU.FIT[,"COLOR"] <- unname(NOISE.COLORS[as.character(GLU.FIT[,"LOG.CV.CLASS"])])

#Corresponds to Figure 3 - figure supplement 1D.
pdf("LOG.CV.CATEGORIES.GLUCOSE.pdf",useDingbats=FALSE,height=5,width=5)

#Point coordinates and half-widths of the 95% confidence intervals (1.96 * SD / sqrt(N)).
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.CI <- 1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N)
NOISE.Y <- GLU.EXPR$YFP.LOG.CV.RELATIVE.MEAN
NOISE.CI <- 1.96*GLU.EXPR$YFP.LOG.CV.RELATIVE.SD/sqrt(GLU.EXPR$N)

#First call sets up the axes with fully transparent error bars; the next two calls
#overlay the horizontal and the vertical error bars.
plotCI(MEDIAN.X,NOISE.Y,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",col=GLU.FIT$COLOR,xlab="Median Expression relative to WT",ylab="Expression log2(CV) relative to WT",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(-2,3),gap=0,main="Median vs log2(CV) Expression in Glucose",cex=1.5,lwd=2.02,scol="#00000000")
plotCI(MEDIAN.X,NOISE.Y,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",col=GLU.FIT$COLOR,add=TRUE,sfrac=0,gap=0,cex=1.5,lwd=1.01,pch=21)
plotCI(MEDIAN.X,NOISE.Y,ui=NOISE.Y+NOISE.CI,li=NOISE.Y-NOISE.CI,err="y",col=GLU.FIT$COLOR,add=TRUE,sfrac=0,gap=0,cex=1.5,lwd=1.01,pch=21)

#Loess curve computed in step 1.
lines(x.mid,y.mid$fit,col="#00000099",lwd=2.5)
legend("topright",c("Delta log2(CV) > +1%","Delta log2(CV) < -1%","|Delta log2(CV)| < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")
abline(v=1,lty=2)
abline(h=0,lty=2)
dev.off()

####3-Fit model to fitness data and compute expected fitness######

#Corresponds to Figure 3 - figure supplement 1L.
pdf("FITNESS.EXPECTATION.GLUCOSE.LOG.CV.pdf",useDingbats=FALSE)
par(mar=c(6,6,4,1))

#Median expression and half-width of its 95% confidence interval.
MEDIAN.X <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN
MEDIAN.CI <- 1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N)

#Axes with transparent bars first, then horizontal and vertical error bars.
plotCI(MEDIAN.X,GLU.FIT$Fitness,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",col=GLU.FIT$COLOR,xlab="Median Expression relative to WT",ylab="Fitness relative to WT",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(0.936,1.01),gap=0,main="Expression vs Fitness in Glucose",cex=1.5,lwd=2.02,scol="#00000000")
plotCI(MEDIAN.X,GLU.FIT$Fitness,ui=MEDIAN.X+MEDIAN.CI,li=MEDIAN.X-MEDIAN.CI,err="x",col=GLU.FIT$COLOR,pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
plotCI(MEDIAN.X,GLU.FIT$Fitness,ui=GLU.FIT$Fitness+GLU.FIT$High.95,li=GLU.FIT$Fitness-GLU.FIT$Low.95,err="y",col=GLU.FIT$COLOR,pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
abline(v=1,lty=2)
abline(h=1,lty=2)
legend("bottomright",c("Delta log2(CV) > +1%","Delta log2(CV) < -1%","|Delta log2(CV) | < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")

#Local quadratic regression (span 2/3) of fitness on median expression, drawn on a
#grid of step 0.001 over the observed expression range.
MODEL.FIT <- loess(GLU.FIT$Fitness~GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,degree=2,span=2/3)

MEDIAN.RANGE <- range(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
x.mid <- seq(MEDIAN.RANGE[1],MEDIAN.RANGE[2],by=0.001)
y.mid <- predict(MODEL.FIT, x.mid, se=TRUE)
lines(x.mid,y.mid$fit,col="#00000099",lwd=2.5)

#Horizontal guide 0.005 below the maximum of the fitted curve.
MAX.FITNESS <- max(y.mid$fit)
abline(h=MAX.FITNESS-0.005,lty=2)

#Expression level(s) at which the fitted curve reaches its maximum.
OPT.EXPR <- x.mid[which(y.mid$fit == MAX.FITNESS)]

#Expression threshold(s): local minima of the distance between the fitted curve and the
#guide line, i.e. where the curve crosses (MAX.FITNESS - 0.005).
#NOTE(review): quantmod::findValleys may return the index one step after each minimum,
#which would shift the threshold by one grid step (0.001) - confirm.
ABS.FIT <- abs(y.mid$fit-(MAX.FITNESS - 0.005))

EXPR.THRESHOLD <- x.mid[findValleys(ABS.FIT)]
abline(v=EXPR.THRESHOLD,lty=2)

dev.off()

#Delta fitness = observed fitness minus the fitness expected from the curve at the
#strain's median expression (one predict() call per strain).
EXPECTED.FITNESS <- vapply(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, function(MEDIAN.EXPR) {
  unname(predict(MODEL.FIT, MEDIAN.EXPR, se=TRUE)$fit)
}, numeric(1))
GLU.FIT[,"FIT.DIFF"] <- GLU.FIT$Fitness - EXPECTED.FITNESS

#The CV* section of this file also writes a table named "DELTA.FITNESS.txt" to the
#working directory; the copy written here replaces it.
write.table(GLU.EXPR,"DELTA.LOG.CV.txt",row.names=FALSE,sep="\t",quote=FALSE)
write.table(GLU.FIT,"DELTA.FITNESS.txt",row.names=FALSE,sep="\t",quote=FALSE)


####4-Group strains by median expression (all / below / above the expression threshold)#####

#Expression rows: every strain with positive expression, then the strains strictly below
#and strictly above the first threshold. A strain exactly at the threshold is in neither
#the LOW nor the HIGH group.
P.EXPR.ALL <- GLU.EXPR[which(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN > 0),]
P.EXPR.LOW <- GLU.EXPR[which(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN < EXPR.THRESHOLD[1]),]
P.EXPR.HIGH <- GLU.EXPR[which(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN > EXPR.THRESHOLD[1]),]

P.EXPR.ALL[,"MEDIAN.CLASS"] <- "ALL"
P.EXPR.LOW[,"MEDIAN.CLASS"] <- "LOW"
P.EXPR.HIGH[,"MEDIAN.CLASS"] <- "HIGH"

#Fitness rows of the same genotypes.
P.FIT.ALL <- GLU.FIT[which(GLU.FIT$MUTATION %in% P.EXPR.ALL$MUTATION),]
P.FIT.LOW <- GLU.FIT[which(GLU.FIT$MUTATION %in% P.EXPR.LOW$MUTATION),]
P.FIT.HIGH <- GLU.FIT[which(GLU.FIT$MUTATION %in% P.EXPR.HIGH$MUTATION),]

P.FIT.ALL[,"MEDIAN.CLASS"] <- "ALL"
P.FIT.LOW[,"MEDIAN.CLASS"] <- "LOW"
P.FIT.HIGH[,"MEDIAN.CLASS"] <- "HIGH"

#Stack the three groups (each strain appears in "ALL" and in at most one other group),
#discard the strains of unclear noise class and turn both grouping columns into factors
#with a fixed level order.
P.FIT <- rbind(P.FIT.ALL,P.FIT.LOW,P.FIT.HIGH)
P.FIT <- P.FIT[which(P.FIT$LOG.CV.CLASS != "UNCLEAR"),]
P.FIT$MEDIAN.CLASS <- factor(P.FIT$MEDIAN.CLASS, levels=c("ALL","LOW","HIGH"))
P.FIT <- droplevels(P.FIT)
P.FIT[,"LOG.CV.CLASS"] <- factor(P.FIT[,"LOG.CV.CLASS"],levels=c("LOW","HIGH"))

P.EXPR <- rbind(P.EXPR.ALL,P.EXPR.LOW,P.EXPR.HIGH)
P.EXPR <- P.EXPR[which(P.EXPR$LOG.CV.CLASS != "UNCLEAR"),]
P.EXPR$MEDIAN.CLASS <- factor(P.EXPR$MEDIAN.CLASS, levels=c("ALL","LOW","HIGH"))
P.EXPR <- droplevels(P.EXPR)
P.EXPR[,"LOG.CV.CLASS"] <- factor(P.EXPR[,"LOG.CV.CLASS"],levels=c("LOW","HIGH"))

####5-Permutation tests to detect significant impact of noise on fitness#####

#One row per compared variable, one column per group of strains.
P.VAL.GLU <- matrix(data = NA, nrow = 3, ncol = 3)
colnames(P.VAL.GLU) <- c("ALL.STRAINS","SUBOPTIMAL.MEDIAN","OPTIMAL.MEDIAN")
rownames(P.VAL.GLU) <- c("Delta.Fitness","Median.Expression","Delta.LOG.CV")


#Fitness#

#Low-noise vs high-noise strains, within each group of median expression.
MEDIAN.CLASSES <- c("ALL","LOW","HIGH")
for (k in seq_along(MEDIAN.CLASSES))
{
  GROUP.1 <- P.FIT[which(P.FIT$LOG.CV.CLASS == "LOW" & P.FIT$MEDIAN.CLASS == MEDIAN.CLASSES[k]),"FIT.DIFF"]
  GROUP.2 <- P.FIT[which(P.FIT$LOG.CV.CLASS == "HIGH" & P.FIT$MEDIAN.CLASS == MEDIAN.CLASSES[k]),"FIT.DIFF"]
  P.VAL.GLU[1,k] <- PERMUTE(GROUP.1,GROUP.2)
}

#NOTE(review): sep="" is handed to format.pval(), not to paste(); it looks like it has no effect - confirm.
PV <- paste("P =",format.pval(P.VAL.GLU[1,],digits=4,eps=0.0001,sep=""))

#Corresponds to Figure 3 - figure supplement 8J.
pdf("FITNESS.vs.LOG.CV.CLASS.GLUCOSE.pdf",useDingbats=FALSE,height=6,width=12)
#quartz(height=6,width=12)
BOX.COL <- c("#0000FF99","#FF000099")
boxplot(P.FIT$FIT.DIFF~P.FIT$LOG.CV.CLASS+P.FIT$MEDIAN.CLASS,notch=TRUE,varwidth=TRUE,ylab="Delta Fitness",col=BOX.COL,ylim=c(-0.01,0.01),at=c(1,2,3.5,4.5,6,7),xaxt="n")
abline(v=c(2.75,5.25),lty=2)
axis(1,at=c(1.5,4,6.5),labels=c("All Strains","Suboptimal Median Expression","Optimal Median Expression"),tick=FALSE,font=2)
legend("topright",c("Delta log2(CV) < -1%","Delta log2(CV) > +1%"),pch=22,bty="n",cex=1.2,pt.cex=2,pt.bg=BOX.COL)
#One P-value label per panel.
P.LABEL.X <- c(1.75,4.25,6.75)
for (k in seq_along(PV))
{
  legend(x=P.LABEL.X[k],y=-0.008,legend=PV[k],bty="n")
}
dev.off()

#Median#

# For each median-expression group (in the order ALL, LOW, HIGH), compare
# median expression between the LOW and HIGH noise classes. Results go in
# the second row of P.VAL.GLU.
MEDIAN.GROUPS <- c("ALL","LOW","HIGH")
for (K in seq_along(MEDIAN.GROUPS))
{
  IN.GROUP <- P.EXPR$MEDIAN.CLASS == MEDIAN.GROUPS[K]
  GROUP.1 <- P.EXPR[which(P.EXPR$LOG.CV.CLASS == "LOW" & IN.GROUP),"YFP.MEDIAN.RELATIVE.MEAN"]
  GROUP.2 <- P.EXPR[which(P.EXPR$LOG.CV.CLASS == "HIGH" & IN.GROUP),"YFP.MEDIAN.RELATIVE.MEAN"]
  P.VAL.GLU[2,K] <- PERMUTE(GROUP.1,GROUP.2)
}

# Labels printed on the figure, one per group.
PV <- paste("P =",format.pval(P.VAL.GLU[2,],digits=4,eps=0.0001,sep=""))

#Corresponds to Figure 3 - figure supplement 8K.
pdf("MEDIAN.vs.LOG.CV.CLASS.GLUCOSE.pdf",useDingbats=FALSE,height=6,width=12)
#quartz(height=6,width=12)
CLASS.FILL <- c("#0000FF99","#FF000099")
# Six boxes: (LOW, HIGH noise) within each of the three expression groups.
boxplot(P.EXPR$YFP.MEDIAN.RELATIVE.MEAN~P.EXPR$LOG.CV.CLASS+P.EXPR$MEDIAN.CLASS,notch=TRUE,varwidth=TRUE,ylab="Median Expression",col=CLASS.FILL,ylim=c(0,1.5),at=c(1,2,3.5,4.5,6,7),xaxt="n")
# Dashed separators between the three groups.
abline(v=c(2.75,5.25),lty=2)
axis(1,at=c(1.5,4,6.5),labels=c("All Strains","Suboptimal Median Expression","Optimal Median Expression"),tick=FALSE,font=2)
legend("bottomright",c("Delta log2(CV) < -1%","Delta log2(CV) > +1%"),pch=22,bty="n",cex=1.2,pt.cex=2,pt.bg=CLASS.FILL)
# One label per group, placed along the top of the panel.
PV.X <- c(0.25,2.75,5.25)
for (K in seq_along(PV))
{
  legend(x=PV.X[K],y=1.5,PV[K],bty="n")
}
dev.off()

#Delta LOG.CV#

# For each median-expression group (in the order ALL, LOW, HIGH), compare
# Delta log2(CV) between the LOW and HIGH noise classes. Results go in the
# third row of P.VAL.GLU.
MEDIAN.GROUPS <- c("ALL","LOW","HIGH")
for (K in seq_along(MEDIAN.GROUPS))
{
  IN.GROUP <- P.EXPR$MEDIAN.CLASS == MEDIAN.GROUPS[K]
  GROUP.1 <- P.EXPR[which(P.EXPR$LOG.CV.CLASS == "LOW" & IN.GROUP),"LOG.CV.DIFF"]
  GROUP.2 <- P.EXPR[which(P.EXPR$LOG.CV.CLASS == "HIGH" & IN.GROUP),"LOG.CV.DIFF"]
  P.VAL.GLU[3,K] <- PERMUTE(GROUP.1,GROUP.2)
}

# Labels printed on the figure, one per group.
PV <- paste("P =",format.pval(P.VAL.GLU[3,],digits=4,eps=0.0001,sep=""))

#Corresponds to Figure 3 - figure supplement 8L.
pdf("DELTA.LOG.CV.vs.LOG.CV.CLASS.GLUCOSE.pdf",useDingbats=FALSE,height=6,width=12)
#quartz(height=6,width=12)
CLASS.FILL <- c("#0000FF99","#FF000099")
# Six boxes: (LOW, HIGH noise) within each of the three expression groups.
boxplot(P.EXPR$LOG.CV.DIFF~P.EXPR$LOG.CV.CLASS+P.EXPR$MEDIAN.CLASS,notch=TRUE,varwidth=TRUE,ylab="Delta log2(CV)",col=CLASS.FILL,ylim=c(-2,2),at=c(1,2,3.5,4.5,6,7),xaxt="n")
# Dashed separators between the three groups.
abline(v=c(2.75,5.25),lty=2)
axis(1,at=c(1.5,4,6.5),labels=c("All Strains","Suboptimal Median Expression","Optimal Median Expression"),tick=FALSE,font=2)
legend("topright",c("Delta log2(CV) < 0","Delta log2(CV) > 0"),pch=22,bty="n",cex=1.2,pt.cex=2,pt.bg=CLASS.FILL)
# One label per group, placed along the top of the panel.
PV.X <- c(0.25,2.75,5.25)
for (K in seq_along(PV))
{
  legend(x=PV.X[K],y=2,PV[K],bty="n")
}
dev.off()

# Save the full 3 x 3 table. Row names are not written, so the rows of the
# file are identified by position only.
write.table(P.VAL.GLU,file="P.VAL.DELTA.LOG.CV.txt",sep="\t",quote=FALSE,row.names=FALSE)


####6-Plot Fitness for two classes of log2(CV)#####

#Corresponds to Figure 3 - figure supplement 1H.
# Fitness against median expression for all strains, coloured by
# GLU.FIT$COLOR, with one loess curve and shaded band for the low-noise
# class and one for the high-noise class.
pdf("FITNESS.CURVES.LOG.CV.CATEGORIES.GLUCOSE.pdf",useDingbats=FALSE,height=5,width=6)
#quartz(height=9,width=13)
par(mar=c(6,6,4,1))

# Horizontal error bars: mean +/- 1.96 * SD / sqrt(N) of median expression.
EXPR.HALF.WIDTH <- 1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N)
EXPR.UPPER <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+EXPR.HALF.WIDTH
EXPR.LOWER <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-EXPR.HALF.WIDTH

# First pass sets up the axes and draws the points with a thick outline and
# fully transparent error bars (scol="#00000000").
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=EXPR.UPPER,li=EXPR.LOWER,err="x",col=GLU.FIT$COLOR,xlab="Expression level relative to WT",ylab="Fitness relative to WT",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(0.936,1.01),gap=0,main="Expression vs Fitness in Glucose",cex=1.5,lwd=2.02,scol="#00000000")
# Second and third passes add the visible x and y error bars.
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=EXPR.UPPER,li=EXPR.LOWER,err="x",col=GLU.FIT$COLOR,pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$Fitness,ui=GLU.FIT$Fitness+GLU.FIT$High.95,li=GLU.FIT$Fitness-GLU.FIT$Low.95,err="y",col=GLU.FIT$COLOR,pch=21,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
# Dashed reference lines at expression = 1 and fitness = 1.
abline(v=1,lty=2)
abline(h=1,lty=2)
legend("bottomright",c("Delta log2(CV) > +1%","Delta log2(CV) < -1%","|Delta log2(CV)| < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")

# Split the strains by noise class.
LOW.EXPR <- GLU.EXPR[which(GLU.EXPR$LOG.CV.CLASS == "LOW"),,drop=FALSE]
LOW.FIT <- GLU.FIT[which(GLU.FIT$LOG.CV.CLASS == "LOW"),,drop=FALSE]
HIGH.EXPR <- GLU.EXPR[which(GLU.EXPR$LOG.CV.CLASS == "HIGH"),,drop=FALSE]
HIGH.FIT <- GLU.FIT[which(GLU.FIT$LOG.CV.CLASS == "HIGH"),,drop=FALSE]

# In the low-noise fit, the strain(s) with the lowest fitness get weight 10
# instead of 1.
WEIGHT <- rep(1,nrow(LOW.EXPR))
WEIGHT[which(LOW.FIT$Fitness == min(LOW.FIT$Fitness))] <- 10

# Low-noise curve (blue): fit, dashed line, and +/- 1.96 * SE band.
MODEL.FIT <- loess(LOW.FIT$Fitness~LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN,span=2/3,weights=WEIGHT)
x.mid <- seq(min(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN),max(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN),by=0.001)
y.mid <- predict(MODEL.FIT, x.mid, se=TRUE)
lines(x.mid,y.mid$fit,col="blue",lwd=1.5,lty=2)
y.err <- c(y.mid$fit + 1.96*y.mid$se.fit,rev(y.mid$fit - 1.96*y.mid$se.fit))
polygon(c(x.mid,rev(x.mid)),y.err,col="#0000FF22",border=NA)

# High-noise curve (red): same construction, unweighted.
MODEL.FIT <- loess(HIGH.FIT$Fitness~HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN,span=2/3)
x.mid <- seq(min(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN),max(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN),by=0.001)
y.mid <- predict(MODEL.FIT, x.mid, se=TRUE)
lines(x.mid,y.mid$fit,col="red",lwd=1.5,lty=2)
y.err <- c(y.mid$fit + 1.96*y.mid$se.fit,rev(y.mid$fit - 1.96*y.mid$se.fit))
polygon(c(x.mid,rev(x.mid)),y.err,col="#FF000022",border=NA)

dev.off()


####7-Plot Delta Fitness vs Delta LOG.CV#####

#Corresponds to Figure 3 - figure supplement 4D & 4H.
# Delta Fitness against Delta log2(CV), drawn once for strains below the
# first expression threshold and once for strains above it. Points are
# filled on a blue-to-red scale of sqrt(median expression).
pdf("DELTA.FITNESS.vs.DELTA.LOG.CV.GLUCOSE.pdf",useDingbats=FALSE,height=9,width=9)

#Low Expression

LOW.EXPR <- GLU.EXPR[which(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN < EXPR.THRESHOLD[1]),,drop=FALSE]
LOW.FIT <- GLU.FIT[GLU.FIT$MUTATION %in% LOW.EXPR$MUTATION,,drop=FALSE]

# Point colours: 15 bins over sqrt(median expression).
COL <- colorRampPalette(c("blue","red"))
EXPR.MIN <- min(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
EXPR.MAX <- max(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
COLOR <- COL(15)[as.numeric(cut(sqrt(LOW.EXPR$YFP.MEDIAN.RELATIVE.MEAN),breaks=15))]

# Colour bar for the legend, plus its midpoint label (back on the
# unsquare-rooted scale).
SEQ <- seq(sqrt(EXPR.MIN),sqrt(EXPR.MAX),by=0.01)
GRADIENT <- COL(15)[as.numeric(cut(SEQ,breaks=15))]
TEST <- cor.test(LOW.EXPR$LOG.CV.DIFF,LOW.FIT$FIT.DIFF)
MID <- mean(c(sqrt(EXPR.MIN),sqrt(EXPR.MAX)))^2

# Horizontal error bars: Delta log2(CV) +/- 1.96 * SD / sqrt(N).
NOISE.HALF.WIDTH <- 1.96*LOW.EXPR$YFP.LOG.CV.RELATIVE.SD/sqrt(LOW.EXPR$N)

#quartz(height=9,width=9)
par(mar=c(6,6,4,1))
plotCI(LOW.EXPR$LOG.CV.DIFF,LOW.FIT$FIT.DIFF,ui=LOW.EXPR$LOG.CV.DIFF+NOISE.HALF.WIDTH,li=LOW.EXPR$LOG.CV.DIFF-NOISE.HALF.WIDTH,err="x",xlab="Delta log2(CV)",ylab="Delta Fitness",pch=21,sfrac=0,xlim=c(-2.5,2.5),ylim=c(-0.01,0.01),gap=0,main="Suboptimal Expression in Glucose",cex=1.6,cex.axis=1.2,cex.lab=1.4,font.lab=2,font.axis=2,cex.main=2,pt.bg=COLOR,col="#00000066",lwd=1.25)
abline(v=0,lty=2,lwd=1.25)
abline(h=0,lty=2,lwd=1.25)
# Vertical error bars on Delta Fitness.
plotCI(LOW.EXPR$LOG.CV.DIFF,LOW.FIT$FIT.DIFF,ui=LOW.FIT$FIT.DIFF+LOW.FIT$High.95,li=LOW.FIT$FIT.DIFF-LOW.FIT$Low.95,err="y",pt.bg=COLOR,pch=21,cex=1.6,sfrac=0,gap=0,col="#00000066",lwd=1.25,add=TRUE)
# Least-squares line and correlation summary.
abline(lm(LOW.FIT$FIT.DIFF~LOW.EXPR$LOG.CV.DIFF),lwd=2,col="#00000066")
legend("topleft",c(paste0("R^2 = ",round(TEST$estimate^2,2)),paste0("P = ",format(TEST$p.value,digits=3))),bty="n",cex=1.3)
color.legend(0.75,-0.01,1.75,-0.0085,rect.col=GRADIENT,gradient="x",legend=c(round(EXPR.MIN,2),round(MID,2),round(EXPR.MAX,2)))
legend(0.5,-0.0063,"Median Expression",bty="n",cex=1.1)
box(lwd=2)


#High Expression

HIGH.EXPR <- GLU.EXPR[which(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN > EXPR.THRESHOLD[1]),,drop=FALSE]
HIGH.FIT <- GLU.FIT[GLU.FIT$MUTATION %in% HIGH.EXPR$MUTATION,,drop=FALSE]

# Point colours: 15 bins over sqrt(median expression).
COL <- colorRampPalette(c("blue","red"))
EXPR.MIN <- min(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
EXPR.MAX <- max(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN)
COLOR <- COL(15)[as.numeric(cut(sqrt(HIGH.EXPR$YFP.MEDIAN.RELATIVE.MEAN),breaks=15))]

SEQ <- seq(sqrt(EXPR.MIN),sqrt(EXPR.MAX),by=0.01)
GRADIENT <- COL(15)[as.numeric(cut(SEQ,breaks=15))]
TEST <- cor.test(HIGH.EXPR$LOG.CV.DIFF,HIGH.FIT$FIT.DIFF)
MID <- mean(c(sqrt(EXPR.MIN),sqrt(EXPR.MAX)))^2

# Horizontal error bars: Delta log2(CV) +/- 1.96 * SD / sqrt(N).
NOISE.HALF.WIDTH <- 1.96*HIGH.EXPR$YFP.LOG.CV.RELATIVE.SD/sqrt(HIGH.EXPR$N)

#quartz(height=9,width=9)
par(mar=c(6,6,4,1))
plotCI(HIGH.EXPR$LOG.CV.DIFF,HIGH.FIT$FIT.DIFF,ui=HIGH.EXPR$LOG.CV.DIFF+NOISE.HALF.WIDTH,li=HIGH.EXPR$LOG.CV.DIFF-NOISE.HALF.WIDTH,err="x",xlab="Delta log2(CV)",ylab="Delta Fitness",pch=21,sfrac=0,xlim=c(-2.5,2.5),ylim=c(-0.01,0.01),gap=0,main="Optimal Expression in Glucose",cex=1.6,cex.axis=1.2,cex.lab=1.4,font.lab=2,font.axis=2,cex.main=2,pt.bg=COLOR,col="#00000066",lwd=1.25)
abline(v=0,lty=2,lwd=1.25)
abline(h=0,lty=2,lwd=1.25)
# Vertical error bars on Delta Fitness.
plotCI(HIGH.EXPR$LOG.CV.DIFF,HIGH.FIT$FIT.DIFF,ui=HIGH.FIT$FIT.DIFF+HIGH.FIT$High.95,li=HIGH.FIT$FIT.DIFF-HIGH.FIT$Low.95,err="y",pt.bg=COLOR,pch=21,cex=1.6,sfrac=0,gap=0,col="#00000066",lwd=1.25,add=TRUE)
# Least-squares line and correlation summary.
abline(lm(HIGH.FIT$FIT.DIFF~HIGH.EXPR$LOG.CV.DIFF),lwd=2,col="#00000066")
legend("topleft",c(paste0("R^2 = ",round(TEST$estimate^2,2)),paste0("P = ",format(TEST$p.value,digits=3))),bty="n",cex=1.3)
color.legend(0.75,-0.01,1.75,-0.0085,rect.col=GRADIENT,gradient="x",legend=c(round(EXPR.MIN,2),round(MID,2),round(EXPR.MAX,2)))
legend(0.5,-0.0063,"Median Expression",bty="n",cex=1.1)
box(lwd=2)

dev.off()


####7-Plot Median vs Delta LOG.CV#####

#All Strains

#Corresponds to Figure 4 - figure supplement 2D.
# Delta log2(CV) against median expression for all strains, with error bars
# in both directions, a least-squares line and the correlation summary.
pdf("MEDIAN.vs.DELTA.LOG.CV.GLUCOSE.pdf",useDingbats=FALSE,height=5,width=5)

TEST <- cor.test(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.EXPR$LOG.CV.DIFF)

# Error bars: value +/- 1.96 * SD / sqrt(N).
MEDIAN.HALF.WIDTH <- 1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N)
NOISE.HALF.WIDTH <- 1.96*GLU.EXPR$YFP.LOG.CV.RELATIVE.SD/sqrt(GLU.EXPR$N)
MEDIAN.UPPER <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+MEDIAN.HALF.WIDTH
MEDIAN.LOWER <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-MEDIAN.HALF.WIDTH

#quartz(height=9,width=9)
par(mar=c(6,6,4,1))
# First pass sets up the axes and draws the points with a thick outline and
# fully transparent error bars; the next two passes add the visible bars.
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.EXPR$LOG.CV.DIFF,ui=MEDIAN.UPPER,li=MEDIAN.LOWER,err="x",xlab="Median Expression relative to WT",ylab="Delta log2(CV)",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(-2.5,2),gap=0,main="All Strains in Glucose",cex.axis=1.2,col=GLU.EXPR$COLOR,cex=1.5,lwd=2.02,scol="#00000000")
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.EXPR$LOG.CV.DIFF,ui=MEDIAN.UPPER,li=MEDIAN.LOWER,err="x",pch=21,sfrac=0,gap=0,col=GLU.EXPR$COLOR,add=TRUE,cex=1.5,lwd=1.01)
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.EXPR$LOG.CV.DIFF,ui=GLU.EXPR$LOG.CV.DIFF+NOISE.HALF.WIDTH,li=GLU.EXPR$LOG.CV.DIFF-NOISE.HALF.WIDTH,err="y",pch=21,sfrac=0,gap=0,col=GLU.EXPR$COLOR,add=TRUE,cex=1.5,lwd=1.01)
abline(lm(GLU.EXPR$LOG.CV.DIFF~GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),lwd=2,col="#00000066")
legend("bottomleft",c(paste0("R^2 = ",format(TEST$estimate^2,digits=2)),paste0("P = ",format(TEST$p.value,digits=3))),bty="n")
legend("bottomright",c("Delta log2(CV) > +1%","Delta log2(CV) < -1%","|Delta log2(CV)| < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")
dev.off()


####8-Plot Median vs Delta Fitness#####

#All Strains

#Corresponds to Figure 4 - figure supplement 3D.
# Delta Fitness against median expression for all strains, with error bars
# in both directions, a least-squares line and the correlation summary.
pdf("MEDIAN.vs.DELTA.FITNESS.GLUCOSE.LOG.CV.pdf",useDingbats=FALSE,height=5,width=5)

TEST <- cor.test(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$FIT.DIFF)

# Horizontal error bars: median expression +/- 1.96 * SD / sqrt(N).
MEDIAN.HALF.WIDTH <- 1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N)
MEDIAN.UPPER <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+MEDIAN.HALF.WIDTH
MEDIAN.LOWER <- GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-MEDIAN.HALF.WIDTH

#quartz(height=9,width=9)
par(mar=c(6,6,4,1))
# First pass sets up the axes and draws the points with a thick outline and
# fully transparent error bars; the next two passes add the visible bars.
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$FIT.DIFF,ui=MEDIAN.UPPER,li=MEDIAN.LOWER,err="x",xlab="Median Expression relative to WT",ylab="Delta Fitness",pch=21,sfrac=0,xlim=c(0,1.3),ylim=c(-0.01,0.01),gap=0,main="All Strains in Glucose",cex.axis=1.2,col=GLU.FIT$COLOR,cex=1.5,lwd=2.02,scol="#00000000")
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$FIT.DIFF,ui=MEDIAN.UPPER,li=MEDIAN.LOWER,err="x",pch=21,sfrac=0,gap=0,col=GLU.FIT$COLOR,add=TRUE,cex=1.5,lwd=1.01)
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN,GLU.FIT$FIT.DIFF,ui=GLU.FIT$FIT.DIFF+GLU.FIT$High.95,li=GLU.FIT$FIT.DIFF-GLU.FIT$Low.95,err="y",pch=21,sfrac=0,gap=0,col=GLU.FIT$COLOR,add=TRUE,cex=1.5,lwd=1.01)
abline(lm(GLU.FIT$FIT.DIFF~GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),lwd=2,col="#00000066")
legend("bottomleft",c(paste0("R^2 = ",format(TEST$estimate^2,digits=2)),paste0("P = ",format(TEST$p.value,digits=3))),bty="n")
legend("bottomright",c("Delta log2(CV) > +1%","Delta log2(CV) < -1%","|Delta log2(CV)| < +1%"),box.lwd=0,text.col=c("red","blue","black"),bty="n")
dev.off()


#################################################################################################################
# 11 - Expression noise vs fitness: Robustness to variation in 3 parameters (Figure 3 - figure supplement 5-7). #
#################################################################################################################

#Clear memory
# Removes every object from the workspace, so nothing defined by earlier
# sections of this file is available below this point.
rm(list=ls())
# warn=-1 suppresses all warnings from here on.
options(warn=-1)

#Libraries and functions
# NOTE(review): in this section, %nin% presumably comes from Hmisc and
# findValleys from quantmod - confirm before removing any of these.
library(plotrix)
library(Hmisc)
library(pcaPP)
library(msir)
library(quantmod)

# Bind `box` explicitly to graphics::box; presumably one of the packages
# attached above masks it - TODO confirm.
box <- graphics::box

#Load Data
# All input files are read from parent.dir as whitespace-delimited tables
# with a header row; strings are kept as character (as.is=TRUE).
parent.dir <- "/Path.to.input.file"
setwd(parent.dir)

READ.INPUT <- function(FILE)
{
  read.table(FILE,header=TRUE,as.is=TRUE)
}

# TEMPLATE lists the parameter combinations to evaluate (one per row).
TEMPLATE <- READ.INPUT("DELTA.ANALYSIS.ROBUSTNESS.txt")
Fit <- READ.INPUT("SUMMARY.DATA.FITNESS.txt")
Expression <- READ.INPUT("SUMMARY.DATA.EXPRESSION.txt")
All.Fit <- READ.INPUT("Experiment_s.estimates_filtered.txt")

# Expression: drop strain Y2675 and mark the row(s) with ID 3 as a SINGLE
# YFP construct.
Expression <- Expression[which(Expression$STRAIN != "Y2675"),,drop=FALSE]
Expression[which(Expression$ID == 3),"YFP.CONSTRUCT"] <- "SINGLE"

# All.Fit: keep rows not flagged in OUTLIER.2 and relabel the URA3 strain
# carrying the DOUBLE construct as "WT_WT".
All.Fit <- All.Fit[which(All.Fit$OUTLIER.2 == "NO"),,drop=FALSE]
All.Fit[which(All.Fit$MUTATION == "URA3" & All.Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"

# Harmonise mutation labels in Expression and Fit, using the same vectorised
# form already applied to All.Fit above:
#   - URA3 strains carrying the DOUBLE construct become "WT_WT";
#   - "NEGATIVE" (Expression only) becomes "TDH3.Deletion".
# which() drops NA comparisons, so rows with a missing MUTATION or
# YFP.CONSTRUCT are left untouched, and empty tables are handled without
# error.
Expression[which(Expression$MUTATION == "URA3" & Expression$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Expression[which(Expression$MUTATION == "NEGATIVE"),"MUTATION"] <- "TDH3.Deletion"

Fit[which(Fit$MUTATION == "URA3" & Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"

# Sort both tables by environment, then mutation. Later code pairs rows of
# the two tables by position, so they must share this ordering.
Expression <- Expression[order(Expression$ENVIRONMENT,Expression$MUTATION),]
Fit <- Fit[order(Fit$ENVIRONMENT, Fit$MUTATION),]


# Assign a plotting colour to each strain from its CLASS, using a lookup
# table instead of a per-row if-chain. Rows whose CLASS is missing or not
# listed here are left unchanged.
CLASS.COLORS <- c(REF="black",
                  REF.2X="black",
                  TFBS="#FF0000FF",
                  TFBS.2="#FF0000FF",
                  TATA="#0000FFFF",
                  TFBS.TATA="#990099FF",
                  WT="gray",
                  TFBS.2X="green",
                  TATA.2X="green",
                  TFBS.TATA.2X="green",
                  WT.2X="green")

# as.character() keeps the lookup by name even if CLASS was read as a factor.
STRAIN.CLASS <- as.character(Expression$CLASS)
KNOWN.CLASS <- which(STRAIN.CLASS %in% names(CLASS.COLORS))
if (length(KNOWN.CLASS) > 0)
{
  Expression[KNOWN.CLASS,"COLOR"] <- unname(CLASS.COLORS[STRAIN.CLASS[KNOWN.CLASS]])
}


#Loop over all conditions
#
# Each row of TEMPLATE is one parameter combination. The loop reads:
#   NOISE.METRIC   - "SD", "CV", "FANO" or "LOGCV" (selects the noise column)
#   ENVIRONMENT    - environment whose strains are analysed
#   LOESS.NOISE    - loess span for the noise ~ median expression fit
#   LOESS.FITNESS  - loess span for the fitness ~ median expression fit
#   FITNESS.CUTOFF - fitness drop below the fitted maximum that defines the
#                    expression threshold between "low" and "high" strains
# and writes the correlation between Delta Noise and Delta Fitness
# (cor.test with default settings) and its p-value for all strains
# (COR.ALL / P.ALL), strains below the threshold (COR.LOW / P.LOW) and
# strains at or above it (COR.HIGH / P.HIGH).
#
# The four noise metrics share one code path; only the noise column differs.
# Rows with an unrecognised or missing NOISE.METRIC are skipped unchanged.
NOISE.COLUMNS <- c(SD="YFP.SD.RELATIVE.MEAN",
                   CV="YFP.CV.RELATIVE.MEAN",
                   FANO="YFP.FANO.RELATIVE.MEAN",
                   LOGCV="YFP.LOG.CV.RELATIVE.MEAN")
EXCLUDED.MUTATIONS <- c("TATA.81_TATA.81","m75_TATA.81","m63.m90_m63.m90","TATA.129_TATA.129","WT","TDH3.Deletion","TATA.42_TATA.42","BARRY.DOUBLE")
RESULT.COLUMNS <- c("COR.ALL","P.ALL","COR.LOW","P.LOW","COR.HIGH","P.HIGH")
NOISE.CLASS.COLORS <- c(LOW="blue",HIGH="red",UNCLEAR="black")

for (i in seq_len(nrow(TEMPLATE)))
{
  METRIC <- as.character(TEMPLATE[i,"NOISE.METRIC"])
  if (is.na(METRIC) || !(METRIC %in% names(NOISE.COLUMNS)))
  {
    next
  }
  
  # TEMPLATE is read from the same file it is later written to, so clear any
  # result left by a previous run. A subgroup with too few strains then
  # reports NA instead of a stale value.
  for (RESULT in RESULT.COLUMNS)
  {
    TEMPLATE[i,RESULT] <- NA_real_
  }
  
  DATA.EXPR <- subset(Expression, ENVIRONMENT == TEMPLATE[i,"ENVIRONMENT"] & MUTATION %nin% EXCLUDED.MUTATIONS & YFP.MEDIAN.RELATIVE.MEAN < 1.5)
  DATA.FIT <- subset(Fit, ENVIRONMENT == TEMPLATE[i,"ENVIRONMENT"] & MUTATION %in% DATA.EXPR[,"MUTATION"])
  # DATA.ALL is not used further inside this loop.
  DATA.ALL <- subset(All.Fit, ENVIRONMENT == TEMPLATE[i,"ENVIRONMENT"] & MUTATION %in% DATA.FIT[,"MUTATION"])
  
  ####Compute Delta Noise######
  # Delta Noise = observed noise minus the loess prediction at the strain's
  # median expression.
  NOISE.VALUES <- DATA.EXPR[[NOISE.COLUMNS[[METRIC]]]]
  MEDIAN.VALUES <- DATA.EXPR$YFP.MEDIAN.RELATIVE.MEAN
  MODEL <- loess(NOISE.VALUES~MEDIAN.VALUES,degree=2,span=TEMPLATE[i,"LOESS.NOISE"])
  
  NOISE.DIFF <- NOISE.VALUES - predict(MODEL, MEDIAN.VALUES, se=TRUE)$fit
  DATA.EXPR[,"NOISE.DIFF"] <- NOISE.DIFF
  
  # HIGH / LOW by the sign of Delta Noise; |Delta Noise| < 0.01 is UNCLEAR.
  NOISE.CLASS <- rep(NA_character_,nrow(DATA.EXPR))
  NOISE.CLASS[which(NOISE.DIFF > 0)] <- "HIGH"
  NOISE.CLASS[which(NOISE.DIFF < 0)] <- "LOW"
  NOISE.CLASS[which(abs(NOISE.DIFF) < 0.01)] <- "UNCLEAR"
  DATA.EXPR[,"NOISE.CLASS"] <- NOISE.CLASS
  
  # Rows of DATA.EXPR and DATA.FIT are paired by position (both tables were
  # sorted by ENVIRONMENT and MUTATION above).
  DATA.FIT[,"NOISE.CLASS"] <- factor(DATA.EXPR[,"NOISE.CLASS"],levels=c("LOW","HIGH","UNCLEAR"))
  DATA.FIT[,"COLOR"] <- unname(NOISE.CLASS.COLORS[as.character(DATA.FIT[,"NOISE.CLASS"])])
  
  
  ####Compute Delta Fitness######
  
  MODEL.FIT <- loess(DATA.FIT$Fitness~DATA.EXPR$YFP.MEDIAN.RELATIVE.MEAN,degree=2,span=TEMPLATE[i,"LOESS.FITNESS"])
  
  # Evaluate the fitted curve on a fine grid to locate its maximum.
  x.mid <- seq(min(DATA.EXPR$YFP.MEDIAN.RELATIVE.MEAN),max(DATA.EXPR$YFP.MEDIAN.RELATIVE.MEAN),by=0.001)
  y.mid <- predict(MODEL.FIT, x.mid, se=TRUE)
  
  MAX.FITNESS <- max(y.mid$fit) 
  
  # OPT.EXPR is not used further inside this loop.
  OPT.EXPR <- x.mid[which(y.mid$fit == MAX.FITNESS)]
  
  # Expression threshold(s): grid points where the curve is closest to
  # (maximum - FITNESS.CUTOFF), found as valleys of the absolute distance.
  # Only the first one is used below.
  ABS.FIT <- abs(y.mid$fit-(MAX.FITNESS - TEMPLATE[i,"FITNESS.CUTOFF"]))
  
  EXPR.THRESHOLD <- x.mid[findValleys(ABS.FIT)]
  
  DATA.FIT[,"FIT.DIFF"] <- DATA.FIT$Fitness - predict(MODEL.FIT, DATA.EXPR$YFP.MEDIAN.RELATIVE.MEAN, se=TRUE)$fit
  
  
  ####Compute correlation between Delta Noise and Delta Fitness#####
  
  #All strains
  TEST <- cor.test(DATA.EXPR$NOISE.DIFF,DATA.FIT$FIT.DIFF)
  TEMPLATE[i,"COR.ALL"] <- TEST$estimate
  TEMPLATE[i,"P.ALL"] <- TEST$p.value
  
  
  #Low Expression
  # Same >= 3 strains guard as the high-expression group, so a sparse (or
  # missing) threshold yields NA rather than aborting the whole run.
  LOW.EXPR <- subset(DATA.EXPR, YFP.MEDIAN.RELATIVE.MEAN < EXPR.THRESHOLD[1])
  LOW.FIT <- subset(DATA.FIT, MUTATION %in% LOW.EXPR$MUTATION)
  if (nrow(LOW.EXPR) >= 3)
  {
    TEST <- cor.test(LOW.EXPR$NOISE.DIFF,LOW.FIT$FIT.DIFF)
    TEMPLATE[i,"COR.LOW"] <- TEST$estimate
    TEMPLATE[i,"P.LOW"] <- TEST$p.value
  }
  
  #High Expression
  HIGH.EXPR <- subset(DATA.EXPR, YFP.MEDIAN.RELATIVE.MEAN >= EXPR.THRESHOLD[1])
  HIGH.FIT <- subset(DATA.FIT, MUTATION %in% HIGH.EXPR$MUTATION)
  if (nrow(HIGH.EXPR) >= 3)
  {
    TEST <- cor.test(HIGH.EXPR$NOISE.DIFF,HIGH.FIT$FIT.DIFF)
    TEMPLATE[i,"COR.HIGH"] <- TEST$estimate
    TEMPLATE[i,"P.HIGH"] <- TEST$p.value
  }
}

#Dataset used to make Figure 3 - figure supplements 5-7.
# Written to the same file name that is read back just below.
write.table(TEMPLATE,file="DELTA.ANALYSIS.ROBUSTNESS.txt",row.names=FALSE,sep="\t")


#####PLOT RESULTS#################

# Reload the results and fix the order of environments and noise metrics
# used by the plots that follow.
DATA <- read.table(file="DELTA.ANALYSIS.ROBUSTNESS.txt",as.is=TRUE,header=TRUE)
ENVIRONMENT.ORDER <- c("GLUCOSE","GALACTOSE","GLYCEROL","ETHANOL")
METRIC.ORDER <- c("SD","CV","FANO","LOGCV")
DATA[,"ENVIRONMENT"] <- factor(DATA[,"ENVIRONMENT"], levels=ENVIRONMENT.ORDER)
DATA[,"NOISE.METRIC"] <- factor(DATA[,"NOISE.METRIC"], levels=METRIC.ORDER)


#A-Boxplots of correlations across all conditions and environments
# 4 x 3 grid: one row of panels per noise metric; columns show the
# correlation for all strains, low-expression strains and high-expression
# strains. Within a panel there is one box per environment.

COLOR <- c("red","darkgreen","blue","orange")

pdf("ROBUSTNESS.DELTA.NOISE.vs.DELTA.FITNESS.pdf",useDingbats=FALSE,height=10,width=7)

#quartz(height=10,width=7)
par(mfrow=c(4,3))

METRIC.LEVELS <- levels(DATA$NOISE.METRIC)
COR.LABEL <- "Correlation Delta Noise vs Delta Fitness"

for (i in seq_along(METRIC.LEVELS))
{
  PLOT <- DATA[which(DATA$NOISE.METRIC == METRIC.LEVELS[i]),,drop=FALSE]
  PANEL.METRIC <- PLOT[1,"NOISE.METRIC"]
  
  # All strains (this panel also carries the environment colour key).
  boxplot(PLOT$COR.ALL ~ PLOT$ENVIRONMENT, border=COLOR, xaxt="n", ylab=COR.LABEL, main=paste(PANEL.METRIC,"ALL"), ylim=c(-0.5,1))
  legend("topright",c("Glucose","Galactose","Glycerol","Ethanol"),text.col=COLOR,bty="n")
  abline(h=0, lty=2)
  
  # Low-expression strains.
  boxplot(PLOT$COR.LOW ~ PLOT$ENVIRONMENT, border=COLOR, xaxt="n", ylab=COR.LABEL, main=paste(PANEL.METRIC,"LOW"), ylim=c(-0.5,1))
  abline(h=0, lty=2)
  
  # High-expression strains (wider y range: -1 to 1).
  boxplot(PLOT$COR.HIGH ~ PLOT$ENVIRONMENT, border=COLOR, xaxt="n", ylab=COR.LABEL, main=paste(PANEL.METRIC,"HIGH"), ylim=c(-1,1))
  abline(h=0, lty=2)
}

dev.off()


#B-Boxplots showing effect of Fitness Cutoff
#Corresponds to Figure 3 - figure supplement 7.

# One PDF per noise metric, produced in this order. Each PDF is a 4 x 3 grid:
# one row per environment, one column per expression group (ALL, LOW, HIGH).
CUTOFF.FILES <- c(SD    = "ROBUSTNESS.FITNESS.CUTOFF.SD.pdf",
                  CV    = "ROBUSTNESS.FITNESS.CUTOFF.CV.pdf",
                  LOGCV = "ROBUSTNESS.FITNESS.CUTOFF.LOG.CV.pdf",
                  FANO  = "ROBUSTNESS.FITNESS.CUTOFF.FANO.pdf")

for (METRIC in names(CUTOFF.FILES))
{
  pdf(CUTOFF.FILES[[METRIC]], useDingbats = FALSE, height = 10, width = 8)

  #quartz(height=10,width=8)
  par(mfrow = c(4, 3))

  for (ENV in levels(DATA$ENVIRONMENT))
  {
    # Results for the current noise metric and environment.
    PLOT <- DATA[which(DATA$NOISE.METRIC == METRIC & DATA$ENVIRONMENT == ENV), , drop = FALSE]

    for (GROUP in c("ALL", "LOW", "HIGH"))
    {
      # The HIGH panel uses a wider y range than the ALL and LOW panels.
      Y.RANGE <- if (GROUP == "HIGH") c(-1, 1) else c(-0.5, 1)

      # Distribution of the correlation for each value of the fitness cutoff.
      boxplot(PLOT[[paste0("COR.", GROUP)]] ~ PLOT$FITNESS.CUTOFF,
              ylab = "Correlation Noise vs Fitness",
              main = paste(ENV, GROUP, sep = " "),
              ylim = Y.RANGE,
              xlab = "Fitness Cutoff")
      abline(h = 0, lty = 2)
    }
  }

  dev.off()
}


#C-Boxplots showing effect of smoothing factor used to calculate Delta Noise.
#Corresponds to Figure 3 - figure supplement 5.

# One PDF per noise metric, produced in this order. Each PDF is a 4 x 3 grid:
# one row per environment, one column per expression group (ALL, LOW, HIGH).
LOESS.NOISE.FILES <- c(SD    = "ROBUSTNESS.LOESS.SD.ROBUSTNESS.pdf",
                       CV    = "ROBUSTNESS.LOESS.CV.ROBUSTNESS.pdf",
                       LOGCV = "ROBUSTNESS.LOESS.LOG.CV.ROBUSTNESS.pdf",
                       FANO  = "ROBUSTNESS.LOESS.FANO.ROBUSTNESS.pdf")

for (METRIC in names(LOESS.NOISE.FILES))
{
  pdf(LOESS.NOISE.FILES[[METRIC]], useDingbats = FALSE, height = 10, width = 8)

  #quartz(height=9.5,width=9)
  par(mfrow = c(4, 3))

  for (ENV in levels(DATA$ENVIRONMENT))
  {
    # Results for the current noise metric and environment.
    PLOT <- DATA[which(DATA$NOISE.METRIC == METRIC & DATA$ENVIRONMENT == ENV), , drop = FALSE]

    for (GROUP in c("ALL", "LOW", "HIGH"))
    {
      # The HIGH panel uses a wider y range than the ALL and LOW panels.
      Y.RANGE <- if (GROUP == "HIGH") c(-1, 1) else c(-0.5, 1)

      boxplot(PLOT[[paste0("COR.", GROUP)]] ~ PLOT$LOESS.NOISE,
              ylab = "Correlation Noise vs Fitness",
              main = paste(ENV, GROUP, sep = " "),
              ylim = Y.RANGE,
              xlab = "Degree of Smoothing Delta Noise",
              xaxt = "n")
      # Hand-written tick labels; this assumes LOESS.NOISE takes exactly five
      # values, in this order - TODO confirm against the values used upstream.
      axis(1, at = 1:5, labels = c("2/6","3/6","4/6","5/6","1"))
      abline(h = 0, lty = 2)
    }
  }

  dev.off()
}


#D-Boxplots showing effect of smoothing factor used to calculate Delta Fitness.
#Corresponds to Figure 3 - figure supplement 6.

# One PDF per noise metric, produced in this order. Each PDF is a 4 x 3 grid:
# one row per environment, one column per expression group (ALL, LOW, HIGH).
LOESS.FITNESS.FILES <- c(SD    = "ROBUSTNESS.LOESS.FITNESS.SD.ROBUSTNESS.pdf",
                         CV    = "ROBUSTNESS.LOESS.FITNESS.CV.ROBUSTNESS.pdf",
                         LOGCV = "ROBUSTNESS.LOESS.FITNESS.LOG.CV.ROBUSTNESS.pdf",
                         FANO  = "ROBUSTNESS.LOESS.FITNESS.FANO.ROBUSTNESS.pdf")

for (METRIC in names(LOESS.FITNESS.FILES))
{
  pdf(LOESS.FITNESS.FILES[[METRIC]], useDingbats = FALSE, height = 10, width = 8)

  #quartz(height=9.5,width=9)
  par(mfrow = c(4, 3))

  for (ENV in levels(DATA$ENVIRONMENT))
  {
    # Results for the current noise metric and environment.
    PLOT <- DATA[which(DATA$NOISE.METRIC == METRIC & DATA$ENVIRONMENT == ENV), , drop = FALSE]

    for (GROUP in c("ALL", "LOW", "HIGH"))
    {
      # The HIGH panel uses a wider y range than the ALL and LOW panels.
      Y.RANGE <- if (GROUP == "HIGH") c(-1, 1) else c(-0.5, 1)

      # The boxes are grouped by LOESS.FITNESS, i.e. the smoothing used for Delta
      # Fitness, so the x-axis is labelled accordingly (it previously read
      # "Delta Noise", carried over from the Delta Noise figures).
      boxplot(PLOT[[paste0("COR.", GROUP)]] ~ PLOT$LOESS.FITNESS,
              ylab = "Correlation Noise vs Fitness",
              main = paste(ENV, GROUP, sep = " "),
              ylim = Y.RANGE,
              xlab = "Degree of Smoothing Delta Fitness",
              xaxt = "n")
      # Hand-written tick labels; this assumes LOESS.FITNESS takes exactly five
      # values, in this order - TODO confirm against the values used upstream.
      axis(1, at = 1:5, labels = c("2/6","3/6","4/6","5/6","1"))
      abline(h = 0, lty = 2)
    }
  }

  dev.off()
}


#################################################################################################################################
# 12 - Fitness: Direct competition between strains with low expression noise and strains with high expression noise (Figure 4). #
#################################################################################################################################

#Clear memory
# rm(list=ls()) removes every object from the current workspace, so nothing created
# by the previous sections is available from here on; the inputs of this section
# are read back from files.
rm(list=ls())
# A negative 'warn' value makes R ignore all warnings for the rest of the session.
# NOTE(review): this also hides warnings that would point at real problems
# (recycling, truncated replacements, NaNs produced); consider removing it when debugging.
options(warn=-1)

#Necessary libraries
library(flowCore)
library(flowClust)
library(mixtools)
library(mratios)
library(gplots)
library(fitdistrplus)
library(RColorBrewer)
library(pcaPP)
library(plotrix)


#######################
#Set working directory#
#######################

# Placeholder path: replace it with the folder that contains the input files.
parent.dir <- "/Path.to.input.file"
setwd(dir = parent.dir)

# Sample table for this experiment (tab/whitespace-delimited, with a header line).
DATA <- read.table(file = "TEMPLATE.GLUCOSE.txt", header = TRUE, as.is = TRUE)


################################
#Calculate cell density from OD#
################################

# Standard-curve table used further down to convert plate readings into cell densities.
format.table <- read.table(file = "StandardCurveSunrise.txt", header = TRUE, sep = "\t", as.is = TRUE)

# Optical density values of the calibration curve: 0 to 1.7 in steps of 0.01
# (171 values). Each entry is paired by position with the same entry of 'cells'
# when the calibration spline is fitted.
# The values are stored as an explicit vector instead of being typed after a bare
# scan() call: scan() without a file reads from standard input, so the inline
# numbers were only picked up when the script was pasted into a console or
# redirected to R, not when it was run with source() or Rscript.
OD <- c(
  0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09,
  0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19,
  0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29,
  0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39,
  0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49,
  0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59,
  0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69,
  0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79,
  0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89,
  0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99,
  1, 1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08, 1.09,
  1.1, 1.11, 1.12, 1.13, 1.14, 1.15, 1.16, 1.17, 1.18, 1.19,
  1.2, 1.21, 1.22, 1.23, 1.24, 1.25, 1.26, 1.27, 1.28, 1.29,
  1.3, 1.31, 1.32, 1.33, 1.34, 1.35, 1.36, 1.37, 1.38, 1.39,
  1.4, 1.41, 1.42, 1.43, 1.44, 1.45, 1.46, 1.47, 1.48, 1.49,
  1.5, 1.51, 1.52, 1.53, 1.54, 1.55, 1.56, 1.57, 1.58, 1.59,
  1.6, 1.61, 1.62, 1.63, 1.64, 1.65, 1.66, 1.67, 1.68, 1.69,
  1.7
)

# Calibration values paired by position with the 171 entries of 'OD' (they are the
# response of the spline fitted as smooth.spline(OD, cells)).
# NOTE(review): the unit of these values is not stated in this file - confirm.
# The values are stored as an explicit vector instead of being typed after a bare
# scan() call: scan() without a file reads from standard input, so the inline
# numbers were only picked up when the script was pasted into a console or
# redirected to R, not when it was run with source() or Rscript.
cells <- c(
  0, 0.015, 0.025, 0.04, 0.053, 0.065, 0.078, 0.09, 0.103, 0.115,
  0.128, 0.14, 0.153, 0.165, 0.178, 0.19, 0.204, 0.216, 0.229, 0.241,
  0.255, 0.268, 0.28, 0.293, 0.305, 0.319, 0.33, 0.342, 0.356, 0.37,
  0.385, 0.399, 0.412, 0.426, 0.44, 0.455, 0.47, 0.484, 0.499, 0.514,
  0.53, 0.547, 0.564, 0.58, 0.6, 0.617, 0.633, 0.65, 0.666, 0.683,
  0.7, 0.717, 0.733, 0.75, 0.766, 0.783, 0.8, 0.817, 0.833, 0.85,
  0.866, 0.883, 0.9, 0.917, 0.933, 0.95, 0.966, 0.983, 1, 1.023,
  1.046, 1.07, 1.093, 1.116, 1.14, 1.16, 1.18, 1.2, 1.22, 1.24,
  1.26, 1.283, 1.306, 1.33, 1.353, 1.376, 1.4, 1.43, 1.46, 1.49,
  1.52, 1.55, 1.58, 1.61, 1.64, 1.67, 1.703, 1.736, 1.77, 1.81,
  1.85, 1.89, 1.926, 1.963, 2, 2.04, 2.08, 2.12, 2.163, 2.206,
  2.25, 2.296, 2.343, 2.39, 2.433, 2.476, 2.52, 2.566, 2.613, 2.66,
  2.706, 2.753, 2.8, 2.85, 2.9, 2.95, 3.002, 3.055, 3.107, 3.16,
  3.22, 3.28, 3.34, 3.4, 3.46, 3.52, 3.58, 3.64, 3.7, 3.76,
  3.82, 3.88, 3.94, 4, 4.065, 4.13, 4.2, 4.27, 4.34, 4.41,
  4.48, 4.55, 4.625, 4.7, 4.775, 4.85, 4.925, 5, 5.075, 5.15,
  5.225, 5.3, 5.38, 5.46, 5.54, 5.63, 5.7, 5.8, 5.89, 5.98,
  6.07
)

#Add column with cell densities

# Step 1: smoothing spline (10 degrees of freedom) through the OD/cells calibration
# pairs, evaluated at every 'Spectro' reading of the standard-curve table.
the.df <- 10
the.spline <- smooth.spline(x = OD, y = cells, df = the.df)
format.table$Cell.Density <- predict(the.spline, format.table$Spectro)$y

# Step 2: second spline (4 degrees of freedom) fitted on the "Saturated" rows only,
# relating the 'Sunrise' reading to the cell density obtained in step 1.
# NOTE(review): 'Spectro' and 'Sunrise' presumably are readings of the same samples
# on two different instruments - confirm.
SAT <- format.table[which(format.table$Condition == "Saturated"), , drop = FALSE]
OD.Plate <- SAT$Sunrise
Cell.Plate <- SAT$Cell.Density
new.df <- 4
new.spline <- smooth.spline(x = OD.Plate, y = Cell.Plate, df = new.df)

# Step 3: convert the OD620 readings of the samples into densities.
DATA[["DENSITY.T"]] <- predict(new.spline, DATA[["OD620.T"]])$y
# Only the first 288 rows get a DENSITY.I value; the remaining rows stay NA.
# NOTE(review): 288 is hard-coded - presumably the rows that have an OD620.I reading; confirm.
FIRST.ROWS <- 1:288
DATA[FIRST.ROWS, "DENSITY.I"] <- predict(new.spline, DATA[FIRST.ROWS, "OD620.I"])$y

# Order the rows by SORT, then by TIME.POINT within each SORT value, and export them.
ROW.ORDER <- order(DATA[["SORT"]], DATA[["TIME.POINT"]])
SORT.DATA <- DATA[ROW.ORDER, ]

write.table(x = SORT.DATA, file = "SORTED.DATA.txt", sep = "\t", quote = FALSE,
            row.names = FALSE, col.names = TRUE)

######################
##Remove bad samples##
######################

# Drop every row recorded at plate position D09 (rows with a missing POSITION are dropped too).
SORT.DATA <- SORT.DATA[which(SORT.DATA[["POSITION"]] != "D09"), , drop = FALSE]

###########################################################################################################################
#####Reformat table to move the different time points of the same sample in different columns instead of different rows####
###########################################################################################################################

# Input : SORT.DATA, with N.points consecutive rows per sample.
# Output: Data.format, one row per sample; every column from the 15th onwards is
#         spread into N.points columns named <column>_T1 ... <column>_T<N.points>.
#         The table is also written to DATA.FORMATED.txt.

Data.preformat <- SORT.DATA

# Number of time points, i.e. number of consecutive rows that belong to one sample.
N.points <- 4

# Sample descriptors kept once per sample.
ID.columns <- c("SORT","POSITION","ROW","COLUMN","PAIR","UNIQUE.PAIR","STRAIN.1","STRAIN.2","MUT.1","MUT.2","PYRO","ALLELE.1","ALLELE.2")

# The reshaping reads the rows in consecutive groups of N.points, so an incomplete
# sample would shift every following sample. Stop with a clear message instead.
if (nrow(Data.preformat) %% N.points != 0)
{
  stop("The number of rows of SORT.DATA (", nrow(Data.preformat), ") is not a multiple of N.points (", N.points, ")")
}

# Names of the wide columns: every measured column repeated once per time point.
Raw.names <- rep(colnames(Data.preformat)[15:ncol(Data.preformat)], each = N.points)
Numbers <- rep(seq_len(N.points), length(Raw.names)/N.points)
Full.names <- paste(Raw.names, "_T", Numbers, sep = "")

# Keep the descriptors of the first row of every sample, then append the empty wide columns.
# (The row stride now follows N.points instead of a hard-coded 4.)
Data.format <- Data.preformat[seq(1, nrow(Data.preformat), by = N.points), ID.columns]

for (i in Full.names)
{
  Data.format[,i] <- NA
}

###Automatic filling of all columns

Data <- Data.preformat

for (j in 15:ncol(Data))
{
  # One row per sample, one column per time point.
  Current.matrix <- matrix(Data[,j], ncol = N.points, nrow = nrow(Data)/N.points, byrow = TRUE)

  # Position of the first wide column of measured column j: the wide columns start
  # right after the descriptors and each measured column takes N.points of them.
  FIRST <- length(ID.columns) + (j - 15)*N.points + 1
  Data.format[, FIRST:(FIRST + N.points - 1)] <- as.data.frame(Current.matrix)
}

write.table(Data.format, file = "DATA.FORMATED.txt", row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)


#################################
#Calculate number of generations#
#################################

# Adds the columns G.T_T1, G.I_T1, G.T_T2, G.I_T2, G.T_T3, G.I_T3, G.T_T4 and G.I_T4
# to the wide table. G.T_T1 is the origin (0) and every following column holds the
# cumulated number of doublings up to that stage; G.I_T4 is never computed and stays NA.
# NOTE(review): the "T" and "I" stages presumably are two alternating measurement
# points of each cycle - confirm against the experimental protocol.

DATA <- Data.format
DATA[,"G.T_T1"] <- 0
for (COLUMN in c("G.I_T1","G.T_T2","G.I_T2","G.T_T3","G.I_T3","G.T_T4","G.I_T4"))
{
  DATA[,COLUMN] <- NA
}

# Stages in chronological order; each one is compared with the stage before it.
STAGES <- c("T_T1","I_T1","T_T2","I_T2","T_T3","I_T3","T_T4")

for (k in seq_len(length(STAGES) - 1))
{
  BEFORE <- STAGES[k]
  AFTER <- STAGES[k + 1]

  # Starting density: density of the previous stage scaled by VOL/500.
  # NOTE(review): 500 presumably is the volume the transferred volume is diluted into - confirm.
  START <- DATA[,paste0("VOL.",BEFORE)]/500*DATA[,paste0("DENSITY.",BEFORE)]

  # Doublings between the two stages: log2 of final over starting density.
  GROWTH <- log(DATA[,paste0("DENSITY.",AFTER)]/START)/log(2)

  # From the second interval on, add the generations cumulated up to the previous stage.
  if (k > 1)
  {
    GROWTH <- GROWTH + DATA[,paste0("G.",BEFORE)]
  }

  DATA[,paste0("G.",AFTER)] <- GROWTH
}

write.table(DATA, file = "DATA.FORMATED.txt", row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)

#########################
####Remove bad samples###
#########################

# For each time point, a frequency of allele 1 is replaced by NA when it lies more
# than 5 MAD away from the mean of all samples that share the same PAIR value.
# The bounds are always computed from the unfiltered table (DATA).
# Frequencies that are already NA are left untouched: the comparison is done on
# whole columns and which() ignores NA results, whereas a row-by-row
# 'if (x < LOW | x > HIGH)' stops with an error as soon as x is NA.

FILTER <- DATA

for (FREQ in paste0("FREQ.ALLELE.1_T", 1:4))
{
  # Mean and MAD of the current time point within each PAIR, repeated on every row of the group.
  CENTER <- ave(DATA[,FREQ], DATA[,"PAIR"], FUN = function(v) mean(v, na.rm = TRUE))
  SPREAD <- ave(DATA[,FREQ], DATA[,"PAIR"], FUN = function(v) mad(v, na.rm = TRUE))

  OUTLIER <- which(DATA[,FREQ] < CENTER - 5*SPREAD | DATA[,FREQ] > CENTER + 5*SPREAD)
  FILTER[OUTLIER, FREQ] <- NA
}

write.table(FILTER, file = "DATA.FILTERED.txt", row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)


#####################################
####Compute selection coefficient####
#####################################

# For every sample, regress the log of the allele 1 frequency on the number of
# generations at the four time points. The slope is stored in S.ESTIMATE and the
# p-value of the F test on that slope in F.TEST.
# Time points set to NA by the filtering step are dropped from the fit.
# NOTE(review): the response is named LOG.RATIO but is computed as log(FREQ.ALLELE.1);
# confirm whether that column holds a ratio or a plain frequency.

for (i in seq_len(nrow(FILTER)))
{
  LOG.RATIO <- unname(unlist(log(FILTER[i,c("FREQ.ALLELE.1_T1","FREQ.ALLELE.1_T2","FREQ.ALLELE.1_T3","FREQ.ALLELE.1_T4")])))
  GEN <- unname(unlist(FILTER[i,c("G.T_T1","G.T_T2","G.T_T3","G.T_T4")]))

  # The first and last time points weigh five times more than the two intermediate ones.
  WEIGHT <- c(5,1,1,5)
  MODEL <- lm(LOG.RATIO ~ GEN, weights = WEIGHT, na.action=na.exclude)

  # Second coefficient = slope on GEN.
  FILTER[i,"S.ESTIMATE"] <- coef(MODEL)[2]

  # The ANOVA table has one row for GEN followed by one for the residuals. Take the
  # p-value of the first row explicitly: assigning the whole "Pr(>F)" column to a
  # single cell only kept that value through a truncation warning.
  F.test <- anova(MODEL, test="F")
  FILTER[i,"F.TEST"] <- F.test[1,"Pr(>F)"]
}

#Save data contained in Supplementary File 1 - Dataset 5.
write.table(FILTER,file="S.ESTIMATES.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)


###############################
#Make summary table with power#
###############################

# Builds 'Combined': one row per combination of PAIR, MUT.1, MUT.2, PYRO, ALLELE.1
# and ALLELE.2 with, for every column from the 14th onwards, its mean ("_mean") and
# standard deviation ("_sd") across replicates, the number of replicates (N.rep)
# and the p-value of a one-sample t-test of S.ESTIMATE against 0 (P.VAL.FITNESS).
# Means and standard deviations are computed without removing NA values.

Data <- read.table("S.ESTIMATES.txt",header=TRUE,as.is=TRUE)

# Grouping variables shared by the three aggregations, and the names given to them.
KEY.NAMES <- c("PAIR","MUT.1","MUT.2","PYRO","ALLELE.1","ALLELE.2")
GROUPS <- list(Data$PAIR,Data$MUT.1,Data$MUT.2,Data$PYRO,Data$ALLELE.1,Data$ALLELE.2)
N.KEYS <- length(KEY.NAMES)

Mean.table <- aggregate(Data[,14:ncol(Data)],by=GROUPS,FUN=mean)
colnames(Mean.table) <- c(KEY.NAMES,paste(colnames(Mean.table)[(N.KEYS+1):ncol(Mean.table)],"mean",sep="_"))

SD.table <- aggregate(Data[,14:ncol(Data)],by=GROUPS,FUN=sd)
colnames(SD.table) <- c(KEY.NAMES,paste(colnames(SD.table)[(N.KEYS+1):ncol(SD.table)],"sd",sep="_"))

# Number of rows per group (NA values included). Only the key columns are renamed:
# assigning the single name "N.rep" to the whole table renamed the first key
# column and left every other column without a name.
Length.table <- aggregate(Data[,14:ncol(Data)],by=GROUPS,FUN=length)
colnames(Length.table)[seq_len(N.KEYS)] <- KEY.NAMES

# The three aggregations use the same grouping, so their rows are in the same order.
Combined <- cbind(Mean.table,SD.table[,(N.KEYS+1):ncol(SD.table)],N.rep=Length.table[,ncol(Length.table)])

Combined <- Combined[order(Combined[,"PAIR"]),]

for (i in seq_len(nrow(Combined)))
{
  # All replicates of the current pair.
  CUR <- subset(Data, PAIR == Combined[i,"PAIR"])
  Combined[i,"P.VAL.FITNESS"] <- t.test(CUR[,"S.ESTIMATE"],mu=0)$p.value
}

write.table(Combined,file="SUMMARY.DATA.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)


#####################
###Expression Data###
#####################

# Pair "F" is left out of both the per-replicate table and the summary table.
FILTER <- Data[which(Data[,"PAIR"] != "F"), , drop = FALSE]
Combined <- Combined[which(Combined[,"PAIR"] != "F"), , drop = FALSE]

# Expression summary restricted to glucose, growth condition "WHEEL" and the
# mutations that appear in one of the remaining pairs.
EXPR <- read.table("SUMMARY.DATA.EXPRESSION.txt",header=TRUE,as.is=TRUE)
KEEP <- EXPR[,"ENVIRONMENT"] == "GLUCOSE" & EXPR[,"GROWTH"] == "WHEEL" & EXPR[,"MUTATION"] %in% c(Combined$MUT.1,Combined$MUT.2)
EXPR <- EXPR[which(KEEP), , drop = FALSE]

# Expression columns copied next to each strain of a pair; the names are the
# STAT part of the new columns YFP.<STAT>.<strain number>.
EXPR.COLUMNS <- c(MEAN  = "YFP.MEDIAN.RELATIVE.MEAN",
                  SD    = "YFP.SD.SCALED.MEAN",
                  NOISE = "YFP.CV.RELATIVE.MEAN")

for (STRAIN in c("1","2"))
{
  # First row of EXPR describing the mutation of this strain (NA when there is none).
  ROWS.FILTER <- match(FILTER[,paste0("MUT.",STRAIN)], EXPR[,"MUTATION"], incomparables = NA)
  ROWS.COMBINED <- match(Combined[,paste0("MUT.",STRAIN)], EXPR[,"MUTATION"], incomparables = NA)

  for (STAT in names(EXPR.COLUMNS))
  {
    TARGET <- paste0("YFP.",STAT,".",STRAIN)
    FILTER[,TARGET] <- EXPR[ROWS.FILTER, EXPR.COLUMNS[[STAT]]]
    Combined[,TARGET] <- EXPR[ROWS.COMBINED, EXPR.COLUMNS[[STAT]]]
  }
}

Combined <- Combined[order(Combined[,"PAIR"]),]

write.table(FILTER,file="S.ESTIMATES.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)

#Save data contained in Figure 4 - Source Data 1.
write.table(Combined,file="SUMMARY.DATA.txt",row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE)


######################################
###Plot noise vs mean for each pair###
######################################

FILTER <- read.table("S.ESTIMATES.txt",header=TRUE,as.is=TRUE)
Combined <- read.table("SUMMARY.DATA.txt",header=TRUE,as.is=TRUE)
EXPR <- read.table("EXPRESSION.DIRECT.COMPETITION.2.txt",header=TRUE,as.is=TRUE)
FIT <- read.table("FITNESS.PAIRS.txt",header=TRUE,as.is=TRUE)

# Plotting colour of each pair. The same lookup is applied to the four tables; a
# PAIR value that is missing or not listed here gets an NA colour (the COLOR
# column is always created, even when no row matches).
PAIR.COLORS <- c(A = "blue", B = "red", C = "green", D = "purple", E = "orange")

FILTER[,"COLOR"] <- unname(PAIR.COLORS[as.character(FILTER[,"PAIR"])])
Combined[,"COLOR"] <- unname(PAIR.COLORS[as.character(Combined[,"PAIR"])])
EXPR[,"COLOR"] <- unname(PAIR.COLORS[as.character(EXPR[,"PAIR"])])
FIT[,"COLOR"] <- unname(PAIR.COLORS[as.character(FIT[,"PAIR"])])


# Median expression vs expression noise for the strains of each competing pair.
# One PDF per noise metric:
#   Fano Relative    -> Corresponds to Figure 4A.
#   SD Relative      -> Corresponds to Figure 4 - figure supplement 1B.
#   CV Relative      -> Corresponds to Figure 4 - figure supplement 1C.
#   Log(CV) Relative -> Corresponds to Figure 4 - figure supplement 1D.
# Each figure overlays the strains listed in Combined$MUT.1 (pch 24) and the strains
# listed in Combined$MUT.2 (pch 21), coloured by pair, with 95% confidence
# intervals (1.96 * SD / sqrt(N)) drawn along both axes.
#
# The four figures come from the same code. The first plotCI() call of a figure
# sets up the axes; the three following calls are overlays (add=TRUE) and receive
# the same limits and labels as the first one. The stray empty arguments
# ("cex=1.7,,scol=...") of the former calls are gone.

PAIR.FIGURES <- list(
  list(FILE = "PAIRS.MEAN.vs.FANO.EXPR.pdf",  NOISE = "YFP.FANO.RELATIVE",  YLAB = "Expression Fano",    YLIM = c(0,3)),
  list(FILE = "PAIRS.MEAN.vs.SD.EXPR.pdf",    NOISE = "YFP.SD.RELATIVE",    YLAB = "Expression SD",      YLIM = c(0,2)),
  list(FILE = "PAIRS.MEAN.vs.CV.EXPR.pdf",    NOISE = "YFP.CV.RELATIVE",    YLAB = "Expression CV",      YLIM = c(0,4)),
  list(FILE = "PAIRS.MEAN.vs.LOGCV.EXPR.pdf", NOISE = "YFP.LOGCV.RELATIVE", YLAB = "Expression Log(CV)", YLIM = c(-2,3))
)

# Symbol and symbol size used for each member of a pair.
PAIR.MARKERS <- list(
  list(COLUMN = "MUT.1", PCH = 24, CEX = 1.5),
  list(COLUMN = "MUT.2", PCH = 21, CEX = 1.7)
)

for (FIGURE in PAIR.FIGURES)
{
  pdf(FIGURE$FILE,useDingbats=FALSE,height=5,width=5.6)
  #quartz(height=5,width=5.6)

  # TRUE until the axes of the current figure have been drawn.
  NEW.FIGURE <- TRUE

  for (MARKER in PAIR.MARKERS)
  {
    PLOT <- EXPR[EXPR[,"MUTATION"] %in% Combined[,MARKER$COLUMN], , drop = FALSE]

    # Point coordinates and half-widths of the confidence intervals.
    X <- PLOT$YFP.MEDIAN.RELATIVE.MEAN
    Y <- PLOT[,paste0(FIGURE$NOISE,".MEAN")]
    X.HALF <- 1.96*PLOT$YFP.MEDIAN.RELATIVE.SD/sqrt(PLOT$N)
    Y.HALF <- 1.96*PLOT[,paste0(FIGURE$NOISE,".SD")]/sqrt(PLOT$N)

    # Horizontal bars: confidence interval of the median expression.
    plotCI(X,Y,ui=X+X.HALF,li=X-X.HALF,err="x",col=PLOT[,"COLOR"],xlab="Median Expression",ylab=FIGURE$YLAB,pch=MARKER$PCH,sfrac=0,xlim=c(0,1.2),ylim=FIGURE$YLIM,gap=0,lwd=2.02,cex=MARKER$CEX,scol="#00000066",add=!NEW.FIGURE)

    # Vertical bars: confidence interval of the noise metric.
    plotCI(X,Y,ui=Y+Y.HALF,li=Y-Y.HALF,err="y",col=PLOT[,"COLOR"],xlab="Median Expression",ylab=FIGURE$YLAB,pch=MARKER$PCH,sfrac=0,xlim=c(0,1.2),ylim=FIGURE$YLIM,gap=0,lwd=2.02,cex=MARKER$CEX,scol="#00000066",add=TRUE)

    NEW.FIGURE <- FALSE
  }

  dev.off()
}


####################################################
#Plot Mean vs Fitness for each pair on top of curve#
####################################################

#Read the fitness and expression summary tables (strings kept as character).
Fit <- read.table("SUMMARY.DATA.FITNESS.txt",header=TRUE,as.is=TRUE)
Expression <- read.table("SUMMARY.DATA.EXPRESSION.txt",header=TRUE,as.is=TRUE)

#Drop strain Y2675 and label the construct of ID 3 as "SINGLE".
Expression <- subset(Expression, STRAIN != "Y2675")
Expression[which(Expression$ID == 3),"YFP.CONSTRUCT"] <- "SINGLE"

#Per-replicate fitness estimates, keeping only rows not flagged as outliers.
All.Fit <- read.table("Experiment_s.estimates_filtered.txt",header=TRUE,as.is=TRUE)
All.Fit <- subset(All.Fit, OUTLIER.2 == "NO")

#Use the same genotype labels in all three tables: the "URA3" genotype carrying
#the DOUBLE construct is renamed "WT_WT", and "NEGATIVE" becomes "TDH3.Deletion"
#in the expression table. which() drops NA comparisons, so rows with missing
#labels are left untouched instead of aborting the script.
All.Fit[which(All.Fit$MUTATION == "URA3" & All.Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Expression[which(Expression$MUTATION == "URA3" & Expression$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"
Expression[which(Expression$MUTATION == "NEGATIVE"),"MUTATION"] <- "TDH3.Deletion"
Fit[which(Fit$MUTATION == "URA3" & Fit$YFP.CONSTRUCT == "DOUBLE"),"MUTATION"] <- "WT_WT"

#Sort both summary tables by environment, then genotype.
Expression <- Expression[order(Expression$ENVIRONMENT,Expression$MUTATION),]
Fit <- Fit[order(Fit$ENVIRONMENT, Fit$MUTATION),]



####1. Classic Loess fit#########

#Corresponds to Figure 4B.
pdf("MEAN.EXPR.vs.FITNESS.GLUCOSE.pdf",useDingbats=FALSE,height=5,width=5.6)

#quartz(height=5,width=5.6)

#Glucose data used to fit the expression-fitness curve; the listed genotypes are excluded.
GLU.EXPR <- subset(Expression, ENVIRONMENT == "GLUCOSE" & MUTATION %nin% c("TATA.81_TATA.81","m75_TATA.81","m63.m90_m63.m90","TATA.129_TATA.129","TATA.42_TATA.42","WT"))
GLU.FIT <- subset(Fit, ENVIRONMENT == "GLUCOSE" & MUTATION %nin% c("TATA.81_TATA.81","m75_TATA.81","m63.m90_m63.m90","TATA.129_TATA.129","TATA.42_TATA.42","WT"))
GLU.ALL <- subset(All.Fit, ENVIRONMENT == "GLUCOSE" & MUTATION %in% GLU.FIT[,"MUTATION"])

#Reference samples (IDs 2 and 51) whose mean selection coefficient normalises fitness.
WT.1 <- subset(GLU.ALL, ID == 2)
WT.2 <- subset(GLU.ALL, ID == 51)

#Attach to every replicate the median expression of its genotype/construct.
#Fields read by read.table() cannot contain a tab, so the pasted key is unambiguous;
#match() returns the first matching expression row.
EXPR.ROW <- match(paste(GLU.ALL$MUTATION, GLU.ALL$YFP.CONSTRUCT, sep="\t"),
                  paste(GLU.EXPR$MUTATION, GLU.EXPR$YFP.CONSTRUCT, sep="\t"))
if (any(is.na(EXPR.ROW))) {
  stop("Some fitness replicates have no matching MUTATION/YFP.CONSTRUCT row in the expression summary.")
}
GLU.ALL[,"YFP.MEDIAN"] <- GLU.EXPR[EXPR.ROW,"YFP.MEDIAN.RELATIVE.MEAN"]

#Relative fitness = (s + 1) / (mean reference s + 1). Rows that are neither the
#DOUBLE construct nor the "URA3" genotype use reference ID 2; all others use ID 51.
USE.WT.1 <- GLU.ALL$YFP.CONSTRUCT != "DOUBLE" & GLU.ALL$MUTATION != "URA3"
WT.MEAN <- ifelse(USE.WT.1, mean(WT.1[,"s.estimate.2"]), mean(WT.2[,"s.estimate.2"]))
GLU.ALL[,"Fitness"] <- (GLU.ALL[,"s.estimate.2"] + 1)/(WT.MEAN + 1)

#Replicates of ID 2 get twice the weight of the other samples in the fit.
WEIGHT <- rep(1,nrow(GLU.ALL))
WEIGHT[which(GLU.ALL$ID ==2)] <- 2

GLU.MODEL <- loess(Fitness ~ YFP.MEDIAN, data = GLU.ALL, span=2/3, degree=2, weights = WEIGHT)

#Fitted curve evaluated over the observed expression range.
x.mid <- seq(min(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),max(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN),by=0.001)
y.mid <- predict(GLU.MODEL, x.mid, se=TRUE)

#Points for the paired genotypes come from FIT and EXPR, which are created earlier
#in this section. GLU.FIT and GLU.EXPR are assumed to list the same genotypes in
#the same row order -- TODO confirm.
#Only the first plotCI() call creates the axes; later calls use add = TRUE.
GLU.FIT <- subset(FIT, MUTATION %in% Combined[,"MUT.1"])
GLU.EXPR <- subset(EXPR, MUTATION %in% GLU.FIT[,"MUTATION"])

#Axes and points with fully transparent bars, then x and y error bars.
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, GLU.FIT$Fitness, uiw=10, liw=10, err="x",
       col=GLU.EXPR$COLOR, xlab="Median Expression", ylab="Fitness", pch=24, sfrac=0,
       xlim=c(0,2.1), ylim=c(0.936,1.01), gap=0, cex=1.5, lwd=2.02, scol="#00000000")
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, GLU.FIT$Fitness,
       ui=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),
       li=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),
       err="x", col=GLU.EXPR$COLOR, pch=24, sfrac=0, gap=0, add=TRUE, cex=1.5, lwd=1.01)
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, GLU.FIT$Fitness,
       ui=GLU.FIT$Fitness+GLU.FIT$High.95, li=GLU.FIT$Fitness-GLU.FIT$Low.95,
       err="y", col=GLU.EXPR$COLOR, pch=24, sfrac=0, gap=0, add=TRUE, cex=1.5, lwd=1.01)

GLU.FIT <- subset(FIT, MUTATION %in% Combined[,"MUT.2"])
GLU.EXPR <- subset(EXPR, MUTATION %in% GLU.FIT[,"MUTATION"])

plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, GLU.FIT$Fitness, uiw=10, liw=10, err="x",
       col=GLU.EXPR$COLOR, pch=21, sfrac=0, gap=0, cex=1.7, lwd=2.02, scol="#00000000", add=TRUE)
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, GLU.FIT$Fitness,
       ui=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN+1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),
       li=GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN-1.96*GLU.EXPR$YFP.MEDIAN.RELATIVE.SD/sqrt(GLU.EXPR$N),
       err="x", col=GLU.EXPR$COLOR, pch=21, sfrac=0, gap=0, add=TRUE, cex=1.7, lwd=1.01)
plotCI(GLU.EXPR$YFP.MEDIAN.RELATIVE.MEAN, GLU.FIT$Fitness,
       ui=GLU.FIT$Fitness+GLU.FIT$High.95, li=GLU.FIT$Fitness-GLU.FIT$Low.95,
       err="y", col=GLU.EXPR$COLOR, pch=21, sfrac=0, gap=0, add=TRUE, cex=1.7, lwd=1.01)

#Loess curve (dashed) and a shaded band of +/- 2.58 standard errors around it.
points(x.mid,y.mid$fit,type="l",col="#00000099",lty=2,lwd=1.5)

y.err <- c(y.mid$fit + 2.58*y.mid$se.fit,rev(y.mid$fit - 2.58*y.mid$se.fit))

polygon(c(x.mid,rev(x.mid)),y.err,col="#00000055",border=NA)

dev.off()


############################
#Boxplot Fitness Difference#
############################

#Corresponds to Figure 4C.
#Notched boxplots of S.ESTIMATE + 1 for each PAIR in FILTER, outlined with the
#colours in Combined$COLOR; the horizontal line marks a value of 1.
pdf("BOXPLOT.FITNESS.pdf", useDingbats = FALSE, height = 5, width = 5.6)
#quartz(height=5, width=5.6)
boxplot(
  S.ESTIMATE + 1 ~ PAIR,
  data = FILTER,
  notch = TRUE,
  ylim = c(0.9965, 1.0052),
  ylab = "Fitness of High vs Low Noise",
  border = Combined$COLOR
)
abline(h = 1, col = "#00000066", lwd = 1.5)
dev.off()


#################################################################################################################################################
# 13 - Expression : Comparison of effects of 20 pTDH3 alleles at the HO locus and at the native TDH3 locus (Figure 2 - figure supplement 1A-B). #
#################################################################################################################################################

########################
# a) LOADING LIBRARIES #
########################

#Clear memory
#NOTE: rm() removes every object from the workspace but does not detach packages,
#and options(warn=-1) silences all warnings for the rest of the session.
rm(list=ls())
options(warn=-1)

#MASS supplies rlm() and Hmisc supplies %nin%; both are used in the quality-control
#code below, so they are attached here rather than relying on an earlier section.
#MASS is attached first so that it does not mask anything from the other packages.
library(MASS)
library(flowCore)
library(flowClust)
library(flowViz)
library(pcaPP)
library(mixtools)
library(plyr)
library(robustlmm)
library(plotrix)
library(Hmisc)

#Global alias so that box() always resolves to graphics::box, whatever the attached packages export.
box <- graphics::box

######################################
# b) Quality Control and Corrections #
######################################

#Directory containing the input file (the string looks like a placeholder to edit).
parent.dir <- "Path.to.input.file"
setwd(parent.dir)

#Column classes of Clean.Data.txt: 20 factor, 4 integer, then 23 numeric columns.
DATA.TYPE <- rep(c("factor", "integer", "numeric"), times = c(20, 4, 23))

###Read in data set

###Clean.Data.txt was generated from .fcs data and from the input file "TEMPLATE.FIX.HO.NATIVE" using R scripts provided in sections 2 & 3.
DATA <- read.table(file = "Clean.Data.txt", header = TRUE, colClasses = DATA.TYPE)

#Fluorescence constants used by the transform below.
REF <- 0.905811693
NEG <- 0.519116913

#log.RNA.MEDIAN: log of the fluorescence rescaled so that NEG maps to 0 and REF to 1,
#offset by 0.05.
DATA[["log.RNA.MEDIAN"]] <- log((DATA[["YFP.MEDIAN.FINAL"]] - NEG) / (REF - NEG) + 0.05)
#log.RNA.SD: the SD divided by (median - NEG) + 0.05 * (REF - NEG), i.e. multiplied by
#the derivative of the transform above with respect to the median.
DATA[["log.RNA.SD"]] <- DATA[["YFP.SD.FINAL"]] / ((DATA[["YFP.MEDIAN.FINAL"]] - NEG) + (REF - NEG) * 0.05)


###############################################
# c) Correction for FLOW.RUN and ROW effects. #
###############################################

DATA[,"FLOW.RUN"] <- as.factor(DATA[,"FLOW.RUN"])
DATA[,"ROW"] <- as.factor(DATA[,"ROW"])

###Keep glucose/shaker samples, excluding position A01 of plates 1 and 2
CIS.DATA <- subset(DATA, ENVIRONMENT == "GLUCOSE" & GROWTH == "SHAKER")
CIS.DATA <- subset(CIS.DATA, POSITION != "A01" | PLATE %nin% c(1:2))
CIS.DATA <- droplevels(CIS.DATA)

CIS.DATA <- CIS.DATA[complete.cases(CIS.DATA),]

###Separate controls and remove FSC outliers
TRANS.CONTROL <- subset(CIS.DATA,CIS.DATA$ID == "CTRL")


######1-CORRECT FOR FSC MEDIAN##############
#Robust fit of the control samples. With an intercept the coefficients are offsets
#relative to the first factor level, hence the leading 0.
FSC.MEDIAN.CORRECT <- rlm(FSC.MEDIAN.FINAL ~ FLOW.RUN, data = TRANS.CONTROL)
COEF.FLOW.RUN   <- c(0,coefficients(FSC.MEDIAN.CORRECT)[grep("FLOW.RUN",names(coefficients(FSC.MEDIAN.CORRECT)))])

#The integer code of a factor is the index of its level, so indexing the coefficient
#vector by the codes returns each sample's FLOW.RUN offset in one step.
OUT <- unname(COEF.FLOW.RUN[as.integer(CIS.DATA$FLOW.RUN)])
CIS.DATA[,"FSC.MEDIAN.CORRECT"] <- CIS.DATA$FSC.MEDIAN.FINAL - OUT

TRANS.CONTROL <- subset(CIS.DATA,CIS.DATA$ID == "CTRL")

#Same correction for the plate ROW effect, applied to the FLOW.RUN-corrected values.
FSC.MEDIAN.CORRECT <- rlm(FSC.MEDIAN.CORRECT ~ ROW, data = TRANS.CONTROL)
COEF.ROW   <- c(0,coefficients(FSC.MEDIAN.CORRECT)[grep("ROW",names(coefficients(FSC.MEDIAN.CORRECT)))])

OUT <- unname(COEF.ROW[as.integer(CIS.DATA$ROW)])

CIS.DATA[,"FSC.MEDIAN.CORRECT"] <- CIS.DATA$FSC.MEDIAN.CORRECT - OUT

TRANS.CONTROL <- subset(CIS.DATA,CIS.DATA$ID == "CTRL")

# plot(TRANS.CONTROL$FSC.MEDIAN.FINAL ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$ROW)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$COL)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$POSITION)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$PLATE)
# plot(TRANS.CONTROL$FSC.MEDIAN.CORRECT ~ TRANS.CONTROL$REP)
# plot(TRANS.CONTROL$FSC.MEDIAN.FINAL ~ TRANS.CONTROL$DAY)

#Keep only controls whose corrected FSC lies within median +/- 4 MAD.
LOW <- median(TRANS.CONTROL$FSC.MEDIAN.CORRECT) - 4*mad(TRANS.CONTROL$FSC.MEDIAN.CORRECT)
HIGH <- median(TRANS.CONTROL$FSC.MEDIAN.CORRECT) + 4*mad(TRANS.CONTROL$FSC.MEDIAN.CORRECT)
TRANS.CONTROL <- subset(TRANS.CONTROL, FSC.MEDIAN.CORRECT >= LOW & FSC.MEDIAN.CORRECT <= HIGH)


######2-CORRECT FOR YFP MEDIAN##############
#Without an intercept the robust fit returns one coefficient per FLOW.RUN level
#(the control level of that run).
YFP.MEDIAN.CORRECT <- rlm(log.RNA.MEDIAN ~ 0 + FLOW.RUN, data = TRANS.CONTROL)

COEF.FLOW.RUN   <- coefficients(YFP.MEDIAN.CORRECT)[grep("FLOW.RUN",names(coefficients(YFP.MEDIAN.CORRECT)))]

#The integer code of a factor is the index of its level, so indexing the coefficient
#vector by the codes returns each sample's FLOW.RUN coefficient in one step.
OUT <- unname(COEF.FLOW.RUN[as.integer(CIS.DATA$FLOW.RUN)])

#Subtract the run-specific control level and add back the overall control mean.
CIS.DATA[,"log.RNA.MEDIAN.CORRECT"] <- CIS.DATA$log.RNA.MEDIAN - OUT + mean(TRANS.CONTROL$log.RNA.MEDIAN)

TRANS.CONTROL <- subset(CIS.DATA,CIS.DATA$ID == "CTRL")
TRANS.CONTROL <- subset(TRANS.CONTROL, FSC.MEDIAN.CORRECT >= LOW & FSC.MEDIAN.CORRECT <= HIGH)

#Same correction for the plate ROW effect, applied to the FLOW.RUN-corrected values.
YFP.MEDIAN.CORRECT <- rlm(log.RNA.MEDIAN.CORRECT ~ 0 + ROW, data = TRANS.CONTROL)
COEF.ROW   <- coefficients(YFP.MEDIAN.CORRECT)[grep("ROW",names(coefficients(YFP.MEDIAN.CORRECT)))]

OUT <- unname(COEF.ROW[as.integer(CIS.DATA$ROW)])

CIS.DATA[,"log.RNA.MEDIAN.CORRECT"] <- CIS.DATA$log.RNA.MEDIAN.CORRECT - OUT + mean(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT)
#Invert the log transform to return to the fluorescence scale.
CIS.DATA[,"YFP.MEDIAN.CORRECT"] <- (exp(CIS.DATA[,"log.RNA.MEDIAN.CORRECT"]) - 0.05) * (REF - NEG) + NEG

TRANS.CONTROL <- subset(CIS.DATA,CIS.DATA$ID == "CTRL")
TRANS.CONTROL <- subset(TRANS.CONTROL, FSC.MEDIAN.CORRECT >= LOW & FSC.MEDIAN.CORRECT <= HIGH)

# plot(TRANS.CONTROL$log.RNA.MEDIAN ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$ROW)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$COL)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$POSITION)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$PLATE)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$REP)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$DAY)
# plot(TRANS.CONTROL$log.RNA.MEDIAN.CORRECT ~ TRANS.CONTROL$BLOCK)

# plot(TRANS.CONTROL$YFP.MEDIAN.FINAL ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$ROW)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$COL)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$POSITION)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$PLATE)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$REP)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$DAY)
# plot(TRANS.CONTROL$YFP.MEDIAN.CORRECT ~ TRANS.CONTROL$BLOCK)


######3-CORRECT FOR YFP SD ON LOG SCALE##############
#Without an intercept the robust fit returns one coefficient per FLOW.RUN level
#(the control level of that run).
YFP.MAD.CORRECT <- rlm(log.RNA.SD ~ 0 + FLOW.RUN, data = TRANS.CONTROL)
COEF.FLOW.RUN   <- coefficients(YFP.MAD.CORRECT)[grep("FLOW.RUN",names(coefficients(YFP.MAD.CORRECT)))]

#The integer code of a factor is the index of its level, so indexing the coefficient
#vector by the codes returns each sample's FLOW.RUN coefficient in one step.
OUT <- unname(COEF.FLOW.RUN[as.integer(CIS.DATA$FLOW.RUN)])

#Subtract the run-specific control level and add back the overall control mean.
CIS.DATA[,"log.RNA.SD.CORRECT"] <- CIS.DATA$log.RNA.SD - OUT + mean(TRANS.CONTROL$log.RNA.SD)

TRANS.CONTROL <- subset(CIS.DATA,CIS.DATA$ID == "CTRL")
TRANS.CONTROL <- subset(TRANS.CONTROL, FSC.MEDIAN.CORRECT >= LOW & FSC.MEDIAN.CORRECT <= HIGH)

#Same correction for the plate ROW effect, applied to the FLOW.RUN-corrected values.
YFP.MAD.CORRECT <- rlm(log.RNA.SD.CORRECT ~ 0 + ROW, data = TRANS.CONTROL)
COEF.ROW   <- coefficients(YFP.MAD.CORRECT)[grep("ROW",names(coefficients(YFP.MAD.CORRECT)))]

OUT <- unname(COEF.ROW[as.integer(CIS.DATA$ROW)])

CIS.DATA[,"log.RNA.SD.CORRECT"] <- CIS.DATA$log.RNA.SD.CORRECT - OUT + mean(TRANS.CONTROL$log.RNA.SD.CORRECT)
#Back to the fluorescence scale: multiply by the factor log.RNA.SD was divided by,
#now evaluated at the corrected median.
CIS.DATA[,"YFP.SD.CORRECT"] <- CIS.DATA[,"log.RNA.SD.CORRECT"] * ((CIS.DATA[,"YFP.MEDIAN.CORRECT"] - NEG) + (REF - NEG)*0.05)

TRANS.CONTROL <- subset(CIS.DATA,CIS.DATA$ID == "CTRL")
TRANS.CONTROL <- subset(TRANS.CONTROL, FSC.MEDIAN.CORRECT >= LOW & FSC.MEDIAN.CORRECT <= HIGH)


# plot(TRANS.CONTROL$log.RNA.SD ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$ROW)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$COL)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$POSITION)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$PLATE)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$REP)
# plot(TRANS.CONTROL$log.RNA.SD.CORRECT ~ TRANS.CONTROL$DAY)


# plot(TRANS.CONTROL$YFP.SD.FINAL ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$FLOW.RUN)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$ROW)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$COL)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$POSITION)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$PLATE)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$REP)
# plot(TRANS.CONTROL$YFP.SD.CORRECT ~ TRANS.CONTROL$DAY)

#Save the corrected per-sample table.
write.table(CIS.DATA,"GLUCOSE.SHAKER.CORRECT.txt",row.names=FALSE,sep="\t",quote=FALSE)


###########################################################
# d) Calculation of expression phenotypes for each sample #
###########################################################

CIS.DATA <- read.table("GLUCOSE.SHAKER.CORRECT.txt",header=TRUE)

CIS.DATA <- CIS.DATA[complete.cases(CIS.DATA),]

#Append the eleven expression phenotypes of one locus to `data`.
#  data     : data frame with columns YFP.MEDIAN.CORRECT and YFP.SD.CORRECT.
#  prefix   : prefix of the new columns ("HO" or "NATIVE").
#  wt.rows  : row indices of the wild-type reference samples.
#  neg.rows : row indices of the negative-control samples.
#Returns `data` with columns <prefix>.MEDIAN.ADJUST, .SD.ADJUST, .MEDIAN.RELATIVE,
#.SD.SCALED, .CV, .FANO, .LOG.CV, .SD.RELATIVE, .CV.RELATIVE, .FANO.RELATIVE and
#.LOG.CV.RELATIVE appended in that order.
add.locus.phenotypes <- function(data, prefix, wt.rows, neg.rows) {
  stopifnot(length(wt.rows) > 0, length(neg.rows) > 0)

  #Subtract the mean signal of the negative controls.
  median.adjust <- data$YFP.MEDIAN.CORRECT - mean(data$YFP.MEDIAN.CORRECT[neg.rows])
  sd.adjust <- data$YFP.SD.CORRECT - mean(data$YFP.SD.CORRECT[neg.rows])

  #Express median and SD in units of the mean wild-type median.
  wt.median <- mean(median.adjust[wt.rows])
  median.relative <- median.adjust / wt.median
  sd.scaled <- sd.adjust / wt.median

  cv <- sd.scaled / median.relative
  fano <- sd.scaled^2 / median.relative
  log.cv <- log2(abs(cv))

  #Noise metrics recomputed from the SD expressed relative to the mean wild-type SD.
  sd.relative <- sd.scaled / mean(sd.scaled[wt.rows])
  cv.relative <- sd.relative / median.relative
  fano.relative <- sd.relative^2 / median.relative
  log.cv.relative <- log2(abs(cv.relative))

  #CV and Fano are undefined when the adjusted median or SD is negative.
  undefined <- which(median.adjust < 0 | sd.adjust < 0)
  cv[undefined] <- NA
  cv.relative[undefined] <- NA
  fano[undefined] <- NA
  fano.relative[undefined] <- NA
  log.cv[undefined] <- NA
  log.cv.relative[undefined] <- NA

  phenotypes <- list(
    MEDIAN.ADJUST = median.adjust, SD.ADJUST = sd.adjust,
    MEDIAN.RELATIVE = median.relative, SD.SCALED = sd.scaled,
    CV = cv, FANO = fano, LOG.CV = log.cv,
    SD.RELATIVE = sd.relative, CV.RELATIVE = cv.relative,
    FANO.RELATIVE = fano.relative, LOG.CV.RELATIVE = log.cv.relative
  )
  for (suffix in names(phenotypes)) {
    data[[paste(prefix, suffix, sep = ".")]] <- phenotypes[[suffix]]
  }
  data
}

#HO phenotypes use IDs 1 and 76 as wild-type reference; NATIVE phenotypes use
#strain Y1965. Both use ID 2 as negative control.
CIS.DATA <- add.locus.phenotypes(CIS.DATA, "HO", which(CIS.DATA$ID %in% c(1,76)), which(CIS.DATA$ID == 2))
CIS.DATA <- add.locus.phenotypes(CIS.DATA, "NATIVE", which(CIS.DATA$STRAIN == "Y1965"), which(CIS.DATA$ID == 2))

#Write processed data to file
write.table(CIS.DATA,"GLUCOSE.SHAKER.DATA.txt",sep="\t",quote=FALSE,row.names=FALSE)


###########################################################
# e) Calculation of expression phenotypes for each strain #
###########################################################

ALL.DATA <- read.table("GLUCOSE.SHAKER.DATA.txt",header=TRUE)
#Drop control and empty wells, the listed strains and mutations, and position C12 of plate 4.
FILTER.DATA <- subset(ALL.DATA, ID != "CTRL" & STRAIN %nin% c("Y1991","Y2001","Y1004","Y1964","Y1993") & MUTATION %nin% c("V","NULL","m131") & ID != "EMPTY")
FILTER.DATA <- subset(FILTER.DATA, PLATE != 4 | POSITION != "C12")

#Relabel genotypes and constructs. The columns are converted to character first:
#when read.table() returns factors (default before R 4.0), assigning a label that is
#not an existing level would silently produce NA.
FILTER.DATA$MUTATION <- as.character(FILTER.DATA$MUTATION)
FILTER.DATA$YFP.CONSTRUCT <- as.character(FILTER.DATA$YFP.CONSTRUCT)

FILTER.DATA[which(FILTER.DATA$STRAIN == "Y1952"),"MUTATION"] <- "m76"
FILTER.DATA[which(FILTER.DATA$STRAIN == "Y1953"),"MUTATION"] <- "m184"
FILTER.DATA[which(FILTER.DATA$YFP.CONSTRUCT == "ANCESTOR"),"YFP.CONSTRUCT"] <- "HO"
FILTER.DATA[which(FILTER.DATA$MUTATION == "CONTROL"),"MUTATION"] <- "REF"

FILTER.DATA <- droplevels(FILTER.DATA)

#Remove outliers
#A sample is flagged when its value lies more than 4 MAD from the median of all
#samples sharing its MUTATION, YFP.CONSTRUCT and ENVIRONMENT. Fields read by
#read.table() cannot contain a tab, so the pasted key identifies groups unambiguously.
flag.outliers <- function(values, group) {
  values <- as.numeric(values)
  center <- ave(values, group, FUN = median)
  spread <- ave(values, group, FUN = mad)
  ifelse(values > center + 4*spread | values < center - 4*spread, "YES", "NO")
}

GROUP <- paste(FILTER.DATA$MUTATION, FILTER.DATA$YFP.CONSTRUCT, FILTER.DATA$ENVIRONMENT, sep="\t")
FILTER.DATA[,"MEDIAN.OUTLIER"] <- flag.outliers(FILTER.DATA$YFP.MEDIAN.CORRECT, GROUP)
FILTER.DATA[,"SD.OUTLIER"] <- flag.outliers(FILTER.DATA$YFP.SD.CORRECT, GROUP)

FILTER.DATA <- subset(FILTER.DATA, MEDIAN.OUTLIER == "NO" & SD.OUTLIER == "NO")

write.table(FILTER.DATA,"ALL.DATA.FILTER.txt",sep="\t",quote=FALSE,row.names=FALSE)

FILTER.DATA <- read.table("ALL.DATA.FILTER.txt",header=TRUE)

#The 22 phenotype columns (11 per locus), in the order they appear in the summary.
PHENOTYPE.SUFFIXES <- c("MEDIAN.ADJUST","SD.ADJUST","MEDIAN.RELATIVE","SD.SCALED","CV","FANO","LOG.CV","SD.RELATIVE","CV.RELATIVE","FANO.RELATIVE","LOG.CV.RELATIVE")
PHENOTYPES <- c(paste("HO", PHENOTYPE.SUFFIXES, sep="."), paste("NATIVE", PHENOTYPE.SUFFIXES, sep="."))

#cbind(<all phenotypes>) ~ grouping factors. With the formula interface, aggregate()
#first drops every row that has an NA in any of the listed columns (its default
#na.action), so all phenotypes of a group are summarised over the same samples.
SUMMARY.FORMULA <- as.formula(paste0("cbind(", paste(PHENOTYPES, collapse=","), ") ~ MUTATION + YFP.CONSTRUCT + ENVIRONMENT + GROWTH"))

#Mean, standard deviation and number of samples for each group.
TRANS.MEDIAN <- aggregate(SUMMARY.FORMULA, data=FILTER.DATA, FUN = mean)
TRANS.SD <- aggregate(SUMMARY.FORMULA, data=FILTER.DATA, FUN = sd)
TRANS.N <- aggregate(SUMMARY.FORMULA, data=FILTER.DATA, FUN = length)

#Keep groups with at least two samples (column 9 of TRANS.N is a sample count).
ENOUGH.SAMPLES <- which(TRANS.N[,9] >= 2)
TRANS.MEDIAN <- TRANS.MEDIAN[ENOUGH.SAMPLES,]
TRANS.SD <- TRANS.SD[ENOUGH.SAMPLES,]
TRANS.N <- TRANS.N[ENOUGH.SAMPLES,]

#4 grouping columns + 22 means, then the 22 SDs (columns 5 to 26) and one sample count.
TRANS <- cbind.data.frame(TRANS.MEDIAN,TRANS.SD[,5:26],TRANS.N[,26])

colnames(TRANS) <- c("MUTATION","YFP.CONSTRUCT","ENVIRONMENT","GROWTH",paste0(PHENOTYPES,".MEAN"),paste0(PHENOTYPES,".SD"),"N")

write.table(TRANS,"SUMMARY.DATA.txt",sep="\t",quote=FALSE,row.names=FALSE)


#####################################################################
# f) Plot effects of pTDH3 alleles at HO locus vs native TDH3 locus #
#####################################################################

library(plotrix)
library(Hmisc)

#Colour by mutation class, matching the figure legends below:
#blue = TATA, red = TFBS, black = any other mutation.
TRANS$COLOR <- ifelse(TRANS$MUTATION %in% c("TATA.129","TATA.132","TATA.26","TATA.81"), "blue",
               ifelse(TRANS$MUTATION %in% c("m66","m63","m89","m90","m91","m75","m76"), "red", "black"))

PLOT <- subset(TRANS, ENVIRONMENT == "GLUCOSE")
PLOT <- droplevels(PLOT)

#One table per construct. Every plot below pairs row i of HO with row i of NATIVE,
#so both tables are assumed to list the same mutations in the same order -- TODO confirm.
HO <- subset(PLOT, YFP.CONSTRUCT == "HO")
NATIVE <- subset(PLOT, YFP.CONSTRUCT == "FUSION")

#Corresponds to Figure 2 - figure supplement 1A.
pdf("GLUCOSE.MEAN.HO.vs.NATIVE.pdf",useDingbats=FALSE,height=5,width=5)

#Half-widths of the error bars: 1.96 * SD / sqrt(N).
HO.HALF <- 1.96*HO$HO.MEDIAN.RELATIVE.SD/sqrt(HO$N)
NATIVE.HALF <- 1.96*NATIVE$NATIVE.MEDIAN.RELATIVE.SD/sqrt(NATIVE$N)

#First call creates the axes and draws the points with fully transparent bars;
#the next two overlay the x (HO) and y (native) error bars.
plotCI(HO$HO.MEDIAN.RELATIVE.MEAN, NATIVE$NATIVE.MEDIAN.RELATIVE.MEAN,
       ui=HO$HO.MEDIAN.RELATIVE.MEAN+HO.HALF, li=HO$HO.MEDIAN.RELATIVE.MEAN-HO.HALF,
       err="x", col=HO[,"COLOR"], xlab="Mean Expression HO Locus", ylab="Mean Expression Native Locus",
       pch=21, sfrac=0, xlim=c(0,1.4), ylim=c(0,1.4), gap=0, main="Glucose",
       cex=1.5, lwd=2.02, scol="#00000000")
plotCI(HO$HO.MEDIAN.RELATIVE.MEAN, NATIVE$NATIVE.MEDIAN.RELATIVE.MEAN,
       ui=HO$HO.MEDIAN.RELATIVE.MEAN+HO.HALF, li=HO$HO.MEDIAN.RELATIVE.MEAN-HO.HALF,
       err="x", col=HO[,"COLOR"], xlab="Mean Expression HO Locus", ylab="Mean Expression Native Locus",
       pch=21, sfrac=0, xlim=c(0,1.4), ylim=c(0,1.4), gap=0, main="Glucose",
       add=TRUE, cex=1.5, lwd=1.01)
plotCI(HO$HO.MEDIAN.RELATIVE.MEAN, NATIVE$NATIVE.MEDIAN.RELATIVE.MEAN,
       ui=NATIVE$NATIVE.MEDIAN.RELATIVE.MEAN+NATIVE.HALF, li=NATIVE$NATIVE.MEDIAN.RELATIVE.MEAN-NATIVE.HALF,
       err="y", col=HO[,"COLOR"], pch=1, sfrac=0, gap=0, add=TRUE, cex=1.5, lwd=1.01)

#Reference lines at 1 on both axes, plus the identity line.
abline(v=1,lty=2)
abline(h=1,lty=2)
abline(a=0,b=1,col="#000000AA",lty=2)
legend("topleft",c("TFBS","TATA","Other cis"),box.lwd=0,text.col=c("red","blue","black"))

#Spearman correlation (reported as "R" in the legend) and least-squares line.
COR <- cor.test(HO$HO.MEDIAN.RELATIVE.MEAN,NATIVE$NATIVE.MEDIAN.RELATIVE.MEAN,method="spearman")
MODEL <- lm(NATIVE$NATIVE.MEDIAN.RELATIVE.MEAN~HO$HO.MEDIAN.RELATIVE.MEAN)
abline(MODEL,col="#00000066")

COR <- round(COR$estimate, digits=3)
SLOPE <- round(MODEL$coefficients[2], digits=3)
legend("bottomright",c(paste0("R = ",COR),paste0("Slope = ",SLOPE)),box.lwd=0)

dev.off()


#Relative CV of expression at the HO locus (x) vs the native locus (y).
pdf("GLUCOSE.CV.HO.vs.NATIVE.pdf", useDingbats = FALSE, height = 5, width = 5)

#Half-widths of the error bars: 1.96 * SD / sqrt(N).
HO.HALF <- 1.96 * HO$HO.CV.RELATIVE.SD / sqrt(HO$N)
NATIVE.HALF <- 1.96 * NATIVE$NATIVE.CV.RELATIVE.SD / sqrt(NATIVE$N)

#First call creates the axes and draws the points with fully transparent bars;
#the next two overlay the x (HO) and y (native) error bars.
plotCI(HO$HO.CV.RELATIVE.MEAN, NATIVE$NATIVE.CV.RELATIVE.MEAN,
       ui = HO$HO.CV.RELATIVE.MEAN + HO.HALF, li = HO$HO.CV.RELATIVE.MEAN - HO.HALF,
       err = "x", col = HO[, "COLOR"], xlab = "CV Expression HO Locus", ylab = "CV Expression Native Locus",
       pch = 21, sfrac = 0, xlim = c(0, 2.5), ylim = c(0, 2.5), gap = 0, main = "Glucose",
       cex = 1.5, lwd = 2.02, scol = "#00000000")
plotCI(HO$HO.CV.RELATIVE.MEAN, NATIVE$NATIVE.CV.RELATIVE.MEAN,
       ui = HO$HO.CV.RELATIVE.MEAN + HO.HALF, li = HO$HO.CV.RELATIVE.MEAN - HO.HALF,
       err = "x", col = HO[, "COLOR"], xlab = "CV Expression HO Locus", ylab = "CV Expression Native Locus",
       pch = 21, sfrac = 0, xlim = c(0, 2.5), ylim = c(0, 2.5), gap = 0, main = "Glucose",
       add = TRUE, cex = 1.5, lwd = 1.01)
plotCI(HO$HO.CV.RELATIVE.MEAN, NATIVE$NATIVE.CV.RELATIVE.MEAN,
       ui = NATIVE$NATIVE.CV.RELATIVE.MEAN + NATIVE.HALF, li = NATIVE$NATIVE.CV.RELATIVE.MEAN - NATIVE.HALF,
       err = "y", col = HO[, "COLOR"], pch = 1, sfrac = 0, gap = 0, add = TRUE, cex = 1.5, lwd = 1.01)

#Reference lines at 1 on both axes, plus the identity line.
abline(v = 1, lty = 2)
abline(h = 1, lty = 2)
abline(a = 0, b = 1, col = "#000000AA", lty = 2)
legend("topleft", c("TFBS", "TATA", "Other cis"), box.lwd = 0, text.col = c("red", "blue", "black"))

#Spearman correlation (reported as "R" in the legend) and least-squares line.
COR <- cor.test(HO$HO.CV.RELATIVE.MEAN, NATIVE$NATIVE.CV.RELATIVE.MEAN, method = "spearman")
MODEL <- lm(NATIVE$NATIVE.CV.RELATIVE.MEAN ~ HO$HO.CV.RELATIVE.MEAN)
abline(MODEL, col = "#00000066")

COR <- round(COR$estimate, digits = 3)
SLOPE <- round(MODEL$coefficients[2], digits = 3)
legend("bottomright", c(paste0("R = ", COR), paste0("Slope = ", SLOPE)), box.lwd = 0)

dev.off()


#Relative SD of expression at the HO locus (x) vs the native locus (y).
pdf("GLUCOSE.SD.HO.vs.NATIVE.pdf", useDingbats = FALSE, height = 5, width = 5)

#Half-widths of the error bars: 1.96 * SD / sqrt(N).
HO.HALF <- 1.96 * HO$HO.SD.RELATIVE.SD / sqrt(HO$N)
NATIVE.HALF <- 1.96 * NATIVE$NATIVE.SD.RELATIVE.SD / sqrt(NATIVE$N)

#First call creates the axes and draws the points with fully transparent bars;
#the next two overlay the x (HO) and y (native) error bars.
plotCI(HO$HO.SD.RELATIVE.MEAN, NATIVE$NATIVE.SD.RELATIVE.MEAN,
       ui = HO$HO.SD.RELATIVE.MEAN + HO.HALF, li = HO$HO.SD.RELATIVE.MEAN - HO.HALF,
       err = "x", col = HO[, "COLOR"], xlab = "SD Expression HO Locus", ylab = "SD Expression Native Locus",
       pch = 21, sfrac = 0, xlim = c(0, 1.5), ylim = c(0, 1.5), gap = 0, main = "Glucose",
       cex = 1.5, lwd = 2.02, scol = "#00000000")
plotCI(HO$HO.SD.RELATIVE.MEAN, NATIVE$NATIVE.SD.RELATIVE.MEAN,
       ui = HO$HO.SD.RELATIVE.MEAN + HO.HALF, li = HO$HO.SD.RELATIVE.MEAN - HO.HALF,
       err = "x", col = HO[, "COLOR"], xlab = "SD Expression HO Locus", ylab = "SD Expression Native Locus",
       pch = 21, sfrac = 0, xlim = c(0, 1.5), ylim = c(0, 1.5), gap = 0, main = "Glucose",
       add = TRUE, cex = 1.5, lwd = 1.01)
plotCI(HO$HO.SD.RELATIVE.MEAN, NATIVE$NATIVE.SD.RELATIVE.MEAN,
       ui = NATIVE$NATIVE.SD.RELATIVE.MEAN + NATIVE.HALF, li = NATIVE$NATIVE.SD.RELATIVE.MEAN - NATIVE.HALF,
       err = "y", col = HO[, "COLOR"], pch = 1, sfrac = 0, gap = 0, add = TRUE, cex = 1.5, lwd = 1.01)

#Reference lines at 1 on both axes, plus the identity line.
abline(v = 1, lty = 2)
abline(h = 1, lty = 2)
abline(a = 0, b = 1, col = "#000000AA", lty = 2)
legend("topleft", c("TFBS", "TATA", "Other cis"), box.lwd = 0, text.col = c("red", "blue", "black"))

#Spearman correlation (reported as "R" in the legend) and least-squares line.
COR <- cor.test(HO$HO.SD.RELATIVE.MEAN, NATIVE$NATIVE.SD.RELATIVE.MEAN, method = "spearman")
MODEL <- lm(NATIVE$NATIVE.SD.RELATIVE.MEAN ~ HO$HO.SD.RELATIVE.MEAN)
abline(MODEL, col = "#00000066")

COR <- round(COR$estimate, digits = 3)
SLOPE <- round(MODEL$coefficients[2], digits = 3)
legend("bottomright", c(paste0("R = ", COR), paste0("Slope = ", SLOPE)), box.lwd = 0)

dev.off()


#Corresponds to Figure 2 - figure supplement 1B.
#Expression noise (Fano factor) at the HO locus (x axis) against the native locus (y axis).
#Error bars span mean +/- 1.96*SD/sqrt(N) along each axis.
pdf("GLUCOSE.FANO.HO.vs.NATIVE.pdf",useDingbats=FALSE,height=5,width=5)

local({
  #Temporaries are kept inside local() so no new global variable is created.
  X <- HO$HO.FANO.RELATIVE.MEAN
  Y <- NATIVE$NATIVE.FANO.RELATIVE.MEAN
  X.HALF <- 1.96*HO$HO.FANO.RELATIVE.SD/sqrt(HO$N)
  Y.HALF <- 1.96*NATIVE$NATIVE.FANO.RELATIVE.SD/sqrt(NATIVE$N)
  COLORS <- HO[,"COLOR"]
  X.LAB <- "Fano Expression HO Locus"
  Y.LAB <- "Fano Expression Native Locus"
  LIMITS <- c(0,1.5)

  #Pass 1 opens the plot: thick point outlines, error bars drawn in a fully transparent colour (scol).
  plotCI(X,Y,ui=X+X.HALF,li=X-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=LIMITS,ylim=LIMITS,gap=0,main="Glucose",cex=1.5,lwd=2.02,scol="#00000000")
  #Pass 2 overlays the horizontal (HO locus) error bars with a thinner line.
  plotCI(X,Y,ui=X+X.HALF,li=X-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=LIMITS,ylim=LIMITS,gap=0,main="Glucose",add=TRUE,cex=1.5,lwd=1.01)
  #Pass 3 overlays the vertical (native locus) error bars.
  plotCI(X,Y,ui=Y+Y.HALF,li=Y-Y.HALF,err="y",col=COLORS,pch=1,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
})

#Dashed guides at x = 1, y = 1 and along the identity line.
abline(v=1,lty=2)
abline(h=1,lty=2)
abline(a=0,b=1,col="#000000AA",lty=2)
legend("topleft",c("TFBS","TATA","Other cis"),box.lwd=0,text.col=c("red","blue","black"))

#Linear fit of native on HO values, drawn in semi-transparent black.
MODEL <- lm(NATIVE$NATIVE.FANO.RELATIVE.MEAN~HO$HO.FANO.RELATIVE.MEAN)
abline(MODEL,col="#00000066")

#Spearman correlation estimate and regression slope, rounded to 3 digits for the legend.
COR <- round(cor.test(HO$HO.FANO.RELATIVE.MEAN,NATIVE$NATIVE.FANO.RELATIVE.MEAN,method="spearman")$estimate, digits=3)
SLOPE <- round(coef(MODEL)[2], digits=3)
legend("bottomright",c(paste0("R = ",COR),paste0("Slope = ",SLOPE)),box.lwd=0)

dev.off()


#Expression noise (log2 CV) at the HO locus (x axis) against the native locus (y axis).
#Error bars span mean +/- 1.96*SD/sqrt(N) along each axis.
pdf("GLUCOSE.LOG.CV.HO.vs.NATIVE.pdf",useDingbats=FALSE,height=5,width=5)

local({
  #Labels and limits are defined once and shared by every plotCI call so the overlay
  #passes cannot drift from the base plot (the overlay used to carry "CV2" labels and
  #c(0,2.5) limits copied from another figure).
  X <- HO$HO.LOG.CV.RELATIVE.MEAN
  Y <- NATIVE$NATIVE.LOG.CV.RELATIVE.MEAN
  X.HALF <- 1.96*HO$HO.LOG.CV.RELATIVE.SD/sqrt(HO$N)
  Y.HALF <- 1.96*NATIVE$NATIVE.LOG.CV.RELATIVE.SD/sqrt(NATIVE$N)
  COLORS <- HO[,"COLOR"]
  X.LAB <- "log2(CV) Expression HO Locus"
  Y.LAB <- "log2(CV) Expression Native Locus"
  LIMITS <- c(-3,1.5)

  #Pass 1 opens the plot: thick point outlines, error bars drawn in a fully transparent colour (scol).
  plotCI(X,Y,ui=X+X.HALF,li=X-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=LIMITS,ylim=LIMITS,gap=0,main="Glucose",cex=1.5,lwd=2.02,scol="#00000000")
  #Pass 2 overlays the horizontal (HO locus) error bars with a thinner line.
  plotCI(X,Y,ui=X+X.HALF,li=X-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=LIMITS,ylim=LIMITS,gap=0,main="Glucose",add=TRUE,cex=1.5,lwd=1.01)
  #Pass 3 overlays the vertical (native locus) error bars.
  plotCI(X,Y,ui=Y+Y.HALF,li=Y-Y.HALF,err="y",col=COLORS,pch=1,sfrac=0,gap=0,add=TRUE,cex=1.5,lwd=1.01)
})

#Dashed guides at 0 on both axes (log2 of a relative value of 1) and along the identity line.
abline(v=0,lty=2)
abline(h=0,lty=2)
abline(a=0,b=1,col="#000000AA",lty=2)
legend("topleft",c("TFBS","TATA","Other cis"),box.lwd=0,text.col=c("red","blue","black"))

#Spearman correlation and linear fit of native on HO values.
COR <- cor.test(HO$HO.LOG.CV.RELATIVE.MEAN,NATIVE$NATIVE.LOG.CV.RELATIVE.MEAN,method="spearman")
MODEL <- lm(NATIVE$NATIVE.LOG.CV.RELATIVE.MEAN~HO$HO.LOG.CV.RELATIVE.MEAN)
abline(MODEL,col="#00000066")

#Correlation estimate and regression slope, rounded to 3 digits for the legend.
COR <- round(COR$estimate, digits=3)
SLOPE <- round(MODEL$coefficients[2], digits=3)
legend("bottomright",c(paste0("R = ",COR),paste0("Slope = ",SLOPE)),box.lwd=0)

dev.off()


#####TEST IMPACT OF NOISE ON MEAN SHIFT#####

#1-Compute Delta Mean
#Delta Mean = residuals of the linear regression of native-locus median expression
#on HO-locus median expression (one residual per row of HO/NATIVE).
MODEL <- lm(NATIVE$NATIVE.MEDIAN.RELATIVE.MEAN~HO$HO.MEDIAN.RELATIVE.MEAN)
DELTA.MEAN <- MODEL$residuals

#2-Compute Delta Noise
#Exploratory plots on the current device: each noise metric at HO against HO median expression.
plot(HO$HO.MEDIAN.RELATIVE.MEAN,HO$HO.CV.RELATIVE.MEAN,col=HO$COLOR)
plot(HO$HO.MEDIAN.RELATIVE.MEAN,log2(HO$HO.CV.MEAN),col=HO$COLOR)

plot(HO$HO.MEDIAN.RELATIVE.MEAN,HO$HO.FANO.MEAN,col=HO$COLOR)
plot(HO$HO.MEDIAN.RELATIVE.MEAN,HO$HO.SD.RELATIVE.MEAN,col=HO$COLOR)

#log2(relative CV) at HO against log2(relative CV) at the native locus.
plot(log2(HO$HO.CV.RELATIVE.MEAN),log2(NATIVE$NATIVE.CV.RELATIVE.MEAN),col=HO$COLOR,xlim=c(-1,1.2),ylim=c(-1,1.2))
#Both axes are log2-transformed, so a relative value of 1 lies at log2(1) = 0;
#the guides were previously drawn at 1, i.e. at a relative value of 2.
abline(v=0,lty=2)
abline(h=0,lty=2)
abline(a=0,b=1,col="#000000AA",lty=2)

#Fit and Pearson correlation on the log2 scale (results are printed, not stored).
lm(log2(NATIVE$NATIVE.CV.RELATIVE.MEAN)~log2(HO$HO.CV.RELATIVE.MEAN))
cor.test(log2(HO$HO.CV.RELATIVE.MEAN),log2(NATIVE$NATIVE.CV.RELATIVE.MEAN))

#Another cool property of Fano Factor
#Exploratory plots: between-replicate SD of Fano and of CV against HO median expression.
plot(HO$HO.MEDIAN.RELATIVE.MEAN,HO$HO.FANO.SD,col=HO$COLOR)
plot(HO$HO.MEDIAN.RELATIVE.MEAN,HO$HO.CV.SD,col=HO$COLOR)

#Fano Factor
#Delta Noise = residuals of the regression of HO Fano factor on HO median expression.
MODEL <- lm(HO$HO.FANO.RELATIVE.MEAN~HO$HO.MEDIAN.RELATIVE.MEAN)
plot(HO$HO.MEDIAN.RELATIVE.MEAN,HO$HO.FANO.RELATIVE.MEAN,col=HO$COLOR)
abline(MODEL)

DELTA.NOISE <- MODEL$residuals

#Plot Relation between Delta Mean and Delta Noise

pdf("GLUCOSE.DELTA.MEAN.vs.DELTA.FANO.pdf",useDingbats=FALSE,height=5,width=5)

local({
  #Labels and limits are defined once and shared by the three plotCI passes so the
  #overlays always agree with the base plot (one overlay used to say "Delta Noise HO Locus").
  X.HALF <- 1.96*HO$HO.FANO.RELATIVE.SD/sqrt(HO$N)
  Y.HALF <- 1.96*NATIVE$NATIVE.MEDIAN.RELATIVE.SD/sqrt(NATIVE$N)
  COLORS <- HO[,"COLOR"]
  X.LAB <- "Delta Fano HO Locus"
  Y.LAB <- "Delta Mean Native vs HO"
  X.LIM <- c(-0.5,0.7)
  Y.LIM <- c(-0.04,0.06)

  #Pass 1 opens the plot: thick point outlines, error bars drawn in a fully transparent colour (scol).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.NOISE+X.HALF,li=DELTA.NOISE-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",cex=1.5,lwd=2.02,scol="#00000000")
  #Pass 2 overlays the horizontal error bars (HO noise SD / sqrt(N)).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.NOISE+X.HALF,li=DELTA.NOISE-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",add=TRUE,cex=1.5,lwd=1.01)
  #Pass 3 overlays the vertical error bars (native median SD / sqrt(N)).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.MEAN+Y.HALF,li=DELTA.MEAN-Y.HALF,err="y",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",add=TRUE,cex=1.5,lwd=1.01)
})
legend("topleft",c("TFBS","TATA","Other cis"),box.lwd=0,text.col=c("red","blue","black"))

#Spearman correlation and linear fit of Delta Mean on Delta Noise.
COR <- cor.test(DELTA.MEAN,DELTA.NOISE,method="spearman")
MODEL <- lm(DELTA.MEAN~DELTA.NOISE)
abline(MODEL,col="#00000066")

#Correlation estimate and p-value, rounded to 3 digits for the legend.
P.VAL <- round(COR$p.value, digits=3)
COR <- round(COR$estimate, digits=3)
legend("bottomright",c(paste0("R = ",COR),paste0("P = ",P.VAL)),box.lwd=0)

dev.off()


#CV
#Delta Noise = residuals of the regression of HO CV on HO median expression.
MODEL <- lm(HO$HO.CV.RELATIVE.MEAN~HO$HO.MEDIAN.RELATIVE.MEAN)
plot(HO$HO.MEDIAN.RELATIVE.MEAN,HO$HO.CV.RELATIVE.MEAN,col=HO$COLOR)
abline(MODEL)

DELTA.NOISE <- MODEL$residuals

#Plot Relation between Delta Mean and Delta Noise

pdf("GLUCOSE.DELTA.MEAN.vs.DELTA.CV.pdf",useDingbats=FALSE,height=5,width=5)

local({
  #Labels and limits are defined once and shared by the three plotCI passes so the
  #overlays always agree with the base plot (one overlay used to carry xlim=c(-0.5,0.7)
  #copied from the Fano figure).
  X.HALF <- 1.96*HO$HO.CV.RELATIVE.SD/sqrt(HO$N)
  Y.HALF <- 1.96*NATIVE$NATIVE.MEDIAN.RELATIVE.SD/sqrt(NATIVE$N)
  COLORS <- HO[,"COLOR"]
  X.LAB <- "Delta CV HO Locus"
  Y.LAB <- "Delta Mean Native vs HO"
  X.LIM <- c(-1.1,1.1)
  Y.LIM <- c(-0.04,0.06)

  #Pass 1 opens the plot: thick point outlines, error bars drawn in a fully transparent colour (scol).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.NOISE+X.HALF,li=DELTA.NOISE-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",cex=1.5,lwd=2.02,scol="#00000000")
  #Pass 2 overlays the horizontal error bars (HO noise SD / sqrt(N)).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.NOISE+X.HALF,li=DELTA.NOISE-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",add=TRUE,cex=1.5,lwd=1.01)
  #Pass 3 overlays the vertical error bars (native median SD / sqrt(N)).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.MEAN+Y.HALF,li=DELTA.MEAN-Y.HALF,err="y",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",add=TRUE,cex=1.5,lwd=1.01)
})
legend("topleft",c("TFBS","TATA","Other cis"),box.lwd=0,text.col=c("red","blue","black"))

#Spearman correlation and linear fit of Delta Mean on Delta Noise.
COR <- cor.test(DELTA.MEAN,DELTA.NOISE,method="spearman")
MODEL <- lm(DELTA.MEAN~DELTA.NOISE)
abline(MODEL,col="#00000066")

#Correlation estimate and p-value, rounded to 3 digits for the legend.
P.VAL <- round(COR$p.value, digits=3)
COR <- round(COR$estimate, digits=3)
legend("bottomright",c(paste0("R = ",COR),paste0("P = ",P.VAL)),box.lwd=0)

dev.off()


#log(CV)
#Delta Noise = residuals of the regression of HO log(CV) on HO median expression.
MODEL <- lm(HO$HO.LOG.CV.RELATIVE.MEAN~HO$HO.MEDIAN.RELATIVE.MEAN)
plot(HO$HO.MEDIAN.RELATIVE.MEAN,HO$HO.LOG.CV.RELATIVE.MEAN,col=HO$COLOR)
abline(MODEL)

DELTA.NOISE <- MODEL$residuals

#Plot Relation between Delta Mean and Delta Noise

pdf("GLUCOSE.DELTA.MEAN.vs.DELTA.logCV.pdf",useDingbats=FALSE,height=5,width=5)

local({
  #Labels and limits are defined once and shared by the three plotCI passes so the
  #overlays always agree with the base plot (the overlays used to carry the "Delta CV"
  #label and x limits copied from the CV and Fano figures).
  X.HALF <- 1.96*HO$HO.LOG.CV.RELATIVE.SD/sqrt(HO$N)
  Y.HALF <- 1.96*NATIVE$NATIVE.MEDIAN.RELATIVE.SD/sqrt(NATIVE$N)
  COLORS <- HO[,"COLOR"]
  X.LAB <- "Delta log(CV) HO Locus"
  Y.LAB <- "Delta Mean Native vs HO"
  X.LIM <- c(-2,1.3)
  Y.LIM <- c(-0.04,0.06)

  #Pass 1 opens the plot: thick point outlines, error bars drawn in a fully transparent colour (scol).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.NOISE+X.HALF,li=DELTA.NOISE-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",cex=1.5,lwd=2.02,scol="#00000000")
  #Pass 2 overlays the horizontal error bars (HO noise SD / sqrt(N)).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.NOISE+X.HALF,li=DELTA.NOISE-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",add=TRUE,cex=1.5,lwd=1.01)
  #Pass 3 overlays the vertical error bars (native median SD / sqrt(N)).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.MEAN+Y.HALF,li=DELTA.MEAN-Y.HALF,err="y",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",add=TRUE,cex=1.5,lwd=1.01)
})
legend("topleft",c("TFBS","TATA","Other cis"),box.lwd=0,text.col=c("red","blue","black"))

#Spearman correlation and linear fit of Delta Mean on Delta Noise.
COR <- cor.test(DELTA.MEAN,DELTA.NOISE,method="spearman")
MODEL <- lm(DELTA.MEAN~DELTA.NOISE)
abline(MODEL,col="#00000066")

#Correlation estimate and p-value, rounded to 3 digits for the legend.
P.VAL <- round(COR$p.value, digits=3)
COR <- round(COR$estimate, digits=3)
legend("bottomright",c(paste0("R = ",COR),paste0("P = ",P.VAL)),box.lwd=0)

dev.off()


#SD
#Delta Noise = residuals of the regression of HO SD on HO median expression.
MODEL <- lm(HO$HO.SD.RELATIVE.MEAN~HO$HO.MEDIAN.RELATIVE.MEAN)
plot(HO$HO.MEDIAN.RELATIVE.MEAN,HO$HO.SD.RELATIVE.MEAN,col=HO$COLOR)
abline(MODEL)

DELTA.NOISE <- MODEL$residuals

#Plot Relation between Delta Mean and Delta Noise

pdf("GLUCOSE.DELTA.MEAN.vs.DELTA.SD.pdf",useDingbats=FALSE,height=5,width=5)

local({
  #Labels and limits are defined once and shared by the three plotCI passes so the
  #overlays always agree with the base plot (the overlays used to carry the "Delta CV"
  #label and x limits copied from the CV and Fano figures).
  X.HALF <- 1.96*HO$HO.SD.RELATIVE.SD/sqrt(HO$N)
  Y.HALF <- 1.96*NATIVE$NATIVE.MEDIAN.RELATIVE.SD/sqrt(NATIVE$N)
  COLORS <- HO[,"COLOR"]
  X.LAB <- "Delta SD HO Locus"
  Y.LAB <- "Delta Mean Native vs HO"
  X.LIM <- c(-0.3,0.3)
  Y.LIM <- c(-0.04,0.06)

  #Pass 1 opens the plot: thick point outlines, error bars drawn in a fully transparent colour (scol).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.NOISE+X.HALF,li=DELTA.NOISE-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",cex=1.5,lwd=2.02,scol="#00000000")
  #Pass 2 overlays the horizontal error bars (HO noise SD / sqrt(N)).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.NOISE+X.HALF,li=DELTA.NOISE-X.HALF,err="x",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",add=TRUE,cex=1.5,lwd=1.01)
  #Pass 3 overlays the vertical error bars (native median SD / sqrt(N)).
  plotCI(DELTA.NOISE,DELTA.MEAN,ui=DELTA.MEAN+Y.HALF,li=DELTA.MEAN-Y.HALF,err="y",col=COLORS,xlab=X.LAB,ylab=Y.LAB,pch=21,sfrac=0,xlim=X.LIM,ylim=Y.LIM,gap=0,main="Glucose",add=TRUE,cex=1.5,lwd=1.01)
})
legend("topleft",c("TFBS","TATA","Other cis"),box.lwd=0,text.col=c("red","blue","black"))

#Spearman correlation and linear fit of Delta Mean on Delta Noise.
COR <- cor.test(DELTA.MEAN,DELTA.NOISE,method="spearman")
MODEL <- lm(DELTA.MEAN~DELTA.NOISE)
abline(MODEL,col="#00000066")

#Correlation estimate and p-value, rounded to 3 digits for the legend.
P.VAL <- round(COR$p.value, digits=3)
COR <- round(COR$estimate, digits=3)
legend("bottomright",c(paste0("R = ",COR),paste0("P = ",P.VAL)),box.lwd=0)

dev.off()


#################################################################################
# 14 - Analysis of single-cell doubling times (Figure 5 - figure supplement 1). #
#################################################################################

#LOADING LIBRARIES#

library(plotrix)
library(pcaPP)
library(mixtools)
library(plyr)
library(ggplot2)
library(Hmisc)
library(stats)

#Bind the name `box` to the graphics implementation for the rest of the script.
#NOTE(review): presumably one of the packages loaded above masks box() - confirm.
box <- graphics::box
#Suppress every warning from here on.
options(warn = -1)

#Set source directory
source.dir <- "Path.to.input.files"
setwd(dir = source.dir)

#Read cell doubling time data
DATA <- read.csv(file = "DoublingTimes.csv", sep = ";", header = TRUE)

#Calculate doubling time for each cell.
#Frames elapsed between birth and division, times 6
#(presumably 6 minutes per frame, as the histograms below label Diff in minutes - confirm).
DATA[["Diff"]] <- 6 * (DATA[["Division.Frame"]] - DATA[["Birth.Frame"]])


#####################
#a-Analyze all cells#
#####################

#One data frame per strain, keeping only cells with Diff > 1.
#Cells recorded at Position "5" are additionally dropped for YPW3064.
YPW2879 <- DATA[which(DATA$Strain == "YPW2879" & DATA$Diff > 1), ]
YPW3064 <- DATA[which(DATA$Strain == "YPW3064" & DATA$Diff > 1 & DATA$Position != "5"), ]
YPW2868 <- DATA[which(DATA$Strain == "YPW2868" & DATA$Diff > 1), ]
YPW3047 <- DATA[which(DATA$Strain == "YPW3047" & DATA$Diff > 1), ]

#Summary table: one row per strain, in the order YPW2879, YPW2868, YPW3064, YPW3047.
#The three Pval.* columns stay NA here and are filled by the permutation tests below.
Summary <- local({
  DIFFS <- list(YPW2879$Diff, YPW2868$Diff, YPW3064$Diff, YPW3047$Diff)
  #Apply FUN to the doubling times of each strain and collect the four results in a vector.
  PER.STRAIN <- function(FUN) unlist(lapply(DIFFS, FUN))

  TABLE <- as.data.frame(matrix(NA, nrow=4, ncol=14))
  names(TABLE) <- c("Strain","Pair","Median.Expression","Noise.Expression","Status","N.Cells","Doubling.Mean","Doubling.Median","Doubling.SD","Doubling.CV","Doubling.Fano","Pval.Mean","Pval.Median","Pval.SD")

  TABLE[["Strain"]] <- c("YPW2879","YPW2868","YPW3064","YPW3047")
  TABLE[["Pair"]] <- c("B","B","D","D")
  #Hard-coded expression values for the four strains.
  TABLE[["Median.Expression"]] <- c(0.1924194,0.1841942,0.9601117,0.8996451)
  TABLE[["Noise.Expression"]] <- c(0.3627838,2.355751,1.0847789,2.173219)
  TABLE[["Status"]] <- rep("All",4)
  TABLE[["N.Cells"]] <- c(nrow(YPW2879),nrow(YPW2868),nrow(YPW3064),nrow(YPW3047))

  DOUBLING.MEAN <- PER.STRAIN(mean)
  DOUBLING.SD <- PER.STRAIN(sd)
  TABLE[["Doubling.Mean"]] <- DOUBLING.MEAN
  TABLE[["Doubling.Median"]] <- PER.STRAIN(median)
  TABLE[["Doubling.SD"]] <- DOUBLING.SD
  #CV = SD/mean; Fano = variance/mean.
  TABLE[["Doubling.CV"]] <- DOUBLING.SD/DOUBLING.MEAN
  TABLE[["Doubling.Fano"]] <- DOUBLING.SD^2/DOUBLING.MEAN

  TABLE
})

#Permutation tests on doubling times (all cells).
#Each test pools two samples, randomly reassigns the pooled values to two groups of the
#original sizes N.TEST times, and compares the absolute difference of a statistic between
#the permuted groups with the observed absolute difference.
#The tests run in the same order as before, so the sequence of random draws is unchanged.

N.TEST <- 100000

#PERMUTATION.P.VALUE: permutation p-value for the absolute difference of STAT between two samples.
#  DATA.1, DATA.2 : numeric vectors to compare.
#  STAT           : function returning one number per vector (mean, median or sd below).
#  N.TEST         : number of random permutations.
#Returns 1 minus the fraction of permuted differences strictly smaller than the observed
#one, i.e. permuted differences equal to the observed one count as at least as extreme.
PERMUTATION.P.VALUE <- function(DATA.1, DATA.2, STAT, N.TEST=100000)
{
  DELTA.OBS <- abs(STAT(DATA.1) - STAT(DATA.2))
  COMBINE <- c(DATA.1,DATA.2)
  DELTA.PERMUT <- rep(0,N.TEST)

  for (i in seq_len(N.TEST))
  {
    #Positions of the pooled values assigned to the first permuted group.
    POS.1 <- sample.int(length(COMBINE),size=length(DATA.1),replace=FALSE)
    DELTA.PERMUT[i] <- abs(STAT(COMBINE[POS.1]) - STAT(COMBINE[-POS.1]))
  }

  1 - length(which(DELTA.PERMUT < DELTA.OBS))/length(DELTA.PERMUT)
}

#Permutation tests to compare mean doubling times

#Pair B
P.VAL <- PERMUTATION.P.VALUE(YPW2879$Diff, YPW2868$Diff, mean, N.TEST)
Summary[1:2,"Pval.Mean"] <- P.VAL

#Pair D
P.VAL <- PERMUTATION.P.VALUE(YPW3064$Diff, YPW3047$Diff, mean, N.TEST)
Summary[3:4,"Pval.Mean"] <- P.VAL

#Pair B vs Pair D
#This p-value is not written to Summary; it is also kept in P.VAL.B.VS.D because P.VAL
#is overwritten by the next test.
P.VAL <- PERMUTATION.P.VALUE(c(YPW2879$Diff, YPW2868$Diff), c(YPW3047$Diff, YPW3064$Diff), mean, N.TEST)
P.VAL.B.VS.D <- P.VAL


#Permutation tests to compare median doubling times

#Pair B
P.VAL <- PERMUTATION.P.VALUE(YPW2879$Diff, YPW2868$Diff, median, N.TEST)
Summary[1:2,"Pval.Median"] <- P.VAL

#Pair D
P.VAL <- PERMUTATION.P.VALUE(YPW3064$Diff, YPW3047$Diff, median, N.TEST)
Summary[3:4,"Pval.Median"] <- P.VAL

#Permutation tests to compare standard deviation of doubling times
#The second sample is shifted so that both samples have the same mean before pooling;
#the pooled values then differ between groups only in their spread around that mean.

#Pair B
P.VAL <- PERMUTATION.P.VALUE(YPW2879$Diff, YPW2868$Diff + mean(YPW2879$Diff) - mean(YPW2868$Diff), sd, N.TEST)
Summary[1:2,"Pval.SD"] <- P.VAL

#Pair D
P.VAL <- PERMUTATION.P.VALUE(YPW3064$Diff, YPW3047$Diff + mean(YPW3064$Diff) - mean(YPW3047$Diff), sd, N.TEST)
Summary[3:4,"Pval.SD"] <- P.VAL

write.table(Summary,"Summary.Doubling.Time.All.txt",sep="\t",quote=FALSE,row.names=FALSE)

#Histograms of doubling time.
#One density histogram per strain on a 2 x 2 page, with 6-unit bins from 0 to 300.

BREAKS <- seq(0,300,6)

#PLOT.DOUBLING.HIST: draw one doubling-time histogram in the current panel.
#  DIFF   : numeric vector of doubling times.
#  MAIN   : panel title.
#  COL    : fill colour of the bars.
#  Y.MAX  : upper limit of the density axis.
#  Y.STEP : spacing of the tick marks on the density axis.
#  BREAKS : histogram breakpoints.
#The legend (N, mean, SD) is computed from DIFF instead of being typed in by hand, so it
#always matches the plotted data. A filled triangle marks the mean just above the x axis.
PLOT.DOUBLING.HIST <- function(DIFF, MAIN, COL, Y.MAX, Y.STEP, BREAKS)
{
  hist(DIFF, col=COL, breaks=BREAKS, xlim=c(0,300), ylim=c(0,Y.MAX), xlab="Doubling Time (minutes)",ylab="Density",main=MAIN,border=NA,xaxt="n",yaxt="n",font.lab=2,freq=FALSE)
  axis(2, at=seq(0,Y.MAX,by=Y.STEP),lwd=2, font=2, pos=0)
  axis(1, at=seq(0,300,by=50),font=2,lwd=2,pos=0)
  legend("topright",legend=c(paste0("N = ",length(DIFF)), sprintf("Mean = %.1f",mean(DIFF)), sprintf("SD = %.1f",sd(DIFF))),box.lty=0,cex=1.2,inset=0.05)
  points(mean(DIFF),0.0007,pch=25,col="black",bg="black")
}

pdf("Distribution.Doubling.All.pdf",height=10,width=12,useDingbats=FALSE)
#windows(height=10,width=12)
par(mfrow=c(2,2))
PLOT.DOUBLING.HIST(YPW2879$Diff, "YPW2879", "#FF000099", 0.025, 0.005, BREAKS)

#par(mfg=...) selects the panel (row, column) used by the next histogram.
par(mfg=c(2,1))
PLOT.DOUBLING.HIST(YPW2868$Diff, "YPW2868", "#0000FF99", 0.025, 0.005, BREAKS)

par(mfg=c(1,2))
PLOT.DOUBLING.HIST(YPW3064$Diff, "YPW3064", "#FF000099", 0.025, 0.005, BREAKS)

par(mfg=c(2,2))
PLOT.DOUBLING.HIST(YPW3047$Diff, "YPW3047", "#0000FF99", 0.025, 0.005, BREAKS)

dev.off()


#Kernel density estimates of doubling time, evaluated between 0 and 300, overlaid per pair
#of strains on the current device (green = first strain, orange = second strain).
#The `breaks` argument was dropped: it is a hist() argument, not a plotting parameter for
#density objects, and only produced warnings. The overlay uses lines(), which draws the
#same curve as points(type="l") without the unused xlim/ylim arguments.

Density1 <- density(YPW2879$Diff,from=0,to=300)
Density2 <- density(YPW2868$Diff,from=0,to=300)

#windows(height=10,width=12)
plot(Density1, col="green", xlim=c(0,300),ylim=c(0,0.02),type="l",lwd=2)
lines(Density2, col="orange", lwd=2)

#NOTE(review): y is not used anywhere in the visible code; kept in case later code reads it.
y <- rep(0,512)


Density3 <- density(YPW3064$Diff,from=0,to=300)
Density4 <- density(YPW3047$Diff,from=0,to=300)

#windows(height=10,width=12)
plot(Density3, col="green", xlim=c(0,300),ylim=c(0,0.02),type="l",lwd=2)
lines(Density4, col="orange", lwd=2)


#############################
#b-Analyze only mother cells#
#############################

#One data frame per strain, keeping only mother cells with Diff > 1.
#Cells recorded at Position "5" are additionally dropped for YPW3064.
YPW2879 <- DATA[which(DATA$Strain == "YPW2879" & DATA$Diff > 1 & DATA$Status == "Mother"), ]
YPW3064 <- DATA[which(DATA$Strain == "YPW3064" & DATA$Diff > 1 & DATA$Position != "5" & DATA$Status == "Mother"), ]
YPW2868 <- DATA[which(DATA$Strain == "YPW2868" & DATA$Diff > 1 & DATA$Status == "Mother"), ]
YPW3047 <- DATA[which(DATA$Strain == "YPW3047" & DATA$Diff > 1 & DATA$Status == "Mother"), ]

#Summary table: one row per strain, in the order YPW2879, YPW2868, YPW3064, YPW3047.
#The three Pval.* columns stay NA here and are filled by the permutation tests below.
Summary <- local({
  DIFFS <- list(YPW2879$Diff, YPW2868$Diff, YPW3064$Diff, YPW3047$Diff)
  #Apply FUN to the doubling times of each strain and collect the four results in a vector.
  PER.STRAIN <- function(FUN) unlist(lapply(DIFFS, FUN))

  TABLE <- as.data.frame(matrix(NA, nrow=4, ncol=14))
  names(TABLE) <- c("Strain","Pair","Median.Expression","Noise.Expression","Status","N.Cells","Doubling.Mean","Doubling.Median","Doubling.SD","Doubling.CV","Doubling.Fano","Pval.Mean","Pval.Median","Pval.SD")

  TABLE[["Strain"]] <- c("YPW2879","YPW2868","YPW3064","YPW3047")
  TABLE[["Pair"]] <- c("B","B","D","D")
  #Hard-coded expression values for the four strains.
  TABLE[["Median.Expression"]] <- c(0.1924194,0.1841942,0.9601117,0.8996451)
  TABLE[["Noise.Expression"]] <- c(0.3627838,2.355751,1.0847789,2.173219)
  TABLE[["Status"]] <- rep("Mothers",4)
  TABLE[["N.Cells"]] <- c(nrow(YPW2879),nrow(YPW2868),nrow(YPW3064),nrow(YPW3047))

  DOUBLING.MEAN <- PER.STRAIN(mean)
  DOUBLING.SD <- PER.STRAIN(sd)
  TABLE[["Doubling.Mean"]] <- DOUBLING.MEAN
  TABLE[["Doubling.Median"]] <- PER.STRAIN(median)
  TABLE[["Doubling.SD"]] <- DOUBLING.SD
  #CV = SD/mean; Fano = variance/mean.
  TABLE[["Doubling.CV"]] <- DOUBLING.SD/DOUBLING.MEAN
  TABLE[["Doubling.Fano"]] <- DOUBLING.SD^2/DOUBLING.MEAN

  TABLE
})

#Permutation tests on doubling times (mother cells).
#Each test pools two samples, randomly reassigns the pooled values to two groups of the
#original sizes N.TEST times, and compares the absolute difference of a statistic between
#the permuted groups with the observed absolute difference.
#The tests run in the same order as before, so the sequence of random draws is unchanged.

N.TEST <- 100000

#PERMUTATION.P.VALUE: permutation p-value for the absolute difference of STAT between two samples.
#  DATA.1, DATA.2 : numeric vectors to compare.
#  STAT           : function returning one number per vector (mean, median or sd below).
#  N.TEST         : number of random permutations.
#Returns 1 minus the fraction of permuted differences strictly smaller than the observed
#one, i.e. permuted differences equal to the observed one count as at least as extreme.
PERMUTATION.P.VALUE <- function(DATA.1, DATA.2, STAT, N.TEST=100000)
{
  DELTA.OBS <- abs(STAT(DATA.1) - STAT(DATA.2))
  COMBINE <- c(DATA.1,DATA.2)
  DELTA.PERMUT <- rep(0,N.TEST)

  for (i in seq_len(N.TEST))
  {
    #Positions of the pooled values assigned to the first permuted group.
    POS.1 <- sample.int(length(COMBINE),size=length(DATA.1),replace=FALSE)
    DELTA.PERMUT[i] <- abs(STAT(COMBINE[POS.1]) - STAT(COMBINE[-POS.1]))
  }

  1 - length(which(DELTA.PERMUT < DELTA.OBS))/length(DELTA.PERMUT)
}

#Permutation tests to compare mean doubling times

#Pair B
P.VAL <- PERMUTATION.P.VALUE(YPW2879$Diff, YPW2868$Diff, mean, N.TEST)
Summary[1:2,"Pval.Mean"] <- P.VAL

#Pair D
P.VAL <- PERMUTATION.P.VALUE(YPW3064$Diff, YPW3047$Diff, mean, N.TEST)
Summary[3:4,"Pval.Mean"] <- P.VAL


#Permutation tests to compare median doubling times

#Pair B
P.VAL <- PERMUTATION.P.VALUE(YPW2879$Diff, YPW2868$Diff, median, N.TEST)
Summary[1:2,"Pval.Median"] <- P.VAL

#Pair D
P.VAL <- PERMUTATION.P.VALUE(YPW3064$Diff, YPW3047$Diff, median, N.TEST)
Summary[3:4,"Pval.Median"] <- P.VAL

#Permutation tests to compare standard deviation of doubling times
#The second sample is shifted so that both samples have the same mean before pooling;
#the pooled values then differ between groups only in their spread around that mean.

#Pair B
P.VAL <- PERMUTATION.P.VALUE(YPW2879$Diff, YPW2868$Diff + mean(YPW2879$Diff) - mean(YPW2868$Diff), sd, N.TEST)
Summary[1:2,"Pval.SD"] <- P.VAL

#Pair D
P.VAL <- PERMUTATION.P.VALUE(YPW3064$Diff, YPW3047$Diff + mean(YPW3064$Diff) - mean(YPW3047$Diff), sd, N.TEST)
Summary[3:4,"Pval.SD"] <- P.VAL

write.table(Summary,"Summary.Doubling.Time.Mothers.txt",sep="\t",quote=FALSE,row.names=FALSE)


#Histograms of doubling time.
#One density histogram per strain on a 2 x 2 page, with 6-unit bins from 0 to 300.

BREAKS <- seq(0,300,6)

#PLOT.DOUBLING.HIST: draw one doubling-time histogram in the current panel.
#  DIFF   : numeric vector of doubling times.
#  MAIN   : panel title.
#  COL    : fill colour of the bars.
#  Y.MAX  : upper limit of the density axis.
#  Y.STEP : spacing of the tick marks on the density axis.
#  BREAKS : histogram breakpoints.
#The legend (N, mean, SD) is computed from DIFF instead of being typed in by hand, so it
#always matches the plotted data. A filled triangle marks the mean just above the x axis.
PLOT.DOUBLING.HIST <- function(DIFF, MAIN, COL, Y.MAX, Y.STEP, BREAKS)
{
  hist(DIFF, col=COL, breaks=BREAKS, xlim=c(0,300), ylim=c(0,Y.MAX), xlab="Doubling Time (minutes)",ylab="Density",main=MAIN,border=NA,xaxt="n",yaxt="n",font.lab=2,freq=FALSE)
  axis(2, at=seq(0,Y.MAX,by=Y.STEP),lwd=2, font=2, pos=0)
  axis(1, at=seq(0,300,by=50),font=2,lwd=2,pos=0)
  legend("topright",legend=c(paste0("N = ",length(DIFF)), sprintf("Mean = %.1f",mean(DIFF)), sprintf("SD = %.1f",sd(DIFF))),box.lty=0,cex=1.2,inset=0.05)
  points(mean(DIFF),0.0007,pch=25,col="black",bg="black")
}

pdf("Distribution.Doubling.Mothers.pdf",height=10,width=12,useDingbats=FALSE)
#windows(height=10,width=13)
par(mfrow=c(2,2))
PLOT.DOUBLING.HIST(YPW2879$Diff, "YPW2879", "#FF000099", 0.04, 0.01, BREAKS)

#par(mfg=...) selects the panel (row, column) used by the next histogram.
par(mfg=c(2,1))
PLOT.DOUBLING.HIST(YPW2868$Diff, "YPW2868", "#0000FF99", 0.04, 0.01, BREAKS)

par(mfg=c(1,2))
PLOT.DOUBLING.HIST(YPW3064$Diff, "YPW3064", "#FF000099", 0.04, 0.01, BREAKS)

par(mfg=c(2,2))
PLOT.DOUBLING.HIST(YPW3047$Diff, "YPW3047", "#0000FF99", 0.04, 0.01, BREAKS)

dev.off()



###############################
#c-Analyze only daughter cells#
###############################

#One data frame per strain, keeping only daughter cells with Diff > 1.
#Cells recorded at Position "5" are additionally dropped for YPW3064.
YPW2879 <- DATA[which(DATA$Strain == "YPW2879" & DATA$Diff > 1 & DATA$Status == "Daughter"), ]
YPW3064 <- DATA[which(DATA$Strain == "YPW3064" & DATA$Diff > 1 & DATA$Position != "5" & DATA$Status == "Daughter"), ]
YPW2868 <- DATA[which(DATA$Strain == "YPW2868" & DATA$Diff > 1 & DATA$Status == "Daughter"), ]
YPW3047 <- DATA[which(DATA$Strain == "YPW3047" & DATA$Diff > 1 & DATA$Status == "Daughter"), ]

#Summary table: one row per strain, in the order YPW2879, YPW2868, YPW3064, YPW3047.
#The three Pval.* columns stay NA here and are filled by the permutation tests below.
Summary <- local({
  DIFFS <- list(YPW2879$Diff, YPW2868$Diff, YPW3064$Diff, YPW3047$Diff)
  #Apply FUN to the doubling times of each strain and collect the four results in a vector.
  PER.STRAIN <- function(FUN) unlist(lapply(DIFFS, FUN))

  TABLE <- as.data.frame(matrix(NA, nrow=4, ncol=14))
  names(TABLE) <- c("Strain","Pair","Median.Expression","Noise.Expression","Status","N.Cells","Doubling.Mean","Doubling.Median","Doubling.SD","Doubling.CV","Doubling.Fano","Pval.Mean","Pval.Median","Pval.SD")

  TABLE[["Strain"]] <- c("YPW2879","YPW2868","YPW3064","YPW3047")
  TABLE[["Pair"]] <- c("B","B","D","D")
  #Hard-coded expression values for the four strains.
  TABLE[["Median.Expression"]] <- c(0.1924194,0.1841942,0.9601117,0.8996451)
  TABLE[["Noise.Expression"]] <- c(0.3627838,2.355751,1.0847789,2.173219)
  TABLE[["Status"]] <- rep("Daughters",4)
  TABLE[["N.Cells"]] <- c(nrow(YPW2879),nrow(YPW2868),nrow(YPW3064),nrow(YPW3047))

  DOUBLING.MEAN <- PER.STRAIN(mean)
  DOUBLING.SD <- PER.STRAIN(sd)
  TABLE[["Doubling.Mean"]] <- DOUBLING.MEAN
  TABLE[["Doubling.Median"]] <- PER.STRAIN(median)
  TABLE[["Doubling.SD"]] <- DOUBLING.SD
  #CV = SD/mean; Fano = variance/mean.
  TABLE[["Doubling.CV"]] <- DOUBLING.SD/DOUBLING.MEAN
  TABLE[["Doubling.Fano"]] <- DOUBLING.SD^2/DOUBLING.MEAN

  TABLE
})

#Permutation tests to compare mean doubling times

#Pair B
DATA.1 <- YPW2879$Diff					
DATA.2 <- YPW2868$Diff				

N.TEST <- 100000					
DELTA.OBS <- abs(mean(DATA.1) - mean(DATA.2))					
DELTA.PERMUT <- rep(0,N.TEST)	
COMBINE <- c(DATA.1,DATA.2)

for (i in 1:N.TEST)					
{					
  POS.1 <- sample(1:length(COMBINE),size=length(DATA.1),replace=FALSE)				
  PERMUT.1 <- COMBINE[POS.1]				
  PERMUT.2 <- COMBINE[-POS.1]			
  DELTA.PERMUT[i] <- abs(mean(PERMUT.1) - mean(PERMUT.2))				
}					

P.VAL <- 1 - length(which(DELTA.PERMUT < DELTA.OBS))/length(DELTA.PERMUT)					
Summary[1:2,"Pval.Mean"] <- P.VAL

#Pair D
DATA.1 <- YPW3064$Diff					
DATA.2 <- YPW3047$Diff				

N.TEST <- 100000					
DELTA.OBS <- abs(mean(DATA.1) - mean(DATA.2))					
DELTA.PERMUT <- rep(0,N.TEST)	
COMBINE <- c(DATA.1,DATA.2)

for (i in 1:N.TEST)					
{					
  POS.1 <- sample(1:length(COMBINE),size=length(DATA.1),replace=FALSE)				
  PERMUT.1 <- COMBINE[POS.1]				
  PERMUT.2 <- COMBINE[-POS.1]			
  DELTA.PERMUT[i] <- abs(mean(PERMUT.1) - mean(PERMUT.2))				
}					

P.VAL <- 1 - length(which(DELTA.PERMUT < DELTA.OBS))/length(DELTA.PERMUT)					
Summary[3:4,"Pval.Mean"] <- P.VAL


#Permutation tests to compare median doubling times

#Pair B
DATA.1 <- YPW2879$Diff					
DATA.2 <- YPW2868$Diff				

N.TEST <- 100000					
DELTA.OBS <- abs(median(DATA.1) - median(DATA.2))					
DELTA.PERMUT <- rep(0,N.TEST)	
COMBINE <- c(DATA.1,DATA.2)

for (i in 1:N.TEST)					
{					
  POS.1 <- sample(1:length(COMBINE),size=length(DATA.1),replace=FALSE)				
  PERMUT.1 <- COMBINE[POS.1]				
  PERMUT.2 <- COMBINE[-POS.1]			
  DELTA.PERMUT[i] <- abs(median(PERMUT.1) - median(PERMUT.2))				
}					

P.VAL <- 1 - length(which(DELTA.PERMUT < DELTA.OBS))/length(DELTA.PERMUT)					
Summary[1:2,"Pval.Median"] <- P.VAL

#Pair D
DATA.1 <- YPW3064$Diff					
DATA.2 <- YPW3047$Diff				

N.TEST <- 100000					
DELTA.OBS <- abs(median(DATA.1) - median(DATA.2))					
DELTA.PERMUT <- rep(0,N.TEST)	
COMBINE <- c(DATA.1,DATA.2)

for (i in 1:N.TEST)					
{					
  POS.1 <- sample(1:length(COMBINE),size=length(DATA.1),replace=FALSE)				
  PERMUT.1 <- COMBINE[POS.1]				
  PERMUT.2 <- COMBINE[-POS.1]			
  DELTA.PERMUT[i] <- abs(median(PERMUT.1) - median(PERMUT.2))				
}					

P.VAL <- 1 - length(which(DELTA.PERMUT < DELTA.OBS))/length(DELTA.PERMUT)					
Summary[3:4,"Pval.Median"] <- P.VAL

#Permutation tests to compare standard deviation of doubling times

#Pair B (YPW2879 vs YPW2868)
# Two-sided permutation test on the absolute difference between the standard
# deviations of doubling time. The second sample is first shifted so that
# its mean matches the mean of the first sample; the permutations then only
# reflect differences in spread, not in location.
DATA.1 <- YPW2879$Diff
DATA.2 <- YPW2868$Diff + mean(DATA.1) - mean(YPW2868$Diff)
COMBINE <- c(DATA.1, DATA.2)
N.TEST <- 100000

# Observed statistic.
DELTA.OBS <- abs(sd(DATA.1) - sd(DATA.2))

# Null distribution: the pooled values are split at random into a group of
# the same size as DATA.1 and a group made of the remaining values.
DELTA.PERMUT <- vapply(seq_len(N.TEST), function(k) {
  IN.GROUP.1 <- sample(seq_along(COMBINE), size = length(DATA.1), replace = FALSE)
  abs(sd(COMBINE[IN.GROUP.1]) - sd(COMBINE[-IN.GROUP.1]))
}, numeric(1))

# P-value: one minus the fraction of permuted differences that are strictly
# smaller than the observed difference.
P.VAL <- 1 - sum(DELTA.PERMUT < DELTA.OBS, na.rm = TRUE) / length(DELTA.PERMUT)
Summary[1:2, "Pval.SD"] <- P.VAL

#Pair D (YPW3064 vs YPW3047)
# Two-sided permutation test on the absolute difference between the standard
# deviations of doubling time. The second sample is first shifted so that
# its mean matches the mean of the first sample; the permutations then only
# reflect differences in spread, not in location.
DATA.1 <- YPW3064$Diff
DATA.2 <- YPW3047$Diff + mean(DATA.1) - mean(YPW3047$Diff)
COMBINE <- c(DATA.1, DATA.2)
N.TEST <- 100000

# Observed statistic.
DELTA.OBS <- abs(sd(DATA.1) - sd(DATA.2))

# Null distribution: the pooled values are split at random into a group of
# the same size as DATA.1 and a group made of the remaining values.
DELTA.PERMUT <- vapply(seq_len(N.TEST), function(k) {
  IN.GROUP.1 <- sample(seq_along(COMBINE), size = length(DATA.1), replace = FALSE)
  abs(sd(COMBINE[IN.GROUP.1]) - sd(COMBINE[-IN.GROUP.1]))
}, numeric(1))

# P-value: one minus the fraction of permuted differences that are strictly
# smaller than the observed difference.
P.VAL <- 1 - sum(DELTA.PERMUT < DELTA.OBS, na.rm = TRUE) / length(DELTA.PERMUT)
Summary[3:4, "Pval.SD"] <- P.VAL

# Save the summary table as a tab-delimited text file, without quoting
# and without row names.
write.table(
  x = Summary,
  file = "Summary.Doubling.Time.Daughters.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)


#Histograms of doubling time.
# One density histogram per strain, arranged on a 2 x 2 page and written to
# "Distribution.Doubling.Daughters.pdf". A black triangle on each panel marks
# the mean doubling time of the strain shown in that panel.

# Bin edges shared by all panels: 6-minute bins from 0 to 300 minutes.
BREAKS <- seq(0,300,6)

# Draw one panel: density histogram of TIMES with custom axes, a legend and
# a marker at mean(TIMES).
#   TIMES  : numeric vector of doubling times (plotted on a 0-300 x axis).
#   COLOR  : fill colour of the bars.
#   TITLE  : panel title.
#   LEGEND : character vector of legend lines (hard-coded summary values).
# Reads BREAKS from the calling environment.
HIST.DOUBLING <- function(TIMES, COLOR, TITLE, LEGEND)
{
  hist(TIMES, col=COLOR, breaks=BREAKS, xlim=c(0,300), ylim=c(0,0.03), xlab="Doubling Time (minutes)",ylab="Density",main=TITLE,border=NA,xaxt="n",yaxt="n",font.lab=2,freq=FALSE)
  # Default axes are suppressed above (xaxt/yaxt = "n") and redrawn here so
  # that both axes are positioned at 0 (pos=0).
  axis(2, at=seq(0,0.03,by=0.01),lwd=2, font=2, pos=0)
  axis(1, at=seq(0,300,by=50),font=2,lwd=2,pos=0)
  legend("topright",legend=LEGEND,box.lty=0,cex=1.2,inset=0.05)
  # Mark the mean of the data drawn in THIS panel (not a fixed strain).
  points(mean(TIMES),0.0007,pch=25,col="black",bg="black")
}

pdf("Distribution.Doubling.Daughters.pdf",height=10,width=12,useDingbats=FALSE)
#windows(height=10,width=13)
par(mfrow=c(2,2))

# Row 1, column 1 (first figure of the page).
HIST.DOUBLING(YPW2879$Diff, "#FF000099", "YPW2879", c("N = 177", "Mean = 129.3", "SD = 24.6"))

# par(mfg=) selects the row/column of the next panel explicitly, so the two
# strains of a pair are drawn one above the other in the same column.
par(mfg=c(2,1))
HIST.DOUBLING(YPW2868$Diff, "#0000FF99", "YPW2868", c("N = 183", "Mean = 132.7", "SD = 31.0"))

par(mfg=c(1,2))
HIST.DOUBLING(YPW3064$Diff, "#FF000099", "YPW3064", c("N = 130", "Mean = 131.2", "SD = 28.2"))

par(mfg=c(2,2))
HIST.DOUBLING(YPW3047$Diff, "#0000FF99", "YPW3047", c("N = 148", "Mean = 123.8", "SD = 35.4"))

dev.off()







