## Analysis of daily plate reader measurements of populations evolving in galactose (Evolution Experiment #24 analysis)

# # evo-cassette Locus E with and without IS deletion ("deltaIS1C" and "(IT0)30", resp.);

# data: EE24.12 plates 1-3 (delta IS1C - medium C,B,A) + plates 7-10 (IT030 - medium C, B, A; control plate medium E) May 2019
# evolution in Gal (A-1%, B-0.1%, C-0.01%) + 0.1% CAS; plate E = CAS only ctr.
#########################################################################################


###### read in data
# The file names below are resolved relative to this experiment folder.
setwd("/Users/itomanek/Documents/promoter_evolution/experiments/platereader_data/2019_May/EE24")

# Every export is read as a tab-separated table with a header row.
read_plate <- function(file) {
  read.csv2(file, header = TRUE, sep = "\t")
}

# IT030 (one OD600 / YFP / CFP table per plate A, B, C)
OD_30A <- read_plate("EE24.12_30-A_OD600.txt")
YFP_30A <- read_plate("EE24.12_30-A_YFP2.txt")
CFP_30A <- read_plate("EE24.12_30-A_CFP2.txt")
OD_30B <- read_plate("EE24.12_30-B_OD600.txt")
YFP_30B <- read_plate("EE24.12_30-B_YFP2.txt")
CFP_30B <- read_plate("EE24.12_30-B_CFP2.txt")
OD_30C <- read_plate("EE24.12_30-C_OD600.txt")
YFP_30C <- read_plate("EE24.12_30-C_YFP2.txt")
CFP_30C <- read_plate("EE24.12_30-C_CFP2.txt")

# IS- (one OD600 / YFP / CFP table per plate C, B, A)
OD_ISC <- read_plate("EE24.12_IS-C_OD600.txt")
YFP_ISC <- read_plate("EE24.12_IS-C_YFP2.txt")
CFP_ISC <- read_plate("EE24.12_IS-C_CFP2.txt")
OD_ISB <- read_plate("EE24.12_IS-B_OD600.txt")
YFP_ISB <- read_plate("EE24.12_IS-B_YFP2.txt")
CFP_ISB <- read_plate("EE24.12_IS-B_CFP2.txt")
OD_ISA <- read_plate("EE24.12_IS-A_OD600.txt")
YFP_ISA <- read_plate("EE24.12_IS-A_YFP2.txt")
CFP_ISA <- read_plate("EE24.12_IS-A_CFP2.txt")

# ctr - all 3 (IT030, ISC-, "ISB-")
OD_E <- read_plate("EE24.12_E_OD600.txt")
YFP_E <- read_plate("EE24.12_E_YFP2.txt")
CFP_E <- read_plate("EE24.12_E_CFP2.txt")
#############################################################

########## rearrange the data so that each row contains the value for one
# read of one well
library(reshape2)

# melt() is told that "Time" is the identity column; the remaining columns
# are the measured ones. Their former column name is stored in "Well" and
# the reading itself in a column named after the channel (`value`).
melt_reads <- function(plate, value) {
  melt(plate, id = c("Time"), variable.name = "Well", value.name = value)
}

# od
reshaped_30A <- melt_reads(OD_30A, "OD600")
reshaped_30B <- melt_reads(OD_30B, "OD600")
reshaped_30C <- melt_reads(OD_30C, "OD600")
reshaped_ISA <- melt_reads(OD_ISA, "OD600")
reshaped_ISB <- melt_reads(OD_ISB, "OD600")
reshaped_ISC <- melt_reads(OD_ISC, "OD600")
reshaped_E <- melt_reads(OD_E, "OD600")

# yfp
yfp_reshaped_30A <- melt_reads(YFP_30A, "YFP")
yfp_reshaped_30B <- melt_reads(YFP_30B, "YFP")
yfp_reshaped_30C <- melt_reads(YFP_30C, "YFP")
yfp_reshaped_ISA <- melt_reads(YFP_ISA, "YFP")
yfp_reshaped_ISB <- melt_reads(YFP_ISB, "YFP")
yfp_reshaped_ISC <- melt_reads(YFP_ISC, "YFP")
yfp_reshaped_E <- melt_reads(YFP_E, "YFP")

# cfp
cfp_reshaped_30A <- melt_reads(CFP_30A, "CFP")
cfp_reshaped_30B <- melt_reads(CFP_30B, "CFP")
cfp_reshaped_30C <- melt_reads(CFP_30C, "CFP")
cfp_reshaped_ISA <- melt_reads(CFP_ISA, "CFP")
cfp_reshaped_ISB <- melt_reads(CFP_ISB, "CFP")
cfp_reshaped_ISC <- melt_reads(CFP_ISC, "CFP")
cfp_reshaped_E <- melt_reads(CFP_E, "CFP")


########## transform the time stamps to days (plate reader has format 00:00:00)
# The time axis is taken from the melted OD600 table of plate 30-A and is
# written into every other table below, so all tables share identical Time
# values (they are used as join keys later on).
# NOTE(review): this assumes every table has the same reads in the same
# order as reshaped_30A - confirm against the raw exports.
time <- as.character(reshaped_30A[, 1])
time <- vapply(
  strsplit(time, ":"),
  function(x) {
    x <- as.numeric(x)
    # hours plus minutes, as a fraction of a 24 h day; seconds are ignored
    (x[1] + x[2] / 60) / 24
  },
  numeric(1) # type-stable: always a numeric vector, even for empty input
)
time <- round(time, 2)
time # in days, rounded to two decimals

# replace the old time format with time in days
reshaped_30A$Time <- time
reshaped_30B$Time <- time
reshaped_30C$Time <- time
reshaped_ISA$Time <- time
reshaped_ISB$Time <- time
reshaped_ISC$Time <- time
reshaped_E$Time <- time
yfp_reshaped_30A$Time <- time
yfp_reshaped_30B$Time <- time
yfp_reshaped_30C$Time <- time
yfp_reshaped_ISA$Time <- time
yfp_reshaped_ISB$Time <- time
yfp_reshaped_ISC$Time <- time
yfp_reshaped_E$Time <- time
cfp_reshaped_30A$Time <- time
cfp_reshaped_30B$Time <- time
cfp_reshaped_30C$Time <- time
cfp_reshaped_ISA$Time <- time
cfp_reshaped_ISB$Time <- time
cfp_reshaped_ISC$Time <- time
cfp_reshaped_E$Time <- time


###### PLATE INFO (TEMPLATE)  ########

#read in the plate template - i.e. metadata, additional info
#plate_info=read.csv2(".txt", header=TRUE, sep="\t")

# format of plate_template: well, strain
#head(plate_info)

#combine reshaped and plate_info
#install.packages("dplyr")  ##info: https://cran.rstudio.com/web/packages/dplyr/vignettes/introduction.html
library("dplyr")
#annotated <- inner_join(reshaped, plate_info, by="Well", copy=TRUE)

# Per plate: join the OD600, YFP and CFP long tables on (Time, Well) so that
# each row carries all three readings of one well at one read.
annotated_30A <- inner_join(reshaped_30A, yfp_reshaped_30A, by = c("Time", "Well"))
annotated_30A <- inner_join(annotated_30A, cfp_reshaped_30A, by = c("Time", "Well"))
annotated_30B <- inner_join(reshaped_30B, yfp_reshaped_30B, by = c("Time", "Well"))
annotated_30B <- inner_join(annotated_30B, cfp_reshaped_30B, by = c("Time", "Well"))
annotated_30C <- inner_join(reshaped_30C, yfp_reshaped_30C, by = c("Time", "Well"))
annotated_30C <- inner_join(annotated_30C, cfp_reshaped_30C, by = c("Time", "Well"))
annotated_ISA <- inner_join(reshaped_ISA, yfp_reshaped_ISA, by = c("Time", "Well"))
annotated_ISA <- inner_join(annotated_ISA, cfp_reshaped_ISA, by = c("Time", "Well"))
annotated_ISB <- inner_join(reshaped_ISB, yfp_reshaped_ISB, by = c("Time", "Well"))
annotated_ISB <- inner_join(annotated_ISB, cfp_reshaped_ISB, by = c("Time", "Well"))
annotated_ISC <- inner_join(reshaped_ISC, yfp_reshaped_ISC, by = c("Time", "Well"))
annotated_ISC <- inner_join(annotated_ISC, cfp_reshaped_ISC, by = c("Time", "Well"))
annotated_E <- inner_join(reshaped_E, yfp_reshaped_E, by = c("Time", "Well"))
annotated_E <- inner_join(annotated_E, cfp_reshaped_E, by = c("Time", "Well"))

# keep the evolution medium info (% galactose) in an additional column
annotated_30A$medium <- rep(1, nrow(annotated_30A))
annotated_30B$medium <- rep(0.1, nrow(annotated_30B))
annotated_30C$medium <- rep(0.01, nrow(annotated_30C))
annotated_ISA$medium <- rep(1, nrow(annotated_ISA))
annotated_ISB$medium <- rep(0.1, nrow(annotated_ISB))
annotated_ISC$medium <- rep(0.01, nrow(annotated_ISC))
# sized by the control table itself (was sized by annotated_30C, which only
# works while both tables happen to have the same number of rows)
annotated_E$medium <- rep(0.0, nrow(annotated_E))



###### Group data ###############################################

# one group per (Time, Well, medium) combination on every plate
grouped_30A <- group_by(annotated_30A, Time, Well, medium)
grouped_30B <- group_by(annotated_30B, Time, Well, medium)
grouped_30C <- group_by(annotated_30C, Time, Well, medium)
grouped_ISA <- group_by(annotated_ISA, Time, Well, medium)
grouped_ISB <- group_by(annotated_ISB, Time, Well, medium)
grouped_ISC <- group_by(annotated_ISC, Time, Well, medium)
grouped_E <- group_by(annotated_E, Time, Well, medium)

## exclude spill-over wells in deltaIS1C B plate (see below tilefunction analysis)
grouped_ISB_clean <- subset(
  grouped_ISB,
  !(Well %in% c(
    "C3", "C2", "D2", "E2", "A1", "A2", "A3", "B1", "B3", "C1",
    "H1", "H2", "F1", "A6", "E10", "F11", "G2"
  ))
)

# wells remaining after the exclusion
unique(grouped_ISB_clean$Well)

#### combine data
# NOTE(review): the combined table uses grouped_ISB (spill-over wells
# included), not grouped_ISB_clean - confirm this is intended.
grouped_All <- rbind(
  grouped_30A, grouped_30B, grouped_30C,
  grouped_ISA, grouped_ISB, grouped_ISC,
  grouped_E
)

################## PLOTS ##########################
# Plotting packages; load order kept as in the original script.
#install.packages("ggplot2")
library(ggplot2)
#install.packages("Hmisc")
library("Hmisc")
# grey palette, dark to light
greys <- c("black", "#4D4D4D", "#888888", "#AEAEAE", "#CCCCCC")
library("Rmisc")
# RGB colors (red, green, blue, alpha on a 0-1 scale)
light_blue <- rgb(0, 0.75, 1, 1)
intermediate_blue <- rgb(0, 0.5, 1, 1)
###############  ############### ###############

## CFP in detail  # figure paper
# CFP
# Shared canvas: fixed y range, one x tick per day, no legend, no axis
# titles, no grid.
plot <- ggplot() +
  ylim(0, 35) +
  scale_x_continuous(breaks = seq(0, 14, 1)) +
  theme_bw() + # no grey background
  theme(
    legend.position = "none", # no legend
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  )

# One panel per medium: IT030 wells in the default colour, IS- wells in red;
# one line per well, CFP/OD600 scaled down by 1000, time shifted by one day.
a <- plot +
  geom_line(
    data = grouped_30A,
    aes(x = Time + 1, y = CFP / OD600 / 1000, group = Well, linetype = as.factor(medium))
  ) +
  geom_line(
    data = grouped_ISA,
    aes(x = Time + 1, y = CFP / OD600 / 1000, group = Well, linetype = as.factor(medium)),
    color = "red"
  )
b <- plot +
  geom_line(
    data = grouped_30B,
    aes(x = Time + 1, y = CFP / OD600 / 1000, group = Well, linetype = as.factor(medium))
  ) +
  geom_line(
    data = grouped_ISB_clean,
    aes(x = Time + 1, y = CFP / OD600 / 1000, group = Well, linetype = as.factor(medium)),
    color = "red"
  )
c <- plot +
  geom_line(
    data = grouped_30C,
    aes(x = Time + 1, y = CFP / OD600 / 1000, group = Well, linetype = as.factor(medium))
  ) +
  geom_line(
    data = grouped_ISC,
    aes(x = Time + 1, y = CFP / OD600 / 1000, group = Well, linetype = as.factor(medium)),
    color = "red"
  )
e <- plot +
  geom_line(
    data = grouped_E,
    aes(x = Time + 1, y = CFP / OD600 / 1000, group = Well, linetype = as.factor(medium))
  )

# stacked in one column: control, then C, B, A
multiplot(e, c, b, a, cols = 1)




## YFP in detail
# YFP
# Shared canvas: fixed y range, no grid (legend and axis titles are kept here).
plot <- ggplot() +
  ylim(0, 41000) +
  theme_bw() + # no grey background
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  )

# One panel per medium: IT030 wells in the default colour, IS- wells in red;
# one line per well showing YFP/OD600 over time.
a <- plot +
  geom_line(
    data = grouped_30A,
    aes(x = Time, y = YFP / OD600, group = Well, linetype = as.factor(medium))
  ) +
  geom_line(
    data = grouped_ISA,
    aes(x = Time, y = YFP / OD600, group = Well, linetype = as.factor(medium)),
    color = "red"
  )
b <- plot +
  geom_line(
    data = grouped_30B,
    aes(x = Time, y = YFP / OD600, group = Well, linetype = as.factor(medium))
  ) +
  geom_line(
    data = grouped_ISB_clean,
    aes(x = Time, y = YFP / OD600, group = Well, linetype = as.factor(medium)),
    color = "red"
  )
c <- plot +
  geom_line(
    data = grouped_30C,
    aes(x = Time, y = YFP / OD600, group = Well, linetype = as.factor(medium))
  ) +
  geom_line(
    data = grouped_ISC,
    aes(x = Time, y = YFP / OD600, group = Well, linetype = as.factor(medium)),
    color = "red"
  )
e <- plot +
  geom_line(
    data = grouped_E,
    aes(x = Time, y = YFP / OD600, group = Well, linetype = as.factor(medium))
  )

# stacked in one column: A, B, C, then control
multiplot(a, b, c, e, cols = 1)

#### plot in the paper
# FOLD CHANGES
# normalize by mean CFP and YFP of E-control (=no gal, ie ancestral fluor.)
# Both axes are log-transformed. Breaks must therefore be strictly positive:
# the former breaks -1 and 0 have no position on a log axis (log(-1) is NaN,
# log(0) is -Inf) and were never drawn, so they are dropped here.
plot <- ggplot() +
  # xlim(0,55)+
  # ylim(0,20)+
  scale_x_continuous(trans = "log", breaks = c(1, 10, 100), limits = c(NA, 100)) +
  scale_y_continuous(trans = "log", breaks = c(1, 10, 50), limits = c(NA, 50)) +
  theme_bw() + theme(legend.position = "none") + # no grey background, no legend
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  )

# normalize: mean raw CFP / YFP over all reads of the control plate E
n <- mean(grouped_E$CFP)
ny <- mean(grouped_E$YFP)

# Per medium: trajectories (lines) and reads (points) of every well in the
# CFP-fold vs YFP-fold plane; IS- wells carry the colour label "IS1C-".
# In the line layers the constant alpha = 0.5 is set next to the mapped
# alpha = -Time; only the point layers rely on the mapping alone.
b <- plot +
  geom_line(data = grouped_30B, aes(x = (CFP / n), y = (YFP / ny), group = Well, alpha = -Time), size = 0.5, alpha = 0.5) +
  geom_line(data = grouped_ISB_clean, aes(x = (CFP / n), y = (YFP / ny), color = "IS1C-", group = Well, alpha = -Time), size = 0.5, alpha = 0.5) +
  geom_point(data = grouped_30B, aes(x = (CFP / n), y = (YFP / ny), alpha = -Time), size = 0.5) +
  geom_point(data = grouped_ISB_clean, aes(x = (CFP / n), y = (YFP / ny), color = "IS1C-", alpha = -Time), size = 0.5)
b
c <- plot +
  geom_line(data = grouped_30C, aes(x = (CFP / n), y = (YFP / ny), group = Well, alpha = -Time), size = 0.5, alpha = 0.5) +
  geom_line(data = grouped_ISC, aes(x = (CFP / n), y = (YFP / ny), color = "IS1C-", group = Well, alpha = -Time), size = 0.5, alpha = 0.5) +
  geom_point(data = grouped_30C, aes(x = (CFP / n), y = (YFP / ny), alpha = -Time), size = 0.5) +
  geom_point(data = grouped_ISC, aes(x = (CFP / n), y = (YFP / ny), color = "IS1C-", alpha = -Time), size = 0.5)
c

a <- plot +
  geom_line(data = grouped_30A, aes(x = (CFP / n), y = (YFP / ny), group = Well, alpha = -Time), size = 0.5, alpha = 0.5) +
  geom_line(data = grouped_ISA, aes(x = (CFP / n), y = (YFP / ny), color = "IS1C-", group = Well, alpha = -Time), size = 0.5, alpha = 0.5) +
  geom_point(data = grouped_30A, aes(x = (CFP / n), y = (YFP / ny), alpha = -Time), size = 0.5) +
  geom_point(data = grouped_ISA, aes(x = (CFP / n), y = (YFP / ny), color = "IS1C-", alpha = -Time), size = 0.5)
a
e <- plot +
  geom_line(data = grouped_E, aes(x = (CFP / n), y = (YFP / ny), group = Well, alpha = -Time), size = 0.5, alpha = 0.5) +
  geom_point(data = grouped_E, aes(x = (CFP / n), y = (YFP / ny), alpha = -Time), size = 0.5)
e
multiplot(a, b, c, e, cols = 2)

### TRYING OUT LOG PLOT FOR PAPER INSTEAD (visibility greater?)
## log CFP-YFP plot -
# One scatter panel per strain/medium combination: log(CFP) against log(YFP)
# for every read of every well.
plot <- ggplot() +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  )
a30 <- plot + geom_point(data = grouped_30A, aes(x = log(CFP), y = log(YFP), group = Well))
aIS <- plot + geom_point(data = grouped_ISA, aes(x = log(CFP), y = log(YFP), group = Well))
b30 <- plot + geom_point(data = grouped_30B, aes(x = log(CFP), y = log(YFP), group = Well))
bIS <- plot + geom_point(data = grouped_ISB_clean, aes(x = log(CFP), y = log(YFP), group = Well))
c30 <- plot + geom_point(data = grouped_30C, aes(x = log(CFP), y = log(YFP), group = Well))
cIS <- plot + geom_point(data = grouped_ISC, aes(x = log(CFP), y = log(YFP), group = Well))
# All six panels, each exactly once. The first slot used to repeat bIS,
# which left the IS- 0.01% panel (cIS) out of the figure.
# NOTE(review): only the duplicated slot was changed; confirm the panel order.
multiplot(cIS, c30, b30, bIS, a30, aIS, cols = 2)


### level of amplification differs between strains
# CFP
# CFP/OD600 over time for every IT030 well, coloured by evolution medium.
# alpha = 0.7 is a fixed transparency and is therefore set outside aes();
# inside aes() it was treated as a mapped variable instead of being applied
# as 70 % opacity.
ggplot() + xlim(0, 11) + ylim(0, 30000) +
  geom_line(data = subset(grouped_30C), aes(x = Time, y = CFP / OD600, group = Well, color = as.factor(medium)), alpha = 0.7) +
  geom_line(data = subset(grouped_30B), aes(x = Time, y = CFP / OD600, group = Well, color = as.factor(medium))) +
  geom_line(data = subset(grouped_30A), aes(x = Time, y = CFP / OD600, group = Well, color = as.factor(medium))) +
  # geom_line(data=subset(grouped_D), aes(x=Time, y=CFP/OD600, group=Well, color=as.factor(medium), linetype=as.factor(medium), alpha=0.7))+
  #geom_line(data=subset(grouped_E), aes(x=Time, y=CFP/OD600, group=Well, linetype=as.factor(medium), alpha=0.2),color="grey")+
  theme_bw() + # no grey background
  scale_colour_manual(values = c(light_blue, intermediate_blue, "blue")) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  )

# nr amplified > mean of ctr + 4*SD of ctr
# OD-normalized CFP of every read on the control plate E (all days, all wells)
ctr_cfp_per_od <- grouped_E$CFP / grouped_E$OD600
mean_E <- mean(ctr_cfp_per_od) # mean CFP over all days E (normalized to OD)
sd_E <- sd(ctr_cfp_per_od) # sd CFP over all days E (normalized to OD)

# Count the wells of one plate whose OD-normalized CFP (CFP / OD600) at a
# given read exceeds a threshold.
#
# group      data frame with columns Time, CFP and OD600 (one row per well
#            and read).
# time_point read to evaluate, in the units of group$Time; defaults to 10.99.
# threshold  cut-off for CFP / OD600; defaults to mean_E + 4 * sd_E, which is
#            looked up when the function is called.
#
# Returns the number of rows at time_point above the threshold; rows with a
# missing ratio are not counted.
#
# CFP and OD600 are taken from the same rows. Previously CFP was read at
# Time "10.99" but OD600 at Time "10", so the ratio combined different reads
# (or was empty when no read had Time 10).
no_amplified <- function(group, time_point = 10.99, threshold = mean_E + 4 * sd_E) {
  read_times <- as.numeric(as.character(group$Time))
  # Time is rounded to two decimals upstream, so match with a tolerance
  # rather than exact floating-point equality
  at_time <- group[which(abs(read_times - time_point) < 0.005), , drop = FALSE]
  sum(at_time$CFP / at_time$OD600 > threshold, na.rm = TRUE)
}
# Count per plate, printed at top level: IT030 plates ("30") and IS- plates
# ("IS") in media A, B and C; the IS- B plate is counted without the
# spill-over wells (grouped_ISB_clean).
no_amplified(grouped_30A)
no_amplified(grouped_ISA)
no_amplified(grouped_30B)
no_amplified(grouped_ISB_clean)
no_amplified(grouped_30C) #nr ok, see also plot below
no_amplified(grouped_ISC)


# clear to see who is amplified in 30C   ## Supplementary Figure
# Shared canvas: fixed y range, one x tick per day, no legend, no grid.
plot <- ggplot() +
  ylim(0, 29000) +
  scale_x_continuous(breaks = seq(0, 14, 1)) +
  theme_bw() + # no grey background
  theme(legend.position = "none") + # no legend
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  )

# One panel per strain and medium: CFP/OD600 of every well over time (shifted
# by one day); the green line marks mean_E + 4 * sd_E.
amp1 <- plot +
  geom_line(data = grouped_30C, aes(x = Time + 1, y = CFP / OD600, group = Well, linetype = as.factor(medium))) +
  geom_hline(yintercept = mean_E + 4 * sd_E, col = "green") +
  ggtitle("IS+ 0.01%")

amp2 <- plot +
  geom_line(data = grouped_ISC, aes(x = Time + 1, y = CFP / OD600, group = Well, linetype = as.factor(medium))) +
  geom_hline(yintercept = mean_E + 4 * sd_E, col = "green") +
  ggtitle("IS- 0.01%")

amp3 <- plot +
  geom_line(data = grouped_30B, aes(x = Time + 1, y = CFP / OD600, group = Well, linetype = as.factor(medium))) +
  geom_hline(yintercept = mean_E + 4 * sd_E, col = "green") +
  ggtitle("IS+ 0.1%")

amp4 <- plot +
  geom_line(data = grouped_ISB_clean, aes(x = Time + 1, y = CFP / OD600, group = Well, linetype = as.factor(medium))) +
  geom_hline(yintercept = mean_E + 4 * sd_E, col = "green") +
  ggtitle("IS- 0.1%")

amp5 <- plot +
  geom_line(data = grouped_30A, aes(x = Time + 1, y = CFP / OD600, group = Well, linetype = as.factor(medium))) +
  geom_hline(yintercept = mean_E + 4 * sd_E, col = "green") +
  ggtitle("IS+ 1%")

amp6 <- plot +
  geom_line(data = grouped_ISA, aes(x = Time + 1, y = CFP / OD600, group = Well, linetype = as.factor(medium))) +
  geom_hline(yintercept = mean_E + 4 * sd_E, col = "green") +
  ggtitle("IS- 1%")

# left column IS+ (0.01%, 0.1%, 1%), right column IS- (same order)
multiplot(amp1, amp3, amp5, amp2, amp4, amp6, cols = 2)


#### analyse copy number differences in the evolved IS+ population, which depend on medium
# CFP
# CFP/OD600 over time for every IT030 well, coloured by evolution medium.
# alpha = 0.7 is a fixed transparency and is therefore set outside aes();
# inside aes() it was treated as a mapped variable instead of being applied
# as 70 % opacity.
ggplot() + xlim(0, 11) + ylim(0, 30000) +
  geom_line(data = subset(grouped_30C), aes(x = Time, y = CFP / OD600, group = Well, color = as.factor(medium)), alpha = 0.7) +
  geom_line(data = subset(grouped_30B), aes(x = Time, y = CFP / OD600, group = Well, color = as.factor(medium))) +
  geom_line(data = subset(grouped_30A), aes(x = Time, y = CFP / OD600, group = Well, color = as.factor(medium))) +
  # geom_line(data=subset(grouped_D), aes(x=Time, y=CFP/OD600, group=Well, color=as.factor(medium), linetype=as.factor(medium), alpha=0.7))+
  #geom_line(data=subset(grouped_E), aes(x=Time, y=CFP/OD600, group=Well, linetype=as.factor(medium), alpha=0.2),color="grey")+
  theme_bw() + # no grey background
  scale_colour_manual(values = c(light_blue, intermediate_blue, "blue")) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  )

# get mean of max(CFP/OD600 of each well)
# per-well maximum of OD-normalized CFP over all reads
sum_grouped_30C <- ddply(grouped_30C, c("Well"), summarise, max_nCFP = max(CFP / OD600))
sum_grouped_30B <- ddply(grouped_30B, c("Well"), summarise, max_nCFP = max(CFP / OD600))
sum_grouped_30A <- ddply(grouped_30A, c("Well"), summarise, max_nCFP = max(CFP / OD600))
sum_grouped_E <- ddply(grouped_E, c("Well"), summarise, max_nCFP = max(CFP / OD600))

dim(sum_grouped_30C) # value for each Well.

# ancestral threshold: mean + 4 SD of the per-well maxima on control plate E
anc <- mean(sum_grouped_E$max_nCFP) + 4 * sd(sum_grouped_E$max_nCFP)

# exclude ancestral values (<= mean ctr + 4*sd) to obtain mean of amplified:
# wells above the threshold keep their maximum, all others become NA.
# Vectorised over however many wells the summary holds (no fixed 96-well
# loop) and using the threshold computed once above.
## 30C
evolved_max_30C <- ifelse(sum_grouped_30C$max_nCFP > anc, sum_grouped_30C$max_nCFP, NA)
## 30A
evolved_max_30A <- ifelse(sum_grouped_30A$max_nCFP > anc, sum_grouped_30A$max_nCFP, NA)
## 30B
evolved_max_30B <- ifelse(sum_grouped_30B$max_nCFP > anc, sum_grouped_30B$max_nCFP, NA)

# mean and SD of the amplified wells per medium (NA = not amplified, dropped)
a <- mean(na.omit(evolved_max_30A))
as <- sd(na.omit(evolved_max_30A))
b <- mean(na.omit(evolved_max_30B))
bs <- sd(na.omit(evolved_max_30B))
c <- mean(na.omit(evolved_max_30C))
cs <- sd(na.omit(evolved_max_30C))

# summary table, ordered from lowest to highest galactose concentration
name <- c("0.01%", "0.1%", "1%")
max_fluor <- c(c, b, a)
sd_fluor <- c(cs, bs, as)
df <- data.frame(max_fluor, sd_fluor, name)

# plot all with increased CFP fluorescence: one bar per medium (max_fluor)
# with error bars of +/- sd_fluor
ggplot(df) +
  theme_bw() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  ) +
  geom_bar(
    aes(x = name, y = max_fluor),
    stat = "identity", fill = "skyblue", alpha = 0.7
  ) +
  geom_errorbar(
    aes(x = name, ymin = max_fluor - sd_fluor, ymax = max_fluor + sd_fluor),
    width = 0.4, colour = "black", alpha = 0.5, size = 1.3
  )

# pairwise t-tests between media on the amplified wells, then Bonferroni
# correction over the three comparisons
AB <- t.test(na.omit(evolved_max_30A), na.omit(evolved_max_30B))
AC <- t.test(na.omit(evolved_max_30A), na.omit(evolved_max_30C)) # A and C differ significantly (also bonferroni)
BC <- t.test(na.omit(evolved_max_30B), na.omit(evolved_max_30C))
p.adjust(c(AB$p.value, AC$p.value, BC$p.value), method = "bonferroni")

### qPCR analysis identified yfp-only amplified yfp+wells (see also 20190912_qPCR_analysis.R)
# plot all sequenced populations, amplified in 0.1% gal with IS-
# YFP
# Grey: YFP/CFP over time for every well of the cleaned IS- B plate.
# Coloured: the five listed wells, taken from the uncleaned plate
# (grouped_ISB), one colour per well.
ggplot() +
  theme_bw() + # no grey background
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 15)
  ) +
  geom_line(
    data = grouped_ISB_clean,
    aes(x = Time, y = YFP / CFP, group = Well),
    color = "grey"
  ) +
  geom_line(
    data = subset(grouped_ISB, Well %in% c("B2", "E2", "A5", "G1", "F1")),
    aes(x = Time, y = YFP / CFP, group = Well, color = as.factor(Well))
  )



## analysing all wells from the mixed fraction 
### paper plot summarizing EE24 fraction analysis 
##### the following plots show ALL wells from mixed fractions highlighted (as informed by FACS data - i.e. mixed fraction confirmed by FACS)
##### identifying ALL mixed fraction populations #############
## IS+ A
## log CFP-YFP plot - A30 -well ID -find mixed wells informed by FACS (d12)
# Base layer: log(CFP) vs log(YFP) for every read of every well on plate 30A.
# On top, the listed wells are redrawn at size 2 with a per-well colour label
# and with point transparency mapped to the read time (as a factor).
ggplot()+
  theme_bw()+  #no grey background
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),text=element_text(size=15)) +
  #  geom_point(data=grouped_30A, aes(x=log(CFP), y=log(YFP), group=Well, color=as.factor(Well)), size=2)+
  geom_point(data=grouped_30A, aes(x=log(CFP), y=log(YFP), group=Well))+
  #specific wells:
  geom_point(data=subset(grouped_30A,Well=="F3"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="F3-FACS"), size=2)+
  geom_point(data=subset(grouped_30A,Well=="H1"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="H1-FACS"), size=2)+
  geom_point(data=subset(grouped_30A,Well=="G7"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="G7-FACS"), size=2)+
  geom_point(data=subset(grouped_30A,Well=="D11"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="D11-FACS"), size=2)

#IS- A
## log CFP-YFP plot - AIS -well ID -find mixed wells informed by FACS (d12)
# Base layer: log(CFP) vs log(YFP) for every read of every well on plate ISA.
# On top, the listed wells are redrawn at size 2 with a per-well colour label
# and with point transparency mapped to the read time (as a factor).
ggplot()+
  theme_bw()+  #no grey background
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),text=element_text(size=15)) +
  #geom_point(data=grouped_ISA, aes(x=log(CFP), y=log(YFP), group=Well, color=as.factor(Well)), size=2)
  geom_point(data=grouped_ISA, aes(x=log(CFP), y=log(YFP), group=Well))+
  #specific wells:
  geom_point(data=subset(grouped_ISA,Well=="A2"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="A2-FACS"), size=2)+
  #geom_point(data=subset(grouped_ISA,Well=="A1"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="A1-FACS"), size=2)+ #seems clear in FACS
  geom_point(data=subset(grouped_ISA,Well=="D1"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="D1-FACS"), size=2)+
  # NOTE(review): this layer selects well E12 but is labelled "D12-FACS" -
  # confirm which well is meant and align the subset or the label.
  geom_point(data=subset(grouped_ISA,Well=="E12"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="D12-FACS"), size=2)+
  geom_point(data=subset(grouped_ISA,Well=="H1"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="H1-FACS"), size=2)+
  geom_point(data=subset(grouped_ISA,Well=="B2"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="B2-FACS"), size=2)

## IS+ B
## log CFP-YFP plot - B30 -well ID -find mixed wells informed by FACS (d12)
# Base layer: log(CFP) vs log(YFP) for every read of every well on plate 30B.
# On top, the listed wells are redrawn at size 2 with a per-well colour label
# and with point transparency mapped to the read time (as a factor).
ggplot()+
  theme_bw()+  #no grey background
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),text=element_text(size=15)) +
  # geom_point(data=grouped_30B, aes(x=log(CFP), y=log(YFP), group=Well, color=as.factor(Well)), size=2)
  geom_point(data=grouped_30B, aes(x=log(CFP), y=log(YFP), group=Well))+
  #specific wells:
  geom_point(data=subset(grouped_30B,Well=="B2"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="B2-FACS"), size=2)+
  geom_point(data=subset(grouped_30B,Well=="B8"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="B8-FACS"), size=2)+
  geom_point(data=subset(grouped_30B,Well=="E5"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="E5-FACS"), size=2)+
  geom_point(data=subset(grouped_30B,Well=="A10"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="A10-FACS"), size=2)+
  geom_point(data=subset(grouped_30B,Well=="H3"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="H3-FACS"), size=2)

##IS- B
## log CFP-YFP plot - BIS -well ID -find mixed wells informed by FACS (d12)
# Base layer: log(CFP) vs log(YFP) for every read of every well on the IS- B
# plate with the spill-over wells removed (grouped_ISB_clean).
# On top, the listed wells are redrawn at size 2 with a per-well colour label
# and with point transparency mapped to the read time (as a factor).
ggplot()+
  theme_bw()+  #no grey background
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),text=element_text(size=15)) +
  #  geom_point(data=grouped_ISB_clean, aes(x=log(CFP), y=log(YFP), group=Well, color=as.factor(Well)), size=2)
  geom_point(data=grouped_ISB_clean, aes(x=log(CFP), y=log(YFP), group=Well))+
  #specific wells:
  geom_point(data=subset(grouped_ISB_clean,Well=="B9"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="B9-FACS"), size=2)+
  #geom_point(data=subset(grouped_ISB_clean,Well=="F3"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="F3-FACS"), size=2)+
  geom_point(data=subset(grouped_ISB_clean,Well=="D10"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="D10-FACS"), size=2)+
  geom_point(data=subset(grouped_ISB_clean,Well=="G10"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="G10-FACS"), size=2)+
  geom_point(data=subset(grouped_ISB_clean,Well=="E9"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time),color="E9-FACS"), size=2)


## C (0.01% gal)
## log CFP-YFP plot - C plates (30C, ISC) and control E -well ID -find mixed wells informed by FACS (d12)
# Overlay of log(CFP) vs log(YFP) for every read: plate 30C in black, plate
# ISC in red, control plate E in dark grey. Well B1 of plate 30C is redrawn
# in blue at size 2 with transparency mapped to the read time (as a factor).
# Legend and axis titles are switched off.
ggplot()+
  theme_bw()+ theme(legend.position = "none")+theme(axis.title.x=element_blank(),axis.title.y=element_blank())+  #no grey background
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),text=element_text(size=15)) +
  # geom_point(data=grouped_30C, aes(x=log(CFP), y=log(YFP), group=Well, color=as.factor(Well)), size=2)
  geom_point(data=grouped_30C, aes(x=log(CFP), y=log(YFP), group=Well),color="black")+
  geom_point(data=grouped_ISC, aes(x=log(CFP), y=log(YFP), group=Well),color="red",alpha=0.7)+
  geom_point(data=grouped_E, aes(x=log(CFP), y=log(YFP), group=Well), color="dark grey",alpha=0.6)+
  
  #specific wells:
  geom_point(data=subset(grouped_30C,Well=="B1"), aes(x=log(CFP), y=log(YFP), group=Well, alpha=as.factor(Time)),color="blue", size=2)
#############




## analysis leading to grouped_ISB_clean
## spilling occurred in IS- B (while handling plate); exclude wells which didn't evolve but just got inoculated by spill, based on tile plots
## exclusion of wells may result in an underestimate of the number of amplified populations.
## tile plot function ## ## ##
# Heat map of one read of a plate.
#
# OD    wide table such as OD_30A; columns 2 to 97 are used as the well
#       values.
# time  row index of the read to draw (a position in OD, not a time value).
#
# Returns a ggplot object: a 12 x 8 tile grid filled by the selected row's
# values, on a red (low) to blue (high) gradient.
tilefun <- function(OD, time) {
  # 12 columns by 8 rows; the second coordinate runs 8 down to 1, so the
  # first twelve values land in the top row
  well_grid <- data.frame((expand.grid(1:12, 8:1)))
  well_grid$z <- t(OD[time, 2:97]) # values of the selected read, one per tile
  ggplot(well_grid, aes(x = Var1, y = Var2, fill = z)) +
    geom_tile() +
    scale_fill_gradient(low = "red", high = "blue")
}
### ### ### ### ### ### ### ### 

# tiles 30B (OD600 table of plate 30-B)
b <- tilefun(OD_30B, 3) # input data + row index of the read
c <- tilefun(OD_30B, 4)
d <- tilefun(OD_30B, 5)
e <- tilefun(OD_30B, 9)
f <- tilefun(OD_30B, 10)
g <- tilefun(OD_30B, 11)
multiplot(b, c, d, e, f, g, cols = 1)

# tiles ISB: one heat map per read (rows 1 to 12 of the OD600 table)
a <- tilefun(OD_ISB, 1) # input data + row index of the read
b <- tilefun(OD_ISB, 2)
c <- tilefun(OD_ISB, 3)
d <- tilefun(OD_ISB, 4)
e <- tilefun(OD_ISB, 5)
f <- tilefun(OD_ISB, 6)
g <- tilefun(OD_ISB, 7)
h <- tilefun(OD_ISB, 8)
i <- tilefun(OD_ISB, 9)
j <- tilefun(OD_ISB, 10)
k <- tilefun(OD_ISB, 11)
l <- tilefun(OD_ISB, 12)
# two pages of six stacked maps each
multiplot(a, b, c, d, e, f, cols = 1)
multiplot(g, h, i, j, k, l, cols = 1)
## >> exclude wells for analysis based on spill over
#(spill from B2) exclude C3,C2,D2,E2, A1,A2,A3,B1,B3,C1
#(spill from G1) exclude H1,H2,F1, G2
#(spill from A5) exclude A6
#(spill from F9) exclude E10
#(spill from G10) exclude F11
## everything after is considered independent, because 1 well distance (also diagonally)





