#################################################################################################################################
# Script for 3 color HSS analysis 
# FACS/Flow analysis in R
# 
#################################################################################################################################
#################################################################################################################################

# Load Packages
library(flowCore)

#################################################################################################################################
## Reading in FCS files ##

# Read the .fcs file and extract its expression matrix
# (rows are treated as cells below; one titled column per measured parameter)
flowData <- exprs(
  read.FCS("/Users/PATH/to/FILE")
)

#flowData contains a titled column for each parameter measured:
# "FSC-A" is Forward Scatter 
# "SSC-A" is Side Scatter 
# "Alexa Fluor 488-A" is for signal from "green" or SF-GFP
# "DsRed-A" is for signal from "orange" or mKO2
# "PE-Cy5-A" is for signal from "red" or E2C


#################################################################################################################################
## Determining cell size gate ##

# Scatter plot of FSC against SSC, used to locate the head of the comet;
# repeated for every strain in each experiment
plot(
  x = flowData[, "FSC-A"],
  y = flowData[, "SSC-A"],
  pch = ".",
  main = "Strain Name",
  xlab = "fsc",
  ylab = "ssc"
)

#using above plot determine the min and max values for FSC and SSC to be within the gate
#exact values depended on cytometer settings and vary day to day 
#must be determined for each separate flow experiment, determined as in Figure 1 supplement D

# Size-gate limits. Replace each NA_real_ with the value read off the FSC/SSC
# plot. Each placeholder is a complete statement, so an unfilled limit no
# longer chains into the next assignment and breaks the script.
fscMin <- NA_real_ # Determined from plot
fscMax <- NA_real_ # Determined from plot
sscMin <- NA_real_ # Determined from plot
sscMax <- NA_real_ # Determined from plot

# mask(): isolate the cells that fall inside the FSC/SSC size gate.
#   x       matrix (or data frame) with "FSC-A" and "SSC-A" columns
#   returns the rows of x with fscMin < FSC-A < fscMax and
#           sscMin < SSC-A < sscMax (all columns kept)
# The limits are read from fscMin/fscMax/sscMin/sscMax at call time; an error
# is raised if any of them has not been set, instead of silently returning an
# empty gate.
mask <- function(x) {
  gate <- c(fscMin = fscMin, fscMax = fscMax, sscMin = sscMin, sscMax = sscMax)
  if (!is.numeric(gate) || length(gate) != 4L || anyNA(gate)) {
    stop(
      "size gate is not set: assign fscMin, fscMax, sscMin and sscMax ",
      "from the FSC/SSC plot before calling mask()",
      call. = FALSE
    )
  }
  fsc <- x[, "FSC-A"]
  ssc <- x[, "SSC-A"]
  in_gate <- fsc > fscMin & fsc < fscMax & ssc > sscMin & ssc < sscMax
  # which() drops rows whose comparison is NA, as subset() did
  x[which(in_gate), , drop = FALSE]
}

# Apply the size gate to flowData
maskedData <- mask(x = flowData)

#################################################################################################################################
## Isolating successfully nucleated cells for Fig1+2 ##

# Load the strain with no XFPs ("unstained") and apply the fsc/ssc gate
unstained <- mask(exprs(read.FCS("/Users/PATH/to/FILE")))

# Green cutoff: median of the unstained cells plus two standard deviations
green_ave <- median(unstained[, "Alexa Fluor 488-A"])
green_sd <- sd(unstained[, "Alexa Fluor 488-A"])
GREENoff <- green_ave + 2 * green_sd

# Orange cutoff: median of the unstained cells plus two standard deviations
orange_ave <- median(unstained[, "DsRed-A"])
orange_sd <- sd(unstained[, "DsRed-A"])
ORANGEoff <- orange_ave + 2 * orange_sd

# gate_greenOFF(): keep the cells whose green signal is below GREENoff.
#   x       matrix (or data frame) with an "Alexa Fluor 488-A" column
#   returns the rows of x with green < GREENoff; rows whose comparison is NA
#           are dropped
gate_greenOFF <- function(x) {
  green <- x[, "Alexa Fluor 488-A"]
  x[which(green < GREENoff), , drop = FALSE]
}

# gate_orangeOFF(): keep the cells whose orange signal is below ORANGEoff.
#   x       matrix (or data frame) with a "DsRed-A" column
#   returns the rows of x with orange < ORANGEoff; rows whose comparison is NA
#           are dropped
gate_orangeOFF <- function(x) {
  orange <- x[, "DsRed-A"]
  x[which(orange < ORANGEoff), , drop = FALSE]
}

# Apply the appropriate nucleation gate function to maskedData.
# Run only ONE of the two lines: if both run as written, the second
# assignment overwrites the first.
nucleatedData <- gate_greenOFF(x = maskedData)
# or #
nucleatedData <- gate_orangeOFF(x = maskedData)

#################################################################################################################################
## Normalizing to max fluorescence values from Δclr4 strains ##

# Load the Δclr4 strain and keep only the cells inside the fsc/ssc gate
dclr4 <- mask(exprs(read.FCS("/Users/PATH/to/FILE")))

# Median signal of each color in the gated Δclr4 cells (the "max" values)
green_max <- median(dclr4[, "Alexa Fluor 488-A"])
orange_max <- median(dclr4[, "DsRed-A"])
red_max <- median(dclr4[, "PE-Cy5-A"])

# filterData(): scale every cell by the dClr4 "max" value of each color.
#   filename  matrix (or data frame) with "Alexa Fluor 488-A", "DsRed-A" and
#             "PE-Cy5-A" columns
#   returns   data frame with columns Green, Orange and Red, each being the
#             raw signal divided by green_max, orange_max or red_max
filterData <- function(filename) {
  data.frame(
    Green = filename[, "Alexa Fluor 488-A"] / green_max,
    Orange = filename[, "DsRed-A"] / orange_max,
    Red = filename[, "PE-Cy5-A"] / red_max
  )
}

# Normalize to the max values for all cells inside the size gate
filteredDataAll <- filterData(filename = maskedData)

## or ##

# Normalize to the max values for the nucleated cells in nucleatedData only
filteredDataNuc <- filterData(filename = nucleatedData)

#################################################################################################################################
## Normalizing by red values ##

# redNorm(): divide green and orange by red for each data point.
#   filename  data frame (or matrix) with "Green", "Orange" and "Red" columns
#   returns   data frame with columns GreenNorm (Green / Red) and
#             OrangeNorm (Orange / Red)
redNorm <- function(filename) {
  red <- filename[, "Red"]
  data.frame(
    GreenNorm = filename[, "Green"] / red,
    OrangeNorm = filename[, "Orange"] / red
  )
}

# Apply the redNorm function to filteredDataAll or filteredDataNuc
normalizedDataAll <- redNorm(filename = filteredDataAll)
## or ##
normalizedDataNuc <- redNorm(filename = filteredDataNuc)

#################################################################################################################################
#################################################################################################################################
## Output ##

#normalizedDataAll is a two column dataframe containing normalized green and orange values 
#for cells of the strain analyzed cells have been gated for FSC & SSC 
#normalizedDataNuc additionally has been gated for successful nucleation
#values are normalized to a "max value" in each color from a matched background Δclr4 strain
#for each cell green and orange values have been normalized to red

#################################################################################################################################
#################################################################################################################################



#################################################################################################################################
## Calculating fractions for Fig4 B,C ##

# Performed for every condition of each strain:
# read in the flow data to be analyzed
flowData <- exprs(
  read.FCS("/Users/PATH/to/FILE")
)

# gate_gfpOFF(): keep the cells whose green signal is below a fixed cutoff.
# The lasers on the cytometer were tuned so that color-negative "unstained"
# cells had average green values of 10^2; the default cutoff is 400.
#   x       matrix (or data frame) with an "Alexa Fluor 488-A" column
#   cutoff  single number; cells with green >= cutoff are removed
#           (defaults to 400, the value used for the figures)
#   returns the rows of x with green < cutoff; rows whose comparison is NA
#           are dropped
gate_gfpOFF <- function(x, cutoff = 400) {
  stopifnot(is.numeric(cutoff), length(cutoff) == 1L, !is.na(cutoff))
  green <- x[, "Alexa Fluor 488-A"]
  x[which(green < cutoff), , drop = FALSE]
}


# normData(): normalize by dividing through by red for each data point.
#   filename  matrix (or data frame) with "Alexa Fluor 488-A", "DsRed-A" and
#             "PE-Cy5-A" columns
#   returns   data frame with columns GreenNorm (green / red) and
#             OrangeNorm (orange / red)
normData <- function(filename) {
  red <- filename[, "PE-Cy5-A"]
  data.frame(
    GreenNorm = filename[, "Alexa Fluor 488-A"] / red,
    OrangeNorm = filename[, "DsRed-A"] / red
  )
}

# Gate flowData on green OFF, then divide through by red for each cell
data_gfpGate <- gate_gfpOFF(flowData)
norm_data <- normData(filename = data_gfpGate)

# Fewer than 500 cells meeting this criterion: discard the result
# (norm_data is set to NULL; the script itself is not halted)
if (nrow(norm_data) < 500) norm_data <- NULL


# normMax(): red-normalize the dclr4 strain and take medians to define "MAX".
#   filename  matrix (or data frame) with "Alexa Fluor 488-A", "DsRed-A" and
#             "PE-Cy5-A" columns
#   returns   one-row data frame with GreenMax and OrangeMax, the medians of
#             green / red and orange / red (NA values ignored)
normMax <- function(filename) {
  red <- filename[, "PE-Cy5-A"]
  data.frame(
    GreenMax = median(filename[, "Alexa Fluor 488-A"] / red, na.rm = TRUE),
    OrangeMax = median(filename[, "DsRed-A"] / red, na.rm = TRUE)
  )
}

# Load the matched dClr4 strain and condition, and reduce it to its max
# values with normMax
dclr4Data <- normMax(filename = exprs(read.FCS("/Users/PATH/to/FILE")))


# Load and red-normalize the PAS217 0uM TSA 0uM TSA Pre-Condition data;
# this sample is used to set the orangeOFF cutoff value
OFFnorm <- normData(filename = exprs(read.FCS("/Users/PATH/to/FILE")))

# defineOFF(): determine the OFF cutoff for orange.
# Scales the red-normalized orange values of the reference sample by the
# Δclr4 max orange value, then returns mean plus two standard deviations.
#   file1   data frame with an "OrangeNorm" column (e.g. OFFnorm)
#   file2   data frame with an "OrangeMax" column (e.g. output of normMax)
#   returns a single number, the orange cutoff
# NA/NaN ratios are ignored (na.rm = TRUE, as in normMax) so that one
# undefined ratio does not turn the whole cutoff into NA. The value is the
# last evaluated expression, so it is returned visibly rather than as the
# invisible result of an assignment.
defineOFF <- function(file1, file2) {
  orange_div <- file1[, "OrangeNorm"] / file2[, "OrangeMax"] # normalize to dclr4 max
  mean(orange_div, na.rm = TRUE) + 2 * sd(orange_div, na.rm = TRUE)
}

# Orange cutoff value from the reference sample and the dclr4 max
orangeOFF <- defineOFF(file1 = OFFnorm, file2 = dclr4Data)

# Orange values in norm_data divided by the orange max value from dclr4Data
orangeFiltered <- norm_data[, "OrangeNorm"] / dclr4Data[, "OrangeMax"]

# Cells with orange values below the orangeOFF cutoff
# (which() drops NA comparisons, as subset() does)
OFF <- orangeFiltered[which(orangeFiltered < orangeOFF)]

# Fraction of cells with orange below the cutoff
frac <- length(OFF) / length(orangeFiltered)



#################################################################################################################################
## Calculating fractions for Fig4 D ##

# Performed for every condition of each strain:
# read in the flow data to be analyzed
flowData <- exprs(
  read.FCS("/Users/PATH/to/FILE")
)
# then divide through by red for each cell
norm_data <- normData(filename = flowData)

# Load the matched dClr4 strain and condition, and reduce it to its max
# values with normMax
dclr4Data <- normMax(filename = exprs(read.FCS("/Users/PATH/to/FILE")))

# Orange cutoff value from the reference sample (OFFnorm) and the dclr4 max
orangeOFF <- defineOFF(file1 = OFFnorm, file2 = dclr4Data)

# Orange values in norm_data divided by the orange max value from dclr4Data
orangeFiltered <- norm_data[, "OrangeNorm"] / dclr4Data[, "OrangeMax"]

# Cells with orange values below the orangeOFF cutoff
# (which() drops NA comparisons, as subset() does)
OFF <- orangeFiltered[which(orangeFiltered < orangeOFF)]

# Fraction of cells with orange below the cutoff
frac <- length(OFF) / length(orangeFiltered)


#################################################################################################################################
## Calculating fractions for Fig4 F ##

# Performed for every condition of each strain, each day:
# read in the flow data to be analyzed
flowData <- exprs(
  read.FCS("/Users/PATH/to/FILE")
)
# then divide through by red for each cell
norm_data <- normData(filename = flowData)

# Load the matched dclr4 strain and condition, and reduce it to its max
# values with normMax
dclr4Data <- normMax(filename = exprs(read.FCS("/Users/PATH/to/FILE")))

# filtering(): divide by the clr4 max values for both green and orange.
#   filename  data frame with "GreenNorm" and "OrangeNorm" columns
#   dclr4     data frame whose first row holds "GreenMax" and "OrangeMax"
#   returns   data frame with columns Green and Orange, now normalized to
#             the clr4 "max"
filtering <- function(filename, dclr4) {
  green_scale <- dclr4[1, "GreenMax"]
  orange_scale <- dclr4[1, "OrangeMax"]
  data.frame(
    Green = filename[, "GreenNorm"] / green_scale,
    Orange = filename[, "OrangeNorm"] / orange_scale
  )
}

# Normalize the data in norm_data to dclr4
filteredData <- filtering(filename = norm_data, dclr4 = dclr4Data)

# A "red only" control strain was normalized to the dclr4 strains in the same
# way as the analysis strains. Mean plus two standard deviations gave values
# that were similar but varied slightly across days, so hard cutoffs based on
# those values were applied to all data to standardize the analysis.
greenOFF <- 0.35 # green cutoff
orangeOFF <- 0.4 # orange cutoff

# Cells in filteredData with green below the green cutoff
# (which() drops NA comparisons, as subset() does)
green_gated <- filteredData[which(filteredData[, "Green"] < greenOFF), , drop = FALSE]
# Of those, the cells that also have orange below the orange cutoff
OFF <- green_gated[which(green_gated[, "Orange"] < orangeOFF), , drop = FALSE]

# Fraction of cells with orange AND green below these cutoffs:
# rows in OFF (= number of OFF cells) divided by rows in norm_data
frac <- nrow(OFF) / nrow(norm_data)

## Calculating fractions for Fig 4 Supplement 2 A ##

#fractions were calculated as for Figure 4 with the exceptions that a size gate was called
#as described previously on the data in flowData before the normData function was called; 
#size gates were determined independently for each temperature 

# Cutoff values used for this experiment
greenOFF <- 0.4
orangeOFF <- 0.4

#################################################################################################################################
## Calculating fractions for Fig 5B and Fig 5 supplement A ##

#fractions were calculated as for Figure 4 with the exceptions that a size gate was called
#as described previously on the data in flowData before the normData function was called; 
#green and orange values in filteredData were plotted in Fig5C and S5A
# Cutoff values used for this experiment
greenOFF <- 0.4
orangeOFF <- 0.4

## Calculating fractions for Fig 5E and Fig 5 supplement B##

#fractions were calculated as for Figure 4 with no size gate because of poor strain growth
#green and orange values in filteredData were plotted in Fig5F and S5B
# Cutoff values used for this experiment
greenOFF <- 0.4
orangeOFF <- 0.4

#################################################################################################################################







