library(Seurat)
library(dplyr)
# Load the 10X count matrices for the wild-type and knockout samples and wrap
# each one in its own Seurat object, labelled with a per-sample project name.
wt.data <- Read10X(data.dir = "#path to file")
wt <- CreateSeuratObject(raw.data = wt.data, project = "OTX2WT")

ko.data <- Read10X(data.dir = "C#path to file")
ko <- CreateSeuratObject(raw.data = ko.data, project = "OTX2CRISPR")

# Merge both samples into one object; add.cell.id1 / add.cell.id2 tag the
# cells of each input with "WT" and "KO" respectively.
# (A stray "+" console continuation prompt in front of `project` was removed:
# it made the argument list unparseable.)
merged <- MergeSeurat(
  object1 = wt,
  object2 = ko,
  add.cell.id1 = "WT",
  add.cell.id2 = "KO",
  project = "OTX2WTvCRISPR"
)

# QC metrics: for every cell, the fraction (0-1, not a percentage) of its raw
# counts that come from mitochondrial and from ribosomal genes.
MTgenes_gg5 <- read.csv("#path to/MTgenes_gg5.csv")
ribogenes <- read.csv("#path to/ribogenes.csv")

# Keep only the listed gene names that start with the expected prefix.
mito.genes <- grep(pattern = "^MT", x = MTgenes_gg5$gene, value = TRUE)
ribo.genes <- grep(pattern = "^R", x = ribogenes$gene, value = TRUE)

# Total raw counts per cell: the shared denominator of both fractions.
counts.per.cell <- Matrix::colSums(merged@raw.data)
percent.mito <- Matrix::colSums(merged@raw.data[mito.genes, ]) / counts.per.cell
percent.ribo <- Matrix::colSums(merged@raw.data[ribo.genes, ]) / counts.per.cell

# Attach both fractions to the object's metadata under matching column names.
merged <- AddMetaData(
  object = merged,
  metadata = percent.mito,
  col.name = "percent.mito"
)
merged <- AddMetaData(
  object = merged,
  metadata = percent.ribo,
  col.name = "percent.ribo"
)

# Inspect the distributions before choosing filter thresholds.
VlnPlot(
  object = merged,
  features.plot = c("nGene", "nUMI", "percent.mito", "percent.ribo"),
  nCol = 2
)

# Filter cells on detected-gene count and on mitochondrial / ribosomal
# fraction, in a single pass.
#   nGene        : kept when between 200 and 2500
#   percent.mito : kept when below 0.004 (a fraction, i.e. 0.4 %)
#   percent.ribo : kept when below 0.5
# NOTE(review): this was previously written as two passes, the second one
# using an nGene range of (200, 3200). Because the first pass already capped
# nGene at 2500, the 3200 bound never had any effect; the thresholds below are
# the ones that were actually in force. If 3200 was the intended cap, change
# the nGene upper bound here.
# NOTE(review): 0.004 is a very strict mitochondrial cutoff - confirm it
# against the violin plot of percent.mito.
merged <- FilterCells(
  object = merged,
  subset.names = c("nGene", "percent.mito", "percent.ribo"),
  low.thresholds = c(200, -Inf, -Inf),
  high.thresholds = c(2500, 0.004, 0.5)
)

# Cell-cycle scoring.
# The marker file is read as one gene per line; entries 1-43 are used as the
# S-phase set and entries 44-94 as the G2/M set.
# NOTE(review): these index ranges are hard-coded - verify they still match
# the contents of the adapted gene list file.
cc.genes <- readLines(
  con = "#path to/regev_lab_cell_cycle_genes_gg5adapted.txt"
)
s.genes <- cc.genes[seq_len(43)]
g2m.genes <- cc.genes[seq(from = 44, to = 94)]

# set.ident = TRUE switches the object's identity to the assigned phase.
# NOTE(review): scoring runs before the explicit NormalizeData() call below;
# confirm the merged object already holds normalized data at this point.
merged <- CellCycleScoring(
  object = merged,
  s.genes = s.genes,
  g2m.genes = g2m.genes,
  set.ident = TRUE
)

merged <- NormalizeData(object = merged)

# Find variable genes to use for PCA.
# FindVariableGenes() is called with its default cutoffs: no y.cutoff is
# passed here. To keep only genes more than two standard deviations away from
# the average dispersion within a bin, pass y.cutoff = 2 explicitly.
merged <- FindVariableGenes(object = merged)
length(merged@var.genes)

# percent.mito and percent.ribo are not recomputed here: both columns were
# attached to the metadata before filtering, and they depend only on the raw
# counts, so recalculating them would store identical per-cell values.

# Scale the data while regressing out sequencing depth, mitochondrial and
# ribosomal fraction, and the two cell-cycle scores.
merged <- ScaleData(
  object = merged,
  vars.to.regress = c(
    "nUMI", "percent.mito", "percent.ribo", "S.Score", "G2M.Score"
  ),
  display.progress = TRUE
)

# PCA on the variable genes: compute 40 PCs and print the top 5 genes of the
# first 5 PCs.
# (Stray "+" console continuation prompts in front of the arguments were
# removed: they made the call unparseable.)
merged <- RunPCA(
  object = merged,
  pc.genes = merged@var.genes,
  do.print = TRUE,
  pcs.print = 1:5,
  genes.print = 5,
  pcs.compute = 40
)

# The identity is currently the cell-cycle "Phase"; it needs to be switched
# back to the original sample identity (wt/ko) stored in "orig.ident".
merged <- SetAllIdent(object = merged, id = "orig.ident")
PCAPlot(
  object = merged,
  dim.1 = 1,
  dim.2 = 2
)

# JackStraw over all 40 computed PCs, then plot them to judge which PCs to
# carry into clustering.
merged <- JackStraw(
  object = merged,
  num.pc = 40,
  num.replicate = 100,
  display.progress = TRUE
)
JackStrawPlot(object = merged, PCs = 1:40)

# Clustering and tSNE both run on the same PCs (1-33), so the range is named
# once and reused.
cluster.dims <- 1:33

# Cluster on the PCA reduction at resolution 0.6, keeping the SNN graph and
# suppressing printed output.
merged <- FindClusters(
  object = merged,
  reduction.type = "pca",
  dims.use = cluster.dims,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE
)

# tSNE embedding on the same PCs, then plot it.
merged <- RunTSNE(
  object = merged,
  dims.use = cluster.dims,
  do.fast = TRUE
)
TSNEPlot(object = merged)
