library(Seurat)
library(dplyr)
# Data import ----
# Read the 10X output folder into a count matrix, then wrap it in a Seurat
# object for project "OTX2CRISPR" using the min.cells / min.genes cut-offs
# given below.
wt.data <- Read10X(data.dir = "#path to folder")
wt <- CreateSeuratObject(
  raw.data = wt.data,
  project = "OTX2CRISPR",
  min.cells = 3,
  min.genes = 200
)

# Mitochondrial QC ----
# Table of mitochondrial genes; the `gene` column holds the gene names that
# are matched against the count matrix below.
MTgenes_gg5 <- read.csv("#path to file/MTgenes_gg5.csv")

# Keep the entries of the table whose name starts with "MT".
mitowt.genes <- grep(pattern = "^MT", x = MTgenes_gg5$gene, value = TRUE)

# Indexing the count matrix with a gene name it does not contain raises
# "subscript out of bounds", so restrict to genes that are actually present
# and report anything that was dropped.
mito.missing <- setdiff(mitowt.genes, rownames(wt@raw.data))
if (length(mito.missing) > 0) {
  warning(
    length(mito.missing),
    " mitochondrial gene(s) not found in wt@raw.data and ignored: ",
    paste(mito.missing, collapse = ", "),
    call. = FALSE
  )
}
mitowt.genes <- intersect(mitowt.genes, rownames(wt@raw.data))
if (length(mitowt.genes) == 0) {
  warning(
    "No mitochondrial genes matched wt@raw.data; percent.mito will be 0.",
    call. = FALSE
  )
}

# Per-cell fraction (0-1, not a percentage) of counts coming from the
# mitochondrial genes. `drop = FALSE` keeps the subset a matrix even when
# only a single gene is selected, so colSums() still works.
percent.mitowt <- Matrix::colSums(wt@raw.data[mitowt.genes, , drop = FALSE]) /
  Matrix::colSums(wt@raw.data)
wt <- AddMetaData(object = wt, metadata = percent.mitowt, col.name = "percent.mito")

# Visual check of the QC metrics before choosing filter thresholds.
VlnPlot(object = wt, features.plot = c("nGene", "nUMI", "percent.mito"), nCol = 3)

# Cell filtering ----
# Thresholds are paired by position with `subset.names`:
#   nGene        bounded by 200 and 3200
#   percent.mito no lower bound, upper bound 0.05
wt <- FilterCells(
  object = wt,
  subset.names = c("nGene", "percent.mito"),
  low.thresholds = c(200, -Inf),
  high.thresholds = c(3200, 0.05)
)

# Normalization and variable-gene detection ----
# Both steps run with the package defaults.
wt <- NormalizeData(object = wt)
wt <- FindVariableGenes(object = wt)


# Cell-cycle scoring ----
# The marker file is read one gene per line; entries 1-43 are used as the
# S-phase set and entries 44-94 as the G2/M set.
cc.genes <- readLines(con = "#path to file/regev_lab_cell_cycle_genes_gg5adapted.txt")
s.genes <- cc.genes[1:43]
g2m.genes <- cc.genes[44:94]
wt <- CellCycleScoring(
  object = wt,
  s.genes = s.genes,
  g2m.genes = g2m.genes,
  set.ident = TRUE
)

# Scaling ----
# ScaleData performs the regression: UMI depth, mitochondrial fraction and
# the two cell-cycle scores are passed as the variables to regress out.
wt <- ScaleData(
  object = wt,
  vars.to.regress = c("nUMI", "percent.mito", "S.Score", "G2M.Score"),
  display.progress = TRUE
)

# Initial PCA ----
# Compute 30 PCs on the detected variable genes. Use the heatmaps to check
# whether additional cell-cycle genes or ribosomal genes are driving any PC.
wt <- RunPCA(
  object = wt,
  pc.genes = wt@var.genes,
  pcs.compute = 30,
  do.print = TRUE,
  pcs.print = 1:5,
  genes.print = 5
)

# Heatmaps for all 30 PCs, 500 cells each.
PCHeatmap(
  object = wt,
  pc.use = 1:30,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE,
  use.full = FALSE
)

# Closer look at PC 10 on its own.
PCHeatmap(
  object = wt,
  pc.use = 10,
  cells.use = 500,
  do.balanced = TRUE,
  label.columns = FALSE
)

# Curated variable genes ----
# Print and export the automatically detected variable genes.
wt@var.genes
write.csv(wt@var.genes, "#path/variablegeneswt.csv")

# Read the cleaned gene list back in.
# NOTE(review): presumably this is the exported list after manual curation
# (e.g. removing cell-cycle / ribosomal genes) - confirm with the author.
# `stringsAsFactors = FALSE` keeps `gene` as character on R < 4.0, so it is
# never used as factor integer codes when passed as `pc.genes`.
variablegenesclean <- read.csv(
  "C:/Miruna/10X scRNAseq data/DATA/variablegeneswt_cleaned.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
if (!"gene" %in% names(variablegenesclean)) {
  stop("variablegeneswt_cleaned.csv must contain a 'gene' column", call. = FALSE)
}

# Must come after the read above: the original order referenced
# `variablegenesclean` before it existed and failed in a fresh session.
vargeneslist <- as.list(variablegenesclean)

# Re-run the 30-PC PCA on the curated genes and re-inspect PC 10.
wt <- RunPCA(object = wt, pc.genes = variablegenesclean$gene, do.print = TRUE, pcs.print = 1:5, genes.print = 5, pcs.compute = 30)
PCHeatmap(object = wt, pc.use = 10, cells.use = 500, do.balanced = TRUE, label.columns = FALSE)
# PC significance ----
# JackStraw compares the distribution of p-values for each PC against a
# uniform distribution; in the plot, the dots of an informative PC should sit
# clearly apart from the dotted line.
wt <- JackStraw(
  object = wt,
  num.pc = 30,
  num.replicate = 100,
  display.progress = TRUE
)
JackStrawPlot(object = wt, PCs = 1:30)

# Clustering ----
# Cluster on PCs 1-17 at resolution 0.6, keeping the SNN graph, then print
# the parameters that were used.
wt <- FindClusters(
  object = wt,
  reduction.type = "pca",
  dims.use = 1:17,
  resolution = 0.6,
  print.output = 0,
  save.SNN = TRUE
)
PrintFindClustersParams(object = wt)

# tSNE embedding ----
# Uses the same PCs (1-17) as the clustering step.
wt <- RunTSNE(
  object = wt,
  dims.use = 1:17,
  do.fast = TRUE
)
TSNEPlot(object = wt)