# ---- ENS95: load counts and compute QC metrics ----
# First set of commands uses ENS95-quantified data from CellRanger.
# Every function called below comes from Seurat; load it explicitly so the
# script also runs in a fresh R session.
library(Seurat)

# Read the CellRanger filtered feature/barcode matrix and build the Seurat
# object (min.cells = 3, min.features = 200 applied at creation time).
data <- Read10X(data.dir = "/project/umw_nathan_lawson/DolphinOuts/1_wholeEmbryo5dpf_ENS95/cellranger_count/SRR10095965/outs/filtered_feature_bc_matrix")
emb5dpf <- CreateSeuratObject(counts = data, project = "emb5dpf_ENS95", min.cells = 3, min.features = 200)

# Add a metadata column with the percentage of counts coming from
# mitochondrial genes. The pattern is anchored with "^" so that only gene
# names that START with "mt-" are counted; an unanchored "mt-" would also
# match any gene whose name merely contains that substring.
emb5dpf[["percent.mt"]] <- PercentageFeatureSet(emb5dpf, pattern = "^mt-")
# Export the following plot to an 8.5w x 6h PDF in landscape.
VlnPlot(emb5dpf, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

# QC filter: keep cells with more than 200 and fewer than 2500 detected
# features and with less than 5% mitochondrial counts.
emb5dpf <- subset(
  x = emb5dpf,
  subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5
)

# Normalize, pick the 2000 most variable genes (vst), scale every gene,
# then compute 200 principal components from the variable genes.
emb5dpf <- NormalizeData(object = emb5dpf)
emb5dpf <- FindVariableFeatures(
  object = emb5dpf,
  selection.method = "vst",
  nfeatures = 2000
)
all.genes <- rownames(emb5dpf)
emb5dpf <- ScaleData(object = emb5dpf, features = all.genes)
emb5dpf <- RunPCA(
  object = emb5dpf,
  features = VariableFeatures(object = emb5dpf),
  npcs = 200
)

# Export this as an 8w x 10h PDF in landscape orientation.
ElbowPlot(object = emb5dpf, ndims = 200)

# The elbow plot led to choosing the first 75 PCs for clustering; the same
# number is reused for the V4.2 run so the two analyses stay comparable.
emb5dpf <- FindNeighbors(object = emb5dpf, dims = 1:75)
emb5dpf <- FindClusters(object = emb5dpf, resolution = 2)
emb5dpf <- RunTSNE(
  object = emb5dpf,
  reduction = "pca",
  dims = 1:75,
  tsne.method = "Rtsne"
)
# Output the following plot as a PDF from RStudio.
DimPlot(object = emb5dpf, reduction = "tsne", label = TRUE) + NoLegend()

# Record the per-PC values so the ElbowPlot results can be compared between
# annotations.
# Table of the standard deviation of each principal component.
stdVal <- emb5dpf[["pca"]]@stdev
write.csv(stdVal, "4_StdDevVsPCvalues.csv")
# Each PC's standard deviation expressed as a percentage of the summed
# standard deviations of all computed PCs.
# NOTE(review): this is a share of standard deviation, not of variance; if
# "percent variance explained" is intended, stdVal^2 would be used instead --
# confirm before changing, since downstream comparisons rely on these values.
pct <- stdVal / sum(stdVal) * 100
# Cumulative percentage across PCs, in PC order.
cumu <- cumsum(pct)
write.csv(cumu, "4b_cumulativePercentVariationPC_Ens95.csv")

# Compute positive marker genes for every cluster (min.pct and
# logfc.threshold both 0.25) and save the table as a CSV file.
emb5dpf.markers <- FindAllMarkers(
  object = emb5dpf,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)
write.csv(
  emb5dpf.markers,
  file = "/home/nl80w/1_wholeEmbryo5dpfSC_ENS95_plotsOut/AllMarkers.csv"
)


######################################################

# Below are the commands used for clustering the V4.2-quantified data,
# with the same parameters as above.

# ---- V4.2: load counts and compute QC metrics ----
# Read the CellRanger filtered feature/barcode matrix and build the Seurat
# object (min.cells = 3, min.features = 200 applied at creation time).
# NOTE(review): this path uses "SRR10095965_outs" where the ENS95 run uses
# "SRR10095965/outs" -- confirm the directory name is intentional.
data <- Read10X(data.dir = "/project/umw_nathan_lawson/DolphinOuts/2_wholeEmbryo5dpf_V4.2/cellranger_count/SRR10095965_outs/filtered_feature_bc_matrix")
emb5dpf <- CreateSeuratObject(counts = data, project = "emb5dpf_V4.2", min.cells = 3, min.features = 200)

# Add a metadata column with the percentage of counts coming from
# mitochondrial genes. The pattern is anchored with "^" so that only gene
# names that START with "mt-" are counted; an unanchored "mt-" would also
# match any gene whose name merely contains that substring.
emb5dpf[["percent.mt"]] <- PercentageFeatureSet(emb5dpf, pattern = "^mt-")
# Export the following plot to an 8.5w x 6h PDF in landscape.
VlnPlot(emb5dpf, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

# QC filter: keep cells with more than 200 and fewer than 2500 detected
# features and with less than 5% mitochondrial counts.
emb5dpf <- subset(
  x = emb5dpf,
  subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5
)

# Normalize, pick the 2000 most variable genes (vst), scale every gene,
# then compute 200 principal components from the variable genes.
emb5dpf <- NormalizeData(object = emb5dpf)
emb5dpf <- FindVariableFeatures(
  object = emb5dpf,
  selection.method = "vst",
  nfeatures = 2000
)
all.genes <- rownames(emb5dpf)
emb5dpf <- ScaleData(object = emb5dpf, features = all.genes)
emb5dpf <- RunPCA(
  object = emb5dpf,
  features = VariableFeatures(object = emb5dpf),
  npcs = 200
)

# Plot standard deviation against PC index.
ElbowPlot(object = emb5dpf, ndims = 200)

# The ENS95 section shows how to export the StdDev-vs-PC values and the
# cumulative percent variation for each PC.

# Cluster with the same number of PCs (75) that was used for ENS95.
emb5dpf <- FindNeighbors(object = emb5dpf, dims = 1:75)
emb5dpf <- FindClusters(object = emb5dpf, resolution = 2)
emb5dpf <- RunTSNE(
  object = emb5dpf,
  reduction = "pca",
  dims = 1:75,
  tsne.method = "Rtsne"
)
# Output the following plot as an 8.5w x 10h PDF in normal orientation.
DimPlot(object = emb5dpf, reduction = "tsne", label = TRUE) + NoLegend()

# Compute positive marker genes for every cluster (min.pct and
# logfc.threshold both 0.25) and save the table as a CSV file.
emb5dpf.markers <- FindAllMarkers(
  object = emb5dpf,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)
write.csv(
  emb5dpf.markers,
  file = "/home/nl80w/2_wholeEmbryo5dpfSC_V4_2plotsOut/AllMarkers.csv"
)