# Load libraries
library("DESeq2")
library("pheatmap")
library("RColorBrewer")
library("ggplot2")
library("dplyr")
library("ggrepel")

# Load data ----
# The count table is read without a header line: its first column supplies the
# gene identifiers (moved into the row names) and the remaining columns are
# the per-sample counts, which are named explicitly below.
read.counts <- read.table(
  "Mouse_Whole_Cell_Vsx2_Grik1_Read_Counts.txt",
  header = FALSE,
  stringsAsFactors = FALSE
)
row.names(read.counts) <- read.counts[, 1]
# Drop the identifier column now that it lives in the row names.
read.counts <- read.counts[, -1, drop = FALSE]

sample.names <- c("V-1", "V-2", "G-1", "G-2", "G+1", "G+2")
# Fail early with a clear message if the file does not hold exactly one count
# column per expected sample.
stopifnot(ncol(read.counts) == length(sample.names))
names(read.counts) <- sample.names

# Sample annotation, one row per count column (same order as sample.names).
# Mouse and condition are created as factors explicitly because data.frame()
# no longer converts strings to factors by default since R 4.0.
sample.info <- data.frame(
  Mouse = factor(c("1", "2", "1", "2", "1", "2")),
  condition = factor(
    c(rep("Vsx2_Neg", 2), rep("Grik1_Neg", 2), rep("Grik1_Pos", 2))
  ),
  row.names = sample.names
)

# Differential expression analysis ----
# Build the DESeq2 data set from the raw counts and the sample annotation; the
# design formula contains a Mouse term and a condition term.
DESeq.ds <- DESeqDataSetFromMatrix(
  countData = read.counts,
  colData = sample.info,
  design = ~ Mouse + condition
)
# Keep only genes with at least one read summed over all samples.
DESeq.ds <- DESeq.ds[rowSums(counts(DESeq.ds)) > 0, ]

# Size-factor-normalised counts and their log2(x + 1) transform; the
# transformed matrix feeds the heatmaps and QC plots further down.
DESeq.ds <- estimateSizeFactors(DESeq.ds)
counts.sf_normalized <- counts(DESeq.ds, normalized = TRUE)
log.norm.counts <- log2(counts.sf_normalized + 1)

# Print the condition factor, then make Grik1_Neg its reference level before
# fitting the model.
str(colData(DESeq.ds)$condition)
colData(DESeq.ds)$condition <- relevel(
  colData(DESeq.ds)$condition,
  ref = "Grik1_Neg"
)
DESeq.ds <- DESeq(DESeq.ds)

# Pairwise contrasts with Benjamini-Hochberg adjustment. In each contrast
# vector the second element is the numerator level and the third the
# denominator level of the log2 fold change.
# NOTE: despite its name, results.GN.GP is Grik1_Pos over Grik1_Neg, so a
# positive log2FoldChange there means higher expression in Grik1_Pos.
results.VN.GN <- results(
  DESeq.ds,
  contrast = c("condition", "Vsx2_Neg", "Grik1_Neg"),
  pAdjustMethod = "BH"
)
results.VN.GP <- results(
  DESeq.ds,
  contrast = c("condition", "Vsx2_Neg", "Grik1_Pos"),
  pAdjustMethod = "BH"
)
results.GN.GP <- results(
  DESeq.ds,
  contrast = c("condition", "Grik1_Pos", "Grik1_Neg"),
  pAdjustMethod = "BH"
)
# NOTE(review): DGE.results is not referenced again in the visible script, and
# c() on results objects may not give a list of three tables -- confirm intent.
DGE.results <- c(results.VN.GN, results.VN.GP, results.GN.GP)

# Number of genes below / above the 5% adjusted p-value threshold.
table(results.VN.GN$padj < 0.05)
table(results.VN.GP$padj < 0.05)
table(results.GN.GP$padj < 0.05)

# Rank every comparison by adjusted p-value and export the tables ----
# order() places NA adjusted p-values at the end of each table.
results.VN.GN.sorted <- results.VN.GN[order(results.VN.GN$padj), ]
results.VN.GP.sorted <- results.VN.GP[order(results.VN.GP$padj), ]
results.GN.GP.sorted <- results.GN.GP[order(results.GN.GP$padj), ]

# Gene names with adjusted p-value below 0.05, kept in ranked order.
DGEgenes.VN.GN <- rownames(subset(results.VN.GN.sorted, padj < 0.05))
DGEgenes.VN.GP <- rownames(subset(results.VN.GP.sorted, padj < 0.05))
DGEgenes.GN.GP <- rownames(subset(results.GN.GP.sorted, padj < 0.05))

# Pooled gene lists; a gene significant in several comparisons appears once
# per comparison (no de-duplication is performed).
All.DGEgenes <- c(DGEgenes.GN.GP, DGEgenes.VN.GN, DGEgenes.VN.GP)
All.DGEgenes.VN <- c(DGEgenes.VN.GN, DGEgenes.VN.GP)

# Write the complete ranked tables (all genes, not only significant ones).
DE_genes.VN.GN <- as.data.frame(results.VN.GN.sorted)
DE_genes.VN.GP <- as.data.frame(results.VN.GP.sorted)
DE_genes.GN.GP <- as.data.frame(results.GN.GP.sorted)
write.csv(
  DE_genes.VN.GN,
  file = "Mouse_Whole_Cell_Vsx2_Grik1_DE_Gene_List_Vsx2-_vs_Grik1-.csv"
)
write.csv(
  DE_genes.VN.GP,
  file = "Mouse_Whole_Cell_Vsx2_Grik1_DE_Gene_List_Vsx2-_vs_Grik1+.csv"
)
write.csv(
  DE_genes.GN.GP,
  file = "Mouse_Whole_Cell_Vsx2_Grik1_DE_Gene_List_Grik1-_vs_Grik1+.csv"
)

# DE genes that are specific to each population ----
# Each set combines a fold-change direction filter from one ranked table with
# the significant-gene list of another comparison. intersect() keeps the order
# of its first argument, so each set stays ranked by the first table's padj.

# Negative fold change in Vsx2_Neg vs Grik1_Pos, and significant in the
# Grik1_Pos vs Grik1_Neg comparison.
Grik1.Pos <- intersect(
  rownames(subset(results.VN.GP.sorted, log2FoldChange < 0)),
  DGEgenes.GN.GP
)

# Negative fold change in Grik1_Pos vs Grik1_Neg, and significant in the
# Vsx2_Neg vs Grik1_Neg comparison.
Grik1.Neg <- intersect(
  rownames(subset(results.GN.GP.sorted, log2FoldChange < 0)),
  DGEgenes.VN.GN
)

# Positive fold change in Vsx2_Neg vs Grik1_Pos, and significant in the
# Vsx2_Neg vs Grik1_Neg comparison.
Vsx2.Neg <- intersect(
  rownames(subset(results.VN.GP.sorted, log2FoldChange > 0)),
  DGEgenes.VN.GN
)

# Heatmap of the top 20 differentially expressed genes for each population ----
# One PDF per population; rows are scaled and clustered with average linkage.
population.genes <- list(
  Grik1_Pos = Grik1.Pos,
  Grik1_Neg = Grik1.Neg,
  Vsx2_Neg = Vsx2.Neg
)
for (population in names(population.genes)) {
  # head() returns at most 20 genes. Indexing with [1:20] would pad a shorter
  # vector with NA, and the row lookup below would then fail.
  DGE_Top <- head(population.genes[[population]], 20)
  if (length(DGE_Top) < 2) {
    # Row clustering needs at least two genes; skip instead of aborting.
    warning(
      "Skipping heatmap for ", population,
      ": fewer than two genes available",
      call. = FALSE
    )
    next
  }
  # drop = FALSE keeps the selection a matrix whatever the number of rows.
  hm.mat_DGEgenes <- log.norm.counts[DGE_Top, , drop = FALSE]
  pdf(
    file = paste0(
      "Mouse_Whole_Cell_Vsx2_Grik1_Top20_DEGenes_", population, "_Heatmap.pdf"
    ),
    onefile = FALSE
  )
  # Close the PDF device even if plotting fails, so no device is left open.
  tryCatch(
    pheatmap(hm.mat_DGEgenes, clustering_method = "average", scale = "row"),
    finally = dev.off()
  )
}

# Quality control of the RNA-seq analysis ----
# Histograms of the raw (unadjusted) p-values, one PDF per comparison.
pdf(file = "Mouse_Whole_Cell_Vsx2_Grik1_Histogram_VN_GN.pdf", onefile = FALSE)
hist(
  results.VN.GN$pvalue,
  main = "frequencies of p-values",
  xlab = "", ylab = "",
  col = "grey", border = "white"
)
dev.off()

pdf(file = "Mouse_Whole_Cell_Vsx2_Grik1_Histogram_VN_GP.pdf", onefile = FALSE)
hist(
  results.VN.GP$pvalue,
  main = "frequencies of p-values",
  xlab = "", ylab = "",
  col = "grey", border = "white"
)
dev.off()

pdf(file = "Mouse_Whole_Cell_Vsx2_Grik1_Histogram_GN_GP.pdf", onefile = FALSE)
hist(
  results.GN.GP$pvalue,
  main = "frequencies of p-values",
  xlab = "", ylab = "",
  col = "grey", border = "white"
)
dev.off()

# Per-sample distribution of the log2-transformed normalised counts.
pdf(file = "Mouse_Whole_Cell_Vsx2_Grik1_Boxplot.pdf", onefile = FALSE)
boxplot(
  log.norm.counts,
  notch = TRUE,
  ylab = "log2(read counts)",
  main = "log2-transformed read counts"
)
dev.off()

# Sample dendrogram: hierarchical clustering (hclust defaults) on the distance
# 1 - Pearson correlation between samples.
pdf(file = "Mouse_Whole_Cell_Vsx2_Grik1_Dendrogram.pdf", onefile = FALSE)
distance.log <- as.dist(1 - cor(log.norm.counts, method = "pearson"))
plot(
  hclust(distance.log),
  labels = colnames(log.norm.counts),
  main = "log2 transformed read counts\ndistance:Pearson correlation"
)
dev.off()

# MA plots, one PDF per comparison; genes are highlighted at alpha = 0.05 and
# the fold-change axis is fixed to [-10, 10].
pdf(
  file = "Mouse_Whole_Cell_Vsx2_Grik1_MAplot_VN_GN.pdf",
  onefile = FALSE,
  useDingbats = FALSE
)
plotMA(results.VN.GN, alpha = 0.05, ylim = c(-10, 10), main = "Vsx2- vs Grik1-")
dev.off()

pdf(
  file = "Mouse_Whole_Cell_Vsx2_Grik1_MAplot_VN_GP.pdf",
  onefile = FALSE,
  useDingbats = FALSE
)
plotMA(results.VN.GP, alpha = 0.05, ylim = c(-10, 10), main = "Vsx2- vs Grik1+")
dev.off()

pdf(
  file = "Mouse_Whole_Cell_Vsx2_Grik1_MAplot_GN_GP.pdf",
  onefile = FALSE,
  useDingbats = FALSE
)
plotMA(results.GN.GP, alpha = 0.05, ylim = c(-10, 10), main = "Grik1- vs Grik1+")
dev.off()

# Capitalise the first letter of every space-separated word in a string.
# Only the first element of `x` is processed; the rest of each word is left
# untouched, so callers lower-case the input first if they need "Abc" casing.
# Returns a single string with the words re-joined by single spaces.
simpleCap <- function(x) {
  words <- strsplit(x, " ")[[1]]
  first_letters <- toupper(substring(words, 1, 1))
  remainders <- substring(words, 2)
  paste(paste0(first_letters, remainders), collapse = " ")
}

# Heatmap of retinal cell-type markers (Macoscko et al.) ----
# Plots markers that are unique to one cell type and differentially expressed
# in any of the three comparisons.
# Rod markers are not unique (likely because of contamination in other cell
# types), so a fixed slice of rod markers is prepended to the unique markers.
# Minor cell types (astrocytes, fibroblasts, endothelium, microglia,
# pericytes) were discarded as too few cells were sequenced.
Cells <- read.delim(
  "Mouse_Retina_Cell_Class_Markers_Macoscko.prn",
  sep = " ",
  header = FALSE
)
# Convert the marker names in column V1 to "Abc" casing: lower-case everything,
# then capitalise the first letter. vapply() guarantees a character vector
# even when the input is empty.
Cell_clusters <- tolower(as.character(Cells$V1))
Cell_clusters <- vapply(Cell_clusters, simpleCap, character(1))
# NOTE(review): these positions are tied to the layout of the marker file, and
# 2679:2699 selects 21 entries although the description mentions the top 20 --
# confirm against the file.
Rod_cluster <- Cell_clusters[2679:2699]
# Remove every name that occurs more than once, leaving unique markers only.
duplicates <- Cell_clusters[duplicated(Cell_clusters)]
Cell_clusters <- setdiff(Cell_clusters, duplicates)
Cell_clusters <- c(Rod_cluster, Cell_clusters)
# NOTE(review): hard-coded cut-off; presumably excludes the discarded minor
# cell types. If fewer than 403 entries exist the tail is NA (ignored below).
Cell_clusters <- Cell_clusters[1:403]

# Keep the markers that are differentially expressed, preserving their order.
DE_markers <- as.character(Cell_clusters[Cell_clusters %in% All.DGEgenes])
# drop = FALSE keeps a matrix even if a single marker remains.
DE_markerheatmap <- log.norm.counts[DE_markers, , drop = FALSE]
pdf(
  file = "Mouse_Whole_Cell_Vsx2_Grik1_Known_Macoscko_Markers_Heatmap.pdf",
  onefile = FALSE,
  useDingbats = FALSE
)
pheatmap(
  DE_markerheatmap,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  scale = "row",
  fontsize = 3
)
dev.off()

# Heatmap of bipolar-cell subtype markers (Karthik et al.) ----
# Plots high-confidence markers that are unique to one subtype and
# differentially expressed between the Grik1_Neg and Grik1_Pos populations.
Bipolar_clusters <- read.delim(
  "Mouse_Bipolar_cluster_HiConf_unique.txt",
  sep = ",",
  header = FALSE
)
# Marker names are taken from the second column of the file.
Bipolar_clusters <- as.character(Bipolar_clusters$V2)
# Remove every name that occurs more than once, leaving unique markers only.
duplicates <- Bipolar_clusters[duplicated(Bipolar_clusters)]
Bipolar_clusters <- setdiff(Bipolar_clusters, duplicates)

# Keep the markers that are significant in the Grik1 comparison, in file order.
DE_markers <- Bipolar_clusters[Bipolar_clusters %in% DGEgenes.GN.GP]
# Columns 3:6 are the four Grik1 samples (G-1, G-2, G+1, G+2); drop = FALSE
# keeps a matrix even if a single marker remains.
DE_markerheatmap <- log.norm.counts[DE_markers, 3:6, drop = FALSE]
pdf(
  file = "Mouse_Whole_Cell_Vsx2_Grik1_Known_Bipolar_Markers_HiConf_Heatmap.pdf",
  onefile = FALSE,
  useDingbats = FALSE
)
pheatmap(
  DE_markerheatmap,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  scale = "row",
  fontsize = 3
)
dev.off()

# Plot of normalised counts for Tpbgl per condition ----
myGenes <- c("Tpbgl")
# Fetch the counts before opening the PDF device, so a failure here (e.g. the
# gene is absent) does not leave a device open.
geneCounts <- plotCounts(
  DESeq.ds,
  gene = myGenes,
  intgroup = c("condition"),
  returnData = TRUE
)
# Black points: individual samples (horizontal jitter only).
# Red point and error bar: mean and mean +/- 1 SD per condition.
# `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
# NOTE(review): mean_sdl relies on the Hmisc package being installed.
tpbgl.plot <- ggplot(geneCounts, aes(x = condition, y = count)) +
  geom_point(position = position_jitter(width = .1, height = 0), size = 5) +
  coord_cartesian(ylim = c(0, 3000)) +
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 1),
    geom = "errorbar",
    color = "red",
    width = 0.2
  ) +
  stat_summary(fun = mean, geom = "point", color = "red", size = 5)
pdf(file = "Mouse_Whole_Cell_Vsx2_Grik1_Counts_Tpbgl.pdf", onefile = FALSE)
# A ggplot object is only drawn when printed; printing explicitly makes the
# PDF non-empty even when the script is run through source().
print(tpbgl.plot)
dev.off()
