# Load libraries
library("DESeq2")
library("pheatmap")
library("ggplot2")
library("dplyr")
library("ggrepel")

# Load Data
# The counts file is read without a header row: column 1 holds the gene
# identifiers, the remaining columns hold the per-sample read counts.
read.counts <- read.table("Mouse_Nuclei_Grik1_Read_Counts.txt", header = FALSE, stringsAsFactors = FALSE)
row.names(read.counts) <- read.counts[, 1]
# Drop the identifier column now that it lives in the row names.
read.counts <- read.counts[, -1, drop = FALSE]

# Six sample labels are assigned below; fail loudly if the file layout differs
# instead of silently producing NA / mismatched column names.
if (ncol(read.counts) != 6) {
  stop("Expected 6 sample columns in the read count table, found ", ncol(read.counts), call. = FALSE)
}
names(read.counts) <- c("C-", "7-", "6-", "C+", "7+", "6+")

# Sample annotation, one row per count column (same order as the columns).
# Both design variables are built as explicit factors so the model does not
# depend on implicit character-to-factor conversion; "Negative" is listed first
# so it is the reference level of `condition`.
sample.info <- data.frame(
  Mouse = factor(c("C", "7", "6", "C", "7", "6")),
  condition = factor(c(rep("Negative", 3), rep("Positive", 3)), levels = c("Negative", "Positive")),
  row.names = names(read.counts)
)

# Differential Expression analysis
# Build the DESeq2 data set from the count table and the sample annotation;
# the design contains the mouse of origin and the condition.
DESeq.ds <- DESeqDataSetFromMatrix(
  countData = read.counts,
  colData = sample.info,
  design = ~ Mouse + condition
)

# Keep only the genes whose counts summed over all samples are above zero.
DESeq.ds <- DESeq.ds[rowSums(counts(DESeq.ds)) > 0, ]

# Size-factor normalisation, followed by log2(x + 1) of the normalised counts.
DESeq.ds <- estimateSizeFactors(DESeq.ds)
counts.sf_normalized <- counts(DESeq.ds, normalized = TRUE)
log.norm.counts <- log2(1 + counts.sf_normalized)

# Show the structure of the condition column, then make "Negative" its
# reference level.
str(DESeq.ds$condition)
DESeq.ds$condition <- relevel(DESeq.ds$condition, ref = "Negative")

# Run the DESeq2 pipeline with a local dispersion fit.
DESeq.ds <- DESeq(DESeq.ds, fitType = "local")

# Obtaining Results
# Extract the results table with Benjamini-Hochberg adjusted p-values.
# NOTE(review): results() is called with its default `alpha`, while the
# significance cutoff used below is 0.05 -- confirm whether `alpha = 0.05`
# should be passed here.
DGE.results <- results(DESeq.ds, pAdjustMethod = "BH")
summary(DGE.results)
head(DGE.results)

# Number of differentially expressed genes at padj < 0.05
table(DGE.results$padj < 0.05)

# Sort by adjusted p-value, collect the names of the significant genes, and
# write the complete sorted results table (all genes) to a csv file.
DGE.results.sorted <- DGE.results[order(DGE.results$padj), ]
# which() drops genes whose padj is NA.
DGEgenes <- rownames(DGE.results.sorted)[which(DGE.results.sorted$padj < 0.05)]
All_DE_genes <- as.data.frame(DGE.results.sorted)
write.csv(All_DE_genes, file = "Mouse_Grik1_DE_Gene_List.csv")

# Quality control of the RNA-seq analysis
# Histogram of the (unadjusted) p-value frequencies
pdf(file = "Mouse_Grik1_Histogram.pdf", onefile = FALSE)
hist(
  DGE.results$pvalue,
  main = "frequencies of p-values",
  xlab = "",
  ylab = "",
  col = "grey",
  border = "white"
)
dev.off()

# Notched boxplot of the log2-transformed normalised read counts, one box per
# sample
pdf(file = "Mouse_Grik1_Boxplot.pdf", onefile = FALSE)
boxplot(
  log.norm.counts,
  main = "log2-transformed read counts",
  ylab = "log2(read counts)",
  notch = TRUE
)
dev.off()

# Dendrogram of the samples, using 1 - Pearson correlation of the
# log2-transformed counts as the distance
distance.log <- as.dist(1 - cor(log.norm.counts, method = "pearson"))
pdf(file = "Mouse_Grik1_Dendrogram.pdf", onefile = FALSE)
plot(
  hclust(distance.log),
  labels = colnames(log.norm.counts),
  main = "log2 transformed read counts\ndistance:Pearson correlation"
)
dev.off()

# Heatmap plot of the top 50 differentially expressed genes
# DGEgenes is ordered by adjusted p-value, so its first entries are the most
# significant genes. head() returns at most 50 names; indexing with 1:50 would
# pad the vector with NA when fewer genes pass the cutoff and make the matrix
# lookup below fail.
DGE_Top <- head(DGEgenes, 50)
# drop = FALSE keeps a matrix even when only one gene is selected.
hm.mat_DGEgenes <- log.norm.counts[DGE_Top, , drop = FALSE]
if (nrow(hm.mat_DGEgenes) >= 2) {
  pdf(file = "Mouse_Grik1_Top50_DEGenes_Heatmap.pdf", onefile = FALSE)
  pheatmap(hm.mat_DGEgenes, clustering_method = "average", scale = "row")
  dev.off()
} else {
  # Row clustering needs at least two genes; skip the plot instead of failing
  # with the pdf device left open.
  warning("Fewer than 2 differentially expressed genes; top-50 heatmap not drawn.", call. = FALSE)
}

# Heatmap for BC subtype-specific markers in the retina from Karthik et al.
# that are high confidence, unique to a single subtype, and DE between GN and
# GP.
# The marker file is comma-separated without a header; the gene names are taken
# from its second column.
Bipolar_clusters <- read.delim("Mouse_Bipolar_cluster_HiConf_unique.txt", sep = ",", header = FALSE)
Bipolar_clusters <- as.character(Bipolar_clusters$V2)
# Remove every gene that is listed more than once (all of its occurrences), so
# only genes that appear exactly once in the file remain.
duplicates <- Bipolar_clusters[duplicated(Bipolar_clusters)]
Bipolar_clusters <- setdiff(Bipolar_clusters, duplicates)

# Keep the markers that are also differentially expressed, in marker-file order.
DE_markers <- Bipolar_clusters[Bipolar_clusters %in% DGEgenes]
# drop = FALSE keeps a matrix even when a single marker is selected.
DE_markerheatmap <- log.norm.counts[DE_markers, , drop = FALSE]
if (nrow(DE_markerheatmap) > 0) {
  pdf(file = "Mouse_Nuclei_Grik1_Known_Bipolar_Markers_HiConf_Heatmap.pdf", onefile = FALSE, useDingbats = FALSE)
  pheatmap(DE_markerheatmap, cluster_rows = FALSE, cluster_cols = FALSE, scale = "row", fontsize = 5)
  dev.off()
} else {
  # Nothing to plot; skip instead of failing with the pdf device left open.
  warning("No bipolar subtype markers are differentially expressed; marker heatmap not drawn.", call. = FALSE)
}
