# Load libraries
library("DESeq2")
library("pheatmap")
library("RColorBrewer")
library("ggplot2")
library("dplyr")
library("ggrepel")

# Load Data
# Read the raw count table (the file has no header row). The first column
# supplies the row names; the remaining columns are labelled below as three
# "Fresh_Pos" and three "Probe_Pos" samples.
read.counts <- read.table(
  "Mouse_Live_vs_Probe-Seq_Grik1_Read_Counts.txt",
  header = FALSE,
  stringsAsFactors = FALSE
)
# Promote the identifier column to row names, then drop it from the table.
rownames(read.counts) <- read.counts[[1]]
read.counts <- read.counts[, -1]
colnames(read.counts) <- c("Fresh_Pos_1", "Fresh_Pos_2", "Fresh_Pos_3", "Probe_Pos_1", "Probe_Pos_2", "Probe_Pos_3")
# Sample sheet: one row per count column, with the condition of each sample.
sample.info <- data.frame(
  condition = rep(c("Fresh_Pos", "Probe_Pos"), each = 3),
  row.names = colnames(read.counts)
)

# Differential Expression analysis
# Build the DESeq2 data set from the count table and the sample sheet, using
# `condition` as the only design variable.
DESeq.ds <- DESeqDataSetFromMatrix(
  countData = read.counts,
  colData = sample.info,
  design = ~ condition
)

# Keep only the rows that have at least one read across all samples.
has.reads <- rowSums(counts(DESeq.ds)) > 0
DESeq.ds <- DESeq.ds[has.reads, ]

# Size-factor normalisation, followed by a log2(x + 1) transform. The
# transformed matrix is what the heatmap and the QC plots further down use.
DESeq.ds <- estimateSizeFactors(DESeq.ds)
counts.sf_normalized <- counts(DESeq.ds, normalized = TRUE)
log.norm.counts <- log2(counts.sf_normalized + 1)

# Print the structure of the condition variable, then make "Fresh_Pos" its
# reference level before fitting the model.
str(DESeq.ds$condition)
DESeq.ds$condition <- relevel(DESeq.ds$condition, ref = "Fresh_Pos")

DESeq.ds <- DESeq(DESeq.ds)

# Obtaining Results
# Extract the results table, requesting "BH" p-value adjustment, and print an
# overview plus the first rows.
DGE.results <- results(DESeq.ds, pAdjustMethod = "BH")
summary(DGE.results)
head(DGE.results)

# Tabulate genes by whether their adjusted p-value (padj) is below 0.05.
# Genes whose padj is NA are not counted by table().
table(DGE.results$padj < 0.05)

# Sort by adjusted p-value (ascending, NA last), collect the names of the
# genes with padj < 0.05, and write the full sorted table to a csv file.
padj.order <- order(DGE.results$padj)
DGE.results.sorted <- DGE.results[padj.order, ]
sig.rows <- which(DGE.results.sorted$padj < 0.05)
DGEgenes <- rownames(DGE.results.sorted)[sig.rows]
All_DE_genes <- as.data.frame(DGE.results.sorted)
write.csv(All_DE_genes, file = "Fresh_Probe_DE_Gene_List.csv")

# Heatmap plot of the top differentially expressed genes (at most 50)
# DGEgenes is ordered by ascending adjusted p-value, so the leading entries
# are the most significant genes. head() is used instead of DGEgenes[1:50]
# because the latter pads the vector with NA when fewer than 50 genes pass
# the threshold, and the NA row lookup below would then fail.
DGE_Top <- head(DGEgenes, 50)
# drop = FALSE keeps a matrix even if only one gene is selected.
hm.mat_DGEgenes <- log.norm.counts[DGE_Top, , drop = FALSE]
if (length(DGE_Top) >= 2) {
  # onefile = FALSE is kept as in the original call to pdf().
  pdf(file = "Fresh_Probe_Top50_DEGenes_Heatmap.pdf", onefile = FALSE)
  # Rows are scaled and clustered with average linkage.
  pheatmap(hm.mat_DGEgenes, clustering_method = "average", scale = "row", fontsize = 7)
  dev.off()
} else {
  # Row clustering needs at least two genes; skip the plot instead of failing.
  warning(
    "Fewer than 2 differentially expressed genes at padj < 0.05; ",
    "heatmap not drawn.",
    call. = FALSE
  )
}

# Quality control of the RNA-seq analysis

# Boxplot of the log2-transformed, size-factor-normalised counts, one box per
# sample column.
pdf(file = "Mouse_Fresh_Probe_Boxplot.pdf", onefile = FALSE)
boxplot(
  log.norm.counts,
  notch = FALSE,
  main = "log2-transformed read counts",
  ylab = "log2(read counts)"
)
dev.off()

# Dendrogram of the samples: the distance between two samples is
# 1 - Pearson correlation of their log2-transformed counts, clustered with
# hclust() using its default method.
distance.log <- as.dist(1 - cor(log.norm.counts, method = "pearson"))
sample.tree <- hclust(distance.log)
pdf(file = "Mouse_Fresh_Probe_Dendrogram.pdf", onefile = FALSE)
plot(
  sample.tree,
  labels = colnames(log.norm.counts),
  main = "log2 transformed read counts\ndistance:Pearson correlation"
)
dev.off()

# Scatterplot of all genes: column 5 on the x axis against column 1 on the
# y axis of the log2-transformed counts (labelled "Probe_Pos_2" and
# "Fresh_Pos_1" when the data are loaded). "Glul" is drawn in red, every
# other gene in black.
point.col <- ifelse(rownames(log.norm.counts) == "Glul", "red", "black")
pdf(file = "Mouse_Fresh_Probe_ScatterPlot.pdf", onefile = FALSE)
# The x/y arguments stay as literal subsetting expressions because plot()
# derives its default axis labels from them.
plot(log.norm.counts[, 5], log.norm.counts[, 1], col = point.col)
# Identity line (y = x) for reference.
abline(a = 0, b = 1, col = "red")
dev.off()
# Print the sample-by-sample correlation matrix (default method).
cor(log.norm.counts)

# Scatterplot of BC markers
# Read the marker list (first column of a header-less csv file) and keep only
# the markers that are present in the normalised count matrix.
Bipolar_clusters <- read.delim("Mouse_BC2-BC4_Hi_Conf.csv", sep = ",", header = FALSE)
Bipolar_clusters <- as.character(Bipolar_clusters$V1)
Bipolar_clusters <- intersect(Bipolar_clusters, row.names(log.norm.counts))
# Print the sample-by-sample correlation restricted to the marker genes.
# drop = FALSE keeps a matrix even if only one marker is left.
cor(log.norm.counts[Bipolar_clusters, , drop = FALSE])
pdf(file = "Mouse_Fresh_Probe_BC2-4_Markers_ScatterPlot.pdf", onefile = FALSE)
# Colours must be built from the plotted subset (Bipolar_clusters), not from
# all row names of the matrix: plot() pairs colours with points by position,
# so a colour vector computed over every gene would highlight the wrong
# point (or none) instead of "Ackr1".
plot(
  log.norm.counts[Bipolar_clusters, 5],
  log.norm.counts[Bipolar_clusters, 1],
  col = ifelse(Bipolar_clusters == "Ackr1", "red", "black")
)
# Identity line (y = x) for reference.
abline(a = 0, b = 1, col = "red")
dev.off()
