#!/usr/bin/Rscript

###################################################################################################
#### This script was written by Serkan Erdin to compare differential expression results        ####
#### between HttQ111vsWT in Hdac2WT background and Hdac2KOvsWT in HttQ111 background at p<0.05 ####
#### and amongst these overlapping genes, identify genes with similar expression levels        ####
#### at HttWT Hdac2WT and HttQ111 Hdac2KO genotypes.                                           ####
###################################################################################################

############################################################################################
#### R version 3.4.3 (2017-11-30)                                                          #
#### Platform: x86_64-apple-darwin15.6.0 (64-bit)                                          #
#### Running under: macOS Sierra 10.12.6                                                   #
####                                                                                       #
#### Matrix products: default                                                              #
#### BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib     #
#### LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib   #
####                                                                                       #
#### locale:                                                                               #
#### [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8                     #
####                                                                                       #
#### attached base packages:                                                               #
#### [1] grid      stats     graphics  grDevices utils     datasets  base                  #
####                                                                                       #
#### other attached packages:                                                              #
#### [1] RColorBrewer_1.1-2 pheatmap_1.0.8                                                 #
####                                                                                       #
#### loaded via a namespace (and not attached):                                            #
#### [1] colorspace_1.3-2 scales_0.5.0     compiler_3.4.3   plyr_1.8.4                     #
#### [5] gtable_0.2.0     Rcpp_0.12.17     methods_3.4.3    munsell_0.4.3                  #
############################################################################################

library(pheatmap)
library(RColorBrewer)
library(grid)

# Input files ----
# DE.file_1 / DE.file_2: the two differential-expression result tables that
# are compared against each other. input.file: the count matrix read further
# down for the per-gene comparison and the heatmaps.
DE.file_1 <- "DEG_lists/HttQ111vsWT_WTHdac2_2vs1_edgeR_quasilikelihoodFtest.SVA.txt"
DE.file_2 <- "DEG_lists/Hdac2KOvsHdac2WT_HttQ111_3vs1_edgeR_quasilikelihoodFtest.SVA.txt"
input.file <- "Counts/ThisStudy.SVAcorrectedcount.txt"

# Read one tab-separated DE table and expose its row names as an `id` column
# so the two tables can be joined on it.
# NOTE(review): this assumes the gene identifiers end up as row names (i.e. the
# header line has one field fewer than the data lines) -- confirm against the
# input files; otherwise `id` would just be the row numbers.
read_de_table <- function(path) {
  de <- read.table(
    file = path, header = TRUE, sep = "\t",
    check.names = FALSE, stringsAsFactors = FALSE
  )
  # Everything downstream indexes logFC / PValue. Without this check a table
  # lacking them would silently yield empty subsets and counts of zero.
  absent <- setdiff(c("logFC", "PValue"), colnames(de))
  if (length(absent) > 0) {
    stop(
      "Missing column(s) ", paste(absent, collapse = ", "), " in ", path,
      call. = FALSE
    )
  }
  de$id <- rownames(de)
  de
}

DE_1 <- read_de_table(DE.file_1)
DE_2 <- read_de_table(DE.file_2)

# Inner join: only genes present in both tables are kept. Columns shared by
# both tables get the suffixes .x (DE_1) and .y (DE_2).
merged <- merge(DE_1, DE_2, by = "id")

# Summary of the genes tested in both comparisons ----
cat("Comparison of two studies at p < 0.05\n")
background <- nrow(merged)
cat(paste0("Number of compared genes in both studies: ",background),"\n")

# Significance masks at nominal p < 0.05. NA p-values are treated as "not
# significant"; counting with sum() instead of nrow(df[mask, ]) keeps NA
# entries from being counted as rows.
sig_1 <- !is.na(merged$PValue.x) & merged$PValue.x < 0.05
sig_2 <- !is.na(merged$PValue.y) & merged$PValue.y < 0.05

# .x columns come from DE.file_1, .y columns from DE.file_2.
n_up_1 <- sum(sig_1 & merged$logFC.x > 0, na.rm = TRUE)
n_down_1 <- sum(sig_1 & merged$logFC.x < 0, na.rm = TRUE)

n_up_2 <- sum(sig_2 & merged$logFC.y > 0, na.rm = TRUE)
n_down_2 <- sum(sig_2 & merged$logFC.y < 0, na.rm = TRUE)

cat(paste0("Number of up regulated DEGs from HttQ111vsWT at Hdac2WT  analyzed in both studies (p<0.05): ",n_up_1),"\n")
cat(paste0("Number of down regulated DEGs from HttQ111vsWT at Hdac2WT analyzed in both studies (p<0.05): ",n_down_1),"\n")

# The second table is the Hdac2KO-vs-WT contrast in the HttQ111 background
# (see DE.file_2); the labels previously repeated the first contrast's name.
cat(paste0("Number of up regulated DEGs from Hdac2KOvsWT at HttQ111  analyzed in both studies (p<0.05): ",n_up_2),"\n")
cat(paste0("Number of down regulated DEGs from Hdac2KOvsWT at HttQ111 analyzed in both studies (p<0.05): ",n_down_2),"\n")

# Overlap between the two DEG lists and its enrichment test ----
# NA p-values count as "not significant" so they cannot inject all-NA rows
# into `overlap`.
is_sig_x <- !is.na(merged$PValue.x) & merged$PValue.x < 0.05
is_sig_y <- !is.na(merged$PValue.y) & merged$PValue.y < 0.05

overlap <- merged[is_sig_x & is_sig_y,]
n11 <- nrow(overlap)      # significant in both comparisons
n1 <- sum(is_sig_x)       # significant in the first comparison
n2 <- sum(is_sig_y)       # significant in the second comparison

# Direction of change among the shared genes:
# first letter = sign of logFC.x, second letter = sign of logFC.y (u = up, d = down).
uu <- sum(overlap$logFC.x > 0 & overlap$logFC.y > 0, na.rm = TRUE)
ud <- sum(overlap$logFC.x > 0 & overlap$logFC.y < 0, na.rm = TRUE)
du <- sum(overlap$logFC.x < 0 & overlap$logFC.y > 0, na.rm = TRUE)
dd <- sum(overlap$logFC.x < 0 & overlap$logFC.y < 0, na.rm = TRUE)

print("Table of overlap")
# Printed layout: rows = direction in the first comparison (up, down),
# columns = direction in the second comparison (up, down).
print(matrix(c(uu,du,ud,dd),nrow=2))

# 2x2 contingency table over all genes tested in both comparisons:
#   [1,1] significant in both        [1,2] significant only in the first
#   [2,1] significant only in second [2,2] significant in neither
# Named `overlap_table` rather than `table` to avoid shadowing base::table.
overlap_table <- matrix(c(n11,n2-n11,n1-n11,background-n1-n2+n11),nrow=2)

# Run the one-sided (enrichment) Fisher's exact test once and reuse the result.
fisher_result <- fisher.test(overlap_table,alternative="greater")
pvalue <- fisher_result$p.value
conf_int_lower <- fisher_result$conf.int[1]
conf_int_upper <- fisher_result$conf.int[2]
oddsratio <- fisher_result$estimate

print("Overlap statistics at p < 0.05")
cat(paste0("Pvalue =",pvalue),"\n")
cat(paste0("Odss ratio =",oddsratio),"\n")
cat(paste0("Conf Int Upper =",conf_int_upper),"\n")
cat(paste0("Conf Int Lower =",conf_int_lower),"\n")

cat(paste0("Overlap no: ",nrow(overlap)),"\n")

# Per-gene comparison of the two "rescue" genotypes ----
# Load the count matrix and keep only the genes significant in both
# comparisons.
norm_counts <- read.table(
  file = input.file, header = TRUE, sep = "\t",
  check.names = FALSE, stringsAsFactors = FALSE
)
norm_counts <- norm_counts[rownames(norm_counts) %in% overlap$id,]

# Sample columns of the two groups being compared. These positions agree with
# the genotype layout encoded in `annotation_col` further down
# (7 Q111/+ WT, 8 +/+ WT, 11 Q111/+ KO, 9 +/+ KO).
httwt_hdac2wt_cols <- 8:15
httq111_hdac2ko_cols <- 16:26
if (ncol(norm_counts) < max(httwt_hdac2wt_cols, httq111_hdac2ko_cols)) {
  stop(
    "Count matrix has ", ncol(norm_counts), " sample columns; at least ",
    max(httwt_hdac2wt_cols, httq111_hdac2ko_cols), " are required.",
    call. = FALSE
  )
}

# For one row of the count matrix: group means, log2 ratio of the means, and a
# two-sample t-test (t.test() defaults, i.e. Welch) between the two groups.
compare_gene <- function(i) {
  wt_values <- unlist(norm_counts[i, httwt_hdac2wt_cols], use.names = FALSE)
  ko_values <- unlist(norm_counts[i, httq111_hdac2ko_cols], use.names = FALSE)
  wt_mean <- mean(wt_values)
  ko_mean <- mean(ko_values)
  tt <- t.test(wt_values, ko_values)  # computed once, both fields reused
  data.frame(
    id = rownames(norm_counts)[i],
    httwt_hdac2wt = wt_mean,
    httq111_hdac2ko = ko_mean,
    log2FC = log2(wt_mean / ko_mean),
    t_statistic = unname(tt$statistic),
    pvalue = tt$p.value,
    stringsAsFactors = FALSE
  )
}

# seq_len() keeps this a no-op when no gene survived the filter (1:0 would
# iterate over c(1, 0)); binding once avoids growing a data frame in a loop.
per_gene <- lapply(seq_len(nrow(norm_counts)), compare_gene)
if (length(per_gene) > 0) {
  result <- do.call(rbind, per_gene)
} else {
  result <- data.frame(
    id = character(0), httwt_hdac2wt = numeric(0),
    httq111_hdac2ko = numeric(0), log2FC = numeric(0),
    t_statistic = numeric(0), pvalue = numeric(0),
    stringsAsFactors = FALSE
  )
}

# Benjamini-Hochberg adjusted p-values, kept as an extra column.
result$BH <- p.adjust(result$pvalue,method="BH")

# Genes with similar expression in the two compared genotypes ----
# "Similar" = the per-gene t-test is NOT significant at nominal p > 0.05
# (the unadjusted p-value is used here, not the BH column).
result_insignificant <- result[result$pvalue > 0.05,]

# Attach the DE statistics of both comparisons to the retained genes.
merged_overlap <- merge(overlap, result_insignificant, by = "id")

cat(paste0("Overlap after_merged: ",nrow(merged_overlap)),"\n")

# The table is written in merge order, i.e. before the logFC sort below.
write.table(
  merged_overlap,
  file = "Overlap_HttQ111vsWT_WThdac2vsHdac2KOvsWT_HttQ111.txt",
  sep = "\t", quote = FALSE, row.names = FALSE
)

# Order genes by logFC of the first comparison (most negative first) and pull
# the matching count rows in that same order. match() alone both selects and
# orders, so no separate %in% filter is needed.
merged_overlap <- merged_overlap[order(merged_overlap$logFC.x),]
selected_counts <- norm_counts[match(merged_overlap$id, rownames(norm_counts)),]

# Use the part of each identifier before the first "|" as the row label.
# sub() works per element, so an id with no "|" (or more than one) can no
# longer shift the labels of the other rows, and an empty selection does not
# raise an error.
rownames(selected_counts) <- sub("\\|.*$", "", as.character(rownames(selected_counts)))

# Heatmap column annotations ----
# One entry per sample column, in column order: four consecutive genotype
# groups of 7, 8, 11 and 9 samples (35 in total); the first 15 columns are
# Hdac2 WT and the remaining 20 are Hdac2 KO.
annotation_col <- data.frame(
  Htt_genotype = rep(c("Q111/+", "+/+", "Q111/+", "+/+"), times = c(7, 8, 11, 9)),
  Hdac2_genotype = c(rep("WT", 15), rep("KO", 20))
)

# pheatmap matches annotation rows to matrix columns by name.
rownames(annotation_col) <- colnames(selected_counts)

# Colour assigned to each annotation level.
legend_col <- list(
  Htt_genotype = setNames(c("#ef8a62", "#67a9cf"), c("+/+", "Q111/+")),
  Hdac2_genotype = setNames(c("#af8dc3", "#7fbf7b"), c("WT", "KO"))
)

# Heatmaps ----
# Three PDFs are produced from the same ordered count matrix: all 152 genes,
# rows 1-97, and rows 98-152. The row numbers, page sizes and label
# coordinates are fixed values tuned to this gene list.

# Row-scaled heatmap with the settings shared by all three figures: no
# clustering, a gap after sample column 15, no colour or annotation legends.
# Extra arguments (e.g. gaps_row) are passed straight through to pheatmap().
draw_scaled_heatmap <- function(counts, ...) {
  pheatmap(
    counts,
    cluster_cols = FALSE, cluster_rows = FALSE,
    annotation_col = annotation_col, show_colnames = FALSE, scale = "row",
    fontsize = 15, gaps_col = 15, annotation_colors = legend_col,
    annotation_legend = FALSE, fontsize_row = 11,
    cellheight = 9, cellwidth = 12, legend = FALSE,
    ...
  )
  invisible(NULL)
}

# Hand-placed genotype labels above the annotation bars. The x positions are
# the same on every page except for the "KO" label; the y positions depend on
# the page height.
add_genotype_labels <- function(hdac2_y, htt_y, ko_x) {
  label_gp <- gpar(fontsize = 17)
  grid.text("WT", x = 0.24, y = hdac2_y, gp = label_gp)
  grid.text("KO", x = ko_x, y = hdac2_y, gp = label_gp)
  htt_labels <- c("Q111/+", "+/+", "Q111/+", "+/+")
  htt_x <- c(0.18, 0.31, 0.47, 0.635)
  for (k in seq_along(htt_labels)) {
    grid.text(htt_labels[k], x = htt_x[k], y = htt_y, gp = label_gp)
  }
  invisible(NULL)
}

# All genes, with a horizontal gap after row 97.
pdf(file = "152gene_heatmap.pdf", height = 20, width = 10)
draw_scaled_heatmap(selected_counts, gaps_row = 97)
#color = colorRampPalette((brewer.pal(n = 7, name ="YlOrRd")))(100),gaps_col=c(15))
#grid.text(c("Z score"),x=0.84,y=0.995,gp=gpar(fontsize=14,fontface=2))
add_genotype_labels(hdac2_y = 0.983, htt_y = 0.971, ko_x = 0.56)
dev.off()

# First 97 rows only.
pdf(file = "97gene_heatmap.pdf", height = 12.76, width = 10)
draw_scaled_heatmap(selected_counts[1:97, ])
add_genotype_labels(hdac2_y = 0.986, htt_y = 0.965, ko_x = 0.565)
dev.off()

# Remaining rows 98-152.
pdf(file = "55gene_heatmap.pdf", height = 9, width = 10)
draw_scaled_heatmap(selected_counts[98:152, ])
add_genotype_labels(hdac2_y = 0.895, htt_y = 0.87, ko_x = 0.565)
dev.off()




# Print the session information (R version, platform, attached packages) so
# the environment of this run is recorded alongside the script output.
sessionInfo()



