#!/usr/bin/Rscript

##### This script was written by Serkan Erdin to plot the
##### reduced effect of HttQ111 relative to HttWT in the Hdac2KO
##### background compared to the Hdac2WT background, and to assess
##### the chance of observing such an effect in randomly drawn gene sets.

##############################################################################################
##### R version 3.4.3 (2017-11-30)                                                           #
##### Platform: x86_64-apple-darwin15.6.0 (64-bit)                                           #
##### Running under: macOS Sierra 10.12.6                                                    #
#####                                                                                        #
##### Matrix products: default                                                               #
##### BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib      #
##### LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib    #
#####                                                                                        #
##### locale:                                                                                #
##### [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8                      #
#####                                                                                        #
##### attached base packages:                                                                #
##### [1] stats     graphics  grDevices utils     datasets  base                             #
#####                                                                                        #
##### other attached packages:                                                               #
##### [1] ggplot2_2.2.1                                                                      #
#####                                                                                        #
##### loaded via a namespace (and not attached):                                             #
#####  [1] labeling_0.3     colorspace_1.3-2 scales_0.5.0     compiler_3.4.3                 #
#####  [5] lazyeval_0.2.1   plyr_1.8.4       pillar_1.4.2     gtable_0.2.0                   #
#####  [9] tibble_2.1.3     crayon_1.3.4     Rcpp_0.12.17     grid_3.4.3                     #
##### [13] methods_3.4.3    pkgconfig_2.0.1  rlang_0.4.0      munsell_0.4.3                  #
##############################################################################################

# ---- Inputs ----

# Number of random gene sets drawn in the resampling step further below.
no_iterations <- 100000

# Differential-expression result tables for the two comparisons
# (file names indicate Hdac2 WT and Hdac2 KO backgrounds, respectively).
deresults_1 <- "DEG_lists/HttQ111vsWT_WTHdac2_2vs1_edgeR_quasilikelihoodFtest.SVA.txt"
deresults_2 <- "DEG_lists/HttQ111vsWT_KOHdac2_3vs4_edgeR_quasilikelihoodFtest.SVA.txt"

# Whitespace-separated list of gene identifiers of interest, read as a
# character vector.
gene.file <- "29_genes.txt"
genes <- scan(file = gene.file, what = character())

# ---- Load differential-expression results ----

# Read one tab-separated result table and copy its row names (the gene
# identifiers) into an explicit `id` column that is later used for merging.
read_de_table <- function(path) {
  de <- read.table(file = path, header = TRUE, sep = "\t",
                   check.names = FALSE, stringsAsFactors = FALSE)
  de$id <- rownames(de)
  de
}

# Keep only the rows whose row name is in `gene_ids` and only the `id` and
# `logFC` columns, then add:
#   FC  - fold change, 2^logFC
#   FC2 - fold change folded into (0, 1], i.e. 2^(-|logFC|)
select_gene_fold_changes <- function(de_all, gene_ids) {
  # Fail early instead of silently producing a table without fold changes.
  stopifnot("logFC" %in% colnames(de_all))
  de <- de_all[rownames(de_all) %in% gene_ids,
               colnames(de_all) %in% c("id", "logFC"),
               drop = FALSE]
  de$FC <- 2^de$logFC
  de$FC2 <- 2^(-abs(de$logFC))
  de
}

de_all_1 <- read_de_table(deresults_1)
de_1 <- select_gene_fold_changes(de_all_1, genes)

de_all_2 <- read_de_table(deresults_2)
de_2 <- select_gene_fold_changes(de_all_2, genes)

# One row per gene present in both tables; columns from the first table get
# the suffix `.x`, columns from the second table the suffix `.y`.
merged <- merge(de_1, de_2, by = "id")

# The plotting label is the part of the id before the first "|".
# Taking it per id (rather than reshaping all split pieces into a two-column
# matrix) keeps labels aligned even if an id has no "|" or more than one.
merged$symbol <- sub("\\|.*$", "", as.character(merged$id))

# ---- Observed effect for the genes of interest ----

# Per-gene difference in absolute log fold change: first comparison (.x)
# minus second comparison (.y).
difference <- with(merged, abs(logFC.x) - abs(logFC.y))
average_difference <- mean(difference)

avg_msg <- paste0("Average difference in absolute log fold changes:",
                  average_difference)
cat(avg_msg, "\n")

# Background: every gene present in both full result tables, joined on `id`.
merged_all <- merge(de_all_1, de_all_2, by = "id")

# Report the size (rows, columns) of the background table.
print(dim(merged_all))

# ---- Resampling test ----
# Draw `no_iterations` random gene sets from the background table and record,
# for each set, how many genes have a larger absolute log fold change in the
# first comparison (logFC.x) than in the second (logFC.y).
set.seed(123)

# Number of genes per random set (the gene-set size hard-coded in this script).
n_sampled <- 29

# Starts at 1 so that the empirical p-value computed below is
# (hits + 1) / (no_iterations + 1).
count <- 1

# The per-gene difference does not change between iterations, so compute it
# once and index into it instead of subsetting the data frame every time.
n_rows <- nrow(merged_all)
abs_diff_all <- abs(merged_all$logFC.x) - abs(merged_all$logFC.y)

# Preallocated: one entry per iteration holding the number of positive
# differences in that random set.
events <- integer(no_iterations)

for (i in seq_len(no_iterations)) {
  # Same draw as sample(1:n_rows, n_sampled, replace = FALSE).
  new_row <- sample.int(n_rows, n_sampled, replace = FALSE)
  selected_diff <- abs_diff_all[new_row]

  # NA differences are not counted as positive.
  n_positive <- sum(selected_diff > 0, na.rm = TRUE)

  if (n_positive == n_sampled) {
    # The original `count -> count + 1` parsed as `count + 1 <- count`,
    # which fails at runtime; this is the intended increment.
    count <- count + 1
  }
  events[i] <- n_positive
}

# Empirical 2.5% / 97.5% points of the per-set counts (sorted once).
sorted_events <- sort(events)
lower_CI <- sorted_events[no_iterations*0.025]
upper_CI <- sorted_events[no_iterations*0.975]

cat(paste0("95% confidence intervals: ",lower_CI, "   ",upper_CI),"\n")
cat(paste0("Median value: ",median(events)),"\n")

# Fraction of random sets in which every gene had a positive difference,
# with the +1 correction in numerator (via `count`) and denominator.
p_value <- count/(no_iterations+1)

cat(paste0("P value: ",p_value),"\n")

# ---- Bar plots for the genes of interest ----

# Bar fill colours: first = first comparison (.x columns, labelled Hdac2 +/+),
# second = second comparison (.y columns, labelled Hdac2 KO).
col <- c("#8dd3c7", "#fdb462")

# Plot 1: fold change (2^logFC) per gene, genes ordered by the fold change in
# the first comparison.
merged_sorted <- merged[order(merged$FC.x), ]

pdf(file = "29_gene_actual_ratio.pdf", height = 8, width = 8)
par(mar = c(5.5, 5.1, 2.1, 2.1))
# Columns are selected by name rather than by position so the plot does not
# depend on the column order produced by merge().
barplot(t(as.matrix(merged_sorted[, c("FC.x", "FC.y")])),
        beside = TRUE, names.arg = merged_sorted$symbol, las = 2, col = col,
        ylab = expression(paste("Fold change (","Htt"^"Q111/+","/","Htt"^"+/+",")")),
        cex.axis = 1.3, cex.lab = 1.3, cex.names = 1.3)
legend("topleft",
       legend = c(expression("Hdac2"^"+/+"), expression("Hdac2"^"KO")),
       fill = col, bty = "n", cex = 1.3)
dev.off()

# Plot 2: absolute log2 fold change per gene, genes ordered by the absolute
# log fold change in the first comparison. `merged_sorted` keeps this ordering
# for the code that follows.
merged_sorted <- merged[order(abs(merged$logFC.x)), ]

pdf(file = "29_gene_abslogfc.pdf", height = 8, width = 8)
par(mar = c(5.5, 5.1, 2.1, 2.1))
barplot(t(abs(as.matrix(merged_sorted[, c("logFC.x", "logFC.y")]))),
        beside = TRUE, names.arg = merged_sorted$symbol, las = 2, col = col,
        ylab = expression(paste("abs(log2(","Htt"^"Q111/+","/","Htt"^"+/+","))")),
        cex.axis = 1.3, cex.lab = 1.3, cex.names = 1.3)
legend("topleft",
       legend = c(expression("Hdac2"^"+/+"), expression("Hdac2"^"KO")),
       fill = col, bty = "n", cex = 1.3)
dev.off()

library(ggplot2)

# ---- Dot plot of absolute log fold changes by Hdac2 background ----

# Use the actual number of plotted genes instead of a hard-coded 29 so the
# group labels always line up with the stacked value vectors below.
n_genes <- nrow(merged_sorted)
Hdac2 <- rep(c("WT", "KO"), each = n_genes)

# Folded fold changes (2^(-|logFC|)) for both comparisons; not used by the
# plot below.
FoldChange <- c(merged_sorted$FC2.x, merged_sorted$FC2.y)

# Absolute log fold changes: first comparison (WT) followed by second (KO).
logFoldChange <- c(abs(merged_sorted$logFC.x), abs(merged_sorted$logFC.y))

# Build the grouping factor explicitly with WT as the first level.
# relevel() on the data-frame column only works when data.frame() converts
# strings to factors, which is no longer the default from R 4.0.0 on.
df <- data.frame(Hdac2 = factor(Hdac2, levels = c("WT", "KO")),
                 logFoldChange = logFoldChange)

pdf(file = "Dotplot_abslogFC.pdf", height = 6, width = 6)
# NOTE(review): `fun.y`/`fun.ymin`/`fun.ymax` match the ggplot2 version listed
# in the file header (2.2.1); newer ggplot2 releases deprecate them in favour
# of `fun`/`fun.min`/`fun.max` - confirm against the installed version.
dotplot <- ggplot(data = df, aes(y = logFoldChange, x = Hdac2)) +
  geom_dotplot(binaxis = "y", binwidth = 0.02, stackdir = "center") +
  theme(legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
        axis.title.x = element_blank(),
        text = element_text(size = 30)) +
  labs(y = expression(paste("abs(log2(","Htt"^"Q111/+","/","Htt"^"+/+","))"))) +
  coord_cartesian(ylim = c(0, 1.5)) +
  # Horizontal bar at the median of each group.
  stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median,
               geom = "crossbar", width = 0.5) +
  scale_x_discrete(labels = c(expression("Hdac2"^"+/+"), expression("Hdac2"^"KO")))
# Print explicitly so the plot is drawn even when the script is run without
# top-level auto-printing (e.g. via source()).
print(dotplot)
dev.off()

# Report the R version, platform and attached/loaded packages of this session.
sessionInfo()




