#!/usr/bin/Rscript

##### This script was written by Serkan Erdin to perform differential expression analysis on Langfelder data.


#################################################################################
##### R version 3.3.2 (2016-10-31)                                              #
##### Platform: x86_64-pc-linux-gnu (64-bit)                                    #
##### Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)     #
#####                                                                           #
##### locale:                                                                   #
##### [1] LC_CTYPE=en_US.UTF-8  LC_NUMERIC=C                                    #
##### [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8                     #
##### [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8                    #
##### [7] LC_PAPER=en_US.UTF-8  LC_NAME=C                                       #
##### [9] LC_ADDRESS=C               LC_TELEPHONE=C                             #
##### [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C                       #
#####                                                                           #
##### attached base packages:                                                   #
##### [1] stats     graphics  grDevices utils     datasets  base                #
#####                                                                           #
##### other attached packages:                                                  #
##### [1] sva_3.18.0        genefilter_1.56.0 mgcv_1.8-17         nlme_3.1-131  #
##### [5] gap_1.1-16        edgeR_3.12.0        limma_3.30.13                   #
#####                                                                           #
##### loaded via a namespace (and not attached):                                #
##### [1] Rcpp_0.12.10         AnnotationDbi_1.36.2 splines_3.3.2               #
##### [4] BiocGenerics_0.20.0  IRanges_2.8.1        statmod_1.4.29              #
##### [7] xtable_1.8-2         lattice_0.20-34      parallel_3.3.2              #
##### [10] grid_3.3.2           Biobase_2.34.0       DBI_0.6                    #
##### [13] survival_2.40-1        digest_0.6.12        Matrix_1.2-8             #
##### [16] S4Vectors_0.12.1     bitops_1.0-6         RCurl_1.95-4.8             #
##### [19] memoise_1.0.0        RSQLite_1.1-2        methods_3.3.2              #
##### [22] locfit_1.5-9.1         stats4_3.3.2         XML_3.98-1.6             #
##### [25] annotate_1.52.1                                                      #
#################################################################################


##### REQUIRED LIBRARIES #####

library(edgeR)
library(gap)
library(sva)

source("cleaningP.R")

##### INPUT FILES AND PARAMETERS #####

# Input locations: a tab-delimited count table (a "feature" column plus the
# per-sample count columns) and a tab-delimited sample sheet.
countfile <- "Counts/Langfelder_wt_q111_countData.txt"
idlist <- "MetaData/Langfelder_wt_q111_metaData.txt"

# Prefix shared by the output files written by this script.
prefix <- "LangfelderStudy"

# check.names = FALSE keeps the sample column names exactly as written in the
# file so they can be matched against the sample sheet below.
data <- read.table(file = countfile, header = TRUE, sep = "\t",
                   check.names = FALSE, stringsAsFactors = FALSE)
colnames(data)

ids <- read.table(file = idlist, header = TRUE, sep = "\t",
                  stringsAsFactors = FALSE)

# Fail early: a missing column would otherwise silently produce an empty
# sample sheet (order(NULL) selects zero rows).
if (is.null(ids$genotype) || is.null(ids$filename_prefix)) {
  stop("Metadata file '", idlist,
       "' must contain 'genotype' and 'filename_prefix' columns",
       call. = FALSE)
}

# Sort samples by genotype so that samples of one genotype are contiguous.
ids_sorted <- ids[order(ids$genotype), ]

print(ids_sorted$filename_prefix)

# Fail early: assigning NULL row names would silently reset them to 1..n.
if (is.null(data$feature)) {
  stop("Count file '", countfile, "' must contain a 'feature' column",
       call. = FALSE)
}

# Use the feature identifiers as row names, then drop the first column
# (assumed to be the feature column itself -- TODO confirm against the file).
rownames(data) <- data$feature
data <- data[, -1]

# Reorder the count columns to follow the sorted sample sheet. Every sample
# must be present in the count table; report the missing ones by name instead
# of failing later with "undefined columns selected".
sample_cols <- match(ids_sorted$filename_prefix, colnames(data))
if (anyNA(sample_cols)) {
  stop("Samples listed in '", idlist, "' have no column in '", countfile,
       "': ",
       paste(ids_sorted$filename_prefix[is.na(sample_cols)], collapse = ", "),
       call. = FALSE)
}
data <- data[, sample_cols]
colnames(data)

# Experimental factors, in the same (genotype-sorted) order as the columns of
# `data`. WT is made the reference level, so it is the first factor level.
group <- factor(ids_sorted$genotype)
group <- relevel(group, ref = "WT")
gender <- factor(ids_sorted$gender)

# The filter below indexes columns by genotype, so the sample sheet and the
# count table must describe the same samples.
if (length(group) != ncol(data)) {
  stop("Number of samples in the metadata (", length(group),
       ") does not match the number of count columns (", ncol(data), ")",
       call. = FALSE)
}

# Expression filter: keep a feature when every sample of at least one genotype
# has a count of min_count or more. The columns of each genotype are taken from
# `group` rather than from fixed positions, so the filter stays correct for any
# group sizes (for two groups of 8 samples it selects the same features as
# testing columns 1:8 and 9:16).
min_count <- 6
expressed <- data >= min_count
keep <- Reduce(`|`, lapply(levels(group), function(g) {
  in_group <- group == g
  rowSums(expressed[, in_group, drop = FALSE]) == sum(in_group)
}))

data <- data[keep, ]

# Build the edgeR container and compute TMM normalisation factors.
y <- DGEList(data, group = group)
y <- calcNormFactors(y, method = "TMM")

# Full model: one coefficient per genotype (no intercept).
# Null model: intercept only.
mod <- model.matrix(~ 0 + group, data = y$samples)
mod0 <- model.matrix(~ 1, data = y$samples)

# The contrast built below compares exactly two genotype columns.
if (ncol(mod) != 2) {
  stop("Expected exactly two genotype groups, found ", ncol(mod),
       call. = FALSE)
}

# Normalised (non-log) CPM values; computed once and reused for the surrogate
# variable estimation and for the corrected-count output.
norm_cpm <- cpm(y, normalized.lib.sizes = TRUE, log = FALSE)

# svaseq's default estimate of the number of surrogate variables is
# permutation-based, so fix the RNG seed to make the output reproducible.
set.seed(1234)
svobj <- svaseq(norm_cpm, mod, mod0)

if (svobj$n.sv > 0) {
  # Depending on the sva version, a single surrogate variable may be returned
  # as a plain vector; coerce to a matrix so it always has one column per SV.
  sv <- as.matrix(svobj$sv)
  colnames(sv) <- paste0("SV", seq_len(ncol(sv)))
  print(dim(sv))
  designSVA <- cbind(mod, sv)
  cleanCount <- cleaningP(norm_cpm, mod, svobj)
} else {
  # With no surrogate variables svobj$sv is the scalar 0; binding it to the
  # design would add an all-zero (rank-deficient) column.
  message("svaseq found no surrogate variables; using the genotype-only design")
  designSVA <- mod
  # NOTE(review): assumes cleaningP only regresses out the surrogate
  # variables, so with none the CPM matrix is written unchanged -- confirm
  # against cleaningP.R.
  cleanCount <- norm_cpm
}
designSVA

write.table(cleanCount, file = paste0(prefix, ".SVAcorrectedcount.txt"),
            sep = "\t", quote = FALSE)

# Contrast: (second genotype column) - (first genotype column), with a zero
# for every surrogate-variable column actually present in the design.
# NOTE(review): the first column is presumably WT because group was releveled
# with WT as reference -- verify against colnames(mod).
DEcontrast <- c(-1, 1, rep(0, times = ncol(designSVA) - ncol(mod)))
print(DEcontrast)

# Show the per-sample table stored in the DGEList.
print(y$samples)

# Estimate dispersions against the SVA-augmented design (robust = TRUE) and
# report the common dispersion.
y <- estimateDisp(y, design = designSVA, robust = TRUE)
print(y$common.dispersion)

# Diagnostic plot: biological coefficient of variation.
pdf(paste0(prefix, "_BCVplot.pdf"), width = 7, height = 7)
plotBCV(y)
dev.off()

# Diagnostic plot: mean-variance relationship. The result of the first call is
# stored in `meanvar` and passed to the second call; each call draws its own
# page in the PDF.
pdf(paste0(prefix, "_MeanVariance.pdf"), width = 7, height = 7)
meanvar <- plotMeanVar(
  y,
  show.raw.vars = TRUE,
  show.tagwise.vars = TRUE,
  show.ave.raw.vars = FALSE,
  NBline = TRUE,
  main = "Mean-Variance Plot"
)
plotMeanVar(
  y,
  meanvar = meanvar,
  show.tagwise.vars = TRUE,
  NBline = TRUE
)
dev.off()

# Fit the quasi-likelihood GLM on the same design and preview the coefficients.
fit <- glmQLFit(y, design = designSVA)
head(fit$coefficients)

# Diagnostic plot: quasi-likelihood dispersions.
pdf(paste0(prefix, "_QLDisplot.pdf"), width = 7, height = 7)
plotQLDisp(fit)
dev.off()
# Quasi-likelihood F-test for the contrast held in DEcontrast.
qlf <- glmQLFTest(fit, contrast = DEcontrast)

# Add multiple-testing corrected p-values to the result table.
qlf$table$BH <- p.adjust(qlf$table$PValue, method = "BH")
qlf$table$bonferroni <- p.adjust(qlf$table$PValue, method = "bonferroni")

# Classify each feature at the 0.05 threshold and print the tally.
is.de <- decideTestsDGE(qlf, p.value = 0.05)
summary(is.de)

# QQ plot of the raw p-values against the uniform distribution.
png(
  filename = paste0(prefix, "_edgeR_qlf_qqplot.png"),
  width = 800,
  height = 600
)
qqunif(qlf$table$PValue)
dev.off()

# Smear plot with the features flagged by is.de highlighted.
png(
  filename = paste0(prefix, "_edgeR_qlf_plotsmear.png"),
  width = 800,
  height = 600
)
plotSmear(qlf, de.tags = rownames(qlf)[is.de != 0])
dev.off()

# Write the full test table, including the adjusted p-value columns.
write.table(
  qlf$table,
  file = paste0(prefix, "_edgeR_quasilikelihoodFtest.txt"),
  sep = "\t",
  quote = FALSE
)

# Record the R and package versions used for this run.
sessionInfo()



