#!/usr/bin/Rscript

### This script was written by Serkan Erdin to perform pathway analysis of a selected
### gene list (query) against a background gene list using DAVID version 6.8

###### Session Info  ####################################################################									
#  R version 3.4.3 (2017-11-30)								#
#  Platform: x86_64-apple-darwin15.6.0 (64-bit)						#
#  Running under: macOS Sierra 10.12.6							#
#											#
#  Matrix products: default								#	
#  BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib	#
#  LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib	#
#											#	
#  locale:										#
#  [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8			#
#											#
#  attached base packages:								#
#  [1] stats4    parallel  methods   stats     graphics  grDevices utils    		#
#  [8] datasets  base     								#
#											#
#  other attached packages:								#
#  [1] RDAVIDWebService_1.16.0 ggplot2_2.2.1           GOstats_2.44.0         		#
#  [4] Category_2.44.0         Matrix_1.2-12           AnnotationDbi_1.40.0   		#
#  [7] IRanges_2.12.0          S4Vectors_0.16.0        Biobase_2.38.0         		#
#  [10] graph_1.56.0            BiocGenerics_0.24.0     rJava_0.9-10           		#
#											#
#  loaded via a namespace (and not attached):						#
#  [1] Rcpp_0.12.17           plyr_1.8.4             pillar_1.4.2          		#
#  [4] compiler_3.4.3         bitops_1.0-6           digest_0.6.15         		#
#  [7] bit_1.1-12             gtable_0.2.0           annotate_1.56.1       		#
#  [10] RSQLite_2.0            memoise_1.1.0          tibble_2.1.3          		#	
#  [13] lattice_0.20-35        pkgconfig_2.0.1        rlang_0.4.0           		#
#  [16] DBI_0.8                Rgraphviz_2.22.0       genefilter_1.60.0     		#
#  [19] bit64_0.9-7            grid_3.4.3             GSEABase_1.40.1       		#
#  [22] XML_3.98-1.10          RBGL_1.54.0            survival_2.44-1.1     		#
#  [25] GO.db_3.5.0            blob_1.1.0             scales_0.5.0          		#
#  [28] splines_3.4.3          colorspace_1.3-2       AnnotationForge_1.20.0		#
#  [31] xtable_1.8-2           lazyeval_0.2.1         munsell_0.4.3         		#
#  [34] RCurl_1.95-4.10        crayon_1.3.4          					#
#											#
#########################################################################################			

# Command-line interface: five positional arguments are required.
#   1. DEresultFile  differential expression result file from edgeR in DEG_lists
#   2. statistics    how differentially expressed genes are defined: "nominal"
#                    (nominal p value) or "fdr"
#   3. option        direction of dysregulation: "up", "down" or "all" (both)
#   4. threshold     p value or fdr threshold (numeric)
#   5. prefix        prefix for output files
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a usage message; otherwise missing arguments become NA and
# only surface later as unrelated-looking errors.
if (length(args) < 5L) {
  stop(
    "Expected 5 arguments: <DEresultFile> <nominal|fdr> <all|up|down> ",
    "<threshold> <prefix>; got ", length(args), ".",
    call. = FALSE
  )
}

DEresultFile <- args[1]
statistics <- args[2]
option <- args[3]
prefix <- args[5]

# A non-numeric threshold would turn into NA and make every comparison against
# it NA, which silently selects no genes; reject it here instead.
threshold <- suppressWarnings(as.numeric(args[4]))
if (is.na(threshold)) {
  stop("threshold must be numeric, got '", args[4], "'.", call. = FALSE)
}

# Preload the JVM shared library before rJava is attached. The default is the
# macOS JDK 9 location this script was developed with; set the DAVID_JVM_LIB
# environment variable to point somewhere else on other machines.
jvm_lib <- Sys.getenv(
  "DAVID_JVM_LIB",
  unset = "/Library/Java/JavaVirtualMachines/jdk-9.0.1.jdk/Contents/Home/lib/server/libjvm.dylib"
)
if (file.exists(jvm_lib)) {
  dyn.load(jvm_lib)
} else {
  # Do not abort here: rJava may still be able to locate a JVM on its own.
  message("JVM library not found at '", jvm_lib, "'; relying on rJava to locate the JVM.")
}
library(rJava)
library("RDAVIDWebService")

# Connect to the DAVID web service. The e-mail is taken from the DAVID_EMAIL
# environment variable when set, falling back to the placeholder below.
# NOTE(review): DAVID presumably requires a registered e-mail address for web
# service access — confirm and set DAVID_EMAIL (or edit the placeholder).
david <- DAVIDWebService$new(
  email = Sys.getenv("DAVID_EMAIL", unset = "your email address"),
  url = "https://david.ncifcrf.gov/webservice/services/DAVIDWebService.DAVIDWebServiceHttpSoap12Endpoint/"
)

# Load the differential expression table. The file's own header line is skipped
# (skip = 1, header = FALSE) and replaced by the fixed column names below.
DEresults <- read.table(file = DEresultFile, header = FALSE, sep = "\t", skip = 1)
expected_columns <- c("gene", "logFC", "logCPM", "F", "PValue", "BH", "bonferroni")
if (ncol(DEresults) != length(expected_columns)) {
  stop(
    "Expected ", length(expected_columns), " tab-separated columns in '",
    DEresultFile, "' but found ", ncol(DEresults), ".",
    call. = FALSE
  )
}
colnames(DEresults) <- expected_columns

# The gene column has the form "symbol|ensemblid". Split row by row and verify
# every row yields exactly two parts; flattening all parts into a two-column
# matrix would silently shift symbols and IDs against each other as soon as one
# row is malformed.
gene_parts <- strsplit(as.character(DEresults$gene), "|", fixed = TRUE)
malformed <- which(lengths(gene_parts) != 2L)
if (length(malformed) > 0L) {
  stop(
    "Gene identifiers must have the form 'symbol|ensemblid'; ",
    length(malformed), " row(s) do not, first at data row ", malformed[1],
    " ('", as.character(DEresults$gene[malformed[1]]), "').",
    call. = FALSE
  )
}
DEresults$symbol <- vapply(gene_parts, `[`, character(1), 1L)
DEresults$ensemblid <- vapply(gene_parts, `[`, character(1), 2L)

# Raise the DAVID client's timeout before any list is submitted.
# NOTE(review): RDAVIDWebService documents this value as milliseconds
# (200000 would be 200 s) — confirm against the package manual.
setTimeOut(david,200000)

# Build the query (foreground) list of Ensembl IDs.
#   statistics: "nominal" filters on the PValue column, "fdr" on the BH column.
#   option:     "all" keeps both directions, "up" keeps logFC > 0,
#               "down" keeps logFC < 0.
# Unrecognized values are rejected explicitly; otherwise query_list would never
# be assigned and the script would fail later with "object not found".
if (!(statistics %in% c("nominal", "fdr"))) {
  stop("statistics must be 'nominal' or 'fdr', got '", statistics, "'.", call. = FALSE)
}
if (!(option %in% c("all", "up", "down"))) {
  stop("option must be 'all', 'up' or 'down', got '", option, "'.", call. = FALSE)
}

significance <- if (statistics == "nominal") DEresults$PValue else DEresults$BH
selected <- significance < threshold
if (option == "up") {
  selected <- selected & (DEresults$logFC > 0)
} else if (option == "down") {
  selected <- selected & (DEresults$logFC < 0)
}

# which() drops rows whose comparison is NA instead of propagating them.
query_list <- DEresults$ensemblid[which(selected)]

# Every gene in the differential expression table forms the background set.
background_list <- DEresults$ensemblid

# Log the sizes of the query and background sets.
print(length(query_list))
print(length(background_list))

# Upload both sets to DAVID as Ensembl gene IDs: the query as a "Gene" list
# and the full table as the "Background" list.
FG <- addList(
  david,
  query_list,
  idType = "ENSEMBL_GENE_ID",
  listName = "query",
  listType = "Gene"
)
BG <- addList(
  david,
  background_list,
  idType = "ENSEMBL_GENE_ID",
  listName = "all",
  listType = "Background"
)

# Report the mapping result of each upload (auto-printed at top level).
# NOTE(review): per the RDAVIDWebService manual, inDavid is the fraction of
# submitted IDs known to DAVID and unmappedIds the IDs it could not map — confirm.
FG[["inDavid"]]
length(FG[["unmappedIds"]])
BG[["inDavid"]]
length(BG[["unmappedIds"]])

# Restrict the enrichment analysis to the direct GO terms (BP, MF, CC) and
# KEGG pathways.
annotation_categories <- c(
  "GOTERM_BP_DIRECT",
  "GOTERM_MF_DIRECT",
  "GOTERM_CC_DIRECT",
  "KEGG_PATHWAY"
)
setAnnotationCategories(david, annotation_categories)

# Retrieve the functional annotation chart into the session.
FuncAnnotChart <- getFunctionalAnnotationChart(david)

# Write the chart to <prefix>_<statistics>_<option>_<threshold>.FuncAnnotChart.tsv.
# threshold = 1.0 is passed to the export call, separate from the gene-selection
# threshold used in the file name.
chart_file <- paste0(
  paste(prefix, statistics, option, threshold, sep = "_"),
  ".FuncAnnotChart.tsv"
)
getFunctionalAnnotationChartFile(david, chart_file, threshold = 1.0)

# Record the R and package versions used for this run.
sessionInfo()


