###############################################################
###############################################################
##                                                           ##
## The following R-scripts were generated by                 ##
##                                                           ## 
## Jarod A. Rollins                                          ##
## MDI Biological Laboratory                                 ##
##                                                           ##
## and                                                       ##
##                                                           ##
## Markus Schosserer                                         ##
## University of Natural Resources and Life Sciences, Vienna ##
##                                                           ## 
###############################################################
###############################################################
#
#
#############################################################
### Analysis of differential gene expression using DESeq2 ###
#############################################################
#
# Load the DESeq2 package
library("DESeq2")
#
# Read the count data file and the metadata file from the working directory.
# The "read_counts.txt" file is a tab-delimited table of read counts per Wormbase Gene ID (rows) vs. Sample ID (columns)
# The "metadata.csv" file is a comma-separated table of Sample ID (rows) vs. batch and condition (columns).
#
# Read the count table: the first column (Wormbase Gene ID) becomes the row
# names, the remaining columns are the per-sample read counts.
cts <- read.table("read_counts.txt", header = TRUE, check.names = TRUE,
                  blank.lines.skip = TRUE, row.names = 1)
# Read the sample sheet: the first column (Sample ID) becomes the row names and
# every column is read as a factor.
coldata <- read.csv("metadata.csv", header = TRUE, sep = ",", check.names = TRUE,
                    blank.lines.skip = TRUE, row.names = 1, colClasses = "factor")
#
# Make sure the count columns and the metadata rows describe the same samples
# in the same order; otherwise counts would be paired with the wrong
# batch/condition by position.
sample_ids <- rownames(coldata)
if (!all(sample_ids %in% colnames(cts))) {
  # check.names = TRUE rewrites count-table headers that are not syntactic R
  # names, so retry with the metadata IDs converted the same way.
  sample_ids <- make.names(sample_ids)
}
unmatched_ids <- union(setdiff(sample_ids, colnames(cts)),
                       setdiff(colnames(cts), sample_ids))
if (anyDuplicated(sample_ids) > 0 || length(unmatched_ids) > 0) {
  stop("Sample IDs in metadata.csv and read_counts.txt do not match: ",
       paste(unmatched_ids, collapse = ", "), call. = FALSE)
}
# Reorder the count columns to the metadata row order and use identical IDs
# on both sides.
cts <- cts[, sample_ids, drop = FALSE]
rownames(coldata) <- sample_ids
#
# Generate a DESeq2 data set; the design models batch and condition.
dds <- DESeqDataSetFromMatrix(countData = cts,
                              colData = coldata,
                              design = ~ batch + condition)
#
# Run DESeq2 and define the contrast to be analyzed.
# For nsun-1 RNAi treated samples, re-run the analysis from this step and
# replace "control_polysomes" and "control_total" with "nsun_1_polysomes" and
# "nsun_1_total". Also rename the generated tables and plots so that the
# control files are not overwritten.
#
dds <- DESeq(dds)
resCont <- results(dds, contrast = c("condition", "control_polysomes", "control_total"))
#
# Save the result as a comma-separated table.
write.csv(as.data.frame(resCont), file = "results_control.csv")
#
#
######################################################
### Filter results according to different criteria ###
######################################################
#
# Generate and download a tab-delimited table of Stable Gene IDs (rows) vs. corresponding Gene names, Gene descriptions and Gene type (columns) from ensembl: "https://www.ensembl.org/biomart/martview/".
# Save this list in the working directory as "worm_gene_type.txt".
#
# Load this list into R.
# Load the annotation table into R.
# quote = "" and comment.char = "" make read.table treat apostrophes (e.g.
# "3'-5'") and "#" inside gene descriptions as ordinary text rather than as
# quote / comment markers, which would otherwise corrupt the parsed rows.
worm_genes <- read.table(file = "worm_gene_type.txt", sep = "\t", header = TRUE,
                         quote = "", comment.char = "")
#
# Merge the DESeq2 result with the annotation file.
# The gene IDs of the DESeq2 result are its row names (the count table was read
# with row.names = 1), so they are copied into a "Gene.stable.ID" column first.
# Merging on that shared name also keeps the key column called
# "Gene.stable.ID", which the export below and the GO analysis rely on.
res_table <- as.data.frame(resCont)
res_table$Gene.stable.ID <- rownames(res_table)
results_control <- merge(res_table, worm_genes, by = "Gene.stable.ID")
#
# Filter the dataset to include only protein coding genes.
results_control <- subset(results_control, Gene.type == "protein_coding")
#
# Filter the dataset to remove genes with minimal basal expression.
results_control <- subset(results_control, baseMean > 1)
#
# Consider only at least 2-fold up-regulated genes.
Up.quant <- subset(results_control, log2FoldChange > 1)
#
# Filter out non-significant genes according to the defined adjusted p-value
# cutoff (subset() also drops rows whose padj is NA).
Up.quant <- subset(Up.quant, padj < 0.05)
#
# Repeat these two steps for down-regulated genes.
Down.quant <- subset(results_control, log2FoldChange < -1)
Down.quant <- subset(Down.quant, padj < 0.05)
#
# Sort the datasets by fold-change, strongest change first.
Up.quant <- Up.quant[order(Up.quant$log2FoldChange, decreasing = TRUE), ]
Down.quant <- Down.quant[order(Down.quant$log2FoldChange, decreasing = FALSE), ]
#
# Export the results as tab-delimited tables.
export_cols <- c("Gene.stable.ID", "Gene.name", "Gene.description", "baseMean",
                 "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")
write.table(Up.quant[, export_cols], file = "control_TI_up.txt",
            sep = "\t", row.names = FALSE)
write.table(Down.quant[, export_cols], file = "control_TI_down.txt",
            sep = "\t", row.names = FALSE)
#
#
#####################
### Volcano plots ###
#####################
#
# Load the EnhancedVolcano package
library(EnhancedVolcano)
#
# Define gene names as row names.
# Define gene names as row names. make.unique() disambiguates repeated gene
# names, because duplicated row names are rejected by `row.names<-`.
row.names(results_control) <- make.unique(as.character(results_control$Gene.name))
# Pick the (up to) five strongest up- and down-regulated genes for labelling.
# head() returns fewer rows when fewer than five genes passed the filters,
# whereas indexing with 1:5 would pad the table with all-NA rows. All columns
# are kept so that Gene.name is available whatever the column order is.
top5up <- head(Up.quant, 5)
top5down <- head(Down.quant, 5)
top <- rbind(top5up, top5down)
#
# Generate the volcano plot. Change the plotting-parameters as needed.
# NOTE(review): newer EnhancedVolcano releases appear to name the `legend`
# argument `legendLabels` - confirm against the installed version.
EnhancedVolcano(results_control,
                lab = rownames(results_control),
                x = 'log2FoldChange',
                y = 'padj',
                title = 'translational index',
                subtitle = 'control',
                selectLab = top$Gene.name,
                xlim = c(-7.5, 7.5),
                ylim = c(0, 20),
                xlab = bquote(~Log[2]~ 'fold change'),
                ylab = bquote(~-Log[10]~adjusted~italic(P)),
                pCutoff = 0.05,
                FCcutoff = 1.0,
                labSize = 3.0,
                labCol = 'black',
                labFace = 'bold',
                boxedLabels = TRUE,
                colAlpha = 0.5,
                legend = c('not significant', 'Log2 FC', 'Adjusted p-value',
                           'Adjusted p-value & Log2 FC'),
                legendPosition = 'right',
                legendLabSize = 12,
                legendIconSize = 5.0,
                drawConnectors = TRUE,
                widthConnectors = 0.5,
                colConnectors = 'black')
#
#
################################
### Gene Ontology annotation ###
################################
#
# Initialize libraries
library("RDAVIDWebService")
library('Rgraphviz')
library("Rcpp")
library("ggplot2")
library('rJava')
library("colorspace")
#
# GO function that returns the enriched annotation terms of a gene list for the given annotation categories.
# The function uses a background consisting of all 'quantifiable' genes, i.e. the genes kept in "results_control" after the filtering steps above.
# The correct e-mail address (registered with DAVID) needs to be specified.
#
# Query the DAVID web service for the functional annotation chart of a gene list.
#
# Arguments:
#   List       - Wormbase gene IDs of the genes of interest.
#   Anno       - annotation category (or categories) passed to
#                setAnnotationCategories(), e.g. 'GOTERM_BP_FAT'.
#   email      - e-mail address used to open the DAVID session. The default is
#                the original placeholder and must be replaced by a real address.
#   background - Wormbase gene IDs submitted as the background list. Defaults to
#                the IDs in the global `results_control` table; the default is
#                only evaluated when the function is called.
#
# Returns the value of getFunctionalAnnotationChart() for the submitted lists.
GetGO <- function(List, Anno, email = 'xxx.yy',
                  background = results_control$Gene.stable.ID) {
  library("RDAVIDWebService")
  david <- DAVIDWebService$new(
    email = email,
    url = "https://david.ncifcrf.gov/webservice/services/DAVIDWebService.DAVIDWebServiceHttpSoap12Endpoint/"
  )
  # Long timeout to prevent server errors for submission of long lists.
  setTimeOut(david, 500000)
  setAnnotationCategories(david, Anno)
  # Submit the genes of interest, then the background they are compared against.
  addList(david, List, "WORMBASE_GENE_ID", listName = 'NEW', listType = 'Gene')
  addList(david, background, "WORMBASE_GENE_ID", listName = "Quant_all",
          listType = 'Background')
  getFunctionalAnnotationChart(david)
}
########## END OF FUNCTION ###############

# Use the GetGO function to retrieve enriched GO terms in the "BP", "CC" and "MF" categories.
# Query DAVID for the "BP", "CC" and "MF" GO categories, once for the
# up-regulated and once for the down-regulated gene list.
control.Up.BP <- GetGO(Up.quant$Gene.stable.ID, 'GOTERM_BP_FAT')
control.Up.CC <- GetGO(Up.quant$Gene.stable.ID, 'GOTERM_CC_FAT')
control.Up.MF <- GetGO(Up.quant$Gene.stable.ID, 'GOTERM_MF_FAT')
control.Down.BP <- GetGO(Down.quant$Gene.stable.ID, 'GOTERM_BP_FAT')
control.Down.CC <- GetGO(Down.quant$Gene.stable.ID, 'GOTERM_CC_FAT')
control.Down.MF <- GetGO(Down.quant$Gene.stable.ID, 'GOTERM_MF_FAT')

# Helper: keep only the terms with FDR < 0.05 and sort them by decreasing
# fold enrichment.
# NOTE(review): some DAVID releases may report FDR as a percentage rather than
# a fraction - confirm that the 0.05 cutoff matches the scale returned.
rank_significant_terms <- function(chart) {
  significant <- subset(chart, FDR < 0.05)
  significant[order(significant$Fold.Enrichment, decreasing = TRUE), ]
}

# Significant terms of every chart, ordered by fold enrichment.
control.Up.BP.sig <- rank_significant_terms(control.Up.BP)
control.Up.CC.sig <- rank_significant_terms(control.Up.CC)
control.Up.MF.sig <- rank_significant_terms(control.Up.MF)
control.Down.BP.sig <- rank_significant_terms(control.Down.BP)
control.Down.CC.sig <- rank_significant_terms(control.Down.CC)
control.Down.MF.sig <- rank_significant_terms(control.Down.MF)

# Write one tab-delimited table per category and direction.
write.table(control.Up.BP.sig, file = 'DAVID_BP_control_up.txt',
            sep = '\t', row.names = FALSE)
write.table(control.Up.CC.sig, file = 'DAVID_CC_control_up.txt',
            sep = '\t', row.names = FALSE)
write.table(control.Up.MF.sig, file = 'DAVID_MF_control_up.txt',
            sep = '\t', row.names = FALSE)
write.table(control.Down.BP.sig, file = 'DAVID_BP_control_down.txt',
            sep = '\t', row.names = FALSE)
write.table(control.Down.CC.sig, file = 'DAVID_CC_control_down.txt',
            sep = '\t', row.names = FALSE)
write.table(control.Down.MF.sig, file = 'DAVID_MF_control_down.txt',
            sep = '\t', row.names = FALSE)