#!/usr/bin/env Rscript

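#This script requires a whitespace-separated table called "samples", with a header
#row, giving the name and the condition of each sample. For example:
#Sample	Condition
#1	A
#2	B
#Each sample name must match the name of that sample's quantification folder.

#Run the script from the folder containing the "samples" file; the output files are written there too.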

#-----------------------------START-----------------------------

#Read the command-line arguments (only those given after the script name).

#arg[1] is the directory containing the quantification folders.
#arg[2] is the name of the reference condition.

arg <- commandArgs(TRUE)

#Fail early with a clear message: a missing argument would otherwise only show up
#later as an obscure file-path or relevel() error.
if (length(arg) < 2 || !all(nzchar(arg[1:2]))) {
	stop(
		"Expected at least two arguments: <quantification directory> <reference condition>",
		call. = FALSE
	)
}

library("tximport")
library("readr")
library("DESeq2")

#--------------------------PREPARE------------------------------

#Import the samples descriptions (whitespace-separated table with a header row).
samples <- read.table("./samples", header=TRUE)

#Check that the two columns used by the rest of the script are present.
if (is.null(samples$Sample) || !("Condition" %in% colnames(samples))) {
	stop(
		"The \"samples\" file must have a header with the columns Sample and Condition",
		call. = FALSE
	)
}

#Treat the condition as categorical whatever it looks like in the file: numeric
#labels (1, 2, ...) would otherwise be read as a continuous variable and could
#not be releveled later.
samples$Condition <- factor(samples$Condition)

#Import the sample quantifications.
#Each sample is expected in <arg[1]>/<Sample>/quant.sf.
quants <- file.path(arg[1], samples$Sample, "quant.sf")
names(quants) <- samples$Sample

#Report exactly which quantification files are missing before going further.
absent <- quants[!file.exists(quants)]
if (length(absent) > 0) {
	stop(
		"Quantification file(s) not found: ", paste(absent, collapse = ", "),
		call. = FALSE
	)
}

#Import the transcript to genes table.
#An optional third argument (arg[3]) gives the path to this table; when it is not
#supplied the original default location is used.
t2g_file <- "/home/nico/Software/Salmon/t2g"
if (length(arg) >= 3 && nzchar(arg[3])) {
	t2g_file <- arg[3]
}
if (!file.exists(t2g_file)) {
	stop("Transcript to genes table not found: ", t2g_file, call. = FALSE)
}
t2g <- read.table(t2g_file, header=TRUE)

#Import quantification data with tximport.
#tx2gene makes tximport summarize the transcript-level Salmon output per gene.
txi <- tximport(quants, type="salmon", tx2gene=t2g, ignoreTxVersion = TRUE)

#Build the DESeqDataSet from the tximport object, modelling counts by Condition.
dds <- DESeqDataSetFromTximport(
	txi,
	colData = samples,
	design = ~ Condition
)

#Keep only the rows whose reads, summed over all samples, reach at least 20.
dds <- dds[rowSums(counts(dds)) >= 20, ]

#Make the condition named in arg[2] the reference (first) level.
dds$Condition <- relevel(
	dds$Condition,
	ref = arg[2]
)

#------------------------EXPLORE-------------------------------

#Do exploratory analyses.

##Transform data with VST.
##blind = FALSE: assume variables do not contribute to variance-mean relationship.
vsd <- vst(dds, blind = FALSE)

##Make distance matrix heatmap.
library("pheatmap")
library("RColorBrewer")
pdf("dist_matrix.pdf")
##Distances are computed between samples, hence the transpose of the assay matrix.
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix( sampleDists )
##Reversed "Blues" palette interpolated to 255 shades.
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
##The palette argument is spelled out in full ("color") instead of relying on
##partial matching of "col".
pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists, clustering_distance_cols = sampleDists, color = colors)
dev.off()

##Make a PCA plot (using DESeq2 PCA function)
pdf("PCA.pdf")
##Print explicitly so the plot is drawn into the PDF even when the script is
##run without top-level auto-printing (e.g. through source()).
print(plotPCA(vsd, intgroup = "Condition"))
dev.off()

#Export normalized counts.
#Size factors are estimated first so that counts() can return normalized values.
dds <- estimateSizeFactors(dds)
normcounts <- as.data.frame(counts(dds, normalized = TRUE))

library("AnnotationDbi")
library("org.Mm.eg.db")

#Add a gene symbol column, looking the row names up as Ensembl IDs in the mouse
#annotation database; when an ID maps to several symbols the first one is kept.
gene_ids <- row.names(normcounts)
normcounts$symbol <- mapIds(
	org.Mm.eg.db,
	keys=gene_ids,
	column="SYMBOL",
	keytype="ENSEMBL",
	multiVals="first"
)

#Write the annotated table as tab-separated text.
write.table(normcounts, file = "./normalized_counts", sep = "\t")

#----------------------COMPARE--------------------------------

#Run the differential expression test and store the results table in res.
#alpha = 0.05 corresponds to an FDR of 5%.

dds <- DESeq(dds)
res <- results(
	dds,
	alpha=0.05
)

#Add a gene symbol for each gene ID.
library("AnnotationDbi")
library("org.Mm.eg.db")

#Row names are looked up as Ensembl IDs; the first symbol is kept when there are several.
res_symbols <- mapIds(
	org.Mm.eg.db,
	keys=row.names(res),
	column="SYMBOL",
	keytype="ENSEMBL",
	multiVals="first"
)
res$symbol <- res_symbols

#Write the annotated results as tab-separated text.
write.table(res, file = "./Diff_expression", sep = "\t")

#---------------------EPILOGUE---------------------------

#Output the version of the software used.
#The printed session information is captured and written in one step, so the
#result does not depend on top-level auto-printing and console output is never
#left diverted if the script is interrupted.
writeLines(utils::capture.output(sessionInfo()), "session_info")
