library(edgeR)
library(limma)
library(DESeq2)
library(tools)
trim <- function (x) gsub("^\\s+|\\s+$", "", x)

edgeR.de.analysis <- function(countTabpath, metaTabpath, cpmCutoff, sampCutoff, group1, group2) {
  count.all <- read.delim(as.character(countTabpath), row.names=1, check.names=FALSE)
  metaTab <- read.delim(as.character(metaTabpath), header = T)
  colnames(metaTab) <- tolower(colnames(metaTab))
  metaTab <- metaTab[match(colnames(count.all), metaTab$sample),]
  # print('===')
  # print(head(count.all))
  # print(metaTabpath)
  # print('===')
  if (dim(metaTab)[2]>2) {
    groupinfo <- metaTab[,2]
    for (i in 3:length(metaTab[1,])) {
      groupinfo <- paste(groupinfo, metaTab[,i], sep=".")
    }
    Group <- factor(groupinfo)
  } else {
    Group <- factor(metaTab[,2])
  }
  
  dge.count <- DGEList(counts=count.all, group=Group)
  dge.count <- calcNormFactors(dge.count)
  
  ### Remove low expression reads from NA/0 values
  cpm.count <- cpm(dge.count$counts) #cpm normalization
  threshold.value <- as.numeric(sampCutoff)
  keep = rowSums(cpm.count > as.numeric(cpmCutoff)) >=threshold.value  
  
  ##(dge.count.rmlow): DGEList of count rm low expression values.
  dge.count.rmlow <- dge.count[keep,]
  # dim(dge.count.rmlow$counts)
  ##update library size after removing low expressed genes
  dge.count.rmlow$samples$lib.size <- colSums(dge.count.rmlow$counts)
  
  ##TMM library normalization
  dge.count.rmlow <- calcNormFactors(dge.count.rmlow,method="TMM")
  log.cpm.rmlow <- cpm(dge.count.rmlow, normalized.lib.sizes=T, log=T)
  
  ##save cpm/logcpm results after removing the low expression genes
    edgeR.res.path <- paste(getwd(), "/edgeR-res-", file_path_sans_ext(basename(metaTabpath)), '-comp-', as.character(trim(group1)), "_", as.character(trim(group2)), sep="")
    if (!dir.exists(edgeR.res.path)) dir.create(edgeR.res.path)
    logcpm.fname <- paste(as.character(edgeR.res.path), "/rmlow-logcpm-", "cpmCutoof_", as.character(cpmCutoff), "-sampCutoff_", as.character(sampCutoff), "-", as.character(dim(log.cpm.rmlow)[1]), ".txt", sep=""  )
    if (!file.exists(logcpm.fname)) write.table(log.cpm.rmlow, as.character(logcpm.fname), row.names=T, col.names=NA, quote=F, sep="\t")
  
  ##buildup the model matrix based on the metadata table
  f <- factor(Group, levels=levels(Group))
  design <- model.matrix(~0+f)
  rownames(design) <- rownames(dge.count.rmlow$samples)
  colnames(design) <- levels(Group)
  
  ##start model fitting and contrast comparison
  dge.count.rmlow <- estimateDisp(dge.count.rmlow, design)
  dge.count.rmlow$common.dispersion
  summary(dge.count.rmlow$tagwise.dispersion)
  # print('===')
  # print(dge.count.rmlow)
  # print('===')
  glm.fit <- glmFit(dge.count.rmlow, design)
  
  comp <- makeContrasts(contrasts=paste(as.character(trim(group2)), as.character(trim(group1)), sep="-"), levels=design )
  test <- glmLRT(glm.fit, contrast=comp)
  
  tpsave <- topTags(test,n=Inf, adjust.method="BH", sort.by="PValue")
  
  ##save the full DE analysis results 
    tp.fname <- paste(as.character(edgeR.res.path), "/edgeR-full-GLMDisp-est-", "cpmCutoof_", as.character(cpmCutoff), "-sampCutoff_", as.character(sampCutoff), ".txt", sep="" )
    # print(head(tpsave$table))
    if (!file.exists(tp.fname)) write.table(tpsave$table, as.character(tp.fname), row.names=T, col.names=NA, quote=F, sep="\t")

  ##return the DE analysis results, res saved file name and file path
  return(list(res=as.data.frame(tpsave$table), res.fname=tp.fname, res.dir=edgeR.res.path ))
}

####################################################
####limma-voom DE analysis
####################################################

limmavoom.de.analysis <- function(countTabpath, metaTabpath, cpmCutoff, sampCutoff, group1, group2) {
  count.all <- read.delim(as.character(countTabpath), row.names=1, check.names=FALSE)
  metaTab <- read.delim(as.character(metaTabpath), header = T)
  colnames(metaTab) <- tolower(colnames(metaTab))
  metaTab <- metaTab[match(colnames(count.all), metaTab$sample),]
  
  if (dim(metaTab)[2]>2) {
    groupinfo <- metaTab[,2]
    for (i in 3:length(metaTab[1,])) {
      groupinfo <- paste(groupinfo, metaTab[,i], sep=".")
    }
    Group <- factor(groupinfo)
  } else {
    Group <- factor(metaTab[,2])
  }
  dge.count <- DGEList(counts=count.all, group=Group)
  dge.count <- calcNormFactors(dge.count)
  
  ### Remove low expression reads from NA/0 values
  cpm.count <- cpm(dge.count$counts) #cpm normalization
  threshold.value <- as.numeric(sampCutoff)
  keep = rowSums(cpm.count > as.numeric(cpmCutoff)) >=threshold.value  
  
  ##(dge.count.rmlow): DGEList of count rm low expression values.
  dge.count.rmlow <- dge.count[keep,]
  # dim(dge.count.rmlow$counts)
  ##update library size after removing low expressed genes
  dge.count.rmlow$samples$lib.size <- colSums(dge.count.rmlow$counts)
  
  ##update TMM library normalization, update library normalization factor
  dge.count.rmlow <- calcNormFactors(dge.count.rmlow,method="TMM")
  log.cpm.rmlow <- cpm(dge.count.rmlow, normalized.lib.sizes=T, log=T)
  
  ##save cpm/logcpm results after removing the low expression genes
    limmavoom.res.path <- paste(getwd(), "/limma-voom-res-", file_path_sans_ext(basename(metaTabpath)), '-comp-', as.character(trim(group1)), "_", as.character(trim(group2)), sep="")
    if (!dir.exists(limmavoom.res.path)) dir.create(limmavoom.res.path)
    logcpm.fname <- paste(as.character(limmavoom.res.path), "/rmlow-logcpm-", "cpmCutoof_", as.character(cpmCutoff), "-sampCutoff_", as.character(sampCutoff), "-", as.character(dim(log.cpm.rmlow)[1]), ".txt", sep=""  )
    if (!file.exists(logcpm.fname)) write.table(log.cpm.rmlow, as.character(logcpm.fname), row.names=T, col.names=NA, quote=F, sep="\t")
  
  ##buildup the model matrix based on the metadata table
  f <- factor(Group, levels=levels(Group))
  design <- model.matrix(~0+f)
  rownames(design) <- rownames(dge.count.rmlow$samples)
  colnames(design) <- levels(Group)
  
  ##start model fitting and contrast comparison
  v <- voom(dge.count.rmlow, design=design, plot=T, normalize="quantile")
  fit <- lmFit(v, design)
  comp <- makeContrasts(contrasts=paste(as.character(trim(group2)), as.character(trim(group1)), sep="-"), levels=design )
  contrast.fit <- contrasts.fit(fit, contrasts=comp)
  contrast.fit <- eBayes(contrast.fit)  
  tpvoom.save <- topTable(contrast.fit, number=Inf, adjust="BH", sort.by="P")
  # print(head(tpvoom.save))
  
  ##save full DE analysis results to a file named in tpvoom.fname
    tpvoom.fname <- paste(as.character(limmavoom.res.path), "/limma-voom-full-GLMDisp-est-", "cpmCutoof_", as.character(cpmCutoff), "-sampCutoff_", as.character(sampCutoff), ".txt", sep="" )
    if(!file.exists(tpvoom.fname)) write.table(tpvoom.save, as.character(tpvoom.fname), row.names=T, col.names=NA, quote=F, sep="\t")

  ##return the DE analysis results, res saved file name and file path
  return(list(res=as.data.frame(tpvoom.save), res.fname=tpvoom.fname, res.dir=limmavoom.res.path))  
}

############################################################
##### DESeq2 DE analysis
############################################################
deseq2.de.analysis <- function(countTabpath, metaTabpath, cpmCutoff, sampCutoff, group1, group2) {
  count.all <- read.delim(as.character(countTabpath), row.names=1, check.names=FALSE)
  metaTab <- read.delim(as.character(metaTabpath), header = T)
  colnames(metaTab) <- tolower(colnames(metaTab))
  metaTab <- metaTab[match(colnames(count.all), metaTab$sample),]
  
  if (dim(metaTab)[2]>2) {
    groupinfo <- metaTab[,2]
    for (i in 3:length(metaTab[1,])) {
      groupinfo <- paste(groupinfo, metaTab[,i], sep=".")
    }
    Group <- factor(groupinfo)
  } else {
    Group <- factor(metaTab[,2])
  }
  
  dge.count <- DGEList(counts=count.all, group=Group)
  dge.count <- calcNormFactors(dge.count)
  
  ### Remove low expression reads from NA/0 values
  cpm.count <- cpm(dge.count$counts) #cpm normalization
  threshold.value <- as.numeric(sampCutoff)
  keep = rowSums(cpm.count > as.numeric(cpmCutoff)) >=threshold.value  
  
  ##(dge.count.rmlow): DGEList of count rm low expression values.
  dge.count.rmlow <- dge.count[keep,]
  # dim(dge.count.rmlow$counts)
  ##update library size after removing low expressed genes
  dge.count.rmlow$samples$lib.size <- colSums(dge.count.rmlow$counts)
  
  ##update TMM library normalization, update library normalization factor
  dge.count.rmlow <- calcNormFactors(dge.count.rmlow,method="TMM")
  log.cpm.rmlow <- cpm(dge.count.rmlow, normalized.lib.sizes=T, log=T)
  
  ##
    deseq2.res.path <- paste(getwd(), "/DEseq2-res-", file_path_sans_ext(basename(metaTabpath)), '-comp-', as.character(trim(group1)), "_", as.character(trim(group2)), sep="")
    if (!dir.exists(deseq2.res.path)) dir.create(deseq2.res.path)
    logcpm.fname <- paste(deseq2.res.path, "/rmlow-logcpm-", "cpmCutoof_", as.character(cpmCutoff), "-sampCutoff_", as.character(sampCutoff), "-", as.character(dim(log.cpm.rmlow)[1]), ".txt", sep=""  )
    if (!file.exists(logcpm.fname)) write.table(log.cpm.rmlow, as.character(logcpm.fname), row.names=T, col.names=NA, quote=F, sep="\t")
 
  ##start readin count matrix to DESeq2 object, model fitting, and contrast comparison
  dds <- DESeqDataSetFromMatrix(countData=as.matrix(dge.count.rmlow$count), colData=DataFrame(Group), design=formula(~0+Group) )
  dds <- DESeq(dds, test="Wald") 
  print(resultsNames(dds))
  if ( ( grepl('[+]', as.character(trim(group1))) | grepl('-', as.character(trim(group1))) ) | ( grepl('[+]', as.character(trim(group2))) | grepl('-', as.character(trim(group2))) ) ) {
    group1Split <- unlist(strsplit(x = trim(group1), split = '[+]|-'))
    group2Split <- unlist(strsplit(x = trim(group2), split = '[+]|-'))
    ddsResNames <- gsub('Group', '', resultsNames(dds))
    contrastVector <- rep(0, length(resultsNames(dds)))
    names(contrastVector) <- ddsResNames
    # ---
    if ( grepl('[+]', as.character(trim(group2))) ) {
      contrastVector[match(group2Split, names(contrastVector))] <- 1
    } else {
      contrastVector[match(group2Split, names(contrastVector))[1]] <- 1
      contrastVector[match(group2Split, names(contrastVector))[2]] <- -1
    }
    # - 
    if ( grepl('[+]', as.character(trim(group1))) ) {
      contrastVector[match(group1Split, names(contrastVector))[1]] <- contrastVector[match(group1Split, names(contrastVector))[1]] - 1
      contrastVector[match(group1Split, names(contrastVector))[2]] <- contrastVector[match(group1Split, names(contrastVector))[2]] + 1
    } else {
      contrastVector[match(group1Split, names(contrastVector))] <- contrastVector[match(group1Split, names(contrastVector))] -1
    }
    print(contrastVector)
    # ---
    res <- results(dds, contrast=contrastVector, format="DataFrame")
  } else {
    res <- results(dds, contrast=c("Group", as.character(trim(group2)), as.character(trim(group1))), format="DataFrame")
  }
  res <- as.data.frame(res)
  tpdeseq2.save <- res[order(res$padj) , ]
  # print(head(tpdeseq2.save))
  
  ##save full DE analysis results to a file named in tpdeseq2.fname
    tpdeseq2.fname <- paste(as.character(deseq2.res.path), "/DEseq2-full-GLMDisp-est-", "cpmCutoof_", as.character(cpmCutoff), "-sampCutoff_", as.character(sampCutoff), ".txt", sep="" )
    if(!file.exists(tpdeseq2.fname)) write.table(tpdeseq2.save, as.character(tpdeseq2.fname), row.names=T, col.names=NA, quote=F, sep="\t")

  ##return the DE analysis results, res saved file name and file path
  return(list(res=as.data.frame(tpdeseq2.save), res.fname=tpdeseq2.fname, res.dir=deseq2.res.path))  
    
}

###############################################################
##DE analysis results post-analysis process based on FC, adj-p/org-p, up/down/all regulation
###############################################################
get.filtered.deg <- function(de.res, padjust, Pcutoff, FCcutoff, GeneRegFilter, de.method){
  ##use results from above function, it returned full DEGs in $res, res directory in $res.dir, DEGs saved file name in $res.fname
  res <- as.data.frame(de.res$res)
  ##redirect to all DE analysis results for each comparison directory.
  workdir <- de.res$res.dir
  setwd(workdir)
  
  ##add regulation feature for all DE analysis results
  res$regulation <- 0
  padjust <- as.logical(padjust)
  Pcutoff <- as.numeric(Pcutoff)
  FCcutoff <- as.numeric(FCcutoff)
  if (padjust) {
    print(sprintf("The filter is on %s with FDR adjusted p-value at FDR=%s and FC = %s.", as.character(GeneRegFilter), Pcutoff, FCcutoff))
  }else{
    print(sprintf("The filter is on %s with original p-value at p-val=%s and FC = %s.", as.character(GeneRegFilter), Pcutoff, FCcutoff))
  }
  if(FCcutoff == 0) FCcutoff <- 1
  
  if (de.method == 'edgeR') {
    if (padjust) {
      res$regulation[res$FDR<=Pcutoff & res$logFC >= log2(FCcutoff) ] <- 1
      res$regulation[res$FDR<=Pcutoff & res$logFC <= -log2(FCcutoff) ] <- -1
    } else {
      res$regulation[res$PValue<=Pcutoff & res$logFC >= log2(FCcutoff) ] <- 1
      res$regulation[res$PValue<=Pcutoff & res$logFC <= -log2(FCcutoff) ] <- -1
    }
    filter.res.fname.suffix <- paste(strsplit(file_path_sans_ext(basename(de.res$res.fname)), split = "-")[[1]][5],
                                     strsplit(file_path_sans_ext(basename(de.res$res.fname)), split = "-")[[1]][6],
                                     sep = "-")
    filter.res.fname <- paste("edgeR-", GeneRegFilter, "-regDEGs-", filter.res.fname.suffix, "-fdr_", Pcutoff, "-FC_", FCcutoff, "-full.txt", sep = "" )
  }else if (de.method == 'DESeq2') {
    if (padjust) {
      res$regulation[res$padj<=Pcutoff & res$log2FoldChange >= log2(FCcutoff) ] <- 1
      res$regulation[res$padj<=Pcutoff & res$log2FoldChange <= -log2(FCcutoff) ] <- -1
    } else {
      res$regulation[res$PValue<=Pcutoff & res$logFC >= log2(FCcutoff) ] <- 1
      res$regulation[res$PValue<=Pcutoff & res$logFC <= -log2(FCcutoff) ] <- -1
    }
    filter.res.fname.suffix <- paste(strsplit(file_path_sans_ext(basename(de.res$res.fname)), split = "-")[[1]][5],
                                     strsplit(file_path_sans_ext(basename(de.res$res.fname)), split = "-")[[1]][6],
                                     sep = "-")
    filter.res.fname <- paste("DESeq2-", GeneRegFilter, "-regDEGs-", filter.res.fname.suffix, "-fdr_", Pcutoff, "-FC_", FCcutoff, "-full.txt", sep = "" )
    
  }else if (de.method == 'voom') {
    if (padjust) {
      res$regulation[res$adj.P.Val <=Pcutoff & res$logFC >= log2(FCcutoff) ] <- 1
      res$regulation[res$adj.P.Val <=Pcutoff & res$logFC <= -log2(FCcutoff) ] <- -1
    } else {
      res$regulation[res$P.Value<=Pcutoff & res$logFC >= log2(FCcutoff) ] <- 1
      res$regulation[res$P.Value<=Pcutoff & res$logFC <= -log2(FCcutoff) ] <- -1
    }
    filter.res.fname.suffix <- paste(strsplit(file_path_sans_ext(basename(de.res$res.fname)), split = "-")[[1]][5],
                                     strsplit(file_path_sans_ext(basename(de.res$res.fname)), split = "-")[[1]][6],
                                     sep = "-")
    filter.res.fname <- paste("voom-", GeneRegFilter, "-regDEGs-", filter.res.fname.suffix, "-fdr_", Pcutoff, "-FC_", FCcutoff, "-full.txt", sep = "" )
    
  }
  
  ##use different filtering category to filter out DEGs
  if (as.character(GeneRegFilter) == "both") {
    res.filter <- subset(res, regulation!=0)
  } else if (as.character(GeneRegFilter) == "up") {
    res.filter <- subset(res, regulation==1)
  } else if (as.character(GeneRegFilter) == "down") {
    res.filter <- subset(res, regulation==-1)
  }
  
  ##save filtered DEGs analysis results
  if (padjust) {
    print(sprintf("%s DEGs were identified on %s regulated genes at FDR = %s, FC = %s.", dim(res.filter)[1], as.character(GeneRegFilter), Pcutoff, FCcutoff))
  }else{
    print(sprintf("%s DEGs were identified on %s regulated genes at orignal p-val = %s, FC = %s.", dim(res.filter)[1], as.character(GeneRegFilter), Pcutoff, FCcutoff))
  }
  print(sprintf("Filtered results is saved in %s", filter.res.fname))
  if (!file.exists(filter.res.fname)) write.table(res.filter, filter.res.fname, quote = F, col.names = NA, row.names = T, sep = "\t")
  
  ##return filter DEGs, filter category, DEG names only, res saved file name and directory.
  return( list(filter.res=res.filter, filter=GeneRegFilter, degName=rownames(res.filter), filter.res.fname = filter.res.fname, filter.res.dir = workdir ) )
}

###############################################################
##Cuffdiff res process
###############################################################

cuffdiff.res.process <- function(respath, group1, group2, padjust, Pcutoff, FCcutoff, GeneRegFilter, saveRes) {
  cuffdiff.res<- read.table(as.character(respath), header = T)
  head(cuffdiff.res)
  
  if(saveRes==TRUE){
    cuff.res.path <- paste(getwd(), "/cuffdiff-res", sep="")
    if (!dir.exists(cuff.res.path)) dir.create(cuff.res.path)
  }
  pvalue=as.numeric(Pcutoff)
  FC=as.numeric(FCcutoff)
  gfilter <- as.character(GeneRegFilter)
  
  cuffdiff.res.sub <- cuffdiff.res[ ( (cuffdiff.res$sample_1 == as.character(group1) & cuffdiff.res$sample_2==as.character(group2)) | (cuffdiff.res$sample_1 == as.character(group2) & cuffdiff.res$sample_2==as.character(group1))), ]
  cuffdiff.res.sub.order <- cuffdiff.res.sub[ order(cuffdiff.res.sub$q_value),]
  
  if(saveRes==TRUE) {
    cuff.save.name <- paste(as.character(cuff.res.path), "/cuffdiff-", as.character(group1), "_", as.character(group2), ".txt", sep="")
    if (!file.exists(cuff.save.name)) write.table(cuffdiff.res.sub.order, as.character(cuff.save.name), row.names = F, quote=F, sep="\t")
  }
  
  cuffdiff.res.sub.order$filter <- 0
  if(as.character(padjust)==T){
    cuffdiff.res.sub.order$filter[cuffdiff.res.sub.order$q_value < pvalue & cuffdiff.res.sub.order$log2.fold_change. > log2(FC)] <- 1
    cuffdiff.res.sub.order$filter[cuffdiff.res.sub.order$q_value < pvalue & cuffdiff.res.sub.order$log2.fold_change. < -log2(FC)] <- -1
  } else {
    cuffdiff.res.sub.order$filter[cuffdiff.res.sub.order$q_value < pvalue & cuffdiff.res.sub.order$log2.fold_change. > log2(FC)] <- 1
    cuffdiff.res.sub.order$filter[cuffdiff.res.sub.order$q_value < pvalue & cuffdiff.res.sub.order$log2.fold_change. < -log2(FC)] <- -1
  }
  cuffdiff.res.sub.order$filter <- as.factor(cuffdiff.res.sub.order$filter)
  # print(summary(cuffdiff.res.sub.order$filter))
  
  if (gfilter == "up") {
    upreg.cuffdiff <- subset(cuffdiff.res.sub.order, filter==1)
    if (saveRes==TRUE) {
      filter.name <- paste(as.character(cuff.res.path), "/cuffdiff-upreg-", "fdr_", as.character(pvalue), "-FC_",as.character(FC), "-GeneName.txt", sep="" )
      filter.name.full <- paste(as.character(cuff.res.path), "/cuffdiff-upreg-", "fdr_", as.character(pvalue), "-FC_",as.character(FC), "-full.txt", sep="" )
    }
  } else if (gfilter == "down") {
    upreg.cuffdiff <- subset(cuffdiff.res.sub.order, filter==-1)
    if (saveRes==TRUE) {
      filter.name <- paste(as.character(cuff.res.path), "/cuffdiff-downreg-", "fdr_", as.character(pvalue), "-FC_",as.character(FC), "-GeneName.txt", sep="" )
      filter.name.full <- paste(as.character(cuff.res.path), "/cuffdiff-downreg-", "fdr_", as.character(pvalue), "-FC_",as.character(FC), "-full.txt", sep="" )
    }
  } else if (gfilter == "both") {
    upreg.cuffdiff <- rbind(subset(cuffdiff.res.sub.order, filter==1 ), subset(cuffdiff.res.sub.order, filter==-1) )
    if (saveRes==TRUE) {
      filter.name <- paste(as.character(cuff.res.path), "/cuffdiff-DEall-", "fdr_", as.character(pvalue), "-FC_",as.character(FC), "-GeneName.txt", sep="" )
      filter.name.full <- paste(as.character(cuff.res.path), "/cuffdiff-DEall-", "fdr_", as.character(pvalue), "-FC_",as.character(FC), "-full.txt", sep="" )
    }
  }
  if (saveRes==TRUE) {
    if (!file.exists(as.character(filter.name))) write.table(upreg.cuffdiff$gene, as.character(filter.name), row.names=F, col.names=F, quote=F)
    if (!file.exists(as.character(filter.name.full))) write.table(upreg.cuffdiff, as.character(filter.name.full), row.names=F, quote=F, sep="\t")
  }
  
  return( list(filter=(cuffdiff.res.sub.order$filter), res=cuffdiff.res.sub.order, de=upreg.cuffdiff, degName=as.character(upreg.cuffdiff$gene)) )
  
}
