library(tidyverse)
library(biomaRt)
library(ggplot2)
library(edgeR)
library(EnhancedVolcano)

################################Running edgeR###########################################
# Load the featureCounts read table and the sample map file.
# NOTE(review): setwd() ties the script to one machine; it is kept because the
# relative "../" paths below are resolved against this directory.
setwd("~/Documents/Roz_Pichia_Deseq/")

# header = TRUE takes column names from the first row, row.names = 1 uses the
# first column as row names, and check.names = FALSE keeps the column names
# exactly as they are written in the file.
table <- read.table("../Roz_featurecounts.txt", header = TRUE,
                    check.names = FALSE, row.names = 1)

# Drop the first five columns; everything that remains is used as the count
# matrix. Presumably these five are the featureCounts annotation columns
# (Chr, Start, End, Strand, Length) -- TODO confirm against the input file.
exp.table <- table[, -(1:5)]

# Sample map. NOTE(review): `map` is not referenced again in this script.
map <- read.table("../Roz_map.txt", header = TRUE, check.names = FALSE)

# Create the DGEList from the count table, storing the row names of the count
# table as the gene annotation.
dgList <- DGEList(counts = exp.table, genes = rownames(exp.table))

# Keep only genes that have more than 1 CPM in at least 2 samples.
# The earlier version tested `>= 1`, so a gene passed when a single sample
# exceeded the CPM threshold, which contradicted the documented rule above.
# NOTE(review): the number of retained genes will be lower than with the old
# single-sample filter -- confirm that the 2-sample rule is the intended one.
min_cpm <- 1
min_samples <- 2
countsPerMillion <- cpm(dgList)
countCheck <- countsPerMillion > min_cpm
keep <- which(rowSums(countCheck) >= min_samples)
dgList <- dgList[keep, ]

# Perform TMM normalization
dgList <- calcNormFactors(dgList, method = "TMM")

# Create the design matrix.
# Samples whose column name contains "PICHIA" are assigned to the PICHIA group;
# every other sample is labelled CTRL.
sampleType <- rep("CTRL", ncol(dgList))
sampleType[grepl("PICHIA", colnames(dgList), fixed = TRUE)] <- "PICHIA"

# Fail early with a clear message if one of the two groups is empty; the
# two-group comparison below needs both.
if (!all(c("CTRL", "PICHIA") %in% sampleType)) {
  stop("Both CTRL and PICHIA samples are required; check the sample column names.",
       call. = FALSE)
}

# Explicit levels make CTRL the reference, so the second design column
# (sampleTypePICHIA) is the PICHIA-vs-CTRL effect.
sampleType <- factor(sampleType, levels = c("CTRL", "PICHIA"))

# Show the sample-to-group assignment. print() is used instead of View() so the
# script also runs outside an interactive GUI session.
print(data.frame(sample = colnames(dgList), group = sampleType))

designMat <- model.matrix(~sampleType)
print(designMat)

# Estimate the common, trended and tagwise dispersions for the GLM, each step
# updating dgList in turn.
dgList <- estimateGLMCommonDisp(dgList, design = designMat)
dgList <- estimateGLMTrendedDisp(dgList, design = designMat)
dgList <- estimateGLMTagwiseDisp(dgList, design = designMat)

# Perform a likelihood ratio test on the two groups.
# glmLRT() is called without `coef`, so edgeR's default coefficient is tested.
fit <- glmFit(dgList, designMat)
lrt <- glmLRT(fit)

# Full result table: request as many rows as there are tested genes instead of
# a hard-coded count, so the table stays complete if the filter or the input
# changes.
edgeR_result <- topTags(lrt, n = nrow(lrt$table))

# Only the top differential genes (topTags() default number of rows).
edgeRtop10_result <- topTags(lrt)

# Create tables of differentially abundant genes
edgeR.df <- edgeR_result$table
edgeRtop10.df <- edgeRtop10_result$table

# Attach gene annotation to both result tables, matching on the gene ID that is
# stored in the row names.
# `annot` is not created anywhere in this script, so stop with a clear message
# rather than an "object not found" error in the middle of the pipeline.
# NOTE(review): later steps read a `GeneName` column, which presumably comes
# from `annot` -- confirm how `annot` is built.
if (!exists("annot")) {
  stop("`annot` (gene annotation table with a `GeneID` column) must be loaded ",
       "before the results can be annotated.", call. = FALSE)
}
stopifnot("GeneID" %in% colnames(annot))

edgeRtop10.df <- edgeRtop10.df %>%
  rownames_to_column("GeneID") %>%
  left_join(annot, by = "GeneID")

edgeR.df <- edgeR.df %>%
  rownames_to_column("GeneID") %>%
  left_join(annot, by = "GeneID")

# Move the first column (GeneID) back into the row names of the full table.
edgeR.df <- data.frame(edgeR.df, row.names = 1)

library(org.Mm.eg.db)

# Columns to add to edgeR.df, written as <new column name> = <org.Mm.eg.db column>:
#   description - gene full name (GENENAME)
#   entrez      - ENTREZ ID      (ENTREZID)
#   symbol      - gene symbol    (SYMBOL)
annotation_fields <- c(description = "GENENAME",
                       entrez = "ENTREZID",
                       symbol = "SYMBOL")

# The row names of edgeR.df are looked up as ENSEMBL keys; when a key maps to
# several values only the first one is kept.
for (new_column in names(annotation_fields)) {
  edgeR.df[[new_column]] <- mapIds(x = org.Mm.eg.db,
                                   keys = row.names(edgeR.df),
                                   column = annotation_fields[[new_column]],
                                   keytype = "ENSEMBL",
                                   multiVals = "first")
}

# Keep the rows whose FDR is below 0.05 (rows with a missing FDR are dropped),
# then collect the GeneName values of those rows.
results_sig <- edgeR.df[which(edgeR.df$FDR < 0.05), ]
siggenes <- results_sig$GeneName


# Volcano plot of the edgeR results (logFC on x, FDR on y), labelling only the
# genes collected in `siggenes`.
# NOTE(review): pCutoff is 0.1 while `siggenes` was selected with FDR < 0.05 --
# confirm the two thresholds are meant to differ.
# NOTE(review): ylim stops at 2.5; points beyond that range presumably fall
# outside the plotted area -- verify against the data.
#pdf(file = "PichiaVolcano_011121.pdf", wi = 7, he = 7, pointsize = 14, bg = "transparent") #remove '#' for saving
EnhancedVolcano(
  edgeR.df,
  # data columns and labels
  lab = edgeR.df$GeneName,
  selectLab = siggenes,
  x = "logFC",
  y = "FDR",
  # thresholds
  pCutoff = 0.1,
  FCcutoff = 0.5,
  # axes
  xlim = c(-10, 10),
  ylim = c(0, 2.5),
  xlab = bquote(~Log[2]~ "fold change"),
  ylab = bquote(~-Log[10]~italic(FDR)),
  # appearance
  pointSize = 3.0,
  labSize = 6.0,
  title = "",
  subtitle = "",
  caption = ""
)
#dev.off()
