#R version 4.0.4

#load libraries
library("readxl") #reading in excel files
library("dplyr")
library("ggplot2")  #plotting
library("ggsignif")
library("reshape2") #melt function
library("vegan")
library("FSA")
library("patchwork") #putting plots together
library("plyr")
library("ggthemes")
library("pals")
library("tidyverse")
library("ape")
library("seqinr")
library("rentrez")
library("devtools")
library("msa")
library("Biostrings")
library("compbio4all")
library("phangorn")

# Project root: replace the placeholder with the local location of the
# Figure6 folder, then make it the working directory for the whole script.
PATH <- "/your/path/here/AlvarezBuylla_ABG_SI_DataDryad/Figure6/"
setwd(PATH)

# Shared ggplot2 theme for all plots in this script.
# Starts from theme_bw() and overrides: legend text size, no major/minor grid
# lines, a thin black panel border, a centred plot title, and black axis
# titles/text. Takes no arguments and returns a theme object that can be
# added to a ggplot with `+`.
theme_aab <- function() {
  no_grid <- element_blank()
  border_style <- element_rect(size = 0.5, color = "black")
  title_style <- element_text(size = 20, hjust = 0.5, vjust = 0.5)

  overrides <- theme(
    # legend.position = "none",
    legend.text = element_text(size = 10),
    panel.grid.major = no_grid,
    panel.grid.minor = no_grid,
    panel.border = border_style,
    plot.title = title_style,
    axis.title = element_text(size = 15, color = "black"),
    axis.text = element_text(size = 13, color = "black")
  )

  theme_bw() + overrides
}

##############################################################################################################################
######### FIGURE 6B ##########################################################################################################
##############################################################################################################################

# Load RNA-seq data.
# The path is relative to the working directory set at the top of the script;
# a leading "/" would make R look in the filesystem root instead.
tmm_df <- read.csv(file = "6BD/Os_justABG_TMMvalues.csv")

# Keep only the OsPBG gene to show its expression per tissue.
# %in% (rather than ==) avoids all-NA rows if `gene` contains missing values.
tmm_justOsPBG <- tmm_df[tmm_df$gene %in% "OopSylGTT00000004675", ]

# Box plot per tissue with the individual samples overlaid as dots.
OsPBG <- ggplot(
  tmm_justOsPBG,
  aes(x = tissue, y = TMM.expr, fill = tissue, color = tissue)
) +
  theme_aab() +
  # ggtitle("TRINITY_DN15846_c0_g2") +
  # The x aesthetic is `tissue`, so the axis is labelled accordingly.
  xlab("tissue") +
  ylab("TMM normalized counts") +
  geom_boxplot(outlier.shape = NA, alpha = 0.3) +
  scale_fill_brewer(palette = "Dark2") +
  scale_color_brewer(palette = "Dark2") +
  # fill is inherited from the top-level aes(), so it is not repeated here
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    dotsize = 0.7,
    position = position_dodge(0.8)
  )

# Written to the working directory
ggsave("justOsPBG_tissues.pdf", plot = OsPBG, width = 13, height = 7, units = "cm")


##############################################################################################################################
######### FIGURE 6C ##########################################################################################################
##############################################################################################################################

# Load proteomics data - spectral counts across conditions.
# The path is relative to the working directory set at the top of the script;
# a leading "/" would make R look in the filesystem root instead.
nc_df <- read.csv(file = "6C/OsABG_LAM_proteomics.csv")

# Keep only rows belonging to the OsABG family.
# %in% (rather than ==) avoids all-NA rows if `family` contains missing values.
osABG_df <- nc_df[nc_df$family %in% "OsABG", ]

# Box plot of normalized counts per tissue with individual samples as dots.
lamPBG <- ggplot(
  osABG_df,
  aes(x = tissue, y = normcounts, fill = tissue, color = tissue)
) +
  theme_aab() +
  xlab("") +
  ylab("normalized counts") +
  geom_boxplot(outlier.shape = NA, alpha = 0.3) +
  scale_fill_brewer(palette = "Dark2") +
  scale_color_brewer(palette = "Dark2") +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 1)

# Written to the working directory
ggsave("LAM_OsPBG_tissues.pdf", plot = lamPBG, width = 12, height = 7, units = "cm")

##############################################################################################################################
######### FIGURE 6D ##########################################################################################################
##############################################################################################################################

### PHYLOGENY ###
# Read the FASTA file as an AAStringSet object
s <- readAAStringSet("osylvatica_genome_unique_serpinA_protein_sequences_shortenedheaders.fasta")

# Align the sequences with ClustalW, using the default substitution matrix
aligned_sequences <- msa(s, method = "ClustalW")

# Re-class the msa result as an AAMultipleAlignment, then convert it to a
# seqinr alignment object so seqinr::dist.alignment() can consume it
class(aligned_sequences) <- "AAMultipleAlignment"
aligned_sequences_seqinr <- msaConvert(aligned_sequences, type = "seqinr::alignment")

# Build the pairwise distance matrix from sequence identity
aligned_sequences_dist <- seqinr::dist.alignment(aligned_sequences_seqinr, matrix = "identity")

# Run neighbor joining
tree <- njs(aligned_sequences_dist)

# Open a PDF device to save the tree into (written to the working directory)
pdf(file = "unique_serpinA_Phylogeny_noAlbumin.pdf", width = 8, height = 5)

# Plot the tree; TRUE/FALSE are spelled out because T/F can be reassigned
plot.phylo(tree, use.edge.length = FALSE, label.offset = 2, align.tip.label = TRUE)

# Close the PDF device
dev.off()

# Get the tip labels in the order their edges appear in the tree, so the
# expression plot can be ordered to match the phylogeny.
# Edge rows whose second column is <= the number of tip labels are tips.
is_tip <- tree$edge[, 2] <= length(tree$tip.label)
ordered_tips <- tree$edge[is_tip, 2]
cat(tree$tip.label[ordered_tips])

### BOXPLOTS ###
# Load the TMM expression matrix.
# Paths are relative to the working directory set at the top of the script;
# a leading "/" would make R look in the filesystem root instead.
TMM <- read.table(
  file = "6BD/os_kallisto.isoform.TMM.EXPR.matrix",
  sep = "\t",
  header = TRUE
)

# Accessions of the serpinA genes (one per row, no header -> column V1)
serpins <- read.table(
  file = "6BD/osylvatica_genome_uniqueserpinA_accessions.txt",
  sep = "\t",
  header = FALSE
)

# Keep the identifier column `X` plus only the columns that contain liver data.
# `X` is selected by name: matches("X") is a case-insensitive regex and would
# also keep any non-liver column whose name merely contains an "x".
# dplyr:: is explicit because many packages are attached after dplyr.
liverTMM <- TMM %>% dplyr::select(X, matches("liver"))

# Keep only the rows whose accession is one of the serpin IDs
serpin_liverTMM <- liverTMM %>% dplyr::filter(X %in% serpins$V1)

# Convert the TMM matrix to long format (columns: X, variable, value)
serpin_liverTMM <- melt(serpin_liverTMM, id = c("X"))

# Order the genes as listed below so the boxplot matches the phylogeny.
# NOTE: any accession not listed here becomes NA in the factor.
phylogeny_order <- c(
  "OopSylGTT00000003067", "OopSylGTT00000004697", "OopSylGTT00000004696",
  "OopSylGTT00000004681", "OopSylGTT00000004693", "OopSylGTT00000004626",
  "OopSylGTT00000004627", "OopSylGTT00000004649", "OopSylGTT00000004675",
  "OopSylGTT00000004677", "OopSylGTT00000004682", "OopSylGTT00000004665",
  "OopSylGTT00000004694", "OopSylGTT00000004695"
)
serpin_liverTMM$X <- factor(serpin_liverTMM$X, phylogeny_order)

# Rows of the gene that is highlighted in orange on top of the grey boxplots
highlighted <- serpin_liverTMM[serpin_liverTMM$X == "OopSylGTT00000004675", ]

# Plot the boxplot: all genes in grey, the highlighted gene redrawn in orange
# with its individual samples as dots; axes are flipped so genes run vertically
serpinComparisons <- ggplot(serpin_liverTMM, aes(x = X, y = value)) +
  theme_aab() +
  xlab("gene") +
  ylab("TMM normalized counts") +
  geom_boxplot(
    color = "gray39", fill = "gray39",
    outlier.shape = NA, alpha = 0.3
  ) +
  geom_boxplot(
    data = highlighted,
    aes(x = X, y = value),
    fill = "#d95f02", color = "#d95f02",
    alpha = 0.3, outlier.shape = NA
  ) +
  coord_flip() +
  geom_dotplot(
    data = highlighted,
    binaxis = "y", stackdir = "center",
    fill = "#d95f02", color = "#d95f02",
    dotsize = 0.3, position = position_dodge(0.8)
  )

# Written to the working directory
ggsave("serpinExpressionComparison.pdf", plot = serpinComparisons, width = 15, height = 15, units = "cm")


##############################################################################################################################
######### FIGURE 6E ##########################################################################################################
##############################################################################################################################

# Load alkaloid data and sample metadata.
# Paths are relative to the working directory set at the top of the script;
# a leading "/" would make R look in the filesystem root instead.
featureTable <- read.csv(file = "6E/featureTableY_normalized.csv")
sampleMetadata <- read.csv(file = "6E/all_sample_metadata.csv")

# Delete unneeded annotation columns from the feature table.
# A logical mask is used instead of -which(...): if none of the names were
# present, -which(...) would be an empty index and drop EVERY column.
unneeded_cols <- c(
  "X", "rowID", "superfamily", "daly2005_identifier", "pf_alkaloid",
  "row.m.z", "row.retention.time", "Compound_Name"
)
featureTable <- featureTable[, !(names(featureTable) %in% unneeded_cols), drop = FALSE]

# Sum abundance values across toxin families.
# NOTE(review): columns 2:225 are hard-coded as the abundance columns; this
# presumably relies on `family` being column 1 and on the input file having
# exactly this layout - confirm before reusing with another feature table.
featureTable_fams <- aggregate(
  featureTable[, 2:225],
  by = list(Category = featureTable$family),
  FUN = sum
)

# Use the family names (first column) as row names, then drop that column
rownames(featureTable_fams) <- featureTable_fams$Category
featureTable_fams$Category <- NULL

# Transpose so each row is one sample, and expose the row name as `filename`
# so it can be matched against the metadata
featureTable_fams <- as.data.frame(t(featureTable_fams))
featureTable_fams$filename <- rownames(featureTable_fams)

# Merge with the metadata; all.x = TRUE keeps every metadata row, so samples
# without feature data come through with NA abundances
featureTable_fams_md <- merge(
  x = sampleMetadata[, c("Species", "Population", "filename")],
  y = featureTable_fams,
  by = "filename",
  all.x = TRUE
)

# Sum toxin family abundance values across Species (numeric columns only)
featureTable_fams_md_summed <- ddply(featureTable_fams_md, "Species", numcolwise(sum))

# Use the species as row names, then drop the column so only numbers remain
rownames(featureTable_fams_md_summed) <- featureTable_fams_md_summed$Species
featureTable_fams_md_summed$Species <- NULL

# Percentage that each toxin family contributes within each row (species):
# every cell is divided by the corresponding rowSums value
featureTable_fams_md_summed_proportions <-
  (featureTable_fams_md_summed / rowSums(featureTable_fams_md_summed)) * 100

# Round all of the values to the nearest whole number
featureTable_fams_md_summed_proportions <- round(featureTable_fams_md_summed_proportions, 0)

# Add the species column back in
featureTable_fams_md_summed_proportions$species <- rownames(featureTable_fams_md_summed_proportions)

# Melt the data frame for plotting, keeping species as the ID column
featureTable_fams_md_summed_proportions <- melt(featureTable_fams_md_summed_proportions, id = c("species"))

# Keep only O. sylvatica
featureTable_fams_md_summed_proportions_osylvatica <-
  featureTable_fams_md_summed_proportions[
    featureTable_fams_md_summed_proportions$species == "Oophaga sylvatica",
  ]

# Bubble grid of the toxin family percentages for O. sylvatica:
# one bubble per toxin family, sized by its percentage, with the value
# printed in white on top of the bubble.
osylvatica_toxins_plot <- ggplot(
  featureTable_fams_md_summed_proportions_osylvatica,
  aes(
    x = str_to_title(variable),
    y = str_to_title(species),
    size = value
  )
) +
  geom_point() +
  geom_text(
    aes(label = paste(value, "%")),
    colour = "white",
    size = 2
  ) +
  scale_x_discrete(position = "top") +
  scale_size_continuous(range = c(0, 10)) + # Adjust as required.
  labs(x = NULL, y = NULL) +
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank()
  )

# Save into the working directory, like every other figure in this script,
# instead of a machine-specific absolute path that exists on one computer only.
# The plot object is passed explicitly rather than relying on last_plot().
ggsave("osylvatica_toxins.eps", plot = osylvatica_toxins_plot, width = 6, height = 2, units = "in")
