library(phyloseq)
library(ggpubr)
library(reshape2)
library(ggplot2)
theme_set(theme_bw())
library(cowplot)

# Work from the directory that holds the saved phyloseq object; every relative
# path below (the .RData input and the figure written at the end of the
# script) is resolved against it.
setwd(dir = "Path/to/PhyloseqObject")

# Restore the saved workspace objects; verbose = TRUE prints the name of each
# object as it is loaded.
load(file = "DropletPlate_subsampled_filtered.RData", verbose = TRUE)


# median normalization

# Target depth: the median of the per-sample read totals.
total <- median(sample_sums(DropletPlate_subsampled_filtered))

# Rescale one sample's count vector `x` so that it sums (up to rounding) to
# `t`, keeping the proportions between taxa.
standf <- function(x, t = total) {
  round(t * (x / sum(x)))
}

# Apply the rescaling to every sample of the phyloseq object.
DropletPlate_subsampled_filtered_normed <- transform_sample_counts(
  DropletPlate_subsampled_filtered,
  standf
)


# effect size

# Unsigned Cohen's d between two numeric samples.
#
# x, y: numeric vectors holding the observations of the two groups.
# Returns |mean(x) - mean(y)| divided by the pooled standard deviation, where
# the pooled variance weights each group's variance by its degrees of freedom
# (n - 1). The value is returned invisibly, so a bare call at top level prints
# nothing.
cohens_d <- function(x, y) {
  df_x <- length(x) - 1
  df_y <- length(y) - 1

  # Pooled standard deviation of both groups.
  pooled_var <- (df_x * var(x) + df_y * var(y)) / (df_x + df_y)
  pooled_sd <- sqrt(pooled_var)

  # Absolute difference of the group means; the sign is handled separately.
  mean_gap <- abs(mean(x) - mean(y))

  invisible(mean_gap / pooled_sd)
}

# Sign ("Vorzeichen") of the difference between two group means.
#
# x, y: numeric vectors holding the observations of the two groups.
# Returns 1 when mean(x) > mean(y), -1 when mean(x) < mean(y) and 0 when both
# means are equal. sign() is used instead of dividing the difference by its
# absolute value, because that division is 0 / 0 = NaN for equal means.
vorzeichen <- function(x, y) {
  sign(mean(x) - mean(y))
}


# sample subsetting

# Keep only the droplet and plate samples of the normalised data set.
dp <- subset_samples(
  DropletPlate_subsampled_filtered_normed,
  sampleType %in% c("droplet_sample", "plate_sample")
)

# Convert counts to per-sample relative abundances, then merge taxa that share
# the same genus.
dp_rel <- transform_sample_counts(dp, function(x) {
  x / sum(x)
})
dp_rel_genus <- tax_glom(dp_rel, taxrank = "genus")


# top 10

# Rank genera by their summed relative abundance (largest first) and keep the
# names of the first ten.
top10.genus <- names(
  sort(taxa_sums(dp_rel_genus), decreasing = TRUE)
)[1:10]
dp_rel_genus_top10 <- prune_taxa(top10.genus, dp_rel_genus)

# Long data frame with one row per sample/taxon combination.
dp_df <- psmelt(dp_rel_genus_top10)


# wilcoxon rank sum test

# Per-genus Wilcoxon rank sum test of abundance between the sample types,
# with Holm correction of the p-values.
dp_df_test <- compare_means(
  Abundance ~ sampleType,
  data = dp_df,
  method = "wilcox.test",
  paired = FALSE,
  group.by = "genus",
  p.adjust.method = "holm"
)

# cohens d

# One row per sample, one column per genus.
dp_df_wide <- dcast(
  dp_df,
  sampleType + Sample ~ genus,
  value.var = "Abundance",
  fun.aggregate = sum
)

dp_df_test_ordered <- dp_df_test[order(dp_df_test$genus), ]

# Identify the two groups from the sampleType column instead of relying on
# fixed row positions, so unequal or different group sizes are handled. The
# group that appears first in the rows of dp_df_wide is used as `x`, the other
# one as `y`.
sample_groups <- unique(as.character(dp_df_wide$sampleType))
stopifnot(length(sample_groups) == 2)
in_first_group <- as.character(dp_df_wide$sampleType) == sample_groups[1]

# Look each genus up by name so that a result row can never be paired with the
# abundance column of a different genus.
genus_names <- as.character(dp_df_test_ordered$genus)
missing_genera <- setdiff(genus_names, names(dp_df_wide))
if (length(missing_genera) > 0) {
  stop(
    "No abundance column found for genus: ",
    paste(missing_genera, collapse = ", "),
    call. = FALSE
  )
}

# Unsigned effect size per genus.
dp_df_test_ordered$effect <- vapply(
  genus_names,
  function(g) {
    cohens_d(dp_df_wide[[g]][in_first_group], dp_df_wide[[g]][!in_first_group])
  },
  numeric(1),
  USE.NAMES = FALSE
)

# Direction of the difference (first group minus second group) per genus.
dp_df_test_ordered$direction <- vapply(
  genus_names,
  function(g) {
    vorzeichen(dp_df_wide[[g]][in_first_group], dp_df_wide[[g]][!in_first_group])
  },
  numeric(1),
  USE.NAMES = FALSE
)

# Signed effect size used for plotting.
dp_df_test_ordered$effect_direct <-
  dp_df_test_ordered$direction * dp_df_test_ordered$effect

# dp_df_test_ordered$p.signif.new<-0
# for (i in 1:nrow(dp_df_test_ordered)){
#   dp_df_test_ordered$p.signif.new[i]<-ifelse(dp_df_test_ordered$p.adj[i]<0.0001, "****",ifelse(dp_df_test_ordered$p.adj[i]<0.001, "***", ifelse(dp_df_test_ordered$p.adj[i]<0.01, "**", ifelse(dp_df_test_ordered$p.adj[i]<0.05, "*","")) ))
# }


# plotting

# Back to long format: one row per sample/genus pair, with the genus in
# `variable` and its abundance in `value`.
dp_df_long <- melt(dp_df_wide, id.vars = c("Sample", "sampleType"))

# Number of genus positions on the discrete axis.
n_genera <- max(as.numeric(as.factor(dp_df_long$variable)))

# One background band per genus, centred on its axis position (i - 0.5 to
# i + 0.5), with the colour code alternating between 3 and 4. The code vector
# is sized from the data rather than fixed at ten entries, so the three columns
# always have the same length.
shading <- data.frame(
  min = seq_len(n_genera) - 0.5,
  max = seq_len(n_genera) + 0.5,
  col = rep_len(c(3, 4), n_genera)
)

# Outline colours for the two sample types.
my.colors <- c("#66a61e", "#7570b3")

# Horizontal box plots of relative abundance per genus, split by sample type,
# on alternating background bands.
genus.top10 <- ggplot(dp_df_long) +
  # Presumably drawn first so that the discrete genus axis exists before
  # geom_rect() supplies numeric xmin/xmax; the same layer is repeated after
  # the bands so the boxes end up on top of them.
  geom_boxplot(aes(x = variable, y = value, color = sampleType)) +
  # NOTE(review): alpha = 0.1 sits inside aes(), so it is mapped through an
  # alpha scale rather than applied as a literal opacity of 0.1 - confirm
  # that this is intended.
  geom_rect(
    data = shading,
    aes(
      xmin = min, xmax = max,
      ymin = -Inf, ymax = Inf,
      fill = factor(col),
      alpha = 0.1
    )
  ) +
  scale_fill_manual(values = c("white", "gray80")) +
  geom_boxplot(aes(x = variable, y = value, color = sampleType)) +
  # Individual samples, jittered and dodged alongside their box.
  geom_point(
    aes(x = variable, y = value, color = sampleType),
    size = 2,
    shape = 17,
    alpha = 0.5,
    position = position_jitterdodge()
  ) +
  labs(x = "Genus", y = "Relative abundance") +
  scale_colour_manual(
    values = my.colors,
    labels = c("droplet sample", "plate sample")
  ) +
  theme(
    axis.text = element_text(size = 12),
    axis.text.y = element_text(face = "italic"),
    axis.title = element_text(size = 12)
  ) +
  coord_flip() +
  # No outer margin, no legend and no panel border, so the panel can sit
  # directly next to other panels.
  theme(
    plot.margin = unit(c(0, 0, 0, 0), "cm"),
    legend.position = "none",
    legend.title = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_line(colour = "grey80")
  )

# Horizontal bar chart of the signed Cohen's d per genus, on alternating
# background bands.
effect.genus.top10 <- ggplot(
  dp_df_test_ordered,
  aes(x = genus, y = effect_direct)
) +
  # First bar layer sets up the discrete genus axis; it is repeated after the
  # bands so the bars end up on top of them.
  geom_bar(stat = "identity", width = 0.5) +
  # The bands come from `shading`, which has no genus/effect_direct columns,
  # so the plot-level mapping must not be inherited here.
  # NOTE(review): alpha = 0.1 sits inside aes(), so it is mapped through an
  # alpha scale rather than applied as a literal opacity of 0.1 - confirm
  # that this is intended.
  geom_rect(
    data = shading,
    aes(
      xmin = min, xmax = max,
      ymin = -Inf, ymax = Inf,
      fill = factor(col),
      alpha = 0.1
    ),
    inherit.aes = FALSE
  ) +
  geom_bar(
    aes(fill = factor(direction)),
    stat = "identity",
    width = 0.5,
    alpha = 0.8
  ) +
  coord_flip() +
  theme_minimal() +
  labs(y = "Cohen's d") +
  geom_hline(yintercept = 0) +
  # Show the magnitude of the effect on both sides of zero. Deriving the
  # labels from the breaks keeps them correct for any number and position of
  # breaks, unlike a fixed vector of label strings.
  scale_y_continuous(labels = abs) +
  # Fill values are matched to the sorted fill levels: the direction levels
  # first, then the two band codes from `shading`.
  scale_fill_manual(values = c("#7570b3", "#66a61e", "white", "gray80")) +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 12)
  ) +
  theme(
    plot.margin = unit(c(0, 0.3, 0, 0), "cm"),
    legend.position = "none"
  ) +
  # The genus axis is hidden entirely.
  theme(
    axis.line.x = element_line(colour = "black"),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major = element_line(colour = "grey80"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )

# Narrow text-only panel that prints one p-value per genus, using the same
# genus axis as the effect-size panel.
psignif.genus.top10 <- ggplot(
  dp_df_test_ordered,
  aes(x = genus, y = effect_direct)
) +
  # Empty layer: only trains the axes so the rows match the other panels.
  geom_blank() +
  coord_flip() +
  theme_minimal() +
  labs(y = "p-value") +
  # The printed value is the p.adj column, rounded to four digits.
  geom_text(
    aes(x = genus, label = round(p.adj, digits = 4), y = 0),
    size = 3,
    vjust = 0.7,
    hjust = 0.8
  ) +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 12)
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    plot.margin = unit(c(0, 0, 0, 0), "cm"),
    legend.position = "none"
  ) +
  # Hide the genus axis and all grid lines; the value-axis tick labels are
  # drawn in white.
  theme(
    axis.title.y = element_blank(),
    axis.line.y = element_blank(),
    axis.line.x = element_line(color = "black"),
    axis.text.x = element_text(color = "white"),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  )


# Convert the three panels to grobs so their layout can be adjusted directly.
ggenus.top10 <- ggplotGrob(genus.top10)
geffect.genus.top10 <- ggplotGrob(effect.genus.top10)
gpsignif.genus.top10 <- ggplotGrob(psignif.genus.top10)

# Give the two side panels the row heights of the box-plot panel so the genus
# rows line up across all three.
geffect.genus.top10$heights <- ggenus.top10$heights
gpsignif.genus.top10$heights <- ggenus.top10$heights

# Box plots | p-values | effect sizes, side by side in one row.
plot.genus.top10 <- plot_grid(
  ggenus.top10,
  gpsignif.genus.top10,
  geffect.genus.top10,
  ncol = 3,
  nrow = 1,
  rel_widths = c(4, 0.4, 1),
  align = "h"
)

# `filename` is spelled out in full rather than relying on partial argument
# matching of `file`. `pointsize` is not a ggsave() argument and is passed on
# to the graphics device.
ggsave(
  filename = "Figure3a.svg",
  plot = plot.genus.top10,
  height = 10,
  width = 18,
  units = "cm",
  pointsize = 12
)