# Figure 1A Top10generaPA.pdf

# BothNorm is a large data frame with 61 rows (one per genus, used as row names) and 167 columns


# Relative abundance: divide every entry of BothNorm by its column total, so
# each column sums to 1 (a column whose total is zero yields NaN).
BothNorm.RA <- prop.table(as.matrix(BothNorm), margin = 2)

# Per-column coverage: the summed relative abundance of a fixed panel of
# genera (rows of BothNorm.RA), computed separately for each column.

# Four-genus panel.
Coverage <- colSums(
  BothNorm.RA[c("Prevotella", "Pseudomonas", "Streptococcus", "Staphylococcus"), ]
)

# Four-genus panel plus Burkholderia.
Coverage5 <- colSums(
  BothNorm.RA[c("Prevotella", "Pseudomonas", "Streptococcus",
                "Staphylococcus", "Burkholderia"), ]
)

# Four-genus panel plus Achromobacter.
Coverage5achr <- colSums(
  BothNorm.RA[c("Prevotella", "Pseudomonas", "Streptococcus",
                "Staphylococcus", "Achromobacter"), ]
)

# Prevalence of each genus: the fraction of columns of BothNorm.RA in which
# its relative abundance is greater than zero.
PrevFrac <- rowSums(BothNorm.RA > 0) / ncol(BothNorm.RA)

# Mean relative abundance of each genus across all columns.
MeanRA <- apply(BothNorm.RA, MARGIN = 1, FUN = mean)

# Keep only genera present in more than 10% of columns, and tabulate their
# log10 mean abundance against prevalence for plotting.
isPrevalent <- PrevFrac > .1
AbPrev <- data.frame(
  Ab = log10(MeanRA[isPrevalent]),
  Prev = PrevFrac[isPrevalent],
  Genus = names(PrevFrac[isPrevalent])
)

# Scatter plot of log10 mean relative abundance (y) against prevalence (x),
# one point per genus in AbPrev, coloured by genus with italic legend labels.
Top10GeneraPlot <- ggplot(AbPrev, aes(x = Prev, y = Ab)) +
  # geom_smooth(method = "loess") +
  geom_point(aes(col = Genus), size = 3) +
  theme(text = element_text(size = 14),
        legend.text = element_text(face = "italic")) +
  labs(title = "Top 10 Genera",
       y = "Relative Abundance (log10)",
       x = "Prevalence")

# Display the plot (auto-printed at top level, as before).
Top10GeneraPlot

# Pass the plot object explicitly instead of relying on ggplot2's implicit
# "last plot" state, so the saved figure cannot silently become a different
# plot if other plotting code is inserted above this line.
# NOTE(review): width/height are not set, so the saved size follows ggsave's
# defaults -- consider fixing them for reproducible figures.
ggsave("Top10generaPA.pdf", plot = Top10GeneraPlot)

# Figure 1B Coverage.pdf  

# Write a histogram of coverage values to a PDF file.
#   values: numeric vector of coverage fractions to histogram
#   file:   path of the PDF to create
#   title:  main title of the histogram
# Returns `file` invisibly.
# The PDF device opened here is closed on exit even when hist() fails, so a
# failed plot cannot leave an open device that captures later graphics.
SaveCoverageHist <- function(values, file, title) {
  pdf(file)
  device <- dev.cur()
  on.exit(dev.off(device), add = TRUE)
  hist(values, xlab = "Fraction of Reads Covered",
       ylab = "Number of Subjects",
       main = title)
  invisible(file)
}

SaveCoverageHist(Coverage, "Coverage.pdf",
                 "Coverage by Prev, Pa, Strep and Staph")


# Supplementary Figure 1B Coverage5.pdf

SaveCoverageHist(Coverage5, "Coverage5.pdf",
                 "Coverage by Prev, Pa, Strep, Staph, Burk")

# Supplementary Figure 1C Coverage5Achr.pdf in LipumaCluster

SaveCoverageHist(Coverage5achr, "Coverage5Achr.pdf",
                 "Coverage by Prev, Pa, Strep, Staph, Achr")


# Supplementary Figure 2A ModelvCompendium.pdf LipumaCluster

# Load the in-vitro mixed-community measurements. The sheet is read without a
# header row (col_names = FALSE), so its columns get default names
# (...1, ...2, ...); the first column is used below as the label column.
For_Tom_relative_abundance_plot_mixed_community <- read_excel("~/Documents/FabriceReplot/For Tom relative abundance plot mixed community.xlsx", 
                                                              col_names = FALSE)
# Take spreadsheet columns 2-12 and transpose them, so each spreadsheet row
# becomes a column of InVitro.
# NOTE(review): the 2:12 range is positional -- confirm it matches the sheet.
InVitro <- t(as.data.frame (For_Tom_relative_abundance_plot_mixed_community[,2:12]))
# Name the columns after the labels in the first spreadsheet column, converted
# to syntactically valid R names by make.names().
colnames(InVitro) <- make.names(For_Tom_relative_abundance_plot_mixed_community$...1)

# Column-wise arithmetic mean of the transposed values.
# NOTE(review): presumably the sheet holds log10 counts, which would make the
# back-transformed value below a geometric mean -- confirm against the data.
InVitroGeoMeans <- apply(InVitro, 2, mean)

# Back-transform the means with 10^x.
InVitroMcounts <- 10^InVitroGeoMeans

# Normalise so the values sum to 1 and store them as a one-column data frame
# whose row names are the labels; the column is accessed by position ([, 1])
# further down rather than by name.
InVitroRA <- as.data.frame(InVitroMcounts/sum(InVitroMcounts))


# Copy of the annotation table with the Study column as row names, so it can
# be joined to the abundance table on row names.
SplitAnnot2 <- SplitAnnot
rownames(SplitAnnot2) <- SplitAnnot2$Study

# Join the annotations to the transposed abundance table by row name. The
# result has a leading Row.names column, then the annotation columns, then
# one column per row of BothRA.
# NOTE(review): BothRA is not defined in this section; presumably it is the
# relative-abundance matrix with genera as rows (cf. BothNorm.RA) -- confirm.
SplitAnnot2RA <- merge(SplitAnnot2, t(BothRA), by = "row.names")

# Studies whose kmNick label is Pa.M1 or Pa.M2.
M1orM2studies <- SplitAnnot2[SplitAnnot2$kmNick %in% c("Pa.M1", "Pa.M2"), "Study"]

# Abundance columns for those studies. The columns are selected by name (the
# row names of BothRA) rather than by the positional range 10:70, which would
# silently pick the wrong columns if the annotation table gained or lost a
# column or the number of genera changed.
M1orM2RA <- SplitAnnot2RA[SplitAnnot2RA$Study %in% M1orM2studies, rownames(BothRA)]

# Restrict to the four genera that make up the in-vitro model community.
M1orM2RA.modelBugs <- M1orM2RA[, c("Pseudomonas", "Prevotella", "Staphylococcus", "Streptococcus")]



library(tidyr)
# Long-format table of the compendium abundances: every column of
# M1orM2RA.modelBugs is stacked into Genus / Abundance pairs.
MBlong <- gather(
  M1orM2RA.modelBugs,
  key = "Genus",
  value = "Abundance"
)
MBlong[["Type"]] <- "Compendium"

# Matching table for the in-vitro model: one relative abundance per genus,
# looked up by row name in the single column of InVitroRA.
tIVra <- data.frame(
  Genus = c("Prevotella", "Pseudomonas", "Staphylococcus", "Streptococcus"),
  Abundance = InVitroRA[c("Prevotella", "Pseudomonas",
                          "Staphylococcus", "Streptococcus"), 1]
)
tIVra[["Type"]] <- "Model"

# Stack compendium and model rows into one table for plotting.
MBlongBoth <- rbind(MBlong, tIVra)

# Jittered dot plot of relative abundance per genus, coloured by data source
# (Type); the axes are flipped so genus names read horizontally, in italics.
ModelvCompendiumPlot <- ggplot(MBlongBoth,
                               aes(x = Genus, y = Abundance, color = Type)) +
  geom_jitter(size = 2, width = .2) +
  coord_flip() +
  xlab("") +
  ylab("Relative Abundance") +
  theme(axis.text.y = element_text(face = "italic"))

# Display the plot (auto-printed at top level, as before).
ModelvCompendiumPlot

# Pass the plot object explicitly instead of relying on ggplot2's implicit
# "last plot" state, so the saved figure cannot silently become a different
# plot if other plotting code is inserted above this line.
ggsave("ModelvCompendium.pdf", plot = ModelvCompendiumPlot)