####
# count reads from specific region of 4C across samples
####

######################################################################
# load lib
######################################################################
source("~/easyfunc.R")
library("ggplot2")
library("pheatmap")
library("reshape")
library("ggpubr")


######################################################################
# set pars
######################################################################
## half-width (bp) of the window placed around each region midpoint (+/- ext)
ext <- 5000

## heatmap colour ramp: reversed 11-class "Spectral" palette.
## brewer.pal is called through its namespace so this script does not rely on
## another file having attached RColorBrewer.
myPalette <- colorRampPalette(rev(RColorBrewer::brewer.pal(11, "Spectral")))


######################################################################
# readin filelist
######################################################################
## read the file list: first column holds one bedGraph path per line
fl <- read.delim("fl.merged.RPM.smooth.bedGraph", header = FALSE,
                 stringsAsFactors = FALSE)[, 1]

## labels parsed from each path; factor levels keep the file-list order
## (vp / gp presumably stand for viewpoint / group -- confirm against the paths)
vp <- gsub(".*_", "", gsub("../../data/|/.*|_Rep.*", "", fl))
vp <- factor(vp, levels = unique(vp))
gp <- gsub("../../data/|/.*|_ND.*|_mod.*", "", fl)
gp <- factor(gp, levels = unique(gp))

## read the regions of interest and replace each by midpoint +/- ext
iRegion <- read.delim("iRegion.bed", header = FALSE, stringsAsFactors = FALSE)
iMid <- floor((iRegion[, 3] + iRegion[, 2]) / 2)
## clamp the start at 0: a midpoint closer than ext to the chromosome start
## would otherwise give a negative (invalid) BED coordinate
iRegion[, 2] <- pmax(iMid - ext, 0)
iRegion[, 3] <- iMid + ext

## sum the bedGraph signal over every region, one file at a time
## (bedtools map options: operate on column 4, sum, report 0 when nothing overlaps)
RPM <- lapply(fl, function(x) {
    f.map <- easy.bedtools(iRegion, x, "bedtools map", "-c 4 -o sum -null 0")
    ## assumes iRegion has 4 columns so that column 5 is the mapped sum
    ## and column 4 the region name -- TODO confirm
    f.out <- f.map[, 5]
    names(f.out) <- f.map[, 4]
    f.out
})
## stack as files (rows) x regions (columns). rbind keeps this layout even
## when there is a single region, where sapply() + t() produced a 1 x n matrix.
RPM <- do.call(rbind, RPM)

## heatmap of log2(RPM + 1), unclustered so rows/columns keep the input order
pdf("iRegion_RPM_heatmap.pdf", 3, 5)
pheatmap(log2(RPM + 1), cluster_rows = FALSE, cluster_cols = FALSE,
         ## spelled out: "col" only worked through partial argument matching
         color = myPalette(100),
         ## gap after every third row (not after the last block); seq() would
         ## raise "wrong sign in 'by'" with fewer than six rows, so skip gaps then
         gaps_row = if (nrow(RPM) >= 6) seq(3, nrow(RPM) - 3, by = 3) else NULL,
         ## gap after every column
         gaps_col = seq_len(ncol(RPM)))
dev.off()

## long-format table: one row per cell of RPM
gg <- melt(RPM)
## second melt column indexes the RPM columns; call it "contact" and
## freeze its level order to the order of first appearance
names(gg)[2] <- "contact"
gg$contact <- factor(gg$contact, levels = unique(gg$contact))
## melt walks the matrix column by column, so the per-file labels are
## repeated once per RPM column
gg$vp <- rep(vp, times = ncol(RPM))
gg$gp <- rep(gp, times = ncol(RPM))
#gg$value = log2(gg$value+1)

## boxplot of the values per group, faceted by vp (rows) and contact (columns)
pdf("iRegion_RPM_boxplot.pdf", 6, 10)
## explicit print(): a ggplot object is only drawn when printed, and
## auto-printing does not happen when this script is run through source(),
## which would leave the PDF without a page
print(
    ggplot(gg, aes(x = gp, y = value, fill = gp)) + geom_boxplot() +
        facet_grid(vp ~ contact, scales = "free_y") + theme_nogrid() +
        theme(legend.position = "none")
)
dev.off()



######################################################################
# make individual plot
######################################################################
## local functions for individual plots (alternative colour pair: c("#00A0DC", "#DD2E1F"))
## Boxplot of the summed signal over a region, split by sample group.
##   f.iRegion   region(s) handed to easy.bedtools as the first input; callers
##               in this file pass a 1-row matrix: chrom, start, end, name
##   f.sample    index vector selecting entries of the file list
##   xcol        box border colours; fills are the same colours passed
##               through easy.col2alpha(xcol, 0.3)
##   f.outprefix inserted into the output name
##               "iRegion_Counts_<f.outprefix>_RPM_boxplot.pdf"
##   f.filelist  file whose first column lists the bedGraph paths
##               (defaults to the list used by the rest of this script)
## The plot title shows the p-value of a one-way ANOVA of log2(signal) ~ group,
## rounded to 3 digits. Returns the value of dev.off().
local.enrich.boxplot <- function(f.iRegion, f.sample, xcol=c("#4481b2", "#b83039"), f.outprefix,
                                 f.filelist="fl.merged.RPM.smooth.bedGraph"){
  ## read the file list and keep the requested samples
  fl <- read.delim(f.filelist, header=FALSE, stringsAsFactors=FALSE)[,1][f.sample]
  ## group label parsed from each path; levels keep the file-list order
  gp <- gsub("../../data/|/.*|_ND.*|_mod.*", "", fl)
  gp <- factor(gp, levels=unique(gp))

  ## sum the signal over the region for every file
  RPM <- sapply(fl, function(x){
    f.map <- easy.bedtools(f.iRegion, x, "bedtools map", "-c 4 -o sum -null 0")
    f.out <- f.map[,5]; names(f.out) <- f.map[,4]
    f.out
  })
  colnames(RPM) <- NULL
  # pval = round(t.test(log2(RPM) ~ gp)$p.value, 3)
  ## p-value of the group term (first row, 5th column of the ANOVA table)
  pval <- round(summary(aov(log2(RPM) ~ gp))[[1]][1,5], 3)
  ## rescale for the axis label "(x1000)"
  RPM <- RPM/1000

  ## make plots
  pdf(paste0("iRegion_Counts_", f.outprefix, "_RPM_boxplot.pdf"), 3, 3)
  ## make sure the PDF device is closed even if plotting fails; after a normal
  ## run the device is already gone and the handler does nothing
  pdf.dev <- dev.cur()
  on.exit(if (pdf.dev %in% dev.list()) dev.off(pdf.dev), add=TRUE)
  easy.par2()
  boxplot(RPM ~ gp, border=xcol, col=easy.col2alpha(xcol, 0.3),
      ylab="Total 4C reads (x1000) (RPM)", outpch=".", las=1,
      whisklty=1, whisklwd=2, staplelty=1, staplelwd=2, boxlwd=2,
      main=paste0("pvalue=", pval))
  dev.off()
}

## Bar plot (mean +/- SE with jittered points) of the summed signal over a
## region, split by sample group.
##   f.iRegion   region(s) handed to easy.bedtools as the first input; callers
##               in this file pass a 1-row matrix: chrom, start, end, name
##   f.sample    index vector selecting entries of the file list
##   xcol        colour palette passed to ggbarplot
##   f.outprefix inserted into the output name
##               "iRegion_Counts_<f.outprefix>_RPM_barplot.pdf"
##   f.filelist  file whose first column lists the bedGraph paths
##               (defaults to the list used by the rest of this script)
## The plot title is the p-value of a one-way ANOVA of log2(signal) ~ group,
## rounded to 3 digits. Returns the value of dev.off().
local.enrich.barplot <- function(f.iRegion, f.sample, xcol=c("#4481b2", "#b83039"), f.outprefix,
                                 f.filelist="fl.merged.RPM.smooth.bedGraph"){
    ## read the file list and keep the requested samples
    fl <- read.delim(f.filelist, header=FALSE, stringsAsFactors=FALSE)[,1][f.sample]
    ## group label parsed from each path; levels keep the file-list order
    gp <- gsub("../../data/|/.*|_ND.*|_mod.*", "", fl)
    gp <- factor(gp, levels=unique(gp))

    ## sum the signal over the region for every file
    RPM <- sapply(fl, function(x){
        f.map <- easy.bedtools(f.iRegion, x, "bedtools map", "-c 4 -o sum -null 0")
        f.out <- f.map[,5]; names(f.out) <- f.map[,4]
        f.out
    })
    colnames(RPM) <- NULL
    ## p-value of the group term (first row, 5th column of the ANOVA table)
    pval <- round(summary(aov(log2(RPM) ~ gp))[[1]][1,5], 3)
    ## rescale for the axis label "(x1000)"
    RPM <- RPM/1000

    ## prepare ggplot
    gg <- data.frame(RPM=RPM, gp=gp)

    ## make plots
    pdf(paste0("iRegion_Counts_", f.outprefix, "_RPM_barplot.pdf"), 3, 3)
    ## make sure the PDF device is closed even if plotting fails; after a normal
    ## run the device is already gone and the handler does nothing
    pdf.dev <- dev.cur()
    on.exit(if (pdf.dev %in% dev.list()) dev.off(pdf.dev), add=TRUE)
    print(ggbarplot(gg, x = "gp", y = "RPM", ylab="Total 4C reads (x1000) (RPM)",
          add = c("mean_se", "jitter"), title = pval,
          color = "gp", palette = xcol,
          position = position_dodge(0.8)))
          # + stat_compare_means(method = "anova"))
    dev.off()
}

## boxplot of total counts (old)
local.enrich.boxplot(
    f.iRegion = matrix(c("chr8", "55075132", "55086175", "Boundary_1"), nrow = 1),
    f.sample = 1:6,
    f.outprefix = "Ongene_vs_Boundary1"
)
local.enrich.boxplot(
    f.iRegion = matrix(c("chr8", "54730000", "54830000", "Boundary_2"), nrow = 1),
    f.sample = 1:6,
    f.outprefix = "Ongene_vs_Boundary2"
)
local.enrich.boxplot(
    f.iRegion = matrix(c("chr8", "54830000", "55075132", "UpTAD"), nrow = 1),
    f.sample = 10:15,
    f.outprefix = "Ongene_vs_UpTAD"
)

## barplot of total counts
local.enrich.barplot(
    f.iRegion = matrix(c("chr8", "55080000", "55090000", "Boundary_1"), nrow = 1),
    f.sample = 1:6,
    f.outprefix = "Ongene_vs_Boundary1"
)
local.enrich.barplot(
    f.iRegion = matrix(c("chr8", "54785000", "54795000", "Boundary_2"), nrow = 1),
    f.sample = 1:6,
    f.outprefix = "Ongene_vs_Boundary2"
)
local.enrich.barplot(
    f.iRegion = matrix(c("chr8", "54795000", "55080000", "UpTAD"), nrow = 1),
    f.sample = 10:15,
    f.outprefix = "Ongene_vs_UpTAD"
)




