####
# count reads from specific region of 4C across samples
####

######################################################################
# load lib
######################################################################
source("~/easyfunc.R")
library("tidyverse")
library("reshape")
library("GenomicRanges")
library("ggpubr")


######################################################################
# local functions
######################################################################
local.binBed <- function(
  f.chr = "chr8", f.stt = 54500000, 
  f.end = 55700000, f.bin=10000){
  #####
  # Tile the interval from f.stt to f.end into consecutive bins of
  # width f.bin and return them as a BED-like data.frame.
  #
  # f.chr: chromosome name written into every row
  # f.stt: start coordinate of the first bin
  # f.end: upper limit for the end coordinate of the last bin
  # f.bin: bin width
  #
  # Returns a data.frame with columns chrom, start, end (one row per
  # bin). Neighbouring bins share a boundary: the end of one bin equals
  # the start of the next.
  #####
  bin.starts <- seq(from = f.stt, to = f.end - f.bin, by = f.bin)
  bin.ends <- seq(from = f.stt + f.bin, to = f.end, by = f.bin)
  data.frame(chrom = f.chr, start = bin.starts, end = bin.ends)
}

local.binCount <- function(
  f.fn, f.chr = "chr8", f.stt = 54500000, 
  f.end = 55700000, f.bin=10000, f.method = viewMeans){
  #####
  # Summarise one bedGraph track into fixed-width bins so that
  # samples can be compared bin by bin.
  #
  # f.fn:     path to a headerless, tab-delimited file with four
  #           columns (read as chrom, start, stop, Score)
  # f.chr, f.stt, f.end, f.bin: region and bin width; the bins are the
  #           ones produced by local.binBed() for the same arguments
  # f.method: summary function handed to easy.gr.binSummary()
  #
  # Returns the per-bin "Score" vector, with exact zeros replaced
  # by NA.
  #####
  ## readin bedgraph
  f.dat <- read.delim(f.fn, header = FALSE, stringsAsFactors = FALSE)
  colnames(f.dat) <- c("chrom", "start", "stop", "Score")

  ## keep only the intervals that touch the requested region
  in.region <- f.dat[, 1] == f.chr & f.dat[, 3] >= f.stt & f.dat[, 2] <= f.end
  f.GR <- makeGRangesFromDataFrame(f.dat[in.region, ], keep.extra.columns = TRUE)

  ## bins come from the shared helper so that the counts always line up
  ## with the coordinates returned by local.binBed()
  f.GRwin <- makeGRangesFromDataFrame(
    local.binBed(f.chr = f.chr, f.stt = f.stt, f.end = f.end, f.bin = f.bin)
  )

  ## NOTE(review): easy.gr.binSummary() is defined outside this file;
  ## presumably it summarises "Score" of f.GR over each bin - confirm
  f.out <- easy.gr.binSummary(f.GRwin, f.GR, "Score", f.method)$Score

  ## zeros are turned into missing values
  f.out[f.out == 0] <- NA
  f.out
}

local.prepare.data <- function(f.fl, f.grp, f.pRegion=pRegion, f.bin=10000){
  #####
  # Collect everything the track plot needs for a list of files.
  #
  # f.fl:      files passed one by one to local.binCount()
  # f.grp:     group labels handed to easy.aggrByRow()
  # f.pRegion: region table; the first row supplies chrom, start, end
  #            (columns 1 to 3)
  # f.bin:     bin width
  #
  # Returns a list with
  #   mean, sd: transposed results of easy.aggrByRow() with colMeans
  #             and colSds
  #   phe:      bin coordinates from local.binBed()
  #   dat:      binned values as a data.frame, one column per file
  #   grp:      f.grp, unchanged
  #####

  ## region of interest (first row of the region table)
  region.chr <- f.pRegion[1, 1]
  region.stt <- f.pRegion[1, 2]
  region.end <- f.pRegion[1, 3]

  ## bin coordinates
  phe <- local.binBed(f.chr = region.chr, f.stt = region.stt,
                      f.end = region.end, f.bin = f.bin)

  ## binned values: one row per file, one column per bin
  per.file <- lapply(f.fl, local.binCount, f.chr = region.chr,
                     f.stt = region.stt, f.end = region.end, f.bin = f.bin)
  dat <- do.call(rbind, per.file)

  ## aggregate by group
  ## NOTE(review): easy.aggrByRow() lives outside this file; presumably it
  ## applies the given function to the rows of each group - confirm
  dat.df <- data.frame(dat)
  d.mean <- t(easy.aggrByRow(dat.df, f.grp, colMeans))
  d.sd <- t(easy.aggrByRow(dat.df, f.grp, colSds))

  ## return
  list(mean = d.mean, sd = d.sd, phe = phe, dat = data.frame(t(dat)), grp = f.grp)
}

local.plotmat <- function(pfmat, pferror, f.phe, 
                          f.vp, f.iregion, f.vp.ext = 20000, f.scale=1000000, 
                          f.col=c("#4481b2", "#b83039"), 
                          f.ylab="4C signal per 10kb", ...){
  #####
  # Draw the 4C track plot: one line per column of pfmat with a
  # ribbon of +/- pferror, and grey boxes over the regions of interest.
  #
  # pfmat:     matrix of values, bins in rows, one column per sample
  # pferror:   matrix of the same shape with the ribbon half-width
  # f.phe:     bin table; columns 2 and 3 are bin start and end
  # f.vp:      view point; columns 2 and 3 of its first row are used
  # f.iregion: regions to highlight; columns 2 and 3 are start and end
  # f.vp.ext:  bins overlapping view point midpoint +/- f.vp.ext are blanked
  # f.scale:   divisor applied to all x coordinates
  # f.col:     colours, used for both line and ribbon
  # f.ylab:    y axis label
  # ...:       accepted but not used in this function
  #
  # The plot is printed to the current graphics device.
  #####
  if (is.null(dim(pfmat)) || ncol(pfmat) < 1) {
    stop("pfmat must be a matrix with at least one column", call. = FALSE)
  }

  ## set the view point: window of +/- f.vp.ext around its midpoint
  vp.mid <- floor((f.vp[1, 3] + f.vp[1, 2]) / 2)
  vp.stt <- vp.mid - f.vp.ext
  vp.end <- vp.mid + f.vp.ext

  ## remove the data around the view point
  ss <- f.phe[, 3] > vp.stt & f.phe[, 2] < vp.end
  pfmat[ss, ] <- NA
  pferror[ss, ] <- NA

  ## prepare data: every bin contributes two points (its start and its
  ## end - 1), so each bin is drawn as a flat segment
  gg <- do.call(rbind, lapply(seq_len(ncol(pfmat)), function(i){
    data.frame(x = c(f.phe[, 2], f.phe[, 3] - 1),
               y = c(pfmat[, i], pfmat[, i]),
               yer = c(pferror[, i], pferror[, i]),
               sample = colnames(pfmat)[i])
  }))
  gg$x <- gg$x / f.scale
  gg$upper <- gg$y + gg$yer
  gg$lower <- gg$y - gg$yer
  ## the ribbon and the line are not drawn below zero
  gg$lower[gg$lower < 0] <- 0
  gg$y[gg$y < 0] <- 0

  ## highlighted regions: taken by position (columns 2 and 3), so the
  ## column names of f.iregion do not matter
  f.rect <- data.frame(xmin = f.iregion[[2]] / f.scale,
                       xmax = f.iregion[[3]] / f.scale)

  ## make plot
  print(
    ggplot(gg, aes(x = x, y = y)) +
      geom_ribbon(aes(ymax = upper, ymin = lower, fill = sample), alpha = 0.2) +
      geom_line(aes(col = sample), size = 0.5) +
      scale_fill_manual(values = f.col) +
      scale_color_manual(values = f.col) +
      theme_my() + theme(legend.position = "top") +
      xlab(paste0("Genomic coordinates (x", f.scale, ")")) +
      ylab(f.ylab) +
      geom_rect(data = f.rect,
                aes(xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf),
                inherit.aes = FALSE, fill = "grey", alpha = 0.2)
  )
}

local.plot.wrap <- function(dat, tag, f.vp.ext = c(20000, 30000), slt = c("ongene|WT_undiff", "ongene|WT_dEN_CXCR4pos"), ...){
  ####
  # Write the track plot for one resolution to a pdf file.
  #
  # dat:      list from local.prepare.data() (uses mean, sd, phe)
  # tag:      resolution label, used in the file name and the y axis label
  # f.vp.ext: only the first element is used; it is passed to
  #           local.plotmat() as f.vp.ext
  # slt:      columns of dat$mean / dat$sd to plot
  # ...:      forwarded to local.plotmat()
  #
  # Reads the global iRegion: row 5 is the view point, rows 2 and 4 are
  # the highlighted regions.
  # Returns the value of dev.off().
  ####
  ## View point: ongene; Compare: the two groups named in slt
  pdf(easy.Dir(paste0("_OnGene_D0vsD5_", tag, ".pdf")), 5, 3)
  ## close the pdf device even when plotting fails, then pass the error on
  tryCatch(
    local.plotmat(dat$mean[,slt], dat$sd[,slt], dat$phe, iRegion[5,], iRegion[c(2,4),],
                  f.ylab = paste0("4C signal per ", tag), f.vp.ext = f.vp.ext[1], ...),
    error = function(e){
      dev.off()
      stop(e)
    }
  )
  dev.off()
}

local.barplot <- function(dat, res = 1000, tag = "1kb",
        f.iregion = iRegion[c(2,4),], xcol=c("#4481b2", "#b83039"),
        slt = c("ongene|WT_undiff", "ongene|WT_dEN_CXCR4pos")){
  #####
  # Compare the signal of two groups at the bin covering the midpoint of
  # each region of interest, and write four bar plots to pdf files.
  #
  # dat:       list from local.prepare.data() (uses dat, phe, grp)
  # res:       bin width of dat; region midpoints are rounded down to it
  # tag:       resolution label, used in file names and axis labels
  # f.iregion: regions of interest; columns 2 and 3 are start and end,
  #            column 4 is the name shown in the plots
  # xcol:      colour palette for all four plots
  # slt:       the two group labels to compare (baseline first)
  #
  # Returns a list with
  #   dat: long-format data.frame from melt() plus columns loci and gp
  #   fc:  for each region, mean of the second group divided by the
  #        mean of the first group
  #####

  ## write one plot to "<prefix>_OnGene_D0vsD5_<tag><suffix>.pdf"; the
  ## device is closed even if building or printing the plot fails
  save.pdf <- function(f.suffix, f.plot, f.width, f.height){
    pdf(easy.Dir(paste0("_OnGene_D0vsD5_", tag, f.suffix, ".pdf")), f.width, f.height)
    on.exit(dev.off(), add = TRUE)
    print(f.plot)
  }

  ## format data: one row per region, one column per sample (first all
  ## samples of slt[1], then all samples of slt[2])
  f.iregion$mid <- floor((f.iregion[,2] + f.iregion[,3])/2/res)*res
  col.g1 <- which(dat$grp == slt[1])
  col.g2 <- which(dat$grp == slt[2])
  dd <- dat$dat[match(f.iregion[, "mid"], dat$phe[,2]), c(col.g1, col.g2), drop = FALSE]
  n.loci <- nrow(dd)
  n.rep <- c(length(col.g1), length(col.g2))

  ## melt() stacks the columns of dd, so every sample contributes n.loci
  ## consecutive rows; the labels below follow that layout
  gg <- melt(dd)
  gg$loci <- rep(f.iregion[[4]], ncol(dd))
  gg$gp <- rep(slt[1:2], times = n.loci * n.rep)
  loc.idx <- rep(seq_len(n.loci), ncol(dd))
  is.g1 <- rep(c(TRUE, FALSE), times = n.loci * n.rep)
  ggg <- gg

  ## make plots: groups side by side, one panel per region
  save.pdf("_barplot",
    ggbarplot(gg, x = "gp", y = "value", ylab=paste0("4C signal per ", tag),
        add = c("mean_se", "jitter"),
        color = "gp", palette = xcol,
        position = position_dodge(0.8),
        facet.by = "loci")
        + stat_compare_means(method = "t.test"),
    5, 4)

  ## make plots: regions side by side, one panel per group
  save.pdf("_barplot_2",
    ggbarplot(gg, x = "loci", y = "value", ylab=paste0("4C signal per ", tag),
        add = c("mean_se", "jitter"),
        color = "gp", palette = rev(xcol),
        position = position_dodge(0.8),
        facet.by = "gp")
        + stat_compare_means(method = "t.test"),
    5, 4)

  ## make plots log2
  gg$value <- log2(gg$value)
  save.pdf("_barplot_log2",
    ggbarplot(gg, x = "gp", y = "value", ylab=paste0("4C signal per ", tag, " (log2)"),
        add = c("mean_se", "jitter"),
        color = "gp", palette = xcol,
        position = position_dodge(0.8),
        facet.by = "loci")
        + stat_compare_means(method = "t.test"),
    5, 4)

  ## per-region mean of each group (untransformed values)
  g1.mean <- vapply(seq_len(n.loci), function(i){
    mean(ggg$value[is.g1 & loc.idx == i])
  }, numeric(1))
  g2.mean <- vapply(seq_len(n.loci), function(i){
    mean(ggg$value[!is.g1 & loc.idx == i])
  }, numeric(1))

  ## normalize: divide by the baseline mean of the same region and keep
  ## only the rows of the second group
  g2 <- ggg
  g2$value <- ggg$value / g1.mean[loc.idx]
  g2 <- g2[!is.g1, ]

  ## make plots
  save.pdf("_barplot_norm",
    ggbarplot(g2, x = "loci", y = "value", ylab=paste0("4C signal per ", tag, " (FC: D5/D0)"),
        add = c("mean_se", "jitter"),
        color = "loci", palette = xcol,
        position = position_dodge(0.8))
        + stat_compare_means(method = "t.test"),
    4, 4)

  ## fold change
  fc <- g2.mean / g1.mean
  list(dat = ggg, fc = fc)
}



######################################################################
# set pars
######################################################################
## set outprefix
outprefix <- "Track_Plot_lncRNA"

## set plot regions (earlier windows kept for reference)
# pRegion = data.frame(chrom="chr8", start=55136408, end=55142341, bin=10000)
# pRegion = data.frame(chrom="chr8", start=54595055, end=55596154, bin=10000)
pRegion <- data.frame(
  chrom = "chr8",
  start = 55100000,
  end = 55200000,
  bin = 10000
)

## readin iRegion (headerless, so columns are named V1, V2, ...)
iRegion <- read.delim(
  "iRegion_lncRNA.bed",
  header = FALSE, stringsAsFactors = FALSE
)


######################################################################
# readin file and process data
######################################################################
## readin filelist: first column holds the bedGraph paths
fl <- read.delim(
  "fl.merged.RPM.smooth.bedGraph",
  header = FALSE, stringsAsFactors = FALSE
)[, 1]

## view point label: drop the leading data directory, everything after the
## next "/" and any "_Rep..." suffix, then keep the text after the last "_"
vp <- local({
  lab <- gsub(".*_", "", gsub("../../data/|/.*|_Rep.*", "", fl))
  factor(lab, levels = unique(lab))
})

## group label: same path clean-up, minus any "_ND..." or "_mod..." suffix
gp <- local({
  lab <- gsub("../../data/|/.*|_ND.*|_mod.*", "", fl)
  factor(lab, levels = unique(lab))
})


######################################################################
# process data
######################################################################
## one data set per resolution; samples are grouped by "viewpoint|group"
dat_10k <- local.prepare.data(fl, paste(vp, gp, sep = "|"))
dat_5k <- local.prepare.data(fl, paste(vp, gp, sep = "|"), f.bin = 5000)
dat_1k <- local.prepare.data(fl, paste(vp, gp, sep = "|"), f.bin = 1000)


######################################################################
# make track plot for 4C
######################################################################
local.plot.wrap(dat_10k, "10kb")
local.plot.wrap(dat_5k, "5kb")
local.plot.wrap(dat_1k, "1kb")


######################################################################
# significant test
######################################################################
local.barplot(dat_1k)


# left grey shape: enhancer; right grey shape: promoter

# [1]  9.253764 13.380013

## data used to calculate fold change
#    variable       value             loci                     gp
# 1        R1   95.324417 lncSOX17Promoter       ongene|WT_undiff
# 2        R1  170.759970    SOX17Enhancer       ongene|WT_undiff
# 3        R2    8.763403 lncSOX17Promoter       ongene|WT_undiff
# 4        R2   39.992461    SOX17Enhancer       ongene|WT_undiff
# 5        R3   24.858793 lncSOX17Promoter       ongene|WT_undiff
# 6        R3  159.935074    SOX17Enhancer       ongene|WT_undiff
# 7        R1  317.287025 lncSOX17Promoter ongene|WT_dEN_CXCR4pos
# 8        R1  913.904728    SOX17Enhancer ongene|WT_dEN_CXCR4pos
# 9        R2  415.492921 lncSOX17Promoter ongene|WT_dEN_CXCR4pos
# 10       R2 2327.603591    SOX17Enhancer ongene|WT_dEN_CXCR4pos
# 11       R3  460.461542 lncSOX17Promoter ongene|WT_dEN_CXCR4pos
# 12       R3 1718.295375    SOX17Enhancer ongene|WT_dEN_CXCR4pos





######################################################################
# test
######################################################################
# dat = dat_1k
# tag = "1kb"
# f.vp.ext = c(20000, 30000)

# slt = c("ongene|WT_undiff", "ongene|WT_dEN_CXCR4pos")
# pfmat = dat$mean[,slt]
# pferror = dat$sd[,slt]
# f.phe = dat$phe
# f.vp = iRegion[5,]
# f.iregion = iRegion[c(2,4),]
# f.ylab=paste0("4C singal per ", tag)
# f.vp.ext = f.vp.ext[1]
# f.scale=1000000
# f.col=c("#4481b2", "#b83039")
# f.ylab="4C singal per 10kb"



#   ## set the view point
#   f.ext = f.vp
#   iMid = floor((f.vp[,3]+f.vp[,2])/2)
#   f.ext[,2] = iMid-f.vp.ext
#   f.ext[,3] = iMid+f.vp.ext
  
#   ## remove the data arround the view point
#   ss = f.phe[,3]>f.ext[1,2] & f.phe[,2]<f.ext[1,3]
#   pfmat[ss, ] = NA
#   pferror[ss, ] = NA
  
#   ## prepare data
#   gg = do.call(rbind, lapply(1:ncol(pfmat), function(i){
#     data.frame(x=c(f.phe[,2], f.phe[,3]-1), y=c(pfmat[,i], pfmat[,i]), yer=c(pferror[,i], pferror[,i]), sample=colnames(pfmat)[i])
#   }))
#   gg[,1] = gg[,1]/f.scale
#   gg$upper = gg$y+gg$yer
#   gg$lower = gg$y-gg$yer
#   gg$lower[gg$lower<0] = 0
#   gg$y[gg$y<0] = 0
  
#   ## iregion format
#   f.iregion[,2] = f.iregion[,2] / f.scale
#   f.iregion[,3] = f.iregion[,3] / f.scale
#   f.iregion$mid = round((f.iregion[,2] + f.iregion[,3])/2, 3)

#   ## select iRegion
#   g2 = gg[gg$x %in% f.iregion$mid,]


# pdf("test.pdf")
# ggplot(data = gg, aes(x=x, y=y)) + 
#       geom_ribbon(aes(ymax=upper, ymin=lower, fill=sample), alpha=0.2) +
#       geom_line(aes(col=sample), size=0.5) +
#       scale_fill_manual(values=f.col) +
#       scale_color_manual(values=f.col) +
#       theme_my() + theme(legend.position="top") +
#       geom_rect(data = f.iregion, aes(NULL, NULL, xmin = V2, xmax = V3, ymin = -Inf, ymax = Inf), fill = "green", alpha = 0.4) +
#       xlab(paste0("Genomic coordinates (x", f.scale, ")")) +
#       ylab(f.ylab)
# dev.off()

