####
# summarise binned 4C signal in a specific region across samples and draw track plots
####

######################################################################
# load lib
######################################################################
source("~/easyfunc.R")
library("tidyverse")
library("reshape")
library("GenomicRanges")


######################################################################
# local functions
######################################################################
local.binBed <- function(
  f.chr = "chr8", f.stt = 54500000, 
  f.end = 55700000, f.bin=10000){
  #####
  # Build a BED-like table of consecutive bins of width f.bin
  # covering f.stt .. f.end on chromosome f.chr.
  #
  # Returns a data.frame with columns chrom, start, end. A trailing
  # stretch shorter than f.bin does not get a bin of its own.
  #####
  ## left edge of every bin
  bin.start <- seq(from = f.stt, to = f.end - f.bin, by = f.bin)
  ## right edge of every bin (equals the left edge of the next bin)
  bin.end <- seq(from = f.stt + f.bin, to = f.end, by = f.bin)

  data.frame(chrom = f.chr, start = bin.start, end = bin.end)
}

local.binCount <- function(
  f.fn, f.chr = "chr8", f.stt = 54500000, 
  f.end = 55700000, f.bin=10000, f.method = viewMeans){
  #####
  # Summarise the score of one bedGraph file per fixed-width bin
  # for easy comparison between samples.
  #
  # f.fn     : path to a headerless, tab-delimited file with the four
  #            columns chrom, start, stop, score
  # f.chr, f.stt, f.end : region to summarise
  # f.bin    : bin width
  # f.method : summary function handed on to easy.gr.binSummary()
  #
  # Returns the "Score" element of the easy.gr.binSummary() result,
  # with exact zeros replaced by NA.
  #####
  ## read in bedgraph (TRUE/FALSE spelled out: T and F can be reassigned)
  f.dat <- read.delim(f.fn, header = FALSE, stringsAsFactors = FALSE)
  colnames(f.dat) <- c("chrom", "start", "stop", "Score")

  ## keep only the intervals on f.chr that touch [f.stt, f.end]
  f.keep <- f.dat[,1] == f.chr & f.dat[,3] >= f.stt & f.dat[,2] <= f.end
  f.sub <- f.dat[f.keep,]

  ## NOTE(review): bedGraph starts are 0-based, but no
  ## starts.in.df.are.0based = TRUE is passed here, so the coordinates are
  ## taken as they are - confirm this is intended.
  f.GR <- makeGRangesFromDataFrame(f.sub, keep.extra.columns = TRUE)

  ## bins: built by local.binBed() so that the counted windows and the
  ## plotted coordinates always come from the same definition
  f.GRwin <- makeGRangesFromDataFrame(
    local.binBed(f.chr = f.chr, f.stt = f.stt, f.end = f.end, f.bin = f.bin)
  )

  ## per-bin summary of the "Score" column
  f.out <- easy.gr.binSummary(f.GRwin, f.GR, "Score", f.method)
  f.out <- f.out$Score

  ## zeros are turned into NA (presumably bins without any signal -
  ## TODO confirm against easy.gr.binSummary)
  f.out[f.out==0] <- NA
  f.out
}

local.prepare.data <- function(f.fl, f.grp, f.pRegion=pRegion, f.bin=10000){
  #####
  # Collect everything the track plot needs for one bin size.
  #
  # f.fl      : vector of bedGraph file names
  # f.grp     : group label per file, used to aggregate replicates
  # f.pRegion : region table; row 1, columns 1-3 = chrom, start, end
  # f.bin     : bin width
  #
  # Returns a list with mean, sd, phe (bin table), dat (binned values,
  # transposed into a data.frame) and grp.
  #####

  ## region to bin, taken from the first row of f.pRegion
  reg.chr <- f.pRegion[1,1]
  reg.stt <- f.pRegion[1,2]
  reg.end <- f.pRegion[1,3]

  ## chromosome coordinates of the bins
  phe <- local.binBed(f.chr = reg.chr, f.stt = reg.stt,
                      f.end = reg.end, f.bin = f.bin)

  ## binned values: one result per file, stacked row by row
  per.file <- lapply(f.fl, function(fn){
    local.binCount(fn, f.chr = reg.chr, f.stt = reg.stt,
                   f.end = reg.end, f.bin = f.bin)
  })
  dat <- do.call(rbind, per.file)

  ## mean and sd per group (aggregation is done by easy.aggrByRow,
  ## its result is transposed here)
  dat.df <- data.frame(dat)
  d.mean <- t(easy.aggrByRow(dat.df, f.grp, colMeans))
  d.sd <- t(easy.aggrByRow(dat.df, f.grp, colSds))

  ## return
  list(mean = d.mean, sd = d.sd, phe = phe,
       dat = data.frame(t(dat)), grp = f.grp)
}

local.plotmat <- function(pfmat, pferror, f.phe, 
                          f.vp, f.vp.ext = 20000, f.scale=1000000, 
                          f.col=c("#4481b2", "#b83039"), 
                          f.ylab="4C signal per 10kb", ...){
  #####
  # plot track plot for 4C data
  #
  # pfmat    : matrix, one row per bin of f.phe and one named column per
  #            sample; drawn as lines
  # pferror  : matrix with the same layout; drawn as a ribbon of
  #            pfmat +/- pferror
  # f.phe    : bin table; column 2 = bin start, column 3 = bin end
  # f.vp     : view point table; columns 2 and 3 = start and end
  # f.vp.ext : half width of the window around the view point middle
  #            that is blanked out
  # f.scale  : divisor applied to the x coordinates
  # f.col    : colours used for both fill and line, one per sample
  # f.ylab   : y axis label
  # ...      : accepted for the callers' convenience, not used here
  #
  # Prints the plot on the current graphics device.
  #####
  ## fail early, with a readable message, on inputs of the wrong shape
  stopifnot(
    length(dim(pfmat)) == 2, ncol(pfmat) >= 1,
    nrow(pfmat) == nrow(f.phe), nrow(pferror) == nrow(f.phe)
  )

  ## set the view point: a window of +/- f.vp.ext around its middle
  f.ext <- f.vp
  iMid <- floor((f.vp[,3]+f.vp[,2])/2)
  f.ext[,2] <- iMid-f.vp.ext
  f.ext[,3] <- iMid+f.vp.ext
  
  ## remove the data around the view point
  ## (every bin overlapping the window is set to NA)
  ss <- f.phe[,3]>f.ext[1,2] & f.phe[,2]<f.ext[1,3]
  pfmat[ss, ] <- NA
  pferror[ss, ] <- NA
  
  ## prepare data: every bin contributes two points (its start and its
  ## end - 1) carrying the same value, one block of rows per sample
  gg <- do.call(rbind, lapply(seq_len(ncol(pfmat)), function(i){
    data.frame(x=c(f.phe[,2], f.phe[,3]-1), 
               y=c(pfmat[,i], pfmat[,i]), 
               yer=c(pferror[,i], pferror[,i]), 
               sample=colnames(pfmat)[i])
  }))
  gg[,1] <- gg[,1]/f.scale
  gg$upper <- gg$y+gg$yer
  gg$lower <- gg$y-gg$yer
  ## neither the ribbon floor nor the line may go below zero
  gg$lower[gg$lower<0] <- 0
  gg$y[gg$y<0] <- 0
  
  ## make plot 
  ## NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for
  ## lines; `size` is kept so that older ggplot2 versions still work.
  print(
    ggplot(gg, aes(x=x, y=y)) + 
      geom_ribbon(aes(ymax=upper, ymin=lower, fill=sample), alpha=0.2) +
      geom_line(aes(col=sample), size=0.5) +
      scale_fill_manual(values=f.col) +
      scale_color_manual(values=f.col) +
      theme_my() + theme(legend.position="top") +
      xlab(paste0("Genomic coordinates (x", f.scale, ")")) +
      ylab(f.ylab)
  )
}

local.plot.wrap <- function(dat, tag, f.vp.ext = c(20000, 30000), ...){
  ####
  # wrap plot all track plot in a certain resolution
  #
  # dat      : list as returned by local.prepare.data() (mean, sd, phe)
  # tag      : resolution label, used in the file names and the y label
  # f.vp.ext : view point mask half widths; [1] for the ongene plot,
  #            [2] for the DRE plot
  # ...      : handed on to local.plotmat()
  #
  # Writes one PDF per comparison (file names come from easy.Dir()).
  # The view points are rows 5 and 4 of the global iRegion.
  # Returns what the last dev.off() returns.
  ####

  ## draw one comparison into its own PDF; the device is closed on exit
  ## so that a failing plot does not leave it open
  plot.pdf <- function(f.prefix, f.slt, f.vp, f.ext, ...){
    pdf(easy.Dir(paste0(f.prefix, tag, ".pdf")), 5, 3)
    dev.id <- dev.cur()
    on.exit(if (dev.id %in% dev.list()) dev.off(dev.id), add = TRUE)
    local.plotmat(dat$mean[,f.slt], dat$sd[,f.slt], dat$phe, f.vp, 
                  f.ylab=paste0("4C signal per ", tag), 
                  f.vp.ext = f.ext, ...)
    dev.off(dev.id)
  }

  ## View point: ongene; Compare: undifferentiated WT vs KO
  plot.pdf("_OnGene_Undiff_WTvsKO_", 
           c("ongene|WT_undiff", "ongene|KO_undiff"), 
           iRegion[5,], f.vp.ext[1], ...)
  
  ## View point: DRE; Compare: undifferentiated WT vs KO
  plot.pdf("_DRE_Undiff_WTvsKO_", 
           c("onDMR|WT_undiff", "onDMR|KO_undiff"), 
           iRegion[4,], f.vp.ext[2], ...)
}


######################################################################
# set pars
######################################################################
## set outprefix
## (not referenced in this file; presumably picked up by easy.Dir() - confirm)
outprefix <- "Track_Plot"

## set plot regions
## (only chrom, start and end are read in this file; the bin size is
## passed to local.prepare.data() separately)
pRegion <- data.frame(chrom="chr8", start=54595055, end=55596154, bin=10000)

## read in iRegion: headerless BED file, rows 4 and 5 are used as view points
## (FALSE spelled out: F is an ordinary variable that can be reassigned)
iRegion <- read.delim("iRegion.bed", header=FALSE, stringsAsFactors=FALSE)


######################################################################
# read in the file list and derive view point / group labels
######################################################################
## read in filelist: first column of a headerless table of bedGraph paths
## (FALSE spelled out: F is an ordinary variable that can be reassigned)
fl <- read.delim("fl.merged.RPM.smooth.bedGraph", header=FALSE, stringsAsFactors=FALSE)[,1]

## view point label: strip the data directory prefix, everything from the
## next "/" on and the "_Rep..." suffix, then keep what follows the last "_"
## NOTE(review): the dots in "../../data/" are regex wildcards, not literal
## dots; left unchanged because the path layout is not visible here.
vp <- gsub(".*_", "", gsub("../../data/|/.*|_Rep.*", "", fl))
vp <- factor(vp, levels=unique(vp))

## group label: same stripping, but cut at "_ND..." or "_mod..."
gp <- gsub("../../data/|/.*|_ND.*|_mod.*", "", fl)
gp <- factor(gp, levels=unique(gp))


######################################################################
# process data
######################################################################
## one prepared data set per bin size; files are grouped by "<vp>|<gp>"
## (dat_10k relies on the default bin size of local.prepare.data)
dat_10k <- local.prepare.data(f.fl = fl, f.grp = paste(vp, gp, sep = "|"))
dat_5k <- local.prepare.data(f.fl = fl, f.grp = paste(vp, gp, sep = "|"), f.bin = 5000)
dat_1k <- local.prepare.data(f.fl = fl, f.grp = paste(vp, gp, sep = "|"), f.bin = 1000)


######################################################################
# make track plot for 4C
######################################################################
## one set of PDFs per resolution; the tag ends up in file names and y label
local.plot.wrap(dat = dat_10k, tag = "10kb")
local.plot.wrap(dat = dat_5k, tag = "5kb")
local.plot.wrap(dat = dat_1k, tag = "1kb")

