library(flowCore)
library(gylab)
library(SpatialTools)
library(cachecache)
# Reads data
#
# Arguments:
# datadir	: path to root directory of data
# datadate	: name of directory containing data (expected to be the date of acquisitions in format YYYY-MM-DD
# annofile	: experimenter annotation file, semi-colon separated, with header:
#			plate;well;time;gal;strain
# Value:
# a list of data.frames containing the data of ALL cells
read_facs_yeast_HTS_gal_data <- function(datadir, datadate, annofile, anno=NULL){
  if (is.null(anno)) {
    anno = lapply(datadate, function(d) {
      anno = read.csv(paste(datadir, d, annofile, sep=""), sep=";", stringsAsFactors=FALSE)
      date = gsub("/", "", d)
      anno <- cbind(anno, date = rep(date, times = dim(anno)[1]))
      return(anno)
    })
    anno = do.call(rbind, anno)
  }    
  results = lapply(1:nrow(anno), function(i){
    print(i)
    a = anno[i,]
    plate = a$plate
    well = a$well
    time = a$time 
    gal = a$gal
    strain = a$strain
    date = a$date
    # Reading fcs data
    path = paste(datadir, date, "/", plate, "/dat/", sep="")
    files = list.files(path)
    well_filename = files[match(well, substr(files, nchar(files) - 2, nchar(files)))]
    fcs_filename = paste(path, well_filename, sep = "")
    fcs_orig = get_content(fcs_filename, "fcs", transformation=FALSE)
    fcs = data.frame(exprs(fcs_orig)[,1:3])   
    return(fcs)
  })
 # quality control
 foo = lapply(results, function(l){
   list(
     median_fl1 = median(l$FL1.H),
     median_fsc = median(l$FSC.H),
     median_ssc = median(l$SSC.H),
     nb_cells = length(l$FL1.H)
   )
 })
 foo = do.call(rbind, foo)
 foo = data.frame(lapply(data.frame(foo, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE) 
 anno = cbind(anno, foo)
 return(list(alldat = results, anno = anno ))
}

my_read.fcs = function(obj, ...) {
  library(flowCore)
  read.FCS(obj$filename, ...)
}


# This function, and the one it is based on, were developed by Florent Chuffart
# and later adapted by Gael Yvert
# 
# It interrogates the FSC/SSC density plot and extract ncell from the peak of density
# 
# Arguments:
# data             : a list containing a field called alldat which is a list containing the data
# frac_cell	: fraction of cells to keep after gating (approximately). This number defines the size of the gate (few cells: narrow gate).
#
# graphics	: if TRUE (default), graphics are produced in pdf files to visualize gating. The name of these files will be in the format platewell.pdf.
#
# graphicsdir	: a character string giving the path to the directory where the graphics files should be written
#
# pre_gating_rm_rate : Pre-gating fraction of cells that will be remove on x and y axis. This allows to limit computations and filtering to the area of interest. Yeast cells being small, default value is 0.05.
#
# The main function is kdce2d() that Florent implemented in gylab package.
# This function calls kde2d from the MASS package
# 
# kerneld1$inside is the gating Boolean: TRUE = we keep it, FALSE = we don't.
# kernel$cl is the contourline defining the gate.
gate_facs_yeast_HTS_data <- function(data, frac_cells=0.4, graphics=TRUE, pre_gating_rm_rate=0.05, verbose=TRUE, hard_lim=NULL){
  kerneld = lapply(1:length(data$alldat), function(i) {
    if (i %% 10 == 1) { print(paste(i, "/", length(data$alldat), "gating..."))  }
    l = data$alldat[[i]]
    x = l$FSC.H
    y = l$SSC.H
    if (!is.null(hard_lim)) {
      idx = (x >= hard_lim[1] & x <= hard_lim[2] & y >= hard_lim[3] & y <= hard_lim[4] )              
    } else{
      idx = rep(TRUE, length(x))        
    }
    thres_1 = pre_gating_rm_rate
    thres_2 = frac_cells/(1-thres_1)
    k = gate_it(x, y, idx=idx, thres_1=thres_1, thres_2=thres_2)
    if (graphics) plot_gating(k)
    return(k)
  })
  data$gdat = lapply(1:length(kerneld), function(i) {
    k = kerneld[[i]]
    data$alldat[[i]][k$idxgate,]
  })
  data$gate = lapply(kerneld, function(k) {
    k$cl
  })
  # quality control
  foo = lapply(data$gdat, function(l){
    list(
      median_gfl1 = median(l$FL1.H),
      median_gfsc = median(l$FSC.H),
      median_gssc = median(l$SSC.H),
      nb_gcells = length(l$FL1.H)
    )
  })
  foo = do.call(rbind, foo)
  foo = data.frame(lapply(data.frame(foo, stringsAsFactors=FALSE), unlist), stringsAsFactors=FALSE) 
  data$anno = cbind(data$anno, foo)  
  return(data);
}








gate_it = function(x, y, idx, thres_1, thres_2, n=34, DEBUG=FALSE) {  
  tmp_x = x[idx]
  tmp_y = y[idx]
  # Removing saturated
  qx = quantile(tmp_x, probs=c(thres_1/2, 1 - thres_1/2))
  qy = quantile(tmp_y, probs=c(thres_1/2, 1 - thres_1/2))

  idxdesatx = tmp_x >= qx[1] & tmp_x <= qx[2]    
  idxdesaty = tmp_y >= qy[1] & tmp_y <= qy[2]    
  idxdesat = rep(FALSE, length(x))
  idxdesat[idx] = idxdesatx & idxdesaty

  nb_cell=sum(idxdesat)*thres_2

  x1 = x[idxdesat]
  y1 =  y[idxdesat]

  # lims = c(qx, qy)
  # print(lims)

  dx = qx[2] - qx[1]
  dy = qy[2] - qy[1]  
  lims = c(qx + c(-0.1*dx,+0.1*dx), qy + c(-0.1*dy,+0.1*dy))
  # lims[which(lims <= 0)] = 1
  
  h = c(bandwidth.nrd(x1), bandwidth.nrd(y1))
  if (0 %in% h) {
    h[which(h == 0)] = 1
  }
  kerneld = kde2d(x1, y1, h=h, n=n, lims=lims)
  kerneld$xorig = x
  kerneld$yorig = y
  kerneld$idxorig = idx
  kerneld$xdesat = x1
  kerneld$ydesat = y1
  kerneld$idxdesat = idxdesat
  kerneld$lims = lims
  kerneld$n = n

  get_confidencebound = function(kerneld, prop=prop, DEBUG=DEBUG) {
    pp = sapply(1:length(kerneld$xdesat), function(i) {
      if (DEBUG & (i %% 10000 == 1)) {print(paste("Getting confidence bound", i, "/", length(kerneld$xdesat)))}
    	z.x <- max(which(kerneld$x <= kerneld$xdesat[i]))
    	z.y <- max(which(kerneld$y <= kerneld$ydesat[i]))
    	return(kerneld$z[z.x, z.y])
    })
    confidencebound = quantile(pp, prop, na.rm = TRUE)
    return(confidencebound)
  }

  prop = min(1, max(0, 1-nb_cell/length(kerneld$xdesat)))
  # print(paste(nb_cell, "/", length(kerneld$xdesat), "===>", prop))
  confidencebound = get_confidencebound(kerneld, prop=prop, DEBUG=DEBUG)
  kerneld$cl = contourLines(kerneld, levels = confidencebound)

  baz = sapply(1:length(kerneld$cl), function(i) {
    poly = cbind(kerneld$cl[[i]]$x,kerneld$cl[[i]]$y)
    pts = cbind(kerneld$xdesat,kerneld$ydesat)
    inside = in.out(poly, pts)      
    return(inside)
  })
  # kerneld$inside = as.logical(apply(t(baz), 2, sum))
  bar = sapply(data.frame(baz),sum)
  idx = which(bar==max(bar))[1]
  kerneld$cl = kerneld$cl[idx]
  kerneld$inside = baz[,idx]
  tmp_idx = rep(FALSE, length(kerneld$xorig))
  tmp_idx[idxdesat] = kerneld$inside
  kerneld$idxgate = tmp_idx
  return(kerneld)
}


plot_gating = function(k) {
  plot(k$xorig[k$idxorig], k$yorig[k$idxorig], pch=".")
  abline(v=k$lims[1:2])
  abline(h=k$lims[3:4])
  persp(k, phi = 30, theta = 20, d = 5)
  # plot(k$xdesat, k$ydesat, pch = ".", col= k$inside +1)
  # plot.contourLines(k$cl, add=TRUE)
  plot(k$xorig, k$yorig, pch = ".", col= k$idxgate + 1, xlim=k$lims[1:2], ylim=k$lims[3:4])
  plot.contourLines(k$cl, add=TRUE)
}



plot_qc = function(data, idx, type="raw", col_key="col", ...) {
  if (missing(idx)) {
    idx = rep(TRUE, nrow(data$anno))
  }
  data$anno = data$anno[idx,]
  data$alldat = data$alldat[idx]
  data$gdat = data$gdat[idx]
  data$gate = data$gate[idx]
  
  if (type=="raw") {
    keys = c("median_fl1", "median_fsc", "median_ssc", "nb_cells")
    ylims=list("median_fl1"=c(100, 700), "median_fsc"=c(150,350), "median_ssc"=c(30,130), "nb_cells"=c(0,10000))
  } else if (type=="gated") {
    keys = c("median_gfl1", "median_gfsc", "median_gssc", "nb_gcells")
    ylims=list("median_gfl1"=c(100, 700), "median_gfsc"=c(150,350), "median_gssc"=c(30,130), "nb_gcells"=c(0,5000))
  }


  layout(matrix(1:4,4,byrow=TRUE))
  # stats
  for(key in keys) {
    ylim = ylims[[key]]
    plot(1:length(data$anno[[key]]), data$anno[[key]],  xaxt='n', xlab="", col=data$anno[[col_key]], ylab=key, cex=2, pch=16, ylim=ylims[[key]], ...)
    axis(side = 1, at = 1:length(data$anno[[key]]), labels = paste(data$anno$plate,data$anno$well,data$anno$time,data$anno$gal, data$anno$strain), tck = -0.01, las=2)
  }
  # nsubdiv = 4
  # layout(matrix(c(rep(1,nsubdiv), rep(2,nsubdiv), rep(3,nsubdiv), rep(4,nsubdiv), 5:(4 + nsubdiv)),5, byrow=TRUE))
  # # stats
  # for(key in keys) {
  #   ylim = ylims[[key]]
  #   plot(1:length(data$anno[[key]]), data$anno[[key]],  xaxt='n', xlab="", col=data$anno[[col_key]], ylab=key, cex=2, pch=16, ylim=ylims[[key]], ...)
  #   axis(side = 1, at = 1:length(data$anno[[key]]), labels = paste(data$anno$plate,data$anno$well,data$anno$time,data$anno$gal, data$anno$strain), tck = -0.01, las=2)
  # }
  # plot_gate(data, col_key=col_key, ...)
  
}


plot_gates = function(data, idx, col_key="col", ...) {
  if (missing(idx)) {
    idx = rep(TRUE, nrow(data$anno))
  }
  data$anno = data$anno[idx,]
  data$alldat = data$alldat[idx]
  data$gdat = data$gdat[idx]
  data$gate = data$gate[idx]

  bar = lapply(data$gate, function(g){
    list(rx = range(g[[1]]$x), ry = range(g[[1]]$y))
  })
  bar = do.call(rbind, bar)
  bar = data.frame((lapply(data.frame(bar, stringsAsFactors=FALSE), unlist)), stringsAsFactors=FALSE)

  plot(0,0,col=0, xlim=range(bar$rx), ylim=range(bar$ry), xlab="FSC", ylab="SSC", ...)

  baz = sapply(1:length(data$gate), function(i){
    g = data$gate[[i]]
    col = data$anno[i, col_key]
    lines(g[[1]]$x, g[[1]]$y, lwd=3, col=adjustcolor(col, alpha.f=0.3))
  })
}


show.tc <- function(strain, gal, colors = c("white", "darkblue"),...){
  idx = which(data$anno$gal == gal & data$anno$strain == strain)
  ld = list()
  nc = NULL
  tc = NULL
  xpos = NULL
  for (i in 1:length(idx)){
    ld[[i]] <- density(data$gdat[[idx[i]]]$FL1.H)
    xpos[i] <- data$anno$time[idx[i]]
    nc[i] = length(data$gdat[[idx[i]]]$FL1.H)
    tc[i] = length(data$alldat[[idx[i]]]$FL1.H)
  }
  # ncell = dim(data$gdat[[idx[1]]])[1]
  dyn.profile(ld, xpos = xpos, xlab = "minutes", ylab = "FL1.H", main = paste(strain, ", [GAL]=", gal, " , ", sep = ""), colors = colors, ...)
  text(x=xpos, y=50, labels=nc)
  text(x=xpos, y=0, labels=tc)
}
