#!/usr/bin/python

import numpy
import scipy.stats
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.cm as cm
import pylab
import scipy
import cPickle
import multiprocessing
import ConfigParser
import sys
import functools
import numpy.random

#Developed by: Peter Freddolino, Tavazoie lab, Columbia University
# https://tavazoielab.c2b2.columbia.edu/lab/
#
#Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#
#    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers.
#    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution.
#    * Neither the names of the Tavazoie lab, Columbia University, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.


# Tests various methods for expression tuning in yeast.
# Everything here uses time units of seconds, and protein/transcript levels as counts per cell.

####----  first we define a bunch of helper functions
# skip down to the main function for where the business end is

# Unified function for updating protein levels
def update_p_levels(prot_levs, ts_levs, tl_rates, decay_rates):
  """Return an updated vector of protein levels after one timestep.

  Every existing protein molecule is lost with probability decay_rates and
  every transcript produces one protein with probability tl_rates; both
  counts are drawn as binomial random variates.

  inputs:
    -prot_levs -- protein levels in the previous timestep
    -ts_levs -- transcript levels in the previous timestep
    -tl_rates -- probability of each transcript making a protein
    -decay_rates -- probability of each protein molecule to decay

  The inputs may be arrays or plain scalars; prot_levs is not modified.
  """
  # decay is drawn before synthesis so the order of random draws stays fixed
  n_decayed = scipy.stats.binom.rvs(n=prot_levs, p=decay_rates)
  n_made = scipy.stats.binom.rvs(n=ts_levs, p=tl_rates)
  return prot_levs - n_decayed + n_made

# Unified function for updating transcript levels
def update_ts_levels(ts_levs, ts_rates, decay_rates):
  """Return an updated vector of transcript levels after one timestep.

  inputs:
    -ts_levs -- transcript levels at the previous timestep
    -ts_rates -- current transcription rates, as a probability per timestep
                 of generating a transcript (at most one per gene per step)
    -decay_rates -- probability per timestep of a transcript decaying
  """
  # binomial loss of existing transcripts, then a single Bernoulli birth
  lost = scipy.stats.binom.rvs(n=ts_levs, p=decay_rates)
  gained = scipy.stats.bernoulli.rvs(p=ts_rates)
  return (ts_levs - lost) + gained

# Given an idealized transcription rate distribution, 
#   find the steady state protein levels 
# We use this to set the target proteins for a given
#   target transcript distribution. Doing it this way
#   instead of just setting random protein targets 
#   ensures that the target is achievable

def get_ideal_p(target_ts_rates, t_decay_rates, p_decay_rates, tl_rates):
  """Steady state protein levels implied by a transcription rate distribution.

  inputs are:
    -target_ts_rates -- ideal transcription rate distribution
    -t_decay_rates -- decay rates of transcripts
    -p_decay_rates -- decay rates of proteins
    -tl_rates -- translation efficiencies of proteins

  The result is rounded to the nearest integer and returned as int64.
  """
  # transcripts settle at production/decay; proteins at (transcripts * translation)/decay
  steady_ts = target_ts_rates / t_decay_rates
  steady_p = (steady_ts * tl_rates) / p_decay_rates
  return numpy.rint(steady_p).astype(numpy.int64)

def get_ideal_t(target_ts_rates, t_decay_rates):
  """Steady state transcript levels implied by a transcription rate distribution.

  inputs are:
    -target_ts_rates -- ideal transcription rate distribution
    -t_decay_rates -- decay rates of transcripts

  The result is rounded to the nearest integer and returned as int64.
  """
  steady_ts = numpy.rint(target_ts_rates / t_decay_rates)
  return steady_ts.astype(numpy.int64)
  

####-----

def ace_discmark_method(ngenes, nsteps, fitfunc, global_dict, cfg,do_tune=True,change_nmarks=True,do_l2 = True):
  """Transcriptional tuning using explicit, signed chromatin marks.

  No local memory is kept outside of the marks themselves. Positive numbers
  of marks increase transcription and negative numbers decrease it, both
  relative to a baseline level; a second layer of marks ("L2") rescales
  that baseline.

  inputs:
    -ngenes -- number of genes to consider
    -nsteps -- number of timesteps to simulate
    -fitfunc -- takes a vector of ngenes protein levels and returns a scalar
                that is proportional to cell health/fitness
    -global_dict -- must provide 'tl_rates', 't_decay_rates', 'p_decay_rates',
                'ts_rates_init', 'ts_levs_init' and 'prot_levs_init'
    -cfg -- config object; SAVEFREQ is read from section 'simulation' and
                the DISC_* parameters from section 'signed_discmark'
    -do_tune -- if False, the sign of the fitness change is replaced by a
                coin flip
    -change_nmarks -- if False, we never actually allow the number of
                first-level marks to change
    -do_l2 -- if False, the L2 marks are never changed

  returns a dict with keys 'ts_rates', 'ts_levs', 'prot_levs' (one column
  per saved frame) and 'fitnesses' (one entry per saved frame); a frame is
  saved every SAVEFREQ steps.
  """

  # global parameters
  SAVEFREQ = cfg.getint('simulation','SAVEFREQ')

  DISC_TS_MARK_SIZE = cfg.getfloat('signed_discmark','DISC_TS_MARK_SIZE')
  DISC_TS_L2MARKSIZE = cfg.getfloat('signed_discmark','DISC_TS_L2MARKSIZE')
  DISC_TS_NOISE = cfg.getfloat('signed_discmark','DISC_TS_NOISE')
  DISC_WINDOWSIZE = cfg.getint('signed_discmark','DISC_WINDOWSIZE')
  DISC_MARKFREQ = cfg.getfloat('signed_discmark','DISC_MARKFREQ')
  DISC_L2MARKFREQ = cfg.getfloat('signed_discmark','DISC_L2MARKFREQ')
  DISC_MARK_DECAY = cfg.getfloat('signed_discmark','DISC_MARK_DECAY')

  # items that vary with timestep -- this is local storage for them
  # each array holds a rolling window of the last 2*DISC_WINDOWSIZE steps
  window = 2*DISC_WINDOWSIZE
  ts_rates = numpy.zeros( (ngenes, window) ) ;# transcription rate of each gene at each timestep
  ts_levs = numpy.zeros( (ngenes, window),dtype=numpy.int64 ) ;# levels of transcripts corresponding to each gene, at each timestep
  prot_levs = numpy.zeros( (ngenes, window),dtype=numpy.int64 ) ;# levels of proteins corresponding to each gene, at each timestep
  n_marks = numpy.zeros( (ngenes, window), dtype=numpy.int64) ;# number of +/- marks altering transcription
  n_marks_l2 = numpy.zeros( (ngenes, window), dtype=numpy.int64) ;# number of marks changing baseline value
  fitnesses = numpy.zeros( window )

  # items to be written
  # frames are saved at steps 0, SAVEFREQ, 2*SAVEFREQ, ... < nsteps; sizing
  # by nsteps/SAVEFREQ alone overflows when nsteps is not a multiple of SAVEFREQ
  n_saved = (nsteps - 1) // SAVEFREQ + 1
  ts_rates_write = numpy.zeros( (ngenes, n_saved) )
  ts_levs_write = numpy.zeros( (ngenes, n_saved),dtype=numpy.int64 )
  prot_levs_write = numpy.zeros( (ngenes, n_saved),dtype=numpy.int64 )
  fitnesses_write = numpy.zeros( n_saved )

  tl_rates = global_dict['tl_rates']
  t_decay_rates = global_dict['t_decay_rates']
  p_decay_rates = global_dict['p_decay_rates']

  print("step 1:")
  print(tl_rates)
  print(t_decay_rates)
  print(p_decay_rates)
  print("----")

  # do some math for the ts rate calculation
  max_nmarks = int(numpy.rint(1/DISC_TS_MARK_SIZE))
  half_max = max_nmarks // 2

  ts_rates_base = global_dict['ts_rates_init']

  def calc_ts_rates(marks,l2marks):
    # calculate transcription rates at a moment in time given the number of marks present

    # NOTE(review): marks is an integer array and max_nmarks an int, so under
    # Python 2 (no "from __future__ import division") this is FLOOR division
    # and scaled_val only takes whole-number values -- confirm that is intended
    scaled_val = 4 * (marks + half_max) / max_nmarks

    # the mark-dependent multiplier is confined to [0, 4]
    scaled_val = numpy.fmin(4.0,scaled_val)
    scaled_val=numpy.fmax(0.0,scaled_val)
    # each L2 mark rescales the baseline rate by exp(DISC_TS_L2MARKSIZE)
    newrates= (ts_rates_base * numpy.exp(DISC_TS_L2MARKSIZE * l2marks)) * scaled_val

    # rates are per-timestep probabilities, so cap them at 1
    newrates[newrates>1.0] = 1.0
    return newrates

  # initialize protein levels and transcription rates
  prot_levs[:,0] = global_dict['prot_levs_init']
  ts_levs[:,0] = global_dict['ts_levs_init']
  n_marks[:,0] = numpy.zeros( ngenes )
  n_marks_l2[:,0] = numpy.zeros( ngenes )
  print("initial marks: %s" % n_marks[:,0])
  print("target init rates: %s" % ts_rates_base)
  ts_rates[:,0] = calc_ts_rates(n_marks[:,0],n_marks_l2[:,0])
  print("actual init ts: %s" % ts_rates[:,0])
  fitnesses[0] = fitfunc(prot_levs[:,0])

  fitnesses_write[0] = fitnesses[0]
  prot_levs_write[:,0] = prot_levs[:,0]
  ts_levs_write[:,0] = ts_levs[:,0]
  ts_rates_write[:,0] = ts_rates[:,0]

  # propagate without any input until we have enough data to start tuning
  for step in range(1,window):
    # same helpers (and same order of random draws) as the tuning loop below
    ts_levs[:,step] = update_ts_levels(ts_levs[:,step-1], ts_rates[:,step-1], t_decay_rates)
    prot_levs[:,step] = update_p_levels(prot_levs[:,step-1], ts_levs[:,step-1], tl_rates, p_decay_rates)

    if change_nmarks:
      # undirected drift: with probability DISC_MARK_DECAY a gene moves by -1, 0 or +1 marks
      delta_marks = scipy.stats.randint.rvs(size=ngenes, low=-1, high=2) * scipy.stats.bernoulli.rvs(p=DISC_MARK_DECAY,size=ngenes)
      n_marks_new = n_marks[:,step-1] + delta_marks
      n_marks_new[n_marks_new < (-1*half_max)] = (-1*half_max)
      n_marks_new[n_marks_new >half_max] = half_max
      n_marks[:,step] = n_marks_new
    else:
      n_marks[:,step] = n_marks[:,step-1]

    # rates respond to the marks of the previous step
    ts_rates[:,step] = calc_ts_rates(n_marks[:,step-1],n_marks_l2[:,step-1])
    fitnesses[step] = fitfunc(prot_levs[:,step])

    # the warm-up may run past nsteps when the window is long; never write beyond the buffers
    if (step % SAVEFREQ) == 0 and step < nsteps:
      frame = step // SAVEFREQ
      fitnesses_write[frame] = fitnesses[step]
      prot_levs_write[:,frame] = prot_levs[:,step]
      ts_levs_write[:,frame] = ts_levs[:,step]
      ts_rates_write[:,frame] = ts_rates[:,step]

  # now go through the requested number of steps, updating as needed
  for step in range(window,nsteps):
    # mean fitness over the newer half of the window minus the older half
    dF = numpy.mean(fitnesses[DISC_WINDOWSIZE:]) - numpy.mean(fitnesses[:DISC_WINDOWSIZE])

    # figure out what all the new values should be
    if do_tune:
      direction = numpy.sign(dF)
    else:
      # control case: ignore the fitness change and flip a coin instead
      direction = numpy.sign(numpy.random.choice([-1,1]))

    # NOTE(review): numpy.mean is called without axis=1, so the mark sign is
    # one scalar over all genes and recent steps, not a per-gene value --
    # confirm this is intended
    fit_term = direction * numpy.sign(numpy.mean(n_marks[:,DISC_WINDOWSIZE:])) * scipy.stats.randint.rvs(size=ngenes, low=1,high=6) * scipy.stats.bernoulli.rvs(p=DISC_MARKFREQ,size=ngenes)

    # each existing mark is lost with probability DISC_MARK_DECAY
    decay_term = numpy.sign(n_marks[:,-1]) * scipy.stats.binom.rvs(p=DISC_MARK_DECAY,n=numpy.abs(n_marks[:,-1]))
    # random -1/0/+1 noise, plus a random kick of 1-5 marks in either direction for genes that have no marks
    rand_term = scipy.stats.randint.rvs(size=ngenes, low=-1, high=2) * scipy.stats.bernoulli.rvs(p=DISC_TS_NOISE,size=ngenes) + (1 - 2*scipy.stats.bernoulli.rvs(p=0.5,size=ngenes)) * (n_marks[:,-1] == 0) * scipy.stats.randint.rvs(size=ngenes,low=1,high=6)

    delta_marks = fit_term -decay_term + rand_term

    n_marks_new = numpy.rint(n_marks[:,-1] + delta_marks).astype('int64')
    n_marks_new[n_marks_new < (-1*half_max)] =  (-1*half_max)
    n_marks_new[n_marks_new >half_max] = half_max

    if not change_nmarks:
      n_marks_new = n_marks[:,-1]

    # now figure out changes in L2 marks
    # do_l2 was previously accepted but ignored, which made the "l1only"
    # model identical to the full one; it now actually disables L2 updates
    if do_l2 and (do_tune or change_nmarks):
      p_change_l2 = DISC_L2MARKFREQ * numpy.abs(n_marks[:,-1]) / float(half_max)
      s_change_l2 = numpy.sign(n_marks[:,-1])
      # NOTE(review): n=ngenes allows up to ngenes L2 changes per gene per
      # step; n=1 may have been intended -- confirm
      n_marks_l2_new = n_marks_l2[:,-1] + s_change_l2 * scipy.stats.binom.rvs(n=ngenes,p=p_change_l2)
    else:
      n_marks_l2_new = n_marks_l2[:,-1]

    # all new values are computed from the previous step's state
    ts_rates_new = calc_ts_rates(n_marks[:,-1],n_marks_l2[:,-1])

    ts_levs_new = update_ts_levels(ts_levs[:,-1],ts_rates[:,-1],t_decay_rates)
    prot_levs_new = update_p_levels(prot_levs[:,-1], ts_levs[:,-1], tl_rates, p_decay_rates)

    # update all of the arrays with the new cellular state
    # (append the new column and drop the oldest one)
    n_marks = numpy.append(n_marks, n_marks_new.reshape(ngenes,1), axis=1)[:,1:]
    n_marks_l2 = numpy.append(n_marks_l2, n_marks_l2_new.reshape(ngenes,1), axis=1)[:,1:]
    ts_rates = numpy.append( ts_rates, ts_rates_new.reshape(ngenes,1), axis=1)[:,1:]
    ts_levs = numpy.append( ts_levs, ts_levs_new.reshape(ngenes,1), axis=1)[:,1:]
    prot_levs = numpy.append(prot_levs, prot_levs_new.reshape(ngenes,1), axis=1)[:,1:]
    fitnesses = numpy.append(fitnesses, numpy.array(fitfunc(prot_levs[:,-1])).reshape(1,1))[1:]

    if (step % SAVEFREQ) == 0:
      frame = step // SAVEFREQ
      fitnesses_write[frame] = fitnesses[-1]
      prot_levs_write[:,frame] = prot_levs[:,-1]
      ts_levs_write[:,frame] = ts_levs[:,-1]
      ts_rates_write[:,frame] = ts_rates[:,-1]

  print("final marks: %s" % n_marks[:,-1])
  print("final ts rates: %s" % ts_rates[:,-1])
  print("final ts levels: %s" % ts_levs[:,-1])
  print("final prot levels: %s" % prot_levs[:,-1])
  print("final fitness: %s" % fitnesses[-1])
  return {'ts_rates' : ts_rates_write, 'ts_levs' : ts_levs_write,  'prot_levs' : prot_levs_write, 'fitnesses' : fitnesses_write}



def known_best_method(ngenes, nsteps, fitfunc, global_dict, cfg, opt_ts_rates):
  """Simulate expression with transcription rates fixed at opt_ts_rates.

  This method shows only the effects of noise, as transcription rates are
  set to their optimal values at each timepoint.

  inputs:
    -ngenes -- number of genes to consider
    -nsteps -- number of timesteps to simulate
    -fitfunc -- takes a vector of protein levels and returns a scalar fitness
    -global_dict -- must provide 'tl_rates', 't_decay_rates', 'p_decay_rates',
                'ts_levs_init' and 'prot_levs_init'
    -cfg -- config object; SAVEFREQ is read from section 'simulation'
    -opt_ts_rates -- transcription rates to use; values are clipped to
                [0, 1] and the caller's array is not modified

  returns a dict with keys 'ts_rates', 'ts_levs', 'prot_levs' (one column
  per saved frame) and 'fitnesses' (one entry per saved frame); a frame is
  saved every SAVEFREQ steps.
  """

  # global parameters
  SAVEFREQ = cfg.getint('simulation','SAVEFREQ')

  # parse the global dictionary
  tl_rates = global_dict['tl_rates']
  t_decay_rates = global_dict['t_decay_rates']
  p_decay_rates = global_dict['p_decay_rates']

  print("step 1:")
  print(tl_rates)
  print(t_decay_rates)
  print(p_decay_rates)
  print("----")

  # items to be written
  # frames are saved at steps 0, SAVEFREQ, 2*SAVEFREQ, ... < nsteps; sizing
  # by nsteps/SAVEFREQ alone overflows when nsteps is not a multiple of SAVEFREQ
  n_saved = (nsteps - 1) // SAVEFREQ + 1
  ts_rates_write = numpy.zeros( (ngenes, n_saved) )
  ts_levs_write = numpy.zeros( (ngenes, n_saved),dtype=numpy.int64 )
  prot_levs_write = numpy.zeros( (ngenes, n_saved),dtype=numpy.int64 )
  fitnesses_write = numpy.zeros( n_saved )

  # initialize protein levels and transcription rates
  prot_levs = global_dict['prot_levs_init']
  ts_levs = global_dict['ts_levs_init']

  # the rates never change, so clip them to [0, 1] once up front; the
  # multiplication builds a new array, leaving opt_ts_rates untouched
  ts_rates = opt_ts_rates * (opt_ts_rates > 0)
  ts_rates[(ts_rates > 1)] = 1.0
  fitnesses = fitfunc(prot_levs)

  fitnesses_write[0] = fitnesses
  prot_levs_write[:,0] = prot_levs
  ts_levs_write[:,0] = ts_levs
  ts_rates_write[:,0] = ts_rates

  # now go through the requested number of steps, updating as needed
  for step in range(1,nsteps):

    # both updates read the previous timestep's state, matching the
    # documented inputs of update_p_levels and the other simulation method
    ts_levs_new = update_ts_levels(ts_levs,ts_rates,t_decay_rates)
    prot_levs = update_p_levels(prot_levs, ts_levs, tl_rates, p_decay_rates)
    ts_levs = ts_levs_new

    # evaluate fitness on the state just produced so that the saved fitness
    # and the saved protein levels describe the same timestep
    fitnesses = fitfunc(prot_levs)

    if (step % SAVEFREQ) == 0:
      frame = step // SAVEFREQ
      fitnesses_write[frame] = fitnesses
      prot_levs_write[:,frame] = prot_levs
      ts_levs_write[:,frame] = ts_levs
      ts_rates_write[:,frame] = ts_rates

  print("final ts rates: %s" % ts_rates)
  print("final ts levels: %s" % ts_levs)
  print("final prot levels: %s" % prot_levs)
  print("final fitness: %s" % fitnesses)

  return {'ts_rates' : ts_rates_write, 'ts_levs' : ts_levs_write,  'prot_levs' : prot_levs_write, 'fitnesses' : fitnesses_write}


def euclidean_dist(loc, target):
  """Fitness function: negated euclidean distance between loc and target.

  The distance is negated since we want to maximize fitness.
  """
  offset = loc - target
  sq_dist = numpy.sum(offset * offset)
  return -numpy.sqrt(sq_dist)

def half_euc_dist(loc, target):
  """Fitness function mixing euclidean and manhattan penalties.

  Entries where loc is above target contribute through a euclidean term
  (square root of the summed squares); entries where loc is below target
  contribute through a manhattan term (summed absolute differences).
  The total is negated since we want to maximize fitness.
  """
  offset = loc - target
  too_high = offset > 0
  too_low = offset < 0
  euc_term = numpy.sqrt(numpy.sum(too_high * offset * offset))
  man_term = numpy.sum(too_low * numpy.abs(offset))
  return -(man_term + euc_term)

def scaled_euclidean_dist(loc, target):
  """Fitness function: negated euclidean distance with per-gene scaling.

  Each squared difference is divided by the squared target value before
  summing, so every gene is measured relative to its own target. The
  distance is negated since we want to maximize fitness.
  """
  offset = loc - target
  rel_sq = offset * offset / (target * target)
  return -numpy.sqrt(numpy.sum(rel_sq))

def manhattan_dist(loc, target):
  """Fitness function: negated manhattan distance between loc and target.

  The distance is negated since we want to maximize fitness.
  """
  abs_offset = numpy.abs(loc - target)
  total = numpy.sum(abs_offset)
  return -1 * total

def scaled_manhattan_dist(loc, target):
  """Fitness function: negated manhattan distance with per-gene scaling.

  Each difference is divided by the target value before taking absolute
  values. The distance is negated since we want to maximize fitness.
  """
  rel_offset = (loc - target) / target
  total = numpy.sum(numpy.abs(rel_offset))
  return -1 * total

def plot_comp_hist(dict1, dict2, outprefix):
  """Overlay histograms of the late-run fitnesses of two simulations.

  inputs:
    -dict1 -- result dict with a 'fitnesses' array (drawn in blue)
    -dict2 -- result dict with a 'fitnesses' array (drawn in red); its
              minimum late-run fitness sets the lower edge of both histograms
    -outprefix -- the figure is written to "<outprefix>.png"

  Only the second half of each fitness array is used.
  """
  fits = dict1['fitnesses']
  fits_flail = dict2['fitnesses']

  # take the second half by each array's own length; the arrays hold one
  # entry per saved frame, so slicing at a global nsteps/2 is only correct
  # when every step is saved (and fails when no such global exists)
  late_fits = fits[len(fits) // 2:]
  late_flail = fits_flail[len(fits_flail) // 2:]

  # both histograms share the same bins so they can be compared directly
  hist_range = (numpy.min(late_flail), 0)

  pylab.figure()
  pylab.hist(late_flail, bins=50, range=hist_range, color='red', alpha=0.5)
  pylab.hist(late_fits, bins=50, range=hist_range, color='blue', alpha=0.5)
  pylab.savefig("%s.png" % outprefix)

def plot_opt_traj(mydict, target, outprefix):
  """Plot the optimization trajectory and fitness trace of one run.

  inputs:
    -mydict -- result dict with 'prot_levs', 'fitnesses', 'ts_rates' and 'ts_levs'
    -target -- target levels; entries 0 and 1 are marked on the trajectory plot
    -outprefix -- figures go to "<outprefix>_protlevs.png" and "<outprefix>_fitnesses.png"

  Only the first two rows of 'prot_levs' are drawn. The transcription
  rates, transcript levels and protein levels are also printed.
  """
  # first figure: row 0 against row 1 of the protein levels, points colored by their order
  pylab.figure()
  levels = mydict['prot_levs']
  first_gene = levels[0,:]
  second_gene = levels[1,:]
  pylab.plot(first_gene, second_gene, 'k--')
  pylab.scatter(first_gene, second_gene, c=numpy.arange(len(second_gene)), marker='o', cmap="bone")
  pylab.plot(target[0], target[1], 'yo', markersize=20)
  pylab.savefig("%s_protlevs.png" % outprefix)

  # second figure: the fitness trace
  pylab.figure()
  pylab.plot(mydict['fitnesses'])
  pylab.xlabel("Time (s)")
  pylab.ylabel("Fitness (AU)")
  pylab.savefig("%s_fitnesses.png" % outprefix)

  # dump the raw trajectories
  for label, key in (("TS:", 'ts_rates'), ("TSLEV:", 'ts_levs'), ("PROTLEV:", 'prot_levs')):
    print(label)
    print(mydict[key])

## Actual simulation begins here
if __name__ == "__main__":

  # first, parse all config files and read some globals that we need
  cfg_files = sys.argv[1:]
  cfg_main = ConfigParser.RawConfigParser()
  cfg_main.optionxform = str ;# keep option names case-sensitive
  cfg_main.read(cfg_files)

  # define key run parameters
  ngenes= cfg_main.getint('simulation','ngenes')
  nsteps=cfg_main.getint('simulation','nsteps')
  distance_metric=cfg_main.get('simulation','dist_metric')

  # mapping of the distance metric string in the config file to actual functions
  # checked right away so that a typo fails before the long reference simulation below
  dist_metrics = {
    'euclidean' : euclidean_dist,
    'scaled_euclidean' : scaled_euclidean_dist,
    'manhattan' : manhattan_dist,
    'scaled_manhattan' : scaled_manhattan_dist,
    'half_euc' : half_euc_dist,
  }
  if distance_metric not in dist_metrics:
    raise(ValueError("Unknown distance metric %s specified" % distance_metric))

  # read all of the globals for physiological parameters
  # every option in [global] becomes a module-level float of the same name;
  # the names are bound through globals() rather than exec() so that text
  # from a config file is never executed as code
  global_opts = cfg_main.options('global')
  for optname in global_opts:
    print (optname + " = cfg_main.getfloat('global','%s')" % optname)
    globals()[optname] = cfg_main.getfloat('global', optname)

  # set up global parameters shared by all methods
  tl_rates = numpy.power(2, scipy.stats.t.rvs(df=TL_RATE_DF,loc=TL_RATE_MU, scale=TL_RATE_SIGMA,size=ngenes)) ;# probability per timestep that each transcript generates a protein
  tl_rates = numpy.fmin(1.0, tl_rates)
  print('tl_rates:')
  print(tl_rates)

  t_halflives = scipy.stats.gamma.rvs(size=ngenes,a=T_HALFLIFE_SHAPE,scale=T_HALFLIFE_SCALE) ;# this gives the half lives in MINUTES
  t_decay_rates = numpy.log(2) / (t_halflives*60.0) ;# probability of each transcript decaying at each time step
  print('t_decay_rates:')
  print(t_decay_rates)

  p_halflives = scipy.stats.t.rvs(df=P_HALFLIFE_DF, loc=P_HALFLIFE_LOC, scale=P_HALFLIFE_SCALE, size=ngenes) ;# half lives of proteins IN HOURS
  p_halflives[p_halflives < P_HALFLIFE_MIN] = P_HALFLIFE_MIN
  p_decay_rates = numpy.log(2) / (p_halflives * 3600.0) ;# probability of each protein being degraded at each timestep
  print('p_decay_rates:')
  print(p_decay_rates)

  # also set up some initial conditions that will be shared by most methods
  ts_rates_init = scipy.stats.gamma.rvs(size=ngenes, a=TS_RATE_SHAPE, scale=TS_RATE_SCALE) / TS_RATE_TIMEFACTOR ;# this has the same functional form as the target rates, but obviously different values
  ts_levs_init = get_ideal_t(ts_rates_init, t_decay_rates)

  prot_levs_init = get_ideal_p(ts_rates_init, t_decay_rates, p_decay_rates, tl_rates)

  global_dict = {'tl_rates' : tl_rates, 't_decay_rates' : t_decay_rates, 'p_decay_rates' : p_decay_rates, 'prot_levs_init' : prot_levs_init, 'ts_rates_init' : ts_rates_init, 'ts_levs_init' : ts_levs_init} ;# bundle of these arrays for easy interaction with other functions

  # set up a target transcription rate distribution that we want the cells to match
  # note that all readouts are done at the level of proteins, but we set this up as an effort to match a given
  #  transcription rate distribution to ensure that
  #  a solution exists with the given global parameters
  # we generate the target tsr by multiplying the initial rates by random
  #  factors 2**(+/-e) with e uniform on [1,4], i.e. between 2 and 16 or
  #  between 1/16 and 1/2
  target_tsr = ts_rates_init * numpy.power(2.0, numpy.random.choice( [-1,1], size=ngenes) * scipy.stats.uniform.rvs(loc=1,scale=3,size=ngenes))
  target_tsr[target_tsr > 1.0] = 1.0

  target_ps = get_ideal_p(target_tsr, t_decay_rates, p_decay_rates, tl_rates)
  target_ts = get_ideal_t(target_tsr, t_decay_rates)

  print("INITIAL RATES:")
  print(ts_rates_init)

  print("TARGETS:")
  print("tsr:")
  print(target_tsr)
  print("tsr ratio:")
  print(target_tsr / ts_rates_init)
  print("ts:")
  print(target_ts)
  print("ps:")
  print(target_ps)
  print("----")

  # as we note that the theoretical target ps listed here are actually not precisely correct (although they will be close), we run the known_best model first to generate an empirical target distribution that will be used for all subsequent simulations
  print("Running simulation to calculate optimal protein levels")
  opt_ts_sim = known_best_method(ngenes, 10*nsteps, lambda x: euclidean_dist(x, target_ps), global_dict,cfg_main, opt_ts_rates=target_tsr)
  opt_prot_levs = opt_ts_sim['prot_levs']
  print(opt_prot_levs.shape)
  n_prot_levs = opt_prot_levs.shape[1]
  # average over the second half of the saved frames only
  target_prot_levs = numpy.mean(opt_prot_levs[:,(n_prot_levs // 2):], axis=1)
  numpy.savetxt('opt_traj.txt',opt_prot_levs)
  print("empirical protein targets:")
  print(target_prot_levs)

  # now we run all requested models

  all_models = cfg_main.get('simulation','methods')
  out_prefix = cfg_main.get('simulation','outprefix')

  # here we put a mapping of the model type string in the config file to actual functions
  model_funcs = {}
  model_funcs['best'] = functools.partial(known_best_method, opt_ts_rates = target_tsr)
  model_funcs['ace_discmark'] = functools.partial(ace_discmark_method, do_tune=True)
  model_funcs['l1only_ace_discmark'] = functools.partial(ace_discmark_method, do_tune=True,do_l2=False)
  model_funcs['flail_ace_discmark'] = functools.partial(ace_discmark_method, do_tune=False)
  model_funcs['fixed_ace_discmark'] = functools.partial(ace_discmark_method, change_nmarks=False)

  # set up the distance metric to use, measured against the empirical targets
  metric_func = dist_metrics[distance_metric]
  dist_func = lambda x: metric_func(x, target_prot_levs)

  run_i = 1

  for modeltype in all_models.split(","):
    # tolerate whitespace around the commas in the methods list
    modeltype = modeltype.strip()
    if modeltype not in model_funcs:
      print("Skipping unknown model type %s" % modeltype)
      continue

    print("Running with model type %s" % modeltype)
    this_func = model_funcs[modeltype]
    model_vals = this_func(ngenes, nsteps, dist_func, global_dict, cfg_main)

    this_opr = "%s_%s_run%i" % (out_prefix, modeltype, run_i)
    print("saving results to prefix %s" % (this_opr))
    print("final fitness was %s; ending median is %f" % (model_vals['fitnesses'][-5:], numpy.median(model_vals['fitnesses'][-10:])))

    # pickle the run object
    # binary mode keeps the pickle intact on every platform, and the with
    # block closes the file even if dumping fails
    with open("%s_traj.pkl" % (this_opr), 'wb') as ostr:
      cPickle.dump(model_vals,ostr)

    # do some plotting
    #plot_opt_traj(model_vals, target_ps, this_opr)

    run_i += 1
    print("Done with model %s" % modeltype)
    print("*******************\n\n")

