#!/usr/bin/python
"""
Global patterns of genome evolution associated with generalist parasitism
Thomas Badet, Malick Mbengue, Olivier Navaud, Remi Peyraud, Adelin Barbacci, Sylvain Raffaele
"""

from pandas import *
import random as rd

def read(path):
	data = read_csv(path)
	return data

def get_target(data):
	# Generates random mutation
	N_mut = rd.randint(1,len(data))
	t = rd.sample(data.Codon.values,rd.randint(1,len(data)))
	m = []
	for ii in range(0,len(t)):
		m.append(rd.random()) 
	# Normalizes array of mutation
	m = np.array(m)/sum(m)
	return t,m.tolist()


def _shift_counts(table, source_codon, candidates, rate, count):
	"""Move part of ``source_codon``'s count onto randomly chosen candidates.

	The amount moved to each target is ``weight * rate * count * 0.01``, with
	the weights coming from ``get_target``; the total moved is then subtracted
	from the source codon. ``table`` is modified in place. Nothing happens
	when ``candidates`` has no rows.
	"""
	if len(candidates) == 0:
		return
	# chosing randomly mutations
	targets, freq = get_target(candidates)
	# Numbers of codons added to every mutations
	moved = [f * rate * count * 0.01 for f in freq]
	for codon, amount in zip(targets, moved):
		# .loc writes into the frame itself; chained ``table.Number[mask] +=``
		# may update a temporary copy and silently lose the change.
		table.loc[table.Codon == codon, 'Number'] += amount
	# Updating source codons
	table.loc[table.Codon == source_codon, 'Number'] -= sum(moved)


def mutation(data):
	"""Apply one round of random codon mutations and return the new counts.

	For every row of ``data`` a share of its ``Number`` is redistributed,
	first to synonymous codons (same ``AA``, different ``Codon``) using
	``Syn_mutation_rate``, then to non-synonymous codons (different ``AA``)
	using ``ns_mutation_rate``. All amounts are computed from the counts in
	``data`` as passed in, so updates made for one row do not influence the
	amounts computed for later rows.

	Parameters
	----------
	data : DataFrame
		Needs the columns ``Codon``, ``AA``, ``Number``,
		``Syn_mutation_rate`` and ``ns_mutation_rate``. It is not modified.
		The rates are multiplied by 0.01, so they are presumably given in
		percent -- confirm against the input file.

	Returns
	-------
	DataFrame
		A copy of ``data`` whose ``Number`` column (as float) holds the
		updated counts.
	"""
	Datab = data.copy()
	# Fractional amounts are added below, so make the column float up front
	# instead of relying on implicit upcasting of an integer column.
	Datab['Number'] = Datab['Number'].astype(float)
	for _, row in data.iterrows():
		# Synonymous mutations
		syno = data[(data.AA == row.AA) & (data.Codon != row.Codon)]
		_shift_counts(Datab, row.Codon, syno, row.Syn_mutation_rate, row.Number)
		# Not synonymous mutations
		n_syno = data[data.AA != row.AA]
		_shift_counts(Datab, row.Codon, n_syno, row.ns_mutation_rate, row.Number)
	return Datab

def get_change(data,tps,iterat):
	# Computes the number of optimal, not-optimal and undifferenciated codons
	opt = float(sum(data.Number[data.optimal=="yes"]))/float(sum(data.Number))
	n_opt = float(sum(data.Number[data.optimal=="no"]))/float(sum(data.Number))
	ind = float(sum(data.Number[data.optimal=="diff"]))/float(sum(data.Number))
	d = DataFrame([{'iteration':iterat,'t':tps,'opti':opt,'non_opti':n_opt,'diff':ind},])
	return d

if __name__=="__main__":
	# Input file: tab-separated codon table
	D = read_csv('./sclero.txt',sep='\t',usecols=['Codon','AA','Number','Syn_mutation_rate','ns_mutation_rate','optimal'])
	# One-row summaries are collected here and concatenated once at the end;
	# calling concat() inside the loops re-copies the whole table every time.
	frames = []
	# Number of repetitions of simulations
	for kk in range(0,10):
		# Every repetition starts from the unmodified input table (time 0)
		frames.append(get_change(D,0,kk))
		D1 = D.copy()
		for ii in range(1,1000):
			# Simulation of long time evolution
			print("Iteration "+str(kk)+" time "+str(ii))
			D1 = mutation(D1)
			frames.append(get_change(D1,ii,kk))
	# output table
	res = concat(frames)
	# writing results
	res.to_csv("./res_python_sclero_ok_3.csv",sep='\t')

