# Title: Python scripts for in silico evolution of codon usage
#
# NOTE: this listing bundles two independent scripts, random_codon.py and
# non_random_codon.py.  Both define functions with the same names, so each
# section is meant to be saved under its own file name and run separately.
# The indentation below was reconstructed from a flattened listing.
#
# random_codon.py
# Evolution of codon usage using known mutation frequencies with random
# direction of mutation
#####################################################################################
#!/usr/bin/python
"""
Global patterns of genome evolution associated with generalist parasitism
Thomas Badet, Remi Peyraud, Malick Mbengue, Olivier Navaud, Mark Derbyshire,
Richard P. Oliver, Adelin Barbacci, Sylvain Raffaele
"""
import random as rd

import numpy as np
from pandas import DataFrame, concat, read_csv


def read(path):
    """Load the delimited text file at ``path`` with ``read_csv`` defaults."""
    return read_csv(path)


def get_target(data):
    """Pick random target codons and random normalised weights for them.

    Parameters
    ----------
    data : DataFrame
        Candidate codons; only the ``Codon`` column is read.  Must contain at
        least one row (``random.randint(1, 0)`` raises ``ValueError``).

    Returns
    -------
    (list, list of float)
        Between 1 and ``len(data)`` distinct codons, and one weight per codon.
        The weights sum to 1.
    """
    # random.sample() needs a real sequence on Python 3; a NumPy array is
    # rejected, so convert the column to a plain list first.
    codons = data.Codon.tolist()
    n_mut = rd.randint(1, len(codons))
    targets = rd.sample(codons, n_mut)
    weights = np.array([rd.random() for _ in targets])
    # Normalizes array of mutation
    return targets, (weights / weights.sum()).tolist()


def _spread(result, row, candidates, rate):
    """Move ``rate`` percent of ``row.Number`` to random codons of ``candidates``.

    ``result`` is modified in place; nothing happens when ``candidates`` is
    empty (e.g. an amino acid encoded by a single codon).
    """
    if len(candidates) == 0:
        return
    # Choosing randomly the mutation targets
    targets, freq = get_target(candidates)
    # Numbers of codons added to every target (rates are scaled by 0.01)
    gains = (np.array(freq) * rate * row.Number * 0.01).tolist()
    for codon, gain in zip(targets, gains):
        # .loc writes into the frame itself; chained ``frame.col[mask] += x``
        # may update a temporary copy instead.
        result.loc[result.Codon == codon, 'Number'] += gain
    # Updating source codon
    result.loc[result.Codon == row.Codon, 'Number'] -= sum(gains)


def mutation(data):
    """Run one mutation step over every codon and return the updated table.

    Each row gives away ``Syn_mutation_rate`` percent of its ``Number`` to
    randomly chosen codons of the same amino acid (``AA``) and
    ``ns_mutation_rate`` percent to randomly chosen codons of other amino
    acids.  All amounts are computed from the counts in ``data``; the input
    frame is left untouched.

    Parameters
    ----------
    data : DataFrame
        Columns ``Codon``, ``AA``, ``Number``, ``Syn_mutation_rate`` and
        ``ns_mutation_rate`` are read.

    Returns
    -------
    DataFrame
        Copy of ``data`` whose ``Number`` column (as float) holds the new counts.
    """
    Datab = data.copy()
    # Counts become fractional after the first step; make the column float up
    # front instead of relying on an implicit int -> float upcast.
    Datab['Number'] = Datab['Number'].astype(float)
    for _, row in data.iterrows():
        same_aa = data.AA == row.AA
        # Synonymous mutations
        _spread(Datab, row, data[same_aa & (data.Codon != row.Codon)],
                row.Syn_mutation_rate)
        # Not synonymous mutations
        _spread(Datab, row, data[~same_aa], row.ns_mutation_rate)
    return Datab


def get_change(data, tps, iterat):
    """Return a one-row table with the codon fractions per ``optimal`` class.

    Parameters
    ----------
    data : DataFrame
        Columns ``Number`` and ``optimal`` (values "yes", "no", "diff") are read.
    tps : time step stored in column ``t``.
    iterat : repetition index stored in column ``iteration``.

    Returns
    -------
    DataFrame
        Columns ``iteration``, ``t``, ``opti``, ``non_opti``, ``diff``.

    Raises
    ------
    ZeroDivisionError
        If the total of ``Number`` is zero.
    """
    total = float(data.Number.sum())

    def share(label):
        return float(data.Number[data.optimal == label].sum()) / total

    return DataFrame([{'iteration': iterat, 't': tps, 'opti': share("yes"),
                       'non_opti': share("no"), 'diff': share("diff")}, ])


if __name__ == "__main__":
    # Input file
    D = read_csv('./sclero.txt', sep='\t',
                 usecols=['Codon', 'AA', 'Number', 'Syn_mutation_rate',
                          'ns_mutation_rate', 'optimal'])
    # Rows are collected in a list and concatenated once at the end, which
    # avoids re-copying the whole result table on every time step.
    frames = []
    # Number of repetitions of simulations
    for kk in range(0, 10):
        # Initial state of this repetition
        frames.append(get_change(D, 0, kk))
        D1 = D.copy()
        for ii in range(1, 1000):
            # Simulation of long time evolution
            print("Iteration "+str(kk)+" time "+str(ii))
            D1 = mutation(D1)
            frames.append(get_change(D1, ii, kk))
    # writing results
    res = concat(frames)
    res.to_csv("./res_python_sclero_ok_3.csv", sep='\t')

#####################################################################################
# non_random_codon.py
# Evolution of codon usage using fixed mutation frequencies and fixed
# substitution patterns
#####################################################################################
#!/usr/bin/python
"""
Global patterns of genome evolution associated with generalist parasitism
Thomas Badet, Remi Peyraud, Malick Mbengue, Olivier Navaud, Mark Derbyshire,
Richard P. Oliver, Adelin Barbacci, Sylvain Raffaele
"""
from pandas import DataFrame, concat, read_csv


def read(path):
    """Load the delimited text file at ``path`` with ``read_csv`` defaults."""
    return read_csv(path)


def mutation(data):
    """Simulate one deterministic mutation step and return the updated table.

    For every row, the values from the fourth column onwards are multiplied
    by the row's first column and by 0.01; each product is added to ``N`` of
    the row whose ``Reference`` equals that column's name, and their sum is
    removed from ``N`` of the source row.  The input frame is left untouched.

    NOTE(review): the first column is assumed to hold the same count as
    ``N`` and the columns from the fourth onwards to be named after codons -
    confirm against the input file layout.

    Parameters
    ----------
    data : DataFrame
        Columns ``N`` and ``Reference`` are read by name, the rest by position.

    Returns
    -------
    DataFrame
        Copy of ``data`` whose ``N`` column (as float) holds the new counts.
    """
    Datab = data.copy()
    # Counts become fractional; avoid writing floats into an integer column.
    Datab['N'] = Datab['N'].astype(float)
    for _, row in data.iterrows():
        # Explicit positional access: plain ``row[0]`` / ``row[3:]`` on a
        # label-indexed Series is ambiguous and deprecated.
        mut = row.iloc[3:] * row.iloc[0] * 0.01
        for codon in mut.index:
            Datab.loc[Datab.Reference == codon, 'N'] += mut[codon]
        # Updating source codon
        Datab.loc[Datab.Reference == row.Reference, 'N'] -= sum(mut)
    return Datab


def get_change(data, tps):
    """Return a one-row table with the codon fractions per ``optimal`` class.

    Parameters
    ----------
    data : DataFrame
        Columns ``N`` and ``optimal`` (values "yes", "no", "diff") are read.
    tps : time step stored in column ``t``.

    Returns
    -------
    DataFrame
        Columns ``t``, ``opti``, ``non_opti``, ``diff``.

    Raises
    ------
    ZeroDivisionError
        If the total of ``N`` is zero.
    """
    total = float(data.N.sum())

    def share(label):
        return float(data.N[data.optimal == label].sum()) / total

    return DataFrame([{'t': tps, 'opti': share("yes"),
                       'non_opti': share("no"), 'diff': share("diff")}, ])


if __name__ == "__main__":
    # Input file
    D1 = read_csv('./Codon_mutations_Zymo_3.csv', sep='\t')
    # Number of optimal, not optimal and not differentiated codons
    frames = [get_change(D1, 0)]
    for ii in range(1, 1000):
        # Computing mutation
        D1 = mutation(D1)
        # Creating table of results
        frames.append(get_change(D1, ii))
    # Storing output file
    res = concat(frames)
    res.to_csv("./res_python_Zymo_det.csv", sep='\t')
    print("finished")