import random
import numpy as np
import time
import sys
from tqdm import tqdm


# Requires a file nt_freq.txt in the working directory with one row per sequence position; each row holds the tab-separated, normalized frequencies (0 to 1) of the bases A, C, G, T, and N, in that order.
# Column order of the frequency table; column i holds the weight of NUCLEOTIDES[i].
NUCLEOTIDES = 'ACGTN'
FREQUENCY_FILE = 'nt_freq.txt'


def normalize_frequencies(table):
    """Validate a frequency table and rescale every row to sum to exactly 1.

    Args:
        table: Array-like with one row per sequence position and at least
            ``len(NUCLEOTIDES)`` columns; only the first five columns
            (A, C, G, T, N) are used.  A single-position table may be given
            as a flat sequence of five numbers.

    Returns:
        A 2-D float array of shape ``(positions, 5)`` whose rows sum to 1.

    Raises:
        ValueError: If the table has fewer than five columns, contains
            negative or non-finite entries, or has a row whose weights are
            all zero (no nucleotide could ever be drawn for that position).
    """
    # genfromtxt returns a 1-D array for a one-line file; promote it so that
    # iterating the table always yields one row per position.
    table = np.atleast_2d(np.asarray(table, dtype=float))
    width = len(NUCLEOTIDES)
    if table.shape[1] < width:
        raise ValueError(
            "frequency table needs %d columns (%s), found %d"
            % (width, ", ".join(NUCLEOTIDES), table.shape[1]))
    table = table[:, :width]
    if not np.isfinite(table).all() or (table < 0).any():
        raise ValueError(
            "frequency table contains negative or non-numeric entries")
    totals = table.sum(axis=1, keepdims=True)
    if (totals <= 0).any():
        raise ValueError(
            "frequency table has a position where every frequency is zero")
    # Re-normalizing absorbs rounding in the text file, which would otherwise
    # make np.random.choice reject rows that sum to e.g. 0.9999.
    return table / totals


def generate_sequence(probabilities):
    """Draw one random sequence with one nucleotide per table row.

    Args:
        probabilities: 2-D array as returned by ``normalize_frequencies``;
            row ``i`` gives the probabilities of A, C, G, T, N at position
            ``i``.

    Returns:
        A string with one character per row of ``probabilities``.
    """
    # A direct weighted draw yields the same distribution as uniformly picking
    # a base and accepting it with its frequency, but always terminates.
    picks = [np.random.choice(len(NUCLEOTIDES), p=row) for row in probabilities]
    return ''.join(NUCLEOTIDES[pick] for pick in picks)


def main():
    """Prompt for a count and an output path, then append FASTA records.

    Reads the frequency table from ``FREQUENCY_FILE``, asks the user how many
    sequences to generate and where to store them, and appends records named
    ``>syn_1``, ``>syn_2``, ... to the chosen file.
    """
    probabilities = normalize_frequencies(np.genfromtxt(FREQUENCY_FILE))

    # Python 2's input() evaluates whatever is typed; always read plain text.
    try:
        read_line = raw_input  # noqa: F821 -- only defined on Python 2
    except NameError:
        read_line = input

    nos = int(read_line("Enter number of sequences to generate: "))
    outputfile = read_line("Enter name of output file (sequences will be appended if file already exists): ")

    # Open once in append mode instead of reopening for every sequence.
    with open(outputfile, "a") as fasta:
        for number in tqdm(range(1, nos + 1)):
            fasta.write(">syn_%d\n%s\n" % (number, generate_sequence(probabilities)))
    print("Done!")


if __name__ == "__main__":
    main()

