#!/usr/bin/python #converting a multiple sequence alignment to binary with ones (residues) and zeros (gaps) import sys handle = open(sys.argv[1], "r") line = handle.readline() #MultipleSequenceAlignment while line: if line[0] == ">": id = line[1:].strip() seq = "" line = handle.readline() while line and line[0] != ">": seq += line.strip() line = handle.readline() indelseq = "" for c in seq: if c == "-": indelseq += "0" else: indelseq += "1" sys.stdout.write(">%s\n%s\n" % (id,indelseq)) handle.close() #!/usr/bin/python import sys import itertools # code for editing probablities file to remove headers, tabs, states and positions. Retain Node identity my_file = open ("Ancestral_State_Probabilities.txt") stringlist = my_file.readlines() stringlist.remove(stringlist[0]) for line in stringlist: line = line.replace('\t', " ") line = line.split() del(line[1]) del(line[1]) print (*line) sys.stdout.write import itertools import sys #code for truncating raw ancestral protein reconstructions and removing spurious insertions for selected ancestral node ancestorfile = open(sys.argv[1], "r") #AncestralStates binaryfile = open(sys.argv[2], "r") #AncestralStatesBinary probfile = open(sys.argv[3], "r") #AncestralStateProbabilities ancline = ancestorfile.readlines() binaryline = binaryfile.readlines() probline = probfile.readlines() AminoAcid = ("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V") Binary = ["1"] prob = "" seq = "" maximum = "" header = "" cnode = "" dnode = "" ancest = [ ] binary = [ ] headerlist =[] # node = "" ancestors = "" binarys = "" for (c, d) in zip(ancline, binaryline): c = c.replace("\t" , " ") #string d = d.replace("\t" , " ") c = c.split() #list d = d.split() cnode = c.pop(0) dnode = d.pop(0) c = [','.join(c) + "\n"] #string d = [','.join(d) + "\n"] for (l, m) in zip(c, d): ancestors += (l) binarys += (m) for (a, b, e)in zip(ancestors, binarys, probline): if b in Binary: e = e.split() node = e.pop(0) e = ' '.join(e) elist = [e.split()] edict = (e.split()) dictionary = dict(zip(AminoAcid, edict)) for lines in elist: maximum = max(lines) print (a, maximum, dictionary, node) prob += e seq += a elif b not in Binary: continue elif a == "\n": seq += a print (seq) #print (c, maximum, dictionary) ##for lines in elist: ## maximum = max(lines) ## print (c, maximum, dictionary) binaryfile.close() ancestorfile.close() probfile.close() #extracting and binning posterior probability values for select ancestral nodes import sys import itertools sequencefile = open(sys.argv[1], "r") sequence = sequencefile.readlines() # print (sequence[0]) # sequence.remove(sequence[0]) ninetypercentcount = 0 totalcount = 0 eightytoninety = 0 seventytoeighty = 0 sixtytoseventy = 0 fiftytosixty = 0 fourtytofifty = 0 thirtytofourty = 0 twentytothirty = 0 tentotwenty = 0 zerototen = 0 seq = [] for line in sequence: totalcount += 1 ## print (max(line)) ## print ((line[2:9])) if line[2:9] > "0.9": print (totalcount, line) ninetypercentcount += 1 seq += line[2] elif line[2:9] >= "0.8" and line [2:9] < "0.9": print (totalcount, line) eightytoninety += 1 elif line[2:9] >= "0.7" and line [2:9] < "0.8": print (totalcount, line) seventytoeighty += 1 elif line[2:9] >= "0.6" and line [2:9] < "0.7": print (totalcount, line) sixtytoseventy += 1 elif line[2:9] >= "0.5" and line [2:9] < "0.6": print (totalcount, line) fiftytosixty += 1 elif line[2:9] >= "0.4" and line [2:9] < "0.5": print (totalcount, line) fourtytofifty += 1 elif line[2:9] >= "0.3" and line [2:9] < "0.4": print (totalcount, line) thirtytofourty += 1 elif line[2:9] >= "0.2" and line [2:9] < "0.3": print (totalcount, line) twentytothirty += 1 elif line[2:9] >= "0.1" and line [2:9] < "0.2": print (totalcount, line) tentotwenty += 1 elif line[2:9] >= "0.0" and line [2:9] < "0.1": print (totalcount, line) zerototen += 1 print (" > 0.9 =" , ninetypercentcount, ninetypercentcount/totalcount * 100 ) print (" > 0.8 =" , eightytoninety, eightytoninety/totalcount * 100) print (" > 0.7 =" , seventytoeighty, seventytoeighty/totalcount * 100) print (" > 0.6 =" , sixtytoseventy, sixtytoseventy/totalcount * 100) print (" > 0.5 =" , fiftytosixty, fiftytosixty/totalcount * 100) print (" > 0.4 =" , fourtytofifty, fourtytofifty/totalcount * 100) print (" > 0.3 =" , thirtytofourty, thirtytofourty/totalcount * 100) print (" > 0.2 =" , twentytothirty, twentytothirty/totalcount * 100) print (" > 0.1 =" , tentotwenty, tentotwenty/totalcount * 100) print (" > 0.0 =" , zerototen, zerototen/totalcount * 100) print ( "total = ", totalcount)