#!/usr/bin/python
#converting a multiple sequence alignment to binary with ones (residues) and zeros (gaps)
import sys

handle = open(sys.argv[1], "r")
line = handle.readline() #MultipleSequenceAlignment
while line:
	if line[0] == ">":
		id = line[1:].strip()
		seq = ""
		line = handle.readline()
		while line and line[0] != ">":
			seq += line.strip()
			line = handle.readline()

		indelseq = ""
		for c in seq:
			if c == "-":
				indelseq += "0"
			else:
				indelseq += "1"
		sys.stdout.write(">%s\n%s\n" % (id,indelseq))
handle.close()



#!/usr/bin/python

import sys
import itertools
# code for editing probablities file to remove headers, tabs, states and positions. Retain Node identity

my_file = open ("Ancestral_State_Probabilities.txt")
stringlist = my_file.readlines()

stringlist.remove(stringlist[0])

for line in stringlist:
    line = line.replace('\t', " ")
    line = line.split()
    del(line[1]) 
    del(line[1])
    print (*line)
    sys.stdout.write


	
import itertools
import sys

#code for truncating raw ancestral protein reconstructions and removing spurious insertions for selected ancestral node


ancestorfile = open(sys.argv[1], "r") #AncestralStates
binaryfile = open(sys.argv[2], "r") #AncestralStatesBinary
probfile = open(sys.argv[3], "r") #AncestralStateProbabilities

ancline = ancestorfile.readlines()
binaryline = binaryfile.readlines()
probline = probfile.readlines()

AminoAcid = ("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")
Binary = ["1"]

 
prob = ""
seq = ""
maximum = ""
header = ""
cnode = ""
dnode = ""
ancest = [ ]
binary = [ ]
headerlist =[]
# node = ""
ancestors = ""
binarys = ""

for (c, d) in zip(ancline, binaryline):
    c = c.replace("\t" , " ") #string
    d = d.replace("\t" , " ")
    c = c.split() #list
    d = d.split()
    cnode = c.pop(0)
    dnode = d.pop(0)
    c = [','.join(c) + "\n"] #string
    d = [','.join(d) + "\n"]
        
    for (l, m) in zip(c, d):
        ancestors += (l)
        binarys += (m)    
for (a, b, e)in zip(ancestors, binarys, probline):
    
    if b in Binary:
        e = e.split()
        node = e.pop(0)
        e = ' '.join(e)
        elist = [e.split()]
        edict = (e.split())
        dictionary = dict(zip(AminoAcid, edict))
        for lines in elist:
            maximum = max(lines)
            print (a, maximum, dictionary, node)
        prob += e
        seq += a
    elif b not in Binary:
        continue
    elif a == "\n":
        seq += a
print (seq)

                
            



#print (c, maximum, dictionary)
##for lines in elist:
##            maximum = max(lines)
##            print (c, maximum, dictionary)
binaryfile.close()
ancestorfile.close()
probfile.close()

#extracting and binning posterior probability values for select ancestral nodes
import sys
import itertools

sequencefile = open(sys.argv[1], "r")

sequence = sequencefile.readlines()

# print (sequence[0])
# sequence.remove(sequence[0])


ninetypercentcount = 0
totalcount = 0
eightytoninety = 0
seventytoeighty = 0
sixtytoseventy = 0
fiftytosixty = 0
fourtytofifty = 0
thirtytofourty = 0
twentytothirty = 0
tentotwenty = 0
zerototen = 0
seq = []

for line in sequence:
    totalcount += 1
##    print (max(line))
##    print ((line[2:9]))
    if line[2:9] > "0.9":
        print (totalcount, line)
        ninetypercentcount += 1
        seq += line[2]
    elif line[2:9] >= "0.8" and line [2:9] < "0.9":
        print (totalcount, line)
        eightytoninety += 1
    elif line[2:9] >= "0.7" and line [2:9] < "0.8":
        print (totalcount, line)
        seventytoeighty += 1
    elif line[2:9] >= "0.6" and line [2:9] < "0.7":
        print (totalcount, line)
        sixtytoseventy += 1
    elif line[2:9] >= "0.5" and line [2:9] < "0.6":
        print (totalcount, line)
        fiftytosixty += 1
    elif line[2:9] >= "0.4" and line [2:9] < "0.5":
        print (totalcount, line)
        fourtytofifty += 1
    elif line[2:9] >= "0.3" and line [2:9] < "0.4":
        print (totalcount, line)
        thirtytofourty += 1
    elif line[2:9] >= "0.2" and line [2:9] < "0.3":
        print (totalcount, line)
        twentytothirty += 1
    elif line[2:9] >= "0.1" and line [2:9] < "0.2":
        print (totalcount, line)
        tentotwenty += 1
    elif line[2:9] >= "0.0" and line [2:9] < "0.1":
        print (totalcount, line)
        zerototen += 1



print (" > 0.9 =" , ninetypercentcount, ninetypercentcount/totalcount * 100 )
print (" > 0.8 =" , eightytoninety, eightytoninety/totalcount * 100)
print (" > 0.7 =" , seventytoeighty, seventytoeighty/totalcount * 100)
print (" > 0.6 =" , sixtytoseventy, sixtytoseventy/totalcount * 100)
print (" > 0.5 =" , fiftytosixty, fiftytosixty/totalcount * 100)
print (" > 0.4 =" , fourtytofifty, fourtytofifty/totalcount * 100)
print (" > 0.3 =" , thirtytofourty, thirtytofourty/totalcount * 100)
print (" > 0.2 =" , twentytothirty, twentytothirty/totalcount * 100)
print (" > 0.1 =" , tentotwenty, tentotwenty/totalcount * 100)
print (" > 0.0 =" , zerototen, zerototen/totalcount * 100)

print ( "total = ", totalcount)