-
Notifications
You must be signed in to change notification settings - Fork 0
/
PROT.py
37 lines (30 loc) · 10.3 KB
/
PROT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
inputseq="AUGAUGAUAGCUGUUAUUACUCCAGGUGUAUGUGAUGCUGCUGACAUUCCACCUAUCAGUGGGUACUUUGUAUCGCUUGCGUGGGCUCUAUGGCUUGCAACGUUAGACAACGCCGUCAGCUCGCAGAUGGCAAACCUCUCACAUCGAUGUAUUUAUUGUGGGAGUGCUGCGCUCUUGUCAGGAGCAGAUUGGUUGUUCUGUCGCCCUUACAUCCGCUUAUGUCAAUCUCAUGAUGAGCAAUAUCCUGUCGAAGGCUUACUUCACCCUUAUCCCAAUCCGUGUCCUGGAGCGUCGCGAAUACUUACAGAACGAGACUUCCUAUUGUAUCCCAACUCCGCGCCCCGAAACGACAUAGCUCUUGGUCGGUCUGCGGUAGAGCGGUCGCCCCCAAGCACGAACACAUCGUUGACAAUGACCUCCUGCGCUGAACGUACAAUUAUGAAAGCGCUCGCAAUCUCGGCAUGUUAUAACCGCCCCGUCUACACCGACCAUCGUGGGGAAAUCUCCACAUCGGUUUCCCGAAGAAGAGAAUUGACCGUCCACCAGUCAGAGCCGCCGCGCCCGAAUCUACACCUGAGCUACCAGUUUAACCAACGAAAUCCGUUAUAUCCGCGUGGGAAUACUAAUAGCAUCUAUUCUCAACUGGAACUGCAAUCCGCAAUCUAUUACUUUAAUUCUGAUAGAGGAGCUUGCACCGUCCCUCCCGUCCGAAACUCUGGAUGUAAUUCCAACCGGACAGGCCGACCCAUUCAGGGUGGAGAUGGGCGAGCCUUGAAUGGAUUCGAUUGCGAUUGCCGCCACCCCCGAUCUUCAUCCGGGCUUGUAAUACGGCUUCCUUCAGACUUGACGACAUUCAAAGGCUUAUGGGUGUGCUACUUUCUGGUAGAUGGAUUCAUAUGGUGCCGCGUUGGGGUUCGAACAAUGCUUGUUCAGUACUCAAACCGAGCUCCUCUUUGCUCAUUCCGUCUAAGACGAUUACUCCUCGCAAACCAGAAGCCCUACAAAAUAAGCACGGAUUCGCCUUGUCGCUUCGCGCACAUUUGUAUCGCCGGCUUACAAAUAUCCGUGCUCUAUACAGUUCACGGUAGUCCCUUCGUCAACCUUGGUAUACCGGAUAUGAGACCGGUCGUCGGACUGCCCCACACGGUGCUGAGACAAGUCACUGCGGAGAGCGUUAAUGCUGCUGACCUUUUAGAUCAUCGCUAUAACAAAAGACUCACGGUUCAAUUGCGCCGACCUAUGGAUAGAAGACCCGAGAGACAGGUGGUAGUCAAGCUAACGCGUGGAGCCCAGCUUAGUACAGAGCCUCGUGUAAAAGAGCUGGGCAAUGCCUACCAAAUACCCCCGAGCCAAGCGUGUCAGCAUUAUUCGCAAUUGCGGCUGGCGAAUGGCAAGACCACCCCCACUGAAAUGAAGUCGCAAAAGUUAACCCAGUUGCGGCCCAUGUAUCUUGAAGACCUACAUUACUACGAAGCGGGUUCGUGUCUCCAGGACAACCUCAAAUACACCUGUUGCUAUAAUGGGCUCUUCGGUCGGCAGCUACUAGUUGGUAAGAAAGGGUAUCCCGUGACGCUGCUUGAAAUUCCGGCUAUUCCCUUCGAUCUAAGUAAUAUACAUACUUCCAUGGCGGGCGGAGAUCAGCGUAUACGGUUCCCCAAAAGCAAGAUCCAAAGGGGCGGCGACAAUUAUUCUGGUAUCGGGAAUGGUUCUGUAUUCUUCAAUGAUGCCCCACGUUGUACGUCCAGUUACCAUCUUGCCAGGGACCACGUUCGUGGUCGCAUAUCGCCCAAGGAUGUGCCGCCUGUGUCUUAUGGGAAUUCUGACUACUACACACCGUGUGCCUUGCCCAUCUCCCACUGGGAGUUAAAGCGCGCGCCACCCAGAGGUAGUGGAACCACUAACAAGAGUAAGCUAGUCAAACUUGGAUGCCGCACUAUCUUACGUCGUGAAUUUAGCCAAGCCAGUCUAGUGUGGGCUCAGCCCCCGGGCGGACAGACCUCGGCUUCCCGCCGACUCCGAGACUCACGGCCAGUUAAGCCUUGCCGCGCCAGCUUUAUUAUCAUCAAGUGCAUUCAAUUCAGUGUGCUUGUUUUUCUAUGCACGUAUAAUAGUGAGACUAAGCUAAAAUCGUGCAUGUCUUCAAUUCUCAACAUCGGCAAGCACCCGGACAUAACUGAAAACACCCGACGUCGAUGUCCGCAAUCAUUAUCUGGGUUUAUUUUGACCAACUUUUGCAAGCUAGUCAUCGUUCUCUGUUUAGCAACGCUCAUGAAGGUCGAAGUCGGUGUGGAUCGCCCGAGCUUACUCUUCAAGUGGUUCUUGAUGGUAAUUGGCAAGCGCGACAAAAUGGCGUCAUUUUGCUCAGGUUCGGUUCAGGGUGGGAGCCAUUGGGGCCAGGGGCCGAGCAUUCCAUGGUCGAACCGACUGUGCAUAACGCAAACAGAGGGCGGGUCGGAGGGUCUAUUGGACUUAGAUGAUCGUCUGGAUCCAACUCCUUCUAGACGCGCUCAACCCUGCUGGCGCCCCUUCACCCUCCAGCUGUUUACUAUUACCGUAAGAGAUACACUCGCCGCUCGGAAGUGUAUAGAUAGAUCCACCGUCGUAAUGAAUGUCCGUCUUGACGAUGACGUCCGUCCUUAUUGUCCGAACGAAUCUACUCUAGAAUUCGCGUCUAUGCUUCCCGACACUCUUCGAAAGGGGUUAUCCGCAAUACGCACGGAAGGCGCGUCUUUGACUUCUACUCGUCAAGGCAACGCGGUCCCAUACAGUUGCCAGUCUGAAAUCCGCAAGGCGUCCUGUGGUCUCAGUUUGUGCGCGCAAGAAAGGUAUUUAGCCUCCAAUAGGCCAGAUCCAAUCUCAAUAUUAAGUCAGUGGCGUUGCAAAUACAGACCGCCCGUGUCUACCCUAUGGUCCGAUUCCCGGGUACAAGUGUCCUAUCAAGGAUUAUGUGCAAGGGUAUUUCAUAGAAUCACCGGGCUGGGAGAUUCUCCUCUAUCACUAAACCAACACAAGUACAACAACUCUACAAAUGUGCUCAAUCUAUGUGAAAACCGCAUGAAAUCGUUAAUCCGCACGUCUGCAGGUCUGAAUUGUCGAUGGUUGCAUUUUUAUACGACGCACUCCCUUAUUACCGUAGACAGAUUACAUCCACGACUUCGCGUUAGAAAUCAAGCUAAGGCAGGAGGUGUUCGGACCUGUAGGUCAUCGGGGCACAUGAAAAGGCAAGUGAAUUCUAGUCGGCCAUCUUCAAAGUCAACCCGGGUCCGAUGGGGCCGGCAUUCUGCGCAUCGUCGACCAGCCUCAAAUCUACCGCCUGGCGAAACUAAUAGGUUGCGGCGGUUCUGGUGGGUGUUGGUCCCUGCAGUCACACCAGUUAUUAUUUCAGUAGCUAGCACUAUUGACUGCGGCCACGGGCUGGGCUUAGCAGGACCUCGCCUAACUCAUCAUCCACCUUCGGCACCACCGAUGGGAGCGAUAAGGCUUAGUUAUCGUAUUGAUAUGAUAGGCCCAGUUUCGAGCAAUUUCCGAAUAUUUCCCGAAAGUGGACGAGACAGUCCGUUAGUCCACAAUGCACCACCUCCUGCAUUGUUCCGUAGGAUUUUCACGUACAGAUUCGCGGGAGCCCGACUUUCCGGUGAAUGGUUAAUUCUGAUCAGCAAUCGCGAGCGGAUGGGACUAGCAAAGUGGAACUCCCCCGCGGCCCGAGACCGAGAAAUCGGCCCAGUGUUCGGAGGCUCUGCCAAUAGCUAUGAGGCGGUUCCGUGCACUUUAGACGGUACCAGCCAAACUCAGAUAUGCUUGUUGAUGUGUGGUUCUGACUGCUUGCGCACCCUUGAUUUGAGACUCACGGUAAGUACCUCUUAUGGACCGGGAGCUCCGUCCGCAUUCGAUGAUCGGCAUAAUCUGGAGACAGUUCGGGUCCACUGCGAAAGCGGUGAGUCGGAGACAGCUGCUGGUGCCACUCCGCUGUCACCCAAAAUUGAAUUCAACAGUGUAUGUUCGCUUACACCUAACAGGGAAUUUGACAGCGGGACACGUGCGGCGAGACGUUUCACCGAUCGUGGGUUGAGACCGGGCUUCGCAUGCUUAUACUUCGAUGGUUUCGAUUUGACGGUUAAUCCGCAGCAGAACGGCGCAUCUUCAAUAGGUUAUACAGUUAAGUUCACACUUCUGCUCAUUCGUCUGUGCUUCUGCACCUCUGGAUUGCCUCAGCUGGUCUUUGAGGUCACGCUUAAUCUUGAGAGAGUACUAGUAGGUGAUGGGUGGAAGCAAAGCGACCACCGCGGCUUCCAAAAUACCGCCGGUACACAGCCUGAAGUGUCCCAGCGCACACGGCCCGUAUAUUAUCUCAGGCCCAACCGGAUGGUCCGUCAUCUUAGCUUUCAAAAGGUUAGAGCCGCUAGUCGAAUGCUAUGUUACCGAGAAGGAAUACACUCUGUCAAACAAAAGCCUGAUACAAAUUUGGGGGACAUCUCACUUGCGUCUUGCACGGUCUCCCGUACACAGUACGUUAAGGGGUCAAUUCCAUCAACAUUUCCGAACGACUUGCGGAAUUUCCACAUGAGGGUUCGAGUCCUAUGUGCCUGCAGGGUCCUAGGUACAUGGAAUCGCACCGGACGGUGUGGCCCUAACUCUAUAGACCUCGAAUUGGGUUGUGGGUCAGUGGACCUUAGGGACAUGGCUCCUAAUCACACUCGUCGAGGGAAGGUACGGUGCUACCAUCUCACAUUCGUCGUCACAAUGAUGGCACAGUUGUCGCUUACGAGCGGCGAGGGUCUUGUGCUUAUCAUAUCACGCUUUGAUAACUACUCCAAUACCCCAUCAAUCUCGAGGGGAUCGUGGUACAUUUCGGGGGCGCCCCGAGGCCACUACAGGGCCCGUCAGUCCUCGCUUGACCUUACCGAGACAAGAGUUCAAAAUGGUACCGACGCUUUGUAUUGGAGACGAAUCACUCGCACAGCCCAACAGCAGAAAAGGCUGCUAGCGGUUACUAGCAAUUCCUGUAGGAGGAGAAUCCAGUGGAAGCCUACAGUAUGUUGCACGUGUGCAGAGUUAUCCCACUUGUUUCUGCUAAGCGCUAAGGUACUAGGCCAUGCCCUGCUUACGAUUAGGACGAAACCUUUUACCGGCUAUGUGUUCAGGAGCGGUGGAUACUUGUACGCCUAUAUGUCUGGCAUCCCUGAAUAUAACAAGAGACUGAGAAGGUAUACGGCUGACACAGUCUGUUUGACCUGCAAACCUACGUUGGCAUACUUCGUAGCUCUUCGCUCGUCAAGAGUAGUGGAUCCGCAUACAGUUAACGGAUCCGGAGUUACCAAAAAAAGUCAAAAGAAGCUACGGAUCAUCACGACAACUAUGACUUCAGACCGGUGCAUGGACCACCACCUUCCAAGACGAUGUUUGCAAAGUCUUGCACUCCGGGUGCGCUGGCUAACGUUGAAAAUGGCAUUCCGUUAUCAAUAUAAAGCGCGCCUCUGCCUUGGAAGUCUAUGUGCCUCACUUUCCUUGCAUUACCUGCGCGUACGCGUUGUUCUAGCUGCUGUUCGAAUGCGAUACUCGCCAAUCAUUAAUGACUUAAAUGAUUAUUUUGUGAAAUGUAUUAUCGUAAAGGUCGUUAUACUGUUACGUUCGCCUGGAGACCAAAUUUGGCAAUGUCAGGAUCGUUGGACUUACGUAGGGUUCAUCGAUAAAGUUAAAUCUAUCUCCGGCCUCGCCAGCAUGUAUUCCGUGUUAAUUAAACCGGUGACCUCCCCAUACGUUUUGAGGUGCGUGGUAUUACGUAUUGAUCCGACGGUGCAACAACGCUUUAUUAGGCCGAUAAUUAAGGGCGACAUCAACCAGCGCACUAGUGCCCAAGCUAAACACUCAACAAUCGCGAGUACUGAUGUUGCCUCCUGCACCGAGUCUGAUGCCGCGCCCCAUAAAGCUCUAGUAGCGCUUUUAUUCGUAGCUCAAGAUAGCAGUUUUCGCGUUGCAUCAACUGAGACACGACGACAUCUCAACGACCAAGGUGCGCGCUCGGCCACUCCCACCCCGAUCUGCCGGGGUUGCUUCGACGGUCUAGGGCAAUGCCACGAUGCAUCGCCUUUCAAGACUCAACCUUGGGACAACGAGUGCCGCCCGAAAAGCGCUCGUCGGAAGUUCCUGUUAGACACAGUACAUUGCUCUCGUAUGUCCAGGCAUGCAUCCCCAGACCCCAUUUCUGCAAAGAACCCCUUGACCAGCUCCAUGCGUAAGAUACACUCACUUAGGCGAGUACCAAAACUAUGGUGUGCACUCUGCGGCGAUCUCUUUAUGAGGAAAUACUCAGCAGCCCUGUUUCAGCACCCGAUAAAUGGCGCGCUGGGGCUCGGCUUGCUUUCUCGAGUGGGGAGCGUCCAAGCGCGUCGUGUCUAUUACCUUAAAGGAUACUGGUUAUGGGGUACAGUCAUAUUGACUUUUCUUGCCGUUCCACGAGGUAUUUCGAGACUCAUUGAUAUGACGCCGGCCUACGCACGGUCUUGGGCGUGGGGGUAUACAGCUCGUAACGUUAGUCAGAGGAUUCUUGGAUGCAUCCGGUGGGCACCUAUAUUGAGAGUUGUUCCAGAAUGUCCAGGUGGCCCCUCAUCACCUCGACUGAGUACAUUGAUGAAAUACCAUACGAGUUCAGGCAAGCUCACGAUUUUCGUGAAAAUAGGUUCCCACGGUUGGCGCCCAUAUAGAAGUUUGCGGCAUCUGACCGGAACUAGGUGUCGUGAUCAUAUUAGGGCAUCUCUCUUUCAAGGACUCGAACACGUGAUGCCGGCGCUGUUACCCGGCGCCCUAUACGACUAUUCGUUAAGAAUAAAAGACACACACCCUACGUCUCCUCAAGUAGUUCCUCGGGUUGUUGAUGAUCCUUGCGGCGAACAUAGUAAGCUAUAUGCUGUCAGGUGUCGCGGAGCCGUUUUCUGGGCACAAUUCCCACCGACUCACCAUCUGUUUUCUUCAGUUUUACACAUUGGAUGGGUUGUUGGUAAGGCCAUGAGAGCUCCCGGAAACGACCAAGAACCAGAUUACCUCCCAGUCACACCGUUACACCCAGCUGGUUAUCGGAAAUGGUGGCGGAUUCGAUGGUUCAGUAUAUAUGGCGACAUGUCCAAUCUUCUAGUUGACCCUCUAGCUGAUGCGGCUAAACUCAUAGGAUACCUCGCUGCUAGGAGUAUUUUUGUUAGGUUGCCGCUUCGACAACCUUCGUAUAAUUACCGCUUUAGCACAAAAGACGGCCGUCUUGACCUCGGCGAGACAUUUGUCUCGCCUCCGGUGAUCUACGUGACUACAUCGGUGACAACGCUCGGUCACCUUCCAGGUGUUGUUGAGUACACGGAGACGCACCCAUCCGUUCACACCUUAAUCGUUACUGACCCUGGCGCGCGAGGCGGGUUGCGCGCAGUUACGCGUGGAACUACAGGGCCAGCCGGCCCUGUAUGUCAGGGUACAGGUGAUCAGGCCAUAGAGGGGCGUGGCUUCGUGGAGAUCCAAAGCACGCCAGCCGAAUUUCGCAAGGGUCGUUCAAGAGGCGGGUUAUUAAUGCGAGAUCGCAGGAAAUUUGCACCAUCGGGGUGCGAUCCCAUUCUAUGUAACGUGAAGCGGCGGCAGAACGAUGCAUCGGGAGUUCAAUUUUUACAAAAAGAAUGUUCGGAAGGGAGUUCUAUGUUCCGUGCUGCAUCGAUUUUUAAACACAAAAACAGUCGCCGGAGCCUGUGGUUACUGGUUCCACUGAGCUCUGCCAUCUGGACCGAUGGGAAUCUUCUACGUAAAUGCCUGCGUGCCUCGUGGUUUCUACUCACCCAAAUUCUUUAUUUCCCCCUCCGAUCCUCAAACUGGUCUACUGAAGGCGAAAUUAGGAAUGGCUCCAUGUUGGCAAAUCUACUUCUCGUGAGACCACCCAGACUCAAGUCACGCAGACCGUGGGUAGAUGUGCACUGGGAUCCAAGAGCGAGACUAUGCAUUGGAAGGCUGCUCGAAAAACAAGGGUCGACUUCAUGGGGGCACGUACCAAGAAAGAUGAUUCGUGAAAUUUGUGAGCCGGUGCGAGGAAACCCAUUGUGGUUCCCGCCCGAAACUGGAGGAAGAGUCGAGCCCCUCCUGCGAGCCAACGUUGGUCUAUUUGCCUUAACCGGGCACGGUGUAAAUCCUACCCCCGACCGUGCUAACAGGUGGAGGAGGUUCCCUCGAGCCGUGUAUAGUAAUCGAACUGCCUCUCGUCAUACGAUCAAUCAAGCCGGGAGCUUCCUUACUCGACUUGGUGCGACGCGAACGUACAUUUCUUUAUCCGAUGAGAAGGGUCGCAUACGUCACUGUUGCGCGGAAUUCGCGAAUCCGCCCAAUAGCUGUGACACGCUCACGAUGACCCAUCCAGCGACACUUAACGGAAUUGUCGCUUUGGCCCGACAUUUUCGCAUAUUAUACCGCGUGCUCGUGGGAGAGCCUUUGAAGACAUUGAUAACAUGUGAUUCGGGAAUUUGUUCGCUCCCCCCGCACCACCCAAGCCUUCGGGGACCUUCGGUUAUGGUACUUCGGAUGCCGCCACGUACGAAGCCCCGCCCUUCUGCCGAGUCCAUAUGCCAUCGCGCUGUGAACUCGCGCUUGCACCUGUCAUUGUCUGUGAAAUGUCAUUAUCCAAACAGGUGUGGAGCGGCUACAUCUCAGGCCGUGAAGACUAAGCAAACCUCCCCCGCAGAGGACGUGGCACGGGGAGUUACGACAGAAGGAACGAUGUGUCACAGUAAGCAACACCGUCGUACGAUCGACUAUACGUCGAACUGGGGCGGUUUGAAGCUCUUAACCUCCAUGCCACCCUACACUGGGGCCGGAUGUUACUCUGGUUGUGAAUUGUUGAGCGCUUACACCUCCACCAAGUCCGAGAUUAGCCACCGAUUGAUUAACCGAGCAGCACAAUCUGUUCUGUCCAGAGCGGUGCAGGUCGCGCACUUCCUUAGACUGAGUUUCGAGCCCCUACUAUUUAAUCUCAUGACCAUUUAUGGCAGAGGAUCGAUUAGAGCAGUUAUCAGCCGUCAGACUCGCGUUCUUCAAUCACUCUGCCACGUCCACGCCUUCCGCAACUCUCAAGAAUCGAAGCCGGAACACACGCUCCCUUCCUCUGGUAGCGAUUCUUGCCACCCUCGACGACUCGAGUUUCCUUUUCUUCGUAUAGUACAUGGUGCCGUCGGCGACGAUUAUAUGUCCCUGAAACGUCCUACCCCUAAGGUUCUCUGGUCAUACCUACCGAAUCUCUUCAUAAGAAAGCGGGGGGGCAAAAGGAGACCUAGUACCGUAAAACGGCGCACGGAAUCGAGGGUAGGGCUAUCGAGGUAUGAGCGCACCCUGCAGCGACUUCGAUGGUGGGUAUCGGUCUGCCUACAGCUCAUAAUACAGAGGUUCGGAGCGGCCCGGUCCAAGCCUUGGUAUCGAUCGGAUUUCCGAGACCUCAAACUCCCCAUGCACAGUACCUACUACAAGUUCGUAGCCCGUGUCUCUGCGCCUUAUAUUAACAAACUGCUACCAACUAGUCGAGGAUCCGUUGGUCUCGUCGGCAUUUCACGGAACUCAGAUCGUUUUGGAAUCUCGAGAUACGCGGCGCGAGCCCCUUGGGCACAGCCUUCCGCUCGGGACGUUAACCGCCGUCUUUGCCGUGUUCGACGGCUCCCUAGCGUUACAAAGAUCCUACGCAGUGAAUGGCUGGUUGCAAGGAUUAAACCAGGAGCGUGUUGGUCGUACUUUCCAUUCAUUUACGCUACCCGUGUUGGAAGUAUUGACUGCUUGGUAGGUUUAACGAAGAGCAACCCGAAUACGCCGGAGGAGGACGUUUCCAACGCAUUACAGCCGUCCCGAAGUCGACUGACCGUCGCAACUACUUGA"
f = open ("standardrna.code.txt")
data = {}
for l in f:
sl = l.split()
key = sl[0]
value = sl[2]
data[key] = value
f.close()
b1 = data['Base1']
b2 = data['Base2']
b3 = data['Base3']
aa = data['AAs']
st = data['Starts']
codons = {}
init = {}
n = len(aa)
for i in range(n):
codon = b1[i] + b2[i] + b3[i]
codons[codon] = aa[i]
init[codon] = (st[i] == 'M')
def protein_seq(seq):
seqlen = len(seq)
aaseq = []
for i in range(0,seqlen,3):
codon = seq[i:i+3]
aa = codons[codon]
aaseq.append(aa)
return ''.join(aaseq)
print protein_seq(inputseq)