Skip to content

Commit 6da3d61

Browse files
author
Christian Schudoma
committed
Added MCA-Parser
1 parent a28a26b commit 6da3d61

File tree

3 files changed

+686
-0
lines changed

3 files changed

+686
-0
lines changed

mca2bracket2.py

+164
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
#!/usr/bin/python
2+
3+
import re
4+
import os
5+
import sys
6+
import math
7+
8+
import minilib as biolib
9+
import mdg_dt as MDGDT
10+
11+
ix_pattern = re.compile('([A-Za-z]|\'[0-9]\')-?[0-9]+(\.[A-Z])?')
12+
13+
###
14+
def read_data(fn):
15+
sequence = []
16+
structure = []
17+
18+
unidentified = set([])
19+
20+
edge_re = re.compile('W[a-z]/W[a-z]')
21+
pair_re = re.compile('([AG]-U)|(C-G)|(G-[CU])|(U-[AG])')
22+
23+
fi = open(fn)
24+
mode = 'ignore'
25+
while True:
26+
line = fi.readline()
27+
if not line: break
28+
29+
line = line.strip()
30+
if line.endswith('----'):
31+
if line.startswith('Residue conformations'):
32+
mode = 'sequence'
33+
if len(sequence) > 0:
34+
""" ignore multimodels """
35+
break
36+
elif line.startswith('Base-pairs'):
37+
mode = 'structure'
38+
else:
39+
mode = 'ignore'
40+
pass
41+
continue
42+
pass
43+
44+
45+
if mode == 'ignore': continue
46+
elif mode == 'sequence':
47+
line = line.split()
48+
49+
50+
base = line[2].strip()
51+
if base in biolib.BASEDICT:
52+
base = biolib.BASEDICT[base]
53+
sequence.append((line[0], base))
54+
else:
55+
unidentified.add(base)
56+
57+
elif mode == 'structure':
58+
is_ss_pair = 'antiparallel' in line and \
59+
'cis' in line and \
60+
edge_re.search(line) and \
61+
pair_re.search(line)
62+
if is_ss_pair:
63+
line = line.split()
64+
line = line[0]
65+
66+
67+
try:
68+
base1 = ix_pattern.search(line)
69+
line = line[base1.end():]
70+
base2 = ix_pattern.search(line)
71+
structure.append([base1.group(), base2.group()])
72+
except:
73+
print line, 'fail'
74+
pass
75+
76+
"""
77+
if len(tmp) == len(line) - 1:
78+
structure.append(line[0].split('-'))
79+
elif len(tmp) == len(line) - 2:
80+
pass
81+
else:
82+
p = line.find('-')
83+
"""
84+
85+
else:
86+
continue
87+
88+
pass
89+
90+
if len(unidentified) > 0:
91+
sys.stderr.write('UNIDENTIFIED RESIDUES: %s\n' % \
92+
str(list(unidentified)))
93+
pass
94+
95+
96+
return sequence, structure
97+
98+
99+
100+
###
101+
def create_brackets(structure, sequence):
102+
brackets = ['.' for i in xrange(len(sequence))]
103+
104+
index = [x[0] for x in sequence]
105+
106+
structure = [(index.index(b1), index.index(b2))
107+
for b1, b2 in structure]
108+
ss_tree = MDGDT.MDG_Stem()
109+
ss_tree.assemble(sorted(structure), outp=sys.stderr)
110+
111+
knotless = sorted(ss_tree.preorder())
112+
if ss_tree.has_tertiary_pairs() and ss_tree.has_tertiary_stem():
113+
print ss_tree.te_contacts
114+
return None
115+
# print 'KLESS', knotless
116+
117+
def write_structure(structure, n):
118+
brackets = ['.' for i in xrange(n)]
119+
120+
for i, j in structure:
121+
if brackets[i] != '.' or brackets[j] != '.':
122+
sys.stderr.write('%s %s %i %i already occupied\n' % \
123+
(b1, b2, i, j))
124+
if brackets[i] == '(' or brackets[i] == '<':
125+
brackets[i] = '<'
126+
elif brackets[i] == ')' or brackets[i] == '>':
127+
brackets[i] ='#'
128+
129+
if brackets[j] == '(' or brackets[j] == '<':
130+
brackets[j] = '@'
131+
elif brackets[j] == ')' or brackets[j] == '>':
132+
brackets[j] ='>'
133+
134+
else:
135+
brackets[i] = '('
136+
brackets[j] = ')'
137+
138+
pass
139+
140+
# sys.stderr.write('***\n')
141+
return ''.join(brackets)
142+
143+
return (write_structure(structure, len(sequence)),
144+
write_structure(knotless, len(sequence)))
145+
146+
###
147+
def main(argv):
148+
#
149+
if len(argv) < 1:
150+
sys.stderr.write('Usage: python mca2bracket2.py <MC-Annotate-file>\n')
151+
sys.exit(1)
152+
elif not os.path.exists(argv[0]):
153+
sys.stderr.write('Error: file %s does not exist. Aborting.\n' % argv[0])
154+
sys.exit(1)
155+
156+
seq, struc = read_data(argv[0])
157+
brackets = create_brackets(struc, seq)
158+
159+
print 'Structure with knots:\n%s\nKnotless:\n%s\n' % brackets
160+
161+
162+
return None
163+
164+
if __name__ == '__main__': main(sys.argv[1:])

0 commit comments

Comments
 (0)