Skip to content

Commit ef7d0a1

Browse files
committed
Fix other ambiguities
1 parent 08bc90a commit ef7d0a1

File tree

3 files changed

+197
-141
lines changed

3 files changed

+197
-141
lines changed

mutalyzer_hgvs_parser/hgvs_parser.py

+175 -141
Original file line number | Diff line number | Diff line change
@@ -269,207 +269,239 @@
269269
"type": "variant_certain-location_repeat|repeat - variant_certain-location",
270270
# NM_000492.4:c.1210-34_1210-6
271271
"conditions": lambda children: (
272-
len(children) == 3
273-
and children[0].data == children[1].data == children[2].data == "variant_certain"
274-
and data_equals(children, [0, 0], "location")
275-
and data_equals(children, [0, 1], "repeat")
276-
and data_equals(children, [1, 0], "location")
277-
and data_equals(children, [1, 1], "repeat")
278-
and len(get_child(children, [2]).children) == 1
279-
and data_equals(children, [2, 0], "location")
272+
len(children) == 3
273+
and children[0].data == children[1].data == children[2].data == "variant_certain"
274+
and data_equals(children, [0, 0], "location")
275+
and data_equals(children, [0, 1], "repeat")
276+
and data_equals(children, [1, 0], "location")
277+
and data_equals(children, [1, 1], "repeat")
278+
and len(get_child(children, [2]).children) == 1
279+
and data_equals(children, [2, 0], "location")
280280
),
281281
"selected": 2,
282282
},
283283
{
284284
"type": "variant_certain-location_repeat|location_inversion - inversion",
285285
# NC_000015.9(NM_001012338.3):c.396-6644_1397-29766inv
286286
"conditions": lambda children: (
287-
len(children) == 3
288-
and children[0].data == children[1].data == children[2].data == "variant_certain"
289-
and len(get_child(children, [0]).children) == 2
290-
and data_equals(children, [0, 0], "location")
291-
and data_equals(children, [0, 1], "inversion")
292-
and len(get_child(children, [1]).children) == 2
293-
and data_equals(children, [1, 0], "location")
294-
and data_equals(children, [1, 1], "repeat")
295-
and len(get_child(children, [2]).children) == 2
296-
and data_equals(children, [2, 0], "location")
297-
and data_equals(children, [2, 1], "repeat")
287+
len(children) == 3
288+
and children[0].data == children[1].data == children[2].data == "variant_certain"
289+
and len(get_child(children, [0]).children) == 2
290+
and data_equals(children, [0, 0], "location")
291+
and data_equals(children, [0, 1], "inversion")
292+
and len(get_child(children, [1]).children) == 2
293+
and data_equals(children, [1, 0], "location")
294+
and data_equals(children, [1, 1], "repeat")
295+
and len(get_child(children, [2]).children) == 2
296+
and data_equals(children, [2, 0], "location")
297+
and data_equals(children, [2, 1], "repeat")
298298
),
299299
"selected": 0,
300300
},
301301
{
302302
"type": "variant_certain_duplication | variant_certain_repeat - duplication",
303303
# R1:c.10-5_10-2dupR2:10
304304
"conditions": lambda children: (
305-
len(children) == 2
306-
and children[0].data == children[1].data == "variant_certain"
307-
and len(get_child(children, [0]).children) == 2
308-
and data_equals(children, [0, 0], "location")
309-
and data_equals(children, [0, 1], "duplication")
310-
and len(get_child(children, [1]).children) == 2
311-
and data_equals(children, [1, 0], "location")
312-
and data_equals(children, [1, 1], "repeat")
305+
len(children) == 2
306+
and children[0].data == children[1].data == "variant_certain"
307+
and len(get_child(children, [0]).children) == 2
308+
and data_equals(children, [0, 0], "location")
309+
and data_equals(children, [0, 1], "duplication")
310+
and len(get_child(children, [1]).children) == 2
311+
and data_equals(children, [1, 0], "location")
312+
and data_equals(children, [1, 1], "repeat")
313313
),
314314
"selected": 0,
315315
},
316316
{
317317
"type": "variant_certain_deletion | variant_certain_repeat - deletion",
318318
# R1:c.10-5_10-2delR2:10del
319319
"conditions": lambda children: (
320-
len(children) == 2
321-
and children[0].data == children[1].data == "variant_certain"
322-
and len(get_child(children, [0]).children) == 2
323-
and data_equals(children, [0, 0], "location")
324-
and data_equals(children, [0, 1], "deletion")
325-
and len(get_child(children, [1]).children) == 2
326-
and data_equals(children, [1, 0], "location")
327-
and data_equals(children, [1, 1], "repeat")
328-
329-
and len(get_child(children, [0, 1]).children) == 1
330-
and data_equals(children, [0, 1, 0], "inserted")
331-
320+
len(children) == 2
321+
and children[0].data == children[1].data == "variant_certain"
322+
and len(get_child(children, [0]).children) == 2
323+
and data_equals(children, [0, 0], "location")
324+
and data_equals(children, [0, 1], "deletion")
325+
and len(get_child(children, [1]).children) == 2
326+
and data_equals(children, [1, 0], "location")
327+
and data_equals(children, [1, 1], "repeat")
328+
and len(get_child(children, [0, 1]).children) == 1
329+
and data_equals(children, [0, 1, 0], "inserted")
332330
),
333331
"selected": 0,
334332
},
335333
{
336334
"type": "variant_certain_delins | variant_certain_delins - one insert",
337335
# R1:c.10-5_10-2delinsTCTR2.2:c.10insT
338336
"conditions": lambda children: (
339-
len(children) == 2
340-
and children[0].data == children[1].data == "deletion_insertion"
341-
and len(get_child(children, [1]).children) == 1
342-
and data_equals(children, [0, 0], "inserted")
337+
len(children) == 2
338+
and children[0].data == children[1].data == "deletion_insertion"
339+
and len(get_child(children, [1]).children) == 1
340+
and data_equals(children, [0, 0], "inserted")
343341
),
344-
"selected": 1
342+
"selected": 1,
345343
},
346344
# TODO: revisit the next ones in the repeats context.
347345
{
348346
"type": "variant_certain_repeat | variant_certain_repeat_length - length 0",
349347
# R1:c.10-2[5]
350348
"conditions": lambda children: (
351-
len(children) == 2
352-
and children[0].data == children[1].data == "variant_certain"
353-
and len(get_child(children, [0]).children) == 2
354-
and data_equals(children, [0, 0], "location")
355-
and data_equals(children, [0, 1], "repeat")
356-
357-
and len(get_child(children, [1]).children) == 2
358-
and data_equals(children, [1, 0], "location")
359-
and data_equals(children, [1, 1], "repeat")
360-
361-
and len(get_child(children, [0, 1]).children) == 1
362-
and data_equals(children, [0, 1, 0], "inserted")
363-
and len(get_child(children, [0, 1, 0]).children) == 1
364-
and data_equals(children, [0, 1, 0, 0], "insert")
365-
and len(get_child(children, [0, 1, 0, 0]).children) == 1
366-
and data_equals(children, [0, 1, 0, 0, 0], "length")
349+
len(children) == 2
350+
and children[0].data == children[1].data == "variant_certain"
351+
and len(get_child(children, [0]).children) == 2
352+
and data_equals(children, [0, 0], "location")
353+
and data_equals(children, [0, 1], "repeat")
354+
and len(get_child(children, [1]).children) == 2
355+
and data_equals(children, [1, 0], "location")
356+
and data_equals(children, [1, 1], "repeat")
357+
and len(get_child(children, [0, 1]).children) == 1
358+
and data_equals(children, [0, 1, 0], "inserted")
359+
and len(get_child(children, [0, 1, 0]).children) == 1
360+
and data_equals(children, [0, 1, 0, 0], "insert")
361+
and len(get_child(children, [0, 1, 0, 0]).children) == 1
362+
and data_equals(children, [0, 1, 0, 0, 0], "length")
367363
),
368-
"selected": 0
364+
"selected": 0,
369365
},
370366
{
371367
"type": "variant_certain_repeat | variant_certain_repeat_length - length 1",
372368
# R1:c.10-2[5]
373369
"conditions": lambda children: (
374-
len(children) == 2
375-
and children[0].data == children[1].data == "variant_certain"
376-
and len(get_child(children, [0]).children) == 2
377-
and data_equals(children, [0, 0], "location")
378-
and data_equals(children, [0, 1], "repeat")
379-
380-
and len(get_child(children, [1]).children) == 2
381-
and data_equals(children, [1, 0], "location")
382-
and data_equals(children, [1, 1], "repeat")
383-
384-
and len(get_child(children, [1, 1]).children) == 1
385-
and data_equals(children, [1, 1, 0], "inserted")
386-
and len(get_child(children, [1, 1, 0]).children) == 1
387-
and data_equals(children, [1, 1, 0, 0], "insert")
388-
and len(get_child(children, [1, 1, 0, 0]).children) == 1
389-
and data_equals(children, [1, 1, 0, 0, 0], "length")
370+
len(children) == 2
371+
and children[0].data == children[1].data == "variant_certain"
372+
and len(get_child(children, [0]).children) == 2
373+
and data_equals(children, [0, 0], "location")
374+
and data_equals(children, [0, 1], "repeat")
375+
and len(get_child(children, [1]).children) == 2
376+
and data_equals(children, [1, 0], "location")
377+
and data_equals(children, [1, 1], "repeat")
378+
and len(get_child(children, [1, 1]).children) == 1
379+
and data_equals(children, [1, 1, 0], "inserted")
380+
and len(get_child(children, [1, 1, 0]).children) == 1
381+
and data_equals(children, [1, 1, 0, 0], "insert")
382+
and len(get_child(children, [1, 1, 0, 0]).children) == 1
383+
and data_equals(children, [1, 1, 0, 0, 0], "length")
390384
),
391-
"selected": 1
385+
"selected": 1,
392386
},
393387
{
394388
"type": "variant_certain_repeat | variant_certain_repeat_range_length - length 0",
395389
# R1:c.10-2_10-4[5]
396390
"conditions": lambda children: (
397-
len(children) == 3
398-
and children[0].data == children[1].data == "variant_certain"
399-
and len(get_child(children, [0]).children) == 2
400-
and data_equals(children, [0, 0], "location")
401-
and data_equals(children, [0, 1], "repeat")
402-
403-
and len(get_child(children, [1]).children) == 2
404-
and data_equals(children, [1, 0], "location")
405-
and data_equals(children, [1, 1], "repeat")
406-
407-
and len(get_child(children, [2]).children) == 2
408-
and data_equals(children, [2, 0], "location")
409-
and data_equals(children, [2, 1], "repeat")
410-
411-
and len(get_child(children, [0, 1]).children) == 1
412-
and data_equals(children, [0, 1, 0], "inserted")
413-
and len(get_child(children, [0, 1, 0]).children) == 1
414-
and data_equals(children, [0, 1, 0, 0], "insert")
415-
and len(get_child(children, [0, 1, 0, 0]).children) == 1
416-
and data_equals(children, [0, 1, 0, 0, 0], "length")
391+
len(children) == 3
392+
and children[0].data == children[1].data == "variant_certain"
393+
and len(get_child(children, [0]).children) == 2
394+
and data_equals(children, [0, 0], "location")
395+
and data_equals(children, [0, 1], "repeat")
396+
and len(get_child(children, [1]).children) == 2
397+
and data_equals(children, [1, 0], "location")
398+
and data_equals(children, [1, 1], "repeat")
399+
and len(get_child(children, [2]).children) == 2
400+
and data_equals(children, [2, 0], "location")
401+
and data_equals(children, [2, 1], "repeat")
402+
and len(get_child(children, [0, 1]).children) == 1
403+
and data_equals(children, [0, 1, 0], "inserted")
404+
and len(get_child(children, [0, 1, 0]).children) == 1
405+
and data_equals(children, [0, 1, 0, 0], "insert")
406+
and len(get_child(children, [0, 1, 0, 0]).children) == 1
407+
and data_equals(children, [0, 1, 0, 0, 0], "length")
417408
),
418-
"selected": 0
409+
"selected": 0,
419410
},
420411
{
421412
"type": "variant_certain_repeat | variant_certain_repeat_range_length - length 1",
422413
# R1:c.10-2_10-4[5]
423414
"conditions": lambda children: (
424-
len(children) == 3
425-
and children[0].data == children[1].data == "variant_certain"
426-
and len(get_child(children, [0]).children) == 2
427-
and data_equals(children, [0, 0], "location")
428-
and data_equals(children, [0, 1], "repeat")
429-
430-
and len(get_child(children, [1]).children) == 2
431-
and data_equals(children, [1, 0], "location")
432-
and data_equals(children, [1, 1], "repeat")
433-
434-
and len(get_child(children, [2]).children) == 2
435-
and data_equals(children, [2, 0], "location")
436-
and data_equals(children, [2, 1], "repeat")
437-
438-
and len(get_child(children, [1, 1]).children) == 1
439-
and data_equals(children, [1, 1, 0], "inserted")
440-
and len(get_child(children, [1, 1, 0]).children) == 1
441-
and data_equals(children, [1, 1, 0, 0], "insert")
442-
and len(get_child(children, [1, 1, 0, 0]).children) == 1
443-
and data_equals(children, [1, 1, 0, 0, 0], "length")
415+
len(children) == 3
416+
and children[0].data == children[1].data == "variant_certain"
417+
and len(get_child(children, [0]).children) == 2
418+
and data_equals(children, [0, 0], "location")
419+
and data_equals(children, [0, 1], "repeat")
420+
and len(get_child(children, [1]).children) == 2
421+
and data_equals(children, [1, 0], "location")
422+
and data_equals(children, [1, 1], "repeat")
423+
and len(get_child(children, [2]).children) == 2
424+
and data_equals(children, [2, 0], "location")
425+
and data_equals(children, [2, 1], "repeat")
426+
and len(get_child(children, [1, 1]).children) == 1
427+
and data_equals(children, [1, 1, 0], "inserted")
428+
and len(get_child(children, [1, 1, 0]).children) == 1
429+
and data_equals(children, [1, 1, 0, 0], "insert")
430+
and len(get_child(children, [1, 1, 0, 0]).children) == 1
431+
and data_equals(children, [1, 1, 0, 0, 0], "length")
444432
),
445-
"selected": 1
433+
"selected": 1,
446434
},
447435
{
448436
"type": "variant_certain_repeat | variant_certain_repeat_range_length - length 2",
449437
# R1:c.10-2_10-4[5]
450438
"conditions": lambda children: (
451-
len(children) == 3
452-
and children[0].data == children[1].data == "variant_certain"
453-
and len(get_child(children, [0]).children) == 2
454-
and data_equals(children, [0, 0], "location")
455-
and data_equals(children, [0, 1], "repeat")
456-
457-
and len(get_child(children, [1]).children) == 2
458-
and data_equals(children, [1, 0], "location")
459-
and data_equals(children, [1, 1], "repeat")
460-
461-
and len(get_child(children, [2]).children) == 2
462-
and data_equals(children, [2, 0], "location")
463-
and data_equals(children, [2, 1], "repeat")
464-
465-
and len(get_child(children, [2, 1]).children) == 1
466-
and data_equals(children, [2, 1, 0], "inserted")
467-
and len(get_child(children, [2, 1, 0]).children) == 1
468-
and data_equals(children, [2, 1, 0, 0], "insert")
469-
and len(get_child(children, [2, 1, 0, 0]).children) == 1
470-
and data_equals(children, [2, 1, 0, 0, 0], "length")
439+
len(children) == 3
440+
and children[0].data == children[1].data == "variant_certain"
441+
and len(get_child(children, [0]).children) == 2
442+
and data_equals(children, [0, 0], "location")
443+
and data_equals(children, [0, 1], "repeat")
444+
and len(get_child(children, [1]).children) == 2
445+
and data_equals(children, [1, 0], "location")
446+
and data_equals(children, [1, 1], "repeat")
447+
and len(get_child(children, [2]).children) == 2
448+
and data_equals(children, [2, 0], "location")
449+
and data_equals(children, [2, 1], "repeat")
450+
and len(get_child(children, [2, 1]).children) == 1
451+
and data_equals(children, [2, 1, 0], "inserted")
452+
and len(get_child(children, [2, 1, 0]).children) == 1
453+
and data_equals(children, [2, 1, 0, 0], "insert")
454+
and len(get_child(children, [2, 1, 0, 0]).children) == 1
455+
and data_equals(children, [2, 1, 0, 0, 0], "length")
471456
),
472-
"selected": 2
457+
"selected": 2,
458+
},
459+
{
460+
"type": "variant_certain_repeat | variant_certain_substitution - 2",
461+
# for protein descriptions
462+
# STR:D5S818
463+
"conditions": lambda children: (
464+
len(children) == 2
465+
and children[0].data == children[1].data == "variant_certain"
466+
and len(get_child(children, [0]).children) == 2
467+
and data_equals(children, [0, 0], "location")
468+
and isinstance(get_child(children, [0, 0, 0]), Tree)
469+
and data_equals(children, [0, 0, 0], "point")
470+
and len(get_child(children, [0, 0, 0]).children) == 2
471+
and isinstance(get_child(children, [0, 0, 0, 0]), Token)
472+
and isinstance(get_child(children, [0, 0, 0, 1]), Token)
473+
and data_equals(children, [0, 1], "repeat")
474+
and len(get_child(children, [1]).children) == 2
475+
and data_equals(children, [1, 0], "location")
476+
and isinstance(get_child(children, [1, 0, 0]), Tree)
477+
and data_equals(children, [1, 0, 0], "point")
478+
and len(get_child(children, [1, 0, 0]).children) == 2
479+
and isinstance(get_child(children, [1, 0, 0, 0]), Token)
480+
and isinstance(get_child(children, [1, 0, 0, 1]), Token)
481+
and data_equals(children, [1, 1], "substitution")
482+
),
483+
"selected": 1,
484+
},
485+
{
486+
"type": "deletion_insertion | deletion_insertion | ... nested - 0",
487+
# REF_1:10del REF_2:20insA REF_3:30insT
488+
"conditions": lambda children: (
489+
len(children) >= 2
490+
and children[0].data == children[1].data == "deletion_insertion"
491+
and len(get_child(children, [0]).children) == 2
492+
and len(get_child(children, [1]).children) == 2
493+
and isinstance(get_child(children, [1, 0]), Tree)
494+
and len(get_child(children, [1, 0]).children) == 1
495+
and isinstance(get_child(children, [1, 1]), Tree)
496+
and len(get_child(children, [1, 1]).children) == 1
497+
and data_equals(children, [1, 0, 0], "insert")
498+
and len(get_child(children, [1, 0, 0]).children) == 1
499+
and (
500+
data_equals(children, [1, 0, 0, 0], "description_dna")
501+
or data_equals(children, [1, 0, 0, 0], "description_protein")
502+
)
503+
),
504+
"selected": 1,
473505
},
474506
]
475507

@@ -698,10 +730,12 @@ def status(self):
698730
" Propagate positions: %s" % self._parser.options.propagate_positions
699731
)
700732

733+
701734
@functools.lru_cache
702735
def get_parser(grammar_path=None, start_rule=None):
703736
return HgvsParser(grammar_path, start_rule)
704737

738+
705739
def parse(description, grammar_path=None, start_rule=None):
706740
"""
707741
Parse the provided HGVS `description`, or the description part,

0 commit comments

Comments
 (0)