Skip to content

Commit 08bc90a

Browse files
committed Nov 19, 2024
Fix protein extension conversion
1 parent 9adf68c commit 08bc90a

File tree

3 files changed

+52
-10
lines changed

3 files changed

+52
-10
lines changed
 

‎mutalyzer_hgvs_parser/ebnf/protein.g

+1 -1
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ extension: extension_n | extension_c
4343

4444
extension_n: "ext" "-" NUMBER
4545

46-
extension_c: P_SEQUENCE "ext" (P_SEQUENCE | p_point)
46+
extension_c: P_SEQUENCE "ext" P_SEQUENCE p_location?
4747

4848
frame_shift: "fs" | AA "fs" ("*" | "Ter") p_location
4949

‎mutalyzer_hgvs_parser/hgvs_parser.py

+9 -5
Original file line number | Diff line number | Diff line change
@@ -540,12 +540,16 @@ def extension_n(self, children):
540540
return Tree("inserted", [Tree("insert", location)])
541541

542542
def extension_c(self, children):
543-
inserted = [Tree("insert", [Token("P_SEQUENCE", children[0]).value])]
544-
if isinstance(children[1], Token):
545-
inserted.append(Tree("insert", [Token("P_SEQUENCE", children[1].value)]))
543+
new_children = []
544+
for child in children:
545+
if isinstance(child, Token):
546+
new_children.append(Tree("insert", [Token("P_SEQUENCE", child.value)]))
547+
else:
548+
new_children.append(Tree("insert", [child]))
549+
if new_children:
550+
return Tree("extension", [Tree("inserted", new_children)])
546551
else:
547-
inserted.append(Tree("insert", [Tree("location", [children[1]])]))
548-
return Tree("inserted", inserted)
552+
return Tree("extension", [])
549553

550554
def frame_shift(self, children):
551555
new_children = []

‎tests/test_protein.py

+42 -4
Original file line number | Diff line number | Diff line change
@@ -831,10 +831,10 @@
831831
"source": "reference",
832832
"inserted": [
833833
{"sequence": "Gln", "source": "description"},
834+
{"sequence": "Ter", "source": "description"},
834835
{
835836
"location": {
836837
"type": "point",
837-
"amino_acid": "Ter",
838838
"position": 17,
839839
},
840840
"source": "reference",
@@ -854,10 +854,10 @@
854854
"source": "reference",
855855
"inserted": [
856856
{"sequence": "Gln", "source": "description"},
857+
{"sequence": "*", "source": "description"},
857858
{
858859
"location": {
859860
"type": "point",
860-
"amino_acid": "*",
861861
"position": 17,
862862
},
863863
"source": "reference",
@@ -901,8 +901,46 @@
901901
],
902902
},
903903
# -
904-
"PREF:p.Ter327Argext*?": {},
905-
"PREF:p.*327Argext*?": {},
904+
"PREF:p.Ter327Argext*?": {
905+
"type": "description_protein",
906+
"reference": {"id": "PREF"},
907+
"coordinate_system": "p",
908+
"variants": [
909+
{
910+
"location": {"type": "point", "amino_acid": "Ter", "position": 327},
911+
"type": "extension",
912+
"source": "reference",
913+
"inserted": [
914+
{"sequence": "Arg", "source": "description"},
915+
{"sequence": "*", "source": "description"},
916+
{
917+
"location": {"type": "point", "uncertain": True},
918+
"source": "reference",
919+
},
920+
],
921+
}
922+
],
923+
},
924+
"PREF:p.*327Argext*?": {
925+
"type": "description_protein",
926+
"reference": {"id": "PREF"},
927+
"coordinate_system": "p",
928+
"variants": [
929+
{
930+
"location": {"type": "point", "amino_acid": "*", "position": 327},
931+
"type": "extension",
932+
"source": "reference",
933+
"inserted": [
934+
{"sequence": "Arg", "source": "description"},
935+
{"sequence": "*", "source": "description"},
936+
{
937+
"location": {"type": "point", "uncertain": True},
938+
"source": "reference",
939+
},
940+
],
941+
}
942+
],
943+
},
906944
}
907945

908946
OTHER = {

0 commit comments

Comments
 (0)
Please sign in to comment.