forked from singnet/das-poc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Andre Senna
committed
Oct 11, 2022
1 parent
ce69d36
commit 2976270
Showing
5 changed files
with
345 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import ply.lex as lex | ||
from das.exceptions import AtomeseLexerError | ||
|
||
class AtomeseLex:
    """PLY lexer wrapper for Atomese expressions.

    The token rules are declared on the instance/class and handed to
    ``lex.lex(module=self)``; the resulting lexer is exposed as
    ``self.lexer``.
    """

    def __init__(self, **kwargs):
        """Declare the token set and build the underlying PLY lexer.

        Args:
            **kwargs: Forwarded unchanged to ``lex.lex``.
        """
        # No reserved words are defined yet; the values of this mapping are
        # appended to the token list below.
        self.reserved = {
        }

        self.tokens = [
            'ATOM_OPENNING',
            'ATOM_CLOSING',
            'ATOM_TYPE',
            'NODE_NAME',
            'EOF',
        ] + list(self.reserved.values())

        # Plain-string rules for the parentheses that delimit an atom.
        self.t_ATOM_OPENNING = r'\('
        self.t_ATOM_CLOSING = r'\)'

        self.lexer = lex.lex(module=self, **kwargs)
        # Extra state kept on the PLY lexer object itself.
        self.lexer.eof_reported_flag = False
        self.action_broker = None
        self.eof_handler = self.default_eof_handler
        self.lexer.filename = ""

    # NOTE: the first string literal of every t_* function below is the regex
    # PLY uses for that token, so those functions are documented with
    # comments rather than with docstrings.

    # Double-quoted node name; the surrounding quotes are stripped.
    def t_NODE_NAME(self, t):
        r'\"[^\"]+\"'
        t.value = t.value[1:-1]
        return t

    # Atom type identifier; a trailing "Node" or "Link" is removed, so
    # "ConceptNode" is reported as "Concept".
    def t_ATOM_TYPE(self, t):
        r'[^\W0-9]\w*'
        if t.value.endswith(("Node", "Link")):
            t.value = t.value[:-4]
        return t

    # Spaces and tabs between tokens are skipped.
    t_ignore = ' \t'

    # Newlines produce no token; they only advance the line counter.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # End-of-input hook; delegates to the currently installed handler.
    def t_eof(self, t):
        return self.eof_handler(t)

    def default_eof_handler(self, t):
        """Report a single ``EOF`` token, then signal exhaustion.

        Args:
            t: Token object handed to the end-of-input hook.

        Returns:
            ``t`` retyped as ``'EOF'`` on the first call; ``None`` once EOF
            has already been reported.
        """
        if self.lexer.eof_reported_flag:
            return None
        # Feed an empty input before handing out the EOF token and remember
        # that EOF was reported so the next call returns None.
        self.lexer.input("")
        t.type = 'EOF'
        self.lexer.eof_reported_flag = True
        return t

    # Error hook: raises AtomeseLexerError describing the illegal character,
    # its line and a snippet of the remaining input.
    def t_error(self, t):
        source = f"File: {self.lexer.filename if self.lexer.filename else '<input string>'}"
        # NOTE(review): for remaining input of 30 characters or fewer the
        # snippet drops its last character - confirm this is intended.
        n = 80 if len(t.value) > 30 else len(t.value) - 1
        error_message = f"{source} - Illegal character at line {t.lexer.lineno}: '{t.value[0]}' " +\
            f"Near: '{t.value[0:n]}...'"
        # Fixed: the original raised the undefined name MettaLexerError,
        # which surfaced as a NameError instead of the lexer error imported
        # by this module.
        raise AtomeseLexerError(error_message)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
import pytest | ||
from das.atomese_lex import AtomeseLex | ||
|
||
# Atomese fixture with three top-level expressions (ContextLink,
# EvaluationLink, ContextLink); some node names contain spaces and colons.
lex_test_data = """ | ||
(ContextLink | ||
(MemberLink | ||
(ChebiNode "ChEBI:10033") | ||
(ReactomeNode "R-HSA-6806664")) | ||
(EvaluationLink | ||
(PredicateNode "has_location") | ||
(ListLink | ||
(ChebiNode "ChEBI:10033") | ||
(ConceptNode "cytosol")))) | ||
(EvaluationLink | ||
(PredicateNode "has_name") | ||
(ListLink | ||
(ChebiNode "ChEBI:10033") | ||
(ConceptNode "warfarin"))) | ||
(ContextLink | ||
(MemberLink | ||
(ChebiNode "ChEBI:10036") | ||
(ReactomeNode "R HSA 2142753")) | ||
(EvaluationLink | ||
(PredicateNode "has_location") | ||
(ListLink | ||
(ChebiNode "ChEBI:10036") | ||
(ConceptNode "endoplasmic reticulum lumen"))))""" | ||
|
||
|
||
def test_lexer():
    """Tokenize ``lex_test_data`` and check the token types in order."""
    opening, atom_type, name, closing = (
        "ATOM_OPENNING",
        "ATOM_TYPE",
        "NODE_NAME",
        "ATOM_CLOSING",
    )

    # A node is "( Type "name" )"; a link starts with "( Type" and is closed
    # only after all of its targets.
    node = [opening, atom_type, name, closing]
    link_head = [opening, atom_type]

    # (EvaluationLink (PredicateNode ..) (ListLink (node) (node)))
    evaluation_link = link_head + node + link_head + node + node + [closing, closing]
    # (ContextLink (MemberLink (node) (node)) (EvaluationLink ...))
    context_link = (
        link_head + link_head + node + node + [closing]
        + evaluation_link
        + [closing]
    )

    # Same order as the three top-level expressions in the fixture,
    # followed by the single EOF token.
    expected_tokens = context_link + evaluation_link + context_link + ["EOF"]

    wrap = AtomeseLex()
    wrap.lexer.input(lex_test_data)
    for expected_token in expected_tokens:
        token = wrap.lexer.token()
        assert token.type == expected_token
    # Once EOF has been reported the lexer must yield nothing more.
    assert not wrap.lexer.token()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
""" | ||
START -> LIST_OF_TOP_LEVEL_ATOMS EOF | ||
| EOF | ||
| <empty> | ||
LIST_OF_TOP_LEVEL_ATOMS -> TOP_LEVEL_ATOM | ||
| LIST_OF_TOP_LEVEL_ATOMS TOP_LEVEL_ATOM | ||
TOP_LEVEL_ATOM -> ATOM | ||
ATOM -> NODE | ||
| LINK | ||
NODE -> ATOM_OPENNING ATOM_TYPE NODE_NAME ATOM_CLOSING | ||
LINK -> ATOM_OPENNING ATOM_TYPE ATOM_LIST ATOM_CLOSING | ||
ATOM_LIST -> ATOM | ||
| ATOM_LIST ATOM | ||
""" | ||
|
||
from typing import List, Any, Optional | ||
import ply.yacc as yacc | ||
from das.atomese_lex import AtomeseLex | ||
from das.exceptions import AtomeseSyntaxError, UndefinedSymbolError | ||
from das.expression_hasher import ExpressionHasher | ||
from das.expression import Expression | ||
from das.base_yacc import BaseYacc | ||
|
||
class AtomeseYacc(BaseYacc):
    """PLY yacc parser for Atomese, layered on ``BaseYacc``.

    Each ``p_*`` method carries its grammar production in its docstring
    (PLY reads it), so those methods are described with comments instead.
    Nodes are rendered as ``<Type: name>``, links as ``(Type [targets])``
    and a successful parse yields ``'SUCCESS'``.
    """

    def __init__(self, **kwargs):
        """Create the lexer wrapper, run the base setup and build the parser.

        Args:
            **kwargs: Forwarded unchanged to ``BaseYacc.__init__``.
        """
        super().__init__(**kwargs)
        self.lex_wrap = AtomeseLex()
        super().setup()
        self.parser = yacc.yacc(module=self)

    @staticmethod
    def _extended(items, item):
        # Return a new list holding `items` followed by `item`.
        return [*items, item]

    ### Parser rules ###

    # Start symbol: a list of top-level atoms followed by EOF, a bare EOF,
    # or nothing at all.
    def p_START(self, p):
        """START : LIST_OF_TOP_LEVEL_ATOMS EOF
                 | EOF
                 |"""
        p[0] = 'SUCCESS'
        print(f"p_START: {p[0]}")

    # First top-level atom: start a one-element list.
    # (A check-mode / action-broker early return is not enabled here yet.)
    def p_LIST_OF_TOP_LEVEL_ATOMS_base(self, p):
        """LIST_OF_TOP_LEVEL_ATOMS : TOP_LEVEL_ATOM"""
        first_atom = p[1]
        p[0] = [first_atom]
        print(f"p_LIST_OF_TOP_LEVEL_ATOMS_base: {p[0]}")

    # Further top-level atoms: extend the list built so far.
    def p_LIST_OF_TOP_LEVEL_ATOMS_recursion(self, p):
        """LIST_OF_TOP_LEVEL_ATOMS : LIST_OF_TOP_LEVEL_ATOMS TOP_LEVEL_ATOM"""
        p[0] = self._extended(p[1], p[2])
        print(f"p_LIST_OF_TOP_LEVEL_ATOMS_recursion: {p[0]}")

    # A top-level atom is passed through unchanged.
    def p_TOP_LEVEL_ATOM(self, p):
        """TOP_LEVEL_ATOM : ATOM"""
        atom = p[1]
        p[0] = atom
        print(f"p_TOP_LEVEL_ATOM: {p[0]}")

    # An atom that is a node: pass the node through.
    def p_ATOM_node(self, p):
        """ATOM : NODE"""
        node = p[1]
        p[0] = node
        print(f"p_ATOM_node: {p[0]}")

    # An atom that is a link: pass the link through.
    def p_ATOM_link(self, p):
        """ATOM : LINK"""
        link = p[1]
        p[0] = link
        print(f"p_ATOM_link: {p[0]}")

    # Node: rendered from its type (p[2]) and name (p[3]).
    def p_NODE(self, p):
        """NODE : ATOM_OPENNING ATOM_TYPE NODE_NAME ATOM_CLOSING"""
        p[0] = f"<{p[2]}: {p[3]}>"
        print(f"p_NODE: {p[0]}")

    # Link: rendered from its type (p[2]) and its list of targets (p[3]).
    def p_LINK(self, p):
        """LINK : ATOM_OPENNING ATOM_TYPE ATOM_LIST ATOM_CLOSING"""
        p[0] = f"({p[2]} {p[3]})"
        print(f"p_LINK: {p[0]}")

    # First target of a link: start a one-element list.
    def p_ATOM_LIST_base(self, p):
        """ATOM_LIST : ATOM"""
        first_target = p[1]
        p[0] = [first_target]
        print(f"p_ATOM_LIST_base: {p[0]}")

    # Further targets of a link: extend the list built so far.
    def p_ATOM_LIST_recursion(self, p):
        """ATOM_LIST : ATOM_LIST ATOM"""
        p[0] = self._extended(p[1], p[2])
        print(f"p_ATOM_LIST_recursion: {p[0]}")

    # Syntax-error hook: raises AtomeseSyntaxError with the lexer's current
    # line and the token passed in by the parser.
    # (self.lexer is not set in this class - presumably by BaseYacc.)
    def p_error(self, p):
        raise AtomeseSyntaxError(
            f"Syntax error in line {self.lexer.lineno} " +
            f"Current token: {p}"
        )

    ### End of parser rules ###
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import pytest | ||
from das.atomese_lex import AtomeseLex | ||
from das.atomese_lex_test import lex_test_data as test_data | ||
from das.atomese_yacc import AtomeseYacc | ||
from das.parser_actions import ParserActions | ||
from das.exceptions import UndefinedSymbolError | ||
|
||
class ActionBroker(ParserActions):
    """Counting stub of ``ParserActions`` used by the parser tests.

    Every callback only bumps the counter for its kind of event; the
    expression passed in is ignored.
    """

    def __init__(self, data=None):
        """Zero all counters and remember the optional input text.

        Args:
            data: Stored as ``input_string``; defaults to ``None``.
        """
        self.input_string = data
        self.file_path = ""
        self.count_toplevel_expression = 0
        self.count_nested_expression = 0
        self.count_terminal = 0
        self.count_type = 0

    def new_expression(self, expression: str):
        """Count one nested expression."""
        self.count_nested_expression = self.count_nested_expression + 1

    def new_terminal(self, expression: str):
        """Count one terminal."""
        self.count_terminal = self.count_terminal + 1

    def new_top_level_expression(self, expression: str):
        """Count one top-level expression."""
        self.count_toplevel_expression = self.count_toplevel_expression + 1

    def new_top_level_typedef_expression(self, expression: str):
        """Count one top-level type definition."""
        self.count_type = self.count_type + 1
|
||
def test_parser():
    """Checking the shared Atomese fixture must report success."""
    outcome = AtomeseYacc().check(test_data)
    assert outcome == "SUCCESS"
|
||
def _action_broker():
    """Exercise check / parse / broker-input parsing with a counting broker.

    The name lacks the ``test_`` prefix, so pytest's default discovery does
    not collect this function.
    NOTE(review): the fixture has three top-level expressions while the
    expected counts below are 1 and 8 - confirm them before enabling.
    """
    # check(): success, and none of the counters checked here has moved.
    broker = ActionBroker(test_data)
    parser = AtomeseYacc(action_broker=broker)
    outcome = parser.check(test_data)
    assert outcome == "SUCCESS"
    assert broker.count_toplevel_expression == 0
    assert broker.count_type == 0

    # parse(): the text is passed explicitly.
    broker = ActionBroker()
    parser = AtomeseYacc(action_broker=broker)
    outcome = parser.parse(test_data)
    assert outcome == "SUCCESS"
    assert broker.count_toplevel_expression == 1
    assert broker.count_type == 8

    # parse_action_broker_input(): no text argument; the broker was built
    # with the fixture as its input.
    broker = ActionBroker(test_data)
    parser = AtomeseYacc(action_broker=broker)
    outcome = parser.parse_action_broker_input()
    assert outcome == "SUCCESS"
    assert broker.count_toplevel_expression == 1
    assert broker.count_type == 8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters