Skip to content

Commit

Permalink
Basic parser
Browse files Browse the repository at this point in the history
  • Loading branch information
Andre Senna committed Oct 11, 2022
1 parent ce69d36 commit 2976270
Show file tree
Hide file tree
Showing 5 changed files with 345 additions and 0 deletions.
61 changes: 61 additions & 0 deletions das/atomese_lex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import ply.lex as lex
from das.exceptions import AtomeseLexerError

class AtomeseLex:
    """Wrapper around a PLY lexer that tokenizes Atomese expressions.

    Token types produced: ``ATOM_OPENNING``, ``ATOM_CLOSING``, ``ATOM_TYPE``,
    ``NODE_NAME`` and ``EOF``.

    NOTE: PLY uses the docstring of each ``t_*`` method as that token's
    regular expression. Those docstrings are therefore code, not prose, and
    the methods are documented with ``#`` comments instead.
    """

    # Characters skipped between tokens (PLY's special ``t_ignore`` rule).
    t_ignore = ' \t'

    def __init__(self, **kwargs):
        """Build the underlying PLY lexer.

        Args:
            **kwargs: forwarded unchanged to ``ply.lex.lex``.
        """
        # No reserved words are defined; the mapping is kept so that its
        # values are appended to the token list below.
        self.reserved = {
        }

        self.tokens = [
            'ATOM_OPENNING',
            'ATOM_CLOSING',
            'ATOM_TYPE',
            'NODE_NAME',
            'EOF',
        ] + list(self.reserved.values())

        # Simple (string) token rules.
        self.t_ATOM_OPENNING = r'\('
        self.t_ATOM_CLOSING = r'\)'

        # PLY inspects this instance (module=self) to collect the tokens and
        # the t_* rules, so everything above must be set before this call.
        self.lexer = lex.lex(module=self, **kwargs)
        self.lexer.eof_reported_flag = False
        self.action_broker = None
        self.eof_handler = self.default_eof_handler
        self.lexer.filename = ""

    # Quoted node name; the surrounding double quotes are stripped.
    def t_NODE_NAME(self, t):
        r'\"[^\"]+\"'
        t.value = t.value[1:-1]
        return t

    # Atom type identifier; a trailing "Node" or "Link" suffix is removed.
    def t_ATOM_TYPE(self, t):
        r'[^\W0-9]\w*'
        if t.value.endswith(("Node", "Link")):
            t.value = t.value[:-4]
        return t

    # Newlines produce no token; they only advance the line counter.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # End of input is delegated to the configurable handler.
    def t_eof(self, t):
        return self.eof_handler(t)

    def default_eof_handler(self, t):
        """Emit a single ``EOF`` token, then ``None`` on later calls.

        Args:
            t: the token object handed over by the end-of-input rule.

        Returns:
            ``t`` with its type set to ``'EOF'`` the first time, ``None``
            once the EOF has already been reported.
        """
        if self.lexer.eof_reported_flag:
            return None
        self.lexer.input("")
        t.type = 'EOF'
        self.lexer.eof_reported_flag = True
        return t

    # Illegal character: raise the Atomese lexer error with file/line context.
    def t_error(self, t):
        filename = self.lexer.filename
        source = f"File: {filename if filename else '<input string>'}"
        # NOTE(review): the 80/30 thresholds and the "- 1" look inconsistent;
        # kept as-is so the message text does not change. Confirm the intent.
        n = 80 if len(t.value) > 30 else len(t.value) - 1
        error_message = (
            f"{source} - Illegal character at line {t.lexer.lineno}: '{t.value[0]}' "
            f"Near: '{t.value[0:n]}...'"
        )
        # The original raised MettaLexerError, a name this module never
        # imports, so callers got a NameError instead of a lexer error.
        raise AtomeseLexerError(error_message)
123 changes: 123 additions & 0 deletions das/atomese_lex_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import pytest
from das.atomese_lex import AtomeseLex

# Atomese sample input: three top-level expressions (ContextLink,
# EvaluationLink, ContextLink) made of nested links and quoted node names.
# One node name ("R HSA 2142753") contains spaces.
lex_test_data = """
(ContextLink
(MemberLink
(ChebiNode "ChEBI:10033")
(ReactomeNode "R-HSA-6806664"))
(EvaluationLink
(PredicateNode "has_location")
(ListLink
(ChebiNode "ChEBI:10033")
(ConceptNode "cytosol"))))
(EvaluationLink
(PredicateNode "has_name")
(ListLink
(ChebiNode "ChEBI:10033")
(ConceptNode "warfarin")))
(ContextLink
(MemberLink
(ChebiNode "ChEBI:10036")
(ReactomeNode "R HSA 2142753"))
(EvaluationLink
(PredicateNode "has_location")
(ListLink
(ChebiNode "ChEBI:10036")
(ConceptNode "endoplasmic reticulum lumen"))))"""


def test_lexer():
    """Check the token-type sequence the lexer emits for ``lex_test_data``."""
    opening, closing = "ATOM_OPENNING", "ATOM_CLOSING"
    atom_type, node_name = "ATOM_TYPE", "NODE_NAME"

    # Token shapes that occur on a single input line.
    link_head = [opening, atom_type]                  # "(SomeLink"
    node = [opening, atom_type, node_name, closing]   # '(SomeNode "name")'

    # One entry per line of the sample, in order; extra closings are the
    # parentheses that end enclosing links on that same line.
    tokens_per_line = [
        # first ContextLink
        link_head,
        link_head,
        node,
        node + [closing],
        link_head,
        node,
        link_head,
        node,
        node + [closing] * 3,
        # top-level EvaluationLink
        link_head,
        node,
        link_head,
        node,
        node + [closing] * 2,
        # second ContextLink
        link_head,
        link_head,
        node,
        node + [closing],
        link_head,
        node,
        link_head,
        node,
        node + [closing] * 3,
        # end of input
        ["EOF"],
    ]
    expected_types = [kind for line in tokens_per_line for kind in line]

    lexer_wrapper = AtomeseLex()
    lexer_wrapper.lexer.input(lex_test_data)
    for expected in expected_types:
        produced = lexer_wrapper.lexer.token()
        assert produced.type == expected
    # Nothing may follow the EOF token.
    assert not lexer_wrapper.lexer.token()
98 changes: 98 additions & 0 deletions das/atomese_yacc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""
START -> LIST_OF_TOP_LEVEL_ATOMS EOF
| EOF
| <empty>
LIST_OF_TOP_LEVEL_ATOMS -> TOP_LEVEL_ATOM
| LIST_OF_TOP_LEVEL_ATOMS TOP_LEVEL_ATOM
TOP_LEVEL_ATOM -> ATOM
ATOM -> NODE
| LINK
NODE -> ATOM_OPENNING ATOM_TYPE NODE_NAME ATOM_CLOSING
LINK -> ATOM_OPENNING ATOM_TYPE ATOM_LIST ATOM_CLOSING
ATOM_LIST -> ATOM
| ATOM_LIST ATOM
"""

from typing import List, Any, Optional
import ply.yacc as yacc
from das.atomese_lex import AtomeseLex
from das.exceptions import AtomeseSyntaxError, UndefinedSymbolError
from das.expression_hasher import ExpressionHasher
from das.expression import Expression
from das.base_yacc import BaseYacc

class AtomeseYacc(BaseYacc):
    """PLY yacc grammar for Atomese input.

    The docstring of every ``p_*`` method is the grammar production PLY
    reads, so explanatory notes live in ``#`` comments. Each rule stores its
    result in ``p[0]`` and prints it.
    """

    def __init__(self, **kwargs):
        """Create the lexer wrapper, run the base-class setup, build the parser.

        Args:
            **kwargs: forwarded to ``BaseYacc.__init__``.
        """
        super().__init__(**kwargs)
        self.lex_wrap = AtomeseLex()
        super().setup()
        self.parser = yacc.yacc(module=self)

    ### Parser rules ###

    # Start symbol: a list of atoms followed by EOF, EOF alone, or nothing.
    def p_START(self, p):
        """START : LIST_OF_TOP_LEVEL_ATOMS EOF
                 | EOF
                 |"""
        p[0] = 'SUCCESS'
        print(f"p_START: {p[0]}")

    # First top-level atom starts a new list.
    def p_LIST_OF_TOP_LEVEL_ATOMS_base(self, p):
        """LIST_OF_TOP_LEVEL_ATOMS : TOP_LEVEL_ATOM"""
        first_atom = p[1]
        p[0] = [first_atom]
        print(f"p_LIST_OF_TOP_LEVEL_ATOMS_base: {p[0]}")

    # Further top-level atoms are appended to a copy of the list so far.
    def p_LIST_OF_TOP_LEVEL_ATOMS_recursion(self, p):
        """LIST_OF_TOP_LEVEL_ATOMS : LIST_OF_TOP_LEVEL_ATOMS TOP_LEVEL_ATOM"""
        atoms = list(p[1])
        atoms.append(p[2])
        p[0] = atoms
        print(f"p_LIST_OF_TOP_LEVEL_ATOMS_recursion: {p[0]}")

    # A top-level atom is passed through unchanged.
    def p_TOP_LEVEL_ATOM(self, p):
        """TOP_LEVEL_ATOM : ATOM"""
        p[0] = p[1]
        print(f"p_TOP_LEVEL_ATOM: {p[0]}")

    # An atom is either a node ...
    def p_ATOM_node(self, p):
        """ATOM : NODE"""
        p[0] = p[1]
        print(f"p_ATOM_node: {p[0]}")

    # ... or a link; both are passed through unchanged.
    def p_ATOM_link(self, p):
        """ATOM : LINK"""
        p[0] = p[1]
        print(f"p_ATOM_link: {p[0]}")

    # Node: rendered as "<type: name>".
    def p_NODE(self, p):
        """NODE : ATOM_OPENNING ATOM_TYPE NODE_NAME ATOM_CLOSING"""
        p[0] = f"<{p[2]}: {p[3]}>"
        print(f"p_NODE: {p[0]}")

    # Link: rendered as "(type targets)", where targets is the atom list.
    def p_LINK(self, p):
        """LINK : ATOM_OPENNING ATOM_TYPE ATOM_LIST ATOM_CLOSING"""
        p[0] = f"({p[2]} {p[3]})"
        print(f"p_LINK: {p[0]}")

    # First atom inside a link starts a new list.
    def p_ATOM_LIST_base(self, p):
        """ATOM_LIST : ATOM"""
        first_atom = p[1]
        p[0] = [first_atom]
        print(f"p_ATOM_LIST_base: {p[0]}")

    # Further atoms inside a link are appended to a copy of the list so far.
    def p_ATOM_LIST_recursion(self, p):
        """ATOM_LIST : ATOM_LIST ATOM"""
        atoms = list(p[1])
        atoms.append(p[2])
        p[0] = atoms
        print(f"p_ATOM_LIST_recursion: {p[0]}")

    # Any syntax error aborts parsing with the line number and current token.
    def p_error(self, p):
        error = (
            f"Syntax error in line {self.lexer.lineno} "
            f"Current token: {p}"
        )
        raise AtomeseSyntaxError(error)

    ### End of parser rules ###
55 changes: 55 additions & 0 deletions das/atomese_yacc_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import pytest
from das.atomese_lex import AtomeseLex
from das.atomese_lex_test import lex_test_data as test_data
from das.atomese_yacc import AtomeseYacc
from das.parser_actions import ParserActions
from das.exceptions import UndefinedSymbolError

class ActionBroker(ParserActions):
    """Test double that counts how often each parser callback is invoked."""

    def __init__(self, data=None):
        """Store the optional input string and reset every counter to zero."""
        self.input_string = data
        self.file_path = ""
        self.count_type = 0
        self.count_terminal = 0
        self.count_nested_expression = 0
        self.count_toplevel_expression = 0

    def new_expression(self, expression: str):
        """Count one nested expression; the expression itself is ignored."""
        self.count_nested_expression = self.count_nested_expression + 1

    def new_terminal(self, expression: str):
        """Count one terminal; the expression itself is ignored."""
        self.count_terminal = self.count_terminal + 1

    def new_top_level_expression(self, expression: str):
        """Count one top-level expression; the expression itself is ignored."""
        self.count_toplevel_expression = self.count_toplevel_expression + 1

    def new_top_level_typedef_expression(self, expression: str):
        """Count one top-level type definition; the expression is ignored."""
        self.count_type = self.count_type + 1

def test_parser():
    """Run ``check`` on the shared sample and expect it to report success."""
    parser = AtomeseYacc()
    assert parser.check(test_data) == "SUCCESS"

def _action_broker():
    """Exercise the parser with an ActionBroker through its three entry points.

    The name has no ``test_`` prefix, so under pytest's default naming rules
    this function is not collected as a test.
    """
    # check(): succeeds without invoking the counted callbacks.
    checking_broker = ActionBroker(test_data)
    checking_parser = AtomeseYacc(action_broker=checking_broker)
    outcome = checking_parser.check(test_data)
    assert outcome == "SUCCESS"
    assert checking_broker.count_toplevel_expression == 0
    assert checking_broker.count_type == 0

    # parse(): input passed explicitly.
    parsing_broker = ActionBroker()
    parsing_parser = AtomeseYacc(action_broker=parsing_broker)
    outcome = parsing_parser.parse(test_data)
    assert outcome == "SUCCESS"
    assert parsing_broker.count_toplevel_expression == 1
    assert parsing_broker.count_type == 8

    # parse_action_broker_input(): input taken from the broker itself.
    self_fed_broker = ActionBroker(test_data)
    self_fed_parser = AtomeseYacc(action_broker=self_fed_broker)
    outcome = self_fed_parser.parse_action_broker_input()
    assert outcome == "SUCCESS"
    assert self_fed_broker.count_toplevel_expression == 1
    assert self_fed_broker.count_type == 8
8 changes: 8 additions & 0 deletions das/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ class MettaSyntaxError(Exception):
def __init__(self, error_message: str):
super().__init__(error_message)

class AtomeseLexerError(Exception):
    """Exception type for Atomese lexer errors; carries one message string."""

    def __init__(self, error_message: str) -> None:
        """Initialize the exception with ``error_message`` as its only argument."""
        super().__init__(error_message)

class AtomeseSyntaxError(Exception):
    """Exception type for Atomese syntax errors; carries one message string."""

    def __init__(self, error_message: str) -> None:
        """Initialize the exception with ``error_message`` as its only argument."""
        super().__init__(error_message)

class UndefinedSymbolError(Exception):
def __init__(self, symbols: List[str]):
super().__init__(str(symbols))
Expand Down

0 comments on commit 2976270

Please sign in to comment.