Skip to content

Commit

Permalink
Added support for atomese files
Browse files Browse the repository at this point in the history
  • Loading branch information
Andre Senna committed Oct 13, 2022
1 parent 8171256 commit 4ecac6f
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 25 deletions.
19 changes: 16 additions & 3 deletions das/atomese_lex.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ def __init__(self, **kwargs):
'ATOM_CLOSING',
'ATOM_TYPE',
'NODE_NAME',
'STV',
'FLOAT',
'COMMENT',
'EOF',
] + list(self.reserved.values())

Expand All @@ -24,19 +27,29 @@ def __init__(self, **kwargs):
self.eof_handler = self.default_eof_handler
self.lexer.filename = ""


def t_NODE_NAME(self, t):
    r'\"[^\"]+\"'
    # NOTE: in PLY the raw-string "docstring" above is the token's regex,
    # not documentation - it must stay exactly as written.
    # Matches a double-quoted, non-empty node name (no escaped quotes
    # supported) and strips the surrounding quotes from the token value.
    t.value = t.value[1:-1]
    return t

def t_ATOM_TYPE(self, t):
    r'[^\W0-9]\w*'
    # NOTE: in PLY the raw-string "docstring" above is the token's regex
    # (an identifier: a word character that is not a digit, then word
    # characters) - it must stay exactly as written.
    #
    # 'STV'/'stv' marks a truth-value expression, not an atom type, so it
    # is re-tagged as its own token kind.
    if t.value in ('STV', 'stv'):
        t.type = 'STV'
    elif t.value.endswith(('Node', 'Link')):
        # Drop the 4-character suffix: "GeneNode" -> "Gene", "ListLink" -> "List".
        t.value = t.value[:-4]
    return t

t_FLOAT = r'\d+\.\d+'  # numeric literals used for stv strength/confidence

t_ignore =' \t'  # characters skipped between tokens (spaces and tabs)

def t_COMMENT(self, t):
    r'\;.*'
    # NOTE: in PLY the raw-string "docstring" above is the token's regex -
    # it must stay exactly as written. Matches a ';' comment to end of
    # line; returning None (falling through) discards it, so no token is
    # emitted.
    pass

def t_newline(self, t):
r'\n+'
t.lexer.lineno += len(t.value)
Expand All @@ -58,4 +71,4 @@ def t_error(self, t):
n = 80 if len(t.value) > 30 else len(t.value) - 1
error_message = f"{source} - Illegal character at line {t.lexer.lineno}: '{t.value[0]}' " +\
f"Near: '{t.value[0:n]}...'"
raise MettaLexerError(error_message)
raise AtomeseLexerError(error_message)
19 changes: 19 additions & 0 deletions das/atomese_lex_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
from das.atomese_lex import AtomeseLex

lex_test_data = """
(EvaluationLink (stv 1.0 0.964)
(PredicateNode "interacts_with")
(GeneNode "E"))
(ContextLink
(MemberLink
(ChebiNode "ChEBI:10033")
Expand Down Expand Up @@ -31,6 +34,22 @@ def test_lexer():
wrap = AtomeseLex()
#wrap.build()
expected_tokens = [
"ATOM_OPENNING",
"ATOM_TYPE",
"ATOM_OPENNING",
"STV",
"FLOAT",
"FLOAT",
"ATOM_CLOSING",
"ATOM_OPENNING",
"ATOM_TYPE",
"NODE_NAME",
"ATOM_CLOSING",
"ATOM_OPENNING",
"ATOM_TYPE",
"NODE_NAME",
"ATOM_CLOSING",
"ATOM_CLOSING",
"ATOM_OPENNING",
"ATOM_TYPE",
"ATOM_OPENNING",
Expand Down
39 changes: 30 additions & 9 deletions das/atomese_yacc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
NODE -> ATOM_OPENNING ATOM_TYPE NODE_NAME ATOM_CLOSING
LINK -> ATOM_OPENNING ATOM_TYPE ATOM_LIST ATOM_CLOSING
| ATOM_OPENNING ATOM_TYPE STV_DEFINITION ATOM_LIST ATOM_CLOSING
STV_DEFINITION -> ATOM_OPENNING STV FLOAT FLOAT ATOM_CLOSING
ATOM_LIST -> ATOM
| ATOM_LIST ATOM
Expand Down Expand Up @@ -89,20 +92,28 @@ def p_NODE(self, p):
expression = self._new_terminal(terminal_name)
p[0] = expression

def p_LINK(self, p):
def p_STV_DEFINITION(self, p):
    """STV_DEFINITION : ATOM_OPENNING STV FLOAT FLOAT ATOM_CLOSING"""
    # NOTE: the docstring above is the yacc grammar rule - it must stay
    # exactly as written. Truth values are recognized but intentionally
    # discarded: p[0] is left as None and the LINK rules ignore this slot.
    pass

def p_LINK_no_stv(self, p):
    """LINK : ATOM_OPENNING ATOM_TYPE ATOM_LIST ATOM_CLOSING"""
    # NOTE: the docstring above is the yacc grammar rule - it must stay
    # exactly as written.
    # In check mode, or when no action broker is attached, only a printable
    # "<type: targets>" representation is produced - no expressions are built.
    if self.check_mode or not self.action_broker:
        p[0] = f"<{p[2]}: {p[3]}>"
        return
    link_type = p[2]
    targets = p[3]
    # _new_link also registers the link type as a typedef on first use.
    expression = self._new_link(link_type, targets)
    p[0] = expression

def p_LINK_stv(self, p):
    """LINK : ATOM_OPENNING ATOM_TYPE STV_DEFINITION ATOM_LIST ATOM_CLOSING"""
    # NOTE: the docstring above is the yacc grammar rule - it must stay
    # exactly as written.
    # Same as the no-stv variant except the atom list sits in p[4]; the
    # parsed STV_DEFINITION (p[3]) is deliberately ignored.
    if self.check_mode or not self.action_broker:
        # Printable "<type: targets>" form only - no expressions are built.
        p[0] = f"<{p[2]}: {p[4]}>"
        return
    p[0] = self._new_link(p[2], p[4])

def p_ATOM_LIST_base(self, p):
Expand Down Expand Up @@ -139,3 +150,13 @@ def __init__(self, **kwargs):
self.nodes = set()
named_type_hash = self._get_named_type_hash(BASIC_TYPE)
self.parent_type[named_type_hash] = named_type_hash

def _new_link(self, link_type, targets):
    # Build the expression for a link of the given type over `targets`.
    # On the first occurrence of a link type, register it as a top-level
    # typedef (subtype of BASIC_TYPE) through the action broker.
    if link_type not in self.types:
        self.types.add(link_type)
        typedef_expression = self._typedef(link_type, BASIC_TYPE)
        typedef_expression.toplevel = True
        self.action_broker.new_top_level_typedef_expression(typedef_expression)
    head_expression = self._new_symbol(link_type)
    return self._nested_expression([head_expression, *targets])
12 changes: 6 additions & 6 deletions das/atomese_yacc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,16 @@ def test_action_broker():
yacc_wrap = AtomeseYacc(action_broker=action_broker)
result = yacc_wrap.parse(test_data)
assert result == "SUCCESS"
assert action_broker.count_terminal == 9
assert action_broker.count_terminal == 11
assert action_broker.count_nested_expression == 7
assert action_broker.count_toplevel_expression == 3
assert action_broker.count_type == 8 + action_broker.count_terminal
assert action_broker.count_toplevel_expression == 4
assert action_broker.count_type == 9 + action_broker.count_terminal

action_broker = ActionBroker(test_data)
yacc_wrap = AtomeseYacc(action_broker=action_broker)
result = yacc_wrap.parse_action_broker_input()
assert result == "SUCCESS"
assert action_broker.count_terminal == 9
assert action_broker.count_terminal == 11
assert action_broker.count_nested_expression == 7
assert action_broker.count_toplevel_expression == 3
assert action_broker.count_type == 8 + action_broker.count_terminal
assert action_broker.count_toplevel_expression == 4
assert action_broker.count_type == 9 + action_broker.count_terminal
9 changes: 7 additions & 2 deletions das/distributed_atom_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""

import os
from time import sleep
from pymongo import MongoClient as MongoDBClient
from couchbase.cluster import Cluster as CouchbaseDB
from couchbase.auth import PasswordAuthenticator as CouchbasePasswordAuthenticator
Expand Down Expand Up @@ -94,8 +95,12 @@ def _read_knowledge_base(self, kwargs):
ParserThread(KnowledgeBaseFile(self.db, file_name, shared_data))
for file_name in knowledge_base_file_list
]
for thread in parser_threads:
thread.start()
for i in range(len(parser_threads)):
parser_threads[i].start()
# Sleep to avoid a concurrency hazard in yacc lib startup
# (which is not thread safe)
if i < (len(parser_threads) - 1):
sleep(10)
for thread in parser_threads:
thread.join()
assert shared_data.parse_ok_count == len(parser_threads)
Expand Down
11 changes: 8 additions & 3 deletions das/parser_threads.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ def __init__(self, db: DBInterface, shared_data: SharedData):

def run(self):
print(f"MongoDB links uploader thread {self.name} (TID {self.native_id}) started.")
duplicates = 0
stopwatch_start = time.perf_counter()
bulk_insertion_1 = []
bulk_insertion_2 = []
Expand All @@ -312,13 +313,17 @@ def run(self):
mongo_collection.insert_many(bulk_insertion_1)
if bulk_insertion_2:
mongo_collection = self.db.mongo_db[MongoCollections.LINKS_ARITY_2]
mongo_collection.insert_many(bulk_insertion_2)
try:
mongo_collection.insert_many(bulk_insertion_2)
except Exception:
duplicates += 1
pass
if bulk_insertion_N:
mongo_collection = self.db.mongo_db[MongoCollections.LINKS_ARITY_N]
mongo_collection.insert_many(bulk_insertion_N)
self.shared_data.mongo_uploader_ok = True
elapsed = (time.perf_counter() - stopwatch_start) // 60
print(f"MongoDB links uploader thread {self.name} (TID {self.native_id}) finished. {elapsed:.0f} minutes.")
print(f"MongoDB links uploader thread {self.name} (TID {self.native_id}) finished. {duplicates} hash colisions. {elapsed:.0f} minutes.")

class PopulateCouchbaseCollectionThread(Thread):

Expand Down Expand Up @@ -351,7 +356,7 @@ def run(self):
first_block = couchbase_collection.get(key)
couchbase_collection.upsert(f"{key}_0", first_block.content, timeout=datetime.timedelta(seconds=100))
couchbase_collection.upsert(key, block_count + 1)
couchbase_collection.upsert(f"{key}_{block_count}", v, timeout=datetime.timedelta(seconds=100))
couchbase_collection.upsert(f"{key}_{block_count}", value, timeout=datetime.timedelta(seconds=100))
elapsed = (time.perf_counter() - stopwatch_start) // 60
self.shared_data.process_ok()
print(f"Couchbase collection uploader thread {self.name} (TID {self.native_id}) finished. {elapsed:.0f} minutes.")
2 changes: 1 addition & 1 deletion das/pattern_matcher/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def __init__(self, architecture: DB_Architecture, rounds: int, gene_count: int,
"port": 27017,
"username": "dbadmin",
"password": "dassecret",
"database": "BIO",
"database": "das",
}
couchbase_specs = {
"hostname": "couchbase",
Expand Down
3 changes: 2 additions & 1 deletion scripts/load_das.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from das.distributed_atom_space import DistributedAtomSpace

das = DistributedAtomSpace(knowledge_base_dir_name="./data/annotation_service",show_progress=True)
das = DistributedAtomSpace(knowledge_base_dir_name="/tmp/bio")
#das = DistributedAtomSpace(knowledge_base_file_name="/tmp/bio/COVID-19-biogrid_LATEST.tab3.zip_2020-10-20.scm")
#das = DistributedAtomSpace(knowledge_base_dir_name="./data/samples",show_progress=True)
#das = DistributedAtomSpace(knowledge_base_file_name="./data/samples/animals.metta")

0 comments on commit 4ecac6f

Please sign in to comment.