Remove terminal_hash cache from DAS
Andre Senna committed May 9, 2023
1 parent 23b8160 commit 88f89b1
Showing 8 changed files with 64 additions and 156 deletions.
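In short, the commit drops the in-memory terminal_hash dictionary (which memoized (node_type, node_name) → handle lookups) and computes node handles on demand. A minimal before/after sketch, using only the ExpressionHasher call already present in the diff below:

# Before: handles were memoized in a per-instance dict
node_handle = self.terminal_hash.get((node_type, node_name), None)
if node_handle is None:
    node_handle = ExpressionHasher.terminal_hash(node_type, node_name)
    self.terminal_hash[(node_type, node_name)] = node_handle

# After: the hash is deterministic, so it is simply recomputed where needed
node_handle = ExpressionHasher.terminal_hash(node_type, node_name)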
35 changes: 16 additions & 19 deletions das/database/redis_mongo_db.py
@@ -35,7 +35,10 @@ def get(self, handle, default_value):
return node if node else default_value

def size(self):
return self.count
if USE_CACHED_NODES:
return len(self.cached_nodes)
else:
return self.count

def values(self):
for document in self.cached_nodes.values() if USE_CACHED_NODES else self.mongo_collection.find():
@@ -58,9 +61,9 @@ def __init__(self, redis: Redis, mongo_db: Database):
self.named_type_hash_reverse = None
self.named_types = None
self.symbol_hash = None
self.terminal_hash = None
self.parent_type = None
self.node_documents = None
self.terminal_hash = None
self.typedef_mark_hash = ExpressionHasher._compute_hash(":")
self.typedef_base_type_hash = ExpressionHasher._compute_hash("Type")
self.typedef_composite_type_hash = ExpressionHasher.composite_hash([
@@ -79,28 +82,22 @@ def _get_atom_type_hash(self, atom_type):
self.named_type_hash_reverse[named_type_hash] = atom_type
return named_type_hash

def _get_node_handle(self, node_type, node_name):
composite_name = (node_type, node_name)
node_handle = self.terminal_hash.get(composite_name, None)
if node_handle is None:
node_handle = ExpressionHasher.terminal_hash(node_type, node_name)
self.terminal_hash[composite_name] = node_handle
return node_handle

def prefetch(self) -> None:
self.named_type_hash = {}
self.named_type_hash_reverse = {}
self.named_types = {}
self.symbol_hash = {}
self.terminal_hash = {}
self.parent_type = {}
self.terminal_hash = {}
self.node_documents = NodeDocuments(self.mongo_nodes_collection)
for document in self.mongo_nodes_collection.find():
node_id = document[MongoFieldNames.ID_HASH]
node_type = document[MongoFieldNames.TYPE_NAME]
node_name = document[MongoFieldNames.NODE_NAME]
self.node_documents.add(node_id, document)
self.terminal_hash[(node_type, node_name)] = node_id
if USE_CACHED_NODES:
for document in self.mongo_nodes_collection.find():
node_id = document[MongoFieldNames.ID_HASH]
node_type = document[MongoFieldNames.TYPE_NAME]
node_name = document[MongoFieldNames.NODE_NAME]
self.node_documents.add(node_id, document)
else:
self.node_documents.count = self.mongo_nodes_collection.count_documents({})
for document in self.mongo_types_collection.find():
hash_id = document[MongoFieldNames.ID_HASH]
named_type = document[MongoFieldNames.TYPE_NAME]
@@ -193,7 +190,7 @@ def _build_deep_representation(self, handle, arity=-1):
# DB interface methods

def node_exists(self, node_type: str, node_name: str) -> bool:
node_handle = self._get_node_handle(node_type, node_name)
node_handle = ExpressionHasher.terminal_hash(node_type, node_name)
# TODO: use a specific query to nodes table
document = self._retrieve_mongo_document(node_handle)
return document is not None
@@ -204,7 +201,7 @@ def link_exists(self, link_type: str, target_handles: List[str]) -> bool:
return document is not None

def get_node_handle(self, node_type: str, node_name: str) -> str:
return self._get_node_handle(node_type, node_name)
return ExpressionHasher.terminal_hash(node_type, node_name)

def get_link_handle(self, link_type: str, target_handles: List[str]) -> str:
link_handle = ExpressionHasher.expression_hash(self._get_atom_type_hash(link_type), target_handles)
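Taken together with the size() change at the top of this file, node counting now works in both modes: with USE_CACHED_NODES set, prefetch() loads every node document into the cache and size() returns len(cached_nodes); without it, prefetch() only records mongo_nodes_collection.count_documents({}) and size() falls back to that stored count.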
1 change: 0 additions & 1 deletion das/database/redis_mongo_db_test.py
@@ -64,7 +64,6 @@ def test_db_creation(db: DBInterface):
assert db.redis
assert db.mongo_db
assert db.node_documents.size() == 14
assert len(db.terminal_hash) == 14
assert len(db.named_type_hash) == 18
assert len(db.named_type_hash_reverse) == 18
assert len(db.named_types) == 18
2 changes: 1 addition & 1 deletion das/distributed_atom_space.py
@@ -47,7 +47,7 @@ def _setup_database(self):
hostname = os.environ.get('DAS_REDIS_HOSTNAME')
port = os.environ.get('DAS_REDIS_PORT')
#TODO fix this to use a proper parameter
if port == 7000:
if port == "7000":
logger().info(f"Using Redis cluster at port {port}")
self.redis = RedisCluster(host=hostname, port=port, decode_responses=False)
else:
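A side note on the one-line fix above: values read from os.environ are strings, so the old comparison against the integer 7000 could never be true. A minimal illustration (hypothetical value, just to show the type behavior):

import os

os.environ["DAS_REDIS_PORT"] = "7000"
port = os.environ.get("DAS_REDIS_PORT")
print(port == 7000)    # False: environment values are strings
print(port == "7000")  # True: compare against the string literal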
File renamed without changes.
157 changes: 25 additions & 132 deletions notebooks/SimplePatternMiner.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "bed9889a",
"metadata": {},
"outputs": [],
@@ -16,12 +16,16 @@
"import numpy as np\n",
"from itertools import combinations\n",
"warnings.filterwarnings('ignore')\n",
"TARGET_NODES = None"
"TARGET_NODES = None\n",
"das = DistributedAtomSpace()\n",
"db = das.db\n",
"db.prefetch()\n",
"das.count_atoms()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "b13079c5",
"metadata": {},
"outputs": [],
@@ -47,43 +51,23 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "3a70779e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Log initialized. Log file: /tmp/das.log\n"
]
}
],
"outputs": [],
"source": [
"assert len(DEPTH_WEIGTH) == HALO_LENGTH\n",
"halo_levels = [i for i in range(HALO_LENGTH)]\n",
"das = DistributedAtomSpace()\n",
"db = das.db\n",
"db.prefetch()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f71fdbf3",
"metadata": {},
"outputs": [],
"source": [
"if TARGET_NODES is None:\n",
" atomspace_nodes = db.get_matched_node_name(TARGET_TYPE, TARGET_SUBSTRING)\n",
" print(atomspace_nodes)\n",
" TARGET_NODES = [Node(TARGET_TYPE, db.get_node_name(h)) for h in atomspace_nodes]\n",
" print(TARGET_NODES)"
"print(f\"TARGET_NODES = {TARGET_NODES}\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"id": "3fa7b2ec",
"metadata": {},
"outputs": [],
@@ -162,7 +146,7 @@
"def prob(count):\n",
" return count / universe_size\n",
"\n",
"def compute_isurprisingness(count, terms, counts, normalized = False):\n",
"def compute_isurprisingness(count, terms, term_handles, counts, normalized = False):\n",
" n = len(term_handles)\n",
" if n == 2:\n",
" subset_probs = [prob(counts[0]) * prob(counts[1])]\n",
@@ -256,28 +240,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"id": "43bab382",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(25, 60)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"das.count_atoms()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"id": "5851202f",
"metadata": {},
"outputs": [],
@@ -306,21 +269,10 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "cd24fce5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"10\n",
"25\n",
"----------\n",
"35\n"
]
}
],
"outputs": [],
"source": [
"total = 0\n",
"for level in range(HALO_LENGTH):\n",
@@ -333,7 +285,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"id": "45fd7384",
"metadata": {},
"outputs": [],
@@ -354,21 +306,10 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "981a8a28",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"11\n",
"14\n",
"----------\n",
"25\n"
]
}
],
"outputs": [],
"source": [
"total = 0\n",
"for level in range(HALO_LENGTH):\n",
@@ -381,30 +322,10 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"id": "c34c1d24",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4 0.10122448979591836: [<Inheritance: [<Concept: Davion>, V1]>, <Inheritance: [<Concept: Allen>, V1]>, <Inheritance: [<Concept: Cason>, V1]>] [('c697d8d1e5d994f267305ef5f8dd2873', 0), ('6810d7150c5bbacf6f06e199cf316fa8', 1), ('33016bc6c256c84eb094f119466fbd2d', 1)] [4, 4, 4]\n",
"5 0.10204081632653061: [<Inheritance: [V1, <Concept: ugly>]>, <Inheritance: [V1, <Concept: man>]>, <Inheritance: [V1, <Concept: soda drinker>]>] [('dbe995f7d87acada986458ba215c9ce5', 0), ('6221bad34726e0690688d77e355af010', 1), ('f4ec0dd38186aad7e48eaf196e3f1d7e', 1)] [10, 10, 10]\n",
"AND([AND([<Inheritance: [V1, <Concept: ugly>]>, <Inheritance: [V1, <Concept: man>]>]), <Inheritance: [V1, <Concept: soda drinker>]>])\n",
"V1: Abe\n",
"\n",
"V1: Hessley\n",
"\n",
"V1: Davion\n",
"\n",
"V1: Cason\n",
"\n",
"V1: Allen\n",
"\n"
]
}
],
"outputs": [],
"source": [
"higher_isurprisingness = 0\n",
"best_pattern = None\n",
@@ -425,7 +346,7 @@
" composite_pattern = build_composite_pattern(terms)\n",
" count = compute_count(composite_pattern)\n",
" if count >= SUPPORT:\n",
" isurprisingness = compute_isurprisingness(count, terms, counts, normalized=NORMALIZED_ISURPRISINGNESS) \n",
" isurprisingness = compute_isurprisingness(count, terms, term_handles, counts, normalized=NORMALIZED_ISURPRISINGNESS) \n",
" if isurprisingness > higher_isurprisingness:\n",
" print(f\"{count} {isurprisingness}: {terms} {term_handles} {counts}\")\n",
" higher_isurprisingness = isurprisingness\n",
@@ -435,30 +356,10 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"id": "efac9ac6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5 0.061224489795918366: [<Inheritance: [V1, <Concept: ugly>]>, <Inheritance: [V1, <Concept: soda drinker>]>, <Inheritance: [V1, <Concept: human>]>] [10, 10, 20]\n",
"5 0.10204081632653061: [<Inheritance: [V1, <Concept: ugly>]>, <Inheritance: [V1, <Concept: soda drinker>]>, <Inheritance: [V1, <Concept: man>]>] [10, 10, 10]\n",
"AND([AND([<Inheritance: [V1, <Concept: ugly>]>, <Inheritance: [V1, <Concept: soda drinker>]>]), <Inheritance: [V1, <Concept: man>]>])\n",
"V1: Abe\n",
"\n",
"V1: Hessley\n",
"\n",
"V1: Davion\n",
"\n",
"V1: Cason\n",
"\n",
"V1: Allen\n",
"\n"
]
}
],
"outputs": [],
"source": [
"higher_isurprisingness = 0\n",
"best_pattern = None\n",
@@ -474,7 +375,7 @@
" composite_pattern = build_composite_pattern(terms)\n",
" count = compute_count(composite_pattern)\n",
" if count >= SUPPORT:\n",
" isurprisingness = compute_isurprisingness(count, terms, counts, normalized=NORMALIZED_ISURPRISINGNESS) \n",
" isurprisingness = compute_isurprisingness(count, terms, term_handles, counts, normalized=NORMALIZED_ISURPRISINGNESS) \n",
" if isurprisingness > higher_isurprisingness:\n",
" print(f\"{count} {isurprisingness}: {terms} {counts}\")\n",
" higher_isurprisingness = isurprisingness\n",
@@ -485,15 +386,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "c82db547",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b36dd876",
"id": "b90ff166",
"metadata": {},
"outputs": [],
"source": []
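One functional change in the notebook worth calling out: compute_isurprisingness now takes term_handles as an explicit argument instead of picking it up from the enclosing scope, and both mining cells pass it through. The updated call, as it appears in the diff:

isurprisingness = compute_isurprisingness(
    count, terms, term_handles, counts,
    normalized=NORMALIZED_ISURPRISINGNESS,
)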
1 change: 1 addition & 0 deletions scripts/debug-container-up.sh
@@ -16,6 +16,7 @@ docker run \
--network="host" \
--volume /tmp:/tmp \
--volume $(pwd):/app/das \
--workdir /app/das \
-ti \
das:latest \
bash
21 changes: 21 additions & 0 deletions scripts/jupyter-notebook-debug.sh
@@ -0,0 +1,21 @@
#!/bin/bash

docker run \
--name jupyter-notebook \
--env DAS_MONGODB_HOSTNAME=${DAS_MONGODB_HOSTNAME:-mongo} \
--env DAS_MONGODB_PORT=${DAS_MONGODB_PORT:-27017} \
--env DAS_REDIS_HOSTNAME=${DAS_REDIS_HOSTNAME:-redis} \
--env DAS_REDIS_PORT=${DAS_REDIS_PORT:-6379} \
--env DAS_DATABASE_USERNAME=${DAS_DATABASE_USERNAME:-dbadmin} \
--env DAS_DATABASE_PASSWORD=${DAS_DATABASE_PASSWORD:-dassecret} \
--env PYTHONPATH=/app \
--env TZ=${TZ} \
--network="host" \
--volume /tmp:/tmp \
--volume /mnt:/mnt \
--volume /opt/das/data:/data \
--volume $(pwd)/notebooks:/app/notebooks \
das:latest \
jupyter-notebook --ip 0.0.0.0 --port 8887 --no-browser --allow-root

docker rm jupyter-notebook >& /dev/null
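Typical usage, assuming the das:latest image has already been built and the MongoDB/Redis endpoints referenced by the environment variables are reachable: run scripts/jupyter-notebook-debug.sh from the repository root and open http://localhost:8887 in a browser; since the container uses the host network, the notebook port should be reachable directly on the host.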