Skip to content

Commit

Permalink
Setup cluster deployment environment
Browse files Browse the repository at this point in the history
  • Loading branch information
Andre Senna committed May 8, 2023
1 parent 59669e6 commit 7072fab
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 13 deletions.
26 changes: 17 additions & 9 deletions das/canonical_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from das.database.key_value_schema import CollectionNames as KeyPrefix, build_redis_key
from das.database.mongo_schema import CollectionNames as MongoCollections
from das.key_value_file import write_key_value, key_value_generator, key_value_targets_generator, sort_file
import das.key_value_file
from das.database.db_interface import WILDCARD

class State(str, Enum):
Expand All @@ -21,8 +22,8 @@ def _file_line_count(file_name):

EXPRESSIONS_CHUNK_SIZE = 10000000
HINT_FILE_SIZE = None
#TMP_DIR = '/tmp'
TMP_DIR = '/mnt/HD10T/nfs_share/work/tmp'
TMP_DIR = '/tmp'
#TMP_DIR = '/mnt/HD10T/nfs_share/work/tmp'

class CanonicalParser:

Expand Down Expand Up @@ -193,15 +194,22 @@ def _populate_mongo_links(self):
self._mongo_insert_many(mongo_collection, bulk_insertion_N)

def _populate_redis_collection(self, collection_name, use_targets, merge_rest, update):
    """
    Load the temporary key-value file of one collection into Redis sets.

    The file path is looked up in self.temporary_file_name[collection_name].
    Every (key, value, block_count) entry yielded by the key-value generator
    is written with SADD under build_redis_key(collection_name, key).

    Parameters:
        collection_name: collection whose temporary file is loaded; also used
            as the prefix passed to build_redis_key().
        use_targets: when true, key_value_targets_generator is used and each
            element of value is pickled before being added; otherwise
            key_value_generator is used and the elements are added as-is.
        merge_rest: forwarded unchanged to the generator.
        update: not used by this method (kept for interface compatibility).

    Raises:
        AssertionError: if building or writing any entry fails. The original
            exception is chained as __cause__.
    """
    logger().info(f"Populating collection {collection_name}")
    file_name = self.temporary_file_name[collection_name]
    generator = key_value_targets_generator if use_targets else key_value_generator

    key_count = 0
    # block_count is not needed here: entries are added to the Redis set as
    # they arrive, whatever block they belong to.
    for key, value, _block_count in generator(file_name, block_size=100000, merge_rest=merge_rest):
        key_count += 1
        if key_count % 100000 == 0:
            # Periodic progress report; the line counter is maintained by the
            # generators in das.key_value_file.
            logger().info(f"Added {key_count} keys (line count = {das.key_value_file.KEY_VALUE_LINE_COUNTER})")
        try:
            if use_targets:
                self.db.redis.sadd(build_redis_key(collection_name, key), *[pickle.dumps(v) for v in value])
            else:
                self.db.redis.sadd(build_redis_key(collection_name, key), *value)
        except Exception as error:
            # `except Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate untouched. Raising explicitly instead of
            # `assert False` keeps the abort effective under `python -O` and
            # preserves the root cause for debugging.
            logger().error(f"Error in key-value file {collection_name} on line {das.key_value_file.KEY_VALUE_LINE_COUNTER}")
            raise AssertionError(
                f"Failed to populate Redis collection {collection_name}"
            ) from error

def _populate_redis(self):
self._populate_redis_collection(KeyPrefix.OUTGOING_SET, False, False, False),
Expand Down
6 changes: 6 additions & 0 deletions das/key_value_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@ def key_value_generator(input_filename, *, block_size=None, merge_rest=False):
last_key = ''
last_list = []
block_count = 0
global KEY_VALUE_LINE_COUNTER
KEY_VALUE_LINE_COUNTER = 0
with open(input_filename, 'r') as fh:
for line in fh:
KEY_VALUE_LINE_COUNTER += 1
line = line.strip()
if line == '':
continue
Expand Down Expand Up @@ -47,9 +50,12 @@ def key_value_generator(input_filename, *, block_size=None, merge_rest=False):
def key_value_targets_generator(input_filename, *, block_size=None, merge_rest=False):
last_key = ''
last_list = []
global KEY_VALUE_LINE_COUNTER
KEY_VALUE_LINE_COUNTER = 0
block_count = 0
with open(input_filename, 'r') as fh:
for line in fh:
KEY_VALUE_LINE_COUNTER += 1
line = line.strip()
if line == '':
continue
Expand Down
8 changes: 8 additions & 0 deletions environment_das
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Connection settings for the DAS Redis and MongoDB back ends.
# This file is sourced by scripts/build-das.sh, scripts/canonical-load.sh and
# scripts/jupyter-notebook.sh; the latter two forward these values into the
# container with `docker run --env`.

# Redis endpoint.
export DAS_REDIS_HOSTNAME=149.28.192.132
export DAS_REDIS_PORT=7000

# MongoDB endpoint.
export DAS_MONGODB_HOSTNAME=45.63.83.31
export DAS_MONGODB_PORT=27018

# NOTE(review): these credentials are committed in plain text alongside literal
# host addresses, and the password equals the fallback default hard-coded in
# the launch scripts. Consider loading them from an untracked file or a secret
# store and rotating the password -- TODO confirm whether these hosts are
# reachable from outside the cluster.
export DAS_DATABASE_USERNAME=dbadmin
export DAS_DATABASE_PASSWORD=dassecret
3 changes: 2 additions & 1 deletion scripts/build-das.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/bash

docker build -t das \
source environment_das
docker build --no-cache -t das \
--build-arg USER_ID=$(id -u) \
--build-arg GROUP_ID=$(id -g) \
-f Dockerfile .
6 changes: 3 additions & 3 deletions scripts/canonical-load.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/bin/bash

docker stop das >& /dev/null
docker rm das >& /dev/null
source environment_das
docker run \
--name canonical-load \
--env DAS_MONGODB_HOSTNAME=${DAS_MONGODB_HOSTNAME:-mongo} \
Expand All @@ -12,10 +11,11 @@ docker run \
--env DAS_DATABASE_PASSWORD=${DAS_DATABASE_PASSWORD:-dassecret} \
--env PYTHONPATH=/app \
--env TZ=${TZ} \
--detach \
--network="host" \
--volume /tmp:/tmp \
--volume /mnt:/mnt \
--volume /media:/media \
--volume /opt/das/data:/data \
das:latest \
python3 scripts/load_das.py --canonical --knowledge-base $1

Expand Down
22 changes: 22 additions & 0 deletions scripts/jupyter-notebook.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
#
# Run a Jupyter notebook server from the das:latest image, listening on port
# 8887 with host networking, using the database settings from environment_das
# (falling back to the defaults below for anything that file does not set).
#
# environment_das is resolved relative to the current directory, so run this
# script from the directory that contains it.

source environment_das

# Remove a stopped container left behind by an interrupted run; otherwise
# `docker run --name jupyter-notebook` fails with a name conflict. Without
# --force this never touches a container that is still running.
docker rm jupyter-notebook >& /dev/null

# The container runs in the foreground (no --detach), so the clean-up at the
# end of this script only happens after the notebook server exits.
# Expansions are quoted so values containing spaces or glob characters are
# passed to docker as a single argument.
docker run \
    --name jupyter-notebook \
    --env "DAS_MONGODB_HOSTNAME=${DAS_MONGODB_HOSTNAME:-mongo}" \
    --env "DAS_MONGODB_PORT=${DAS_MONGODB_PORT:-27017}" \
    --env "DAS_REDIS_HOSTNAME=${DAS_REDIS_HOSTNAME:-redis}" \
    --env "DAS_REDIS_PORT=${DAS_REDIS_PORT:-6379}" \
    --env "DAS_DATABASE_USERNAME=${DAS_DATABASE_USERNAME:-dbadmin}" \
    --env "DAS_DATABASE_PASSWORD=${DAS_DATABASE_PASSWORD:-dassecret}" \
    --env PYTHONPATH=/app \
    --env "TZ=${TZ}" \
    --network="host" \
    --volume /tmp:/tmp \
    --volume /mnt:/mnt \
    --volume /opt/das/data:/data \
    das:latest \
    jupyter-notebook --ip 0.0.0.0 --port 8887 --no-browser --allow-root

# Drop the exited container so the name can be reused by the next run.
docker rm jupyter-notebook >& /dev/null

0 comments on commit 7072fab

Please sign in to comment.