Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jena changes #324

Merged
merged 17 commits into from
Feb 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
338 changes: 338 additions & 0 deletions src/main/bin/devreset-api.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,338 @@
#!/bin/bash -f
#
# PREREQUISITE: This script requires a download of the "UnitTestData"
# (https://drive.google.com/drive/u/0/folders/1kXIr9J3jgO-8fN01LJwhNkOuZbAfaQBh)
# to a directory called "UnitTestData" that must live under whatever
# directory is mounted as /data within the stardog container. Thus, while in
# the stardog container the path /data/UnitTestData must be available.
#
# It resets the stardog and elasticsearch data sets locally to update to
# the latest dev testing data set at that google drive URL.
#
help=0
while [[ "$#" -gt 0 ]]; do case $1 in
--help) help=1;;
*) arr=( "${arr[@]}" "$1" );;
esac; shift; done

if [ $help == 1 ] || [ ${#arr[@]} -ne 2 ]; then
echo "Usage: src/main/bin/devreset.sh \"c:/data/UnitTestData\" stardog"
echo " e.g. src/main/bin/devreset.sh ../data/UnitTestData stardog"
exit 1
fi
dir=${arr[0]}
db_type=${arr[1]}
# Hardcode the history file
historyFile=$dir/cumulative_history_21.06e.txt


if [[ $db_type = "stardog" ]]; then
databases=("NCIT2" "CTRP")
curl_cmd="curl -s -f -u ${STARDOG_USERNAME}:${STARDOG_PASSWORD}"
elif [[ $db_type = "jena" ]]; then
databases=("NCIT2" "CTRP")
curl_cmd="curl -s -f"
fi

# Set up ability to format json
jq --help >> /dev/null 2>&1
if [[ $? -eq 0 ]]; then
jq="jq ."
else
jq="python -m json.tool"
fi

echo "--------------------------------------------------"
echo "Starting ...`/bin/date`"
echo "--------------------------------------------------"
echo "dir = $dir"
echo "db_type = $db_type"
echo ""
#set -e

# Check configuration
if [[ -z $STARDOG_HOST ]]; then
echo "ERROR: STARDOG_HOST is not set"
exit 1
elif [[ -z $STARDOG_PORT ]]; then
echo "ERROR: STARDOG_PORT is not set"
exit 1
elif [[ -z $STARDOG_USERNAME ]]; then
echo "ERROR: STARDOG_USERNAME is not set"
exit 1
elif [[ -z $STARDOG_PASSWORD ]]; then
echo "ERROR: STARDOG_PASSWORD is not set"
exit 1
elif [[ -z $ES_SCHEME ]]; then
echo "ERROR: ES_SCHEME is not set"
exit 1
elif [[ -z $ES_HOST ]]; then
echo "ERROR: ES_HOST is not set"
exit 1
elif [[ -z $ES_PORT ]]; then
echo "ERROR: ES_PORT is not set"
exit 1
fi

# Prerequisites - check the UnitTest
echo " Check prerequisites"

# Check that reindex.sh is at src/main/bin
if [[ ! -e "src/main/bin/reindex.sh" ]]; then
echo "ERROR: src/main/bin/reindex.sh does not exist, run from top-level evsrestapi directory"
exit 1
fi

# Check NCIM
echo " check NCIM"
ct=`ls $dir/NCIM | grep RRF | wc -l`
if [[ $ct -le 20 ]]; then
echo "ERROR: unexpectedly small number of NCIM/*RRF files = $ct"
exit 1
fi
# Check NCIt weekly
echo " check NCIt weekly"
if [[ ! -e "$dir/ThesaurusInferred_+1weekly.owl" ]]; then
echo "ERROR: unexpectedly ThesaurusInferred_+1weekly.owl file"
exit 1
fi
# Check NCIt monthly
echo " check NCIt monthly"
if [[ ! -e "$dir/ThesaurusInferred_monthly.owl" ]]; then
echo "ERROR: unexpectedly ThesaurusInferred_monthly.owl file"
exit 1
fi
# Check GO monthly
echo " check GO monthly"
if [[ ! -e "$dir/GO/go.2022-07-01.owl" ]]; then
echo "ERROR: unexpectedly missing GO/go.2022-07-01.owl file"
exit 1
fi

# Check HGNC monthly
echo " check HGNC monthly"
if [[ ! -e "$dir/HGNC/HGNC_202209.owl" ]]; then
echo "ERROR: unexpectedly missing HGNC/HGNC_202209.owl file"
exit 1
fi

# Check ChEBI monthly
echo " check ChEBI monthly"
if [[ ! -e "$dir/ChEBI/chebi_213.owl" ]]; then
echo "ERROR: unexpectedly missing ChEBI/chebi_213.owl file"
exit 1
fi

# Check DUO
echo " check DUO"
if [[ ! -e "$dir/DUO/duo_Feb21.owl" ]]; then
echo "ERROR: unexpectedly missing DUO/duo_Feb21.owl file"
exit 1
fi

# Check OBI
echo " check OBI"
if [[ ! -e "$dir/OBI/obi_2022_07.owl" ]]; then
echo "ERROR: unexpectedly missing OBI/obi_2022_07.owl file"
exit 1
fi

# Check OBIB
echo " check OBIB"
if [[ ! -e "$dir/OBIB/obib_2021-11.owl" ]]; then
echo "ERROR: unexpectedly missing OBI/obib_2021-11.owl file"
exit 1
fi

# Check NDFRT
echo " check NDFRT"
if [[ ! -e "$dir/NDFRT/NDFRT_Public_2018.02.05_Inferred.owl" ]]; then
echo "ERROR: unexpectedly missing NDFRT/NDFRT_Public_2018.02.05_Inferred.owl file"
exit 1
fi

# Verify docker stardog is running
echo " verify $db_type database is running"
if [[ $db_type = "stardog" ]]; then
$curl_cmd "http://${STARDOG_HOST}:${STARDOG_PORT}/admin/healthcheck" > /dev/null
elif [[ $db_type = "jena" ]]; then
$curl_cmd -s -f "http://${STARDOG_HOST}:${STARDOG_PORT}/$/ping" > /dev/null
fi
if [[ $? -ne 0 ]]; then
echo "$db_type is not running"
exit 1
fi


# Verify elasticsearch can be reached
echo " verify elasticsearch can be reached"
curl -s "$ES_SCHEME://$ES_HOST:$ES_PORT/_cat/indices" >> /dev/null
if [[ $? -ne 0 ]]; then
echo "ERROR: problem connecting to elasticsearch"
exit 1
fi

# Remove elasticsearch indexes
remove_elasticsearch_indexes(){
echo " Remove elasticsearch indexes"
curl -s "$ES_SCHEME://$ES_HOST:$ES_PORT/_cat/indices" | cut -d\ -f 3 | egrep "metrics|concept|evs" | grep -v "snomed" | cat > /tmp/x.$$.txt
if [[ $? -ne 0 ]]; then
echo "ERROR: problem connecting to docker elasticsearch"
exit 1
fi
for i in `cat /tmp/x.$$.txt`; do
echo " remove $i"
curl -s -X DELETE "$ES_SCHEME://$ES_HOST:$ES_PORT/$i" >> /dev/null
if [[ $? -ne 0 ]]; then
echo "ERROR: problem removing elasticsearch index $i"
exit 1
fi
done
}
# Reindex ncim - individual terminologies
reindex_ncim(){
for t in MDR ICD10CM ICD9CM LNC SNOMEDCT_US RADLEX PDQ ICD10 HL7V3.0; do
# Keep the NCIM folder around while we run
echo "Load $t (from downloaded data)"
src/main/bin/ncim-part.sh --noconfig $dir/NCIM --keep --terminology $t > /tmp/x.$$.txt 2>&1
if [[ $? -ne 0 ]]; then
cat /tmp/x.$$.txt | sed 's/^/ /'
echo "ERROR: loading $t"
exit 1
fi
done
# Reindex ncim - must run after the prior section so that maps can connect to loaded terminologies
echo " Reindex ncim"
src/main/bin/ncim-part.sh --noconfig $dir/NCIM > /tmp/x.$$.txt 2>&1
if [[ $? -ne 0 ]]; then
cat /tmp/x.$$.txt | sed 's/^/ /'
echo "ERROR: problem running ncim-part.sh"
exit 1
fi
}

drop_databases(){
for db in "${databases[@]}"
do
echo " Dropping $db"
if [[ $db_type = "stardog" ]]; then
$curl_cmd -X DELETE "http://${STARDOG_HOST}:${STARDOG_PORT}/admin/databases/${db}" > /dev/null
elif [[ $db_type = "jena" ]]; then
$curl_cmd -X DELETE "http://${STARDOG_HOST}:${STARDOG_PORT}/$/datasets/${db}" > /dev/null
fi
if [[ $? -ne 0 ]]; then
echo "Error occurred when dropping database ${db}. Response:$_"
exit 1
fi
done
}

create_databases(){
for db in "${databases[@]}"
do
echo " Creating $db"
if [[ $db_type = "stardog" ]]; then
$curl_cmd -X POST -F root="{\"dbname\":\"${db}\"}" "http://${STARDOG_HOST}:${STARDOG_PORT}/admin/databases" > /dev/null
elif [[ $db_type = "jena" ]]; then
$curl_cmd -X POST -d "dbName=${db}&dbType=tdb2" "http://${STARDOG_HOST}:${STARDOG_PORT}/$/datasets" > /dev/null
fi
if [[ $? -ne 0 ]]; then
echo "Error occurred when creating database ${db}. Response:$_"
exit 1
fi
done
}

load_terminology_data_in_transaction(){
echo " Loading $3 into $1"
tx=$(curl -s -u "${STARDOG_USERNAME}":"${STARDOG_PASSWORD}" -X POST "http://localhost:5820/$1/transaction/begin")
curl -s -u "${STARDOG_USERNAME}":"${STARDOG_PASSWORD}" -X POST "http://localhost:5820/$1/${tx}/add?graph-uri=$2" -H "Content-Type: application/rdf+xml" -T - < "$dir/$3"
tx=$(curl -s -u "${STARDOG_USERNAME}":"${STARDOG_PASSWORD}" -X POST "http://localhost:5820/NCIT2/transaction/commit/${tx}")
if [[ $? -ne 0 ]]; then
echo "Error occurred when loading data into $1. Response:$_"
exit 1
fi
}

load_terminology_data(){
echo " Loading $3 into $1"
$curl_cmd -X POST -H "Content-Type: application/rdf+xml" -T "$dir/$3" "http://${STARDOG_HOST}:${STARDOG_PORT}/$1/data?graph=$2" > /dev/null
if [[ $? -ne 0 ]]; then
echo "Error occurred when loading data into $1. Response:$_"
exit 1
fi
}

load_data(){
if [[ $db_type = "stardog" ]]; then
load_terminology_data_in_transaction CTRP http://NCI_T_weekly ThesaurusInferred_+1weekly.owl
load_terminology_data_in_transaction CTRP http://NCI_T_monthly ThesaurusInferred_monthly.owl
load_terminology_data_in_transaction NCIT2 http://NCI_T_monthly ThesaurusInferred_monthly.owl
load_terminology_data_in_transaction NCIT2 http://GO_monthly GO/go.2022-07-01.owl
load_terminology_data_in_transaction NCIT2 http://HGNC_monthly HGNC/HGNC_202209.owl
load_terminology_data_in_transaction NCIT2 http://ChEBI_monthly ChEBI/chebi_213.owl
load_terminology_data_in_transaction NCIT2 http://UmlsSemNet UmlsSemNet/umlssemnet.owl
load_terminology_data_in_transaction NCIT2 http://MEDRT MED-RT/medrt.owl
load_terminology_data_in_transaction NCIT2 http://Canmed CanMed/canmed.owl
load_terminology_data_in_transaction NCIT2 http://CTCAE CTCAE/ctcae5.owl
load_terminology_data_in_transaction NCIT2 http://DUO_monthly DUO/duo_Feb21.owl
load_terminology_data_in_transaction NCIT2 http://DUO_monthly DUO/iao_Dec20.owl
load_terminology_data_in_transaction NCIT2 http://OBI_monthly OBI/obi_2022_07.owl
load_terminology_data_in_transaction NCIT2 http://OBIB OBIB/obib_2021-11.owl
load_terminology_data_in_transaction NCIT2 http://NDFRT2 NDFRT/NDFRT_Public_2018.02.05_Inferred.owl
load_terminology_data_in_transaction NCIT2 http://MGED MGED/MGEDOntology.owl
load_terminology_data_in_transaction NCIT2 http://NPO NPO/npo-2011-12-08_inferred.owl
load_terminology_data_in_transaction NCIT2 http://MA Mouse_Anatomy/ma_07_27_2016.owl
load_terminology_data_in_transaction NCIT2 http://Zebrafish Zebrafish/zfa_2019_08_02.owl
elif [[ $db_type = "jena" ]]; then
load_terminology_data CTRP http://NCI_T_weekly ThesaurusInferred_+1weekly.owl
load_terminology_data CTRP http://NCI_T_monthly ThesaurusInferred_monthly.owl
load_terminology_data NCIT2 http://NCI_T_monthly ThesaurusInferred_monthly.owl
load_terminology_data NCIT2 http://GO_monthly GO/go.2022-07-01.owl
load_terminology_data NCIT2 http://HGNC_monthly HGNC/HGNC_202209.owl
load_terminology_data NCIT2 http://ChEBI_monthly ChEBI/chebi_213.owl
load_terminology_data NCIT2 http://UmlsSemNet UmlsSemNet/umlssemnet.owl
load_terminology_data NCIT2 http://MEDRT MED-RT/medrt.owl
load_terminology_data NCIT2 http://Canmed CanMed/canmed.owl
load_terminology_data NCIT2 http://CTCAE CTCAE/ctcae5.owl
load_terminology_data NCIT2 http://DUO_monthly DUO/duo_Feb21.owl
load_terminology_data NCIT2 http://DUO_monthly DUO/iao_Dec20.owl
load_terminology_data NCIT2 http://OBI_monthly OBI/obi_2022_07.owl
load_terminology_data NCIT2 http://OBIB OBIB/obib_2021-11.owl
load_terminology_data NCIT2 http://NDFRT2 NDFRT/NDFRT_Public_2018.02.05_Inferred.owl
load_terminology_data NCIT2 http://MGED MGED/MGEDOntology.fix.owl
load_terminology_data NCIT2 http://NPO NPO/npo-2011-12-08_inferred.owl
load_terminology_data NCIT2 http://MA Mouse_Anatomy/ma_07_27_2016.owl
load_terminology_data NCIT2 http://Zebrafish Zebrafish/zfa_2019_08_02.owl
fi
}


reindex(){
# Reindex stardog terminologies
echo " Reindex stardog terminologies"
# After this point, the log is stored in the tmp folder unless an error is hit
src/main/bin/reindex.sh --noconfig --history "$historyFile" > /tmp/x.$$.txt 2>&1
if [[ $? -ne 0 ]]; then
cat /tmp/x.$$.txt | sed 's/^/ /'
echo "ERROR: problem running reindex.sh script"
exit 1
fi
}

# Clean and load stardog
echo " Remove stardog databases and load monthly/weekly"
drop_databases
create_databases
remove_elasticsearch_indexes
reindex_ncim
load_data
reindex

# Cleanup
/bin/rm -f /tmp/x.$$.txt $dir/x.{sh,txt}

echo ""
echo "--------------------------------------------------"
echo "Finished ...`/bin/date`"
echo "--------------------------------------------------"
18 changes: 18 additions & 0 deletions src/main/bin/get_databases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import json
import sys

if len(sys.argv) > 1:
GRAPH_DB_TYPE = sys.argv[1]
else:
raise Exception("Expecting graph db type as a script argument")

data = json.load(sys.stdin)

if GRAPH_DB_TYPE.lower() == "stardog":
for db in data["databases"]:
print(db)
elif GRAPH_DB_TYPE.lower() == "jena":
for db in data["datasets"]:
print(db["ds.name"].replace("/", ""))
else:
raise Exception("Unknown graph DB")
14 changes: 14 additions & 0 deletions src/main/bin/get_graphs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import json
import sys

db = sys.argv[1]
data = json.load(sys.stdin)

bindings = data["results"]["bindings"]

for binding in bindings:
version = binding["version"]["value"]
graph_name = binding["graphName"]["value"]
source = binding["source"]["value"]
print("|".join((version, db, graph_name, source)))

Loading