diff --git a/CHANGELOG.md b/CHANGELOG.md index 37d1bed..724ffd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [1.5.1] - 2024-12-23 + +### Added +- Error warnings in case no appointments are provided +- Added ssl-certificate fix in deploy script +- Added unit tests for empty predictions + +### Changed +- Saving of predictions is now located in helper function +- Updated requirements to solve dependabot alert ## [1.5.0] - 2024-12-10 diff --git a/deploy.sh b/deploy.sh index 3c0b57e..2dac155 100644 --- a/deploy.sh +++ b/deploy.sh @@ -6,6 +6,9 @@ do fi done +# necessary for certifi to find the right certificate if run without conda forge's ca-certificates +export SSL_CERT_FILE=/etc/ssl/certs/ca-bundle.crt + read -p "What do you want to deploy? Options: 'admin-dash'/1 ; 'calling-dash'/2 ; 'calling-dash-test'/3 ; 'api'/4 ; 'api-test'/5 " APPLICATION APPLICATION=${APPLICATION:-N} diff --git a/pyproject.toml b/pyproject.toml index 1b01468..a830e8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "noshow" -version = "1.5.0" +version = "1.5.1" authors = [ { name="Ruben Peters", email="r.peters-7@umcutrecht.nl" }, { name="Eric Wolters", email="e.j.wolters-4@umcutrecht.nl" } @@ -31,6 +31,7 @@ dependencies =[ "streamlit>=1.30", "tomli~=2.0", "relplot>=1.0", + "pygit2<1.17", # Version 1.18 has no available wheels yet ] [dependency-groups] diff --git a/requirements.txt b/requirements.txt index b1b15bb..e1a774b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,26 +2,26 @@ # uv export --no-dev --no-group lint --no-hashes -o requirements.txt -e . 
aiohappyeyeballs==2.4.4 -aiohttp==3.11.10 +aiohttp==3.11.11 aiohttp-retry==2.9.1 -aiosignal==1.3.1 +aiosignal==1.3.2 altair==5.5.0 amqp==5.3.1 annotated-types==0.7.0 antlr4-python3-runtime==4.9.3 -anyio==4.7.0 +anyio==4.8.0 appdirs==1.4.4 -asyncssh==2.18.0 +asyncssh==2.19.0 atpublic==5.0 -attrs==24.2.0 +attrs==24.3.0 billiard==4.2.1 blinker==1.9.0 cachetools==5.5.0 celery==5.4.0 -certifi==2024.8.30 +certifi==2024.12.14 cffi==1.17.1 -charset-normalizer==3.4.0 -click==8.1.7 +charset-normalizer==3.4.1 +click==8.1.8 click-didyoumean==0.3.1 click-plugins==1.1.1 click-repl==0.3.0 @@ -35,7 +35,7 @@ dictdiffer==0.9.0 diskcache==5.6.3 distro==1.9.0 dpath==2.2.0 -dulwich==0.22.6 +dulwich==0.22.7 dvc==3.58.0 dvc-data==3.16.7 dvc-http==2.32.0 @@ -43,7 +43,7 @@ dvc-objects==5.1.0 dvc-render==1.0.2 dvc-studio-client==0.21.0 dvc-task==0.40.2 -dvclive==3.48.0 +dvclive==3.48.1 entrypoints==0.4 fastapi==0.115.6 filelock==3.16.1 @@ -51,54 +51,54 @@ flatten-dict==0.4.2 flufl-lock==8.1.0 fonttools==4.55.3 frozenlist==1.5.0 -fsspec==2024.10.0 +fsspec==2024.12.0 funcy==2.0 -gitdb==4.0.11 -gitpython==3.1.43 +gitdb==4.0.12 +gitpython==3.1.44 grandalf==0.8 greenlet==3.1.1 ; (python_full_version < '3.13' and platform_machine == 'AMD64') or (python_full_version < '3.13' and platform_machine == 'WIN32') or (python_full_version < '3.13' and platform_machine == 'aarch64') or (python_full_version < '3.13' and platform_machine == 'amd64') or (python_full_version < '3.13' and platform_machine == 'ppc64le') or (python_full_version < '3.13' and platform_machine == 'win32') or (python_full_version < '3.13' and platform_machine == 'x86_64') gto==1.7.2 hydra-core==1.3.2 idna==3.10 iterative-telemetry==0.0.9 -jinja2==3.1.4 +jinja2==3.1.5 joblib==1.4.2 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -kiwisolver==1.4.7 +kiwisolver==1.4.8 kombu==5.4.2 markdown-it-py==3.0.0 markupsafe==3.0.2 -matplotlib==3.9.3 +matplotlib==3.10.0 mdurl==0.1.2 multidict==6.1.0 -narwhals==1.17.0 +narwhals==1.21.1 
networkx==3.4.2 -numpy==2.2.0 +numpy==2.2.1 nvidia-ml-py==12.560.30 omegaconf==2.3.0 -orjson==3.10.12 ; implementation_name == 'cpython' +orjson==3.10.13 ; implementation_name == 'cpython' packaging==24.2 pandas==2.2.3 pathspec==0.12.1 -pillow==11.0.0 +pillow==11.1.0 platformdirs==4.3.6 prompt-toolkit==3.0.48 propcache==0.2.1 -protobuf==5.29.1 -psutil==6.1.0 +protobuf==5.29.2 +psutil==6.1.1 pyarrow==18.1.0 pycparser==2.22 -pydantic==2.10.3 -pydantic-core==2.27.1 +pydantic==2.10.4 +pydantic-core==2.27.2 pydeck==0.9.1 -pydot==3.0.3 +pydot==3.0.4 pygit2==1.16.0 -pygments==2.18.0 +pygments==2.19.1 pygtrie==2.5.0 pymssql==2.3.2 pynvml==12.0.0 -pyparsing==3.2.0 +pyparsing==3.2.1 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 pytz==2024.2 @@ -109,24 +109,24 @@ relplot==1.0 requests==2.32.3 rich==13.9.4 rpds-py==0.22.3 -ruamel-yaml==0.18.6 +ruamel-yaml==0.18.10 ruamel-yaml-clib==0.2.12 ; python_full_version < '3.13' and platform_python_implementation == 'CPython' scikit-learn==1.6.0 -scipy==1.14.1 +scipy==1.15.0 scmrepo==3.3.9 seaborn==0.13.2 -semver==2.13.0 -setuptools==75.6.0 +semver==3.0.2 +setuptools==75.7.0 shellingham==1.5.4 shortuuid==1.0.13 shtab==1.7.1 six==1.17.0 -smmap==5.0.1 +smmap==5.0.2 sniffio==1.3.1 sqlalchemy==2.0.36 sqltrie==0.11.1 starlette==0.41.3 -streamlit==1.41.0 +streamlit==1.41.1 tabulate==0.9.0 tenacity==9.0.0 threadpoolctl==3.5.0 @@ -138,10 +138,10 @@ tqdm==4.67.1 typer==0.15.1 typing-extensions==4.12.2 tzdata==2024.2 -urllib3==2.2.3 +urllib3==2.3.0 vine==5.1.0 voluptuous==0.15.2 -watchdog==6.0.0 ; platform_system != 'Darwin' +watchdog==6.0.0 ; sys_platform != 'darwin' wcwidth==0.2.13 yarl==1.18.3 zc-lockfile==3.0.post1 diff --git a/src/noshow/api/app.py b/src/noshow/api/app.py index 7f38926..e6557af 100644 --- a/src/noshow/api/app.py +++ b/src/noshow/api/app.py @@ -17,14 +17,12 @@ fix_outdated_appointments, load_model, remove_sensitive_info, + store_predictions, ) from noshow.api.pydantic_models import Appointment from noshow.config import 
CLINIC_CONFIG, KEEP_SENSITIVE_DATA from noshow.database.models import ( - ApiPatient, - ApiPrediction, ApiRequest, - ApiSensitiveInfo, Base, ) from noshow.model.predict import create_prediction @@ -119,10 +117,18 @@ async def predict( project_path = Path(__file__).parents[3] start_time = datetime.now() + if len(input) == 0: + raise HTTPException(status_code=400, detail="Input cannot be empty.") + input_df = load_appointment_pydantic(input) appointments_df = process_appointments(input_df, CLINIC_CONFIG, start_date) all_postalcodes = process_postal_codes(project_path / "data" / "raw" / "NL.txt") + if appointments_df.empty: + raise HTTPException( + status_code=400, detail="No appointments for the start date and filters" + ) + model = load_model() prediction_df = create_prediction( model, @@ -160,71 +166,7 @@ async def predict( ) db.add(apirequest) - for _, row in prediction_df.iterrows(): - apisensitive = db.get(ApiSensitiveInfo, row["pseudo_id"]) - - if not apisensitive: - if row["name_text"] is None: - row["name_text"] = "" - logger.warning( - f"Patient {row['pseudo_id']} has no name_text, " - "replacing with empty string" - ) - - apisensitive = ApiSensitiveInfo( - patient_id=row["pseudo_id"], - hix_number=row["patient_id"], - full_name=row["name_text"], - first_name=row["name_given1_callMe"], - birth_date=row["birthDate"], - mobile_phone=row["telecom1_value"], - home_phone=row["telecom2_value"], - other_phone=row["telecom3_value"], - ) - else: - # name and birthdate can't change, but phone number might - apisensitive.mobile_phone = row["telecom1_value"] - apisensitive.home_phone = row["telecom2_value"] - apisensitive.other_phone = row["telecom3_value"] - - apipatient = db.get(ApiPatient, row["pseudo_id"]) - if not apipatient: - apipatient = ApiPatient( - id=row["pseudo_id"], - ) - apipatient.treatment_group = int(row["treatment_group"]) - apiprediction = db.get(ApiPrediction, row["APP_ID"]) - if not apiprediction: - apiprediction = ApiPrediction( - 
id=row["APP_ID"], - patient_id=row["pseudo_id"], - prediction=row["prediction"], - start_time=row["start"], - request_relation=apirequest, - patient_relation=apipatient, - clinic_name=row["hoofdagenda"], - clinic_reception=row["description"], - clinic_phone_number=CLINIC_CONFIG[row["clinic"]].phone_number, - clinic_teleq_unit=CLINIC_CONFIG[row["clinic"]].teleq_name, - active=True, - ) - else: - # All values of a prediction can be updated except the ID and treatment - apiprediction.prediction = row["prediction"] - apiprediction.start_time = row["start"] - apiprediction.request_relation = apirequest - apiprediction.clinic_name = row["hoofdagenda"] - apiprediction.clinic_reception = row["description"] - apiprediction.clinic_phone_number = CLINIC_CONFIG[ - row["clinic"] - ].phone_number - apiprediction.clinic_teleq_unit = CLINIC_CONFIG[row["clinic"]].teleq_name - apiprediction.active = True - - db.merge(apisensitive) - db.merge(apiprediction) - db.merge(apipatient) - db.commit() + store_predictions(prediction_df, db, apirequest) fix_outdated_appointments(db, prediction_df["APP_ID"], start_date) diff --git a/src/noshow/api/app_helpers.py b/src/noshow/api/app_helpers.py index ab8f9ac..964b939 100644 --- a/src/noshow/api/app_helpers.py +++ b/src/noshow/api/app_helpers.py @@ -1,3 +1,4 @@ +import logging import pickle import random from datetime import datetime, timedelta @@ -9,7 +10,15 @@ from sqlalchemy import delete, select from sqlalchemy.orm import Session -from noshow.database.models import ApiPatient, ApiPrediction, ApiSensitiveInfo +from noshow.config import CLINIC_CONFIG +from noshow.database.models import ( + ApiPatient, + ApiPrediction, + ApiRequest, + ApiSensitiveInfo, +) + +logger = logging.getLogger(__name__) def load_model(model_path: Union[str, Path, None] = None) -> Any: @@ -186,3 +195,87 @@ def create_treatment_groups( predictions = predictions.drop(columns="score_bin") predictions["treatment_group"] = predictions["treatment_group"].astype(int) return 
predictions + + +def store_predictions( + prediction_df: pd.DataFrame, + db: Session, + apirequest: ApiRequest, +) -> None: + """ + Store predictions in the database. + + Parameters + ---------- + prediction_df : pd.DataFrame + DataFrame containing the predictions. + db : Session + Database session. + apirequest : ApiRequest + API request object related to the predictions. + """ + for _, row in prediction_df.iterrows(): + apisensitive = db.get(ApiSensitiveInfo, row["pseudo_id"]) + + if not apisensitive: + if row["name_text"] is None: + row["name_text"] = "" + logger.warning( + f"Patient {row['pseudo_id']} has no name_text, " + "replacing with empty string" + ) + + apisensitive = ApiSensitiveInfo( + patient_id=row["pseudo_id"], + hix_number=row["patient_id"], + full_name=row["name_text"], + first_name=row["name_given1_callMe"], + birth_date=row["birthDate"], + mobile_phone=row["telecom1_value"], + home_phone=row["telecom2_value"], + other_phone=row["telecom3_value"], + ) + else: + # name and birthdate can't change, but phone number might + apisensitive.mobile_phone = row["telecom1_value"] + apisensitive.home_phone = row["telecom2_value"] + apisensitive.other_phone = row["telecom3_value"] + + apipatient = db.get(ApiPatient, row["pseudo_id"]) + if not apipatient: + apipatient = ApiPatient( + id=row["pseudo_id"], + ) + apipatient.treatment_group = int(row["treatment_group"]) + apiprediction = db.get(ApiPrediction, row["APP_ID"]) + if not apiprediction: + apiprediction = ApiPrediction( + id=row["APP_ID"], + patient_id=row["pseudo_id"], + prediction=row["prediction"], + start_time=row["start"], + request_relation=apirequest, + patient_relation=apipatient, + clinic_name=row["hoofdagenda"], + clinic_reception=row["description"], + clinic_phone_number=CLINIC_CONFIG[row["clinic"]].phone_number, + clinic_teleq_unit=CLINIC_CONFIG[row["clinic"]].teleq_name, + active=True, + ) + else: + # All values of a prediction can be updated except the ID and treatment + apiprediction.prediction = 
row["prediction"] + apiprediction.start_time = row["start"] + apiprediction.request_relation = apirequest + apiprediction.clinic_name = row["hoofdagenda"] + apiprediction.clinic_reception = row["description"] + apiprediction.clinic_phone_number = CLINIC_CONFIG[ + row["clinic"] + ].phone_number + apiprediction.clinic_teleq_unit = CLINIC_CONFIG[row["clinic"]].teleq_name + apiprediction.active = True + + db.merge(apisensitive) + db.merge(apiprediction) + db.merge(apipatient) + db.commit() diff --git a/tests/test_api.py b/tests/test_api.py index 5b3d0df..e969909 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -60,9 +60,39 @@ async def test_predict_endpoint(monkeypatch): app, "create_treatment_groups", lambda x, y, z, q: x.assign(treatment_group=1) ) monkeypatch.setattr(app, "CLINIC_CONFIG", create_unit_test_clinic_config()) + monkeypatch.setattr(app_helpers, "CLINIC_CONFIG", create_unit_test_clinic_config()) monkeypatch.setenv("DB_USER", "") monkeypatch.setenv("X_API_KEY", "test") output = await predict(appointments_pydantic, "2024-07-16", FakeDB(), "test") output_df = pd.DataFrame(output) assert output_df.shape == (5, 17) + + +# test empty appointments +@pytest.mark.asyncio +async def test_predict_endpoint_empty_appointments(monkeypatch): + appointments_pydantic = fake_appointments() + monkeypatch.setattr(app, "get_bins", fake_bins) + monkeypatch.setattr(app, "process_postal_codes", fake_postal_codes) + monkeypatch.setattr(app, "load_model", fake_model) + monkeypatch.setattr(app_helpers, "delete", lambda x: FakeWhere()) + # patch create treatment groups and add column to the dataframe + monkeypatch.setattr( + app, "create_treatment_groups", lambda x, y, z, q: x.assign(treatment_group=1) + ) + monkeypatch.setattr(app, "CLINIC_CONFIG", create_unit_test_clinic_config()) + monkeypatch.setenv("DB_USER", "") + monkeypatch.setenv("X_API_KEY", "test") + + # empty appointments + with pytest.raises(Exception) as exc_info_empty: + __ = await predict([], "2024-07-16", 
FakeDB(), "test") + assert "400: Input cannot be empty." in str(exc_info_empty.value) + + # no appointments for the start date + with pytest.raises(Exception) as exc_inf_wrong_date: + __ = await predict(appointments_pydantic, "2024-07-15", FakeDB(), "test") + assert "400: No appointments for the start date and filters" in str( + exc_inf_wrong_date.value + )