Small agenda updates (#143)

* Update config and queries
* Retrained model
* Update changelog and fix import
* Add small analysis for ideal calling time
* Fix linting
UMCU-Digital-Health · Nov 19, 2024 · cd5975e · cd5975e
1 parent e65cc54
commit cd5975e
Show file tree

Hide file tree

Showing 16 changed files with 78 additions and 30 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.4.9] - 2024-11-18
+
+### Changed
+- Updated agendas: added B&O Urologische oncologie (ZH0033) to the export and prediction queries
+- Small fixes to data loading and to the import of the dvclive module
+
 ## [1.4.8] - 2024-11-13
 
 ### Added

diff --git a/data/raw/poliafspraken_no_show.csv.dvc b/data/raw/poliafspraken_no_show.csv.dvc
@@ -1,5 +1,5 @@
 outs:
-- md5: 530380d7750f4c1e819d94bee3c2c52a
-  size: 499291413
+- md5: 46c186d0fd0d69609b08131997cba338
+  size: 507224267
   path: poliafspraken_no_show.csv
   hash: md5
diff --git a/data/sql/data_export.sql b/data/sql/data_export.sql
@@ -113,8 +113,9 @@ WHERE 1=1
             -- Zorglijn Ontwikkeling in perspectief
             'ZH0300', -- PSY Ontwikkeling in perspectief
             -- Zorglijn Acute en intensieve zorg
-            'ZH0297' -- PSY Acute en intensieve zorg
-
+            'ZH0297', -- PSY Acute en intensieve zorg
+            -- Oncologische urologie
+            'ZH0033' -- B&O Urologische oncologie
         )
     AND APP.identifier_system = 'https://metadata.umcutrecht.nl/ids/HixAgendaAfspraak'
     AND APP.created >= '2015-01-01'

diff --git a/data/sql/data_prediction.sql b/data/sql/data_prediction.sql
@@ -135,7 +135,9 @@ WHERE 1=1
         -- Zorglijn Ontwikkeling in perspectief
         'ZH0300', -- PSY Ontwikkeling in perspectief
         -- Zorglijn Acute en intensieve zorg
-        'ZH0297' -- PSY Acute en intensieve zorg
+        'ZH0297', -- PSY Acute en intensieve zorg
+        -- Oncologische urologie
+        'ZH0033' -- B&O Urologische oncologie
     )
     AND APP.identifier_system = 'https://metadata.umcutrecht.nl/ids/HixAgendaAfspraak'
     AND APP.[created] >= '2015-01-01'
@@ -236,7 +238,9 @@ WHERE 1=1
                 -- Zorglijn Ontwikkeling in perspectief
                 'ZH0300', -- PSY Ontwikkeling in perspectief
                 -- Zorglijn Acute en intensieve zorg
-                'ZH0297' -- PSY Acute en intensieve zorg
+                'ZH0297', -- PSY Acute en intensieve zorg
+                -- Oncologische urologie
+                'ZH0033' -- B&O Urologische oncologie
             )
             AND APP2.identifier_system = 'https://metadata.umcutrecht.nl/ids/HixAgendaAfspraak'
             AND CONVERT(DATE, APP2.[start]) = @start_date

diff --git a/dvc.lock b/dvc.lock
@@ -5,17 +5,17 @@ stages:
     deps:
     - path: data/processed/featuretable.parquet
       hash: md5
-      md5: 98d6d67a29704828698fcd1fbd039e83
-      size: 93522680
+      md5: 86589d93563f0a2fdc2373a7bf487732
+      size: 94648247
     - path: src/noshow/model/train_model.py
       hash: md5
       md5: 00964a947199825f721ebbbe0bb23da6
       size: 3708
     outs:
     - path: output/models/no_show_model_cv.pickle
       hash: md5
-      md5: f30c648b054c4ee8f336638f1982f381
-      size: 1860467
+      md5: 6517c5261b1eb56b2297ae827c37262f
+      size: 1156419
   feature_building:
     cmd: python src/noshow/features/feature_pipeline.py
     deps:
@@ -24,14 +24,14 @@ stages:
       size: 279455
     - path: data/raw/poliafspraken_no_show.csv
       hash: md5
-      md5: 530380d7750f4c1e819d94bee3c2c52a
-      size: 499291413
+      md5: 46c186d0fd0d69609b08131997cba338
+      size: 507224267
     - path: src/noshow/features/feature_pipeline.py
       hash: md5
       md5: 71ffb7a162976bde11e0aed72ea19f98
       size: 2889
     outs:
     - path: data/processed/featuretable.parquet
       hash: md5
-      md5: 98d6d67a29704828698fcd1fbd039e83
-      size: 93522680
+      md5: 86589d93563f0a2fdc2373a7bf487732
+      size: 94648247
diff --git a/notebooks/analyse_call_results.ipynb b/notebooks/analyse_call_results.ipynb
@@ -310,11 +310,47 @@
     "text_contents = [val for val in text_contents if \"eschikbaar\" not in val]\n",
     "text_contents"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Analyse when to call"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Group by hour of timestamp and count the number of calls per outcome\n",
+    "call_response[\"hour_called\"] = call_response[\"timestamp\"].dt.hour\n",
+    "hourly_outcome = call_response.groupby([\"hour_called\", \"call_outcome\"]).size()\n",
+    "hourly_outcome = hourly_outcome.unstack()\n",
+    "\n",
+    "# Absolute number of calls per outcome for each hour\n",
+    "fig, ax = plt.subplots(figsize=(10, 5))\n",
+    "hourly_outcome.plot.bar(stacked=True, ax=ax)\n",
+    "ax.set_title(\"Uitkomst van bellen huidige status per uur\")\n",
+    "ax.set_ylabel(\"Aantal\")\n",
+    "ax.set_xlabel(\"Uur\")\n",
+    "ax.legend(title=\"Uitkomst\")\n",
+    "fig.show()\n",
+    "\n",
+    "# Also plot the percentages with bars of equal height.\n",
+    "# Row-normalise and scale by 100 so the values match the Percentage axis label\n",
+    "# (without the scaling the bars show fractions between 0 and 1).\n",
+    "hourly_share = hourly_outcome.div(hourly_outcome.sum(axis=1), axis=0) * 100\n",
+    "\n",
+    "fig, ax = plt.subplots(figsize=(10, 5))\n",
+    "hourly_share.plot.bar(stacked=True, ax=ax)\n",
+    "ax.set_title(\"Percentage uitkomst van bellen huidige status per uur\")\n",
+    "ax.set_ylabel(\"Percentage\")\n",
+    "ax.set_xlabel(\"Uur\")\n",
+    "ax.legend(title=\"Uitkomst\")\n",
+    "fig.show()"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "no_show_ruben",
+   "display_name": ".venv",
    "language": "python",
    "name": "python3"
   },
@@ -328,7 +364,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.7"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,

diff --git a/output/dvclive/metrics.json b/output/dvclive/metrics.json
@@ -1,7 +1,7 @@
 {
-    "best_score": 0.7402663469877975,
-    "mean_roc_auc": 0.7402663469877975,
-    "std_roc_auc": 0.007219339746532362,
-    "mean_precision": 0.5441756346300849,
-    "mean_recall": 0.01516288989391171
+    "best_score": 0.7424015243279327,
+    "mean_roc_auc": 0.7424015243279327,
+    "std_roc_auc": 0.007613920544359392,
+    "mean_precision": 0.562214765652999,
+    "mean_recall": 0.01428090066737743
 }
diff --git a/output/dvclive/plots/metrics/best_score.tsv b/output/dvclive/plots/metrics/best_score.tsv
@@ -1,2 +1,2 @@
 step	best_score
-0	0.7402663469877975
+0	0.7424015243279327
diff --git a/output/dvclive/plots/metrics/mean_precision.tsv b/output/dvclive/plots/metrics/mean_precision.tsv
@@ -1,2 +1,2 @@
 step	mean_precision
-0	0.5441756346300849
+0	0.562214765652999
diff --git a/output/dvclive/plots/metrics/mean_recall.tsv b/output/dvclive/plots/metrics/mean_recall.tsv
@@ -1,2 +1,2 @@
 step	mean_recall
-0	0.01516288989391171
+0	0.01428090066737743
diff --git a/output/dvclive/plots/metrics/mean_roc_auc.tsv b/output/dvclive/plots/metrics/mean_roc_auc.tsv
@@ -1,2 +1,2 @@
 step	mean_roc_auc
-0	0.7402663469877975
+0	0.7424015243279327
diff --git a/output/dvclive/plots/metrics/std_roc_auc.tsv b/output/dvclive/plots/metrics/std_roc_auc.tsv
@@ -1,2 +1,2 @@
 step	std_roc_auc
-0	0.007219339746532362
+0	0.007613920544359392
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "noshow"
-version = "1.4.8"
+version = "1.4.9"
 authors = [
   { name="Ruben Peters", email="[email protected]" },
   { name="Eric Wolters", email="[email protected]" }

diff --git a/run/config/config.toml.dvc b/run/config/config.toml.dvc
@@ -1,5 +1,5 @@
 outs:
-- md5: acf09aea219b69c5c6298ecf5f1cbc48
-  size: 14506
+- md5: 6ade00b7f8bece964bde000c8e982f9e
+  size: 14933
   hash: md5
   path: config.toml
diff --git a/src/noshow/model/train_model.py b/src/noshow/model/train_model.py
@@ -3,7 +3,7 @@
 from typing import Dict, Union
 
 import pandas as pd
-from dvclive import Live
+from dvclive.live import Live
 from sklearn.base import BaseEstimator
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.model_selection import GridSearchCV, StratifiedGroupKFold, train_test_split

diff --git a/src/noshow/preprocessing/load_data.py b/src/noshow/preprocessing/load_data.py
@@ -51,6 +51,7 @@ def load_appointment_csv(csv_path: Union[str, Path]) -> pd.DataFrame:
         csv_path,
         parse_dates=["created"],
         date_format="ISO8601",
+        dtype={"specialty_code": "object"},  # Avoids Dtype Mixed warning
     )
 
     appointments_df["start"] = pd.to_datetime(