-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Updated agendas * Rerun pipelines * Updated version * update data export
- Loading branch information
1 parent
cd5975e
commit 47ecd9e
Showing
14 changed files
with
157 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
outs: | ||
- md5: 46c186d0fd0d69609b08131997cba338 | ||
size: 507224267 | ||
- md5: 31983d8a20d487417a9da7cf024c7ecc | ||
size: 487004530 | ||
path: poliafspraken_no_show.csv | ||
hash: md5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
{ | ||
"best_score": 0.7424015243279327, | ||
"mean_roc_auc": 0.7424015243279327, | ||
"std_roc_auc": 0.007613920544359392, | ||
"mean_precision": 0.562214765652999, | ||
"mean_recall": 0.01428090066737743 | ||
"best_score": 0.7480471226772029, | ||
"mean_roc_auc": 0.7480471226772029, | ||
"std_roc_auc": 0.005831461640562374, | ||
"mean_precision": 0.5541110092343222, | ||
"mean_recall": 0.014451354691335214 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
step best_score | ||
0 0.7424015243279327 | ||
0 0.7480471226772029 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
step mean_precision | ||
0 0.562214765652999 | ||
0 0.5541110092343222 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
step mean_recall | ||
0 0.01428090066737743 | ||
0 0.014451354691335214 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
step mean_roc_auc | ||
0 0.7424015243279327 | ||
0 0.7480471226772029 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
step std_roc_auc | ||
0 0.007613920544359392 | ||
0 0.005831461640562374 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" | |
|
||
[project] | ||
name = "noshow" | ||
version = "1.4.9" | ||
version = "1.4.10" | ||
authors = [ | ||
{ name="Ruben Peters", email="[email protected]" }, | ||
{ name="Eric Wolters", email="[email protected]" } | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
outs: | ||
- md5: 6ade00b7f8bece964bde000c8e982f9e | ||
size: 14933 | ||
- md5: 922735dc7dbcb39f8ab28e8cb7bb1297 | ||
size: 18980 | ||
hash: md5 | ||
path: config.toml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# Module to export data from the dataplatform to a CSV file used to train the model.
# Uses the export query in data/sql/data_export.sql.
import csv | ||
import logging | ||
from pathlib import Path | ||
|
||
from sqlalchemy import text | ||
|
||
from noshow.database.connection import get_connection_string, get_engine | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def export_data(
    db_host: str = "dataplatform",
    db_database: str = "PUB",
    output_path: str = "poliafspraken_no_show.csv",
    batch_size: int = 10_000,
) -> None:
    """Export the training data from the dataplatform to a csv file.

    Runs the query stored in ``data/sql/data_export.sql`` and writes the
    result, including a header row, to ``data/raw/<output_path>``. Rows are
    fetched and written in batches of ``batch_size`` so the full result set
    never has to be held in memory at once.

    Parameters
    ----------
    db_host : str, optional
        Hostname of the database server, by default "dataplatform"
    db_database : str, optional
        Name of the database, by default "PUB"
    output_path : str, optional
        Name of the output file, located in the data/raw folder,
        by default "poliafspraken_no_show.csv"
    batch_size : int, optional
        Number of rows fetched from the query result and written to the csv
        per iteration, by default 10_000
    """
    # Both the query file and the output folder are resolved relative to the
    # directory three levels above this module (presumably the repository
    # root -- verify if the module is ever moved).
    project_root = Path(__file__).parents[3]
    output_csv = project_root / "data/raw" / output_path

    connection_string = get_connection_string(db_database=db_database, db_host=db_host)

    # NOTE(review): no explicit encoding is passed here or for the csv below,
    # so both files use the platform default encoding.
    with open(project_root / "data/sql/data_export.sql") as f:
        sql_query = f.read()

    db_engine = get_engine(connection_string)
    with db_engine.connect() as conn:
        logger.info("Executing export query...")
        # stream_results=True requests a streaming result so rows can be
        # consumed batch by batch instead of being fully buffered first.
        result = conn.execution_options(stream_results=True).execute(text(sql_query))
        logger.info("Export query executed successfully")

        # newline="" lets the csv module control line endings itself.
        with open(output_csv, "w", newline="") as csvfile:
            writer = csv.writer(csvfile)

            # Write the header row
            writer.writerow(result.keys())

            # Write data in batches; an empty batch means the result is exhausted
            while True:
                rows = result.fetchmany(batch_size)
                if not rows:
                    break
                writer.writerows(rows)

    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logger.info("Data exported to %s", output_csv)
|
||
|
||
if __name__ == "__main__":
    # Script entry point: log everything from DEBUG upwards with a timestamp,
    # logger name and level, then run the export with its default arguments.
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(format=log_format, level=logging.DEBUG)
    export_data()