Skip to content

Commit

Permalink
Merge pull request #65 from UBC-MDS/milestone-2-fix
Browse files Browse the repository at this point in the history
Milestone 2 fix
  • Loading branch information
dariakhv authored Dec 13, 2024
2 parents 3eb8d8c + c952f35 commit d37bba2
Show file tree
Hide file tree
Showing 10 changed files with 16 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/.ipynb_checkpoints
*.ipynb_checkpoints
/analysis/.ipynb_checkpoints/
/report/.ipynb_checkpoints/
/results/figures/.ipynb_checkpoints
Expand Down
3 changes: 0 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ USER root
RUN sudo apt update \
&& sudo apt install -y lmodern

RUN sudo apt-get update \
&& sudo apt-get install -y make

USER $NB_UID

RUN mamba update --quiet --file /tmp/conda-linux-64.lock \
Expand Down
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,20 +74,20 @@ where you launched the container, and then type `docker compose rm`
```
python scripts/download.py \
--id 186 \
--save_to ./data/wine.csv
--save_to ./data/raw/wine.csv

python scripts/clean_n_split_data.py \
--raw-data ./data/wine.csv
--raw-data ./data/raw/wine.csv

python scripts/validation_before_split.py \
--file_name wine.csv \
--data_path ./data/
--data_path ./data/raw

```
4.2 Run EDA
```
python scripts/eda_n_correlation_check.py \
--train-file ./data/wine_train.csv \
--train-file ./data/proc/wine_train.csv \
--output-img ./results/figures \
--output-table ./results/tables
```
Expand All @@ -97,8 +97,8 @@ where you launched the container, and then type `docker compose rm`
--pipe-to ./results/models

python scripts/model_evaluation_wine_predictor.py \
--train-data ./data/wine_train.csv \
--test-data ./data/wine_test.csv \
--train-data ./data/proc/wine_train.csv \
--test-data ./data/proc/wine_test.csv \
--pipeline-path ./results/models/wine_pipeline.pickle \
--table-to ./results/tables \
--plot-to ./results/figures
Expand All @@ -109,5 +109,8 @@ where you launched the container, and then type `docker compose rm`
quarto render report/report.qmd --to pdf
```
# License
This project is licensed under the [`MIT License`](LICENSE.md).
# References
Cortez P, Cerdeira A, Almeida F, Matos T, Reis J. Wine Quality [dataset]. 2009. UCI Machine Learning Repository. Available from: https://doi.org/10.24432/C56S3T.
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ services:
ports:
- "8888:8888"
volumes:
- "${PWD}:/app/data"
working_dir: /app
- "${PWD}:/app/data"
working_dir: /app/data
deploy:
resources:
limits:
Expand Down
Binary file modified docs/report.pdf
Binary file not shown.
Binary file modified report/.report.html.icloud
Binary file not shown.
Binary file modified report/report.pdf
Binary file not shown.
Binary file modified results/models/wine_random_search.pickle
Binary file not shown.
2 changes: 1 addition & 1 deletion results/tables/random_search.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
mean_train_score,0.9875779986984037,0.9873192686470664,0.9887321572718368,0.9882197254580379,0.9883474962027348,0.9870566844281605,0.9880919482052322,0.9870566844281605,0.9864183082552028,0.9861637573491556
mean_test_score,0.9885928776559425,0.9885928776559425,0.9885821459956146,0.988576762777796,0.988576762777796,0.9880680139186435,0.9880653223097344,0.9870318213202898,0.9855028900889848,0.9844799953502902
param_logisticregression__C,1.2665709946616681,0.9159332036121722,9.15226100278092,2.4713734184878824,2.74750150868112,0.7017992831138442,2.342391466254441,0.6084642077147051,0.37348973478010344,0.28425159292991603
mean_fit_time,0.8944474697113037,0.623142671585083,0.9552007675170898,1.446831512451172,0.8496040344238281,0.7155133247375488,0.9875524997711181,0.5591573238372802,1.4225006580352784,0.7792784214019776
mean_fit_time,0.4468091487884521,0.40517253875732423,0.6185922622680664,0.7212271690368652,0.47739248275756835,0.5206932544708252,0.590698528289795,0.40631418228149413,0.6384647846221924,0.5607413291931153
4 changes: 3 additions & 1 deletion tests/README
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ pytest -v test_random_search.py \
--test_data=../data/proc/wine_test.csv \
--pipeline_path=../results/models/wine_pipeline.pickle

pytest test_validate_column_names.py
pytest test_validate_column_names.py

pytest test_split_data.py

0 comments on commit d37bba2

Please sign in to comment.