Added dvc functionality

vasalosi · Apr 1, 2024 · 20d65fd · 20d65fd
1 parent d13f781
commit 20d65fd
Show file tree

Hide file tree

Showing 11 changed files with 665 additions and 8 deletions.
diff --git a/.gitignore b/.gitignore
@@ -159,4 +159,5 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-artifacts/*
+artifacts/*
+mlruns/*
diff --git a/README.md b/README.md
@@ -10,4 +10,33 @@ Update the configuration manager in src config
 Update the components
 Update the pipeline
 Update the main.py
-Update the dvc.yaml
+Update the dvc.yaml
+
+
+
+MLflow
+Documentation
+
+MLflow tutorial
+
+cmd
+mlflow ui
+dagshub
+dagshub
+
+MLFLOW_TRACKING_URI=https://dagshub.com/vasalosi/End-to-End-Chest-Cancer-Classification-using-MLflow.mlflow
+MLFLOW_TRACKING_USERNAME=vasalosi
+MLFLOW_TRACKING_PASSWORD=your_dagshub_access_token
+python script.py
+
+Run this to export as env variables:
+
+export MLFLOW_TRACKING_URI=https://dagshub.com/vasalosi/End-to-End-Chest-Cancer-Classification-using-MLflow.mlflow
+
+export MLFLOW_TRACKING_USERNAME=vasalosi 
+
+export MLFLOW_TRACKING_PASSWORD=your_dagshub_access_token
+DVC cmd
+dvc init
+dvc repro
+dvc dag
diff --git a/dvc.lock b/dvc.lock
@@ -0,0 +1,113 @@
+schema: '2.0'
+stages:
+  data_ingestion:
+    cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
+    deps:
+    - path: config/config.yaml
+      hash: md5
+      md5: 80450104406e3e10b8b2d62cf840b2f2
+      size: 594
+    - path: src/cnnClassifier/pipeline/stage_01_data_ingestion.py
+      hash: md5
+      md5: 257f1b6398e02f7479ce8922f36b35c8
+      size: 908
+    outs:
+    - path: artifacts/data_ingestion/Chest-CT-Scan-data
+      hash: md5
+      md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
+      size: 49247431
+      nfiles: 343
+  prepare_base_model:
+    cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
+    deps:
+    - path: config/config.yaml
+      hash: md5
+      md5: 80450104406e3e10b8b2d62cf840b2f2
+      size: 594
+    - path: src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
+      hash: md5
+      md5: 7f9ec5a73931e7bff22705a294223529
+      size: 952
+    params:
+      params.yaml:
+        CLASSES: 2
+        IMAGE_SIZE:
+        - 224
+        - 224
+        - 3
+        INCLUDE_TOP: false
+        LEARNING_RATE: 0.01
+        WEIGHTS: imagenet
+    outs:
+    - path: artifacts/prepare_base_model
+      hash: md5
+      md5: 9520d29801a13ca1113ba9ce79fd88d9.dir
+      size: 118054560
+      nfiles: 2
+  training:
+    cmd: python src/cnnClassifier/pipeline/stage_03_model_trainer.py
+    deps:
+    - path: artifacts/data_ingestion/Chest-CT-Scan-data
+      hash: md5
+      md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
+      size: 49247431
+      nfiles: 343
+    - path: artifacts/prepare_base_model
+      hash: md5
+      md5: 9520d29801a13ca1113ba9ce79fd88d9.dir
+      size: 118054560
+      nfiles: 2
+    - path: config/config.yaml
+      hash: md5
+      md5: 80450104406e3e10b8b2d62cf840b2f2
+      size: 594
+    - path: src/cnnClassifier/pipeline/stage_03_model_trainer.py
+      hash: md5
+      md5: 2f4d245918743185245a30a155be2ec3
+      size: 910
+    params:
+      params.yaml:
+        AUGMENTATION: true
+        BATCH_SIZE: 16
+        EPOCHS: 1
+        IMAGE_SIZE:
+        - 224
+        - 224
+        - 3
+    outs:
+    - path: artifacts/training/model.h5
+      hash: md5
+      md5: 531c7725e3b6d0a315faf1312ba5789b
+      size: 59337520
+  evaluation:
+    cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py
+    deps:
+    - path: artifacts/data_ingestion/Chest-CT-Scan-data
+      hash: md5
+      md5: 904fa45d934ce879b3b1933dca6cb2f1.dir
+      size: 49247431
+      nfiles: 343
+    - path: artifacts/training/model.h5
+      hash: md5
+      md5: 531c7725e3b6d0a315faf1312ba5789b
+      size: 59337520
+    - path: config/config.yaml
+      hash: md5
+      md5: 80450104406e3e10b8b2d62cf840b2f2
+      size: 594
+    - path: src/cnnClassifier/pipeline/stage_04_model_evaluation.py
+      hash: md5
+      md5: 6d19372baf34366679787a0fb1b89f49
+      size: 922
+    params:
+      params.yaml:
+        BATCH_SIZE: 16
+        IMAGE_SIZE:
+        - 224
+        - 224
+        - 3
+    outs:
+    - path: scores.json
+      hash: md5
+      md5: 4e5cd96340896497805352e330cf4a51
+      size: 73
diff --git a/dvc.yaml b/dvc.yaml
@@ -0,0 +1,54 @@
+stages:  # each stage declares its command (cmd), its inputs (deps/params) and its results (outs/metrics)
+  data_ingestion:
+    cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
+    deps:
+      - src/cnnClassifier/pipeline/stage_01_data_ingestion.py
+      - config/config.yaml
+    outs:
+      - artifacts/data_ingestion/Chest-CT-Scan-data
+
+  # Runs the stage_02 script; re-runs when any listed param changes and writes artifacts/prepare_base_model.
+  prepare_base_model:
+    cmd: python src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
+    deps:
+      - src/cnnClassifier/pipeline/stage_02_prepare_base_model.py
+      - config/config.yaml
+    params:
+      - IMAGE_SIZE
+      - INCLUDE_TOP
+      - CLASSES
+      - WEIGHTS
+      - LEARNING_RATE
+    outs:
+      - artifacts/prepare_base_model
+
+  # Consumes the outputs of data_ingestion and prepare_base_model; produces artifacts/training/model.h5.
+  training:
+    cmd: python src/cnnClassifier/pipeline/stage_03_model_trainer.py
+    deps:
+      - src/cnnClassifier/pipeline/stage_03_model_trainer.py
+      - config/config.yaml
+      - artifacts/data_ingestion/Chest-CT-Scan-data
+      - artifacts/prepare_base_model
+    params:
+      - IMAGE_SIZE
+      - EPOCHS
+      - BATCH_SIZE
+      - AUGMENTATION
+    outs:
+      - artifacts/training/model.h5
+
+  # Consumes the ingested data and the trained model; scores.json is declared as a metric with cache: false.
+  evaluation:
+    cmd: python src/cnnClassifier/pipeline/stage_04_model_evaluation.py
+    deps:
+      - src/cnnClassifier/pipeline/stage_04_model_evaluation.py
+      - config/config.yaml
+      - artifacts/data_ingestion/Chest-CT-Scan-data
+      - artifacts/training/model.h5
+    params:
+      - IMAGE_SIZE
+      - BATCH_SIZE
+    metrics:
+    - scores.json:
+        cache: false
diff --git a/main.py b/main.py
@@ -2,7 +2,7 @@
 from cnnClassifier.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline
 from cnnClassifier.pipeline.stage_02_prepare_base_model  import PrepareBaseModelTrainingPipeline
 from cnnClassifier.pipeline.stage_03_model_trainer import ModelTrainingPipeline
-
+from cnnClassifier.pipeline.stage_04_model_evaluation import EvaluationPipeline
 
 STAGE_NAME = "Data Ingestion Stage"
 
@@ -35,4 +35,17 @@
     logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
 except Exception as e:
     logger.exception(e)
-    raise e
+    raise e
+
+
+STAGE_NAME = "Evaluation stage"
+try:
+   logger.info(f"*******************")
+   logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
+   model_evalution = EvaluationPipeline()
+   model_evalution.main()
+   logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
+
+except Exception as e:
+        logger.exception(e)
+        raise e