Skip to content

Commit

Permalink
update vision FT examples corresponding to recent component update (A…
Browse files Browse the repository at this point in the history
…zure#2650)

* updating changes with ME component

* updating MMOD as well

* update MMOD cli

* separate section for model evaluation component

* remove ds ort true for classification

* black check
  • Loading branch information
shubhamiit authored Sep 27, 2023
1 parent 696eb99 commit bc5dc8a
Show file tree
Hide file tree
Showing 12 changed files with 365 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@ inputs:
validation_data:
path: ./data/validation-mltable-folder
type: mltable
test_data:
path: ./data/validation-mltable-folder
type: mltable
# deepspeed config file
ds_finetune:
path: ./deepspeed_configs/zero1.json
type: uri_file
# compute
compute_model_import: sample-model-import-cluster
compute_finetune: sample-finetune-cluster-gpu
compute_model_evaluation: sample-finetune-cluster-gpu

outputs:
# Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint
Expand All @@ -42,6 +46,7 @@ jobs:
# Compute
compute_model_import: ${{parent.inputs.compute_model_import}}
compute_finetune: ${{parent.inputs.compute_finetune}}
compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}}
process_count_per_instance: 1
instance_count: 1

Expand All @@ -55,15 +60,14 @@ jobs:
# data
training_data: ${{parent.inputs.training_data}}
validation_data: ${{parent.inputs.validation_data}}
test_data: ${{parent.inputs.test_data}}

# Finetuning args
image_width: -1
image_height: -1
apply_augmentations: True
number_of_workers: 8
apply_deepspeed: False
deepspeed_config: ${{parent.inputs.ds_finetune}}
apply_ort: False
auto_find_batch_size: False
extra_optim_args: ""
precision: 32
Expand Down Expand Up @@ -93,6 +97,13 @@ jobs:
# gradient_accumulation_step: 1
# label_smoothing_factor: 0.0
# max_grad_norm: 1.0
# apply_ort: True
# apply_deepspeed: True

# Model evaluation args
# # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.
# label_column_name: label
# input_column_names: image_url

outputs:
mlflow_model_folder: ${{parent.outputs.trained_model}}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@ workspace_name="<WORKSPACE_NAME>"

compute_cluster_model_import="sample-model-import-cluster"
compute_cluster_finetune="sample-finetune-cluster-gpu"
# using the same compute cluster for model evaluation as finetuning. If you want to use a different cluster, specify it below
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
compute_finetune_sku="STANDARD_NC6s_v3"
compute_finetune_sku="Standard_NC6s_v3"
compute_model_evaluation_sku="Standard_NC6s_v3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
Expand Down Expand Up @@ -72,6 +75,18 @@ else
}
fi

# Check if $compute_model_evaluation exists, else create it
if az ml compute show --name $compute_model_evaluation $workspace_info
then
echo "Compute cluster $compute_model_evaluation already exists"
else
echo "Creating compute cluster $compute_model_evaluation"
az ml compute create --name $compute_model_evaluation --type amlcompute --min-instances 0 --max-instances 2 --size $compute_model_evaluation_sku $workspace_info || {
echo "Failed to create compute cluster $compute_model_evaluation"
exit 1
}
fi

# Check if the finetuning pipeline component exists
if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name
then
Expand All @@ -96,6 +111,9 @@ python prepare_data.py --subscription $subscription_id --group $resource_group_n
train_data="./data/training-mltable-folder"
# validation data
validation_data="./data/validation-mltable-folder"
# test data
# Using the same data for validation and test. If you want to use a different dataset for test, specify it below
test_data="./data/validation-mltable-folder"

# Check if training, validation and test data exist
if [ ! -d $train_data ]; then
Expand All @@ -107,7 +125,12 @@ if [ ! -d $validation_data ]; then
exit 1
fi

# 5. Submit finetuning job using pipeline.yaml for a HuggingFace Transformers model
if [ ! -d $test_data ]; then
echo "Test data $test_data does not exist"
exit 1
fi

# 5. Submit finetuning job using pipeline.yaml for an open-mmlab mmdetection model

# # If you want to use a HuggingFace model, specify the inputs.model_name instead of inputs.mlflow_model_path.path like below
# inputs.model_name=$huggingface_model_name
Expand All @@ -120,8 +143,10 @@ huggingface_parent_job_name=$( az ml job create \
inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$aml_registry_model_name/versions/$model_version" \
inputs.training_data.path=$train_data \
inputs.validation_data.path=$validation_data \
inputs.test_data.path=$test_data \
inputs.compute_model_import=$compute_cluster_model_import \
inputs.compute_finetune=$compute_cluster_finetune
inputs.compute_finetune=$compute_cluster_finetune \
inputs.compute_model_evaluation=$compute_model_evaluation
) || {
echo "Failed to submit finetuning job"
exit 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@ inputs:
validation_data:
path: ./data/validation-mltable-folder
type: mltable
test_data:
path: ./data/validation-mltable-folder
type: mltable
# deepspeed config file
ds_finetune:
path: ./deepspeed_configs/zero1.json
type: uri_file
# compute
compute_model_import: sample-model-import-cluster
compute_finetune: sample-finetune-cluster-gpu

compute_model_evaluation: sample-finetune-cluster-gpu

outputs:
# Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint
Expand All @@ -43,6 +46,7 @@ jobs:
# Compute
compute_model_import: ${{parent.inputs.compute_model_import}}
compute_finetune: ${{parent.inputs.compute_finetune}}
compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}}
process_count_per_instance: 1
instance_count: 1

Expand All @@ -56,15 +60,14 @@ jobs:
# data
training_data: ${{parent.inputs.training_data}}
validation_data: ${{parent.inputs.validation_data}}
test_data: ${{parent.inputs.test_data}}

# Finetuning args
image_width: -1
image_height: -1
apply_augmentations: True
number_of_workers: 8
apply_deepspeed: False
deepspeed_config: ${{parent.inputs.ds_finetune}}
apply_ort: False
auto_find_batch_size: False
extra_optim_args: ""
precision: 32
Expand Down Expand Up @@ -94,6 +97,13 @@ jobs:
# gradient_accumulation_step: 1
# label_smoothing_factor: 0.0
# max_grad_norm: 1.0
# apply_ort: True
# apply_deepspeed: True

# Model evaluation args
# # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.
# label_column_name: label
# input_column_names: image_url

outputs:
mlflow_model_folder: ${{parent.outputs.trained_model}}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@ workspace_name="<WORKSPACE_NAME>"

compute_cluster_model_import="sample-model-import-cluster"
compute_cluster_finetune="sample-finetune-cluster-gpu"
# using the same compute cluster for model evaluation as finetuning. If you want to use a different cluster, specify it below
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
compute_finetune_sku="STANDARD_NC6s_v3"
compute_finetune_sku="Standard_NC6s_v3"
compute_model_evaluation_sku="Standard_NC6s_v3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
Expand Down Expand Up @@ -73,6 +76,18 @@ else
}
fi

# Check if $compute_model_evaluation exists, else create it
if az ml compute show --name $compute_model_evaluation $workspace_info
then
echo "Compute cluster $compute_model_evaluation already exists"
else
echo "Creating compute cluster $compute_model_evaluation"
az ml compute create --name $compute_model_evaluation --type amlcompute --min-instances 0 --max-instances 2 --size $compute_model_evaluation_sku $workspace_info || {
echo "Failed to create compute cluster $compute_model_evaluation"
exit 1
}
fi

# Check if the finetuning pipeline component exists
if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name
then
Expand All @@ -97,6 +112,9 @@ python prepare_data.py --subscription $subscription_id --group $resource_group_n
train_data="./data/training-mltable-folder"
# validation data
validation_data="./data/validation-mltable-folder"
# test data
# Using the same data for validation and test. If you want to use a different dataset for test, specify it below
test_data="./data/validation-mltable-folder"

# Check if training, validation and test data exist
if [ ! -d $train_data ]; then
Expand All @@ -108,6 +126,10 @@ if [ ! -d $validation_data ]; then
exit 1
fi

if [ ! -d $test_data ]; then
echo "Test data $test_data does not exist"
exit 1
fi
# 5. Submit finetuning job using pipeline.yaml for a HuggingFace Transformers model

# # If you want to use a HuggingFace model, specify the inputs.model_name instead of inputs.mlflow_model_path.path like below
Expand All @@ -121,8 +143,10 @@ huggingface_parent_job_name=$( az ml job create \
inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$aml_registry_model_name/versions/$model_version" \
inputs.training_data.path=$train_data \
inputs.validation_data.path=$validation_data \
inputs.test_data.path=$test_data \
inputs.compute_model_import=$compute_cluster_model_import \
inputs.compute_finetune=$compute_cluster_finetune
inputs.compute_finetune=$compute_cluster_finetune \
inputs.compute_model_evaluation=$compute_model_evaluation
) || {
echo "Failed to submit finetuning job"
exit 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ inputs:

validation_data:
type: mltable

test_data:
type: mltable

# deepspeed config file
ds_finetune:
Expand All @@ -18,6 +21,7 @@ inputs:
# compute
compute_model_import: sample-model-import-cluster
compute_finetune: sample-finetune-cluster-gpu
compute_model_evaluation: sample-finetune-cluster-gpu
# model_name: microsoft/beit-base-patch16-224
# # Model - specify the foundation model available in the azureml system registry
mlflow_model:
Expand All @@ -43,6 +47,7 @@ jobs:
# Compute
compute_model_import: ${{parent.inputs.compute_model_import}}
compute_finetune: ${{parent.inputs.compute_finetune}}
compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}}
instance_count: 1
process_count_per_instance: 1

Expand All @@ -56,6 +61,7 @@ jobs:
# Data
training_data: ${{parent.inputs.training_data}}
validation_data: ${{parent.inputs.validation_data}}
test_data: ${{parent.inputs.test_data}}

# Finetuning parameters
apply_augmentations: True
Expand Down Expand Up @@ -96,5 +102,10 @@ jobs:
# iou_threshold: 0.5
# box_score_threshold: 0.3

# Model evaluation args
# # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.
# label_column_name: label
# input_column_names: image_url

outputs:
mlflow_model_folder: ${{parent.outputs.trained_model}}
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ workspace_name="<WORKSPACE_NAME>"

compute_cluster_model_import="sample-model-import-cluster"
compute_cluster_finetune="sample-finetune-cluster-gpu"
# using the same compute cluster for model evaluation as finetuning. If you want to use a different cluster, specify it below
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
compute_finetune_sku="Standard_NC6s_v3"
compute_model_evaluation_sku="Standard_NC6s_v3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
Expand Down Expand Up @@ -73,6 +76,18 @@ else
}
fi

# Check if $compute_model_evaluation exists, else create it
if az ml compute show --name $compute_model_evaluation $workspace_info
then
echo "Compute cluster $compute_model_evaluation already exists"
else
echo "Creating compute cluster $compute_model_evaluation"
az ml compute create --name $compute_model_evaluation --type amlcompute --min-instances 0 --max-instances 2 --size $compute_model_evaluation_sku $workspace_info || {
echo "Failed to create compute cluster $compute_model_evaluation"
exit 1
}
fi

# Check if the finetuning pipeline component exists
if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name
then
Expand All @@ -96,6 +111,9 @@ python prepare_data.py --subscription $subscription_id --group $resource_group_n
train_data="./data/training-mltable-folder"
# validation data
validation_data="./data/validation-mltable-folder"
# test data
# Using the same data for validation and test. If you want to use a different dataset for test, specify it below
test_data="./data/validation-mltable-folder"

# Check if training, validation and test data exist
if [ ! -d $train_data ]
Expand All @@ -110,6 +128,12 @@ then
exit 1
fi

if [ ! -d $test_data ]
then
echo "Test data $test_data does not exist"
exit 1
fi

# 5. Submit finetuning job using pipeline.yaml for an open-mmlab mmdetection model

# If you want to use a MMDetection model, specify the inputs.model_name instead of inputs.mlflow_model_path.path like below
Expand All @@ -123,9 +147,11 @@ mmdetection_parent_job_name=$( az ml job create \
jobs.mmdetection_model_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \
inputs.compute_model_import=$compute_cluster_model_import \
inputs.compute_finetune=$compute_cluster_finetune \
inputs.compute_model_evaluation=$compute_model_evaluation \
inputs.mlflow_model.path="azureml://registries/$registry_name/models/$mmdetection_model_name/versions/$model_version" \
inputs.training_data.path=$train_data \
inputs.validation_data.path=$validation_data
inputs.validation_data.path=$validation_data \
inputs.test_data.path=$test_data
) || {
echo "Failed to submit finetuning job"
exit 1
Expand Down
Loading

0 comments on commit bc5dc8a

Please sign in to comment.