Skip to content

Commit

Permalink
update vision FT examples corresponding to recent component update (A…
Browse files Browse the repository at this point in the history
…zure#2650)

* updating changes with ME component

* updating MMOD as well

* update MMOD cli

* separate section for model evaluation component

* remove ds ort true for classification

* black check
  • Loading branch information
shubhamiit authored Sep 27, 2023
1 parent 696eb99 commit bc5dc8a
Show file tree
Hide file tree
Showing 12 changed files with 365 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@ inputs:
validation_data:
path: ./data/validation-mltable-folder
type: mltable
test_data:
path: ./data/validation-mltable-folder
type: mltable
# deepspeed config file
ds_finetune:
path: ./deepspeed_configs/zero1.json
type: uri_file
# compute
compute_model_import: sample-model-import-cluster
compute_finetune: sample-finetune-cluster-gpu
compute_model_evaluation: sample-finetune-cluster-gpu

outputs:
# Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint
Expand All @@ -42,6 +46,7 @@ jobs:
# Compute
compute_model_import: ${{parent.inputs.compute_model_import}}
compute_finetune: ${{parent.inputs.compute_finetune}}
compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}}
process_count_per_instance: 1
instance_count: 1

Expand All @@ -55,15 +60,14 @@ jobs:
# data
training_data: ${{parent.inputs.training_data}}
validation_data: ${{parent.inputs.validation_data}}
test_data: ${{parent.inputs.test_data}}

# Finetuning args
image_width: -1
image_height: -1
apply_augmentations: True
number_of_workers: 8
apply_deepspeed: False
deepspeed_config: ${{parent.inputs.ds_finetune}}
apply_ort: False
auto_find_batch_size: False
extra_optim_args: ""
precision: 32
Expand Down Expand Up @@ -93,6 +97,13 @@ jobs:
# gradient_accumulation_step: 1
# label_smoothing_factor: 0.0
# max_grad_norm: 1.0
# apply_ort: True
# apply_deepspeed: True

# Model evaluation args
# # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.
# label_column_name: label
# input_column_names: image_url

outputs:
mlflow_model_folder: ${{parent.outputs.trained_model}}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@ workspace_name="<WORKSPACE_NAME>"

compute_cluster_model_import="sample-model-import-cluster"
compute_cluster_finetune="sample-finetune-cluster-gpu"
# using the same compute cluster for model evaluation as finetuning. If you want to use a different cluster, specify it below
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
compute_finetune_sku="STANDARD_NC6s_v3"
compute_finetune_sku="Standard_NC6s_v3"
compute_model_evaluation_sku="Standard_NC6s_v3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
Expand Down Expand Up @@ -72,6 +75,18 @@ else
}
fi

# Check if $compute_model_evaluation exists, else create it
if az ml compute show --name $compute_model_evaluation $workspace_info
then
echo "Compute cluster $compute_model_evaluation already exists"
else
echo "Creating compute cluster $compute_model_evaluation"
az ml compute create --name $compute_model_evaluation --type amlcompute --min-instances 0 --max-instances 2 --size $compute_model_evaluation_sku $workspace_info || {
echo "Failed to create compute cluster $compute_model_evaluation"
exit 1
}
fi

# Check if the finetuning pipeline component exists
if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name
then
Expand All @@ -96,6 +111,9 @@ python prepare_data.py --subscription $subscription_id --group $resource_group_n
train_data="./data/training-mltable-folder"
# validation data
validation_data="./data/validation-mltable-folder"
# test data
# Using the same data for validation and test. If you want to use a different dataset for test, specify it below
test_data="./data/validation-mltable-folder"

# Check if training, validation and test data exist
if [ ! -d $train_data ]; then
Expand All @@ -107,7 +125,12 @@ if [ ! -d $validation_data ]; then
exit 1
fi

# 5. Submit finetuning job using pipeline.yaml for a HuggingFace Transformers model
if [ ! -d $test_data ]; then
echo "Test data $test_data does not exist"
exit 1
fi

# 5. Submit finetuning job using pipeline.yaml for an open-mmlab mmdetection model

# # If you want to use a HuggingFace model, specify the inputs.model_name instead of inputs.mlflow_model_path.path like below
# inputs.model_name=$huggingface_model_name
Expand All @@ -120,8 +143,10 @@ huggingface_parent_job_name=$( az ml job create \
inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$aml_registry_model_name/versions/$model_version" \
inputs.training_data.path=$train_data \
inputs.validation_data.path=$validation_data \
inputs.test_data.path=$test_data \
inputs.compute_model_import=$compute_cluster_model_import \
inputs.compute_finetune=$compute_cluster_finetune
inputs.compute_finetune=$compute_cluster_finetune \
inputs.compute_model_evaluation=$compute_model_evaluation
) || {
echo "Failed to submit finetuning job"
exit 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@ inputs:
validation_data:
path: ./data/validation-mltable-folder
type: mltable
test_data:
path: ./data/validation-mltable-folder
type: mltable
# deepspeed config file
ds_finetune:
path: ./deepspeed_configs/zero1.json
type: uri_file
# compute
compute_model_import: sample-model-import-cluster
compute_finetune: sample-finetune-cluster-gpu

compute_model_evaluation: sample-finetune-cluster-gpu

outputs:
# Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint
Expand All @@ -43,6 +46,7 @@ jobs:
# Compute
compute_model_import: ${{parent.inputs.compute_model_import}}
compute_finetune: ${{parent.inputs.compute_finetune}}
compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}}
process_count_per_instance: 1
instance_count: 1

Expand All @@ -56,15 +60,14 @@ jobs:
# data
training_data: ${{parent.inputs.training_data}}
validation_data: ${{parent.inputs.validation_data}}
test_data: ${{parent.inputs.test_data}}

# Finetuning args
image_width: -1
image_height: -1
apply_augmentations: True
number_of_workers: 8
apply_deepspeed: False
deepspeed_config: ${{parent.inputs.ds_finetune}}
apply_ort: False
auto_find_batch_size: False
extra_optim_args: ""
precision: 32
Expand Down Expand Up @@ -94,6 +97,13 @@ jobs:
# gradient_accumulation_step: 1
# label_smoothing_factor: 0.0
# max_grad_norm: 1.0
# apply_ort: True
# apply_deepspeed: True

# Model evaluation args
# # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.
# label_column_name: label
# input_column_names: image_url

outputs:
mlflow_model_folder: ${{parent.outputs.trained_model}}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@ workspace_name="<WORKSPACE_NAME>"

compute_cluster_model_import="sample-model-import-cluster"
compute_cluster_finetune="sample-finetune-cluster-gpu"
# using the same compute cluster for model evaluation as finetuning. If you want to use a different cluster, specify it below
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
compute_finetune_sku="STANDARD_NC6s_v3"
compute_finetune_sku="Standard_NC6s_v3"
compute_model_evaluation_sku="Standard_NC6s_v3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
Expand Down Expand Up @@ -73,6 +76,18 @@ else
}
fi

# Check if $compute_model_evaluation exists, else create it
if az ml compute show --name $compute_model_evaluation $workspace_info
then
echo "Compute cluster $compute_model_evaluation already exists"
else
echo "Creating compute cluster $compute_model_evaluation"
az ml compute create --name $compute_model_evaluation --type amlcompute --min-instances 0 --max-instances 2 --size $compute_model_evaluation_sku $workspace_info || {
echo "Failed to create compute cluster $compute_model_evaluation"
exit 1
}
fi

# Check if the finetuning pipeline component exists
if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name
then
Expand All @@ -97,6 +112,9 @@ python prepare_data.py --subscription $subscription_id --group $resource_group_n
train_data="./data/training-mltable-folder"
# validation data
validation_data="./data/validation-mltable-folder"
# test data
# Using the same data for validation and test. If you want to use a different dataset for test, specify it below
test_data="./data/validation-mltable-folder"

# Check if training, validation and test data exist
if [ ! -d $train_data ]; then
Expand All @@ -108,6 +126,10 @@ if [ ! -d $validation_data ]; then
exit 1
fi

if [ ! -d $test_data ]; then
echo "Test data $test_data does not exist"
exit 1
fi
# 5. Submit finetuning job using pipeline.yaml for a HuggingFace Transformers model

# # If you want to use a HuggingFace model, specify the inputs.model_name instead of inputs.mlflow_model_path.path like below
Expand All @@ -121,8 +143,10 @@ huggingface_parent_job_name=$( az ml job create \
inputs.mlflow_model_path.path="azureml://registries/$registry_name/models/$aml_registry_model_name/versions/$model_version" \
inputs.training_data.path=$train_data \
inputs.validation_data.path=$validation_data \
inputs.test_data.path=$test_data \
inputs.compute_model_import=$compute_cluster_model_import \
inputs.compute_finetune=$compute_cluster_finetune
inputs.compute_finetune=$compute_cluster_finetune \
inputs.compute_model_evaluation=$compute_model_evaluation
) || {
echo "Failed to submit finetuning job"
exit 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ inputs:

validation_data:
type: mltable

test_data:
type: mltable

# deepspeed config file
ds_finetune:
Expand All @@ -18,6 +21,7 @@ inputs:
# compute
compute_model_import: sample-model-import-cluster
compute_finetune: sample-finetune-cluster-gpu
compute_model_evaluation: sample-finetune-cluster-gpu
# model_name: microsoft/beit-base-patch16-224
# # Model - specify the foundation model available in the azureml system registry
mlflow_model:
Expand All @@ -43,6 +47,7 @@ jobs:
# Compute
compute_model_import: ${{parent.inputs.compute_model_import}}
compute_finetune: ${{parent.inputs.compute_finetune}}
compute_model_evaluation: ${{parent.inputs.compute_model_evaluation}}
instance_count: 1
process_count_per_instance: 1

Expand All @@ -56,6 +61,7 @@ jobs:
# Data
training_data: ${{parent.inputs.training_data}}
validation_data: ${{parent.inputs.validation_data}}
test_data: ${{parent.inputs.test_data}}

# Finetuning parameters
apply_augmentations: True
Expand Down Expand Up @@ -96,5 +102,10 @@ jobs:
# iou_threshold: 0.5
# box_score_threshold: 0.3

# Model evaluation args
# # Uncomment one or more lines below to provide specific values, if you wish to override the autoselected default values.
# label_column_name: label
# input_column_names: image_url

outputs:
mlflow_model_folder: ${{parent.outputs.trained_model}}
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ workspace_name="<WORKSPACE_NAME>"

compute_cluster_model_import="sample-model-import-cluster"
compute_cluster_finetune="sample-finetune-cluster-gpu"
# using the same compute cluster for model evaluation as finetuning. If you want to use a different cluster, specify it below
compute_model_evaluation="sample-finetune-cluster-gpu"
# If above compute cluster does not exist, create it with the following vm size
compute_model_import_sku="Standard_D12"
compute_finetune_sku="Standard_NC6s_v3"
compute_model_evaluation_sku="Standard_NC6s_v3"

# This is the number of GPUs in a single node of the selected 'vm_size' compute.
# Setting this to less than the number of GPUs will result in underutilized GPUs, taking longer to train.
Expand Down Expand Up @@ -73,6 +76,18 @@ else
}
fi

# Check if $compute_model_evaluation exists, else create it
if az ml compute show --name $compute_model_evaluation $workspace_info
then
echo "Compute cluster $compute_model_evaluation already exists"
else
echo "Creating compute cluster $compute_model_evaluation"
az ml compute create --name $compute_model_evaluation --type amlcompute --min-instances 0 --max-instances 2 --size $compute_model_evaluation_sku $workspace_info || {
echo "Failed to create compute cluster $compute_model_evaluation"
exit 1
}
fi

# Check if the finetuning pipeline component exists
if ! az ml component show --name $finetuning_pipeline_component --label latest --registry-name $registry_name
then
Expand All @@ -96,6 +111,9 @@ python prepare_data.py --subscription $subscription_id --group $resource_group_n
train_data="./data/training-mltable-folder"
# validation data
validation_data="./data/validation-mltable-folder"
# test data
# Using the same data for validation and test. If you want to use a different dataset for test, specify it below
test_data="./data/validation-mltable-folder"

# Check if training, validation and test data exist
if [ ! -d $train_data ]
Expand All @@ -110,6 +128,12 @@ then
exit 1
fi

if [ ! -d $test_data ]
then
echo "Test data $test_data does not exist"
exit 1
fi

# 5. Submit finetuning job using pipeline.yaml for an open-mmlab mmdetection model

# If you want to use a MMDetection model, specify the inputs.model_name instead of inputs.mlflow_model_path.path like below
Expand All @@ -123,9 +147,11 @@ mmdetection_parent_job_name=$( az ml job create \
jobs.mmdetection_model_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \
inputs.compute_model_import=$compute_cluster_model_import \
inputs.compute_finetune=$compute_cluster_finetune \
inputs.compute_model_evaluation=$compute_model_evaluation \
inputs.mlflow_model.path="azureml://registries/$registry_name/models/$mmdetection_model_name/versions/$model_version" \
inputs.training_data.path=$train_data \
inputs.validation_data.path=$validation_data
inputs.validation_data.path=$validation_data \
inputs.test_data.path=$test_data
) || {
echo "Failed to submit finetuning job"
exit 1
Expand Down
Loading

0 comments on commit bc5dc8a

Please sign in to comment.