
Merge pull request #91 from microsoft/mabou/patch
Mabou/patch
marabout2015 authored Dec 10, 2019
2 parents 714679c + df7730f commit b98f7b4
Showing 10 changed files with 92 additions and 167 deletions.
164 changes: 30 additions & 134 deletions .ci/azure-pipelines-v2.yml
@@ -1,134 +1,30 @@
# MLHyperparameterTuning Pipeline

trigger: none

variables:
BuildConfiguration: Release
BuildBinariesDirectory: $(Build.BinariesDirectory)
BuildPlatform: any cpu
DotNetCoreBuildVersion: 2.2.108
DotNetRuntimeTarget: ubuntu.18.04-x64
AgentToolsDirectory: $(Agent.ToolsDirectory)
CloudPlatform: AzureCloud
ProductName: Trident
TridentWorkloadType: $(WorkloadType)
TridentWorkloadTypeShort: $(WorkloadTypeShort)
DeployLocation: eastus
Agent: agce-ai
azureSubscription: AG-AzureCAT-AIDevOps-Test-COGSNonProd-IO1685734(0ca618d2-22a8-413a-96d0-0f1b531129c3)
azure_subscription: 0ca618d2-22a8-413a-96d0-0f1b531129c3

jobs:
- job: MLHyperparameterTuningJob
timeoutInMinutes: 300
cancelTimeoutInMinutes: 2
pool:
vmImage: 'Ubuntu-16.04'

steps:
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
which conda
conda env create -f environment.yml
conda env list
conda activate MLHyperparameterTuning
conda env list
echo Login Azure Account
az login -t $(sptenent) --service-principal -u $(spidentity) --password $(spsecret)
echo Try and figure out what account set takes
az account set -h
echo Try and set it.
az account set --subscription $(subscriptionid)
# papermill 01_Data_Prep.ipynb 01_Data_Prep_Output.ipynb --log-output --no-progress-bar -k python3
displayName: 'Configuration'

- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLHyperparameterTuning
echo Executing 00_Data_Prep.ipynb
papermill 00_Data_Prep.ipynb 00_Data_Prep_Output.ipynb --log-output --no-progress-bar -k python3
displayName: '00_Data_Prep.ipynb'
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLHyperparameterTuning
echo Executing 01_Training_Script.ipynb
papermill 01_Training_Script.ipynb 01_Training_Script_Output.ipynb --log-output --no-progress-bar -k python3
displayName: '01_Training_Script.ipynb'
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLHyperparameterTuning
echo Executing 02_Testing_Script.ipynb
papermill 02_Testing_Script.ipynb 02_Testing_Script_Output.ipynb --log-output --no-progress-bar -k python3
displayName: '02_Testing_Script.ipynb'
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLHyperparameterTuning
echo Executing 03_Run_Locally.ipynb
papermill 03_Run_Locally.ipynb 03_Run_Locally_Output.ipynb --log-output --no-progress-bar -k python3 -p selected_subscription $(subscriptionid) -p resource_group $(azurergname)
displayName: '03_Run_Locally.ipynb'
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLHyperparameterTuning
echo Executing 04_Hyperparameter_Random_Search.ipynb
papermill 04_Hyperparameter_Random_Search.ipynb 04_Hyperparameter_Random_Search_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns)
displayName: '04_Hyperparameter_Random_Search.ipynb'
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLHyperparameterTuning
echo Executing 05_Train_Best_Model.ipynb
papermill 05_Train_Best_Model.ipynb 05_Train_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3
displayName: '05_Train_Best_Model.ipynb'
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLHyperparameterTuning
echo Executing 06_Test_Best_Model.ipynb
papermill 06_Test_Best_Model.ipynb 06_Test_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3
displayName: '06_Test_Best_Model.ipynb'
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLHyperparameterTuning
echo Executing 07_Train_With_AML_Pipeline.ipynb
papermill 07_Train_With_AML_Pipeline.ipynb 07_Train_With_AML_Pipeline_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns)
displayName: '07_Train_With_AML_Pipeline.ipynb'
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLHyperparameterTuning
echo Executing 08_Tear_Down.ipynb
papermill 08_Tear_Down.ipynb 08_Tear_Down_Output.ipynb --log-output --no-progress-bar -k python3
displayName: '08_Tear_Down.ipynb'
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLHyperparameterTuning
echo Execute Resource Group Delete
existResponse=$(az group exists -n $(azurergname))
if [ "$existResponse" == "true" ]; then
echo Deleting project resource group
az group delete --name $(azurergname) --yes
else
echo Project resource group did not exist
fi
echo Done Cleanup
displayName: 'Backup Cleanup'
condition: or(canceled(),failed())
- task: CreateWorkItem@1
inputs:
workItemType: 'Issue'
title: $(System.TeamProject) - Build $(Build.BuildNumber) Failed
assignedTo: 'Mario Bourgoin <[email protected]>'
associate: true
teamProject: $(System.TeamProject)

fieldMappings: |
Description=Branch: Branch $(Build.SourceBranch) failed to build. Go to Boards>WorkItems and tag the failure type.
displayName: 'Create work item on failure'
condition: failed()
# MLHyperparameterTuning Pipeline
#
# A GitHub Service Connection must also be created with the name "AIArchitecturesAndPractices-GitHub"

resources:
repositories:
- repository: aitemplates
type: github
name: microsoft/AI
endpoint: AIArchitecturesAndPractices-GitHub

trigger:
branches:
include:
- master
- mabou/instrument

pr:
autoCancel: true
branches:
include:
- master
- mabou/instrument

stages:
- template: .ci/stages/deploy_notebooks_stages_v2.yml@aitemplates
parameters:
jobDisplayName: MLScoreDeployJob
DefaultWorkingDirectory: $(System.DefaultWorkingDirectory)
workload_vars: ../vars/mlhyperparametertuning_vars.yml
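
The deleted steps above each drove a notebook through papermill's command line, injecting pipeline variables with -p flags. The same run can be expressed through papermill's Python API; the sketch below mirrors the deleted 04 step, with the injected value written as a literal since $(dsmaxruns) is resolved by Azure Pipelines, not by papermill.

    import papermill as pm

    # Execute a notebook as the deleted bash steps did: values passed via
    # `parameters` overwrite the notebook's "parameters"-tagged cell, and
    # cell output is streamed to the build log instead of a progress bar.
    pm.execute_notebook(
        "04_Hyperparameter_Random_Search.ipynb",
        "04_Hyperparameter_Random_Search_Output.ipynb",
        parameters={"max_total_runs": 96},  # stands in for $(dsmaxruns)
        kernel_name="python3",
        log_output=True,
        progress_bar=False,
    )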
3 changes: 3 additions & 0 deletions .ci/vars/agce_devops_sub_vars.yml
@@ -0,0 +1,3 @@
variables:
azure_subscription: 0ca618d2-22a8-413a-96d0-0f1b531129c3
azureSubscription: AG-AzureCAT-AIDevOps-Test-COGSNonProd-IO1685734(0ca618d2-22a8-413a-96d0-0f1b531129c3)
7 changes: 7 additions & 0 deletions .ci/vars/mlhyperparametertuning_vars.yml
@@ -0,0 +1,7 @@
variables:
DeploymentName: MLScoreDeployJob
TridentWorkloadTypeShort: aimlscore
DeployLocation: eastus
ProjectLocation: "."
PythonPath: "."
Template: MLTrainDeployAMLJob.yml
24 changes: 22 additions & 2 deletions 01_Training_Script.ipynb
@@ -440,7 +440,27 @@
"metadata": {},
"source": [
"## Run the script to see that it works <a id='run'></a>\n",
"This should take around ten minutes."
"Set the effort expended to train the classifier."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"estimators = 1000"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run the classifier script. This should take about 10 minutes."
]
},
{
@@ -451,7 +471,7 @@
},
"outputs": [],
"source": [
"%run -t scripts/TrainClassifier.py --estimators 1000 --match 5 --ngrams 2 --min_child_samples 10 --save model"
"%run -t scripts/TrainClassifier.py --estimators $estimators --match 5 --ngrams 2 --min_child_samples 10 --save model"
]
},
{
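With the new parameters-tagged cell, papermill can override `estimators` while the %run line stays otherwise unchanged. TrainClassifier.py itself is not part of this diff; a minimal sketch of the argument parsing it would need to accept the flags shown above (defaults and help text are assumptions):

    import argparse

    # Hypothetical reconstruction of TrainClassifier.py's interface; only the
    # flag names are taken from the notebook's %run invocation.
    parser = argparse.ArgumentParser(description="Train the FAQ-matching classifier.")
    parser.add_argument("--data-folder", default=".", help="directory containing the training data")
    parser.add_argument("--estimators", type=int, default=100, help="number of boosting rounds")
    parser.add_argument("--match", type=int, default=5, help="match parameter for the tutorial's data prep")
    parser.add_argument("--ngrams", type=int, default=2, help="n-gram size for featurization")
    parser.add_argument("--min_child_samples", type=int, default=10, help="LightGBM min_child_samples")
    parser.add_argument("--save", default=None, help="basename under which to save the model")
    args = parser.parse_args()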
13 changes: 7 additions & 6 deletions 03_Run_Locally.ipynb
@@ -39,7 +39,7 @@
"metadata": {},
"source": [
"## Azure subscription <a id='subscription'></a>\n",
"If you have multiple subscriptions select the subscription you want to use. You may supply either the subscription's name or the subscription's ID. If you want to run this in a different location that supports HyperDrive, you may enter the one you want to use. You can also set the name of the resource group in which this tutorial will add resources. *IMPORTANT NOTE:* The last notebook in this example will delete this resource group and all associated resources."
"If you have multiple subscriptions select the subscription you want to use. You may supply either the subscription's name or the subscription's ID. If you want to run this in a different location that supports HyperDrive, you may enter the one you want to use. You can also set the name of the resource group in which this tutorial will add resources. *IMPORTANT NOTE:* The last notebook in this example will delete this resource group and all associated resources. We also define the number of estimators to use for the local run."
]
},
{
@@ -55,7 +55,8 @@
"subscription_name=\"YOUR_SUBSCRIPTION_NAME\"\n",
"subscription_id=\"YOUR_SUBSCRIPTION_ID\"\n",
"location=\"eastus\"\n",
"resource_group=\"hypetuning\""
"resource_group=\"hypetuning\"\n",
"estimators = 1000"
]
},
{
@@ -179,10 +180,10 @@
"est = Estimator(source_directory=os.path.join('.', 'scripts'), \n",
" entry_script='TrainClassifier.py',\n",
" script_params={'--data-folder': os.path.abspath('.'),\n",
" '--estimators': '1000',\n",
" '--match': '5',\n",
" '--ngrams': '2',\n",
" '--min_child_samples': '10',\n",
" '--estimators': estimators,\n",
" '--match': 5,\n",
" '--ngrams': 2,\n",
" '--min_child_samples': 10,\n",
" \"--save\": \"local_model\"},\n",
" compute_target='local',\n",
" conda_packages=['pandas==0.23.4',\n",
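For context, this Estimator targets compute_target='local', so submitting it runs the training script on the agent itself. A minimal sketch of the submission, assuming the workspace object `ws` that the notebook sets up earlier and an illustrative experiment name:

    from azureml.core import Experiment

    # Submit the locally-targeted estimator and block until training finishes.
    exp = Experiment(workspace=ws, name="hypetuning-local")  # name is illustrative
    run = exp.submit(est)
    run.wait_for_completion(show_output=True)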
7 changes: 4 additions & 3 deletions 04_Hyperparameter_Random_Search.ipynb
@@ -202,7 +202,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"This hyperparameter space specifies a grid of 9,360 unique configuration points (4 `ngrams` X 39 `match` X 30 `min_child_samples` X 2 `unweighted`). We control the resources used by the search through specifying a maximum number of configuration points to sample as `max_total_runs`."
"This hyperparameter space specifies a grid of 9,360 unique configuration points (4 `ngrams` X 39 `match` X 30 `min_child_samples` X 2 `unweighted`). We control the resources used by the search through specifying a maximum number of configuration points to sample as `max_total_runs`. We also define the number of estimators to use for each run."
]
},
{
@@ -215,7 +215,8 @@
},
"outputs": [],
"source": [
"max_total_runs = 96"
"max_total_runs = 96\n",
"estimators = 1000"
]
},
{
@@ -270,7 +271,7 @@
"estimator = Estimator(source_directory=os.path.join('.', 'scripts'),\n",
" entry_script='TrainClassifier.py',\n",
" script_params={'--data-folder': ds.as_mount(),\n",
" '--estimators': 1000},\n",
" '--estimators': estimators},\n",
" compute_target=compute_target,\n",
" conda_packages=['pandas==0.23.4',\n",
" 'scikit-learn==0.21.3',\n",
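The 9,360 figure is just the product of the dimension sizes: 4 × 39 × 30 × 2. The sampling space itself sits outside this hunk; the sketch below shows a RandomParameterSampling and HyperDriveConfig consistent with those counts, where the concrete value ranges and the metric name are assumptions rather than the notebook's actual choices.

    from azureml.train.hyperdrive import (
        HyperDriveConfig, PrimaryMetricGoal, RandomParameterSampling, choice)

    # Dimensions sized to reproduce the stated grid: 4 * 39 * 30 * 2 = 9,360.
    hyperparameter_sampling = RandomParameterSampling({
        "--ngrams": choice(*range(1, 5)),              # 4 values (assumed range)
        "--match": choice(*range(2, 41)),              # 39 values (assumed range)
        "--min_child_samples": choice(*range(1, 31)),  # 30 values (assumed range)
        "--unweighted": choice("Yes", "No"),           # 2 values (assumed encoding)
    })

    hyperdrive_run_config = HyperDriveConfig(
        estimator=estimator,                  # the Estimator defined above
        hyperparameter_sampling=hyperparameter_sampling,
        primary_metric_name="accuracy",       # assumed metric name
        primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
        max_total_runs=max_total_runs,        # 96 by default, per the cell above
    )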
6 changes: 3 additions & 3 deletions 05_Train_Best_Model.ipynb
@@ -166,8 +166,8 @@
},
"outputs": [],
"source": [
"model_estimators = 8 * int(best_parameters['--estimators'])\n",
"model_estimators"
"estimators = 8 * int(best_parameters['--estimators'])\n",
"estimators"
]
},
{
Expand All @@ -186,7 +186,7 @@
"ds = ws.get_default_datastore()\n",
"model_parameters = best_parameters.copy()\n",
"model_parameters['--data-folder'] = ds.as_mount()\n",
"model_parameters['--estimators'] = model_estimators\n",
"model_parameters['--estimators'] = estimators\n",
"model_parameters['--save'] = 'FAQ_ranker'\n",
"pd.Series(model_parameters, name='Value').to_frame()"
]
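The renamed `estimators` variable scales the search's winning `--estimators` value by 8 (1,000 becomes 8,000, matching the default noted later for the AML pipeline). `best_parameters` is assembled earlier in the notebook; one plausible way to recover it from the HyperDrive run of notebook 04, sketched under the assumption that `hyperdrive_run` is available:

    # Fetch the winning run and fold its command-line arguments back into a dict.
    best_run = hyperdrive_run.get_best_run_by_primary_metric()
    arguments = best_run.get_details()["runDefinition"]["arguments"]
    best_parameters = dict(zip(arguments[::2], arguments[1::2]))

    estimators = 8 * int(best_parameters["--estimators"])  # e.g. 8 * 1000 = 8000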
22 changes: 9 additions & 13 deletions 07_Train_With_AML_Pipeline.ipynb
@@ -292,7 +292,7 @@
"metadata": {},
"source": [
"## Create AML Pipeline Tuning Step <a id='aml_pipeline_tune_step'></a>\n",
"We create a HyperDrive step in the AML pipeline to perform a search for hyperparameters. The `tune_estimators` pipeline parameter that controls the number of estimators used in tuning deliberately has a low default value for the speed of pipeline testing. The `tune_steps_data` output pipeline data is only used to synchronize with the next pipeline step."
"We create a HyperDrive step in the AML pipeline to perform a search for hyperparameters. The `tune_estimators` pipeline parameter that controls the number of estimators used in tuning deliberately has a low default value for the speed of pipeline testing."
]
},
{
@@ -302,15 +302,13 @@
"outputs": [],
"source": [
"tune_step_name=\"tune_model\"\n",
"tune_steps_data = PipelineData(\"tune_steps_data\", datastore=ds)\n",
"tune_estimators = PipelineParameter(name=\"tune_estimators\", default_value=1) # Set to 1000 when running the pipeline.\n",
"tune_step = HyperDriveStep(\n",
" name=tune_step_name,\n",
" hyperdrive_config=hyperdrive_run_config,\n",
" estimator_entry_script_arguments=[\"--data-folder\", data_folder,\n",
" \"--estimators\", tune_estimators],\n",
" inputs=[data_folder],\n",
" outputs=[tune_steps_data],\n",
" allow_reuse=False)"
]
},
@@ -404,7 +402,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating PythonScript Step for AML pipeline to get the best run's hyperparameters. The `tune_steps_data` input pipeline data is only used to synchronize with the previous pipeline step."
"Creating PythonScript Step for AML pipeline to get the best run's hyperparameters."
]
},
{
@@ -428,18 +426,18 @@
" arguments=[\"--hd-step\", tune_step_name,\n",
" \"--output-steps-data\", bh_steps_data,\n",
" \"--hyperparameters\", bh_hyperparameters_file],\n",
" inputs=[tune_steps_data],\n",
" outputs=[bh_steps_data],\n",
" runconfig=bh_run_config,\n",
" allow_reuse=False)"
" allow_reuse=False)\n",
"bh_step.run_after(tune_step)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create AML Pipeline Best Model Step <a id='aml_pipeline_estimator_step'></a>\n",
"This step passes the hyperparameters file from the previous step to the training script to create the best model. The `best_estimators` pipeline parameter that controls the number of estimators used in getting the best model deliberately has a low default value for the speed of pipeline testing. The `bm_steps_data` output pipeline data is only used to synchronize with the next pipeline step."
"This step passes the hyperparameters file from the previous step to the training script to create the best model. The `best_estimators` pipeline parameter that controls the number of estimators used in getting the best model deliberately has a low default value for the speed of pipeline testing."
]
},
{
@@ -449,7 +447,7 @@
"outputs": [],
"source": [
"bm_step_name=\"best_model\"\n",
"bm_steps_data = PipelineData(\"bm_steps_data\", datastore=ds)\n",
"bm_estimators = PipelineParameter(name=\"best_estimators\", default_value=1) # Set to 8000 when running the pipeline\n",
"bm_estimator = Estimator(source_directory=os.path.join('.', 'scripts'), # Use a new Estimator as a bug workaround\n",
" entry_script='TrainClassifier.py',\n",
Expand All @@ -467,7 +464,6 @@
" \"--save\", model_name],\n",
" compute_target=compute_target,\n",
" inputs=[data_folder, bh_steps_data],\n",
" outputs=[bm_steps_data],\n",
" allow_reuse=False)"
]
},
@@ -532,7 +528,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating PythonScript Step for AML pipeline to register the best model. The `bm_steps_data` input pipeline data is only used to synchronize with the previous pipeline step."
"Creating PythonScript Step for AML pipeline to register the best model."
]
},
{
@@ -554,9 +550,9 @@
" arguments=[\"--es-step\", bm_step_name,\n",
" \"--outputs\", \"outputs\",\n",
" \"--model-name\", model_name],\n",
" inputs=[bm_steps_data],\n",
" runconfig=rm_run_config,\n",
" allow_reuse=False)"
" allow_reuse=False)\n",
"rm_step.run_after(bm_step)"
]
},
{
@@ -671,7 +667,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.6.7"
}
},
"nbformat": 4,
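Taken together, the 07 changes drop the placeholder PipelineData objects (`tune_steps_data`, `bm_steps_data`) that existed only to force step ordering, and declare the ordering directly with `run_after`. A condensed sketch of the resulting dependency chain, assuming the step objects and workspace `ws` from earlier in the notebook:

    from azureml.pipeline.core import Pipeline

    # Sequencing is now explicit; real data dependencies (bh_steps_data into
    # the best-model step) still flow through inputs/outputs as before.
    bh_step.run_after(tune_step)
    rm_step.run_after(bm_step)

    pipeline = Pipeline(workspace=ws, steps=[tune_step, bh_step, bm_step, rm_step])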