diff --git a/.ci/azure-pipelines-v2.yml b/.ci/azure-pipelines-v2.yml
index abfb6df..e9500a4 100644
--- a/.ci/azure-pipelines-v2.yml
+++ b/.ci/azure-pipelines-v2.yml
@@ -1,134 +1,30 @@
-# MLHyperparameterTuning Pipeline
-
-trigger: none
-
-variables:
-  BuildConfiguration: Release
-  BuildBinariesDirectory: $(Build.BinariesDirectory)
-  BuildPlatform: any cpu
-  DotNetCoreBuildVersion: 2.2.108
-  DotNetRuntimeTarget: ubuntu.18.04-x64
-  AgentToolsDirectory: $(Agent.ToolsDirectory)
-  CloudPlatform: AzureCloud
-  ProductName: Trident
-  TridentWorkloadType: $(WorkloadType)
-  TridentWorkloadTypeShort: $(WorkloadTypeShort)
-  DeployLocation: eastus
-  Agent: agce-ai
-  azureSubscription: AG-AzureCAT-AIDevOps-Test-COGSNonProd-IO1685734(0ca618d2-22a8-413a-96d0-0f1b531129c3)
-  azure_subscription: 0ca618d2-22a8-413a-96d0-0f1b531129c3
-
-jobs:
-- job: MLHyperparameterTuningJob
-  timeoutInMinutes: 300
-  cancelTimeoutInMinutes: 2
-  pool:
-    vmImage: 'Ubuntu-16.04'
-
-  steps:
-  - bash: |
-      source /usr/share/miniconda/etc/profile.d/conda.sh
-      which conda
-      conda env create -f environment.yml
-      conda env list
-      conda activate MLHyperparameterTuning
-      conda env list
-      echo Login Azure Account
-      az login -t $(sptenent) --service-principal -u $(spidentity) --password $(spsecret)
-      echo Try and figure out what account set takes
-      az account set -h
-      echo Try and set it.
-      az account set --subscription $(subscriptionid)
-# papermill 01_Data_Prep.ipynb 01_Data_Prep_Output.ipynb --log-output --no-progress-bar -k python3
-    displayName: 'Configuration'
-
-  - bash: |
-      source /usr/share/miniconda/etc/profile.d/conda.sh
-      conda activate MLHyperparameterTuning
-      echo Executing 00_Data_Prep.ipynb
-      papermill 00_Data_Prep.ipynb 00_Data_Prep_Output.ipynb --log-output --no-progress-bar -k python3
-    displayName: '00_Data_Prep.ipynb'
-
-  - bash: |
-      source /usr/share/miniconda/etc/profile.d/conda.sh
-      conda activate MLHyperparameterTuning
-      echo Executing 01_Training_Script.ipynb
-      papermill 01_Training_Script.ipynb 01_Training_Script_Output.ipynb --log-output --no-progress-bar -k python3
-    displayName: '01_Training_Script.ipynb'
-
-  - bash: |
-      source /usr/share/miniconda/etc/profile.d/conda.sh
-      conda activate MLHyperparameterTuning
-      echo Executing 02_Testing_Script.ipynb
-      papermill 02_Testing_Script.ipynb 02_Testing_Script_Output.ipynb --log-output --no-progress-bar -k python3
-    displayName: '02_Testing_Script.ipynb'
-
-  - bash: |
-      source /usr/share/miniconda/etc/profile.d/conda.sh
-      conda activate MLHyperparameterTuning
-      echo Executing 03_Run_Locally.ipynb
-      papermill 03_Run_Locally.ipynb 03_Run_Locally_Output.ipynb --log-output --no-progress-bar -k python3 -p selected_subscription $(subscriptionid) -p resource_group $(azurergname)
-    displayName: '03_Run_Locally.ipynb'
-
-  - bash: |
-      source /usr/share/miniconda/etc/profile.d/conda.sh
-      conda activate MLHyperparameterTuning
-      echo Executing 04_Hyperparameter_Random_Search.ipynb
-      papermill 04_Hyperparameter_Random_Search.ipynb 04_Hyperparameter_Random_Search_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns)
-    displayName: '04_Hyperparameter_Random_Search.ipynb'
-
-  - bash: |
-      source /usr/share/miniconda/etc/profile.d/conda.sh
-      conda activate MLHyperparameterTuning
-      echo Executing 05_Train_Best_Model.ipynb
-      papermill 05_Train_Best_Model.ipynb 05_Train_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3
-    displayName: '05_Train_Best_Model.ipynb'
-
-  - bash: |
-      source /usr/share/miniconda/etc/profile.d/conda.sh
-      conda activate MLHyperparameterTuning
-      echo Executing 06_Test_Best_Model.ipynb
-      papermill 06_Test_Best_Model.ipynb 06_Test_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3
-    displayName: '06_Test_Best_Model.ipynb'
-
-  - bash: |
-      source /usr/share/miniconda/etc/profile.d/conda.sh
-      conda activate MLHyperparameterTuning
-      echo Executing 07_Train_With_AML_Pipeline.ipynb
-      papermill 07_Train_With_AML_Pipeline.ipynb 07_Train_With_AML_Pipeline_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns)
-    displayName: '07_Train_With_AML_Pipeline.ipynb'
-
-  - bash: |
-      source /usr/share/miniconda/etc/profile.d/conda.sh
-      conda activate MLHyperparameterTuning
-      echo Executing 08_Tear_Down.ipynb
-      papermill 08_Tear_Down.ipynb 08_Tear_Down_Output.ipynb --log-output --no-progress-bar -k python3
-    displayName: '08_Tear_Down.ipynb'
-
-  - bash: |
-      source /usr/share/miniconda/etc/profile.d/conda.sh
-      conda activate MLHyperparameterTuning
-      echo Execute Resource Group Delete
-      existResponse=$(az group exists -n $(azurergname))
-      if [ "$existResponse" == "true" ]; then
-        echo Deleting project resource group
-        az group delete --name $(azurergname) --yes
-      else
-        echo Project resource group did not exist
-      fi
-      echo Done Cleanup
-    displayName: 'Backup Cleanup'
-    condition: or(canceled(),failed())
-
-  - task: CreateWorkItem@1
-    inputs:
-      workItemType: 'Issue'
-      title: $(System.TeamProject) - Build $(Build.BuildNumber) Failed
-      assignedTo: 'Mario Bourgoin '
-      associate: true
-      teamProject: $(System.TeamProject)
-
-      fieldMappings: |
-        Description=Branch: Branch $(Build.SourceBranch) failed to build. Go to Boards>WorkItems and tag the failure type.
-    displayName: 'Create work item on failure'
-    condition: failed()
+# MLHyperparameterTuning Pipeline
+#
+# A Github Service Connection must also be created with the name "AIArchitecturesAndPractices-GitHub"
+
+resources:
+  repositories:
+  - repository: aitemplates
+    type: github
+    name: microsoft/AI
+    endpoint: AIArchitecturesAndPractices-GitHub
+
+trigger:
+  branches:
+    include:
+    - master
+    - mabou/instrument
+
+pr:
+  autoCancel: true
+  branches:
+    include:
+    - master
+    - mabou/instrument
+
+stages:
+- template: .ci/stages/deploy_notebooks_stages_v2.yml@aitemplates
+  parameters:
+    jobDisplayName: MLScoreDeployJob
+    DefaultWorkingDirectory: $(System.DefaultWorkingDirectory)
+    workload_vars: ../vars/mlhyperparametertuning_vars.yml
diff --git a/.ci/vars/agce_devops_sub_vars.yml b/.ci/vars/agce_devops_sub_vars.yml
new file mode 100644
index 0000000..02070e3
--- /dev/null
+++ b/.ci/vars/agce_devops_sub_vars.yml
@@ -0,0 +1,3 @@
+variables:
+  azure_subscription: 0ca618d2-22a8-413a-96d0-0f1b531129c3
+  azureSubscription: AG-AzureCAT-AIDevOps-Test-COGSNonProd-IO1685734(0ca618d2-22a8-413a-96d0-0f1b531129c3)
diff --git a/.ci/vars/mlhyperparametertuning_vars.yml b/.ci/vars/mlhyperparametertuning_vars.yml
new file mode 100644
index 0000000..7d0d820
--- /dev/null
+++ b/.ci/vars/mlhyperparametertuning_vars.yml
@@ -0,0 +1,7 @@
+variables:
+  DeploymentName: MLScoreDeployJob
+  TridentWorkloadTypeShort: aimlscore
+  DeployLocation: eastus
+  ProjectLocation: "."
+  PythonPath: "."
+  Template: MLTrainDeployAMLJob.yml
diff --git a/01_Training_Script.ipynb b/01_Training_Script.ipynb
index 2e35491..d539ab5 100644
--- a/01_Training_Script.ipynb
+++ b/01_Training_Script.ipynb
@@ -440,7 +440,27 @@
    "metadata": {},
    "source": [
     "## Run the script to see that it works \n",
-    "This should take around ten minutes."
+ "Set the effort expended to train the classifier." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "estimators = 1000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run the classifier script. This should take about 10 minutes." ] }, { @@ -451,7 +471,7 @@ }, "outputs": [], "source": [ - "%run -t scripts/TrainClassifier.py --estimators 1000 --match 5 --ngrams 2 --min_child_samples 10 --save model" + "%run -t scripts/TrainClassifier.py --estimators $estimators --match 5 --ngrams 2 --min_child_samples 10 --save model" ] }, { diff --git a/03_Run_Locally.ipynb b/03_Run_Locally.ipynb index 22879c4..fa4e09f 100644 --- a/03_Run_Locally.ipynb +++ b/03_Run_Locally.ipynb @@ -39,7 +39,7 @@ "metadata": {}, "source": [ "## Azure subscription \n", - "If you have multiple subscriptions select the subscription you want to use. You may supply either the subscription's name or the subscription's ID. If you want to run this in a different location that supports HyperDrive, you may enter the one you want to use. You can also set the name of the resource group in which this tutorial will add resources. *IMPORTANT NOTE:* The last notebook in this example will delete this resource group and all associated resources." + "If you have multiple subscriptions select the subscription you want to use. You may supply either the subscription's name or the subscription's ID. If you want to run this in a different location that supports HyperDrive, you may enter the one you want to use. You can also set the name of the resource group in which this tutorial will add resources. *IMPORTANT NOTE:* The last notebook in this example will delete this resource group and all associated resources. We also define the number of estimators to use for the local run." ] }, { @@ -55,7 +55,8 @@ "subscription_name=\"YOUR_SUBSCRIPTION_NAME\"\n", "subscription_id=\"YOUR_SUBSCRIPTION_ID\"\n", "location=\"eastus\"\n", - "resource_group=\"hypetuning\"" + "resource_group=\"hypetuning\"\n", + "estimators = 1000" ] }, { @@ -179,10 +180,10 @@ "est = Estimator(source_directory=os.path.join('.', 'scripts'), \n", " entry_script='TrainClassifier.py',\n", " script_params={'--data-folder': os.path.abspath('.'),\n", - " '--estimators': '1000',\n", - " '--match': '5',\n", - " '--ngrams': '2',\n", - " '--min_child_samples': '10',\n", + " '--estimators': estimators,\n", + " '--match': 5,\n", + " '--ngrams': 2,\n", + " '--min_child_samples': 10,\n", " \"--save\": \"local_model\"},\n", " compute_target='local',\n", " conda_packages=['pandas==0.23.4',\n", diff --git a/04_Hyperparameter_Random_Search.ipynb b/04_Hyperparameter_Random_Search.ipynb index 1255112..5533e0c 100644 --- a/04_Hyperparameter_Random_Search.ipynb +++ b/04_Hyperparameter_Random_Search.ipynb @@ -202,7 +202,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This hyperparameter space specifies a grid of 9,360 unique configuration points (4 `ngrams` X 39 `match` X 30 `min_child_samples` X 2 `unweighted`). We control the resources used by the search through specifying a maximum number of configuration points to sample as `max_total_runs`." + "This hyperparameter space specifies a grid of 9,360 unique configuration points (4 `ngrams` X 39 `match` X 30 `min_child_samples` X 2 `unweighted`). We control the resources used by the search through specifying a maximum number of configuration points to sample as `max_total_runs`. 
We also define the number of estimators to use for each run." ] }, { @@ -215,7 +215,8 @@ }, "outputs": [], "source": [ - "max_total_runs = 96" + "max_total_runs = 96\n", + "estimators = 1000" ] }, { @@ -270,7 +271,7 @@ "estimator = Estimator(source_directory=os.path.join('.', 'scripts'),\n", " entry_script='TrainClassifier.py',\n", " script_params={'--data-folder': ds.as_mount(),\n", - " '--estimators': 1000},\n", + " '--estimators': estimators},\n", " compute_target=compute_target,\n", " conda_packages=['pandas==0.23.4',\n", " 'scikit-learn==0.21.3',\n", diff --git a/05_Train_Best_Model.ipynb b/05_Train_Best_Model.ipynb index 9fc101c..95015c2 100644 --- a/05_Train_Best_Model.ipynb +++ b/05_Train_Best_Model.ipynb @@ -166,8 +166,8 @@ }, "outputs": [], "source": [ - "model_estimators = 8 * int(best_parameters['--estimators'])\n", - "model_estimators" + "estimators = 8 * int(best_parameters['--estimators'])\n", + "estimators" ] }, { @@ -186,7 +186,7 @@ "ds = ws.get_default_datastore()\n", "model_parameters = best_parameters.copy()\n", "model_parameters['--data-folder'] = ds.as_mount()\n", - "model_parameters['--estimators'] = model_estimators\n", + "model_parameters['--estimators'] = estimators\n", "model_parameters['--save'] = 'FAQ_ranker'\n", "pd.Series(model_parameters, name='Value').to_frame()" ] diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ee44441..028d2eb 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -8,6 +8,7 @@ trigger: variables: - group: AzureKeyVault + # estimators: 1 jobs: - job: MLHyperparameterTuningJob @@ -44,7 +45,7 @@ jobs: source /usr/share/miniconda/etc/profile.d/conda.sh conda activate MLHyperparameterTuning echo Executing 01_Training_Script.ipynb - papermill 01_Training_Script.ipynb 01_Training_Script_Output.ipynb --log-output --no-progress-bar -k python3 + papermill 01_Training_Script.ipynb 01_Training_Script_Output.ipynb --log-output --no-progress-bar -k python3 -p estimators 1 displayName: '01_Training_Script.ipynb' - bash: | @@ -58,21 +59,21 @@ jobs: source /usr/share/miniconda/etc/profile.d/conda.sh conda activate MLHyperparameterTuning echo Executing 03_Run_Locally.ipynb - papermill 03_Run_Locally.ipynb 03_Run_Locally_Output.ipynb --log-output --no-progress-bar -k python3 -p subscription_id $(subscriptionid) -p resource_group $(azurergname) + papermill 03_Run_Locally.ipynb 03_Run_Locally_Output.ipynb --log-output --no-progress-bar -k python3 -p subscription_id $(subscriptionid) -p resource_group $(azurergname) -p estimators 1 displayName: '03_Run_Locally.ipynb' - bash: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate MLHyperparameterTuning echo Executing 04_Hyperparameter_Random_Search.ipynb - papermill 04_Hyperparameter_Random_Search.ipynb 04_Hyperparameter_Random_Search_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns) + papermill 04_Hyperparameter_Random_Search.ipynb 04_Hyperparameter_Random_Search_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns) -p estimators 1 displayName: '04_Hyperparameter_Random_Search.ipynb' - bash: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate MLHyperparameterTuning echo Executing 05_Train_Best_Model.ipynb - papermill 05_Train_Best_Model.ipynb 05_Train_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3 + papermill 05_Train_Best_Model.ipynb 05_Train_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3 -p estimators 1 displayName: '05_Train_Best_Model.ipynb' - bash: |
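Note on the `-p estimators 1` flags added to azure-pipelines.yml: each notebook now carries a cell tagged `parameters` with the default `estimators = 1000`, and papermill's `-p NAME VALUE` option injects an overriding cell immediately after that tagged cell at execution time, so CI runs train with a single estimator while interactive runs keep the full default. A minimal sketch of such a step, in the same shape as the pipeline's existing ones (the displayName suffix is illustrative, not part of the diff):

  - bash: |
      source /usr/share/miniconda/etc/profile.d/conda.sh
      conda activate MLHyperparameterTuning
      # -p injects a parameter cell overriding the notebook's estimators = 1000 default
      papermill 01_Training_Script.ipynb 01_Training_Script_Output.ipynb --log-output --no-progress-bar -k python3 -p estimators 1
    displayName: '01_Training_Script.ipynb (smoke test)'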
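Note on the rewritten .ci/azure-pipelines-v2.yml: the `resources.repositories` entry registers microsoft/AI under the alias `aitemplates`, authenticated through the "AIArchitecturesAndPractices-GitHub" service connection, and the `@aitemplates` suffix on the `template:` reference tells Azure Pipelines to resolve the stage template from that repository rather than this one. A minimal standalone sketch of the pattern, using only names that appear in the diff:

resources:
  repositories:
  - repository: aitemplates                       # alias referenced by '@aitemplates' below
    type: github
    name: microsoft/AI                            # owner/repo hosting the shared templates
    endpoint: AIArchitecturesAndPractices-GitHub  # GitHub service connection used to fetch it

stages:
- template: .ci/stages/deploy_notebooks_stages_v2.yml@aitemplates
  parameters:
    workload_vars: ../vars/mlhyperparametertuning_vars.yml  # lines up with the .ci/vars/ file added in this diff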