From 8edfaa2653f647d8ea04b9ee5a449e06f91ef69f Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Fri, 6 Dec 2019 18:34:42 -0500 Subject: [PATCH 1/4] Update azure-pipelines-v2.yml --- .ci/azure-pipelines-v2.yml | 277 ++++++++++++++++++++----------------- 1 file changed, 153 insertions(+), 124 deletions(-) diff --git a/.ci/azure-pipelines-v2.yml b/.ci/azure-pipelines-v2.yml index abfb6df..1ddd0f1 100644 --- a/.ci/azure-pipelines-v2.yml +++ b/.ci/azure-pipelines-v2.yml @@ -3,132 +3,161 @@ trigger: none variables: - BuildConfiguration: Release - BuildBinariesDirectory: $(Build.BinariesDirectory) - BuildPlatform: any cpu - DotNetCoreBuildVersion: 2.2.108 - DotNetRuntimeTarget: ubuntu.18.04-x64 - AgentToolsDirectory: $(Agent.ToolsDirectory) CloudPlatform: AzureCloud - ProductName: Trident - TridentWorkloadType: $(WorkloadType) - TridentWorkloadTypeShort: $(WorkloadTypeShort) + TridentWorkloadType: ai-ml-score-int + TridentWorkloadTypeShort: aimlscore DeployLocation: eastus - Agent: agce-ai azureSubscription: AG-AzureCAT-AIDevOps-Test-COGSNonProd-IO1685734(0ca618d2-22a8-413a-96d0-0f1b531129c3) azure_subscription: 0ca618d2-22a8-413a-96d0-0f1b531129c3 + TestPostfix: "" + DeploymentName: MLScoreDeployJob + ProjectLocation: "." + PythonPath: "." + Agent: "Hosted Ubuntu 1604" -jobs: -- job: MLHyperparameterTuningJob - timeoutInMinutes: 300 - cancelTimeoutInMinutes: 2 - pool: - vmImage: 'Ubuntu-16.04' - - steps: - - bash: | - source /usr/share/miniconda/etc/profile.d/conda.sh - which conda - conda env create -f environment.yml - conda env list - conda activate MLHyperparameterTuning - conda env list - echo Login Azure Account - az login -t $(sptenent) --service-principal -u $(spidentity) --password $(spsecret) - echo Try and figure out what account set takes - az account set -h - echo Try and set it. 
- az account set --subscription $(subscriptionid) -# papermill 01_Data_Prep.ipynb 01_Data_Prep_Output.ipynb --log-output --no-progress-bar -k python3 - displayName: 'Configuration' - - - bash: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate MLHyperparameterTuning - echo Executing 00_Data_Prep.ipynb - papermill 00_Data_Prep.ipynb 00_Data_Prep_Output.ipynb --log-output --no-progress-bar -k python3 - displayName: '00_Data_Prep.ipynb' - - - bash: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate MLHyperparameterTuning - echo Executing 01_Training_Script.ipynb - papermill 01_Training_Script.ipynb 01_Training_Script_Output.ipynb --log-output --no-progress-bar -k python3 - displayName: '01_Training_Script.ipynb' - - - bash: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate MLHyperparameterTuning - echo Executing 02_Testing_Script.ipynb - papermill 02_Testing_Script.ipynb 02_Testing_Script_Output.ipynb --log-output --no-progress-bar -k python3 - displayName: '02_Testing_Script.ipynb' - - - bash: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate MLHyperparameterTuning - echo Executing 03_Run_Locally.ipynb - papermill 03_Run_Locally.ipynb 03_Run_Locally_Output.ipynb --log-output --no-progress-bar -k python3 -p selected_subscription $(subscriptionid) -p resource_group $(azurergname) - displayName: '03_Run_Locally.ipynb' - - - bash: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate MLHyperparameterTuning - echo Executing 04_Hyperparameter_Random_Search.ipynb - papermill 04_Hyperparameter_Random_Search.ipynb 04_Hyperparameter_Random_Search_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns) - displayName: '04_Hyperparameter_Random_Search.ipynb' - - - bash: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate MLHyperparameterTuning - echo Executing 05_Train_Best_Model.ipynb - papermill 05_Train_Best_Model.ipynb 
05_Train_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3 - displayName: '05_Train_Best_Model.ipynb' - - - bash: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate MLHyperparameterTuning - echo Executing 06_Test_Best_Model.ipynb - papermill 06_Test_Best_Model.ipynb 06_Test_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3 - displayName: '06_Test_Best_Model.ipynb' - - - bash: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate MLHyperparameterTuning - echo Executing 07_Train_With_AML_Pipeline.ipynb - papermill 07_Train_With_AML_Pipeline.ipynb 07_Train_With_AML_Pipeline_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns) - displayName: '07_Train_With_AML_Pipeline.ipynb' - - - bash: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate MLHyperparameterTuning - echo Executing 08_Tear_Down.ipynb - papermill 08_Tear_Down.ipynb 08_Tear_Down_Output.ipynb --log-output --no-progress-bar -k python3 - displayName: '08_Tear_Down.ipynb' - - - bash: | - source /usr/share/miniconda/etc/profile.d/conda.sh - conda activate MLHyperparameterTuning - echo Execute Resource Group Delete - existResponse=$(az group exists -n $(azurergname)) - if [ "$existResponse" == "true" ]; then - echo Deleting project resource group - az group delete --name $(azurergname) --yes - else - echo Project resource group did not exist - fi - echo Done Cleanup - displayName: 'Backup Cleanup' - condition: or(canceled(),failed()) - - - task: CreateWorkItem@1 - inputs: - workItemType: 'Issue' - title: $(System.TeamProject) - Build $(Build.BuildNumber) Failed - assignedTo: 'Mario Bourgoin ' - associate: true - teamProject: $(System.TeamProject) - - fieldMappings: | - Description=Branch: Branch $(Build.SourceBranch) failed to build. Go to Boards>WorkItems and tag the failure type. 
- displayName: 'Create work item on failure' - condition: failed() +# In additional to the above variables, the "template" parameter of the last step must also be hard coded for each workload + +trigger: + branches: + include: + - mabou/instrument + +stages: +- stage: stable + dependsOn: [] + jobs: + - job: build_deploy_ai + displayName: 'Build deploy AI' + + timeoutInMinutes: 180 + + workspace: + clean: all + + variables: + DeploymentGuidTag: $[ dependencies.build_relayer_sources.outputs['GenDeployGuidTag.DeploymentGuid'] ] + DeploymentTimeStamp: $[ dependencies.build_relayer_sources.outputs['GenDeployTimeStamp.timeStamp'] ] + EnvironmentPrefix: $(TridentWorkloadType)-$(CloudPlatform)-$(DeployLocation)$(TestPostfix) + ResourcePrefix: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) + EnvironmentContext: $(EnvironmentPrefix)-$(DeploymentGuidTag) + AIResourceGroupName: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) + + steps: + + - template: .ci/steps/docker_clean.yml@aitemplates + + - template: .ci/steps/deploy_notebook_steps.yml@aitemplates + parameters: + deployment_name: $(DeploymentName) + template: MLTrainDeployAMLJob.yml + azureSubscription: $(azureSubscription) + azure_subscription: $(azure_subscription) + azureresourcegroup: $(AIResourceGroupName) + workspacename: $(TridentWorkloadTypeShort)-$(DeployLocation) + azureregion: $(DeployLocation) + aksimagename: myimage + environment: $(EnvironmentContext) + doCleanup: False + alias: $(Build.QueuedBy) + project: $(TridentWorkloadTypeShort) + agent: $(Agent) + ENVIRONMENT_PREFIX: $(EnvironmentPrefix) + deploymentguidtag: $(DeploymentGuidTag) + aks_name: $(TridentWorkloadTypeShort)$(Deploy_Location_Short) + python_path: $(System.DefaultWorkingDirectory)$(PythonPath) + location: $(ProjectLocation) + python_secret_root: "./" + +- stage: flight_release + dependsOn: [] + jobs: + - job: build_deploy_ai + displayName: 'Build deploy AI' + + timeoutInMinutes: 180 + + workspace: + clean: all + + 
variables: + DeploymentGuidTag: $[ dependencies.build_relayer_sources.outputs['GenDeployGuidTag.DeploymentGuid'] ] + DeploymentTimeStamp: $[ dependencies.build_relayer_sources.outputs['GenDeployTimeStamp.timeStamp'] ] + TestPostfix: "-release" + EnvironmentPrefix: $(TridentWorkloadType)-$(CloudPlatform)-$(DeployLocation)$(TestPostfix) + ResourcePrefix: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) + EnvironmentContext: $(EnvironmentPrefix)-$(DeploymentGuidTag) + AIResourceGroupName: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) + + steps: + + - template: .ci/steps/docker_clean.yml@aitemplates + + - template: .ci/steps/deploy_notebook_steps.yml@aitemplates + parameters: + deployment_name: $(DeploymentName) + template: MLTrainDeployAMLJob.yml + azureSubscription: $(azureSubscription) + azure_subscription: $(azure_subscription) + azureresourcegroup: $(AIResourceGroupName) + workspacename: $(TridentWorkloadTypeShort)-$(DeployLocation) + azureregion: $(DeployLocation) + aksimagename: myimage + environment: $(EnvironmentContext) + doCleanup: False + alias: $(Build.QueuedBy) + project: $(TridentWorkloadTypeShort) + agent: $(Agent) + ENVIRONMENT_PREFIX: $(EnvironmentPrefix) + deploymentguidtag: $(DeploymentGuidTag) + aks_name: $(TridentWorkloadTypeShort)$(Deploy_Location_Short) + python_path: $(System.DefaultWorkingDirectory)$(PythonPath) + location: $(ProjectLocation) + python_secret_root: "./" + flighting_release: true + +- stage: flight_preview + dependsOn: [] + jobs: + - job: build_deploy_ai + displayName: 'Build deploy AI' + + timeoutInMinutes: 180 + + workspace: + clean: all + + variables: + DeploymentGuidTag: $[ dependencies.build_relayer_sources.outputs['GenDeployGuidTag.DeploymentGuid'] ] + DeploymentTimeStamp: $[ dependencies.build_relayer_sources.outputs['GenDeployTimeStamp.timeStamp'] ] + TestPostfix: "-preview" + EnvironmentPrefix: $(TridentWorkloadType)-$(CloudPlatform)-$(DeployLocation)$(TestPostfix) + ResourcePrefix: 
$(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) + EnvironmentContext: $(EnvironmentPrefix)-$(DeploymentGuidTag) + AIResourceGroupName: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) + + steps: + + - template: .ci/steps/docker_clean.yml@aitemplates + + - template: .ci/steps/deploy_notebook_steps.yml@aitemplates + parameters: + deployment_name: $(DeploymentName) + template: MLTrainDeployAMLJob.yml + azureSubscription: $(azureSubscription) + azure_subscription: $(azure_subscription) + azureresourcegroup: $(AIResourceGroupName) + workspacename: $(TridentWorkloadTypeShort)-$(DeployLocation) + azureregion: $(DeployLocation) + aksimagename: myimage + environment: $(EnvironmentContext) + doCleanup: False + alias: $(Build.QueuedBy) + project: $(TridentWorkloadTypeShort) + agent: $(Agent) + ENVIRONMENT_PREFIX: $(EnvironmentPrefix) + deploymentguidtag: $(DeploymentGuidTag) + aks_name: $(TridentWorkloadTypeShort)$(Deploy_Location_Short) + python_path: $(System.DefaultWorkingDirectory)$(PythonPath) + location: $(ProjectLocation) + python_secret_root: "./" + flighting_preview: true From e0d7be0520fe0fe5482a8b5416b9d5ab4660ffad Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Fri, 6 Dec 2019 18:35:16 -0500 Subject: [PATCH 2/4] Create agce_devops_sub_vars.yml --- .ci/vars/agce_devops_sub_vars.yml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .ci/vars/agce_devops_sub_vars.yml diff --git a/.ci/vars/agce_devops_sub_vars.yml b/.ci/vars/agce_devops_sub_vars.yml new file mode 100644 index 0000000..02070e3 --- /dev/null +++ b/.ci/vars/agce_devops_sub_vars.yml @@ -0,0 +1,3 @@ +variables: + azure_subscription: 0ca618d2-22a8-413a-96d0-0f1b531129c3 + azureSubscription: AG-AzureCAT-AIDevOps-Test-COGSNonProd-IO1685734(0ca618d2-22a8-413a-96d0-0f1b531129c3) From 8a36e026a3e8830d4e5ab814cbc17ec330b162b7 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Fri, 6 Dec 2019 18:35:34 -0500 Subject: [PATCH 3/4] Create mlhyperparametertuning_vars.yml --- 
.ci/vars/mlhyperparametertuning_vars.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .ci/vars/mlhyperparametertuning_vars.yml diff --git a/.ci/vars/mlhyperparametertuning_vars.yml b/.ci/vars/mlhyperparametertuning_vars.yml new file mode 100644 index 0000000..7d0d820 --- /dev/null +++ b/.ci/vars/mlhyperparametertuning_vars.yml @@ -0,0 +1,7 @@ +variables: + DeploymentName: MLScoreDeployJob + TridentWorkloadTypeShort: aimlscore + DeployLocation: eastus + ProjectLocation: "." + PythonPath: "." + Template: MLTrainDeployAMLJob.yml From 8098844a993eecb6e9b32ee92f1729b41611e665 Mon Sep 17 00:00:00 2001 From: Daniel Ciborowski Date: Fri, 6 Dec 2019 18:51:10 -0500 Subject: [PATCH 4/4] Update azure-pipelines-v2.yml --- .ci/azure-pipelines-v2.yml | 177 +++++-------------------------------- 1 file changed, 22 insertions(+), 155 deletions(-) diff --git a/.ci/azure-pipelines-v2.yml b/.ci/azure-pipelines-v2.yml index 1ddd0f1..e9500a4 100644 --- a/.ci/azure-pipelines-v2.yml +++ b/.ci/azure-pipelines-v2.yml @@ -1,163 +1,30 @@ -# MLHyperparameterTuning Pipeline +# MLHyperparameterTuning Pipeline +# +# A GitHub Service Connection must also be created with the name "AIArchitecturesAndPractices-GitHub" -trigger: none - -variables: - CloudPlatform: AzureCloud - TridentWorkloadType: ai-ml-score-int - TridentWorkloadTypeShort: aimlscore - DeployLocation: eastus - azureSubscription: AG-AzureCAT-AIDevOps-Test-COGSNonProd-IO1685734(0ca618d2-22a8-413a-96d0-0f1b531129c3) - azure_subscription: 0ca618d2-22a8-413a-96d0-0f1b531129c3 - TestPostfix: "" - DeploymentName: MLScoreDeployJob - ProjectLocation: "." - PythonPath: "." 
- Agent: "Hosted Ubuntu 1604" - -# In additional to the above variables, the "template" parameter of the last step must also be hard coded for each workload +resources: + repositories: + - repository: aitemplates + type: github + name: microsoft/AI + endpoint: AIArchitecturesAndPractices-GitHub trigger: branches: include: + - master - mabou/instrument -stages: -- stage: stable - dependsOn: [] - jobs: - - job: build_deploy_ai - displayName: 'Build deploy AI' - - timeoutInMinutes: 180 - - workspace: - clean: all - - variables: - DeploymentGuidTag: $[ dependencies.build_relayer_sources.outputs['GenDeployGuidTag.DeploymentGuid'] ] - DeploymentTimeStamp: $[ dependencies.build_relayer_sources.outputs['GenDeployTimeStamp.timeStamp'] ] - EnvironmentPrefix: $(TridentWorkloadType)-$(CloudPlatform)-$(DeployLocation)$(TestPostfix) - ResourcePrefix: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) - EnvironmentContext: $(EnvironmentPrefix)-$(DeploymentGuidTag) - AIResourceGroupName: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) - - steps: - - - template: .ci/steps/docker_clean.yml@aitemplates - - - template: .ci/steps/deploy_notebook_steps.yml@aitemplates - parameters: - deployment_name: $(DeploymentName) - template: MLTrainDeployAMLJob.yml - azureSubscription: $(azureSubscription) - azure_subscription: $(azure_subscription) - azureresourcegroup: $(AIResourceGroupName) - workspacename: $(TridentWorkloadTypeShort)-$(DeployLocation) - azureregion: $(DeployLocation) - aksimagename: myimage - environment: $(EnvironmentContext) - doCleanup: False - alias: $(Build.QueuedBy) - project: $(TridentWorkloadTypeShort) - agent: $(Agent) - ENVIRONMENT_PREFIX: $(EnvironmentPrefix) - deploymentguidtag: $(DeploymentGuidTag) - aks_name: $(TridentWorkloadTypeShort)$(Deploy_Location_Short) - python_path: $(System.DefaultWorkingDirectory)$(PythonPath) - location: $(ProjectLocation) - python_secret_root: "./" - -- stage: flight_release - dependsOn: [] - jobs: - - job: 
build_deploy_ai - displayName: 'Build deploy AI' - - timeoutInMinutes: 180 - - workspace: - clean: all - - variables: - DeploymentGuidTag: $[ dependencies.build_relayer_sources.outputs['GenDeployGuidTag.DeploymentGuid'] ] - DeploymentTimeStamp: $[ dependencies.build_relayer_sources.outputs['GenDeployTimeStamp.timeStamp'] ] - TestPostfix: "-release" - EnvironmentPrefix: $(TridentWorkloadType)-$(CloudPlatform)-$(DeployLocation)$(TestPostfix) - ResourcePrefix: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) - EnvironmentContext: $(EnvironmentPrefix)-$(DeploymentGuidTag) - AIResourceGroupName: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) - - steps: - - - template: .ci/steps/docker_clean.yml@aitemplates - - - template: .ci/steps/deploy_notebook_steps.yml@aitemplates - parameters: - deployment_name: $(DeploymentName) - template: MLTrainDeployAMLJob.yml - azureSubscription: $(azureSubscription) - azure_subscription: $(azure_subscription) - azureresourcegroup: $(AIResourceGroupName) - workspacename: $(TridentWorkloadTypeShort)-$(DeployLocation) - azureregion: $(DeployLocation) - aksimagename: myimage - environment: $(EnvironmentContext) - doCleanup: False - alias: $(Build.QueuedBy) - project: $(TridentWorkloadTypeShort) - agent: $(Agent) - ENVIRONMENT_PREFIX: $(EnvironmentPrefix) - deploymentguidtag: $(DeploymentGuidTag) - aks_name: $(TridentWorkloadTypeShort)$(Deploy_Location_Short) - python_path: $(System.DefaultWorkingDirectory)$(PythonPath) - location: $(ProjectLocation) - python_secret_root: "./" - flighting_release: true - -- stage: flight_preview - dependsOn: [] - jobs: - - job: build_deploy_ai - displayName: 'Build deploy AI' - - timeoutInMinutes: 180 - - workspace: - clean: all - - variables: - DeploymentGuidTag: $[ dependencies.build_relayer_sources.outputs['GenDeployGuidTag.DeploymentGuid'] ] - DeploymentTimeStamp: $[ dependencies.build_relayer_sources.outputs['GenDeployTimeStamp.timeStamp'] ] - TestPostfix: "-preview" - 
EnvironmentPrefix: $(TridentWorkloadType)-$(CloudPlatform)-$(DeployLocation)$(TestPostfix) - ResourcePrefix: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) - EnvironmentContext: $(EnvironmentPrefix)-$(DeploymentGuidTag) - AIResourceGroupName: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix) - - steps: - - - template: .ci/steps/docker_clean.yml@aitemplates +pr: + autoCancel: true + branches: + include: + - master + - mabou/instrument - - template: .ci/steps/deploy_notebook_steps.yml@aitemplates - parameters: - deployment_name: $(DeploymentName) - template: MLTrainDeployAMLJob.yml - azureSubscription: $(azureSubscription) - azure_subscription: $(azure_subscription) - azureresourcegroup: $(AIResourceGroupName) - workspacename: $(TridentWorkloadTypeShort)-$(DeployLocation) - azureregion: $(DeployLocation) - aksimagename: myimage - environment: $(EnvironmentContext) - doCleanup: False - alias: $(Build.QueuedBy) - project: $(TridentWorkloadTypeShort) - agent: $(Agent) - ENVIRONMENT_PREFIX: $(EnvironmentPrefix) - deploymentguidtag: $(DeploymentGuidTag) - aks_name: $(TridentWorkloadTypeShort)$(Deploy_Location_Short) - python_path: $(System.DefaultWorkingDirectory)$(PythonPath) - location: $(ProjectLocation) - python_secret_root: "./" - flighting_preview: true +stages: +- template: .ci/stages/deploy_notebooks_stages_v2.yml@aitemplates + parameters: + jobDisplayName: MLScoreDeployJob + DefaultWorkingDirectory: $(System.DefaultWorkingDirectory) + workload_vars: ../vars/mlhyperparametertuning_vars.yml