diff --git a/.ci/azure-pipelines-v2.yml b/.ci/azure-pipelines-v2.yml
index 471738b..478ead2 100644
--- a/.ci/azure-pipelines-v2.yml
+++ b/.ci/azure-pipelines-v2.yml
@@ -1,76 +1,138 @@
-# MLAKSDeploy Pipeline
-resources:
-  repositories:
-    - repository: aitemplates
-      type: github
-      name: microsoft/AI
-      endpoint: AIArchitecturesAndPractices-GitHub
+# MLHyperparameterTuning Pipeline
+
+trigger:
+  batch: true
+  branches:
+    include:
+    - master
 
 variables:
+  BuildConfiguration: Release
+  BuildBinariesDirectory: $(Build.BinariesDirectory)
+  BuildPlatform: any cpu
+  DotNetCoreBuildVersion: 2.2.108
+  DotNetRuntimeTarget: ubuntu.18.04-x64
+  AgentToolsDirectory: $(Agent.ToolsDirectory)
   CloudPlatform: AzureCloud
-  TridentWorkloadType: ai-ml-score-int
-  TridentWorkloadTypeShort: aimlscore
+  ProductName: Trident
+  TridentWorkloadType: $(WorkloadType)
+  TridentWorkloadTypeShort: $(WorkloadTypeShort)
   DeployLocation: eastus
+  Agent: agce-ai
   azureSubscription: AG-AzureCAT-AIDevOps-Test-COGSNonProd-IO1685734(0ca618d2-22a8-413a-96d0-0f1b531129c3)
   azure_subscription: 0ca618d2-22a8-413a-96d0-0f1b531129c3
-  TestPostfix: ""
-  DeploymentName: MLScoreDeployJob
-  ProjectLocation: "."
-  PythonPath: "."
-  Agent: "Hosted Ubuntu 1604"
 
-# In additional to the above variables, the "template" parameter of the last step must also be hard coded for each workload
+jobs:
+- job: MLHyperparameterTuningJob
+  timeoutInMinutes: 300
+  cancelTimeoutInMinutes: 2
+  pool:
+    vmImage: 'Ubuntu-16.04'
 
-trigger:
-  batch: true
-  branches:
-    include:
-    - master
+  steps:
+  - bash: |
+      source /usr/share/miniconda/etc/profile.d/conda.sh
+      which conda
+      conda env create -f environment.yml
+      conda env list
+      conda activate MLHyperparameterTuning
+      conda env list
+      echo Login Azure Account
+      az login -t $(sptenent) --service-principal -u $(spidentity) --password $(spsecret)
+      echo Try and figure out what account set takes
+      az account set -h
+      echo Try and set it.
+      az account set --subscription $(subscriptionid)
+# papermill 01_Data_Prep.ipynb 01_Data_Prep_Output.ipynb --log-output --no-progress-bar -k python3
+    displayName: 'Configuration'
 
-jobs:
-- job: build_deploy_ai
-  displayName: 'Build deploy AI'
+  - bash: |
+      source /usr/share/miniconda/etc/profile.d/conda.sh
+      conda activate MLHyperparameterTuning
+      echo Executing 00_Data_Prep.ipynb
+      papermill 00_Data_Prep.ipynb 00_Data_Prep_Output.ipynb --log-output --no-progress-bar -k python3
+    displayName: '00_Data_Prep.ipynb'
 
-  timeoutInMinutes: 180
+  - bash: |
+      source /usr/share/miniconda/etc/profile.d/conda.sh
+      conda activate MLHyperparameterTuning
+      echo Executing 01_Training_Script.ipynb
+      papermill 01_Training_Script.ipynb 01_Training_Script_Output.ipynb --log-output --no-progress-bar -k python3
+    displayName: '01_Training_Script.ipynb'
+
+  - bash: |
+      source /usr/share/miniconda/etc/profile.d/conda.sh
+      conda activate MLHyperparameterTuning
+      echo Executing 02_Testing_Script.ipynb
+      papermill 02_Testing_Script.ipynb 02_Testing_Script_Output.ipynb --log-output --no-progress-bar -k python3
+    displayName: '02_Testing_Script.ipynb'
+
+  - bash: |
+      source /usr/share/miniconda/etc/profile.d/conda.sh
+      conda activate MLHyperparameterTuning
+      echo Executing 03_Run_Locally.ipynb
+      papermill 03_Run_Locally.ipynb 03_Run_Locally_Output.ipynb --log-output --no-progress-bar -k python3 -p selected_subscription $(subscriptionid) -p resource_group $(azurergname)
+    displayName: '03_Run_Locally.ipynb'
 
-  workspace:
-    clean: all
+  - bash: |
+      source /usr/share/miniconda/etc/profile.d/conda.sh
+      conda activate MLHyperparameterTuning
+      echo Executing 04_Hyperparameter_Random_Search.ipynb
+      papermill 04_Hyperparameter_Random_Search.ipynb 04_Hyperparameter_Random_Search_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns)
+    displayName: '04_Hyperparameter_Random_Search.ipynb'
 
-  variables:
-    DeploymentGuidTag: $[ dependencies.build_relayer_sources.outputs['GenDeployGuidTag.DeploymentGuid'] ]
-    DeploymentTimeStamp: $[ dependencies.build_relayer_sources.outputs['GenDeployTimeStamp.timeStamp'] ]
-    EnvironmentPrefix: $(TridentWorkloadType)-$(CloudPlatform)-$(DeployLocation)$(TestPostfix)
-    ResourcePrefix: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix)
-    EnvironmentContext: $(EnvironmentPrefix)-$(DeploymentGuidTag)
-    AIResourceGroupName: $(TridentWorkloadTypeShort)-$(DeployLocation)$(TestPostfix)
+  - bash: |
+      source /usr/share/miniconda/etc/profile.d/conda.sh
+      conda activate MLHyperparameterTuning
+      echo Executing 05_Train_Best_Model.ipynb
+      papermill 05_Train_Best_Model.ipynb 05_Train_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3
+    displayName: '05_Train_Best_Model.ipynb'
 
-  steps:
+  - bash: |
+      source /usr/share/miniconda/etc/profile.d/conda.sh
+      conda activate MLHyperparameterTuning
+      echo Executing 06_Test_Best_Model.ipynb
+      papermill 06_Test_Best_Model.ipynb 06_Test_Best_Model_Output.ipynb --log-output --no-progress-bar -k python3
+    displayName: '06_Test_Best_Model.ipynb'
 
-  - template: .ci/steps/docker_clean.yml@aitemplates
+  - bash: |
+      source /usr/share/miniconda/etc/profile.d/conda.sh
+      conda activate MLHyperparameterTuning
+      echo Executing 07_Train_With_AML_Pipeline.ipynb
+      papermill 07_Train_With_AML_Pipeline.ipynb 07_Train_With_AML_Pipeline_Output.ipynb --log-output --no-progress-bar -k python3 -p max_total_runs $(dsmaxruns)
+    displayName: '07_Train_With_AML_Pipeline.ipynb'
+
+  - bash: |
+      source /usr/share/miniconda/etc/profile.d/conda.sh
+      conda activate MLHyperparameterTuning
+      echo Executing 08_Tear_Down.ipynb
+      papermill 08_Tear_Down.ipynb 08_Tear_Down_Output.ipynb --log-output --no-progress-bar -k python3
+    displayName: '08_Tear_Down.ipynb'
 
   - bash: |
-      Deploy_Location_Short=$(echo $DeployLocation | cut -c1-12)
-      echo "##vso[task.setvariable variable=Deploy_Location_Short]$Deploy_Location_Short"
-    displayName: "Create short name for AKS"
+      source /usr/share/miniconda/etc/profile.d/conda.sh
+      conda activate MLHyperparameterTuning
+      echo Execute Resource Group Delete
+      existResponse=$(az group exists -n $(azurergname))
+      if [ "$existResponse" == "true" ]; then
+        echo Deleting project resource group
+        az group delete --name $(azurergname) --yes
+      else
+        echo Project resource group did not exist
+      fi
+      echo Done Cleanup
+    displayName: 'Backup Cleanup'
+    condition: or(canceled(),failed())
 
-  - template: .ci/steps/deploy_notebook_steps.yml@aitemplates
-    parameters:
-      deployment_name: $(DeploymentName)
-      template: MLScoreDeployJob.yml
-      azureSubscription: $(azureSubscription)
-      azure_subscription: $(azure_subscription)
-      azureresourcegroup: $(AIResourceGroupName)
-      workspacename: $(TridentWorkloadTypeShort)-$(DeployLocation)
-      azureregion: $(DeployLocation)
-      aksimagename: myimage
-      environment: $(EnvironmentContext)
-      doCleanup: False
-      alias: $(Build.QueuedBy)
-      project: $(TridentWorkloadTypeShort)
-      agent: $(Agent)
-      ENVIRONMENT_PREFIX: $(EnvironmentPrefix)
-      deploymentguidtag: $(DeploymentGuidTag)
-      aks_name: $(TridentWorkloadTypeShort)$(Deploy_Location_Short)
-      python_path: $(System.DefaultWorkingDirectory)$(PythonPath)
-      location: $(ProjectLocation)
-      python_secret_root: "./"
+  - task: CreateWorkItem@1
+    inputs:
+      workItemType: 'Issue'
+      title: $(System.TeamProject) - Build $(Build.BuildNumber) Failed
+      assignedTo: 'Mario Bourgoin '
+      associate: true
+      teamProject: $(System.TeamProject)
+
+      fieldMappings: |
+        Description=Branch: Branch $(Build.SourceBranch) failed to build. Go to Boards>WorkItems and tag the failure type.
+    displayName: 'Create work item on failure'
+    condition: failed()
diff --git a/03_Run_Locally.ipynb b/03_Run_Locally.ipynb
index 20d75c8..6461b4e 100644
--- a/03_Run_Locally.ipynb
+++ b/03_Run_Locally.ipynb
@@ -52,7 +52,7 @@
    },
    "outputs": [],
    "source": [
-    "subscription_id=\"YOUR_SUBSCRIPTION\"\n",
+    "selected_subscription=\"YOUR_SUBSCRIPTION\"\n",
     "location=\"eastus\"\n",
     "resource_group=\"hypetuning\""
    ]
@@ -92,7 +92,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%%bash -s \"$subscription_id\"\n",
+    "%%bash -s \"$selected_subscription\"\n",
     "az account set --subscription \"$1\"\n",
     "az account show -o table"
    ]
diff --git a/environment.yml b/environment.yml
index c5206ad..0176f5f 100644
--- a/environment.yml
+++ b/environment.yml
@@ -5,11 +5,11 @@ dependencies:
   - python=3.6
   - pip==19.2.3
   - nb_conda_kernels==2.2.2
+  - papermill==1.2.0
   - pandas==0.23.4
   - scikit-learn==0.21.3
   - lightgbm==2.2.1
   - pip:
-    - papermill==1.1.0
     - prompt_toolkit==2.0.9
     - azure-cli==2.0.75
     - azureml-sdk[notebooks]==1.0.69