Commit

- Added upload_data main function.
- added azureml run config files.

Seb-Good committed Apr 30, 2019
1 parent 87bcf22 commit 468bd25
Showing 7 changed files with 270 additions and 2 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -167,3 +167,6 @@ ENV/

# mypy
.mypy_cache/

# VS Code
.vscode
15 changes: 15 additions & 0 deletions aml_config/conda_dependencies.yml
@@ -0,0 +1,15 @@
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-defaults
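
How this file is consumed is shown in the run configurations below; as a minimal sketch (illustrative, not part of this commit), the v1 azureml-core SDK can also load it directly:

from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

# Load the dependencies defined above from disk.
conda_deps = CondaDependencies(
    conda_dependencies_file_path='aml_config/conda_dependencies.yml')

# Attach them to a run configuration; AzureML provisions the environment
# itself whenever userManagedDependencies is false.
run_config = RunConfiguration(conda_dependencies=conda_deps)
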
120 changes: 120 additions & 0 deletions aml_config/docker.runconfig
@@ -0,0 +1,120 @@
# The script to run.
script: train.py
# The arguments to the script file.
arguments: []
# The name of the compute target to use for this run.
target: local
# Framework to execute inside. Allowed values are "Python", "PySpark", "CNTK", "TensorFlow", and "PyTorch".
framework: PySpark
# Communicator for the given framework. Allowed values are "None", "ParameterServer", "OpenMpi", and "IntelMpi".
communicator: None
# Automatically prepare the run environment as part of the run itself.
autoPrepareEnvironment: true
# Maximum allowed duration for the run.
maxRunDurationSeconds:
# Number of nodes to use for running the job.
nodeCount: 1
# Environment details.
environment:
# Environment variables set for the run.
environmentVariables:
EXAMPLE_ENV_VAR: EXAMPLE_VALUE
# Python details
python:
# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
userManagedDependencies: false
# The python interpreter path
interpreterPath: python
# Path to the conda dependencies file to use for this run. If a project
# contains multiple programs with different sets of dependencies, it may be
# convenient to manage those environments with separate files.
condaDependenciesFile: aml_config/conda_dependencies.yml
# Docker details
docker:
# Set True to perform this run inside a Docker container.
enabled: true
# Base image used for Docker-based runs.
baseImage: mcr.microsoft.com/azureml/base:0.2.1
# Set False if necessary to work around shared volume bugs.
sharedVolumes: true
# Run with NVidia Docker extension to support GPUs.
gpuSupport: false
# Shared memory size for Docker container. Default is 1g.
shmSize: 1g
# Extra arguments to the Docker run command.
arguments: []
# Image registry that contains the base image.
baseImageRegistry:
# DNS name or IP address of the Azure Container Registry (ACR).
address:
# The username for ACR
username:
# The password for ACR
password:
# Spark details
spark:
# List of spark repositories.
repositories:
- https://mmlspark.azureedge.net/maven
# The packages to use.
packages:
- group: com.microsoft.ml.spark
artifact: mmlspark_2.11
version: '0.12'
# Whether to precache the packages.
precachePackages: true
# Databricks details
databricks:
# List of maven libraries.
mavenLibraries: []
# List of PyPi libraries
pypiLibraries: []
# List of RCran libraries
rcranLibraries: []
# List of JAR libraries
jarLibraries: []
# List of Egg libraries
eggLibraries: []
# History details.
history:
# Enable history tracking -- this allows status, logs, metrics, and outputs
# to be collected for a run.
outputCollection: true
# Whether to take snapshots for history.
snapshotProject: true
# Spark configuration details.
spark:
# The Spark configuration.
configuration:
spark.app.name: Azure ML Experiment
spark.yarn.maxAppAttempts: 1
# HDI details.
hdi:
# Yarn deploy mode. Options are cluster and client.
yarnDeployMode: cluster
# Tensorflow details.
tensorflow:
# The number of worker tasks.
workerCount: 1
# The number of parameter server tasks.
parameterServerCount: 1
# Mpi details.
mpi:
# When using MPI, number of processes per node.
processCountPerNode: 1
# Data reference configuration details.
dataReferences: {}
# Project share datastore reference.
sourceDirectoryDataStore:
# AmlCompute details.
amlcompute:
# VM size of the cluster to be created. Allowed values are Azure VM sizes. The list of VM sizes is available at https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
vmSize:
# VM priority of the cluster to be created. Allowed values are "dedicated" and "lowpriority".
vmPriority:
# A bool that indicates if the cluster has to be retained after job completion.
retainCluster: false
# Name of the cluster to be created. If not specified, runId will be used as cluster name.
name:
# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
clusterMaxNodeCount: 1
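
The commit does not show how this configuration is submitted. A hypothetical sketch with the v1 azureml-core SDK (the experiment name and the placeholder subscription id are assumptions; the resource group and workspace names come from project.json below):

from azureml.core import Experiment, Workspace
from azureml.core.runconfig import RunConfiguration
from azureml.core.script_run_config import ScriptRunConfig

# Connect to the workspace that project.json points at.
ws = Workspace(subscription_id='<subscription-id>',
               resource_group='ml-testing',
               workspace_name='mnist-azure')

# Load aml_config/docker.runconfig by name from the project root.
run_config = RunConfiguration.load(path='.', name='docker')

# Wrap train.py with the loaded configuration and submit it.
src = ScriptRunConfig(source_directory='.', script='train.py',
                      run_config=run_config)
run = Experiment(workspace=ws, name='mnist-azure').submit(src)
run.wait_for_completion(show_output=True)
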
82 changes: 82 additions & 0 deletions aml_config/local.runconfig
@@ -0,0 +1,82 @@
{
"script": "train.py",
"arguments": [],
"target": "local",
"framework": "Python",
"communicator": "None",
"autoPrepareEnvironment": true,
"maxRunDurationSeconds": null,
"nodeCount": 1,
"environment": {
"environmentVariables": {
"EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
},
"python": {
"userManagedDependencies": true,
"interpreterPath": "/home/sebastiangoodfellow/anaconda3/envs/mnist-azure/bin/python",
"condaDependenciesFile": "aml_config/conda_dependencies.yml"
},
"docker": {
"enabled": false,
"baseImage": "mcr.microsoft.com/azureml/base:0.2.1",
"sharedVolumes": true,
"gpuSupport": false,
"shmSize": "1g",
"arguments": [],
"baseImageRegistry": {
"address": null,
"username": null,
"password": null
}
},
"spark": {
"repositories": [
"https://mmlspark.azureedge.net/maven"
],
"packages": [
{
"group": "com.microsoft.ml.spark",
"artifact": "mmlspark_2.11",
"version": "0.12"
}
],
"precachePackages": true
},
"databricks": {
"mavenLibraries": [],
"pypiLibraries": [],
"rcranLibraries": [],
"jarLibraries": [],
"eggLibraries": []
}
},
"history": {
"outputCollection": true,
"snapshotProject": true
},
"spark": {
"configuration": {
"spark.app.name": "Azure ML Experiment",
"spark.yarn.maxAppAttempts": 1
}
},
"hdi": {
"yarnDeployMode": "cluster"
},
"tensorflow": {
"workerCount": 1,
"parameterServerCount": 1
},
"mpi": {
"processCountPerNode": 1
},
"dataReferences": {},
"sourceDirectoryDataStore": null,
"amlcompute": {
"vmSize": null,
"vmPriority": null,
"retainCluster": false,
"name": null,
"clusterMaxNodeCount": 1
}
}
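
This local configuration mirrors docker.runconfig but sets userManagedDependencies to true with a hard-coded interpreterPath, so AzureML runs against the author's existing mnist-azure conda environment instead of provisioning one. The submission sketch after docker.runconfig above should apply unchanged with name='local' (again an assumption, not shown in the commit).
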
1 change: 1 addition & 0 deletions aml_config/project.json
@@ -0,0 +1 @@
{"Id": "test", "Scope": "/subscriptions/30284b70-31e1-4b93-b620-26959f80a8f9/resourceGroups/ml-testing/providers/Microsoft.MachineLearningServices/workspaces/mnist-azure/projects/test"}
4 changes: 2 additions & 2 deletions train.py
@@ -16,7 +16,7 @@


def main(args):
"""Build saved model for serving."""
"""Train MNIST tensorflow model."""
# Image shape
image_shape = (28, 28, 1)

@@ -73,7 +73,7 @@ def main(args):


def get_parser():
"""Get parser object for script predict.py."""
"""Get parser object for script train.py."""
# Initialize parser
parser = ArgumentParser(description=__doc__, formatter_class=ArgumentDefaultsHelpFormatter)

47 changes: 47 additions & 0 deletions upload_data.py
@@ -0,0 +1,47 @@
"""
upload_data.py
--------------
By: Sebastian D. Goodfellow, Ph.D., 2019
"""

# 3rd party imports
from azureml.core import Workspace
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter

# Local imports
from mnistazure.config import DATA_PATH


def main(args):
"""Upload MNIST dataset to Azure Workspace data store."""
# Get workspace
ws = Workspace(subscription_id=args.subscription_id, resource_group=args.resource_group,
workspace_name=args.workspace_name)

# Get data store
ds = ws.get_default_datastore()

# Upload MNIST dataset to data store
ds.upload(src_dir=DATA_PATH, target_path='mnist', show_progress=True)


def get_parser():
"""Get parser object for script upload_data.py."""
# Initialize parser
parser = ArgumentParser(description=__doc__, formatter_class=ArgumentDefaultsHelpFormatter)

# Setup arguments
parser.add_argument("--subscription_id", dest="subscription_id", type=str)
parser.add_argument("--resource_group", dest="resource_group", type=str)
parser.add_argument("--workspace_name", dest="workspace_name", type=str)

return parser


if __name__ == "__main__":

# Parse arguments
arguments = get_parser().parse_args()

# Run main function
main(args=arguments)
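
A hypothetical invocation, with the resource group and workspace name taken from project.json above:

python upload_data.py \
    --subscription_id 30284b70-31e1-4b93-b620-26959f80a8f9 \
    --resource_group ml-testing \
    --workspace_name mnist-azure

The dataset then sits under the mnist path on the workspace's default datastore; a training run could mount it with something like ds.path('mnist').as_mount() (a v1 SDK call assumed here, not shown in this commit).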
