cleaner variables (#99)

microsoft · Nov 20, 2019 · 1eb4a42 · 1eb4a42
1 parent 5372bb0
commit 1eb4a42
Show file tree

Hide file tree

Showing 18 changed files with 328 additions and 221 deletions.
diff --git a/.env.example b/.env.example
@@ -1,52 +1,39 @@
 # Azure Subscription Variables
 SUBSCRIPTION_ID = ''
-LOCATION = ''
+LOCATION = 'westeurope'
 TENANT_ID = ''
 BASE_NAME = ''
 SP_APP_ID = ''
 SP_APP_SECRET = ''
+RESOURCE_GROUP = 'mlops-rg'
 
 # Mock build/release ID for local testing - update ReleaseID each "release"
 BUILD_BUILDID = '001'
 RELEASE_RELEASEID = '001'
 
 # Azure ML Workspace Variables
+WORKSPACE_NAME = ''
 EXPERIMENT_NAME = ''
-SCRIPT_FOLDER = './'
 
 # AML Compute Cluster Config
-AML_COMPUTE_CLUSTER_NAME = ''
-AML_COMPUTE_CLUSTER_CPU_SKU = ''
-AML_CLUSTER_MAX_NODES = ''
-AML_CLUSTER_MIN_NODES = ''
+AML_COMPUTE_CLUSTER_NAME = 'train-cluster'
+AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2'
+AML_CLUSTER_MAX_NODES = '4'
+AML_CLUSTER_MIN_NODES = '0'
 AML_CLUSTER_PRIORITY = 'lowpriority'
 # Training Config
 MODEL_NAME = 'sklearn_regression_model.pkl'
 MODEL_VERSION = '1'
 TRAIN_SCRIPT_PATH = 'training/train.py'
 # AML Pipeline Config
-TRAINING_PIPELINE_NAME = ''
-PIPELINE_CONDA_PATH = 'aml_config/conda_dependencies.yml'
+TRAINING_PIPELINE_NAME = 'Training Pipeline'
 MODEL_PATH = ''
 EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py'
 REGISTER_SCRIPT_PATH = 'register/register_model.py'
 SOURCES_DIR_TRAIN = 'code'
 
-# These are not mandatory for the core workflow
-# Remote VM Config
-REMOTE_VM_NAME = ''
-REMOTE_VM_USERNAME = ''
-REMOTE_VM_PASSWORD = ''
-REMOTE_VM_IP = ''
-# Image config
-IMAGE_NAME = ''
-IMAGE_DESCRIPTION = ''
-IMAGE_VERSION = ''
-# ACI Config
-ACI_CPU_CORES = ''
-ACI_MEM_GB = ''
-ACI_DESCRIPTION = ''
-
 # Optional. Used by a training pipeline with R on Databricks
 DB_CLUSTER_ID = ''
-DATABRICKS_COMPUTE_NAME = ''
+
+# Optional. Container Image name for image creation
+IMAGE_NAME = 'ml-trained'
diff --git a/.gitignore b/.gitignore
@@ -24,6 +24,7 @@ wheels/
 .installed.cfg
 *.egg
 MANIFEST
+venv/
 
 # PyInstaller
 #  Usually these files are written by a python script from a template

diff --git a/.pipelines/azdo-ci-build-train.yml b/.pipelines/azdo-ci-build-train.yml
@@ -11,14 +11,9 @@ trigger:
     - ml_service/util/create_scoring_image.py
 
 variables:
+- template: azdo-variables.yml
 - group: devopsforai-aml-vg
-# Choose from default, build_train_pipeline_with_r.py, or build_train_pipeline_with_r_on_dbricks.py
-- name: build-train-script
-  value: 'build_train_pipeline.py'
-# Automatically triggers the train, evaluate, register pipeline after the CI steps. 
-# Uncomment to set to false or add same variable name at queue time with value of false to disable.
-# - name: auto-trigger-training
-#   value: false
+
 
 stages:
 - stage: 'Model_CI'
@@ -34,7 +29,7 @@ stages:
     - template: azdo-base-pipeline.yml
     - script: |
         # Invoke the Python building and publishing a training pipeline
-        python3 $(Build.SourcesDirectory)/ml_service/pipelines/$(build-train-script)
+        python3 $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
       failOnStderr: 'false'
       env:
         SP_APP_SECRET: '$(SP_APP_SECRET)'

diff --git a/.pipelines/azdo-pr-build-train.yml b/.pipelines/azdo-pr-build-train.yml
@@ -11,6 +11,7 @@ container: mcr.microsoft.com/mlops/python:latest
 
 
 variables:
+- template: azdo-variables.yml
 - group: devopsforai-aml-vg
 
 

diff --git a/.pipelines/azdo-variables.yml b/.pipelines/azdo-variables.yml
@@ -0,0 +1,40 @@
+variables:
+  # Azure ML Workspace Variables
+- name: EXPERIMENT_NAME
+  value: mlopspython
+  # AML Compute Cluster Config
+- name: AML_COMPUTE_CLUSTER_CPU_SKU
+  value: STANDARD_DS2_V2
+- name: AML_COMPUTE_CLUSTER_NAME
+  value: train-cluster
+- name: AML_CLUSTER_MIN_NODES
+  value: 0
+- name: AML_CLUSTER_MAX_NODES
+  value: 4
+- name: AML_CLUSTER_PRIORITY
+  value: lowpriority
+  # Training Config
+- name: BUILD_TRAIN_SCRIPT
+  value: build_train_pipeline.py
+- name: TRAIN_SCRIPT_PATH
+  value: training/train.py
+- name: MODEL_NAME
+  value: sklearn_regression_model.pkl
+- name: MODEL_VERSION
+  value: '1'
+  # AML Pipeline Config 
+- name: TRAINING_PIPELINE_NAME
+  value: 'Training Pipeline'
+- name: MODEL_PATH
+  value: ''
+- name: EVALUATE_SCRIPT_PATH
+  value: evaluate/evaluate_model.py
+- name: REGISTER_SCRIPT_PATH
+  value: register/register_model.py
+- name: SOURCES_DIR_TRAIN
+  value: code
+- name: IMAGE_NAME
+  value: ''
+  # Optional. Used by a training pipeline with R on Databricks
+- name: DB_CLUSTER_ID
+  value: ''
diff --git a/docs/getting_started.md b/docs/getting_started.md
@@ -47,41 +47,48 @@ Click on **Library** in the **Pipelines** section as indicated below:
 Please name your variable group **``devopsforai-aml-vg``** as we are using this
 name within our build yaml file.
 
-The variable group should contain the following variables:
+The variable group should contain the following required variables:
 
 | Variable Name               | Suggested Value                    |
 | --------------------------- | -----------------------------------|
-| AML_COMPUTE_CLUSTER_CPU_SKU | STANDARD_DS2_V2                    |
-| AML_COMPUTE_CLUSTER_NAME    | train-cluster                      |
 | BASE_NAME                   | [unique base name]                 |
-| DB_CLUSTER_ID               | [Optional Databricks cluster Id]   |
-| DATABRICKS_COMPUTE_NAME     | [Optional Databricks compute name] |
-| EVALUATE_SCRIPT_PATH        | evaluate/evaluate_model.py         |
-| EXPERIMENT_NAME             | mlopspython                        |
 | LOCATION                    | centralus                          |
-| MODEL_NAME                  | sklearn_regression_model.pkl       |
-| REGISTER_SCRIPT_PATH        | register/register_model.py         |
-| SOURCES_DIR_TRAIN           | code                               |
 | SP_APP_ID                   |                                    |
 | SP_APP_SECRET               |                                    |
 | SUBSCRIPTION_ID             |                                    |
 | TENANT_ID                   |                                    |
-| TRAIN_SCRIPT_PATH           | training/train.py                  |
-| TRAINING_PIPELINE_NAME      | training-pipeline                  |
+| RESOURCE_GROUP              |                                    |
+| WORKSPACE_NAME              | mlops-AML-WS                       |
 
 Mark **SP_APP_SECRET** variable as a secret one.
 
-**Note:** The **BASE_NAME** parameter is used throughout the solution for naming
+**Note:** 
+
+The **WORKSPACE_NAME** parameter is used for the Azure Machine Learning Workspace creation. You can provide an existing AML Workspace here if you have one.
+
+The **BASE_NAME** parameter is used throughout the solution for naming
 Azure resources. When the solution is used in a shared subscription, there can
 be naming collisions with resources that require unique names like azure blob
 storage and registry DNS naming. Make sure to give a unique value to the
 BASE_NAME variable (e.g. MyUniqueML), so that the created resources will have
-unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-WS, etc.). The length of
-the BASE_NAME value should not exceed 10 characters.
+unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-KV, etc.). The length of
+the BASE_NAME value should not exceed 10 characters. 
 
 Make sure to select the **Allow access to all pipelines** checkbox in the
 variable group configuration.
 
+## More variable options
+
+There are more variables used in the project. They're defined in two places: one for local execution and one for Azure DevOps Pipelines.
+
+### Local configuration
+
+To configure the project locally, copy `.env.example` to the repository root and name the copy `.env`. Fill in all missing values and adjust the existing ones to your needs. Please be aware that the local environment also needs access to the Azure subscription, so you have to provide the credentials of your service principal and your Azure account information here as well.
+
+### Azure DevOps configuration
+
+For Azure DevOps Pipelines, all other variables are stored in the file `.pipelines/azdo-variables.yml`. Adjust the variables as needed; the defaults will give you an easy jump start.
+
 Up until now you should have:
 
 * Forked (or cloned) the repo

diff --git a/environment_setup/arm-templates/cloud-environment.json b/environment_setup/arm-templates/cloud-environment.json
@@ -26,16 +26,35 @@
       "metadata": {
         "description": "Specifies the location for all resources."
       }
+    },
+    "workspace": {
+      "type": "string"
+    },
+    "storageAccount": {
+      "type": "string",
+      "defaultValue": "[concat(toLower(parameters('baseName')), 'amlsa')]"
+    },
+    "keyvault": {
+      "type": "string",
+      "defaultValue": "[concat(parameters('baseName'),'-AML-KV')]"
+    },
+    "appInsights": {
+      "type": "string",
+      "defaultValue": "[concat(parameters('baseName'),'-AML-AI')]"
+    },
+    "acr": {
+      "type": "string",
+      "defaultValue": "[concat(toLower(parameters('baseName')),'amlcr')]"
     }
   },
   "variables": {
-    "amlWorkspaceName": "[concat(parameters('baseName'),'-AML-WS')]",
-    "storageAccountName": "[concat(toLower(parameters('baseName')), 'amlsa')]",
+    "amlWorkspaceName": "[parameters('workspace')]",
+    "storageAccountName": "[parameters('storageAccount')]",
     "storageAccountType": "Standard_LRS",
-    "keyVaultName": "[concat(parameters('baseName'),'-AML-KV')]",
+    "keyVaultName": "[parameters('keyvault')]",
     "tenantId": "[subscription().tenantId]",
-    "applicationInsightsName": "[concat(parameters('baseName'),'-AML-AI')]",
-    "containerRegistryName": "[concat(toLower(parameters('baseName')),'amlcr')]"
+    "applicationInsightsName": "[parameters('appInsights')]",
+    "containerRegistryName": "[parameters('acr')]"
   },
   "resources": [
     {

diff --git a/environment_setup/iac-create-environment.yml b/environment_setup/iac-create-environment.yml
@@ -25,11 +25,11 @@ steps:
   inputs:
     azureSubscription: 'AzureResourceConnection'
     action: 'Create Or Update Resource Group'
-    resourceGroupName: '$(BASE_NAME)-AML-RG'
+    resourceGroupName: '$(RESOURCE_GROUP)'
     location: $(LOCATION)
     templateLocation: 'Linked artifact'
     csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json'
-    overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION)'
+    overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION) -workspace $(WORKSPACE_NAME)'
     deploymentMode: 'Incremental'
   displayName: 'Deploy MLOps resources to Azure'
 

diff --git a/environment_setup/iac-remove-environment.yml b/environment_setup/iac-remove-environment.yml
@@ -18,7 +18,7 @@ steps:
   inputs:
     azureSubscription: 'AzureResourceConnection'
     action: 'DeleteRG'
-    resourceGroupName: '$(BASE_NAME)-AML-RG'
+    resourceGroupName: '$(RESOURCE_GROUP)'
     location: $(LOCATION)
   displayName: 'Delete resources in Azure'
 

diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py
@@ -5,45 +5,32 @@
 # from azureml.core import Datastore
 import os
 import sys
-from dotenv import load_dotenv
 sys.path.append(os.path.abspath("./ml_service/util"))  # NOQA: E402
 from workspace import get_workspace
 from attach_compute import get_compute
+from env_variables import Env
 
 
 def main():
-    load_dotenv()
-    workspace_name = os.environ.get("BASE_NAME")+"-AML-WS"
-    resource_group = os.environ.get("BASE_NAME")+"-AML-RG"
-    subscription_id = os.environ.get("SUBSCRIPTION_ID")
-    tenant_id = os.environ.get("TENANT_ID")
-    app_id = os.environ.get("SP_APP_ID")
-    app_secret = os.environ.get("SP_APP_SECRET")
-    sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")
-    train_script_path = os.environ.get("TRAIN_SCRIPT_PATH")
-    evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH")
-    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
-    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
-    model_name = os.environ.get("MODEL_NAME")
-    build_id = os.environ.get("BUILD_BUILDID")
-    pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")
-
+    e = Env()
     # Get Azure machine learning workspace
     aml_workspace = get_workspace(
-        workspace_name,
-        resource_group,
-        subscription_id,
-        tenant_id,
-        app_id,
-        app_secret)
+        e.workspace_name,
+        e.resource_group,
+        e.subscription_id,
+        e.tenant_id,
+        e.app_id,
+        e.app_secret)
+    print("get_workspace:")
     print(aml_workspace)
 
     # Get Azure machine learning cluster
     aml_compute = get_compute(
         aml_workspace,
-        compute_name,
-        vm_size)
+        e.compute_name,
+        e.vm_size)
     if aml_compute is not None:
+        print("aml_compute:")
         print(aml_compute)
 
     run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
@@ -56,16 +43,16 @@ def main():
     run_config.environment.docker.enabled = True
 
     model_name = PipelineParameter(
-        name="model_name", default_value=model_name)
+        name="model_name", default_value=e.model_name)
     release_id = PipelineParameter(
         name="release_id", default_value="0"
     )
 
     train_step = PythonScriptStep(
         name="Train Model",
-        script_name=train_script_path,
+        script_name=e.train_script_path,
         compute_target=aml_compute,
-        source_directory=sources_directory_train,
+        source_directory=e.sources_directory_train,
         arguments=[
             "--release_id", release_id,
             "--model_name", model_name,
@@ -77,9 +64,9 @@ def main():
 
     evaluate_step = PythonScriptStep(
         name="Evaluate Model ",
-        script_name=evaluate_script_path,
+        script_name=e.evaluate_script_path,
         compute_target=aml_compute,
-        source_directory=sources_directory_train,
+        source_directory=e.sources_directory_train,
         arguments=[
             "--release_id", release_id,
             "--model_name", model_name,
@@ -95,9 +82,9 @@ def main():
     train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
     train_pipeline.validate()
     published_pipeline = train_pipeline.publish(
-        name=pipeline_name,
+        name=e.pipeline_name,
         description="Model training/retraining pipeline",
-        version=build_id
+        version=e.build_id
     )
     print(f'Published pipeline: {published_pipeline.name}')
     print(f'for build {published_pipeline.version}')
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,6 +11,7 @@ container: mcr.microsoft.com/mlops/python:latest


		variables:
		- template: azdo-variables.yml
		- group: devopsforai-aml-vg


Expand Down