Skip to content

Commit

Permalink
cleaner variables (#99)
Browse files Browse the repository at this point in the history
  • Loading branch information
dariuszparys authored and dtzar committed Nov 20, 2019
1 parent 5372bb0 commit 1eb4a42
Show file tree
Hide file tree
Showing 18 changed files with 328 additions and 221 deletions.
35 changes: 11 additions & 24 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,52 +1,39 @@
# Azure Subscription Variables
SUBSCRIPTION_ID = ''
LOCATION = ''
LOCATION = 'westeurope'
TENANT_ID = ''
BASE_NAME = ''
SP_APP_ID = ''
SP_APP_SECRET = ''
# Azure resource group name (same variable name the pipelines use: RESOURCE_GROUP)
RESOURCE_GROUP = 'mlops-rg'

# Mock build/release ID for local testing - update ReleaseID each "release"
BUILD_BUILDID = '001'
RELEASE_RELEASEID = '001'

# Azure ML Workspace Variables
WORKSPACE_NAME = ''
EXPERIMENT_NAME = ''
SCRIPT_FOLDER = './'

# AML Compute Cluster Config
AML_COMPUTE_CLUSTER_NAME = ''
AML_COMPUTE_CLUSTER_CPU_SKU = ''
AML_CLUSTER_MAX_NODES = ''
AML_CLUSTER_MIN_NODES = ''
AML_COMPUTE_CLUSTER_NAME = 'train-cluster'
AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2'
AML_CLUSTER_MAX_NODES = '4'
AML_CLUSTER_MIN_NODES = '0'
AML_CLUSTER_PRIORITY = 'lowpriority'
# Training Config
MODEL_NAME = 'sklearn_regression_model.pkl'
MODEL_VERSION = '1'
TRAIN_SCRIPT_PATH = 'training/train.py'
# AML Pipeline Config
TRAINING_PIPELINE_NAME = ''
PIPELINE_CONDA_PATH = 'aml_config/conda_dependencies.yml'
TRAINING_PIPELINE_NAME = 'Training Pipeline'
MODEL_PATH = ''
EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py'
REGISTER_SCRIPT_PATH = 'register/register_model.py'
SOURCES_DIR_TRAIN = 'code'

# These are not mandatory for the core workflow
# Remote VM Config
REMOTE_VM_NAME = ''
REMOTE_VM_USERNAME = ''
REMOTE_VM_PASSWORD = ''
REMOTE_VM_IP = ''
# Image config
IMAGE_NAME = ''
IMAGE_DESCRIPTION = ''
IMAGE_VERSION = ''
# ACI Config
ACI_CPU_CORES = ''
ACI_MEM_GB = ''
ACI_DESCRIPTION = ''

# Optional. Used by a training pipeline with R on Databricks
DB_CLUSTER_ID = ''
DATABRICKS_COMPUTE_NAME = ''

# Optional. Container Image name for image creation
IMAGE_NAME = 'ml-trained'
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ wheels/
.installed.cfg
*.egg
MANIFEST
venv/

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down
11 changes: 3 additions & 8 deletions .pipelines/azdo-ci-build-train.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,9 @@ trigger:
- ml_service/util/create_scoring_image.py

variables:
- template: azdo-variables.yml
- group: devopsforai-aml-vg
# Choose from default, build_train_pipeline_with_r.py, or build_train_pipeline_with_r_on_dbricks.py
- name: build-train-script
value: 'build_train_pipeline.py'
# Automatically triggers the train, evaluate, register pipeline after the CI steps.
# Uncomment to set to false or add same variable name at queue time with value of false to disable.
# - name: auto-trigger-training
# value: false


stages:
- stage: 'Model_CI'
Expand All @@ -34,7 +29,7 @@ stages:
- template: azdo-base-pipeline.yml
- script: |
# Invoke the Python building and publishing a training pipeline
python3 $(Build.SourcesDirectory)/ml_service/pipelines/$(build-train-script)
python3 $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
failOnStderr: 'false'
env:
SP_APP_SECRET: '$(SP_APP_SECRET)'
Expand Down
1 change: 1 addition & 0 deletions .pipelines/azdo-pr-build-train.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ container: mcr.microsoft.com/mlops/python:latest


variables:
- template: azdo-variables.yml
- group: devopsforai-aml-vg


Expand Down
40 changes: 40 additions & 0 deletions .pipelines/azdo-variables.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Shared, non-secret pipeline variables.
# Included by the build pipelines via `- template: azdo-variables.yml`.
# Every value is written as a quoted string so numeric-looking values are not
# implicitly typed by the YAML parser.
variables:
  # Azure ML Workspace Variables
  - name: EXPERIMENT_NAME
    value: 'mlopspython'

  # AML Compute Cluster Config
  - name: AML_COMPUTE_CLUSTER_CPU_SKU
    value: 'STANDARD_DS2_V2'
  - name: AML_COMPUTE_CLUSTER_NAME
    value: 'train-cluster'
  - name: AML_CLUSTER_MIN_NODES
    value: '0'
  - name: AML_CLUSTER_MAX_NODES
    value: '4'
  - name: AML_CLUSTER_PRIORITY
    value: 'lowpriority'

  # Training Config
  - name: BUILD_TRAIN_SCRIPT
    value: 'build_train_pipeline.py'
  - name: TRAIN_SCRIPT_PATH
    value: 'training/train.py'
  - name: MODEL_NAME
    value: 'sklearn_regression_model.pkl'
  - name: MODEL_VERSION
    value: '1'

  # AML Pipeline Config
  - name: TRAINING_PIPELINE_NAME
    value: 'Training Pipeline'
  - name: MODEL_PATH
    value: ''
  - name: EVALUATE_SCRIPT_PATH
    value: 'evaluate/evaluate_model.py'
  - name: REGISTER_SCRIPT_PATH
    value: 'register/register_model.py'
  - name: SOURCES_DIR_TRAIN
    value: 'code'
  - name: IMAGE_NAME
    value: ''

  # Optional. Used by a training pipeline with R on Databricks
  - name: DB_CLUSTER_ID
    value: ''
37 changes: 22 additions & 15 deletions docs/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,41 +47,48 @@ Click on **Library** in the **Pipelines** section as indicated below:
Please name your variable group **``devopsforai-aml-vg``** as we are using this
name within our build yaml file.

The variable group should contain the following variables:
The variable group should contain the following required variables:

| Variable Name | Suggested Value |
| --------------------------- | -----------------------------------|
| AML_COMPUTE_CLUSTER_CPU_SKU | STANDARD_DS2_V2 |
| AML_COMPUTE_CLUSTER_NAME | train-cluster |
| BASE_NAME | [unique base name] |
| DB_CLUSTER_ID | [Optional Databricks cluster Id] |
| DATABRICKS_COMPUTE_NAME | [Optional Databricks compute name] |
| EVALUATE_SCRIPT_PATH | evaluate/evaluate_model.py |
| EXPERIMENT_NAME | mlopspython |
| LOCATION | centralus |
| MODEL_NAME | sklearn_regression_model.pkl |
| REGISTER_SCRIPT_PATH | register/register_model.py |
| SOURCES_DIR_TRAIN | code |
| SP_APP_ID | |
| SP_APP_SECRET | |
| SUBSCRIPTION_ID | |
| TENANT_ID | |
| TRAIN_SCRIPT_PATH | training/train.py |
| TRAINING_PIPELINE_NAME | training-pipeline |
| RESOURCE_GROUP | |
| WORKSPACE_NAME | mlops-AML-WS |

Mark **SP_APP_SECRET** variable as a secret one.

**Note:** The **BASE_NAME** parameter is used throughout the solution for naming
**Note:**

The **WORKSPACE_NAME** parameter is used when creating the Azure Machine Learning Workspace. If you already have an AML Workspace, you can provide its name here instead.

The **BASE_NAME** parameter is used throughout the solution for naming
Azure resources. When the solution is used in a shared subscription, there can
be naming collisions with resources that require unique names like azure blob
storage and registry DNS naming. Make sure to give a unique value to the
BASE_NAME variable (e.g. MyUniqueML), so that the created resources will have
unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-WS, etc.). The length of
the BASE_NAME value should not exceed 10 characters.
unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-KV, etc.). The length of
the BASE_NAME value should not exceed 10 characters.

Make sure to select the **Allow access to all pipelines** checkbox in the
variable group configuration.

## More variable options

There are more variables used in the project. They're defined in two places: one for local execution and one for Azure DevOps Pipelines.

### Local configuration

To configure the project locally, copy `.env.example` to `.env` in the root of the repository. Fill in all missing values and adjust the existing ones to your needs. Please be aware that the local environment also needs access to the Azure subscription, so you have to provide the credentials of your service principal and your Azure account information here as well.

### Azure DevOps configuration

When using Azure DevOps Pipelines, all other variables are stored in the file `.pipelines/azdo-variables.yml`. Adjust the variables as needed; the defaults will give you an easy jump start.

Up until now you should have:

* Forked (or cloned) the repo
Expand Down
29 changes: 24 additions & 5 deletions environment_setup/arm-templates/cloud-environment.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,35 @@
"metadata": {
"description": "Specifies the location for all resources."
}
},
"workspace": {
"type": "string"
},
"storageAccount": {
"type": "string",
"defaultValue": "[concat(toLower(parameters('baseName')), 'amlsa')]"
},
"keyvault": {
"type": "string",
"defaultValue": "[concat(parameters('baseName'),'-AML-KV')]"
},
"appInsights": {
"type": "string",
"defaultValue": "[concat(parameters('baseName'),'-AML-AI')]"
},
"acr": {
"type": "string",
"defaultValue": "[concat(toLower(parameters('baseName')),'amlcr')]"
}
},
"variables": {
"amlWorkspaceName": "[concat(parameters('baseName'),'-AML-WS')]",
"storageAccountName": "[concat(toLower(parameters('baseName')), 'amlsa')]",
"amlWorkspaceName": "[parameters('workspace')]",
"storageAccountName": "[parameters('storageAccount')]",
"storageAccountType": "Standard_LRS",
"keyVaultName": "[concat(parameters('baseName'),'-AML-KV')]",
"keyVaultName": "[parameters('keyvault')]",
"tenantId": "[subscription().tenantId]",
"applicationInsightsName": "[concat(parameters('baseName'),'-AML-AI')]",
"containerRegistryName": "[concat(toLower(parameters('baseName')),'amlcr')]"
"applicationInsightsName": "[parameters('appInsights')]",
"containerRegistryName": "[parameters('acr')]"
},
"resources": [
{
Expand Down
4 changes: 2 additions & 2 deletions environment_setup/iac-create-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ steps:
inputs:
azureSubscription: 'AzureResourceConnection'
action: 'Create Or Update Resource Group'
resourceGroupName: '$(BASE_NAME)-AML-RG'
resourceGroupName: '$(RESOURCE_GROUP)'
location: $(LOCATION)
templateLocation: 'Linked artifact'
csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json'
overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION)'
overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION) -workspace $(WORKSPACE_NAME)'
deploymentMode: 'Incremental'
displayName: 'Deploy MLOps resources to Azure'

Expand Down
2 changes: 1 addition & 1 deletion environment_setup/iac-remove-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ steps:
inputs:
azureSubscription: 'AzureResourceConnection'
action: 'DeleteRG'
resourceGroupName: '$(BASE_NAME)-AML-RG'
resourceGroupName: '$(RESOURCE_GROUP)'
location: $(LOCATION)
displayName: 'Delete resources in Azure'

Expand Down
51 changes: 19 additions & 32 deletions ml_service/pipelines/build_train_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,45 +5,32 @@
# from azureml.core import Datastore
import os
import sys
from dotenv import load_dotenv
sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402
from workspace import get_workspace
from attach_compute import get_compute
from env_variables import Env


def main():
load_dotenv()
workspace_name = os.environ.get("BASE_NAME")+"-AML-WS"
resource_group = os.environ.get("BASE_NAME")+"-AML-RG"
subscription_id = os.environ.get("SUBSCRIPTION_ID")
tenant_id = os.environ.get("TENANT_ID")
app_id = os.environ.get("SP_APP_ID")
app_secret = os.environ.get("SP_APP_SECRET")
sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")
train_script_path = os.environ.get("TRAIN_SCRIPT_PATH")
evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH")
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
model_name = os.environ.get("MODEL_NAME")
build_id = os.environ.get("BUILD_BUILDID")
pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")

e = Env()
# Get Azure machine learning workspace
aml_workspace = get_workspace(
workspace_name,
resource_group,
subscription_id,
tenant_id,
app_id,
app_secret)
e.workspace_name,
e.resource_group,
e.subscription_id,
e.tenant_id,
e.app_id,
e.app_secret)
print("get_workspace:")
print(aml_workspace)

# Get Azure machine learning cluster
aml_compute = get_compute(
aml_workspace,
compute_name,
vm_size)
e.compute_name,
e.vm_size)
if aml_compute is not None:
print("aml_compute:")
print(aml_compute)

run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
Expand All @@ -56,16 +43,16 @@ def main():
run_config.environment.docker.enabled = True

model_name = PipelineParameter(
name="model_name", default_value=model_name)
name="model_name", default_value=e.model_name)
release_id = PipelineParameter(
name="release_id", default_value="0"
)

train_step = PythonScriptStep(
name="Train Model",
script_name=train_script_path,
script_name=e.train_script_path,
compute_target=aml_compute,
source_directory=sources_directory_train,
source_directory=e.sources_directory_train,
arguments=[
"--release_id", release_id,
"--model_name", model_name,
Expand All @@ -77,9 +64,9 @@ def main():

evaluate_step = PythonScriptStep(
name="Evaluate Model ",
script_name=evaluate_script_path,
script_name=e.evaluate_script_path,
compute_target=aml_compute,
source_directory=sources_directory_train,
source_directory=e.sources_directory_train,
arguments=[
"--release_id", release_id,
"--model_name", model_name,
Expand All @@ -95,9 +82,9 @@ def main():
train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
train_pipeline.validate()
published_pipeline = train_pipeline.publish(
name=pipeline_name,
name=e.pipeline_name,
description="Model training/retraining pipeline",
version=build_id
version=e.build_id
)
print(f'Published pipeline: {published_pipeline.name}')
print(f'for build {published_pipeline.version}')
Expand Down
Loading

0 comments on commit 1eb4a42

Please sign in to comment.