diff --git a/README.md b/README.md index 0f9ab4a6..434be0df 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,9 @@ description: "Code which demonstrates how to set up and operationalize an MLOps # MLOps with Azure ML -[![Build Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/microsoft.MLOpsPython?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=151&branchName=master) +CI: [![Build Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/Model-Train-Register-CI?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=160&branchName=master) + +CD: [![Build Status](https://aidemos.visualstudio.com/MLOps/_apis/build/status/microsoft.MLOpsPython-CD?branchName=master)](https://aidemos.visualstudio.com/MLOps/_build/latest?definitionId=161&branchName=master) MLOps will help you to understand how to build a Continuous Integration and Continuous Delivery pipeline for an ML/AI project. We will be using the Azure DevOps Project for build and release/deployment pipelines along with Azure ML services for model retraining pipeline, model management and operationalization. 
diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py index 9e52af55..02f51bbc 100644 --- a/bootstrap/bootstrap.py +++ b/bootstrap/bootstrap.py @@ -98,7 +98,6 @@ def replace_project_name(project_dir, project_name, rename_name): r"ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py", # NOQA: E501 r"ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py", # NOQA: E501 r"ml_service/pipelines/diabetes_regression_build_train_pipeline.py", # NOQA: E501 - r"ml_service/pipelines/diabetes_regression_verify_train_pipeline.py", # NOQA: E501 r"ml_service/util/create_scoring_image.py", r"diabetes_regression/conda_dependencies.yml", r"diabetes_regression/evaluate/evaluate_model.py", diff --git a/data/README.md b/data/README.md new file mode 100644 index 00000000..a25aa451 --- /dev/null +++ b/data/README.md @@ -0,0 +1,3 @@ +This folder is used for example data, and it is not meant to be used for storing training data. + +Follow the steps in [Configure Training Data](../docs/custom_model.md#configure-training-data) to use your own data for training. \ No newline at end of file diff --git a/docs/code_description.md b/docs/code_description.md index 8dc3c756..d30295e9 100644 --- a/docs/code_description.md +++ b/docs/code_description.md @@ -8,7 +8,7 @@ High level directory structure for this repository: ├── .pipelines <- Azure DevOps YAML pipelines for CI, PR and model training and deployment. ├── bootstrap <- Python script to initialize this repository with a custom project name. ├── charts <- Helm charts to deploy resources on Azure Kubernetes Service(AKS). -├── data <- Initial set of data to train and evaluate model. +├── data <- Initial set of data to train and evaluate model. Not intended for storing your own training data. ├── diabetes_regression <- The top-level folder for the ML project. │ ├── evaluate <- Python script to evaluate trained ML model. │ ├── register <- Python script to register trained ML model with Azure Machine Learning Service. 
@@ -52,7 +52,10 @@ The repository provides a template with folders structure suitable for maintaini - `.pipelines/code-quality-template.yml` : a pipeline template used by the CI and PR pipelines. It contains steps performing linting, data and unit testing. - `.pipelines/diabetes_regression-ci-image.yml` : a pipeline building a scoring image for the diabetes regression model. - `.pipelines/diabetes_regression-ci.yml` : a pipeline triggered when the code is merged into **master**. It performs linting, data integrity testing, unit testing, building and publishing an ML pipeline. -- `.pipelines/diabetes_regression-get-model-version-template.yml` : a pipeline template used by the `.pipelines/diabetes_regression-ci.yml` pipeline. It finds out if a new model was registered and retrieves a version of the new model. +- `.pipelines/diabetes_regression-cd.yml` : a pipeline triggered when the code is merged into **master** and the `.pipelines/diabetes_regression-ci.yml` completes. It deploys the model registered by the CI pipeline to a target. +- `.pipelines/diabetes_regression-package-model-template.yml` : a pipeline triggered when the code is merged into **master**. It deploys the registered model to a target. +- `.pipelines/diabetes_regression-get-model-id-artifact-template.yml` : a pipeline template used by the `.pipelines/diabetes_regression-cd.yml` pipeline. It takes the model metadata artifact published by the previous pipeline and gets the model ID. +- `.pipelines/diabetes_regression-publish-model-artifact-template.yml` : a pipeline template used by the `.pipelines/diabetes_regression-ci.yml` pipeline. It finds out if a new model was registered and publishes a pipeline artifact containing the model metadata. - `.pipelines/helm-*.yml` : pipeline templates used by the `.pipelines/abtest.yml` pipeline. - `.pipelines/pr.yml` : a pipeline triggered when a **pull request** to the **master** branch is created. 
It performs linting, data integrity testing and unit testing only. @@ -62,7 +65,6 @@ The repository provides a template with folders structure suitable for maintaini - `ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py` : builds and publishes an ML training pipeline. It uses R on ML Compute. - `ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` : builds and publishes an ML training pipeline. It uses R on Databricks Compute. - `ml_service/pipelines/run_train_pipeline.py` : invokes a published ML training pipeline (Python on ML Compute) via REST API. -- `ml_service/pipelines/diabetes_regression_verify_train_pipeline.py` : determines whether the evaluate_model.py step of the training pipeline registered a new model. - `ml_service/util` : contains common utility functions used to build and publish an ML training pipeline. ### Environment Definitions diff --git a/docs/custom_container.md b/docs/custom_container.md index 8c031d15..46e692f9 100644 --- a/docs/custom_container.md +++ b/docs/custom_container.md @@ -61,7 +61,11 @@ Edit the [environment_setup/docker-image-pipeline.yml](../environment_setup/dock and modify the string `'public/mlops/python'` with an name suitable to describe your environment, e.g. `'mlops/diabetes_regression'`. -Save and run the pipeline. This will build and push a container image to your Azure Container Registry with +Save and run the pipeline, making sure to set these runtime variables: `amlsdkversion` and `githubrelease`. Choose values that suit your environment; they will appear as tags on your image. + +![Custom Container Vars](./images/custom-container-variables.png) + +This will build and push a container image to your Azure Container Registry with the name you have just edited. The next step is to modify the build pipeline to run the CI job on a container run from that image. 
diff --git a/docs/getting_started.md b/docs/getting_started.md index c3abed02..629e0dc2 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -73,7 +73,7 @@ More variables are available for further tweaking, but the above variables are a ### Variable Descriptions -**BASE_NAME** is used as a prefix for naming Azure resources. When sharing an Azure subscription, the prefix allows you to avoid naming collisions for resources that require unique names, for example, Azure Blob Storage and Registry DNS. Make sure to set BASE_NAME to a unique name so that created resources will have unique names, for example, MyUniqueMLamlcr, MyUniqueML-AML-KV, and so on. The length of the BASE_NAME value shouldn't exceed 10 characters and must contain letters and numbers only. +**BASE_NAME** is used as a prefix for naming Azure resources and should be unique. When sharing an Azure subscription, the prefix allows you to avoid naming collisions for resources that require unique names, for example, Azure Blob Storage and Registry DNS. Make sure to set BASE_NAME to a unique name so that created resources will have unique names, for example, MyUniqueMLamlcr, MyUniqueML-AML-KV, and so on. The length of the BASE_NAME value shouldn't exceed 10 characters and must contain letters and numbers only. **LOCATION** is the name of the [Azure location](https://azure.microsoft.com/en-us/global-infrastructure/locations/) for your resources. There should be no spaces in the name. For example, central, westus, westus2. @@ -133,7 +133,7 @@ Check that the newly created resources appear in the [Azure Portal](https://port At this point, you should have an Azure ML Workspace created. Similar to the Azure Resource Manager service connection, you need to create an additional one for the Azure ML Workspace. 
-Create a new service connection to your Azure ML Workspace using the [Machine Learning Extension](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml) instructions to enable executing the Azure ML training pipeline. The connection name needs to match `WORKSPACE_SVC_CONNECTION` that you set in the variable group above. +Create a new service connection to your Azure ML Workspace using the [Machine Learning Extension](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml) instructions to enable executing the Azure ML training pipeline. The connection name needs to match `WORKSPACE_SVC_CONNECTION` that you set in the variable group above (e.g. 'aml-workspace-connection'). ![Created resources](./images/ml-ws-svc-connection.png) @@ -213,9 +213,25 @@ In order to use these pipelines: These pipelines rely on the model CI pipeline and reference it by name. +If you would like to change the name of your model CI pipeline, you must edit this section of the YAML in the CD and batch scoring pipelines, replacing `source: Model-Train-Register-CI` with your own pipeline name. +``` +trigger: none +resources: + containers: + - container: mlops + image: mcr.microsoft.com/mlops/python:latest + pipelines: + - pipeline: model-train-ci + source: Model-Train-Register-CI # Name of the triggering pipeline + trigger: + branches: + include: + - master +``` + --- -These pipelines have the following behaviors: +The release deployment and batch scoring pipelines have the following behaviors: - The pipeline will **automatically trigger** on completion of the Model-Train-Register-CI pipeline for the master branch. - The pipeline will default to using the latest successful build of the Model-Train-Register-CI pipeline. It will deploy the model produced by that build. 
diff --git a/docs/images/custom-container-variables.png b/docs/images/custom-container-variables.png new file mode 100644 index 00000000..24a6a92a Binary files /dev/null and b/docs/images/custom-container-variables.png differ diff --git a/environment_setup/iac-create-environment-pipeline-tf.yml b/environment_setup/iac-create-environment-pipeline-tf.yml index 3e94677e..ef184546 100644 --- a/environment_setup/iac-create-environment-pipeline-tf.yml +++ b/environment_setup/iac-create-environment-pipeline-tf.yml @@ -37,7 +37,7 @@ steps: ensureBackend: true backendAzureRmResourceGroupLocation: $(LOCATION) backendAzureRmResourceGroupName: $(RESOURCE_GROUP) - backendAzureRmStorageAccountName: 'statestor' + backendAzureRmStorageAccountName: '$(BASE_NAME)statestor' backendAzureRmStorageAccountSku: 'Standard_LRS' backendAzureRmContainerName: 'tfstate-cont' backendAzureRmKey: 'mlopsinfra.tfstate' diff --git a/ml_service/pipelines/diabetes_regression_verify_train_pipeline.py b/ml_service/pipelines/diabetes_regression_verify_train_pipeline.py deleted file mode 100644 index 28511f9b..00000000 --- a/ml_service/pipelines/diabetes_regression_verify_train_pipeline.py +++ /dev/null @@ -1,79 +0,0 @@ -import argparse -import sys -import os -from azureml.core import Run, Experiment, Workspace -from ml_service.util.env_variables import Env -from diabetes_regression.util.model_helper import get_model - - -def main(): - - run = Run.get_context() - - if (run.id.startswith('OfflineRun')): - from dotenv import load_dotenv - load_dotenv() - sources_dir = os.environ.get("SOURCES_DIR_TRAIN") - if (sources_dir is None): - sources_dir = 'diabetes_regression' - workspace_name = os.environ.get("WORKSPACE_NAME") - experiment_name = os.environ.get("EXPERIMENT_NAME") - resource_group = os.environ.get("RESOURCE_GROUP") - subscription_id = os.environ.get("SUBSCRIPTION_ID") - build_id = os.environ.get('BUILD_BUILDID') - aml_workspace = Workspace.get( - name=workspace_name, - 
subscription_id=subscription_id, - resource_group=resource_group - ) - ws = aml_workspace - exp = Experiment(ws, experiment_name) - else: - exp = run.experiment - - e = Env() - - parser = argparse.ArgumentParser("register") - parser.add_argument( - "--build_id", - type=str, - help="The Build ID of the build triggering this pipeline run", - ) - parser.add_argument( - "--output_model_version_file", - type=str, - default="model_version.txt", - help="Name of a file to write model version to" - ) - - args = parser.parse_args() - if (args.build_id is not None): - build_id = args.build_id - model_name = e.model_name - - try: - tag_name = 'BuildId' - model = get_model( - model_name=model_name, - tag_name=tag_name, - tag_value=build_id, - aml_workspace=exp.workspace) - - if (model is not None): - print("Model was registered for this build.") - if (model is None): - print("Model was not registered for this run.") - sys.exit(1) - except Exception as e: - print(e) - print("Model was not registered for this run.") - sys.exit(1) - - # Save the Model Version for other AzDO jobs after script is complete - if args.output_model_version_file is not None: - with open(args.output_model_version_file, "w") as out_file: - out_file.write(str(model.version)) - - -if __name__ == '__main__': - main()