diff --git a/.env.example b/.env.example index 2f62dad7..ea1338ae 100644 --- a/.env.example +++ b/.env.example @@ -1,52 +1,39 @@ # Azure Subscription Variables SUBSCRIPTION_ID = '' -LOCATION = '' +LOCATION = 'westeurope' TENANT_ID = '' BASE_NAME = '' SP_APP_ID = '' SP_APP_SECRET = '' +RESOURCE_GROUP = 'mlops-rg' # Mock build/release ID for local testing - update ReleaseID each "release" BUILD_BUILDID = '001' RELEASE_RELEASEID = '001' # Azure ML Workspace Variables +WORKSPACE_NAME = '' EXPERIMENT_NAME = '' -SCRIPT_FOLDER = './' # AML Compute Cluster Config -AML_COMPUTE_CLUSTER_NAME = '' -AML_COMPUTE_CLUSTER_CPU_SKU = '' -AML_CLUSTER_MAX_NODES = '' -AML_CLUSTER_MIN_NODES = '' +AML_COMPUTE_CLUSTER_NAME = 'train-cluster' +AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2' +AML_CLUSTER_MAX_NODES = '4' +AML_CLUSTER_MIN_NODES = '0' AML_CLUSTER_PRIORITY = 'lowpriority' # Training Config MODEL_NAME = 'sklearn_regression_model.pkl' MODEL_VERSION = '1' TRAIN_SCRIPT_PATH = 'training/train.py' # AML Pipeline Config -TRAINING_PIPELINE_NAME = '' -PIPELINE_CONDA_PATH = 'aml_config/conda_dependencies.yml' +TRAINING_PIPELINE_NAME = 'Training Pipeline' MODEL_PATH = '' EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py' REGISTER_SCRIPT_PATH = 'register/register_model.py' SOURCES_DIR_TRAIN = 'code' -# These are not mandatory for the core workflow -# Remote VM Config -REMOTE_VM_NAME = '' -REMOTE_VM_USERNAME = '' -REMOTE_VM_PASSWORD = '' -REMOTE_VM_IP = '' -# Image config -IMAGE_NAME = '' -IMAGE_DESCRIPTION = '' -IMAGE_VERSION = '' -# ACI Config -ACI_CPU_CORES = '' -ACI_MEM_GB = '' -ACI_DESCRIPTION = '' - # Optional. Used by a training pipeline with R on Databricks DB_CLUSTER_ID = '' -DATABRICKS_COMPUTE_NAME = '' \ No newline at end of file + +# Optional. 
Container Image name for image creation +IMAGE_NAME = 'ml-trained' \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3a5a8879..7bac8768 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ wheels/ .installed.cfg *.egg MANIFEST +venv/ # PyInstaller # Usually these files are written by a python script from a template diff --git a/.pipelines/azdo-ci-build-train.yml b/.pipelines/azdo-ci-build-train.yml index c2453d4d..09c52d95 100644 --- a/.pipelines/azdo-ci-build-train.yml +++ b/.pipelines/azdo-ci-build-train.yml @@ -11,14 +11,9 @@ trigger: - ml_service/util/create_scoring_image.py variables: +- template: azdo-variables.yml - group: devopsforai-aml-vg -# Choose from default, build_train_pipeline_with_r.py, or build_train_pipeline_with_r_on_dbricks.py -- name: build-train-script - value: 'build_train_pipeline.py' -# Automatically triggers the train, evaluate, register pipeline after the CI steps. -# Uncomment to set to false or add same variable name at queue time with value of false to disable. 
-# - name: auto-trigger-training -# value: false + stages: - stage: 'Model_CI' @@ -34,7 +29,7 @@ stages: - template: azdo-base-pipeline.yml - script: | # Invoke the Python building and publishing a training pipeline - python3 $(Build.SourcesDirectory)/ml_service/pipelines/$(build-train-script) + python3 $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }} failOnStderr: 'false' env: SP_APP_SECRET: '$(SP_APP_SECRET)' diff --git a/.pipelines/azdo-pr-build-train.yml b/.pipelines/azdo-pr-build-train.yml index 8bf6ca56..24231b2a 100644 --- a/.pipelines/azdo-pr-build-train.yml +++ b/.pipelines/azdo-pr-build-train.yml @@ -11,6 +11,7 @@ container: mcr.microsoft.com/mlops/python:latest variables: +- template: azdo-variables.yml - group: devopsforai-aml-vg diff --git a/.pipelines/azdo-variables.yml b/.pipelines/azdo-variables.yml new file mode 100644 index 00000000..64a42d5b --- /dev/null +++ b/.pipelines/azdo-variables.yml @@ -0,0 +1,40 @@ +variables: + # Azure ML Workspace Variables +- name: EXPERIMENT_NAME + value: mlopspython + # AML Compute Cluster Config +- name: AML_COMPUTE_CLUSTER_CPU_SKU + value: STANDARD_DS2_V2 +- name: AML_COMPUTE_CLUSTER_NAME + value: train-cluster +- name: AML_CLUSTER_MIN_NODES + value: 0 +- name: AML_CLUSTER_MAX_NODES + value: 4 +- name: AML_CLUSTER_PRIORITY + value: lowpriority + # Training Config +- name: BUILD_TRAIN_SCRIPT + value: build_train_pipeline.py +- name: TRAIN_SCRIPT_PATH + value: training/train.py +- name: MODEL_NAME + value: sklearn_regression_model.pkl +- name: MODEL_VERSION + value: '1' + # AML Pipeline Config +- name: TRAINING_PIPELINE_NAME + value: 'Training Pipeline' +- name: MODEL_PATH + value: '' +- name: EVALUATE_SCRIPT_PATH + value: evaluate/evaluate_model.py +- name: REGISTER_SCRIPT_PATH + value: register/register_model.py +- name: SOURCES_DIR_TRAIN + value: code +- name: IMAGE_NAME + value: '' + # Optional. 
Used by a training pipeline with R on Databricks +- name: DB_CLUSTER_ID + value: '' \ No newline at end of file diff --git a/docs/getting_started.md b/docs/getting_started.md index d39be2e8..cc56c6c4 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -47,41 +47,48 @@ Click on **Library** in the **Pipelines** section as indicated below: Please name your variable group **``devopsforai-aml-vg``** as we are using this name within our build yaml file. -The variable group should contain the following variables: +The variable group should contain the following required variables: | Variable Name | Suggested Value | | --------------------------- | -----------------------------------| -| AML_COMPUTE_CLUSTER_CPU_SKU | STANDARD_DS2_V2 | -| AML_COMPUTE_CLUSTER_NAME | train-cluster | | BASE_NAME | [unique base name] | -| DB_CLUSTER_ID | [Optional Databricks cluster Id] | -| DATABRICKS_COMPUTE_NAME | [Optional Databricks compute name] | -| EVALUATE_SCRIPT_PATH | evaluate/evaluate_model.py | -| EXPERIMENT_NAME | mlopspython | | LOCATION | centralus | -| MODEL_NAME | sklearn_regression_model.pkl | -| REGISTER_SCRIPT_PATH | register/register_model.py | -| SOURCES_DIR_TRAIN | code | | SP_APP_ID | | | SP_APP_SECRET | | | SUBSCRIPTION_ID | | | TENANT_ID | | -| TRAIN_SCRIPT_PATH | training/train.py | -| TRAINING_PIPELINE_NAME | training-pipeline | +| RESOURCE_GROUP | | +| WORKSPACE_NAME | mlops-AML-WS | Mark **SP_APP_SECRET** variable as a secret one. -**Note:** The **BASE_NAME** parameter is used throughout the solution for naming +**Note:** + +The **WORKSPACE_NAME** parameter is used for the Azure Machine Learning Workspace creation. You can provide here an existing AML Workspace if you have one. + +The **BASE_NAME** parameter is used throughout the solution for naming Azure resources. When the solution is used in a shared subscription, there can be naming collisions with resources that require unique names like azure blob storage and registry DNS naming. 
Make sure to give a unique value to the BASE_NAME variable (e.g. MyUniqueML), so that the created resources will have -unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-WS, etc.). The length of -the BASE_NAME value should not exceed 10 characters. +unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-KV, etc.). The length of +the BASE_NAME value should not exceed 10 characters. Make sure to select the **Allow access to all pipelines** checkbox in the variable group configuration. +## More variable options + +There are more variables used in the project. They're defined in two places one for local execution one for using Azure DevOps Pipelines + +### Local configuration + +In order to configure the project locally you have to create a copy from `.env.example` to the root and name it `.env`. Fill out all missing values and adjust the existing ones to your needs. Please be aware that the local environment also needs access to the Azure subscription so you have to provide the credentials of your service principal and Azure account information here as well. + +### Azure DevOps configuration + +For using Azure DevOps Pipelines all other variables are stored in the file `.pipelines/azdo-variables.yml`. Adjust as needed the variables, also the defaults will give you an easy jump start. + Up until now you should have: * Forked (or cloned) the repo diff --git a/environment_setup/arm-templates/cloud-environment.json b/environment_setup/arm-templates/cloud-environment.json index 590a4aed..f2b2ac2f 100644 --- a/environment_setup/arm-templates/cloud-environment.json +++ b/environment_setup/arm-templates/cloud-environment.json @@ -26,16 +26,35 @@ "metadata": { "description": "Specifies the location for all resources." 
} + }, + "workspace": { + "type": "string" + }, + "storageAccount": { + "type": "string", + "defaultValue": "[concat(toLower(parameters('baseName')), 'amlsa')]" + }, + "keyvault": { + "type": "string", + "defaultValue": "[concat(parameters('baseName'),'-AML-KV')]" + }, + "appInsights": { + "type": "string", + "defaultValue": "[concat(parameters('baseName'),'-AML-AI')]" + }, + "acr": { + "type": "string", + "defaultValue": "[concat(toLower(parameters('baseName')),'amlcr')]" } }, "variables": { - "amlWorkspaceName": "[concat(parameters('baseName'),'-AML-WS')]", - "storageAccountName": "[concat(toLower(parameters('baseName')), 'amlsa')]", + "amlWorkspaceName": "[parameters('workspace')]", + "storageAccountName": "[parameters('storageAccount')]", "storageAccountType": "Standard_LRS", - "keyVaultName": "[concat(parameters('baseName'),'-AML-KV')]", + "keyVaultName": "[parameters('keyvault')]", "tenantId": "[subscription().tenantId]", - "applicationInsightsName": "[concat(parameters('baseName'),'-AML-AI')]", - "containerRegistryName": "[concat(toLower(parameters('baseName')),'amlcr')]" + "applicationInsightsName": "[parameters('appInsights')]", + "containerRegistryName": "[parameters('acr')]" }, "resources": [ { diff --git a/environment_setup/iac-create-environment.yml b/environment_setup/iac-create-environment.yml index 2dd00694..f4c08ddf 100644 --- a/environment_setup/iac-create-environment.yml +++ b/environment_setup/iac-create-environment.yml @@ -25,11 +25,11 @@ steps: inputs: azureSubscription: 'AzureResourceConnection' action: 'Create Or Update Resource Group' - resourceGroupName: '$(BASE_NAME)-AML-RG' + resourceGroupName: '$(RESOURCE_GROUP)' location: $(LOCATION) templateLocation: 'Linked artifact' csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json' - overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION)' + overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION) -workspace $(WORKSPACE_NAME)' 
deploymentMode: 'Incremental' displayName: 'Deploy MLOps resources to Azure' diff --git a/environment_setup/iac-remove-environment.yml b/environment_setup/iac-remove-environment.yml index 4ca8b04e..67626223 100644 --- a/environment_setup/iac-remove-environment.yml +++ b/environment_setup/iac-remove-environment.yml @@ -18,7 +18,7 @@ steps: inputs: azureSubscription: 'AzureResourceConnection' action: 'DeleteRG' - resourceGroupName: '$(BASE_NAME)-AML-RG' + resourceGroupName: '$(RESOURCE_GROUP)' location: $(LOCATION) displayName: 'Delete resources in Azure' diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index 481c68e5..b866201d 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -5,45 +5,32 @@ # from azureml.core import Datastore import os import sys -from dotenv import load_dotenv sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace from attach_compute import get_compute +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") - sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") - train_script_path = os.environ.get("TRAIN_SCRIPT_PATH") - evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") - vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") - compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") - model_name = os.environ.get("MODEL_NAME") - build_id = os.environ.get("BUILD_BUILDID") - pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") - + e = Env() # Get Azure machine learning workspace aml_workspace = get_workspace( - workspace_name, - resource_group, - subscription_id, - 
tenant_id, - app_id, - app_secret) + e.workspace_name, + e.resource_group, + e.subscription_id, + e.tenant_id, + e.app_id, + e.app_secret) + print("get_workspace:") print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute( aml_workspace, - compute_name, - vm_size) + e.compute_name, + e.vm_size) if aml_compute is not None: + print("aml_compute:") print(aml_compute) run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( @@ -56,16 +43,16 @@ def main(): run_config.environment.docker.enabled = True model_name = PipelineParameter( - name="model_name", default_value=model_name) + name="model_name", default_value=e.model_name) release_id = PipelineParameter( name="release_id", default_value="0" ) train_step = PythonScriptStep( name="Train Model", - script_name=train_script_path, + script_name=e.train_script_path, compute_target=aml_compute, - source_directory=sources_directory_train, + source_directory=e.sources_directory_train, arguments=[ "--release_id", release_id, "--model_name", model_name, @@ -77,9 +64,9 @@ def main(): evaluate_step = PythonScriptStep( name="Evaluate Model ", - script_name=evaluate_script_path, + script_name=e.evaluate_script_path, compute_target=aml_compute, - source_directory=sources_directory_train, + source_directory=e.sources_directory_train, arguments=[ "--release_id", release_id, "--model_name", model_name, @@ -95,9 +82,9 @@ def main(): train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( - name=pipeline_name, + name=e.pipeline_name, description="Model training/retraining pipeline", - version=build_id + version=e.build_id ) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') diff --git a/ml_service/pipelines/build_train_pipeline_with_r.py b/ml_service/pipelines/build_train_pipeline_with_r.py index 7eae2c98..72ed8e2a 100644 --- 
a/ml_service/pipelines/build_train_pipeline_with_r.py +++ b/ml_service/pipelines/build_train_pipeline_with_r.py @@ -4,40 +4,29 @@ # from azureml.core import Datastore import os import sys -from dotenv import load_dotenv sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace from attach_compute import get_compute +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") - vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") - compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") - build_id = os.environ.get("BUILD_BUILDID") - pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") - + e = Env() # Get Azure machine learning workspace aml_workspace = get_workspace( - workspace_name, - resource_group, - subscription_id, - tenant_id, - app_id, - app_secret) + e.workspace_name, + e.resource_group, + e.subscription_id, + e.tenant_id, + e.app_id, + e.app_secret) print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute( aml_workspace, - compute_name, - vm_size) + e.compute_name, + e.vm_size) if aml_compute is not None: print(aml_compute) @@ -66,9 +55,9 @@ def main(): train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( - name=pipeline_name + "_with_R", + name=e.pipeline_name + "_with_R", description="Model training/retraining pipeline", - version=build_id + version=e.build_id ) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') diff --git a/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py b/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py 
index 95de9e55..733683eb 100644 --- a/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py +++ b/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py @@ -1,42 +1,30 @@ from azureml.pipeline.core import Pipeline import os import sys -from dotenv import load_dotenv sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace from attach_compute import get_compute from azureml.pipeline.steps import DatabricksStep +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") - vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") - compute_name = os.environ.get("DATABRICKS_COMPUTE_NAME") - db_cluster_id = os.environ.get("DB_CLUSTER_ID") - build_id = os.environ.get("BUILD_BUILDID") - pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") - + e = Env() # Get Azure machine learning workspace aml_workspace = get_workspace( - workspace_name, - resource_group, - subscription_id, - tenant_id, - app_id, - app_secret) + e.workspace_name, + e.resource_group, + e.subscription_id, + e.tenant_id, + e.app_id, + e.app_secret) print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute( aml_workspace, - compute_name, - vm_size) + e.compute_name, + e.vm_size) if aml_compute is not None: print(aml_compute) @@ -46,7 +34,7 @@ def main(): python_script_name="train_with_r_on_databricks.py", source_directory="code/training/R", run_name='DB_Python_R_demo', - existing_cluster_id=db_cluster_id, + existing_cluster_id=e.db_cluster_id, compute_target=aml_compute, allow_reuse=False ) @@ -58,9 +46,9 @@ def main(): train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = 
train_pipeline.publish( - name=pipeline_name + "_with_R_on_DB", + name=e.pipeline_name + "_with_R_on_DB", description="Model training/retraining pipeline", - version=build_id + version=e.build_id ) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py index 1d942a8c..fdc8f5a5 100644 --- a/ml_service/pipelines/run_train_pipeline.py +++ b/ml_service/pipelines/run_train_pipeline.py @@ -1,31 +1,23 @@ -import os from azureml.pipeline.core import PublishedPipeline from azureml.core import Workspace from azureml.core.authentication import ServicePrincipalAuthentication -from dotenv import load_dotenv +import os +import sys +sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - experiment_name = os.environ.get("EXPERIMENT_NAME") - model_name = os.environ.get("MODEL_NAME") - app_id = os.environ.get('SP_APP_ID') - app_secret = os.environ.get('SP_APP_SECRET') - build_id = os.environ.get('BUILD_BUILDID') - + e = Env() service_principal = ServicePrincipalAuthentication( - tenant_id=tenant_id, - service_principal_id=app_id, - service_principal_password=app_secret) + tenant_id=e.tenant_id, + service_principal_id=e.app_id, + service_principal_password=e.app_secret) aml_workspace = Workspace.get( - name=workspace_name, - subscription_id=subscription_id, - resource_group=resource_group, + name=e.workspace_name, + subscription_id=e.subscription_id, + resource_group=e.resource_group, auth=service_principal ) @@ -34,23 +26,23 @@ def main(): matched_pipes = [] for p in pipelines: - if p.version == build_id: + if p.version == e.build_id: 
matched_pipes.append(p) if(len(matched_pipes) > 1): published_pipeline = None - raise Exception(f"Multiple active pipelines are published for build {build_id}.") # NOQA: E501 + raise Exception(f"Multiple active pipelines are published for build {e.build_id}.") # NOQA: E501 elif(len(matched_pipes) == 0): published_pipeline = None - raise KeyError(f"Unable to find a published pipeline for this build {build_id}") # NOQA: E501 + raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}") # NOQA: E501 else: published_pipeline = matched_pipes[0] - pipeline_parameters = {"model_name": model_name} + pipeline_parameters = {"model_name": e.model_name} response = published_pipeline.submit( aml_workspace, - experiment_name, + e.experiment_name, pipeline_parameters) run_id = response.id diff --git a/ml_service/util/attach_compute.py b/ml_service/util/attach_compute.py index 7a34cd38..569e3041 100644 --- a/ml_service/util/attach_compute.py +++ b/ml_service/util/attach_compute.py @@ -1,9 +1,8 @@ -import os -from dotenv import load_dotenv from azureml.core import Workspace from azureml.core.compute import AmlCompute from azureml.core.compute import ComputeTarget from azureml.exceptions import ComputeTargetException +from env_variables import Env def get_compute( @@ -11,10 +10,6 @@ def get_compute( compute_name: str, vm_size: str ): - # Load the environment variables from .env in case this script - # is called outside an existing process - load_dotenv() - # Verify that cluster does not exist already try: if compute_name in workspace.compute_targets: compute_target = workspace.compute_targets[compute_name] @@ -22,12 +17,12 @@ def get_compute( print('Found existing compute target ' + compute_name + ' so using it.') else: + e = Env() compute_config = AmlCompute.provisioning_configuration( vm_size=vm_size, - vm_priority=os.environ.get("AML_CLUSTER_PRIORITY", - 'lowpriority'), - min_nodes=int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)), - 
max_nodes=int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)), + vm_priority=e.vm_priority, + min_nodes=e.min_nodes, + max_nodes=e.max_nodes, idle_seconds_before_scaledown="300" # #Uncomment the below lines for VNet support # vnet_resourcegroup_name=vnet_resourcegroup_name, diff --git a/ml_service/util/create_scoring_image.py b/ml_service/util/create_scoring_image.py index 08ae49b5..7e99bd28 100644 --- a/ml_service/util/create_scoring_image.py +++ b/ml_service/util/create_scoring_image.py @@ -2,36 +2,25 @@ from azureml.core import Workspace from azureml.core.image import ContainerImage, Image from azureml.core.model import Model -from dotenv import load_dotenv from azureml.core.authentication import ServicePrincipalAuthentication +from env_variables import Env -load_dotenv() - -TENANT_ID = os.environ.get('TENANT_ID') -APP_ID = os.environ.get('SP_APP_ID') -APP_SECRET = os.environ.get('SP_APP_SECRET') -WORKSPACE_NAME = os.environ.get("BASE_NAME")+"-AML-WS" -SUBSCRIPTION_ID = os.environ.get('SUBSCRIPTION_ID') -RESOURCE_GROUP = os.environ.get("BASE_NAME")+"-AML-RG" -MODEL_NAME = os.environ.get('MODEL_NAME') -MODEL_VERSION = os.environ.get('MODEL_VERSION') -IMAGE_NAME = os.environ.get('IMAGE_NAME') - +e = Env() SP_AUTH = ServicePrincipalAuthentication( - tenant_id=TENANT_ID, - service_principal_id=APP_ID, - service_principal_password=APP_SECRET) + tenant_id=e.tenant_id, + service_principal_id=e.app_id, + service_principal_password=e.app_secret) ws = Workspace.get( - WORKSPACE_NAME, + e.workspace_name, SP_AUTH, - SUBSCRIPTION_ID, - RESOURCE_GROUP + e.subscription_id, + e.resource_group ) -model = Model(ws, name=MODEL_NAME, version=MODEL_VERSION) +model = Model(ws, name=e.model_name, version=e.model_version) os.chdir("./code/scoring") image_config = ContainerImage.image_configuration( @@ -43,7 +32,7 @@ ) image = Image.create( - name=IMAGE_NAME, models=[model], image_config=image_config, workspace=ws + name=e.image_name, models=[model], image_config=image_config, workspace=ws 
) image.wait_for_creation(show_output=True) diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py new file mode 100644 index 00000000..9fe6d061 --- /dev/null +++ b/ml_service/util/env_variables.py @@ -0,0 +1,127 @@ +import os +from dotenv import load_dotenv + + +class Singleton(object): + _instances = {} + + def __new__(class_, *args, **kwargs): + if class_ not in class_._instances: + class_._instances[class_] = super(Singleton, class_).__new__(class_, *args, **kwargs) # noqa E501 + return class_._instances[class_] + + +class Env(Singleton): + + def __init__(self): + load_dotenv() + self._workspace_name = os.environ.get("WORKSPACE_NAME") + self._resource_group = os.environ.get("RESOURCE_GROUP") + self._subscription_id = os.environ.get("SUBSCRIPTION_ID") + self._tenant_id = os.environ.get("TENANT_ID") + self._app_id = os.environ.get("SP_APP_ID") + self._app_secret = os.environ.get("SP_APP_SECRET") + self._vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") + self._compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") + self._vm_priority = os.environ.get("AML_CLUSTER_PRIORITY", 'lowpriority') # noqa E501 + self._min_nodes = int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)) + self._max_nodes = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)) + self._build_id = os.environ.get("BUILD_BUILDID") + self._pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") + self._sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") + self._train_script_path = os.environ.get("TRAIN_SCRIPT_PATH") + self._evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") + self._model_name = os.environ.get("MODEL_NAME") + self._experiment_name = os.environ.get("EXPERIMENT_NAME") + self._model_version = os.environ.get('MODEL_VERSION') + self._image_name = os.environ.get('IMAGE_NAME') + self._model_path = os.environ.get('MODEL_PATH') + self._db_cluster_id = os.environ.get("DB_CLUSTER_ID") + + @property + def workspace_name(self): + return 
self._workspace_name + + @property + def resource_group(self): + return self._resource_group + + @property + def subscription_id(self): + return self._subscription_id + + @property + def tenant_id(self): + return self._tenant_id + + @property + def app_id(self): + return self._app_id + + @property + def app_secret(self): + return self._app_secret + + @property + def vm_size(self): + return self._vm_size + + @property + def compute_name(self): + return self._compute_name + + @property + def db_cluster_id(self): + return self._db_cluster_id + + @property + def build_id(self): + return self._build_id + + @property + def pipeline_name(self): + return self._pipeline_name + + @property + def sources_directory_train(self): + return self._sources_directory_train + + @property + def train_script_path(self): + return self._train_script_path + + @property + def evaluate_script_path(self): + return self._evaluate_script_path + + @property + def model_name(self): + return self._model_name + + @property + def experiment_name(self): + return self._experiment_name + + @property + def vm_priority(self): + return self._vm_priority + + @property + def min_nodes(self): + return self._min_nodes + + @property + def max_nodes(self): + return self._max_nodes + + @property + def model_version(self): + return self._model_version + + @property + def image_name(self): + return self._image_name + + @property + def model_path(self): + return self._model_path diff --git a/ml_service/util/register_model.py b/ml_service/util/register_model.py index ea26a997..7c99aaac 100644 --- a/ml_service/util/register_model.py +++ b/ml_service/util/register_model.py @@ -1,47 +1,35 @@ import sys import os import os.path -from dotenv import load_dotenv from azureml.core import Workspace from azureml.core.model import Model from azureml.core.authentication import ServicePrincipalAuthentication +from env_variables import Env -# Load the environment variables from .env in case this script -# is called outside an 
existing process -load_dotenv() +e = Env() -TENANT_ID = os.environ.get('TENANT_ID') -APP_ID = os.environ.get('SP_APP_ID') -APP_SECRET = os.environ.get('SP_APP_SECRET') -MODEL_PATH = os.environ.get('MODEL_PATH') -MODEL_NAME = os.environ.get('MODEL_NAME') -WORKSPACE_NAME = os.environ.get("BASE_NAME")+"-AML-WS" -SUBSCRIPTION_ID = os.environ.get('SUBSCRIPTION_ID') -RESOURCE_GROUP = os.environ.get("BASE_NAME")+"-AML-RG" - - -if os.path.isfile(MODEL_PATH) is False: - print("The given model path %s is invalid" % (MODEL_PATH)) +if os.path.isfile(e.model_path) is False: + print("The given model path %s is invalid" % (e.model_path)) sys.exit(1) SP_AUTH = ServicePrincipalAuthentication( - tenant_id=TENANT_ID, - service_principal_id=APP_ID, - service_principal_password=APP_SECRET) + tenant_id=e.tenant_id, + service_principal_id=e.app_id, + service_principal_password=e.app_secret) WORKSPACE = Workspace.get( - WORKSPACE_NAME, + e.workspace_name, SP_AUTH, - SUBSCRIPTION_ID, - RESOURCE_GROUP + e.subscription_id, + e.resource_group ) try: MODEL = Model.register( - model_path=MODEL_PATH, - model_name=MODEL_NAME, + model_path=e.model_path, + model_name=e.model_name, description="Forecasting Model", - workspace=WORKSPACE) + workspace=WORKSPACE) print("Model registered successfully. 
ID: " + MODEL.id) except Exception as caught_error: diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py index b22b186c..3c49454d 100644 --- a/tests/unit/code_test.py +++ b/tests/unit/code_test.py @@ -2,17 +2,19 @@ import os sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace +from env_variables import Env # Just an example of a unit test against # a utility function common_scoring.next_saturday def test_get_workspace(): - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") + e = Env() + workspace_name = e.workspace_name + resource_group = e.resource_group + subscription_id = e.subscription_id + tenant_id = e.tenant_id + app_id = e.app_id + app_secret = e.app_secret aml_workspace = get_workspace( workspace_name,