
Commit

Merge branch 'main' into main-dec31
setuc committed Dec 23, 2022
2 parents 98639f4 + 15d5a58 commit 11c5957
Showing 14 changed files with 360 additions and 99 deletions.
6 changes: 6 additions & 0 deletions .devcontainer/Dockerfile
@@ -0,0 +1,6 @@
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.209.6/containers/python-3-miniconda/.devcontainer/base.Dockerfile
FROM mcr.microsoft.com/vscode/devcontainers/miniconda:0.202.1-3

# Update the conda environment according to the environment.yml file in the project.
COPY environment.yml /tmp/conda-tmp/
RUN /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml && rm -rf /tmp/conda-tmp
25 changes: 25 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,25 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.222.0/containers/python-3-miniconda
{
  "name": "Miniconda (Python 3)",
  "build": {
    "context": "..",
    "dockerfile": "Dockerfile",
  },
  // Set *default* container specific settings.json values on container create.
  "settings": {
    "python.defaultInterpreterPath": "/opt/conda/bin/python",
  },
  // Add the IDs of extensions you want installed when the container is created.
  "extensions": [
    "ms-python.python",
    "ms-python.vscode-pylance",
    "ms-toolsai.vscode-ai",
  ],
  // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
  "remoteUser": "vscode",
  "features": {
    "azure-cli": "latest"
  },
  "onCreateCommand": "az extension add -n ml -y"
}
40 changes: 40 additions & 0 deletions .github/workflows/codeql.yml
@@ -0,0 +1,40 @@
name: "CodeQL"

on:
  push:
    branches: [ "main", main*, feature* ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ "main" ]
  schedule:
    - cron: '0 3 * * 3'

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: [ 'python' ]

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v2
        with:
          languages: ${{ matrix.language }}
          queries: security-and-quality

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v2
        with:
          category: "/language:${{matrix.language}}"
19 changes: 19 additions & 0 deletions (conda environment file; file path not shown in this view)
@@ -0,0 +1,19 @@
channels:
  - defaults
  - anaconda
  - conda-forge
dependencies:
  - python=3.7.5
  - pip
  - pip:
    - azureml-mlflow==1.38.0
    - azureml-sdk==1.38.0
    - scikit-learn==0.24.1
    - pandas==1.2.1
    - joblib==1.0.0
    - matplotlib==3.3.3
    - fairlearn==0.7.0
    - azureml-contrib-fairness==1.38.0
    - interpret-community==0.24.1
    - interpret-core==0.2.7
    - azureml-interpret==1.38.0
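
The pinned azureml-sdk and azureml-mlflow packages are what let scripts such as prep.py below call mlflow.start_run() and mlflow.log_metric() against the Azure ML workspace. A minimal sketch of that wiring for local use (an illustration only, not part of this commit; it assumes a workspace config.json is available on disk):

import mlflow
from azureml.core import Workspace

# Illustration only: point MLflow at the Azure ML workspace tracking server.
# Assumes a config.json downloaded from the workspace is present locally.
ws = Workspace.from_config()
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())
mlflow.set_experiment("taxi-fare-training")

with mlflow.start_run():
    mlflow.log_metric("example_metric", 1.0)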
52 changes: 48 additions & 4 deletions classical/aml-cli-v2/data-science/src/prep.py
@@ -4,6 +4,12 @@
Prepares raw data and provides training, validation and test datasets
"""

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Prepares raw data and provides training, validation and test datasets
"""

import argparse

from pathlib import Path
@@ -41,21 +47,52 @@
    "vendor",
]

CAT_ORD_COLS = [
]
TARGET_COL = "cost"

NUMERIC_COLS = [
    "distance",
    "dropoff_latitude",
    "dropoff_longitude",
    "passengers",
    "pickup_latitude",
    "pickup_longitude",
    "pickup_weekday",
    "pickup_month",
    "pickup_monthday",
    "pickup_hour",
    "pickup_minute",
    "pickup_second",
    "dropoff_weekday",
    "dropoff_month",
    "dropoff_monthday",
    "dropoff_hour",
    "dropoff_minute",
    "dropoff_second",
]

CAT_NOM_COLS = [
    "store_forward",
    "vendor",
]

CAT_ORD_COLS = [
]

def parse_args():
    '''Parse input arguments'''
    '''Parse input arguments'''

    parser = argparse.ArgumentParser("prep")
    parser.add_argument("--raw_data", type=str, help="Path to raw data")
    parser.add_argument("--train_data", type=str, help="Path to train dataset")
    parser.add_argument("--val_data", type=str, help="Path to test dataset")
    parser.add_argument("--test_data", type=str, help="Path to test dataset")

    parser.add_argument("--enable_monitoring", type=str, help="enable logging to ADX")
    parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")

    args = parser.parse_args()

    return args
@@ -65,6 +102,8 @@ def log_training_data(df, table_name):
    collector = Online_Collector(table_name)
    collector.batch_collect(df)

def main(args):
    '''Read, split, and save datasets'''
def main(args):
    '''Read, split, and save datasets'''

@@ -88,11 +127,15 @@ def main(args):
    train = data[msk_train]
    val = data[msk_val]
    test = data[msk_test]
    test = data[msk_test]

    mlflow.log_metric('train size', train.shape[0])
    mlflow.log_metric('val size', val.shape[0])
    mlflow.log_metric('test size', test.shape[0])

    train.to_parquet((Path(args.train_data) / "train.parquet"))
    val.to_parquet((Path(args.val_data) / "val.parquet"))
    test.to_parquet((Path(args.test_data) / "test.parquet"))
    train.to_parquet((Path(args.train_data) / "train.parquet"))
    val.to_parquet((Path(args.val_data) / "val.parquet"))
    test.to_parquet((Path(args.test_data) / "test.parquet"))
@@ -101,6 +144,7 @@ def main(args):
        log_training_data(data, args.table_name)



if __name__ == "__main__":

    mlflow.start_run()
@@ -120,9 +164,9 @@ def main(args):

    for line in lines:
        print(line)

    main(args)

    mlflow.end_run()
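
The hunks above consume boolean masks (msk_train, msk_val, msk_test) that are built in a part of prep.py not shown in this diff. For orientation only, a minimal sketch of how such masks are commonly constructed, assuming a random 70/15/15 split (the actual ratios and mask construction are not visible here):

import numpy as np
import pandas as pd

def make_split_masks(data: pd.DataFrame, seed: int = 0):
    # Assumed 70/15/15 random split; the real prep.py may differ.
    rng = np.random.default_rng(seed)
    draws = rng.random(len(data))
    msk_train = draws < 0.70
    msk_val = (draws >= 0.70) & (draws < 0.85)
    msk_test = draws >= 0.85
    return msk_train, msk_val, msk_test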


27 changes: 14 additions & 13 deletions classical/aml-cli-v2/mlops/azureml/train/pipeline.yml
@@ -1,6 +1,7 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
experiment_name: taxi-fare-training
experiment_name: taxi-fare-training
description: Training Pipeline to train a model that predicts taxi fare price

# <inputs_and_outputs>
@@ -11,7 +12,7 @@ inputs:
  enable_monitoring: 'false'
  table_name: 'taximonitoring'

outputs:
outputs:
  train_data:
  val_data:
  test_data:
@@ -32,8 +33,8 @@ jobs:
    display_name: prep-data
    code: ../../../data-science/src
    command: >-
      python prep.py
      --raw_data ${{inputs.raw_data}}
      python prep.py
      --raw_data ${{inputs.raw_data}}
      --train_data ${{outputs.train_data}}
      --val_data ${{outputs.val_data}}
      --test_data ${{outputs.test_data}}
@@ -54,24 +55,24 @@ jobs:
    display_name: train-model
    code: ../../../data-science/src
    command: >-
      python train.py
      --train_data ${{inputs.train_data}}
      python train.py
      --prepared_data ${{inputs.prepared_data}}
      --model_output ${{outputs.model_output}}
    environment: azureml:taxi-train-env@latest
    inputs:
      train_data: ${{parent.jobs.prep_data.outputs.train_data}}
    outputs:
      model_output: ${{parent.outputs.trained_model}}

  evaluate_model:
    name: evaluate_model
    display_name: evaluate-model
    code: ../../../data-science/src
    command: >-
      python evaluate.py
      --model_name ${{inputs.model_name}}
      --model_input ${{inputs.model_input}}
      --test_data ${{inputs.test_data}}
      python evaluate.py
      --model_name ${{inputs.model_name}}
      --model_input ${{inputs.model_input}}
      --test_data ${{inputs.test_data}}
      --evaluation_output ${{outputs.evaluation_output}}
    environment: azureml:taxi-train-env@latest
    inputs:
@@ -86,9 +87,9 @@ jobs:
    display_name: register-model
    code: ../../../data-science/src
    command: >-
      python register.py
      --model_name ${{inputs.model_name}}
      --model_path ${{inputs.model_path}}
      python register.py
      --model_name ${{inputs.model_name}}
      --model_path ${{inputs.model_path}}
      --evaluation_output ${{inputs.evaluation_output}}
      --model_info_output_path ${{outputs.model_info_output_path}}
    environment: azureml:taxi-train-env@latest
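
This pipeline.yml is handed to the reusable run-pipeline template referenced in the workflow shown in the next file, which for the aml-cli-v2 variant submits it through the Azure ML CLI v2. For illustration only (not part of this commit, and not what the workflows here do), the same YAML could also be submitted from Python with the azure-ai-ml SDK; the workspace coordinates below are placeholders.

from azure.ai.ml import MLClient, load_job
from azure.identity import DefaultAzureCredential

# Illustration only: substitute real workspace coordinates.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id="<subscription-id>",
    resource_group_name="<resource-group>",
    workspace_name="<workspace-name>",
)

# Load the pipeline definition from this repository and submit it as a job.
pipeline_job = load_job(source="classical/aml-cli-v2/mlops/azureml/train/pipeline.yml")
submitted = ml_client.jobs.create_or_update(pipeline_job)
print(submitted.studio_url)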
(GitHub Actions workflow that registers assets and runs the model training pipeline; file path not shown in this view)
@@ -22,12 +22,12 @@ jobs:
          echo "config-file=$config_env" >> $GITHUB_OUTPUT;
  get-config:
    needs: set-env-branch
    uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main-dec31 # TODO - revert to @main
    uses: Azure/mlops-templates/.github/workflows/read-yaml.yml@main
    with:
      file_name: ${{ needs.set-env-branch.outputs.config-file}}
  register-environment:
    needs: get-config
    uses: Azure/mlops-templates/.github/workflows/register-environment.yml@main-dec31 # TODO - revert to @main
    needs: get-config
    uses: Azure/mlops-templates/.github/workflows/register-environment.yml@main
    with:
      resource_group: ${{ needs.get-config.outputs.resource_group }}
      workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
@@ -37,7 +37,7 @@
      creds: ${{secrets.AZURE_CREDENTIALS}}
  register-dataset:
    needs: get-config
    uses: Azure/mlops-templates/.github/workflows/register-dataset.yml@main-dec31 # TODO - revert to @main
    uses: Azure/mlops-templates/.github/workflows/register-dataset.yml@main
    with:
      resource_group: ${{ needs.get-config.outputs.resource_group }}
      workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
@@ -47,7 +47,7 @@
      creds: ${{secrets.AZURE_CREDENTIALS}}
  create-compute:
    needs: [get-config]
    uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main-dec31 # TODO - revert to @main
    uses: Azure/mlops-templates/.github/workflows/create-compute.yml@main
    with:
      cluster_name: cpu-cluster
      size: Standard_DS3_v2
@@ -60,7 +60,7 @@
      creds: ${{secrets.AZURE_CREDENTIALS}}
  run-model-training-pipeline:
    needs: [get-config, register-environment, register-dataset, create-compute]
    uses: Azure/mlops-templates/.github/workflows/run-pipeline.yml@main-dec31 # TODO - revert to @main
    uses: Azure/mlops-templates/.github/workflows/run-pipeline.yml@main
    with:
      resource_group: ${{ needs.get-config.outputs.resource_group }}
      workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
(The remaining changed files were not loaded in this view.)
