
[Model Evaluation] Update Model Evaluation Components (#3391)
* Update Model Evaluation Components

* Update env asset id

* Enable inference config

---------

Co-authored-by: Ankush Bhatia <[email protected]>
ankushbhatia2 and Ankush Bhatia authored Sep 19, 2024
1 parent 8fd46c5 commit 6560bc6
Showing 8 changed files with 22 additions and 16 deletions.
@@ -3,7 +3,7 @@ name: compute_metrics
display_name: Compute Metrics
description: Calculate model performance metrics, given ground truth and prediction data.

-version: 0.0.31
+version: 0.0.32
type: command
tags:
  type: evaluation
@@ -77,7 +77,7 @@ outputs:

is_deterministic: True
code: ../../src
-environment: azureml://registries/azureml/environments/model-evaluation/versions/32
+environment: azureml://registries/azureml/environments/model-evaluation/labels/latest

command: >-
python compute_metrics.py
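Note: switching the environment reference from a pinned build (versions/32) to labels/latest means the component resolves whichever model-evaluation environment version carries the "latest" label at submission time. A minimal sketch of checking what the label currently points to, assuming the azure-ai-ml SDK and an Azure credential are available (illustrative only, not part of this change):

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Client scoped to the shared "azureml" registry named in the URI above.
registry_client = MLClient(credential=DefaultAzureCredential(), registry_name="azureml")

# Resolve the environment version currently tagged "latest".
env = registry_client.environments.get(name="model-evaluation", label="latest")
print(env.name, env.version)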
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
name: model_prediction_with_container
-version: 0.0.2
+version: 0.0.3
type: command
display_name: Distributed Model Prediction
description: "Optimized Distributed inference component for LLMs."
@@ -69,7 +69,7 @@ outputs:


code: ../../src_distributed
-environment: azureml://registries/azureml/environments/foundation-model-inference/versions/46
+environment: azureml://registries/azureml/environments/foundation-model-inference/labels/latest
command: >-
python download_extra_dependency.py
--mlflow-model '${{inputs.mlflow_model}}' ;
@@ -3,7 +3,7 @@ name: model_prediction
display_name: Model Prediction
description: Generate predictions on a given mlflow model for supported tasks.

-version: 0.0.31
+version: 0.0.32
type: command
tags:
  type: evaluation
@@ -82,7 +82,7 @@ outputs:

is_deterministic: True
code: ../../src
-environment: azureml://registries/azureml/environments/model-evaluation/versions/32
+environment: azureml://registries/azureml/environments/model-evaluation/labels/latest

command: >-
python download_dependencies.py
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
name: model_evaluation_pipeline
-version: 0.0.31
+version: 0.0.32
type: pipeline
display_name: Model Evaluation Pipeline
description: Pipeline component for model evaluation for supported tasks. \
@@ -87,7 +87,7 @@ outputs:
jobs:
  validation_trigger_model_evaluation:
    type: command
-    component: azureml:validation_trigger_model_evaluation:0.0.31
+    component: azureml:validation_trigger_model_evaluation:0.0.32
    compute: '${{parent.inputs.compute_name}}'
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
@@ -111,7 +111,7 @@ jobs:

  model_prediction:
    type: command
-    component: azureml:model_prediction:0.0.31
+    component: azureml:model_prediction:0.0.32
    compute: '${{parent.inputs.compute_name}}'
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
@@ -128,7 +128,7 @@

  compute_metrics:
    type: command
-    component: azureml:compute_metrics:0.0.31
+    component: azureml:compute_metrics:0.0.32
    compute: '${{parent.inputs.compute_name}}'
    resources:
      instance_type: '${{parent.inputs.instance_type}}'
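Note: the pipeline pins its child components to the exact versions bumped in this commit, so the three jobs stay in lockstep with the component specs above. A consumer who wants the updated pipeline component can load it from the registry by version or by label; a minimal sketch, assuming the azure-ai-ml SDK (illustrative, not part of this change):

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

registry_client = MLClient(credential=DefaultAzureCredential(), registry_name="azureml")

# Pin to the version introduced here, or pass label="latest" to float to the newest.
eval_pipeline = registry_client.components.get(name="model_evaluation_pipeline", version="0.0.32")
print(eval_pipeline.display_name)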
@@ -3,7 +3,7 @@ name: validation_trigger_model_evaluation
display_name: Validation Trigger Model Evaluation
description: Component for enabling validation of model evaluation pipeline.

-version: 0.0.31
+version: 0.0.32
type: command
tags:
  type: evaluation
@@ -81,7 +81,7 @@ outputs:
is_control: true

is_deterministic: True
-environment: azureml://registries/azureml/environments/model-evaluation/versions/32
+environment: azureml://registries/azureml/environments/model-evaluation/labels/latest
code: ../../src

command: >
@@ -25,6 +25,7 @@ class ModelPath:
    DEFAULT_TOKENIZER_FILE = "tokenizer_config.json"
    DEFAULT_MLFLOW_MODEL_PATH = "model"
    DEFAULT_TOKENIZER_PATH = "components/tokenizer"
+    INFERENCE_CONFIG_PATH = "ml_configs/inference_config.json"


class ArgumentLiterals:
@@ -123,7 +123,6 @@ def _make_chat_completion_data(self, input_df, last_chats, col_name):
conversation = datarow[0]
conversation.append({"role":"assistant", "content":last_chats[ind]})
appended_data[col_name].append(conversation)
-logger.info(f"Final Conversations: {appended_data}")
return pd.DataFrame(appended_data)


@@ -501,11 +500,15 @@ def main():
)
if not os.path.exists(tokenizer_path):
tokenizer_path = model_path
+    inference_config = None
+    if os.path.exists(os.path.join(args.mlflow_model, ModelPath.INFERENCE_CONFIG_PATH)):
+        inference_config = os.path.join(args.mlflow_model, ModelPath.INFERENCE_CONFIG_PATH)
engine_config, task_config, default_generator_configs, task_type, model_info = build_configs_from_model(
mlmodel,
model_path,
config_path,
-    tokenizer_path
+    tokenizer_path,
+    inference_config
)

config = {
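Note: the call site above now passes the optional inference config path as an extra argument when ml_configs/inference_config.json exists under the MLflow model root. The definition of build_configs_from_model is not part of this diff; a backward-compatible shape consistent with the call would default the new parameter to None (hypothetical sketch, not the actual implementation):

def build_configs_from_model(mlmodel, model_path, config_path, tokenizer_path, inference_config=None):
    # Hypothetical signature inferred from the updated call site; when
    # inference_config is provided, its settings would feed into the engine
    # configuration, otherwise existing defaults apply.
    ...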
@@ -10,7 +10,7 @@
from data_utils import read_model_prediction_data, parse_input_ground_truth_col, prepare_data
from argparse import ArgumentParser
from logging_utilities import get_logger
-from llm.optimized.inference.constants import ALL_TASKS
+from llm.optimized.inference.constants import ALL_TASKS, TaskType, SupportedTask
from local_constants import ArgumentLiterals
from itertools import repeat
from typing import Union
@@ -79,7 +79,9 @@ def validate_and_get_columns(args):
data = list(read_model_prediction_data(args["data"], nrows=1))[0]
input_column_names, label_column_name, extra_y_test_cols = get_column_names(args, data)

-    validate_input_column_names(input_column_names, data)
+    task = args[ArgumentLiterals.TASK]
+    if task not in [SupportedTask.CHAT_COMPLETION, TaskType.CONVERSATIONAL]:
+        validate_input_column_names(input_column_names, data)


cols = []
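Note: chat-completion and conversational tasks keep the whole dialogue as a list of role/content turns in a single field, rather than one flat column per input, so per-column input validation is now skipped for those task types. A purely illustrative row (the column name is hypothetical) showing why flat column checks do not apply:

# Illustrative only: one field holds the full conversation, not one column per input.
example_row = {
    "input": [
        {"role": "user", "content": "Summarize the quarterly report."},
        {"role": "assistant", "content": "Revenue grew 8% quarter over quarter."},
        {"role": "user", "content": "What drove the increase?"},
    ]
}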
