-
Notifications
You must be signed in to change notification settings - Fork 122
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into rdondera/vulner_241009
- Loading branch information
Showing
76 changed files
with
1,210 additions
and
172 deletions.
There are no files selected for viewing
6 changes: 6 additions & 0 deletions
6
assets/evaluation_on_cloud/environments/evaluations-built-in/asset.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
name: evaluations-built-in | ||
version: auto | ||
type: environment | ||
spec: spec.yaml | ||
extra_config: environment.yaml | ||
categories: ["Evaluation"] |
8 changes: 8 additions & 0 deletions
8
assets/evaluation_on_cloud/environments/evaluations-built-in/context/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04:latest | ||
|
||
COPY requirements.txt /app/requirements.txt | ||
RUN pip install -r /app/requirements.txt | ||
|
||
# Copy your Python file into the image | ||
COPY evaluate_on_data.py /app/evaluate_on_data.py | ||
COPY save_evaluation.py /app/save_evaluation.py |
124 changes: 124 additions & 0 deletions
124
assets/evaluation_on_cloud/environments/evaluations-built-in/context/evaluate_on_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. | ||
|
||
"""Evaluate for a built-in or custom evaluator.""" | ||
import argparse | ||
import json | ||
import logging | ||
import mlflow | ||
import os | ||
import pandas as pd | ||
import requests | ||
import shutil | ||
from azure.ai.ml.identity import AzureMLOnBehalfOfCredential | ||
from azure.ai.evaluation import evaluate | ||
from save_eval import load_evaluator | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def update_value_in_dict(d, key_substring, new_func):
    """Replace, in place, every string value of ``d`` that contains ``key_substring``.

    Nested dictionaries are walked recursively. A matching string value is
    replaced by ``new_func(value)``; values of any other type are left alone.
    """
    for name in d:
        current = d[name]
        if isinstance(current, dict):
            # Descend into nested mappings; they are modified in place as well.
            update_value_in_dict(current, key_substring, new_func)
            continue
        if isinstance(current, str) and key_substring in current:
            d[name] = new_func(current)
|
||
|
||
def find_file_and_get_parent_dir(root_dir, file_name="flow.flex.yaml"):
    """Return the first directory under ``root_dir`` that directly contains ``file_name``.

    The tree is traversed with ``os.walk``. ``None`` is returned when no
    visited directory contains the file.
    """
    for current_dir, _subdirs, files in os.walk(root_dir):
        if file_name not in files:
            continue
        logger.info(f"Found {file_name} in {current_dir}")
        return current_dir
    return None
|
||
|
||
def copy_evaluator_files(command_line_args):
    """Copy the mounted evaluator files to the relative paths to enable read/write.

    Args:
        command_line_args: Parsed arguments. ``evaluator_name_id_map`` must be a
            JSON object string mapping each evaluator name to the directory
            that is searched for its flow definition file.

    For every evaluator whose flow directory is found, that directory is copied
    to ``./<evaluator_name>``. Evaluators without a flow directory are skipped
    with a warning.
    """
    evaluator_name_id_map = json.loads(command_line_args.evaluator_name_id_map)
    for evaluator_name, evaluator_id in evaluator_name_id_map.items():
        dir_path = find_file_and_get_parent_dir(evaluator_id)
        if not dir_path:
            logger.warning("Directory for evaluator %s not found.", evaluator_name)
            continue

        target_dir = f"./{evaluator_name}"
        # Announce the copy before doing it so a failing copy is traceable in the log.
        logger.info("Copying %s to %s", dir_path, target_dir)
        shutil.copytree(dir_path, target_dir)
        # logging expects a %-style message followed by its arguments; passing the
        # directory listing as a bare extra argument makes the record fail to format.
        logger.info("%s: %s", evaluator_name, os.listdir(target_dir))
|
||
|
||
def initialize_evaluators(command_line_args):
    """Instantiate the evaluators described by the ``evaluators`` JSON argument.

    Each entry's ``InitParams`` become the constructor keyword arguments.
    String values containing ``AZURE_OPENAI_API_KEY`` are replaced by the
    environment variable named by the upper-cased value. Evaluators whose
    ``Id`` contains one of the ``rai_evaluators`` names additionally receive an
    ``AzureMLOnBehalfOfCredential`` as ``credential``.

    Returns a dict mapping evaluator name to the initialized evaluator.
    """
    def _from_environment(variable_name):
        return os.environ[variable_name.upper()]

    initialized = {}
    evaluator_specs = json.loads(command_line_args.evaluators)
    for name, spec in evaluator_specs.items():
        params = spec["InitParams"]
        update_value_in_dict(params, "AZURE_OPENAI_API_KEY", _from_environment)
        evaluator_cls = load_evaluator('./' + name)
        needs_credential = any(marker in spec["Id"] for marker in rai_evaluators)
        if needs_credential:
            params["credential"] = AzureMLOnBehalfOfCredential()
        initialized[name] = evaluator_cls(**params)
    return initialized
|
||
|
||
def run_evaluation(command_line_args, evaluators):
    """Run evaluation using evaluators.

    Args:
        command_line_args: Parsed arguments; ``eval_data`` is passed to
            ``evaluate`` as its ``data`` argument.
        evaluators: Mapping of evaluator name to initialized evaluator, passed
            through to ``evaluate``.

    The entries of ``results['metrics']`` are logged to MLflow. When the result
    contains rows, they are written as JSON Lines to ``instance_results.jsonl``
    and ``eval_results.jsonl`` (same content) and both files are logged as
    MLflow artifacts.
    """
    results = evaluate(
        data=command_line_args.eval_data,
        evaluators=evaluators,
    )

    metrics = {}
    for metric_name, metric_value in results['metrics'].items():
        # logging needs %-placeholders for its arguments; without them the
        # record fails to format and the metric never shows up in the log.
        logger.info("Logging metric: %s %s", metric_name, metric_value)
        metrics[metric_name] = metric_value
    mlflow.log_metrics(metrics)

    # ``results`` was already dereferenced above, so only the rows need checking.
    if results.get("rows"):
        # Convert the results to a DataFrame
        df = pd.DataFrame(results["rows"])

        # Save the DataFrame as JSONL files, then upload them.
        artifact_names = ("instance_results.jsonl", "eval_results.jsonl")
        for artifact_name in artifact_names:
            df.to_json(artifact_name, orient="records", lines=True)
        for artifact_name in artifact_names:
            mlflow.log_artifact(artifact_name)
|
||
|
||
def get_promptflow_run_logs():
    """Log the contents of each promptflow run's ``logs.txt``.

    Every entry of ``/root/.promptflow/.runs/`` that has a ``logs.txt`` file is
    written to the module logger. When the runs directory itself is missing, a
    single "RUN DOES NOT EXIST" message is logged instead.
    """
    runs_root = "/root/.promptflow/.runs/"
    if not os.path.exists(runs_root):
        logger.info("RUN DOES NOT EXIST")
        return

    for run in os.listdir(runs_root):
        log_path = f"{runs_root}{run}/logs.txt"
        if not os.path.exists(log_path):
            continue
        with open(log_path, "r") as log_file:
            logger.info(f"RUN {run} =========================")
            logger.info(log_file.read())
|
||
|
||
# Create a session for making HTTP requests
session = requests.Session()

# Parse command line arguments and debug to ensure working
parser = argparse.ArgumentParser("eval")
parser.add_argument("--eval_data", type=str)
parser.add_argument("--eval_output", type=str)
parser.add_argument("--evaluators", type=str)
parser.add_argument("--evaluator_name_id_map", type=str)

args = parser.parse_args()
# Names matched as substrings of an evaluator's "Id" in initialize_evaluators;
# matching evaluators are given an AzureMLOnBehalfOfCredential.
rai_evaluators = ['HateUnfairnessEvaluator', 'Sexual-Content-Evaluator', 'Hate-and-Unfairness-Evaluator',
                  'Violent-Content-Evaluator', 'Self-Harm-Related-Content-Evaluator']

if __name__ == '__main__':
    # Attach a handler (no-op if the root logger already has one) and let this
    # module's INFO messages through; otherwise they would not be emitted.
    logging.basicConfig()
    logger.setLevel(logging.INFO)

    copy_evaluator_files(args)
    evaluators = initialize_evaluators(args)
    logger.info("*************** Collecting Result of Evaluators ******************")
    # Run the evaluation
    with mlflow.start_run() as run:
        try:
            run_evaluation(args, evaluators)
        except Exception:
            # logger.exception records the message together with the traceback;
            # passing the exception as a bare extra argument breaks %-formatting.
            # NOTE(review): the failure is not re-raised, so the run still
            # completes normally - confirm this best-effort behaviour is intended.
            logger.exception("EXCEPT")
            get_promptflow_run_logs()
5 changes: 5 additions & 0 deletions
5
assets/evaluation_on_cloud/environments/evaluations-built-in/context/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
azure-ai-evaluation | ||
openai | ||
azureml-mlflow | ||
azure-identity | ||
azure-ai-ml |
30 changes: 30 additions & 0 deletions
30
assets/evaluation_on_cloud/environments/evaluations-built-in/context/save_evaluation.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. | ||
|
||
"""Load a built-in or custom evaluator as flow.""" | ||
import importlib | ||
import logging | ||
import os | ||
import sys | ||
from promptflow.client import load_flow | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def load_evaluator(evaluator):
    """Load evaluator as flow.

    Args:
        evaluator: Path handed to promptflow's ``load_flow``.

    Returns:
        The attribute named by the second part of the flow's ``entry``
        ("module:attribute"), taken from the module file
        ``<cwd>/<flow parent directory name>/<module>.py``.

    The module is executed from its file path and registered in
    ``sys.modules`` under its module name.
    """
    # ``import importlib`` alone does not guarantee that the ``util`` submodule
    # is loaded, so import it explicitly before using it.
    import importlib.util

    logger.info(f"Loading evaluator {evaluator}")
    loaded_evaluator = load_flow(evaluator)
    logger.info(loaded_evaluator)

    entry_parts = loaded_evaluator.entry.split(":")
    module_parent = loaded_evaluator.path.parent.name
    module_name = entry_parts[0]
    logger.info(f"Loading module {os.getcwd()} {module_name} from {module_parent}")
    module_path = os.path.join(os.getcwd(), module_parent, module_name + ".py")
    logger.info(f"Loading module {module_name} from {module_path}")

    spec = importlib.util.spec_from_file_location(module_name, module_path)
    mod = importlib.util.module_from_spec(spec)
    logger.info(f"Loaded module {mod}")
    # Register before executing so imports of this name during execution resolve.
    sys.modules[module_name] = mod
    spec.loader.exec_module(mod)
    eval_class = getattr(mod, entry_parts[1])
    return eval_class
11 changes: 11 additions & 0 deletions
11
assets/evaluation_on_cloud/environments/evaluations-built-in/environment.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
image: | ||
name: azureml/curated/evaluations-built-in | ||
os: linux | ||
context: | ||
dir: context | ||
dockerfile: Dockerfile | ||
template_files: | ||
- Dockerfile | ||
publish: | ||
location: mcr | ||
visibility: public |
11 changes: 11 additions & 0 deletions
11
assets/evaluation_on_cloud/environments/evaluations-built-in/spec.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json | ||
description: Python environment for running promptflow-evals based evaluators. | ||
|
||
name: "{{asset.name}}" | ||
version: "{{asset.version}}" | ||
|
||
os_type: linux | ||
|
||
build: | ||
path: "{{image.context.path}}" | ||
dockerfile_path: "{{image.dockerfile.path}}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
extra_config: model.yaml | ||
spec: spec.yaml | ||
type: model | ||
categories: ["Foundation Models"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
## Overview | ||
|
||
The CxrReportGen model utilizes a multimodal architecture, integrating a BiomedCLIP image encoder with a Phi-3-Mini text encoder to accurately interpret complex medical imaging studies of chest X-rays. CxrReportGen follows the same framework as **[MAIRA-2](https://www.microsoft.com/en-us/research/publication/maira-2-grounded-radiology-report-generation/)**. Its primary function is to generate comprehensive and structured radiology reports, with visual grounding represented by bounding boxes on the images. | ||
|
||
### Training information | ||
|
||
| **Training Dataset** | **Details** | | ||
|----------------|---------------------| | ||
| **[MIMIC-CXR](https://physionet.org/content/mimic-cxr/2.0.0/)** | Frontal chest X-rays from the training partition of the MIMIC-CXR dataset and the associated text reports. Rule-based processing was carried out to extract findings and impressions separately, or to map non-labeled report sections to the relevant sections. During training, text is randomly sampled from either the findings or the impression section. In total 203,170 images from this dataset were used.| | ||
| **Proprietary datasets** | Multiple other proprietary datasets, composed of procured data, were additionally leveraged for training. Caution was taken to ensure there was no leakage of test data samples in the data used for training. | | ||
|
||
**Training Statistics:** | ||
- **Data Size:** ~400,000 samples | ||
- **Batch Size:** 16 | ||
- **Epochs:** 3 | ||
- **Learning Rate:** 2.5e-05 | ||
- **Hardware:** 8 A100 GPUs | ||
- **Training Time:** 1 day and 19 hours | ||
- **Sku:** Standard_ND96amsr_A100_v4 | ||
|
||
### License and where to send questions or comments about the model | ||
The license for CxrReportGen is the MIT license. | ||
For questions or comments, please contact: [email protected] | ||
|
||
## Benchmark Results | ||
|
||
### Findings Generation on MIMIC-CXR test set: | ||
|
||
| CheXpert F1-14 (Micro) | CheXpert F1-5 (Micro)| RadGraph-F1 | ROUGE-L | BLEU-4| | ||
|----------------|--------------|-------------|---------|-------| | ||
| 59.1 | 59.7 | 40.8 | 39.1 |23.7 | | ||
|
||
|
||
### Grounded Reporting on [GR-Bench test set](https://arxiv.org/pdf/2406.04449v1): | ||
|
||
| CheXpert F1-14 (Micro) | RadGraph-F1 | ROUGE-L | Box-Completion (Precision/Recall)| | ||
|------------------------|------------ |----------|-----------------| | ||
| 60.0 | 55.6 | 56.6 | 71.5/82.0 | | ||
|
||
## Carbon Footprint | ||
The estimated carbon emissions during training are 0.06364 tCO2eq. | ||
|
||
|
||
## Sample Input and Output | ||
|
||
### Input: | ||
```python | ||
{'input_data': | ||
{'columns': ['frontal_image', 'lateral_image', 'indication', 'technique', 'comparison'], | ||
'index': [0], | ||
'data': [ | ||
[ | ||
base64.encodebytes(read_image(frontal)).decode("utf-8"), | ||
base64.encodebytes(read_image(lateral)).decode("utf-8"), | ||
'Pneumonia', | ||
'One view chest', | ||
'None' | ||
]]}, | ||
'params': {}} | ||
``` | ||
|
||
### Output: | ||
The output is JSON-encoded inside an array. | ||
```python | ||
findings = json.loads(result[0]["output"]) | ||
findings | ||
``` | ||
|
||
```python | ||
[['Cardiac silhouette remains normal in size.', None], | ||
['Hilar contours are unremarkable.', None], | ||
['There are some reticular appearing opacities in the left base not seen on the prior exam.', | ||
[[0.505, 0.415, 0.885, 0.775]]], | ||
['There is blunting of the right costophrenic sulcus.', | ||
[[0.005, 0.555, 0.155, 0.825]]], | ||
['Upper lungs are clear.', None]] | ||
``` | ||
The generated bounding box coordinates are the (x, y) coordinates of the top left and bottom right corners of the box, e.g. (x_topleft, y_topleft, x_bottomright, y_bottomright). These are relative to the cropped image (that is, the image that the model ultimately got as input), so be careful while visualising. | ||
|
||
You can optionally apply the below code on the output to adjust the size: | ||
```python | ||
def adjust_box_for_original_image_size(box: BoxType, width: int, height: int) -> BoxType:
    """
    Map a box predicted on the square-cropped image back onto the original image.

    The image processor crops the image to a square before the MAIRA-2 forward pass, so the
    model's boxes are relative to that crop. This assumes the crop is centred and its side
    equals the shortest side of the original image.

    Args:
        box (BoxType):
            The box to be adjusted, normalised to (0, 1).
        width (int):
            Original width of the image, in pixels.
        height (int):
            Original height of the image, in pixels.

    Returns:
        BoxType: The box normalised relative to the original size of the image.
    """
    side = min(width, height)
    left_margin = (width - side) // 2
    top_margin = (height - side) // 2

    def _renormalise(value, margin, full_extent):
        # Crop-relative fraction -> absolute pixel in the original -> fraction of the original.
        return int(value * side + margin) / full_extent

    x_min, y_min, x_max, y_max = box
    return (
        _renormalise(x_min, left_margin, width),
        _renormalise(y_min, top_margin, height),
        _renormalise(x_max, left_margin, width),
        _renormalise(y_max, top_margin, height),
    )
``` | ||
|
||
## Ethical Considerations | ||
|
||
CxrReportGen should not be used as a diagnostic tool or as a substitute for professional medical advice. It is designed to assist radiologists by generating findings and reports, but final clinical decisions should always be made by human experts. | ||
|
||
For detailed guidelines on ethical use, refer to Microsoft's [Responsible AI Principles](https://www.microsoft.com/en-us/ai/responsible-ai). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
path: | ||
container_name: models | ||
container_path: huggingface/CxrReportGen/mlflow_model_folder | ||
storage_name: automlcesdkdataresources | ||
type: azureblob | ||
publish: | ||
description: description.md | ||
type: mlflow_model |
Oops, something went wrong.