Skip to content

Commit

Permalink
Merge branch 'mlperf-inference' into docs
Browse files Browse the repository at this point in the history
  • Loading branch information
arjunsuresh authored May 24, 2024
2 parents 5e0201a + ef2a9fe commit 0e48262
Show file tree
Hide file tree
Showing 28 changed files with 525 additions and 28 deletions.
2 changes: 1 addition & 1 deletion automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ def _run(self, i):
if str(state['docker'].get('run', True)).lower() in ['false', '0', 'no']:
print (recursion_spaces+' - Skipping script::{} run as we are inside docker'.format(found_script_artifact))
return {'return': 0}
elif str(state['docker'].get('docker_real_run', True)).lower() in ['false', '0', 'no']:
elif str(state['docker'].get('real_run', True)).lower() in ['false', '0', 'no']:
print (recursion_spaces+' - Doing fake run for script::{} as we are inside docker'.format(found_script_artifact))
fake_run = True
env['CM_TMP_FAKE_RUN']='yes'
Expand Down
15 changes: 14 additions & 1 deletion script/add-custom-nvidia-system/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ automation_alias: script
automation_uid: 5b4e0237da074764

category: "MLPerf benchmark support"

docker:
real_run: False

# User-friendly tags to find this CM script
tags:
Expand Down Expand Up @@ -94,6 +95,13 @@ variations:
add_deps_recursive:
nvidia-inference-common-code:
tags: _ctuning
go:
group: code
add_deps_recursive:
nvidia-inference-common-code:
tags: _go




versions:
Expand All @@ -111,3 +119,8 @@ versions:
add_deps_recursive:
nvidia-inference-common-code:
version: r3.1

r4.0:
add_deps_recursive:
nvidia-inference-common-code:
version: r4.0
4 changes: 4 additions & 0 deletions script/app-mlperf-inference-ctuning-cpp-tflite/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@
"tags": "get,ml-model,efficientnet,raw,_tflite"
},
{
"names": [
"tensorflow",
"tflite"
],
"tags": "get,tensorflow,lib,_tflite"
},
{
Expand Down
2 changes: 1 addition & 1 deletion script/app-mlperf-inference-mlcommons-python/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio

env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "gpt-j")
cmd = env['CM_PYTHON_BIN_WITH_PATH'] + \
" run.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \
" main.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \
' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \
scenario_extra_options + mode_extra_options + dataset_options
cmd = cmd.replace("--count", "--max_examples")
Expand Down
38 changes: 34 additions & 4 deletions script/app-mlperf-inference-nvidia/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,10 @@ deps:
CM_MLPERF_NVIDIA_HARNESS_RUN_MODE:
- run_harness

- tags: get,generic-python-lib,_package.nvmitten,_path./opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl
- tags: get,generic-python-lib,_package.nvmitten
update_tags_from_env_with_prefix:
_path.:
- CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH
enable_if_env:
CM_RUN_STATE_DOCKER:
- 'yes'
Expand Down Expand Up @@ -308,6 +311,23 @@ post_deps:

# Variations to customize dependencies
variations:
# MLPerf inference version
v4.0:
group: version
default: true
env:
CM_MLPERF_INFERENCE_VERSION: "v4.0"
CM_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX: GPTJ-FP8-quantized
adr:
pytorch:
tags: _for-nvidia-mlperf-inference-v4.0
v3.1:
env:
CM_MLPERF_INFERENCE_VERSION: "v3.1"
CM_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX: GPTJ-07142023.pth
adr:
pytorch:
tags: _for-nvidia-mlperf-inference-v3.1
# Target devices
cpu:
group: device
Expand Down Expand Up @@ -366,6 +386,7 @@ variations:
- tags: get,generic-python-lib,_transformers
- tags: get,generic-python-lib,_safetensors
- tags: get,generic-python-lib,_onnx
- tags: get,generic-python-lib,_onnx-graphsurgeon

bert-99:
group: model
Expand Down Expand Up @@ -476,18 +497,25 @@ variations:
deps:
- tags: get,generic-python-lib,_package.datasets
- tags: get,generic-python-lib,_package.simplejson
- tags: get,generic-python-lib,_onnx
- tags: get,generic-python-lib,_transformers
- tags: get,generic-python-lib,_onnx-graphsurgeon
env:
CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://cloud.mlcommons.org/index.php/s/QAZ2oM94MkFtbQx/download"

gptj_,build:
deps:
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1
- tags: install,pytorch,from.src
names:
- pytorch
- tags: get,cmake
version_min: "3.25.0"

gptj_,build_engine:
deps:
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1
- tags: install,pytorch,from.src
names:
- pytorch
- tags: get,cmake
version_min: "3.25.0"

Expand Down Expand Up @@ -877,7 +905,9 @@ variations:

gptj_,run_harness:
deps:
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1
- tags: install,pytorch,from.src
names:
- pytorch
- tags: get,cmake
version_min: "3.25.0"
env:
Expand Down
8 changes: 6 additions & 2 deletions script/app-mlperf-inference-nvidia/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ def preprocess(i):
return {'return':1, 'error': 'Windows is not supported in this script yet'}
env = i['env']

if str(env.get('CM_RUN_STATE_DOCKER', '')).lower() in ['1', 'true', 'yes']:
return {'return': 0}

if env.get('CM_MODEL', '') == '':
return {'return': 1, 'error': 'Please select a variation specifying the model to run'}

Expand Down Expand Up @@ -153,7 +156,7 @@ def preprocess(i):
cmds.append("make download_data BENCHMARKS='gptj'")

fp32_model_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'GPTJ-6B', 'checkpoint-final')
fp8_model_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'GPTJ-6B', 'fp8-quantized-ammo', 'GPTJ-07142023.pth')
fp8_model_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'GPTJ-6B', 'fp8-quantized-ammo', env['CM_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX'])
vocab_path = os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'bert', 'vocab.txt')

if not os.path.exists(os.path.dirname(fp32_model_path)):
Expand All @@ -163,7 +166,8 @@ def preprocess(i):

if not os.path.exists(fp32_model_path):
env['CM_REQUIRE_GPTJ_MODEL_DOWNLOAD'] = 'yes' # download via prehook_deps
cmds.append(f"cp -r $CM_ML_MODEL_FILE_WITH_PATH {fp32_model_path}")
if make_command == "build_engine":
cmds.append(f"cp -r $CM_ML_MODEL_FILE_WITH_PATH {fp32_model_path}")

model_name = "gptj"
model_path = fp8_model_path
Expand Down
39 changes: 33 additions & 6 deletions script/app-mlperf-inference/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -182,13 +182,16 @@ variations:
tags: _float32
librispeech-accuracy-script:
tags: _int32
cnndm-accuracy-script:
tags: _int32
env:
CM_MLPERF_PYTHON: 'yes'
CM_MLPERF_IMPLEMENTATION: mlcommons_python
CM_SQUAD_ACCURACY_DTYPE: float32
CM_IMAGENET_ACCURACY_DTYPE: float32
CM_OPENIMAGES_ACCURACY_DTYPE: float32
CM_LIBRISPEECH_ACCURACY_DTYPE: float32
CM_CNNDM_ACCURACY_DTYPE: int32
prehook_deps:
- names:
- python-reference-mlperf-inference
Expand Down Expand Up @@ -235,6 +238,10 @@ variations:
default_variations:
backend: onnxruntime

nvidia-original,r4.1_default:
docker:
base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public

nvidia-original:
docker:
interactive: True
Expand Down Expand Up @@ -273,12 +280,6 @@ variations:
CM_IMAGENET_ACCURACY_DTYPE: int32
CM_CNNDM_ACCURACY_DTYPE: int32
CM_LIBRISPEECH_ACCURACY_DTYPE: int8
deps:
- tags: get,cuda-devices
skip_if_env:
CM_CUDA_DEVICE_PROP_GLOBAL_MEMORY:
- "yes"
- "on"
prehook_deps:
- names:
- nvidia-original-mlperf-inference
Expand Down Expand Up @@ -904,6 +905,12 @@ variations:
add_deps_recursive:
mlperf-inference-implementation:
tags: _cuda
deps:
- tags: get,cuda-devices
skip_if_env:
CM_CUDA_DEVICE_PROP_GLOBAL_MEMORY:
- "yes"
- "on"
rocm:
docker:
all_gpus: 'yes'
Expand Down Expand Up @@ -1142,6 +1149,25 @@ variations:
default_env:
CM_SKIP_SYS_UTILS: 'yes'
CM_REGENERATE_MEASURE_FILES: 'yes'
env:
CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl'

r4.1_default:
group:
reproducibility
add_deps_recursive:
nvidia-inference-common-code:
version: r4.0
tags: _go
nvidia-inference-server:
version: r4.0
tags: _go
default_env:
CM_SKIP_SYS_UTILS: 'yes'
CM_REGENERATE_MEASURE_FILES: 'yes'
env:
CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl'


invalid_variation_combinations:
-
Expand Down Expand Up @@ -1240,6 +1266,7 @@ docker:
- tags: get,mlperf,inference,results,dir
- tags: get,mlperf,inference,submission,dir
pre_run_cmds:
#- cm pull repo && cm run script --tags=get,git,repo,_repo.https://github.com/GATEOverflow/inference_results_v4.0.git --update
- cm pull repo
mounts:
- "${{ CM_DATASET_IMAGENET_PATH }}:${{ CM_DATASET_IMAGENET_PATH }}"
Expand Down
16 changes: 16 additions & 0 deletions script/authenticate-github-cli/_cm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
alias: authenticate-github-cli
automation_alias: script
automation_uid: 5b4e0237da074764
cache: true
input_mapping:
with_token: CM_GH_AUTH_TOKEN
with-token: CM_GH_AUTH_TOKEN
tags:
- auth
- authenticate
- github
- gh
- cli
uid: 7b57673ac14a4337
deps:
- tags: get,gh,cli
27 changes: 27 additions & 0 deletions script/authenticate-github-cli/customize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from cmind import utils
import os

def preprocess(i):
    """Build the GitHub CLI login command and store it in ``env['CM_RUN_CMD']``.

    Without a token the command is the interactive ``gh auth login``. When
    ``CM_GH_AUTH_TOKEN`` is set, the token is piped to
    ``gh auth login --with-token`` instead.

    Args:
        i: Script input dictionary. Only ``i['env']`` is read, and it is
            updated in place with the ``CM_RUN_CMD`` key.

    Returns:
        ``{'return': 0}`` on success.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import shlex

    env = i['env']

    cmd = "gh auth login"
    token = env.get('CM_GH_AUTH_TOKEN', '')
    if token != '':
        # The resulting string is evaluated by a shell (the companion run.sh
        # uses `eval`), so the token must be quoted: otherwise characters such
        # as `;`, `|`, `$` or spaces inside it would be interpreted as shell
        # syntax. Tokens made only of shell-safe characters are left unchanged.
        cmd = f" echo {shlex.quote(str(token))} | {cmd} --with-token"

    env['CM_RUN_CMD'] = cmd

    return {'return': 0}

def postprocess(i):
    """Post-processing hook for the script; currently a no-op.

    Args:
        i: Script input dictionary (not used by this hook).

    Returns:
        ``{'return': 0}`` to signal success.
    """
    return {'return': 0}
1 change: 1 addition & 0 deletions script/authenticate-github-cli/run.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
rem native script
18 changes: 18 additions & 0 deletions script/authenticate-github-cli/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash

# Runs the command prepared in ${CM_RUN_CMD} unless ${CM_FAKE_RUN} is "yes".

#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}

#To export any variable
#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out

#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency

echo "Running: "
if [[ -n "${CM_GH_AUTH_TOKEN}" ]]; then
  # The command embeds the GitHub token; mask it so the secret does not end up
  # in console output or logs. The quoted pattern is matched literally.
  echo "${CM_RUN_CMD//"${CM_GH_AUTH_TOKEN}"/<hidden>}"
else
  echo "${CM_RUN_CMD}"
fi
echo ""

if [[ "${CM_FAKE_RUN}" != "yes" ]]; then
  # The unmasked command is what actually gets executed.
  eval "${CM_RUN_CMD}"
  test $? -eq 0 || exit 1
fi

39 changes: 39 additions & 0 deletions script/build-mlperf-inference-server-nvidia/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ variations:
add_deps_recursive:
nvidia-inference-common-code:
tags: _ctuning
go:
group: code
add_deps_recursive:
nvidia-inference-common-code:
tags: _go
nvidia-only:
group: code
add_deps_recursive:
Expand All @@ -180,6 +185,22 @@ variations:
nvidia-inference-common-code:
tags: _mlcommons

r4.0:
group: version
add_deps_recursive:
nvidia-inference-common-code:
version: r4.0
nvidia-scratch-space:
tags: _version.4_1
deps:
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0
names:
- pytorch
- torch
- tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v4.0
names:
- pytorchvision
- torchvision

versions:
r2.1:
Expand Down Expand Up @@ -213,6 +234,24 @@ versions:
- torchvision
- tags: install,nccl,libs,_cuda

r4.0:
add_deps_recursive:
nvidia-inference-common-code:
version: r4.0
nvidia-scratch-space:
tags: _version.4_1
env:
BUILD_TRTLLM: 1
deps:
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0
names:
- pytorch
- torch
- tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v4.0
names:
- pytorchvision
- torchvision

docker:
skip_run_cmd: 'no'
all_gpus: 'yes'
Expand Down
1 change: 1 addition & 0 deletions script/get-git-repo/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"folder": "CM_GIT_CHECKOUT_FOLDER",
"patch": "CM_GIT_PATCH",
"update": "CM_GIT_REPO_PULL",
"pull": "CM_GIT_REPO_PULL",
"env_key": "CM_GIT_ENV_KEY",
"submodules": "CM_GIT_RECURSE_SUBMODULES"
},
Expand Down
Loading

0 comments on commit 0e48262

Please sign in to comment.