Merge branch 'mlperf-inference' into changesmixtral
arjunsuresh authored Oct 8, 2024
2 parents bc62ed5 + 758d087 commit 8f4f86a
Showing 7 changed files with 58 additions and 5 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/test-mlperf-inference-dlrm.yml
@@ -0,0 +1,49 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: MLPerf inference DLRM-v2

on:
  schedule:
    - cron: "30 21 * * *"

jobs:
  build_reference:
    if: github.repository_owner == 'gateoverflow'
    runs-on: [ self-hosted, GO-spr, linux, x64 ]
    strategy:
      fail-fast: false
      matrix:
        python-version: [ "3.12" ]
        backend: [ "pytorch" ]
        device: [ "cpu", "cuda" ]

    steps:
      - name: Test MLPerf Inference DLRM-v2 reference implementation
        run: |
          source gh_action/bin/deactivate || python3 -m venv gh_action
          source gh_action/bin/activate
          export CM_REPOS=$HOME/GH_CM
          python3 -m pip install cm4mlops
          cm pull repo
          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean

  build_intel:
    if: github.repository_owner == 'gateoverflow'
    runs-on: [ self-hosted, GO-spr, linux, x64 ]
    strategy:
      fail-fast: false
      matrix:
        python-version: [ "3.12" ]
        backend: [ "pytorch" ]
        device: [ "cpu" ]

    steps:
      - name: Test MLPerf Inference DLRM-v2 INTEL implementation
        run: |
          source gh_action/bin/deactivate || python3 -m venv gh_action
          source gh_action/bin/activate
          export CM_REPOS=$HOME/GH_CM
          python3 -m pip install cm4mlops
          cm pull repo
          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=intel --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
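Both jobs follow the same nightly pattern: reuse or recreate a local venv, install cm4mlops, pull the CM repo, then launch a short Docker-based test submission. A minimal sketch for reproducing the reference job by hand, with the matrix values pinned to one combination (assumes Docker and a working cm4mlops install; paths are illustrative):

    python3 -m venv gh_action && source gh_action/bin/activate
    export CM_REPOS=$HOME/GH_CM
    python3 -m pip install cm4mlops
    cm pull repo
    # Same entry point as the workflow step above:
    cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" \
      --model=dlrm-v2-99 --implementation=reference --backend=pytorch --device=cpu \
      --category=datacenter --scenario=Offline --execution_mode=test --batch_size=1 \
      --test_query_count=1 --target_qps=1 --docker --docker_it=no --quiet --clean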
2 changes: 1 addition & 1 deletion .github/workflows/test-mlperf-inference-gptj.yml
@@ -28,6 +28,6 @@ jobs:
        cm pull repo
    - name: Test MLPerf Inference GPTJ
      run: |
-       cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
+       cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --custom_system_nvidia=yes --clean
        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions
2 changes: 1 addition & 1 deletion .github/workflows/test-mlperf-inference-sdxl.yaml
@@ -22,5 +22,5 @@ jobs:
        export CM_REPOS=$HOME/GH_CM
        python3 -m pip install cm4mlops
        cm pull repo
-       cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
+       cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --custom_system_nvidia=yes --clean
        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions
2 changes: 1 addition & 1 deletion .github/workflows/test-scc24-sdxl.yaml
@@ -52,7 +52,7 @@ jobs:
        pip install --upgrade cm4mlops
        pip install tabulate
        cm pull repo
-       cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --clean
+       cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean
        cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
        cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results
        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions
@@ -25,6 +25,9 @@ def preprocess(i):
    if env.get('CM_CLEAN_ARTIFACT_NAME', '') == 'preprocessed_data':
        clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "preprocessed_data", "coco2014-tokenized-sdxl")} """
        cache_rm_tags = "nvidia-harness,_preprocess_data,_sdxl"
+   if env.get('CM_CLEAN_ARTIFACT_NAME', '') == 'downloaded_model':
+       clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "models", "SDXL")} """
+       cache_rm_tags = "nvidia-harness,_download_model,_sdxl"

    cache_rm_tags = cache_rm_tags + extra_cache_rm_tags
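The new branch mirrors the existing preprocessed_data case but targets the downloaded SDXL model instead. A minimal sketch of what the f-string resolves to, assuming a hypothetical scratch path of /scratch:

    import os

    env = {"CM_NVIDIA_MLPERF_SCRATCH_PATH": "/scratch"}  # hypothetical value for illustration
    clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "models", "SDXL")} """
    print(clean_cmd)  # -> rm -rf /scratch/models/SDXL
    # Cache entries matching "nvidia-harness,_download_model,_sdxl" are removed separately via cache_rm_tags.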

4 changes: 2 additions & 2 deletions script/get-cuda-devices/customize.py
@@ -53,8 +53,8 @@ def postprocess(i):
        key_env = 'CM_CUDA_DEVICE_PROP_'+key.upper().replace(' ','_')
        env[key_env] = val

-   state['cm_cuda_num_devices'] = gpu_id
-   env['CM_CUDA_NUM_DEVICES'] = gpu_id
+   state['cm_cuda_num_devices'] = gpu_id + 1
+   env['CM_CUDA_NUM_DEVICES'] = gpu_id + 1

    state['cm_cuda_device_prop'] = p
    state['cm_cuda_devices_prop'] = gpu
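This looks like an off-by-one fix: gpu_id appears to be a zero-based index assigned per enumerated device, so the device count is the last index plus one. A minimal sketch of the pattern, with a hypothetical two-GPU detection result:

    gpu = []
    gpu_id = -1
    for name in ["NVIDIA RTX 4090 #0", "NVIDIA RTX 4090 #1"]:  # stand-in for parsed detector output
        gpu_id += 1
        gpu.append({"gpu_id": gpu_id, "name": name})
    # gpu_id ends at 1 for two devices, so the count must be gpu_id + 1
    assert gpu_id + 1 == len(gpu) == 2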
1 change: 1 addition & 0 deletions script/run-mlperf-inference-app/_cm.yaml
@@ -43,6 +43,7 @@ input_mapping:
   category: CM_MLPERF_SUBMISSION_SYSTEM_TYPE
   clean: CM_MLPERF_CLEAN_ALL
   compliance: CM_MLPERF_LOADGEN_COMPLIANCE
+  custom_system_nvidia: CM_CUSTOM_SYSTEM_NVIDIA
   dashboard_wb_project: CM_MLPERF_DASHBOARD_WANDB_PROJECT
   dashboard_wb_user: CM_MLPERF_DASHBOARD_WANDB_USER
   debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM
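This mapping is what connects the new --custom_system_nvidia=yes flag in the workflow changes above to the scripts: input_mapping entries translate run-mlperf-inference-app command-line inputs into CM_* environment variables for downstream steps. A hedged usage sketch (the downstream effect on the NVIDIA harness is assumed from the mapping, not shown in this diff):

    # Passing the flag on the command line...
    cm run script --tags=run-mlperf,inference,_submission,_short --custom_system_nvidia=yes ...
    # ...should surface in the script environment as:
    #   CM_CUSTOM_SYSTEM_NVIDIA=yes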
