Merge branch 'mlperf-inference' into changesmixtral
arjunsuresh authored Oct 8, 2024
2 parents bc62ed5 + 758d087 commit 8f4f86a
Showing 7 changed files with 58 additions and 5 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/test-mlperf-inference-dlrm.yml
@@ -0,0 +1,49 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: MLPerf inference DLRM-v2

on:
  schedule:
    - cron: "30 21 * * *"

jobs:
  build_reference:
    if: github.repository_owner == 'gateoverflow'
    runs-on: [ self-hosted, GO-spr, linux, x64 ]
    strategy:
      fail-fast: false
      matrix:
        python-version: [ "3.12" ]
        backend: [ "pytorch" ]
        device: [ "cpu", "cuda" ]

    steps:
      - name: Test MLPerf Inference DLRM-v2 reference implementation
        run: |
          source gh_action/bin/deactivate || python3 -m venv gh_action
          source gh_action/bin/activate
          export CM_REPOS=$HOME/GH_CM
          python3 -m pip install cm4mlops
          cm pull repo
          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean

  build_intel:
    if: github.repository_owner == 'gateoverflow'
    runs-on: [ self-hosted, GO-spr, linux, x64 ]
    strategy:
      fail-fast: false
      matrix:
        python-version: [ "3.12" ]
        backend: [ "pytorch" ]
        device: [ "cpu" ]

    steps:
      - name: Test MLPerf Inference DLRM-v2 INTEL implementation
        run: |
          source gh_action/bin/deactivate || python3 -m venv gh_action
          source gh_action/bin/activate
          export CM_REPOS=$HOME/GH_CM
          python3 -m pip install cm4mlops
          cm pull repo
          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=intel --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
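Both jobs follow the same nightly pattern: reuse or recreate a local venv, install cm4mlops, pull the CM repo, then launch a short Docker-based test submission. A minimal sketch for reproducing the reference job by hand, with the matrix values pinned to one combination (assumes Docker and a working cm4mlops install; paths are illustrative):

    python3 -m venv gh_action && source gh_action/bin/activate
    export CM_REPOS=$HOME/GH_CM
    python3 -m pip install cm4mlops
    cm pull repo
    # Same entry point as the workflow step above:
    cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" \
      --model=dlrm-v2-99 --implementation=reference --backend=pytorch --device=cpu \
      --category=datacenter --scenario=Offline --execution_mode=test --batch_size=1 \
      --test_query_count=1 --target_qps=1 --docker --docker_it=no --quiet --clean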
2 changes: 1 addition & 1 deletion .github/workflows/test-mlperf-inference-gptj.yml
@@ -28,6 +28,6 @@ jobs:
        cm pull repo
    - name: Test MLPerf Inference GPTJ
      run: |
-       cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
+       cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --custom_system_nvidia=yes --clean
        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions
2 changes: 1 addition & 1 deletion .github/workflows/test-mlperf-inference-sdxl.yaml
@@ -22,5 +22,5 @@ jobs:
        export CM_REPOS=$HOME/GH_CM
        python3 -m pip install cm4mlops
        cm pull repo
-       cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
+       cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --custom_system_nvidia=yes --clean
        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions
2 changes: 1 addition & 1 deletion .github/workflows/test-scc24-sdxl.yaml
@@ -52,7 +52,7 @@ jobs:
        pip install --upgrade cm4mlops
        pip install tabulate
        cm pull repo
-       cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --clean
+       cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean
        cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
        cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results
        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions
@@ -25,6 +25,9 @@ def preprocess(i):
    if env.get('CM_CLEAN_ARTIFACT_NAME', '') == 'preprocessed_data':
        clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "preprocessed_data", "coco2014-tokenized-sdxl")} """
        cache_rm_tags = "nvidia-harness,_preprocess_data,_sdxl"
+   if env.get('CM_CLEAN_ARTIFACT_NAME', '') == 'downloaded_model':
+       clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "models", "SDXL")} """
+       cache_rm_tags = "nvidia-harness,_download_model,_sdxl"

    cache_rm_tags = cache_rm_tags + extra_cache_rm_tags
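The new branch mirrors the existing preprocessed_data case but targets the downloaded SDXL model instead. A minimal sketch of what the f-string resolves to, assuming a hypothetical scratch path of /scratch:

    import os

    env = {"CM_NVIDIA_MLPERF_SCRATCH_PATH": "/scratch"}  # hypothetical value for illustration
    clean_cmd = f"""rm -rf {os.path.join(env['CM_NVIDIA_MLPERF_SCRATCH_PATH'], "models", "SDXL")} """
    print(clean_cmd)  # -> rm -rf /scratch/models/SDXL
    # Cache entries matching "nvidia-harness,_download_model,_sdxl" are removed separately via cache_rm_tags.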

4 changes: 2 additions & 2 deletions script/get-cuda-devices/customize.py
@@ -53,8 +53,8 @@ def postprocess(i):
        key_env = 'CM_CUDA_DEVICE_PROP_'+key.upper().replace(' ','_')
        env[key_env] = val

-   state['cm_cuda_num_devices'] = gpu_id
-   env['CM_CUDA_NUM_DEVICES'] = gpu_id
+   state['cm_cuda_num_devices'] = gpu_id + 1
+   env['CM_CUDA_NUM_DEVICES'] = gpu_id + 1

    state['cm_cuda_device_prop'] = p
    state['cm_cuda_devices_prop'] = gpu
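This looks like an off-by-one fix: gpu_id appears to be a zero-based index assigned per enumerated device, so the device count is the last index plus one. A minimal sketch of the pattern, with a hypothetical two-GPU detection result:

    gpu = []
    gpu_id = -1
    for name in ["NVIDIA RTX 4090 #0", "NVIDIA RTX 4090 #1"]:  # stand-in for parsed detector output
        gpu_id += 1
        gpu.append({"gpu_id": gpu_id, "name": name})
    # gpu_id ends at 1 for two devices, so the count must be gpu_id + 1
    assert gpu_id + 1 == len(gpu) == 2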
1 change: 1 addition & 0 deletions script/run-mlperf-inference-app/_cm.yaml
@@ -43,6 +43,7 @@ input_mapping:
   category: CM_MLPERF_SUBMISSION_SYSTEM_TYPE
   clean: CM_MLPERF_CLEAN_ALL
   compliance: CM_MLPERF_LOADGEN_COMPLIANCE
+  custom_system_nvidia: CM_CUSTOM_SYSTEM_NVIDIA
   dashboard_wb_project: CM_MLPERF_DASHBOARD_WANDB_PROJECT
   dashboard_wb_user: CM_MLPERF_DASHBOARD_WANDB_USER
   debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM
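This mapping is what connects the new --custom_system_nvidia=yes flag in the workflow changes above to the scripts: input_mapping entries translate run-mlperf-inference-app command-line inputs into CM_* environment variables for downstream steps. A hedged usage sketch (the downstream effect on the NVIDIA harness is assumed from the mapping, not shown in this diff):

    # Passing the flag on the command line...
    cm run script --tags=run-mlperf,inference,_submission,_short --custom_system_nvidia=yes ...
    # ...should surface in the script environment as:
    #   CM_CUSTOM_SYSTEM_NVIDIA=yes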
