Merge pull request #298 from mlcommons/mlperf-inference
dev <- Mlperf inference
arjunsuresh authored Sep 26, 2024
2 parents 0a9238f + 03a740f commit db60dad
Showing 16 changed files with 188 additions and 76 deletions.
44 changes: 28 additions & 16 deletions .github/workflows/code-review.yml
@@ -1,22 +1,34 @@
name: OpenAI Code Review

on:
pull_request:
pull_request_target:
types: [opened, synchronize]
paths:
- 'automation/**'
- 'script/**'
- '!**.md'

permissions:
issues: write
pull-requests: write

jobs:
code_review_job:
code_review:
runs-on: ubuntu-latest
if: github.repository_owner == 'gateoverflow'
name: ChatGPT Code Review
if: github.repository_owner == 'gateoverflow' && github.event.pull_request.changed_files > 0
steps:
- name: GenAI Code Review
uses: cirolini/genai-code-review@v2
with:
openai_api_key: ${{ secrets.openai_api_key }}
github_token: ${{ secrets.GITHUB_TOKEN }}
github_pr_id: ${{ github.event.number }}
openai_model: "GPT-4o" # optional
openai_temperature: 0.5 # optional
openai_max_tokens: 2048 # optional
mode: files # files or patch
language: en # optional, default is 'en'
custom_prompt: "" # optional
# Run code review via OpenAI
# Step to run the OpenAI Code Review using the GATEOverflow action
- name: Run OpenAI Code Review
uses: GATEOverflow/genai-code-review@v1
with:
github_token: ${{ secrets.GITHUB_TOKEN }} # GitHub token for authentication
openai_api_key: ${{ secrets.OPENAI_API_KEY }} # OpenAI API key for accessing the GPT model
github_pr_id: ${{ github.event.pull_request.number }} # ID of the pull request to review
openai_model: "gpt-4o" # Model to use for the code review
openai_temperature: 0.5 # Temperature setting for the model's output
openai_max_tokens: 2048 # Maximum number of tokens for the model's response
mode: "files" # Mode of review, can be "files" or "diff"
language: "en" # Language for the review output
custom_prompt: "" # Optional custom prompt for the model
continue-on-error: true # Allow the workflow to continue even if this step fails
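Note that the trigger moves from pull_request to pull_request_target (so the workflow runs with base-repository secrets on forked PRs) and the job is now gated on a non-empty change set. A minimal sketch of what that changed_files guard evaluates, reading the same event payload the runner exposes (GITHUB_EVENT_PATH is set by GitHub Actions; the payload field mirrors the expression in the workflow):

import json, os

# Sketch: the `if:` guard above compares against this webhook payload field.
event = json.load(open(os.environ["GITHUB_EVENT_PATH"]))
should_review = event["pull_request"]["changed_files"] > 0
print("run code review:", should_review)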
2 changes: 1 addition & 1 deletion .github/workflows/test-mlperf-inference-sdxl.yaml
@@ -2,7 +2,7 @@ name: MLPerf inference SDXL

on:
schedule:
- cron: "1 2 * * */3"
- cron: "1 2 * * *"

jobs:
build_reference:
2 changes: 1 addition & 1 deletion .github/workflows/test-scc24-sdxl.yaml
@@ -2,7 +2,7 @@ name: MLPerf inference SDXL

on:
schedule:
- cron: "1 3 * * */3"
- cron: "43 1 * * *"

jobs:
build_reference:
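Both schedule tweaks (here and in test-mlperf-inference-sdxl.yaml above) move from a cron stepped over the day-of-week field to a daily one. A small sketch, assuming the third-party croniter package, comparing the firing times of the SDXL schedules:

from datetime import datetime
from croniter import croniter  # assumes: pip install croniter

base = datetime(2024, 9, 26)
old = croniter("1 2 * * */3", base)  # 02:01 on Sun/Wed/Sat only (DOW 0,3,6)
new = croniter("1 2 * * *", base)    # 02:01 every day
print("old:", [old.get_next(datetime).strftime("%a %H:%M") for _ in range(3)])
print("new:", [new.get_next(datetime).strftime("%a %H:%M") for _ in range(3)])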
13 changes: 10 additions & 3 deletions script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -185,8 +185,9 @@ deps:
## Pytorch (CPU)
- tags: get,generic-python-lib,_torch
names:
- ml-engine-pytorch
- pytorch
- torch
- ml-engine-pytorch
- pytorch
skip_if_env:
CM_MODEL:
- dlrm-v2-99
@@ -838,6 +839,9 @@ variations:
MLPERF_TVM_TORCH_QUANTIZED_ENGINE: qnnpack
deps:
- tags: get,generic-python-lib,_torch
names:
- torch
- pytorch
- tags: get,tvm
names:
- tvm
@@ -865,7 +869,6 @@ variations:

gptj_:
deps:
- tags: get,generic-python-lib,_torch
- tags: get,generic-python-lib,_package.datasets
- tags: get,generic-python-lib,_package.attrs
- tags: get,generic-python-lib,_package.accelerate
@@ -1099,6 +1102,10 @@ variations:
- dlrm-src
# to force the version
- tags: get,generic-python-lib,_torch
names:
- torch
- pytorch
- ml-engine-pytorch
version: "1.13.1"
- tags: get,generic-python-lib,_mlperf_logging
- tags: get,generic-python-lib,_opencv-python
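The recurring pattern in this file is adding names aliases (torch, pytorch, ml-engine-pytorch) to the _torch dependency so callers can target it for overrides, as the pinned version: "1.13.1" entry above does for DLRM. A hedged sketch of the same override via the cmind Python API (the tag set and the 'adr' input key are assumptions, modeled on CM's documented --adr.<name> CLI convention):

import cmind  # assumes: pip install cmind, plus the mlcommons cm4mlops repo

r = cmind.access({
    'action': 'run',
    'automation': 'script',
    'tags': 'app,mlperf,inference,reference,_dlrm-v2-99',  # hypothetical tags
    'adr': {'pytorch': {'version': '1.13.1'}},  # resolved via the new alias
    'quiet': True,
})
print('ok' if r['return'] == 0 else r.get('error'))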
3 changes: 2 additions & 1 deletion script/app-mlperf-inference/_cm.yaml
@@ -385,6 +385,7 @@ variations:
CM_IMAGENET_ACCURACY_DTYPE: int32
CM_CNNDM_ACCURACY_DTYPE: int32
CM_LIBRISPEECH_ACCURACY_DTYPE: int8
CM_DOCKER_USE_VIRTUAL_PYTHON: no
prehook_deps:
- names:
- nvidia-original-mlperf-inference
@@ -1162,7 +1163,7 @@ variations:
mlperf-inference-implementation:
tags: _cuda
deps:
- tags: get,cuda-devices
- tags: get,cuda-devices,_with-pycuda
skip_if_env:
CM_CUDA_DEVICE_PROP_GLOBAL_MEMORY:
- "yes"
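The cuda-devices dependency now pulls the new _with-pycuda variation, and its skip_if_env entry means detection is skipped when the GPU memory property is already known. A tiny sketch of skip_if_env semantics as I read them (illustrative helper, not CM's actual implementation):

# Sketch: a dep guarded by skip_if_env runs only when the env var does not
# already hold one of the listed values (illustrative, not CM internals).
def dep_should_run(skip_if_env, env):
    return not any(env.get(var) in vals for var, vals in skip_if_env.items())

print(dep_should_run({'CM_CUDA_DEVICE_PROP_GLOBAL_MEMORY': ['yes']}, {}))  # True
print(dep_should_run({'CM_CUDA_DEVICE_PROP_GLOBAL_MEMORY': ['yes']},
                     {'CM_CUDA_DEVICE_PROP_GLOBAL_MEMORY': 'yes'}))        # False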
7 changes: 5 additions & 2 deletions script/build-dockerfile/customize.py
@@ -180,8 +180,11 @@ def preprocess(i):

f.write(EOL+'# Install python packages' + EOL)
python = get_value(env, config, 'PYTHON', 'CM_DOCKERFILE_PYTHON')
f.write('RUN {} -m venv /home/cmuser/venv/cm'.format(python) + " " + EOL)
f.write('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"' + EOL)

docker_use_virtual_python = env.get('CM_DOCKER_USE_VIRTUAL_PYTHON', "yes")
if str(docker_use_virtual_python).lower() not in [ "no", "0", "false"]:
f.write('RUN {} -m venv /home/cmuser/venv/cm'.format(python) + " " + EOL)
f.write('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"' + EOL)
#f.write('RUN . /opt/venv/cm/bin/activate' + EOL)
f.write('RUN {} -m pip install '.format(python) + " ".join(get_value(env, config, 'python-packages')) + ' ' + pip_extra_flags + ' ' + EOL)

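The new CM_DOCKER_USE_VIRTUAL_PYTHON flag defaults to "yes"; only an explicit no/0/false (any case) skips the venv lines in the generated Dockerfile. A self-contained sketch of the gating and what it writes (file handle stubbed with StringIO, package list illustrative):

import io

def emit_python_setup(env, python='python3', packages='cmind requests'):
    f = io.StringIO()
    use_venv = env.get('CM_DOCKER_USE_VIRTUAL_PYTHON', 'yes')
    if str(use_venv).lower() not in ['no', '0', 'false']:
        f.write('RUN {} -m venv /home/cmuser/venv/cm\n'.format(python))
        f.write('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"\n')
    f.write('RUN {} -m pip install {}\n'.format(python, packages))
    return f.getvalue()

print(emit_python_setup({}))                                      # venv created
print(emit_python_setup({'CM_DOCKER_USE_VIRTUAL_PYTHON': 'no'}))  # venv skipped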
75 changes: 36 additions & 39 deletions script/download-file/run.sh
@@ -1,61 +1,58 @@
#!/bin/bash


# Execute config command if it exists
if [[ -n ${CM_DOWNLOAD_CONFIG_CMD} ]]; then
echo ""
echo "${CM_DOWNLOAD_CONFIG_CMD}"
eval "${CM_DOWNLOAD_CONFIG_CMD}"
test $? -eq 0 || exit $?
echo -e "\nExecuting: ${CM_DOWNLOAD_CONFIG_CMD}"
eval "${CM_DOWNLOAD_CONFIG_CMD}" || exit $?
fi

# Assume download is required by default
require_download=1

if [[ "${CM_DOWNLOAD_LOCAL_FILE_PATH}" != "" ]]; then
# No download needed if a local file path is specified or the tool is 'cmutil'
if [[ -n "${CM_DOWNLOAD_LOCAL_FILE_PATH}" || ${CM_DOWNLOAD_TOOL} == "cmutil" ]]; then
require_download=0
fi

if [[ ${CM_DOWNLOAD_TOOL} == "cmutil" ]]; then
require_download=0

elif [ -e "${CM_DOWNLOAD_DOWNLOADED_PATH}" ]; then
if [[ "${CM_DOWNLOAD_CHECKSUM_CMD}" != "" ]]; then
echo ""
echo "${CM_DOWNLOAD_CHECKSUM_CMD}"
eval "${CM_DOWNLOAD_CHECKSUM_CMD}"
if [ $? -ne 0 ]; then
# checksum not supposed to fail for locally given file
if [[ "${CM_DOWNLOAD_LOCAL_FILE_PATH}" != "" ]]; then
exit 1
else
CM_PRE_DOWNLOAD_CLEAN=true
fi
# If the file exists, check the checksum if necessary
if [[ -e "${CM_DOWNLOAD_DOWNLOADED_PATH}" && -n "${CM_DOWNLOAD_CHECKSUM_CMD}" ]]; then
echo -e "\nChecking checksum: ${CM_DOWNLOAD_CHECKSUM_CMD}"
eval "${CM_DOWNLOAD_CHECKSUM_CMD}"

if [[ $? -ne 0 ]]; then
# If the checksum fails, handle errors based on whether the file is local
if [[ -n "${CM_DOWNLOAD_LOCAL_FILE_PATH}" ]]; then
echo "Checksum failed for local file. Exiting."
exit 1
else
require_download="0"
echo "Checksum failed. Marking for re-download."
CM_PRE_DOWNLOAD_CLEAN=true
fi
else
# If checksum succeeds, no download is required
require_download=0
fi
fi

if [[ ${require_download} == "1" ]]; then
# Perform download if required
if [[ ${require_download} == 1 ]]; then
echo ""
if [ -n "${CM_PRE_DOWNLOAD_CLEAN}" ] && [ "${CM_PRE_DOWNLOAD_CLEAN,,}" != "false" ]; then
echo "${CM_PRE_DOWNLOAD_CLEAN_CMD}"
eval "${CM_PRE_DOWNLOAD_CLEAN_CMD}"
fi

echo ""
echo "${CM_DOWNLOAD_CMD}"
eval "${CM_DOWNLOAD_CMD}"
test $? -eq 0 || exit $?
# If a pre-download clean command is specified and needed, execute it
if [[ -n "${CM_PRE_DOWNLOAD_CLEAN}" && "${CM_PRE_DOWNLOAD_CLEAN,,}" != "false" ]]; then
echo "Executing pre-download clean: ${CM_PRE_DOWNLOAD_CLEAN_CMD}"
eval "${CM_PRE_DOWNLOAD_CLEAN_CMD}" || exit $?
fi

# Execute the download command
echo "Downloading: ${CM_DOWNLOAD_CMD}"
eval "${CM_DOWNLOAD_CMD}" || exit $?
fi

if [[ ${CM_DOWNLOAD_TOOL} == "cmutil" || ${require_download} == "1" ]]; then
if [[ "${CM_DOWNLOAD_CHECKSUM_CMD}" != "" ]]; then
echo ""
echo "${CM_DOWNLOAD_CHECKSUM_CMD}"
eval "${CM_DOWNLOAD_CHECKSUM_CMD}"
test $? -eq 0 || exit $?
# Verify checksum again if necessary
if [[ ${CM_DOWNLOAD_TOOL} == "cmutil" || ${require_download} == 1 ]]; then
if [[ -n "${CM_DOWNLOAD_CHECKSUM_CMD}" ]]; then
echo -e "\nVerifying checksum after download: ${CM_DOWNLOAD_CHECKSUM_CMD}"
eval "${CM_DOWNLOAD_CHECKSUM_CMD}" || exit $?
fi
fi

test $? -eq 0 || exit $?
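The rewritten script reduces to one decision procedure: local files and the cmutil tool never download; an existing file with a failing checksum is cleaned and re-downloaded (though a failing local file is a hard error); and anything downloaded, or fetched via cmutil, is checksum-verified afterwards. A hedged Python restatement of that control flow (names are illustrative, not a drop-in replacement):

# Illustrative restatement of run.sh's decision flow.
def plan(local_path, tool, file_exists, checksum_ok):
    require_download = not (local_path or tool == 'cmutil')
    pre_clean = False
    if file_exists and checksum_ok is not None:  # None = no checksum command
        if checksum_ok:
            require_download = False
        elif local_path:
            raise SystemExit('checksum failed for local file')
        else:
            pre_clean = True  # stale download: clean, then fetch again
    verify_after = tool == 'cmutil' or require_download
    return require_download, pre_clean, verify_after

print(plan(local_path='', tool='wget', file_exists=True, checksum_ok=False))
# -> (True, True, True): bad checksum triggers clean + re-download + re-verify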
16 changes: 16 additions & 0 deletions script/get-cuda-devices/_cm.yaml
@@ -33,9 +33,25 @@ docker:

new_env_keys:
- CM_CUDA_DEVICE_*
- CM_CUDA_NUM_DEVICES

new_state_keys:
- cm_cuda_device_prop
- cm_cuda_devices_prop
- cm_cuda_num_devices

print_files_if_script_error:
- tmp-run.out

variations:
with-pycuda:
env:
CM_CUDA_DEVICES_DETECT_USING_PYCUDA: 'yes'
deps:
- tags: get,python3
names:
- python
- python3
- tags: get,generic-python-lib,_package.pycuda
names:
- pycuda
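The new with-pycuda variation switches device detection to a Python/pycuda path (the detect.py added below) instead of the compiled CUDA sample. A hedged sketch of invoking it through the cmind API and reading back the newly exported key (assumes cmind plus the cm4mlops repo are installed; the new_env result key follows CM's usual convention):

import cmind  # assumes: pip install cmind

r = cmind.access({'action': 'run', 'automation': 'script',
                  'tags': 'get,cuda-devices,_with-pycuda', 'quiet': True})
if r['return'] == 0:
    print('CM_CUDA_NUM_DEVICES =', r.get('new_env', {}).get('CM_CUDA_NUM_DEVICES'))
else:
    print('detection failed:', r.get('error', ''))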
39 changes: 31 additions & 8 deletions script/get-cuda-devices/customize.py
@@ -2,6 +2,15 @@
import os
import subprocess

def preprocess(i):

env = i['env']

if str(env.get('CM_CUDA_DEVICES_DETECT_USING_PYCUDA', '')).lower() in [ "1", "yes", "true"]:
i['run_script_input']['script_name'] = 'detect'

return {'return':0}

def postprocess(i):

env = i['env']
@@ -18,22 +27,36 @@ def postprocess(i):

# properties
p = {}
gpu = {}

gpu_id = -1

for line in lst:
print (line)
#print (line)

j = line.find(':')

if j>=0:
key = line[:j].strip()
val = line[j+1:].strip()
key = line[:j].strip()
val = line[j+1:].strip()

if key == "GPU Device ID":
gpu_id+=1
gpu[gpu_id] = {}

p[key] = val
if gpu_id < 0:
continue

key_env = 'CM_CUDA_DEVICE_PROP_'+key.upper().replace(' ','_')
env[key_env] = val
gpu[gpu_id][key] = val
p[key] = val

key_env = 'CM_CUDA_DEVICE_PROP_'+key.upper().replace(' ','_')
env[key_env] = val

state['cm_cuda_num_devices'] = gpu_id
env['CM_CUDA_NUM_DEVICES'] = gpu_id

state['cm_cuda_device_prop'] = p
state['cm_cuda_devices_prop'] = gpu

return {'return':0}

return {'return':0}
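The reworked postprocess groups properties per GPU by treating each "GPU Device ID" line as a record delimiter, while keeping the old flat views for backward compatibility. The same loop, run on made-up sample output:

# The grouping logic from postprocess(), demonstrated on hypothetical values.
sample = [
    "GPU Device ID: 0000:3B:00.0",      # sample values, not real hardware
    "GPU Name: NVIDIA A100-SXM4-40GB",
    "Global memory: 42505273344",
    "GPU Device ID: 0000:AF:00.0",
    "GPU Name: NVIDIA A100-SXM4-40GB",
]
gpu, p, env, gpu_id = {}, {}, {}, -1
for line in sample:
    j = line.find(':')
    if j < 0:
        continue
    key, val = line[:j].strip(), line[j + 1:].strip()
    if key == "GPU Device ID":
        gpu_id += 1
        gpu[gpu_id] = {}
    if gpu_id < 0:
        continue  # ignore noise before the first device header
    gpu[gpu_id][key] = val
    p[key] = val  # flat view ends up holding the last GPU's values
    env['CM_CUDA_DEVICE_PROP_' + key.upper().replace(' ', '_')] = val

print(len(gpu), "GPUs;", env['CM_CUDA_DEVICE_PROP_GPU_NAME'])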
47 changes: 47 additions & 0 deletions script/get-cuda-devices/detect.py
@@ -0,0 +1,47 @@
import pycuda.driver as cuda
import pycuda.autoinit

def get_gpu_info():
num_gpus = cuda.Device.count()
all_gpu_info = []

for i in range(num_gpus):
device = cuda.Device(i)
cuda_runtime_version = cuda.get_version()
cuda_runtime_version_str = f"{cuda_runtime_version[0]}.{cuda_runtime_version[1]}"

gpu_info = {
"GPU Device ID": device.pci_bus_id(),
"GPU Name": device.name(),
"GPU compute capability": f"{device.compute_capability()[0]}.{device.compute_capability()[1]}",
"CUDA driver version": f"{cuda.get_driver_version() // 1000}.{(cuda.get_driver_version() % 1000) // 10}",
"CUDA runtime version": cuda_runtime_version_str,
"Global memory": device.total_memory(),
"Max clock rate": f"{device.get_attribute(cuda.device_attribute.CLOCK_RATE)} MHz",
"Total amount of shared memory per block": device.get_attribute(cuda.device_attribute.MAX_SHARED_MEMORY_PER_BLOCK),
"Total number of registers available per block": device.get_attribute(cuda.device_attribute.MAX_REGISTERS_PER_BLOCK),
"Warp size": device.get_attribute(cuda.device_attribute.WARP_SIZE),
"Maximum number of threads per multiprocessor": device.get_attribute(cuda.device_attribute.MAX_THREADS_PER_MULTIPROCESSOR),
"Maximum number of threads per block": device.get_attribute(cuda.device_attribute.MAX_THREADS_PER_BLOCK),
"Max dimension size of a thread block X": device.get_attribute(cuda.device_attribute.MAX_BLOCK_DIM_X),
"Max dimension size of a thread block Y": device.get_attribute(cuda.device_attribute.MAX_BLOCK_DIM_Y),
"Max dimension size of a thread block Z": device.get_attribute(cuda.device_attribute.MAX_BLOCK_DIM_Z),
"Max dimension size of a grid size X": device.get_attribute(cuda.device_attribute.MAX_GRID_DIM_X),
"Max dimension size of a grid size Y": device.get_attribute(cuda.device_attribute.MAX_GRID_DIM_Y),
"Max dimension size of a grid size Z": device.get_attribute(cuda.device_attribute.MAX_GRID_DIM_Z),
}

all_gpu_info.append(gpu_info)

return all_gpu_info


# Print the GPU information for all available GPUs
if __name__ == "__main__":
gpu_info_list = get_gpu_info()
with open ("tmp-run.out", "w") as f:
for idx, gpu_info in enumerate(gpu_info_list):
print(f"GPU {idx}:")
for key, value in gpu_info.items():
f.write(f"{key}: {value}\n")

4 changes: 4 additions & 0 deletions script/get-cuda-devices/detect.sh
@@ -0,0 +1,4 @@
#!/bin/bash

${CM_PYTHON_BIN_WITH_PATH} ${CM_TMP_CURRENT_SCRIPT_PATH}/detect.py
test $? -eq 0 || exit $?
2 changes: 1 addition & 1 deletion script/get-ml-model-gptj/_cm.json
@@ -231,7 +231,7 @@
"tags": "get,nvidia,scratch,space"
},
{
"tags": "get,cuda-devices"
"tags": "get,cuda-devices,_with-pycuda"
},
{
"tags": "get,ml-model,gpt-j,_fp32,_pytorch",
2 changes: 1 addition & 1 deletion script/get-ml-model-llama2/_cm.json
@@ -223,7 +223,7 @@
"tags": "get,nvidia,scratch,space"
},
{
"tags": "get,cuda-devices"
"tags": "get,cuda-devices,_with-pycuda"
},
{
"tags": "get,ml-model,llama2-70b,_fp32,_pytorch",
(3 remaining changed files not loaded)
