
Merge branch 'GATEOverflow:mlperf-inference' into mlperf-inference
arjunsuresh authored May 25, 2024
2 parents 6882917 + cf2f46c commit f62c4ce
Showing 17 changed files with 471 additions and 48 deletions.
1 change: 1 addition & 0 deletions .github/workflows/publish.yaml
@@ -10,6 +10,7 @@ on:
branches:
- main
- docs
- mlperf-inference

jobs:

3 changes: 3 additions & 0 deletions automation/script/module.py
@@ -4313,6 +4313,9 @@ def enable_or_skip_script(meta, env):
Internal: enable a dependency based on enable_if_env and skip_if_env meta information
(AND function)
"""
if type(meta) != dict:
print( "The meta entry is not a dictionary for skip/enable if_env {}".format(meta))

for key in meta:
meta_key = [str(v).lower() for v in meta[key]]
if key in env:
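The guard added above complements the AND semantics described in the docstring: a dependency is enabled only when every key listed under enable_if_env matches the current env. A minimal standalone sketch of that rule — an illustration, not the actual module.py implementation — could look like this:

def matches_all(meta, env):
    """Sketch of enable_if_env-style AND matching: every key in `meta`
    must be present in `env` with a value found in the allowed list
    (compared case-insensitively, as in the loop above)."""
    if not isinstance(meta, dict):
        print("The meta entry is not a dictionary for skip/enable if_env {}".format(meta))
        return False
    for key, allowed in meta.items():
        allowed_values = [str(v).lower() for v in allowed]
        if str(env.get(key, '')).lower() not in allowed_values:
            return False
    return True

# Example: enabled only when every listed condition holds
print(matches_all({'CM_MLPERF_INFERENCE_CODE_VERSION': ['v4.0']},
                  {'CM_MLPERF_INFERENCE_CODE_VERSION': 'v4.0'}))  # True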
66 changes: 58 additions & 8 deletions script/app-mlperf-inference-intel/_cm.yaml
@@ -136,14 +136,6 @@ deps:



########################################################################
# Install MLPerf inference dependencies

- tags: get,mlperf,inference,results,_ctuning
names:
inference-results
version: v3.1


# Post dependencies to run this app including for power measurement
post_deps:
@@ -166,6 +158,32 @@

# Variations to customize dependencies
variations:
# version
v4.0:
group: version
default: true
env:
CM_MLPERF_INFERENCE_CODE_VERSION: "v4.0"
adr:
pytorch:
tags: _for-intel-mlperf-inference-v4.0
deps:
- tags: get,mlperf,inference,results,_go
names:
inference-results
version: v4.0
v3.1:
env:
CM_MLPERF_INFERENCE_CODE_VERSION: "v3.1"
adr:
pytorch:
tags: _for-intel-mlperf-inference-v3.1
deps:
- tags: get,mlperf,inference,results,_ctuning
names:
inference-results
version: v3.1

# Target devices
cpu:
group: device
@@ -238,6 +256,8 @@ variations:
deps:
- tags: get,conda,_name.bert-pt
- tags: install,llvm,src,_tag.llvmorg-15.0.7,_runtimes.libcxx:libcxxabi:openmp,_clang,_release,_for-intel-mlperf-inference-v3.1-bert
names:
- llvm-from-src
- tags: get,generic-sys-util,_libffi7
- tags: get,generic,conda-package,_package.python
names:
@@ -255,8 +275,14 @@
- conda-package
- jemalloc
- tags: get,pytorch,from.src,_for-intel-mlperf-inference-v3.1-bert
names:
- pytorch-from-src
- tags: install,onednn,from.src,_for-intel-mlperf-inference-v3.1-bert
names:
- onednn-from-src
- tags: install,transformers,from.src,_for-intel-mlperf-inference-v3.1-bert
names:
- transformers-from-src

gptj_:
env:
@@ -277,6 +303,9 @@
- device-info
- sut
- loadgen-batchsize
enable_if_env:
CM_MLPERF_INFERENCE_CODE_VERSION:
- v3.1
force_cache: true
- tags: get,generic-python-lib,_package.optimum
names:
@@ -294,6 +323,8 @@
conda-python:
version: "3.9"
- tags: install,llvm,src,_for-intel-mlperf-inference-v3.1-gptj
names:
- llvm-from-src
- names:
- conda-package
- ncurses
@@ -304,6 +335,8 @@
- conda-package
- jemalloc
- tags: install,ipex,from.src,_for-intel-mlperf-inference-v3.1-gptj
names:
- ipex-from-src
- tags: get,generic,conda-package,_package.ninja
names:
- conda-package
@@ -312,6 +345,8 @@
INTEL_GPTJ_INT4:
- 'yes'
- tags: install,tpp-pex,from.src,_for-intel-mlperf-inference-v3.1-gptj
names:
- tpp-pex-from-src
enable_if_env:
INTEL_GPTJ_INT4:
- 'yes'
@@ -342,13 +377,27 @@ variations:
- pip-package
- accelerate
- tags: get,generic-python-lib,_custom-python,_package.torch,_url.git+https://github.com/pytorch/pytorch.git@927dc662386af052018212c7d01309a506fc94cd
enable_if_env:
CM_MLPERF_INFERENCE_CODE_VERSION:
- v3.1
env:
CM_PYTHON_BIN_WITH_PATH: "<<<CM_CONDA_BIN_PATH>>>/python3"
"+ CXXFLAGS":
- "-Wno-nonnull"
- "-Wno-maybe-uninitialized"
- "-Wno-uninitialized"
- "-Wno-free-nonheap-object"
- tags: get,generic-python-lib,_custom-python,_package.torch
env:
CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL: https://download.pytorch.org/whl/cpu
enable_if_env:
CM_MLPERF_INFERENCE_CODE_VERSION:
- 'v4.0'
- tags: install,intel-neural-speed,_for-intel-mlperf-inference-v4.0-gptj,_branch.mlperf-v4-0
enable_if_env:
CM_MLPERF_INFERENCE_CODE_VERSION:
- 'v4.0'


gptj-99:
group: model
@@ -461,6 +510,7 @@ variations:
- device-info
- sut
- loadgen-batchsize
- loadgen-scenario
force_cache: true

# Download MLPerf inference source
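For orientation, the two entries under the new `version` group above are mutually exclusive variations: selecting one sets CM_MLPERF_INFERENCE_CODE_VERSION and pulls the matching inference-results repository, with v4.0 acting as the default. A rough Python sketch of that selection rule (illustrative only, not the CM implementation) follows:

# Hypothetical model of a variation group: one default, mutually exclusive choices.
VERSION_GROUP = {
    "v4.0": {"default": True,
             "env": {"CM_MLPERF_INFERENCE_CODE_VERSION": "v4.0"},
             "results_repo": "get,mlperf,inference,results,_go"},
    "v3.1": {"env": {"CM_MLPERF_INFERENCE_CODE_VERSION": "v3.1"},
             "results_repo": "get,mlperf,inference,results,_ctuning"},
}

def pick_version(requested=None):
    """Return the env update and results dependency for the chosen
    version variation, falling back to the group default."""
    if requested is None:
        requested = next(k for k, v in VERSION_GROUP.items() if v.get("default"))
    choice = VERSION_GROUP[requested]
    return choice["env"], choice["results_repo"]

print(pick_version())        # v4.0 env and the '_go' results repo
print(pick_version("v3.1"))  # v3.1 env and the '_ctuning' results repo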
13 changes: 12 additions & 1 deletion script/app-mlperf-inference-intel/build_gptj_harness.sh
@@ -22,9 +22,12 @@ mkdir -p ${WORKLOAD_DATA}/model
export INT8_MODEL_DIR=${WORKLOAD_DATA}/gpt-j-int8-model
export INT4_MODEL_DIR=${WORKLOAD_DATA}/gpt-j-int4-model

python download-calibration-dataset.py --calibration-list-file calibration-list.txt --output-dir ${WORKLOAD_DATA}/calibration-data

python download-dataset.py --split validation --output-dir ${WORKLOAD_DATA}/validation-data
test $? -eq 0 || exit $?
python download-calibration-dataset.py --calibration-list-file calibration-list.txt --output-dir ${WORKLOAD_DATA}/calibration-data
test $? -eq 0 || exit $?

if [[ -f ${INT8_MODEL_DIR}/best_model.pt ]]; then
exit 0
fi
@@ -34,6 +37,14 @@ export VALIDATION_DATA_JSON=${WORKLOAD_DATA}/validation-data/cnn_dailymail_valid
#export INT4_CALIBRATION_DIR=${WORKLOAD_DATA}/quantized-int4-model
#sudo -E bash run_quantization.sh
#bash run_quantization.sh

INSTALLED_NS=$(python -c "import neural_speed; print(neural_speed.__path__[0])")
PATH_CONVERTED=`pwd`

export INSTALLED_NS=$INSTALLED_NS
echo "INSTALLED_NS=$INSTALLED_NS"
#export PATH_CONVERTED=$PATH_CONVERTED

echo "${RUN_QUANTIZATION_CMD}"
eval "${RUN_QUANTIZATION_CMD}"
test $? -eq 0 || exit $?
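The reordering above also makes each download step fail fast via `test $? -eq 0 || exit $?`, so quantization never runs against missing data. A rough Python rendering of the same fail-fast idiom (the command strings below are placeholders, not taken from the harness) might be:

import subprocess
import sys

def run_or_exit(cmd):
    """Run a shell command and propagate any non-zero exit code,
    mirroring the `test $? -eq 0 || exit $?` pattern in the script."""
    result = subprocess.run(cmd, shell=True)
    if result.returncode != 0:
        sys.exit(result.returncode)

run_or_exit("python download-dataset.py --split validation --output-dir validation-data")
run_or_exit("python download-calibration-dataset.py --calibration-list-file calibration-list.txt --output-dir calibration-data")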
35 changes: 31 additions & 4 deletions script/app-mlperf-inference-intel/customize.py
@@ -27,7 +27,12 @@ def preprocess(i):

backend = env['CM_MLPERF_BACKEND']
device = env['CM_MLPERF_DEVICE']
harness_root = os.path.join(env['CM_MLPERF_INFERENCE_RESULTS_PATH'], 'closed', 'Intel', 'code', ml_model, backend+"-"+device)
code_base_folder = backend + '-' + device
if env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == 'v4.0':
if 'gptj' in ml_model:
code_base_folder = "ITREX"

harness_root = os.path.join(env['CM_MLPERF_INFERENCE_RESULTS_PATH'], 'closed', 'Intel', 'code', ml_model, code_base_folder)

env['CM_HARNESS_CODE_ROOT'] = harness_root

@@ -79,16 +84,25 @@ def preprocess(i):
env['MLPERF_INFERENCE_ROOT'] = env['CM_MLPERF_INFERENCE_SOURCE']
if env.get('INTEL_GPTJ_INT4', '') == 'yes':
model_precision = "int4"
env['RUN_QUANTIZATION_CMD'] = "bash run_quantization_int4.sh"
if env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == 'v3.1':
env['RUN_QUANTIZATION_CMD'] = "bash run_quantization_int4.sh"
else:
env['FILE_TAG'] = "final"
env['OUT_DIR'] = os.getcwd()
env['RUN_QUANTIZATION_CMD'] = "bash run_quantization.sh"
else:
model_precision = "int8"
env['RUN_QUANTIZATION_CMD'] = "bash run_quantization.sh"
final_model_path = os.path.join(harness_root, "data", f"gpt-j-{model_precision}-model", "best_model.pt")
if env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == "v3.1":
final_model_path = os.path.join(harness_root, "data", f"gpt-j-{model_precision}-model", "best_model.pt")
else:
final_model_path = os.path.join(env['OUT_DIR'], "checkpoint-final-final-q4-j-int8-pc.bin")
model_dir_name = f"{model_precision.upper()}_MODEL_DIR"
env[model_dir_name] = os.path.dirname(final_model_path)
if not os.path.exists(env[model_dir_name]):
os.makedirs(env[model_dir_name])
env['CM_ML_MODEL_PATH'] = env[model_dir_name]
env['CM_ML_MODEL_FILE_WITH_PATH'] = final_model_path
if env.get('CM_MLPERF_INFERENCE_INTEL_GPTJ_INT8_MODEL_PATH', '') != '' and env.get('INT8_MODEL_DIR', '') != '':
shutil.copy(env['CM_MLPERF_INFERENCE_INTEL_GPTJ_INT8_MODEL_PATH'], env[model_dir_name])
if env.get('CM_MLPERF_INFERENCE_INTEL_GPTJ_INT4_MODEL_PATH', '') != '' and env.get('INT4_MODEL_DIR', '') != '':
Expand Down Expand Up @@ -120,7 +134,20 @@ def preprocess(i):
env['QUANTIZED_MODEL'] = os.path.join(env["INT8_MODEL_DIR"], "best_model.pt")
env['PRECISION'] = "int8"
env['CM_RUN_DIR'] = i['run_script_input']['path']
env['CM_RUN_CMD'] = "bash run_gptj_harness.sh "
if env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == "v3.1":
env['CM_RUN_CMD'] = "bash run_gptj_harness_v3_1.sh "
elif env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == "v4.0":
env['CM_RUN_CMD'] = "bash run_gptj_harness_v4_0.sh "

if env['CM_MLPERF_RUN_STYLE'] == "test":
env['TOTAL_SAMPLE_COUNT'] = env['CM_TEST_QUERY_COUNT']
else:
env['TOTAL_SAMPLE_COUNT'] = env.get('CM_MLPERF_MAX_QUERY_COUNT', env['CM_TEST_QUERY_COUNT'])

if env['CM_MLPERF_LOADGEN_SCENARIO'] == "Offline":
env['WORKERS_PER_PROC'] = 4
else:
env['WORKERS_PER_PROC'] = 1

return {'return':0}

@@ -35,7 +35,6 @@ cmd="python runner.py --workload-name gptj \
--num-proc ${NUM_PROC} \
--cpus-per-proc ${CPUS_PER_PROC} \
--model-checkpoint-path ${CHECKPOINT_DIR} \
${WARMUP} \
--dataset-path ${VALIDATION_DATA_JSON} \
--batch-size ${BATCH_SIZE} \
--mlperf-conf ${CM_MLPERF_CONF} \
75 changes: 75 additions & 0 deletions script/app-mlperf-inference-intel/run_gptj_harness_v4_0.sh
@@ -0,0 +1,75 @@
#!/bin/bash
export PATH=${CM_CONDA_BIN_PATH}:$PATH

export KMP_BLOCKTIME=1
export KMP_AFFINITY=granularity=fine,compact,1,0
export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so
# export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
#

BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE}

DIR_SCRIPT=$(dirname "${BASH_SOURCE[0]}")
[ -z $DIR_NS ] && DIR_NS="$DIR_SCRIPT/gpt-j-env/neural-speed"
[ -z $VALIDATION_DATA_JSON ] && VALIDATION_DATA_JSON="$DIR_SCRIPT/gpt-j-env/cnn_dailymail_validation.json"
[ -z $CHECKPOINT_DIR ] && CHECKPOINT_DIR="$DIR_SCRIPT/gpt-j-env/finetuned_gptj"

# num_numa=$(numactl --hardware|grep available|awk -F' ' '{ print $2 }')

export num_physical_cores=$(lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l)
IFS=', ' read -r -a available_nodes_list <<<"$(numactl -s | grep nodebind | sed -E 's/^nodebind: (.+)$/\1/')"
declare -p available_nodes_list
num_numa="${#available_nodes_list[@]}"
declare -p num_numa

find "$DIR_NS" -name CMakeCache.txt -exec rm {} \;
CMAKE_ARGS="-DNS_PROFILING=ON" pip install -e "$DIR_NS"

[ -z $NUM_PROC ] && NUM_PROC=$num_numa
CPUS_PER_PROC=$((num_physical_cores / num_numa))
[ -z $WORKERS_PER_PROC ] && WORKERS_PER_PROC=1
[ -z $CPUS_PER_WORKER ] && CPUS_PER_WORKER= # e.g. 8:8:8:8:8:8:8
[ -z $BATCH_PROC_ALLOC ] && BATCH_PROC_ALLOC= # e.g. 12:12:12:12:12:12:12
[ -z $LOGICAL_CORES_START ] && LOGICAL_CORES_START=-1 # set to -1 to disable / or use $num_physical_cores
[ -z $CORES_OFFSET ] && CORES_OFFSET=0

[ -z $BATCH_SIZE ] && BATCH_SIZE=12
[ -z $BEAM_SIZE ] && BEAM_SIZE=4

OUTPUT_DIR="${CM_MLPERF_OUTPUT_DIR}"
MODEL_PATH="${CM_ML_MODEL_FILE_WITH_PATH}"
cd ${CM_HARNESS_CODE_ROOT}
export WORKLOAD_DATA=${CM_HARNESS_CODE_ROOT}/data
export VALIDATION_DATA_JSON=${WORKLOAD_DATA}/validation-data/cnn_dailymail_validation.json


for i in $(seq 0 $(($NUM_PROC - 1))); do
[[ ! -e "${MODEL_PATH}${i}" ]] && ln -fs "$(basename $MODEL_PATH)" "${MODEL_PATH}${i}"
done

echo "Start time: $(date)"
cmd="python runner.py --workload-name gptj \
--scenario ${CM_MLPERF_LOADGEN_SCENARIO} \
--mode ${LOADGEN_MODE} \
--num-proc ${NUM_PROC} \
--cpus-per-proc ${CPUS_PER_PROC} \
--dataset-path ${VALIDATION_DATA_JSON} \
--model-path ${MODEL_PATH} \
--model-checkpoint ${CHECKPOINT_DIR} \
--batch-size ${BATCH_SIZE} \
--beam-size ${BEAM_SIZE} \
--mlperf-conf ${CM_MLPERF_CONF} \
--user-conf ${CM_MLPERF_USER_CONF} \
--workers-per-proc ${WORKERS_PER_PROC} \
--total-sample-count ${TOTAL_SAMPLE_COUNT} \
--output-dir ${OUTPUT_DIR} \
--cores-offset ${CORES_OFFSET} \
--logical-cores-start \"${LOGICAL_CORES_START}\" \
--cpus-per-worker \"${CPUS_PER_WORKER}\" \
--batch-proc-alloc \"${BATCH_PROC_ALLOC}\" \
2>&1 | tee ${OUTPUT_DIR}.log"
echo "$cmd"
eval "$cmd"
test $? -eq 0 || exit $?
echo "End time: $(date)"

4 changes: 4 additions & 0 deletions script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -347,6 +347,10 @@ deps:
CM_MODEL:
- gptj-99
- gptj-99.9
skip_if_env:
NETWORK:
- lon



## RetinaNet (PyTorch weights, FP32)
21 changes: 14 additions & 7 deletions script/app-mlperf-inference-mlcommons-python/customize.py
@@ -75,9 +75,10 @@ def preprocess(i):
else:
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf_conf "+ x + env['CM_MLPERF_CONF'] + x

env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH')
if not env['MODEL_DIR']:
env['MODEL_DIR'] = os.path.dirname(env.get('CM_MLPERF_CUSTOM_MODEL_PATH', env.get('CM_ML_MODEL_FILE_WITH_PATH')))
if env.get('CM_NETWORK_LOADGEN', '') != "lon":
env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH')
if not env['MODEL_DIR']:
env['MODEL_DIR'] = os.path.dirname(env.get('CM_MLPERF_CUSTOM_MODEL_PATH', env.get('CM_ML_MODEL_FILE_WITH_PATH')))

RUN_CMD = ""
state['RUN'] = {}
@@ -176,10 +177,16 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
if env['CM_MODEL'] in [ "gptj-99", "gptj-99.9" ]:

env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "gpt-j")
cmd = env['CM_PYTHON_BIN_WITH_PATH'] + \
" main.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \
' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \
scenario_extra_options + mode_extra_options + dataset_options
if env.get('CM_NETWORK_LOADGEN', '') != "lon":
cmd = env['CM_PYTHON_BIN_WITH_PATH'] + \
" main.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \
' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \
scenario_extra_options + mode_extra_options + dataset_options
else:
cmd = env['CM_PYTHON_BIN_WITH_PATH'] + \
" main.py" + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \
' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \
scenario_extra_options + mode_extra_options + dataset_options
cmd = cmd.replace("--count", "--max_examples")
if env['CM_MLPERF_DEVICE'] == "gpu":
gpu_options = " --gpu"
