From 3ed37d40076aab6c361be65e3528db03422316e8 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Sat, 25 May 2024 02:36:57 +0530 Subject: [PATCH 1/5] Bug fix - Model downloading in LON(client) --- .../_cm.yaml | 3 +++ .../customize.py | 21 ++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index b9fc789f81..3f237c88c4 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -347,6 +347,9 @@ deps: CM_MODEL: - gptj-99 - gptj-99.9 + NETWORK: + - sut + ## RetinaNet (PyTorch weights, FP32) diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index ee97038ff4..c078506e17 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -75,9 +75,10 @@ def preprocess(i): else: env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf_conf "+ x + env['CM_MLPERF_CONF'] + x - env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH') - if not env['MODEL_DIR']: - env['MODEL_DIR'] = os.path.dirname(env.get('CM_MLPERF_CUSTOM_MODEL_PATH', env.get('CM_ML_MODEL_FILE_WITH_PATH'))) + if env.get('CM_NETWORK_LOADGEN', '') == "sut": + env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH') + if not env['MODEL_DIR']: + env['MODEL_DIR'] = os.path.dirname(env.get('CM_MLPERF_CUSTOM_MODEL_PATH', env.get('CM_ML_MODEL_FILE_WITH_PATH'))) RUN_CMD = "" state['RUN'] = {} @@ -176,10 +177,16 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio if env['CM_MODEL'] in [ "gptj-99", "gptj-99.9" ]: env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "gpt-j") - cmd = env['CM_PYTHON_BIN_WITH_PATH'] + \ - " run.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ - ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ - scenario_extra_options + mode_extra_options + dataset_options + if env.get('CM_NETWORK_LOADGEN', '') == "sut": + cmd = env['CM_PYTHON_BIN_WITH_PATH'] + \ + " main.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ + ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ + scenario_extra_options + mode_extra_options + dataset_options + else: + cmd = env['CM_PYTHON_BIN_WITH_PATH'] + \ + " main.py" + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ + ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ + scenario_extra_options + mode_extra_options + dataset_options cmd = cmd.replace("--count", "--max_examples") if env['CM_MLPERF_DEVICE'] == "gpu": gpu_options = " --gpu" From 96f47dcb08bf8f64e9e919ff5883c6a991a3fde9 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Sat, 25 May 2024 03:04:44 +0530 Subject: [PATCH 2/5] corrected Conditions --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 3 ++- script/app-mlperf-inference-mlcommons-python/customize.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git 
a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 3f237c88c4..22101446c9 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -347,8 +347,9 @@ deps: CM_MODEL: - gptj-99 - gptj-99.9 + skip_if_env: NETWORK: - - sut + - lon diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index c078506e17..b172293c0f 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -75,7 +75,7 @@ def preprocess(i): else: env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf_conf "+ x + env['CM_MLPERF_CONF'] + x - if env.get('CM_NETWORK_LOADGEN', '') == "sut": + if env.get('CM_NETWORK_LOADGEN', '') != "lon": env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH') if not env['MODEL_DIR']: env['MODEL_DIR'] = os.path.dirname(env.get('CM_MLPERF_CUSTOM_MODEL_PATH', env.get('CM_ML_MODEL_FILE_WITH_PATH'))) @@ -177,7 +177,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio if env['CM_MODEL'] in [ "gptj-99", "gptj-99.9" ]: env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "gpt-j") - if env.get('CM_NETWORK_LOADGEN', '') == "sut": + if env.get('CM_NETWORK_LOADGEN', '') != "lon": cmd = env['CM_PYTHON_BIN_WITH_PATH'] + \ " main.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ From 5036c9d557f0d437f3643addb92444fce77c48ef Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 25 May 2024 11:20:51 +0100 Subject: [PATCH 3/5] Update publish.yaml --- .github/workflows/publish.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 5df31b971e..fa9ace5da0 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -10,6 +10,7 @@ on: branches: - main - docs + - mlperf-inference jobs: From 991c3d446fd3e3fe9c175328305687a6fca14f54 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 25 May 2024 05:31:48 -0700 Subject: [PATCH 4/5] Fix intel gptj v4.0 --- automation/script/module.py | 3 + script/app-mlperf-inference-intel/_cm.yaml | 66 ++++++++-- .../build_gptj_harness.sh | 13 +- .../app-mlperf-inference-intel/customize.py | 33 ++++- ...tj_harness.sh => run_gptj_harness_v3_1.sh} | 1 - .../run_gptj_harness_v4_0.sh | 75 +++++++++++ script/app-mlperf-inference/_cm.yaml | 10 +- .../customize.py | 24 ++-- script/get-python3/customize.py | 2 +- .../_cm.json | 122 ++++++++++++++++++ .../customize.py | 21 +++ .../run.sh | 14 ++ script/process-mlperf-accuracy/_cm.json | 88 ++++++++++++- script/process-mlperf-accuracy/customize.py | 18 ++- 14 files changed, 449 insertions(+), 41 deletions(-) rename script/app-mlperf-inference-intel/{run_gptj_harness.sh => run_gptj_harness_v3_1.sh} (99%) create mode 100644 script/app-mlperf-inference-intel/run_gptj_harness_v4_0.sh create mode 100644 script/install-intel-neural-speed-from-src/_cm.json create mode 100644 script/install-intel-neural-speed-from-src/customize.py create mode 100644 script/install-intel-neural-speed-from-src/run.sh diff --git a/automation/script/module.py b/automation/script/module.py index d72d284933..b437f32c4a 100644 --- 
a/automation/script/module.py +++ b/automation/script/module.py @@ -4313,6 +4313,9 @@ def enable_or_skip_script(meta, env): Internal: enable a dependency based on enable_if_env and skip_if_env meta information (AND function) """ + if type(meta) != dict: + print( "The meta entry is not a dictionary for skip/enable if_env {}".format(meta)) + for key in meta: meta_key = [str(v).lower() for v in meta[key]] if key in env: diff --git a/script/app-mlperf-inference-intel/_cm.yaml b/script/app-mlperf-inference-intel/_cm.yaml index ce70f07d68..11d544cf4a 100644 --- a/script/app-mlperf-inference-intel/_cm.yaml +++ b/script/app-mlperf-inference-intel/_cm.yaml @@ -136,14 +136,6 @@ deps: - ######################################################################## - # Install MLPerf inference dependencies - - - tags: get,mlperf,inference,results,_ctuning - names: - inference-results - version: v3.1 - # Post dependencies to run this app including for power measurement post_deps: @@ -166,6 +158,32 @@ post_deps: # Variations to customize dependencies variations: + # version + v4.0: + group: version + default: true + env: + CM_MLPERF_INFERENCE_CODE_VERSION: "v4.0" + adr: + pytorch: + tags: _for-intel-mlperf-inference-v4.0 + deps: + - tags: get,mlperf,inference,results,_go + names: + inference-results + version: v4.0 + v3.1: + env: + CM_MLPERF_INFERENCE_CODE_VERSION: "v3.1" + adr: + pytorch: + tags: _for-intel-mlperf-inference-v3.1 + deps: + - tags: get,mlperf,inference,results,_ctuning + names: + inference-results + version: v3.1 + # Target devices cpu: group: device @@ -238,6 +256,8 @@ variations: deps: - tags: get,conda,_name.bert-pt - tags: install,llvm,src,_tag.llvmorg-15.0.7,_runtimes.libcxx:libcxxabi:openmp,_clang,_release,_for-intel-mlperf-inference-v3.1-bert + names: + - llvm-from-src - tags: get,generic-sys-util,_libffi7 - tags: get,generic,conda-package,_package.python names: @@ -255,8 +275,14 @@ variations: - conda-package - jemalloc - tags: get,pytorch,from.src,_for-intel-mlperf-inference-v3.1-bert + names: + - pytorch-from-src - tags: install,onednn,from.src,_for-intel-mlperf-inference-v3.1-bert + names: + - onednn-from-src - tags: install,transformers,from.src,_for-intel-mlperf-inference-v3.1-bert + names: + - transformers-from-src gptj_: env: @@ -277,6 +303,9 @@ variations: - device-info - sut - loadgen-batchsize + enable_if_env: + CM_MLPERF_INFERENCE_CODE_VERSION: + - v3.1 force_cache: true - tags: get,generic-python-lib,_package.optimum names: @@ -294,6 +323,8 @@ variations: conda-python: version: "3.9" - tags: install,llvm,src,_for-intel-mlperf-inference-v3.1-gptj + names: + - llvm-from-src - names: - conda-package - ncurses @@ -304,6 +335,8 @@ variations: - conda-package - jemalloc - tags: install,ipex,from.src,_for-intel-mlperf-inference-v3.1-gptj + names: + - ipex-from-src - tags: get,generic,conda-package,_package.ninja names: - conda-package @@ -312,6 +345,8 @@ variations: INTEL_GPTJ_INT4: - 'yes' - tags: install,tpp-pex,from.src,_for-intel-mlperf-inference-v3.1-gptj + names: + - tpp-pex-from-src enable_if_env: INTEL_GPTJ_INT4: - 'yes' @@ -342,6 +377,9 @@ variations: - pip-package - accelerate - tags: get,generic-python-lib,_custom-python,_package.torch,_url.git+https://github.com/pytorch/pytorch.git@927dc662386af052018212c7d01309a506fc94cd + enable_if_env: + CM_MLPERF_INFERENCE_CODE_VERSION: + - v3.1 env: CM_PYTHON_BIN_WITH_PATH: "<<>>/python3" "+ CXXFLAGS": @@ -349,6 +387,17 @@ variations: - "-Wno-maybe-uninitialized" - "-Wno-uninitialized" - "-Wno-free-nonheap-object" + - tags: 
get,generic-python-lib,_custom-python,_package.torch + env: + CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL: https://download.pytorch.org/whl/cpu + enable_if_env: + CM_MLPERF_INFERENCE_CODE_VERSION: + - 'v4.0' + - tags: install,intel-neural-speed,_for-intel-mlperf-inference-v4.0-gptj,_branch.mlperf-v4-0 + enable_if_env: + CM_MLPERF_INFERENCE_CODE_VERSION: + - 'v4.0' + gptj-99: group: model @@ -461,6 +510,7 @@ variations: - device-info - sut - loadgen-batchsize + - loadgen-scenario force_cache: true # Download MLPerf inference source diff --git a/script/app-mlperf-inference-intel/build_gptj_harness.sh b/script/app-mlperf-inference-intel/build_gptj_harness.sh index 149dcf6608..3c2f26dc45 100644 --- a/script/app-mlperf-inference-intel/build_gptj_harness.sh +++ b/script/app-mlperf-inference-intel/build_gptj_harness.sh @@ -22,9 +22,12 @@ mkdir -p ${WORKLOAD_DATA}/model export INT8_MODEL_DIR=${WORKLOAD_DATA}/gpt-j-int8-model export INT4_MODEL_DIR=${WORKLOAD_DATA}/gpt-j-int4-model -python download-calibration-dataset.py --calibration-list-file calibration-list.txt --output-dir ${WORKLOAD_DATA}/calibration-data python download-dataset.py --split validation --output-dir ${WORKLOAD_DATA}/validation-data +test $? -eq 0 || exit $? +python download-calibration-dataset.py --calibration-list-file calibration-list.txt --output-dir ${WORKLOAD_DATA}/calibration-data +test $? -eq 0 || exit $? + if [[ -f ${INT8_MODEL_DIR}/best_model.pt ]]; then exit 0 fi @@ -34,6 +37,14 @@ export VALIDATION_DATA_JSON=${WORKLOAD_DATA}/validation-data/cnn_dailymail_valid #export INT4_CALIBRATION_DIR=${WORKLOAD_DATA}/quantized-int4-model #sudo -E bash run_quantization.sh #bash run_quantization.sh + +INSTALLED_NS=$(python -c "import neural_speed; print(neural_speed.__path__[0])") +PATH_CONVERTED=`pwd` + +export INSTALLED_NS=$INSTALLED_NS +echo "INSTALLED_NS=$INSTALLED_NS" +#export PATH_CONVERTED=$PATH_CONVERTED + echo "${RUN_QUANTIZATION_CMD}" eval "${RUN_QUANTIZATION_CMD}" test $? -eq 0 || exit $? 
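
Side note (not from the patch sources): the version-gated dependencies above rely on CM's enable_if_env / skip_if_env matching, which the automation/script/module.py hunk in this patch hardens with a dict type check. Below is a minimal, simplified sketch of that AND-style matching; the real enable_or_skip_script handles more cases, and the sample env values are hypothetical.

    # Simplified illustration of enable_if_env-style matching (AND over all keys).
    def condition_matches(meta, env):
        if type(meta) != dict:
            # Type guard along the lines of the one added in this patch
            # (the real code only prints this diagnostic and continues).
            print("The meta entry is not a dictionary for skip/enable if_env {}".format(meta))
            return False
        for key in meta:
            allowed = [str(v).lower() for v in meta[key]]
            if str(env.get(key, '')).lower() not in allowed:
                return False  # one mismatch disables the dependency
        return True

    # The v4.0-only torch and neural-speed deps are pulled in only when the
    # version variation sets CM_MLPERF_INFERENCE_CODE_VERSION accordingly.
    env = {'CM_MLPERF_INFERENCE_CODE_VERSION': 'v4.0'}
    print(condition_matches({'CM_MLPERF_INFERENCE_CODE_VERSION': ['v4.0']}, env))  # True
    print(condition_matches({'CM_MLPERF_INFERENCE_CODE_VERSION': ['v3.1']}, env))  # False
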
diff --git a/script/app-mlperf-inference-intel/customize.py b/script/app-mlperf-inference-intel/customize.py index 18a03d45d7..1156bf3937 100644 --- a/script/app-mlperf-inference-intel/customize.py +++ b/script/app-mlperf-inference-intel/customize.py @@ -27,7 +27,12 @@ def preprocess(i): backend = env['CM_MLPERF_BACKEND'] device = env['CM_MLPERF_DEVICE'] - harness_root = os.path.join(env['CM_MLPERF_INFERENCE_RESULTS_PATH'], 'closed', 'Intel', 'code', ml_model, backend+"-"+device) + code_base_folder = backend + '-' + device + if env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == 'v4.0': + if 'gptj' in ml_model: + code_base_folder = "ITREX" + + harness_root = os.path.join(env['CM_MLPERF_INFERENCE_RESULTS_PATH'], 'closed', 'Intel', 'code', ml_model, code_base_folder) env['CM_HARNESS_CODE_ROOT'] = harness_root @@ -79,16 +84,25 @@ def preprocess(i): env['MLPERF_INFERENCE_ROOT'] = env['CM_MLPERF_INFERENCE_SOURCE'] if env.get('INTEL_GPTJ_INT4', '') == 'yes': model_precision = "int4" - env['RUN_QUANTIZATION_CMD'] = "bash run_quantization_int4.sh" + if env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == 'v3.1': + env['RUN_QUANTIZATION_CMD'] = "bash run_quantization_int4.sh" + else: + env['FILE_TAG'] = "final" + env['OUT_DIR'] = os.getcwd() + env['RUN_QUANTIZATION_CMD'] = "bash run_quantization.sh" else: model_precision = "int8" env['RUN_QUANTIZATION_CMD'] = "bash run_quantization.sh" - final_model_path = os.path.join(harness_root, "data", f"gpt-j-{model_precision}-model", "best_model.pt") + if env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == "v3.1": + final_model_path = os.path.join(harness_root, "data", f"gpt-j-{model_precision}-model", "best_model.pt") + else: + final_model_path = os.path.join(env['OUT_DIR'], "checkpoint-final-final-q4-j-int8-pc.bin") model_dir_name = f"{model_precision.upper()}_MODEL_DIR" env[model_dir_name] = os.path.dirname(final_model_path) if not os.path.exists(env[model_dir_name]): os.makedirs(env[model_dir_name]) env['CM_ML_MODEL_PATH'] = env[model_dir_name] + env['CM_ML_MODEL_FILE_WITH_PATH'] = final_model_path if env.get('CM_MLPERF_INFERENCE_INTEL_GPTJ_INT8_MODEL_PATH', '') != '' and env.get('INT8_MODEL_DIR', '') != '': shutil.copy(env['CM_MLPERF_INFERENCE_INTEL_GPTJ_INT8_MODEL_PATH'], env[model_dir_name]) if env.get('CM_MLPERF_INFERENCE_INTEL_GPTJ_INT4_MODEL_PATH', '') != '' and env.get('INT4_MODEL_DIR', '') != '': @@ -120,7 +134,18 @@ def preprocess(i): env['QUANTIZED_MODEL'] = os.path.join(env["INT8_MODEL_DIR"], "best_model.pt") env['PRECISION'] = "int8" env['CM_RUN_DIR'] = i['run_script_input']['path'] - env['CM_RUN_CMD'] = "bash run_gptj_harness.sh " + if env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == "v3.1": + env['CM_RUN_CMD'] = "bash run_gptj_harness_v3_1.sh " + elif env.get('CM_MLPERF_INFERENCE_CODE_VERSION', '') == "v4.0": + env['CM_RUN_CMD'] = "bash run_gptj_harness_v4_0.sh " + + if env['CM_MLPERF_RUN_STYLE'] == "test": + env['TOTAL_SAMPLE_COUNT'] = env['CM_TEST_QUERY_COUNT'] + + if env['CM_MLPERF_LOADGEN_SCENARIO'] == "Offline": + env['WORKERS_PER_PROC'] = 4 + else: + env['WORKERS_PER_PROC'] = 1 return {'return':0} diff --git a/script/app-mlperf-inference-intel/run_gptj_harness.sh b/script/app-mlperf-inference-intel/run_gptj_harness_v3_1.sh similarity index 99% rename from script/app-mlperf-inference-intel/run_gptj_harness.sh rename to script/app-mlperf-inference-intel/run_gptj_harness_v3_1.sh index 43e57bbb0c..74988df28a 100644 --- a/script/app-mlperf-inference-intel/run_gptj_harness.sh +++ b/script/app-mlperf-inference-intel/run_gptj_harness_v3_1.sh @@ 
-35,7 +35,6 @@ cmd="python runner.py --workload-name gptj \ --num-proc ${NUM_PROC} \ --cpus-per-proc ${CPUS_PER_PROC} \ --model-checkpoint-path ${CHECKPOINT_DIR} \ - ${WARMUP} \ --dataset-path ${VALIDATION_DATA_JSON} \ --batch-size ${BATCH_SIZE} \ --mlperf-conf ${CM_MLPERF_CONF} \ diff --git a/script/app-mlperf-inference-intel/run_gptj_harness_v4_0.sh b/script/app-mlperf-inference-intel/run_gptj_harness_v4_0.sh new file mode 100644 index 0000000000..9186f733ae --- /dev/null +++ b/script/app-mlperf-inference-intel/run_gptj_harness_v4_0.sh @@ -0,0 +1,75 @@ +#!/bin/bash +export PATH=${CM_CONDA_BIN_PATH}:$PATH + +export KMP_BLOCKTIME=1 +export KMP_AFFINITY=granularity=fine,compact,1,0 +export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so +# export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so +# + +BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE} + +DIR_SCRIPT=$(dirname "${BASH_SOURCE[0]}") +[ -z $DIR_NS ] && DIR_NS="$DIR_SCRIPT/gpt-j-env/neural-speed" +[ -z $VALIDATION_DATA_JSON ] && VALIDATION_DATA_JSON="$DIR_SCRIPT/gpt-j-env/cnn_dailymail_validation.json" +[ -z $CHECKPOINT_DIR ] && CHECKPOINT_DIR="$DIR_SCRIPT/gpt-j-env/finetuned_gptj" + +# num_numa=$(numactl --hardware|grep available|awk -F' ' '{ print $2 }') + +export num_physical_cores=$(lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l) +IFS=', ' read -r -a available_nodes_list <<<"$(numactl -s | grep nodebind | sed -E 's/^nodebind: (.+)$/\1/')" +declare -p available_nodes_list +num_numa="${#available_nodes_list[@]}" +declare -p num_numa + +find "$DIR_NS" -name CMakeCache.txt -exec rm {} \; +CMAKE_ARGS="-DNS_PROFILING=ON" pip install -e "$DIR_NS" + +[ -z $NUM_PROC ] && NUM_PROC=$num_numa +CPUS_PER_PROC=$((num_physical_cores / num_numa)) +[ -z $WORKERS_PER_PROC ] && WORKERS_PER_PROC=1 +[ -z $CPUS_PER_WORKER ] && CPUS_PER_WORKER= # e.g. 8:8:8:8:8:8:8 +[ -z $BATCH_PROC_ALLOC ] && BATCH_PROC_ALLOC= # e.g. 12:12:12:12:12:12:12 +[ -z $LOGICAL_CORES_START ] && LOGICAL_CORES_START=-1 # set to -1 to disable / or use $num_physical_cores +[ -z $CORES_OFFSET ] && CORES_OFFSET=0 + +[ -z $BATCH_SIZE ] && BATCH_SIZE=12 +[ -z $BEAM_SIZE ] && BEAM_SIZE=4 + +OUTPUT_DIR="${CM_MLPERF_OUTPUT_DIR}" +MODEL_PATH="${CM_ML_MODEL_FILE_WITH_PATH}" +cd ${CM_HARNESS_CODE_ROOT} +export WORKLOAD_DATA=${CM_HARNESS_CODE_ROOT}/data +export VALIDATION_DATA_JSON=${WORKLOAD_DATA}/validation-data/cnn_dailymail_validation.json + + +for i in $(seq 0 $(($NUM_PROC - 1))); do + [[ ! -e "${MODEL_PATH}${i}" ]] && ln -fs "$(basename $MODEL_PATH)" "${MODEL_PATH}${i}" +done + +echo "Start time: $(date)" +cmd="python runner.py --workload-name gptj \ + --scenario ${CM_MLPERF_LOADGEN_SCENARIO} \ + --mode ${LOADGEN_MODE} \ + --num-proc ${NUM_PROC} \ + --cpus-per-proc ${CPUS_PER_PROC} \ + --dataset-path ${VALIDATION_DATA_JSON} \ + --model-path ${MODEL_PATH} \ + --model-checkpoint ${CHECKPOINT_DIR} \ + --batch-size ${BATCH_SIZE} \ + --beam-size ${BEAM_SIZE} \ + --mlperf-conf ${CM_MLPERF_CONF} \ + --user-conf ${CM_MLPERF_USER_CONF} \ + --workers-per-proc ${WORKERS_PER_PROC} \ + --total-sample-count ${TOTAL_SAMPLE_COUNT} \ + --output-dir ${OUTPUT_DIR} \ + --cores-offset ${CORES_OFFSET} \ + --logical-cores-start \"${LOGICAL_CORES_START}\" \ + --cpus-per-worker \"${CPUS_PER_WORKER}\" \ + --batch-proc-alloc \"${BATCH_PROC_ALLOC}\" \ + 2>&1 | tee ${OUTPUT_DIR}.log" +echo "$cmd" +eval "$cmd" +test $? -eq 0 || exit $? 
+echo "End time: $(date)" + diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 09f010af61..f4434961ab 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -338,6 +338,11 @@ variations: env: CM_MLPERF_IMPLEMENTATION: intel + intel-original,gptj_: + adr: + cnndm-accuracy-script: + tags: _int32 + intel-original,gptj_,build-harness: docker: run: false @@ -637,9 +642,6 @@ variations: - all CM_MLPERF_ACCURACY_RESULTS_DIR: - 'on' - skip_if_env: - CM_MLPERF_IMPLEMENTATION: - - intel names: - cnndm-accuracy-script - mlperf-accuracy-script @@ -1165,6 +1167,8 @@ variations: nvidia-inference-server: version: r4.0 tags: _go + intel-harness: + tags: _v4.0 default_env: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' diff --git a/script/generate-mlperf-inference-user-conf/customize.py b/script/generate-mlperf-inference-user-conf/customize.py index 87e10eeeed..edbfd7cfa4 100644 --- a/script/generate-mlperf-inference-user-conf/customize.py +++ b/script/generate-mlperf-inference-user-conf/customize.py @@ -173,7 +173,7 @@ def preprocess(i): elif env['CM_MLPERF_RUN_STYLE'] == "test": if scenario == "Offline": - metric_value = 1 + metric_value = float(env.get('CM_MLPERF_INFERENCE_TEST_QPS', 1)) if scenario in [ "SingleStream" ]: metric_value = 1000 @@ -254,36 +254,36 @@ def preprocess(i): ranging_user_conf += ml_model_name + "." + scenario + ".min_duration = 300000" + "\n" if env['CM_MLPERF_RUN_STYLE'] == "test": + max_duration_test = env.get('CM_MLPERF_MAX_DURATION_TEST', 30000) query_count = env.get('CM_TEST_QUERY_COUNT', "5") user_conf += ml_model_name + "." + scenario + ".max_query_count = " + query_count + "\n" user_conf += ml_model_name + "." + scenario + ".min_query_count = " + query_count + "\n" user_conf += ml_model_name + "." + scenario + ".min_duration = 0" + "\n" - #else: - # user_conf += ml_model_name + "." + scenario + ".min_duration = 20000" + "\n" - # user_conf += ml_model_name + "." + scenario + ".max_duration = 20000 \n " + # max_duration is effective for all scenarios except the Offline + if env.get('CM_MLPERF_USE_MAX_DURATION', 'yes').lower() not in [ "no", "false", "0"]: + if scenario != "Offline": + user_conf += ml_model_name + "." + scenario + f".max_duration = {max_duration_test}" + "\n" elif env['CM_MLPERF_RUN_STYLE'] == "fast": + max_duration_fast = env.get('CM_MLPERF_MAX_DURATION_FAST', 120000) if scenario == "Server": + user_conf += ml_model_name + "." + scenario + f".max_duration = {max_duration_fast}" + "\n" target_qps = conf['target_qps'] query_count = str(int((660/fast_factor) * (float(target_qps)))) user_conf += ml_model_name + "." + scenario + ".max_query_count = " + query_count + "\n" else: + max_duration_valid = env.get('CM_MLPERF_MAX_DURATION_VALID', 660000) + max_duration_ranging = env.get('CM_MLPERF_MAX_DURATION_RANGING', 300000) if scenario == "MultiStream" or scenario == "SingleStream": if env.get('CM_MLPERF_USE_MAX_DURATION', 'yes').lower() not in [ "no", "false", "0" ] and env.get('CM_MLPERF_MODEL_EQUAL_ISSUE_MODE', 'no').lower() not in [ "yes", "1", "true" ]: - user_conf += ml_model_name + "." + scenario + ".max_duration = 660000 \n" + user_conf += ml_model_name + "." + scenario + f".max_duration = {max_duration_valid}" + "\n" elif env.get('CM_MLPERF_INFERENCE_MIN_DURATION','') != '': user_conf += ml_model_name + "." + scenario + ".min_duration = " + env['CM_MLPERF_INFERENCE_MIN_DURATION'] +" \n" if scenario == "MultiStream": user_conf += ml_model_name + "." 
+ scenario + ".min_query_count = "+ env.get('CM_MLPERF_INFERENCE_MULTISTREAM_MIN_QUERY_COUNT', "662") + "\n" if short_ranging: - ranging_user_conf += ml_model_name + "." + scenario + ".max_duration = 300000 \n " - elif scenario == "SingleStream_old": - query_count = str(max(int((1000 / float(conf['target_latency'])) * 660), 64)) - user_conf += ml_model_name + "." + scenario + ".max_query_count = " + str(int(query_count)+40) + "\n" - #user_conf += ml_model_name + "." + scenario + ".min_query_count = " + query_count + "\n" - if short_ranging: - ranging_user_conf += ml_model_name + "." + scenario + ".max_query_count = " + str(int(query_count)+40) + "\n" + ranging_user_conf += ml_model_name + "." + scenario + f".max_duration = {max_duration_ranging} \n " elif scenario == "Offline": query_count = int(float(conf['target_qps']) * 660) query_count = str(max(query_count, required_min_queries_offline)) diff --git a/script/get-python3/customize.py b/script/get-python3/customize.py index 3e2cc0b6f4..5d07f6ac86 100644 --- a/script/get-python3/customize.py +++ b/script/get-python3/customize.py @@ -7,7 +7,7 @@ def preprocess(i): env = i['env'] - if env.get('CM_PYTHON_CONDA', '') == 'yes': + if env.get('CM_PYTHON_CONDA', '') == 'yes' and env.get('CM_CONDA_BIN_PATH', '') != '': env['CM_PYTHON_BIN_WITH_PATH'] = os.path.join(env['CM_CONDA_BIN_PATH'], "python") recursion_spaces = i['recursion_spaces'] diff --git a/script/install-intel-neural-speed-from-src/_cm.json b/script/install-intel-neural-speed-from-src/_cm.json new file mode 100644 index 0000000000..23d4004e1d --- /dev/null +++ b/script/install-intel-neural-speed-from-src/_cm.json @@ -0,0 +1,122 @@ +{ + "alias": "install-intel-neural-speed-from-src", + "automation_alias": "script", + "automation_uid": "5b4e0237da074764", + "cache": true, + "category": "Detection or installation of tools and artifacts", + "deps": [ + { + "tags": "detect,os" + }, + { + "tags": "detect,cpu" + }, + { + "env": { + "CM_GIT_CHECKOUT_PATH_ENV_NAME": "CM_INTEL_NEURAL_SPEED_SRC_REPO_PATH" + }, + "extra_cache_tags": "intel-neural-speed,neural-speed-src,src,intel-neural-speed-src,neural-speed-src-repo", + "names": [ + "neural-speed-src-repo" + ], + "tags": "get,git,repo", + "update_tags_from_env_with_prefix": { + "_branch.": [ + "CM_GIT_CHECKOUT" + ], + "_repo.": [ + "CM_GIT_URL" + ], + "_sha.": [ + "CM_GIT_CHECKOUT_SHA" + ], + "_tag.": [ + "CM_GIT_CHECKOUT_TAG" + ] + } + } + ], + "env": { + "CM_GIT_URL": "https://github.com/intel/neural-speed" + }, + "name": "Build Intel Neural Speed from sources", + "new_env_keys": [ + "CM_INTEL_NEURAL_SPEED_*" + ], + "sort": 1000, + "tags": [ + "install", + "src", + "from.src", + "neural-speed", + "intel-neural-speed" + ], + "uid": "b5477fdc929744ce", + "variations": { + "branch.#": { + "env": { + "CM_GIT_CHECKOUT": "#" + } + }, + "repo.#": { + "env": { + "CM_GIT_URL": "#" + }, + "group": "repo" + }, + "repo.https://github.com/intel/neural-speed": { + "default": true, + "env": { + "CM_GIT_URL": "https://github.com/intel/neural-speed" + }, + "group": "repo" + }, + "sha.#": { + "env": { + "CM_GIT_CHECKOUT_SHA": "#" + } + }, + "tag.#": { + "ad": { + "neural-speed-src-repo": { + "tags": "_no-recurse-submodules,_full-history" + } + }, + "env": { + "CM_GIT_CHECKOUT_TAG": "#" + } + }, + "for-intel-mlperf-inference-v4.0-gptj": { + "adr": { + "conda-package": { + "tags": "_name.gptj-pt" + } + }, + "deps": [ + { + "names": [ + "conda" + ], + "tags": "get,conda,_name.gptj-pt" + }, + { + "names": [ + "conda-package", + "python3" + ], + "tags": 
"get,generic,conda-package,_package.python", + "version": "3.9" + }, + { + "names": [ + "conda-package", + "wheel" + ], + "tags": "get,generic,conda-package,_package.wheel,_source.conda-forge" + } + ] + } + }, + "warnings": [ + ] +} diff --git a/script/install-intel-neural-speed-from-src/customize.py b/script/install-intel-neural-speed-from-src/customize.py new file mode 100644 index 0000000000..abb5680baf --- /dev/null +++ b/script/install-intel-neural-speed-from-src/customize.py @@ -0,0 +1,21 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + if os_info['platform'] == 'windows': + return {'return':1, 'error': 'Windows is not supported in this script yet'} + + env = i['env'] + + env['CM_PYTHON_BIN_WITH_PATH'] = os.path.join(env['CM_CONDA_BIN_PATH'], "python") + + automation = i['automation'] + + recursion_spaces = i['recursion_spaces'] + + env['+PATH'] = [] + + return {'return':0} diff --git a/script/install-intel-neural-speed-from-src/run.sh b/script/install-intel-neural-speed-from-src/run.sh new file mode 100644 index 0000000000..7068890f3c --- /dev/null +++ b/script/install-intel-neural-speed-from-src/run.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +CUR_DIR=$PWD +echo $PWD +rm -rf neural-speed +cmd="cp -r ${CM_INTEL_NEURAL_SPEED_SRC_REPO_PATH} neural-speed" +echo "$cmd" +eval "$cmd" +${CM_PYTHON_BIN_WITH_PATH} -m pip install -r neural-speed/requirements.txt +test $? -eq 0 || exit $? +CMAKE_ARGS="-DNS_PROFILING=ON" ${CM_PYTHON_BIN_WITH_PATH} -m pip install -ve ./neural-speed +test $? -eq 0 || exit $? + +echo "******************************************************" diff --git a/script/process-mlperf-accuracy/_cm.json b/script/process-mlperf-accuracy/_cm.json index d8c1f5d373..351434e7f8 100644 --- a/script/process-mlperf-accuracy/_cm.json +++ b/script/process-mlperf-accuracy/_cm.json @@ -44,20 +44,96 @@ "tags": "get,dataset,cnndm,_validation" }, { - "tags": "get,generic-python-lib,_package.rouge_score" + "tags": "get,generic-python-lib,_package.datasets", + "names": + [ + "pip-package", + "datasets" + ] }, { - "tags": "get,generic-python-lib,_package.nltk" + "tags": "get,generic-python-lib,_package.rouge_score", + "names": + [ + "pip-package", + "rouge-score" + ] }, { - "tags": "get,generic-python-lib,_package.evaluate" + "tags": "get,generic-python-lib,_package.nltk", + "names": + [ + "pip-package", + "nltk" + ] }, { - "tags": "get,generic-python-lib,_package.absl-py" + "tags": "get,generic-python-lib,_package.evaluate", + "names": + [ + "pip-package", + "evaluate" + ] }, { - "tags": "get,generic-python-lib,_package.rouge_score" - } + "tags": "get,generic-python-lib,_package.absl-py", + "names": + [ + "pip-package", + "absl-py" + ] + }, + { + "tags": "download,file,_url.https://raw.githubusercontent.com/mlcommons/inference_results_v4.0/main/closed/Intel/code/gptj-99/ITREX/evaluation.py", + "enable_if_env": { + "CM_MLPERF_IMPLEMENTATION": + [ + "intel" + ] + }, + "force_cache": true, + "extra_cache_tags": "intel,accuracy,file,gptj,mlperf,inference", + "env": { + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_MLPERF_INFERENCE_INTEL_GPTJ_ACCURACY_FILE_WITH_PATH" + } + }, + { + "tags": "download,file,_url.https://raw.githubusercontent.com/mlcommons/inference_results_v4.0/main/closed/Intel/code/gptj-99/ITREX/dataset.py", + "enable_if_env": { + "CM_MLPERF_IMPLEMENTATION": + [ + "intel" + ] + }, + "force_cache": true, + "extra_cache_tags": "intel,dataset,file,gptj,mlperf,inference", + "env": { + "CM_DOWNLOAD_FINAL_ENV_NAME": 
"CM_MLPERF_INFERENCE_INTEL_GPTJ_DATASET_FILE_WITH_PATH" + } + }, + { + "tags": "download,file,_url.https://raw.githubusercontent.com/mlcommons/inference_results_v4.0/main/closed/Intel/code/gptj-99/ITREX/item.py", + "enable_if_env": { + "CM_MLPERF_IMPLEMENTATION": + [ + "intel" + ] + }, + "force_cache": true, + "extra_cache_tags": "intel,dataset,item,file,gptj,mlperf,inference", + "env": { + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_MLPERF_INFERENCE_INTEL_GPTJ_DATASET_ITEM_FILE_WITH_PATH" + } + }, + { + "tags": "get,ml-model,gptj,_fp32,_pytorch", + "enable_if_env": { + "CM_MLPERF_IMPLEMENTATION": + [ + "intel" + ] + } + } ], "env": { "CM_DATASET": "cnndm" diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index 895227b449..25f81a0921 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -63,13 +63,21 @@ def preprocess(i): "' --output_dtype " + env['CM_ACCURACY_DTYPE'] + env.get('CM_OUTPUT_TRANSPOSED','') + max_examples_string + " > '" + out_file + "'" elif dataset == "cnndm": - CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "gpt-j", - "evaluation.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \ - "' --dataset-file '" + env['CM_DATASET_EVAL_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "float32") +" > '" + out_file + "'" + if env.get('CM_MLPERF_IMPLEMENTATION', '') == 'intel': + accuracy_checker_file = env['CM_MLPERF_INFERENCE_INTEL_GPTJ_ACCURACY_FILE_WITH_PATH'] + env['+PYTHONPATH'] = [os.path.dirname(env['CM_MLPERF_INFERENCE_INTEL_GPTJ_DATASET_FILE_WITH_PATH'])] + [os.path.dirname(env['CM_MLPERF_INFERENCE_INTEL_GPTJ_DATASET_ITEM_FILE_WITH_PATH'])] + env['+PYTHONPATH'] + suffix_string = " --model-name-or-path '"+ env['GPTJ_CHECKPOINT_PATH'] +"'" + else: + accuracy_checker_file = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "gpt-j", + "evaluation.py") + suffix_string = " --dtype " + env.get('CM_ACCURACY_DTYPE', "float32") + CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + accuracy_checker_file + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \ + "' --dataset-file '" + env['CM_DATASET_EVAL_PATH'] + "'" +suffix_string +" > '" + out_file + "'" elif dataset == "openorca": - CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama2-70b", - "evaluate-accuracy.py") + "' --checkpoint-path '" + env['CM_ML_MODEL_LLAMA2_FILE_WITH_PATH'] + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \ + accuracy_checker_file = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama2-70b", + "evaluate-accuracy.py") + CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + accuracy_checker_file + "' --checkpoint-path '" + env['CM_ML_MODEL_LLAMA2_FILE_WITH_PATH'] + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + \ "' --dataset-file '" + env['CM_DATASET_PREPROCESSED_PATH'] + "'"+ " --dtype " + env.get('CM_ACCURACY_DTYPE', "int32") +" > '" + out_file + "'" From cf2f46c7ba49442df7dd1ac7730424c67f192663 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 25 May 2024 05:41:52 -0700 Subject: [PATCH 5/5] Fix intel gptj v4.0 --- script/app-mlperf-inference-intel/customize.py | 2 ++ script/generate-mlperf-inference-user-conf/customize.py | 1 + 2 files changed, 3 insertions(+) diff --git 
a/script/app-mlperf-inference-intel/customize.py b/script/app-mlperf-inference-intel/customize.py index 1156bf3937..699e786b16 100644 --- a/script/app-mlperf-inference-intel/customize.py +++ b/script/app-mlperf-inference-intel/customize.py @@ -141,6 +141,8 @@ def preprocess(i): if env['CM_MLPERF_RUN_STYLE'] == "test": env['TOTAL_SAMPLE_COUNT'] = env['CM_TEST_QUERY_COUNT'] + else: + env['TOTAL_SAMPLE_COUNT'] = env.get('CM_MLPERF_MAX_QUERY_COUNT', env['CM_TEST_QUERY_COUNT']) if env['CM_MLPERF_LOADGEN_SCENARIO'] == "Offline": env['WORKERS_PER_PROC'] = 4 diff --git a/script/generate-mlperf-inference-user-conf/customize.py b/script/generate-mlperf-inference-user-conf/customize.py index edbfd7cfa4..1102112aa8 100644 --- a/script/generate-mlperf-inference-user-conf/customize.py +++ b/script/generate-mlperf-inference-user-conf/customize.py @@ -271,6 +271,7 @@ def preprocess(i): target_qps = conf['target_qps'] query_count = str(int((660/fast_factor) * (float(target_qps)))) user_conf += ml_model_name + "." + scenario + ".max_query_count = " + query_count + "\n" + env['CM_MLPERF_MAX_QUERY_COUNT'] = query_count else: max_duration_valid = env.get('CM_MLPERF_MAX_DURATION_VALID', 660000)
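
For reference, a small sketch (not from the repository) of how the test-run-style user.conf entries come together after the generate-mlperf-inference-user-conf changes above, using the defaults visible in the diff; the model, scenario, and env values are made up, and the CM_MLPERF_USE_MAX_DURATION check is omitted for brevity.

    # Hypothetical example of assembling the test-mode user.conf after this patch.
    env = {'CM_TEST_QUERY_COUNT': '5'}          # sample values, not real defaults
    ml_model_name, scenario = 'gptj-99', 'Server'

    max_duration_test = env.get('CM_MLPERF_MAX_DURATION_TEST', 30000)
    query_count = env.get('CM_TEST_QUERY_COUNT', "5")

    user_conf  = ml_model_name + "." + scenario + ".max_query_count = " + query_count + "\n"
    user_conf += ml_model_name + "." + scenario + ".min_query_count = " + query_count + "\n"
    user_conf += ml_model_name + "." + scenario + ".min_duration = 0" + "\n"
    if scenario != "Offline":                   # max_duration is not applied to Offline
        user_conf += ml_model_name + "." + scenario + f".max_duration = {max_duration_test}" + "\n"

    print(user_conf, end='')
    # gptj-99.Server.max_query_count = 5
    # gptj-99.Server.min_query_count = 5
    # gptj-99.Server.min_duration = 0
    # gptj-99.Server.max_duration = 30000
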