Skip to content

Commit

Permalink
Add support for different SPR/ICX SKUs
Browse files Browse the repository at this point in the history
  • Loading branch information
louie-tsai committed Jan 6, 2023
1 parent 8b68e7b commit f448d4c
Show file tree
Hide file tree
Showing 29 changed files with 308 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,3 @@ bash make_preprocess.sh
bash run_SPR56C_2S.sh acc
bash run_SPR56C_2S.sh perf
```

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
RUN_TYPE=acc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
RUN_TYPE=perf
19 changes: 19 additions & 0 deletions closed/Intel/code/3d-unet-99.9/pytorch-cpu-kits19/run_offline.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
# Launch the 3D-UNet MLPerf offline benchmark via run_mlperf.sh.
# Env:    RUN_TYPE - "perf" or "acc"; defaults to "perf" when unset.
# Output: delegates to run_mlperf.sh; echoes derived sizing for the log.

number_threads=$(nproc --all)
echo "$number_threads"

# One instance per 8 hardware threads; never drop below 1 on small hosts
# (the original integer division yields 0 when fewer than 8 threads exist).
number_instance=$((number_threads / 8))
if [ "$number_instance" -lt 1 ]; then
  number_instance=1
fi
echo "$number_instance"

# Default to a performance run when the caller did not choose a mode.
if [ -z "${RUN_TYPE}" ]; then
  echo "RUN_TYPE not set (perf/acc); defaulting to RUN_TYPE=perf" >&2
  export RUN_TYPE=perf
fi
echo "$RUN_TYPE"

bash run_mlperf.sh --type="${RUN_TYPE}" \
  --precision=int8 \
  --user-conf=/workspace/user.conf \
  --num-instance="$number_instance" \
  --cpus-per-instance=4 \
  --scenario=Offline


4 changes: 2 additions & 2 deletions closed/Intel/code/3d-unet-99.9/pytorch-cpu-kits19/user.conf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Please set these fields depending on the performance of your system to
# override default LoadGen settings.
*.Offline.target_qps = 1.5
#*.Offline.min_query_count = 1200
#*.Offline.min_duration = 6000
*.Offline.min_query_count = 1200
*.Offline.min_duration = 6000
59 changes: 59 additions & 0 deletions closed/Intel/code/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# MLPerf Inference Benchmarks on AWS instances

## Setup Instructions



## How to Run on AWS

### Docker images list

| IMAGE ID | IMAGE REPO and TAG |
| --------- | ------------------------------------------------ |
| ce03546c4d75 | intel/intel-optimized-pytorch:mlperf-inference-2.1-3dunet_v2 |
| a17e4ea194cc | intel/intel-optimized-pytorch:mlperf-infernece-2.1-resnet50_v2 |
| 038cf950d509 | intel/intel-optimized-pytorch:mlperf-inference-2.1-dlrm_v2 |
| 5dfdb372208a | intel/intel-optimized-pytorch:mlperf-inference-2.1-datacenter-retinanet_v2 |

### 3dunet

```
cd 3d-unet-99.9/pytorch-cpu-kits19/
```

You can run the workload with the prepared AWS AMI image.
Please replace the IMAGE ID, YOUR SCRIPT, and PATH accordingly.
```
../../run_docker.sh <IMAGE ID> /workspace/<YOUR SCRIPT in current host folder> <PATH of 3dunet codes in docker image> aws_3dunet
```
Here is an example; it should also work with the prepared AMI image.
```
../../run_docker.sh ce03546c4d75 /workspace/run_offline.sh /opt/workdir/code/3d-unet-99.9/pytorch-cpu-kits19 aws_3dunet
```

### Resnet50

```
cd resnet50/pytorch-cpu/
```
```
../../run_docker.sh a17e4ea194cc /workspace/run_offline.sh /opt/workdir/code/resnet50/pytorch-cpu aws_rn50
```

### DLRM

```
cd dlrm-99.9/pytorch-cpu/
```
```
../../run_docker.sh 038cf950d509 /workspace/run_offline.sh /opt/workdir/code/dlrm-99.9/pytorch-cpu aws_dlrm
```
> Please change 'run_offline.sh' to 'run_offline_accuracy.sh', 'run_server.sh' or 'run_server_accuracy.sh' accordingly, based on your desired benchmark type and the mode.
### Retinanet

```
cd retinanet/pytorch-cpu/
```
```
../../run_docker.sh 5dfdb372208a /workspace/run_offline.sh /opt/workdir/code/retinanet/pytorch-cpu aws_ret
```
6 changes: 3 additions & 3 deletions closed/Intel/code/dlrm-99.9/pytorch-cpu/run_local.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/bin/bash

source ./run_common.sh
source /workspace/run_common.sh

common_opt="--config ./mlperf.conf"
OUTPUT_DIR=$PWD/output/$name/$mode/$test_type
common_opt="--config /workspace/mlperf.conf"
OUTPUT_DIR=/workspace/output/$name/$mode/$test_type
if [[ $test_type == "performance" ]]; then
OUTPUT_DIR=$OUTPUT_DIR/run_1
fi
Expand Down
2 changes: 1 addition & 1 deletion closed/Intel/code/dlrm-99.9/pytorch-cpu/run_main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ if [ $1 == "server" ]; then
fi

echo "Running $mode bs=$batch_size $dtype $test_type $DNNL_MAX_CPU_ISA"
./run_local.sh pytorch dlrm terabyte cpu $dtype $test_type --scenario $mode --max-ind-range=40000000 --samples-to-aggregate-quantile-file=../tools/dist_quantile.txt --max-batchsize=$batch_size $extra_option
/workspace/run_local.sh pytorch dlrm terabyte cpu $dtype $test_type --scenario $mode --max-ind-range=40000000 --samples-to-aggregate-quantile-file=../tools/dist_quantile.txt --max-batchsize=$batch_size $extra_option
13 changes: 7 additions & 6 deletions closed/Intel/code/dlrm-99.9/pytorch-cpu/run_mlperf.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,18 @@ do
done

WORK_DIR=$PWD

source ./setup_dataset.sh
pwd
ls
source /workspace/setup_dataset.sh

if [ ${mode} = "offline" ]; then
scenario="offline"
source ./setup_env_offline.sh
source /workspace/setup_env_offline_ICX.sh
fi

if [ ${mode} = "server" ]; then
scenario="server"
source ./setup_env_server.sh
source /workspace/setup_env_server_ICX.sh
fi

if [ ${run_type} = "perf" ];then
Expand All @@ -52,5 +53,5 @@ if [ ${dtype} = "fp32" ];then
fi

echo ${mode} $BATCH_SIZE ${run_type} ${dtype} "mode"
sudo ./run_clean.sh
./run_main.sh ${scenario} ${accuracy} ${dtype}
sudo /workspace/run_clean.sh
/workspace/run_main.sh ${scenario} ${accuracy} ${dtype}
1 change: 1 addition & 0 deletions closed/Intel/code/dlrm-99.9/pytorch-cpu/run_offline.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/workspace/run_mlperf.sh --mode=offline --type=perf --dtype=int8
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/workspace/run_mlperf.sh --mode=offline --type=acc --dtype=int8
1 change: 1 addition & 0 deletions closed/Intel/code/dlrm-99.9/pytorch-cpu/run_server.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/workspace/run_mlperf.sh --mode=server --type=perf --dtype=int8
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/workspace/run_mlperf.sh --mode=server --type=acc --dtype=int8
2 changes: 1 addition & 1 deletion closed/Intel/code/dlrm-99.9/pytorch-cpu/setup_dataset.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
export DATA_DIR=/data/mlperf_data/dlrm/
export MODEL_DIR=/data/mlperf_data/dlrm/
export MODEL_DIR=/media
12 changes: 12 additions & 0 deletions closed/Intel/code/dlrm-99.9/pytorch-cpu/setup_env_offline_ICX.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Derive DLRM offline-scenario sizing for ICX from the host topology and
# export the environment variables consumed by the MLPerf run scripts.

number_threads=$(nproc --all)
# Physical cores, assuming 2-way SMT — TODO confirm for non-hyperthreaded SKUs.
number_cores=$((number_threads / 2))
number_sockets=$(grep 'physical id' /proc/cpuinfo | sort -u | wc -l)
# /proc/cpuinfo may lack "physical id" in VMs/containers; guard the division
# below against a zero socket count.
if [ "$number_sockets" -lt 1 ]; then
  number_sockets=1
fi
cpu_per_socket=$((number_cores / number_sockets))

export NUM_SOCKETS=$number_sockets        # i.e. 8
export CPUS_PER_SOCKET=$cpu_per_socket    # i.e. 28
export CPUS_PER_PROCESS=$cpu_per_socket   # determines how many processes are used
# process-per-socket = CPUS_PER_SOCKET/CPUS_PER_PROCESS
export CPUS_PER_INSTANCE=1                # instance-per-process = CPUS_PER_PROCESS/CPUS_PER_INSTANCE
# total-instance = instance-per-process * process-per-socket
export BATCH_SIZE=16000
12 changes: 12 additions & 0 deletions closed/Intel/code/dlrm-99.9/pytorch-cpu/setup_env_server_ICX.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Derive DLRM server-scenario sizing for ICX from the host topology and
# export the environment variables consumed by the MLPerf run scripts.

number_threads=$(nproc --all)
# Physical cores, assuming 2-way SMT — TODO confirm for non-hyperthreaded SKUs.
number_cores=$((number_threads / 2))
number_sockets=$(grep 'physical id' /proc/cpuinfo | sort -u | wc -l)
# /proc/cpuinfo may lack "physical id" in VMs/containers; guard the division
# below against a zero socket count.
if [ "$number_sockets" -lt 1 ]; then
  number_sockets=1
fi
cpu_per_socket=$((number_cores / number_sockets))

export NUM_SOCKETS=$number_sockets        # i.e. 8
export CPUS_PER_SOCKET=$cpu_per_socket    # i.e. 28
export CPUS_PER_PROCESS=$cpu_per_socket   # determines how many processes are used
# process-per-socket = CPUS_PER_SOCKET/CPUS_PER_PROCESS
export CPUS_PER_INSTANCE=8                # instance-per-process = CPUS_PER_PROCESS/CPUS_PER_INSTANCE
# total-instance = instance-per-process * process-per-socket
export BATCH_SIZE=8000
2 changes: 2 additions & 0 deletions closed/Intel/code/dlrm-99.9/pytorch-cpu/user.conf.ICX40C_2S
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# LoadGen target-QPS overrides for DLRM on the ICX 40-core, 2-socket SKU.
dlrm.Server.target_qps = 14000.0
dlrm.Offline.target_qps = 100430.0
26 changes: 26 additions & 0 deletions closed/Intel/code/offline_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
# Drive offline accuracy and performance runs for all four MLPerf workloads
# (ResNet50, DLRM, RetinaNet, 3D-UNet) from the closed/Intel/code directory.
# Each section enters the workload directory, makes its run scripts
# executable, and launches the dockerized accuracy and performance runs.
# cd failures are fatal: continuing would invoke run_docker.sh with the
# wrong relative path and silently test nothing.

cd resnet50/pytorch-cpu/ || exit 1
chmod +x run_offline_accuracy.sh run_offline.sh
../../run_docker.sh a17e4ea194cc /workspace/run_offline_accuracy.sh /opt/workdir/code/resnet50/pytorch-cpu aws_rn50 resnet50_acc.txt
../../run_docker.sh a17e4ea194cc /workspace/run_offline.sh /opt/workdir/code/resnet50/pytorch-cpu aws_rn50 resnet50_perf.txt
cd ../../ || exit 1

cd dlrm-99.9/pytorch-cpu/ || exit 1
chmod +x run_offline_accuracy.sh run_offline.sh
../../run_docker.sh 038cf950d509 /workspace/run_offline_accuracy.sh /opt/workdir/code/dlrm-99.9/pytorch-cpu aws_dlrm dlrm_acc.txt
../../run_docker.sh 038cf950d509 /workspace/run_offline.sh /opt/workdir/code/dlrm-99.9/pytorch-cpu aws_dlrm dlrm_perf.txt
cd ../../ || exit 1

cd retinanet/pytorch-cpu/ || exit 1
chmod +x run_offline_accuracy.sh run_offline.sh
../../run_docker.sh 5dfdb372208a /workspace/run_offline_accuracy.sh /opt/workdir/code/retinanet/pytorch-cpu aws_ret retinanet_acc.txt
../../run_docker.sh 5dfdb372208a /workspace/run_offline.sh /opt/workdir/code/retinanet/pytorch-cpu aws_ret retinanet_perf.txt
cd ../../ || exit 1

# 3D-UNet selects acc vs perf through the env-file argument rather than
# separate scripts, so the same run_offline.sh is launched twice.
cd 3d-unet-99.9/pytorch-cpu-kits19/ || exit 1
chmod +x run_offline.sh
../../run_docker.sh ce03546c4d75 /workspace/run_offline.sh /opt/workdir/code/3d-unet-99.9/pytorch-cpu-kits19 aws_3dunet 3dunet_acc.txt env_acc.list
../../run_docker.sh ce03546c4d75 /workspace/run_offline.sh /opt/workdir/code/3d-unet-99.9/pytorch-cpu-kits19 aws_3dunet 3dunet_perf.txt env_perf.list
cd ../../ || exit 1
2 changes: 2 additions & 0 deletions closed/Intel/code/resnet50/pytorch-cpu/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ git clone -b v2.6 https://github.com/oneapi-src/oneDNN.git onednn
wget https://repo.anaconda.com/archive/Anaconda3-2022.05-Linux-x86_64.sh
bash Anaconda3-2022.05-Linux-x86_64.sh
```
+ Setup conda environment to install requirements, and build packages
```
bash prepare_env.sh
Expand Down
20 changes: 12 additions & 8 deletions closed/Intel/code/resnet50/pytorch-cpu/run_offline.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#!/bin/bash
number_threads=`nproc --all`
number_cores=$((number_threads/2))
number_sockets=`grep physical.id /proc/cpuinfo | sort -u | wc -l`
cpu_per_socket=$((number_cores/number_sockets))

export DATA_DIR=${PWD}/ILSVRC2012_img_val
export RN50_START=${PWD}/models/resnet50-start-int8-model.pth
Expand Down Expand Up @@ -30,30 +34,30 @@ if [ -z "${RN50_FULL}" ]; then
exit 1
fi

CONDA_ENV_NAME=rn50-mlperf
source ~/anaconda3/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME}
#CONDA_ENV_NAME=rn50-mlperf
#source ~/anaconda3/etc/profile.d/conda.sh
#conda activate ${CONDA_ENV_NAME}

export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"

export LD_PRELOAD=${CONDA_PREFIX}/lib/libjemalloc.so
#export LD_PRELOAD=${CONDA_PREFIX}/lib/libjemalloc.so

export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so
#export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so

KMP_SETTING="KMP_AFFINITY=granularity=fine,compact,1,0"
export KMP_BLOCKTIME=1
export $KMP_SETTING

CUR_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
APP=${CUR_DIR}/build/bin/mlperf_runner
APP=${PWD}/build/bin/mlperf_runner

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CONDA_PREFIX}/lib

if [ -e "mlperf_log_summary.txt" ]; then
rm mlperf_log_summary.txt
fi

numactl -C 0-55,56-111 -m 0,1 ${APP} --scenario Offline \
numactl -m 0 ${APP} --scenario Offline \
--mode Performance \
--mlperf_conf ${CUR_DIR}/src/mlperf.conf \
--user_conf ${CUR_DIR}/src/user.conf \
Expand All @@ -62,7 +66,7 @@ numactl -C 0-55,56-111 -m 0,1 ${APP} --scenario Offline \
--rn50-part3 ${RN50_END} \
--rn50-full-model ${RN50_FULL} \
--data_path ${DATA_DIR} \
--num_instance 224 \
--num_instance $number_threads \
--warmup_iters 20 \
--cpus_per_instance 1\
--total_sample_count 50000 \
Expand Down
14 changes: 7 additions & 7 deletions closed/Intel/code/resnet50/pytorch-cpu/run_offline_accuracy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,30 +30,30 @@ if [ -z "${RN50_FULL}" ]; then
exit 1
fi

CONDA_ENV_NAME=rn50-mlperf
source ~/anaconda3/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME}
#CONDA_ENV_NAME=rn50-mlperf
#source ~/anaconda3/etc/profile.d/conda.sh
#conda activate ${CONDA_ENV_NAME}

export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"

export LD_PRELOAD=${CONDA_PREFIX}/lib/libjemalloc.so
#export LD_PRELOAD=${CONDA_PREFIX}/lib/libjemalloc.so

export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so
#export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so

KMP_SETTING="KMP_AFFINITY=granularity=fine,compact,1,0"
export KMP_BLOCKTIME=1
export $KMP_SETTING

CUR_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
APP=${CUR_DIR}/build/bin/mlperf_runner
APP=${PWD}/build/bin/mlperf_runner

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CONDA_PREFIX}/lib

if [ -e mlperf_log_accuracy.json ]; then
rm mlperf_log_accuracy.json
fi

numactl -C 0-55,56-111 -m 0,1 ${APP} --scenario Offline \
numactl -C 0-47 -m 0 ${APP} --scenario Offline \
--mode Accuracy \
--mlperf_conf ${CUR_DIR}/src/mlperf.conf \
--user_conf ${CUR_DIR}/src/user.conf \
Expand Down
19 changes: 11 additions & 8 deletions closed/Intel/code/resnet50/pytorch-cpu/run_server.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,30 +29,33 @@ if [ -z "${RN50_FULL}" ]; then
exit 1
fi

CONDA_ENV_NAME=rn50-mlperf
source ~/anaconda3/etc/profile.d/conda.sh
conda activate ${CONDA_ENV_NAME}
# comment out the following lines for AWS
# CONDA_ENV_NAME=rn50-mlperf
# source ~/anaconda3/etc/profile.d/conda.sh
# conda activate ${CONDA_ENV_NAME}

export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"

export LD_PRELOAD=${CONDA_PREFIX}/lib/libjemalloc.so

export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so
# comment out the following lines for AWS
# export LD_PRELOAD=${CONDA_PREFIX}/lib/libjemalloc.so
# export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so

KMP_SETTING="KMP_AFFINITY=granularity=fine,compact,1,0"
export KMP_BLOCKTIME=1
export $KMP_SETTING

CUR_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
APP=${CUR_DIR}/build/bin/mlperf_runner

# change {CUR_DIR} to {PWD} for AWS
APP=${PWD}/build/bin/mlperf_runner

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CONDA_PREFIX}/lib

if [ -e "mlperf_log_summary.txt" ]; then
rm mlperf_log_summary.txt
fi

numactl -C 0-55,56-111 -m 0,1 ${APP} --scenario Server \
numactl -C 0-47 -m 0,1 ${APP} --scenario Server \
--mode Performance \
--mlperf_conf ${CUR_DIR}/src/mlperf.conf \
--user_conf ${CUR_DIR}/src/user.conf \
Expand Down
Loading

0 comments on commit f448d4c

Please sign in to comment.