Skip to content

Commit

Permalink
feat: fine-tune for ubuntu 24.04 on hpc
Browse files Browse the repository at this point in the history
  • Loading branch information
HoKim98 committed Aug 21, 2024
1 parent ae9c051 commit faaa1cb
Show file tree
Hide file tree
Showing 18 changed files with 328 additions and 45 deletions.
28 changes: 17 additions & 11 deletions templates/contrib/perf-test/Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,38 @@
# Load environment variables
set dotenv-load

# Delete the disk I/O job, then the toolkit deployment and the PVC.
# Each `|| true` lets the recipe continue when a `kubectl delete` fails.
clean:
kubectl delete -f job-disk-io.yaml || true
kubectl delete -f deployment-toolkit.yaml -f pvc.yaml || true

# Apply the toolkit deployment and the PVC, pause for one second, then block
# until the `perf-test-toolkit` deployment rollout has completed.
deploy:
kubectl apply -f deployment-toolkit.yaml -f pvc.yaml
@sleep 1
kubectl rollout status deployment perf-test-toolkit

start *ARGS:
# Run the `clean` recipe followed by the `deploy` recipe.
reset:
@just clean
@just deploy

start *ARGS: deploy
kubectl delete -f job-disk-io.yaml || true
kubectl apply -f job-disk-io.yaml
@sleep 1
kubectl wait --for=condition=ready pods -l 'app.kubernetes.io/component=perf-test-disk-io'

# Run ARGS as a command inside the `shell` container of the
# `perf-test-toolkit` deployment, with stdin and a TTY attached (`-it`).
exec *ARGS:
@kubectl exec -it -c shell 'deployment/perf-test-toolkit' -- {{ ARGS }}

_stat_avg rw:
@echo "1024 * $( \
just exec cat '/data/perf-test-disk-io_{{ rw }}.log' \
| grep -P '^ +bw' \
| grep -Po 'avg=\K[0-9\.]+' \
| bc \
)" | bc
@just exec cat '/data/perf-test-disk-io_{{ rw }}.log' \
| grep -P '^ +bw'

stat:
@echo -n 'read (bps) '
@just _stat_avg 'read'
@echo -n 'write (bps) '
@just _stat_avg 'write'
@echo -n 'randread (bps) '
@just _stat_avg 'randread'
@echo -n 'randwrite (bps) '
@just _stat_avg 'randwrite'
@# echo -n 'randread (bps) '
@# just _stat_avg 'randread'
@# echo -n 'randwrite (bps) '
@# just _stat_avg 'randwrite'
8 changes: 8 additions & 0 deletions templates/contrib/perf-test/deployment-toolkit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ spec:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 1
preference:
matchExpressions:
- key: node-role.kubernetes.io/kiss
operator: In
values:
- Compute
- weight: 2
preference:
matchExpressions:
- key: node-role.kubernetes.io/kiss
Expand All @@ -40,6 +47,7 @@ spec:
operator: In
values:
- Compute
- ControlPlane
- Gateway
containers:
- name: shell
Expand Down
61 changes: 41 additions & 20 deletions templates/contrib/perf-test/job-disk-io.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ spec:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 1
preference:
matchExpressions:
- key: node-role.kubernetes.io/kiss
operator: In
values:
- Compute
- weight: 2
preference:
matchExpressions:
- key: node-role.kubernetes.io/kiss
Expand All @@ -35,6 +42,7 @@ spec:
operator: In
values:
- Compute
- ControlPlane
- Gateway
containers:
- name: shell
Expand Down Expand Up @@ -78,62 +86,75 @@ spec:
FILE_OUTPUT_PREFIX="/out/${NAME}"
ARGS=""
ARGS+="--direct ${DIRECT} "
ARGS+="--fsync ${FSYNC} "
ARGS+="--group_reporting "
ARGS+="--iodepth ${IO_DEPTH} "
ARGS+="--numjobs ${NUM_JOBS} "
ARGS+="--runtime ${DURATION} "
ARGS+="--size ${FILE_SIZE} "
ARGS+="--time_based "
ARGS+="--directory /data "
# ARGS+="--ioengine rbd "
# ARGS+="--rbdname rbd0 "
###########################################################
# Serial Read #
###########################################################
echo '[serial-read]'
fio --name "${NAME}_read" --rw 'read' --output "${FILE_OUTPUT_PREFIX}_read.log" \
--direct "${BUFFERED}" --directory /data --group_reporting --time_based \
--numjobs "${NUM_JOBS}" --runtime "${DURATION}" \
--bs "${FILE_BLOCK_UNIT_SIZE}" --size "${FILE_SIZE}"
--bs "${FILE_BLOCK_UNIT_SIZE}" ${ARGS}
###########################################################
# Serial Write #
###########################################################
echo '[serial-write]'
fio --name "${NAME}_write" --rw 'write' --output "${FILE_OUTPUT_PREFIX}_write.log" \
--direct "${BUFFERED}" --directory /data --group_reporting --time_based \
--numjobs "${NUM_JOBS}" --runtime "${DURATION}" \
--bs "${FILE_BLOCK_UNIT_SIZE}" --size "${FILE_SIZE}"
--bs "${FILE_BLOCK_UNIT_SIZE}" ${ARGS}
###########################################################
# Random Read #
###########################################################
echo '[random-read]'
fio --name "${NAME}_randread" --rw 'randread' --output "${FILE_OUTPUT_PREFIX}_randread.log" \
--direct "${BUFFERED}" --directory /data --group_reporting --time_based \
--numjobs "${NUM_JOBS}" --runtime "${DURATION}" \
--bs "${FILE_BLOCK_UNIT_SIZE}" --size "${FILE_SIZE}"
# echo '[random-read]'
# fio --name "${NAME}_randread" --rw 'randread' --output "${FILE_OUTPUT_PREFIX}_randread.log" \
# --bs '4K' ${ARGS}
###########################################################
# Random Write #
###########################################################
echo '[random-write]'
fio --name "${NAME}_randwrite" --rw 'randwrite' --output "${FILE_OUTPUT_PREFIX}_randwrite.log" \
--direct "${BUFFERED}" --directory /data --group_reporting --time_based \
--numjobs "${NUM_JOBS}" --runtime "${DURATION}" \
--bs "${FILE_BLOCK_UNIT_SIZE}" --size "${FILE_SIZE}"
# echo '[random-write]'
# fio --name "${NAME}_randwrite" --rw 'randwrite' --output "${FILE_OUTPUT_PREFIX}_randwrite.log" \
# --bs '4K' ${ARGS}
env:
- name: BUFFERED
- name: DIRECT
value: "1"
- name: DURATION
value: "30"
value: "60"
- name: FILE_BLOCK_UNIT_SIZE
value: 1M
value: 4M
- name: FILE_PATH
value: /data/testfile
- name: FILE_SIZE
value: 1G
- name: FSYNC
value: "1"
- name: IO_DEPTH
value: "64"
- name: NAME
valueFrom:
fieldRef:
fieldPath: metadata.labels['app.kubernetes.io/component']
- name: NUM_JOBS
value: "8"
value: "12"
# volumeDevices:
# - name: data
# devicePath: /dev/rbd0
volumeMounts:
- name: data
mountPath: /data
Expand Down
3 changes: 2 additions & 1 deletion templates/contrib/perf-test/pvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ spec:
resources:
requests:
storage: 1Ti
storageClassName: ceph-block
storageClassName: ceph-filesystem-noreplicas
# volumeMode: Block
volumeMode: Filesystem
---
apiVersion: v1
Expand Down
7 changes: 6 additions & 1 deletion templates/contrib/speed-test/iperf3/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@ spec:
labels:
app: iperf3
spec:
hostIPC: true
containers:
- name: iperf3
image: docker.io/leodotcloud/swiss-army-knife:latest
image: docker.io/library/ubuntu:24.04
imagePullPolicy: Always
command:
- /usr/bin/env
- sleep
- infinity
ports:
- containerPort: 5201
33 changes: 33 additions & 0 deletions templates/csi/directpv/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
# Copyright (c) 2022 Ho Kim ([email protected]). All rights reserved.
# Use of this source code is governed by a GPL-3-style license that can be
# found in the LICENSE file.

# Installs the DirectPV kubectl plugin and its driver, then initializes the
# drives that DirectPV discovers.

# Prohibit errors
set -e -o pipefail
# Verbose
set -x

###########################################################
#   Install DirectPV                                      #
###########################################################

echo "- Installing DirectPV ... "

kubectl krew install directpv

# Select the storage nodes by their KISS node-role label
kubectl directpv install --node-selector node-role.kubernetes.io/kiss=Storage

###########################################################
#   Provision DirectPV Drives                             #
###########################################################

echo "- Provisioning DirectPV Drives ... "

# Keep the discovered drive list in a private scratch directory instead of a
# fixed, predictable path under /tmp. The EXIT trap removes it even when a
# step below fails and `set -e` aborts the script.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf "${WORK_DIR}"' EXIT

DRIVES_FILE="${WORK_DIR}/drives.yaml"
kubectl directpv discover --output-file "${DRIVES_FILE}"
# WARNING: `--dangerous` acknowledges that initializing the listed drives is
# destructive (see the DirectPV command reference for the exact semantics).
kubectl directpv init "${DRIVES_FILE}" --dangerous

# Finished!
echo "Installed!"
44 changes: 44 additions & 0 deletions templates/csi/minio/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
# Copyright (c) 2022 Ho Kim ([email protected]). All rights reserved.
# Use of this source code is governed by a GPL-3-style license that can be
# found in the LICENSE file.

# Installs (or upgrades) the MinIO Operator through its Helm chart.
#
# Environment variables:
#   HELM_CHART  Helm repository URL (default: https://operator.min.io)
#   NAMESPACE   Target namespace; also used as the local Helm repository name
#               (default: minio-operator)

# Prohibit errors
set -e -o pipefail
# Verbose
set -x

###########################################################
#   Configuration                                         #
###########################################################

# Configure default environment variables
HELM_CHART_DEFAULT="https://operator.min.io"
NAMESPACE_DEFAULT="minio-operator"

# Set environment variables
HELM_CHART="${HELM_CHART:-$HELM_CHART_DEFAULT}"
NAMESPACE="${NAMESPACE:-$NAMESPACE_DEFAULT}"

# Resolve files next to this script, so the installer works no matter which
# directory it is invoked from (previously the values file was looked up in
# the caller's working directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

###########################################################
#   Configure Helm Channel                                #
###########################################################

echo "- Configuring Helm channel ... "

# The namespace doubles as the local name of the Helm repository
helm repo add "${NAMESPACE}" "${HELM_CHART}"

###########################################################
#   Install Operator                                      #
###########################################################

echo "- Installing Operator ... "

helm upgrade --install "minio-operator" \
    "${NAMESPACE}/minio-operator" \
    --create-namespace \
    --namespace "${NAMESPACE}" \
    --values "${SCRIPT_DIR}/values-operator.yaml"

# Finished!
echo "Installed!"
26 changes: 26 additions & 0 deletions templates/csi/minio/values-operator.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
# Values for the MinIO Operator Helm chart (root key: `operator`)
operator:
  # Node affinity applied to the Operator pods; it controls which worker
  # nodes the pods may be scheduled onto. See:
  # https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/
  affinity:
    nodeAffinity:
      # Hard requirement: only nodes whose KISS role is Compute or ControlPlane
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: node-role.kubernetes.io/kiss
                operator: In
                values:
                  - Compute
                  - ControlPlane
      # Soft preference: favor Compute nodes among the allowed ones
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 1
          preference:
            matchExpressions:
              - key: node-role.kubernetes.io/kiss
                operator: In
                values:
                  - Compute
37 changes: 37 additions & 0 deletions templates/csi/rook-ceph/ceph-block-noreplicas.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
---
# RBD pool that keeps a single copy of the data (no replication).
apiVersion: ceph.rook.io/v1
kind: CephBlockPool
metadata:
  name: ceph-blockpool-noreplicas
  namespace: csi-rook-ceph
spec:
  failureDomain: host
  replicated:
    # One replica only; the safety check is turned off alongside it
    size: 1
    requireSafeReplicaSize: false
  # Erasure coding left at zero chunks
  erasureCoded:
    dataChunks: 0
    codingChunks: 0
---
# StorageClass that provisions RBD volumes from the single-replica block pool.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ceph-block-noreplicas
  annotations:
    # Never picked implicitly; PVCs must name this class explicitly
    storageclass.kubernetes.io/is-default-class: "false"
provisioner: csi-rook-ceph.rbd.csi.ceph.com
allowVolumeExpansion: true
reclaimPolicy: Delete
volumeBindingMode: Immediate
parameters:
  # Backing cluster and pool
  clusterID: csi-rook-ceph
  pool: ceph-blockpool-noreplicas

  # RBD image settings
  imageFormat: "2"
  imageFeatures: layering
  csi.storage.k8s.io/fstype: ext4

  # CSI secrets used for provisioning, expansion and node staging
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: csi-rook-ceph
  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/controller-expand-secret-namespace: csi-rook-ceph
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
  csi.storage.k8s.io/node-stage-secret-namespace: csi-rook-ceph
22 changes: 22 additions & 0 deletions templates/csi/rook-ceph/ceph-filesystem-noreplicas.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,25 @@ spec:
memory: 4Gi
statusCheck:
mirror: {}
---
# StorageClass that provisions CephFS volumes from the
# `ceph-filesystem-noreplicas` filesystem.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ceph-filesystem-noreplicas
  annotations:
    # Never picked implicitly; PVCs must name this class explicitly
    storageclass.kubernetes.io/is-default-class: "false"
provisioner: csi-rook-ceph.cephfs.csi.ceph.com
allowVolumeExpansion: true
reclaimPolicy: Delete
volumeBindingMode: Immediate
parameters:
  # Backing cluster, filesystem and data pool
  clusterID: csi-rook-ceph
  fsName: ceph-filesystem-noreplicas
  pool: ceph-filesystem-noreplicas-data0
  csi.storage.k8s.io/fstype: ext4

  # CSI secrets used for provisioning, expansion and node staging
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: csi-rook-ceph
  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
  csi.storage.k8s.io/controller-expand-secret-namespace: csi-rook-ceph
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
  csi.storage.k8s.io/node-stage-secret-namespace: csi-rook-ceph
4 changes: 2 additions & 2 deletions templates/csi/rook-ceph/values-cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ cephClusterSpec:
cpu: "2"
memory: "4Gi"
requests:
cpu: "1"
memory: "1Gi"
cpu: "0.5"
memory: "500Mi"

# The option to automatically remove OSDs that are out and are safe to destroy.
removeOSDsIfOutAndSafeToRemove: false
Expand Down
Loading

0 comments on commit faaa1cb

Please sign in to comment.