Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for scheduling pods based on node labels, taints, and affinity #352

Merged
merged 20 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
af2ef40
feat: add support for targeting node labels, taints, and affinity
leninmehedy Sep 18, 2023
f36d385
feat: add priorityClassName
leninmehedy Sep 21, 2023
fdc10af
feat: add pod-disruption-budget config
leninmehedy Sep 21, 2023
f031422
fix: create a single pdb for all network-nodes
leninmehedy Sep 21, 2023
8797775
fix: use -0 suffix to support GKE kubeVersion string
leninmehedy Sep 22, 2023
b7331fd
fix: add correct node-selector values
leninmehedy Sep 22, 2023
cecf140
fix: add node labels to local cluster similar to GKE
leninmehedy Oct 3, 2023
d869ae8
fix: update default test config in order to run bats tests locally
leninmehedy Oct 3, 2023
cfd27a4
fix: add separate pdb for network nodes
leninmehedy Oct 3, 2023
9f59a25
fix: delete namespace when chart is uninstalled
leninmehedy Oct 3, 2023
0f3d319
fix: add flags to enable or disable various auxiliary resource deploy…
leninmehedy Oct 4, 2023
f394c4a
ci: fix start and stop makefile targets
leninmehedy Oct 4, 2023
8cbd127
ci: cleanup makefile targets
leninmehedy Oct 4, 2023
bb747f1
ci: avoid deleting namespace to reduce time of execution
leninmehedy Oct 4, 2023
13aa177
ci: use kind config file to create cluster with node annotations
leninmehedy Oct 4, 2023
a0b8487
docs: add comments to avoid confusion later
leninmehedy Oct 4, 2023
d3087c7
fix: update Chart.lock
leninmehedy Oct 4, 2023
cb3d7e5
ci: delete namespace as needed when deploying to GKE
leninmehedy Oct 4, 2023
3ea9155
ci: add total time of execution for debugging
leninmehedy Oct 4, 2023
b8ecdc9
ci: add more time execution log for debugging
leninmehedy Oct 4, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/zxc-compile-code.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
uses: helm/kind-action@dda0770415bac9fc20092cacbc54aa298604d140 # v1.8.0
if: ${{ inputs.enable-unit-tests && !cancelled() }}
with:
cluster_name: fst
config: dev/dev-cluster.yaml
version: v0.19.0
verbosity: 3
wait: 120s
Expand All @@ -125,6 +125,7 @@ jobs:
run: |
kubectl config get-contexts
kubectl get crd
kubectl get node --show-labels

# This step is currently required because the Hedera Services artifacts are not publicly accessible.
# May be removed once the artifacts are publicly accessible.
Expand Down
4 changes: 2 additions & 2 deletions charts/hedera-network/Chart.lock
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ dependencies:
- name: tenant
repository: https://operator.min.io/
version: 5.0.7
digest: sha256:cf355b295abceb5814ef57d3e146ec9d4e8db7365a700079d683bd5f766ad374
generated: "2023-09-20T13:51:41.203996+10:00"
digest: sha256:5dbc1a4af8f2b057dbd7730b6308e1a2954f3f95f86e8484bb232e64ed12e923
generated: "2023-10-04T15:47:44.747012+11:00"
8 changes: 5 additions & 3 deletions charts/hedera-network/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,20 @@ version: 0.8.0
appVersion: "0.8.0"

# This is the range of Kubernetes server versions supported by this chart.
kubeVersion: ">=1.25.0"
# Note we need to use -0 suffix to support GKE version
# Reference: https://github.com/helm/helm/issues/3810#issuecomment-379877753
kubeVersion: ">=1.25.0-0"

dependencies:
- name: hedera-explorer
version: 0.2.0
condition: cloud.minio.enable
condition: hedera-explorer.enable

- name: hedera-mirror
alias: hedera-mirror-node
version: 0.86.0
repository: https://hashgraph.github.io/hedera-mirror-node/charts
condition: cloud.minio.enable
condition: hedera-mirror-node.enable

- name: tenant
alias: minio-server
Expand Down
22 changes: 22 additions & 0 deletions charts/hedera-network/templates/network-node-statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ metadata:
namespace: {{ default $.Release.Namespace $.Values.global.namespaceOverride }}
labels:
app: network-{{ $node.name }}
{{- if $.Values.deployment.podLabels }}
{{- $.Values.deployment.podLabels | toYaml | nindent 4 }}
{{- end }}
{{- if $.Values.deployment.podAnnotations }}
annotations:
{{- $.Values.deployment.podAnnotations | toYaml | nindent 4 }}
{{- end }}
spec:
replicas: 1
serviceName: "network-{{ $node.name }}"
Expand All @@ -30,6 +37,21 @@ spec:
fullstack.hedera.com/type: network-node
fullstack.hedera.com/node-name: {{ $node.name }}
spec:
{{- if $.Values.deployment.nodeSelectors }}
nodeSelector:
{{- $.Values.deployment.nodeSelectors | toYaml | nindent 8 }}
{{- end }}
{{- if $.Values.deployment.tolerations }}
tolerations:
{{- $.Values.deployment.tolerations | toYaml | nindent 8 }}
{{- end }}
{{- if $.Values.deployment.affinity }}
affinity:
{{- $.Values.deployment.affinity | toYaml | nindent 8 }}
{{- end }}
{{- if $.Values.deployment.priorityClassName }}
priorityClassName: {{ $.Values.deployment.priorityClassName }}
{{- end }}
terminationGracePeriodSeconds: {{ $.Values.terminationGracePeriodSeconds }}
volumes:
- name: hgcapp-storage # change me
Expand Down
24 changes: 24 additions & 0 deletions charts/hedera-network/templates/pdb.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{{- /*
  PodDisruptionBudget for the network nodes.

  Renders one PodDisruptionBudget per entry in .Values.hedera.nodes when
  .Values.deployment.podDisruptionBudget.create is truthy. Each budget selects
  the pods carrying the labels
    fullstack.hedera.com/type: network-node
    fullstack.hedera.com/node-name: <node name>

  Kubernetes accepts only ONE of minAvailable / maxUnavailable in a single
  PodDisruptionBudget, so rendering fails early when both values are set
  instead of producing a manifest the API server would reject.

  NOTE(review): each selected StatefulSet is rendered with `replicas: 1`; with
  the default `minAvailable: 1` a voluntary eviction (e.g. node drain) of that
  single pod will presumably never be allowed - confirm this is intended.
*/ -}}
{{- if $.Values.deployment.podDisruptionBudget.create }}
{{- $pdb := $.Values.deployment.podDisruptionBudget }}
{{- if and $pdb.minAvailable $pdb.maxUnavailable }}
{{- fail "deployment.podDisruptionBudget: set only one of minAvailable or maxUnavailable, not both" }}
{{- end }}
{{ range $index, $node := $.Values.hedera.nodes }}
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: network-node-pdb-{{ $node.name }}
  namespace: {{ default $.Release.Namespace $.Values.global.namespaceOverride }}
  labels:
    fullstack.hedera.com/type: pod-disruption-budget
    fullstack.hedera.com/node-name: {{ $node.name }}
spec:
  selector:
    matchLabels:
      fullstack.hedera.com/type: network-node
      fullstack.hedera.com/node-name: {{ $node.name }}
  {{- /* the two settings are mutually exclusive; empty/zero values are skipped */}}
  {{- if $pdb.minAvailable }}
  minAvailable: {{ $pdb.minAvailable }}
  {{- else if $pdb.maxUnavailable }}
  maxUnavailable: {{ $pdb.maxUnavailable }}
  {{- end }}
{{- end }}
{{- end }}
2 changes: 2 additions & 0 deletions charts/hedera-network/templates/rbac/pod-monitor.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- if $.Values.tester.deployPodMonitor | eq "true" }}
apiVersion: v1
kind: ServiceAccount
metadata:
Expand All @@ -17,3 +18,4 @@ roleRef:
kind: ClusterRole
name: {{ $.Values.tester.clusterRoleName }}
apiGroup: rbac.authorization.k8s.io
{{- end }}
15 changes: 12 additions & 3 deletions charts/hedera-network/tests/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
# Every script must load (source) this in the beginning
# Warning: avoid making these variables readonly since it can be sourced multiple times

CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

# load .env file if it exists in order to load variables with custom values
ENV_FILE="$(dirname "${BASH_SOURCE[0]}")/.env"
ENV_FILE="${CUR_DIR}/.env"
if [[ -f "${ENV_FILE}" ]]; then
set -a
# shellcheck source=./../temp/.env
Expand All @@ -13,8 +15,15 @@ if [[ -f "${ENV_FILE}" ]]; then
fi

# set global env variables if not set
BATS_HOME="${BATS_HOME:-../../../dev/bats}"
TESTS_DIR="${TESTS_DIR:-.}"
BATS_HOME="${BATS_HOME:-${CUR_DIR}/../../../dev/bats}"
TESTS_DIR="${TESTS_DIR:-${CUR_DIR}}"

TOTAL_NODES="${TOTAL_NODES:-3}"
USER="${USER:-changeme}"
NAMESPACE="${NAMESPACE:-fst-${USER}}"
LOG_DIR="${LOG_DIR:-${CUR_DIR}/logs}"
LOG_FILE="${LOG_FILE:-helm-test.log}"
OUTPUT_LOG="${OUTPUT_LOG:-false}"
[ ! -d "${LOG_DIR}" ] && mkdir "${LOG_DIR}"

echo "--------------------------Env Setup: fullstack-testing Helm Test------------------------------------------------"
Expand Down
3 changes: 3 additions & 0 deletions charts/hedera-network/tests/env.template
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
USER="${USER:-changeme}"
NAMESPACE="${NAMESPACE:-fst-${USER}}"

TOTAL_NODES=3

LOG_DIR="${LOG_DIR:-/tmp/fullstack-testing-logs}"
Expand Down
2 changes: 1 addition & 1 deletion charts/hedera-network/tests/run.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
CUR_DIR=$(dirname "${BASH_SOURCE[0]}")
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
source "${CUR_DIR}/env.sh"
source "${CUR_DIR}/logging.sh"

Expand Down
43 changes: 40 additions & 3 deletions charts/hedera-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ global:

# cloud configuration
cloud:
minio:
enable: true
buckets:
streamBucket: "fst-streams"
backupBucket: "fst-backups"
s3:
enable: "true"
gcs:
enable: "true"
minio:
enable: true

# telemetry configurations
telemetry:
Expand All @@ -26,6 +26,7 @@ terminationGracePeriodSeconds: 10

# helm test container
tester:
deployPodMonitor: "true"
clusterRoleName: "pod-monitor-role" # this is a shared cluster role for all namespaces
image:
registry: "ghcr.io"
Expand Down Expand Up @@ -63,7 +64,6 @@ gatewayApi:
route:
hostname: "{{ .node.name }}.fst.local"


# default settings for a single node
# These default configurations can be overridden for each node in the hedera.nodes section.
defaults:
Expand Down Expand Up @@ -222,7 +222,9 @@ minio-server:
certificate:
requestAutoCert: false

# hedera mirror node configuration
hedera-mirror-node:
enable: true
global:
namespaceOverride: "{{ tpl (.Values.global.namespaceOverride | toString) }}"
# importer is a component of the hedera mirror node
Expand Down Expand Up @@ -250,7 +252,9 @@ hedera-mirror-node:
bucketName: "fst-streams"
# for s3 configuration of mirror node look at uploader-mirror-secrets.yaml

# hedera explorer configuration
hedera-explorer:
enable: true
global:
namespaceOverride: "{{ tpl (.Values.global.namespaceOverride | toString) }}"
# The hedera explorer UI /api url will proxy all request to mirror node
Expand All @@ -272,6 +276,39 @@ hedera-explorer:
}
]

# common deployment configuration
# These values are read by the network-node StatefulSet template and the
# PodDisruptionBudget template.
deployment:
  # Extra annotations / labels added to the network-node StatefulSet metadata.
  podAnnotations: {}
  podLabels: {}
  # Rendered as the pod `nodeSelector`; nodes must carry these labels.
  nodeSelectors:
    fullstack-scheduling.io/os: linux
    fullstack-scheduling.io/role: network
  # Rendered as the pod `tolerations`.
  tolerations:
    - key: "fullstack-scheduling.io/os"
      operator: "Equal"
      value: "linux"
      effect: "NoSchedule"
    - key: "fullstack-scheduling.io/role"
      operator: "Equal"
      value: "network"
      effect: "NoSchedule"
  # Specify pod affinity
  # Use complete affinity spec starting with key "nodeAffinity:"
  # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#node-affinity
  affinity: {}
  # Name of the PriorityClass to assign to the pods. This is a string field;
  # leave it empty to omit `priorityClassName` from the pod spec.
  priorityClassName: ""
  ## PodDisruptionBudget for fullstack testing pods
  ## Default backend Pod Disruption Budget configuration
  ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
  ## @param deployment.podDisruptionBudget.create Enable Pod Disruption Budget configuration
  ## @param deployment.podDisruptionBudget.minAvailable Minimum number/percentage of pods that should remain scheduled
  ## @param deployment.podDisruptionBudget.maxUnavailable Maximum number/percentage of pods that may be unavailable
  ## NOTE: set only one of minAvailable / maxUnavailable; leave the other empty ("").
  ##
  podDisruptionBudget:
    create: true
    minAvailable: 1
    maxUnavailable: ""

# hedera node configuration
# Only the name of the node is required. The rest of the configuration will be inherited from `defaults` section
hedera:
Expand Down
28 changes: 9 additions & 19 deletions dev/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,10 @@ run-func:
source "${SCRIPTS_DIR}/${SCRIPT_NAME}" && ${FUNC}

.PHONY: start
start: deploy-minio-operator-if-required update-helm-dependencies deploy-network setup-nodes start-nodes
start: ci-deploy-network setup-nodes start-nodes

.PHONY: stop
stop: stop-nodes destroy-network

.PHONY: restart
restart: stop-nodes start-nodes
Expand Down Expand Up @@ -233,30 +236,17 @@ destroy-test-container:
local-kubectl-bats:
source "${SCRIPTS_DIR}/${DOCKER_SCRIPT}" && build_kubectl_bats "${CLUSTER_NAME}"

# Here we run all steps in sequence, if any step fails, deploy-all trap the EXIT and run cleanup
.PHONY: run-deploy-seq
run-deploy-seq: setup deploy-network helm-test setup-nodes start-nodes

.PHONY: deploy-all
deploy-all:
.PHONY: ci-test
ci-test:
# Enable cleanup_test function so that even if test fails, we cleanup the cluster.
# We are only enabling this in this make target, however if necessary, similar pattern can be used in other targets.
# Ref: https://stackoverflow.com/questions/28597794/how-can-i-clean-up-after-an-error-in-a-makefile
function cleanup_test {
# NOTE: It needs latest make (version ~=4.3)
function cleanup_test () {
$(MAKE) destroy-network
}
trap cleanup_test EXIT # always destroy-network on exit
$(MAKE) run-deploy-seq

.PHONY: destroy-all
destroy-all:
-$(MAKE) destroy-network
-$(MAKE) undeploy-minio-operator
-$(MAKE) destroy-prometheus-operator

.PHONY: ci-test
ci-test: setup-cluster local-kubectl-bats
$(MAKE) deploy-all CHART_VALUES_FILES="$(PWD)/ci/ci-values.yaml"
$(MAKE) ci-deploy-network setup-nodes start-nodes

.PHONY: ci-deploy-network
ci-deploy-network: setup-cluster local-kubectl-bats
Expand Down
8 changes: 8 additions & 0 deletions dev/dev-cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# kind cluster definition for the fullstack-testing development cluster.
# The compile workflow passes this file to helm/kind-action through its `config` input.
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
name: fst # this is overridden if CLUSTER_NAME env var is set. Check .env file
nodes:
- role: control-plane
# These node labels use the same keys and values as the chart's default
# `deployment.nodeSelectors` in charts/hedera-network/values.yaml.
labels:
fullstack-scheduling.io/os: linux
fullstack-scheduling.io/role: network
6 changes: 4 additions & 2 deletions dev/scripts/docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ function build_kubectl_bats() {
[[ -z "${CLUSTER_NAME}" ]] && echo "ERROR: [build_kubectl_bats] Cluster name is required" && return 1

echo ""
echo "Building kubectl-bats image"
echo "Building kubectl-bats image"
echo "-----------------------------------------------------------------------------------------------------"
cd "${DOCKERFILE_DIR}/kubectl-bats" && docker build -t "${KUBECTL_BATS_IMAGE}" .
cd "${DOCKERFILE_DIR}/kubectl-bats" && docker build -t "${KUBECTL_BATS_IMAGE}" .
kind load docker-image "${KUBECTL_BATS_IMAGE}" -n "${CLUSTER_NAME}"

log_time "build_kubectl_bats"
}
21 changes: 20 additions & 1 deletion dev/scripts/env.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/usr/bin/env bash

start_time=$(date +%s)

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

readonly SCRIPT_DIR
Expand Down Expand Up @@ -58,7 +60,11 @@ function setup_kubectl_context() {
kubectl get ns

echo "Setting kubectl context..."
kubectl config use-context "kind-${CLUSTER_NAME}"
local count
count=$(kubectl config get-contexts --no-headers | grep -c "kind-${CLUSTER_NAME}")
if [[ $count -ne 0 ]]; then
kubectl config use-context "kind-${CLUSTER_NAME}"
fi
kubectl config set-context --current --namespace="${NAMESPACE}"
kubectl config get-contexts
}
Expand All @@ -68,6 +74,19 @@ function setup() {
load_env_file
}

#######################################
# Print how many seconds have passed since this file recorded `start_time`.
# Globals:   start_time (read) - epoch seconds captured when the file was loaded
# Arguments: $1 - label (typically the calling function's name) shown in the message
# Outputs:   a three-line banner on stdout: separator, message, separator
#######################################
function log_time() {
  local func_name=$1
  local finished_at elapsed_secs took
  local -r separator="-----------------------------------------------------------------------------------------------------"

  finished_at=$(date +%s)
  elapsed_secs=$((finished_at - start_time))
  # whole seconds only; the %.2f format is kept so the message text stays unchanged
  took=$(printf "%.2f seconds" "${elapsed_secs}")

  printf '%s\n' \
    "${separator}" \
    "<<< ${func_name} execution took: ${took} >>>" \
    "${separator}"
}

setup

echo "--------------------------Env Setup: fullstack-testing ------------------------------------------------"
Expand Down
Loading