diff --git a/modules/cloud-init/nfs-cloud-init.tftpl b/modules/nfs-server/files/nfs-cloud-init.tftpl similarity index 100% rename from modules/cloud-init/nfs-cloud-init.tftpl rename to modules/nfs-server/files/nfs-cloud-init.tftpl diff --git a/modules/nfs-server/main.tf b/modules/nfs-server/main.tf index 0f116fd2..233e8483 100644 --- a/modules/nfs-server/main.tf +++ b/modules/nfs-server/main.tf @@ -28,7 +28,7 @@ resource "nebius_compute_v1_instance" "nfs_server" { } ] - cloud_init_user_data = templatefile("../modules/cloud-init/nfs-cloud-init.tftpl", { + cloud_init_user_data = templatefile("${path.module}/files/nfs-cloud-init.tftpl", { ssh_user_name = var.ssh_user_name, ssh_public_key = var.ssh_public_key, nfs_ip_range = var.nfs_ip_range, diff --git a/soperator/VERSION b/soperator/VERSION index 15b989e3..41c11ffb 100644 --- a/soperator/VERSION +++ b/soperator/VERSION @@ -1 +1 @@ -1.16.0 +1.16.1 diff --git a/soperator/installations/example/.envrc b/soperator/installations/example/.envrc index a5e78248..50fe149d 100644 --- a/soperator/installations/example/.envrc +++ b/soperator/installations/example/.envrc @@ -11,34 +11,46 @@ if [ -z "${NEBIUS_PROJECT_ID}" ]; then return 1 fi -# Separate declaration and assignment for IAM token +# region IAM token + unset NEBIUS_IAM_TOKEN nebius iam whoami > /dev/null nebius iam get-access-token > /dev/null NEBIUS_IAM_TOKEN=$(nebius iam get-access-token) export NEBIUS_IAM_TOKEN -# Separate declaration and assignment for VPC subnet +# endregion IAM token + +# region VPC subnet + NEBIUS_VPC_SUBNET_ID=$(nebius vpc subnet list \ --parent-id "${NEBIUS_PROJECT_ID}" \ --format json \ | jq -r '.items[0].metadata.id') export NEBIUS_VPC_SUBNET_ID -# Export Nebius Cloud metadata to Terraform variables +# endregion VPC subnet + +# region TF variables + export TF_VAR_iam_token="${NEBIUS_IAM_TOKEN}" export TF_VAR_iam_tenant_id="${NEBIUS_TENANT_ID}" export TF_VAR_iam_project_id="${NEBIUS_PROJECT_ID}" export TF_VAR_vpc_subnet_id="${NEBIUS_VPC_SUBNET_ID}" +export TFE_PARALLELISM=20 -# Separate declaration and assignment for group editors -NEBIUS_GROUP_EDITORS_ID=$(nebius iam group get-by-name \ - --parent-id "${NEBIUS_TENANT_ID}" \ - --name 'editors' \ - --format json \ - | jq -r '.metadata.id') +echo "Exported variables:" +echo "NEBIUS_TENANT_ID: ${NEBIUS_TENANT_ID}" +echo "NEBIUS_PROJECT_ID: ${NEBIUS_PROJECT_ID}" +echo "NEBIUS_VPC_SUBNET_ID: ${NEBIUS_VPC_SUBNET_ID}" +echo "TFE_PARALLELISM: ${TFE_PARALLELISM}" + +# endregion TF variables + +# region Remote state + +# region Service account -# Separate declaration and assignment for service account NEBIUS_SA_TERRAFORM_ID=$(nebius iam service-account list \ --parent-id "${NEBIUS_PROJECT_ID}" \ --format json \ @@ -55,7 +67,16 @@ else echo "Found existing service account with ID: $NEBIUS_SA_TERRAFORM_ID" fi -# Check if service account is already a member of editors group +# endregion Service account + +# region `editors` group + +NEBIUS_GROUP_EDITORS_ID=$(nebius iam group get-by-name \ + --parent-id "${NEBIUS_TENANT_ID}" \ + --name 'editors' \ + --format json \ + | jq -r '.metadata.id') + IS_MEMBER=$(nebius iam group-membership list-members \ --parent-id "${NEBIUS_GROUP_EDITORS_ID}" \ --page-size 1000 \ @@ -73,19 +94,35 @@ else echo "Service account is already a member of editors group" fi -# Separate declaration and assignment for access key +# endregion `editors` group + +# region Access key + +DATE_FORMAT='+%Y-%m-%dT%H:%M:%SZ' + +if [[ "$(uname)" == "Darwin" ]]; then + # macOS + EXPIRATION_DATE=$(date -v +1d "${DATE_FORMAT}") +else + # Linux (assumes GNU date) + EXPIRATION_DATE=$(date -d '+1 day' "${DATE_FORMAT}") +fi + echo 'Creating new access key for Object Storage' NEBIUS_SA_ACCESS_KEY_ID=$(nebius iam access-key create \ --parent-id "${NEBIUS_PROJECT_ID}" \ --name "slurm-tf-ak-$(date +%s)" \ --account-service-account-id "${NEBIUS_SA_TERRAFORM_ID}" \ --description 'Temporary S3 Access' \ - --expires-at "$(date -v+1d '+%Y-%m-%dT%H:%M:%SZ')" \ + --expires-at "${EXPIRATION_DATE}" \ --format json \ | jq -r '.resource_id') echo "Created new access key: ${NEBIUS_SA_ACCESS_KEY_ID}" -# Separate declaration and assignment for AWS access key +# endregion Access key + +# region AWS access key + AWS_ACCESS_KEY_ID=$(nebius iam access-key get-by-id \ --id "${NEBIUS_SA_ACCESS_KEY_ID}" \ --format json | jq -r '.status.aws_access_key_id') @@ -112,6 +149,10 @@ fi export AWS_SECRET_ACCESS_KEY +# endregion AWS access key + +# region Bucket + NEBIUS_BUCKET_NAME="tfstate-slurm-k8s-$(echo -n "${NEBIUS_TENANT_ID}-${NEBIUS_PROJECT_ID}" | md5sum | awk '$0=$1')" export NEBIUS_BUCKET_NAME # Check if bucket exists @@ -130,14 +171,9 @@ else echo "Using existing bucket: ${NEBIUS_BUCKET_NAME}" fi -export TFE_PARALLELISM=20 +# endregion Bucket -# print all exported variables -echo "Exported variables:" -echo "NEBIUS_TENANT_ID: ${NEBIUS_TENANT_ID}" -echo "NEBIUS_PROJECT_ID: ${NEBIUS_PROJECT_ID}" -echo "NEBIUS_BUCKET_NAME: ${NEBIUS_BUCKET_NAME}" -echo "TFE_PARALLELISM: ${TFE_PARALLELISM}" +# region Backend override cat > terraform_backend_override.tf << EOF terraform { @@ -157,3 +193,7 @@ terraform { } } EOF + +# endregion Backend override + +# endregion Remote state diff --git a/soperator/installations/example/main.tf b/soperator/installations/example/main.tf index 7637d89d..605a15e2 100644 --- a/soperator/installations/example/main.tf +++ b/soperator/installations/example/main.tf @@ -68,6 +68,24 @@ module "filestore" { } } +module "nfs-server" { + count = var.nfs.enabled ? 1 : 0 + source = "../../../modules/nfs-server" + parent_id = data.nebius_iam_v1_project.this.id + subnet_id = data.nebius_vpc_v1_subnet.this.id + ssh_user_name = "soperator" + ssh_public_key = var.slurm_login_ssh_root_public_keys[0] + nfs_ip_range = data.nebius_vpc_v1_subnet.this.ipv4_private_pools.pools[0].cidrs[0].cidr + nfs_size = var.nfs.size_gibibytes * 1024 * 1024 * 1024 + nfs_path = "/mnt/nfs" + platform = var.nfs.resource.platform + preset = var.nfs.resource.preset + + providers = { + nebius = nebius + } +} + module "k8s" { depends_on = [ module.filestore, @@ -83,14 +101,15 @@ module "k8s" { k8s_version = var.k8s_version name = var.k8s_cluster_name slurm_cluster_name = var.slurm_cluster_name + company_name = var.company_name node_group_system = var.slurm_nodeset_system node_group_controller = var.slurm_nodeset_controller node_group_workers = flatten([for i, nodeset in var.slurm_nodeset_workers : [ - for subset in range(ceil(nodeset.size / nodeset.split_factor)) : + for subset in range(ceil(nodeset.size / nodeset.nodes_per_nodegroup)) : { - size = nodeset.split_factor + size = nodeset.nodes_per_nodegroup max_unavailable_percent = nodeset.max_unavailable_percent resource = nodeset.resource boot_disk = nodeset.boot_disk @@ -177,8 +196,11 @@ module "slurm" { source = "../../modules/slurm" - name = var.slurm_cluster_name - operator_version = var.slurm_operator_version + name = var.slurm_cluster_name + operator_version = var.slurm_operator_version + k8s_cluster_context = module.k8s.cluster_context + + iam_project_id = var.iam_project_id node_count = { controller = var.slurm_nodeset_controller.size @@ -188,32 +210,54 @@ module "slurm" { resources = { system = { - cpu_cores = local.resources.system.cpu_cores - memory_gibibytes = local.resources.system.memory_gibibytes - ephemeral_storage_gibibytes = ceil(var.slurm_nodeset_system.boot_disk.size_gibibytes - module.resources.k8s_ephemeral_storage_reserve.gibibytes) + cpu_cores = local.resources.system.cpu_cores + memory_gibibytes = local.resources.system.memory_gibibytes + ephemeral_storage_gibibytes = floor( + module.resources.k8s_ephemeral_storage_coefficient * var.slurm_nodeset_system.boot_disk.size_gibibytes + -module.resources.k8s_ephemeral_storage_reserve.gibibytes + ) } controller = { - cpu_cores = local.resources.controller.cpu_cores - memory_gibibytes = local.resources.controller.memory_gibibytes - ephemeral_storage_gibibytes = ceil(var.slurm_nodeset_controller.boot_disk.size_gibibytes - module.resources.k8s_ephemeral_storage_reserve.gibibytes) + cpu_cores = local.resources.controller.cpu_cores + memory_gibibytes = local.resources.controller.memory_gibibytes + ephemeral_storage_gibibytes = floor( + module.resources.k8s_ephemeral_storage_coefficient * var.slurm_nodeset_controller.boot_disk.size_gibibytes + -module.resources.k8s_ephemeral_storage_reserve.gibibytes + ) } worker = [for i, worker in var.slurm_nodeset_workers : { - cpu_cores = local.resources.workers[i].cpu_cores - memory_gibibytes = local.resources.workers[i].memory_gibibytes - ephemeral_storage_gibibytes = ceil(worker.boot_disk.size_gibibytes - module.resources.k8s_ephemeral_storage_reserve.gibibytes) - gpus = local.resources.workers[i].gpus + cpu_cores = local.resources.workers[i].cpu_cores + memory_gibibytes = local.resources.workers[i].memory_gibibytes + ephemeral_storage_gibibytes = ( + module.k8s.gpu_involved + ? floor( + module.resources.k8s_ephemeral_storage_coefficient * worker.boot_disk.size_gibibytes + -2 * module.resources.k8s_ephemeral_storage_reserve.gibibytes + ) + : floor( + module.resources.k8s_ephemeral_storage_coefficient * worker.boot_disk.size_gibibytes + -module.resources.k8s_ephemeral_storage_reserve.gibibytes + ) + ) + gpus = local.resources.workers[i].gpus } ] login = { - cpu_cores = local.resources.login.cpu_cores - memory_gibibytes = local.resources.login.memory_gibibytes - ephemeral_storage_gibibytes = ceil(var.slurm_nodeset_login.boot_disk.size_gibibytes - module.resources.k8s_ephemeral_storage_reserve.gibibytes) + cpu_cores = local.resources.login.cpu_cores + memory_gibibytes = local.resources.login.memory_gibibytes + ephemeral_storage_gibibytes = floor( + module.resources.k8s_ephemeral_storage_coefficient * var.slurm_nodeset_login.boot_disk.size_gibibytes + -module.resources.k8s_ephemeral_storage_reserve.gibibytes + ) } accounting = var.accounting_enabled ? { - cpu_cores = local.resources.accounting.cpu_cores - memory_gibibytes = local.resources.accounting.memory_gibibytes - ephemeral_storage_gibibytes = ceil(var.slurm_nodeset_accounting.boot_disk.size_gibibytes - module.resources.k8s_ephemeral_storage_reserve.gibibytes) + cpu_cores = local.resources.accounting.cpu_cores + memory_gibibytes = local.resources.accounting.memory_gibibytes + ephemeral_storage_gibibytes = floor( + module.resources.k8s_ephemeral_storage_coefficient * var.slurm_nodeset_accounting.boot_disk.size_gibibytes + -module.resources.k8s_ephemeral_storage_reserve.gibibytes + ) } : null } @@ -249,6 +293,13 @@ module "slurm" { } : null } + nfs = { + enabled = var.nfs.enabled + path = var.nfs.enabled ? module.nfs-server[0].nfs_export_path : null + host = var.nfs.enabled ? module.nfs-server[0].nfs_server_internal_ip : null + mount_path = var.nfs.enabled ? var.nfs.mount_path : null + } + shared_memory_size_gibibytes = var.slurm_shared_memory_size_gibibytes nccl_topology_type = var.slurm_nodeset_workers[0].resource.platform == "gpu-h100-sxm" ? "H100 GPU cluster" : "auto" @@ -278,6 +329,8 @@ module "login_script" { } slurm_cluster_name = var.slurm_cluster_name + k8s_cluster_context = module.k8s.cluster_context + providers = { kubernetes = kubernetes } diff --git a/soperator/installations/example/terraform.tfvars b/soperator/installations/example/terraform.tfvars index 234fb12a..5cb51fa2 100644 --- a/soperator/installations/example/terraform.tfvars +++ b/soperator/installations/example/terraform.tfvars @@ -6,6 +6,9 @@ # # #----------------------------------------------------------------------------------------------------------------------# +# Name of the company. It is used for context name of the cluster in .kubeconfig file. +company_name = "company" + #----------------------------------------------------------------------------------------------------------------------# # # # # @@ -94,6 +97,20 @@ filestore_accounting = { # endregion Storage +# region nfs-server + +# nfs = { +# enabled = true +# size_gibibytes = 93 +# mount_path = "/mnt/nfs" +# resource = { +# platform = "cpu-e2" +# preset = "16vcpu-64gb" +# } +# } + +# endregion nfs-server + #----------------------------------------------------------------------------------------------------------------------# # # # # @@ -140,7 +157,7 @@ slurm_cluster_name = "soperator" # Version of soperator. # --- -slurm_operator_version = "1.16.0" +slurm_operator_version = "1.16.1" # Type of the Slurm partition config. Could be either `default` or `custom`. # By default, "default". @@ -195,13 +212,13 @@ slurm_nodeset_controller = { # Configuration of Slurm Worker node sets. # There can be only one Worker node set for a while. -# Split factor allows you to split node set into equally-sized node groups to keep your cluster accessible and working -# during maintenance. Example: split_factor 3 for 12 nodes will create for you 3 groups with 4 nodes in every group. +# nodes_per_nodegroup allows you to split node set into equally-sized node groups to keep your cluster accessible and working +# during maintenance. Example: nodes_per_nodegroup=3 for size=12 nodes will create 4 groups with 3 nodes in every group. # infiniband_fabric is required field # --- slurm_nodeset_workers = [{ size = 16 - split_factor = 4 + nodes_per_nodegroup = 4 max_unavailable_percent = 50 resource = { platform = "gpu-h100-sxm" diff --git a/soperator/installations/example/variables.tf b/soperator/installations/example/variables.tf index b1a9a480..c67d2dce 100644 --- a/soperator/installations/example/variables.tf +++ b/soperator/installations/example/variables.tf @@ -34,6 +34,16 @@ data "nebius_vpc_v1_subnet" "this" { id = var.vpc_subnet_id } +variable "company_name" { + description = "Name of the company. It is used for context name of the cluster in .kubeconfig file." + type = string + + validation { + condition = var.company_name != "" + error_message = "Company name is not provided" + } +} + # endregion Cloud # region Infrastructure @@ -127,6 +137,35 @@ variable "filestore_accounting" { # endregion Storage +# region nfs-server + +variable "nfs" { + type = object({ + enabled = bool + size_gibibytes = number + mount_path = optional(string, "/mnt/nfs") + resource = object({ + platform = string + preset = string + }) + }) + default = { + enabled = false + size_gibibytes = 93 + resource = { + platform = "cpu-e2" + preset = "16vcpu-64gb" + } + } + + validation { + condition = var.nfs.enabled ? var.nfs.size_gibibytes % 93 == 0 && var.nfs.size_gibibytes <= 262074 : true + error_message = "NFS size must be a multiple of 93 GiB and maximum value is 262074 GiB" + } +} + +# endregion nfs-server + # region k8s variable "k8s_version" { @@ -291,7 +330,7 @@ variable "slurm_nodeset_workers" { description = "Configuration of Slurm Worker node sets." type = list(object({ size = number - split_factor = number + nodes_per_nodegroup = number max_unavailable_percent = number resource = object({ platform = string @@ -309,7 +348,7 @@ variable "slurm_nodeset_workers" { nullable = false default = [{ size = 1 - split_factor = 1 + nodes_per_nodegroup = 1 max_unavailable_percent = 50 resource = { platform = "cpu-e2" @@ -330,9 +369,9 @@ variable "slurm_nodeset_workers" { validation { condition = length([for worker in var.slurm_nodeset_workers : - 1 if worker.size % worker.split_factor != 0 + 1 if worker.size % worker.nodes_per_nodegroup != 0 ]) == 0 - error_message = "Worker count must be divisible by split_factor." + error_message = "Worker count must be divisible by nodes_per_nodegroup." } } diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/VERSION b/soperator/mlperf/gpt3-impl-4.0-nvidia/VERSION deleted file mode 100644 index e2c2ff71..00000000 --- a/soperator/mlperf/gpt3-impl-4.0-nvidia/VERSION +++ /dev/null @@ -1 +0,0 @@ -4.0-20 diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/build_and_push.sh b/soperator/mlperf/gpt3-impl-4.0-nvidia/build_and_push.sh deleted file mode 100755 index 42d6ebc7..00000000 --- a/soperator/mlperf/gpt3-impl-4.0-nvidia/build_and_push.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -set -e - -REGISTRY="cr.ai.nebius.cloud/crnbu823dealq64cp1s6" -REPOSITORY="nvidia-megatron" -TAG=$(cat ./VERSION) - -echo "Build image" -docker build -f ./Dockerfile -t $REPOSITORY:$TAG --platform linux/amd64 -m 64G . - -echo "Tag image" -docker tag $REPOSITORY:$TAG $REGISTRY/$REPOSITORY:$TAG - -echo "Push image" -docker push $REGISTRY/$REPOSITORY:$TAG - diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/start.sh b/soperator/mlperf/gpt3-impl-4.0-nvidia/start.sh deleted file mode 100755 index 550c5762..00000000 --- a/soperator/mlperf/gpt3-impl-4.0-nvidia/start.sh +++ /dev/null @@ -1,156 +0,0 @@ -#!/bin/bash - -set -e - -usage() { - echo "usage: ${0} -N [-w ] [-c ]" >&2 - echo " [-e ]" >&2 - echo " [-i ] [-D ] [-C ] [-R ] [-S ]" >&2 - echo " [-q (quick_start)] [-r (remove_prev_logs)] [-d (debug)] [-p (nsys_profiling)] [-h (help)]" >&2 - exit 1 -} - -# Defaults -image="cr.ai.nebius.cloud/crnbu823dealq64cp1s6/nvidia-megatron:$(cat ./VERSION)" -dataset_dir="/mlperf-data/gpt3-dataset-4.0" -checkpoint_dir="/mlperf-data/gpt3-checkpoint-4.0" -result_dir="./result" - -while getopts N:w:c:e:i:D:C:R:S:qrdph flag -do - case "${flag}" in - N) nodes=${OPTARG};; - w) nodelist=${OPTARG};; - c) config=${OPTARG};; - e) experiment=${OPTARG};; - i) image=${OPTARG};; - D) dataset_dir=${OPTARG};; - C) checkpoint_dir=${OPTARG};; - R) result_dir=${OPTARG};; - S) shared_image_cache_dir=${OPTARG};; - q) quick_start=1;; - r) rmlogs=1;; - d) debug=1;; - p) nsys_profiling=1;; - h) usage;; - *) usage;; - esac -done - -if [ -z "${nodes}" ]; then - usage -fi - -if [ -n "${nodelist}" ]; then - selected_nodes=$(sinfo -N --nodes=${nodelist} --format="%N %t" --noheader | uniq) - exit_code=$? - echo "SELECTED NODES:" - echo "${selected_nodes}" - if [ $exit_code -ne 0 ]; then - echo "ERROR: sinfo: exit code ${exit_code}" - exit 1 - fi - - echo "" - - num_selected_nodes=$(echo "${selected_nodes}" | wc -l) - if [ "${num_selected_nodes}" -ne "${nodes}" ]; then - echo "ERROR: Requested nodes = ${nodes} ('-N ${nodes}') doesn't match selected nodes = ${num_selected_nodes} ('-w ${nodelist}')" - exit 1 - fi -fi - -if [ -z "${config}" ]; then - config="config_H100x8_NODEx${nodes}_default.sh" -fi -echo "Apply ${config}" -source "${config}" - - -echo "Configure paths" -export CONT="${image}" -export PREPROC_DATA="${dataset_dir}/preprocessed_c4_spm" -export SPM="${dataset_dir}/spm/c4_en_301_5Mexp2_spm.model" -export LOAD_CHECKPOINTS_PATH="${checkpoint_dir}/ckpt4000-consumed_samples=0" -export LOGDIR="${result_dir}" -export CONTAINER_PRELOAD_SHARED_PATH="${shared_image_cache_dir}" - -echo "Configure training" -export NEXP=1 -export NCCL_SOCKET_IFNAME="eth0" -export TORCH_CUDA_ARCH_LIST="9.0" - -echo "Extract cluster name:" -export MLPERF_CLUSTER_NAME=$(scontrol show config | grep -E "ClusterName\s+" | awk -F' = ' '{print $2}') -echo " ${MLPERF_CLUSTER_NAME}" - -if [[ $quick_start -eq 1 ]]; then - echo "Disable everything excapt training" - export WARMUP_STEPS=0 - export TRAIN_ONLY=1 - export NCCL_TEST=0 - export TIME_TAGS=0 - export NVTX_FLAG=0 - export SYNTH_DATA=0 - export EPOCH_PROF=0 - export API_LOGGING=0 - export CLEAR_CACHES=0 - export CHECK_COMPLIANCE=0 - export ATTEMPT_CUDA_GDB_CORE_DUMP=0 - export POSTPROCESS_CUDA_GDB_CORE_DUMP=0 - export REMOVE_CUDA_GDB_CORE_DUMP=0 - export HANG_MONITOR_TIMEOUT=0 - export JET=0 -fi - -if [[ $rmlogs -eq 1 ]]; then - echo "Remove previous logs" - rm gpt3-*.out || true - rm -rf "${LOGDIR}/" || true - rm -rf "./api_logs/" || true -fi - -if [[ $debug -eq 1 ]]; then - echo "Enable debug logging" - export NCCL_DEBUG=INFO - export GDRCOPY_ENABLE_LOGGING=1 - export GDRCOPY_LOG_LEVEL=1 -fi - -if [[ $nsys_profiling -eq 1 ]]; then - # Configure NSYS profiler - export NVTX_FLAG=1 - export PROFILE=True - export PROFILE_START_STEP=10 - export PROFILE_END_STEP=11 - export PROFILE_RANKS="0,1,2,3,4,5,6,7" - - # Early stopping: - export TARGET_LOG_PPL=2.75 -fi - -if [ -z "${experiment}" ]; then - job_name="gpt3" - job_output="gpt3-%j.out" -else - job_name="gpt3-${experiment}" - job_output="gpt3-%j-${experiment}.out" -fi - -node_allocation="--nodes=${nodes}" -if [ -n "${nodelist}" ]; then - node_allocation="--nodelist='${nodelist}'" -fi - -echo "Submit Slurm job" -sbatch \ - -t $WALLTIME \ - -J "${job_name}" \ - --output="${job_output}" \ - --export=ALL \ - ${node_allocation} \ - --ntasks-per-node="${SBATCH_GPUS_PER_NODE}" \ - ${EXCLUSIVE:+--exclusive} \ - run.sub - -squeue diff --git a/soperator/modules/available_resources/main.tf b/soperator/modules/available_resources/main.tf index cd60bbf8..e20e181f 100644 --- a/soperator/modules/available_resources/main.tf +++ b/soperator/modules/available_resources/main.tf @@ -106,7 +106,7 @@ locals { } c-96vcpu-384gb = { cpu_cores = 96 - 2 - memory_gibibytes = 384 - 10 + memory_gibibytes = 384 - 16 gpus = 0 gpu_cluster_compatible = false sufficient = { @@ -119,7 +119,7 @@ locals { } c-128vcpu-512gb = { cpu_cores = 128 - 2 - memory_gibibytes = 512 - 10 + memory_gibibytes = 512 - 16 gpus = 0 gpu_cluster_compatible = false sufficient = { @@ -132,7 +132,7 @@ locals { } c-160vcpu-640gb = { cpu_cores = 160 - 2 - memory_gibibytes = 640 - 10 + memory_gibibytes = 640 - 16 gpus = 0 gpu_cluster_compatible = false sufficient = { @@ -145,7 +145,7 @@ locals { } c-192vcpu-768gb = { cpu_cores = 192 - 2 - memory_gibibytes = 768 - 10 + memory_gibibytes = 768 - 16 gpus = 0 gpu_cluster_compatible = false sufficient = { @@ -158,7 +158,7 @@ locals { } c-224vcpu-896gb = { cpu_cores = 224 - 2 - memory_gibibytes = 896 - 10 + memory_gibibytes = 896 - 16 gpus = 0 gpu_cluster_compatible = false sufficient = { @@ -171,7 +171,7 @@ locals { } c-256vcpu-1024gb = { cpu_cores = 256 - 2 - memory_gibibytes = 1024 - 10 + memory_gibibytes = 1024 - 16 gpus = 0 gpu_cluster_compatible = false sufficient = { diff --git a/soperator/modules/available_resources/outputs.tf b/soperator/modules/available_resources/outputs.tf index 2437aaca..38a5b7f9 100644 --- a/soperator/modules/available_resources/outputs.tf +++ b/soperator/modules/available_resources/outputs.tf @@ -3,6 +3,10 @@ output "this" { value = local.resources } +output "k8s_ephemeral_storage_coefficient" { + value = 0.9 +} + output "k8s_ephemeral_storage_reserve" { value = data.units_data_size.k8s_ephemeral_storage_reserve } diff --git a/soperator/modules/k8s/k8s_cluster.tf b/soperator/modules/k8s/k8s_cluster.tf index f5a0c4a1..a8515560 100644 --- a/soperator/modules/k8s/k8s_cluster.tf +++ b/soperator/modules/k8s/k8s_cluster.tf @@ -27,12 +27,22 @@ resource "terraform_data" "kubectl_cluster_context" { ] triggers_replace = [ - nebius_mk8s_v1_cluster.this.id + nebius_mk8s_v1_cluster.this.id, + timestamp(), ] provisioner "local-exec" { working_dir = path.root - interpreter = ["nebius", "mk8s", "cluster", "get-credentials", "--external", "--force", "--id"] - command = nebius_mk8s_v1_cluster.this.id + interpreter = ["/bin/bash", "-c"] + command = join( + " ", + [ + "nebius", "mk8s", "cluster", "get-credentials", + "--context-name", local.context_name, + "--external", + "--force", + "--id", nebius_mk8s_v1_cluster.this.id + ] + ) } } diff --git a/soperator/modules/k8s/locals.tf b/soperator/modules/k8s/locals.tf index 301c2e75..95a8878c 100644 --- a/soperator/modules/k8s/locals.tf +++ b/soperator/modules/k8s/locals.tf @@ -24,4 +24,13 @@ locals { (worker ? module.labels.label_workload_gpu : module.labels.label_workload_cpu) ] } + + context_name = join( + "-", + [ + "nebius", + replace(lower(var.company_name), " ", "-"), + "slurm" + ] + ) } diff --git a/soperator/modules/k8s/outputs.tf b/soperator/modules/k8s/outputs.tf index aac85d13..02a96574 100644 --- a/soperator/modules/k8s/outputs.tf +++ b/soperator/modules/k8s/outputs.tf @@ -11,6 +11,11 @@ output "cluster_id" { value = nebius_mk8s_v1_cluster.this.id } +output "cluster_context" { + description = "Context name of the K8s cluster." + value = local.context_name +} + output "allocation_id" { description = "ID of the VPC allocation used for SSH connection into Slurm cluster." value = local.allocation_id diff --git a/soperator/modules/k8s/variables.tf b/soperator/modules/k8s/variables.tf index db90c595..415dfb0e 100644 --- a/soperator/modules/k8s/variables.tf +++ b/soperator/modules/k8s/variables.tf @@ -32,6 +32,16 @@ variable "etcd_cluster_size" { default = 3 } +variable "company_name" { + description = "Name of the company. It is used for context name of the cluster in .kubeconfig file." + type = string + + validation { + condition = var.company_name != "" + error_message = "Company name is not provided" + } +} + #--- variable "node_group_system" { diff --git a/soperator/modules/login/lb.tf b/soperator/modules/login/lb.tf index d8a59b43..2a81c60d 100644 --- a/soperator/modules/login/lb.tf +++ b/soperator/modules/login/lb.tf @@ -1,26 +1,32 @@ -data "kubernetes_service" "slurm_login" { +resource "terraform_data" "wait_for_slurm_login_service" { count = !var.node_port.used ? 1 : 0 - metadata { - namespace = var.slurm_cluster_name - name = "${var.slurm_cluster_name}-login-svc" + provisioner "local-exec" { + interpreter = ["/bin/bash", "-c"] + command = join( + " ", + [ + "kubectl", "wait", + "--for=jsonpath='{.status.loadBalancer.ingress}'", + "--timeout", "5m", + "--context", var.k8s_cluster_context, + "-n", var.slurm_cluster_name, + "service/${var.slurm_cluster_name}-login-svc" + ] + ) } } -resource "terraform_data" "wait_for_slurm_login_service" { +data "kubernetes_service" "slurm_login" { count = !var.node_port.used ? 1 : 0 depends_on = [ - data.kubernetes_service.slurm_login - ] - - triggers_replace = [ - one(one(data.kubernetes_service.slurm_login).metadata).resource_version + terraform_data.wait_for_slurm_login_service ] - provisioner "local-exec" { - interpreter = ["/bin/bash", "-c"] - command = "kubectl wait --for=jsonpath='{.status.loadBalancer.ingress}' --timeout 5m -n ${var.slurm_cluster_name} service/${var.slurm_cluster_name}-login-svc" + metadata { + namespace = var.slurm_cluster_name + name = "${var.slurm_cluster_name}-login-svc" } } @@ -28,7 +34,7 @@ resource "terraform_data" "lb_service_ip" { count = !var.node_port.used ? 1 : 0 depends_on = [ - terraform_data.wait_for_slurm_login_service + data.kubernetes_service.slurm_login ] triggers_replace = [ diff --git a/soperator/modules/login/variables.tf b/soperator/modules/login/variables.tf index a07ed5c8..d2bebf86 100644 --- a/soperator/modules/login/variables.tf +++ b/soperator/modules/login/variables.tf @@ -1,3 +1,9 @@ +variable "k8s_cluster_context" { + description = "Context name of the K8s cluster." + type = string + nullable = false +} + variable "node_port" { description = "NodePort service configuration." type = object({ diff --git a/soperator/modules/monitoring/main.tf b/soperator/modules/monitoring/main.tf index 862da3ad..f093e9e7 100644 --- a/soperator/modules/monitoring/main.tf +++ b/soperator/modules/monitoring/main.tf @@ -150,7 +150,7 @@ resource "helm_release" "slurm_monitor" { resource "helm_release" "dashboard" { for_each = tomap({ - dcgm_exporter = "dcgm-exporter" + gpu_metrics = "gpu-metrics" slurm_exporter = "exporter" kube_state_metrics = "kube-state-metrics" node_exporter = "node-exporter" diff --git a/soperator/modules/monitoring/templates/dashboards/dcgm_exporter.yaml.tftpl b/soperator/modules/monitoring/templates/dashboards/dcgm_exporter.yaml.tftpl deleted file mode 100644 index 0a1f314f..00000000 --- a/soperator/modules/monitoring/templates/dashboards/dcgm_exporter.yaml.tftpl +++ /dev/null @@ -1,832 +0,0 @@ -resources: - - apiVersion: v1 - kind: ConfigMap - metadata: - namespace: ${namespace} - name: ${name} - labels: - grafana_dashboard: "1" - data: - ${filename}: |- - { - "__requires": [ - { - "type": "panel", - "id": "gauge", - "name": "Gauge", - "version": "" - }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "6.7.3" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - } - ], - "annotations": { - "list": [ - { - "$$hashKey": "object:192", - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "This dashboard is to display the metrics from DCGM Exporter on a Kubernetes (1.19+) cluster", - "editable": true, - "gnetId": 12239, - "graphTooltip": 0, - "id": null, - "iteration": 1588401887165, - "links": [], - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 18, - "x": 0, - "y": 0 - }, - "hiddenSeries": false, - "id": 12, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "DCGM_FI_DEV_GPU_TEMP{instance=~\"$instance\", gpu=~\"$gpu\"}", - "instant": false, - "interval": "", - "legendFormat": "GPU {{gpu}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "GPU Temperature", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "celsius", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "datasource": "$datasource", - "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 0 - }, - "id": 14, - "options": { - "fieldOptions": { - "calcs": [ - "mean" - ], - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "#EAB839", - "value": 83 - }, - { - "color": "red", - "value": 87 - } - ] - }, - "unit": "celsius" - }, - "overrides": [], - "values": false - }, - "orientation": "auto", - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "6.7.3", - "targets": [ - { - "expr": "avg(DCGM_FI_DEV_GPU_TEMP{instance=~\"$instance\", gpu=~\"$gpu\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GPU Avg. Temp", - "type": "gauge" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 18, - "x": 0, - "y": 8 - }, - "hiddenSeries": false, - "id": 10, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pluginVersion": "6.5.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "DCGM_FI_DEV_POWER_USAGE{instance=~\"$instance\", gpu=~\"$gpu\"}", - "interval": "", - "legendFormat": "GPU {{gpu}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "GPU Power Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "watt", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cacheTimeout": null, - "datasource": "$datasource", - "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 8 - }, - "id": 16, - "links": [], - "options": { - "fieldOptions": { - "calcs": [ - "sum" - ], - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 2400, - "min": 0, - "nullValueMode": "connected", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "#EAB839", - "value": 1800 - }, - { - "color": "red", - "value": 2200 - } - ] - }, - "unit": "watt" - }, - "overrides": [], - "values": false - }, - "orientation": "horizontal", - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "6.7.3", - "targets": [ - { - "expr": "sum(DCGM_FI_DEV_POWER_USAGE{instance=~\"$instance\", gpu=~\"$gpu\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "range": false, - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GPU Power Total", - "type": "gauge" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "hiddenSeries": false, - "id": 2, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "DCGM_FI_DEV_SM_CLOCK{instance=~\"$instance\", gpu=~\"$gpu\"} * 1000000", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "GPU {{gpu}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "GPU SM Clocks", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "hertz", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "hiddenSeries": false, - "id": 6, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "DCGM_FI_DEV_GPU_UTIL{instance=~\"$instance\", gpu=~\"$gpu\"}", - "interval": "", - "legendFormat": "GPU {{gpu}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "GPU Utilization", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "hiddenSeries": false, - "id": 18, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "DCGM_FI_DEV_FB_USED{instance=~\"$instance\", gpu=~\"$gpu\"}", - "interval": "", - "legendFormat": "GPU {{gpu}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "GPU Framebuffer Mem Used", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decmbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 24 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "DCGM_FI_PROF_PIPE_TENSOR_ACTIVE{instance=~\"$instance\", gpu=~\"$gpu\"}", - "interval": "", - "legendFormat": "GPU {{gpu}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Tensor Core Utilization", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": "1", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": false, - "schemaVersion": 22, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": true, - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": {}, - "datasource": "$datasource", - "definition": "label_values(DCGM_FI_DEV_GPU_TEMP, instance)", - "hide": 0, - "includeAll": true, - "index": -1, - "label": null, - "multi": true, - "name": "instance", - "options": [], - "query": "label_values(DCGM_FI_DEV_GPU_TEMP, instance)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "$datasource", - "definition": "label_values(DCGM_FI_DEV_GPU_TEMP, gpu)", - "hide": 0, - "includeAll": true, - "index": -1, - "label": null, - "multi": true, - "name": "gpu", - "options": [], - "query": "label_values(DCGM_FI_DEV_GPU_TEMP, gpu)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-15m", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ] - }, - "timezone": "", - "title": "NVIDIA DCGM Exporter Dashboard", - "uid": "Oxed_c6Wz", - "variables": { - "list": [] - }, - "version": 1 - } diff --git a/soperator/modules/monitoring/templates/dashboards/gpu_metrics.yaml.tftpl b/soperator/modules/monitoring/templates/dashboards/gpu_metrics.yaml.tftpl new file mode 100644 index 00000000..bd1df039 --- /dev/null +++ b/soperator/modules/monitoring/templates/dashboards/gpu_metrics.yaml.tftpl @@ -0,0 +1,2627 @@ +resources: + - apiVersion: v1 + kind: ConfigMap + metadata: + namespace: ${namespace} + name: ${name} + labels: + grafana_dashboard: "1" + data: + ${filename}: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 9, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 19, + "panels": [], + "repeat": "hostname", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "$hostname Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 21, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "node_load1{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "1min", + "range": true, + "refId": "A" + }, + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "node_load5{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "5min", + "range": true, + "refId": "B" + }, + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "node_load15{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "15min", + "range": true, + "refId": "C" + } + ], + "title": "System Load", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "watt" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 6, + "y": 1 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "DCGM_FI_DEV_POWER_USAGE{Hostname=\"$hostname\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "GPU {{gpu}}", + "range": true, + "refId": "A" + } + ], + "title": "GPU Power Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 2400, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1800 + }, + { + "color": "#d44a3a", + "value": 2200 + } + ] + }, + "unit": "watt" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 14, + "y": 1 + }, + "id": 30, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(DCGM_FI_DEV_POWER_USAGE{Hostname=\"$hostname\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "GPU Power Total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 17, + "y": 1 + }, + "id": 35, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "1e6*sum(DCGM_FI_DEV_NVLINK_BANDWIDTH_TOTAL{Hostname=\"$hostname\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Total", + "range": true, + "refId": "A" + } + ], + "title": "NVLINK Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 6 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "node_memory_MemTotal_bytes{instance=\"$ip\"} - node_memory_MemFree_bytes{instance=\"$ip\"} - node_memory_Buffers_bytes{instance=\"$ip\"} - node_memory_Cached_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Used", + "refId": "A" + }, + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "node_memory_Buffers_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Buffered", + "refId": "B" + }, + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "node_memory_Cached_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Cached", + "range": true, + "refId": "C" + }, + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "node_memory_MemFree_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Free", + "refId": "D" + } + ], + "title": "System Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "celsius" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 6, + "y": 6 + }, + "id": 25, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "DCGM_FI_DEV_GPU_TEMP{Hostname=\"$hostname\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "GPU {{gpu}}", + "range": true, + "refId": "A" + } + ], + "title": "GPU Temperature", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "GPU Shutdown Temp : 90 C\nGPU Slowdown Temp : 87 C\nGPU Max Operating Temp : 83 C", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 90, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 83 + }, + { + "color": "#d44a3a", + "value": 87 + } + ] + }, + "unit": "celsius" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 14, + "y": 6 + }, + "id": 31, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg(DCGM_FI_DEV_GPU_TEMP{Hostname=\"$hostname\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "GPU Avg. Temp", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Rx" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 17, + "y": 6 + }, + "id": 36, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(DCGM_FI_PROF_PCIE_TX_BYTES{Hostname=\"$hostname\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Tx", + "range": true, + "refId": "A" + }, + { + "datasource": { + "uid": "prometheus" + }, + "expr": "sum(DCGM_FI_PROF_PCIE_RX_BYTES{instance=\"$ip\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Rx", + "refId": "B" + } + ], + "title": "PCIe Throughput", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.75 + }, + { + "color": "#d44a3a", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 11 + }, + "id": 27, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "expr": "(sum(node_filesystem_size_bytes{device!=\"rootfs\",instance=\"$ip\"}) - sum(node_filesystem_avail_bytes{device!=\"rootfs\",instance=\"$ip\"})) / sum(node_filesystem_size_bytes{device!=\"rootfs\",instance=\"$ip\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "C" + } + ], + "title": "Disk Usage", + "type": "stat" + }, + { + "datasource": { + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "#d44a3a", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 11 + }, + "id": 28, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^\\{container=\"node\\-exporter\", endpoint=\"http\\-metrics\", instance=\"192\\.168\\.0\\.53:9100\", job=\"node\\-exporter\", namespace=\"monitoring\\-system\", pod=\"prometheus\\-stack\\-prometheus\\-node\\-exporter\\-7lz6v\", prometheus=\"monitoring\\-system/select\\-all\", service=\"prometheus\\-stack\\-prometheus\\-node\\-exporter\"\\}$/", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "expr": "((node_memory_MemTotal_bytes{instance=\"$ip\"} - node_memory_MemFree_bytes{instance=\"$ip\"} - node_memory_Buffers_bytes{instance=\"$ip\"} - node_memory_Cached_bytes{instance=\"$ip\"}) / node_memory_MemTotal_bytes{instance=\"$ip\"}) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 6, + "y": 11 + }, + "id": 57, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "DCGM_FI_DEV_GPU_UTIL{Hostname=\"$hostname\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "GPU {{gpu}}", + "range": true, + "refId": "A" + } + ], + "title": "GPU Utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "#d44a3a", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 14, + "y": 11 + }, + "id": 58, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg(DCGM_FI_DEV_GPU_UTIL{Hostname=\"$hostname\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "GPU Total Utilization", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Receive" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 17, + "y": 11 + }, + "id": 37, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(node_network_transmit_bytes_total{instance=\"$ip\",device=~\"eth.*\"}[$__interval]))", + "format": "time_series", + "hide": false, + "interval": ".5m", + "intervalFactor": 2, + "legendFormat": "Transmit", + "range": true, + "refId": "B" + }, + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(node_network_receive_bytes_total{instance=\"$ip\",device=~\"eth.*\"}[$__interval]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": ".5m", + "intervalFactor": 2, + "legendFormat": "Receive", + "refId": "A" + } + ], + "title": "Ethernet Throughput", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "I/O time" + }, + "properties": [ + { + "id": "unit", + "value": "ms" + }, + { + "id": "custom.axisPlacement", + "value": "right" + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 16 + }, + "id": 38, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "expr": "rate(node_disk_read_bytes_total{instance=\"$ip\"}[$__interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 4, + "legendFormat": "Read ({{device}})", + "refId": "E" + }, + { + "datasource": { + "uid": "prometheus" + }, + "expr": "rate(node_disk_written_bytes_total{instance=\"$ip\"}[$__interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 4, + "legendFormat": "Write ({{device}})", + "refId": "F" + }, + { + "datasource": { + "uid": "prometheus" + }, + "expr": "sum by (instance) (rate(node_disk_io_time_ms{instance=\"$ip\"}[$__interval]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 4, + "legendFormat": "I/O time", + "refId": "A" + } + ], + "title": "Disk Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 6, + "y": 16 + }, + "id": 39, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "DCGM_FI_DEV_MEM_COPY_UTIL{Hostname=\"$hostname\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "GPU {{gpu}}", + "range": true, + "refId": "A" + } + ], + "title": "GPU Mem Cpy Utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 70 + }, + { + "color": "#d44a3a", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 14, + "y": 16 + }, + "id": 40, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg(DCGM_FI_DEV_MEM_COPY_UTIL{Hostname=\"$hostname\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "GPU Avg. Mem. Cpy Utilization", + "type": "stat" + }, + { + "datasource": { + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Receive" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 17, + "y": 16 + }, + "id": 41, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "expr": "sum(rate(node_infiniband_port_data_transmitted_bytes_total{instance=\"$ip\"}[$__interval]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Transmit", + "refId": "B" + }, + { + "datasource": { + "uid": "prometheus" + }, + "expr": "sum(rate(node_infiniband_port_data_received_bytes_total{instance=\"$ip\"}[$__interval]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Receive", + "refId": "A" + } + ], + "title": "Infiniband Throughput", + "type": "timeseries" + }, + { + "datasource": { + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "hertz" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 0, + "y": 21 + }, + "id": 59, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^nodename$/", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "expr": "node_uname_info{instance=\"$ip\"}", + "format": "table", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "Hostname", + "type": "stat" + }, + { + "datasource": { + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "hertz" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 3, + "y": 21 + }, + "id": 60, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^release$/", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "expr": "node_uname_info{instance=\"$ip\"}", + "format": "table", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "Kernel", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 6, + "y": 21 + }, + "id": 42, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "DCGM_FI_DEV_FB_USED{Hostname=\"$hostname\"}/(DCGM_FI_DEV_FB_USED{Hostname=\"$hostname\"}+DCGM_FI_DEV_FB_FREE{Hostname=\"$hostname\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "GPU {{gpu}}", + "range": true, + "refId": "A" + } + ], + "title": "GPU Mem Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 3200, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2600 + }, + { + "color": "#d44a3a", + "value": 3000 + } + ] + }, + "unit": "watt" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 14, + "y": 21 + }, + "id": 54, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^node_hwmon_power_average_watt{chip=\"lnxsybus:00_acpi000d:00\", instance=\"$ip\", job=\"cluster\", module=\"node-exporter\", sensor=\"power1\"}$/", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "node_hwmon_power_average_watt{Hostname=\"$hostname\"}", + "format": "time_series", + "intervalFactor": 1, + "range": true, + "refId": "A" + } + ], + "title": "System Total Power Draw", + "type": "stat" + }, + { + "datasource": { + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "watt" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "PSU1 Input" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ed0e0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PSU2 Input" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#eab839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PSU3 Input" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#ef843c", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PSU4 Input" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#d683ce", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 7, + "x": 17, + "y": 21 + }, + "id": 52, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "expr": "node_hwmon_power_average_watt{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "A" + } + ], + "title": "System Power Draw", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "hertz" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 0, + "y": 23 + }, + "id": 44, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg(DCGM_FI_DEV_SM_CLOCK{Hostname=\"$hostname\"}*1000000)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "GPU SM Clocks", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "hertz" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 3, + "y": 23 + }, + "id": 45, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg(DCGM_FI_DEV_MEM_CLOCK{Hostname=\"$hostname\"}*1000000)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "GPU Memory Clocks", + "type": "stat" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "worker-0", + "value": "worker-0" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(kube_pod_info,pod)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "worker", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_pod_info,pod)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "^worker-.*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "computeinstance-e00bgs8hr09kcxsbhn", + "value": "computeinstance-e00bgs8hr09kcxsbhn" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(kube_pod_info{pod=\"$worker\"},node)", + "hide": 2, + "includeAll": false, + "label": "hostname", + "multi": false, + "name": "hostname", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_pod_info{pod=\"$worker\"},node)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "192.168.0.53:9100", + "value": "192.168.0.53:9100" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(node_uname_info{job=\"node-exporter\", nodename=\"$hostname\"},instance)", + "hide": 2, + "includeAll": false, + "label": "ip", + "multi": false, + "name": "ip", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info{job=\"node-exporter\", nodename=\"$hostname\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "GPU Metrics", + "uid": "syl6zhqkz", + "version": 46, + "weekStart": "" + } diff --git a/soperator/modules/monitoring/templates/dashboards/kube_state_metrics.yaml.tftpl b/soperator/modules/monitoring/templates/dashboards/kube_state_metrics.yaml.tftpl index d80e08e9..81f6a8bd 100644 --- a/soperator/modules/monitoring/templates/dashboards/kube_state_metrics.yaml.tftpl +++ b/soperator/modules/monitoring/templates/dashboards/kube_state_metrics.yaml.tftpl @@ -9,60 +9,14 @@ resources: data: ${filename}: |- { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - }, - { - "name": "VAR_DATASOURCE", - "type": "constant", - "label": "datasource", - "value": "Prometheus", - "description": "" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "7.1.1" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - }, - { - "type": "panel", - "id": "table-old", - "name": "Table (old)", - "version": "" - } - ], "annotations": { "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", @@ -71,29 +25,20 @@ resources: } ] }, - "description": "Summary metrics about kube-state-metrics v2 version(https://github.com/kubernetes/kube-state-metrics); Referenced 6417", + "description": "Summary metrics about kube-state-metrics v2 version(https://github.com/kubernetes/kube-state-metrics)", "editable": true, + "fiscalYearStartMonth": 0, "gnetId": 13332, "graphTooltip": 1, - "id": null, - "iteration": 1627560367926, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": false, - "tags": [ - "kubernetes-app" - ], - "title": "Dashboards", - "type": "dashboards" - } - ], + "id": 12, + "links": [], "panels": [ { "collapsed": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -102,33 +47,58 @@ resources: }, "id": 58, "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], "title": "Cluster", "type": "row" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "#d44a3a", + "value": 90 + } + ] + }, + "unit": "percentunit" }, "overrides": [] }, - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, "gridPos": { "h": 4, "w": 6, @@ -136,42 +106,30 @@ resources: "y": 1 }, "id": 4, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "tableColumn": "", + "pluginVersion": "11.1.0", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_info{cluster=~\"$cluster\",node=~\"$node\"}) / sum(kube_node_status_allocatable{cluster=~\"$cluster\",resource=\"pods\",node=~\"$node\"})", "format": "time_series", "interval": "", @@ -180,43 +138,49 @@ resources: "refId": "A" } ], - "thresholds": "80,90", "title": "Cluster Pod Requested", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "#d44a3a", + "value": 90 + } + ] + }, + "unit": "percentunit" }, "overrides": [] }, - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, "gridPos": { "h": 4, "w": 6, @@ -224,85 +188,79 @@ resources: "y": 1 }, "id": 5, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "tableColumn": "", + "pluginVersion": "11.1.0", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_container_resource_requests{cluster=~\"$cluster\",resource=\"cpu\",node=~\"$node\"})/ sum(kube_node_status_allocatable{node=~\"$node\",cluster=~\"$cluster\",resource=\"cpu\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], - "thresholds": "80,90", "title": "Cluster CPU Requested", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "#d44a3a", + "value": 90 + } + ] + }, + "unit": "percentunit" }, "overrides": [] }, - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, "gridPos": { "h": 4, "w": 6, @@ -310,110 +268,123 @@ resources: "y": 1 }, "id": 6, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, - "tableColumn": "", + "pluginVersion": "11.1.0", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_container_resource_requests{cluster=~\"$cluster\",resource=\"memory\",node=~\"$node\"}) / sum(kube_node_status_allocatable{node=~\"$node\",cluster=~\"$cluster\",resource=\"memory\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], - "thresholds": "80,90", "title": "Cluster Memory Requested", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "pods", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 0, "y": 5 }, - "hiddenSeries": false, "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, "pluginVersion": "7.1.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_node_status_allocatable{cluster=~\"$cluster\",resource=\"pods\",node=~\"$node\"})", "format": "time_series", "intervalFactor": 1, @@ -421,6 +392,9 @@ resources: "refId": "A" }, { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_info{node=~\"$node\",cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 1, @@ -428,96 +402,93 @@ resources: "refId": "C" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Cluster Pod Capacity", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "pods", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 6, "y": 5 }, - "hiddenSeries": false, "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, "pluginVersion": "7.1.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_node_status_capacity{node=~\"$node\",cluster=~\"$cluster\",resource=\"cpu\"})", "format": "time_series", "intervalFactor": 1, @@ -525,6 +496,9 @@ resources: "refId": "A" }, { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_node_status_allocatable{node=~\"$node\",cluster=~\"$cluster\",resource=\"cpu\"})", "format": "time_series", "intervalFactor": 1, @@ -532,6 +506,9 @@ resources: "refId": "B" }, { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_container_resource_requests{cluster=~\"$cluster\",resource=\"cpu\",node=~\"$node\"})", "format": "time_series", "intervalFactor": 1, @@ -539,6 +516,9 @@ resources: "refId": "C" }, { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_container_resource_limits{cluster=~\"$cluster\",resource=\"cpu\",node=~\"$node\"})", "format": "time_series", "intervalFactor": 1, @@ -546,97 +526,93 @@ resources: "refId": "D" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Cluster CPU Capacity", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "short", - "label": "cores", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bits" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 12, "y": 5 }, - "hiddenSeries": false, "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, "pluginVersion": "7.1.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_node_status_allocatable{node=~\"$node\",cluster=~\"$cluster\",resource=\"memory\"})", "format": "time_series", "intervalFactor": 1, @@ -644,6 +620,9 @@ resources: "refId": "A" }, { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_node_status_capacity{node=~\"$node\",cluster=~\"$cluster\",resource=\"memory\"})", "format": "time_series", "intervalFactor": 1, @@ -651,6 +630,9 @@ resources: "refId": "B" }, { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_container_resource_requests{cluster=~\"$cluster\",resource=\"memory\",node=~\"$node\"})", "format": "time_series", "intervalFactor": 1, @@ -658,6 +640,9 @@ resources: "refId": "C" }, { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_container_resource_limits{cluster=~\"$cluster\",resource=\"memory\",node=~\"$node\"})", "format": "time_series", "intervalFactor": 1, @@ -665,50 +650,15 @@ resources: "refId": "D" } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Cluster Mem Capacity", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bits", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "collapsed": true, - "datasource": "prometheus", + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "gridPos": { "h": 1, "w": 24, @@ -716,514 +666,507 @@ resources: "y": 10 }, "id": 22, - "panels": [ + "panels": [], + "targets": [ { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 0, - "y": 11 + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "id": 24, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" + "refId": "A" + } + ], + "title": "Node", + "type": "row" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 11 + }, + "id": 24, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(kube_node_info{cluster=~\"$cluster\"})", - "format": "time_series", - "instant": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" + "expr": "sum(kube_node_info{cluster=~\"$cluster\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Number Of Nodes", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } ], - "thresholds": "", - "title": "Number Of Nodes", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#d44a3a" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 11 + }, + "id": 26, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 4, - "y": 11 + "datasource": { + "uid": "$datasource" }, - "id": 26, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 + "expr": "sum(kube_node_spec_unschedulable{cluster=~\"$cluster\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Nodes Unavailable", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(kube_node_spec_unschedulable{cluster=~\"$cluster\"})", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "1", - "title": "Nodes Unavailable", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": {} + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 3, - "w": 8, - "x": 8, - "y": 11 - }, - "hiddenSeries": false, - "id": 78, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.1.1", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kube_node_status_condition{cluster=~\"$cluster\",condition=\"Ready\",status=\"false\"}==1", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{node}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Node NotReady", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "thresholdsStyle": { + "mode": "off" } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 8, + "y": 11 + }, + "id": 78, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.1.1", + "targets": [ { - "aliasColors": {}, - "bars": false, - "cacheTimeout": null, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 3, - "w": 8, - "x": 16, - "y": 11 - }, - "hiddenSeries": false, - "id": 51, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true + "datasource": { + "uid": "$datasource" }, - "percentage": false, - "pluginVersion": "7.1.1", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kube_node_status_condition{condition=\"DiskPressure\",cluster=~\"$cluster\",status=\"true\"}==1", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{node}} {{condition}}", - "refId": "A" + "expr": "kube_node_status_condition{cluster=~\"$cluster\",condition=\"Ready\",status=\"false\"}==1", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}}", + "refId": "A" + } + ], + "title": "Node NotReady", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "expr": "kube_node_status_condition{condition=\"MemoryPressure\",node=~\"$node\",cluster=~\"$cluster\",status=\"true\"}==1", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{node}} {{condition}}", - "refId": "B" + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "expr": "kube_node_status_condition{condition=~\"PIDPressure\",node=~\"$node\",cluster=~\"$cluster\",status=\"true\"}==1", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{node}} {{condition}}", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Node Pressure", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "thresholdsStyle": { + "mode": "off" } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 16, + "y": 11 + }, + "id": 51, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.1.1", + "targets": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": 0, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 14 - }, - "hiddenSeries": false, - "id": 88, - "legend": { - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true + "datasource": { + "uid": "$datasource" }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "percentage": false, - "pluginVersion": "7.1.1", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(kube_node_info{cluster=~\"$cluster\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "node number", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" + "expr": "kube_node_status_condition{condition=\"DiskPressure\",cluster=~\"$cluster\",status=\"true\"}==1", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{node}} {{condition}}", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + "expr": "kube_node_status_condition{condition=\"MemoryPressure\",node=~\"$node\",cluster=~\"$cluster\",status=\"true\"}==1", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{node}} {{condition}}", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "expr": "kube_node_status_condition{condition=~\"PIDPressure\",node=~\"$node\",cluster=~\"$cluster\",status=\"true\"}==1", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{node}} {{condition}}", + "refId": "C" + } + ], + "title": "Node Pressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 88, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "7.1.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(kube_node_info{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "", + "refId": "A" } ], - "title": "Node", - "type": "row" + "title": "node number", + "type": "timeseries" }, { "collapsed": true, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 11 + "y": 21 }, "id": 14, "panels": [ { "columns": [], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { "custom": {} @@ -1239,7 +1182,6 @@ resources: }, "id": 16, "links": [], - "pageSize": null, "scroll": true, "showHeader": true, "sort": { @@ -1296,6 +1238,9 @@ resources: ], "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "kube_deployment_status_replicas{namespace=~\"$namespace\",cluster=~\"$cluster\"}", "format": "time_series", "instant": true, @@ -1310,7 +1255,7 @@ resources: "type": "table-old" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -1318,10 +1263,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -1340,8 +1302,6 @@ resources: "y": 17 }, "id": 18, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -1355,7 +1315,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -1376,6 +1353,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_deployment_status_replicas{namespace=~\"$namespace\",cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 1, @@ -1384,7 +1364,7 @@ resources: ], "thresholds": "", "title": "Deployment Replicas", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -1396,7 +1376,7 @@ resources: "valueName": "avg" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -1404,10 +1384,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -1426,8 +1423,6 @@ resources: "y": 17 }, "id": 19, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -1441,7 +1436,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -1462,6 +1474,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_deployment_status_replicas_updated{namespace=~\"$namespace\",cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 1, @@ -1470,7 +1485,7 @@ resources: ], "thresholds": "", "title": "Deployment Replicas - Updated", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -1482,7 +1497,7 @@ resources: "valueName": "avg" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -1490,10 +1505,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -1512,8 +1544,6 @@ resources: "y": 17 }, "id": 20, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -1527,7 +1557,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -1548,6 +1595,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_deployment_status_replicas_unavailable{namespace=~\"$namespace\",cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 1, @@ -1556,7 +1606,7 @@ resources: ], "thresholds": "", "title": "Deployment Replicas - Unavailable", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -1568,23 +1618,37 @@ resources: "valueName": "avg" } ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], "title": "Deployments", "type": "row" }, { "collapsed": true, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 12 + "y": 22 }, "id": 71, "panels": [ { "columns": [], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fontSize": "100%", "gridPos": { "h": 5, @@ -1595,7 +1659,6 @@ resources: "id": 75, "links": [], "options": {}, - "pageSize": null, "scroll": true, "showHeader": true, "sort": { @@ -1652,6 +1715,9 @@ resources: ], "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "kube_statefulset_status_replicas_ready{namespace=~\"$namespace\",cluster=~\"$cluster\"}", "format": "time_series", "instant": true, @@ -1668,10 +1734,11 @@ resources: { "aliasColors": {}, "bars": false, - "cacheTimeout": null, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "gridPos": { "h": 5, @@ -1705,6 +1772,9 @@ resources: "steppedLine": false, "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "kube_statefulset_status_replicas_ready{namespace=~\"$namespace\",cluster=~\"$cluster\"}/kube_statefulset_status_replicas{namespace=~\"$namespace\",cluster=~\"$cluster\"}*100", "format": "time_series", "instant": false, @@ -1714,9 +1784,7 @@ resources: } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Statefulset replicas", "tooltip": { "shared": true, @@ -1725,54 +1793,58 @@ resources: }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percent", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], "title": "Statefuleset", "type": "row" }, { "collapsed": true, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 13 + "y": 23 }, "id": 28, "panels": [ { - "cacheTimeout": null, "columns": [], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { "custom": {} @@ -1788,7 +1860,6 @@ resources: }, "id": 68, "links": [], - "pageSize": null, "pluginVersion": "6.2.5", "scroll": true, "showHeader": true, @@ -1800,7 +1871,6 @@ resources: { "alias": "Time", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -1817,7 +1887,6 @@ resources: { "alias": "", "align": "auto", - "colorMode": null, "colors": [ "#56A64B", "#73BF69", @@ -1834,6 +1903,9 @@ resources: ], "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "changes(kube_pod_container_status_restarts_total{namespace=~\"$namespace\",cluster=~\"$cluster\"}[30m])>1", "format": "time_series", "instant": true, @@ -1843,8 +1915,6 @@ resources: "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Pods restart in 30m", "transform": "timeseries_to_rows", "type": "table-old" @@ -1852,10 +1922,11 @@ resources: { "aliasColors": {}, "bars": false, - "cacheTimeout": null, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { "custom": {} @@ -1898,6 +1969,9 @@ resources: "steppedLine": false, "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "kube_pod_status_reason{cluster=~\"$cluster\",namespace=~\"$namespace\", reason=\"Evicted\"}==1", "format": "time_series", "instant": false, @@ -1908,9 +1982,7 @@ resources: } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Evicted", "tooltip": { "shared": true, @@ -1919,9 +1991,7 @@ resources: }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -1931,26 +2001,20 @@ resources: "format": "short", "label": "", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -1958,10 +2022,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -1980,8 +2061,6 @@ resources: "y": 26 }, "id": 30, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -1995,7 +2074,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -2016,6 +2112,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_status_phase{cluster=~\"$cluster\",namespace=~\"$namespace\", phase=\"Running\"})", "format": "time_series", "interval": "", @@ -2025,7 +2124,7 @@ resources: ], "thresholds": "", "title": "Pods Running", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -2037,7 +2136,7 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -2045,10 +2144,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -2067,8 +2183,6 @@ resources: "y": 29 }, "id": 33, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -2082,7 +2196,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -2103,6 +2234,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_status_phase{cluster=~\"$cluster\",namespace=~\"$namespace\", phase=\"Succeeded\"})", "format": "time_series", "interval": "", @@ -2112,7 +2246,7 @@ resources: ], "thresholds": "", "title": "Pods Succeeded", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -2124,7 +2258,7 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -2132,10 +2266,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -2154,8 +2305,6 @@ resources: "y": 29 }, "id": 32, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -2169,7 +2318,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -2190,6 +2356,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_status_phase{cluster=~\"$cluster\",namespace=~\"$namespace\", phase=\"Failed\"})", "format": "time_series", "interval": "", @@ -2199,7 +2368,7 @@ resources: ], "thresholds": "", "title": "Pods Failed", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -2211,7 +2380,7 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -2219,10 +2388,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -2241,8 +2427,6 @@ resources: "y": 32 }, "id": 31, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -2256,7 +2440,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -2277,6 +2478,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_status_phase{cluster=~\"$cluster\",namespace=~\"$namespace\", phase=\"Pending\"})", "format": "time_series", "interval": "", @@ -2286,7 +2490,7 @@ resources: ], "thresholds": "", "title": "Pods Pending", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -2298,7 +2502,7 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -2306,10 +2510,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -2328,8 +2549,6 @@ resources: "y": 32 }, "id": 34, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -2343,7 +2562,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -2364,6 +2600,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_status_phase{cluster=~\"$cluster\",namespace=~\"$namespace\", phase=\"Unknown\"})", "format": "time_series", "interval": "", @@ -2373,7 +2612,7 @@ resources: ], "thresholds": "", "title": "Pods Unknown", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -2385,22 +2624,34 @@ resources: "valueName": "current" } ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], "title": "Pods", "type": "row" }, { "collapsed": true, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 14 + "y": 24 }, "id": 36, "panels": [ { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -2408,10 +2659,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -2430,8 +2698,6 @@ resources: "y": 15 }, "id": 38, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -2445,7 +2711,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -2466,6 +2749,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_container_status_running{namespace=~\"$namespace\",cluster=~\"$cluster\"})", "format": "time_series", "interval": "", @@ -2476,7 +2762,7 @@ resources: ], "thresholds": "", "title": "Containers Running", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -2488,7 +2774,7 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -2496,10 +2782,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -2518,8 +2821,6 @@ resources: "y": 15 }, "id": 39, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -2533,7 +2834,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -2554,6 +2872,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_container_status_waiting{namespace=~\"$namespace\",cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 1, @@ -2562,7 +2883,7 @@ resources: ], "thresholds": "", "title": "Containers Waiting", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -2574,7 +2895,7 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -2582,10 +2903,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -2604,8 +2942,6 @@ resources: "y": 15 }, "id": 40, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -2619,7 +2955,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -2640,6 +2993,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_container_status_terminated{namespace=~\"$namespace\",cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 1, @@ -2648,7 +3004,7 @@ resources: ], "thresholds": "", "title": "Containers Terminated", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -2660,7 +3016,7 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -2668,10 +3024,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -2690,8 +3063,6 @@ resources: "y": 15 }, "id": 41, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -2705,7 +3076,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -2726,6 +3114,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(changes(kube_pod_container_status_restarts_total{namespace=~\"$namespace\",cluster=~\"$cluster\"}[30m]))", "format": "time_series", "intervalFactor": 1, @@ -2734,7 +3125,7 @@ resources: ], "thresholds": "", "title": "Containers Restarts (Last 30 Minutes)", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -2746,7 +3137,7 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -2754,10 +3145,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -2776,8 +3184,6 @@ resources: "y": 18 }, "id": 43, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -2791,7 +3197,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -2812,6 +3235,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_container_resource_requests_cpu_cores{namespace=~\"$namespace\",cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 1, @@ -2820,7 +3246,7 @@ resources: ], "thresholds": "", "title": "CPU Cores Requested by Containers", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -2832,7 +3258,7 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -2840,10 +3266,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -2862,8 +3305,6 @@ resources: "y": 18 }, "id": 42, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -2877,7 +3318,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -2898,6 +3356,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_pod_container_resource_requests_memory_bytes{namespace=~\"$namespace\" ,cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 1, @@ -2906,7 +3367,7 @@ resources: ], "thresholds": "", "title": "Memory Requested By Containers", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -2918,18 +3379,29 @@ resources: "valueName": "current" } ], - "repeat": null, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], "title": "Containers", "type": "row" }, { "collapsed": true, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 15 + "y": 25 }, "id": 80, "panels": [ @@ -2938,7 +3410,10 @@ resources: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "fieldConfig": { "defaults": { "custom": {} @@ -2983,6 +3458,10 @@ resources: "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "expr": "kube_hpa_status_current_replicas{cluster=~\"$cluster\",namespace=~\"$namespace\"}", "instant": false, "interval": "", @@ -2990,6 +3469,10 @@ resources: "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "expr": "kube_hpa_spec_max_replicas{cluster=~\"$cluster\",namespace=~\"$namespace\"}", "instant": false, "interval": "", @@ -2997,6 +3480,10 @@ resources: "refId": "B" }, { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "expr": "kube_hpa_spec_min_replicas{cluster=~\"$cluster\",namespace=~\"$namespace\"}", "instant": false, "interval": "", @@ -3005,9 +3492,7 @@ resources: } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "hpa", "tooltip": { "shared": true, @@ -3016,33 +3501,24 @@ resources: }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3050,7 +3526,10 @@ resources: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "fieldConfig": { "defaults": { "custom": {} @@ -3092,6 +3571,10 @@ resources: "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "expr": "sum(kube_hpa_status_current_replicas{hpa=~\".*\"})", "instant": false, "interval": "", @@ -3100,9 +3583,7 @@ resources: } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "total-hpa-current", "tooltip": { "shared": true, @@ -3111,33 +3592,24 @@ resources: }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3145,7 +3617,10 @@ resources: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "fieldConfig": { "defaults": { "custom": {} @@ -3187,6 +3662,10 @@ resources: "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "expr": "kube_hpa_status_current_replicas{hpa=~\".*\"} == kube_hpa_spec_max_replicas{hpa=~\".*\"}", "instant": false, "interval": "", @@ -3195,9 +3674,7 @@ resources: } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "current==max", "tooltip": { "shared": true, @@ -3206,33 +3683,24 @@ resources: }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3240,7 +3708,10 @@ resources: "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "fieldConfig": { "defaults": { "custom": {} @@ -3282,6 +3753,10 @@ resources: "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "expr": "kube_hpa_status_current_replicas{hpa=~\".*\"} == kube_hpa_spec_min_replicas{hpa=~\".*\"}", "instant": false, "interval": "", @@ -3290,9 +3765,7 @@ resources: } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "current==min", "tooltip": { "shared": true, @@ -3301,52 +3774,55 @@ resources: }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], "title": "HPA", "type": "row" }, { "collapsed": true, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 16 + "y": 26 }, "id": 45, "panels": [ { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -3354,10 +3830,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -3376,8 +3869,6 @@ resources: "y": 8 }, "id": 47, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -3391,7 +3882,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -3412,6 +3920,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_job_status_succeeded{namespace=~\"$namespace\",cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 1, @@ -3420,7 +3931,7 @@ resources: ], "thresholds": "", "title": "Jobs Succeeded", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -3432,9 +3943,10 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, "columns": [], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "description": "", "fieldConfig": { "defaults": { @@ -3452,12 +3964,10 @@ resources: "hideTimeOverride": false, "id": 62, "links": [], - "pageSize": null, "pluginVersion": "6.2.5", "scroll": true, "showHeader": true, "sort": { - "col": null, "desc": false }, "styles": [ @@ -3471,7 +3981,6 @@ resources: { "alias": "Job", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -3486,7 +3995,6 @@ resources: { "alias": "Day", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -3503,6 +4011,9 @@ resources: ], "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "(time()-kube_job_status_completion_time{namespace=~\"$namespace\",cluster=~\"$cluster\"})/60/60/24>1", "format": "time_series", "instant": true, @@ -3512,17 +4023,16 @@ resources: "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Job last success time from now", "transform": "timeseries_to_rows", "transparent": true, "type": "table-old" }, { - "cacheTimeout": null, "columns": [], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "description": "", "fieldConfig": { "defaults": { @@ -3540,7 +4050,6 @@ resources: "hideTimeOverride": false, "id": 76, "links": [], - "pageSize": null, "pluginVersion": "6.2.5", "scroll": true, "showHeader": true, @@ -3559,7 +4068,6 @@ resources: { "alias": "Job", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -3574,7 +4082,6 @@ resources: { "alias": "Failed Pod number", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -3591,6 +4098,9 @@ resources: ], "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "kube_job_status_failed{namespace=~\"$namespace\",cluster=~\"$cluster\"}>1", "format": "time_series", "instant": true, @@ -3600,15 +4110,13 @@ resources: "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Job failed", "transform": "timeseries_to_rows", "transparent": true, "type": "table-old" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -3616,10 +4124,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -3638,8 +4163,6 @@ resources: "y": 11 }, "id": 48, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -3653,7 +4176,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -3674,6 +4214,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_job_status_active{namespace=~\"$namespace\",cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 1, @@ -3682,7 +4225,7 @@ resources: ], "thresholds": "", "title": "Jobs Active", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -3694,7 +4237,7 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -3702,10 +4245,27 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, @@ -3724,8 +4284,6 @@ resources: "y": 14 }, "id": 49, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -3739,7 +4297,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -3760,6 +4335,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_job_status_failed{namespace=~\"$namespace\",cluster=~\"$cluster\"})", "format": "time_series", "intervalFactor": 1, @@ -3768,7 +4346,7 @@ resources: ], "thresholds": "", "title": "Jobs Failed", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -3780,22 +4358,34 @@ resources: "valueName": "current" } ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], "title": "Jobs", "type": "row" }, { "collapsed": true, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 27 }, "id": 64, "panels": [ { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -3803,7 +4393,30 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, "format": "none", "gauge": { "maxValue": 100, @@ -3819,8 +4432,6 @@ resources: "y": 17 }, "id": 66, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -3834,9 +4445,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, - "options": {}, - "pluginVersion": "6.2.5", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -3857,6 +4483,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_persistentvolumeclaim_status_phase{phase=\"Bound\"}==1)", "format": "time_series", "intervalFactor": 1, @@ -3865,10 +4494,8 @@ resources: } ], "thresholds": "", - "timeFrom": null, - "timeShift": null, "title": "pvc is bound", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -3880,7 +4507,7 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, + "autoMigrateFrom": "singlestat", "colorBackground": false, "colorValue": false, "colors": [ @@ -3888,7 +4515,30 @@ resources: "rgba(237, 129, 40, 0.89)", "#d44a3a" ], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, "format": "none", "gauge": { "maxValue": 100, @@ -3904,8 +4554,6 @@ resources: "y": 17 }, "id": 69, - "interval": null, - "links": [], "mappingType": 1, "mappingTypes": [ { @@ -3919,9 +4567,24 @@ resources: ], "maxDataPoints": 100, "nullPointMode": "connected", - "nullText": null, - "options": {}, - "pluginVersion": "6.2.5", + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", "postfix": "", "postfixFontSize": "50%", "prefix": "", @@ -3942,6 +4605,9 @@ resources: "tableColumn": "", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "sum(kube_persistentvolumeclaim_status_phase{phase!=\"Bound\"}==1)", "format": "time_series", "intervalFactor": 1, @@ -3950,10 +4616,8 @@ resources: } ], "thresholds": "", - "timeFrom": null, - "timeShift": null, "title": "pvc is not bound", - "type": "singlestat", + "type": "stat", "valueFontSize": "80%", "valueMaps": [ { @@ -3965,9 +4629,10 @@ resources: "valueName": "current" }, { - "cacheTimeout": null, "columns": [], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fontSize": "100%", "gridPos": { "h": 8, @@ -3978,7 +4643,6 @@ resources: "id": 67, "links": [], "options": {}, - "pageSize": null, "pluginVersion": "6.2.5", "scroll": true, "showHeader": true, @@ -3997,7 +4661,6 @@ resources: { "alias": "", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -4012,6 +4675,9 @@ resources: ], "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "kube_persistentvolumeclaim_resource_requests_storage_bytes/1024/1024/1024", "format": "time_series", "instant": true, @@ -4020,35 +4686,43 @@ resources: "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "pvc", "transform": "timeseries_to_rows", "type": "table-old" } ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], "title": "PVC", "type": "row" } ], "refresh": false, - "schemaVersion": 26, - "style": "dark", - "tags": [ - "kubernetes", - "kubernetes-app" - ], + "schemaVersion": 39, + "tags": [], "templating": { "list": [ { - "allValue": null, - "current": {}, - "datasource": "$datasource", + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, "definition": "label_values(kube_node_info, cluster)", - "error": null, "hide": 0, "includeAll": false, - "label": null, "multi": false, "name": "cluster", "options": [], @@ -4058,20 +4732,23 @@ resources: "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { - "allValue": null, - "current": {}, - "datasource": "$datasource", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, "definition": "label_values(kube_node_info{cluster=~\"$cluster\"}, node)", - "error": null, "hide": 0, "includeAll": true, - "label": null, "multi": false, "name": "node", "options": [], @@ -4081,17 +4758,22 @@ resources: "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": "", - "current": {}, - "datasource": "$datasource", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, "definition": "", - "error": null, "hide": 0, "includeAll": true, "label": "", @@ -4104,28 +4786,13 @@ resources: "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { - "current": { - "value": "prometheus", - "text": "prometheus", - "selected": false - }, - "error": null, "hide": 2, - "label": null, "name": "datasource", - "options": [ - { - "value": "prometheus", - "text": "prometheus", - "selected": false - } - ], "query": "prometheus", "skipUrlSync": false, "type": "constant" @@ -4161,7 +4828,8 @@ resources: ] }, "timezone": "browser", - "title": "kube-state-metrics-v2", + "title": "K8s cluster metrics", "uid": "garysdevil-kube-state-metrics-v2", - "version": 12 + "version": 3, + "weekStart": "" } diff --git a/soperator/modules/monitoring/templates/dashboards/node_exporter.yaml.tftpl b/soperator/modules/monitoring/templates/dashboards/node_exporter.yaml.tftpl index f636f84d..358bdb0c 100644 --- a/soperator/modules/monitoring/templates/dashboards/node_exporter.yaml.tftpl +++ b/soperator/modules/monitoring/templates/dashboards/node_exporter.yaml.tftpl @@ -9,18039 +9,18002 @@ resources: data: ${filename}: |- { - "annotations": { - "list": [ - { - "$$hashKey": "object:1058", - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 1860, - "graphTooltip": 1, - "id": 8, - "links": [ - { - "icon": "external link", - "tags": [], - "targetBlank": true, - "title": "GitHub", - "type": "link", - "url": "https://github.com/rfmoz/grafana-dashboards" - }, - { - "icon": "external link", - "tags": [], - "targetBlank": true, - "title": "Grafana", - "type": "link", - "url": "https://grafana.com/grafana/dashboards/1860" - } - ], - "liveNow": false, - "panels": [ - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 261, - "panels": [], - "targets": [ + "annotations": { + "list": [ { + "$$hashKey": "object:1058", + "builtIn": 1, "datasource": { - "type": "prometheus", - "uid": "000000001" + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" }, - "refId": "A" + "type": "dashboard" } - ], - "title": "Quick CPU / Mem / Disk", - "type": "row" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "Resource pressure via PSI", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "links": [], - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "percentage", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "dark-yellow", - "value": 70 - }, - { - "color": "dark-red", - "value": 90 - } - ] - }, - "unit": "percentunit" + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 1860, + "graphTooltip": 1, + "id": 10, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "000000001" }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 0, - "y": 1 - }, - "id": 323, - "options": { - "displayMode": "basic", - "maxVizHeight": 300, - "minVizHeight": 10, - "minVizWidth": 0, - "namePlacement": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 }, - "showUnfilled": true, - "sizing": "auto", - "text": {}, - "valueMode": "color" + "id": 261, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Quick CPU / Mem / Disk", + "type": "row" }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_cpu_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "CPU", - "range": false, - "refId": "CPU some", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_memory_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "Mem", - "range": false, - "refId": "Memory some", - "step": 240 + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "irate(node_pressure_io_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "I/O", - "range": false, - "refId": "I/O some", - "step": 240 - } - ], - "title": "Pressure", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "Busy state of all CPU cores together", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" + "description": "Resource pressure via PSI", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "dark-yellow", + "value": 70 + }, + { + "color": "dark-red", + "value": 90 } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] + ] + }, + "unit": "percentunit" }, - "unit": "percent" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 3, - "y": 1 - }, - "id": 20, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 1 }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "id": 323, + "options": { + "displayMode": "basic", + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "irate(node_pressure_cpu_waiting_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "CPU", + "range": false, + "refId": "CPU some", + "step": 240 }, - "editorMode": "code", - "exemplar": false, - "expr": "100 * (1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", instance=\"$node\"}[$__rate_interval])))", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "", - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "CPU Busy", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "System load over all CPU cores together", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "irate(node_pressure_memory_waiting_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "Mem", + "range": false, + "refId": "Memory some", + "step": 240 }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "irate(node_pressure_io_waiting_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "I/O", + "range": false, + "refId": "I/O some", + "step": 240 + } + ], + "title": "Pressure", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Busy state of all CPU cores together", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [ { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - ] + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 85 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "percent" }, - "unit": "percent" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 6, - "y": 1 - }, - "id": 155, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "gridPos": { + "h": 4, + "w": 3, + "x": 3, + "y": 1 }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "scalar(node_load1{instance=\"$node\",job=\"$job\"}) * 100 / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Sys Load", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "id": 20, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "100 * (1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", instance=\"$ip\"}[$__rate_interval])))", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "range": false, + "refId": "A", + "step": 240 + } + ], + "title": "CPU Busy", + "type": "gauge" }, - "description": "Non available RAM memory", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "System load over all CPU cores together", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [ { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - ] + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 85 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "percent" }, - "unit": "percent" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 9, - "y": 1 - }, - "hideTimeOverride": false, - "id": 16, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 1 }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "((node_memory_MemTotal_bytes{instance=\"$node\", job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\", job=\"$job\"}) / node_memory_MemTotal_bytes{instance=\"$node\", job=\"$job\"}) * 100", - "format": "time_series", - "hide": true, - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 + "id": 155, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "(1 - (node_memory_MemAvailable_bytes{instance=\"$node\", job=\"$job\"} / node_memory_MemTotal_bytes{instance=\"$node\", job=\"$job\"})) * 100", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "B", - "step": 240 - } - ], - "title": "RAM Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "scalar(node_load1{instance=\"$ip\"}) * 100 / count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu))", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "range": false, + "refId": "A", + "step": 240 + } + ], + "title": "Sys Load", + "type": "gauge" }, - "description": "Used Swap", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Non available RAM memory", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 10 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 25 - } - ] + ] + }, + "unit": "percent" }, - "unit": "percent" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 12, - "y": 1 - }, - "id": 21, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 1 }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "hideTimeOverride": false, + "id": 16, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "((node_memory_MemTotal_bytes{instance=\"$ip\", job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$ip\", job=\"$job\"}) / node_memory_MemTotal_bytes{instance=\"$ip\", job=\"$job\"}) * 100", + "format": "time_series", + "hide": true, + "instant": true, + "intervalFactor": 1, + "range": false, + "refId": "A", + "step": 240 }, - "editorMode": "code", - "exemplar": false, - "expr": "((node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"})) * 100", - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "SWAP Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(1 - (node_memory_MemAvailable_bytes{instance=\"$ip\", job=\"$job\"} / node_memory_MemTotal_bytes{instance=\"$ip\", job=\"$job\"})) * 100", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "range": false, + "refId": "B", + "step": 240 + } + ], + "title": "RAM Used", + "type": "gauge" }, - "description": "Used Root FS", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Used Swap", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [ { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - ] + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 25 + } + ] + }, + "unit": "percent" }, - "unit": "percent" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 15, - "y": 1 - }, - "id": 154, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 1 }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"})", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Root FS Used", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "id": 21, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "((node_memory_SwapTotal_bytes{instance=\"$ip\"} - node_memory_SwapFree_bytes{instance=\"$ip\"}) / (node_memory_SwapTotal_bytes{instance=\"$ip\"})) * 100", + "instant": true, + "intervalFactor": 1, + "range": false, + "refId": "A", + "step": 240 + } + ], + "title": "SWAP Used", + "type": "gauge" }, - "description": "Total number of CPU cores", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Used Root FS", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [ { - "color": "red", - "value": 80 + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - ] + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" }, - "unit": "short" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 1 - }, - "id": 14, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 1 + }, + "id": 154, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$ip\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{instance=\"$ip\",mountpoint=\"/\",fstype!=\"rootfs\"})", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "range": false, + "refId": "A", + "step": 240 + } + ], + "title": "Root FS Used", + "type": "gauge" }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "CPU Cores", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "System uptime", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Total number of CPU cores", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ { - "color": "red", - "value": 80 + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - ] + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "unit": "s" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 4, - "x": 20, - "y": 1 - }, - "hideTimeOverride": true, - "id": 15, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "gridPos": { + "h": 2, + "w": 2, + "x": 18, + "y": 1 }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_time_seconds{instance=\"$node\",job=\"$job\"} - node_boot_time_seconds{instance=\"$node\",job=\"$job\"}", - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "Uptime", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "id": 14, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu))", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "CPU Cores", + "type": "stat" }, - "description": "Total RootFS", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 70 - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "System uptime", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [ { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - ] + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" }, - "unit": "bytes" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 3 - }, - "id": 23, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "gridPos": { + "h": 2, + "w": 4, + "x": 20, + "y": 1 }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "RootFS Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "hideTimeOverride": true, + "id": 15, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "node_time_seconds{instance=\"$ip\"} - node_boot_time_seconds{instance=\"$ip\"}", + "instant": true, + "intervalFactor": 1, + "range": false, + "refId": "A", + "step": 240 + } + ], + "title": "Uptime", + "type": "stat" }, - "description": "Total RAM", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Total RootFS", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ { - "color": "red", - "value": 80 + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - ] + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 70 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "bytes" }, - "unit": "bytes" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 20, - "y": 3 - }, - "id": 75, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "gridPos": { + "h": 2, + "w": 2, + "x": 18, + "y": 3 }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "RAM Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "id": 23, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "node_filesystem_size_bytes{instance=\"$ip\",mountpoint=\"/\",fstype!=\"rootfs\"}", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "range": false, + "refId": "A", + "step": 240 + } + ], + "title": "RootFS Total", + "type": "stat" }, - "description": "Total SWAP", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Total RAM", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ { - "color": "red", - "value": 80 + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - ] + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "unit": "bytes" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 22, - "y": 3 - }, - "id": 18, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "gridPos": { + "h": 2, + "w": 2, + "x": 20, + "y": 3 }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"}", - "instant": true, - "intervalFactor": 1, - "range": false, - "refId": "A", - "step": 240 - } - ], - "title": "SWAP Total", - "type": "stat" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 263, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "refId": "A" - } - ], - "title": "Basic CPU / Mem / Net / Disk", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "id": 75, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "node_memory_MemTotal_bytes{instance=\"$ip\"}", + "instant": true, + "intervalFactor": 1, + "range": false, + "refId": "A", + "step": 240 + } + ], + "title": "RAM Total", + "type": "stat" }, - "description": "Basic CPU info", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Total SWAP", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ { - "color": "red", - "value": 80 + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - ] + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "unit": "percentunit" + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 22, + "y": 3 + }, + "id": 18, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "node_memory_SwapTotal_bytes{instance=\"$ip\"}", + "instant": true, + "intervalFactor": 1, + "range": false, + "refId": "A", + "step": 240 + } + ], + "title": "SWAP Total", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "000000001" }, - "overrides": [ + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 263, + "panels": [], + "targets": [ { - "matcher": { - "id": "byName", - "options": "Busy Iowait" + "datasource": { + "type": "prometheus", + "uid": "000000001" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } + "refId": "A" + } + ], + "title": "Basic CPU / Mem / Net / Disk", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Basic CPU info", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "percent" + }, + "thresholdsStyle": { + "mode": "off" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Idle" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 } - } - ] + ] + }, + "unit": "percentunit" }, - { - "matcher": { - "id": "byName", - "options": "Busy Iowait" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Busy Iowait" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + } + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Idle" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Idle" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Busy Iowait" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy System" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Idle" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy User" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Busy System" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Busy Other" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Busy User" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A437C", + "mode": "fixed" + } } - } - ] + ] + }, + { + "matcher": { + "id": "byName", + "options": "Busy Other" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 6 + }, + "id": 77, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "width": 250 + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 6 - }, - "id": 77, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "width": 250 }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"system\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Busy System", + "range": true, + "refId": "A", + "step": 240 }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"system\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "hide": false, - "instant": false, - "intervalFactor": 1, - "legendFormat": "Busy System", - "range": true, - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"user\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Busy User", + "range": true, + "refId": "B", + "step": 240 }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"user\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Busy User", - "range": true, - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"iowait\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Busy Iowait", - "range": true, - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"iowait\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Busy Iowait", + "range": true, + "refId": "C", + "step": 240 }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=~\".*irq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Busy IRQs", - "range": true, - "refId": "D", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=~\".*irq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Busy IRQs", + "range": true, + "refId": "D", + "step": 240 }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq'}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Busy Other", - "range": true, - "refId": "E", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq'}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Busy Other", + "range": true, + "refId": "E", + "step": 240 }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"idle\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Idle", - "range": true, - "refId": "F", - "step": 240 - } - ], - "title": "CPU Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"idle\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Idle", + "range": true, + "refId": "F", + "step": 240 + } + ], + "title": "CPU Basic", + "type": "timeseries" }, - "description": "Basic memory usage", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" + "description": "Basic memory usage", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 } - } - ] + ] + }, + "unit": "bytes" }, - { - "matcher": { - "id": "byName", - "options": "Buffers" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Apps" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629E51", + "mode": "fixed" + } + } + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Buffers" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Cached" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Committed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Free" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A437C", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CFFAFF", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Inactive" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "PageTables" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Page_Tables" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "RAM_Free" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "SWAP Used" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "SWAP Used" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Slab" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#806EB7", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Swap" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap Used" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Swap Used" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Swap_Free" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#2F575E", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Unused" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM Total" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "RAM Total" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM Cache + Buffer" }, - { - "id": "custom.fillOpacity", - "value": 0 + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM Free" }, - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "normal" + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } } - } - ] + ] + }, + { + "matcher": { + "id": "byName", + "options": "Available" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#DEDAF7", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 6 + }, + "id": 78, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "width": 350 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_MemTotal_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "RAM Total", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "RAM Cache + Buffer" + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_MemTotal_bytes{instance=\"$ip\"} - node_memory_MemFree_bytes{instance=\"$ip\"} - (node_memory_Cached_bytes{instance=\"$ip\"} + node_memory_Buffers_bytes{instance=\"$ip\"} + node_memory_SReclaimable_bytes{instance=\"$ip\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "RAM Used", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] + "expr": "node_memory_Cached_bytes{instance=\"$ip\"} + node_memory_Buffers_bytes{instance=\"$ip\"} + node_memory_SReclaimable_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "RAM Cache + Buffer", + "refId": "C", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "RAM Free" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] + "expr": "node_memory_MemFree_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "RAM Free", + "refId": "D", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "Available" + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "(node_memory_SwapTotal_bytes{instance=\"$ip\"} - node_memory_SwapFree_bytes{instance=\"$ip\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "SWAP Used", + "refId": "E", + "step": 240 + } + ], + "title": "Memory Basic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Basic network info per interface", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#DEDAF7", - "mode": "fixed" + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 } + ] + }, + "unit": "bps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Recv_bytes_eth2" }, - { - "id": "custom.fillOpacity", - "value": 0 + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Recv_bytes_lo" }, - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "normal" + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 6 - }, - "id": 78, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "RAM Total", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - (node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "RAM Used", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "RAM Cache + Buffer", - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "RAM Free", - "refId": "D", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "SWAP Used", - "refId": "E", - "step": 240 - } - ], - "title": "Memory Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "Basic network info per interface", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Recv_bytes_eth2" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Recv_drop_eth2" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Recv_bytes_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Recv_drop_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Recv_drop_eth2" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Recv_errs_eth2" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Recv_drop_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Recv_errs_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CCA300", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Recv_errs_eth2" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Trans_bytes_eth2" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Recv_errs_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CCA300", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Trans_bytes_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_bytes_eth2" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Trans_drop_eth2" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_bytes_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Trans_drop_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_drop_eth2" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Trans_errs_eth2" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_drop_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Trans_errs_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CCA300", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_errs_eth2" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "recv_bytes_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Trans_errs_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CCA300", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "recv_drop_eth0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#99440A", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "recv_bytes_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "recv_drop_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#967302", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "recv_drop_eth0" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "recv_errs_eth0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "recv_drop_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#967302", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "recv_errs_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "recv_errs_eth0" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "trans_bytes_eth0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "recv_errs_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "trans_bytes_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "trans_bytes_eth0" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "trans_drop_eth0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#99440A", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "trans_bytes_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "trans_drop_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#967302", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "trans_drop_eth0" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "trans_errs_eth0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "trans_drop_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#967302", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "trans_errs_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "trans_errs_eth0" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*trans.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" } - } - ] - }, + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 74, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byName", - "options": "trans_errs_lo" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] + "expr": "irate(node_network_receive_bytes_total{instance=\"$ip\"}[$__rate_interval])*8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "recv {{device}}", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*trans.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_network_transmit_bytes_total{instance=\"$ip\"}[$__rate_interval])*8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "trans {{device}} ", + "refId": "B", + "step": 240 } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 13 - }, - "id": 74, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + ], + "title": "Network Traffic Basic", + "type": "timeseries" }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "recv {{device}}", - "refId": "A", - "step": 240 + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "trans {{device}} ", - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Basic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "Disk space used of all filesystems mounted", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "description": "Disk space used of all filesystems mounted", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] + }, + "links": [], + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" }, - "unit": "percent" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 13 + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 152, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$ip\",device!~'rootfs'} * 100) / node_filesystem_size_bytes{instance=\"$ip\",device!~'rootfs'})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{mountpoint}}", + "refId": "A", + "step": 240 + } + ], + "title": "Disk Space Used Basic", + "type": "timeseries" }, - "id": 152, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "000000001" }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 265, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "CPU / Memory / Net / Disk", + "type": "row" }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "percentage", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 70, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "percent" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" }, - "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{mountpoint}}", - "refId": "A", - "step": 240 - } - ], - "title": "Disk Space Used Basic", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 265, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "refId": "A" - } - ], - "title": "CPU / Memory / Net / Disk", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "percentage", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 70, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "percent" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Idle - Waiting for something to happen" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Idle - Waiting for something to happen" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Iowait - Waiting for I/O to complete" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Iowait - Waiting for I/O to complete" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Irq - Servicing interrupts" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Irq - Servicing interrupts" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Nice - Niced processes executing in user mode" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Nice - Niced processes executing in user mode" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Softirq - Servicing softirqs" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Softirq - Servicing softirqs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Steal - Time spent in other operating systems when running in a virtualized environment" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCE2DE", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Steal - Time spent in other operating systems when running in a virtualized environment" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FCE2DE", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "System - Processes executing in kernel mode" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "System - Processes executing in kernel mode" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "User - Normal processes executing in user mode" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#5195CE", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "User - Normal processes executing in user mode" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#5195CE", + "mode": "fixed" + } } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 21 - }, - "id": 3, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 250 - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"system\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "System - Processes executing in kernel mode", - "range": true, - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"user\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "User - Normal processes executing in user mode", - "range": true, - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"nice\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Nice - Niced processes executing in user mode", - "range": true, - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum by(instance) (irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"iowait\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Iowait - Waiting for I/O to complete", - "range": true, - "refId": "E", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"irq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Irq - Servicing interrupts", - "range": true, - "refId": "F", - "step": 240 + ] + } + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"softirq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Softirq - Servicing softirqs", - "range": true, - "refId": "G", - "step": 240 + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 21 }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"steal\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Steal - Time spent in other operating systems when running in a virtualized environment", - "range": true, - "refId": "H", - "step": 240 + "id": 3, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 250 + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "sum(irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\", mode=\"idle\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Idle - Waiting for something to happen", - "range": true, - "refId": "J", - "step": 240 - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"system\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "System - Processes executing in kernel mode", + "range": true, + "refId": "A", + "step": 240 }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"user\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "User - Normal processes executing in user mode", + "range": true, + "refId": "B", + "step": 240 }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"nice\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Nice - Niced processes executing in user mode", + "range": true, + "refId": "C", + "step": 240 }, - "unit": "bytes" - }, - "overrides": [ { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by(instance) (irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"iowait\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Iowait - Waiting for I/O to complete", + "range": true, + "refId": "E", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"irq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Irq - Servicing interrupts", + "range": true, + "refId": "F", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"softirq\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Softirq - Servicing softirqs", + "range": true, + "refId": "G", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"steal\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Steal - Time spent in other operating systems when running in a virtualized environment", + "range": true, + "refId": "H", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "Committed" + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(node_cpu_seconds_total{instance=\"$ip\", mode=\"idle\"}[$__rate_interval])) / scalar(count(count(node_cpu_seconds_total{instance=\"$ip\"}) by (cpu)))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Idle - Waiting for something to happen", + "range": true, + "refId": "J", + "step": 240 + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 } - } - ] + ] + }, + "unit": "bytes" }, - { - "matcher": { - "id": "byName", - "options": "Free" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Apps" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629E51", + "mode": "fixed" + } + } + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Buffers" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Cached" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Committed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Free" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A437C", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CFFAFF", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Inactive" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "PageTables" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Page_Tables" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap - Swap memory usage" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "RAM_Free" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Slab" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#806EB7", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Swap" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused - Free memory unassigned" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Swap - Swap memory usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Hardware Corrupted - *./" + ] }, - "properties": [ - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "normal" - } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 21 - }, - "id": 24, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"} - node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Apps - Memory used by user-space applications", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "PageTables - Memory used to map between virtual and physical memory addresses", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified", - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)", - "refId": "D", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Cache - Parked file data (file content) cache", - "refId": "E", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Buffers - Block device (e.g. harddisk) cache", - "refId": "F", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Unused - Free memory unassigned", - "refId": "G", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Swap - Swap space used", - "refId": "H", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_HardwareCorrupted_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working", - "refId": "I", - "step": 240 - } - ], - "title": "Memory Stack", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bits out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "receive_packets_eth0" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "receive_packets_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Swap_Free" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#2F575E", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "transmit_packets_eth0" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Unused" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "transmit_packets_lo" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "Unused - Free memory unassigned" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Trans.*/" + ] }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 33 - }, - "id": 84, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}} - Receive", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}} - Transmit", - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "matcher": { + "id": "byRegexp", + "options": "/.*Hardware Corrupted - *./" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + } + ] + } + ] }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 33 - }, - "id": 156, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 21 }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{mountpoint}}", - "refId": "A", - "step": 240 - } - ], - "title": "Disk Space Used", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "IO read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "id": 24, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 350 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "overrides": [ + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_MemTotal_bytes{instance=\"$ip\"} - node_memory_MemFree_bytes{instance=\"$ip\"} - node_memory_Buffers_bytes{instance=\"$ip\"} - node_memory_Cached_bytes{instance=\"$ip\"} - node_memory_Slab_bytes{instance=\"$ip\"} - node_memory_PageTables_bytes{instance=\"$ip\"} - node_memory_SwapCached_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Apps - Memory used by user-space applications", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_PageTables_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "PageTables - Memory used to map between virtual and physical memory addresses", + "refId": "B", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] + "expr": "node_memory_SwapCached_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified", + "refId": "C", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Slab_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)", + "refId": "D", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Cached_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Cache - Parked file data (file content) cache", + "refId": "E", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Buffers_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Buffers - Block device (e.g. harddisk) cache", + "refId": "F", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_MemFree_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Unused - Free memory unassigned", + "refId": "G", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "(node_memory_SwapTotal_bytes{instance=\"$ip\"} - node_memory_SwapFree_bytes{instance=\"$ip\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Swap - Swap space used", + "refId": "H", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_HardwareCorrupted_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working", + "refId": "I", + "step": 240 + } + ], + "title": "Memory Stack", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bits out (-) / in (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 } - } - ] + ] + }, + "unit": "bps" }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "receive_packets_eth0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "receive_packets_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "transmit_packets_eth0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "transmit_packets_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*Trans.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" } - } - ] - }, + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 84, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] + "expr": "irate(node_network_receive_bytes_total{instance=\"$ip\"}[$__rate_interval])*8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_network_transmit_bytes_total{instance=\"$ip\"}[$__rate_interval])*8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Transmit", + "refId": "B", + "step": 240 + } + ], + "title": "Network Traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 } - } - ] + ] + }, + "unit": "bytes" }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 156, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_filesystem_size_bytes{instance=\"$ip\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\"$ip\",device!~'rootfs'}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{mountpoint}}", + "refId": "A", + "step": 240 + } + ], + "title": "Disk Space Used", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "IO read (-) / write (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 } - } - ] + ] + }, + "unit": "iops" }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } } - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 45 - }, - "id": 229, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", - "intervalFactor": 4, - "legendFormat": "{{device}} - Reads completed", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", - "intervalFactor": 1, - "legendFormat": "{{device}} - Writes completed", - "refId": "B", - "step": 240 - } - ], - "title": "Disk IOps", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda2_.*/" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "io time" + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BA43A9", + "mode": "fixed" + } + } + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda3_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F4D598", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*read*./" + ] }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda.*/" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#962D82", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byType", - "options": "time" + ] }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "hidden" - } - ] - } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 45 - }, - "id": 42, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#9AC48A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#65C5DB", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9934E", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FCEACA", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9E2D2", + "mode": "fixed" + } + } + ] + } + ] }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{device}} - Successfully read bytes", - "refId": "A", - "step": 240 + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 45 }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{device}} - Successfully written bytes", - "refId": "B", - "step": 240 - } - ], - "title": "I/O Usage Read / Write", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "%util", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 40, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" + "id": 229, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "overrides": [ + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byName", - "options": "io time" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] + "expr": "irate(node_disk_reads_completed_total{instance=\"$ip\",device=~\"$diskdevices\"}[$__rate_interval])", + "intervalFactor": 4, + "legendFormat": "{{device}} - Reads completed", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byType", - "options": "time" - }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "hidden" - } - ] + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_disk_writes_completed_total{instance=\"$ip\",device=~\"$diskdevices\"}[$__rate_interval])", + "intervalFactor": 1, + "legendFormat": "{{device}} - Writes completed", + "refId": "B", + "step": 240 } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 57 + ], + "title": "Disk IOps", + "type": "timeseries" }, - "id": 127, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"} [$__rate_interval])", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{device}}", - "refId": "A", - "step": 240 - } - ], - "title": "I/O Utilization", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "percentage", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "bars", - "fillOpacity": 70, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 3, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "max": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes read (-) / write (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/^Guest - /" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#5195ce", - "mode": "fixed" + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 } - } - ] + ] + }, + "unit": "Bps" }, - { - "matcher": { - "id": "byRegexp", - "options": "/^GuestNice - /" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "io time" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + } + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#c15c17", - "mode": "fixed" + { + "matcher": { + "id": "byRegexp", + "options": "/.*read*./" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" } - } - ] + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byType", + "options": "time" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "hidden" + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 45 + }, + "id": 42, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" } - ] - }, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 57 + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_disk_read_bytes_total{instance=\"$ip\",device=~\"$diskdevices\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}} - Successfully read bytes", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_disk_written_bytes_total{instance=\"$ip\",device=~\"$diskdevices\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}} - Successfully written bytes", + "refId": "B", + "step": 240 + } + ], + "title": "I/O Usage Read / Write", + "type": "timeseries" }, - "id": 319, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "%util", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "io time" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byType", + "options": "time" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "hidden" + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 57 + }, + "id": 127, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_disk_io_time_seconds_total{instance=\"$ip\",device=~\"$diskdevices\"} [$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A", + "step": 240 + } + ], + "title": "I/O Utilization", + "type": "timeseries" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "percentage", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 70, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 3, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" }, - "editorMode": "code", - "expr": "sum by(instance) (irate(node_cpu_guest_seconds_total{instance=\"$node\",job=\"$job\", mode=\"user\"}[1m])) / on(instance) group_left sum by (instance)((irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[1m])))", - "hide": false, - "legendFormat": "Guest - Time spent running a virtual CPU for a guest operating system", - "range": true, - "refId": "A" + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^Guest - /" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#5195ce", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/^GuestNice - /" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#c15c17", + "mode": "fixed" + } + } + ] + } + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 57 + }, + "id": 319, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by(instance) (irate(node_cpu_guest_seconds_total{instance=\"$ip\", mode=\"user\"}[1m])) / on(instance) group_left sum by (instance)((irate(node_cpu_seconds_total{instance=\"$ip\"}[1m])))", + "hide": false, + "legendFormat": "Guest - Time spent running a virtual CPU for a guest operating system", + "range": true, + "refId": "A" }, - "editorMode": "code", - "expr": "sum by(instance) (irate(node_cpu_guest_seconds_total{instance=\"$node\",job=\"$job\", mode=\"nice\"}[1m])) / on(instance) group_left sum by (instance)((irate(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[1m])))", - "hide": false, - "legendFormat": "GuestNice - Time spent running a niced guest (virtual CPU for guest operating system)", - "range": true, - "refId": "B" - } - ], - "title": "CPU spent seconds in guests (VMs)", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "000000001" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by(instance) (irate(node_cpu_guest_seconds_total{instance=\"$ip\", mode=\"nice\"}[1m])) / on(instance) group_left sum by (instance)((irate(node_cpu_seconds_total{instance=\"$ip\"}[1m])))", + "hide": false, + "legendFormat": "GuestNice - Time spent running a niced guest (virtual CPU for guest operating system)", + "range": true, + "refId": "B" + } + ], + "title": "CPU spent seconds in guests (VMs)", + "type": "timeseries" }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 69 + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 69 + }, + "id": 266, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Memory Meminfo", + "type": "row" }, - "id": 266, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Apps" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629E51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cached" }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Committed" }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A437C", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CFFAFF", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PageTables" }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Page_Tables" }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM_Free" }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab" }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#806EB7", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap" }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Free" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#2F575E", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unused" }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 22 - }, - "id": 136, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Inactive_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Inactive - Memory which has been less recently used. It is more eligible to be reclaimed for other purposes", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Active_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Active - Memory that has been used more recently and usually not reclaimed unless absolutely necessary", - "refId": "B", - "step": 240 + } + ] } - ], - "title": "Memory Active / Inactive", - "type": "timeseries" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 70 + }, + "id": 136, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 350 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Inactive_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Inactive - Memory which has been less recently used. It is more eligible to be reclaimed for other purposes", + "refId": "A", + "step": 240 }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Active_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Active - Memory that has been used more recently and usually not reclaimed unless absolutely necessary", + "refId": "B", + "step": 240 + } + ], + "title": "Memory Active / Inactive", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Apps" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629E51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cached" }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Committed" }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A437C", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CFFAFF", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PageTables" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Page_Tables" }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM_Free" }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab" }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#806EB7", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap" }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Free" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#2F575E", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unused" }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*CommitLimit - *./" }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*CommitLimit - *./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 22 - }, - "id": 135, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 70 + }, + "id": 135, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 350 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Committed_AS_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Committed_AS - Amount of memory presently allocated on the system", + "refId": "A", + "step": 240 }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Committed_AS_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Committed_AS - Amount of memory presently allocated on the system", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_CommitLimit_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "CommitLimit - Amount of memory currently available to be allocated on the system", - "refId": "B", - "step": 240 - } - ], - "title": "Memory Committed", - "type": "timeseries" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_CommitLimit_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "CommitLimit - Amount of memory currently available to be allocated on the system", + "refId": "B", + "step": 240 + } + ], + "title": "Memory Committed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Apps" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629E51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cached" }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Committed" }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A437C", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CFFAFF", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PageTables" }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Page_Tables" }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM_Free" }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab" }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#806EB7", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap" }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Free" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#2F575E", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unused" }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 32 - }, - "id": 191, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" + } + ] } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Inactive_file_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Inactive_file - File-backed memory on inactive LRU list", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Inactive_anon_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Inactive_anon - Anonymous and swap cache on inactive LRU list, including tmpfs (shmem)", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Active_file_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Active_file - File-backed memory on active LRU list", - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Active_anon_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Active_anon - Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs", - "refId": "D", - "step": 240 - } - ], - "title": "Memory Active / Inactive Detail", - "type": "timeseries" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 80 + }, + "id": 191, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 350 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Inactive_file_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Inactive_file - File-backed memory on inactive LRU list", + "refId": "A", + "step": 240 }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Inactive_anon_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Inactive_anon - Anonymous and swap cache on inactive LRU list, including tmpfs (shmem)", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Active_file_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Active_file - File-backed memory on active LRU list", + "refId": "C", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Active_anon_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Active_anon - Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs", + "refId": "D", + "step": 240 + } + ], + "title": "Memory Active / Inactive Detail", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Active" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#99440A", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#58140C", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cached" }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Committed" }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dirty" }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#B7DBAB", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mapped" }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PageTables" }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Page_Tables" }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap" }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total" }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total RAM" }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total RAM + Swap" }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Swap" }, - { - "matcher": { - "id": "byName", - "options": "Total Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VmallocUsed" }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 32 - }, - "id": 130, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" + } + ] } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 80 + }, + "id": 130, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Writeback_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writeback - Memory which is actively being written back to disk", + "refId": "A", + "step": 240 }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Writeback_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Writeback - Memory which is actively being written back to disk", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_WritebackTmp_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "WritebackTmp - Memory used by FUSE for temporary writeback buffers", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Dirty_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Dirty - Memory which is waiting to get written back to the disk", - "refId": "C", - "step": 240 - } - ], - "title": "Memory Writeback and Dirty", - "type": "timeseries" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_WritebackTmp_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "WritebackTmp - Memory used by FUSE for temporary writeback buffers", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Dirty_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Dirty - Memory which is waiting to get written back to the disk", + "refId": "C", + "step": 240 + } + ], + "title": "Memory Writeback and Dirty", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Apps" }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629E51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cached" }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Committed" }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A437C", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CFFAFF", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PageTables" }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Page_Tables" }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM_Free" }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab" }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#806EB7", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Free" }, - { - "matcher": { - "id": "byName", - "options": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#2F575E", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unused" }, - { - "matcher": { - "id": "byName", - "options": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" } - ] - } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 90 + }, + "id": 138, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 350 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Mapped_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Mapped - Used memory in mapped pages files which have been mapped, such as libraries", + "refId": "A", + "step": 240 }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 42 + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Shmem_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Shmem - Used shared memory (shared between several processes, thus including RAM disks)", + "refId": "B", + "step": 240 }, - "id": 138, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_ShmemHugePages_bytes{instance=\"$ip\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", + "refId": "C", + "step": 240 }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Mapped_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Mapped - Used memory in mapped pages files which have been mapped, such as libraries", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Shmem_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Shmem - Used shared memory (shared between several processes, thus including RAM disks)", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_ShmemHugePages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_ShmemPmdMapped_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "ShmemPmdMapped - Amount of shared (shmem/tmpfs) memory backed by huge pages", - "refId": "D", - "step": 240 - } - ], - "title": "Memory Shared and Mapped", - "type": "timeseries" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_ShmemPmdMapped_bytes{instance=\"$ip\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "ShmemPmdMapped - Amount of shared (shmem/tmpfs) memory backed by huge pages", + "refId": "D", + "step": 240 + } + ], + "title": "Memory Shared and Mapped", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Apps" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Apps" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629E51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cached" }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Committed" }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A437C", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" }, - { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CFFAFF", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PageTables" }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Page_Tables" }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM_Free" }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab" }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#806EB7", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap" }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Free" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#2F575E", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unused" }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 42 - }, - "id": 160, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" + } + ] } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 90 + }, + "id": 160, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 350 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_KernelStack_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "KernelStack - Kernel memory stack. This is not reclaimable", + "refId": "A", + "step": 240 }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_KernelStack_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "KernelStack - Kernel memory stack. This is not reclaimable", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_Percpu_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "PerCPU - Per CPU memory allocated dynamically by loadable modules", - "refId": "B", - "step": 240 - } - ], - "title": "Memory Kernel / CPU", - "type": "timeseries" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_Percpu_bytes{instance=\"$ip\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "PerCPU - Per CPU memory allocated dynamically by loadable modules", + "refId": "B", + "step": 240 + } + ], + "title": "Memory Kernel / CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Active" }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#99440A", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#58140C", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cached" }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Committed" }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dirty" }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#B7DBAB", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mapped" }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PageTables" }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Page_Tables" }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total" }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total RAM" }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total RAM + Swap" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VmallocUsed" }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 52 - }, - "id": 70, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" + } + ] } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 100 + }, + "id": 70, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_VmallocChunk_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "VmallocChunk - Largest contiguous block of vmalloc area which is free", + "refId": "A", + "step": 240 }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_VmallocChunk_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "VmallocChunk - Largest contiguous block of vmalloc area which is free", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_VmallocTotal_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "VmallocTotal - Total size of vmalloc memory area", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_VmallocUsed_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "VmallocUsed - Amount of vmalloc area which is used", - "refId": "C", - "step": 240 - } - ], - "title": "Memory Vmalloc", - "type": "timeseries" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_VmallocTotal_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "VmallocTotal - Total size of vmalloc memory area", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_VmallocUsed_bytes{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "VmallocUsed - Amount of vmalloc area which is used", + "refId": "C", + "step": 240 + } + ], + "title": "Memory Vmalloc", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Active" }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#99440A", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#58140C", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cached" }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Committed" }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dirty" }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#B7DBAB", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mapped" }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PageTables" }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Page_Tables" }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total" }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total RAM" }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total RAM + Swap" }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VmallocUsed" }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Inactive *./" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 52 - }, - "id": 129, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*Inactive *./" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 100 + }, + "id": 129, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_AnonHugePages_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "AnonHugePages - Memory in anonymous huge pages", + "refId": "A", + "step": 240 }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_AnonHugePages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "AnonHugePages - Memory in anonymous huge pages", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_AnonPages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "AnonPages - Memory in user pages not backed by files", - "refId": "B", - "step": 240 - } - ], - "title": "Memory Anonymous", - "type": "timeseries" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_AnonPages_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "AnonPages - Memory in user pages not backed by files", + "refId": "B", + "step": 240 + } + ], + "title": "Memory Anonymous", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Active" }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#99440A", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#58140C", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cached" }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Committed" }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dirty" }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#B7DBAB", + "mode": "fixed" } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mapped" }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PageTables" }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Page_Tables" }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap" }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total" }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total RAM" }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total RAM + Swap" }, - { - "matcher": { - "id": "byName", - "options": "Total Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" } - ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Swap" }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 62 - }, - "id": 132, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_memory_NFS_Unstable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "NFS Unstable - Memory in NFS pages sent to the server, but not yet committed to the storage", - "refId": "A", - "step": 240 - } - ], - "title": "Memory NFS", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "refId": "A" - } - ], - "title": "Memory Meminfo", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 70 - }, - "id": 267, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "pages out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VmallocUsed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 41 - }, - "id": 176, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_vmstat_pgpgin{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Pagesin - Page in operations", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_vmstat_pgpgout{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Pagesout - Page out operations", - "refId": "B", - "step": 240 + } + ] } - ], - "title": "Memory Pages In / Out", - "type": "timeseries" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "pages out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*out/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 41 - }, - "id": 22, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_vmstat_pswpin{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Pswpin - Pages swapped in", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_vmstat_pswpout{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Pswpout - Pages swapped out", - "refId": "B", - "step": 240 - } - ], - "title": "Memory Pages Swap In / Out", - "type": "timeseries" + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 110 }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "faults", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ + "id": 132, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_NFS_Unstable_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "NFS Unstable - Memory in NFS pages sent to the server, but not yet committed to the storage", + "refId": "A", + "step": 240 + } + ], + "title": "Memory NFS", + "type": "timeseries" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 120 + }, + "id": 267, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "pages out (-) / in (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*out/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 176, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byName", - "options": "Apps" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] + "expr": "irate(node_vmstat_pgpgin{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pagesin - Page in operations", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "Buffers" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" + "expr": "irate(node_vmstat_pgpgout{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pagesout - Page out operations", + "refId": "B", + "step": 240 + } + ], + "title": "Memory Pages In / Out", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "pages out (-) / in (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*out/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" } - } - ] - }, + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 22, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byName", - "options": "Cache" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] + "expr": "irate(node_vmstat_pswpin{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pswpin - Pages swapped in", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "Cached" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, + "expr": "irate(node_vmstat_pswpout{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pswpout - Pages swapped out", + "refId": "B", + "step": 240 + } + ], + "title": "Memory Pages Swap In / Out", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "faults", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Apps" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629E51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cached" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Committed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A437C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#CFFAFF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PageTables" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Page_Tables" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RAM_Free" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#806EB7", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Free" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#2F575E", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Unused" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pgfault - Page major and minor fault operations" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "normal" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 51 + }, + "id": 175, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 350 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byName", - "options": "Committed" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] + "expr": "irate(node_vmstat_pgfault{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pgfault - Page major and minor fault operations", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "Free" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A437C", - "mode": "fixed" - } - } - ] + "expr": "irate(node_vmstat_pgmajfault{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pgmajfault - Major page fault operations", + "refId": "B", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#CFFAFF", - "mode": "fixed" - } - } - ] - }, + "expr": "irate(node_vmstat_pgfault{instance=\"$ip\"}[$__rate_interval]) - irate(node_vmstat_pgmajfault{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pgminfault - Minor page fault operations", + "refId": "C", + "step": 240 + } + ], + "title": "Memory Page Faults", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Active" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#99440A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#58140C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6D1F62", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cached" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Committed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#508642", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dirty" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#B7DBAB", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Mapped" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PageTables" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Page_Tables" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Slab_Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Swap_Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#511749", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total RAM" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total RAM + Swap" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Swap" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "VmallocUsed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 51 + }, + "id": 307, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byName", - "options": "Inactive" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, + "expr": "irate(node_vmstat_oom_kill{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "oom killer invocations ", + "refId": "A", + "step": 240 + } + ], + "title": "OOM Killer", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Memory Vmstat", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 121 + }, + "id": 293, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Variation*./" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 260, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_timex_estimated_error_seconds{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Estimated error in seconds", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_timex_offset_seconds{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Time offset in between local system and reference clock", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_timex_maxerror_seconds{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Maximum error in seconds", + "refId": "C", + "step": 240 + } + ], + "title": "Time Synchronized Drift", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 291, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byName", - "options": "PageTables" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "RAM_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#806EB7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#2F575E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Unused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Pgfault - Page major and minor fault operations" - }, - "properties": [ - { - "id": "custom.fillOpacity", - "value": 0 - }, - { - "id": "custom.stacking", - "value": { - "group": false, - "mode": "normal" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 51 - }, - "id": 175, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 350 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Pgfault - Page major and minor fault operations", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Pgmajfault - Major page fault operations", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[$__rate_interval]) - irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Pgminfault - Minor page fault operations", - "refId": "C", - "step": 240 - } - ], - "title": "Memory Page Faults", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#99440A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#58140C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6D1F62", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cached" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Committed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#508642", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Dirty" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B7DBAB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Mapped" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "PageTables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Page_Tables" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Slab_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Swap_Cache" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C15C17", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#511749", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total RAM + Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052B51", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total Swap" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "VmallocUsed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 51 - }, - "id": 307, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "oom killer invocations ", - "refId": "A", - "step": 240 - } - ], - "title": "OOM Killer", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "refId": "A" - } - ], - "title": "Memory Vmstat", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 71 - }, - "id": 293, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "seconds", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Variation*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 24 - }, - "id": 260, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_timex_estimated_error_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "Estimated error in seconds", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_timex_offset_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "Time offset in between local system and reference clock", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_timex_maxerror_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "Maximum error in seconds", - "refId": "C", - "step": 240 - } - ], - "title": "Time Synchronized Drift", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 24 - }, - "id": 291, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_timex_loop_time_constant{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Phase-locked loop time adjust", - "refId": "A", - "step": 240 - } - ], - "title": "Time PLL Adjust", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Variation*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 34 - }, - "id": 168, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_timex_sync_status{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Is clock synchronized to a reliable server (1 = yes, 0 = no)", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_timex_frequency_adjustment_ratio{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Local clock frequency adjustment", - "refId": "B", - "step": 240 - } - ], - "title": "Time Synchronized Status", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "seconds", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 34 - }, - "id": 294, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_timex_tick_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Seconds between clock ticks", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_timex_tai_offset_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "International Atomic Time (TAI) offset", - "refId": "B", - "step": 240 - } - ], - "title": "Time Misc", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "refId": "A" - } - ], - "title": "System Timesync", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 72 - }, - "id": 269, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 26 - }, - "id": 8, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_context_switches_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Context switches", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_intr_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Interrupts", - "refId": "B", - "step": 240 - } - ], - "title": "Context Switches / Interrupts", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 26 - }, - "id": 7, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_load1{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "Load 1m", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_load5{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "Load 5m", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_load15{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "Load 15m", - "refId": "C", - "step": 240 - } - ], - "title": "System Load", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "hertz" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Max" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 10 - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - }, - { - "id": "custom.fillBelowTo", - "value": "Min" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Min" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 10, - 10 - ], - "fill": "dash" - } - }, - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - }, - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": false, - "viz": false - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 36 - }, - "id": 321, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "node_cpu_scaling_frequency_hertz{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "CPU {{ cpu }}", - "range": true, - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "avg(node_cpu_scaling_frequency_max_hertz{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "Max", - "range": true, - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "avg(node_cpu_scaling_frequency_min_hertz{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "Min", - "range": true, - "refId": "C", - "step": 240 - } - ], - "title": "CPU Frequency Scaling", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "https://docs.kernel.org/accounting/psi.html", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Memory some" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Memory full" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "light-red", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "I/O some" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-blue", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "I/O full" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "light-blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 36 - }, - "id": 322, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "rate(node_pressure_cpu_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "CPU some", - "range": true, - "refId": "CPU some", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "rate(node_pressure_memory_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Memory some", - "range": true, - "refId": "Memory some", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "rate(node_pressure_memory_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Memory full", - "range": true, - "refId": "Memory full", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "rate(node_pressure_io_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "I/O some", - "range": true, - "refId": "I/O some", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "rate(node_pressure_io_stalled_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "I/O full", - "range": true, - "refId": "I/O full", - "step": 240 - } - ], - "title": "Pressure Stall Information", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "Enable with --collector.interrupts argument on node-exporter", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Critical*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 46 - }, - "id": 259, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_interrupts_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ type }} - {{ info }}", - "refId": "A", - "step": 240 - } - ], - "title": "Interrupts Detail", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 46 - }, - "id": 306, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_schedstat_timeslices_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "CPU {{ cpu }}", - "refId": "A", - "step": 240 - } - ], - "title": "Schedule timeslices executed by each cpu", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 56 - }, - "id": 151, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_entropy_available_bits{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Entropy available to random number generators", - "refId": "A", - "step": 240 - } - ], - "title": "Entropy", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "seconds", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 56 - }, - "id": 308, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(process_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Time spent", - "refId": "A", - "step": 240 - } - ], - "title": "CPU time spent in user and system contexts", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 66 - }, - "id": 64, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "process_max_fds{instance=\"$node\",job=\"$job\"}", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Maximum open file descriptors", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "process_open_fds{instance=\"$node\",job=\"$job\"}", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Open file descriptors", - "refId": "B", - "step": 240 - } - ], - "title": "File Descriptors", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "refId": "A" - } - ], - "title": "System Misc", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 73 - }, - "id": 304, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "temperature", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "celsius" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Critical*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 59 - }, - "id": 158, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_hwmon_temp_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ chip_name }} {{ sensor }} temp", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_hwmon_temp_crit_alarm_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ chip_name }} {{ sensor }} Critical Alarm", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_hwmon_temp_crit_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ chip_name }} {{ sensor }} Critical", - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_hwmon_temp_crit_hyst_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ chip_name }} {{ sensor }} Critical Historical", - "refId": "D", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_hwmon_temp_max_celsius{instance=\"$node\",job=\"$job\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ chip_name }} {{ sensor }} Max", - "refId": "E", - "step": 240 - } - ], - "title": "Hardware temperature monitor", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Max*./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 59 - }, - "id": 300, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_cooling_device_cur_state{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "Current {{ name }} in {{ type }}", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_cooling_device_max_state{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Max {{ name }} in {{ type }}", - "refId": "B", - "step": 240 - } - ], - "title": "Throttle cooling device", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 69 - }, - "id": 302, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_power_supply_online{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ power_supply }} online", - "refId": "A", - "step": 240 - } - ], - "title": "Power supply", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "refId": "A" - } - ], - "title": "Hardware Misc", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 74 - }, - "id": 296, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 46 - }, - "id": 297, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_systemd_socket_accepted_connections_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ name }} Connections", - "refId": "A", - "step": 240 - } - ], - "title": "Systemd Sockets", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Inactive" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FF9830", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Active" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#73BF69", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Deactivating" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFCB7D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Activating" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#C8F2C2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 46 - }, - "id": 298, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"activating\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Activating", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"active\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Active", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"deactivating\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Deactivating", - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"failed\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Failed", - "refId": "D", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"inactive\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Inactive", - "refId": "E", - "step": 240 - } - ], - "title": "Systemd Units State", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "refId": "A" - } - ], - "title": "Systemd", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 75 - }, - "id": 270, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "The number (after merges) of I/O requests completed per second for the device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "IO read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 47 - }, - "id": 9, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "intervalFactor": 4, - "legendFormat": "{{device}} - Reads completed", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "intervalFactor": 1, - "legendFormat": "{{device}} - Writes completed", - "refId": "B", - "step": 240 - } - ], - "title": "Disk IOps Completed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "The number of bytes read from or written to the device per second", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 47 - }, - "id": 33, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "{{device}} - Read bytes", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}} - Written bytes", - "refId": "B", - "step": 240 - } - ], - "title": "Disk R/W Data", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "The average time for requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "time. read (-) / write (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 57 - }, - "id": 37, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_read_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "hide": false, - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{device}} - Read wait time avg", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_write_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{device}} - Write wait time avg", - "refId": "B", - "step": 240 - } - ], - "title": "Disk Average Wait Time", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "The average queue length of the requests that were issued to the device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "aqu-sz", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 57 - }, - "id": 35, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{device}}", - "refId": "A", - "step": 240 - } - ], - "title": "Average Queue Size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "The number of read and write requests merged per second that were queued to the device", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "I/Os", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*Read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] + "expr": "node_timex_loop_time_constant{instance=\"$ip\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Phase-locked loop time adjust", + "refId": "A", + "step": 240 } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 67 - }, - "id": 133, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + ], + "title": "Time PLL Adjust", + "type": "timeseries" }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_reads_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "intervalFactor": 1, - "legendFormat": "{{device}} - Read merged", - "refId": "A", - "step": 240 + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Variation*./" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 168, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_writes_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "intervalFactor": 1, - "legendFormat": "{{device}} - Write merged", - "refId": "B", - "step": 240 - } - ], - "title": "Disk R/W Merged", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "Percentage of elapsed time during which I/O requests were issued to the device (bandwidth utilization for the device). Device saturation occurs when this value is close to 100% for devices serving requests serially. But for devices serving requests in parallel, such as RAID arrays and modern SSDs, this number does not reflect their performance limits.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "%util", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_timex_sync_status{instance=\"$ip\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Is clock synchronized to a reliable server (1 = yes, 0 = no)", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_timex_frequency_adjustment_ratio{instance=\"$ip\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Local clock frequency adjustment", + "refId": "B", + "step": 240 } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 67 + ], + "title": "Time Synchronized Status", + "type": "timeseries" }, - "id": 36, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{device}} - IO", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_discard_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{device}} - discard", - "refId": "B", - "step": 240 - } - ], - "title": "Time Spent Doing I/Os", - "type": "timeseries" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 294, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_timex_tick_seconds{instance=\"$ip\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Seconds between clock ticks", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_timex_tai_offset_seconds{instance=\"$ip\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "International Atomic Time (TAI) offset", + "refId": "B", + "step": 240 + } + ], + "title": "Time Misc", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "System Timesync", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "000000001" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "The number of outstanding requests at the instant the sample was taken. Incremented as requests are given to appropriate struct request_queue and decremented as they finish.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Outstanding req.", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 122 + }, + "id": 269, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 8, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] + "expr": "irate(node_context_switches_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Context switches", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, + "expr": "irate(node_intr_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Interrupts", + "refId": "B", + "step": 240 + } + ], + "title": "Context Switches / Interrupts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 7, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] + "expr": "node_load1{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Load 1m", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] + "expr": "node_load5{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Load 5m", + "refId": "B", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, + "expr": "node_load15{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Load 15m", + "refId": "C", + "step": 240 + } + ], + "title": "System Load", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "hertz" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Max" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 10 + }, + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + }, + { + "id": "custom.fillBelowTo", + "value": "Min" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": false, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 321, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "node_cpu_scaling_frequency_hertz{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU {{ cpu }}", + "range": true, + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg(node_cpu_scaling_frequency_max_hertz{instance=\"$ip\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Max", + "range": true, + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "avg(node_cpu_scaling_frequency_min_hertz{instance=\"$ip\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Min", + "range": true, + "refId": "C", + "step": 240 + } + ], + "title": "CPU Frequency Scaling", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "https://docs.kernel.org/accounting/psi.html", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Memory some" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory full" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "I/O some" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "I/O full" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 322, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "rate(node_pressure_cpu_waiting_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "CPU some", + "range": true, + "refId": "CPU some", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "rate(node_pressure_memory_waiting_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Memory some", + "range": true, + "refId": "Memory some", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "rate(node_pressure_memory_stalled_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Memory full", + "range": true, + "refId": "Memory full", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "rate(node_pressure_io_waiting_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "I/O some", + "range": true, + "refId": "I/O some", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "rate(node_pressure_io_stalled_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "I/O full", + "range": true, + "refId": "I/O full", + "step": 240 + } + ], + "title": "Pressure Stall Information", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Enable with --collector.interrupts argument on node-exporter", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Critical*./" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*Max*./" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 46 + }, + "id": 259, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, + "expr": "irate(node_interrupts_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ type }} - {{ info }}", + "refId": "A", + "step": 240 + } + ], + "title": "Interrupts Detail", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 46 + }, + "id": 306, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] - }, + "expr": "irate(node_schedstat_timeslices_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU {{ cpu }}", + "refId": "A", + "step": 240 + } + ], + "title": "Schedule timeslices executed by each cpu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 151, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, + "expr": "node_entropy_available_bits{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Entropy available to random number generators", + "refId": "A", + "step": 240 + } + ], + "title": "Entropy", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 308, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, + "expr": "irate(process_cpu_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Time spent", + "refId": "A", + "step": 240 + } + ], + "title": "CPU time spent in user and system contexts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Max*./" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 66 + }, + "id": 64, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] + "expr": "process_max_fds{instance=\"$ip\"}", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Maximum open file descriptors", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] + "expr": "process_open_fds{instance=\"$ip\"}", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Open file descriptors", + "refId": "B", + "step": 240 } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 77 + ], + "title": "File Descriptors", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "System Misc", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 123 + }, + "id": 304, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "temperature", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "celsius" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Critical*./" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*Max*./" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 59 + }, + "id": 158, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_hwmon_temp_celsius{instance=\"$ip\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$ip\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ chip_name }} {{ sensor }} temp", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_hwmon_temp_crit_alarm_celsius{instance=\"$ip\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$ip\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ chip_name }} {{ sensor }} Critical Alarm", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_hwmon_temp_crit_celsius{instance=\"$ip\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$ip\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ chip_name }} {{ sensor }} Critical", + "refId": "C", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_hwmon_temp_crit_hyst_celsius{instance=\"$ip\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$ip\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ chip_name }} {{ sensor }} Critical Historical", + "refId": "D", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_hwmon_temp_max_celsius{instance=\"$ip\"} * on(chip) group_left(chip_name) node_hwmon_chip_names{instance=\"$ip\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ chip_name }} {{ sensor }} Max", + "refId": "E", + "step": 240 + } + ], + "title": "Hardware temperature monitor", + "type": "timeseries" }, - "id": 34, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Max*./" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 59 + }, + "id": 300, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_cooling_device_cur_state{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Current {{ name }} in {{ type }}", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_cooling_device_max_state{instance=\"$ip\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Max {{ name }} in {{ type }}", + "refId": "B", + "step": 240 + } + ], + "title": "Throttle cooling device", + "type": "timeseries" }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_disk_io_now{instance=\"$node\",job=\"$job\"}", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{device}} - IO now", - "refId": "A", - "step": 240 - } - ], - "title": "Instantaneous Queue Size", - "type": "timeseries" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 69 + }, + "id": 302, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_power_supply_online{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ power_supply }} online", + "refId": "A", + "step": 240 + } + ], + "title": "Power supply", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Hardware Misc", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "000000001" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "IOs", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" - }, - "overrides": [ + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 124 + }, + "id": 296, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 46 + }, + "id": 297, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sda_.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + "expr": "irate(node_systemd_socket_accepted_connections_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ name }} Connections", + "refId": "A", + "step": 240 + } + ], + "title": "Systemd Sockets", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2495C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inactive" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF9830", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Active" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#73BF69", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Deactivating" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFCB7D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Activating" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C8F2C2", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 46 + }, + "id": 298, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_systemd_units{instance=\"$ip\",state=\"activating\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Activating", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_systemd_units{instance=\"$ip\",state=\"active\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Active", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_systemd_units{instance=\"$ip\",state=\"deactivating\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Deactivating", + "refId": "C", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_systemd_units{instance=\"$ip\",state=\"failed\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Failed", + "refId": "D", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_systemd_units{instance=\"$ip\",state=\"inactive\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Inactive", + "refId": "E", + "step": 240 + } + ], + "title": "Systemd Units State", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Systemd", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 125 + }, + "id": 270, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The number (after merges) of I/O requests completed per second for the device", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "IO read (-) / write (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" } - } - ] - }, + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda2_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BA43A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda3_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F4D598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#962D82", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#9AC48A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#65C5DB", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9934E", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FCEACA", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9E2D2", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 9, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb_.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] + "expr": "irate(node_disk_reads_completed_total{instance=\"$ip\"}[$__rate_interval])", + "intervalFactor": 4, + "legendFormat": "{{device}} - Reads completed", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc_.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" + "expr": "irate(node_disk_writes_completed_total{instance=\"$ip\"}[$__rate_interval])", + "intervalFactor": 1, + "legendFormat": "{{device}} - Writes completed", + "refId": "B", + "step": 240 + } + ], + "title": "Disk IOps Completed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The number of bytes read from or written to the device per second", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes read (-) / write (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" } - } - ] - }, + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda2_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BA43A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda3_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F4D598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#962D82", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#9AC48A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#65C5DB", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9934E", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FCEACA", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9E2D2", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 33, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd_.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] + "expr": "irate(node_disk_read_bytes_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "{{device}} - Read bytes", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sde_.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" + "expr": "irate(node_disk_written_bytes_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Written bytes", + "refId": "B", + "step": 240 + } + ], + "title": "Disk R/W Data", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The average time for requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "time. read (-) / write (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/.*sda1.*/" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#584477", - "mode": "fixed" - } - } - ] - }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda2_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BA43A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda3_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F4D598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#962D82", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#9AC48A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#65C5DB", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9934E", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FCEACA", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9E2D2", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 57 + }, + "id": 37, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_disk_read_time_seconds_total{instance=\"$ip\"}[$__rate_interval]) / irate(node_disk_reads_completed_total{instance=\"$ip\"}[$__rate_interval])", + "hide": false, + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - Read wait time avg", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_disk_write_time_seconds_total{instance=\"$ip\"}[$__rate_interval]) / irate(node_disk_writes_completed_total{instance=\"$ip\"}[$__rate_interval])", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} - Write wait time avg", + "refId": "B", + "step": 240 + } + ], + "title": "Disk Average Wait Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The average queue length of the requests that were issued to the device", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "aqu-sz", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda2_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BA43A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda3_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F4D598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#962D82", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#9AC48A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#65C5DB", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9934E", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FCEACA", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9E2D2", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 57 + }, + "id": 35, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sda2_.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" + "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}}", + "refId": "A", + "step": 240 + } + ], + "title": "Average Queue Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The number of read and write requests merged per second that were queued to the device", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "I/Os", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" } - } - ] - }, + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda2_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BA43A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda3_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F4D598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#962D82", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#9AC48A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#65C5DB", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9934E", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FCEACA", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9E2D2", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 67 + }, + "id": 133, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sda3_.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F4D598", - "mode": "fixed" - } - } - ] + "expr": "irate(node_disk_reads_merged_total{instance=\"$ip\"}[$__rate_interval])", + "intervalFactor": 1, + "legendFormat": "{{device}} - Read merged", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb1.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#0A50A1", - "mode": "fixed" - } - } - ] - }, + "expr": "irate(node_disk_writes_merged_total{instance=\"$ip\"}[$__rate_interval])", + "intervalFactor": 1, + "legendFormat": "{{device}} - Write merged", + "refId": "B", + "step": 240 + } + ], + "title": "Disk R/W Merged", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Percentage of elapsed time during which I/O requests were issued to the device (bandwidth utilization for the device). Device saturation occurs when this value is close to 100% for devices serving requests serially. But for devices serving requests in parallel, such as RAID arrays and modern SSDs, this number does not reflect their performance limits.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "%util", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda2_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BA43A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda3_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F4D598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#962D82", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#9AC48A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#65C5DB", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9934E", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FCEACA", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9E2D2", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 67 + }, + "id": 36, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb2.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BF1B00", - "mode": "fixed" - } - } - ] + "expr": "irate(node_disk_io_time_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - IO", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdb3.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0752D", - "mode": "fixed" - } - } - ] - }, + "expr": "irate(node_disk_discard_time_seconds_total{instance=\"$ip\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - discard", + "refId": "B", + "step": 240 + } + ], + "title": "Time Spent Doing I/Os", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The number of outstanding requests at the instant the sample was taken. Incremented as requests are given to appropriate struct request_queue and decremented as they finish.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Outstanding req.", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda2_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BA43A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda3_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F4D598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#962D82", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#9AC48A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#65C5DB", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9934E", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FCEACA", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9E2D2", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 77 + }, + "id": 34, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc1.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#962D82", - "mode": "fixed" - } - } - ] - }, + "expr": "node_disk_io_now{instance=\"$ip\"}", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - IO now", + "refId": "A", + "step": 240 + } + ], + "title": "Instantaneous Queue Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "IOs", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#584477", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda2_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BA43A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sda3_.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F4D598", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#0A50A1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdb3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0752D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#962D82", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#614D93", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdc3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#9AC48A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#65C5DB", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9934E", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EA6460", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde1.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E0F9D7", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sdd2.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FCEACA", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*sde3.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F9E2D2", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 77 + }, + "id": 301, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc2.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#614D93", - "mode": "fixed" - } - } - ] + "expr": "irate(node_disk_discards_completed_total{instance=\"$ip\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - Discards completed", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdc3.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#9AC48A", - "mode": "fixed" - } - } - ] - }, + "expr": "irate(node_disk_discards_merged_total{instance=\"$ip\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} - Discards merged", + "refId": "B", + "step": 240 + } + ], + "title": "Disk IOps Discards completed / merged", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Storage Disk", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 126 + }, + "id": 271, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 43, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_filesystem_avail_bytes{instance=\"$ip\",device!~'rootfs'}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - Available", + "metric": "", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_filesystem_free_bytes{instance=\"$ip\",device!~'rootfs'}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - Free", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_filesystem_size_bytes{instance=\"$ip\",device!~'rootfs'}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - Size", + "refId": "C", + "step": 240 + } + ], + "title": "Filesystem space available", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "file nodes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 62 + }, + "id": 41, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd1.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#65C5DB", - "mode": "fixed" - } - } - ] - }, + "expr": "node_filesystem_files_free{instance=\"$ip\",device!~'rootfs'}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - Free file nodes", + "refId": "A", + "step": 240 + } + ], + "title": "File Nodes Free", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "files", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 72 + }, + "id": 28, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9934E", - "mode": "fixed" - } - } - ] + "expr": "node_filefd_maximum{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Max open files", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd3.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EA6460", - "mode": "fixed" - } - } - ] - }, + "expr": "node_filefd_allocated{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Open files", + "refId": "B", + "step": 240 + } + ], + "title": "File Descriptor", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "file Nodes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 72 + }, + "id": 219, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sde1.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E0F9D7", - "mode": "fixed" - } - } - ] - }, + "expr": "node_filesystem_files{instance=\"$ip\",device!~'rootfs'}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - File nodes total", + "refId": "A", + "step": 240 + } + ], + "title": "File Nodes Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "/ ReadOnly" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 82 + }, + "id": 44, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*sdd2.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FCEACA", - "mode": "fixed" - } - } - ] + "expr": "node_filesystem_readonly{instance=\"$ip\",device!~'rootfs'}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - ReadOnly", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*sde3.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F9E2D2", - "mode": "fixed" - } - } - ] + "expr": "node_filesystem_device_error{instance=\"$ip\",device!~'rootfs',fstype!~'tmpfs'}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - Device error", + "refId": "B", + "step": 240 } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 77 - }, - "id": 301, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_discards_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{device}} - Discards completed", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_disk_discards_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{device}} - Discards merged", - "refId": "B", - "step": 240 - } - ], - "title": "Disk IOps Discards completed / merged", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "refId": "A" - } - ], - "title": "Storage Disk", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 76 + ], + "title": "Filesystem in ReadOnly / Error", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Storage Filesystem", + "type": "row" }, - "id": 271, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 62 - }, - "id": 43, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{mountpoint}} - Available", - "metric": "", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_filesystem_free_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "{{mountpoint}} - Free", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "{{mountpoint}} - Size", - "refId": "C", - "step": 240 - } - ], - "title": "Filesystem space available", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "file nodes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 62 - }, - "id": 41, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_filesystem_files_free{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{mountpoint}} - Free file nodes", - "refId": "A", - "step": 240 - } - ], - "title": "File Nodes Free", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "files", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 72 - }, - "id": 28, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_filefd_maximum{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "Max open files", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_filefd_allocated{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Open files", - "refId": "B", - "step": 240 - } - ], - "title": "File Descriptor", - "type": "timeseries" + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "000000001" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "file Nodes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 72 - }, - "id": 219, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_filesystem_files{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{mountpoint}} - File nodes total", - "refId": "A", - "step": 240 - } - ], - "title": "File Nodes Size", - "type": "timeseries" + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 127 }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "max": 1, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "/ ReadOnly" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" + "id": 272, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "packets out (-) / in (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "receive_packets_eth0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "receive_packets_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "transmit_packets_eth0" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "transmit_packets_lo" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*Trans.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" } - } - ] + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 31 + }, + "id": 60, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 300 + }, + "tooltip": { + "mode": "multi", + "sort": "none" } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 82 - }, - "id": 44, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_filesystem_readonly{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{mountpoint}} - ReadOnly", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_filesystem_device_error{instance=\"$node\",job=\"$job\",device!~'rootfs',fstype!~'tmpfs'}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{mountpoint}} - Device error", - "refId": "B", - "step": 240 - } - ], - "title": "Filesystem in ReadOnly / Error", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "refId": "A" - } - ], - "title": "Storage Filesystem", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 77 - }, - "id": 272, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_network_receive_packets_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_network_transmit_packets_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} - Transmit", + "refId": "B", + "step": 240 + } + ], + "title": "Network Traffic by Packets", + "type": "timeseries" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "packets out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "receive_packets_eth0" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "packets out (-) / in (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Trans.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" } - } - ] - }, + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 31 + }, + "id": 142, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 300 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byName", - "options": "receive_packets_lo" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] + "expr": "irate(node_network_receive_errs_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive errors", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byName", - "options": "transmit_packets_eth0" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + "expr": "irate(node_network_transmit_errs_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Transmit errors", + "refId": "B", + "step": 240 + } + ], + "title": "Network Traffic Errors", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "packets out (-) / in (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "pps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Trans.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" } - } - ] - }, + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 143, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 300 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byName", - "options": "transmit_packets_lo" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] + "expr": "irate(node_network_receive_drop_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive drop", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*Trans.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] + "expr": "irate(node_network_transmit_drop_total{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Transmit drop", + "refId": "B", + "step": 240 } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 31 - }, - "id": 60, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_network_receive_packets_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{device}} - Receive", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_network_transmit_packets_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{device}} - Transmit", - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic by Packets", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + ], + "title": "Network Traffic Drop", + "type": "timeseries" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "packets out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "entries", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "NF conntrack limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 61, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*Trans.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 31 - }, - "id": 142, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_network_receive_errs_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}} - Receive errors", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_network_transmit_errs_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}} - Transmit errors", - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "packets out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "pps" - }, - "overrides": [ + "expr": "node_nf_conntrack_entries{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "NF conntrack entries", + "refId": "A", + "step": 240 + }, { - "matcher": { - "id": "byRegexp", - "options": "/.*Trans.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] + "expr": "node_nf_conntrack_entries_limit{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "NF conntrack limit", + "refId": "B", + "step": 240 } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 41 - }, - "id": 143, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_network_receive_drop_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}} - Receive drop", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_network_transmit_drop_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}} - Transmit drop", - "refId": "B", - "step": 240 - } - ], - "title": "Network Traffic Drop", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + ], + "title": "NF Conntrack", + "type": "timeseries" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "entries", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "NF conntrack limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "bytes", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 51 + }, + "id": 280, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 41 - }, - "id": 61, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_nf_conntrack_entries{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "NF conntrack entries", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_nf_conntrack_entries_limit{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "NF conntrack limit", - "refId": "B", - "step": 240 - } - ], - "title": "NF Conntrack", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "bytes", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 51 - }, - "id": 280, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_network_speed_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ device }} - Speed", - "refId": "A", - "step": 240 - } - ], - "title": "Speed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 51 - }, - "id": 309, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "width": 300 - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_network_up{operstate=\"up\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{interface}} - Operational state UP", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_network_carrier{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "instant": false, - "legendFormat": "{{device}} - Physical link state", - "refId": "B" - } - ], - "title": "Network Operational Status", - "type": "timeseries" - } - ], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "refId": "A" - } - ], - "title": "Network Traffic", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 78 - }, - "id": 274, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_network_speed_bytes{instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ device }} - Speed", + "refId": "A", + "step": 240 + } + ], + "title": "Speed", + "type": "timeseries" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "datagrams out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 51 + }, + "id": 309, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "width": 300 + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*Out.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] + "expr": "node_network_up{operstate=\"up\",instance=\"$ip\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{interface}} - Operational state UP", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*Snd.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] + "expr": "node_network_carrier{instance=\"$ip\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{device}} - Physical link state", + "refId": "B" } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 32 - }, - "id": 55, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Udp_InDatagrams{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "InDatagrams - Datagrams received", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Udp_OutDatagrams{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "OutDatagrams - Datagrams sent", - "refId": "B", - "step": 240 - } - ], - "title": "UDP In / Out", - "type": "timeseries" + ], + "title": "Network Operational Status", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Network Traffic", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "000000001" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "datagrams", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 32 + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 128 + }, + "id": 274, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "datagrams out (-) / in (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Out.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*Snd.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 55, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Udp_InDatagrams{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "InDatagrams - Datagrams received", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Udp_OutDatagrams{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "OutDatagrams - Datagrams sent", + "refId": "B", + "step": 240 + } + ], + "title": "UDP In / Out", + "type": "timeseries" }, - "id": 109, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "datagrams", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 109, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Udp_InErrors{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "InErrors - UDP Datagrams that could not be delivered to an application", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Udp_NoPorts{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "NoPorts - UDP Datagrams received on a port with no listener", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_UdpLite_InErrors{instance=\"$ip\"}[$__rate_interval])", + "interval": "", + "legendFormat": "InErrors Lite - UDPLite Datagrams that could not be delivered to an application", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Udp_RcvbufErrors{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "RcvbufErrors - UDP buffer errors received", + "refId": "D", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Udp_SndbufErrors{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "SndbufErrors - UDP buffer errors send", + "refId": "E", + "step": 240 + } + ], + "title": "UDP Errors", + "type": "timeseries" }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Udp_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "InErrors - UDP Datagrams that could not be delivered to an application", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Udp_NoPorts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "NoPorts - UDP Datagrams received on a port with no listener", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_UdpLite_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "InErrors Lite - UDPLite Datagrams that could not be delivered to an application", - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Udp_RcvbufErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "RcvbufErrors - UDP buffer errors received", - "refId": "D", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Udp_SndbufErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "SndbufErrors - UDP buffer errors send", - "refId": "E", - "step": 240 - } - ], - "title": "UDP Errors", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "datagrams out (-) / in (+)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*Out.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*Snd.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 299, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Tcp_InSegs{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "InSegs - Segments received, including those received in error. This count includes segments received on currently established connections", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Tcp_OutSegs{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "OutSegs - Segments sent, including those on current connections but excluding those containing only retransmitted octets", + "refId": "B", + "step": 240 + } + ], + "title": "TCP In / Out", + "type": "timeseries" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "datagrams out (-) / in (+)", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 104, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "/.*Out.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] + "expr": "irate(node_netstat_TcpExt_ListenOverflows{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "ListenOverflows - Times the listen queue of a socket overflowed", + "refId": "A", + "step": 240 }, { - "matcher": { - "id": "byRegexp", - "options": "/.*Snd.*/" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] + "expr": "irate(node_netstat_TcpExt_ListenDrops{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "ListenDrops - SYNs to LISTEN sockets ignored", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_TcpExt_TCPSynRetrans{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "TCPSynRetrans - SYN-SYN/ACK retransmits to break down retransmissions in SYN, fast/timeout retransmits", + "refId": "C", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Tcp_RetransSegs{instance=\"$ip\"}[$__rate_interval])", + "interval": "", + "legendFormat": "RetransSegs - Segments retransmitted - that is, the number of TCP segments transmitted containing one or more previously transmitted octets", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Tcp_InErrs{instance=\"$ip\"}[$__rate_interval])", + "interval": "", + "legendFormat": "InErrs - Segments received in error (e.g., bad TCP checksums)", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Tcp_OutRsts{instance=\"$ip\"}[$__rate_interval])", + "interval": "", + "legendFormat": "OutRsts - Segments sent with RST flag", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "irate(node_netstat_TcpExt_TCPRcvQDrop{instance=\"$ip\"}[$__rate_interval])", + "hide": false, + "interval": "", + "legendFormat": "TCPRcvQDrop - Packets meant to be queued in rcv queue but dropped because socket rcvbuf limit hit", + "range": true, + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "irate(node_netstat_TcpExt_TCPOFOQueue{instance=\"$ip\"}[$__rate_interval])", + "hide": false, + "interval": "", + "legendFormat": "TCPOFOQueue - TCP layer receives an out of order packet and has enough memory to queue it", + "range": true, + "refId": "H" } - ] + ], + "title": "TCP Errors", + "type": "timeseries" }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 42 - }, - "id": 299, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "connections", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*MaxConn *./" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 85, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_netstat_Tcp_CurrEstab{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "CurrEstab - TCP connections for which the current state is either ESTABLISHED or CLOSE- WAIT", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_netstat_Tcp_MaxConn{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "MaxConn - Limit on the total number of TCP connections the entity can support (Dynamic is \"-1\")", + "refId": "B", + "step": 240 + } + ], + "title": "TCP Connections", + "type": "timeseries" }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Tcp_InSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "InSegs - Segments received, including those received in error. This count includes segments received on currently established connections", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Tcp_OutSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "OutSegs - Segments sent, including those on current connections but excluding those containing only retransmitted octets", - "refId": "B", - "step": 240 - } - ], - "title": "TCP In / Out", - "type": "timeseries" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "connections", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 52 + }, + "id": 82, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Tcp_ActiveOpens{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "ActiveOpens - TCP connections that have made a direct transition to the SYN-SENT state from the CLOSED state", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_netstat_Tcp_PassiveOpens{instance=\"$ip\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "PassiveOpens - TCP connections that have made a direct transition to the SYN-RCVD state from the LISTEN state", + "refId": "B", + "step": 240 + } + ], + "title": "TCP Direct Transition", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Network Netstat", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "000000001" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 42 - }, - "id": 104, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_TcpExt_ListenOverflows{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "ListenOverflows - Times the listen queue of a socket overflowed", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_TcpExt_ListenDrops{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "ListenDrops - SYNs to LISTEN sockets ignored", - "refId": "B", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_TcpExt_TCPSynRetrans{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "TCPSynRetrans - SYN-SYN/ACK retransmits to break down retransmissions in SYN, fast/timeout retransmits", - "refId": "C", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Tcp_RetransSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "RetransSegs - Segments retransmitted - that is, the number of TCP segments transmitted containing one or more previously transmitted octets", - "refId": "D" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Tcp_InErrs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "InErrs - Segments received in error (e.g., bad TCP checksums)", - "refId": "E" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Tcp_OutRsts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "interval": "", - "legendFormat": "OutRsts - Segments sent with RST flag", - "refId": "F" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_TCPRcvQDrop{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "hide": false, - "interval": "", - "legendFormat": "TCPRcvQDrop - Packets meant to be queued in rcv queue but dropped because socket rcvbuf limit hit", - "range": true, - "refId": "G" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "irate(node_netstat_TcpExt_TCPOFOQueue{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "hide": false, - "interval": "", - "legendFormat": "TCPOFOQueue - TCP layer receives an out of order packet and has enough memory to queue it", - "range": true, - "refId": "H" - } - ], - "title": "TCP Errors", - "type": "timeseries" + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 129 }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "connections", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*MaxConn *./" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } + "id": 279, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 66 + }, + "id": 40, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 52 - }, - "id": 85, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_scrape_collector_duration_seconds{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{collector}} - Scrape duration", + "refId": "A", + "step": 240 + } + ], + "title": "Node Exporter Scrape Time", + "type": "timeseries" }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_netstat_Tcp_CurrEstab{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "CurrEstab - TCP connections for which the current state is either ESTABLISHED or CLOSE- WAIT", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_netstat_Tcp_MaxConn{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "MaxConn - Limit on the total number of TCP connections the entity can support (Dynamic is \"-1\")", - "refId": "B", - "step": 240 - } - ], - "title": "TCP Connections", - "type": "timeseries" - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "counter", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*error.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2495C", + "mode": "fixed" + } + }, + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 66 + }, + "id": 157, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_scrape_collector_success{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{collector}} - Scrape success", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_textfile_scrape_error{instance=\"$ip\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{collector}} - Scrape textfile error (1 = true)", + "refId": "B", + "step": 240 + } + ], + "title": "Node Exporter Scrape", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "000000001" + }, + "refId": "A" + } + ], + "title": "Node Exporter", + "type": "row" + } + ], + "refresh": "", + "revision": 1, + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ { + "current": { + "selected": false, + "text": "computeinstance-e00bgs8hr09kcxsbhn", + "value": "computeinstance-e00bgs8hr09kcxsbhn" + }, "datasource": { "type": "prometheus", "uid": "prometheus" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "connections", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 52 - }, - "id": 82, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "definition": "label_values(kube_pod_info{pod=\"$worker\"},node)", + "hide": 2, + "includeAll": false, + "label": "", + "multi": false, + "name": "node", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_pod_info{pod=\"$worker\"},node)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", + "value": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+" }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Tcp_ActiveOpens{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "ActiveOpens - TCP connections that have made a direct transition to the SYN-SENT state from the CLOSED state", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "irate(node_netstat_Tcp_PassiveOpens{instance=\"$node\",job=\"$job\"}[$__rate_interval])", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "PassiveOpens - TCP connections that have made a direct transition to the SYN-RCVD state from the LISTEN state", - "refId": "B", - "step": 240 + "hide": 2, + "includeAll": false, + "multi": false, + "name": "diskdevices", + "options": [ + { + "selected": true, + "text": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", + "value": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+" } ], - "title": "TCP Direct Transition", - "type": "timeseries" - } - ], - "targets": [ + "query": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", + "skipUrlSync": false, + "type": "custom" + }, { - "datasource": { - "type": "prometheus", - "uid": "000000001" + "current": { + "selected": false, + "text": "192.168.0.53:9100", + "value": "192.168.0.53:9100" }, - "refId": "A" - } - ], - "title": "Network Netstat", - "type": "row" - }, - { - "collapsed": true, - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 79 - }, - "id": 279, - "panels": [ - { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "seconds", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 66 - }, - "id": 40, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_scrape_collector_duration_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{collector}} - Scrape duration", - "refId": "A", - "step": 240 - } - ], - "title": "Node Exporter Scrape Time", - "type": "timeseries" + "definition": "label_values(node_uname_info{nodename=\"$node\"},instance)", + "hide": 2, + "includeAll": false, + "multi": false, + "name": "ip", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info{nodename=\"$node\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" }, { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "counter", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*error.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - }, - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 66 - }, - "id": 157, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "current": { + "selected": false, + "text": "worker-0", + "value": "worker-0" }, - "pluginVersion": "9.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_scrape_collector_success{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{collector}} - Scrape success", - "refId": "A", - "step": 240 - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "node_textfile_scrape_error{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{collector}} - Scrape textfile error (1 = true)", - "refId": "B", - "step": 240 - } - ], - "title": "Node Exporter Scrape", - "type": "timeseries" - } - ], - "targets": [ - { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "prometheus" }, - "refId": "A" + "definition": "label_values(kube_pod_info,pod)", + "hide": 0, + "includeAll": false, + "label": "worker", + "multi": false, + "name": "worker", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_pod_info,pod)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "^worker.*", + "skipUrlSync": false, + "sort": 0, + "type": "query" } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" ], - "title": "Node Exporter", - "type": "row" - } - ], - "refresh": "1m", - "revision": 1, - "schemaVersion": 39, - "tags": [ - "linux" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "Datasource", - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "current": { - "selected": false, - "text": "node-exporter", - "value": "node-exporter" - }, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "definition": "", - "hide": 0, - "includeAll": false, - "label": "Job", - "multi": false, - "name": "job", - "options": [], - "query": { - "query": "label_values(node_uname_info, job)", - "refId": "Prometheus-job-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "definition": "label_values(node_uname_info{job=\"$job\"}, instance)", - "hide": 0, - "includeAll": false, - "label": "Host", - "multi": false, - "name": "node", - "options": [], - "query": { - "query": "label_values(node_uname_info{job=\"$job\"}, instance)", - "refId": "Prometheus-node-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "selected": false, - "text": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", - "value": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+" - }, - "hide": 2, - "includeAll": false, - "multi": false, - "name": "diskdevices", - "options": [ - { - "selected": true, - "text": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", - "value": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+" - } - ], - "query": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", - "skipUrlSync": false, - "type": "custom" - } - ] - }, - "time": { - "from": "now-24h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Node Exporter", - "uid": "rYdddlPWkqiutaQ1", - "version": 2, - "weekStart": "" + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Node Metrics", + "uid": "rYdddlPWkqiutaQ1", + "version": 20, + "weekStart": "" } diff --git a/soperator/modules/monitoring/templates/dashboards/pod_resources.yaml.tftpl b/soperator/modules/monitoring/templates/dashboards/pod_resources.yaml.tftpl index b4c4aa2b..ee4104d2 100644 --- a/soperator/modules/monitoring/templates/dashboards/pod_resources.yaml.tftpl +++ b/soperator/modules/monitoring/templates/dashboards/pod_resources.yaml.tftpl @@ -1313,7 +1313,7 @@ resources: }, "timepicker": {}, "timezone": "", - "title": "Pod Resources", + "title": "Pod metrics", "uid": "px1WKJznk", "version": 1, "weekStart": "" diff --git a/soperator/modules/monitoring/templates/dashboards/slurm_exporter.yaml.tftpl b/soperator/modules/monitoring/templates/dashboards/slurm_exporter.yaml.tftpl index c1b8802a..d4aad90b 100644 --- a/soperator/modules/monitoring/templates/dashboards/slurm_exporter.yaml.tftpl +++ b/soperator/modules/monitoring/templates/dashboards/slurm_exporter.yaml.tftpl @@ -9,2381 +9,2111 @@ resources: data: ${filename}: |- { - "annotations": { - "list": [ - { - "$$hashKey": "object:1345", - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "slurm-exporter", - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 4323, - "graphTooltip": 0, - "id": 6, - "links": [], - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "NCCL tests introduce the \"Bus Bandwidth\" metric to better reflect how optimally hardware is used for inter-GPU communication, allowing for comparison with hardware peak bandwidth regardless of the number of ranks, unlike traditional time or algorithm bandwidth measurements.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "noValue": "All Benchmark passed", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": "" - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 0 - }, - "id": 51, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.1.0", - "targets": [ + "annotations": { + "list": [ { + "$$hashKey": "object:1345", + "builtIn": 1, "datasource": { - "type": "prometheus", - "uid": "prometheus" + "type": "datasource", + "uid": "grafana" }, - "editorMode": "code", - "expr": "topk(5, slurm_job_nccl_benchmark_avg_bandwidth[3h] > 420) by (slurm_node)", - "instant": false, - "legendFormat": "{{slurm_node}}", - "range": true, - "refId": "A" + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" } - ], - "title": "TOP 5 nccl failed benchmark", - "type": "gauge" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "description": "", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 4323, + "graphTooltip": 0, + "id": 5, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 12, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "Cluster Nodes", + "type": "row" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "no dara": { - "index": 0, - "text": "No Failed jobs" - } + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "type": "value" - } - ], - "noValue": "No failed jobs", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "color": "red", - "value": 1 + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 0 - }, - "id": 52, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "topk(5, slurm_job_nccl_benchmark_succeed[3h]==0)", - "instant": false, - "legendFormat": "{{slurm_node}}", - "range": true, - "refId": "A" - } - ], - "title": "TOP 5 nccl benchmark failed jobs", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "mappings": [ + "overrides": [ { - "options": { - "no dara": { - "index": 0, - "text": "No Failed jobs" - } + "matcher": { + "id": "byName", + "options": "Total Nodes" }, - "type": "value" + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] } - ], - "noValue": "No failed jobs", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 53, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_nodes_alloc + slurm_nodes_comp", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Allocated Nodes (including compl)", + "refId": "A" }, - "editorMode": "code", - "expr": "(slurm_queue_running / (\n slurm_nodes_alloc + \n slurm_nodes_down + \n slurm_nodes_drain + \n slurm_nodes_idle + \n slurm_nodes_mix + \n slurm_nodes_comp + \n slurm_nodes_maint + \n slurm_nodes_resv\n)) * 100", - "hide": false, - "instant": false, - "legendFormat": "% Running jobs form total worker node", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_nodes_mix", + "intervalFactor": 2, + "legendFormat": "Mixed Nodes", + "refId": "B" }, - "editorMode": "code", - "expr": "(count(DCGM_FI_PROF_PIPE_TENSOR_ACTIVE == 0) / count(DCGM_FI_PROF_PIPE_TENSOR_ACTIVE)) * 100", - "hide": true, - "instant": false, - "legendFormat": "% inactive GPU", - "range": true, - "refId": "B" - } - ], - "title": "Running jobs and unsed gpu", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 12, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_nodes_idle", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Idle Nodes", + "refId": "C" }, - "refId": "A" - } - ], - "title": "Cluster Nodes", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_nodes_alloc + slurm_nodes_down + slurm_nodes_drain + slurm_nodes_idle + slurm_nodes_mix + slurm_nodes_comp + slurm_nodes_maint + slurm_nodes_resv", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "Total Nodes", + "refId": "D" + } + ], + "title": "Nodes", + "type": "timeseries" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "color": "red", - "value": 80 + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Total Nodes" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "purple", - "mode": "fixed" + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 } - } - ] - } - ] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 9 - }, - "id": 1, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + ] + }, + "unit": "short" }, - "expr": "slurm_nodes_alloc + slurm_nodes_comp", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Allocated Nodes (including compl)", - "refId": "A" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Down Nodes" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#e24d42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Nodes in *fail* state" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6d1f62", + "mode": "fixed" + } + } + ] + } + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_nodes_mix", - "intervalFactor": 2, - "legendFormat": "Mixed Nodes", - "refId": "B" + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 1 }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_nodes_idle", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Idle Nodes", - "refId": "C" + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_nodes_alloc + slurm_nodes_down + slurm_nodes_drain + slurm_nodes_idle + slurm_nodes_mix + slurm_nodes_comp + slurm_nodes_maint + slurm_nodes_resv", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "legendFormat": "Total Nodes", - "refId": "D" - } - ], - "title": "Nodes", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_nodes_down", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Down Nodes", + "refId": "A" }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_nodes_drain", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Draining Nodes", + "refId": "B" }, - "unit": "short" - }, - "overrides": [ { - "matcher": { - "id": "byName", - "options": "Down Nodes" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#e24d42", - "mode": "fixed" - } - } - ] + "expr": "slurm_nodes_err != 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Nodes in *error* state", + "refId": "C" }, { - "matcher": { - "id": "byName", - "options": "Nodes in *fail* state" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6d1f62", - "mode": "fixed" - } - } - ] + "expr": "slurm_nodes_fail != 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Nodes in *fail* state", + "refId": "D" } - ] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 9 + ], + "title": "Fail/Down/Drain/Err Nodes", + "type": "timeseries" }, - "id": 5, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 13, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "SLURM Jobs", + "type": "row" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_nodes_down", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Down Nodes", - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "expr": "slurm_nodes_drain", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Draining Nodes", - "refId": "B" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_nodes_err != 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Nodes in *error* state", - "refId": "C" + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 13 }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_nodes_fail != 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Nodes in *fail* state", - "refId": "D" - } - ], - "title": "Fail/Down/Drain/Err Nodes", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 13, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_queue_completing != 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Completing Jobs", + "refId": "A" }, - "refId": "A" - } - ], - "title": "SLURM Jobs", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_queue_running", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Running Jobs", + "refId": "B" }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_queue_pending", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Pending Jobs", + "refId": "C" }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_queue_completed != 0", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Completed Jobs", + "refId": "D" + } + ], + "title": "RUNNING/COMPL/PEND Jobs", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "color": "red", - "value": 80 + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "unit": "short" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Timed out Jobs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890f02", + "mode": "fixed" + } + } + ] + } + ] }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 21 - }, - "id": 2, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_queue_completing != 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Completing Jobs", - "refId": "A" + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 13 }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_queue_running", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Running Jobs", - "refId": "B" + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_queue_timeout", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Timed out Jobs", + "refId": "A" }, - "expr": "slurm_queue_pending", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Pending Jobs", - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_queue_failed", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "Failed Jobs", + "refId": "B" }, - "expr": "slurm_queue_completed != 0", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Completed Jobs", - "refId": "D" - } - ], - "title": "RUNNING/COMPL/PEND Jobs", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_queue_node_fail", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Failed jobs (due to NodeFail)", + "refId": "C" }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_queue_suspended", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Suspended Jobs", + "refId": "D" }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_queue_cancelled", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cancelled Jobs", + "refId": "E" }, - "unit": "short" - }, - "overrides": [ { - "matcher": { - "id": "byName", - "options": "Timed out Jobs" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890f02", - "mode": "fixed" - } - } - ] + "expr": "slurm_queue_preempted", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Preempted Jobs", + "refId": "F" } - ] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 21 + ], + "title": "FAIL/SUSP/CANC/PREEMPT/TIMEDOUT Jobs", + "type": "timeseries" }, - "id": 6, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_queue_timeout", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Timed out Jobs", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_queue_failed", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "legendFormat": "Failed Jobs", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_queue_node_fail", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Failed jobs (due to NodeFail)", - "refId": "C" + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_queue_suspended", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Suspended Jobs", - "refId": "D" + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_queue_cancelled", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Cancelled Jobs", - "refId": "E" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_queue_preempted", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Preempted Jobs", - "refId": "F" - } - ], - "title": "FAIL/SUSP/CANC/PREEMPT/TIMEDOUT Jobs", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 14, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "refId": "A" - } - ], - "title": "CPU cores allocation", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "id": 14, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "CPU cores allocation", + "type": "row" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "color": "red", - "value": 80 + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "slurm_alloc_cpu_cores{cluster=\"kronos\",job=\"kronos_cores\"}" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#ea6460", - "mode": "fixed" + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 } - } - ] + ] + }, + "unit": "short" }, - { - "matcher": { - "id": "byName", - "options": "slurm_cpu_cores_total{cluster=\"kronos\",job=\"kronos_cores\"}" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "slurm_alloc_cpu_cores{cluster=\"kronos\",job=\"kronos_cores\"}" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#ea6460", + "mode": "fixed" + } + } + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#052b51", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "slurm_cpu_cores_total{cluster=\"kronos\",job=\"kronos_cores\"}" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052b51", + "mode": "fixed" + } } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "slurm_idle_cpu_cores{cluster=\"kronos\",job=\"kronos_cores\"}" + ] }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#f2c96d", - "mode": "fixed" + { + "matcher": { + "id": "byName", + "options": "slurm_idle_cpu_cores{cluster=\"kronos\",job=\"kronos_cores\"}" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#f2c96d", + "mode": "fixed" + } } - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 10, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_cpus_total", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Total number of CPU cores", - "refId": "B" + ] + } + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_cpus_alloc", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Allocated CPU cores", - "refId": "A" + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 25 }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_cpus_idle", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Idle CPU cores", - "refId": "C" - } - ], - "title": "CPU Allocation", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "id": 10, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "overrides": [ + "targets": [ { - "matcher": { - "id": "byName", - "options": "debug" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "super-light-purple", - "mode": "fixed" - } - } - ] + "expr": "slurm_cpus_total", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Total number of CPU cores", + "refId": "B" }, { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsZero", - "value": 0 - } + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] + "expr": "slurm_cpus_alloc", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Allocated CPU cores", + "refId": "A" }, { - "matcher": { - "id": "byValue", - "options": { - "op": "gte", - "reducer": "allIsNull", - "value": 0 - } + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": true, - "tooltip": true, - "viz": false - } - } - ] + "expr": "slurm_cpus_idle", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Idle CPU cores", + "refId": "C" } - ] + ], + "title": "CPU Allocation", + "type": "timeseries" }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 42 - }, - "id": 48, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_partition_cpus_allocated != 0", - "interval": "", - "legendFormat": "{{partition}}", - "refId": "A" - } - ], - "title": "CPUs Allocated per Partition", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "color": "red", - "value": 80 + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 42 - }, - "id": 50, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_partition_cpus_idle", - "interval": "", - "legendFormat": "{{partition}}", - "refId": "A" - } - ], - "title": "CPUs Idle per Partition", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 50 - }, - "id": 15, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "refId": "A" - } - ], - "title": "SLURM Scheduler Details", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "The number of current active slurmctld threads.", - "fieldConfig": { - "defaults": { - "mappings": [ + "overrides": [ { - "options": { - "match": "null", - "result": { - "text": "N/A" - } + "matcher": { + "id": "byName", + "options": "debug" }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "super-light-purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 0, - "y": 51 - }, - "id": 7, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_scheduler_threads", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Slurm Scheduler Threads", - "refId": "A" - } - ], - "title": "Slurm Scheduler Threads", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "The agent mechanism helps to control communication between the Slrum daemons and the controller for a best effort.", - "fieldConfig": { - "defaults": { - "mappings": [ + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, { - "options": { - "match": "null", - "result": { - "text": "N/A" + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsNull", + "value": 0 } }, - "type": "special" + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 48, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_partition_cpus_allocated != 0", + "interval": "", + "legendFormat": "{{partition}}", + "refId": "A" + } + ], + "title": "CPUs Allocated per Partition", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "color": "red", - "value": 80 + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "unit": "none" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 8, - "y": 51 - }, - "id": 8, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 50, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_partition_cpus_idle", + "interval": "", + "legendFormat": "{{partition}}", + "refId": "A" + } + ], + "title": "CPUs Idle per Partition", + "type": "timeseries" }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_scheduler_queue_size", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Agent Queue Size", - "refId": "A" - } - ], - "title": "Agent Queue Size", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 15, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "SLURM Scheduler Details", + "type": "row" }, - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The number of current active slurmctld threads.", + "fieldConfig": { + "defaults": { + "mappings": [ { - "color": "red", - "value": 100 + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - ] + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" }, - "unit": "none" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 16, - "y": 51 - }, - "id": 26, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_scheduler_dbd_queue_size", - "legendFormat": "DBD Agent Queue length", - "refId": "A" - } - ], - "title": "DBD Agent Queue Length", - "type": "gauge" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 56 - }, - "id": 16, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "refId": "A" - } - ], - "title": "SLURM Scheduler Cycles", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 43 + }, + "id": 7, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_scheduler_threads", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Slurm Scheduler Threads", + "refId": "A" + } + ], + "title": "Slurm Scheduler Threads", + "type": "stat" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "The agent mechanism helps to control communication between the Slrum daemons and the controller for a best effort.", + "fieldConfig": { + "defaults": { + "mappings": [ { - "color": "red", - "value": 80 + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" } - ] + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" }, - "unit": "µs" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 57 - }, - "id": 4, - "options": { - "legend": { - "calcs": [ - "mean", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 43 + }, + "id": 8, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_scheduler_queue_size", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Agent Queue Size", + "refId": "A" + } + ], + "title": "Agent Queue Size", + "type": "stat" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_scheduler_last_cycle", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Scheduler Last Cycle Time", - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "none" }, - "expr": "slurm_scheduler_mean_cycle", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Scheduler Mean Cycle Time", - "refId": "B" - } - ], - "title": "Scheduler Cycles", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 43 + }, + "id": 26, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_scheduler_dbd_queue_size", + "legendFormat": "DBD Agent Queue length", + "refId": "A" + } + ], + "title": "DBD Agent Queue Length", + "type": "gauge" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 48 + }, + "id": 16, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "SLURM Scheduler Cycles", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "color": "red", - "value": 80 + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "µs" }, - "unit": "µs" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 57 - }, - "id": 3, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_scheduler_backfill_last_cycle", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Scheduler Backfill Last Cycle", - "refId": "A" + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_scheduler_last_cycle", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Scheduler Last Cycle Time", + "refId": "A" }, - "expr": "slurm_scheduler_backfill_mean_cycle", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Scheduler Backfill Mean Cycle", - "refId": "B" - } - ], - "title": "Backfill Scheduler Cycles", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_scheduler_mean_cycle", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Scheduler Mean Cycle Time", + "refId": "B" + } + ], + "title": "Scheduler Cycles", + "type": "timeseries" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "color": "red", - "value": 80 + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "µs" }, - "unit": "short" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 64 - }, - "id": 9, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_scheduler_backfill_last_cycle", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Scheduler Backfill Last Cycle", + "refId": "A" }, - "expr": "slurm_scheduler_backfill_depth_mean", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Mean of processed jobs during backfilling scheduling cycles", - "refId": "A" - } - ], - "title": "Scheduler Backfill Depth Mean", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_scheduler_backfill_mean_cycle", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Scheduler Backfill Mean Cycle", + "refId": "B" + } + ], + "title": "Backfill Scheduler Cycles", + "type": "timeseries" }, - "description": "Number of heterogeneous job components started thanks to backfilling since last Slurm start", - "fieldConfig": { - "defaults": { - "displayName": "", - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "unit": "short" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 64 - }, - "id": 34, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "sizing": "auto" - }, - "pluginVersion": "11.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_scheduler_backfilled_heterogeneous_total", - "legendFormat": "Heterogeneous job components", - "refId": "A" - } - ], - "title": " Total backfilled heterogeneous Job components", - "type": "gauge" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 70 - }, - "id": 32, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "refId": "A" - } - ], - "title": "Total Backfilled Jobs", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_scheduler_backfill_depth_mean", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Mean of processed jobs during backfilling scheduling cycles", + "refId": "A" + } + ], + "title": "Scheduler Backfill Depth Mean", + "type": "timeseries" }, - "description": "Number of jobs started thanks to backfilling since last Slurm start.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Number of heterogeneous job components started thanks to backfilling since last Slurm start", + "fieldConfig": { + "defaults": { + "displayName": "", + "mappings": [ { - "color": "red", - "value": 80 + "id": 0, + "op": "=", + "text": "N/A", + "type": 1, + "value": "null" } - ] + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" }, - "unit": "short" + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 56 + }, + "id": 34, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" }, - "overrides": [ + "pluginVersion": "11.1.0", + "targets": [ { - "matcher": { - "id": "byName", - "options": "DELTA: Total number Backfilled Jobs (since last Slurm start)" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "right" - } - ] + "expr": "slurm_scheduler_backfilled_heterogeneous_total", + "legendFormat": "Heterogeneous job components", + "refId": "A" } - ] + ], + "title": " Total backfilled heterogeneous Job components", + "type": "gauge" }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 71 - }, - "id": 28, - "options": { - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "delta(slurm_scheduler_backfilled_jobs_since_start_total[10m])", - "legendFormat": "DELTA: Total number Backfilled Jobs (since last Slurm start)", - "refId": "A" + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "slurm_scheduler_backfilled_jobs_since_start_total", - "legendFormat": "Total number Backfilled Jobs (since last Slurm start)", - "refId": "B" - } - ], - "title": "Total Backfilled Jobs (since last slurm start)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 62 + }, + "id": 32, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "Total Backfilled Jobs", + "type": "row" }, - "description": "Number of jobs started thanks to backfilling since last time stats where reset", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Number of jobs started thanks to backfilling since last Slurm start.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "color": "red", - "value": 80 + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "unit": "short" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "DELTA: Total number Backfilled Jobs (since last Slurm start)" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + } + ] + } + ] }, - "overrides": [ + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 63 + }, + "id": 28, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ { - "matcher": { - "id": "byName", - "options": "DELTA: Total Backfilled Jobs (since last stats cycle start)" + "datasource": { + "type": "prometheus", + "uid": "prometheus" }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "right" - } - ] + "expr": "delta(slurm_scheduler_backfilled_jobs_since_start_total[10m])", + "legendFormat": "DELTA: Total number Backfilled Jobs (since last Slurm start)", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_scheduler_backfilled_jobs_since_start_total", + "legendFormat": "Total number Backfilled Jobs (since last Slurm start)", + "refId": "B" } - ] + ], + "title": "Total Backfilled Jobs (since last slurm start)", + "type": "timeseries" }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 71 - }, - "id": 30, - "options": { - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "6.6.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "Number of jobs started thanks to backfilling since last time stats where reset", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "expr": "slurm_scheduler_backfilled_jobs_since_cycle_total", - "legendFormat": " Total Backfilled Jobs (since last stats cycle start)", - "refId": "A" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "DELTA: Total Backfilled Jobs (since last stats cycle start)" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + } + ] + } + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "6.6.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "slurm_scheduler_backfilled_jobs_since_cycle_total", + "legendFormat": " Total Backfilled Jobs (since last stats cycle start)", + "refId": "A" }, - "expr": "delta(slurm_scheduler_backfilled_jobs_since_cycle_total[10m])", - "legendFormat": "DELTA: Total Backfilled Jobs (since last stats cycle start)", - "refId": "B" - } - ], - "title": "Total Backfilled Jobs (since last stats cycle start)", - "type": "timeseries" - } - ], - "refresh": "30s", - "schemaVersion": 39, - "tags": [ - "Slurm" - ], - "templating": { - "list": [] - }, - "time": { - "from": "now-24h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "delta(slurm_scheduler_backfilled_jobs_since_cycle_total[10m])", + "legendFormat": "DELTA: Total Backfilled Jobs (since last stats cycle start)", + "refId": "B" + } + ], + "title": "Total Backfilled Jobs (since last stats cycle start)", + "type": "timeseries" + } ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "SLURM Dashboard", - "uid": "bX7jn6dZkuytdD", - "version": 1, - "weekStart": "" + "refresh": "30s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Slurm metrics", + "uid": "bX7jn6dZkuytdD", + "version": 1, + "weekStart": "" } diff --git a/soperator/modules/monitoring/templates/helm_values/fb_logs_collector.yaml.tftpl b/soperator/modules/monitoring/templates/helm_values/fb_logs_collector.yaml.tftpl index 77f66733..a3c16ee2 100644 --- a/soperator/modules/monitoring/templates/helm_values/fb_logs_collector.yaml.tftpl +++ b/soperator/modules/monitoring/templates/helm_values/fb_logs_collector.yaml.tftpl @@ -9,7 +9,7 @@ serviceMonitor: scrapeTimeout: 20s dashboards: - enabled: true + enabled: false labelKey: grafana_dashboard labelValue: 1 annotations: {} diff --git a/soperator/modules/slurm/locals.tf b/soperator/modules/slurm/locals.tf index d65240f5..2da3d0e2 100644 --- a/soperator/modules/slurm/locals.tf +++ b/soperator/modules/slurm/locals.tf @@ -3,12 +3,14 @@ locals { repository = { slurm = "oci://cr.eu-north1.nebius.cloud/soperator${!var.operator_stable ? "-unstable" : ""}" mariadb = "https://helm.mariadb.com/mariadb-operator" + raw = "https://bedag.github.io/helm-charts/" } chart = { slurm_cluster = "slurm-cluster" slurm_cluster_storage = "slurm-cluster-storage" slurm_operator_crds = "soperator-crds" + raw = "raw" operator = { slurm = "soperator" @@ -19,6 +21,7 @@ locals { version = { slurm = var.operator_version mariadb = "0.31.0" + raw = "2.0.0" } } @@ -79,4 +82,6 @@ locals { ephemeral_storage = 5 } } + + slurm_node_extra = "{ \"monitoring\": \"https://console.eu.nebius.com/${var.iam_project_id}/compute/instances/$INSTANCE_ID/monitoring\" }" } diff --git a/soperator/modules/slurm/main.tf b/soperator/modules/slurm/main.tf index 9b6c6abb..acbabc55 100644 --- a/soperator/modules/slurm/main.tf +++ b/soperator/modules/slurm/main.tf @@ -136,10 +136,25 @@ resource "helm_release" "slurm_operator" { wait_for_jobs = true } +resource "helm_release" "custom_supervisord_config" { + name = "custom-supervisord-config" + repository = local.helm.repository.raw + chart = local.helm.chart.raw + version = local.helm.version.raw + + create_namespace = true + namespace = var.name + + values = [] + + wait = true +} + resource "helm_release" "slurm_cluster" { depends_on = [ helm_release.slurm_operator, helm_release.slurm_cluster_storage, + helm_release.custom_supervisord_config, ] name = local.helm.chart.slurm_cluster @@ -165,6 +180,8 @@ resource "helm_release" "slurm_cluster" { mount_path = submount.mount_path }] + nfs = var.nfs + nccl_topology_type = var.nccl_topology_type nccl_benchmark = { enable = var.nccl_benchmark_enable @@ -175,7 +192,7 @@ resource "helm_release" "slurm_cluster" { nodes = { accounting = { - enabled = var.accounting_enabled + enabled = var.accounting_enabled use_protected_secret = var.use_protected_secret mariadb_operator = var.accounting_enabled ? { enabled = var.accounting_enabled @@ -209,7 +226,8 @@ resource "helm_release" "slurm_cluster" { ephemeral_storage = one(var.resources.worker).ephemeral_storage_gibibytes - local.resources.munge.ephemeral_storage gpus = one(var.resources.worker).gpus } - shared_memory = var.shared_memory_size_gibibytes + shared_memory = var.shared_memory_size_gibibytes + slurm_node_extra = local.slurm_node_extra } login = { @@ -259,6 +277,16 @@ resource "terraform_data" "wait_for_slurm_cluster" { provisioner "local-exec" { interpreter = ["/bin/bash", "-c"] - command = "kubectl wait --for=jsonpath='{.status.phase}'=Available --timeout 1h -n ${var.name} slurmcluster.slurm.nebius.ai/${var.name}" + command = join( + " ", + [ + "kubectl", "wait", + "--for=jsonpath='{.status.phase}'=Available", + "--timeout", "1h", + "--context", var.k8s_cluster_context, + "-n", var.name, + "slurmcluster.slurm.nebius.ai/${var.name}" + ] + ) } } diff --git a/soperator/modules/slurm/templates/custom_supervisord_cm.yaml.tftpl b/soperator/modules/slurm/templates/custom_supervisord_cm.yaml.tftpl new file mode 100644 index 00000000..98249e69 --- /dev/null +++ b/soperator/modules/slurm/templates/custom_supervisord_cm.yaml.tftpl @@ -0,0 +1,59 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: custom-supervisord-config +data: + supervisord.conf: |- + [supervisord] + nodaemon=true + logfile=/dev/null ; Output only to stdout/stderr + logfile_maxbytes=0 + pidfile=/var/run/supervisord.pid + + [program:slurmd] + priority=1 + stdout_logfile=/dev/fd/1 + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/fd/2 + stderr_logfile_maxbytes=0 + redirect_stderr=true + command=/opt/bin/slurm/slurmd_entrypoint.sh + autostart=true + autorestart=true + startsecs=0 + stopasgroup=true ; Send SIGTERM to all child processes of supervisord + killasgroup=true ; Send SIGKILL to all child processes of supervisord + stopsignal=SIGTERM ; Signal to send to the program to stop it + stopwaitsecs=10 ; Wait for the process to stop before sending a SIGKILL + + [program:sshd] + priority=10 + stdout_logfile=/dev/fd/1 + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/fd/2 + stderr_logfile_maxbytes=0 + redirect_stderr=true + command=/usr/sbin/sshd -D -e -f /mnt/ssh-configs/sshd_config + autostart=true + autorestart=true + startsecs=0 + stopasgroup=true ; Send SIGTERM to all child processes of supervisord + killasgroup=true ; Send SIGKILL to all child processes of supervisord + stopsignal=SIGTERM ; Signal to send to the program to stop it + stopwaitsecs=10 ; Wait for the process to stop before sending a SIGKILL + + [program:dockerd] + priority=1 + stdout_logfile=/dev/fd/1 + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/fd/2 + stderr_logfile_maxbytes=0 + redirect_stderr=true + command=/usr/bin/dockerd + autostart=true + autorestart=true + startsecs=0 + stopasgroup=true ; Send SIGTERM to all child processes of supervisord + killasgroup=true ; Send SIGKILL to all child processes of supervisord + stopsignal=SIGTERM ; Signal to send to the program to stop it + stopwaitsecs=10 ; Wait for the process to stop before sending a SIGKILL diff --git a/soperator/modules/slurm/templates/helm_values/slurm_cluster.yaml.tftpl b/soperator/modules/slurm/templates/helm_values/slurm_cluster.yaml.tftpl index 99a8b015..5e0abf6a 100644 --- a/soperator/modules/slurm/templates/helm_values/slurm_cluster.yaml.tftpl +++ b/soperator/modules/slurm/templates/helm_values/slurm_cluster.yaml.tftpl @@ -108,6 +108,14 @@ volumeSources: emptyDir: sizeLimit: ${nodes.worker.resources.ephemeral_storage}Gi + %{~ if nfs.enabled ~} + - name: nfs + nfs: + path: ${nfs.path} + readOnly: false + server: ${nfs.host} + %{~ endif ~} + %{~ for sub_mount in jail_submounts ~} - name: jail-submount-${sub_mount.name} persistentVolumeClaim: @@ -199,6 +207,9 @@ slurmNodes: size: ${nodes.worker.size} k8sNodeFilterName: ${k8s_node_filters.worker.name} cgroupVersion: v2 + enableGDRCopy: true + slurmNodeExtra: ${slurm_node_extra} + supervisordConfigMapRefName: custom-supervisord-config slurmd: resources: cpu: ${nodes.worker.resources.cpu * 1000}m @@ -214,8 +225,13 @@ slurmNodes: spool: volumeClaimTemplateSpec: null volumeSourceName: worker-spool - %{~ if length(jail_submounts) > 0 ~} + %{~ if length(jail_submounts) > 0 || nfs.enabled ~} jailSubMounts: + %{~ if nfs.enabled ~} + - mountPath: ${nfs.mount_path} + name: nfs + volumeSourceName: nfs + %{~ endif ~} %{~ for sub_mount in jail_submounts ~} - name: ${sub_mount.name} mountPath: ${sub_mount.mount_path} @@ -241,7 +257,6 @@ slurmNodes: - ${key} %{~ endfor ~} %{~ endif ~} - %{~ if length(jail_submounts) > 0 ~} sshd: resources: cpu: ${nodes.login.resources.cpu * 1000}m @@ -252,8 +267,14 @@ slurmNodes: cpu: ${nodes.munge.resources.cpu * 1000}m memory: ${nodes.munge.resources.memory}Gi ephemeralStorage: ${nodes.munge.resources.ephemeral_storage}Gi + %{~ if length(jail_submounts) > 0 || nfs.enabled ~} volumes: jailSubMounts: + %{~ if nfs.enabled ~} + - mountPath: ${nfs.mount_path} + name: nfs + volumeSourceName: nfs + %{~ endif ~} %{~ for sub_mount in jail_submounts ~} - name: ${sub_mount.name} mountPath: ${sub_mount.mount_path} diff --git a/soperator/modules/slurm/variables.tf b/soperator/modules/slurm/variables.tf index ad850a12..eacf1c05 100644 --- a/soperator/modules/slurm/variables.tf +++ b/soperator/modules/slurm/variables.tf @@ -14,6 +14,17 @@ variable "operator_stable" { default = true } +variable "iam_project_id" { + description = "ID of the IAM project." + type = string +} + +variable "k8s_cluster_context" { + description = "Context name of the K8s cluster." + type = string + nullable = false +} + # region PartitionConfiguration variable "slurm_partition_config_type" { @@ -171,6 +182,27 @@ variable "filestores" { # endregion Filestore +# region nfs-server + +variable "nfs" { + type = object({ + enabled = bool + mount_path = optional(string, "/mnt/nfs") + path = optional(string) + host = optional(string) + }) + default = { + enabled = false + } + + validation { + condition = var.nfs.enabled ? var.nfs.path != null && var.nfs.host != null : true + error_message = "NFS path and host must be set." + } +} + +# endregion nfs-server + # region Config variable "shared_memory_size_gibibytes" { diff --git a/soperator/test/README.md b/soperator/test/README.md new file mode 100644 index 00000000..f4da2b58 --- /dev/null +++ b/soperator/test/README.md @@ -0,0 +1,141 @@ +# Slurm cluster testing and benchmarking + +We offer few kinds of checks: +- [Tests](#tests): + - [Quick tests](./quickcheck); +- [Benchmarks](#benchmarks): + - [MLCommons Stable Diffusion benchmark](./mlperf-sd). + - [MLCommons GPT3 benchmark](./mlperf-gpt3). + +## Uploading + +Each of them require you to upload data and scripts to the Slurm cluster. +For this purpose, use [`deliver.sh`](./deliver.sh) script. + +```console +$ ./deliver.sh -h +Usage: ./deliver.sh [FLAGS] [-h] +Required flags: + -t [str ] Test type. One of: + quickcheck + mlperf-sd + mlperf-gpt3 + -u [str ] SSH username + -k [path] Path to private SSH key + -a [str ] Address of login node (IP or domain name) + +Flags: + -p [int ] SSH port of login node. + By default, 22 + + -h Print help and exit +``` + +It accepts following parameters: +- `-t` - type of the test you want to run. It must be one of: + - `quickcheck` - for quick tests + - `mlperf-sd` - for MLCommons Stable Diffusion benchmark + - `mlperf-gpt3` - for MLCommons GPT3 benchmark +- `-u` - SSH **username** for login nodes +- `-k` - path to the private part of the keypair used for **username** auth +- `-a` - address of the Slurm login node. It could be either IP address, or domain name you gave it in `/etc/hosts` +- `-p` - SSH port of the login node + +It will upload data and scripts of chosen test type to the Slurm cluster alongside with [common](./common) scripts. +You can upload all test types sequentially. + +Once it's uploaded, you can find these tests inside `/opt/slurm-test` directory on your cluster. + +## Tests + +For quick check tests, see its [README](./quickcheck/README.md). + +## Benchmarks + +Benchmarks need datasets and checkpoints to be downloaded to the cluster. +As well as some configuration needed to be done before running training. + +There are `init.sh` scripts for both [Stable Diffusion](./mlperf-sd/init.sh) and [GPT3](./mlperf-gpt3/init.sh) MLCommons benchmarks. + +1. `cd` to the directory of benchmark + + For Stable Diffusion: + ```shell + cd mlperf-sd + ``` + For GPT3: + ```shell + cd mlperf-gpt3 + ``` +2. Run `init.sh` + + ```console + $ ./init.sh -h + Usage: ./init.sh [-h] + Required flags: + -d [path] Path to data directory + This is where datasets and checkpoints will be stored + + Flags: + -h Print help and exit + ``` + +These scripts only require path to the data directory to be provided. +It's better to have it on dedicated shared storage that can handle multiple connections from Slurm workers +(aka Jail sub-mounts). + +
+Creating storage for MLCommons benchmarks + +You can create storage within this Terraform recipe, as in provided [terraform.tfvars](../installations/example/terraform.tfvars): + +```terraform +# Shared filesystems to be mounted inside jail. +# --- +filestore_jail_submounts = [{ + name = "mlcommons-data" + mount_path = "/data" + spec = { + size_gibibytes = 4096 + block_size_kibibytes = 32 + } +}] +``` + +Or, you can use the same filestore for multiple clusters. +In order to do this, create it on your own with the Nebius CLI + +```shell +nebius compute filesystem create \ + --parent-id "${NEBIUS_PROJECT_ID}" \ + --name 'shared-mlcommons-data' \ + --type 'network_ssd' \ + --size-bytes 4398046511104 +``` + +And provide its ID to the recipe as follows: + +```terraform +# Shared filesystems to be mounted inside jail. +# --- +filestore_jail_submounts = [{ + name = "mlcommons-data" + mount_path = "/data" + existing = { + id = "" + } +}] +``` + +It will attach the storage to your cluster at `/data` directory. +
+ +The init script will: +- Create needed directories +- Configure **rclone** and **enroot** +- Configure running scripts +- Run Slurm jobs to download datasets and checkpoints + +Once downloading is done, you can proceed with following steps of each particular benchmark: +- [Stable Diffusion](./mlperf-sd) +- [GPT3](./mlperf-gpt3) diff --git a/soperator/test/common/enroot.sh b/soperator/test/common/enroot.sh new file mode 100755 index 00000000..5d979828 --- /dev/null +++ b/soperator/test/common/enroot.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +ENROOT_CONFIG_DIR='/root/.config/enroot' +ENROOT_CONFIG_FILE="${ENROOT_CONFIG_DIR}/.credentials" + +enroot_create_config_dir() { + mkdir -p ${ENROOT_CONFIG_DIR} +} + +enroot_create_config() { + ENDPOINT=$1;shift; + USER=$1;shift; + PASSWORD=$1;shift; + echo "machine ${ENDPOINT} login ${USER} password ${PASSWORD}" > ${ENROOT_CONFIG_FILE} +} diff --git a/soperator/test/common/env.sh b/soperator/test/common/env.sh new file mode 100755 index 00000000..ad632bc2 --- /dev/null +++ b/soperator/test/common/env.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +export TEST_DIR='/opt/slurm-test' + +export TEST_QUICKCHECK_DIR="${TEST_DIR}/quickcheck" +export TEST_MLPERF_SD_DIR="${TEST_DIR}/mlperf-sd" +export TEST_MLPERF_GPT3_DIR="${TEST_DIR}/mlperf-gpt3" + +TEST_RESULTS_DIR_NAME='results' +export TEST_QUICKCHECK_RESULTS_DIR="${TEST_QUICKCHECK_DIR}/${TEST_RESULTS_DIR_NAME}" +export TEST_MLPERF_SD_RESULTS_DIR="${TEST_MLPERF_SD_DIR}/${TEST_RESULTS_DIR_NAME}" +export TEST_MLPERF_GPT3_RESULTS_DIR="${TEST_MLPERF_GPT3_DIR}/${TEST_RESULTS_DIR_NAME}" + +export NEBIUS_S3_ENDPOINT='storage.eu-north1.nebius.cloud' +export NEBIUS_S3_BUCKET_NAME='slurm-mlperf-training' + +export NEBIUS_CR_ENDPOINT='cr.eu-north1.nebius.cloud' +export NEBIUS_CR_REGISTRY='slurm-mlperf-training' +export NEBIUS_CR_USER='iam' +export NEBIUS_CR_PASSWORD='cafebabyfordocker' + +export RCLONE_PROFILE_NEBIUS_S3='nebius-s3' diff --git a/soperator/test/common/printer.sh b/soperator/test/common/printer.sh new file mode 100755 index 00000000..a06b0654 --- /dev/null +++ b/soperator/test/common/printer.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +h1() { echo -e "$(tput setab 12)$(tput setaf 0)$(tput bold) ${1} $(tput sgr0)"; } +h2() { echo -e "$(tput setab 14)$(tput setaf 0) ${1} $(tput sgr0)"; } +hdone() { echo -e "$(tput setab 10)$(tput setaf 0) Done $(tput sgr0)"; } +herror() { echo -e "$(tput setab 1)$(tput bold) ERROR: ${1} $(tput sgr0)"; } diff --git a/soperator/test/common/rclone.sh b/soperator/test/common/rclone.sh new file mode 100755 index 00000000..4affedcd --- /dev/null +++ b/soperator/test/common/rclone.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +rclone_install() { + sudo -v + curl https://rclone.org/install.sh | sudo bash \ + || true +} + +rclone_create_config() { + PROFILE_NAME=$1;shift; + ENDPOINT=$1;shift; + rclone \ + config \ + create \ + "${PROFILE_NAME}" \ + s3 \ + provider=AWS \ + endpoint="https://${ENDPOINT}:443" \ + || true +} diff --git a/soperator/test/common/sync.sh b/soperator/test/common/sync.sh new file mode 100755 index 00000000..a411b26d --- /dev/null +++ b/soperator/test/common/sync.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +#SBATCH -J sync-data +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --output=results/%x_%J.out +#SBATCH --error=results/%x_%J.out + +usage() { + echo "Usage: sbatch ${0} [-h]" >&2 + echo 'Required flags:' >&2 + echo ' -f [path] Source directory path' >&2 + echo ' It could be "[:]"' >&2 + echo ' -t [path] Destination directory path' >&2 + echo ' It could be "[:]"' >&2 + echo '' >&2 + echo 'Flags:' >&2 + echo ' -h Print help and exit' >&2 + exit 1 +} + +while getopts f:t:h flag +do + case "${flag}" in + f) FROM=${OPTARG};; + t) TO=${OPTARG};; + h) usage;; + *) usage;; + esac +done + +if [ -z "$FROM" ] || [ -z "$TO" ]; then + usage +fi + +srun -N 1 -n 1 \ + rclone copy "${FROM}" "${TO}" \ + --progress \ + --links \ + --use-mmap \ + --bwlimit=1000M \ + --transfers=64 \ + --buffer-size=512Mi \ + --multi-thread-streams=24 --multi-thread-chunk-size=128Mi --multi-thread-cutoff=4Gi --multi-thread-write-buffer-size=256Mi \ + --checkers=16 \ + --use-server-modtime --fast-list --s3-no-head-object --s3-chunk-size=32M + +echo "Done" diff --git a/soperator/test/deliver.sh b/soperator/test/deliver.sh new file mode 100755 index 00000000..ed9f4d52 --- /dev/null +++ b/soperator/test/deliver.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +set -e + +usage() { + echo "Usage: ${0} [FLAGS] [-h]" >&2 + echo 'Required flags:' >&2 + echo ' -t [str ] Test type. One of:' >&2 + echo ' quickcheck' >&2 + echo ' mlperf-sd' >&2 + echo ' mlperf-gpt3' >&2 + echo ' -u [str ] SSH username' >&2 + echo ' -k [path] Path to private SSH key' >&2 + echo ' -a [str ] Address of login node (IP or domain name)' >&2 + echo '' >&2 + echo 'Flags:' >&2 + echo ' -p [int ] SSH port of login node.' >&2 + echo ' By default, 22' >&2 + echo '' >&2 + echo ' -h Print help and exit' >&2 + exit 1 +} + +while getopts t:u:k:a:p:n:h flag +do + case "${flag}" in + t) TEST_TYPE=${OPTARG};; + u) USER=${OPTARG};; + k) KEY=${OPTARG};; + a) ADDRESS=${OPTARG};; + p) PORT=${OPTARG};; + h) usage;; + *) usage;; + esac +done + +if [ -z "${TEST_TYPE}" ] || [ -z "${USER}" ] || [ -z "${KEY}" ] || [ -z "${ADDRESS}" ]; then + usage +fi + +if [ "${TEST_TYPE}" != "quickcheck" ] && [ "${TEST_TYPE}" != "mlperf-sd" ] && [ "${TEST_TYPE}" != "mlperf-gpt3" ]; then + usage +fi + +if [ -z "${PORT}" ]; then + PORT=22 +fi + +source common/env.sh +source common/printer.sh + +h1 "Creating directory for tests on ${ADDRESS}..." +ssh \ + -i "${KEY}" \ + -P "${PORT}" \ + "${USER}@${ADDRESS}" \ + mkdir -p "${TEST_DIR}" +hdone + +h1 "Transferring common files as user '${USER}' with key '${KEY}' to ${ADDRESS}:${PORT}..." +scp \ + -i "${KEY}" \ + -P "${PORT}" \ + -r \ + ./common/* \ + "${USER}@${ADDRESS}":"${TEST_DIR}/common" +hdone + +h1 "Transferring files of ${TEST_TYPE} as user '${USER}' with key '${KEY}' to ${ADDRESS}:${PORT}..." +scp \ + -i "${KEY}" \ + -P "${PORT}" \ + -r \ + "./${TEST_TYPE}"/* \ + "${USER}@${ADDRESS}":"${TEST_DIR}/${TEST_TYPE}" +hdone diff --git a/soperator/test/mlperf-gpt3/README.md b/soperator/test/mlperf-gpt3/README.md new file mode 100644 index 00000000..b86e7175 --- /dev/null +++ b/soperator/test/mlperf-gpt3/README.md @@ -0,0 +1,28 @@ +# Run MLCommons GPT3 benchmark + +Wait for the dataset and checkpoint download jobs finished. You can track the progress by running: + +```shell +watch -n 15 squeue +``` + +Or checking the sync job outputs: + +```shell +tail -f ./results/sync-.out +``` + +Once they're done, go to the gpt3 directory and check defaults of the runner: + +```shell +cd gpt3-impl-4.0-nvidia +./start.sh -h +``` + +Provide some parameters to the `start.sh` if you're not happy with defaults and start the benchmark job. + +```shell +./scripts/slurm/sbatch.sh [] +``` + +You can see the benchmark output in the log file created in `/opt/slurm-test/mlperf-gpt3/results` directory. diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/.gitignore b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/.gitignore similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/.gitignore rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/.gitignore diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/Dockerfile b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/Dockerfile similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/Dockerfile rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/Dockerfile diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/README.md b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/README.md similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/README.md rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/README.md diff --git a/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/VERSION b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/VERSION new file mode 100644 index 00000000..d00491fd --- /dev/null +++ b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/VERSION @@ -0,0 +1 @@ +1 diff --git a/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/build_and_push.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/build_and_push.sh new file mode 100755 index 00000000..6f90f411 --- /dev/null +++ b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/build_and_push.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -e + +REGISTRY='cr.eu-north1.nebius.cloud/slurm-mlperf-training' +IMAGE_NAME='gpt3-4.0-nvidia' +TAG="$(cat ./VERSION)" + +echo 'Build image' +docker build -f ./Dockerfile -t "${IMAGE_NAME}:${TAG}" --platform linux/amd64 -m 64G . + +echo 'Tag image' +docker tag "${IMAGE_NAME}:${TAG}" "${REGISTRY}/${IMAGE_NAME}:${TAG}" + +echo 'Push image' +docker push "${REGISTRY}/${IMAGE_NAME}:${TAG}" diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/custom.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/custom.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/custom.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/custom.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/data_prefix/benchmark_c4.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/data_prefix/benchmark_c4.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/data_prefix/benchmark_c4.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/data_prefix/benchmark_c4.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/data_prefix/synthetic.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/data_prefix/synthetic.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/data_prefix/synthetic.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/data_prefix/synthetic.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/megatron_gpt_config_custom.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/megatron_gpt_config_custom.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/megatron_gpt_config_custom.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/megatron_gpt_config_custom.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/nccl/custom_communicator_cta.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/nccl/custom_communicator_cta.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/nccl/custom_communicator_cta.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/nccl/custom_communicator_cta.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/optim/distributed_fused_adam.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/optim/distributed_fused_adam.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/optim/distributed_fused_adam.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/optim/distributed_fused_adam.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/optim/fused_adam.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/optim/fused_adam.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/optim/fused_adam.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/optim/fused_adam.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/a100tp4mbs1.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/a100tp4mbs1.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/a100tp4mbs1.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/a100tp4mbs1.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/a100tp4mbs2.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/a100tp4mbs2.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/a100tp4mbs2.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/a100tp4mbs2.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp4mbs1.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp4mbs1.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp4mbs1.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp4mbs1.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp4mbs2.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp4mbs2.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp4mbs2.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp4mbs2.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp8mbs1.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp8mbs1.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp8mbs1.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp8mbs1.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp8mbs2.yaml b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp8mbs2.yaml similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp8mbs2.yaml rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/conf/tp_overlap/h100tp8mbs2.yaml diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/config_H100x8_NODEx64_TPx4_PPx8_VPx4_MINBSx128_MICBSx2.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H100x8_NODEx64_TPx4_PPx8_VPx4_MINBSx128_MICBSx2.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/config_H100x8_NODEx64_TPx4_PPx8_VPx4_MINBSx128_MICBSx2.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H100x8_NODEx64_TPx4_PPx8_VPx4_MINBSx128_MICBSx2.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/config_H100x8_NODEx64_default.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H100x8_NODEx64_default.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/config_H100x8_NODEx64_default.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H100x8_NODEx64_default.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/config_H100x8_NODEx8_TPx8_PPx8_VPx4_MINBSx1536_MICBSx1.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H100x8_NODEx8_TPx8_PPx8_VPx4_MINBSx1536_MICBSx1.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/config_H100x8_NODEx8_TPx8_PPx8_VPx4_MINBSx1536_MICBSx1.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H100x8_NODEx8_TPx8_PPx8_VPx4_MINBSx1536_MICBSx1.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/config_H100x8_NODEx8_default.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H100x8_NODEx8_default.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/config_H100x8_NODEx8_default.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H100x8_NODEx8_default.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/config_H200x8_NODEx64_TPx2_PPx8_VPx4_MINBSx128_MICBSx2.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H200x8_NODEx64_TPx2_PPx8_VPx4_MINBSx128_MICBSx2.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/config_H200x8_NODEx64_TPx2_PPx8_VPx4_MINBSx128_MICBSx2.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H200x8_NODEx64_TPx2_PPx8_VPx4_MINBSx128_MICBSx2.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/config_H200x8_NODEx64_default.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H200x8_NODEx64_default.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/config_H200x8_NODEx64_default.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H200x8_NODEx64_default.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_TPx4_PPx8_VPx4_MINBSx128_MICBSx2.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_TPx4_PPx8_VPx4_MINBSx128_MICBSx2.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_TPx4_PPx8_VPx4_MINBSx128_MICBSx2.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_TPx4_PPx8_VPx4_MINBSx128_MICBSx2.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_TPx8_PPx8_VPx4_MINBSx3072_MICBSx1.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_TPx8_PPx8_VPx4_MINBSx3072_MICBSx1.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_TPx8_PPx8_VPx4_MINBSx3072_MICBSx1.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_TPx8_PPx8_VPx4_MINBSx3072_MICBSx1.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_default.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_default.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_default.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_H200x8_NODEx8_default.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/config_common.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_common.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/config_common.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_common.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/config_fp8.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_fp8.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/config_fp8.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/config_fp8.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/custom_callbacks.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/custom_callbacks.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/custom_callbacks.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/custom_callbacks.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/custom_optimizer.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/custom_optimizer.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/custom_optimizer.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/custom_optimizer.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/custom_schedulers.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/custom_schedulers.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/custom_schedulers.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/custom_schedulers.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/__init__.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/__init__.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/__init__.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/__init__.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/core.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/core.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/core.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/core.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/dict_utils.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/dict_utils.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/dict_utils.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/dict_utils.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/mapping.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/mapping.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/mapping.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/mapping.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/optimizer.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/optimizer.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/optimizer.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/optimizer.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/serialization.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/serialization.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/serialization.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/serialization.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/__init__.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/__init__.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/__init__.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/__init__.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/base.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/base.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/base.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/base.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/tensorstore.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/tensorstore.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/tensorstore.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/tensorstore.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/two_stage.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/two_stage.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/two_stage.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/two_stage.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/zarr.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/zarr.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/zarr.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/strategies/zarr.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/utils.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/utils.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/dist_checkpointing/utils.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/dist_checkpointing/utils.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/megatron_gpt_pretraining_custom.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/megatron_gpt_pretraining_custom.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/megatron_gpt_pretraining_custom.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/megatron_gpt_pretraining_custom.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/mlperf_logger.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/mlperf_logger.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/mlperf_logger.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/mlperf_logger.py diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/requirements.txt b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/requirements.txt similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/requirements.txt rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/requirements.txt diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/run.sub b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/run.sub similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/run.sub rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/run.sub diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/run_and_time.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/run_and_time.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/run_and_time.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/run_and_time.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/scripts/tracebacks/dump_core_node.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/scripts/tracebacks/dump_core_node.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/scripts/tracebacks/dump_core_node.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/scripts/tracebacks/dump_core_node.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/scripts/tracebacks/dump_tracebacks_node.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/scripts/tracebacks/dump_tracebacks_node.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/scripts/tracebacks/dump_tracebacks_node.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/scripts/tracebacks/dump_tracebacks_node.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/scripts/tracebacks/hang_monitor.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/scripts/tracebacks/hang_monitor.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/scripts/tracebacks/hang_monitor.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/scripts/tracebacks/hang_monitor.sh diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/scripts/tracebacks/postprocess_core_dumps.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/scripts/tracebacks/postprocess_core_dumps.sh similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/scripts/tracebacks/postprocess_core_dumps.sh rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/scripts/tracebacks/postprocess_core_dumps.sh diff --git a/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/start.sh b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/start.sh new file mode 100755 index 00000000..41c8bf12 --- /dev/null +++ b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/start.sh @@ -0,0 +1,254 @@ +#!/bin/bash + +set -e + +# region Defaults + +: "${NODE_COUNT:=8}" +: "${GPU_TYPE:=H100}" +: "${DATA_DIR:=/mlperf-gpt3}" +: "${BASE_LOG_DIR:=./logs}" +: "${CONTAINER_IMAGE:=cr.eu-north1.nebius.cloud#slurm-mlperf-training/gpt3-4.0-nvidia:$(cat ./VERSION)}" + +# endregion Defaults + +usage() { + echo "Usage: ${0} [FLAGS] [-h]" >&2 + echo 'Flags:' >&2 + echo ' -N [int ] Number of worker nodes' >&2 + echo " By default, ${NODE_COUNT}" >&2 + echo ' -w [str ] Worker node list' >&2 + echo ' e.g. "worker-[0-63]"' >&2 + echo ' -G [str ] GPU type. One of' >&2 + echo ' H100' >&2 + echo ' H200' >&2 + echo ' -c [path] Path to the config file' >&2 + echo " By default, config_${GPU_TYPE}x8_NODEx${NODE_COUNT}_default.sh" >&2 + echo ' -D [path] Path to the data directory' >&2 + echo ' This is where datasets and checkpoints are stored' >&2 + echo " By default, ${DATA_DIR}" >&2 + echo ' -L [path] Path to the directory for logs' >&2 + echo " By default, ${BASE_LOG_DIR}" >&2 + echo ' -S [path] Directory to store shared image cache' >&2 + echo ' By default, none' >&2 + echo ' -i [path] Path to the container image' >&2 + echo " By default, ${CONTAINER_IMAGE}" >&2 + echo ' -e [str ] Experiment name to attach to job name' >&2 + echo ' By default, none' >&2 + echo '' >&2 + echo ' -q Whether to run training quickly without additional tests' >&2 + echo ' -r Whether to remove previous log files' >&2 + echo ' -d Whether to enable debug logs' >&2 + echo ' -p Whether to run only one step with NSYS profiling' >&2 + echo '' >&2 + echo ' -h Print help and exit' >&2 + exit 1 +} + +while getopts N:w:G:c:D:L:S:i:e:qrdph flag +do + case "${flag}" in + N) NODE_COUNT=${OPTARG};; + w) NODE_LIST=${OPTARG};; + G) GPU_TYPE=${OPTARG};; + c) CONFIG_FILE=${OPTARG};; + D) DATA_DIR=${OPTARG};; + L) BASE_LOG_DIR=${OPTARG};; + S) SHARED_IMAGE_CACHE_DIR=${OPTARG};; + i) CONTAINER_IMAGE=${OPTARG};; + e) EXPERIMENT_NAME=${OPTARG};; + q) QUICK_START=1;; + r) REMOVE_LOGS=1;; + d) DEBUG=1;; + p) NSYS_PROFILING=1;; + h) usage;; + *) usage;; + esac +done + +if [ "${GPU_TYPE}" != 'H100' ] && [ "${GPU_TYPE}" != 'H200' ]; then + usage +fi + +: "${TEST_DIR:=/opt/slurm-test}" +source "${TEST_DIR}/common/printer.sh" + +# region Cluster name + +h1 'Extracting cluster name...' + +MLPERF_CLUSTER_NAME=$(scontrol show config | grep -E 'ClusterName\s+' | awk -F' = ' '{print $2}') +export MLPERF_CLUSTER_NAME +h2 "${MLPERF_CLUSTER_NAME}" + +hdone + +# endregion Cluster name + +# region Node list + +h1 'Configuring node list...' + +if [ -n "${NODE_LIST}" ]; then + h2 'Selecting nodes...' + + SELECTED_NODES=$(sinfo -N --nodes="${NODE_LIST}" --format='%N %t' --noheader | uniq) + EXIT_CODE=$? + + h2 'Selected nodes:' + echo "${SELECTED_NODES}" + + if [ "${EXIT_CODE}" -ne '0' ]; then + herror "sinfo: exit code ${EXIT_CODE}" + exit 1 + fi + + SELECTED_NODE_COUNT=$(echo "${SELECTED_NODES}" | wc -l) + if [ "${SELECTED_NODE_COUNT}" -ne "${NODE_COUNT}" ]; then + herror "Requested node count = ${NODE_COUNT} ('-N ${NODE_COUNT}') doesn't match the count of selected nodes = ${SELECTED_NODE_COUNT} ('-w ${NODE_LIST}')" + exit 1 + fi +fi + +NODE_ALLOCATION="--nodes=${NODE_COUNT}" +if [ -n "${NODE_LIST}" ]; then + NODE_ALLOCATION="--nodelist=${NODE_LIST}" +fi + +hdone + +# endregion Node list + +# region Config + +h1 'Applying config...' + +if [ -z "${CONFIG_FILE}" ]; then + CONFIG_FILE="config_${GPU_TYPE}x8_NODEx${NODE_COUNT}_default.sh" +fi + +h2 "${CONFIG_FILE}" +source "${CONFIG_FILE}" + +hdone + +# endregion Config + +# region Paths + +h1 'Configuring paths...' + +DATASET_DIR=${DATA_DIR}/gpt3-dataset-4.0 +CHECKPOINT_DIR=${DATA_DIR}/gpt3-checkpoint-4.0 + +export CONT="${CONTAINER_IMAGE}" +export PREPROC_DATA="${DATASET_DIR}/preprocessed_c4_spm" +export SPM="${DATASET_DIR}/spm/c4_en_301_5Mexp2_spm.model" +export LOAD_CHECKPOINTS_PATH="${CHECKPOINT_DIR}/ckpt4000-consumed_samples=0" +export LOGDIR="${BASE_LOG_DIR}" +export CONTAINER_PRELOAD_SHARED_PATH="${SHARED_IMAGE_CACHE_DIR}" + +hdone + +# endregion Paths + +# region Training + +h1 'Configuring training parameters...' +export NEXP=1 +export NCCL_SOCKET_IFNAME='eth0' +export TORCH_CUDA_ARCH_LIST='9.0' + +if [[ "${QUICK_START}" -eq 1 ]]; then + h2 'Disabling everything except training...' + export WARMUP_STEPS=0 + export TRAIN_ONLY=1 + export NCCL_TEST=0 + export TIME_TAGS=0 + export NVTX_FLAG=0 + export SYNTH_DATA=0 + export EPOCH_PROF=0 + export API_LOGGING=0 + export CLEAR_CACHES=0 + export CHECK_COMPLIANCE=0 + export ATTEMPT_CUDA_GDB_CORE_DUMP=0 + export POSTPROCESS_CUDA_GDB_CORE_DUMP=0 + export REMOVE_CUDA_GDB_CORE_DUMP=0 + export HANG_MONITOR_TIMEOUT=0 + export JET=0 +fi + +hdone + +# endregion Training + +# region Job name + +h1 'Configuring job name...' + +if [ -z "${EXPERIMENT_NAME}" ]; then + h2 'Using experiment-less name:' + JOB_NAME='gpt3' + JOB_OUTPUT='gpt3-%j.out' +else + JOB_NAME="gpt3-${EXPERIMENT_NAME}" + JOB_OUTPUT="gpt3-%j-${EXPERIMENT_NAME}.out" +fi +JOB_OUTPUT="${BASE_LOG_DIR}/${JOB_OUTPUT}" + +echo "Job name: ${JOB_NAME}" +echo "Job out: ${JOB_NAME}" + +hdone + +# endregion Job name + +# region Logging & Profiling + +h1 'Configuring logging & profiling...' + +if [[ "${REMOVE_LOGS}" -eq 1 ]]; then + h2 'Removing previous logs...' + rm "${BASE_LOG_DIR}"/gpt3-*.out || true + rm -rf ./api_logs/ || true +fi + +if [[ "${DEBUG}" -eq 1 ]]; then + h2 'Enabling debug logging...' + export NCCL_DEBUG=INFO + export GDRCOPY_ENABLE_LOGGING=1 + export GDRCOPY_LOG_LEVEL=1 +fi + +if [[ "${NSYS_PROFILING}" -eq 1 ]]; then + h2 'Configuring NSYS profiler...' + export NVTX_FLAG=1 + export PROFILE=True + export PROFILE_START_STEP=10 + export PROFILE_END_STEP=11 + export PROFILE_RANKS="0,1,2,3,4,5,6,7" + + # Early stopping + export TARGET_LOG_PPL=2.75 +fi + +hdone + +# endregion Logging & Profiling + +h1 'Submitting Slurm job...' + +sbatch \ + -t "${WALLTIME}" \ + -J "${JOB_NAME}" \ + --output="${JOB_OUTPUT}" \ + --export="ALL" \ + "${NODE_ALLOCATION}" \ + --ntasks-per-node="${SBATCH_GPUS_PER_NODE}" \ + ${EXCLUSIVE:+--exclusive} \ + run.sub + +h2 'Current Slurm job queue:' +squeue + +hdone diff --git a/soperator/mlperf/gpt3-impl-4.0-nvidia/unified_checkpointing.py b/soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/unified_checkpointing.py similarity index 100% rename from soperator/mlperf/gpt3-impl-4.0-nvidia/unified_checkpointing.py rename to soperator/test/mlperf-gpt3/gpt3-impl-4.0-nvidia/unified_checkpointing.py diff --git a/soperator/test/mlperf-gpt3/init.sh b/soperator/test/mlperf-gpt3/init.sh index fac7f16e..808aa609 100755 --- a/soperator/test/mlperf-gpt3/init.sh +++ b/soperator/test/mlperf-gpt3/init.sh @@ -2,177 +2,128 @@ set -e -usage() { echo "usage: ${0} [-g ] [-h]" >&2; exit 1; } - -while getopts g:u:k:a:p:n:h flag +usage() { + echo "Usage: ${0} [-h]" >&2 + echo 'Required flags:' >&2 + echo ' -d [path] Path to data directory' >&2 + echo ' This is where datasets and checkpoints will be stored' >&2 + echo '' >&2 + echo 'Flags:' >&2 + echo ' -h Print help and exit' >&2 + exit 1 +} + +while getopts d:h flag do - case "${flag}" in - g) GPUS=${OPTARG};; - h) usage;; - *) usage;; - esac + case "${flag}" in + d) DATA_DIR=${OPTARG};; + h) usage;; + *) usage;; + esac done -if [ -z "${GPUS}" ]; then - GPUS=64 +if [ -z "${DATA_DIR}" ]; then + usage fi -h1() { echo -e "$(tput setab 12)$(tput setaf 0)$(tput bold) ${1} $(tput sgr0)"; } -h2() { echo -e "$(tput setab 14)$(tput setaf 0) ${1} $(tput sgr0)"; } -hdone() { echo -e "$(tput setab 10)$(tput setaf 0) Done $(tput sgr0)"; } - -ROOT_DIR="${1:-/gpt3}" -DATA_DIR="${ROOT_DIR}/dataset" -CKPT_DIR="${ROOT_DIR}/checkpoint" -TRAIN_DIR="${ROOT_DIR}/training" -LOG_DIR="${ROOT_DIR}/logs" +source ../common/enroot.sh +source ../common/env.sh +source ../common/printer.sh +source ../common/rclone.sh # region Dirs h1 'Creating directories...' -mkdir -p "${DATA_DIR}" -mkdir -p "${CKPT_DIR}" -mkdir -p "${LOG_DIR}" +DATASET_NAME='gpt3-dataset-4.0' +DATASET_DIR="${DATA_DIR}/${DATASET_NAME}" -hdone +CHECKPOINT_NAME='gpt3-checkpoint-4.0' +CHECKPOINT_DIR="${DATA_DIR}/${CHECKPOINT_NAME}" -# endregion Dirs - -# region MLCommons training repo - -h1 'Preparing MLCommons/training repository...' - -h2 'Cloning repository...' -git clone --depth 1 https://github.com/mlcommons/training "${TRAIN_DIR}" -pushd "${TRAIN_DIR}" - git fetch --depth 1 origin 00f04c57d589721aabce4618922780d29f73cf4e - git checkout 00f04c57d589721aabce4618922780d29f73cf4e -popd - -h2 'Patching...' -cp patches/megatron.diff "${TRAIN_DIR}/large_language_model/megatron-lm/" -pushd "${TRAIN_DIR}/large_language_model/megatron-lm" - patch -i megatron.diff -popd - -h2 'Cleaning up...' -pushd "${TRAIN_DIR}" - rm -rf \ - .github \ - benchmark_readme_template.md \ - image_classification \ - image_segmentation \ - language_model \ - large_language_model/paxml \ - object_detection\ - recommendation\ - recommendation_v2\ - reference_results.md\ - retired_benchmarks\ - rnn_speech_recognition\ - single_stage_detector\ - stable_diffusion -popd +h2 "Datasets..." \ + && mkdir -p "${DATASET_DIR}" +h2 "Checkpoints..." \ + && mkdir -p "${CHECKPOINT_DIR}" +h2 'Results...' \ + && mkdir -p "${TEST_MLPERF_GPT3_RESULTS_DIR}" hdone -# endregion MLCommons training repo +# endregion Dirs # region Rclone h1 'Configuring Rclone...' h2 'Installing...' -curl https://rclone.org/install.sh | bash +rclone_install h2 'Creating config...' -rclone \ - config \ - create \ - mlc-training \ - s3 \ - provider=Cloudflare \ - access_key_id=76ea42eadb867e854061a1806220ee1e \ - secret_access_key=a53625c4d45e3ca8ac0df8a353ea3a41ffc3292aa25259addd8b7dc5a6ce2936 \ - endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com +rclone_create_config "${RCLONE_PROFILE_NEBIUS_S3}" "${NEBIUS_S3_ENDPOINT}" hdone # endregion Rclone -# region Dataset - -h1 'Preparing dataset...' +#region Enroot -pushd "${DATA_DIR}" - h2 'Downloading BPE related files...' - mkdir "bpe" - wget -O bpe/vocab.json https://huggingface.co/gpt2/resolve/main/vocab.json - wget -O bpe/merges.txt https://huggingface.co/gpt2/resolve/main/merges.txt +h1 'Configuring enroot...' - h2 'Downloading C4:en dataset archive...' - rclone \ - copy \ - -P \ - mlc-training:mlcommons-training-wg-public/gpt3/megatron-lm/dataset_c4_spm.tar \ - ./ +h2 'Creating config directory...' +enroot_create_config_dir - h2 'Unpacking C4:en dataset archive...' - tar --blocking-factor=8192 -xvf dataset_c4_spm.tar - - h2 'Cleaning up...' - rm -rf \ - dataset_c4_spm.tar \ - spm -popd +# h2 'Creating config...' +# enroot_create_config "${NEBIUS_CR_ENDPOINT}" "${NEBIUS_CR_USER}" "${NEBIUS_CR_PASSWORD}" hdone -# endregion Dataset - -# region Checkpoint - -# TODO: Describe checkpoint downloading process - -# endregion Checkpoint +#endregion Enroot -# region Run script +# region Test runner -h1 'Generating run script...' +h1 'Patching test runner...' +SBATCH_RUNNER_PATH='gpt3-impl-4.0-nvidia/start.sh' -cat > "${ROOT_DIR}/run.sh" << EOF -export MASTER_ADDR='worker-0' -export WORLD_SIZE="${GPUS}" +h2 'Test dir...' +sed -i -E \ + -e "s|(TEST_DIR:=)[^}]*|\1${TEST_DIR}|" \ + ${SBATCH_RUNNER_PATH} -export COM_DIR="${DATA_DIR}/preprocessed_c4_spm" -export BPE_DIR="${DATA_DIR}/bpe" +h2 'Log dir...' +sed -i -E \ + -e "s|(BASE_LOG_DIR:=)[^}]*|\1${TEST_MLPERF_GPT3_RESULTS_DIR}|" \ + ${SBATCH_RUNNER_PATH} -export EXTERNAL_MODEL_CHECKPOINT_DIR="${CKPT_DIR}/bf16/ckpt4000" -export USE_BF16='true' +h2 'Container image...' +sed -i -E \ + -e "s|(CONTAINER_IMAGE:=)[^}]*|\1${NEBIUS_CR_ENDPOINT}#${NEBIUS_CR_REGISTRY}/gpt3-4.0-nvidia:\$(cat ./VERSION)|" \ + ${SBATCH_RUNNER_PATH} -export LOG_DIR="${LOG_DIR}" +h2 'Data dir...' +sed -i -E \ + -e "s|(DATA_DIR:=)[^}]*|\1${DATA_DIR}|" \ + ${SBATCH_RUNNER_PATH} -export CONT='cr.nemax.nebius.cloud/crn02m09qcuniqo1fh1s/llm-gpt' +# endregion Test runner -pushd "${TRAIN_DIR}/large_language_model/megatron-lm" - sbatch run_gpt3.sh "\${LOG_DIR}" "\${BPE_DIR}" "\${CONT}" -popd -EOF - -chmod +x "${ROOT_DIR}/run.sh" - -hdone +# region Data -# endregion Run script +h1 'Downloading data...' -# region Finalizing +h2 'Creating a job to download GPT3 dataset...' +sbatch ../common/sync.sh \ + -f "${RCLONE_PROFILE_NEBIUS_S3}:${NEBIUS_S3_BUCKET_NAME}/${DATASET_NAME}" \ + -t "${DATASET_DIR}" -h1 'Finalizing...' +h2 'Creating a job to download GPT3 checkpoint...' +sbatch ../common/sync.sh \ + -f "${RCLONE_PROFILE_NEBIUS_S3}:${NEBIUS_S3_BUCKET_NAME}/${CHECKPOINT_NAME}" \ + -t "${CHECKPOINT_DIR}" -h2 'Setting rights...' -chown -R 0:0 "${ROOT_DIR}" +h2 'Current Slurm job queue:' +squeue hdone -# endregion Finalizing +# endregion Data diff --git a/soperator/test/mlperf-gpt3/patches/megatron.diff b/soperator/test/mlperf-gpt3/patches/megatron.diff deleted file mode 100644 index a667e2c2..00000000 --- a/soperator/test/mlperf-gpt3/patches/megatron.diff +++ /dev/null @@ -1,70 +0,0 @@ -diff --git a/Dockerfile b/Dockerfile -index 27c528d..3f61cf1 100644 ---- a/Dockerfile -+++ b/Dockerfile -@@ -1,7 +1,6 @@ --ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:22.04-py3 -+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:23.10-py3 - FROM ${FROM_IMAGE_NAME} - --# Copy code - WORKDIR /workspace/llm - COPY . . - RUN pip install -r requirements.txt -diff --git a/gpt3_blend.sh b/gpt3_blend.sh -index 01312f6..9ec9fea 100755 ---- a/gpt3_blend.sh -+++ b/gpt3_blend.sh -@@ -1,8 +1,10 @@ - #!/bin/bash - --COM_DIR="/c4/preprocessed_c4_googlespm" -+COM_DIR="${COM_DIR:?COM_DIR not set}" -+ - C4_0="${COM_DIR}/c4_en_6_c4_spm_text_document" - C4_1="${COM_DIR}/c4_en_7_c4_spm_text_document" - DATA_BLEND="0.5 ${C4_0} 0.5 ${C4_1}" -+ - VALID_C4="${COM_DIR}/c4_en_validation_subset_c4_spm_text_document" - VALID_DATA_BLEND="1.00 ${VALID_C4}" -diff --git a/megatron/optimizer/clip_grads.py b/megatron/optimizer/clip_grads.py -index ad249bd..22fe827 100755 ---- a/megatron/optimizer/clip_grads.py -+++ b/megatron/optimizer/clip_grads.py -@@ -16,7 +16,7 @@ - """Gradient clipping.""" - - import torch --from torch._six import inf -+from torch import inf - - from apex.multi_tensor_apply import multi_tensor_applier - import amp_C -diff --git a/run_gpt3.sh b/run_gpt3.sh -index ce35f2b..12384cc 100755 ---- a/run_gpt3.sh -+++ b/run_gpt3.sh -@@ -1,6 +1,6 @@ - #!/bin/bash - --#SBATCH -p luna -A mlperf -t 00:20:00 --nodes=8 --exclusive --mem=0 --overcommit --ntasks-per-node=8 --job-name=mlperf-megatron:megatron -+#SBATCH -A mlperf -t 00:20:00 --nodes=8 --exclusive --mem=0 --overcommit --ntasks-per-node=8 --job-name=mlperf-megatron:megatron - - # Vars without defaults - LOG_DIR=${1:?LOG_DIR not set} -@@ -107,10 +107,13 @@ fi - run_cmd="python -u ${MEGATRON_DIR}/pretrain_gpt.py ${options}" - DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'` - -+export NCCL_TOPO_FILE='/var/run/nvidia-topologyd/virtualTopology.xml' -+ - srun -l \ - --container-image $CONT \ -- --container-mounts "$PWD:$PWD,${COM_DIR}:${COM_DIR},${LOG_DIR}:${LOG_DIR},${BPE_DIR}:${BPE_DIR}" \ -+ --container-mounts "$NCCL_TOPO_FILE:$NCCL_TOPO_FILE,$PWD:$PWD,${COM_DIR}:${COM_DIR},${LOG_DIR}:${LOG_DIR},${BPE_DIR}:${BPE_DIR}" \ -+ --container-writable \ -+ --export NCCL_TOPO_FILE \ - --output=$LOG_DIR/GPT3-175B-runlog-$DATETIME.log sh -c "${run_cmd}" - - set +x -- diff --git a/soperator/test/mlperf-sd/README.md b/soperator/test/mlperf-sd/README.md new file mode 100644 index 00000000..4c3ddf52 --- /dev/null +++ b/soperator/test/mlperf-sd/README.md @@ -0,0 +1,31 @@ +# Run MLCommons Stable Diffusion benchmark + +Wait for the dataset and checkpoint download jobs finished. You can track the progress by running: + +```shell +watch -n 15 squeue +``` + +Or checking the sync job outputs: + +```shell +tail -f ./results/sync-.out +``` + +Once they're done, go to the training directory and check defaults of the runner: + +```shell +cd training/stable_diffusion +./scripts/slurm/sbatch.sh -h +``` + +Provide some parameters to the `sbatch.sh` if you're not happy with defaults and start the benchmark job. + +```shell +./scripts/slurm/sbatch.sh [] +``` + +You can see the benchmark output in the log file created in `/opt/slurm-test/mlperf-sd/results` directory. + +If your setup consists of 2 worker nodes with 8 H100 GPU on each, you can compare it with the reference log file: +`/opt/slurm-test/mlperf-sd/results/reference_02x08x08_1720163290.out` diff --git a/soperator/test/mlperf-sd/aws_download.sh b/soperator/test/mlperf-sd/aws_download.sh deleted file mode 100644 index 064049f8..00000000 --- a/soperator/test/mlperf-sd/aws_download.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -#SBATCH -J aws_download -#SBATCH --output=/mlperf-sd/aws_download.log -#SBATCH --error=/mlperf-sd/aws_download.log - -srun aws s3 --endpoint-url=https://storage.ai.nebius.cloud cp --no-sign-request --recursive s3://gpt3/stable-diff /mlperf-sd diff --git a/soperator/test/mlperf-sd/init.sh b/soperator/test/mlperf-sd/init.sh new file mode 100755 index 00000000..fe0fab71 --- /dev/null +++ b/soperator/test/mlperf-sd/init.sh @@ -0,0 +1,162 @@ +#!/bin/bash + +set -e + +usage() { + echo "Usage: ${0} [-h]" >&2 + echo 'Required flags:' >&2 + echo ' -d [path] Path to data directory' >&2 + echo ' This is where datasets and checkpoints will be stored' >&2 + echo '' >&2 + echo 'Flags:' >&2 + echo ' -h Print help and exit' >&2 + exit 1 +} + +while getopts d:h flag +do + case "${flag}" in + d) DATA_DIR=${OPTARG};; + h) usage;; + *) usage;; + esac +done + +if [ -z "${DATA_DIR}" ]; then + usage +fi + +source ../common/enroot.sh +source ../common/env.sh +source ../common/printer.sh +source ../common/rclone.sh + +# region Dirs + +h1 'Creating directories...' + +DATASET_NAME='sd-dataset-3.0' +DATASET_DIR="${DATA_DIR}/${DATASET_NAME}" + +CHECKPOINT_NAME='sd-checkpoint-3.0' +CHECKPOINT_DIR="${DATA_DIR}/${CHECKPOINT_NAME}" + +h2 'Datasets...' \ + && mkdir -p "${DATASET_DIR}" +h2 'Checkpoints...' \ + && mkdir -p "${CHECKPOINT_DIR}" +h2 'Results...' \ + && mkdir -p "${TEST_MLPERF_SD_RESULTS_DIR}" +h2 'HuggingFace home...' \ + && mkdir -p "${DATA_DIR}/hf_home" + +hdone + +# endregion Dirs + +# region Rclone + +h1 'Configuring Rclone...' + +h2 'Installing...' +rclone_install + +h2 'Creating config...' +rclone_create_config "${RCLONE_PROFILE_NEBIUS_S3}" "${NEBIUS_S3_ENDPOINT}" + +hdone + +# endregion Rclone + +#region Enroot + +h1 'Configuring enroot...' + +h2 'Creating config directory...' +enroot_create_config_dir + +# h2 'Creating config...' +# enroot_create_config "${NEBIUS_CR_ENDPOINT}" "${NEBIUS_CR_USER}" "${NEBIUS_CR_PASSWORD}" + +hdone + +#endregion Enroot + +# region Test runner + +h1 'Patching test runner...' +SBATCH_RUNNER_PATH='training_patch/stable_diffusion/scripts/slurm/sbatch.sh' + +h2 'Test dir...' +sed -i -E \ + -e "s|(TEST_DIR:=)[^}]*|\1${TEST_DIR}|" \ + ${SBATCH_RUNNER_PATH} + +h2 'Log dir...' +sed -i -E \ + -e "s|(BASE_LOG_DIR:=)[^}]*|\1${TEST_MLPERF_SD_RESULTS_DIR}|" \ + ${SBATCH_RUNNER_PATH} + +h2 'Results dir...' +sed -i -E \ + -e "s|(BASE_RESULTS_DIR:=)[^}]*|\1${TEST_MLPERF_SD_RESULTS_DIR}|" \ + ${SBATCH_RUNNER_PATH} + +h2 'Container image...' +sed -i -E \ + -e "s|(CONTAINER_IMAGE:=)[^}]*|\1${NEBIUS_CR_ENDPOINT}#${NEBIUS_CR_REGISTRY}/stable_diffusion_mlcommons|" \ + ${SBATCH_RUNNER_PATH} + +h2 'Data dir...' +sed -i -E \ + -e "s|(DATA_DIR:=)[^}]*|\1${DATA_DIR}|" \ + ${SBATCH_RUNNER_PATH} + +# endregion Test runner + +# region MLCommons repo + +h1 'Configuring MLCommons training repository...' +TRAINING_DIR='training' + +if [[ -d "${TRAINING_DIR}" ]]; then + h2 'Cleaning leftovers...' + rm -rf ${TRAINING_DIR} +fi + +h2 'Checkout...' +export GIT_DISCOVERY_ACROSS_FILESYSTEM=1 +git clone --depth=1 https://github.com/mlcommons/training ${TRAINING_DIR} +pushd ${TRAINING_DIR} + git fetch --depth=1 origin 00f04c57d589721aabce4618922780d29f73cf4e + git checkout 00f04c57d589721aabce4618922780d29f73cf4e +popd + +h2 'Patching...' +rclone copy "${TRAINING_DIR}_patch" ${TRAINING_DIR} + +h2 'Setting execution flags...' +chmod +x ${TRAINING_DIR}/stable_diffusion/scripts/slurm/*.sh + +# endregion MLCommons repo + +# region Data + +h1 'Downloading data...' + +h2 'Creating a job to download SD dataset...' +sbatch ../common/sync.sh \ + -f "${RCLONE_PROFILE_NEBIUS_S3}:${NEBIUS_S3_BUCKET_NAME}/${DATASET_NAME}" \ + -t "${DATASET_DIR}" + +h2 'Creating a job to download SD checkpoint...' +sbatch ../common/sync.sh \ + -f "${RCLONE_PROFILE_NEBIUS_S3}:${NEBIUS_S3_BUCKET_NAME}/${CHECKPOINT_NAME}" \ + -t "${CHECKPOINT_DIR}" + +h2 'Current Slurm job queue:' +squeue + +hdone + +# endregion Data diff --git a/soperator/test/mlperf-sd/prepare_env.sh b/soperator/test/mlperf-sd/prepare_env.sh deleted file mode 100755 index 94f895d5..00000000 --- a/soperator/test/mlperf-sd/prepare_env.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -set -e - -mkdir -p /mlperf-sd -cd /mlperf-sd - -echo "Checkout MLCommons git repo" -export GIT_DISCOVERY_ACROSS_FILESYSTEM=1 -git clone --depth=1 https://github.com/mlcommons/training -pushd /mlperf-sd/training - git fetch --depth=1 origin 00f04c57d589721aabce4618922780d29f73cf4e - git checkout 00f04c57d589721aabce4618922780d29f73cf4e -popd - -echo "Adjust some files" -mkdir -p /mlperf-sd/data/results -mkdir -p /mlperf-sd/hf_home -cp -r /opt/mlperf-sd/training /mlperf-sd/ -cp -r /opt/mlperf-sd/aws_download.sh /mlperf-sd/ - -chown -R 0:0 /mlperf-sd - -echo "Install awscli" -apt update && apt install -y awscli - -echo "Start batch job for downloading datasets & results" -sbatch aws_download.sh - -echo "Started job aws_download" -squeue diff --git a/soperator/test/mlperf-sd/training/stable_diffusion/nogit/logs/reference_02x08x08_1720163290.out b/soperator/test/mlperf-sd/results/reference_02x08x08_1720163290.out similarity index 100% rename from soperator/test/mlperf-sd/training/stable_diffusion/nogit/logs/reference_02x08x08_1720163290.out rename to soperator/test/mlperf-sd/results/reference_02x08x08_1720163290.out index e2bbaee0..16ea09f9 100644 --- a/soperator/test/mlperf-sd/training/stable_diffusion/nogit/logs/reference_02x08x08_1720163290.out +++ b/soperator/test/mlperf-sd/results/reference_02x08x08_1720163290.out @@ -1878,15 +1878,15 @@ worker-1:19513:19856 [5] NCCL INFO Channel 11/0 : 5[5] -> 13[5] [receive] via NE worker-1:19513:19856 [5] NCCL INFO Channel 12/0 : 5[5] -> 13[5] [receive] via NET/IBext/1/GDRDMA worker-1:19513:19856 [5] NCCL INFO Channel 14/0 : 5[5] -> 13[5] [receive] via NET/IBext/1/GDRDMA worker-1:19513:19856 [5] NCCL INFO Channel 15/0 : 5[5] -> 13[5] [receive] via NET/IBext/1/GDRDMA -worker-1:19513:19856 [5] NCCL INFO Channel 00/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA worker-1:19513:19856 [5] NCCL INFO Channel 01/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA +worker-1:19513:19856 [5] NCCL INFO Channel 00/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA worker-1:19513:19856 [5] NCCL INFO Channel 02/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA worker-1:19513:19856 [5] NCCL INFO Channel 03/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA worker-0:19593:20042 [3] NCCL INFO Channel 12/0 : 3[3] -> 11[3] [send] via NET/IBext/7/GDRDMA worker-0:19593:20042 [3] NCCL INFO Channel 13/0 : 3[3] -> 11[3] [send] via NET/IBext/7/GDRDMA +worker-1:19513:19856 [5] NCCL INFO Channel 07/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA worker-1:19513:19856 [5] NCCL INFO Channel 04/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA worker-1:19513:19856 [5] NCCL INFO Channel 06/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA -worker-1:19513:19856 [5] NCCL INFO Channel 07/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA worker-1:19513:19856 [5] NCCL INFO Channel 08/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA worker-1:19513:19856 [5] NCCL INFO Channel 09/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA worker-1:19513:19856 [5] NCCL INFO Channel 10/0 : 13[5] -> 5[5] [send] via NET/IBext/1/GDRDMA diff --git a/soperator/test/mlperf-sd/training/stable_diffusion/parselog b/soperator/test/mlperf-sd/training/stable_diffusion/parselog deleted file mode 100755 index ec64b90c..00000000 Binary files a/soperator/test/mlperf-sd/training/stable_diffusion/parselog and /dev/null differ diff --git a/soperator/test/mlperf-sd/training/stable_diffusion/scripts/slurm/sbatch.sh b/soperator/test/mlperf-sd/training/stable_diffusion/scripts/slurm/sbatch.sh deleted file mode 100755 index fc2d5e90..00000000 --- a/soperator/test/mlperf-sd/training/stable_diffusion/scripts/slurm/sbatch.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/bin/bash - -: "${NUM_NODES:=2}" -: "${GPUS_PER_NODE:=8}" -: "${WALLTIME:=04:00:00}" -: "${CONFIG:=./configs/train_02x08x08.yaml}" -: "${BASE_LOG_DIR:=./nogit/logs}" -: "${BASE_RESULTS_DIR:=/mlperf-sd/data/results}" -: "${JOB_NAME:=job}" -: "${CONTAINER_IMAGE:=cr.ai.nebius.cloud#examples/stable_diffusion_h100}" -: "${CHECKPOINT:=/checkpoints/sd/512-base-ema.ckpt}" - -while [ "$1" != "" ]; do - case $1 in - -a | --account ) shift - ACCOUNT=$1 - ;; - -p | --partition ) shift - PARTITION=$1 - ;; - -n | --num-nodes ) shift - NUM_NODES=$1 - ;; - -g | --gpus-per-node ) shift - GPUS_PER_NODE=$1 - ;; - -t | --walltime ) shift - WALLTIME=$1 - ;; - -c | --config ) shift - CONFIG=$1 - ;; - -k | --checkpoint ) shift - CHECKPOINT=$1 - ;; - -l | --log-dir ) shift - BASE_LOG_DIR=$1 - ;; - -r | --results-dir ) shift - BASE_RESULTS_DIR=$1 - ;; - -d | --container ) shift - CONTAINER_IMAGE=$1 - ;; - esac - shift -done - -# Misc -SUFFIX=`date +%s` -CONFIG_NAME=`basename ${CONFIG} .yaml` -WORKDIR_MNT=/workdir - -# Job config -JOB_NAME=train_${CONFIG_NAME}_${SUFFIX} - -# Laion 400m -LAION_400M=/mlperf-sd/datasets/laion-400m -LAION_400M_MOUNT=/datasets/laion-400m - -# COCO -COCO=/mlperf-sd/datasets/coco2014 -COCO_MNT=/datasets/coco2014 - -# checkpoints -CKPT_DIR=/mlperf-sd/checkpoints -CKPT_MOUNT=/checkpoints - -# Hugging face home -HF_HOME_DIR=/mlperf-sd/hf_home -HF_HOME_MOUNT=/hf_home - -# exp -RESULTS_DIR=${BASE_RESULTS_DIR} # no need to append job name, pytorch appends datetime automatically -RESULTS_MNT=/results -mkdir -p ${RESULTS_DIR} - -# logdir -LOG_DIR="${BASE_LOG_DIR}" -mkdir -p ${LOG_DIR} - -NCCL_TOPO_FILE=/var/run/nvidia-topologyd/virtualTopology.xml -export NCCL_TOPO_FILE=/var/run/nvidia-topologyd/virtualTopology.xml - -# Mounts -#MOUNTS="/dev/infiniband:/dev/infiniband,${NCCL_TOPO_FILE}:${NCCL_TOPO_FILE},${PWD}:${WORKDIR_MNT},${LAION_400M}:${LAION_400M_MOUNT},${COCO}:${COCO_MNT},${RESULTS_DIR}:${RESULTS_MNT},${CKPT_DIR}:${CKPT_MOUNT},${HF_HOME_DIR}:${HF_HOME_MOUNT}" -MOUNTS="${NCCL_TOPO_FILE}:${NCCL_TOPO_FILE},${PWD}:${WORKDIR_MNT},${LAION_400M}:${LAION_400M_MOUNT},${COCO}:${COCO_MNT},${RESULTS_DIR}:${RESULTS_MNT},${CKPT_DIR}:${CKPT_MOUNT},${HF_HOME_DIR}:${HF_HOME_MOUNT}" - -sbatch \ - --job-name="mlperf-ssd:${JOB_NAME}" \ - --nodes="${NUM_NODES}" \ - --gpus-per-node=${GPUS_PER_NODE} \ - --ntasks-per-node="${GPUS_PER_NODE}" \ - --cpus-per-task=8 \ - --mem-per-cpu="4G" \ - --time="${WALLTIME}" \ - --output="${LOG_DIR}/%A_${JOB_NAME}.out" \ - ./scripts/slurm/srun.sh \ - --num-nodes ${NUM_NODES} \ - --gpus-per-node ${GPUS_PER_NODE} \ - --config ${CONFIG} \ - --workdir ${WORKDIR_MNT} \ - --results-dir ${RESULTS_MNT} \ - --mounts ${MOUNTS} \ - --container ${CONTAINER_IMAGE} \ - --checkpoint ${CHECKPOINT} - diff --git a/soperator/test/mlperf-sd/training/stable_diffusion/scripts/slurm/srun.sh b/soperator/test/mlperf-sd/training/stable_diffusion/scripts/slurm/srun.sh deleted file mode 100755 index 0dfa4284..00000000 --- a/soperator/test/mlperf-sd/training/stable_diffusion/scripts/slurm/srun.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash - -: "${NUM_NODES:=2}" -: "${GPUS_PER_NODE:=8}" -: "${CONFIG:=./configs/train_02x08x08.yaml}" -: "${WORKDIR:=/workdir}" -: "${RESULTS_MNT:=/results}" -: "${MOUNTS:=/dev/infiniband:/dev/infiniband}" -: "${CONTAINER_IMAGE:=mlperf_sd:22.12-py3}" -: "${CHECKPOINT:=/checkpoints/sd/512-base-ema.ckpt}" - -while [ "$1" != "" ]; do - case $1 in - --num-nodes ) shift - NUM_NODES=$1 - ;; - --gpus-per-node ) shift - GPUS_PER_NODE=$1 - ;; - --config ) shift - CONFIG=$1 - ;; - --checkpoint ) shift - CHECKPOINT=$1 - ;; - --workdir ) shift - WORKDIR=$1 - ;; - --results-dir ) shift - RESULTS_MNT=$1 - ;; - --mounts ) shift - MOUNTS=$1 - ;; - --container ) shift - CONTAINER_IMAGE=$1 - ;; - esac - shift -done - -HF_HUB_OFFLINE=1 -export HF_HUB_OFFLINE=1 -NCCL_TOPO_FILE=/var/run/nvidia-topologyd/virtualTopology.xml -export NCCL_TOPO_FILE=/var/run/nvidia-topologyd/virtualTopology.xml -NCCL_DEBUG=INFO -export NCCL_DEBUG=INFO - -srun \ - --container-image="${CONTAINER_IMAGE}" \ - --container-writable \ - --container-mounts="${MOUNTS}" \ - --container-workdir="${WORKDIR}" \ - --ntasks-per-node="${GPUS_PER_NODE}" \ - --cpus-per-task=8 \ - --mem-per-cpu="4G" \ - --nodes="${NUM_NODES}" \ - bash -c "./run_and_time.sh \ - --export NCCL_TOPO_FILE,HF_HUB_OFFLINE,NCCL_DEBUG \ - --num-nodes ${NUM_NODES} \ - --gpus-per-node ${GPUS_PER_NODE} \ - --checkpoint ${CHECKPOINT} \ - --results-dir ${RESULTS_MNT} \ - --config ${CONFIG}" diff --git a/soperator/test/mlperf-sd/training/stable_diffusion/configs/train_02x08x08.yaml b/soperator/test/mlperf-sd/training_patch/stable_diffusion/configs/train_02x08x08.yaml similarity index 100% rename from soperator/test/mlperf-sd/training/stable_diffusion/configs/train_02x08x08.yaml rename to soperator/test/mlperf-sd/training_patch/stable_diffusion/configs/train_02x08x08.yaml diff --git a/soperator/test/mlperf-sd/training/stable_diffusion/run_and_time.sh b/soperator/test/mlperf-sd/training_patch/stable_diffusion/run_and_time.sh similarity index 50% rename from soperator/test/mlperf-sd/training/stable_diffusion/run_and_time.sh rename to soperator/test/mlperf-sd/training_patch/stable_diffusion/run_and_time.sh index 1a9a07ad..164b9761 100755 --- a/soperator/test/mlperf-sd/training/stable_diffusion/run_and_time.sh +++ b/soperator/test/mlperf-sd/training_patch/stable_diffusion/run_and_time.sh @@ -7,24 +7,24 @@ : "${CONFIG:=./configs/train_512_latents.yaml}" while [ "$1" != "" ]; do - case $1 in - --num-nodes ) shift - NUM_NODES=$1 - ;; - --gpus-per-node ) shift - GPUS_PER_NODE=$1 - ;; - --checkpoint ) shift - CHECKPOINT=$1 - ;; - --results-dir ) shift - RESULTS_DIR=$1 - ;; - --config ) shift - CONFIG=$1 - ;; - esac - shift + case $1 in + --num-nodes ) shift + NUM_NODES=$1 + ;; + --gpus-per-node ) shift + GPUS_PER_NODE=$1 + ;; + --checkpoint ) shift + CHECKPOINT=$1 + ;; + --results-dir ) shift + RESULTS_DIR=$1 + ;; + --config ) shift + CONFIG=$1 + ;; + esac + shift done set -e @@ -45,12 +45,12 @@ from mlperf_logging_utils import mllogger mllogger.event(key=constants.CACHE_CLEAR, value=True)" python main.py \ - lightning.trainer.num_nodes=${NUM_NODES} \ - lightning.trainer.devices=${GPUS_PER_NODE} \ - -m train \ - --ckpt ${CHECKPOINT} \ - --logdir ${RESULTS_DIR} \ - -b ${CONFIG} + lightning.trainer.num_nodes=${NUM_NODES} \ + lightning.trainer.devices=${GPUS_PER_NODE} \ + -m train \ + --ckpt ${CHECKPOINT} \ + --logdir ${RESULTS_DIR} \ + -b ${CONFIG} # end timing end=$(date +%s) diff --git a/soperator/test/mlperf-sd/training_patch/stable_diffusion/scripts/slurm/sbatch.sh b/soperator/test/mlperf-sd/training_patch/stable_diffusion/scripts/slurm/sbatch.sh new file mode 100755 index 00000000..49be5113 --- /dev/null +++ b/soperator/test/mlperf-sd/training_patch/stable_diffusion/scripts/slurm/sbatch.sh @@ -0,0 +1,138 @@ +#!/bin/bash + +# region Defaults + +: "${NUM_NODES:=2}" +: "${GPUS_PER_NODE:=8}" +: "${WALLTIME:=04:00:00}" +: "${CONFIG:=./configs/train_02x08x08.yaml}" +: "${DATA_DIR:=/mlperf-sd}" +: "${BASE_LOG_DIR:=./nogit/logs}" +: "${BASE_RESULTS_DIR:=/mlperf-sd/data/results}" +: "${CONTAINER_IMAGE:=cr.eu-north1.nebius.cloud#slurm-mlperf-training/stable_diffusion_mlcommons}" +: "${CHECKPOINT:=sd/512-base-ema.ckpt}" + +# endregion Defaults + +usage() { + echo "Usage: ${0} [FLAGS] [-h]" >&2 + echo 'Flags:' >&2 + echo ' -N / --num-nodes [int ] Number of worker nodes' >&2 + echo " By default, ${NUM_NODES}" >&2 + echo ' -g / --gpus-per-node [int ] Number of GPUs per worker nodes' >&2 + echo " By default, ${GPUS_PER_NODE}" >&2 + echo ' -t / --walltime [str ] Job timeout' >&2 + echo " By default, ${WALLTIME}" >&2 + echo ' -c / --config [path] Path to the config file' >&2 + echo " By default, ${CONFIG}" >&2 + echo ' -D / --data-dir [path] Path to the data directory' >&2 + echo ' This is where datasets and checkpoints are stored' >&2 + echo " By default, ${DATA_DIR}" >&2 + echo ' -l / --log-dir [path] Path to the directory for logs' >&2 + echo " By default, ${BASE_LOG_DIR}" >&2 + echo ' -r / --results-dir [path] Path to the directory for results' >&2 + echo " By default, ${BASE_RESULTS_DIR}" >&2 + echo ' -i / --container [path] Path to the container image' >&2 + echo " By default, ${CONTAINER_IMAGE}" >&2 + echo ' -C / --checkpoint [path] Path to the checkpoint file inside checkpoints mount' >&2 + echo " By default, ${CHECKPOINT}" >&2 + echo '' >&2 + echo ' -h Print help and exit' >&2 + exit 1 +} + +while [ "$1" != "" ]; do + case $1 in + -N | --num-nodes ) shift; NUM_NODES=$1;; + -g | --gpus-per-node ) shift; GPUS_PER_NODE=$1;; + -t | --walltime ) shift; WALLTIME=$1;; + -c | --config ) shift; CONFIG=$1;; + -D | --data-dir ) shift; DATA_DIR=$1;; + -l | --log-dir ) shift; BASE_LOG_DIR=$1;; + -r | --results-dir ) shift; BASE_RESULTS_DIR=$1;; + -i | --container ) shift; CONTAINER_IMAGE=$1;; + -C | --checkpoint ) shift; CHECKPOINT=$1;; + -h | * ) shift; usage;; + esac + shift +done + +: "${TEST_DIR:=/opt/slurm-test}" +source "${TEST_DIR}/common/printer.sh" + +# region Paths + +h1 'Configuring paths...' + +h2 'Dataset...' +DATASET_DIR="${DATA_DIR}/sd-dataset-3.0" + +# Laion 400m +LAION_400M="${DATASET_DIR}/laion-400m" +LAION_400M_MOUNT='/datasets/laion-400m' + +# COCO +COCO="${DATASET_DIR}/coco2014" +COCO_MNT='/datasets/coco2014' + +h2 'Checkpoint...' +CKPT_DIR="${DATA_DIR}/sd-checkpoint-3.0" +CKPT_MOUNT='/checkpoints' + +h2 'HuggingFace home...' +HF_HOME_DIR="${DATA_DIR}/hf_home" +HF_HOME_MOUNT='/hf_home' + +h2 'Results...' +RESULTS_DIR="${BASE_RESULTS_DIR}" +RESULTS_MNT='/results' +mkdir -p "${RESULTS_DIR}" + +h2 'Logs...' +LOG_DIR="${BASE_LOG_DIR}" +mkdir -p "${LOG_DIR}" + +h2 'NCCL topology...' +NCCL_TOPO_FILE='/var/run/nvidia-topologyd/virtualTopology.xml' +export NCCL_TOPO_FILE + +h2 'Workdir...' +WORKDIR_MNT='/workdir' + +h2 'Mounts...' +MOUNTS="${NCCL_TOPO_FILE}:${NCCL_TOPO_FILE},${PWD}:${WORKDIR_MNT},${LAION_400M}:${LAION_400M_MOUNT},${COCO}:${COCO_MNT},${RESULTS_DIR}:${RESULTS_MNT},${CKPT_DIR}:${CKPT_MOUNT},${HF_HOME_DIR}:${HF_HOME_MOUNT}" + +hdone + +# endregion Paths + +# region Job config + +h1 'Configuring job config...' + +CONFIG_NAME="$(basename "${CONFIG}" .yaml)" +SUFFIX="$(date +%s)" +JOB_NAME="train_${CONFIG_NAME}_${SUFFIX}" + +hdone + +# endregion Job config + +sbatch \ + --job-name="mlperf-sd:${JOB_NAME}" \ + --nodes="${NUM_NODES}" \ + --gpus-per-node="${GPUS_PER_NODE}" \ + --ntasks-per-node="${GPUS_PER_NODE}" \ + --cpus-per-task='8' \ + --mem-per-cpu='4G' \ + --time="${WALLTIME}" \ + --output="${LOG_DIR}/%A_${JOB_NAME}.out" \ + ./scripts/slurm/srun.sh \ + --num-nodes "${NUM_NODES}" \ + --gpus-per-node "${GPUS_PER_NODE}" \ + --config "${CONFIG}" \ + --workdir "${WORKDIR_MNT}" \ + --results-dir "${RESULTS_MNT}" \ + --mounts "${MOUNTS}" \ + --container "${CONTAINER_IMAGE}" \ + --checkpoint "${CKPT_MOUNT}/${CHECKPOINT}" diff --git a/soperator/test/mlperf-sd/training_patch/stable_diffusion/scripts/slurm/srun.sh b/soperator/test/mlperf-sd/training_patch/stable_diffusion/scripts/slurm/srun.sh new file mode 100755 index 00000000..5d1db7b2 --- /dev/null +++ b/soperator/test/mlperf-sd/training_patch/stable_diffusion/scripts/slurm/srun.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +: "${NUM_NODES:=2}" +: "${GPUS_PER_NODE:=8}" +: "${CONFIG:=./configs/train_02x08x08.yaml}" +: "${WORKDIR:=/workdir}" +: "${RESULTS_MNT:=/results}" +: "${MOUNTS:=/dev/infiniband:/dev/infiniband}" +: "${CONTAINER_IMAGE:=mlperf_sd:22.12-py3}" +: "${CHECKPOINT:=/checkpoints/sd/512-base-ema.ckpt}" + +while [ "$1" != "" ]; do + case $1 in + --num-nodes ) shift + NUM_NODES=$1 + ;; + --gpus-per-node ) shift + GPUS_PER_NODE=$1 + ;; + --config ) shift + CONFIG=$1 + ;; + --checkpoint ) shift + CHECKPOINT=$1 + ;; + --workdir ) shift + WORKDIR=$1 + ;; + --results-dir ) shift + RESULTS_MNT=$1 + ;; + --mounts ) shift + MOUNTS=$1 + ;; + --container ) shift + CONTAINER_IMAGE=$1 + ;; + esac + shift +done + +HF_HUB_OFFLINE=1 +export HF_HUB_OFFLINE=1 +NCCL_TOPO_FILE=/var/run/nvidia-topologyd/virtualTopology.xml +export NCCL_TOPO_FILE=/var/run/nvidia-topologyd/virtualTopology.xml +NCCL_DEBUG=INFO +export NCCL_DEBUG=INFO + +srun \ + --container-image="${CONTAINER_IMAGE}" \ + --container-writable \ + --container-mounts="${MOUNTS}" \ + --container-workdir="${WORKDIR}" \ + --ntasks-per-node="${GPUS_PER_NODE}" \ + --cpus-per-task=8 \ + --mem-per-cpu="4G" \ + --nodes="${NUM_NODES}" \ + bash -c "./run_and_time.sh \ + --export NCCL_TOPO_FILE,HF_HUB_OFFLINE,NCCL_DEBUG \ + --num-nodes ${NUM_NODES} \ + --gpus-per-node ${GPUS_PER_NODE} \ + --checkpoint ${CHECKPOINT} \ + --results-dir ${RESULTS_MNT} \ + --config ${CONFIG}" diff --git a/soperator/test/prepare_for_mlperf_gpt3.sh b/soperator/test/prepare_for_mlperf_gpt3.sh deleted file mode 100755 index 0711fa1b..00000000 --- a/soperator/test/prepare_for_mlperf_gpt3.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -usage() { echo "usage: ${0} -u -k -a
[-p ] [-h]" >&2; exit 1; } - -while getopts u:k:a:p:n:h flag -do - case "${flag}" in - u) user=${OPTARG};; - k) key=${OPTARG};; - a) address=${OPTARG};; - p) port=${OPTARG};; - h) usage;; - *) usage;; - esac -done - -if [ -z "$user" ] || [ -z "$key" ] || [ -z "$address" ]; then - usage -fi - -if [ -z "$port" ]; then - port=22 -fi - -echo "Transferring files as user '${user}' with key '${key}' to ${address}:${port}..." -scp \ - -i "${key}" \ - -P "${port}" \ - -r \ - ./mlperf-gpt3/* \ - "${user}"@"${address}":/opt/mlperf-gpt3 - -echo "Done" diff --git a/soperator/test/prepare_for_mlperf_sd.sh b/soperator/test/prepare_for_mlperf_sd.sh deleted file mode 100755 index 999c1b7c..00000000 --- a/soperator/test/prepare_for_mlperf_sd.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -usage() { echo "usage: ${0} -u -k -a
[-h]" >&2; exit 1; } - -while getopts u:k:a:h flag -do - case "${flag}" in - u) user=${OPTARG};; - k) key=${OPTARG};; - a) address=${OPTARG};; - h) usage;; - *) usage;; - esac -done - -if [ -z "$user" ] || [ -z "$key" ] || [ -z "$address" ]; then - usage -fi - -scp -i "${key}" -r ./mlperf-sd "${user}"@"${address}":/opt/mlperf-sd diff --git a/soperator/test/prepare_for_quickcheck.sh b/soperator/test/prepare_for_quickcheck.sh deleted file mode 100755 index 62192ecf..00000000 --- a/soperator/test/prepare_for_quickcheck.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -usage() { echo "usage: ${0} -u -k -a
[-h]" >&2; exit 1; } - -while getopts u:k:a:h flag -do - case "${flag}" in - u) user=${OPTARG};; - k) key=${OPTARG};; - a) address=${OPTARG};; - h) usage;; - *) usage;; - esac -done - -if [ -z "$user" ] || [ -z "$key" ] || [ -z "$address" ]; then - usage -fi - -scp -i "${key}" -r ./quickcheck "${user}"@"${address}":/ diff --git a/soperator/test/quickcheck/README.md b/soperator/test/quickcheck/README.md new file mode 100644 index 00000000..d0fde2e5 --- /dev/null +++ b/soperator/test/quickcheck/README.md @@ -0,0 +1,42 @@ +# Quick checks of the Slurm cluster + +Within an SSH session to the Slurm cluster, go to the test directory: +```shell +cd /opt/slurm-test +``` + +
+There are 3 tests + +- `hello.sh` + + Performs basic checks of the Slurm cluster. + Jobs can be executed and resources can be allocated. + +- `nccl.sh` + + Executes NCCL test "all_reduce_perf" twice: + - Using NVLink; + - Using Infiniband. + +- `enroot.sh` + + Launches jobs inside enroot containers (using pyxis plugin). +
+ +To run them, execute following commands: + +```shell +sbatch hello.sh && \ +tail -f results/hello.out +``` + +```shell +sbatch nccl.sh && \ +tail -f results/nccl.out +``` + +```shell +sbatch enroot.sh && \ +tail -f results/enroot.out +``` diff --git a/soperator/test/quickcheck/enroot.sh b/soperator/test/quickcheck/enroot.sh index eb932c6a..13f4e83b 100755 --- a/soperator/test/quickcheck/enroot.sh +++ b/soperator/test/quickcheck/enroot.sh @@ -1,10 +1,13 @@ #!/bin/bash #SBATCH -J enroot -#SBATCH --output=/quickcheck/outputs/enroot.out -#SBATCH --error=/quickcheck/outputs/enroot.out +#SBATCH --output=results/enroot.out +#SBATCH --error=results/enroot.out #SBATCH --gpus=1 # Specify the image in order to run the job inside a container -srun --gpus=1 --container-image="nvidia/cuda:12.2.2-base-ubuntu20.04" \ - echo "Run nvidia-smi from enroot container on $(hostname)" && nvidia-smi +srun \ + --gpus=1 \ + --container-image="nvidia/cuda:12.2.2-base-ubuntu20.04" \ + echo "Run nvidia-smi from enroot container on $(hostname)" \ + && nvidia-smi diff --git a/soperator/test/quickcheck/hello.sh b/soperator/test/quickcheck/hello.sh index 1537f220..f813d357 100755 --- a/soperator/test/quickcheck/hello.sh +++ b/soperator/test/quickcheck/hello.sh @@ -1,15 +1,21 @@ #!/bin/bash #SBATCH -J hello -#SBATCH --output=/quickcheck/outputs/hello.out -#SBATCH --error=/quickcheck/outputs/hello.out -#SBATCH --cpus-per-task=120 +#SBATCH --output=results/hello.out +#SBATCH --error=results/hello.out +#SBATCH --cpus-per-task=60 #SBATCH --mem-per-cpu=8G #SBATCH --gpus=4 # Print hello -srun echo "Hello from $(hostname)" +srun \ + echo "Hello from $(hostname)" # Allocate some resources -srun --ntasks=2 --cpus-per-task=60 --mem-per-cpu=8G --gpus=4 \ - echo "Run nvidia-smi on $(hostname)" && nvidia-smi +srun \ + --ntasks=2 \ + --cpus-per-task=60 \ + --mem-per-cpu=8G \ + --gpus=4 \ + echo "Run nvidia-smi on $(hostname)" \ + && nvidia-smi diff --git a/soperator/test/quickcheck/nccl.sh b/soperator/test/quickcheck/nccl.sh index cd0d492b..96bff373 100755 --- a/soperator/test/quickcheck/nccl.sh +++ b/soperator/test/quickcheck/nccl.sh @@ -1,16 +1,24 @@ #!/bin/bash #SBATCH -J nccl -#SBATCH --output=/quickcheck/outputs/nccl.out -#SBATCH --error=/quickcheck/outputs/nccl.out +#SBATCH --output=results/nccl.out +#SBATCH --error=results/nccl.out #SBATCH --cpus-per-task=16 #SBATCH --mem-per-cpu=8G -#SBATCH --gres=gpu:nvidia_h100_80gb_hbm3:8 +#SBATCH --gpus=8 -# Allocate 8 ANY GPUs -srun --cpus-per-task=16 --mem-per-cpu=8G --gres=gpu:8 \ - echo "Run NCCL test with NVLink:" && /usr/bin/all_reduce_perf -b 512M -e 8G -f 2 -g 8 +# Allocate 8 GPUs for NCCL test with NVLink +srun \ + --cpus-per-task=16 \ + --mem-per-cpu=8G \ + --gpus=8 \ + echo "Run NCCL test with NVLink:" \ + && /usr/bin/all_reduce_perf -b 512M -e 8G -f 2 -g 8 -# Allocate 8 H100 GPUs -srun --cpus-per-task=16 --mem-per-cpu=8G --gres=gpu:nvidia_h100_80gb_hbm3:8 \ - echo "Run NCCL test with InfiniBand:" && NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring /usr/bin/all_reduce_perf -b 512M -e 8G -f 2 -g 8 +# Allocate 8 GPUs for NCCL test with InfiniBand +srun \ + --cpus-per-task=16 \ + --mem-per-cpu=8G \ + --gpus=8 \ + echo "Run NCCL test with InfiniBand:" \ + && NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 NCCL_ALGO=Ring /usr/bin/all_reduce_perf -b 512M -e 8G -f 2 -g 8