Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HPCC-32683 Fix issues with postmortem and container death #19296

Merged
merged 1 commit into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
245 changes: 160 additions & 85 deletions helm/hpcc/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -313,33 +313,85 @@ metrics:
{{- end -}}

{{/*
Add ConfigMap volume mount for a component
Add tmp volume mount
*/}}
{{- define "hpcc.addConfigMapVolumeMount" -}}
- name: {{ .name }}-temp-volume
{{- define "hpcc.addTempVolumeMount" -}}
{{- $volumeName := .volumeName | default .name -}}
- name: {{ $volumeName }}-temp-volume
mountPath: /tmp
- name: {{ .name }}-hpcctmp-volume
{{- if not .noSubPath }}
subPath: {{ .name | quote }}
{{- end -}}
{{- end -}}

{{/*
Add runtime volume mount
*/}}
{{- define "hpcc.addRuntimeVolumeMount" -}}
{{- $volumeName := .volumeName | default .name -}}
- name: {{ $volumeName }}-hpcctmp-volume
mountPath: /var/lib/HPCCSystems
{{- if .tmpSubPath }}
subPath: {{ .tmpSubPath | quote }}
{{- end }}
{{- if not .noSubPath }}
subPath: {{ .name | quote }}
{{- end -}}
{{- end -}}

{{/*
Add ConfigMap volume mount for a component
*/}}
{{- define "hpcc.addConfigMapVolumeMount" -}}
- name: {{ .name }}-configmap-volume
{{- if .noSubPath }}
mountPath: {{ printf "/etc/config/%s" .name }}
{{- else }}
mountPath: /etc/config
{{- end -}}
{{- end -}}

{{/*
Add ConfigMap volume for a component
Add standard ephemeral volume mounts for a component
*/}}
{{- define "hpcc.addConfigMapVolume" -}}
{{- define "hpcc.addEphemeralVolumeMounts" -}}
{{ include "hpcc.addTempVolumeMount" . }}
{{ include "hpcc.addRuntimeVolumeMount" . }}
{{ include "hpcc.addConfigMapVolumeMount" . }}
{{- end -}}

{{/*
Add tmp volume for a component
*/}}
{{- define "hpcc.addTempVolume" -}}
- name: {{ .name }}-temp-volume
emptyDir: {}
{{- end -}}

{{/*
Add runtime volume for a component
*/}}
{{- define "hpcc.addRuntimeVolume" -}}
- name: {{ .name }}-hpcctmp-volume
emptyDir: {}
{{- end -}}

{{/*
Add ConfigMap volume for a component
*/}}
{{- define "hpcc.addConfigMapVolume" -}}
- name: {{ .name }}-configmap-volume
configMap:
name: {{ .name }}-configmap
{{- end -}}


{{/*
Add ConfigMap volume for a component
*/}}
{{- define "hpcc.addEphemeralVolumes" -}}
{{ include "hpcc.addTempVolume" . }}
{{ include "hpcc.addRuntimeVolume" . }}
{{ include "hpcc.addConfigMapVolume" . }}
{{- end -}}

{{/*
Get mount details
Pass in plane
Expand Down Expand Up @@ -734,57 +786,24 @@ Check that the storage and spill planes for a component exist
{{- end -}}

{{/*
Add command for a component
*/}}
{{- define "hpcc.componentCommand" -}}
{{- if .me.valgrind -}}
valgrind
{{- else if (include "hpcc.hasPlaneForCategory" (dict "root" .root "category" "debug")) -}}
check_executes
{{- else -}}
{{ .process }}
{{- end }}
{{- end -}}

{{/*
Add extra args for a component
Add config arg for a component
*/}}
{{- define "hpcc.componentStartArgs" -}}
{{- if .me.valgrind -}}
"--leak-check=full",
"--show-leak-kinds=all",
"--track-origins=yes",
"--num-callers=8",
"--log-fd=1",
{{ .process | quote }},
{{- else if (include "hpcc.hasPlaneForCategory" (dict "root" .root "category" "debug")) -}}
{{- $debugPlane := .me.debugPlane | default (include "hpcc.getFirstPlaneForCategory" (dict "root" .root "category" "debug")) -}}
{{- include "hpcc.checkPlaneExists" (dict "root" .root "planeName" $debugPlane) -}}
{{- $prefix := include "hpcc.getPlanePrefix" (dict "root" .root "planeName" $debugPlane) -}}
{{- $meExpert := .me.expert | default dict -}}
{{- $globalExpert := .root.Values.global.expert | default dict -}}
{{- $alwaysPostMortem := (hasKey $meExpert "alwaysPostMortem") | ternary $meExpert.alwaysPostMortem ($globalExpert.alwaysPostMortem | default false) -}}
{{- if $alwaysPostMortem -}}
"-a",{{ "\n" }}
{{- end -}}
"-d", {{ $prefix }},
"--",
{{ .process | quote }},
{{- end }}
{{- define "hpcc.getConfigArg" -}}
/etc/config/{{ .name }}.yaml
{{- end -}}

{{/*
Add config arg for a component
*/}}
{{- define "hpcc.configArg" -}}
"--config=/etc/config/{{ .name }}.yaml"
"--config={{ include "hpcc.getConfigArg" . }}"
{{- end -}}

{{/*
Add dali arg for a component
Get dali endpoint for a component
Pass in dict with root, component (in case of error), optional (true if daliArg is optional)
*/}}
{{- define "hpcc.daliArg" -}}
{{- define "hpcc.getDali" -}}
{{- if empty .root.Values.dali -}}
{{- if not .optional -}}
{{- $_ := fail (printf "%s requires a DALI to be defined" .component) -}}
Expand All @@ -794,10 +813,22 @@ Pass in dict with root, component (in case of error), optional (true if daliArg
{{- $daliService := $dali.service | default dict -}}
{{- $daliHost := .overrideDaliHost | default $dali.name -}}
{{- $daliServicePort := .overrideDaliPort | default ($daliService.servicePort | default 7070) -}}
"--daliServers={{ $daliHost }}:{{ $daliServicePort }}"
{{ $daliHost }}:{{ $daliServicePort }}
{{- end -}}
{{- end -}}


{{/*
Add dali arg for a component
Pass in dict with root, component (in case of error), optional (true if daliArg is optional)
*/}}
{{- define "hpcc.daliArg" -}}
{{- $dali := include "hpcc.getDali" . -}}
{{- if $dali -}}
"--daliServers={{ $dali }}"
{{- end -}}
{{- end -}}

{{/*
Get image name
*/}}
Expand Down Expand Up @@ -1022,6 +1053,53 @@ NB: uid=10000 and gid=10001 are the uid/gid of the hpcc user, built into platfor
{{- include "hpcc.configContainer" . | nindent 0 -}}
{{- end -}}

{{/*
A sidecar container to run commands after a main container finishes
Pass in dict with me, and params
*/}}
{{- define "hpcc.addPostRunContainer" -}}
{{- $meExpert := .me.expert | default dict -}}
{{- $globalExpert := .root.Values.global.expert | default dict -}}
{{- $postRun := (hasKey $meExpert "postRunSidecar") | ternary $meExpert.postRunSidecar ((hasKey $globalExpert "postRunSidecar") | ternary $globalExpert.postRunSidecar true) }}
{{- if $postRun }}
{{- if (include "hpcc.hasPlaneForCategory" (dict "root" .root "category" "debug")) -}}
{{- $debugPlane := .me.debugPlane | default (include "hpcc.getFirstPlaneForCategory" (dict "root" .root "category" "debug")) -}}
{{- include "hpcc.checkPlaneExists" (dict "root" .root "planeName" $debugPlane) -}}
{{- $prefix := include "hpcc.getPlanePrefix" (dict "root" .root "planeName" $debugPlane) -}}
{{- $dali := include "hpcc.getDali" . -}}
- name: postrun
{{- include "hpcc.addImageAttrs" . | nindent 2 }}
command:
- container_watch.sh
- {{ printf "--directory=%s" $prefix }}
{{- if $dali }}
- {{ printf "--daliServer=%s" $dali }}
{{- end }}
{{- if .isJob }}
- --isJob
{{- end }}
{{- range $container := .lifeCycleCtx.containers }}
- {{ $container.name }}
- {{ $container.process }}
{{- end }}
{{- include "hpcc.addSecurityContext" . | indent 2 }}
volumeMounts:
{{- include "hpcc.addTempVolumeMount" (.me | merge (dict "noSubPath" "true")) | nindent 2 }}
{{- include "hpcc.addRuntimeVolumeMount" (.me | merge (dict "noSubPath" "true")) | nindent 2 }}
{{- $uniqueConfigs := dict -}}
{{- range $container := .lifeCycleCtx.containers -}}
{{- $config := $container.config -}}
{{- $_ := set $uniqueConfigs $config true -}}
{{- end -}}
{{- $me := .me -}}
{{- range $config, $_ := $uniqueConfigs }}
{{- include "hpcc.addConfigMapVolumeMount" ($me | merge (dict "name" $config "noSubPath" "true")) | nindent 2 -}}
{{- end -}}
{{- include "hpcc.addVolumeMounts" (dict "root" .root "me" $me "includeCategories" (list "debug")) | nindent 2 }}
{{- end -}}
{{- end -}}
{{- end -}}

{{/*
Container to watch for a file on a shared mount and execute a command
Pass in dict with me and command
Expand Down Expand Up @@ -1095,7 +1173,7 @@ Pass in a dictionary with root and me defined
{{- define "hpcc.addSecurityContext" }}
{{- $user := (.root.Values.global.user | default dict) }}
securityContext:
{{- if .root.Values.global.privileged }}
{{- if (or .root.Values.global.privileged .privileged) }}
privileged: true
capabilities:
add:
Expand Down Expand Up @@ -1428,7 +1506,7 @@ data:

{{/*
A template to generate Sasha service containers
Pass in dict with root, me and dali if container in dali pod
Pass in dict with root, me, lifeCycleCtx and dali if container in dali pod
*/}}
{{- define "hpcc.addSashaContainer" }}
{{- $serviceName := printf "sasha-%s" .me.name }}
Expand All @@ -1437,14 +1515,7 @@ Pass in dict with root, me and dali if container in dali pod
{{- $env := concat (.root.Values.global.env | default list) (.env | default list) }}
- name: {{ $serviceName | quote }}
workingDir: /var/lib/HPCCSystems
command: [ saserver ]
args: [
{{- with (dict "name" $serviceName) }}
{{ include "hpcc.configArg" . }},
{{- end }}
"--service={{ .me.name }}",
{{ include "hpcc.daliArg" (dict "root" .root "component" "Sasha" "optional" false "overrideDaliHost" $overrideDaliHost "overrideDaliPort" $overrideDaliPort) | indent 10 }}
]
{{- include "hpcc.addCommandAndLifecycle" (merge (pick . "root" "lifeCycleCtx") (dict "me" (.me | merge (dict "name" $serviceName))) (dict "process" "saserver" "extraArgs" (list (printf "--service=%s" .me.name)) "component" "Sasha" "optional" false "overrideConfigName" $serviceName "overrideDaliHost" $overrideDaliHost "overrideDaliPort" $overrideDaliPort)) | nindent 2 }}
{{- include "hpcc.addResources" (dict "me" .me.resources "root" .root) | indent 2 }}
{{- include "hpcc.addSecurityContext" . | indent 2 }}
env:
Expand Down Expand Up @@ -1822,11 +1893,11 @@ Pass in dict with root, pod, target and type

{{/*
Generate lifecycle, command and args
Pass in root, me and command
Pass in root, me and process
*/}}
{{- define "hpcc.addCommandAndLifecycle" -}}
{{- $misc := .root.Values.global.misc | default dict }}
{{- $postJobCommand := $misc.postJobCommand | default "" }}
{{- $misc := .root.Values.global.misc | default dict -}}
{{- $postJobCommand := (.isJob | default false) | ternary $misc.postJobCommand "" -}}
lifecycle:
preStop:
exec:
Expand All @@ -1835,38 +1906,42 @@ lifecycle:
- "-c"
- >-
k8s_postjob_clearup.sh
{{- if and (not $misc.postJobCommandViaSidecar) $postJobCommand }} ;
{{- if $misc.postJobCommandViaSidecar }} ;
touch /wait-and-run/{{ .me.name }}.jobdone
{{- else if $postJobCommand }} ;
{{ $postJobCommand }}
{{- end }}
command: ["/bin/bash"]
args:
- -c
{{- $check_cmd := dict "command" .command}}
{{- if (include "hpcc.hasPlaneForCategory" (dict "root" .root "category" "debug")) -}}
{{- end -}}
{{- $meExpert := .me.expert | default dict -}}
{{- $globalExpert := .root.Values.global.expert | default dict -}}
{{- $containerName := .containerName | default .me.name -}}
{{- $args := list -}}
{{- $configCtx := (hasKey . "overrideConfigName") | ternary (dict "name" .overrideConfigName) .me -}}
{{- if .me.valgrind -}}
{{- $args = append $args "-v" -}}
{{- else if (include "hpcc.hasPlaneForCategory" (dict "root" .root "category" "debug")) -}}
{{- $debugPlane := .me.debugPlane | default (include "hpcc.getFirstPlaneForCategory" (dict "root" .root "category" "debug")) -}}
{{- include "hpcc.checkPlaneExists" (dict "root" .root "planeName" $debugPlane) -}}
{{- $prefix := include "hpcc.getPlanePrefix" (dict "root" .root "planeName" $debugPlane) -}}
{{- $pmd_always_opt := "" -}}
{{- $globalExpert := .root.Values.global.expert | default dict -}}
{{- $meExpert := .me.expert | default dict -}}
{{- $alwaysPostMortem := (hasKey $meExpert "alwaysPostMortem") | ternary $meExpert.alwaysPostMortem ($globalExpert.alwaysPostMortem | default false) -}}
{{- if $alwaysPostMortem -}}
{{- $pmd_always_opt = "-a " -}}
{{- $args = append $args "-a" -}}
{{- end -}}
{{- $_ := set $check_cmd "command" (printf "check_executes %s-d %s -- %s" $pmd_always_opt $prefix .command) -}}
{{- end }}
- >-
{{ $check_cmd.command }};
exitCode=$?;
k8s_postjob_clearup.sh;
{{- if $misc.postJobCommandViaSidecar -}}
touch /wait-and-run/{{ .me.name }}.jobdone;
{{- else if $postJobCommand -}}
{{ $postJobCommand }} ;
{{- $postRun := (hasKey $meExpert "postRunSidecar") | ternary $meExpert.postRunSidecar ((hasKey $globalExpert "postRunSidecar") | ternary $globalExpert.postRunSidecar true) -}}
{{- if $postRun -}}
{{- $args = append $args "-p" -}}
{{- end -}}
{{- $args = concat $args (list "-d" $prefix "-c" $containerName "--") -}}
{{- $_ := set .lifeCycleCtx "containers" (append .lifeCycleCtx.containers (dict "name" $containerName "process" .process "config" $configCtx.name)) -}}
{{- end -}}
{{- $args = append $args .process -}}
{{- $args = append $args (include "hpcc.configArg" $configCtx) -}}
{{- $args = append $args (include "hpcc.daliArg" .) -}}
{{- if hasKey . "extraArgs" -}}
{{- $args = concat $args .extraArgs -}}
{{- end }}
exit $exitCode;
command: ["check_executes.sh"]
args: [ {{ join " " $args }} ]
{{- end -}}

{{- define "hpcc.addCertificateImpl" }}
{{- if (.root.Values.certificates | default dict).enabled -}}
{{- $externalCert := .externalCert -}}
Expand Down
12 changes: 6 additions & 6 deletions helm/hpcc/templates/dafilesrv.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ data:
{{- if not .disabled -}}
{{- $env := concat ($.Values.global.env | default list) (.env | default list) -}}
{{- $commonCtx := dict "root" $ "me" . "env" $env "exposure" "local" "visibility" .service.visibility "includeCategories" (list "data" "debug") -}}
{{- $_ := set $commonCtx "lifeCycleCtx" (dict "containers" list) -}}
{{- if (eq "spray" .application) -}}
{{- $_ := set $commonCtx "includeCategories" (concat $commonCtx.includeCategories (list "lz" "remote")) -}}
{{- end -}}
Expand Down Expand Up @@ -58,16 +59,14 @@ spec:
spec:
{{- include "hpcc.placementsByPodTargetType" (dict "root" $ "pod" .name "type" "dafilesrv") | indent 6 }}
serviceAccountName: "hpcc-default"
terminationGracePeriodSeconds: {{ .terminationGracePeriodSeconds | default 600 }}
initContainers:
{{- include "hpcc.createConfigInitContainers" $commonCtx | indent 6 }}
{{- include "hpcc.addImagePullSecrets" $commonCtx | nindent 6 -}}
containers:
- name: {{ .name | quote }}
workingDir: /var/lib/HPCCSystems
command: [ {{ include "hpcc.componentCommand" (dict "me" . "root" $ "process" "dafilesrv") }} ]
args: [ {{- include "hpcc.componentStartArgs" (dict "me" . "root" $ "process" "dafilesrv") | nindent 16 }}
{{ include "hpcc.configArg" . }}
]
{{- include "hpcc.addCommandAndLifecycle" ($commonCtx | merge (dict "process" "dafilesrv" "component" "DaFileSrv" "optional" false)) | nindent 8 }}
env:
{{ include "hpcc.mergeEnvironments" (dict "env" $env "defaultArenas" 2) | indent 8 -}}
- name: "SENTINEL"
Expand All @@ -77,7 +76,7 @@ spec:
{{- include "hpcc.addResources" (dict "me" .resources "root" $) | indent 8 }}
{{ include "hpcc.addImageAttrs" $commonCtx | indent 8 }}
volumeMounts:
{{ include "hpcc.addConfigMapVolumeMount" . | indent 8 }}
{{ include "hpcc.addEphemeralVolumeMounts" . | indent 8 }}
{{ include "hpcc.addVolumeMounts" $commonCtx | indent 8 }}
{{ include "hpcc.addVaultClientCertificateVolumeMounts" $commonCtx | indent 8 }}
{{- if $commonCtx.certificatesEnabled }}
Expand All @@ -87,8 +86,9 @@ spec:
{{- $_ := fail (printf "dafilesrv[application=stream]- certificates must be enabled to use") -}}
{{- end }}
{{- end }}
{{- include "hpcc.addPostRunContainer" $commonCtx | nindent 6 }}
volumes:
{{ include "hpcc.addConfigMapVolume" . | indent 6 }}
{{ include "hpcc.addEphemeralVolumes" . | indent 6 }}
{{ include "hpcc.addVolumes" $commonCtx | indent 6 }}
{{ include "hpcc.addVaultClientCertificateVolumes" $commonCtx | indent 6 }}
{{- if $commonCtx.certificatesEnabled }}
Expand Down
Loading
Loading