From 9ddd20dcbe04e90df97b6abcdf511ceef3ec1170 Mon Sep 17 00:00:00 2001 From: Ben Howard Date: Thu, 9 Jul 2020 16:50:53 -0600 Subject: [PATCH] templates: ensure RHCOS node hostnames are mostly valid This aims to fix all the edge-cases around invalid hostnames. This can include: - localhost as the hostname - a static hostname longer than 63 characters - slow reverse DNS host name discovery - truncated hostname in initramfs discovery that is 64 characters long Previously, there was a node-valid-hostname.service, but it only ensured that the hostname was not localhost. When running in the initramfs, NetworkManager will truncate the hostname at 64 characters, resulting in the `node-valid-hostname.service` checks succeeding (!localhost) while the hostname is still invalid for the cluster. This change consolidates the logic used by the dispatcher script _and_ the systemd unit into a library script that is used by both. This fix is not ideal, but it seems like the best path that can be backported to 4.3 and 4.5. Fixes BZs 1844613, 1845885, 1853400, 1853584 (and probably more) Signed-off-by: Ben Howard --- ...manager-dispatcher.d-90-long-hostname.yaml | 33 +++----- .../usr-local-sbin-set-valid-hostname.yaml | 83 +++++++++++++++++++ .../_base/units/node-valid-hostname.service | 16 ++-- 3 files changed, 104 insertions(+), 28 deletions(-) create mode 100644 templates/common/_base/files/usr-local-sbin-set-valid-hostname.yaml diff --git a/templates/common/_base/files/etc-networkmanager-dispatcher.d-90-long-hostname.yaml b/templates/common/_base/files/etc-networkmanager-dispatcher.d-90-long-hostname.yaml index d46be9d55a..c1d7aa4f28 100644 --- a/templates/common/_base/files/etc-networkmanager-dispatcher.d-90-long-hostname.yaml +++ b/templates/common/_base/files/etc-networkmanager-dispatcher.d-90-long-hostname.yaml @@ -4,36 +4,29 @@ contents: inline: | #!/bin/bash # - # On Google Compute Platform (GCP) the hostname may be too long (>63 chars). 
- # During firstboot the hostname is set in the initramfs before NetworkManager - # runs; on reboot affect nodes use 'localhost'. This hook is a simple work - # around: if the host name is longer than 63 characters, then the hostname - # is truncated at the _first_ dot. - # - # Additionally, this hook does not break DNS or cluster DNS resolution, - # since NetworkManager sets the appropriate /etc/resolv.conf settings. - + # 90-long-hostname is a wrapper around /usr/local/sbin/set-valid-hostname.sh, + # which ensures that a node has a valid hostname. IF=$1 STATUS=$2 log() { logger --tag "network-manager/$(basename $0)" "${@}"; } - # capture all eligible hostnames - if [[ ! "$(/bin/hostname)" =~ (localhost|localhost.local) ]]; then - log "hostname is already set" + if [[ ! "$STATUS" =~ (up|hostname|dhcp4-change|dhcp6-change) ]]; then exit 0 fi - if [[ ! "$STATUS" =~ (up|hostname|dhcp4-change|dhcp6-change) ]]; then + if [[ ! "$(< /proc/sys/kernel/hostname)" =~ (localhost|localhost.localdomain) ]]; then + log "hostname is already set" exit 0 fi - default_host="${DHCP4_HOST_NAME:-$DHCP6_HOST_NAME}" - # truncate the hostname to the first dot and than 64 characters. - host=$(printf ${default_host} | cut -f1 -d'.' | cut -c -63) + # source the script since NetworkManager execution rules do + # allow sourcing from /usr/local. RHCOS has a read-only rootfs + # which limits where this can be stashed. 
+ source /usr/local/sbin/set-valid-hostname.sh + host_name="${DHCP4_HOST_NAME:-$DHCP6_HOST_NAME}" - if [ "${#default_host}" -gt 63 ]; then - log "discovered hostname is longer than than 63 characters" - log "truncating ${default_host} => ${host}" - /bin/hostnamectl --transient set-hostname "${host}" + if [ -n "${host_name}" ]; then + set_valid_hostname "${host_name}" fi + diff --git a/templates/common/_base/files/usr-local-sbin-set-valid-hostname.yaml b/templates/common/_base/files/usr-local-sbin-set-valid-hostname.yaml new file mode 100644 index 0000000000..0049b018ed --- /dev/null +++ b/templates/common/_base/files/usr-local-sbin-set-valid-hostname.yaml @@ -0,0 +1,83 @@ +mode: 0755 +path: "/usr/local/sbin/set-valid-hostname.sh" +contents: + inline: | + #!/bin/bash + # On some platforms the hostname may be too long (>63 chars). + # - On firstboot the hostname is set in the initramfs before NetworkManager + # And it may be truncated at 64 characters (too long) + # - On reboot affect nodes use 'localhost'. + # + # This script is a simple workaround for hostname woes, including + # - NOT a localhost name + # - NOT longer than 63 characters. Names will be truncated at the + # first dot, and then capped at 63 char (which ever is less). + # - Race conditions between truncated hostnames by the dhclient + # and NetworkManager. + # + # Finally, this script is invoked via: + # - /etc/NetworkManager/dispatcher.d/90-long-hostnames + # - on boot via node-valid-hostname.service + + export PATH="/usr/bin:/usr/local/bin:/sbin:/usr/local/sbin:/bin:${PATH}" + log() { logger --tag "$(basename $0)" "${@}"; } + + # wait_localhost waits until the host gets a real hostname. + # This will wait indefinately. node-valid-hostname.service will terminate + # this after 5m. + wait_localhost() { + log "waiting for non-localhost hostname to be assigned" + while [[ "$(< /proc/sys/kernel/hostname)" =~ (localhost|localhost.localdomain) ]]; + do + sleep 1 + done + log "node identified as $(