From 684d92c91faf7daaaadef71b3a128cd2b8eb4401 Mon Sep 17 00:00:00 2001
From: "W. Trevor King"
Date: Thu, 12 Dec 2019 16:01:08 -0800
Subject: [PATCH] cmd/openshift-install/gather: Recognize "connection refused"

Before this commit, bootstrap machines that failed to come up would
look like [1]:

  level=info msg="Waiting up to 30m0s for the Kubernetes API at https://api.ci-op-6266tp8r-77109.origin-ci-int-aws.dev.rhcloud.com:6443..."
  level=error msg="Attempted to gather ClusterOperator status after installation failure: listing ClusterOperator objects: Get https://api.ci-op-6266tp8r-77109.origin-ci-int-aws.dev.rhcloud.com:6443/apis/config.openshift.io/v1/clusteroperators: dial tcp 3.221.214.197:6443: connect: connection refused"
  level=info msg="Pulling debug logs from the bootstrap machine"
  level=error msg="Attempted to gather debug logs after installation failure: failed to create SSH client, ensure the proper ssh key is in your keyring or specify with --key: dial tcp 3.84.188.207:22: connect: connection refused"
  level=fatal msg="Bootstrap failed to complete: waiting for Kubernetes API: context deadline exceeded"

With this commit, that last error will look like:

  level=error msg="Attempted to gather debug logs after installation failure: failed to connect to the bootstrap machine: dial tcp 3.84.188.207:22: connect: connection refused"

without the unrelated (to this failure mode) distraction about SSH
keys.

[1]: https://prow.svc.ci.openshift.org/view/gcs/origin-ci-test/logs/release-openshift-origin-installer-e2e-aws-upgrade/12076 --- cmd/openshift-install/gather.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cmd/openshift-install/gather.go b/cmd/openshift-install/gather.go index decce1a4154..272bf73db15 100644 --- a/cmd/openshift-install/gather.go +++ b/cmd/openshift-install/gather.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" "strings" + "syscall" "time" configv1 "github.com/openshift/api/config/v1" @@ -117,11 +118,15 @@ func runGatherBootstrapCmd(directory string) error { func logGatherBootstrap(bootstrap string, port int, masters []string, directory string) error { logrus.Info("Pulling debug logs from the bootstrap machine") client, err := ssh.NewClient("core", fmt.Sprintf("%s:%d", bootstrap, port), gatherBootstrapOpts.sshKeys) - if err != nil && len(gatherBootstrapOpts.sshKeys) == 0 { - return errors.Wrap(err, "failed to create SSH client, ensure the proper ssh key is in your keyring or specify with --key") - } else if err != nil { + if err != nil { + if errno, ok := err.(syscall.Errno); ok && errno == syscall.ECONNREFUSED { + return errors.Wrap(err, "failed to connect to the bootstrap machine") + } else if len(gatherBootstrapOpts.sshKeys) == 0 { + return errors.Wrap(err, "failed to create SSH client, ensure the proper ssh key is in your keyring or specify with --key") + } return errors.Wrap(err, "failed to create SSH client") } + gatherID := time.Now().Format("20060102150405") if err := ssh.Run(client, fmt.Sprintf("/usr/local/bin/installer-gather.sh --id %s %s", gatherID, strings.Join(masters, " "))); err != nil { return errors.Wrap(err, "failed to run remote command")