Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: cmd/openshift-install/gather: Gather bootstrap console logs #2811

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/openshift-install/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ var (
}

err = waitForBootstrapComplete(ctx, config, rootOpts.dir)
if err != nil {
if err != nil || true {
if err2 := logClusterOperatorConditions(ctx, config); err2 != nil {
logrus.Error("Attempted to gather ClusterOperator status after installation failure: ", err2)
}
Expand Down
85 changes: 66 additions & 19 deletions cmd/openshift-install/gather.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ package main
import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"syscall"
"time"

configv1 "github.com/openshift/api/config/v1"
Expand All @@ -18,13 +20,14 @@ import (

"github.com/openshift/installer/pkg/asset/installconfig"
assetstore "github.com/openshift/installer/pkg/asset/store"
gatheraws "github.com/openshift/installer/pkg/gather/aws"
"github.com/openshift/installer/pkg/gather/ssh"
"github.com/openshift/installer/pkg/terraform"
gatheraws "github.com/openshift/installer/pkg/terraform/gather/aws"
gatherazure "github.com/openshift/installer/pkg/terraform/gather/azure"
gathergcp "github.com/openshift/installer/pkg/terraform/gather/gcp"
gatherlibvirt "github.com/openshift/installer/pkg/terraform/gather/libvirt"
gatheropenstack "github.com/openshift/installer/pkg/terraform/gather/openstack"
terraformgatheraws "github.com/openshift/installer/pkg/terraform/gather/aws"
terraformgatherazure "github.com/openshift/installer/pkg/terraform/gather/azure"
terraformgathergcp "github.com/openshift/installer/pkg/terraform/gather/gcp"
terraformgatherlibvirt "github.com/openshift/installer/pkg/terraform/gather/libvirt"
terraformgatheropenstack "github.com/openshift/installer/pkg/terraform/gather/openstack"
"github.com/openshift/installer/pkg/types"
awstypes "github.com/openshift/installer/pkg/types/aws"
azuretypes "github.com/openshift/installer/pkg/types/azure"
Expand Down Expand Up @@ -111,17 +114,31 @@ func runGatherBootstrapCmd(directory string) error {
return errors.Wrapf(err, "failed to get bootstrap and control plane host addresses from %q", tfStateFilePath)
}

return logGatherBootstrap(bootstrap, port, masters, directory)
err = logGatherBootstrap(bootstrap, port, masters, directory)
if err != nil || true {
// if errno, ok := errors.Cause(err).(syscall.Errno); ok && errno == syscall.ECONNREFUSED {
err2 := gatherConsoleLogs(context.TODO(), config, bootstrap, directory)
if err2 != nil {
logrus.Error(err2)
}
// }
}

return err
}

func logGatherBootstrap(bootstrap string, port int, masters []string, directory string) error {
logrus.Info("Pulling debug logs from the bootstrap machine")
client, err := ssh.NewClient("core", fmt.Sprintf("%s:%d", bootstrap, port), gatherBootstrapOpts.sshKeys)
if err != nil && len(gatherBootstrapOpts.sshKeys) == 0 {
return errors.Wrap(err, "failed to create SSH client, ensure the proper ssh key is in your keyring or specify with --key")
} else if err != nil {
if err != nil {
if errno, ok := err.(syscall.Errno); ok && errno == syscall.ECONNREFUSED {
return errors.Wrap(err, "failed to connect to the bootstrap machine")
} else if len(gatherBootstrapOpts.sshKeys) == 0 {
return errors.Wrap(err, "failed to create SSH client, ensure the proper ssh key is in your keyring or specify with --key")
}
return errors.Wrap(err, "failed to create SSH client")
}

gatherID := time.Now().Format("20060102150405")
if err := ssh.Run(client, fmt.Sprintf("/usr/local/bin/installer-gather.sh --id %s %s", gatherID, strings.Join(masters, " "))); err != nil {
return errors.Wrap(err, "failed to run remote command")
Expand All @@ -134,51 +151,81 @@ func logGatherBootstrap(bootstrap string, port int, masters []string, directory
return nil
}

// gatherConsoleLogs fetches the console output of the bootstrap machine
// reachable at ip and writes it to a timestamped
// bootstrap-<id>-console.log file inside directory.
//
// Only the AWS platform is handled; for every other platform the function
// emits a debug message and returns nil without writing anything. Errors from
// creating the AWS session, retrieving the console output, or writing the
// file are returned to the caller unchanged.
func gatherConsoleLogs(ctx context.Context, installConfig *installconfig.InstallConfig, ip string, directory string) error {
	var data []byte
	platform := installConfig.Config.Platform.Name()
	switch platform {
	case awstypes.Name:
		session, err := installConfig.AWS.Session(ctx)
		if err != nil {
			return err
		}

		data, err = gatheraws.ConsoleLogs(ctx, session, ip)
		if err != nil {
			return err
		}
	default:
		// Debugf, not Debug: the message carries a %q verb that has to be
		// expanded with the platform name.
		logrus.Debugf("Unable to gather console logs on %q", platform)
		return nil
	}

	// The reference-time layout yields a sortable YYYYMMDDhhmmss identifier.
	gatherID := time.Now().Format("20060102150405")
	file := filepath.Join(directory, fmt.Sprintf("bootstrap-%s-console.log", gatherID))
	if err := ioutil.WriteFile(file, data, 0666); err != nil {
		return err
	}

	logrus.Infof("Bootstrap gather logs captured here %q", file)
	return nil
}

func extractHostAddresses(config *types.InstallConfig, tfstate *terraform.State) (bootstrap string, port int, masters []string, err error) {
port = 22
switch config.Platform.Name() {
case awstypes.Name:
bootstrap, err = gatheraws.BootstrapIP(tfstate)
bootstrap, err = terraformgatheraws.BootstrapIP(tfstate)
if err != nil {
return bootstrap, port, masters, err
}
masters, err = gatheraws.ControlPlaneIPs(tfstate)
masters, err = terraformgatheraws.ControlPlaneIPs(tfstate)
if err != nil {
logrus.Error(err)
}
case azuretypes.Name:
bootstrap, err = gatherazure.BootstrapIP(tfstate)
bootstrap, err = terraformgatherazure.BootstrapIP(tfstate)
if err != nil {
return bootstrap, port, masters, err
}
masters, err = gatherazure.ControlPlaneIPs(tfstate)
masters, err = terraformgatherazure.ControlPlaneIPs(tfstate)
if err != nil {
logrus.Error(err)
}
case gcptypes.Name:
bootstrap, err = gathergcp.BootstrapIP(tfstate)
bootstrap, err = terraformgathergcp.BootstrapIP(tfstate)
if err != nil {
return bootstrap, port, masters, err
}
masters, err = gathergcp.ControlPlaneIPs(tfstate)
masters, err = terraformgathergcp.ControlPlaneIPs(tfstate)
if err != nil {
logrus.Error(err)
}
case libvirttypes.Name:
bootstrap, err = gatherlibvirt.BootstrapIP(tfstate)
bootstrap, err = terraformgatherlibvirt.BootstrapIP(tfstate)
if err != nil {
return bootstrap, port, masters, err
}
masters, err = gatherlibvirt.ControlPlaneIPs(tfstate)
masters, err = terraformgatherlibvirt.ControlPlaneIPs(tfstate)
if err != nil {
logrus.Error(err)
}
case openstacktypes.Name:
bootstrap, err = gatheropenstack.BootstrapIP(tfstate)
bootstrap, err = terraformgatheropenstack.BootstrapIP(tfstate)
if err != nil {
return bootstrap, port, masters, err
}
masters, err = gatheropenstack.ControlPlaneIPs(tfstate)
masters, err = terraformgatheropenstack.ControlPlaneIPs(tfstate)
if err != nil {
logrus.Error(err)
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/gather/aws/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md
# This file just uses aliases defined in OWNERS_ALIASES.

approvers:
- aws-approvers
reviewers:
- aws-reviewers
68 changes: 68 additions & 0 deletions pkg/gather/aws/console.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Package aws provides AWS-specific tools for gathering debugging information.
package aws

import (
"context"
"encoding/base64"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/pkg/errors"
)

// ConsoleLogs returns the decoded console output of the first EC2 instance
// that matches an "ip-address" filter for ip.
//
// It pages through DescribeInstances until an instance with a non-nil ID is
// seen, requests that instance's latest console output, and base64-decodes
// the result. An error is returned when the lookup fails, no instance ID is
// found, the console output is nil, or the payload cannot be decoded.
func ConsoleLogs(ctx context.Context, session *session.Session, ip string) ([]byte, error) {
	ec2Client := ec2.New(session)

	describeInput := &ec2.DescribeInstancesInput{
		Filters: []*ec2.Filter{{
			Name:   aws.String("ip-address"),
			Values: []*string{&ip},
		}},
	}

	var id string
	// pickFirst records the first instance ID it sees and stops paging;
	// otherwise it keeps paging until the last page has been visited.
	pickFirst := func(page *ec2.DescribeInstancesOutput, lastPage bool) bool {
		for _, reservation := range page.Reservations {
			for _, instance := range reservation.Instances {
				if instance.InstanceId == nil {
					continue
				}
				id = *instance.InstanceId
				return false
			}
		}

		return !lastPage
	}

	if err := ec2Client.DescribeInstancesPagesWithContext(ctx, describeInput, pickFirst); err != nil {
		return nil, errors.Wrap(err, "describe instances")
	}
	if id == "" {
		return nil, errors.Errorf("unable to find an AWS instance ID for %q", ip)
	}

	consoleInput := &ec2.GetConsoleOutputInput{
		InstanceId: &id,
		Latest:     aws.Bool(true),
	}
	output, err := ec2Client.GetConsoleOutputWithContext(ctx, consoleInput)
	if err != nil {
		return nil, errors.Wrapf(err, "get console output for %s", id)
	}
	if output.Output == nil {
		return nil, errors.Errorf("nil console output for %s", id)
	}

	// The API hands back the console text base64-encoded.
	decoded, err := base64.StdEncoding.DecodeString(*output.Output)
	if err != nil {
		return nil, errors.Wrapf(err, "decoding console output for %s", id)
	}

	return decoded, nil
}