Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add instance update and reactivation #38

Merged
merged 1 commit into from
May 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 51 additions & 28 deletions updater/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strings"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ecs"
"github.com/aws/aws-sdk-go/service/ssm"
)
Expand Down Expand Up @@ -155,7 +156,7 @@ func (u *updater) activateInstance(containerInstance *string) error {
if len(resp.Failures) != 0 {
return fmt.Errorf("Container instance %s failed to activate: %#v", aws.StringValue(containerInstance), resp.Failures)
}
log.Printf("Container instance state changed to ACTIVE")
log.Printf("Container instance %s state changed to ACTIVE", aws.StringValue(containerInstance))
return nil
}

Expand Down Expand Up @@ -209,37 +210,59 @@ func (u *updater) sendCommand(instanceIDs []string, ssmCommand string) (string,
return commandID, nil
}

func (u *updater) checkSSMCommandOutput(commandID string, instanceIDs []string) ([]string, error) {
updateCandidates := make([]string, 0)
for _, v := range instanceIDs {
resp, err := u.ssm.GetCommandInvocation(&ssm.GetCommandInvocationInput{
CommandId: aws.String(commandID),
InstanceId: aws.String(v),
})
if err != nil {
return nil, fmt.Errorf("failed to retrieve command invocation output: %#v", err)
}
func (u *updater) getCommandResult(commandID string, instanceID string) ([]byte, error) {
resp, err := u.ssm.GetCommandInvocation(&ssm.GetCommandInvocationInput{
CommandId: aws.String(commandID),
InstanceId: aws.String(instanceID),
})
if err != nil {
return nil, fmt.Errorf("failed to retrieve command invocation output: %#v", err)
}
commandResults := []byte(aws.StringValue(resp.StandardOutputContent))
return commandResults, nil
}

type updateCheckResult struct {
UpdateState string `json:"update_state"`
}
func isUpdateAvailable(commandOutput []byte) (bool, error) {
type updateCheckResult struct {
UpdateState string `json:"update_state"`
}

var result updateCheckResult
err = json.Unmarshal([]byte(*resp.StandardOutputContent), &result)
if err != nil {
log.Printf("failed to unmarshal command invocation output: %#v", err)
}
log.Println("update_state: ", result)
var updateState updateCheckResult
err := json.Unmarshal(commandOutput, &updateState)
if err != nil {
return false, fmt.Errorf("failed to unmarshal update state: %#v", err)
}
return updateState.UpdateState == "Available", nil
}

switch result.UpdateState {
case "Available":
updateCandidates = append(updateCandidates, v)
}
// getActiveVersion unmarshals GetCommandInvocation output to determine the active version of a Bottlerocket instance.
// Takes GetCommandInvocation output as a parameter and returns the active version in use.
func getActiveVersion(commandOutput []byte) (string, error) {
type version struct {
Version string `json:"version"`
}

type image struct {
Image version `json:"image"`
}

if len(updateCandidates) == 0 {
log.Printf("No instances to update")
return nil, nil
type partition struct {
ActivePartition image `json:"active_partition"`
}
return updateCandidates, nil

var activeVersion partition
err := json.Unmarshal(commandOutput, &activeVersion)
if err != nil {
log.Printf("failed to unmarshal command invocation output: %#v", err)
return "", err
}
versionInUse := activeVersion.ActivePartition.Image.Version
return versionInUse, nil
}

// waitUntilOk takes an EC2 ID as a parameter and waits until the specified EC2 instance is in an Ok status.
func (u *updater) waitUntilOk(ec2ID string) error {
return u.ec2.WaitUntilInstanceStatusOk(&ec2.DescribeInstanceStatusInput{
InstanceIds: []*string{aws.String(ec2ID)},
})
}
84 changes: 79 additions & 5 deletions updater/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ecs"
"github.com/aws/aws-sdk-go/service/ssm"
)
Expand All @@ -22,6 +23,7 @@ type updater struct {
cluster string
ecs ECSAPI
ssm *ssm.SSM
ec2 *ec2.EC2
}

func main() {
Expand Down Expand Up @@ -50,6 +52,7 @@ func _main() error {
cluster: *flagCluster,
ecs: ecs.New(sess, aws.NewConfig().WithLogLevel(aws.LogDebugWithHTTPBody)),
ssm: ssm.New(sess, aws.NewConfig().WithLogLevel(aws.LogDebugWithHTTPBody)),
ec2: ec2.New(sess, aws.NewConfig().WithLogLevel(aws.LogDebugWithHTTPBody)),
}

listedInstances, err := u.listContainerInstances()
Expand Down Expand Up @@ -78,16 +81,27 @@ func _main() error {
return err
}

candidates, err := u.checkSSMCommandOutput(commandID, instances)
if err != nil {
return err
}
candidates := make([]string, 0)
for _, ec2ID := range instances {
commandOutput, err := u.getCommandResult(commandID, ec2ID)
if err != nil {
return err
}

updateState, err := isUpdateAvailable(commandOutput)
if err != nil {
return err
}

if updateState {
candidates = append(candidates, ec2ID)
}
}
if len(candidates) == 0 {
log.Printf("No instances to update")
return nil
}
fmt.Println("Instances ready for update: ", candidates)
log.Print("Instances ready for update: ", candidates)

for ec2ID, containerInstance := range ec2IDtoECSARN {
err := u.drain(containerInstance)
Expand All @@ -96,6 +110,66 @@ func _main() error {
continue
}
log.Printf("Instance %s drained", ec2ID)

ec2IDs := []string{ec2ID}
_, err = u.sendCommand(ec2IDs, "apiclient update apply --reboot")
if err != nil {
// TODO add nuanced error checking to determine the type of failure, act accordingly.
samuelkarp marked this conversation as resolved.
Show resolved Hide resolved
log.Printf("%#v", err)
err2 := u.activateInstance(aws.String(containerInstance))
if err2 != nil {
log.Printf("failed to reactivate %s after failure to execute update command. Aborting update operations.", ec2ID)
return err2
}
continue
}

err = u.waitUntilOk(ec2ID)
if err != nil {
return fmt.Errorf("instance %s failed to enter an Ok status after reboot. Aborting update operations: %#v", ec2ID, err)
}

err = u.activateInstance(aws.String(containerInstance))
if err != nil {
log.Printf("instance %s failed to return to ACTIVE after reboot. Aborting update operations.", ec2ID)
return err
}

updateStatus, err := u.sendCommand(ec2IDs, "apiclient update check")
if err != nil {
log.Printf("%#v", err)
continue
}

updateResult, err := u.getCommandResult(updateStatus, ec2ID)
if err != nil {
log.Printf("%#v", err)
continue
}

// TODO version before and after comparison.
updateState, err := isUpdateAvailable(updateResult)
if err != nil {
log.Printf("Unable to determine update result. Manual verification of %s required", ec2ID)
continue
}

if updateState {
log.Printf("Instance %s did not update. Manual update advised.", ec2ID)
continue
} else {
log.Printf("Instance %s updated successfully", ec2ID)
}

updatedVersion, err := getActiveVersion(updateResult)
if err != nil {
log.Printf("%#v", err)
}
if len(updatedVersion) != 0 {
log.Printf("Instance %s running Bottlerocket: %s", ec2ID, updatedVersion)
} else {
log.Printf("Unable to verify active version. Manual verification of %s required.", ec2ID)
}
}
return nil
}