Skip to content

Commit

Permalink
retry on network failure for detecting EC2 (#397)
Browse files Browse the repository at this point in the history
* retry on network failure for detecting EC2

* fix error variable name, fix retry loop

* use network access check instead of simple retry on metadata endpoint

* revert systemd network target config
  • Loading branch information
SaxyPandaBear authored Mar 9, 2022
1 parent d1debb9 commit dd1be96
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 15 deletions.
2 changes: 1 addition & 1 deletion packaging/dependencies/amazon-cloudwatch-agent.service
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

[Unit]
Description=Amazon CloudWatch Agent
After=network-online.target
After=network.target

[Service]
Type=simple
Expand Down
61 changes: 47 additions & 14 deletions translator/util/ec2util/ec2util.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
package ec2util

import (
"log"
"sync"

"github.com/aws/amazon-cloudwatch-agent/translator/config"
"github.com/aws/amazon-cloudwatch-agent/translator/context"
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"log"
"net"
"sync"
"time"
)

// this is a singleton struct
Expand All @@ -21,6 +22,8 @@ type ec2Util struct {
Hostname string
}

// allowedRetries caps how many times initEC2UtilSingleton polls for an active,
// non-loopback network interface (sleeping one second between attempts) before
// it logs an error and carries on with EC2 metadata detection anyway.
const allowedRetries = 5

// e points at the package's ec2Util singleton.
// NOTE(review): presumably initialised exactly once through `once`; the accessor
// is outside this view — confirm.
var e *ec2Util
var once sync.Once

Expand All @@ -37,39 +40,69 @@ func initEC2UtilSingleton() (newInstance *ec2Util) {
return
}

ses, e := session.NewSession()
if e != nil {
log.Println("E! [EC2] getting new session info: ", e)
// Need to account for the scenario where a user is running the CloudWatch agent on-premises
// and doesn't require connectivity with the EC2 instance metadata service, while still
// gracefully waiting for network access on EC2 instances.
networkUp := false
for retry := 0; !networkUp && retry < allowedRetries; retry++ {
ifs, err := net.Interfaces()

if err != nil {
log.Println("E! [EC2] An error occurred while fetching network interfaces: ", err)
}

for _, in := range ifs {
if (in.Flags&net.FlagUp) != 0 && (in.Flags&net.FlagLoopback) == 0 {
networkUp = true
break
}
}
if networkUp {
log.Println("D! [EC2] Found active network interface")
break
}

log.Println("W! [EC2] Sleep until network is up")
time.Sleep(1 * time.Second)
}
if !networkUp {
log.Println("E! [EC2] No available network interface")
}

ses, err := session.NewSession()
if err != nil {
log.Println("E! [EC2] getting new session info: ", err)
return
}
md := ec2metadata.New(ses)

if !md.Available() {
log.Println("E! ec2metadata is not available")
return
}

if info, e := md.GetMetadata("instance-id"); e == nil {
if info, err := md.GetMetadata("instance-id"); err == nil {
newInstance.InstanceID = info
} else {
log.Println("E! getting instance-id from EC2 metadata fail: ", e)
log.Println("E! getting instance-id from EC2 metadata fail: ", err)
}

if info, e := md.GetMetadata("hostname"); e == nil {
if info, err := md.GetMetadata("hostname"); err == nil {
newInstance.Hostname = info
} else {
log.Println("E! getting hostname from EC2 metadata fail: ", e)
log.Println("E! getting hostname from EC2 metadata fail: ", err)
}

if info, e := md.GetMetadata("local-ipv4"); e == nil {
if info, err := md.GetMetadata("local-ipv4"); err == nil {
newInstance.PrivateIP = info
} else {
log.Println("E! getting local-ipv4 from EC2 metadata fail: ", e)
log.Println("E! getting local-ipv4 from EC2 metadata fail: ", err)
}

if info, e := md.GetInstanceIdentityDocument(); e == nil {
if info, err := md.GetInstanceIdentityDocument(); err == nil {
newInstance.Region = info.Region
} else {
log.Println("E! getting region from EC2 metadata fail: ", e)
log.Println("E! getting region from EC2 metadata fail: ", err)
}

return
Expand Down

0 comments on commit dd1be96

Please sign in to comment.