diff --git a/BETA-org-member-deployment/01_CFN_MGMT_ROLE.yml b/01_CFN_MGMT_ROLE.yml similarity index 100% rename from BETA-org-member-deployment/01_CFN_MGMT_ROLE.yml rename to 01_CFN_MGMT_ROLE.yml diff --git a/BETA-org-member-deployment/02_CFN_DEPLOY_AHA.yml b/02_CFN_DEPLOY_AHA.yml similarity index 86% rename from BETA-org-member-deployment/02_CFN_DEPLOY_AHA.yml rename to 02_CFN_DEPLOY_AHA.yml index cbd3cec..bd81eda 100644 --- a/BETA-org-member-deployment/02_CFN_DEPLOY_AHA.yml +++ b/02_CFN_DEPLOY_AHA.yml @@ -34,6 +34,36 @@ Metadata: - EventSearchBack - Regions - ManagementAccountRoleArn + - AccountIDs + ParameterLabels: + AWSOrganizationsEnabled: + default: AWS Organizations Enabled? + ManagementAccountRoleArn: + default: ARN of the AWS Organizations Management Account assume role (if using) + AWSHealthEventType: + default: The types of events to get alerted on + S3Bucket: + default: Name of S3 Bucket + S3Key: + default: Name of .zip file in S3 Bucket + SlackWebhookURL: + default: Slack Webhook URL + MicrosoftTeamsWebhookURL: + default: Microsoft Teams Webhook URL + AmazonChimeWebhookURL: + default: Amazon Chime Webhook URL + FromEmail: + default: Email From + ToEmail: + default: Email To + Subject: + default: Subject of Email + HealthAPIFrequency: + default: Hours back to search for events + Regions: + default: Which regions to search for events in + AccountIDs: + default: Exclude any account numbers? 
Conditions: UsingSlack: !Not [!Equals [!Ref SlackWebhookURL, None]] UsingTeams: !Not [!Equals [!Ref MicrosoftTeamsWebhookURL, None]] @@ -41,6 +71,7 @@ Conditions: UsingEventBridge: !Not [!Equals [!Ref EventBusName, None]] UsingSecrets: !Or [!Condition UsingSlack, !Condition UsingTeams, !Condition UsingChime, !Condition UsingEventBridge] UsingCrossAccountRole: !Not [!Equals [!Ref ManagementAccountRoleArn, None]] + UsingAccountIds: !Not [!Equals [!Ref AccountIDs, None]] Parameters: AWSOrganizationsEnabled: Description: >- @@ -113,6 +144,13 @@ Parameters: AllowedPattern: ".+" ConstraintDescription: No regions were entered, please read the documentation about selecting all regions or filtering on some. Type: String + AccountIDs: + Description: >- + If you would like to EXCLUDE any accounts from alerting, upload a .csv file of comma-seperated account numbers to the same S3 bucket + where the AHA.zip package is located. Sample AccountIDs file name: aha_account_ids.csv. If not, leave the default of None. + Default: None + Type: String + AllowedPattern: (None)|(.+(\.csv))$ EventSearchBack: Description: How far back to search for events in hours. 
Default is 1 hour Default: '1' @@ -186,6 +224,7 @@ Resources: - health:DescribeEventTypes - health:DescribeAffectedEntities - organizations:ListAccounts + - organizations:DescribeAccount Resource: "*" - Effect: Allow Action: @@ -211,6 +250,13 @@ Resources: Action: - events:PutEvents Resource: !Sub 'arn:aws:events:${AWS::Region}:${AWS::AccountId}:event-bus/${EventBusName}' + - !If + - UsingAccountIds + - Effect: Allow + Action: + - s3:GetObject + Resource: !Sub 'arn:aws:s3:::${S3Bucket}/${AccountIDs}' + - !Ref 'AWS::NoValue' - !If - UsingCrossAccountRole - Effect: Allow @@ -321,8 +367,12 @@ Resources: Runtime: python3.8 Environment: Variables: + ACCOUNT_IDS: + Ref: AccountIDs REGIONS: Ref: Regions + S3_BUCKET: + Ref: S3Bucket FROM_EMAIL: Ref: FromEmail TO_EMAIL: @@ -342,4 +392,3 @@ Resources: - UsingCrossAccountRole - !Ref ManagementAccountRoleArn - "None" - \ No newline at end of file diff --git a/BETA-multi-region/01_CFN_MGMT_ROLE.yml b/BETA-multi-region/01_CFN_MGMT_ROLE.yml new file mode 100644 index 0000000..d6c172b --- /dev/null +++ b/BETA-multi-region/01_CFN_MGMT_ROLE.yml @@ -0,0 +1,48 @@ +AWSTemplateFormatVersion: "2010-09-09" +Description: Deploy Cross-Account Role for PHD access +Parameters: + OrgMemberAccountId: + Type: String + AllowedPattern: '^\d{12}$' + Description: AWS Account ID of the AWS Organizations Member Account that will run AWS Health Aware +Resources: + AWSHealthAwareRoleForPHDEvents: + Type: "AWS::IAM::Role" + Properties: + Description: "Grants access to PHD events" + Path: / + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Action: + - sts:AssumeRole + Effect: Allow + Principal: + AWS: !Sub 'arn:aws:iam::${OrgMemberAccountId}:root' + Policies: + - PolicyName: AllowHealthCalls + PolicyDocument: + Statement: + - Effect: Allow + Action: + - health:DescribeAffectedAccountsForOrganization + - health:DescribeAffectedEntitiesForOrganization + - health:DescribeEventDetailsForOrganization + - health:DescribeEventsForOrganization 
+ - health:DescribeEventDetails + - health:DescribeEvents + - health:DescribeEventTypes + - health:DescribeAffectedEntities + Resource: "*" + - PolicyName: AllowsDescribeOrg + PolicyDocument: + Statement: + - Effect: Allow + Action: + - organizations:ListAccounts + - organizations:ListAWSServiceAccessForOrganization + - organizations:DescribeAccount + Resource: "*" +Outputs: + AWSHealthAwareRoleForPHDEventsArn: + Value: !GetAtt AWSHealthAwareRoleForPHDEvents.Arn diff --git a/BETA-multi-region/02_CFN_MR_DEPLOY_AHA.yaml b/BETA-multi-region/02_CFN_MR_DEPLOY_AHA.yaml new file mode 100644 index 0000000..d468679 --- /dev/null +++ b/BETA-multi-region/02_CFN_MR_DEPLOY_AHA.yaml @@ -0,0 +1,682 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: CloudFormation Template for AWS Health Aware (AHA) +Metadata: + 'AWS::CloudFormation::Interface': + ParameterGroups: + - Label: + default: Customize Alerts/Notifications + Parameters: + - AWSOrganizationsEnabled + - AWSHealthEventType + - Label: + default: Package Information + Parameters: + - S3Bucket + - S3Key + - Label: + default: >- + Communication Channels - Slack/Microsoft Teams/Amazon Chime And/or + EventBridge + Parameters: + - SlackWebhookURL + - MicrosoftTeamsWebhookURL + - AmazonChimeWebhookURL + - EventBusName + - Label: + default: Email Setup - For Alerting via Email + Parameters: + - FromEmail + - ToEmail + - Subject + - Label: + default: More Configurations - Optional + Parameters: + - EventSearchBack + - Regions + - ManagementAccountRoleArn + - SecondaryRegion + - AccountIDs + ParameterLabels: + AWSOrganizationsEnabled: + default: AWS Organizations Enabled? 
+ ManagementAccountRoleArn: + default: ARN of the AWS Organizations Management Account assume role (if using) + AWSHealthEventType: + default: The types of events to get alerted on + S3Bucket: + default: Name of S3 Bucket + S3Key: + default: Name of .zip file in S3 Bucket + SlackWebhookURL: + default: Slack Webhook URL + MicrosoftTeamsWebhookURL: + default: Microsoft Teams Webhook URL + AmazonChimeWebhookURL: + default: Amazon Chime Webhook URL + FromEmail: + default: Email From + ToEmail: + default: Email To + Subject: + default: Subject of Email + HealthAPIFrequency: + default: Hours back to search for events + Regions: + default: Which regions to search for events in + SecondaryRegion: + default: Deploy in secondary region? + AccountIDs: + default: Exclude any account numbers? +Conditions: + UsingSlack: !Not [!Equals [!Ref SlackWebhookURL, None]] + UsingTeams: !Not [!Equals [!Ref MicrosoftTeamsWebhookURL, None]] + UsingChime: !Not [!Equals [!Ref AmazonChimeWebhookURL, None]] + UsingEventBridge: !Not [!Equals [!Ref EventBusName, None]] + UsingSecrets: !Or [!Condition UsingSlack, !Condition UsingTeams, !Condition UsingChime, !Condition UsingEventBridge] + UsingCrossAccountRole: !Not [!Equals [!Ref ManagementAccountRoleArn, None]] + NotUsingMultiRegion: !Equals [!Ref SecondaryRegion, 'No'] + UsingMultiRegion: !Not [!Equals [!Ref SecondaryRegion, 'No']] + TestCondition: !Equals ['true', 'false'] + UsingMultiRegionTeams: !And [!Condition UsingTeams, !Condition UsingMultiRegion] + UsingMultiRegionSlack: !And [!Condition UsingSlack, !Condition UsingMultiRegion] + UsingMultiRegionEventBridge: !And [!Condition UsingEventBridge, !Condition UsingMultiRegion] + UsingMultiRegionChime: !And [!Condition UsingChime, !Condition UsingMultiRegion] + UsingMultiRegionCrossAccountRole: !And [!Condition UsingCrossAccountRole, !Condition UsingMultiRegion] + UsingAccountIds: !Not [!Equals [!Ref AccountIDs, None]] +Parameters: + AWSOrganizationsEnabled: + Description: >- + You can 
receive both PHD and SHD alerts if you're using AWS Organizations. + If you are, make sure to enable Organizational Health View: + (https://docs.aws.amazon.com/health/latest/ug/aggregate-events.html) to + aggregate all PHD events in your AWS Organization. If not, you can still + get SHD alerts. + Default: 'No' + AllowedValues: + - 'Yes' + - 'No' + Type: String + SecondaryRegion: + Description: You can deploy this in a secondary region for resiliency. As a result, + the DynamoDB table will become a Global DynamoDB table. Regions that support + Global DynamoDB tables are listed + Default: 'No' + AllowedValues: + - 'No' + - us-east-1 + - us-east-2 + - us-west-1 + - us-west-2 + - ap-south-1 + - ap-northeast-2 + - ap-southeast-1 + - ap-southeast-2 + - ap-northeast-1 + - ca-central-1 + - eu-central-1 + - eu-west-1 + - eu-west-2 + - eu-west-3 + - sa-east-1 + Type: String + ManagementAccountRoleArn: + Description: Arn of the IAM role in the top-level management account for collecting PHD Events. 'None' if deploying into the top-level management account. + Type: String + Default: None + AWSHealthEventType: + Description: >- + Select the event type that you want AHA to report on. Refer to + https://docs.aws.amazon.com/health/latest/APIReference/API_EventType.html for more information on EventType. + Default: 'issue | accountNotification | scheduledChange' + AllowedValues: + - 'issue | accountNotification | scheduledChange' + - 'issue' + Type: String + S3Bucket: + Description: >- + Name of your S3 Bucket where the AHA Package .zip resides. Just the name + of the bucket (e.g. my-s3-bucket) + Type: String + S3Key: + Description: >- + Name of the .zip in your S3 Bucket. Just the name of the file (e.g. + aha-v1.0.zip) + Type: String + EventBusName: + Description: >- + This is to ingest alerts into AWS EventBridge. Enter the event bus name if + you wish to send the alerts to the AWS EventBridge. 
Note: By ingesting + these alerts to AWS EventBridge, you can integrate with 35 SaaS vendors + such as DataDog/NewRelic/PagerDuty. If you don't prefer to use EventBridge, leave the default (None). + Type: String + Default: None + SlackWebhookURL: + Description: >- + Enter the Slack Webhook URL. If you don't prefer to use Slack, leave the default (None). + Type: String + Default: None + MicrosoftTeamsWebhookURL: + Description: >- + Enter Microsoft Teams Webhook URL. If you don't prefer to use MS Teams, + leave the default (None). + Type: String + Default: None + AmazonChimeWebhookURL: + Description: >- + Enter the Chime Webhook URL, If you don't prefer to use Amazon Chime, + leave the default (None). + Type: String + Default: None + Regions: + Description: >- + By default, AHA reports events affecting all AWS regions. + If you want to report on certain regions you can enter up to 10 in a comma separated format. + Available Regions: us-east-1,us-east-2,us-west-1,us-west-2,af-south-1,ap-east-1,ap-south-1,ap-northeast-3, + ap-northeast-2,ap-southeast-1,ap-southeast-2,ap-northeast-1,ca-central-1,eu-central-1,eu-west-1,eu-west-2, + eu-south-1,eu-south-3,eu-north-1,me-south-1,sa-east-1,global + Default: all regions + AllowedPattern: ".+" + ConstraintDescription: No regions were entered, please read the documentation about selecting all regions or filtering on some. + Type: String + AccountIDs: + Description: >- + If you would like to EXCLUDE any accounts from alerting, upload a .csv file of comma-seperated account numbers to the same S3 bucket + where the AHA.zip package is located. Sample AccountIDs file name: aha_account_ids.csv. If not, leave the default of None. + Default: None + Type: String + AllowedPattern: (None)|(.+(\.csv))$ + EventSearchBack: + Description: How far back to search for events in hours. 
Default is 1 hour + Default: '1' + Type: Number + FromEmail: + Description: Enter FROM Email Address + Type: String + Default: none@domain.com + AllowedPattern: ^([\w+-.%]+@[\w-.]+\.[A-Za-z]+)(, ?[\w+-.%]+@[\w-.]+\.[A-Za-z]+)*$ + ConstraintDescription: 'FromEmail is not a valid, please verify entry. If not sending to email, leave as the default, none@domain.com.' + ToEmail: + Description: >- + Enter email addresses separated by commas (for ex: abc@amazon.com, + bcd@amazon.com) + Type: String + Default: none@domain.com + AllowedPattern: ^([\w+-.%]+@[\w-.]+\.[A-Za-z]+)(, ?[\w+-.%]+@[\w-.]+\.[A-Za-z]+)*$ + ConstraintDescription: 'ToEmail is not a valid, please verify entry. If not sending to email, leave as the default, none@domain.com.' + Subject: + Description: Enter the subject of the email address + Type: String + Default: AWS Health Alert +Resources: + GlobalDDBTable: + Type: AWS::DynamoDB::GlobalTable + Condition: UsingMultiRegion + Properties: + AttributeDefinitions: + - AttributeName: arn + AttributeType: S + KeySchema: + - AttributeName: arn + KeyType: HASH + Replicas: + - Region: !Ref SecondaryRegion + ReadProvisionedThroughputSettings: + ReadCapacityUnits: 5 + - Region: !Ref "AWS::Region" + ReadProvisionedThroughputSettings: + ReadCapacityUnits: 5 + StreamSpecification: + StreamViewType: "NEW_AND_OLD_IMAGES" + TimeToLiveSpecification: + AttributeName: ttl + Enabled: true + WriteProvisionedThroughputSettings: + WriteCapacityAutoScalingSettings: + MaxCapacity: 10 + MinCapacity: 10 + TargetTrackingScalingPolicyConfiguration: + DisableScaleIn: false + ScaleInCooldown: 30 + ScaleOutCooldown: 30 + TargetValue: 10 + DynamoDBTable: + Type: 'AWS::DynamoDB::Table' + Condition: NotUsingMultiRegion + Properties: + AttributeDefinitions: + - AttributeName: arn + AttributeType: S + KeySchema: + - AttributeName: arn + KeyType: HASH + ProvisionedThroughput: + ReadCapacityUnits: 5 + WriteCapacityUnits: 5 + TimeToLiveSpecification: + AttributeName: ttl + Enabled: TRUE + 
AHASecondaryRegionStackSet: + Condition: UsingMultiRegion + DependsOn: GlobalDDBTable + Type: AWS::CloudFormation::StackSet + Properties: + Description: Secondary Region CloudFormation Template for AWS Health Aware (AHA) + PermissionModel: SELF_MANAGED + Capabilities: [CAPABILITY_IAM] + StackInstancesGroup: + - Regions: + - !Ref 'SecondaryRegion' + DeploymentTargets: + Accounts: + - !Ref 'AWS::AccountId' + StackSetName: 'aha-multi-region' + TemplateBody: + !Sub | + Resources: + AHA2ndRegionBucket: + Type: AWS::S3::Bucket + CopyAHA: + Type: Custom::CopyAHA + Properties: + DestBucket: !Ref 'AHA2ndRegionBucket' + ServiceToken: !GetAtt 'CopyAHAFunction.Arn' + SourceBucket: ${S3Bucket} + Object: + - ${S3Key} + CopyAHARole: + Type: AWS::IAM::Role + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: lambda.amazonaws.com + Action: sts:AssumeRole + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole + Path: / + Policies: + - PolicyName: aha-lambda-copier + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - s3:GetObject + Resource: + - 'arn:aws:s3:::${S3Bucket}*' + - Effect: Allow + Action: + - s3:PutObject + - s3:DeleteObject + Resource: + - !Join ['', [ 'arn:aws:s3:::', !Ref AHA2ndRegionBucket, '*']] + CopyAHAFunction: + Type: AWS::Lambda::Function + DependsOn: AHA2ndRegionBucket + Properties: + Description: Copies AHA .zip from a source S3 bucket to a destination + Handler: index.handler + Runtime: python3.8 + Role: !GetAtt 'CopyAHARole.Arn' + Timeout: 240 + Code: + ZipFile: | + import json + import logging + import threading + import boto3 + import cfnresponse + + def copy_object(source_bucket, dest_bucket, object): + s3 = boto3.client('s3') + for o in object: + key = o + copy_source = { + 'Bucket': source_bucket, + 'Key': key + } + print('copy_source: %s' % copy_source) + print('dest_bucket = %s'%dest_bucket) + print('key = %s' %key) 
+ s3.copy_object(CopySource=copy_source, Bucket=dest_bucket, + Key=key) + + def delete_object(bucket, object): + s3 = boto3.client('s3') + objects = {'Objects': [{'Key': o} for o in object]} + s3.delete_objects(Bucket=bucket, Delete=objects) + + def timeout(event, context): + logging.error('Execution is about to time out, sending failure response to CloudFormation') + cfnresponse.send(event, context, cfnresponse.FAILED, {}, None) + + def handler(event, context): + # make sure we send a failure to CloudFormation if the function + # is going to timeout + timer = threading.Timer((context.get_remaining_time_in_millis() + / 1000.00) - 0.5, timeout, args=[event, context]) + timer.start() + + print('Received event: %s' % json.dumps(event)) + status = cfnresponse.SUCCESS + try: + source_bucket = event['ResourceProperties']['SourceBucket'] + dest_bucket = event['ResourceProperties']['DestBucket'] + object = event['ResourceProperties']['Object'] + if event['RequestType'] == 'Delete': + delete_object(dest_bucket, object) + else: + copy_object(source_bucket, dest_bucket, object) + except Exception as e: + logging.error('Exception: %s' % e, exc_info=True) + status = cfnresponse.FAILED + finally: + timer.cancel() + cfnresponse.send(event, context, status, {}, None) + LambdaSchedule: + Type: AWS::Events::Rule + Properties: + Description: Lambda trigger Event + ScheduleExpression: rate(1 minute) + State: ENABLED + Targets: + - Arn: !GetAtt 'LambdaFunction.Arn' + Id: LambdaSchedule + LambdaSchedulePermission: + Type: AWS::Lambda::Permission + Properties: + Action: lambda:InvokeFunction + FunctionName: !GetAtt 'LambdaFunction.Arn' + Principal: events.amazonaws.com + SourceArn: !GetAtt 'LambdaSchedule.Arn' + LambdaFunction: + Type: AWS::Lambda::Function + DependsOn: CopyAHA + Properties: + Description: Lambda function that runs AHA + Code: + S3Bucket: + Ref: AHA2ndRegionBucket + S3Key: "${S3Key}" + Handler: handler.main + MemorySize: 128 + Timeout: 600 + Role: 
${LambdaExecutionRole.Arn} + Runtime: python3.8 + Environment: + Variables: + REGIONS: ${Regions} + FROM_EMAIL: "${FromEmail}" + TO_EMAIL: "${ToEmail}" + EMAIL_SUBJECT: "${Subject}" + DYNAMODB_TABLE: "${GlobalDDBTable}" + EVENT_SEARCH_BACK: ${EventSearchBack} + ORG_STATUS: ${AWSOrganizationsEnabled} + HEALTH_EVENT_TYPE: "${AWSHealthEventType}" + MANAGEMENT_ROLE_ARN: "${ManagementAccountRoleArn}" + LambdaExecutionRole: + Type: 'AWS::IAM::Role' + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + Policies: + - PolicyName: AHA-LambdaPolicy + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - logs:CreateLogGroup + - logs:CreateLogStream + - logs:PutLogEvents + Resource: + - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*' + - !If [UsingMultiRegion, !Sub 'arn:aws:logs:${SecondaryRegion}:${AWS::AccountId}:*', !Ref AWS::NoValue] + - !If + - UsingSecrets + - Effect: Allow + Action: + - 'secretsmanager:GetResourcePolicy' + - 'secretsmanager:DescribeSecret' + - 'secretsmanager:ListSecretVersionIds' + - 'secretsmanager:GetSecretValue' + Resource: + - !If [UsingTeams, !Sub '${MicrosoftChannelSecret}', !Ref AWS::NoValue] + - !If [UsingSlack, !Sub '${SlackChannelSecret}', !Ref AWS::NoValue] + - !If [UsingEventBridge, !Sub '${EventBusNameSecret}', !Ref AWS::NoValue] + - !If [UsingChime, !Sub '${ChimeChannelSecret}', !Ref AWS::NoValue] + - !If [UsingCrossAccountRole, !Sub '${AssumeRoleSecret}', !Ref AWS::NoValue] + - !If + - UsingMultiRegionTeams + - !Sub + - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' + - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${MicrosoftChannelSecret}' ]]} + - !Ref AWS::NoValue + - !If + - UsingMultiRegionSlack + - !Sub + - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' + - { 
SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${SlackChannelSecret}' ]]} + - !Ref AWS::NoValue + - !If + - UsingMultiRegionEventBridge + - !Sub + - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' + - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${EventBusNameSecret}' ]]} + - !Ref AWS::NoValue + - !If + - UsingMultiRegionChime + - !Sub + - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' + - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${ChimeChannelSecret}' ]]} + - !Ref AWS::NoValue + - !If + - UsingMultiRegionCrossAccountRole + - !Sub + - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' + - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${AssumeRoleSecret}' ]]} + - !Ref AWS::NoValue + - !Ref 'AWS::NoValue' + - Effect: Allow + Action: + - health:DescribeAffectedAccountsForOrganization + - health:DescribeAffectedEntitiesForOrganization + - health:DescribeEventDetailsForOrganization + - health:DescribeEventsForOrganization + - health:DescribeEventDetails + - health:DescribeEvents + - health:DescribeEventTypes + - health:DescribeAffectedEntities + - organizations:ListAccounts + - organizations:DescribeAccount + Resource: "*" + - Effect: Allow + Action: + - dynamodb:ListTables + Resource: + - !Sub 'arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:*' + - !If [UsingMultiRegion, !Sub 'arn:aws:dynamodb:${SecondaryRegion}:${AWS::AccountId}:*', !Ref AWS::NoValue] + - Effect: Allow + Action: + - ses:SendEmail + Resource: + - !Sub 'arn:aws:ses:${AWS::Region}:${AWS::AccountId}:*' + - !If [UsingMultiRegion, !Sub 'arn:aws:ses:${SecondaryRegion}:${AWS::AccountId}:*', !Ref AWS::NoValue] + - Effect: Allow + Action: + - dynamodb:UpdateTimeToLive + - dynamodb:PutItem + - dynamodb:DeleteItem + - dynamodb:GetItem + - dynamodb:Scan + - dynamodb:Query + - dynamodb:UpdateItem + - dynamodb:UpdateTable + - 
dynamodb:GetRecords + Resource: !If [UsingMultiRegion, !GetAtt GlobalDDBTable.Arn, !GetAtt DynamoDBTable.Arn] + - Effect: Allow + Action: + - events:PutEvents + Resource: + - !Sub 'arn:aws:events:${AWS::Region}:${AWS::AccountId}:event-bus/${EventBusName}' + - !If [UsingMultiRegion, !Sub 'arn:aws:events:${SecondaryRegion}:${AWS::AccountId}:event-bus/${EventBusName}', !Ref AWS::NoValue] + - !If + - UsingAccountIds + - Effect: Allow + Action: + - s3:GetObject + Resource: !Sub 'arn:aws:s3:::${S3Bucket}/${AccountIDs}' + - !Ref 'AWS::NoValue' + - !If + - UsingCrossAccountRole + - Effect: Allow + Action: + - sts:AssumeRole + Resource: !Ref ManagementAccountRoleArn + - !Ref 'AWS::NoValue' + LambdaSchedule: + Type: 'AWS::Events::Rule' + Properties: + Description: Lambda trigger Event + ScheduleExpression: rate(1 minute) + State: ENABLED + Targets: + - Arn: !GetAtt LambdaFunction.Arn + Id: LambdaSchedule + LambdaSchedulePermission: + Type: 'AWS::Lambda::Permission' + Properties: + Action: 'lambda:InvokeFunction' + FunctionName: !GetAtt LambdaFunction.Arn + Principal: events.amazonaws.com + SourceArn: !GetAtt LambdaSchedule.Arn + MicrosoftChannelSecret: + Type: 'AWS::SecretsManager::Secret' + Condition: UsingTeams + Properties: + Name: MicrosoftChannelID + Description: Microsoft Channel ID Secret + ReplicaRegions: + !If + - UsingMultiRegion + - [{ Region: !Sub '${SecondaryRegion}' }] + - !Ref "AWS::NoValue" + SecretString: + Ref: MicrosoftTeamsWebhookURL + Tags: + - Key: HealthCheckMicrosoft + Value: ChannelID + SlackChannelSecret: + Type: 'AWS::SecretsManager::Secret' + Condition: UsingSlack + Properties: + Name: SlackChannelID + Description: Slack Channel ID Secret + ReplicaRegions: + !If + - UsingMultiRegion + - [{ Region: !Sub '${SecondaryRegion}' }] + - !Ref "AWS::NoValue" + SecretString: + Ref: SlackWebhookURL + Tags: + - Key: HealthCheckSlack + Value: ChannelID + EventBusNameSecret: + Type: 'AWS::SecretsManager::Secret' + Condition: UsingEventBridge + Properties: + 
Name: EventBusName + Description: EventBus Name Secret + ReplicaRegions: + !If + - UsingMultiRegion + - [{ Region: !Sub '${SecondaryRegion}' }] + - !Ref "AWS::NoValue" + SecretString: + Ref: EventBusName + Tags: + - Key: EventBusName + Value: ChannelID + ChimeChannelSecret: + Type: 'AWS::SecretsManager::Secret' + Condition: UsingChime + Properties: + Name: ChimeChannelID + Description: Chime Channel ID Secret + ReplicaRegions: + !If + - UsingMultiRegion + - [{ Region: !Sub '${SecondaryRegion}' }] + - !Ref "AWS::NoValue" + SecretString: + Ref: AmazonChimeWebhookURL + Tags: + - Key: HealthCheckChime + Value: ChannelID + AssumeRoleSecret: + Type: 'AWS::SecretsManager::Secret' + Condition: UsingCrossAccountRole + Properties: + Name: AssumeRoleArn + Description: Management account role for AHA to assume + ReplicaRegions: + !If + - UsingMultiRegion + - [{ Region: !Sub '${SecondaryRegion}' }] + - !Ref "AWS::NoValue" + SecretString: + Ref: ManagementAccountRoleArn + Tags: + - Key: AssumeRoleArn + Value: ChannelID + LambdaFunction: + Type: 'AWS::Lambda::Function' + Properties: + Description: Lambda function that runs AHA + Code: + S3Bucket: + Ref: S3Bucket + S3Key: + Ref: S3Key + Handler: handler.main + MemorySize: 128 + Timeout: 600 + Role: + 'Fn::Sub': '${LambdaExecutionRole.Arn}' + Runtime: python3.8 + Environment: + Variables: + ACCOUNT_IDS: + Ref: AccountIDs + REGIONS: + Ref: Regions + S3_BUCKET: + Ref: S3Bucket + FROM_EMAIL: + Ref: FromEmail + TO_EMAIL: + Ref: ToEmail + EMAIL_SUBJECT: + Ref: Subject + DYNAMODB_TABLE: + !If [UsingMultiRegion, !Ref GlobalDDBTable, !Ref DynamoDBTable] + EVENT_SEARCH_BACK: + Ref: EventSearchBack + ORG_STATUS: + Ref: AWSOrganizationsEnabled + HEALTH_EVENT_TYPE: + Ref: AWSHealthEventType + MANAGEMENT_ROLE_ARN: + Ref: ManagementAccountRoleArn + diff --git a/BETA-org-member-deployment/README.md b/BETA-multi-region/README.md similarity index 69% rename from BETA-org-member-deployment/README.md rename to BETA-multi-region/README.md index 
4a871a4..cbbfaad 100644 --- a/BETA-org-member-deployment/README.md +++ b/BETA-multi-region/README.md @@ -1,10 +1,8 @@ -# [IN BETA] Deploying AHA in a member account within an AWS Organization +# [IN BETA] Deploying AHA in 2 Regions for High Availability # Introduction -The #1 feature request from customers, is the ability to deploy the AHA resources (Lambda, DynamoDB, etc.) in a member account instead of the top-level management account. This process is currently **IN BETA** so we ask that you log any issues within Github issues. - -The steps below are relatively the same as a normal deployment, if you configured an Endpoint already you can skip to [**Deployment in AWS Organization Member Account**](#deployment-in-aws-organization-member-account) +Our next most requested feature was the ability to deploy AHA in more than 1 region in the event a region or specific service in a region was degraded. The process is currently **IN BETA** so we ask that you log any issues in this repo. # Configuring an Endpoint - AHA can send to multiple endpoints (webhook URLs, Email or EventBridge). To use any of these you'll need to set it up before-hand as some of these are done on 3rd party websites. We'll go over some of the common ones here. @@ -18,17 +16,39 @@ AHA can send to multiple endpoints (webhook URLs, Email or EventBridge). To use 4. **Type** a name for the bot (e.g. AWS Health Bot) and **click** *Create*. 5. **Click** *Copy URL*, we will need it for the deployment. -## Creating a Slack Webhook URL - +## Creating a Slack Webhook URL **You will need to have access to add a new channel and app to your Slack Workspace**. +*Webhook* 1. Create a new [channel](https://slack.com/help/articles/201402297-Create-a-channel) for events (i.e. aws_events) -2. In your browser go to: workspace-name.slack.com/apps where workspace-name is the name of your Slack Workspace. -3. In the search bar, search for: *Incoming Webhooks* and **click** on it. -4. **Click** on *Add to Slack*. -5. 
From the dropdown **click** on the channel your created in step 1 and **click** *Add Incoming Webhooks integration*. -6. From this page you can change the name of the webhook (i.e. AWS Bot), the icon/emoji to use, etc. +2. In your browser go to: workspace-name.slack.com/apps where workspace-name is the name of your Slack Workspace. +3. In the search bar, search for: *Incoming Webhooks* and **click** on it. +4. **Click** on *Add to Slack*. +5. From the dropdown **click** on the channel you created in step 1 and **click** *Add Incoming Webhooks integration*. +6. From this page you can change the name of the webhook (i.e. AWS Bot), the icon/emoji to use, etc. 7. For the deployment we will need the *Webhook URL*. +*Workflow* + +1. Create a new [channel](https://slack.com/help/articles/201402297-Create-a-channel) for events (i.e. aws_events) +2. Within Slack **click** on your workspace name drop down arrow in the upper left. **click on Tools > Workflow Builder** +3. **Click** Create in the upper right hand corner of the Workflow Builder, give your workflow a name, then **click** next. +4. **Click** on *select* next to **Webhook** and then **click** *add variable*; add the following variables one at a time in the *Key* section. All *data type* will be *text*: +-text +-accounts +-resources +-service +-region +-start_time +-event_arn +-updates +5. When done you should have 8 variables, double check them as they are case sensitive and will be referenced. When checked **click** on *done* and *next*. +6. **Click** on *add step* and then on the add a workflow step **click** *add* next to *send a message*. +7. Under *send this message to:* select the channel you created in Step 1, then in *message text* recreate the following: +![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images/workflow.png?raw=1) +8. **Click** *save* and then **click** *publish*. +9. For the deployment we will need the *Webhook URL*. 
+ ## Creating a Microsoft Teams Webhook URL - **You will need to have access to add a new channel and app to your Microsoft Teams channel**. @@ -60,7 +80,7 @@ There are 2 available ways to deploy AHA, both are done via the same CloudFormat The 2 deployment methods for AHA are: 1. [**AHA for users NOT using AWS Organizations**](#aha-without-aws-organizations): Users NOT using AWS Organizations will be able to get Service Health Dashboard (SHD) events ONLY. -2. [**AHA for users who ARE using AWS Organizations**](#aha-with-organizations): Users who ARE using AWS Organizations will be able to get Service Health Dashboard (SHD) events as well as aggregated Personal Health Dashboard (PHD) events for all accounts in their AWS Organization. +2. [**AHA for users who ARE using AWS Organizations**](#aha-with-aws-organizations): Users who ARE using AWS Organizations will be able to get Service Health Dashboard (SHD) events as well as aggregated Personal Health Dashboard (PHD) events for all accounts in their AWS Organization. ## AHA Without AWS Organizations @@ -68,15 +88,19 @@ The 2 deployment methods for AHA are: 1. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) 2. Have access to deploy Cloudformation Templates with the following resources: AWS IAM policies, Amazon DynamoDB Tables, AWS Lambda, Amazon EventBridge and AWS Secrets Manager. - +3. -If using Multi-Region, you must deploy the following 2 CloudFormation templates to allow the Stackset deployment to deploy resources **even if you have full administrator privileges, you still need to follow these steps**. +-In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetAdministrationRole.yml - this will allow CFT Stacksets to launch AHA in another region +-Launch the stack. 
+-In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetExecutionRole.yml - In *AdministratorAccountId* type in the 12 digit account number you're running the solution in (e.g. 000123456789) +-Launch the stack. ### Deployment 1. Clone the AHA package that from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` 2. In the root of this package you'll have two files; `handler.py` and `messagegenerator.py`. Use your tool of choice to zip them both up and name them with a unique name (e.g. aha-v1.8.zip). **Note: Putting the version number in the name will make upgrading AHA seamless.** -3. Upload the .zip you created in Step 2 to an S3 in the same region you plan to deploy this in. +3. Upload the .zip you created in Step 2 to an S3 bucket in the same region you plan to deploy this in. 4. In your AWS console go to *CloudFormation*. 5. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. -6. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_AHA.yml` **Click** *Next*. +6. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `02_CFN_MR_DEPLOY_AHA.yaml` **Click** *Next*. 7. -In *Stack name* type a stack name (i.e. AHA-Deployment). -In *AWSOrganizationsEnabled* leave it set to default which is `No`. If you do have AWS Organizations enabled and you want to aggregate across all your accounts, you should be following the step for [AHA for users who ARE using AWS Organizations](#aha-with-aws-organizations) -In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues. 
@@ -86,27 +110,34 @@ The 2 deployment methods for AHA are: -In the *Email Setup* section enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. -In *EventSearchBack* enter in the amount of hours you want to search back for events. Default is 1 hour. -In *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated (e.g. us-east-1, us-east-2). +-In *ARN of the AWS Organizations Management Account assume role* leave it set to default None as this is only for customers using AWS Organizations. +-In *Deploy in secondary region?* select another region to deploy AHA in. Otherwise leave to default No. 8. Scroll to the bottom and **click** *Next*. 9. Scroll to the bottom and **click** *Next* again. 10. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. -11. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes). +11. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes or if deploying in a secondary region, it can take up to 30 minutes). ## AHA With AWS Organizations + ### Prerequisites 1. [Enable Health Organizational View](https://docs.aws.amazon.com/health/latest/ug/enable-organizational-view-in-health-console.html) from the console, so that you can aggregate all Personal Health Dashboard (PHD) events for all accounts in your AWS Organization. 2. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) -3. Have access to deploy Cloudformation Templates with the following resources: AWS IAM policies, Amazon DynamoDB Tables, AWS Lambda, Amazon EventBridge and AWS Secrets Manager. - -### Deployment in Top Level Management Account +3. 
Have access to deploy Cloudformation Templates with the following resources: AWS IAM policies, Amazon DynamoDB Tables, AWS Lambda, Amazon EventBridge and AWS Secrets Manager in the **AWS Organizations Management Account**. +4. -If using Multi-Region, you must deploy the following 2 CloudFormation templates to allow the Stackset deployment to deploy resources **even if you have full administrator privileges, you still need to follow these steps**. +-In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetAdministrationRole.yml - this will allow CFT Stacksets to launch AHA in another region +-Launch the stack. +-In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetExecutionRole.yml - In *AdministratorAccountId* type in the 12 digit account number you're running the solution in (e.g. 000123456789) +-Launch the stack. +### Deployment 1. Clone the AHA package that from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` 2. In the root of this package you'll have two files; `handler.py` and `messagegenerator.py`. Use your tool of choice to zip them both up and name them with a unique name (e.g. aha-v1.8.zip). **Note: Putting the version number in the name will make upgrading AHA seamless.** -3. Upload the .zip you created in Step 2 to an S3 in the same region you plan to deploy this in. +3. Upload the .zip you created in Step 2 to an S3 in the same region you plan to deploy this in. 4. In your AWS console go to *CloudFormation*. 5. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. -6. 
Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_AHA.yml` **Click** *Next*. +6. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `02_CFN_MR_DEPLOY_AHA.yml` **Click** *Next*. 7. -In *Stack name* type a stack name (i.e. AHA-Deployment). -In *AWSOrganizationsEnabled* change the dropdown to `Yes`. If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations) -In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues. @@ -116,50 +147,53 @@ The 2 deployment methods for AHA are: -In the *Email Setup* section enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. -In *EventSearchBack* enter in the amount of hours you want to search back for events. Default is 1 hour. -In *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated with (e.g. us-east-1, us-east-2). +-In *ARN of the AWS Organizations Management Account assume role* leave it set to default None, unless you are using a member account instead of the management account. Instructions for this configuration are in the next section. +-In *Deploy in secondary region?* select another region to deploy AHA in. Otherwise leave to default No. 8. Scroll to the bottom and **click** *Next*. 9. Scroll to the bottom and **click** *Next* again. 10. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. -11. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes). +11. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes or if deploying in a secondary region, it can take up to 30 minutes). ### Deployment in AWS Organization Member Account 1. 
Clone the AHA package from the BETA-member-deployment folder. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` -2. In your top-level management account AWS console go to *CloudFormation* -3. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. -4. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `01_CFN_MGMT_ROLE.yml` **Click** *Next*. +2. In your top-level management account AWS console go to *CloudFormation* +3. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. +4. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `01_CFN_MGMT_ROLE.yml` **Click** *Next*. 5. -In *Stack name* type a stack name (i.e. aha-assume-role). --In *OrgMemberAccountId* put in the account id of the member account you plan to run AHA in (e.g. 000123456789). -6. Scroll to the bottom and **click** *Next*. -7. Scroll to the bottom and **click** *Next* again. -8. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. +-In *OrgMemberAccountId* put in the account id of the member account you plan to run AHA in (e.g. 000123456789). +6. Scroll to the bottom and **click** *Next*. +7. Scroll to the bottom and **click** *Next* again. +8. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. 9. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 1-2 minutes). This will create an IAM role with the necessary AWS Organizations and AWS Health API permissions for the member account to assume. 10. In the *Outputs* tab, there will be a value for *AWSHealthAwareRoleForPHDEventsArn* (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201), copy that down as you will need it for step 16. 11. 
Back In the root of the package you downloaded/cloned you'll have two files; `handler.py` and `messagegenerator.py`. Use your tool of choice to zip them both up and name them with a unique name (e.g. aha-v1.8.zip). **Note: Putting the version number in the name will make upgrading AHA seamless.** -12. Upload the .zip you created in Step 11 to an S3 in the same region you plan to deploy this in. -13. Login to the member account you plan to deploy this in and in your AWS console go to *CloudFormation*. -14. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. -15. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `02_CFN_DEPLOY_AHA.yml` **Click** *Next*. +12. Upload the .zip you created in Step 11 to an S3 in the same region you plan to deploy this in. +13. Login to the member account you plan to deploy this in and in your AWS console go to *CloudFormation*. +14. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. +15. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `02_CFN_MR_DEPLOY_AHA.yml` **Click** *Next*. 16. -In *Stack name* type a stack name (i.e. AHA-Deployment). --In *AWSOrganizationsEnabled* change the dropdown to `Yes`. If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations) --In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues. --In *S3Bucket* type ***just*** the bucket name of the S3 bucket used in step 12 (e.g. my-aha-bucket). --In *S3Key* type ***just*** the name of the .zip file you created in Step 11 (e.g. aha-v1.8.zip). --In the *Communications Channels* section enter the URLs, Emails and/or ARN of the endpoints you configured previously. +-In *AWSOrganizationsEnabled* change the dropdown to `Yes`. 
If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations) +-In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues. +-In *S3Bucket* type ***just*** the bucket name of the S3 bucket used in step 12 (e.g. my-aha-bucket). +-In *S3Key* type ***just*** the name of the .zip file you created in Step 11 (e.g. aha-v1.8.zip). +-In the *Communications Channels* section enter the URLs, Emails and/or ARN of the endpoints you configured previously. -In the *Email Setup* section enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. --In *EventSearchBack* enter in the amount of hours you want to search back for events. Default is 1 hour. +-In *EventSearchBack* enter in the amount of hours you want to search back for events. Default is 1 hour. -In *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated with (e.g. us-east-1, us-east-2). -In *ManagementAccountRoleArn* enter in the full IAM arn from step 10 (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201) -17. Scroll to the bottom and **click** *Next*. -18. Scroll to the bottom and **click** *Next* again. -19. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. -12. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes). +-In *Deploy in secondary region?* select another region to deploy AHA in. Otherwise leave to default No. +17. Scroll to the bottom and **click** *Next*. +18. Scroll to the bottom and **click** *Next* again. +19. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. +20. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes or if deploying in a secondary region, it can take up to 30 minutes). 
# Updating **Until this project is migrated to the AWS Serverless Application Model (SAM), updates will have to be done as described below:** 1. Download the updated CloudFormation Template .yml file and 2 `.py` files. 2. Zip up the 2 `.py` files and name the .zip with a different version number than before (e.g. if the .zip you originally uploaded is aha-v1.8.zip the new one should be aha-v1.9.zip) 3. In the AWS CloudFormation console **click** on the name of your stack, then **click** *Update*. -4. In the *Prepare template* section **click** *Replace current template*, **click** *Upload a template file*, **click** *Choose file*, select the newer `CFN_AHA.yml` file you downloaded and finally **click** *Next*. +4. In the *Prepare template* section **click** *Replace current template*, **click** *Upload a template file*, **click** *Choose file*, select the newer `02_CFN_MR_DEPLOY_AHA.yml` file you downloaded and finally **click** *Next*. 5. In the *S3Key* text box change the version number in the name of the .zip to match name of the .zip you uploaded in Step 2 (The name of the .zip has to be different for CloudFormation to recognize a change). **Click** *Next*. 6. At the next screen **click** *Next* and finally **click** *Update stack*. This will now upgrade your environment to the latest version you downloaded. 
diff --git a/BETA-org-member-deployment/handler.py b/BETA-multi-region/handler.py similarity index 84% rename from BETA-org-member-deployment/handler.py rename to BETA-multi-region/handler.py index cc20deb..17d246e 100644 --- a/BETA-org-member-deployment/handler.py +++ b/BETA-multi-region/handler.py @@ -4,7 +4,6 @@ import re import time import decimal -import uuid import socket import configparser from dateutil import parser @@ -25,7 +24,6 @@ health_active_list = current_endpoint.split('.') health_active_region = health_active_list[1] print("current health region: ", health_active_region) -management_role_arn = os.environ["MANAGEMENT_ROLE_ARN"] # create a boto3 health client w/ backoff/retry config = Config( @@ -37,7 +35,16 @@ ) ) -# Send alert module +# Get Account Name +def get_account_name(account_id): + org_client = get_sts_token('organizations') + try: + account_name = org_client.describe_account(AccountId=account_id)['Account']['Name'] + except Exception: + account_name = account_id + + return account_name + def send_alert(event_details, event_type): slack_url = get_secrets()["slack"] teams_url = get_secrets()["teams"] @@ -49,7 +56,7 @@ def send_alert(event_details, event_type): if "None" not in event_bus_name: try: print("Sending the alert to Event Bridge") - send_to_eventbridge(get_message_for_eventbridge(event_details, event_type), event_type, event_bus_name) + send_to_eventbridge(get_message_for_eventbridge(event_details, event_type, affected_accounts, affected_entities), event_type, event_bus_name) except HTTPError as e: print("Got an error while sending message to EventBridge: ", e.code, e.reason) except URLError as e: @@ -57,17 +64,26 @@ def send_alert(event_details, event_type): pass if "hooks.slack.com/services" in slack_url: try: - print("Sending the alert to Slack Channel") - send_to_slack(get_message_for_slack(event_details, event_type), slack_url) + print("Sending the alert to Slack Webhook Channel") + 
send_to_slack(get_message_for_slack(event_details, event_type, affected_accounts, affected_entities, slack_webhook="webhook"), slack_url) except HTTPError as e: print("Got an error while sending message to Slack: ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason) pass + if "hooks.slack.com/workflows" in slack_url: + try: + print("Sending the alert to Slack Workflows Channel") + send_to_slack(get_message_for_slack(event_details, event_type, affected_accounts, affected_entities, slack_webhook="workflow"), slack_url) + except HTTPError as e: + print("Got an error while sending message to Slack: ", e.code, e.reason) + except URLError as e: + print("Server connection failed: ", e.reason) + pass if "office.com/webhook" in teams_url: try: print("Sending the alert to Teams") - send_to_teams(get_message_for_teams(event_details, event_type), teams_url) + send_to_teams(get_message_for_teams(event_details, event_type, affected_accounts, affected_entities), teams_url) except HTTPError as e: print("Got an error while sending message to Teams: ", e.code, e.reason) except URLError as e: @@ -77,7 +93,7 @@ def send_alert(event_details, event_type): if "none@domain.com" not in SENDER and RECIPIENT: try: print("Sending the alert to the emails") - send_email(event_details, event_type) + send_email(event_details, event_type, affected_accounts, affected_entities) except HTTPError as e: print("Got an error while sending message to Email: ", e.code, e.reason) except URLError as e: @@ -86,7 +102,7 @@ def send_alert(event_details, event_type): if "hooks.chime.aws/incomingwebhooks" in chime_url: try: print("Sending the alert to Chime channel") - send_to_chime(get_message_for_chime(event_details, event_type), chime_url) + send_to_chime(get_message_for_chime(event_details, event_type, affected_accounts, affected_entities), chime_url) except HTTPError as e: print("Got an error while sending message to Chime: ", e.code, e.reason) except URLError as e: @@ 
-115,15 +131,26 @@ def send_org_alert(event_details, affected_org_accounts, affected_org_entities, pass if "hooks.slack.com/services" in slack_url: try: - print("Sending the alert to Slack Channel") + print("Sending the alert to Slack Webhook Channel") send_to_slack( - get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities), + get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities, slack_webhook="webhook"), slack_url) except HTTPError as e: print("Got an error while sending message to Slack: ", e.code, e.reason) except URLError as e: print("Server connection failed: ", e.reason) pass + if "hooks.slack.com/workflows" in slack_url: + try: + print("Sending the alert to Slack Workflow Channel") + send_to_slack( + get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities, slack_webhook="workflow"), + slack_url) + except HTTPError as e: + print("Got an error while sending message to Slack: ", e.code, e.reason) + except URLError as e: + print("Server connection failed: ", e.reason) + pass if "office.com/webhook" in teams_url: try: print("Sending the alert to Teams") @@ -250,6 +277,31 @@ def send_org_email(event_details, eventType, affected_org_accounts, affected_org }, ) +# organization view affected accounts +def get_health_accounts(health_client, event, event_arn): + affected_accounts = [] + accounts_paginator = health_client.get_paginator('describe_affected_entities') + event_accounts_page_iterator = event_accounts_paginator.paginate( + eventArn=event_arn + ) + for event_accounts_page in event_accounts_page_iterator: + json_event_accounts = json.dumps(event_accounts_page, default=myconverter) + parsed_event_accounts = json.loads(json_event_accounts) + affected_org_accounts = (parsed_event_accounts['entities'][0]['awsAccountId']) + return affected_accounts + +def get_health_entities(health_client, event, event_arn): + affected_entities = [] + 
event_entities_paginator = health_client.get_paginator('describe_affected_entities') + event_entities_page_iterator = event_entities_paginator.paginate( + eventArn=event_arn + ) + for event_entities_page in event_entities_page_iterator: + json_event_entities = json.dumps(event_entities_page, default=myconverter) + parsed_event_entities = json.loads(json_event_entities) + for entity in parsed_event_entities['entities']: + affected_entities.append(entity['entityValue']) + return affected_entities # organization view affected accounts def get_health_org_accounts(health_client, event, event_arn): @@ -301,12 +353,12 @@ def update_org_ddb(event_arn, str_update, status_code, event_details, affected_o delta_hours = os.environ['EVENT_SEARCH_BACK'] delta_hours = int(delta_hours) delta_hours_sec = delta_hours * 3600 - + # formatting time in seconds srt_ddb_format_full = "%Y-%m-%d %H:%M:%S" str_ddb_format_sec = '%s' sec_now = datetime.strftime(datetime.now(), str_ddb_format_sec) - + # check if event arn already exists try: response = aha_ddb_table.get_item( @@ -333,12 +385,14 @@ def update_org_ddb(event_arn, str_update, status_code, event_details, affected_o # Cleanup: DynamoDB entry deleted 24 hours after last update } ) + affected_org_accounts_details = [ + f"{get_account_name(account_id)} ({account_id})" for account_id in affected_org_accounts] # send to configured endpoints if status_code != "closed": - send_org_alert(event_details, affected_org_accounts, affected_org_entities, event_type="create") + send_org_alert(event_details, affected_org_accounts_details, affected_org_entities, event_type="create") else: - send_org_alert(event_details, affected_org_accounts, affected_org_entities, event_type="resolve") - + send_org_alert(event_details, affected_org_accounts_details, affected_org_entities, event_type="resolve") + else: item = response['Item'] if item['lastUpdatedTime'] != str_update and (item['statusCode'] != status_code or @@ -358,32 +412,34 @@ def 
update_org_ddb(event_arn, str_update, status_code, event_details, affected_o # Cleanup: DynamoDB entry deleted 24 hours after last update } ) + affected_org_accounts_details = [ + f"{get_account_name(account_id)} ({account_id})" for account_id in affected_org_accounts] # send to configured endpoints if status_code != "closed": - send_org_alert(event_details, affected_org_accounts, affected_org_entities, event_type="create") + send_org_alert(event_details, affected_org_accounts_details, affected_org_entities, event_type="create") else: - send_org_alert(event_details, affected_org_accounts, affected_org_entities, event_type="resolve") + send_org_alert(event_details, affected_org_accounts_details, affected_org_entities, event_type="resolve") else: print("No new updates found, checking again in 1 minute.") - - + + # For Customers not using AWS Organizations -def update_ddb(event_arn, str_update, status_code, event_details): +def update_ddb(event_arn, str_update, status_code, event_details, affected_accounts, affected_entities): # open dynamoDB dynamodb = boto3.resource("dynamodb") ddb_table = os.environ['DYNAMODB_TABLE'] aha_ddb_table = dynamodb.Table(ddb_table) - + # set time parameters delta_hours = os.environ['EVENT_SEARCH_BACK'] delta_hours = int(delta_hours) delta_hours_sec = delta_hours * 3600 - + # formatting time in seconds srt_ddb_format_full = "%Y-%m-%d %H:%M:%S" str_ddb_format_sec = '%s' sec_now = datetime.strftime(datetime.now(), str_ddb_format_sec) - + # check if event arn already exists try: response = aha_ddb_table.get_item( @@ -407,12 +463,14 @@ def update_ddb(event_arn, str_update, status_code, event_details): # Cleanup: DynamoDB entry deleted 24 hours after last update } ) + affected_accounts_details = [ + f"{get_account_name(account_id)} ({account_id})" for account_id in affected_accounts] # send to configured endpoints if status_code != "closed": - send_alert(event_details, event_type="create") + send_alert(event_details, affected_accounts, 
affected_entities, event_type="create") else: - send_alert(event_details, event_type="resolve") - + send_alert(event_details, affected_accounts, affected_entities, event_type="resolve") + else: item = response['Item'] if item['lastUpdatedTime'] != str_update: @@ -429,12 +487,13 @@ def update_ddb(event_arn, str_update, status_code, event_details): ) # send to configured endpoints if status_code != "closed": - send_alert(event_details, event_type="create") + send_alert(event_details, affected_accounts_details, affected_entities, event_type="create") else: - send_alert(event_details, event_type="resolve") + send_alert(event_details, affected_accounts_details, affected_entities, event_type="resolve") else: print("No new updates found, checking again in 1 minute.") + def get_secrets(): secret_teams_name = "MicrosoftChannelID" secret_slack_name = "SlackChannelID" @@ -446,8 +505,8 @@ def get_secrets(): get_secret_value_response_teams = "" get_secret_value_response_slack = "" event_bus_name = "EventBusName" - secret_assumerole_name = "AssumeRoleArn" - + secret_assumerole_name = "AssumeRoleArn" + # create a Secrets Manager client session = boto3.session.Session() client = session.client( @@ -503,7 +562,6 @@ def get_secrets(): chime_channel_id = get_secret_value_response_chime['SecretString'] else: chime_channel_id = "None" - try: get_secret_value_response_assumerole = client.get_secret_value( SecretId=secret_assumerole_name @@ -519,8 +577,7 @@ def get_secrets(): if 'SecretString' in get_secret_value_response_assumerole: assumerole_channel_id = get_secret_value_response_assumerole['SecretString'] else: - assumerole_channel_id = "None" - + assumerole_channel_id = "None" try: get_secret_value_response_eventbus = client.get_secret_value( SecretId=event_bus_name @@ -542,13 +599,15 @@ def get_secrets(): "slack": slack_channel_id, "chime": chime_channel_id, "eventbusname": eventbus_channel_id, - "ahaassumerole": assumerole_channel_id, - } + "ahaassumerole": assumerole_channel_id + 
} # uncomment below to verify secrets values - #print("Secrets: ",secrets) + #print("Secrets: ",secrets) return secrets + def describe_events(health_client): + str_ddb_format_sec = '%s' # set hours to search back in time for events delta_hours = os.environ['EVENT_SEARCH_BACK'] health_event_type = os.environ['HEALTH_EVENT_TYPE'] @@ -556,7 +615,7 @@ def describe_events(health_client): time_delta = (datetime.now() - timedelta(hours=delta_hours)) print("Searching for events and updates made after: ", time_delta) dict_regions = os.environ['REGIONS'] - + str_filter = { 'lastUpdatedTimes': [ { @@ -564,26 +623,50 @@ def describe_events(health_client): } ] } - + if health_event_type == "issue": - event_type_filter = {'eventTypeCategories': ["issue"]} + event_type_filter = {'eventTypeCategories': ['issue','investigation']} print("AHA will be monitoring events with event type categories as 'issue' only!") str_filter.update(event_type_filter) - + if dict_regions != "all regions": dict_regions = [region.strip() for region in dict_regions.split(',')] print("AHA will monitor for events only in the selected regions: ", dict_regions) region_filter = {'regions': dict_regions} str_filter.update(region_filter) - + event_paginator = health_client.get_paginator('describe_events') event_page_iterator = event_paginator.paginate(filter=str_filter) for response in event_page_iterator: events = response.get('events', []) - return events - + aws_events = json.dumps(events, default=myconverter) + aws_events = json.loads(aws_events) + print('Event(s) Received: ', json.dumps(aws_events)) + if len(aws_events) > 0: # if there are new event(s) from AWS + for event in aws_events: + event_arn = event['arn'] + status_code = event['statusCode'] + str_update = parser.parse((event['lastUpdatedTime'])) + str_update = str_update.strftime(str_ddb_format_sec) + + # get event details + event_details = json.dumps(describe_event_details(health_client, event_arn), default=myconverter) + event_details = 
json.loads(event_details) + print("Event Details: ", event_details) + if event_details['successfulSet'] == []: + print("An error occured with account:", event_details['failedSet'][0]['awsAccountId'], "due to:", + event_details['failedSet'][0]['errorName'], ":", + event_details['failedSet'][0]['errorMessage']) + continue + else: + # write to dynamoDB for persistence + update_ddb(event_arn, str_update, status_code, event_details, affected_accounts, affected_entities) + else: + print("No events found in time frame, checking again in 1 minute.") + def describe_org_events(health_client): + str_ddb_format_sec = '%s' # set hours to search back in time for events delta_hours = os.environ['EVENT_SEARCH_BACK'] health_event_type = os.environ['HEALTH_EVENT_TYPE'] @@ -591,29 +674,56 @@ def describe_org_events(health_client): delta_hours = int(delta_hours) time_delta = (datetime.now() - timedelta(hours=delta_hours)) print("Searching for events and updates made after: ", time_delta) - + str_filter = { 'lastUpdatedTime': { 'from': time_delta } } - + if health_event_type == "issue": - event_type_filter = {'eventTypeCategories': ["issue"]} + event_type_filter = {'eventTypeCategories': ['issue','investigation']} print("AHA will be monitoring events with event type categories as 'issue' only!") str_filter.update(event_type_filter) - + if dict_regions != "all regions": dict_regions = [region.strip() for region in dict_regions.split(',')] print("AHA will monitor for events only in the selected regions: ", dict_regions) region_filter = {'regions': dict_regions} str_filter.update(region_filter) - + org_event_paginator = health_client.get_paginator('describe_events_for_organization') org_event_page_iterator = org_event_paginator.paginate(filter=str_filter) for response in org_event_page_iterator: events = response.get('events', []) - return events + aws_events = json.dumps(events, default=myconverter) + aws_events = json.loads(aws_events) + print('Event(s) Received: ', 
json.dumps(aws_events)) + if len(aws_events) > 0: + for event in aws_events: + event_arn = event['arn'] + status_code = event['statusCode'] + str_update = parser.parse((event['lastUpdatedTime'])) + str_update = str_update.strftime(str_ddb_format_sec) + + # get organizational view requirements + affected_org_accounts = get_health_org_accounts(health_client, event, event_arn) + affected_org_entities = get_health_org_entities(health_client, event, event_arn, affected_org_accounts) + + # get event details + event_details = json.dumps(describe_org_event_details(health_client, event_arn, affected_org_accounts), + default=myconverter) + event_details = json.loads(event_details) + print("Event Details: ", event_details) + if event_details['successfulSet'] == []: + print("An error occured with account:", event_details['failedSet'][0]['awsAccountId'], "due to:", + event_details['failedSet'][0]['errorName'], ":", + event_details['failedSet'][0]['errorMessage']) + continue + else: + # write to dynamoDB for persistence + update_org_ddb(event_arn, str_update, status_code, event_details, affected_org_accounts, + affected_org_entities) def myconverter(json_object): if isinstance(json_object, datetime): @@ -626,6 +736,7 @@ def describe_event_details(health_client, event_arn): ) return response + def describe_org_event_details(health_client, event_arn, affected_org_accounts): if len(affected_org_accounts) >= 1: affected_account_ids = affected_org_accounts[0] @@ -641,8 +752,8 @@ def describe_org_event_details(health_client, event_arn, affected_org_accounts): else: response = describe_event_details(health_client, event_arn) return response - - + + def send_to_eventbridge(message, event_type, event_bus): print("Sending response to Eventbridge - event_type, event_bus", event_type, event_bus) client = boto3.client('events') @@ -651,9 +762,9 @@ def send_to_eventbridge(message, event_type, event_bus): 'EventBusName': event_bus}, ]) print("Response is:", response) -def get_sts_token(): +def 
get_sts_token(service): assumeRoleArn = get_secrets()["ahaassumerole"] - health_client = None + boto3_client = None if "arn:aws:iam::" in assumeRoleArn: ACCESS_KEY = [] @@ -676,8 +787,8 @@ def get_sts_token(): SESSION_TOKEN = acct_b['Credentials']['SessionToken'] # create service client using the assumed role credentials, e.g. S3 - health_client = boto3.client( - 'health', + boto3_client = boto3.client( + service, config=config, aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY, @@ -685,79 +796,25 @@ def get_sts_token(): ) print("Running in member account deployment mode") else: - health_client = boto3.client('health', config=config) - print("Running in management account demployment mode") + boto3_client = boto3.client(service, config=config) + print("Running in management account deployment mode") - return health_client + return boto3_client def main(event, context): print("THANK YOU FOR CHOOSING AWS HEALTH AWARE!") - health_client = get_sts_token() + health_client = get_sts_token('health') org_status = os.environ['ORG_STATUS'] - str_ddb_format_sec = '%s' + #str_ddb_format_sec = '%s' # check for AWS Organizations Status if org_status == "No": print("AWS Organizations is not enabled. 
Only Service Health Dashboard messages will be alerted.") - aws_events = describe_events(health_client) - aws_events = json.dumps(aws_events, default=myconverter) - aws_events = json.loads(aws_events) - print('Event(s) Received: ', json.dumps(aws_events)) - if len(aws_events) > 0: # if there are new event(s) from AWS - for event in aws_events: - event_arn = event['arn'] - status_code = event['statusCode'] - str_update = parser.parse((event['lastUpdatedTime'])) - str_update = str_update.strftime(str_ddb_format_sec) - - # get event details - event_details = json.dumps(describe_event_details(event_arn), default=myconverter) - event_details = json.loads(event_details) - print("Event Details: ", event_details) - if event_details['successfulSet'] == []: - print("An error occured with account:", event_details['failedSet'][0]['awsAccountId'], "due to:", - event_details['failedSet'][0]['errorName'], ":", - event_details['failedSet'][0]['errorMessage']) - continue - else: - # write to dynamoDB for persistence - update_ddb(event_arn, str_update, status_code, event_details) - else: - print("No events found in time frame, checking again in 1 minute.") + describe_events(health_client) else: print( "AWS Organizations is enabled. 
Personal Health Dashboard and Service Health Dashboard messages will be alerted.") - aws_events = describe_org_events(health_client) - aws_events = json.dumps(aws_events, default=myconverter) - aws_events = json.loads(aws_events) - print('Event(s) Received: ', json.dumps(aws_events)) - if len(aws_events) > 0: - for event in aws_events: - event_arn = event['arn'] - status_code = event['statusCode'] - str_update = parser.parse((event['lastUpdatedTime'])) - str_update = str_update.strftime(str_ddb_format_sec) - - # get organizational view requirements - affected_org_accounts = get_health_org_accounts(health_client, event, event_arn) - affected_org_entities = get_health_org_entities(health_client, event, event_arn, affected_org_accounts) - - # get event details - event_details = json.dumps(describe_org_event_details(health_client, event_arn, affected_org_accounts), - default=myconverter) - event_details = json.loads(event_details) - print("Event Details: ", event_details) - if event_details['successfulSet'] == []: - print("An error occured with account:", event_details['failedSet'][0]['awsAccountId'], "due to:", - event_details['failedSet'][0]['errorName'], ":", - event_details['failedSet'][0]['errorMessage']) - continue - else: - # write to dynamoDB for persistence - update_org_ddb(event_arn, str_update, status_code, event_details, affected_org_accounts, - affected_org_entities) - else: - print("No events found in time frame, checking again in 1 minute.") + describe_org_events(health_client) if __name__ == "__main__": - main('', '') \ No newline at end of file + main('', '') diff --git a/BETA-org-member-deployment/messagegenerator.py b/BETA-multi-region/messagegenerator.py similarity index 63% rename from BETA-org-member-deployment/messagegenerator.py rename to BETA-multi-region/messagegenerator.py index e455b65..52755ae 100644 --- a/BETA-org-member-deployment/messagegenerator.py +++ b/BETA-multi-region/messagegenerator.py @@ -6,69 +6,129 @@ import time -def 
get_message_for_slack(event_details, event_type): +def get_message_for_slack(event_details, event_type, affected_accounts, affected_entities, slack_webhook): message = "" summary = "" - if event_type == "create": - summary += ( - f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " - f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" - ) - message = { - "text": summary, - "attachments": [ - { - "color": "danger", - "fields": [ - { "title": "Account(s)", "value": "All accounts\nin region", "short": True }, - { "title": "Resource(s)", "value": "All resources\nin region", "short": True }, - { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, - { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, - { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, - { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, - { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, - { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } - ], - } - ] - } - - elif event_type == "resolve": - summary += ( - f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " - f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" - ) - message = { - "text": summary, - "attachments": [ - { - "color": "00ff00", - "fields": [ - { "title": "Account(s)", "value": "All accounts\nin region", "short": True }, - { "title": "Resource(s)", "value": "All resources\nin region", "short": True }, - { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], 
"short": True }, - { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, - { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, - { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime']), "short": True }, - { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, - { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, - { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } - ], - } - ] - } + if slack_webhook == "webhook": + if len(affected_entities) >= 1: + affected_entities = "\n".join(affected_entities) + else: + affected_entities = "All resources\nin region" + if len(affected_accounts) >= 1: + affected_accounts = "\n".join(affected_accounts) + else: + affected_accounts = "All accounts\nin region" + if event_type == "create": + summary += ( + f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" + ) + message = { + "text": summary, + "attachments": [ + { + "color": "danger", + "fields": [ + { "title": "Account(s)", "value": affected_accounts, "short": True }, + { "title": "Resource(s)", "value": affected_entities, "short": True }, + { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, + { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, + { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, + { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, + { "title": "Event ARN", "value": 
event_details['successfulSet'][0]['event']['arn'], "short": False }, + { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } + ], + } + ] + } + + elif event_type == "resolve": + summary += ( + f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" + ) + message = { + "text": summary, + "attachments": [ + { + "color": "00ff00", + "fields": [ + { "title": "Account(s)", "value": affected_accounts, "short": True }, + { "title": "Resource(s)", "value": affected_entities, "short": True }, + { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, + { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, + { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, + { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime']), "short": True }, + { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, + { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, + { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } + ], + } + ] + } + else: + if len(affected_entities) >= 1: + affected_entities = "\n".join(affected_entities) + else: + affected_entities = "All resources in region" + if len(affected_accounts) >= 1: + affected_accounts = "\n".join(affected_accounts) + else: + affected_accounts = "All accounts in region" + if event_type == "create": + summary += ( + f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the 
{event_details['successfulSet'][0]['event']['region'].upper()} region.*" + ) + message = { + "text": summary, + "accounts": affected_accounts, + "resources": affected_entities, + "service": event_details['successfulSet'][0]['event']['service'], + "region": event_details['successfulSet'][0]['event']['region'], + "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), + "status": event_details['successfulSet'][0]['event']['statusCode'], + "event_arn": event_details['successfulSet'][0]['event']['arn'], + "updates": get_last_aws_update(event_details) + } + + elif event_type == "resolve": + summary += ( + f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" + ) + message = { + "text": summary, + "accounts": affected_accounts, + "resources": affected_entities, + "service": event_details['successfulSet'][0]['event']['service'], + "region": event_details['successfulSet'][0]['event']['region'], + "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), + "status": event_details['successfulSet'][0]['event']['statusCode'], + "event_arn": event_details['successfulSet'][0]['event']['arn'], + "updates": get_last_aws_update(event_details) + } + print("Message sent to Slack: ", message) return message -def get_message_for_eventbridge(event_details, event_type): +def get_message_for_eventbridge(event_details, event_type, affected_accounts, affected_entities): message = "" + if len(affected_entities) >= 1: + affected_entities = "\n".join(affected_entities) + else: + affected_entities = "All resources\nin region" + if len(affected_accounts) >= 1: + affected_accounts = "\n".join(affected_accounts) + else: + affected_accounts = "All accounts\nin region" if event_type == "create": message = { "attachments": [ { "fields": [ - { "title": "Account(s)", 
"value": "All accounts\nin region", "short": True }, - { "title": "Resource(s)", "value": "All resources\nin region", "short": True }, + { "title": "Account(s)", "value": affected_accounts, "short": True }, + { "title": "Resource(s)", "value": affected_entities, "short": True }, { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, @@ -85,8 +145,8 @@ def get_message_for_eventbridge(event_details, event_type): "attachments": [ { "fields": [ - { "title": "Account(s)", "value": "All accounts\nin region", "short": True }, - { "title": "Resource(s)", "value": "All resources\nin region", "short": True }, + { "title": "Account(s)", "value": affected_accounts, "short": True }, + { "title": "Resource(s)", "value": affected_entities, "short": True }, { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, @@ -152,79 +212,130 @@ def get_org_message_for_eventbridge(event_details, event_type, affected_org_acco return message -def get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities): +def get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities, slack_webhook): message = "" summary = "" - if len(affected_org_entities) >= 1: - affected_org_entities = "\n".join(affected_org_entities) - else: - affected_org_entities = "All resources\nin region" - if len(affected_org_accounts) >= 1: - affected_org_accounts = "\n".join(affected_org_accounts) + if 
slack_webhook == "webhook": + if len(affected_org_entities) >= 1: + affected_org_entities = "\n".join(affected_org_entities) + else: + affected_org_entities = "All resources\nin region" + if len(affected_org_accounts) >= 1: + affected_org_accounts = "\n".join(affected_org_accounts) + else: + affected_org_accounts = "All accounts\nin region" + if event_type == "create": + summary += ( + f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" + ) + message = { + "text": summary, + "attachments": [ + { + "color": "danger", + "fields": [ + { "title": "Account(s)", "value": affected_org_accounts, "short": True }, + { "title": "Resource(s)", "value": affected_org_entities, "short": True }, + { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, + { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, + { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, + { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, + { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, + { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } + ], + } + ] + } + + elif event_type == "resolve": + summary += ( + f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" + ) + message = { + "text": summary, + "attachments": [ + { + "color": "00ff00", + "fields": [ + { "title": "Account(s)", "value": affected_org_accounts, "short": True }, + { "title": "Resource(s)", 
"value": affected_org_entities, "short": True }, + { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, + { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, + { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, + { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime']), "short": True }, + { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, + { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, + { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } + ], + } + ] + } else: - affected_org_accounts = "All accounts\nin region" - if event_type == "create": - summary += ( - f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " - f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" - ) - message = { - "text": summary, - "attachments": [ - { - "color": "danger", - "fields": [ - { "title": "Account(s)", "value": affected_org_accounts, "short": True }, - { "title": "Resource(s)", "value": affected_org_entities, "short": True }, - { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, - { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, - { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, - { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, - { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, - { "title": "Updates", "value": 
get_last_aws_update(event_details), "short": False } - ], - } - ] - } - - elif event_type == "resolve": - summary += ( - f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " - f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" - ) - message = { - "text": summary, - "attachments": [ - { - "color": "00ff00", - "fields": [ - { "title": "Account(s)", "value": affected_org_accounts, "short": True }, - { "title": "Resource(s)", "value": affected_org_entities, "short": True }, - { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, - { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, - { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, - { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime']), "short": True }, - { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, - { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, - { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } - ], - } - ] - } + if len(affected_org_entities) >= 1: + affected_org_entities = "\n".join(affected_org_entities) + else: + affected_org_entities = "All resources in region" + if len(affected_org_accounts) >= 1: + affected_org_accounts = "\n".join(affected_org_accounts) + else: + affected_org_accounts = "All accounts in region" + if event_type == "create": + summary += ( + f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" + ) + message = { + "text": summary, 
+ "accounts": affected_org_accounts, + "resources": affected_org_entities, + "service": event_details['successfulSet'][0]['event']['service'], + "region": event_details['successfulSet'][0]['event']['region'], + "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), + "status": event_details['successfulSet'][0]['event']['statusCode'], + "event_arn": event_details['successfulSet'][0]['event']['arn'], + "updates": get_last_aws_update(event_details) + } + + elif event_type == "resolve": + summary += ( + f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" + ) + message = { + "text": summary, + "accounts": affected_org_accounts, + "resources": affected_org_entities, + "service": event_details['successfulSet'][0]['event']['service'], + "region": event_details['successfulSet'][0]['event']['region'], + "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), + "status": event_details['successfulSet'][0]['event']['statusCode'], + "event_arn": event_details['successfulSet'][0]['event']['arn'], + "updates": get_last_aws_update(event_details) + } json.dumps(message) print("Message sent to Slack: ", message) return message -def get_message_for_chime(event_details, event_type): +def get_message_for_chime(event_details, event_type, affected_accounts, affected_entities): message = "" + if len(affected_entities) >= 1: + affected_entities = "\n".join(affected_entities) + else: + affected_entities = "All resources\nin region" + if len(affected_accounts) >= 1: + affected_accounts = "\n".join(affected_accounts) + else: + affected_accounts = "All accounts\nin region" summary = "" if event_type == "create": message = str("/md" + "\n" + "**:rotating_light:\[NEW\] AWS Health reported an issue with the " + 
event_details['successfulSet'][0]['event']['service'].upper() + " service in " + event_details['successfulSet'][0]['event']['region'].upper() + " region.**" + "\n" "---" + "\n" - "**Account(s)**: " + "All accounts in region" + "\n" - "**Resource(s)**: " + "All resources in region" + "\n" + "**Account(s)**: " + affected_accounts + "\n" + "**Resource(s)**: " + affected_entities + "\n" "**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n" "**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n" "**Start Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n" @@ -237,8 +348,8 @@ def get_message_for_chime(event_details, event_type): message = str("/md" + "\n" + "**:heavy_check_mark:\[RESOLVED\] The AWS Health issue with the " + event_details['successfulSet'][0]['event']['service'].upper() + " service in " + event_details['successfulSet'][0]['event']['region'].upper() + " region is now resolved.**" + "\n" "---" + "\n" - "**Account(s)**: " + "All accounts in region" + "\n" - "**Resource(s)**: " + "All resources in region" + "\n" + "**Account(s)**: " + affected_accounts + "\n" + "**Resource(s)**: " + affected_entities + "\n" "**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n" "**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n" "**Start Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n" @@ -296,8 +407,16 @@ def get_org_message_for_chime(event_details, event_type, affected_org_accounts, -def get_message_for_teams(event_details, event_type): +def get_message_for_teams(event_details, event_type, affected_accounts, affected_entities): message = "" + if len(affected_entities) >= 1: + affected_entities = "\n".join(affected_entities) + else: + affected_entities = "All resources\nin region" + if len(affected_accounts) >= 1: + affected_accounts = "\n".join(affected_accounts) + else: + affected_accounts = 
"All accounts\nin region" summary = "" if event_type == "create": title = "🚨 [NEW] AWS Health reported an issue with the " + event_details['successfulSet'][0]['event'][ @@ -313,8 +432,8 @@ def get_message_for_teams(event_details, event_type): "activityTitle": str(title), "markdown": False, "facts": [ - {"name": "Account(s)", "value": "All accounts\nin region"}, - {"name": "Resource(s)", "value": "All resources\nin region"}, + {"name": "Account(s)", "value": affected_accounts}, + {"name": "Resource(s)", "value": affected_entities}, {"name": "Service", "value": event_details['successfulSet'][0]['event']['service']}, {"name": "Region", "value": event_details['successfulSet'][0]['event']['region']}, {"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}, @@ -340,8 +459,8 @@ def get_message_for_teams(event_details, event_type): "activityTitle": str(title), "markdown": False, "facts": [ - {"name": "Account(s)", "value": "All accounts\nin region"}, - {"name": "Resource(s)", "value": "All resources\nin region"}, + {"name": "Account(s)", "value": affected_accounts}, + {"name": "Resource(s)", "value": affected_entities}, {"name": "Service", "value": event_details['successfulSet'][0]['event']['service']}, {"name": "Region", "value": event_details['successfulSet'][0]['event']['region']}, {"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}, @@ -426,15 +545,15 @@ def get_org_message_for_teams(event_details, event_type, affected_org_accounts, print("Message sent to Teams: ", message) -def get_message_for_email(event_details, event_type): +def get_message_for_email(event_details, event_type, affected_accounts, affected_entities): if event_type == "create": BODY_HTML = f"""
There is an AWS incident that is in effect which may likely impact your resources. Here are the details:
- Account(s): All accounts in region
- Resource(s): All service related resources in region
+ Account(s): {affected_accounts}
+ Resource(s): {affected_entities}
Service: {event_details['successfulSet'][0]['event']['service']}
Region: {event_details['successfulSet'][0]['event']['region']}
Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
@@ -454,8 +573,8 @@ def get_message_for_email(event_details, event_type):
Good news! The AWS Health incident from earlier has now been marked as resolved.
- Account(s): All accounts in region
- Resource(s): All service related resources in region
+ Account(s): {affected_accounts}
+ Resource(s): {affected_entities}
Service: {event_details['successfulSet'][0]['event']['service']}
Region: {event_details['successfulSet'][0]['event']['region']}
Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
diff --git a/BETA-org-member-deployment/CODE_OF_CONDUCT.md b/BETA-org-member-deployment/CODE_OF_CONDUCT.md
deleted file mode 100644
index 5b627cf..0000000
--- a/BETA-org-member-deployment/CODE_OF_CONDUCT.md
+++ /dev/null
@@ -1,4 +0,0 @@
-## Code of Conduct
-This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
-For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
-opensource-codeofconduct@amazon.com with any additional questions or comments.
diff --git a/BETA-org-member-deployment/CONTRIBUTING.md b/BETA-org-member-deployment/CONTRIBUTING.md
deleted file mode 100644
index c4b6a1c..0000000
--- a/BETA-org-member-deployment/CONTRIBUTING.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Contributing Guidelines
-
-Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
-documentation, we greatly value feedback and contributions from our community.
-
-Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
-information to effectively respond to your bug report or contribution.
-
-
-## Reporting Bugs/Feature Requests
-
-We welcome you to use the GitHub issue tracker to report bugs or suggest features.
-
-When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
-reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
-
-* A reproducible test case or series of steps
-* The version of our code being used
-* Any modifications you've made relevant to the bug
-* Anything unusual about your environment or deployment
-
-
-## Contributing via Pull Requests
-Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
-
-1. You are working against the latest source on the *main* branch.
-2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
-3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
-
-To send us a pull request, please:
-
-1. Fork the repository.
-2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
-3. Ensure local tests pass.
-4. Commit to your fork using clear commit messages.
-5. Send us a pull request, answering any default questions in the pull request interface.
-6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
-
-GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
-[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
-
-
-## Finding contributions to work on
-Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
-
-
-## Code of Conduct
-This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
-For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
-opensource-codeofconduct@amazon.com with any additional questions or comments.
-
-
-## Security issue notifications
-If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
-
-
-## Licensing
-
-See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
diff --git a/BETA-org-member-deployment/LICENSE b/BETA-org-member-deployment/LICENSE
deleted file mode 100644
index 1bb4f21..0000000
--- a/BETA-org-member-deployment/LICENSE
+++ /dev/null
@@ -1,15 +0,0 @@
-Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
diff --git a/CFN_AHA.yml b/CFN_AHA.yml
deleted file mode 100644
index 9f8191f..0000000
--- a/CFN_AHA.yml
+++ /dev/null
@@ -1,314 +0,0 @@
-AWSTemplateFormatVersion: '2010-09-09'
-Description: CloudFormation Template for AWS Health Aware (AHA)
-Metadata:
- 'AWS::CloudFormation::Interface':
- ParameterGroups:
- - Label:
- default: Customize Alerts/Notifications
- Parameters:
- - AWSOrganizationsEnabled
- - AWSHealthEventType
- - Label:
- default: Package Information
- Parameters:
- - S3Bucket
- - S3Key
- - Label:
- default: >-
- Communication Channels - Slack/Microsoft Teams/Amazon Chime And/or
- EventBridge
- Parameters:
- - SlackWebhookURL
- - MicrosoftTeamsWebhookURL
- - AmazonChimeWebhookURL
- - EventBusName
- - Label:
- default: Email Setup - For Alerting via Email
- Parameters:
- - FromEmail
- - ToEmail
- - Subject
- - Label:
- default: More Configurations - Optional
- Parameters:
- - EventSearchBack
- - Regions
-Conditions:
- UsingSlack: !Not [!Equals [!Ref SlackWebhookURL, None]]
- UsingTeams: !Not [!Equals [!Ref MicrosoftTeamsWebhookURL, None]]
- UsingChime: !Not [!Equals [!Ref AmazonChimeWebhookURL, None]]
- UsingEventBridge: !Not [!Equals [!Ref EventBusName, None]]
- UsingSecrets: !Or [!Condition UsingSlack, !Condition UsingTeams, !Condition UsingChime, !Condition UsingEventBridge]
-Parameters:
- AWSOrganizationsEnabled:
- Description: >-
- You can receive both PHD and SHD alerts if you're using AWS Organizations.
- If you are, make sure to enable Organizational Health View:
- (https://docs.aws.amazon.com/health/latest/ug/aggregate-events.html) to
- aggregate all PHD events in your AWS Organization. If not, you can still
- get SHD alerts.
- Default: 'No'
- AllowedValues:
- - 'Yes'
- - 'No'
- Type: String
- AWSHealthEventType:
- Description: >-
- Select the event type that you want AHA to report on. Refer to
- https://docs.aws.amazon.com/health/latest/APIReference/API_EventType.html for more information on EventType.
- Default: 'issue | accountNotification | scheduledChange'
- AllowedValues:
- - 'issue | accountNotification | scheduledChange'
- - 'issue'
- Type: String
- S3Bucket:
- Description: >-
- Name of your S3 Bucket where the AHA Package .zip resides. Just the name
- of the bucket (e.g. my-s3-bucket)
- Type: String
- S3Key:
- Description: >-
- Name of the .zip in your S3 Bucket. Just the name of the file (e.g.
- aha-v1.0.zip)
- Type: String
- EventBusName:
- Description: >-
- This is to ingest alerts into AWS EventBridge. Enter the event bus name if
- you wish to send the alerts to the AWS EventBridge. Note: By ingesting
- these alerts to AWS EventBridge, you can integrate with 35 SaaS vendors
- such as DataDog/NewRelic/PagerDuty. If you don't prefer to use EventBridge, leave the default (None).
- Type: String
- Default: None
- SlackWebhookURL:
- Description: >-
- Enter the Slack Webhook URL. If you don't prefer to use Slack, leave the default (None).
- Type: String
- Default: None
- MicrosoftTeamsWebhookURL:
- Description: >-
- Enter Microsoft Teams Webhook URL. If you don't prefer to use MS Teams,
- leave the default (None).
- Type: String
- Default: None
- AmazonChimeWebhookURL:
- Description: >-
- Enter the Chime Webhook URL, If you don't prefer to use Amazon Chime,
- leave the default (None).
- Type: String
- Default: None
- Regions:
- Description: >-
- By default, AHA reports events affecting all AWS regions.
- If you want to report on certain regions you can enter up to 10 in a comma separated format.
- Available Regions: us-east-1,us-east-2,us-west-1,us-west-2,af-south-1,ap-east-1,ap-south-1,ap-northeast-3,
- ap-northeast-2,ap-southeast-1,ap-southeast-2,ap-northeast-1,ca-central-1,eu-central-1,eu-west-1,eu-west-2,
- eu-south-1,eu-south-3,eu-north-1,me-south-1,sa-east-1,global
- Default: all regions
- AllowedPattern: ".+"
- ConstraintDescription: No regions were entered, please read the documentation about selecting all regions or filtering on some.
- Type: String
- EventSearchBack:
- Description: How far back to search for events in hours. Default is 1 hour
- Default: '1'
- Type: Number
- FromEmail:
- Description: Enter FROM Email Address
- Type: String
- Default: none@domain.com
- AllowedPattern: ^([\w+-.%]+@[\w-.]+\.[A-Za-z]+)(, ?[\w+-.%]+@[\w-.]+\.[A-Za-z]+)*$
- ConstraintDescription: 'FromEmail is not a valid, please verify entry. If not sending to email, leave as the default, none@domain.com.'
- ToEmail:
- Description: >-
- Enter email addresses separated by commas (for ex: abc@amazon.com,
- bcd@amazon.com)
- Type: String
- Default: none@domain.com
- AllowedPattern: ^([\w+-.%]+@[\w-.]+\.[A-Za-z]+)(, ?[\w+-.%]+@[\w-.]+\.[A-Za-z]+)*$
- ConstraintDescription: 'ToEmail is not a valid, please verify entry. If not sending to email, leave as the default, none@domain.com.'
- Subject:
- Description: Enter the subject of the email address
- Type: String
- Default: AWS Health Alert
-Resources:
- LambdaExecutionRole:
- Type: 'AWS::IAM::Role'
- Properties:
- AssumeRolePolicyDocument:
- Version: '2012-10-17'
- Statement:
- - Effect: Allow
- Principal:
- Service:
- - lambda.amazonaws.com
- Action:
- - 'sts:AssumeRole'
- Path: /
- Policies:
- - PolicyName: AHA-LambdaPolicy
- PolicyDocument:
- Version: '2012-10-17'
- Statement:
- - Effect: Allow
- Action:
- - logs:CreateLogGroup
- - logs:CreateLogStream
- - logs:PutLogEvents
- Resource: !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*'
- - !If
- - UsingSecrets
- - Effect: Allow
- Action:
- - 'secretsmanager:GetResourcePolicy'
- - 'secretsmanager:DescribeSecret'
- - 'secretsmanager:ListSecretVersionIds'
- - 'secretsmanager:GetSecretValue'
- Resource:
- - !If [UsingTeams, !Sub '${MicrosoftChannelSecret}', !Ref AWS::NoValue]
- - !If [UsingSlack, !Sub '${SlackChannelSecret}', !Ref AWS::NoValue]
- - !If [UsingEventBridge, !Sub '${EventBusNameSecret}', !Ref AWS::NoValue]
- - !If [UsingChime, !Sub '${ChimeChannelSecret}', !Ref AWS::NoValue]
- - !Ref 'AWS::NoValue'
- - Effect: Allow
- Action:
- - health:DescribeAffectedAccountsForOrganization
- - health:DescribeAffectedEntitiesForOrganization
- - health:DescribeEventDetailsForOrganization
- - health:DescribeEventsForOrganization
- - health:DescribeEventDetails
- - health:DescribeEvents
- - health:DescribeEventTypes
- - health:DescribeAffectedEntities
- - organizations:ListAccounts
- Resource: "*"
- - Effect: Allow
- Action:
- - dynamodb:ListTables
- Resource: !Sub 'arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:*'
- - Effect: Allow
- Action:
- - ses:SendEmail
- Resource: !Sub 'arn:aws:ses:${AWS::Region}:${AWS::AccountId}:*'
- - Effect: Allow
- Action:
- - dynamodb:UpdateTimeToLive
- - dynamodb:PutItem
- - dynamodb:DeleteItem
- - dynamodb:GetItem
- - dynamodb:Scan
- - dynamodb:Query
- - dynamodb:UpdateItem
- - dynamodb:UpdateTable
- - dynamodb:GetRecords
- Resource: !GetAtt DynamoDBTable.Arn
- - Effect: Allow
- Action:
- - events:PutEvents
- Resource: !Sub 'arn:aws:events:${AWS::Region}:${AWS::AccountId}:event-bus/${EventBusName}'
- DynamoDBTable:
- Type: 'AWS::DynamoDB::Table'
- Properties:
- AttributeDefinitions:
- - AttributeName: arn
- AttributeType: S
- KeySchema:
- - AttributeName: arn
- KeyType: HASH
- ProvisionedThroughput:
- ReadCapacityUnits: 5
- WriteCapacityUnits: 5
- TimeToLiveSpecification:
- AttributeName: ttl
- Enabled: TRUE
- LambdaSchedule:
- Type: 'AWS::Events::Rule'
- Properties:
- Description: Lambda trigger Event
- ScheduleExpression: rate(1 minute)
- State: ENABLED
- Targets:
- - Arn: !GetAtt LambdaFunction.Arn
- Id: LambdaSchedule
- LambdaSchedulePermission:
- Type: 'AWS::Lambda::Permission'
- Properties:
- Action: 'lambda:InvokeFunction'
- FunctionName: !GetAtt LambdaFunction.Arn
- Principal: events.amazonaws.com
- SourceArn: !GetAtt LambdaSchedule.Arn
- MicrosoftChannelSecret:
- Type: 'AWS::SecretsManager::Secret'
- Condition: UsingTeams
- Properties:
- Name: MicrosoftChannelID
- Description: Microsoft Channel ID Secret
- SecretString:
- Ref: MicrosoftTeamsWebhookURL
- Tags:
- - Key: HealthCheckMicrosoft
- Value: ChannelID
- SlackChannelSecret:
- Type: 'AWS::SecretsManager::Secret'
- Condition: UsingSlack
- Properties:
- Name: SlackChannelID
- Description: Slack Channel ID Secret
- SecretString:
- Ref: SlackWebhookURL
- Tags:
- - Key: HealthCheckSlack
- Value: ChannelID
- EventBusNameSecret:
- Type: 'AWS::SecretsManager::Secret'
- Condition: UsingEventBridge
- Properties:
- Name: EventBusName
- Description: EventBus Name Secret
- SecretString:
- Ref: EventBusName
- Tags:
- - Key: EventBusName
- Value: ChannelID
- ChimeChannelSecret:
- Type: 'AWS::SecretsManager::Secret'
- Condition: UsingChime
- Properties:
- Name: ChimeChannelID
- Description: Chime Channel ID Secret
- SecretString:
- Ref: AmazonChimeWebhookURL
- Tags:
- - Key: HealthCheckChime
- Value: ChannelID
- LambdaFunction:
- Type: 'AWS::Lambda::Function'
- Properties:
- Description: Lambda function that runs AHA
- Code:
- S3Bucket:
- Ref: S3Bucket
- S3Key:
- Ref: S3Key
- Handler: handler.main
- MemorySize: 128
- Timeout: 600
- Role:
- 'Fn::Sub': '${LambdaExecutionRole.Arn}'
- Runtime: python3.8
- Environment:
- Variables:
- REGIONS:
- Ref: Regions
- FROM_EMAIL:
- Ref: FromEmail
- TO_EMAIL:
- Ref: ToEmail
- EMAIL_SUBJECT:
- Ref: Subject
- DYNAMODB_TABLE:
- Ref: DynamoDBTable
- EVENT_SEARCH_BACK:
- Ref: EventSearchBack
- ORG_STATUS:
- Ref: AWSOrganizationsEnabled
- HEALTH_EVENT_TYPE:
- Ref: AWSHealthEventType
\ No newline at end of file
diff --git a/README.md b/README.md
index 9446343..4829877 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,10 @@
![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images/aha_banner.png?raw=1)
+
**Table of Contents**
-- [Introduction](#introduction)
+- [Introduction](#introduction)
- [Architecture](#architecture)
- [Configuring an Endpoint](#configuring-an-endpoint)
* [Creating a Amazon Chime Webhook URL](#creating-a-amazon-chime-webhook-url)
@@ -20,8 +21,6 @@
# Introduction
AWS Health Aware (AHA) is an automated notification tool for sending well-formatted AWS Health Alerts to Amazon Chime, Slack, Microsoft Teams, E-mail or an AWS Eventbridge compatible endpoint as long as you have Business or Enterprise Support.
-![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images/aha-logo.png?raw=1)
-
# Architecture
![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images/architecture.png?raw=1)
@@ -37,10 +36,10 @@ AWS Health Aware (AHA) is an automated notification tool for sending well-format
| `MicrosoftChannelSecret` | Webhook URL for Microsoft Teams stored in AWS Secrets Manager |
| `SlackChannelSecret` | Webhook URL for Slack stored in AWS Secrets Manager |
-# Configuring an Endpoint -
+# Configuring an Endpoint
AHA can send to multiple endpoints (webhook URLs, Email or EventBridge). To use any of these you'll need to set it up before-hand as some of these are done on 3rd party websites. We'll go over some of the common ones here.
-## Creating a Amazon Chime Webhook URL -
+## Creating a Amazon Chime Webhook URL
**You will need to have access to create a Amazon Chime room and manage webhooks.**
1. Create a new [chat room](https://docs.aws.amazon.com/chime/latest/ug/chime-chat-room.html) for events (i.e. aws_events).
@@ -49,18 +48,40 @@ AHA can send to multiple endpoints (webhook URLs, Email or EventBridge). To use
4. **Type** a name for the bot (e.g. AWS Health Bot) and **click** *Create*.
5. **Click** *Copy URL*, we will need it for the deployment.
-## Creating a Slack Webhook URL -
+## Creating a Slack Webhook URL
**You will need to have access to add a new channel and app to your Slack Workspace**.
+*Webhook*
1. Create a new [channel](https://slack.com/help/articles/201402297-Create-a-channel) for events (i.e. aws_events)
-2. In your browser go to: workspace-name.slack.com/apps where workspace-name is the name of your Slack Workspace.
-3. In the search bar, search for: *Incoming Webhooks* and **click** on it.
-4. **Click** on *Add to Slack*.
-5. From the dropdown **click** on the channel your created in step 1 and **click** *Add Incoming Webhooks integration*.
-6. From this page you can change the name of the webhook (i.e. AWS Bot), the icon/emoji to use, etc.
+2. In your browser go to: workspace-name.slack.com/apps where workspace-name is the name of your Slack Workspace.
+3. In the search bar, search for: *Incoming Webhooks* and **click** on it.
+4. **Click** on *Add to Slack*.
+5. From the dropdown **click** on the channel you created in step 1 and **click** *Add Incoming Webhooks integration*.
+6. From this page you can change the name of the webhook (i.e. AWS Bot), the icon/emoji to use, etc.
7. For the deployment we will need the *Webhook URL*.
-## Creating a Microsoft Teams Webhook URL -
+*Workflow*
+
+1. Create a new [channel](https://slack.com/help/articles/201402297-Create-a-channel) for events (i.e. aws_events)
+2. Within Slack **click** on your workspace name drop down arrow in the upper left. **click on Tools > Workflow Builder**
+3. **Click** Create in the upper right hand corner of the Workflow Builder and give your workflow a name **click** next.
+4. **Click** on *select* next to **Webhook** and then **click** *add variable* add the following variables one at a time in the *Key* section. All *data type* will be *text*:
+-text
+-accounts
+-resources
+-service
+-region
+-start_time
+-event_arn
+-updates
+5. When done you should have 8 variables, double check them as they are case sensitive and will be referenced. When checked **click** on *done* and *next*.
+6. **Click** on *add step* and then on the add a workflow step **click** *add* next to *send a message*.
+7. Under *send this message to:* select the channel you created in Step 1. In *message text*, recreate the following:
+![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images/workflow.png?raw=1)
+8. **Click** *save* and then **click** *publish*.
+9. For the deployment we will need the *Webhook URL*.
+
+## Creating a Microsoft Teams Webhook URL
**You will need to have access to add a new channel and app to your Microsoft Teams channel**.
1. Create a new [channel](https://docs.microsoft.com/en-us/microsoftteams/get-started-with-teams-create-your-first-teams-and-channels) for events (i.e. aws_events)
@@ -71,13 +92,13 @@ AHA can send to multiple endpoints (webhook URLs, Email or EventBridge). To use
6. From this page you can change the name of the webhook (i.e. AWS Bot), the icon/emoji to use, etc. **Click** *Create* when done.
7. For the deployment we will need the webhook *URL* that is presented.
-## Configuring an Email -
+## Configuring an Email
1. You'll be able to send email alerts to one or many addresses. However, you must first [verify](https://docs.aws.amazon.com/ses/latest/DeveloperGuide/verify-email-addresses-procedure.html) the email(s) in the Simple Email Service (SES) console.
2. AHA utilizes Amazon SES so all you need is to enter in a To: address and a From: address.
3. You *may* have to allow a rule in your environment so that the emails don't get labeled as SPAM. This will be something you have to congfigure on your own.
-## Creating a Amazon EventBridge Ingestion ARN -
+## Creating a Amazon EventBridge Ingestion ARN
1. In the AWS Console, search for **Amazon EventBridge**.
2. On the left hand side, **click** *Event buses*.
@@ -85,7 +106,7 @@ AHA can send to multiple endpoints (webhook URLs, Email or EventBridge). To use
4. Give your Event bus a name and **click** *Create*.
5. For the deployment we will need the *Name* of the Event bus **(not the ARN)**.
-# Setup -
+# Setup
There are 2 available ways to deploy AHA, both are done via the same CloudFormation template to make deployment as easy as possible.
The 2 deployment methods for AHA are:
@@ -107,7 +128,7 @@ The 2 deployment methods for AHA are:
3. Upload the .zip you created in Step 1 to an S3 in the same region you plan to deploy this in.
4. In your AWS console go to *CloudFormation*.
5. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*.
-6. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_AHA.yml` **Click** *Next*.
+6. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `02_CFN_DEPLOY_AHA.yml` **Click** *Next*.
7. -In *Stack name* type a stack name (i.e. AHA-Deployment).
-In *AWSOrganizationsEnabled* leave it set to default which is `No`. If you do have AWS Organizations enabled and you want to aggregate across all your accounts, you should be following the step for [AHA for users who ARE using AWS Organizations](#aha-with-aws-organizations)
-In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues.
@@ -138,7 +159,7 @@ The 2 deployment methods for AHA are:
3. Upload the .zip you created in Step 1 to an S3 in the same region you plan to deploy this in.
4. In your AWS console go to *CloudFormation*.
5. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*.
-6. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_AHA.yml` **Click** *Next*.
+6. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `02_CFN_DEPLOY_AHA.yml` **Click** *Next*.
7. -In *Stack name* type a stack name (i.e. AHA-Deployment).
-In *AWSOrganizationsEnabled* change the dropdown to `Yes`. If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations)
-In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues.
@@ -153,12 +174,46 @@ The 2 deployment methods for AHA are:
10. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*.
11. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes).
+### Deployment in AWS Organization Member Account
+
+1. Clone the AHA package from the BETA-member-deployment folder. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri`
+2. In your top-level management account AWS console go to *CloudFormation*
+3. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*.
+4. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `01_CFN_MGMT_ROLE.yml` **Click** *Next*.
+5. -In *Stack name* type a stack name (i.e. aha-assume-role).
+-In *OrgMemberAccountId* put in the account id of the member account you plan to run AHA in (e.g. 000123456789).
+6. Scroll to the bottom and **click** *Next*.
+7. Scroll to the bottom and **click** *Next* again.
+8. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*.
+9. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 1-2 minutes). This will create an IAM role with the necessary AWS Organizations and AWS Health API permissions for the member account to assume.
+10. In the *Outputs* tab, there will be a value for *AWSHealthAwareRoleForPHDEventsArn* (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201), copy that down as you will need it for step 16.
+11. Back in the root of the package you downloaded/cloned you'll have two files: `handler.py` and `messagegenerator.py`. Use your tool of choice to zip them both up and give the archive a unique name (e.g. aha-v1.8.zip). **Note: Putting the version number in the name will make upgrading AHA seamless.**
+12. Upload the .zip you created in Step 11 to an S3 in the same region you plan to deploy this in.
+13. Login to the member account you plan to deploy this in and in your AWS console go to *CloudFormation*.
+14. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*.
+15. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `02_CFN_DEPLOY_AHA.yml` **Click** *Next*.
+16. -In *Stack name* type a stack name (i.e. AHA-Deployment).
+-In *AWSOrganizationsEnabled* change the dropdown to `Yes`. If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations)
+-In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues.
+-In *S3Bucket* type ***just*** the bucket name of the S3 bucket used in step 12 (e.g. my-aha-bucket).
+-In *S3Key* type ***just*** the name of the .zip file you created in Step 11 (e.g. aha-v1.8.zip).
+-In the *Communications Channels* section enter the URLs, Emails and/or ARN of the endpoints you configured previously.
+-In the *Email Setup* section enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is.
+-In *EventSearchBack* enter in the amount of hours you want to search back for events. Default is 1 hour.
+-In *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated with (e.g. us-east-1, us-east-2).
+-In *ManagementAccountRoleArn* enter in the full IAM arn from step 10 (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201)
+17. Scroll to the bottom and **click** *Next*.
+18. Scroll to the bottom and **click** *Next* again.
+19. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*.
+20. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes).
+
+
# Updating
**Until this project is migrated to the AWS Serverless Application Model (SAM), updates will have to be done as described below:**
1. Download the updated CloudFormation Template .yml file and 2 `.py` files.
2. Zip up the 2 `.py` files and name the .zip with a different version number than before (e.g. if the .zip you originally uploaded is aha-v1.8.zip the new one should be aha-v1.9.zip)
3. In the AWS CloudFormation console **click** on the name of your stack, then **click** *Update*.
-4. In the *Prepare template* section **click** *Replace current template*, **click** *Upload a template file*, **click** *Choose file*, select the newer `CFN_AHA.yml` file you downloaded and finally **click** *Next*.
+4. In the *Prepare template* section **click** *Replace current template*, **click** *Upload a template file*, **click** *Choose file*, select the newer `02_CFN_DEPLOY_AHA.yml` file you downloaded and finally **click** *Next*.
5. In the *S3Key* text box change the version number in the name of the .zip to match name of the .zip you uploaded in Step 2 (The name of the .zip has to be different for CloudFormation to recognize a change). **Click** *Next*.
6. At the next screen **click** *Next* and finally **click** *Update stack*. This will now upgrade your environment to the latest version you downloaded.
diff --git a/handler.py b/handler.py
index 9147ccd..17d246e 100644
--- a/handler.py
+++ b/handler.py
@@ -34,7 +34,16 @@
# backoff/retry values than than the boto defaults
)
)
-health_client = boto3.client('health', config=config)
+
+# Get Account Name
+def get_account_name(account_id):
+    """Return the name of an AWS account, or the account ID itself on failure.
+
+    An Organizations client is obtained from ``get_sts_token('organizations')``
+    and ``describe_account`` is called for ``account_id``. Any exception raised
+    by that lookup is swallowed and ``account_id`` is returned unchanged, so the
+    caller always gets a printable label. Errors raised while obtaining the
+    client itself are not caught here.
+    """
+    organizations = get_sts_token('organizations')
+    try:
+        return organizations.describe_account(AccountId=account_id)['Account']['Name']
+    except Exception:
+        # Best effort only: fall back to the raw ID when the name lookup fails.
+        return account_id
def send_alert(event_details, event_type):
slack_url = get_secrets()["slack"]
@@ -47,7 +56,7 @@ def send_alert(event_details, event_type):
if "None" not in event_bus_name:
try:
print("Sending the alert to Event Bridge")
- send_to_eventbridge(get_message_for_eventbridge(event_details, event_type), event_type, event_bus_name)
+ send_to_eventbridge(get_message_for_eventbridge(event_details, event_type, affected_accounts, affected_entities), event_type, event_bus_name)
except HTTPError as e:
print("Got an error while sending message to EventBridge: ", e.code, e.reason)
except URLError as e:
@@ -55,17 +64,26 @@ def send_alert(event_details, event_type):
pass
if "hooks.slack.com/services" in slack_url:
try:
- print("Sending the alert to Slack Channel")
- send_to_slack(get_message_for_slack(event_details, event_type), slack_url)
+ print("Sending the alert to Slack Webhook Channel")
+ send_to_slack(get_message_for_slack(event_details, event_type, affected_accounts, affected_entities, slack_webhook="webhook"), slack_url)
except HTTPError as e:
print("Got an error while sending message to Slack: ", e.code, e.reason)
except URLError as e:
print("Server connection failed: ", e.reason)
pass
+ if "hooks.slack.com/workflows" in slack_url:
+ try:
+ print("Sending the alert to Slack Workflows Channel")
+ send_to_slack(get_message_for_slack(event_details, event_type, affected_accounts, affected_entities, slack_webhook="workflow"), slack_url)
+ except HTTPError as e:
+ print("Got an error while sending message to Slack: ", e.code, e.reason)
+ except URLError as e:
+ print("Server connection failed: ", e.reason)
+ pass
if "office.com/webhook" in teams_url:
try:
print("Sending the alert to Teams")
- send_to_teams(get_message_for_teams(event_details, event_type), teams_url)
+ send_to_teams(get_message_for_teams(event_details, event_type, affected_accounts, affected_entities), teams_url)
except HTTPError as e:
print("Got an error while sending message to Teams: ", e.code, e.reason)
except URLError as e:
@@ -75,7 +93,7 @@ def send_alert(event_details, event_type):
if "none@domain.com" not in SENDER and RECIPIENT:
try:
print("Sending the alert to the emails")
- send_email(event_details, event_type)
+ send_email(event_details, event_type, affected_accounts, affected_entities)
except HTTPError as e:
print("Got an error while sending message to Email: ", e.code, e.reason)
except URLError as e:
@@ -84,7 +102,7 @@ def send_alert(event_details, event_type):
if "hooks.chime.aws/incomingwebhooks" in chime_url:
try:
print("Sending the alert to Chime channel")
- send_to_chime(get_message_for_chime(event_details, event_type), chime_url)
+ send_to_chime(get_message_for_chime(event_details, event_type, affected_accounts, affected_entities), chime_url)
except HTTPError as e:
print("Got an error while sending message to Chime: ", e.code, e.reason)
except URLError as e:
@@ -113,15 +131,26 @@ def send_org_alert(event_details, affected_org_accounts, affected_org_entities,
pass
if "hooks.slack.com/services" in slack_url:
try:
- print("Sending the alert to Slack Channel")
+ print("Sending the alert to Slack Webhook Channel")
send_to_slack(
- get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities),
+ get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities, slack_webhook="webhook"),
slack_url)
except HTTPError as e:
print("Got an error while sending message to Slack: ", e.code, e.reason)
except URLError as e:
print("Server connection failed: ", e.reason)
pass
+ if "hooks.slack.com/workflows" in slack_url:
+ try:
+ print("Sending the alert to Slack Workflow Channel")
+ send_to_slack(
+ get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities, slack_webhook="workflow"),
+ slack_url)
+ except HTTPError as e:
+ print("Got an error while sending message to Slack: ", e.code, e.reason)
+ except URLError as e:
+ print("Server connection failed: ", e.reason)
+ pass
if "office.com/webhook" in teams_url:
try:
print("Sending the alert to Teams")
@@ -248,6 +277,31 @@ def send_org_email(event_details, eventType, affected_org_accounts, affected_org
},
)
+# non-organization view affected accounts
+def get_health_accounts(health_client, event, event_arn):
+    """Return the AWS account IDs affected by a single AWS Health event.
+
+    Pages through ``describe_affected_entities`` for ``event_arn`` and collects
+    the ``awsAccountId`` of every returned entity, without duplicates and in
+    first-seen order.
+
+    Args:
+        health_client: client object whose ``get_paginator`` is used to page
+            through ``describe_affected_entities``.
+        event: unused; kept so the signature matches ``get_health_org_accounts``.
+        event_arn: ARN of the Health event to look up.
+
+    Returns:
+        A list of account ID strings; empty when no entity is reported.
+    """
+    affected_accounts = []
+    event_accounts_paginator = health_client.get_paginator('describe_affected_entities')
+    # DescribeAffectedEntities selects events through filter.eventArns; it does
+    # not accept a top-level eventArn argument.
+    event_accounts_page_iterator = event_accounts_paginator.paginate(
+        filter={'eventArns': [event_arn]}
+    )
+    for event_accounts_page in event_accounts_page_iterator:
+        # Round-trip through JSON so values json cannot serialise natively are
+        # converted by myconverter (presumably datetimes - confirm).
+        json_event_accounts = json.dumps(event_accounts_page, default=myconverter)
+        parsed_event_accounts = json.loads(json_event_accounts)
+        # Walk every entity instead of indexing [0], so an empty page is harmless.
+        for entity in parsed_event_accounts.get('entities', []):
+            account_id = entity.get('awsAccountId')
+            if account_id and account_id not in affected_accounts:
+                affected_accounts.append(account_id)
+    return affected_accounts
+
+def get_health_entities(health_client, event, event_arn):
+    """Return the entity values affected by a single AWS Health event.
+
+    Pages through ``describe_affected_entities`` for ``event_arn`` and collects
+    the ``entityValue`` of every returned entity, in the order received.
+
+    Args:
+        health_client: client object whose ``get_paginator`` is used to page
+            through ``describe_affected_entities``.
+        event: unused; kept so the signature matches ``get_health_accounts``.
+        event_arn: ARN of the Health event to look up.
+
+    Returns:
+        A list of entity value strings; empty when no entity is reported.
+    """
+    affected_entities = []
+    event_entities_paginator = health_client.get_paginator('describe_affected_entities')
+    # DescribeAffectedEntities selects events through filter.eventArns; it does
+    # not accept a top-level eventArn argument.
+    event_entities_page_iterator = event_entities_paginator.paginate(
+        filter={'eventArns': [event_arn]}
+    )
+    for event_entities_page in event_entities_page_iterator:
+        # Round-trip through JSON so values json cannot serialise natively are
+        # converted by myconverter (presumably datetimes - confirm).
+        json_event_entities = json.dumps(event_entities_page, default=myconverter)
+        parsed_event_entities = json.loads(json_event_entities)
+        affected_entities.extend(
+            entity['entityValue'] for entity in parsed_event_entities['entities']
+        )
+    return affected_entities
# organization view affected accounts
def get_health_org_accounts(health_client, event, event_arn):
@@ -331,11 +385,13 @@ def update_org_ddb(event_arn, str_update, status_code, event_details, affected_o
# Cleanup: DynamoDB entry deleted 24 hours after last update
}
)
+ affected_org_accounts_details = [
+ f"{get_account_name(account_id)} ({account_id})" for account_id in affected_org_accounts]
# send to configured endpoints
if status_code != "closed":
- send_org_alert(event_details, affected_org_accounts, affected_org_entities, event_type="create")
+ send_org_alert(event_details, affected_org_accounts_details, affected_org_entities, event_type="create")
else:
- send_org_alert(event_details, affected_org_accounts, affected_org_entities, event_type="resolve")
+ send_org_alert(event_details, affected_org_accounts_details, affected_org_entities, event_type="resolve")
else:
item = response['Item']
@@ -356,17 +412,19 @@ def update_org_ddb(event_arn, str_update, status_code, event_details, affected_o
# Cleanup: DynamoDB entry deleted 24 hours after last update
}
)
+ affected_org_accounts_details = [
+ f"{get_account_name(account_id)} ({account_id})" for account_id in affected_org_accounts]
# send to configured endpoints
if status_code != "closed":
- send_org_alert(event_details, affected_org_accounts, affected_org_entities, event_type="create")
+ send_org_alert(event_details, affected_org_accounts_details, affected_org_entities, event_type="create")
else:
- send_org_alert(event_details, affected_org_accounts, affected_org_entities, event_type="resolve")
+ send_org_alert(event_details, affected_org_accounts_details, affected_org_entities, event_type="resolve")
else:
print("No new updates found, checking again in 1 minute.")
# For Customers not using AWS Organizations
-def update_ddb(event_arn, str_update, status_code, event_details):
+def update_ddb(event_arn, str_update, status_code, event_details, affected_accounts, affected_entities):
# open dynamoDB
dynamodb = boto3.resource("dynamodb")
ddb_table = os.environ['DYNAMODB_TABLE']
@@ -405,11 +463,13 @@ def update_ddb(event_arn, str_update, status_code, event_details):
# Cleanup: DynamoDB entry deleted 24 hours after last update
}
)
+ affected_accounts_details = [
+ f"{get_account_name(account_id)} ({account_id})" for account_id in affected_accounts]
# send to configured endpoints
if status_code != "closed":
- send_alert(event_details, event_type="create")
+ send_alert(event_details, affected_accounts, affected_entities, event_type="create")
else:
- send_alert(event_details, event_type="resolve")
+ send_alert(event_details, affected_accounts, affected_entities, event_type="resolve")
else:
item = response['Item']
@@ -427,9 +487,9 @@ def update_ddb(event_arn, str_update, status_code, event_details):
)
# send to configured endpoints
if status_code != "closed":
- send_alert(event_details, event_type="create")
+ send_alert(event_details, affected_accounts_details, affected_entities, event_type="create")
else:
- send_alert(event_details, event_type="resolve")
+ send_alert(event_details, affected_accounts_details, affected_entities, event_type="resolve")
else:
print("No new updates found, checking again in 1 minute.")
@@ -439,11 +499,13 @@ def get_secrets():
secret_slack_name = "SlackChannelID"
secret_chime_name = "ChimeChannelID"
region_name = os.environ['AWS_REGION']
+ get_secret_value_response_assumerole = ""
get_secret_value_response_eventbus = ""
get_secret_value_response_chime = ""
get_secret_value_response_teams = ""
get_secret_value_response_slack = ""
event_bus_name = "EventBusName"
+ secret_assumerole_name = "AssumeRoleArn"
# create a Secrets Manager client
session = boto3.session.Session()
@@ -500,6 +562,22 @@ def get_secrets():
chime_channel_id = get_secret_value_response_chime['SecretString']
else:
chime_channel_id = "None"
+ try:
+ get_secret_value_response_assumerole = client.get_secret_value(
+ SecretId=secret_assumerole_name
+ )
+ except ClientError as e:
+ if e.response['Error']['Code'] == 'AccessDeniedException':
+ print("No AWS Secret configured for Assume Role, skipping")
+ assumerole_channel_id = "None"
+ else:
+ print("There was an error with the Assume Role secret: ",e.response)
+ assumerole_channel_id = "None"
+ finally:
+ if 'SecretString' in get_secret_value_response_assumerole:
+ assumerole_channel_id = get_secret_value_response_assumerole['SecretString']
+ else:
+ assumerole_channel_id = "None"
try:
get_secret_value_response_eventbus = client.get_secret_value(
SecretId=event_bus_name
@@ -520,12 +598,16 @@ def get_secrets():
"teams": teams_channel_id,
"slack": slack_channel_id,
"chime": chime_channel_id,
- "eventbusname": eventbus_channel_id}
- print("Secrets: ",secrets)
+ "eventbusname": eventbus_channel_id,
+ "ahaassumerole": assumerole_channel_id
+ }
+ # uncomment below to verify secrets values
+ #print("Secrets: ",secrets)
return secrets
-def describe_events():
+def describe_events(health_client):
+ str_ddb_format_sec = '%s'
# set hours to search back in time for events
delta_hours = os.environ['EVENT_SEARCH_BACK']
health_event_type = os.environ['HEALTH_EVENT_TYPE']
@@ -543,7 +625,7 @@ def describe_events():
}
if health_event_type == "issue":
- event_type_filter = {'eventTypeCategories': ["issue"]}
+ event_type_filter = {'eventTypeCategories': ['issue','investigation']}
print("AHA will be monitoring events with event type categories as 'issue' only!")
str_filter.update(event_type_filter)
@@ -557,10 +639,34 @@ def describe_events():
event_page_iterator = event_paginator.paginate(filter=str_filter)
for response in event_page_iterator:
events = response.get('events', [])
- return events
+ aws_events = json.dumps(events, default=myconverter)
+ aws_events = json.loads(aws_events)
+ print('Event(s) Received: ', json.dumps(aws_events))
+ if len(aws_events) > 0: # if there are new event(s) from AWS
+ for event in aws_events:
+ event_arn = event['arn']
+ status_code = event['statusCode']
+ str_update = parser.parse((event['lastUpdatedTime']))
+ str_update = str_update.strftime(str_ddb_format_sec)
+ # get event details
+ event_details = json.dumps(describe_event_details(health_client, event_arn), default=myconverter)
+ event_details = json.loads(event_details)
+ print("Event Details: ", event_details)
+ if event_details['successfulSet'] == []:
+ print("An error occured with account:", event_details['failedSet'][0]['awsAccountId'], "due to:",
+ event_details['failedSet'][0]['errorName'], ":",
+ event_details['failedSet'][0]['errorMessage'])
+ continue
+ else:
+ # write to dynamoDB for persistence
+ update_ddb(event_arn, str_update, status_code, event_details, affected_accounts, affected_entities)
+ else:
+ print("No events found in time frame, checking again in 1 minute.")
-def describe_org_events():
+
+def describe_org_events(health_client):
+ str_ddb_format_sec = '%s'
# set hours to search back in time for events
delta_hours = os.environ['EVENT_SEARCH_BACK']
health_event_type = os.environ['HEALTH_EVENT_TYPE']
@@ -576,7 +682,7 @@ def describe_org_events():
}
if health_event_type == "issue":
- event_type_filter = {'eventTypeCategories': ["issue"]}
+ event_type_filter = {'eventTypeCategories': ['issue','investigation']}
print("AHA will be monitoring events with event type categories as 'issue' only!")
str_filter.update(event_type_filter)
@@ -590,22 +696,48 @@ def describe_org_events():
org_event_page_iterator = org_event_paginator.paginate(filter=str_filter)
for response in org_event_page_iterator:
events = response.get('events', [])
- return events
+ aws_events = json.dumps(events, default=myconverter)
+ aws_events = json.loads(aws_events)
+ print('Event(s) Received: ', json.dumps(aws_events))
+ if len(aws_events) > 0:
+ for event in aws_events:
+ event_arn = event['arn']
+ status_code = event['statusCode']
+ str_update = parser.parse((event['lastUpdatedTime']))
+ str_update = str_update.strftime(str_ddb_format_sec)
+
+ # get organizational view requirements
+ affected_org_accounts = get_health_org_accounts(health_client, event, event_arn)
+ affected_org_entities = get_health_org_entities(health_client, event, event_arn, affected_org_accounts)
+ # get event details
+ event_details = json.dumps(describe_org_event_details(health_client, event_arn, affected_org_accounts),
+ default=myconverter)
+ event_details = json.loads(event_details)
+ print("Event Details: ", event_details)
+ if event_details['successfulSet'] == []:
+ print("An error occured with account:", event_details['failedSet'][0]['awsAccountId'], "due to:",
+ event_details['failedSet'][0]['errorName'], ":",
+ event_details['failedSet'][0]['errorMessage'])
+ continue
+ else:
+ # write to dynamoDB for persistence
+ update_org_ddb(event_arn, str_update, status_code, event_details, affected_org_accounts,
+ affected_org_entities)
def myconverter(json_object):
if isinstance(json_object, datetime):
return json_object.__str__()
-def describe_event_details(event_arn):
+def describe_event_details(health_client, event_arn):
response = health_client.describe_event_details(
eventArns=[event_arn],
)
return response
-def describe_org_event_details(event_arn, affected_org_accounts):
+def describe_org_event_details(health_client, event_arn, affected_org_accounts):
if len(affected_org_accounts) >= 1:
affected_account_ids = affected_org_accounts[0]
response = health_client.describe_event_details_for_organization(
@@ -618,7 +750,7 @@ def describe_org_event_details(event_arn, affected_org_accounts):
)
return response
else:
- response = describe_event_details(event_arn)
+ response = describe_event_details(health_client, event_arn)
return response
@@ -630,75 +762,59 @@ def send_to_eventbridge(message, event_type, event_bus):
'EventBusName': event_bus}, ])
print("Response is:", response)
+def get_sts_token(service):
+ assumeRoleArn = get_secrets()["ahaassumerole"]
+ boto3_client = None
+
+ if "arn:aws:iam::" in assumeRoleArn:
+ ACCESS_KEY = []
+ SECRET_KEY = []
+ SESSION_TOKEN = []
+
+ sts_connection = boto3.client('sts')
+
+ ct = datetime.now()
+ role_session_name = "cross_acct_aha_session"
+
+ acct_b = sts_connection.assume_role(
+ RoleArn=assumeRoleArn,
+ RoleSessionName=role_session_name,
+ DurationSeconds=900,
+ )
+
+ ACCESS_KEY = acct_b['Credentials']['AccessKeyId']
+ SECRET_KEY = acct_b['Credentials']['SecretAccessKey']
+ SESSION_TOKEN = acct_b['Credentials']['SessionToken']
+
+ # create service client using the assumed role credentials, e.g. S3
+ boto3_client = boto3.client(
+ service,
+ config=config,
+ aws_access_key_id=ACCESS_KEY,
+ aws_secret_access_key=SECRET_KEY,
+ aws_session_token=SESSION_TOKEN,
+ )
+ print("Running in member account deployment mode")
+ else:
+ boto3_client = boto3.client(service, config=config)
+ print("Running in management account deployment mode")
+
+ return boto3_client
def main(event, context):
print("THANK YOU FOR CHOOSING AWS HEALTH AWARE!")
+ health_client = get_sts_token('health')
org_status = os.environ['ORG_STATUS']
- str_ddb_format_sec = '%s'
+ #str_ddb_format_sec = '%s'
# check for AWS Organizations Status
if org_status == "No":
print("AWS Organizations is not enabled. Only Service Health Dashboard messages will be alerted.")
- aws_events = describe_events()
- aws_events = json.dumps(aws_events, default=myconverter)
- aws_events = json.loads(aws_events)
- print('Event(s) Received: ', json.dumps(aws_events))
- if len(aws_events) > 0: # if there are new event(s) from AWS
- for event in aws_events:
- event_arn = event['arn']
- status_code = event['statusCode']
- str_update = parser.parse((event['lastUpdatedTime']))
- str_update = str_update.strftime(str_ddb_format_sec)
-
- # get event details
- event_details = json.dumps(describe_event_details(event_arn), default=myconverter)
- event_details = json.loads(event_details)
- print("Event Details: ", event_details)
- if event_details['successfulSet'] == []:
- print("An error occured with account:", event_details['failedSet'][0]['awsAccountId'], "due to:",
- event_details['failedSet'][0]['errorName'], ":",
- event_details['failedSet'][0]['errorMessage'])
- continue
- else:
- # write to dynamoDB for persistence
- update_ddb(event_arn, str_update, status_code, event_details)
- else:
- print("No events found in time frame, checking again in 1 minute.")
+ describe_events(health_client)
else:
print(
"AWS Organizations is enabled. Personal Health Dashboard and Service Health Dashboard messages will be alerted.")
- aws_events = describe_org_events()
- aws_events = json.dumps(aws_events, default=myconverter)
- aws_events = json.loads(aws_events)
- print('Event(s) Received: ', json.dumps(aws_events))
- if len(aws_events) > 0:
- for event in aws_events:
- event_arn = event['arn']
- status_code = event['statusCode']
- str_update = parser.parse((event['lastUpdatedTime']))
- str_update = str_update.strftime(str_ddb_format_sec)
-
- # get organizational view requirements
- affected_org_accounts = get_health_org_accounts(health_client, event, event_arn)
- affected_org_entities = get_health_org_entities(health_client, event, event_arn, affected_org_accounts)
-
- # get event details
- event_details = json.dumps(describe_org_event_details(event_arn, affected_org_accounts),
- default=myconverter)
- event_details = json.loads(event_details)
- print("Event Details: ", event_details)
- if event_details['successfulSet'] == []:
- print("An error occured with account:", event_details['failedSet'][0]['awsAccountId'], "due to:",
- event_details['failedSet'][0]['errorName'], ":",
- event_details['failedSet'][0]['errorMessage'])
- continue
- else:
- # write to dynamoDB for persistence
- update_org_ddb(event_arn, str_update, status_code, event_details, affected_org_accounts,
- affected_org_entities)
- else:
- print("No events found in time frame, checking again in 1 minute.")
-
+ describe_org_events(health_client)
if __name__ == "__main__":
- main('', '')
\ No newline at end of file
+ main('', '')
diff --git a/messagegenerator.py b/messagegenerator.py
index e455b65..52755ae 100644
--- a/messagegenerator.py
+++ b/messagegenerator.py
@@ -6,69 +6,129 @@
import time
-def get_message_for_slack(event_details, event_type):
+def get_message_for_slack(event_details, event_type, affected_accounts, affected_entities, slack_webhook):
message = ""
summary = ""
- if event_type == "create":
- summary += (
- f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
- f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*"
- )
- message = {
- "text": summary,
- "attachments": [
- {
- "color": "danger",
- "fields": [
- { "title": "Account(s)", "value": "All accounts\nin region", "short": True },
- { "title": "Resource(s)", "value": "All resources\nin region", "short": True },
- { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True },
- { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True },
- { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True },
- { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True },
- { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False },
- { "title": "Updates", "value": get_last_aws_update(event_details), "short": False }
- ],
- }
- ]
- }
-
- elif event_type == "resolve":
- summary += (
- f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
- f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*"
- )
- message = {
- "text": summary,
- "attachments": [
- {
- "color": "00ff00",
- "fields": [
- { "title": "Account(s)", "value": "All accounts\nin region", "short": True },
- { "title": "Resource(s)", "value": "All resources\nin region", "short": True },
- { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True },
- { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True },
- { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True },
- { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime']), "short": True },
- { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True },
- { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False },
- { "title": "Updates", "value": get_last_aws_update(event_details), "short": False }
- ],
- }
- ]
- }
+ if slack_webhook == "webhook":
+ if len(affected_entities) >= 1:
+ affected_entities = "\n".join(affected_entities)
+ else:
+ affected_entities = "All resources\nin region"
+ if len(affected_accounts) >= 1:
+ affected_accounts = "\n".join(affected_accounts)
+ else:
+ affected_accounts = "All accounts\nin region"
+ if event_type == "create":
+ summary += (
+ f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
+ f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*"
+ )
+ message = {
+ "text": summary,
+ "attachments": [
+ {
+ "color": "danger",
+ "fields": [
+ { "title": "Account(s)", "value": affected_accounts, "short": True },
+ { "title": "Resource(s)", "value": affected_entities, "short": True },
+ { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True },
+ { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True },
+ { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True },
+ { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True },
+ { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False },
+ { "title": "Updates", "value": get_last_aws_update(event_details), "short": False }
+ ],
+ }
+ ]
+ }
+
+ elif event_type == "resolve":
+ summary += (
+ f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
+ f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*"
+ )
+ message = {
+ "text": summary,
+ "attachments": [
+ {
+ "color": "00ff00",
+ "fields": [
+ { "title": "Account(s)", "value": affected_accounts, "short": True },
+ { "title": "Resource(s)", "value": affected_entities, "short": True },
+ { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True },
+ { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True },
+ { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True },
+ { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime']), "short": True },
+ { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True },
+ { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False },
+ { "title": "Updates", "value": get_last_aws_update(event_details), "short": False }
+ ],
+ }
+ ]
+ }
+ else:
+ if len(affected_entities) >= 1:
+ affected_entities = "\n".join(affected_entities)
+ else:
+ affected_entities = "All resources in region"
+ if len(affected_accounts) >= 1:
+ affected_accounts = "\n".join(affected_accounts)
+ else:
+ affected_accounts = "All accounts in region"
+ if event_type == "create":
+ summary += (
+ f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
+ f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*"
+ )
+ message = {
+ "text": summary,
+ "accounts": affected_accounts,
+ "resources": affected_entities,
+ "service": event_details['successfulSet'][0]['event']['service'],
+ "region": event_details['successfulSet'][0]['event']['region'],
+ "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']),
+ "status": event_details['successfulSet'][0]['event']['statusCode'],
+ "event_arn": event_details['successfulSet'][0]['event']['arn'],
+ "updates": get_last_aws_update(event_details)
+ }
+
+ elif event_type == "resolve":
+ summary += (
+ f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
+ f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*"
+ )
+ message = {
+ "text": summary,
+ "accounts": affected_accounts,
+ "resources": affected_entities,
+ "service": event_details['successfulSet'][0]['event']['service'],
+ "region": event_details['successfulSet'][0]['event']['region'],
+ "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']),
+ "status": event_details['successfulSet'][0]['event']['statusCode'],
+ "event_arn": event_details['successfulSet'][0]['event']['arn'],
+ "updates": get_last_aws_update(event_details)
+ }
+
print("Message sent to Slack: ", message)
return message
-def get_message_for_eventbridge(event_details, event_type):
+def get_message_for_eventbridge(event_details, event_type, affected_accounts, affected_entities):
message = ""
+ if len(affected_entities) >= 1:
+ affected_entities = "\n".join(affected_entities)
+ else:
+ affected_entities = "All resources\nin region"
+ if len(affected_accounts) >= 1:
+ affected_accounts = "\n".join(affected_accounts)
+ else:
+ affected_accounts = "All accounts\nin region"
if event_type == "create":
message = {
"attachments": [
{
"fields": [
- { "title": "Account(s)", "value": "All accounts\nin region", "short": True },
- { "title": "Resource(s)", "value": "All resources\nin region", "short": True },
+ { "title": "Account(s)", "value": affected_accounts, "short": True },
+ { "title": "Resource(s)", "value": affected_entities, "short": True },
{ "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True },
{ "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True },
{ "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True },
@@ -85,8 +145,8 @@ def get_message_for_eventbridge(event_details, event_type):
"attachments": [
{
"fields": [
- { "title": "Account(s)", "value": "All accounts\nin region", "short": True },
- { "title": "Resource(s)", "value": "All resources\nin region", "short": True },
+ { "title": "Account(s)", "value": affected_accounts, "short": True },
+ { "title": "Resource(s)", "value": affected_entities, "short": True },
{ "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True },
{ "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True },
{ "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True },
@@ -152,79 +212,130 @@ def get_org_message_for_eventbridge(event_details, event_type, affected_org_acco
return message
-def get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities):
+def get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities, slack_webhook):
message = ""
summary = ""
- if len(affected_org_entities) >= 1:
- affected_org_entities = "\n".join(affected_org_entities)
- else:
- affected_org_entities = "All resources\nin region"
- if len(affected_org_accounts) >= 1:
- affected_org_accounts = "\n".join(affected_org_accounts)
+ if slack_webhook == "webhook":
+ if len(affected_org_entities) >= 1:
+ affected_org_entities = "\n".join(affected_org_entities)
+ else:
+ affected_org_entities = "All resources\nin region"
+ if len(affected_org_accounts) >= 1:
+ affected_org_accounts = "\n".join(affected_org_accounts)
+ else:
+ affected_org_accounts = "All accounts\nin region"
+ if event_type == "create":
+ summary += (
+ f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
+ f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*"
+ )
+ message = {
+ "text": summary,
+ "attachments": [
+ {
+ "color": "danger",
+ "fields": [
+ { "title": "Account(s)", "value": affected_org_accounts, "short": True },
+ { "title": "Resource(s)", "value": affected_org_entities, "short": True },
+ { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True },
+ { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True },
+ { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True },
+ { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True },
+ { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False },
+ { "title": "Updates", "value": get_last_aws_update(event_details), "short": False }
+ ],
+ }
+ ]
+ }
+
+ elif event_type == "resolve":
+ summary += (
+ f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
+ f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*"
+ )
+ message = {
+ "text": summary,
+ "attachments": [
+ {
+ "color": "00ff00",
+ "fields": [
+ { "title": "Account(s)", "value": affected_org_accounts, "short": True },
+ { "title": "Resource(s)", "value": affected_org_entities, "short": True },
+ { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True },
+ { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True },
+ { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True },
+ { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime']), "short": True },
+ { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True },
+ { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False },
+ { "title": "Updates", "value": get_last_aws_update(event_details), "short": False }
+ ],
+ }
+ ]
+ }
else:
- affected_org_accounts = "All accounts\nin region"
- if event_type == "create":
- summary += (
- f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
- f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*"
- )
- message = {
- "text": summary,
- "attachments": [
- {
- "color": "danger",
- "fields": [
- { "title": "Account(s)", "value": affected_org_accounts, "short": True },
- { "title": "Resource(s)", "value": affected_org_entities, "short": True },
- { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True },
- { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True },
- { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True },
- { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True },
- { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False },
- { "title": "Updates", "value": get_last_aws_update(event_details), "short": False }
- ],
- }
- ]
- }
-
- elif event_type == "resolve":
- summary += (
- f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
- f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*"
- )
- message = {
- "text": summary,
- "attachments": [
- {
- "color": "00ff00",
- "fields": [
- { "title": "Account(s)", "value": affected_org_accounts, "short": True },
- { "title": "Resource(s)", "value": affected_org_entities, "short": True },
- { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True },
- { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True },
- { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True },
- { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime']), "short": True },
- { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True },
- { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False },
- { "title": "Updates", "value": get_last_aws_update(event_details), "short": False }
- ],
- }
- ]
- }
+ if len(affected_org_entities) >= 1:
+ affected_org_entities = "\n".join(affected_org_entities)
+ else:
+ affected_org_entities = "All resources in region"
+ if len(affected_org_accounts) >= 1:
+ affected_org_accounts = "\n".join(affected_org_accounts)
+ else:
+ affected_org_accounts = "All accounts in region"
+ if event_type == "create":
+ summary += (
+ f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
+ f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*"
+ )
+ message = {
+ "text": summary,
+ "accounts": affected_org_accounts,
+ "resources": affected_org_entities,
+ "service": event_details['successfulSet'][0]['event']['service'],
+ "region": event_details['successfulSet'][0]['event']['region'],
+ "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']),
+ "status": event_details['successfulSet'][0]['event']['statusCode'],
+ "event_arn": event_details['successfulSet'][0]['event']['arn'],
+ "updates": get_last_aws_update(event_details)
+ }
+
+ elif event_type == "resolve":
+ summary += (
+ f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in "
+ f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*"
+ )
+ message = {
+ "text": summary,
+ "accounts": affected_org_accounts,
+ "resources": affected_org_entities,
+ "service": event_details['successfulSet'][0]['event']['service'],
+ "region": event_details['successfulSet'][0]['event']['region'],
+ "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']),
+ "status": event_details['successfulSet'][0]['event']['statusCode'],
+ "event_arn": event_details['successfulSet'][0]['event']['arn'],
+ "updates": get_last_aws_update(event_details)
+ }
json.dumps(message)
print("Message sent to Slack: ", message)
return message
-def get_message_for_chime(event_details, event_type):
+def get_message_for_chime(event_details, event_type, affected_accounts, affected_entities):
message = ""
+ if len(affected_entities) >= 1:
+ affected_entities = "\n".join(affected_entities)
+ else:
+ affected_entities = "All resources\nin region"
+ if len(affected_accounts) >= 1:
+ affected_accounts = "\n".join(affected_accounts)
+ else:
+ affected_accounts = "All accounts\nin region"
summary = ""
if event_type == "create":
message = str("/md" + "\n" + "**:rotating_light:\[NEW\] AWS Health reported an issue with the " + event_details['successfulSet'][0]['event']['service'].upper() + " service in " + event_details['successfulSet'][0]['event']['region'].upper() + " region.**" + "\n"
"---" + "\n"
- "**Account(s)**: " + "All accounts in region" + "\n"
- "**Resource(s)**: " + "All resources in region" + "\n"
+ "**Account(s)**: " + affected_accounts + "\n"
+ "**Resource(s)**: " + affected_entities + "\n"
"**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n"
"**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n"
"**Start Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n"
@@ -237,8 +348,8 @@ def get_message_for_chime(event_details, event_type):
message = str("/md" + "\n" + "**:heavy_check_mark:\[RESOLVED\] The AWS Health issue with the " + event_details['successfulSet'][0]['event']['service'].upper() + " service in " + event_details['successfulSet'][0]['event']['region'].upper() + " region is now resolved.**" + "\n"
"---" + "\n"
- "**Account(s)**: " + "All accounts in region" + "\n"
- "**Resource(s)**: " + "All resources in region" + "\n"
+ "**Account(s)**: " + affected_accounts + "\n"
+ "**Resource(s)**: " + affected_entities + "\n"
"**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n"
"**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n"
"**Start Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n"
@@ -296,8 +407,16 @@ def get_org_message_for_chime(event_details, event_type, affected_org_accounts,
-def get_message_for_teams(event_details, event_type):
+def get_message_for_teams(event_details, event_type, affected_accounts, affected_entities):
message = ""
+ if len(affected_entities) >= 1:
+ affected_entities = "\n".join(affected_entities)
+ else:
+ affected_entities = "All resources\nin region"
+ if len(affected_accounts) >= 1:
+ affected_accounts = "\n".join(affected_accounts)
+ else:
+ affected_accounts = "All accounts\nin region"
summary = ""
if event_type == "create":
title = "🚨 [NEW] AWS Health reported an issue with the " + event_details['successfulSet'][0]['event'][
@@ -313,8 +432,8 @@ def get_message_for_teams(event_details, event_type):
"activityTitle": str(title),
"markdown": False,
"facts": [
- {"name": "Account(s)", "value": "All accounts\nin region"},
- {"name": "Resource(s)", "value": "All resources\nin region"},
+ {"name": "Account(s)", "value": affected_accounts},
+ {"name": "Resource(s)", "value": affected_entities},
{"name": "Service", "value": event_details['successfulSet'][0]['event']['service']},
{"name": "Region", "value": event_details['successfulSet'][0]['event']['region']},
{"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])},
@@ -340,8 +459,8 @@ def get_message_for_teams(event_details, event_type):
"activityTitle": str(title),
"markdown": False,
"facts": [
- {"name": "Account(s)", "value": "All accounts\nin region"},
- {"name": "Resource(s)", "value": "All resources\nin region"},
+ {"name": "Account(s)", "value": affected_accounts},
+ {"name": "Resource(s)", "value": affected_entities},
{"name": "Service", "value": event_details['successfulSet'][0]['event']['service']},
{"name": "Region", "value": event_details['successfulSet'][0]['event']['region']},
{"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])},
@@ -426,15 +545,15 @@ def get_org_message_for_teams(event_details, event_type, affected_org_accounts,
print("Message sent to Teams: ", message)
-def get_message_for_email(event_details, event_type):
+def get_message_for_email(event_details, event_type, affected_accounts, affected_entities):
if event_type == "create":
BODY_HTML = f"""
There is an AWS incident that is in effect which may likely impact your resources. Here are the details:
- Account(s): All accounts in region
- Resource(s): All service related resources in region
+ Account(s): {affected_accounts}
+ Resource(s): {affected_entities}
Service: {event_details['successfulSet'][0]['event']['service']}
Region: {event_details['successfulSet'][0]['event']['region']}
Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
@@ -454,8 +573,8 @@ def get_message_for_email(event_details, event_type):
Good news! The AWS Health incident from earlier has now been marked as resolved.
- Account(s): All accounts in region
- Resource(s): All service related resources in region
+ Account(s): {affected_accounts}
+ Resource(s): {affected_entities}
Service: {event_details['successfulSet'][0]['event']['service']}
Region: {event_details['successfulSet'][0]['event']['region']}
Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
diff --git a/readme-images/architecture.png b/readme-images/architecture.png
index a7ffbab..57dbed6 100644
Binary files a/readme-images/architecture.png and b/readme-images/architecture.png differ
diff --git a/readme-images/workflow.png b/readme-images/workflow.png
new file mode 100644
index 0000000..f00a308
Binary files /dev/null and b/readme-images/workflow.png differ