From 240d2458ee01d0c162cd9d7f99792f795a76454e Mon Sep 17 00:00:00 2001 From: Roth Date: Wed, 10 Aug 2022 09:26:34 -0700 Subject: [PATCH] new eventbridge schema --- CFN_DEPLOY_AHA.yml | 2 +- aha-2.1-beta/CFN_DEPLOY_AHA.yml | 682 ++++++++++++++++++++ aha-2.1-beta/CFN_MGMT_ROLE.yml | 48 ++ aha-2.1-beta/CODE_OF_CONDUCT.md | 4 + aha-2.1-beta/CONTRIBUTING.md | 59 ++ aha-2.1-beta/LICENSE | 15 + aha-2.1-beta/README.md | 434 +++++++++++++ aha-2.1-beta/handler.py | 901 +++++++++++++++++++++++++++ aha-2.1-beta/messagegenerator.py | 631 +++++++++++++++++++ aha-2.1-beta/new_aha_event_schema.md | 221 +++++++ 10 files changed, 2996 insertions(+), 1 deletion(-) create mode 100644 aha-2.1-beta/CFN_DEPLOY_AHA.yml create mode 100644 aha-2.1-beta/CFN_MGMT_ROLE.yml create mode 100644 aha-2.1-beta/CODE_OF_CONDUCT.md create mode 100644 aha-2.1-beta/CONTRIBUTING.md create mode 100644 aha-2.1-beta/LICENSE create mode 100644 aha-2.1-beta/README.md create mode 100644 aha-2.1-beta/handler.py create mode 100644 aha-2.1-beta/messagegenerator.py create mode 100644 aha-2.1-beta/new_aha_event_schema.md diff --git a/CFN_DEPLOY_AHA.yml b/CFN_DEPLOY_AHA.yml index 5c72086..05fa587 100644 --- a/CFN_DEPLOY_AHA.yml +++ b/CFN_DEPLOY_AHA.yml @@ -413,7 +413,7 @@ Resources: EMAIL_SUBJECT: "${Subject}" DYNAMODB_TABLE: "${GlobalDDBTable}" EVENT_SEARCH_BACK: ${EventSearchBack} - ORG_STATUS: "${AWSOrganizationsEnabled}" + ORG_STATUS: ${AWSOrganizationsEnabled} HEALTH_EVENT_TYPE: "${AWSHealthEventType}" MANAGEMENT_ROLE_ARN: "${ManagementAccountRoleArn}" LambdaExecutionRole: diff --git a/aha-2.1-beta/CFN_DEPLOY_AHA.yml b/aha-2.1-beta/CFN_DEPLOY_AHA.yml new file mode 100644 index 0000000..834a03c --- /dev/null +++ b/aha-2.1-beta/CFN_DEPLOY_AHA.yml @@ -0,0 +1,682 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: CloudFormation Template for AWS Health Aware (AHA) +Metadata: + 'AWS::CloudFormation::Interface': + ParameterGroups: + - Label: + default: Customize Alerts/Notifications + Parameters: + - 
AWSOrganizationsEnabled + - AWSHealthEventType + - Label: + default: Package Information + Parameters: + - S3Bucket + - S3Key + - Label: + default: >- + Communication Channels - Slack/Microsoft Teams/Amazon Chime And/or + EventBridge + Parameters: + - SlackWebhookURL + - MicrosoftTeamsWebhookURL + - AmazonChimeWebhookURL + - EventBusName + - Label: + default: Email Setup - For Alerting via Email + Parameters: + - FromEmail + - ToEmail + - Subject + - Label: + default: More Configurations - Optional + Parameters: + - EventSearchBack + - Regions + - ManagementAccountRoleArn + - SecondaryRegion + - AccountIDs + ParameterLabels: + AWSOrganizationsEnabled: + default: AWS Organizations Enabled? + ManagementAccountRoleArn: + default: ARN of the AWS Organizations Management Account assume role (if using) + AWSHealthEventType: + default: The types of events to get alerted on + S3Bucket: + default: Name of S3 Bucket + S3Key: + default: Name of .zip file in S3 Bucket + SlackWebhookURL: + default: Slack Webhook URL + MicrosoftTeamsWebhookURL: + default: Microsoft Teams Webhook URL + AmazonChimeWebhookURL: + default: Amazon Chime Webhook URL + FromEmail: + default: Email From + ToEmail: + default: Email To + Subject: + default: Subject of Email + EventSearchBack: # label keys must name a parameter declared under Parameters + default: Hours back to search for events + Regions: + default: Which regions to search for events in + SecondaryRegion: + default: Deploy in secondary region? + AccountIDs: + default: Exclude any account numbers?
+Conditions: + UsingSlack: !Not [!Equals [!Ref SlackWebhookURL, None]] + UsingTeams: !Not [!Equals [!Ref MicrosoftTeamsWebhookURL, None]] + UsingChime: !Not [!Equals [!Ref AmazonChimeWebhookURL, None]] + UsingEventBridge: !Not [!Equals [!Ref EventBusName, None]] + UsingSecrets: !Or [!Condition UsingSlack, !Condition UsingTeams, !Condition UsingChime, !Condition UsingEventBridge, !Condition UsingCrossAccountRole] + UsingCrossAccountRole: !Not [!Equals [!Ref ManagementAccountRoleArn, None]] + NotUsingMultiRegion: !Equals [!Ref SecondaryRegion, 'No'] + UsingMultiRegion: !Not [!Equals [!Ref SecondaryRegion, 'No']] + TestCondition: !Equals ['true', 'false'] + UsingMultiRegionTeams: !And [!Condition UsingTeams, !Condition UsingMultiRegion] + UsingMultiRegionSlack: !And [!Condition UsingSlack, !Condition UsingMultiRegion] + UsingMultiRegionEventBridge: !And [!Condition UsingEventBridge, !Condition UsingMultiRegion] + UsingMultiRegionChime: !And [!Condition UsingChime, !Condition UsingMultiRegion] + UsingMultiRegionCrossAccountRole: !And [!Condition UsingCrossAccountRole, !Condition UsingMultiRegion] + UsingAccountIds: !Not [!Equals [!Ref AccountIDs, None]] +Parameters: + AWSOrganizationsEnabled: + Description: >- + You can receive both PHD and SHD alerts if you're using AWS Organizations. + If you are, make sure to enable Organizational Health View: + (https://docs.aws.amazon.com/health/latest/ug/aggregate-events.html) to + aggregate all PHD events in your AWS Organization. If not, you can still + get SHD alerts. + Default: 'No' + AllowedValues: + - 'Yes' + - 'No' + Type: String + SecondaryRegion: + Description: You can deploy this in a secondary region for resiliency. As a result, + the DynamoDB table will become a Global DynamoDB table. 
Regions that support + Global DynamoDB tables are listed + Default: 'No' + AllowedValues: + - 'No' + - us-east-1 + - us-east-2 + - us-west-1 + - us-west-2 + - ap-south-1 + - ap-northeast-2 + - ap-southeast-1 + - ap-southeast-2 + - ap-northeast-1 + - ca-central-1 + - eu-central-1 + - eu-west-1 + - eu-west-2 + - eu-west-3 + - sa-east-1 + Type: String + ManagementAccountRoleArn: + Description: Arn of the IAM role in the top-level management account for collecting PHD Events. 'None' if deploying into the top-level management account. + Type: String + Default: None + AWSHealthEventType: + Description: >- + Select the event type that you want AHA to report on. Refer to + https://docs.aws.amazon.com/health/latest/APIReference/API_EventType.html for more information on EventType. + Default: 'issue | accountNotification | scheduledChange' + AllowedValues: + - 'issue | accountNotification | scheduledChange' + - 'issue' + Type: String + S3Bucket: + Description: >- + Name of your S3 Bucket where the AHA Package .zip resides. Just the name + of the bucket (e.g. my-s3-bucket) + Type: String + S3Key: + Description: >- + Name of the .zip in your S3 Bucket. Just the name of the file (e.g. + aha-v1.0.zip) + Type: String + EventBusName: + Description: >- + This is to ingest alerts into AWS EventBridge. Enter the event bus name if + you wish to send the alerts to the AWS EventBridge. Note: By ingesting + these alerts to AWS EventBridge, you can integrate with 35 SaaS vendors + such as DataDog/NewRelic/PagerDuty. If you don't prefer to use EventBridge, leave the default (None). + Type: String + Default: None + SlackWebhookURL: + Description: >- + Enter the Slack Webhook URL. If you don't prefer to use Slack, leave the default (None). + Type: String + Default: None + MicrosoftTeamsWebhookURL: + Description: >- + Enter Microsoft Teams Webhook URL. If you don't prefer to use MS Teams, + leave the default (None). 
+ Type: String + Default: None + AmazonChimeWebhookURL: + Description: >- + Enter the Chime Webhook URL. If you don't prefer to use Amazon Chime, + leave the default (None). + Type: String + Default: None + Regions: + Description: >- + By default, AHA reports events affecting all AWS regions. + If you want to report on certain regions you can enter up to 10 in a comma separated format. + Available Regions: us-east-1,us-east-2,us-west-1,us-west-2,af-south-1,ap-east-1,ap-south-1,ap-northeast-3, + ap-northeast-2,ap-southeast-1,ap-southeast-2,ap-northeast-1,ca-central-1,eu-central-1,eu-west-1,eu-west-2, + eu-south-1,eu-west-3,eu-north-1,me-south-1,sa-east-1,global + Default: all regions + AllowedPattern: ".+" + ConstraintDescription: No regions were entered, please read the documentation about selecting all regions or filtering on some. + Type: String + AccountIDs: + Description: >- + If you would like to EXCLUDE any accounts from alerting, upload a .csv file of comma-separated account numbers to the same S3 bucket + where the AHA.zip package is located. Sample AccountIDs file name: aha_account_ids.csv. If not, leave the default of None. + Default: None + Type: String + AllowedPattern: (None)|(.+(\.csv))$ + EventSearchBack: + Description: How far back to search for events in hours. Default is 1 hour + Default: '1' + Type: Number + FromEmail: + Description: Enter FROM Email Address + Type: String + Default: none@domain.com + AllowedPattern: ^([\w+-.%]+@[\w-.]+\.[A-Za-z]+)(, ?[\w+-.%]+@[\w-.]+\.[A-Za-z]+)*$ + ConstraintDescription: 'FromEmail is not valid, please verify entry. If not sending to email, leave as the default, none@domain.com.' + ToEmail: + Description: >- + Enter email addresses separated by commas (for ex: abc@amazon.com, + bcd@amazon.com) + Type: String + Default: none@domain.com + AllowedPattern: ^([\w+-.%]+@[\w-.]+\.[A-Za-z]+)(, ?[\w+-.%]+@[\w-.]+\.[A-Za-z]+)*$ + ConstraintDescription: 'ToEmail is not valid, please verify entry.
If not sending to email, leave as the default, none@domain.com.' + Subject: + Description: Enter the subject of the email address + Type: String + Default: AWS Health Alert +Resources: + GlobalDDBTable: + Type: AWS::DynamoDB::GlobalTable + Condition: UsingMultiRegion + Properties: + AttributeDefinitions: + - AttributeName: arn + AttributeType: S + KeySchema: + - AttributeName: arn + KeyType: HASH + Replicas: + - Region: !Ref SecondaryRegion + ReadProvisionedThroughputSettings: + ReadCapacityUnits: 5 + - Region: !Ref "AWS::Region" + ReadProvisionedThroughputSettings: + ReadCapacityUnits: 5 + StreamSpecification: + StreamViewType: "NEW_AND_OLD_IMAGES" + TimeToLiveSpecification: + AttributeName: ttl + Enabled: true + WriteProvisionedThroughputSettings: + WriteCapacityAutoScalingSettings: + MaxCapacity: 10 + MinCapacity: 10 + TargetTrackingScalingPolicyConfiguration: + DisableScaleIn: false + ScaleInCooldown: 30 + ScaleOutCooldown: 30 + TargetValue: 10 + DynamoDBTable: + Type: 'AWS::DynamoDB::Table' + Condition: NotUsingMultiRegion + Properties: + AttributeDefinitions: + - AttributeName: arn + AttributeType: S + KeySchema: + - AttributeName: arn + KeyType: HASH + ProvisionedThroughput: + ReadCapacityUnits: 5 + WriteCapacityUnits: 5 + TimeToLiveSpecification: + AttributeName: ttl + Enabled: TRUE + AHASecondaryRegionStackSet: + Condition: UsingMultiRegion + DependsOn: GlobalDDBTable + Type: AWS::CloudFormation::StackSet + Properties: + Description: Secondary Region CloudFormation Template for AWS Health Aware (AHA) + PermissionModel: SELF_MANAGED + Capabilities: [CAPABILITY_IAM] + StackInstancesGroup: + - Regions: + - !Ref 'SecondaryRegion' + DeploymentTargets: + Accounts: + - !Ref 'AWS::AccountId' + StackSetName: 'aha-multi-region' + TemplateBody: + !Sub | + Resources: + AHA2ndRegionBucket: + Type: AWS::S3::Bucket + CopyAHA: + Type: Custom::CopyAHA + Properties: + DestBucket: !Ref 'AHA2ndRegionBucket' + ServiceToken: !GetAtt 'CopyAHAFunction.Arn' + SourceBucket: 
${S3Bucket} + Object: + - ${S3Key} + CopyAHARole: + Type: AWS::IAM::Role + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: lambda.amazonaws.com + Action: sts:AssumeRole + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole + Path: / + Policies: + - PolicyName: aha-lambda-copier + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - s3:GetObject + Resource: + - 'arn:aws:s3:::${S3Bucket}/*' # objects in the source bucket only, not buckets sharing its name prefix + - Effect: Allow + Action: + - s3:PutObject + - s3:DeleteObject + Resource: + - !Join ['', [ 'arn:aws:s3:::', !Ref AHA2ndRegionBucket, '/*']] # objects in the destination bucket only + CopyAHAFunction: + Type: AWS::Lambda::Function + DependsOn: AHA2ndRegionBucket + Properties: + Description: Copies AHA .zip from a source S3 bucket to a destination + Handler: index.handler + Runtime: python3.8 + Role: !GetAtt 'CopyAHARole.Arn' + Timeout: 240 + Code: + ZipFile: | + import json + import logging + import threading + import boto3 + import cfnresponse + + def copy_object(source_bucket, dest_bucket, object): + s3 = boto3.client('s3') + for o in object: + key = o + copy_source = { + 'Bucket': source_bucket, + 'Key': key + } + print('copy_source: %s' % copy_source) + print('dest_bucket = %s'%dest_bucket) + print('key = %s' %key) + s3.copy_object(CopySource=copy_source, Bucket=dest_bucket, + Key=key) + + def delete_object(bucket, object): + s3 = boto3.client('s3') + objects = {'Objects': [{'Key': o} for o in object]} + s3.delete_objects(Bucket=bucket, Delete=objects) + + def timeout(event, context): + logging.error('Execution is about to time out, sending failure response to CloudFormation') + cfnresponse.send(event, context, cfnresponse.FAILED, {}, None) + + def handler(event, context): + # make sure we send a failure to CloudFormation if the function + # is going to timeout + timer = threading.Timer((context.get_remaining_time_in_millis() + / 1000.00) - 0.5, timeout, args=[event, context]) +
timer.start() + + print('Received event: %s' % json.dumps(event)) + status = cfnresponse.SUCCESS + try: + source_bucket = event['ResourceProperties']['SourceBucket'] + dest_bucket = event['ResourceProperties']['DestBucket'] + object = event['ResourceProperties']['Object'] + if event['RequestType'] == 'Delete': + delete_object(dest_bucket, object) + else: + copy_object(source_bucket, dest_bucket, object) + except Exception as e: + logging.error('Exception: %s' % e, exc_info=True) + status = cfnresponse.FAILED + finally: + timer.cancel() + cfnresponse.send(event, context, status, {}, None) + LambdaSchedule: + Type: AWS::Events::Rule + Properties: + Description: Lambda trigger Event + ScheduleExpression: rate(1 minute) + State: ENABLED + Targets: + - Arn: !GetAtt 'LambdaFunction.Arn' + Id: LambdaSchedule + LambdaSchedulePermission: + Type: AWS::Lambda::Permission + Properties: + Action: lambda:InvokeFunction + FunctionName: !GetAtt 'LambdaFunction.Arn' + Principal: events.amazonaws.com + SourceArn: !GetAtt 'LambdaSchedule.Arn' + LambdaFunction: + Type: AWS::Lambda::Function + DependsOn: CopyAHA + Properties: + Description: Lambda function that runs AHA + Code: + S3Bucket: + Ref: AHA2ndRegionBucket + S3Key: "${S3Key}" + Handler: handler.main + MemorySize: 128 + Timeout: 600 + Role: ${LambdaExecutionRole.Arn} + Runtime: python3.8 + Environment: + Variables: + REGIONS: ${Regions} + FROM_EMAIL: "${FromEmail}" + TO_EMAIL: "${ToEmail}" + EMAIL_SUBJECT: "${Subject}" + DYNAMODB_TABLE: "${GlobalDDBTable}" + EVENT_SEARCH_BACK: ${EventSearchBack} + ORG_STATUS: "${AWSOrganizationsEnabled}" + HEALTH_EVENT_TYPE: "${AWSHealthEventType}" + MANAGEMENT_ROLE_ARN: "${ManagementAccountRoleArn}" + LambdaExecutionRole: + Type: 'AWS::IAM::Role' + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + Policies: + - PolicyName: AHA-LambdaPolicy + PolicyDocument: + 
Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - logs:CreateLogGroup + - logs:CreateLogStream + - logs:PutLogEvents + Resource: + - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:*' + - !If [UsingMultiRegion, !Sub 'arn:aws:logs:${SecondaryRegion}:${AWS::AccountId}:*', !Ref AWS::NoValue] + - !If + - UsingSecrets + - Effect: Allow + Action: + - 'secretsmanager:GetResourcePolicy' + - 'secretsmanager:DescribeSecret' + - 'secretsmanager:ListSecretVersionIds' + - 'secretsmanager:GetSecretValue' + Resource: + - !If [UsingTeams, !Sub '${MicrosoftChannelSecret}', !Ref AWS::NoValue] + - !If [UsingSlack, !Sub '${SlackChannelSecret}', !Ref AWS::NoValue] + - !If [UsingEventBridge, !Sub '${EventBusNameSecret}', !Ref AWS::NoValue] + - !If [UsingChime, !Sub '${ChimeChannelSecret}', !Ref AWS::NoValue] + - !If [UsingCrossAccountRole, !Sub '${AssumeRoleSecret}', !Ref AWS::NoValue] + - !If + - UsingMultiRegionTeams + - !Sub + - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' + - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${MicrosoftChannelSecret}' ]]} + - !Ref AWS::NoValue + - !If + - UsingMultiRegionSlack + - !Sub + - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' + - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${SlackChannelSecret}' ]]} + - !Ref AWS::NoValue + - !If + - UsingMultiRegionEventBridge + - !Sub + - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' + - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${EventBusNameSecret}' ]]} + - !Ref AWS::NoValue + - !If + - UsingMultiRegionChime + - !Sub + - 'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' + - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${ChimeChannelSecret}' ]]} + - !Ref AWS::NoValue + - !If + - UsingMultiRegionCrossAccountRole + - !Sub + - 
'arn:aws:secretsmanager:${SecondaryRegion}:${AWS::AccountId}:secret:${SecretNameWithSha}' + - { SecretNameWithSha: !Select [1, !Split [':secret:', !Sub '${AssumeRoleSecret}' ]]} + - !Ref AWS::NoValue + - !Ref 'AWS::NoValue' + - Effect: Allow + Action: + - health:DescribeAffectedAccountsForOrganization + - health:DescribeAffectedEntitiesForOrganization + - health:DescribeEventDetailsForOrganization + - health:DescribeEventsForOrganization + - health:DescribeEventDetails + - health:DescribeEvents + - health:DescribeEventTypes + - health:DescribeAffectedEntities + - organizations:ListAccounts + - organizations:DescribeAccount + Resource: "*" + - Effect: Allow + Action: + - dynamodb:ListTables + Resource: + - !Sub 'arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:*' + - !If [UsingMultiRegion, !Sub 'arn:aws:dynamodb:${SecondaryRegion}:${AWS::AccountId}:*', !Ref AWS::NoValue] + - Effect: Allow + Action: + - ses:SendEmail + Resource: + - !Sub 'arn:aws:ses:${AWS::Region}:${AWS::AccountId}:*' + - !If [UsingMultiRegion, !Sub 'arn:aws:ses:${SecondaryRegion}:${AWS::AccountId}:*', !Ref AWS::NoValue] + - Effect: Allow + Action: + - dynamodb:UpdateTimeToLive + - dynamodb:PutItem + - dynamodb:DeleteItem + - dynamodb:GetItem + - dynamodb:Scan + - dynamodb:Query + - dynamodb:UpdateItem + - dynamodb:UpdateTable + - dynamodb:GetRecords + Resource: !If [UsingMultiRegion, !GetAtt GlobalDDBTable.Arn, !GetAtt DynamoDBTable.Arn] + - Effect: Allow + Action: + - events:PutEvents + Resource: + - !Sub 'arn:aws:events:${AWS::Region}:${AWS::AccountId}:event-bus/${EventBusName}' + - !If [UsingMultiRegion, !Sub 'arn:aws:events:${SecondaryRegion}:${AWS::AccountId}:event-bus/${EventBusName}', !Ref AWS::NoValue] + - !If + - UsingAccountIds + - Effect: Allow + Action: + - s3:GetObject + Resource: !Sub 'arn:aws:s3:::${S3Bucket}/${AccountIDs}' + - !Ref 'AWS::NoValue' + - !If + - UsingCrossAccountRole + - Effect: Allow + Action: + - sts:AssumeRole + Resource: !Ref ManagementAccountRoleArn + - !Ref 
'AWS::NoValue' + LambdaSchedule: + Type: 'AWS::Events::Rule' + Properties: + Description: Lambda trigger Event + ScheduleExpression: rate(1 minute) + State: ENABLED + Targets: + - Arn: !GetAtt LambdaFunction.Arn + Id: LambdaSchedule + LambdaSchedulePermission: + Type: 'AWS::Lambda::Permission' + Properties: + Action: 'lambda:InvokeFunction' + FunctionName: !GetAtt LambdaFunction.Arn + Principal: events.amazonaws.com + SourceArn: !GetAtt LambdaSchedule.Arn + MicrosoftChannelSecret: + Type: 'AWS::SecretsManager::Secret' + Condition: UsingTeams + Properties: + Name: MicrosoftChannelID + Description: Microsoft Channel ID Secret + ReplicaRegions: + !If + - UsingMultiRegion + - [{ Region: !Sub '${SecondaryRegion}' }] + - !Ref "AWS::NoValue" + SecretString: + Ref: MicrosoftTeamsWebhookURL + Tags: + - Key: HealthCheckMicrosoft + Value: ChannelID + SlackChannelSecret: + Type: 'AWS::SecretsManager::Secret' + Condition: UsingSlack + Properties: + Name: SlackChannelID + Description: Slack Channel ID Secret + ReplicaRegions: + !If + - UsingMultiRegion + - [{ Region: !Sub '${SecondaryRegion}' }] + - !Ref "AWS::NoValue" + SecretString: + Ref: SlackWebhookURL + Tags: + - Key: HealthCheckSlack + Value: ChannelID + EventBusNameSecret: + Type: 'AWS::SecretsManager::Secret' + Condition: UsingEventBridge + Properties: + Name: EventBusName + Description: EventBus Name Secret + ReplicaRegions: + !If + - UsingMultiRegion + - [{ Region: !Sub '${SecondaryRegion}' }] + - !Ref "AWS::NoValue" + SecretString: + Ref: EventBusName + Tags: + - Key: EventBusName + Value: ChannelID + ChimeChannelSecret: + Type: 'AWS::SecretsManager::Secret' + Condition: UsingChime + Properties: + Name: ChimeChannelID + Description: Chime Channel ID Secret + ReplicaRegions: + !If + - UsingMultiRegion + - [{ Region: !Sub '${SecondaryRegion}' }] + - !Ref "AWS::NoValue" + SecretString: + Ref: AmazonChimeWebhookURL + Tags: + - Key: HealthCheckChime + Value: ChannelID + AssumeRoleSecret: + Type: 
'AWS::SecretsManager::Secret' + Condition: UsingCrossAccountRole + Properties: + Name: AssumeRoleArn + Description: Management account role for AHA to assume + ReplicaRegions: + !If + - UsingMultiRegion + - [{ Region: !Sub '${SecondaryRegion}' }] + - !Ref "AWS::NoValue" + SecretString: + Ref: ManagementAccountRoleArn + Tags: + - Key: AssumeRoleArn + Value: ChannelID + LambdaFunction: + Type: 'AWS::Lambda::Function' + Properties: + Description: Lambda function that runs AHA + Code: + S3Bucket: + Ref: S3Bucket + S3Key: + Ref: S3Key + Handler: handler.main + MemorySize: 128 + Timeout: 600 + Role: + 'Fn::Sub': '${LambdaExecutionRole.Arn}' + Runtime: python3.8 + Environment: + Variables: + ACCOUNT_IDS: + Ref: AccountIDs + REGIONS: + Ref: Regions + S3_BUCKET: + Ref: S3Bucket + FROM_EMAIL: + Ref: FromEmail + TO_EMAIL: + Ref: ToEmail + EMAIL_SUBJECT: + Ref: Subject + DYNAMODB_TABLE: + !If [UsingMultiRegion, !Ref GlobalDDBTable, !Ref DynamoDBTable] + EVENT_SEARCH_BACK: + Ref: EventSearchBack + ORG_STATUS: + Ref: AWSOrganizationsEnabled + HEALTH_EVENT_TYPE: + Ref: AWSHealthEventType + MANAGEMENT_ROLE_ARN: + Ref: ManagementAccountRoleArn + \ No newline at end of file diff --git a/aha-2.1-beta/CFN_MGMT_ROLE.yml b/aha-2.1-beta/CFN_MGMT_ROLE.yml new file mode 100644 index 0000000..d6c172b --- /dev/null +++ b/aha-2.1-beta/CFN_MGMT_ROLE.yml @@ -0,0 +1,48 @@ +AWSTemplateFormatVersion: "2010-09-09" +Description: Deploy Cross-Account Role for PHD access +Parameters: + OrgMemberAccountId: + Type: String + AllowedPattern: '^\d{12}$' + Description: AWS Account ID of the AWS Organizations Member Account that will run AWS Health Aware +Resources: + AWSHealthAwareRoleForPHDEvents: + Type: "AWS::IAM::Role" + Properties: + Description: "Grants access to PHD events" + Path: / + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Action: + - sts:AssumeRole + Effect: Allow + Principal: + AWS: !Sub 'arn:aws:iam::${OrgMemberAccountId}:root' + Policies: + - PolicyName: 
AllowHealthCalls + PolicyDocument: + Statement: + - Effect: Allow + Action: + - health:DescribeAffectedAccountsForOrganization + - health:DescribeAffectedEntitiesForOrganization + - health:DescribeEventDetailsForOrganization + - health:DescribeEventsForOrganization + - health:DescribeEventDetails + - health:DescribeEvents + - health:DescribeEventTypes + - health:DescribeAffectedEntities + Resource: "*" + - PolicyName: AllowsDescribeOrg + PolicyDocument: + Statement: + - Effect: Allow + Action: + - organizations:ListAccounts + - organizations:ListAWSServiceAccessForOrganization + - organizations:DescribeAccount + Resource: "*" +Outputs: + AWSHealthAwareRoleForPHDEventsArn: + Value: !GetAtt AWSHealthAwareRoleForPHDEvents.Arn diff --git a/aha-2.1-beta/CODE_OF_CONDUCT.md b/aha-2.1-beta/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..5b627cf --- /dev/null +++ b/aha-2.1-beta/CODE_OF_CONDUCT.md @@ -0,0 +1,4 @@ +## Code of Conduct +This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). +For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact +opensource-codeofconduct@amazon.com with any additional questions or comments. diff --git a/aha-2.1-beta/CONTRIBUTING.md b/aha-2.1-beta/CONTRIBUTING.md new file mode 100644 index 0000000..c4b6a1c --- /dev/null +++ b/aha-2.1-beta/CONTRIBUTING.md @@ -0,0 +1,59 @@ +# Contributing Guidelines + +Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional +documentation, we greatly value feedback and contributions from our community. + +Please read through this document before submitting any issues or pull requests to ensure we have all the necessary +information to effectively respond to your bug report or contribution. + + +## Reporting Bugs/Feature Requests + +We welcome you to use the GitHub issue tracker to report bugs or suggest features. 
+ +When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already +reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: + +* A reproducible test case or series of steps +* The version of our code being used +* Any modifications you've made relevant to the bug +* Anything unusual about your environment or deployment + + +## Contributing via Pull Requests +Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: + +1. You are working against the latest source on the *main* branch. +2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. +3. You open an issue to discuss any significant work - we would hate for your time to be wasted. + +To send us a pull request, please: + +1. Fork the repository. +2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. +3. Ensure local tests pass. +4. Commit to your fork using clear commit messages. +5. Send us a pull request, answering any default questions in the pull request interface. +6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. + +GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and +[creating a pull request](https://help.github.com/articles/creating-a-pull-request/). + + +## Finding contributions to work on +Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 
+ + +## Code of Conduct +This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). +For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact +opensource-codeofconduct@amazon.com with any additional questions or comments. + + +## Security issue notifications +If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. + + +## Licensing + +See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. diff --git a/aha-2.1-beta/LICENSE b/aha-2.1-beta/LICENSE new file mode 100644 index 0000000..1bb4f21 --- /dev/null +++ b/aha-2.1-beta/LICENSE @@ -0,0 +1,15 @@ +Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ diff --git a/aha-2.1-beta/README.md b/aha-2.1-beta/README.md new file mode 100644 index 0000000..9712598 --- /dev/null +++ b/aha-2.1-beta/README.md @@ -0,0 +1,434 @@ + +![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images/aha_banner.png?raw=1) + + +**Table of Contents** + +- [Introduction](#introduction) +- [What's New](#whatsnew) +- [Architecture](#architecture) +- [Configuring an Endpoint](#configuring-an-endpoint) + * [Creating a Amazon Chime Webhook URL](#creating-a-amazon-chime-webhook-url) + * [Creating a Slack Webhook URL](#creating-a-slack-webhook-url) + * [Creating a Microsoft Teams Webhook URL](#creating-a-microsoft-teams-webhook-url) + * [Configuring an Email](#configuring-an-email) + * [Creating a Amazon EventBridge Ingestion ARN](#creating-a-amazon-eventbridge-ingestion-arn) +- [Deployment Options](#deployment-options) + - [CloudFormation](#cloudformation) + * [AHA for users WITHOUT AWS Organizations](#aha-without-aws-organizations-using-cloudformation) + * [AHA for users WITH AWS Organizations (Management Account)](#aha-with-aws-organizations-on-management-account-using-cloudformation) + * [AHA for users WITH AWS Organizations (Member Account)](#aha-with-aws-organizations-on-member-account-using-cloudformation) + - [Terraform](#terraform) + * [AHA for users WITHOUT AWS Organizations ](#aha-without-aws-organizations-using-terraform) + * [AHA for users WITH AWS Organizations (Management Account)](#aha-with-aws-organizations-on-management-account-using-terraform) + * [AHA for users WITH AWS Organizations (Member Account)](#aha-with-aws-organizations-on-member-account-using-terraform) +- [Updating using CloudFormation](#updating-using-cloudformation) +- [Updating using Terraform](#updating-using-terraform) +- [New Features](#new-features) +- [Troubleshooting](#troubleshooting) + +# Introduction +AWS Health Aware (AHA) is an automated notification tool for sending well-formatted AWS Health Alerts to Amazon Chime, Slack, Microsoft 
Teams, E-mail or an AWS EventBridge-compatible endpoint as long as you have Business or Enterprise Support. + +# What's New + +New in beta: Release 2.1 introduces an updated schema for Health events delivered to an EventBridge bus. This allows simplified matching of events which you can then consume with other AWS services or SaaS solutions. +Read more about the [new feature and how to filter events using EventBridge](https://github.com/aws-samples/aws-health-aware/blob/main/aha-2.1-beta/new_aha_event_schema.md). + +# Architecture + +## Single Region +![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images/aha-arch-single-region.png?raw=1) + +## Multi Region +![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images/aha-arch-multi-region.png?raw=1) + +## Created AWS Resources + +| Resource | Description | +| ------------- | ------------------------------ | +| `DynamoDBTable` | DynamoDB Table used to store Event ARNs, updates and TTL | +| `ChimeChannelSecret` | Webhook URL for Amazon Chime stored in AWS Secrets Manager | +| `EventBusNameSecret` | EventBus name for Amazon EventBridge stored in AWS Secrets Manager | +| `LambdaExecutionRole` | IAM role used for LambdaFunction | +| `LambdaFunction` | Main Lambda function that reads from AWS Health API, sends to endpoints and writes to DynamoDB | +| `LambdaSchedule` | Amazon EventBridge rule that runs every minute to invoke LambdaFunction | +| `LambdaSchedulePermission` | Lambda permission that allows LambdaSchedule to invoke LambdaFunction | +| `MicrosoftChannelSecret` | Webhook URL for Microsoft Teams stored in AWS Secrets Manager | +| `SlackChannelSecret` | Webhook URL for Slack stored in AWS Secrets Manager | + +# Configuring an Endpoint +AHA can send to multiple endpoints (webhook URLs, Email or EventBridge). To use any of these you'll need to set it up beforehand, as some of these are done on 3rd party websites. We'll go over some of the common ones here.
+ +## Creating a Amazon Chime Webhook URL +**You will need to have access to create a Amazon Chime room and manage webhooks.** + +1. Create a new [chat room](https://docs.aws.amazon.com/chime/latest/ug/chime-chat-room.html) for events (i.e. aws_events). +2. In the chat room created in step 1, **click** on the gear icon and **click** *manage webhooks and bots*. +3. **Click** *Add webhook*. +4. **Type** a name for the bot (e.g. AWS Health Bot) and **click** *Create*. +5. **Click** *Copy URL*, we will need it for the deployment. + +## Creating a Slack Webhook URL +**You will need to have access to add a new channel and app to your Slack Workspace**. + +*Webhook* +1. Create a new [channel](https://slack.com/help/articles/201402297-Create-a-channel) for events (i.e. aws_events) +2. In your browser go to: workspace-name.slack.com/apps where workspace-name is the name of your Slack Workspace. +3. In the search bar, search for: *Incoming Webhooks* and **click** on it. +4. **Click** on *Add to Slack*. +5. From the dropdown **click** on the channel your created in step 1 and **click** *Add Incoming Webhooks integration*. +6. From this page you can change the name of the webhook (i.e. AWS Bot), the icon/emoji to use, etc. +7. For the deployment we will need the *Webhook URL*. + +*Workflow* + +1. Create a new [channel](https://slack.com/help/articles/201402297-Create-a-channel) for events (i.e. aws_events) +2. Within Slack **click** on your workspace name drop down arrow in the upper left. **click on Tools > Workflow Builder** +3. **Click** Create in the upper right hand corner of the Workflow Builder and give your workflow a name **click** next. +4. **Click** on *select* next to **Webhook** and then **click** *add variable* add the following variables one at a time in the *Key* section. All *data type* will be *text*: +-text +-accounts +-resources +-service +-region +-status +-start_time +-event_arn +-updates +5. 
When done you should have 9 variables, double check them as they are case sensitive and will be referenced. When checked **click** on *done* and *next*. +6. **Click** on *add step* and then on the add a workflow step **click** *add* next to *send a message*. +7. Under *send this message to:* select the channel you created in Step 1. In *message text* you should recreate the following: +![](https://github.com/aws-samples/aws-health-aware/blob/main/readme-images//workflow.png?raw=1) +8. **Click** *save* and then **click** *publish* +9. For the deployment we will need the *Webhook URL*. + +## Creating a Microsoft Teams Webhook URL +**You will need to have access to add a new channel and app to your Microsoft Teams channel**. + +1. Create a new [channel](https://docs.microsoft.com/en-us/microsoftteams/get-started-with-teams-create-your-first-teams-and-channels) for events (e.g. aws_events) +2. Within your Microsoft Team go to *Apps* +3. In the search bar, search for: *Incoming Webhook* and **click** on it. +4. **Click** on *Add to team*. +5. **Type** in the name of the channel you created in step 1 and **click** *Set up a connector*. +6. From this page you can change the name of the webhook (e.g. AWS Bot), the icon/emoji to use, etc. **Click** *Create* when done. +7. For the deployment we will need the webhook *URL* that is presented. + +## Configuring an Email + +1. You'll be able to send email alerts to one or many addresses. However, you must first [verify](https://docs.aws.amazon.com/ses/latest/DeveloperGuide/verify-email-addresses-procedure.html) the email(s) in the Simple Email Service (SES) console. +2. AHA utilizes Amazon SES so all you need is to enter in a To: address and a From: address. +3. You *may* have to allow a rule in your environment so that the emails don't get labeled as SPAM. This will be something you have to configure on your own.
+ +## Creating a Amazon EventBridge Ingestion ARN +**Only required if you are going to be using EventBridge, you can create new with the instructions below or use an existing one**. + +1. In the AWS Console, search for **Amazon EventBridge**. +2. On the left hand side, **click** *Event buses*. +3. Under *Custom event* bus **click** *Create event bus* +4. Give your Event bus a name and **click** *Create*. +5. For the deployment we will need the *Name* of the Event bus **(not the ARN, e.g. aha-eb01)**. + +# Deployment Options + +## CloudFormation +There are 3 available ways to deploy AHA, all are done via the same CloudFormation template to make deployment as easy as possible. + +The 3 deployment methods for AHA are: + +1. [**AHA for users WITHOUT AWS Organizations**](#aha-without-aws-organizations-using-cloudformation): Users NOT using AWS Organizations. +2. [**AHA for users WITH AWS Organizations (Management Account)**](#aha-with-aws-organizations-on-management-account-using-cloudformation): Users who ARE using AWS Organizations and deploying in the top-level management account. +3. [**AHA for users WITH AWS Organizations (Member Account)**](#aha-with-aws-organizations-on-member-account-using-cloudformation): Users who ARE using AWS Organizations and deploying in a member account in the organization to assume a role in the top-level management account. + +## AHA Without AWS Organizations using CloudFormation + +### Prerequisites + +1. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) +2. Have access to deploy Cloudformation Templates with the following resources: AWS IAM policies, Amazon DynamoDB Tables, AWS Lambda, Amazon EventBridge and AWS Secrets Manager. +3. If using Multi-Region, you must deploy the following 2 CloudFormation templates to allow the Stackset deployment to deploy resources **even if you have full administrator privileges, you still need to follow these steps**. 
+ - In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetAdministrationRole.yml - this will allow CFT Stacksets to launch AHA in another region + - Launch the stack. + - In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetExecutionRole.yml - In *AdministratorAccountId* type in the 12 digit account number you're running the solution in (e.g. 000123456789) + - Launch the stack. + +### Deployment + +1. Clone the AHA package from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` +2. In the root of this package you'll have two files; `handler.py` and `messagegenerator.py`. Use your tool of choice to zip them both up and name them with a unique name (e.g. aha-v1.8.zip). **Note: Putting the version number in the name will make upgrading AHA seamless.** +3. Upload the .zip you created in Step 2 to an S3 bucket in the same region you plan to deploy this in. +4. In your AWS console go to *CloudFormation*. +5. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. +6. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_DEPLOY_AHA.yml` **Click** *Next*. + - In *Stack name* type a stack name (e.g. AHA-Deployment). + - In *AWSOrganizationsEnabled* leave it set to default which is `No`. If you do have AWS Organizations enabled and you want to aggregate across all your accounts, you should be following the steps for [AHA for users who ARE using AWS Organizations](#aha-with-aws-organizations-on-management-account-using-cloudformation) + - In *AWSOrganizationsEnabled* leave it set to default which is `No`.
If you do have AWS Organizations enabled and you want to aggregate across all your accounts, you should be following the steps for [AHA for users who ARE using AWS Organizations (Management Account)](#aha-with-aws-organizations-on-management-account-using-cloudformation) or [AHA for users WITH AWS Organizations (Member Account)](#aha-with-aws-organizations-on-member-account-using-cloudformation) + - In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues. + - In *S3Bucket* type ***just*** the bucket name of the S3 bucket used in step 3 (e.g. my-aha-bucket). + - In *S3Key* type ***just*** the name of the .zip file you created in Step 2 (e.g. aha-v1.8.zip). + - In the *Communications Channels* section enter the URLs, Emails and/or ARN of the endpoints you configured previously. + - In the *Email Setup* section enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. + - In *EventSearchBack* enter in the amount of hours you want to search back for events. Default is 1 hour. + - In *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated (e.g. us-east-1, us-east-2). + - In *ARN of the AWS Organizations Management Account assume role* leave it set to default None as this is only for customers using AWS Organizations. + - In *Deploy in secondary region?* select another region to deploy AHA in. Otherwise leave to default No. +7. Scroll to the bottom and **click** *Next*. +8. Scroll to the bottom and **click** *Next* again. +9. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. +10. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes or if deploying in a secondary region, it can take up to 30 minutes). + +## AHA With AWS Organizations on Management Account using CloudFormation + +### Prerequisites + +1. 
[Enable Health Organizational View](https://docs.aws.amazon.com/health/latest/ug/enable-organizational-view-in-health-console.html) from the console, so that you can aggregate all Personal Health Dashboard (PHD) events for all accounts in your AWS Organization. +2. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) +3. Have access to deploy Cloudformation Templates with the following resources: AWS IAM policies, Amazon DynamoDB Tables, AWS Lambda, Amazon EventBridge and AWS Secrets Manager in the **AWS Organizations Master Account**. +4. If using Multi-Region, you must deploy the following 2 CloudFormation templates to allow the Stackset deployment to deploy resources **even if you have full administrator privileges, you still need to follow these steps**. + - In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetAdministrationRole.yml - this will allows CFT Stacksets to launch AHA in another region + - Launch the stack. + - In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetExecutionRole.yml) - In *AdministratorAccountId* type in the 12 digit account number you're running the solution in (e.g. 000123456789) + - Launch the stack. + +### Deployment + +1. Clone the AHA package that from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` +2. In the root of this package you'll have two files; `handler.py` and `messagegenerator.py`. Use your tool of choice to zip them both up and name them with a unique name (e.g. aha-v1.8.zip). **Note: Putting the version number in the name will make upgrading AHA seamless.** +3. 
Upload the .zip you created in Step 2 to an S3 bucket in the same region you plan to deploy this in. +4. In your AWS console go to *CloudFormation*. +5. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. +6. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_DEPLOY_AHA.yml` **Click** *Next*. + - In *Stack name* type a stack name (e.g. AHA-Deployment). + - In *AWSOrganizationsEnabled* change the dropdown to `Yes`. If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations-using-cloudformation) + - In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues. + - In *S3Bucket* type ***just*** the bucket name of the S3 bucket used in step 3 (e.g. my-aha-bucket). + - In *S3Key* type ***just*** the name of the .zip file you created in Step 2 (e.g. aha-v1.8.zip). + - In the *Communications Channels* section enter the URLs, Emails and/or ARN of the endpoints you configured previously. + - In the *Email Setup* section enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. + - In *EventSearchBack* enter in the amount of hours you want to search back for events. Default is 1 hour. + - In *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated (e.g. us-east-1, us-east-2). + - In *ARN of the AWS Organizations Management Account assume role* leave it set to default None. + - In *Deploy in secondary region?* select another region to deploy AHA in. Otherwise leave to default No. +7. Scroll to the bottom and **click** *Next*. +8. Scroll to the bottom and **click** *Next* again. +9. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. +10.
Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes or if deploying in a secondary region, it can take up to 30 minutes). + +## AHA With AWS Organizations on Member Account using CloudFormation + +### Prerequisites + +1. [Enable Health Organizational View](https://docs.aws.amazon.com/health/latest/ug/enable-organizational-view-in-health-console.html) from the console, so that you can aggregate all Personal Health Dashboard (PHD) events for all accounts in your AWS Organization. +2. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) +3. Have access to deploy Cloudformation Templates with the following resource: AWS IAM policies in the **AWS Organizations Master Account**. +4. If using Multi-Region, you must deploy the following 2 CloudFormation templates in the **Member Account** to allow the Stackset deployment to deploy resources **even if you have full administrator privileges, you still need to follow these steps**. + - In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetAdministrationRole.yml - this will allows CFT Stacksets to launch AHA in another region + - Launch the stack. + - In CloudFormation Console create a stack with new resources from the following S3 URL: https://s3.amazonaws.com/cloudformation-stackset-sample-templates-us-east-1/AWSCloudFormationStackSetExecutionRole.yml) - In *AdministratorAccountId* type in the 12 digit account number you're running the solution in (e.g. 000123456789) + - Launch the stack. + +### Deployment + +1. Clone the AHA package that from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` +2. In your top-level management account AWS console go to *CloudFormation* +3. 
In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. +4. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_MGMT_ROLE.yml` **Click** *Next*. + - In *Stack name* type a stack name (e.g. aha-assume-role). + - In *OrgMemberAccountId* put in the account id of the member account you plan to run AHA in (e.g. 000123456789). +5. Scroll to the bottom and **click** *Next*. +6. Scroll to the bottom and **click** *Next* again. +7. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. +8. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 1-2 minutes). This will create an IAM role with the necessary AWS Organizations and AWS Health API permissions for the member account to assume. +9. In the *Outputs* tab, there will be a value for *AWSHealthAwareRoleForPHDEventsArn* (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201), copy that down as you will need it for step 14. +10. Back in the root of the package you downloaded/cloned you'll have two files; `handler.py` and `messagegenerator.py`. Use your tool of choice to zip them both up and name them with a unique name (e.g. aha-v1.8.zip). **Note: Putting the version number in the name will make upgrading AHA seamless.** +11. Upload the .zip you created in Step 10 to an S3 bucket in the same region you plan to deploy this in. +12. Log in to the member account you plan to deploy this in and in your AWS console go to *CloudFormation*. +13. In the *CloudFormation* console **click** *Create stack > With new resources (standard)*. +14. Under *Template Source* **click** *Upload a template file* and **click** *Choose file* and select `CFN_DEPLOY_AHA.yml` **Click** *Next*. + - In *Stack name* type a stack name (e.g. AHA-Deployment). + - In *AWSOrganizationsEnabled* change the dropdown to `Yes`.
If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations-using-cloudformation) + - In *AWSHealthEventType* select whether you want to receive *all* event types or *only* issues. + - In *S3Bucket* type ***just*** the bucket name of the S3 bucket used in step 11 (e.g. my-aha-bucket). + - In *S3Key* type ***just*** the name of the .zip file you created in Step 10 (e.g. aha-v1.8.zip). + - In the *Communications Channels* section enter the URLs, Emails and/or ARN of the endpoints you configured previously. + - In the *Email Setup* section enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. + - In *EventSearchBack* enter in the amount of hours you want to search back for events. Default is 1 hour. + - In *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated (e.g. us-east-1, us-east-2). + - In *ManagementAccountRoleArn* enter in the full IAM arn from step 9 (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201) + - In *Deploy in secondary region?* select another region to deploy AHA in. Otherwise leave to default No. +15. Scroll to the bottom and **click** *Next*. +16. Scroll to the bottom and **click** *Next* again. +17. Scroll to the bottom and **click** the *checkbox* and **click** *Create stack*. +18. Wait until *Status* changes to *CREATE_COMPLETE* (roughly 2-4 minutes or if deploying in a secondary region, it can take up to 30 minutes). + +## Terraform + +There are 3 available ways to deploy AHA, all are done via the same Terraform template to make deployment as easy as possible. + +**NOTE:** AHA code is tested with Terraform version v1.0.9, please make sure to have minimum terraform version of v1.0.9 installed. + +The 3 deployment methods for AHA are: + +1.
[**AHA for users NOT using AWS Organizations using Terraform**](#aha-without-aws-organizations-using-terraform): Users NOT using AWS Organizations. +2. [**AHA for users WITH AWS Organizations using Terraform (Management Account)**](#aha-with-aws-organizations-on-management-account-using-terraform): Users who ARE using AWS Organizations and deploying in the top-level management account. +3. [**AHA for users WITH AWS Organizations using Terraform (Member Account)**](#aha-with-aws-organizations-on-member-account-using-terraform): Users who ARE using AWS Organizations and deploying in a member account in the organization to assume a role in the top-level management account. + +## AHA Without AWS Organizations using Terraform + +### Prerequisites + +1. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) +2. Have access to deploy Terraform Templates with the following resources: AWS IAM policies, Amazon DynamoDB Tables, AWS Lambda, Amazon EventBridge and AWS Secrets Manager. + +**NOTE:** For Multi region deployment, DynamoDB table will be created with PAY_PER_REQUEST billing mode instead of PROVISIONED due to limitation with terraform. + +### Deployment - Terraform + +1. Clone the AHA package from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` +``` +$ git clone https://github.com/aws-samples/aws-health-aware.git +$ cd aws-health-aware/terraform/Terraform_DEPLOY_AHA +``` +2. Update parameters file **terraform.tfvars** as below + - *aha_primary_region* - change to region where you want to deploy AHA solution + - *aha_secondary_region* - Required if needed to deploy the AHA solution in multiple regions, change to another region (Secondary) where you want to deploy AHA solution, Otherwise leave to default empty value. + - *AWSOrganizationsEnabled* - Leave it to default which is `No`.
If you do have AWS Organizations enabled and you want to aggregate across all your accounts, you should be following the steps for [AHA for users who ARE using AWS Organizations (Management Account)](#aha-with-aws-organizations-on-management-account-using-terraform) or [AHA for users WITH AWS Organizations (Member Account)](#aha-with-aws-organizations-on-member-account-using-terraform) + - *AWSHealthEventType* - select whether you want to receive *all* event types or *only* issues. + - *Communications Channels* section - enter the URLs, Emails and/or ARN of the endpoints you configured previously. + - *Email Setup* section - enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. + - *EventSearchBack* - enter in the amount of hours you want to search back for events. Default is 1 hour. + - *Regions* - enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated (e.g. us-east-1, us-east-2). + - *ManagementAccountRoleArn* - Leave it at the default empty value + - *ExcludeAccountIDs* - type ***just*** the name of the .csv file you want to upload if needed to exclude accounts from monitoring, otherwise leave it empty. + - *ManagementAccountRoleArn* - In ARN of the AWS Organizations Management Account assume role leave it set to default None as this is only for customers using AWS Organizations. +3. Deploy the solution using terraform commands below. +``` +$ terraform init +$ terraform plan +$ terraform apply +``` + +## AHA WITH AWS Organizations on Management Account using Terraform + +1. [Enable Health Organizational View](https://docs.aws.amazon.com/health/latest/ug/enable-organizational-view-in-health-console.html) from the console, so that you can aggregate all Personal Health Dashboard (PHD) events for all accounts in your AWS Organization. +2.
Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) + +**NOTE:** For Multi region deployment, DynamoDB table will be created with PAY_PER_REQUEST billing mode instead of PROVISIONED due to limitation with terraform. + +### Deployment - Terraform + +1. Clone the AHA package from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` +``` +$ git clone https://github.com/aws-samples/aws-health-aware.git +$ cd aws-health-aware/terraform/Terraform_DEPLOY_AHA +``` +2. Update parameters file **terraform.tfvars** as below + - *aha_primary_region* - change to region where you want to deploy AHA solution + - *aha_secondary_region* - Required if needed to deploy the AHA solution in multiple regions, change to another region (Secondary) where you want to deploy AHA solution, Otherwise leave to default empty value. + - *AWSOrganizationsEnabled* - change the value to `Yes`. If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations-using-terraform) + - *AWSHealthEventType* - select whether you want to receive *all* event types or *only* issues. + - *Communications Channels* section - enter the URLs, Emails and/or ARN of the endpoints you configured previously. + - *Email Setup* section - enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is. + - *EventSearchBack* - enter in the amount of hours you want to search back for events. Default is 1 hour. + - *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated (e.g. us-east-1, us-east-2).
+ - *ManagementAccountRoleArn* - Leave it at the default empty value + - *S3Bucket* - type ***just*** the name of the S3 bucket where you uploaded the exclude .csv file. Leave it empty if the exclude-accounts feature is not used. + - *ExcludeAccountIDs* - type ***just*** the name of the .csv file you want to upload if needed to exclude accounts from monitoring, otherwise leave it empty. + - *ManagementAccountRoleArn* - In ARN of the AWS Organizations Management Account assume role leave it set to default None, unless you are using a member account instead of the management account. Instructions for this configuration are in the next section. +3. Deploy the solution using terraform commands below. +``` +$ terraform init +$ terraform plan +$ terraform apply +``` + +## AHA WITH AWS Organizations on Member Account using Terraform + +1. [Enable Health Organizational View](https://docs.aws.amazon.com/health/latest/ug/enable-organizational-view-in-health-console.html) from the console, so that you can aggregate all Personal Health Dashboard (PHD) events for all accounts in your AWS Organization. +2. Have at least 1 [endpoint](#configuring-an-endpoint) configured (you can have multiple) + +**NOTE:** For Multi region deployment, DynamoDB table will be created with PAY_PER_REQUEST billing mode instead of PROVISIONED due to limitation with terraform. + +### Deployment - Terraform + +1. Clone the AHA package from this repository. If you're not familiar with the process, [here](https://git-scm.com/docs/git-clone) is some documentation. The URL to clone is in the upper right-hand corner labeled `Clone uri` +``` +$ git clone https://github.com/aws-samples/aws-health-aware.git +``` +2.
In your top-level management account deploy terraform module Terraform_MGMT_ROLE.tf to create Cross-Account Role for PHD access +``` +$ cd aws-health-aware/terraform/Terraform_MGMT_ROLE +$ terraform init +$ terraform plan +$ terraform apply + Input *OrgMemberAccountId* Enter the account id of the member account you plan to run AHA in (e.g. 000123456789). +``` +3. Wait for deployment to complete. This will create an IAM role with the necessary AWS Organizations and AWS Health API permissions for the member account to assume. Note the **AWSHealthAwareRoleForPHDEventsArn** role ARN; it will be used when deploying the solution in the member account. +4. In the *Outputs* section, there will be a value for *AWSHealthAwareRoleForPHDEventsArn* (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201), copy that down as you will need to update params file (variable ManagementAccountRoleArn). +5. Change directory to **terraform/Terraform_DEPLOY_AHA** to deploy the solution +6. Update parameters file **terraform.tfvars** as below + - *aha_primary_region* - change to region where you want to deploy AHA solution + - *aha_secondary_region* - Required if needed to deploy the AHA solution in multiple regions, change to another region (Secondary) where you want to deploy AHA solution, Otherwise leave to default empty value. + - *AWSOrganizationsEnabled* - change the value to `Yes`. If you do NOT have AWS Organizations enabled you should be following the steps for [AHA for users who are NOT using AWS Organizations](#aha-without-aws-organizations-using-terraform) + - *AWSHealthEventType* - select whether you want to receive *all* event types or *only* issues. + - *Communications Channels* section - enter the URLs, Emails and/or ARN of the endpoints you configured previously. + - *Email Setup* section - enter the From and To Email addresses as well as the Email subject. If you aren't configuring email, just leave it as is.
+ - *EventSearchBack* - enter in the amount of hours you want to search back for events. Default is 1 hour. + - *Regions* enter in the regions you want to search for events in. Default is all regions. You can filter for up to 10, comma separated (e.g. us-east-1, us-east-2). + - *ManagementAccountRoleArn* - Enter in the full IAM arn from step 4 (e.g. arn:aws:iam::000123456789:role/aha-org-role-AWSHealthAwareRoleForPHDEvents-ABCSDE12201) + - *S3Bucket* - type ***just*** the name of the S3 bucket where you uploaded the exclude .csv file. Leave it empty if the exclude-accounts feature is not used. + - *ExcludeAccountIDs* - type ***just*** the name of the .csv file you want to upload if needed to exclude accounts from monitoring, otherwise leave it empty. +7. Deploy the solution using terraform commands below. +``` +$ terraform init +$ terraform plan +$ terraform apply +``` + +# Updating using CloudFormation +**Until this project is migrated to the AWS Serverless Application Model (SAM), updates will have to be done as described below:** +1. Download the updated CloudFormation Template .yml file and 2 `.py` files. +2. Zip up the 2 `.py` files and name the .zip with a different version number than before (e.g. if the .zip you originally uploaded is aha-v1.8.zip the new one should be aha-v1.9.zip) +3. In the AWS CloudFormation console **click** on the name of your stack, then **click** *Update*. +4. In the *Prepare template* section **click** *Replace current template*, **click** *Upload a template file*, **click** *Choose file*, select the newer `CFN_DEPLOY_AHA.yml` file you downloaded and finally **click** *Next*. +5. In the *S3Key* text box change the version number in the name of the .zip to match name of the .zip you uploaded in Step 2 (The name of the .zip has to be different for CloudFormation to recognize a change). **Click** *Next*. +6. At the next screen **click** *Next* and finally **click** *Update stack*.
This will now upgrade your environment to the latest version you downloaded. + +**If for some reason, you still have issues after updating, you can easily just delete the stack and redeploy. The infrastructure can be destroyed and rebuilt within minutes through CloudFormation.** + +# Updating using Terraform +**Until this project is migrated to the AWS Serverless Application Model (SAM), updates will have to be done as described below:** +1. Pull the latest code from git repository for AHA. +2. Update the parameters file terraform.tfvars per your requirement +3. Copy the terraform template files to directory where your previous state exists +4. Deploy the templates as below +``` +$ cd aws-health-aware +$ git pull https://github.com/aws-samples/aws-health-aware.git +$ cd terraform/Terraform_DEPLOY_AHA +$ terraform init +$ terraform plan - This command should show any difference existing config and latest code. +$ terraform apply +``` + +**If for some reason, you still have issues after updating, you can easily just delete the stack and redeploy. The infrastructure can be destroyed and rebuilt within minutes through Terraform.** + +# New Features +We are happy to announce the launch of new enhancements to AHA. Please try them out and keep sending us your feedback! +1. Available in the beta, a revised schema for AHA events sent to EventBridge which enables new filtering and routing options. See the [new AHA event schema readme](new_aha_event_schema.md) for more detail. +2. Multi-region deployment option +3. Updated file names for improved clarity +4. Ability to filter accounts (Refer to AccountIDs CFN parameter for more info on how to exclude accounts from AHA notifications) +4. Ability to view Account Names for a given Account ID in the PHD alerts +5. If you are running AHA with the Non-Org mode, AHA will send the Account #' and resource(s) impacts if applicable for a given alert +6. Ability to deploy AHA with the Org mode on a member account +7. 
Support for a new Health Event Type - "Investigation" +8. Terraform support to deploy the solution + +# Troubleshooting +* If for whatever reason you need to update the Webhook URL; just update the CloudFormation or terraform Template with the new Webhook URL. +* If you are expecting an event and it did not show up it may be an oddly formed event. Take a look at *CloudWatch > Log groups* and search for the name of your Lambda function. See what the error is and reach out to us [email](mailto:aha-builders@amazon.com) for help. +* If for any errors related to duplicate secrets during deployment, try deleting manually and redeploy the solution. Example command to delete SlackChannelID secret in us-east-1 region. +``` +$ aws secretsmanager delete-secret --secret-id SlackChannelID --force-delete-without-recovery --region us-east-1 +``` \ No newline at end of file diff --git a/aha-2.1-beta/handler.py b/aha-2.1-beta/handler.py new file mode 100644 index 0000000..951871a --- /dev/null +++ b/aha-2.1-beta/handler.py @@ -0,0 +1,901 @@ +import json +import boto3 +import os +import re +import time +import decimal +import socket +import configparser +from dateutil import parser +from datetime import datetime, timedelta +from urllib.parse import urlencode +from urllib.request import Request, urlopen, URLError, HTTPError +from botocore.config import Config +from botocore.exceptions import ClientError +from boto3.dynamodb.conditions import Key, Attr +from messagegenerator import get_message_for_slack, get_org_message_for_slack, get_message_for_chime, \ + get_org_message_for_chime, \ + get_message_for_teams, get_org_message_for_teams, get_message_for_email, get_org_message_for_email, \ + get_detail_for_eventbridge + +# query active health API endpoint +health_dns = socket.gethostbyname_ex('global.health.amazonaws.com') +(current_endpoint, global_endpoint, ip_endpoint) = health_dns +health_active_list = current_endpoint.split('.') +health_active_region = health_active_list[1] 
print("current health region: ", health_active_region)

# create a boto3 health client w/ backoff/retry
config = Config(
    region_name=health_active_region,
    retries=dict(
        # org view apis have a lower tps than the single account apis so we
        # need to use larger backoff/retry values than the boto defaults
        max_attempts=10
    )
)


# TODO decide if account_name should be blank on error
def get_account_name(account_id):
    """Return the AWS Organizations account name for ``account_id``.

    The lookup goes through an ``organizations`` client obtained from
    ``get_sts_token``. If ``describe_account`` fails for any reason the
    account id itself is returned instead of a name.
    """
    org_client = get_sts_token('organizations')
    try:
        account_name = org_client.describe_account(AccountId=account_id)['Account']['Name']
    except Exception:
        account_name = account_id
    return account_name


def send_alert(event_details, affected_accounts, affected_entities, event_type):
    """Send a (non-organization) health alert to every configured channel.

    Channels are tried in this order: EventBridge, Slack webhook, Slack
    workflow, Teams, email (SES) and Chime. A channel is used only when its
    secret / environment value looks configured. A failure in one channel is
    logged and does not stop the remaining channels.

    Args:
        event_details: parsed ``describe_event_details`` response.
        affected_accounts: list of account strings shown in the message.
        affected_entities: list of affected-entity dicts for the event.
        event_type: ``"create"`` or ``"resolve"``.
    """
    # Read the secrets once: every get_secrets() call performs a full round
    # of Secrets Manager lookups.
    secrets = get_secrets()
    slack_url = secrets["slack"]
    teams_url = secrets["teams"]
    chime_url = secrets["chime"]
    event_bus_name = secrets["eventbusname"]
    sender = os.environ['FROM_EMAIL']
    recipient = os.environ['TO_EMAIL']

    # get the list of resources from the array of affected entities
    resources = get_resources_from_entities(affected_entities)

    def _attempt(channel, deliver):
        # Best-effort delivery: log the failure and carry on with the next channel.
        try:
            deliver()
        except HTTPError as e:
            print(f"Got an error while sending message to {channel}: ", e.code, e.reason)
        except URLError as e:
            print("Server connection failed: ", e.reason)
        except ClientError as e:
            # boto3 calls (SES, EventBridge) raise ClientError, not URLError
            print(f"Got an error while sending message to {channel}: ", e.response['Error'])

    if "None" not in event_bus_name:
        print("Sending the alert to Event Bridge")
        _attempt("EventBridge", lambda: send_to_eventbridge(
            get_detail_for_eventbridge(event_details, affected_entities),
            event_type, resources, event_bus_name))

    if "hooks.slack.com/services" in slack_url:
        print("Sending the alert to Slack Webhook Channel")
        _attempt("Slack", lambda: send_to_slack(
            get_message_for_slack(event_details, event_type, affected_accounts, resources,
                                  slack_webhook="webhook"),
            slack_url))

    if "hooks.slack.com/workflows" in slack_url:
        print("Sending the alert to Slack Workflows Channel")
        _attempt("Slack", lambda: send_to_slack(
            get_message_for_slack(event_details, event_type, affected_accounts, resources,
                                  slack_webhook="workflow"),
            slack_url))

    if "office.com/webhook" in teams_url:
        print("Sending the alert to Teams")
        _attempt("Teams", lambda: send_to_teams(
            get_message_for_teams(event_details, event_type, affected_accounts, resources),
            teams_url))

    # validate sender and recipient's email addresses: both must be set and
    # neither may still be the "none@domain.com" placeholder
    placeholder = "none@domain.com"
    if sender and recipient and placeholder not in sender and placeholder not in recipient:
        print("Sending the alert to the emails")
        _attempt("Email", lambda: send_email(event_details, event_type, affected_accounts, resources))

    if "hooks.chime.aws/incomingwebhooks" in chime_url:
        print("Sending the alert to Chime channel")
        _attempt("Chime", lambda: send_to_chime(
            get_message_for_chime(event_details, event_type, affected_accounts, resources),
            chime_url))
def send_org_alert(event_details, affected_org_accounts, affected_org_entities, event_type):
    """Send an organization-view health alert to every configured channel.

    Channels are tried in this order: EventBridge, Slack webhook, Slack
    workflow, Teams, email (SES) and Chime. A channel is used only when its
    secret / environment value looks configured. A failure in one channel is
    logged and does not stop the remaining channels.

    Args:
        event_details: parsed event-details response for the event.
        affected_org_accounts: list of account strings shown in the message.
        affected_org_entities: list of affected-entity dicts for the event.
        event_type: ``"create"`` or ``"resolve"``.
    """
    # Read the secrets once: every get_secrets() call performs a full round
    # of Secrets Manager lookups.
    secrets = get_secrets()
    slack_url = secrets["slack"]
    teams_url = secrets["teams"]
    chime_url = secrets["chime"]
    event_bus_name = secrets["eventbusname"]
    sender = os.environ['FROM_EMAIL']
    recipient = os.environ['TO_EMAIL']

    # get the list of resources from the array of affected entities
    resources = get_resources_from_entities(affected_org_entities)

    def _attempt(channel, deliver):
        # Best-effort delivery: log the failure and carry on with the next channel.
        try:
            deliver()
        except HTTPError as e:
            print(f"Got an error while sending message to {channel}: ", e.code, e.reason)
        except URLError as e:
            print("Server connection failed: ", e.reason)
        except ClientError as e:
            # boto3 calls (SES, EventBridge) raise ClientError, not URLError
            print(f"Got an error while sending message to {channel}: ", e.response['Error'])

    if "None" not in event_bus_name:
        print("Sending the org alert to Event Bridge")
        _attempt("EventBridge", lambda: send_to_eventbridge(
            get_detail_for_eventbridge(event_details, affected_org_entities),
            event_type, resources, event_bus_name))

    if "hooks.slack.com/services" in slack_url:
        print("Sending the alert to Slack Webhook Channel")
        _attempt("Slack", lambda: send_to_slack(
            get_org_message_for_slack(event_details, event_type, affected_org_accounts, resources,
                                      slack_webhook="webhook"),
            slack_url))

    if "hooks.slack.com/workflows" in slack_url:
        print("Sending the alert to Slack Workflow Channel")
        _attempt("Slack", lambda: send_to_slack(
            get_org_message_for_slack(event_details, event_type, affected_org_accounts, resources,
                                      slack_webhook="workflow"),
            slack_url))

    if "office.com/webhook" in teams_url:
        print("Sending the alert to Teams")
        _attempt("Teams", lambda: send_to_teams(
            get_org_message_for_teams(event_details, event_type, affected_org_accounts, resources),
            teams_url))

    # validate sender and recipient's email addresses: both must be set and
    # neither may still be the "none@domain.com" placeholder
    placeholder = "none@domain.com"
    if sender and recipient and placeholder not in sender and placeholder not in recipient:
        print("Sending the alert to the emails")
        _attempt("Email", lambda: send_org_email(event_details, event_type, affected_org_accounts, resources))

    if "hooks.chime.aws/incomingwebhooks" in chime_url:
        print("Sending the alert to Chime channel")
        _attempt("Chime", lambda: send_to_chime(
            get_org_message_for_chime(event_details, event_type, affected_org_accounts, resources),
            chime_url))
def _post_json(payload, webhookurl):
    """POST ``payload`` as JSON to ``webhookurl``; log (do not raise) URL/HTTP errors."""
    req = Request(webhookurl, data=json.dumps(payload).encode("utf-8"),
                  headers={'content-type': 'application/json'})
    try:
        # the context manager closes the connection once the body is drained
        with urlopen(req) as response:
            response.read()
    except HTTPError as e:
        print("Request failed : ", e.code, e.reason)
    except URLError as e:
        print("Server connection failed: ", e.reason)


def send_to_slack(message, webhookurl):
    """POST the already-built Slack ``message`` to the Slack webhook URL."""
    _post_json(message, webhookurl)


def send_to_chime(message, webhookurl):
    """POST ``message`` to the Chime webhook, wrapped as ``{'Content': message}``."""
    _post_json({'Content': message}, webhookurl)


def send_to_teams(message, webhookurl):
    """POST the already-built Teams ``message`` to the Teams webhook URL."""
    _post_json(message, webhookurl)


def send_email(event_details, eventType, affected_accounts, affected_entities):
    """Send the (non-organization) alert as an HTML email through Amazon SES.

    Sender, recipients, subject and region come from the ``FROM_EMAIL``,
    ``TO_EMAIL`` (comma separated), ``EMAIL_SUBJECT`` and ``AWS_REGION``
    environment variables. Errors from SES propagate to the caller.
    """
    sender = os.environ['FROM_EMAIL']
    # tolerate "a@x.com, b@x.com": drop surrounding blanks and empty items
    recipients = [address.strip() for address in os.environ['TO_EMAIL'].split(",") if address.strip()]
    aws_region = os.environ['AWS_REGION']
    subject = os.environ['EMAIL_SUBJECT']
    body_html = get_message_for_email(event_details, eventType, affected_accounts, affected_entities)
    client = boto3.client('ses', region_name=aws_region)
    client.send_email(
        Source=sender,
        Destination={
            'ToAddresses': recipients
        },
        Message={
            'Body': {
                'Html': {
                    'Data': body_html
                },
            },
            'Subject': {
                'Charset': 'UTF-8',
                'Data': subject,
            },
        },
    )
def send_org_email(event_details, eventType, affected_org_accounts, affected_org_entities):
    """Send the organization-view alert as an HTML email through Amazon SES.

    Sender, recipients, subject and region come from the ``FROM_EMAIL``,
    ``TO_EMAIL`` (comma separated), ``EMAIL_SUBJECT`` and ``AWS_REGION``
    environment variables. Errors from SES propagate to the caller.
    """
    sender = os.environ['FROM_EMAIL']
    # tolerate "a@x.com, b@x.com": drop surrounding blanks and empty items
    recipients = [address.strip() for address in os.environ['TO_EMAIL'].split(",") if address.strip()]
    aws_region = os.environ['AWS_REGION']
    subject = os.environ['EMAIL_SUBJECT']
    body_html = get_org_message_for_email(event_details, eventType, affected_org_accounts, affected_org_entities)
    client = boto3.client('ses', region_name=aws_region)
    client.send_email(
        Source=sender,
        Destination={
            'ToAddresses': recipients
        },
        Message={
            'Body': {
                'Html': {
                    'Data': body_html
                },
            },
            'Subject': {
                'Charset': 'UTF-8',
                'Data': subject,
            },
        },
    )


# non-organization view affected accounts
def get_health_accounts(health_client, event, event_arn):
    """Return the distinct account ids that own entities affected by ``event_arn``.

    Every entity on every ``describe_affected_entities`` page is inspected, so
    a page without entities no longer discards what was already collected and
    accounts beyond the first entity of a page are no longer missed. Order of
    first appearance is preserved. ``event`` is accepted for interface
    compatibility and not used.
    """
    affected_accounts = []
    paginator = health_client.get_paginator('describe_affected_entities')
    pages = paginator.paginate(
        filter={
            'eventArns': [
                event_arn
            ]
        }
    )
    for page in pages:
        for entity in page.get('entities', []):
            account_id = entity.get('awsAccountId')
            if account_id and account_id not in affected_accounts:
                affected_accounts.append(account_id)
    return affected_accounts


# organization view affected accounts
def get_health_org_accounts(health_client, event, event_arn):
    """Return all account ids reported by ``describe_affected_accounts_for_organization``.

    The ``affectedAccounts`` lists of all pages are concatenated in page order.
    ``event`` is accepted for interface compatibility and not used.
    """
    affected_org_accounts = []
    paginator = health_client.get_paginator('describe_affected_accounts_for_organization')
    for page in paginator.paginate(eventArn=event_arn):
        affected_org_accounts.extend(page['affectedAccounts'])
    return affected_org_accounts
# get the array of affected entities for all affected accounts and return as an array of JSON objects
def get_affected_entities(health_client, event_arn, affected_accounts, is_org_mode):
    """Collect the affected entities of ``event_arn`` as a list of plain dicts.

    In organization mode one ``describe_affected_entities_for_organization``
    query is issued per account and each entity gets an ``awsAccountName``
    key. In non-organization mode the ``describe_affected_entities`` filter is
    not account scoped, so a single query is issued (the original repeated
    the identical query once per account, duplicating every entity).

    ``entityArn``, ``eventArn`` and ``lastUpdatedTime`` are dropped from each
    entity. Returns ``[]`` when ``affected_accounts`` is empty.
    """
    if is_org_mode:
        operation = 'describe_affected_entities_for_organization'
        queries = [
            {'organizationEntityFilters': [{'awsAccountId': account, 'eventArn': event_arn}]}
            for account in affected_accounts
        ]
    else:
        operation = 'describe_affected_entities'
        queries = [{'filter': {'eventArns': [event_arn]}}] if affected_accounts else []

    affected_entity_array = []
    account_names = {}  # account id -> name, so each account is looked up only once

    for query in queries:
        paginator = health_client.get_paginator(operation)
        for page in paginator.paginate(**query):
            # round-trip through JSON so datetimes become strings
            parsed_page = json.loads(json.dumps(page, default=myconverter))
            for entity in parsed_page['entities']:
                # entityArn: avoid confusion with the arn of the entityValue (not present)
                # eventArn: duplicate of detail.arn
                # lastUpdatedTime: removed for brevity
                for key in ("entityArn", "eventArn", "lastUpdatedTime"):
                    entity.pop(key, None)
                if is_org_mode:
                    account_id = entity['awsAccountId']
                    if account_id not in account_names:
                        account_names[account_id] = get_account_name(account_id)
                    entity['awsAccountName'] = account_names[account_id]
                affected_entity_array.append(entity)

    return affected_entity_array
# COMMON
def get_resources_from_entities(affected_entity_array):
    """Return the entity values to list as resources in chat channels.

    Skipped entries: ``"UNKNOWN"`` (a public / non-account-specific event with
    no resources), ``"AWS_ACCOUNT"`` and values equal to the entity's own
    account id (accounts are handled separately for chat applications).
    Entities lacking ``entityValue`` are skipped, and a missing
    ``awsAccountId`` no longer raises ``KeyError``.
    """
    resources = []
    for entity in affected_entity_array:
        value = entity.get('entityValue')
        if value is None or value in ("UNKNOWN", "AWS_ACCOUNT"):
            continue
        if value == entity.get('awsAccountId'):
            continue
        resources.append(value)
    return resources


# For Customers using AWS Organizations
def update_org_ddb(event_arn, str_update, status_code, event_details, affected_org_accounts, affected_org_entities):
    """Persist the event state in DynamoDB and alert when it is new or changed.

    The record keyed by ``event_arn`` is (re)written and ``send_org_alert`` is
    called when no record exists yet, or when the stored ``lastUpdatedTime``
    differs from ``str_update`` AND the status code, latest description or
    affected account list changed. A ``ClientError`` while reading the record
    is logged and ends the call without alerting.
    """
    # open dynamoDB
    dynamodb = boto3.resource("dynamodb")
    aha_ddb_table = dynamodb.Table(os.environ['DYNAMODB_TABLE'])
    event_latestDescription = event_details['successfulSet'][0]['eventDescription']['latestDescription']

    # set time parameters
    delta_hours_sec = int(os.environ['EVENT_SEARCH_BACK']) * 3600
    srt_ddb_format_full = "%Y-%m-%d %H:%M:%S"
    # epoch seconds as a string; time.time() replaces the platform-specific
    # strftime('%s') directive
    sec_now = str(int(time.time()))

    # check if event arn already exists
    try:
        response = aha_ddb_table.get_item(
            Key={
                'arn': event_arn
            }
        )
    except ClientError as e:
        print(e.response['Error']['Message'])
        return

    item = response.get('Item')
    if item is None:
        print(datetime.now().strftime(srt_ddb_format_full) + ": record not found")
    elif item['lastUpdatedTime'] != str_update and (item['statusCode'] != status_code or
                                                    item['latestDescription'] != event_latestDescription or
                                                    item['affectedAccountIDs'] != affected_org_accounts):
        print(datetime.now().strftime(srt_ddb_format_full) + ": last Update is different")
    else:
        print("No new updates found, checking again in 1 minute.")
        return

    # write to dynamodb
    aha_ddb_table.put_item(
        Item={
            'arn': event_arn,
            'lastUpdatedTime': str_update,
            'added': sec_now,
            # Cleanup: DynamoDB entry deleted 24 hours after last update
            'ttl': int(sec_now) + delta_hours_sec + 86400,
            'statusCode': status_code,
            'affectedAccountIDs': affected_org_accounts,
            'latestDescription': event_latestDescription
        }
    )
    affected_org_accounts_details = [
        f"{get_account_name(account_id)} ({account_id})" for account_id in affected_org_accounts]

    # send to configured endpoints
    event_type = "create" if status_code != "closed" else "resolve"
    send_org_alert(event_details, affected_org_accounts_details, affected_org_entities, event_type=event_type)
# For Customers not using AWS Organizations
def update_ddb(event_arn, str_update, status_code, event_details, affected_accounts, affected_entities):
    """Persist the event state in DynamoDB and alert when it is new or changed.

    The record keyed by ``event_arn`` is (re)written and ``send_alert`` is
    called when no record exists yet, or when the stored ``lastUpdatedTime``
    differs from ``str_update`` AND the status code, latest description or
    affected account list changed. A ``ClientError`` while reading the record
    is logged and ends the call without alerting.
    """
    # open dynamoDB
    dynamodb = boto3.resource("dynamodb")
    aha_ddb_table = dynamodb.Table(os.environ['DYNAMODB_TABLE'])
    event_latestDescription = event_details['successfulSet'][0]['eventDescription']['latestDescription']

    # set time parameters
    delta_hours_sec = int(os.environ['EVENT_SEARCH_BACK']) * 3600
    srt_ddb_format_full = "%Y-%m-%d %H:%M:%S"
    # epoch seconds as a string; time.time() replaces the platform-specific
    # strftime('%s') directive
    sec_now = str(int(time.time()))

    # check if event arn already exists
    try:
        response = aha_ddb_table.get_item(
            Key={
                'arn': event_arn
            }
        )
    except ClientError as e:
        print(e.response['Error']['Message'])
        return

    item = response.get('Item')
    if item is None:
        is_new_record = True
        print(datetime.now().strftime(srt_ddb_format_full) + ": record not found")
    elif item['lastUpdatedTime'] != str_update and (item['statusCode'] != status_code or
                                                    item['latestDescription'] != event_latestDescription or
                                                    item['affectedAccountIDs'] != affected_accounts):
        is_new_record = False
        print(datetime.now().strftime(srt_ddb_format_full) + ": last Update is different")
    else:
        print("No new updates found, checking again in 1 minute.")
        return

    # write to dynamodb
    aha_ddb_table.put_item(
        Item={
            'arn': event_arn,
            'lastUpdatedTime': str_update,
            'added': sec_now,
            # Cleanup: DynamoDB entry deleted 24 hours after last update
            'ttl': int(sec_now) + delta_hours_sec + 86400,
            'statusCode': status_code,
            'affectedAccountIDs': affected_accounts,
            'latestDescription': event_latestDescription
        }
    )

    # NOTE(review): a first alert lists raw account ids while an update alert
    # lists "name (id)"; kept as in the original - confirm which is intended.
    if is_new_record:
        affected_accounts_details = affected_accounts
    else:
        affected_accounts_details = [
            f"{get_account_name(account_id)} ({account_id})" for account_id in affected_accounts]

    # send to configured endpoints
    event_type = "create" if status_code != "closed" else "resolve"
    send_alert(event_details, affected_accounts_details, affected_entities, event_type=event_type)
def get_secrets():
    """Read the AHA configuration secrets from AWS Secrets Manager.

    Looks up the ``MicrosoftChannelID``, ``SlackChannelID``, ``ChimeChannelID``,
    ``AssumeRoleArn`` and ``EventBusName`` secrets in the region given by the
    ``AWS_REGION`` environment variable.

    Returns:
        dict with keys ``teams``, ``slack``, ``chime``, ``eventbusname`` and
        ``ahaassumerole``. A value is the secret string, or the string
        ``"None"`` when the lookup raised ``ClientError`` or the response had
        no ``SecretString``.
    """
    region_name = os.environ['AWS_REGION']

    # create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )

    def _read_secret(secret_id, label):
        # One lookup; "None" (the string) marks a secret that is not usable.
        try:
            response = client.get_secret_value(SecretId=secret_id)
        except ClientError as e:
            if e.response['Error']['Code'] == 'AccessDeniedException':
                print(f"No AWS Secret configured for {label}, skipping")
            else:
                print(f"There was an error with the {label} secret: ", e.response)
            return "None"
        if 'SecretString' in response:
            return response['SecretString']
        return "None"

    # Iteration through the configured AWS Secrets (same order as before)
    teams_channel_id = _read_secret("MicrosoftChannelID", "Teams")
    slack_channel_id = _read_secret("SlackChannelID", "Slack")
    chime_channel_id = _read_secret("ChimeChannelID", "Chime")
    assumerole_channel_id = _read_secret("AssumeRoleArn", "Assume Role")
    eventbus_channel_id = _read_secret("EventBusName", "EventBridge")

    secrets = {
        "teams": teams_channel_id,
        "slack": slack_channel_id,
        "chime": chime_channel_id,
        "eventbusname": eventbus_channel_id,
        "ahaassumerole": assumerole_channel_id
    }
    # uncomment below to verify secrets values
    #print("Secrets: ",secrets)
    return secrets
def describe_events(health_client):
    """Poll AWS Health (account view) and hand every recent event to ``update_ddb``.

    Events updated within the last ``EVENT_SEARCH_BACK`` hours are requested,
    optionally restricted by ``HEALTH_EVENT_TYPE`` ("issue" selects the
    ``issue`` and ``investigation`` categories) and ``REGIONS`` (comma
    separated, or "all regions"). Events whose details cannot be retrieved
    are logged and skipped.
    """
    # set hours to search back in time for events
    delta_hours = int(os.environ['EVENT_SEARCH_BACK'])
    health_event_type = os.environ['HEALTH_EVENT_TYPE']
    time_delta = (datetime.now() - timedelta(hours=delta_hours))
    print("Searching for events and updates made after: ", time_delta)
    dict_regions = os.environ['REGIONS']

    str_filter = {
        'lastUpdatedTimes': [
            {
                'from': time_delta
            }
        ]
    }

    if health_event_type == "issue":
        str_filter['eventTypeCategories'] = ['issue', 'investigation']
        print("AHA will be monitoring events with event type categories 'issue' and 'investigation' only!")

    if dict_regions != "all regions":
        dict_regions = [region.strip() for region in dict_regions.split(',')]
        print("AHA will monitor for events only in the selected regions: ", dict_regions)
        str_filter['regions'] = dict_regions

    event_paginator = health_client.get_paginator('describe_events')
    for response in event_paginator.paginate(filter=str_filter):
        # round-trip through JSON so datetimes become strings
        aws_events = json.loads(json.dumps(response.get('events', []), default=myconverter))
        print('Event(s) Received: ', json.dumps(aws_events))
        if not aws_events:
            print("No events found in time frame, checking again in 1 minute.")
            continue

        for event in aws_events:  # there are new event(s) from AWS
            event_arn = event['arn']
            status_code = event['statusCode']
            # epoch seconds as a string; timestamp() replaces the
            # platform-specific strftime('%s') directive
            str_update = str(int(parser.parse(event['lastUpdatedTime']).timestamp()))

            # get event details first: without them nothing else is needed
            event_details = json.loads(
                json.dumps(describe_event_details(health_client, event_arn), default=myconverter))
            print("Event Details: ", event_details)
            if event_details['successfulSet'] == []:
                # account-view failedSet entries carry no awsAccountId, so read defensively
                failure = (event_details.get('failedSet') or [{}])[0]
                print("An error occured with account:", failure.get('awsAccountId', 'n/a'), "due to:",
                      failure.get('errorName'), ":",
                      failure.get('errorMessage'))
                continue

            # get non-organizational view requirements
            affected_accounts = get_health_accounts(health_client, event, event_arn)
            affected_entities = get_affected_entities(health_client, event_arn, affected_accounts,
                                                      is_org_mode=False)

            # write to dynamoDB for persistence
            update_ddb(event_arn, str_update, status_code, event_details, affected_accounts, affected_entities)
def describe_org_events(health_client):
    """Poll AWS Health (organization view) and hand recent events to ``update_org_ddb``.

    Events updated within the last ``EVENT_SEARCH_BACK`` hours are requested,
    optionally restricted by ``HEALTH_EVENT_TYPE`` ("issue" selects the
    ``issue`` and ``investigation`` categories) and ``REGIONS``. When
    ``ACCOUNT_IDS`` is set (neither "None" nor empty), accounts listed by
    ``getAccountIDs`` are removed from each event; an event whose accounts
    are all excluded is skipped. Events whose details cannot be retrieved are
    logged and skipped.
    """
    # set hours to search back in time for events
    delta_hours = int(os.environ['EVENT_SEARCH_BACK'])
    health_event_type = os.environ['HEALTH_EVENT_TYPE']
    dict_regions = os.environ['REGIONS']
    time_delta = (datetime.now() - timedelta(hours=delta_hours))
    print("Searching for events and updates made after: ", time_delta)

    str_filter = {
        'lastUpdatedTime': {
            'from': time_delta
        }
    }

    if health_event_type == "issue":
        str_filter['eventTypeCategories'] = ['issue', 'investigation']
        print("AHA will be monitoring events with event type categories 'issue' and 'investigation' only!")

    if dict_regions != "all regions":
        dict_regions = [region.strip() for region in dict_regions.split(',')]
        print("AHA will monitor for events only in the selected regions: ", dict_regions)
        str_filter['regions'] = dict_regions

    exclusion_enabled = os.environ['ACCOUNT_IDS'] not in ("None", "")
    excluded_accounts = None  # loaded on first use, then reused for every event

    org_event_paginator = health_client.get_paginator('describe_events_for_organization')
    for response in org_event_paginator.paginate(filter=str_filter):
        # round-trip through JSON so datetimes become strings
        aws_events = json.loads(json.dumps(response.get('events', []), default=myconverter))
        print('Event(s) Received: ', json.dumps(aws_events))
        if not aws_events:
            print("No events found in time frame, checking again in 1 minute.")
            continue

        for event in aws_events:
            event_arn = event['arn']
            status_code = event['statusCode']
            # epoch seconds as a string; timestamp() replaces the
            # platform-specific strftime('%s') directive
            str_update = str(int(parser.parse(event['lastUpdatedTime']).timestamp()))

            # get organizational view requirements
            affected_org_accounts = get_health_org_accounts(health_client, event, event_arn)
            if exclusion_enabled and affected_org_accounts != []:
                if excluded_accounts is None:
                    excluded_accounts = set(getAccountIDs())
                focused_org_accounts = [i for i in affected_org_accounts if i not in excluded_accounts]
                print("Focused list is ", focused_org_accounts)
                if focused_org_accounts == []:
                    # every affected account is excluded: nothing to record or alert
                    print("Focused Organization Account list is empty")
                    continue
                affected_org_accounts = focused_org_accounts

            affected_org_entities = get_affected_entities(health_client, event_arn, affected_org_accounts,
                                                          is_org_mode=True)
            # get event details
            event_details = json.loads(
                json.dumps(describe_org_event_details(health_client, event_arn, affected_org_accounts),
                           default=myconverter))
            print("Event Details: ", event_details)
            if event_details['successfulSet'] == []:
                # failedSet entries may lack awsAccountId (account-view fallback), so read defensively
                failure = (event_details.get('failedSet') or [{}])[0]
                print("An error occured with account:", failure.get('awsAccountId', 'n/a'), "due to:",
                      failure.get('errorName'), ":",
                      failure.get('errorMessage'))
                continue

            # write to dynamoDB for persistence
            update_org_ddb(event_arn, str_update, status_code, event_details, affected_org_accounts,
                           affected_org_entities)
def myconverter(json_object):
    """``json.dumps`` ``default`` hook: render ``datetime`` values as strings.

    Any other unsupported object yields ``None`` (serialized as ``null``).
    """
    if isinstance(json_object, datetime):
        return json_object.__str__()


def describe_event_details(health_client, event_arn):
    """Return the raw ``describe_event_details`` response for ``event_arn``."""
    response = health_client.describe_event_details(
        eventArns=[event_arn],
    )
    return response


def describe_org_event_details(health_client, event_arn, affected_org_accounts):
    """Return event details via the organization API, or the account API as fallback.

    With at least one affected account, the details are requested for the
    FIRST account in ``affected_org_accounts`` only; with none, the
    account-view ``describe_event_details`` is used.
    """
    if len(affected_org_accounts) >= 1:
        response = health_client.describe_event_details_for_organization(
            organizationEventDetailFilters=[
                {
                    'awsAccountId': affected_org_accounts[0],
                    'eventArn': event_arn
                }
            ]
        )
        return response
    return describe_event_details(health_client, event_arn)


def eventbridge_generate_entries(message, resources, event_bus):
    """Build the single-entry ``Entries`` list for an EventBridge ``put_events`` call."""
    return [
        {
            'Source': 'aha',
            'DetailType': 'AHA Event',
            'Resources': resources,
            'Detail': json.dumps(message),
            'EventBusName': event_bus
        },
    ]


def send_to_eventbridge(message, event_type, resources, event_bus):
    """Publish ``message`` on ``event_bus`` and log the outcome.

    ``event_type`` is only logged. ``put_events`` can report per-entry
    failures in an otherwise successful response, so a non-zero
    ``FailedEntryCount`` is logged explicitly.
    """
    print("Sending response to Eventbridge - event_type, event_bus", event_type, event_bus)
    client = boto3.client('events')

    entries = eventbridge_generate_entries(message, resources, event_bus)

    print("Sending entries: ", entries)

    response = client.put_events(Entries=entries)
    print("Response from eventbridge is:", response)
    if response.get('FailedEntryCount', 0) > 0:
        print("EventBridge rejected entries, FailedEntryCount:", response['FailedEntryCount'])


def getAccountIDs():
    """Return the account ids to exclude, read from the CSV named by ``ACCOUNT_IDS``.

    When ``ACCOUNT_IDS`` ends in ``.csv`` the object is fetched from the
    ``S3_BUCKET`` bucket and each non-blank line becomes one id (surrounding
    whitespace and a leading UTF-8 BOM are removed). Otherwise an empty list
    is returned (the original returned an empty string here).
    """
    account_ids = []
    key_file_name = os.environ['ACCOUNT_IDS']
    print("Key filename is - ", key_file_name)
    if os.path.splitext(os.path.basename(key_file_name))[1] == '.csv':
        s3 = boto3.client('s3')
        data = s3.get_object(Bucket=os.environ['S3_BUCKET'], Key=key_file_name)
        for raw_line in data['Body'].iter_lines():
            account_id = raw_line.decode('utf-8-sig').strip()
            if account_id:
                account_ids.append(account_id)
    else:
        print("Key filename is not a .csv file")
    print(account_ids)
    return account_ids


def get_sts_token(service):
    """Create a boto3 client for ``service`` using the module-level ``config``.

    When the ``ahaassumerole`` secret contains an IAM role ARN, that role is
    assumed through STS (900 second session) and the client is built from the
    temporary credentials; otherwise the default credentials are used.
    """
    assumeRoleArn = get_secrets()["ahaassumerole"]

    if "arn:aws:iam::" in assumeRoleArn:
        sts_connection = boto3.client('sts')
        acct_b = sts_connection.assume_role(
            RoleArn=assumeRoleArn,
            RoleSessionName="cross_acct_aha_session",
            DurationSeconds=900,
        )
        credentials = acct_b['Credentials']

        # create service client using the assumed role credentials, e.g. S3
        boto3_client = boto3.client(
            service,
            config=config,
            aws_access_key_id=credentials['AccessKeyId'],
            aws_secret_access_key=credentials['SecretAccessKey'],
            aws_session_token=credentials['SessionToken'],
        )
        print("Running in member account deployment mode")
    else:
        boto3_client = boto3.client(service, config=config)
        print("Running in management account deployment mode")

    return boto3_client
def main(event, context):
    """Lambda entry point: poll AWS Health in organization or single-account mode.

    The mode is chosen by the ``ORG_STATUS`` environment variable: the exact
    value ``"No"`` selects the account view, anything else the organization
    view. ``event`` and ``context`` are not used.
    """
    print("THANK YOU FOR CHOOSING AWS HEALTH AWARE!")
    health_client = get_sts_token('health')

    # check for AWS Organizations Status
    organizations_enabled = os.environ['ORG_STATUS'] != "No"
    if organizations_enabled:
        print(
            "AWS Organizations is enabled. Personal Health Dashboard and Service Health Dashboard messages will be alerted.")
        describe_org_events(health_client)
        return

    #TODO update text below to reflect current functionality
    print("AWS Organizations is not enabled. Only Service Health Dashboard messages will be alerted.")
    describe_events(health_client)
Personal Health Dashboard and Service Health Dashboard messages will be alerted.") + describe_org_events(health_client) + +if __name__ == "__main__": + main('', '') diff --git a/aha-2.1-beta/messagegenerator.py b/aha-2.1-beta/messagegenerator.py new file mode 100644 index 0000000..12c619b --- /dev/null +++ b/aha-2.1-beta/messagegenerator.py @@ -0,0 +1,631 @@ +import json +import boto3 +from datetime import datetime, timedelta +from botocore.exceptions import ClientError +import os +import re +import sys +import time + + +def get_message_for_slack(event_details, event_type, affected_accounts, affected_entities, slack_webhook): + message = "" + summary = "" + if slack_webhook == "webhook": + if len(affected_entities) >= 1: + affected_entities = "\n".join(affected_entities) + if affected_entities == "UNKNOWN": + affected_entities = "All resources\nin region" + else: + affected_entities = "All resources\nin region" + if len(affected_accounts) >= 1: + affected_accounts = "\n".join(affected_accounts) + else: + affected_accounts = "All accounts\nin region" + if event_type == "create": + summary += ( + f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" + ) + message = { + "text": summary, + "attachments": [ + { + "color": "danger", + "fields": [ + { "title": "Account(s)", "value": affected_accounts, "short": True }, + { "title": "Resource(s)", "value": affected_entities, "short": True }, + { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, + { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, + { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, + { "title": "Status", "value": 
event_details['successfulSet'][0]['event']['statusCode'], "short": True }, + { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, + { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } + ], + } + ] + } + + elif event_type == "resolve": + summary += ( + f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" + ) + message = { + "text": summary, + "attachments": [ + { + "color": "00ff00", + "fields": [ + { "title": "Account(s)", "value": affected_accounts, "short": True }, + { "title": "Resource(s)", "value": affected_entities, "short": True }, + { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, + { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, + { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, + { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime']), "short": True }, + { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, + { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, + { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } + ], + } + ] + } + else: + if len(affected_entities) >= 1: + affected_entities = "\n".join(affected_entities) + if affected_entities == "UNKNOWN": + affected_entities = "All resources\nin region" + else: + affected_entities = "All resources in region" + if len(affected_accounts) >= 1: + affected_accounts = "\n".join(affected_accounts) + else: + affected_accounts = "All accounts in region" + if event_type == "create": + 
summary += ( + f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" + ) + message = { + "text": summary, + "accounts": affected_accounts, + "resources": affected_entities, + "service": event_details['successfulSet'][0]['event']['service'], + "region": event_details['successfulSet'][0]['event']['region'], + "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), + "status": event_details['successfulSet'][0]['event']['statusCode'], + "event_arn": event_details['successfulSet'][0]['event']['arn'], + "updates": get_last_aws_update(event_details) + } + + elif event_type == "resolve": + summary += ( + f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" + ) + message = { + "text": summary, + "accounts": affected_accounts, + "resources": affected_entities, + "service": event_details['successfulSet'][0]['event']['service'], + "region": event_details['successfulSet'][0]['event']['region'], + "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), + "status": event_details['successfulSet'][0]['event']['statusCode'], + "event_arn": event_details['successfulSet'][0]['event']['arn'], + "updates": get_last_aws_update(event_details) + } + + print("Message sent to Slack: ", message) + return message + +# COMMON compose the event detail field for org and non-org +def get_detail_for_eventbridge(event_details, affected_entities): + + message = {} + + #replace the key "arn" with eventArn to match event format from aws.health + message["eventArn"] = "" + message.update(event_details['successfulSet'][0]['event']) + message["eventArn"] = message.pop("arn") + #message = 
event_details['successfulSet'][0]['event'] + + message["eventDescription"] = event_details["successfulSet"][0]["eventDescription"] + message["affectedEntities"] = affected_entities + + # Log length of json message for debugging if eventbridge may reject the message as messages + # are limited in size to 256KB + json_message = json.dumps(message) + print("PHD/SHD Message generated for EventBridge with estimated size ", str(sys.getsizeof(json_message) / 1024), "KB: ", message) + + return message + +def get_org_message_for_slack(event_details, event_type, affected_org_accounts, affected_org_entities, slack_webhook): + message = "" + summary = "" + if slack_webhook == "webhook": + if len(affected_org_entities) >= 1: + affected_org_entities = "\n".join(affected_org_entities) + else: + affected_org_entities = "All resources\nin region" + if len(affected_org_accounts) >= 1: + affected_org_accounts = "\n".join(affected_org_accounts) + else: + affected_org_accounts = "All accounts\nin region" + if event_type == "create": + summary += ( + f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region.*" + ) + message = { + "text": summary, + "attachments": [ + { + "color": "danger", + "fields": [ + { "title": "Account(s)", "value": affected_org_accounts, "short": True }, + { "title": "Resource(s)", "value": affected_org_entities, "short": True }, + { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, + { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, + { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, + { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, + { "title": "Event ARN", "value": 
event_details['successfulSet'][0]['event']['arn'], "short": False }, + { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } + ], + } + ] + } + + elif event_type == "resolve": + summary += ( + f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" + ) + message = { + "text": summary, + "attachments": [ + { + "color": "00ff00", + "fields": [ + { "title": "Account(s)", "value": affected_org_accounts, "short": True }, + { "title": "Resource(s)", "value": affected_org_entities, "short": True }, + { "title": "Service", "value": event_details['successfulSet'][0]['event']['service'], "short": True }, + { "title": "Region", "value": event_details['successfulSet'][0]['event']['region'], "short": True }, + { "title": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), "short": True }, + { "title": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime']), "short": True }, + { "title": "Status", "value": event_details['successfulSet'][0]['event']['statusCode'], "short": True }, + { "title": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn'], "short": False }, + { "title": "Updates", "value": get_last_aws_update(event_details), "short": False } + ], + } + ] + } + else: + if len(affected_org_entities) >= 1: + affected_org_entities = "\n".join(affected_org_entities) + else: + affected_org_entities = "All resources in region" + if len(affected_org_accounts) >= 1: + affected_org_accounts = "\n".join(affected_org_accounts) + else: + affected_org_accounts = "All accounts in region" + if event_type == "create": + summary += ( + f":rotating_light:*[NEW] AWS Health reported an issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the 
{event_details['successfulSet'][0]['event']['region'].upper()} region.*" + ) + message = { + "text": summary, + "accounts": affected_org_accounts, + "resources": affected_org_entities, + "service": event_details['successfulSet'][0]['event']['service'], + "region": event_details['successfulSet'][0]['event']['region'], + "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), + "status": event_details['successfulSet'][0]['event']['statusCode'], + "event_arn": event_details['successfulSet'][0]['event']['arn'], + "updates": get_last_aws_update(event_details) + } + + elif event_type == "resolve": + summary += ( + f":heavy_check_mark:*[RESOLVED] The AWS Health issue with the {event_details['successfulSet'][0]['event']['service'].upper()} service in " + f"the {event_details['successfulSet'][0]['event']['region'].upper()} region is now resolved.*" + ) + message = { + "text": summary, + "accounts": affected_org_accounts, + "resources": affected_org_entities, + "service": event_details['successfulSet'][0]['event']['service'], + "region": event_details['successfulSet'][0]['event']['region'], + "start_time": cleanup_time(event_details['successfulSet'][0]['event']['startTime']), + "status": event_details['successfulSet'][0]['event']['statusCode'], + "event_arn": event_details['successfulSet'][0]['event']['arn'], + "updates": get_last_aws_update(event_details) + } + json.dumps(message) + print("Message sent to Slack: ", message) + return message + + +def get_message_for_chime(event_details, event_type, affected_accounts, affected_entities): + message = "" + if len(affected_entities) >= 1: + affected_entities = "\n".join(affected_entities) + if affected_entities == "UNKNOWN": + affected_entities = "All resources\nin region" + else: + affected_entities = "All resources\nin region" + if len(affected_accounts) >= 1: + affected_accounts = "\n".join(affected_accounts) + else: + affected_accounts = "All accounts\nin region" + summary = "" + if event_type == 
"create": + + message = str("/md" + "\n" + "**:rotating_light:\[NEW\] AWS Health reported an issue with the " + event_details['successfulSet'][0]['event']['service'].upper() + " service in " + event_details['successfulSet'][0]['event']['region'].upper() + " region.**" + "\n" + "---" + "\n" + "**Account(s)**: " + affected_accounts + "\n" + "**Resource(s)**: " + affected_entities + "\n" + "**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n" + "**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n" + "**Start Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n" + "**Status**: " + event_details['successfulSet'][0]['event']['statusCode'] + "\n" + "**Event ARN**: " + event_details['successfulSet'][0]['event']['arn'] + "\n" + "**Updates:**" + "\n" + get_last_aws_update(event_details) + ) + + elif event_type == "resolve": + + message = str("/md" + "\n" + "**:heavy_check_mark:\[RESOLVED\] The AWS Health issue with the " + event_details['successfulSet'][0]['event']['service'].upper() + " service in " + event_details['successfulSet'][0]['event']['region'].upper() + " region is now resolved.**" + "\n" + "---" + "\n" + "**Account(s)**: " + affected_accounts + "\n" + "**Resource(s)**: " + affected_entities + "\n" + "**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n" + "**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n" + "**Start Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n" + "**End Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['endTime']) + "\n" + "**Status**: " + event_details['successfulSet'][0]['event']['statusCode'] + "\n" + "**Event ARN**: " + event_details['successfulSet'][0]['event']['arn'] + "\n" + "**Updates:**" + "\n" + get_last_aws_update(event_details) + ) + json.dumps(message) + print("Message sent to Chime: ", message) + return message + + +def 
get_org_message_for_chime(event_details, event_type, affected_org_accounts, affected_org_entities): + message = "" + summary = "" + if len(affected_org_entities) >= 1: + affected_org_entities = "\n".join(affected_org_entities) + else: + affected_org_entities = "All resources in region" + if len(affected_org_accounts) >= 1: + affected_org_accounts = "\n".join(affected_org_accounts) + else: + affected_org_accounts = "All accounts in region" + if event_type == "create": + + message = str("/md" + "\n" + "**:rotating_light:\[NEW\] AWS Health reported an issue with the " + event_details['successfulSet'][0]['event']['service'].upper()) + " service in " + str(event_details['successfulSet'][0]['event']['region'].upper() + " region**" + "\n" + "---" + "\n" + "**Account(s)**: " + affected_org_accounts + "\n" + "**Resource(s)**: " + affected_org_entities + "\n" + "**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n" + "**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n" + "**Start Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n" + "**Status**: " + event_details['successfulSet'][0]['event']['statusCode'] + "\n" + "**Event ARN**: " + event_details['successfulSet'][0]['event']['arn'] + "\n" + "**Updates:**" + "\n" + get_last_aws_update(event_details) + ) + + elif event_type == "resolve": + + message = str("/md" + "\n" + "**:heavy_check_mark:\[RESOLVED\] The AWS Health issue with the " + event_details['successfulSet'][0]['event']['service'].upper()) + " service in " + str(event_details['successfulSet'][0]['event']['region'].upper() + " region is now resolved.**" + "\n" + "---" + "\n" + "**Account(s)**: " + affected_org_accounts + "\n" + "**Resource(s)**: " + affected_org_entities + "\n" + "**Service**: " + event_details['successfulSet'][0]['event']['service'] + "\n" + "**Region**: " + event_details['successfulSet'][0]['event']['region'] + "\n" + "**Start Time (UTC)**: " + 
cleanup_time(event_details['successfulSet'][0]['event']['startTime']) + "\n" + "**End Time (UTC)**: " + cleanup_time(event_details['successfulSet'][0]['event']['endTime']) + "\n" + "**Status**: " + event_details['successfulSet'][0]['event']['statusCode'] + "\n" + "**Event ARN**: " + event_details['successfulSet'][0]['event']['arn'] + "\n" + "**Updates:**" + "\n" + get_last_aws_update(event_details) + ) + print("Message sent to Chime: ", message) + return message + + + +def get_message_for_teams(event_details, event_type, affected_accounts, affected_entities): + message = "" + if len(affected_entities) >= 1: + affected_entities = "\n".join(affected_entities) + if affected_entities == "UNKNOWN": + affected_entities = "All resources\nin region" + else: + affected_entities = "All resources\nin region" + if len(affected_accounts) >= 1: + affected_accounts = "\n".join(affected_accounts) + else: + affected_accounts = "All accounts\nin region" + summary = "" + if event_type == "create": + title = "🚨 [NEW] AWS Health reported an issue with the " + event_details['successfulSet'][0]['event'][ + 'service'].upper() + " service in the " + event_details['successfulSet'][0]['event'][ + 'region'].upper() + " region." 
+ message = { + "@type": "MessageCard", + "@context": "http://schema.org/extensions", + "themeColor": "FF0000", + "summary": "AWS Health Aware Alert", + "sections": [ + { + "activityTitle": str(title), + "markdown": False, + "facts": [ + {"name": "Account(s)", "value": affected_accounts}, + {"name": "Resource(s)", "value": affected_entities}, + {"name": "Service", "value": event_details['successfulSet'][0]['event']['service']}, + {"name": "Region", "value": event_details['successfulSet'][0]['event']['region']}, + {"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}, + {"name": "Status", "value": event_details['successfulSet'][0]['event']['statusCode']}, + {"name": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn']}, + {"name": "Updates", "value": get_last_aws_update(event_details)} + ], + } + ] + } + + elif event_type == "resolve": + title = "✅ [RESOLVED] The AWS Health issue with the " + event_details['successfulSet'][0]['event'][ + 'service'].upper() + " service in the " + event_details['successfulSet'][0]['event'][ + 'region'].upper() + " region is now resolved." 
+ message = { + "@type": "MessageCard", + "@context": "http://schema.org/extensions", + "themeColor": "00ff00", + "summary": "AWS Health Aware Alert", + "sections": [ + { + "activityTitle": str(title), + "markdown": False, + "facts": [ + {"name": "Account(s)", "value": affected_accounts}, + {"name": "Resource(s)", "value": affected_entities}, + {"name": "Service", "value": event_details['successfulSet'][0]['event']['service']}, + {"name": "Region", "value": event_details['successfulSet'][0]['event']['region']}, + {"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}, + {"name": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime'])}, + {"name": "Status", "value": event_details['successfulSet'][0]['event']['statusCode']}, + {"name": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn']}, + {"name": "Updates", "value": get_last_aws_update(event_details)} + ], + } + ] + } + print("Message sent to Teams: ", message) + return message + + +def get_org_message_for_teams(event_details, event_type, affected_org_accounts, affected_org_entities): + message = "" + summary = "" + if len(affected_org_entities) >= 1: + affected_org_entities = "\n".join(affected_org_entities) + else: + affected_org_entities = "All resources in region" + if len(affected_org_accounts) >= 1: + affected_org_accounts = "\n".join(affected_org_accounts) + else: + affected_org_accounts = "All accounts in region" + if event_type == "create": + title = "🚨 [NEW] AWS Health reported an issue with the " + event_details['successfulSet'][0]['event'][ + 'service'].upper() + " service in the " + event_details['successfulSet'][0]['event'][ + 'region'].upper() + " region." 
+ message = { + "@type": "MessageCard", + "@context": "http://schema.org/extensions", + "themeColor": "FF0000", + "summary": "AWS Health Aware Alert", + "sections": [ + { + "activityTitle": title, + "markdown": False, + "facts": [ + {"name": "Account(s)", "value": affected_org_accounts}, + {"name": "Resource(s)", "value": affected_org_entities}, + {"name": "Service", "value": event_details['successfulSet'][0]['event']['service']}, + {"name": "Region", "value": event_details['successfulSet'][0]['event']['region']}, + {"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}, + {"name": "Status", "value": event_details['successfulSet'][0]['event']['statusCode']}, + {"name": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn']}, + {"name": "Updates", "value": event_details['successfulSet'][0]['eventDescription']['latestDescription']} + ], + } + ] + } + + elif event_type == "resolve": + title = "✅ [RESOLVED] The AWS Health issue with the " + event_details['successfulSet'][0]['event'][ + 'service'].upper() + " service in the " + event_details['successfulSet'][0]['event'][ + 'region'].upper() + " region is now resolved." 
+ message = { + "@type": "MessageCard", + "@context": "http://schema.org/extensions", + "themeColor": "00ff00", + "summary": "AWS Health Aware Alert", + "sections": [ + { + "activityTitle": title, + "markdown": False, + "facts": [ + {"name": "Account(s)", "value": affected_org_accounts}, + {"name": "Resource(s)", "value": affected_org_entities}, + {"name": "Service", "value": event_details['successfulSet'][0]['event']['service']}, + {"name": "Region", "value": event_details['successfulSet'][0]['event']['region']}, + {"name": "Start Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}, + {"name": "End Time (UTC)", "value": cleanup_time(event_details['successfulSet'][0]['event']['endTime'])}, + {"name": "Status", "value": event_details['successfulSet'][0]['event']['statusCode']}, + {"name": "Event ARN", "value": event_details['successfulSet'][0]['event']['arn']}, + {"name": "Updates", "value": event_details['successfulSet'][0]['eventDescription']['latestDescription']} + ], + } + ] + } + return message + print("Message sent to Teams: ", message) + + +def get_message_for_email(event_details, event_type, affected_accounts, affected_entities): + # Not srue why we have the new line in the affected entities code here + if len(affected_entities) >= 1: + affected_entities = "\n".join(affected_entities) + if affected_entities == "UNKNOWN": + affected_entities = "All resources\nin region" + else: + affected_entities = "All resources\nin region" + if len(affected_accounts) >= 1: + affected_accounts = "\n".join(affected_accounts) + else: + affected_accounts = "All accounts\nin region" + if event_type == "create": + BODY_HTML = f""" + + + Greetings from AWS Health Aware,
+

There is an AWS incident that is in effect which may likely impact your resources. Here are the details:

+ Account(s): {affected_accounts}
+ Resource(s): {affected_entities}
+ Service: {event_details['successfulSet'][0]['event']['service']}
+ Region: {event_details['successfulSet'][0]['event']['region']}
+ Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
+ Status: {event_details['successfulSet'][0]['event']['statusCode']}
+ Event ARN: {event_details['successfulSet'][0]['event']['arn']}
+ Updates: {event_details['successfulSet'][0]['eventDescription']['latestDescription']}

+ For updates, please visit the AWS Service Health Dashboard
+ If you are experiencing issues related to this event, please open an AWS Support case within your account.

+ Thanks,

AHA: AWS Health Aware +

+ + + """ + else: + BODY_HTML = f""" + + + Greetings again from AWS Health Aware,
+

Good news! The AWS Health incident from earlier has now been marked as resolved.

+ Account(s): {affected_accounts}
+ Resource(s): {affected_entities}
+ Service: {event_details['successfulSet'][0]['event']['service']}
+ Region: {event_details['successfulSet'][0]['event']['region']}
+ Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
+ End Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['endTime'])}
+ Status: {event_details['successfulSet'][0]['event']['statusCode']}
+ Event ARN: {event_details['successfulSet'][0]['event']['arn']}
+ Updates: {event_details['successfulSet'][0]['eventDescription']['latestDescription']}

+ If you are still experiencing issues related to this event, please open an AWS Support case within your account.

+

+ Thanks,

AHA: AWS Health Aware +

+ + + """ + print("Message sent to Email: ", BODY_HTML) + return BODY_HTML + + +def get_org_message_for_email(event_details, event_type, affected_org_accounts, affected_org_entities): + if len(affected_org_entities) >= 1: + affected_org_entities = "\n".join(affected_org_entities) + else: + affected_org_entities = "All services related resources in region" + if len(affected_org_accounts) >= 1: + affected_org_accounts = "\n".join(affected_org_accounts) + else: + affected_org_accounts = "All accounts in region" + if event_type == "create": + BODY_HTML = f""" + + + Greetings from AWS Health Aware,
+

There is an AWS incident that is in effect which may likely impact your resources. Here are the details:

+ Account(s): {affected_org_accounts}
+ Resource(s): {affected_org_entities}
+ Service: {event_details['successfulSet'][0]['event']['service']}
+ Region: {event_details['successfulSet'][0]['event']['region']}
+ Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
+ Status: {event_details['successfulSet'][0]['event']['statusCode']}
+ Event ARN: {event_details['successfulSet'][0]['event']['arn']}
+ Updates: {event_details['successfulSet'][0]['eventDescription']['latestDescription']}

+ For updates, please visit the AWS Service Health Dashboard
+ If you are experiencing issues related to this event, please open an AWS Support case within your account.

+ Thanks,

AHA: AWS Health Aware +

+ + + """ + else: + BODY_HTML = f""" + + + Greetings again from AWS Health Aware,
+

Good news! The AWS Health incident from earlier has now been marked as resolved.

+ Account(s): {affected_org_accounts}
+ Resource(s): {affected_org_entities}
+ Service: {event_details['successfulSet'][0]['event']['service']}
+ Region: {event_details['successfulSet'][0]['event']['region']}
+ Start Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['startTime'])}
+ End Time (UTC): {cleanup_time(event_details['successfulSet'][0]['event']['endTime'])}
+ Status: {event_details['successfulSet'][0]['event']['statusCode']}
+ Event ARN: {event_details['successfulSet'][0]['event']['arn']}
+ Updates: {event_details['successfulSet'][0]['eventDescription']['latestDescription']}

+ If you are still experiencing issues related to this event, please open an AWS Support case within your account.

+ Thanks,

AHA: AWS Health Aware +

+ + + """ + print("Message sent to Email: ", BODY_HTML) + return BODY_HTML + + +def cleanup_time(event_time): + """ + Takes as input a datetime string as received from The AWS Health event_detail call. It converts this string to a + datetime object, changes the timezone to EST and then formats it into a readable string to display in Slack. + + :param event_time: datetime string + :type event_time: str + :return: A formatted string that includes the month, date, year and 12-hour time. + :rtype: str + """ + event_time = datetime.strptime(event_time[:16], '%Y-%m-%d %H:%M') + return event_time.strftime("%Y-%m-%d %H:%M:%S") + + +def get_last_aws_update(event_details): + """ + Takes as input the event_details and returns the last update from AWS (instead of the entire timeline) + + :param event_details: Detailed information about a specific AWS health event. + :type event_details: dict + :return: the last update message from AWS + :rtype: str + """ + aws_message = event_details['successfulSet'][0]['eventDescription']['latestDescription'] + return aws_message + + +def format_date(event_time): + """ + Takes as input a datetime string as received from The AWS Health event_detail call. It converts this string to a + datetime object, changes the timezone to EST and then formats it into a readable string to display in Slack. + + :param event_time: datetime string + :type event_time: str + :return: A formatted string that includes the month, date, year and 12-hour time. + :rtype: str + """ + event_time = datetime.strptime(event_time[:16], '%Y-%m-%d %H:%M') + return event_time.strftime('%B %d, %Y at %I:%M %p') diff --git a/aha-2.1-beta/new_aha_event_schema.md b/aha-2.1-beta/new_aha_event_schema.md new file mode 100644 index 0000000..1cdbb7e --- /dev/null +++ b/aha-2.1-beta/new_aha_event_schema.md @@ -0,0 +1,221 @@ +# Readme for new AHA Event schema + +## New AHA Event Schema + +With release X.Y.Z, AHA includes an updated format for events published to EventBridge. 
Building on the [existing event format](https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#health-event-types) published by AWS Health, AHA enriches it with additional data from the Health API and AWS Organizations to enable new options for filtering in EventBridge. + + +>Note: If you used the previous { "title": "Title", "value": "Value" } schema in your rules, you must update your rules to reflect the new schema when deploying the new version of AHA + +### Schema: + +``` +{ + "version": "0", + "id": "7bf73129-1428-4cd3-a780-95db273d1602", + "detail-type": "AHA Event", + "source": "aha", + "account": "123456789012", + "time": "2022-07-14T03:56:10Z", + "region": "region of the eventbus", + "resources": [ + "i-1234567890abcdef0" + ], + "detail": { + "eventArn": "arn:aws:health:region::event/id", + "service": "service", + "eventTypeCode": "typecode", + "eventTypeCategory": "category", + "region": "region of the Health event", + "startTime": "2022-07-02 12:33:26.951000+00:00", + "endTime": "2022-07-02 12:33:26.951000+00:00", + "lastUpdatedTime": "2022-07-02 12:36:18.576000+00:00", + "statusCode": "status", + "eventScopeCode": "scopecode", + "eventDescription": { + "latestDescription": "description" + }, + "affectedEntities": [{ + "entityValue": "i-1234567890abcdef0", + "awsAccountId": "account number", + "awsAccountName": "account name" + }] + } +} +``` + +### AHA added properties + +**eventScopeCode:** Specifies if the Health event is a public AWS service event or an account-specific event. +Values: *string -* `PUBLIC | ACCOUNT_SPECIFIC` + +**statusCode:** Reflects whether the event is ongoing, resolved or in the case of scheduled maintenance, upcoming. +Values: *string -* `open | closed | upcoming` + +**affectedEntities:** For ACCOUNT_SPECIFIC events, AHA includes expanded detail on resources. 
**affectedEntities** includes the listed **resources**, each as an **entityValue** with the resource ID (as it appears in events for single accounts). AHA adds the related **awsAccountId** and, in AWS Organizations, the **awsAccountName** of the resource. +Values: *entity object(s). May be empty if no resources are listed* + + +## EventBridge pattern examples + +As a primer, we recommend you review the [EventBridge EventPatterns](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-event-patterns.html) documentation and the examples on [Content filtering in Amazon EventBridge event patterns](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-event-patterns-content-based-filtering.html). + +Use the following sample event published by AWS Health Aware to test matching in the provided examples: + +``` +{ + "version": "0", + "id": "e47c4390-b295-ce6f-7e94-f13083d7bb90", + "detail-type": "AHA Event", + "source": "aha", + "account": "234567890123", + "time": "2022-07-20T18:26:17Z", + "region": "us-east-1", + "resources": [ + "vpn-0d0e3eeefe6aabb0d" + ], + "detail": { + "eventArn": "arn:aws:health:us-east-1::event/VPN/AWS_VPN_REDUNDANCY_LOSS/AWS_VPN_REDUNDANCY_LOSS-1656151378267-7672191-IAD", + "service": "VPN", + "eventTypeCode": "AWS_VPN_REDUNDANCY_LOSS", + "eventTypeCategory": "accountNotification", + "region": "us-east-1", + "startTime": "2022-06-25 10:00:48.868000+00:00", + "lastUpdatedTime": "2022-06-25 10:02:58.371000+00:00", + "statusCode": "open", + "eventScopeCode": "ACCOUNT_SPECIFIC", + "eventDescription": { + "latestDescription": "Your VPN Connection associated with this event in the us-east-1 Region had a momentary lapse of redundancy as one of two tunnel endpoints was replaced. Connectivity on the second tunnel was not affected during this time. Both tunnels are now operating normally.\n\nReplacements can occur for several reasons, including health, software upgrades, customer-initiated modifications, and when underlying hardware is retired. 
If you have configured your VPN Customer Gateway to use both tunnels, then your VPN Connection will have utilized the alternate tunnel during the replacement process. For more on tunnel endpoint replacements, please see our documentation [1].\n\nIf you have not configured your VPN Customer Gateway to use both tunnels, then your VPN Connection may have been interrupted during the replacement. We encourage you to configure your router to use both tunnels. You can obtain the VPN Connection configuration recommendations for several types of VPN devices from the AWS Management Console [2]. On the \"Amazon VPC\" tab, select \"VPN Connections\". Then highlight the VPN Connection and choose \"Download Configuration\".\n\n[1] https://docs.aws.amazon.com/vpn/latest/s2svpn/monitoring-vpn-health-events.html\n[2] https://console.aws.amazon.com" + }, + "affectedEntities": [{ + "entityValue": "vpn-0d0e3eeefe6aabb0d", + "awsAccountId": "987654321987", + "awsAccountName": "Prod-Apps" + }] + } +} +``` + + +To write a rule that matches resources found in the event, reference the **resources** key of the JSON event, and provide an event pattern to the EventBridge rule. Example 1 matches on an exact resource - “*vpn-0d0e3eeefe6aabb0d*”. Example 2 matches any resource starting with "*vpn-*" +**Example 1:** + +``` +{ + "resources": [ + "vpn-0d0e3eeefe6aabb0d" + ] +} +``` + + +**Example 2:** + +``` +{ + "resources": [ + {"prefix": "vpn-"} + ] +} +``` + + +To match based on a specific service, note that **service** is nested within the **detail** key in the JSON structure, so we reference both **detail** and **service**. Example 3 matches the VPN service, and Example 4 matches EC2 OR S3 (will not match the sample event). 
To get a list of all service names used by AWS Health you can use the CLI command - `aws health describe-event-types` + +**Example 3:** + +``` +{ + "detail": { + "service": ["VPN"] + } +} +``` + + +**Example 4:** + +``` +{ + "detail": { + "service": ["EC2", "S3"] + } +} +``` + + +To match events based on an AWS account name or number, use the following patterns. Take note of the additional levels of nesting based on the sample event. Example 5 matches a specific account number as **awsAccountId**. Example 6 matches a specific account name, and Example 7 adds an additional field of **eventTypeCategory** along with the “prefix” filter pattern which will match any value in the **awsAccountName** field that starts with “*Prod*” similar to a wildcard match of “*Prod**” + +**Example 5:** + +``` +{ + "detail": { + "affectedEntities": { + "awsAccountId": ["987654321987"] + } + } +} +``` + + +**Example 6:** + +``` +{ + "detail": { + "affectedEntities": { + "awsAccountName": ["Prod-Apps"] + } + } +} +``` + + +**Example 7:** + +``` +{ + "detail": { + "eventTypeCategory": ["accountNotification"], + "affectedEntities": { + "awsAccountName": [{"prefix": "Prod"}] + } + } +} +``` + + +Combine any of the patterns listed to create more specific rules. Note that all patterns in the rule must match for the EventBridge rule to trigger. Example 8 will only match when all 3 conditions exist in an AHA event - **service** = *VPN*, **region** = *us-east-1* and **awsAccountId** = *987654321987* + +**Example 8:** + +``` +{ + "detail": { + "service": ["VPN"], + "region": ["us-east-1"], + "affectedEntities": { + "awsAccountId": ["987654321987"] + } + } +} +``` + + +As a best practice, also include `"source": ["aha"]` in your pattern if the event bus contains events generated by other sources. + +**Example 9:** + +``` +{ + "source": ["aha"], + "detail": { + "service": ["VPN"], + "region": ["us-east-1"], + "affectedEntities": { + "awsAccountId": ["987654321987"] + } + } +} +```