Troubleshooting module robisoh88 new #2

Closed
Changes from all commits (19 commits)
294e572
minor writing changes.
robisoh88 Sep 5, 2024
e147913
adding new workernode troubleshooting modules and website docs
robisoh88 Oct 10, 2024
996219d
added troubleshooting workernode modules and website documents
robisoh88 Oct 10, 2024
cc9fe08
minor doc changes
robisoh88 Oct 10, 2024
58465fe
Merge branch 'aws-samples:main' into troubleshooting-module-robisoh88
robisoh88 Oct 22, 2024
3fb2a16
make final touches in website content for troubleshooting/workernodes…
robisoh88 Oct 23, 2024
fc2d999
merging for latest update from upstream branch
robisoh88 Oct 23, 2024
3d0b457
Resolve merge conflicts and apply stashed changes
robisoh88 Oct 23, 2024
01fa3e7
Resolve merge conflicts and apply stashed changes
robisoh88 Oct 23, 2024
5be808b
Resolve merge conflicts and apply stashed changes
robisoh88 Oct 23, 2024
f32e285
Resolve merge conflicts and apply stashed changes
robisoh88 Oct 23, 2024
19844e1
merging
robisoh88 Oct 23, 2024
3bd1ab2
Fixing lint errors and adding support for automated test
Oct 25, 2024
9ab1340
Merge branch 'troubleshooting-module' into troubleshooting-module-rob…
Oct 25, 2024
78c1847
merge upstream changes and resolve conflicts
robisoh88 Oct 29, 2024
f3d9b69
created all hooks and tested troublshooting/workernodes directory. al…
robisoh88 Nov 2, 2024
fd27d0a
Commit staged changes before resolving conflicts
robisoh88 Nov 5, 2024
458e8e8
passing pre-commit
robisoh88 Nov 6, 2024
3f32ce7
deleted comments in vars.tf for worker node troubleshooting modules
robisoh88 Nov 7, 2024
2 changes: 1 addition & 1 deletion manifests/.workshop/terraform/lab.tf
@@ -11,4 +11,4 @@ module "lab" {

locals {
  environment_variables = try(module.lab.environment_variables, [])
}

@@ -85,7 +85,8 @@ module "eks_blueprints_addons" {

  tags = merge(
    var.tags,
    local.tags,
    locals.tags
  )

  depends_on = [null_resource.break_public_subnet]
@@ -0,0 +1,7 @@
#!/bin/bash

# Clean up any resources created by the user OUTSIDE of Terraform here

# All stdout output will be hidden from the user
# To display a message to the user:
# logmessage "Deleting some resource...."
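# For example, if the lab leaves EBS encryption-by-default enabled (a sketch based on this
# module's scenario; adjust to whatever was actually created outside Terraform):
# logmessage "Disabling EBS encryption by default..."
# aws ec2 disable-ebs-encryption-by-default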
@@ -0,0 +1,296 @@
## To do - add the nodegroup name as a variable and change the code to use the variable instead.

# I had to delete the environment because my nodegroup went into a degraded state. I tried to delete nodegroup bin, but reset-environment did not recreate the node.
# Is there a terraform command I can use to recreate a particular resource from the base?

# Questions:
# I am trying to create a nodegroup - is that the right approach?
# Should the environment be prepared for each issue?
# How do I reference a variable from the original eks.tf, e.g. ami_version?
# How do I reference a variable, e.g. the account ID?
# What should I set the region variable to?
# How do I reference the node IAM role from the original node role?

# Add your Terraform here


# EBS encryption issue

# Need to create a new KMS key with an all-principals permission, then enable EBS encryption by default.
# - then scale the nodegroup down to 0 and back up to 3
# the customer will see no nodes
# To do this, the EBS volume needs to be encrypted. Make sure the node role does not have permission for encryption.
# Encryption by default can be enabled for the region with https://docs.aws.amazon.com/cli/latest/reference/ec2/enable-ebs-encryption-by-default.html
# Once enabled, the nodegroup can be scaled down and scaled back up.
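# A possible CLI sequence for that step (a sketch; the region and key are assumptions based
# on the resources defined later in this file):
#   aws ec2 enable-ebs-encryption-by-default --region us-west-2
#   aws ec2 modify-ebs-default-kms-key-id --kms-key-id <new_kms_key_id> --region us-west-2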


# Bootstrap failure due to VPC endpoint issue
# Could possibly create the issue via a subnet misconfiguration

# Custom launch template (LT) issue

# terraform {
# required_providers {
# # kubectl = {
# # source = "gavinbunney/kubectl"
# # version = ">= 1.14"
# # }
# }
# }

provider "aws" {
region = "us-west-2"
alias = "Oregon"
}

/* locals {
tags = {
module = "troubleshooting"
}
} */

data "aws_subnets" "private" {
tags = {
created-by = "eks-workshop-v2"
env = var.addon_context.eks_cluster_id
}
filter {
name = "tag:Name"
values = ["*Private*"]
}
}

data "aws_eks_cluster" "cluster" {
name = var.eks_cluster_id
}

# Scale the existing nodegroup to 0 and create a new managed node group for the scenario (otherwise the issue will transition the MNG into a degraded state and reset-environment will fail or take a very long time, e.g. 20 minutes).
# Decrease the desired count to 0; a sketch of the CLI call is shown below.
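# A sketch of that scale-down call (assuming the AWS CLI and the EKS_CLUSTER_NAME variable
# exported by the workshop environment; this module does not run it):
#   aws eks update-nodegroup-config --cluster-name $EKS_CLUSTER_NAME --nodegroup-name default \
#     --scaling-config minSize=0,desiredSize=0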

data "aws_eks_node_group" "default" {
cluster_name = data.aws_eks_cluster.cluster.id
node_group_name = "default"
}

## Creating a KMS CMK - schedule deletion after a minimum of 7 days
resource "aws_kms_key" "new_kms_key" {
  description = "NEW KMS CMK"
  # deletion_window_in_days = 7
  enable_key_rotation = true
}

resource "random_string" "random_suffix" {
length = 5
special = false
upper = false
}

resource "aws_kms_alias" "new_kms_key_alias" {
name = "alias/new_kms_key_alias_${random_string.random_suffix.result}"
target_key_id = aws_kms_key.new_kms_key.key_id
depends_on = [aws_kms_key.new_kms_key]
lifecycle {
create_before_destroy = true
}
}


# Make the key alias unique/random for self-driven workshop users


#get account ID and output it for use
data "aws_caller_identity" "current" {}

/* output "account_id" {
value = data.aws_caller_identity.current.account_id
} */

## Creating the policy document for the key policy
data "aws_iam_policy_document" "key_administrators_policy" {
  statement {
    effect = "Allow"
    principals {
      type        = "AWS"
      identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"]
    }
    actions   = ["kms:*"]
    resources = ["*"]
  }
}
# Add the key policy to the key
resource "aws_kms_key_policy" "example" {
  key_id = aws_kms_key.new_kms_key.key_id
  policy = data.aws_iam_policy_document.key_administrators_policy.json
}
# Remember to use least-privilege permissions where possible ^ (a narrower statement is sketched below)
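# A narrower statement sketch (an assumption, not applied by this module): grant only the KMS
# permissions that the EC2 Auto Scaling service-linked role needs to use the CMK for the
# encrypted EBS volumes backing the managed node group.
# statement {
#   effect = "Allow"
#   principals {
#     type        = "AWS"
#     identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling"]
#   }
#   actions = [
#     "kms:Encrypt",
#     "kms:Decrypt",
#     "kms:ReEncrypt*",
#     "kms:GenerateDataKey*",
#     "kms:DescribeKey",
#     "kms:CreateGrant"
#   ]
#   resources = ["*"]
# }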

# Enable encryption by default pointing to the new CMK. Disable it via a script when the environment is destroyed.
# resource "aws_ebs_encryption_by_default" "ebs-encryption-default" {
# enabled = true
# }


# resource "null_resource" "modify_ebs_default_kms_key" {
# provisioner "local-exec" {
# command = "aws ec2 modify-ebs-default-kms-key-id --kms-key-id ${aws_kms_key.new_kms_key.key_id} --region us-west-2"

# environment = {
# AWS_DEFAULT_REGION = "us-west-2"
# }

# }
# }
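# A Terraform-native alternative sketch for the local-exec above (an assumption, not enabled
# here): the AWS provider's aws_ebs_default_kms_key resource sets the regional default EBS
# encryption key and resets it to the AWS-managed key on destroy.
# resource "aws_ebs_default_kms_key" "example" {
#   key_arn = aws_kms_key.new_kms_key.arn
# }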


# Create a new launch template so EC2 instances will have a name for easier identification during troubleshooting.
resource "aws_launch_template" "new_launch_template" {
  name = "new_nodegroup_1"

  instance_type = "m5.large"

  block_device_mappings {
    device_name = "/dev/xvda"

    ebs {
      volume_size = 20
      volume_type = "gp2"
      encrypted   = true
      kms_key_id  = aws_kms_key.new_kms_key.arn
    }
  }

  tag_specifications {
    resource_type = "instance"
    tags = {
      Name = "troubleshooting-one-${var.eks_cluster_id}"
    }
  }

  depends_on = [aws_kms_key.new_kms_key]

  lifecycle {
    create_before_destroy = true
  }
}

# Create a new nodegroup called new_nodegroup_1 with zero nodes, so the MNG will not go into a degraded state
resource "aws_eks_node_group" "new_nodegroup_1" {
  cluster_name    = data.aws_eks_cluster.cluster.id
  node_group_name = "new_nodegroup_1"
  node_role_arn   = data.aws_eks_node_group.default.node_role_arn
  release_version = data.aws_eks_node_group.default.release_version
  subnet_ids      = data.aws_subnets.private.ids

  scaling_config {
    desired_size = 0
    max_size     = 1
    min_size     = 0
  }

  update_config {
    max_unavailable_percentage = 50
  }

  launch_template {
    id      = aws_launch_template.new_launch_template.id
    version = aws_launch_template.new_launch_template.latest_version
  }

  depends_on = [aws_launch_template.new_launch_template]
}
resource "null_resource" "increase_desired_count" {
provisioner "local-exec" {
command = "aws eks update-nodegroup-config --cluster-name ${data.aws_eks_cluster.cluster.id} --nodegroup-name ${aws_eks_node_group.new_nodegroup_1.node_group_name} --scaling-config minSize=0,maxSize=1,desiredSize=1"
when = create
environment = {
AWS_DEFAULT_REGION = "us-west-2"
}
#This will eventually transition newnodegroup into Degraded state. Need to find out how to bring it back to healthy state.

}
depends_on = [aws_eks_node_group.new_nodegroup_1]
}
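# A possible way to inspect why a nodegroup reports Degraded (a sketch; this module does not
# run it):
#   aws eks describe-nodegroup --cluster-name <cluster-name> --nodegroup-name new_nodegroup_1 \
#     --query 'nodegroup.health.issues'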



#TESTING DESTROY COMMAND

# data "aws_vpc" "selected" {
# tags = {
# created-by = "eks-workshop-v2"
# env = var.addon_context.eks_cluster_id
# }
# }

# resource "null_resource" "create_security_group" {

# provisioner "local-exec" {
# command = <<-EOT
# SG_ID=$(aws ec2 create-security-group \
# --group-name "testing" \
# --description "A simple security group for testing purposes" \
# --vpc-id "${data.aws_vpc.selected.id}" \
# --tag-specifications 'ResourceType=security-group,Tags=[{Key=Name,Value=testing}]' \
# --output json | jq -r '.GroupId')

# echo $SG_ID > sg_id.txt

# aws ec2 authorize-security-group-egress \
# --group-id $SG_ID \
# --protocol all \
# --port all \
# --cidr 0.0.0.0/0

# aws ec2 authorize-security-group-ingress \
# --group-id $SG_ID \
# --protocol tcp \
# --port 22 \
# --cidr 0.0.0.0/0
# EOT
# }

# provisioner "local-exec" {
# when = destroy
# command = <<-EOT
# SG_ID=$(cat sg_id.txt)
# aws ec2 delete-security-group --group-id $SG_ID
# EOT
# }
# }

# data "local_file" "sg_id" {
# depends_on = [null_resource.create_security_group]
# filename = "sg_id.txt"
# }

# output "security_group_id" {
# value = trimspace(data.local_file.sg_id.content)
# }



# resource "null_resource" "test" {
# provisioner "local-exec" {
# command = "aws ec2 disable-ebs-encryption-by-default "
# environment = {
# AWS_DEFAULT_REGION = "us-west-2" # Replace with your desired region
# }
# when = destroy
# }

# resource "null_resource" "disable_ebs_encryption" {
# provisioner "local-exec" {
# command = "aws ec2 disable-ebs-encryption-by-default "
# environment = {
# AWS_DEFAULT_REGION = "us-west-2" # Replace with your desired region
# }
# when = destroy
# }

# provisioner "local-exec" {
# command = "mkdir -p /eks-workshop/logs; echo \" key: ${each.key} Value:${each.value}\" >> /eks-workshop/logs/action-load-balancer-output.log"
# }
# }


@@ -0,0 +1,23 @@
output "environment_variables" {
description = "Environment variables to be added to the IDE shell"
value = {
MY_ENVIRONMENT_VARIABLE = "abc1234",
NEW_NODEGROUP_1_ASG_NAME = aws_eks_node_group.new_nodegroup_1.resources[0].autoscaling_groups[0].name,
NEW_NODEGROUP_1_LT_ID = aws_eks_node_group.new_nodegroup_1.launch_template[0].id,
NEW_KMS_KEY_ID = aws_kms_key.new_kms_key.id
}
}

# output "environment_variables" {
# description = "Environment variables to be added to the IDE shell"
# value = merge({
# VPC_ID = data.aws_vpc.selected.id,
# LOAD_BALANCER_CONTROLLER_ROLE_NAME = module.eks_blueprints_addons.aws_load_balancer_controller.iam_role_name,
# LOAD_BALANCER_CONTROLLER_POLICY_ARN_FIX = module.eks_blueprints_addons.aws_load_balancer_controller.iam_policy_arn,
# LOAD_BALANCER_CONTROLLER_POLICY_ARN_ISSUE = aws_iam_policy.issue.arn,
# LOAD_BALANCER_CONTROLLER_ROLE_ARN = module.eks_blueprints_addons.aws_load_balancer_controller.iam_role_arn
# }, {
# for index, id in data.aws_subnets.public.ids : "PUBLIC_SUBNET_${index + 1}" => id
# }
# )
# }
@@ -0,0 +1,35 @@
# tflint-ignore: terraform_unused_declarations
variable "eks_cluster_id" {
  description = "EKS cluster name"
  type        = string
}

# tflint-ignore: terraform_unused_declarations
variable "eks_cluster_version" {
  description = "EKS cluster version"
  type        = string
}

# tflint-ignore: terraform_unused_declarations
variable "cluster_security_group_id" {
  description = "EKS cluster security group ID"
  type        = any
}

# tflint-ignore: terraform_unused_declarations
variable "addon_context" {
  description = "Addon context that can be passed directly to blueprints addon modules"
  type        = any
}

# tflint-ignore: terraform_unused_declarations
variable "tags" {
  description = "Tags to apply to AWS resources"
  type        = any
}

# tflint-ignore: terraform_unused_declarations
variable "resources_precreated" {
  description = "Have expensive resources been created already"
  type        = bool
}