Skip to content

Commit

Permalink
Changes to deploy waggle-dance service on Kubernetes. (#69)
Browse files Browse the repository at this point in the history
* deploy waggle-dance on Kubernetes

* configure 90% memory as heapsize for k8s containers

* configure AWS LoadBalancer for k8s service

* configure waggle-dance cname

* create kubernetes resources when wd_instance_type is k8s

* create aws_instance when instance type is ec2

* waggle_dance_dns output

* create ssm association only when deployment type is ec2

* add waggle_dance_load_balancers output

* clean up ec2 deployment

* terraform fmt

* disable cloudwatch dashboard and alerts when instance type is not ecs

* add k8s_namespace variable

* update changelog, disable aws_cloudwatch_log_group when deploying on k8s

* remove iam role when deploying to k8s

* remove aws_sns_topic when deploying to k8s

* fix aws_sns_topic reference

* update README.md

* disable waggle-dance dns for testing

* Revert "disable waggle-dance dns for testing"

This reverts commit f3d7ea8.

* remove ami_id from README.md

* update changelog
  • Loading branch information
rpoluri authored Nov 12, 2019
1 parent 9212626 commit 2ef4599
Show file tree
Hide file tree
Showing 16 changed files with 179 additions and 302 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [3.0.0] - TBD
## [3.0.0] - 2019-11-12

### Added
- Support for running Waggle Dance on Kubernetes.
- Upgrade to Terraform version 0.12

### Changed
- Tag remote metastore VPC endpoints.

### Removed
- Support for running Waggle Dance on EC2 nodes.

## [2.0.1] - 2019-07-17

### Added
Expand Down
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ For more information please refer to the main [Apiary](https://github.com/Expedi
| graphite_prefix | Prefix added to all metrics sent to Graphite from this Waggle Dance instance. | string | `waggle-dance` | no |
| ingress_cidr | Generally allowed ingress CIDR list. | list | - | yes |
| instance_name | Waggle Dance instance name to identify resources in multi-instance deployments. | string | `` | no |
| k8s_namespace | K8s namespace in which to create the waggle-dance deployment. | string | `` | no |
| k8s_docker_registry_secret | Docker Registry authentication K8s secret name.| string | ``| no |
| local_metastores | List of federated Metastore endpoints directly accessible on the local network. | list | `<list>` | no |
| memory | The amount of memory (in MiB) used to allocate for the Waggle Dance container. Valid values: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-cpu-memory-error.html | string | `4096` | no |
| primary_metastore_host | Primary Hive Metastore hostname configured in Waggle Dance. | string | `localhost` | no |
Expand All @@ -31,13 +33,10 @@ For more information please refer to the main [Apiary](https://github.com/Expedi
| tags | A map of tags to apply to resources. | map | `<map>` | no |
| vpc_id | VPC ID. | string | - | yes |
| wd_ecs_task_count | Number of ECS tasks to create. | string | `1` | no |
| wd_instance_type | Waggle Dance instance type, possible values: `ecs`,`ec2`. | string | `ecs` | no |
| wd_instance_type | Waggle Dance instance type, possible values: `ecs`,`k8s`. | string | `ecs` | no |
| waggledance_version | Waggle Dance version to install on EC2 nodes | string | `3.3.2` | no |
| key_name | Waggle Dance EC2 ssh key pair name. | string | automation | no |
| root_vol_type | Waggle Dance EC2 root volume type. | string | `gp2` | no |
| root_vol_size | Waggle Dance EC2 root volume size. | string | `10` | no |
| ec2_instance_type | Waggle Dance EC2 instance type. | string | `m5.large` | no |
| ami_id | Amazon Linux AMI. | string | - | no |

## Usage

Expand Down
39 changes: 20 additions & 19 deletions cloudwatch.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
*/

resource "aws_cloudwatch_log_group" "waggledance_ecs" {
name = local.instance_alias
tags = var.tags
count = var.wd_instance_type == "ecs" ? 1 : 0
name = local.instance_alias
tags = var.tags
}

resource "aws_cloudwatch_dashboard" "apiary_federation" {
count = var.wd_instance_type == "ecs" ? 1 : 0
dashboard_name = "${local.instance_alias}-${var.aws_region}"

dashboard_body = <<EOF
{
"widgets": [
Expand Down Expand Up @@ -51,16 +52,16 @@ EOF
locals {
alerts = [
{
alarm_name = "${local.instance_alias}-cpu"
namespace = "AWS/ECS"
alarm_name = "${local.instance_alias}-cpu"
namespace = "AWS/ECS"
metric_name = "CPUUtilization"
threshold = "80"
threshold = "80"
},
{
alarm_name = "${local.instance_alias}-memory"
namespace = "AWS/ECS"
alarm_name = "${local.instance_alias}-memory"
namespace = "AWS/ECS"
metric_name = "MemoryUtilization"
threshold = "70"
threshold = "70"
},
]

Expand All @@ -77,18 +78,18 @@ locals {
}

resource "aws_cloudwatch_metric_alarm" "waggledance_alert" {
count = length(local.alerts)
alarm_name = local.alerts[count.index].alarm_name
count = var.wd_instance_type == "ecs" ? length(local.alerts) : 0
alarm_name = local.alerts[count.index].alarm_name
comparison_operator = lookup(local.alerts[count.index], "comparison_operator", "GreaterThanOrEqualToThreshold")
metric_name = local.alerts[count.index].metric_name
namespace = local.alerts[count.index].namespace
period = lookup(local.alerts[count.index], "period", "120")
evaluation_periods = lookup(local.alerts[count.index], "evaluation_periods", "2")
statistic = "Average"
threshold = local.alerts[count.index].threshold
metric_name = local.alerts[count.index].metric_name
namespace = local.alerts[count.index].namespace
period = lookup(local.alerts[count.index], "period", "120")
evaluation_periods = lookup(local.alerts[count.index], "evaluation_periods", "2")
statistic = "Average"
threshold = local.alerts[count.index].threshold

#alarm_description = ""
insufficient_data_actions = []
dimensions = local.dimensions[count.index]
alarm_actions = [aws_sns_topic.apiary_federation_ops_sns.arn]
dimensions = local.dimensions[count.index]
alarm_actions = [aws_sns_topic.apiary_federation_ops_sns[0].arn]
}
1 change: 0 additions & 1 deletion common.tf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
locals {
instance_alias = var.instance_name == "" ? "waggledance" : format("waggledance-%s", var.instance_name)
remote_metastore_zone_prefix = var.instance_name == "" ? "remote-metastore" : format("remote-metastore-%s", var.instance_name)
cw_arn = "arn:aws:swf:${var.aws_region}:${data.aws_caller_identity.current.account_id}:action/actions/AWS_EC2.InstanceId.Reboot/1.0"
}

data "aws_caller_identity" "current" {}
Expand Down
81 changes: 0 additions & 81 deletions ec2.tf

This file was deleted.

4 changes: 2 additions & 2 deletions ecs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ resource "aws_ecs_service" "waggledance_service" {
resource "aws_ecs_task_definition" "waggledance" {
count = var.wd_instance_type == "ecs" ? 1 : 0
family = local.instance_alias
task_role_arn = aws_iam_role.waggledance_task.arn
execution_role_arn = aws_iam_role.waggledance_task_exec.arn
task_role_arn = aws_iam_role.waggledance_task[0].arn
execution_role_arn = aws_iam_role.waggledance_task_exec[0].arn
network_mode = "awsvpc"
memory = var.memory
cpu = var.cpu
Expand Down
31 changes: 11 additions & 20 deletions iam-ecs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
*/

resource "aws_iam_role" "waggledance_task_exec" {
name = "${local.instance_alias}-ecs-task-exec-${var.aws_region}"
count = var.wd_instance_type == "ecs" ? 1 : 0
name = "${local.instance_alias}-ecs-task-exec-${var.aws_region}"

assume_role_policy = <<EOF
{
Expand All @@ -27,14 +28,15 @@ EOF
}

resource "aws_iam_role_policy_attachment" "task_exec_managed" {
role = aws_iam_role.waggledance_task_exec.id
count = var.wd_instance_type == "ecs" ? 1 : 0
role = aws_iam_role.waggledance_task_exec[0].id
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}

resource "aws_iam_role_policy" "secretsmanager_for_ecs_task_exec" {
count = var.docker_registry_auth_secret_name == "" ? 0 : 1
name = "secretsmanager-exec"
role = aws_iam_role.waggledance_task_exec.id
name = "secretsmanager-exec"
role = aws_iam_role.waggledance_task_exec[0].id

policy = <<EOF
{
Expand All @@ -49,7 +51,8 @@ EOF
}

resource "aws_iam_role" "waggledance_task" {
name = "${local.instance_alias}-ecs-task-${var.aws_region}"
count = var.wd_instance_type == "ecs" ? 1 : 0
name = "${local.instance_alias}-ecs-task-${var.aws_region}"

assume_role_policy = <<EOF
{
Expand All @@ -59,7 +62,7 @@ resource "aws_iam_role" "waggledance_task" {
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": [ "ecs-tasks.amazonaws.com", "ec2.amazonaws.com" ]
"Service": "ecs-tasks.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
Expand All @@ -72,8 +75,8 @@ EOF

resource "aws_iam_role_policy" "secretsmanager_for_waggledance_task" {
count = var.bastion_ssh_key_secret_name == "" ? 0 : 1
name = "secretsmanager"
role = aws_iam_role.waggledance_task.id
name = "secretsmanager"
role = aws_iam_role.waggledance_task[0].id

policy = <<EOF
{
Expand All @@ -86,15 +89,3 @@ resource "aws_iam_role_policy" "secretsmanager_for_waggledance_task" {
}
EOF
}

resource "aws_iam_role_policy_attachment" "waggledance_ssm_policy" {
count = var.wd_instance_type == "ecs" ? 0 : 1
role = aws_iam_role.waggledance_task.name
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEC2RoleforSSM"
}

resource "aws_iam_instance_profile" "waggledance" {
count = var.wd_instance_type == "ecs" ? 0 : 1
name = aws_iam_role.waggledance_task.name
role = aws_iam_role.waggledance_task.name
}
88 changes: 88 additions & 0 deletions k8s.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/**
* Copyright (C) 2018-2019 Expedia Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
*/

locals {
  # JVM heap size handed to the container through the HEAPSIZE env var:
  # 90% of var.memory, rounded up to a whole number.
  heapsize = ceil(
    var.memory * 90 / 100
  )
}
# Waggle Dance deployment on Kubernetes.
# Created only when var.wd_instance_type is "k8s".
resource "kubernetes_deployment" "waggle_dance" {
  count = var.wd_instance_type == "k8s" ? 1 : 0

  metadata {
    name      = "waggle-dance"
    namespace = var.k8s_namespace
    labels = {
      name = "waggle-dance"
    }
  }

  spec {
    replicas = 3

    # Must match the pod template labels below.
    selector {
      match_labels = {
        name = "waggle-dance"
      }
    }

    template {
      metadata {
        labels = {
          name = "waggle-dance"
        }
      }

      spec {
        container {
          image = "${var.docker_image}:${var.docker_version}"
          name  = "waggle-dance"

          # Heap size derived from var.memory (see local.heapsize).
          env {
            name  = "HEAPSIZE"
            value = local.heapsize
          }

          # Rendered configuration templates are passed base64-encoded.
          env {
            name  = "SERVER_YAML"
            value = base64encode(data.template_file.server_yaml.rendered)
          }
          env {
            name  = "FEDERATION_YAML"
            value = base64encode(data.template_file.federation_yaml.rendered)
          }

          # Request and limit are set to the same value.
          resources {
            limits {
              memory = "${var.memory}Mi"
            }
            requests {
              memory = "${var.memory}Mi"
            }
          }
        }

        # Only reference a registry pull secret when one is configured;
        # otherwise the pod spec would carry a secret reference with an
        # empty name.
        dynamic "image_pull_secrets" {
          for_each = var.k8s_docker_registry_secret == "" ? [] : [var.k8s_docker_registry_secret]
          content {
            name = image_pull_secrets.value
          }
        }
      }
    }
  }
}

# LoadBalancer service in front of the waggle-dance pods.
# Created only when var.wd_instance_type is "k8s".
resource "kubernetes_service" "waggle_dance" {
  count = var.wd_instance_type == "k8s" ? 1 : 0

  metadata {
    namespace = var.k8s_namespace
    name      = "waggle-dance"

    # AWS load balancer annotations: internal scheme, NLB type.
    annotations = {
      "service.beta.kubernetes.io/aws-load-balancer-type"     = "nlb"
      "service.beta.kubernetes.io/aws-load-balancer-internal" = "true"
    }
  }

  spec {
    type = "LoadBalancer"

    # Selects pods by the "name" label set in the deployment's pod template.
    selector = {
      name = "waggle-dance"
    }

    # Service port is forwarded to the same port number on the pods.
    port {
      target_port = 48869
      port        = 48869
    }
  }
}
3 changes: 3 additions & 0 deletions ouputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Hostnames of the load balancer ingress points of the waggle-dance
# Kubernetes service. The service only exists when var.wd_instance_type is
# "k8s" (count = 0 otherwise), so the [0] index is guarded and an empty list
# is returned for other deployment types instead of failing the plan.
output "waggle_dance_load_balancers" {
  value = var.wd_instance_type == "k8s" ? kubernetes_service.waggle_dance[0].load_balancer_ingress.*.hostname : []
}
Loading

0 comments on commit 2ef4599

Please sign in to comment.