Skip to content

Commit

Permalink
Feature/storage cost optimization (#106)
Browse files Browse the repository at this point in the history
* Adding lifecycle policy for s3 data buckets

Making No of days in Lifecycle policy configurable

Making storage classes configurable at schema level

* PR comments

* PR comments

* PR Comments

* PR Comments

* PR Comments

* fix to run terraform when apiary_managed_schemas is empty
  • Loading branch information
spuranda123 authored and rpoluri committed May 23, 2019
1 parent f3c3068 commit 47e62f2
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 11 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [2.0.0] - TBD

### Added
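- Option to configure S3 storage class for cost optimization.

### Changed
- `apiary_managed_schemas` is now a list of maps instead of a list of strings. Each map must contain `schema_name` and may contain `s3_lifecycle_policy_transition_period` and `s3_storage_class`.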

## [1.1.0] - TBD

### Added
Expand Down
4 changes: 3 additions & 1 deletion VARIABLES.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
| apiary_domain_name | Apiary domain name for Route 53. | string | `` | no |
| apiary_log_bucket | Bucket for Apiary logs. | string | - | yes |
| apiary_log_prefix | Prefix for Apiary logs. | string | `` | no |
| apiary_managed_schemas | Schema names from which S3 bucket names will be derived, corresponding S3 bucket will be named as apiary_instance-aws_account-aws_region-schema_name. | list | `<list>` | no |
| apiary_managed_schemas | List of maps, one per managed schema. Each map must contain `schema_name`, from which the S3 bucket name is derived; the corresponding S3 bucket will be named apiary_instance-aws_account-aws_region-schema_name. Each map may also set the S3 storage properties `s3_storage_class` and `s3_lifecycle_policy_transition_period` (number of days before transition). For valid S3 storage classes, see https://www.terraform.io/docs/providers/aws/r/s3_bucket.html#storage_class | list of maps | `<list of maps>` | no |
| apiary_producer_iamroles | AWS IAM roles allowed write access to managed Apiary S3 buckets. | map | `<map>` | no |
| apiary_rds_additional_sg | Comma-separated string containing additional security groups to attach to RDS. | list | `<list>` | no |
| apiary_shared_schemas | Schema names which are accessible from read-only metastore, default is all schemas. | list | `<list>` | no |
Expand Down Expand Up @@ -57,4 +57,6 @@
| ranger_audit_solr_url | Ranger Solr audit provider configuration. | string | `` | no |
| ranger_policy_manager_url | Ranger admin URL to synchronize policies. | string | `` | no |
| secondary_vpcs | List of VPCs to associate with Service Discovery namespace. | list | `<list>` | no |
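| s3_lifecycle_policy_transition_period | Default number of days before objects transition to a different storage class under the lifecycle policy. Can be overridden per schema in `apiary_managed_schemas`. | string | `30` | no |
| s3_storage_class | Default destination S3 storage class for the lifecycle policy transition. Can be overridden per schema in `apiary_managed_schemas`. | string | `INTELLIGENT_TIERING` | no |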
| vpc_id | VPC ID. | string | - | yes |
31 changes: 26 additions & 5 deletions common.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
*/

locals {
instance_alias = "${ var.instance_name == "" ? "apiary" : format("apiary-%s",var.instance_name) }"
enable_route53_records = "${ var.apiary_domain_name == "" ? "0" : "1" }"
apiary_managed_schemas = "${ split(",",replace(join(",",var.apiary_managed_schemas),"_","-")) }"
apiary_data_buckets = "${ formatlist("%s-%s-%s-%s",local.instance_alias,data.aws_caller_identity.current.account_id,var.aws_region,local.apiary_managed_schemas) }"
gluedb_prefix = "${ var.instance_name == "" ? "" : "${var.instance_name}_" }"
instance_alias = "${ var.instance_name == "" ? "apiary" : format("apiary-%s",var.instance_name) }"
enable_route53_records = "${ var.apiary_domain_name == "" ? "0" : "1" }"
# Rendered output of data.template_file.schema_names: one entry per element of var.apiary_managed_schemas.
apiary_managed_schema_names_original = ["${data.template_file.schema_names.*.rendered}"]
# Rendered output of data.template_file.schema_names_replaced: one entry per element of var.apiary_managed_schemas.
apiary_managed_schema_names_replaced = ["${data.template_file.schema_names_replaced.*.rendered}"]
# One bucket name per schema, formatted as <instance_alias>-<account_id>-<aws_region>-<replaced schema name>.
apiary_data_buckets = "${ formatlist("%s-%s-%s-%s",local.instance_alias,data.aws_caller_identity.current.account_id,var.aws_region,local.apiary_managed_schema_names_replaced) }"
gluedb_prefix = "${ var.instance_name == "" ? "" : "${var.instance_name}_" }"
}

data "aws_caller_identity" "current" {}
Expand All @@ -23,3 +24,23 @@ data "aws_route53_zone" "apiary_zone" {
name = "${var.apiary_domain_name}"
vpc_id = "${var.vpc_id}"
}

# Renders the "schema_name" entry of every map in var.apiary_managed_schemas,
# yielding one rendered string per managed schema (indexed by count.index).
data "template_file" "schema_names" {
  count    = "${length(var.apiary_managed_schemas)}"
  template = "${lookup(var.apiary_managed_schemas[count.index], "schema_name")}"
}

# Same as the schema name lookup, but with every "_" in the name replaced by
# "-"; one rendered string per element of var.apiary_managed_schemas.
data "template_file" "schema_names_replaced" {
  count    = "${length(var.apiary_managed_schemas)}"
  template = "${replace(lookup(var.apiary_managed_schemas[count.index], "schema_name"),"_","-")}"
}

# Per-schema transition period: takes "s3_lifecycle_policy_transition_period"
# from the schema's map when present, otherwise falls back to
# var.s3_lifecycle_policy_transition_period.
data "template_file" "s3_lifecycle_policy_transition_period" {
  count    = "${length(var.apiary_managed_schemas)}"
  template = "${lookup(var.apiary_managed_schemas[count.index], "s3_lifecycle_policy_transition_period", var.s3_lifecycle_policy_transition_period)}"
}

# Per-schema storage class: takes "s3_storage_class" from the schema's map
# when present, otherwise falls back to var.s3_storage_class.
data "template_file" "s3_storage_class" {
  count    = "${length(var.apiary_managed_schemas)}"
  template = "${lookup(var.apiary_managed_schemas[count.index], "s3_storage_class", var.s3_storage_class)}"
}
14 changes: 12 additions & 2 deletions s3.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ data "template_file" "bucket_policy" {

vars {
#if apiary_shared_schemas is empty or contains current schema, allow customer accounts to access this bucket.
customer_principal = "${ length(var.apiary_shared_schemas) == 0 || contains(var.apiary_shared_schemas, element(concat(var.apiary_managed_schemas,list("")),count.index)) ?
customer_principal = "${ length(var.apiary_shared_schemas) == 0 || contains(var.apiary_shared_schemas, element(concat(local.apiary_managed_schema_names_original,list("")),count.index)) ?
join("\",\"", formatlist("arn:aws:iam::%s:root",var.apiary_customer_accounts)) :
"arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" }"

bucket_name = "${local.apiary_data_buckets[count.index]}"
producer_iamroles = "${replace(lookup(var.apiary_producer_iamroles,element(concat(var.apiary_managed_schemas,list("")),count.index),"arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"),",","\",\"")}"
producer_iamroles = "${replace(lookup(var.apiary_producer_iamroles,element(concat(local.apiary_managed_schema_names_original,list("")),count.index),"arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"),",","\",\"")}"
}
}

Expand All @@ -37,6 +37,16 @@ resource "aws_s3_bucket" "apiary_data_bucket" {
target_bucket = "${var.apiary_log_bucket}"
target_prefix = "${var.apiary_log_prefix}${local.apiary_data_buckets[count.index]}/"
}

lifecycle_rule {
id = "cost_optimization"
enabled = true

transition {
days = "${data.template_file.s3_lifecycle_policy_transition_period.*.rendered[count.index]}"
storage_class = "${data.template_file.s3_storage_class.*.rendered[count.index]}"
}
}
}

resource "aws_s3_bucket_notification" "data_events" {
Expand Down
4 changes: 2 additions & 2 deletions sns.tf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ POLICY

resource "aws_sns_topic" "apiary_data_events" {
count = "${ var.enable_data_events == "" ? 0 : length(var.apiary_managed_schemas) }"
name = "${local.instance_alias}-${local.apiary_managed_schemas[count.index]}-data-events"
name = "${local.instance_alias}-${local.apiary_managed_schema_names_replaced[count.index]}-data-events"

policy = <<POLICY
{
Expand All @@ -38,7 +38,7 @@ resource "aws_sns_topic" "apiary_data_events" {
"Effect": "Allow",
"Principal": {"AWS":"*"},
"Action": "SNS:Publish",
"Resource": "arn:aws:sns:*:*:${local.instance_alias}-${local.apiary_managed_schemas[count.index]}-data-events",
"Resource": "arn:aws:sns:*:*:${local.instance_alias}-${local.apiary_managed_schema_names_replaced[count.index]}-data-events",
"Condition":{
"ArnLike":{"aws:SourceArn":"${aws_s3_bucket.apiary_data_bucket.*.arn[count.index]}"}
}
Expand Down
2 changes: 1 addition & 1 deletion templates.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ data "template_file" "hms_readwrite" {
hive_metastore_log_level = "${var.hms_log_level}"
nofile_ulimit = "${var.hms_nofile_ulimit}"
enable_metrics = "${var.enable_hive_metastore_metrics}"
managed_schemas = "${join(",",var.apiary_managed_schemas)}"
managed_schemas = "${join(",",local.apiary_managed_schema_names_original)}"
instance_name = "${local.instance_alias}"
sns_arn = "${ var.enable_metadata_events == "" ? "" : join("",aws_sns_topic.apiary_metadata_events.*.arn) }"
table_param_filter = "${ var.enable_metadata_events == "" ? "" : var.table_param_filter }"
Expand Down
12 changes: 12 additions & 0 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -332,3 +332,15 @@ variable "docker_registry_auth_secret_name" {
type = "string"
default = ""
}

# Fallback storage class used for a schema whose entry in
# var.apiary_managed_schemas does not set "s3_storage_class".
variable "s3_storage_class" {
  description = "S3 storage class after transition using lifecycle policy"
  type        = "string"
  default     = "INTELLIGENT_TIERING"
}

# Fallback number of days used for a schema whose entry in
# var.apiary_managed_schemas does not set "s3_lifecycle_policy_transition_period".
variable "s3_lifecycle_policy_transition_period" {
  description = "S3 Lifecycle Policy number of days for Transition rule"
  type        = "string"
  default     = "30"
}

0 comments on commit 47e62f2

Please sign in to comment.