From 76ae015a3874f6d483e3f4f700b91ba048570930 Mon Sep 17 00:00:00 2001 From: Jay Carlton <53479492+jaycarlton@users.noreply.github.com> Date: Mon, 30 Nov 2020 11:41:27 -0500 Subject: [PATCH] fix and fmt --- .gitignore | 1 + modules/workbench/README.md | 45 +++++++++---- modules/workbench/WORKBENCH-MODULE-PLAN.md | 72 +++++++++++++++++++++ modules/workbench/main.tf | 2 +- modules/workbench/modules/reporting/main.tf | 20 +++--- 5 files changed, 117 insertions(+), 23 deletions(-) create mode 100644 modules/workbench/WORKBENCH-MODULE-PLAN.md diff --git a/.gitignore b/.gitignore index 39f7fa5..5e69c9b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.tfstate *.backup *.iml +.DS_Store diff --git a/modules/workbench/README.md b/modules/workbench/README.md index d3235e5..a7e776e 100644 --- a/modules/workbench/README.md +++ b/modules/workbench/README.md @@ -1,24 +1,45 @@ -# Workbench Child Modules +# Workbench Terraform Modules The module directories here represent individually deployable subsystems, microservices, or other functional units. It's easy enough to put all buckets, say, -in a `gcs` module, but that wouldn't really let us operate on an individual components's bucket. +in a `gcs` module, but that wouldn't really let us operate on an individual component-owned bucket. Following is a broad outline fo each child module. If you feel irritated that you can't see, for example, all dashboards in one place, you can still go to the Console or use `gcloud`. +## Goals +### Automate ourselves out of a job +All the existing and planned Terraform modules have some level of scripted or otherwise automated +support processes. +## Non-goals +### Become the only game in town +We don't want to get into a position where we force anyone to use Terraform if it's not the best +choice for them. Terraform is still pretty new, and changing rapidly. The Google provider is also +under rapid development. 
+### Wag the Dog +We do not have any aspirations to absorb any of the tasks that external teams are responsible for, +including building the GCP projects for each of our environments or conducting all administrative +tasks in either pmi-ops or terra projects. If Terraform really "takes off", then it may make sense to +share learnings, and at that point, there may be opportunities for our Terraform stack to use theirs, +or vice versa. While these boundaries may be fuzzy today, hopefully the addition of clear module +inputs and documentation will drive clarification of responsibilities and visibility into state, +dependencies, etc. +### Bypass security approvals +In some cases, actions that require security approval can be performed in Terraform, particularly +around IAM bindings, access groups, and roles. We don't want a situation where an audit finds that +individuals or service accounts were added or modified without going through the proper channels. -## Reporting +One potential workaround here is to invite sysadmin or security personnel to the private repository +to approve changes to the Terraform module inputs. + +## Currently Supported Modules + +### Reporting The state for reporting is currently the BigQuery dataset and its tables and views. In the future, -it makes sense to add j +it makes sense to add these sorts of things: * Reporting-specific metrics * Notifications on the system * Reporting-specific logs, specific logs * Data blocks for views (maybe) -## Backend Database (future) -This resource is inherently cross-functional, so we can just put -* The application DB -* backup settings -This will take advantage of the `google_sql_database_instance` resource. - -Schema migrations work via `Ruby->Gradle->Liquibase->MySql->🚂` -Maybe it needs a `Terraform` caboose. It looks like there's not currently a Liquibase provider. 
+In other words, the primary focus of the module is the Reporting system, but it may be convenient to +add reporting-specific artifacts that might otherwise be concerned with Monitoring or other auxiliary +services. diff --git a/modules/workbench/WORKBENCH-MODULE-PLAN.md b/modules/workbench/WORKBENCH-MODULE-PLAN.md new file mode 100644 index 0000000..55d2c0a --- /dev/null +++ b/modules/workbench/WORKBENCH-MODULE-PLAN.md @@ -0,0 +1,72 @@ +# Workbench Module Plan +The module directories here represent individually deployable subsystems, +microservices, or other functional units. It's easy enough to put all buckets, say, +in a `gcs` module, but that wouldn't really let us operate on an individual component's bucket. + +Following is a broad outline of each child module. If you feel irritated that you can't see, for example, +all dashboards in one place, you can still go to the Console or use `gcloud`. + +# Workbench Module Development Plan +The Workbench is the topmost parent module in the AoU Workbench +Application configuration. It depends on several modules for individual +subsystems. + +After creating a valid Terraform configuration we're not finished, +as we need to make sure we don't step on other tools or automation. +For example, items that pertain to cloud resources will need to move +out of the workbench JSON config system. + +I have automation already for Stackdriver settings that fetches all of their configurations +and plan to migrate it to Terraform. + +## Reporting +The state for reporting is currently the BigQuery dataset and its tables and views. 
+Highlights +* Reporting-specific metrics with the `google_logging_metric` [resource](https://www.terraform.io/docs/providers/google/r/logging_metric.html) +and others +* Notifications on the system +* Reporting-specific logs, specific logs +* Data blocks for views (maybe) + +## Backend Database (notional) +This resource is inherently cross-functional, so we can just put +* The application DB +* backup settings +This will take advantage of the `google_sql_database_instance` resource. + +Schema migrations work via `Ruby->Gradle->Liquibase->MySql->🚂` +Maybe it needs a `Terraform` caboose. It looks like there's not currently a Liquibase provider. + +It may not make sense organizationally to do this in Terraform, as there are dependencies on other +team(s) when instantiating or migrating databases. + +## Workbench to RDR Pipeline +Instantiate [google_cloud_tasks_queue](https://www.terraform.io/docs/providers/google/r/cloud_tasks_queue.html) +resources as necessary. + +## API Server +* AppEngine versions, instances, logs, etc. Isn't just named +App Engine, since that's the resource that gets created. + +At the moment, there are no plans to rip and replace our existing deployment process or automation, +but we may find areas that the Terraform approach could be helpful (such as managing dependent +deployment artifacts or steps in a declarative way.) + +## Action Audit +This module maps to +* Stackdriver logs for each environment. (It will need to + move from the application JSON config likely.) +* Logs-based metrics on the initial log stream +* Sink to BigQuery dataset for each environment (Stackdriver may need to create initially, in which +case, we need to do `terraform import`.) +* Logs-based metrics on the initial log stream +* Reporting datasets in BigQuery + +## Tiers and Egress Detection +There is a [sumo logic provider](https://www.sumologic.com/blog/terraform-provider-hosted/) for Terraform, which is very good +news. It looks really svelte. 
+ +We will also want to control the VPC flow logs, +perimeters, etc, but it won't be in this `workbench` module, +because Terra-not-form owns the organization and needs to do +creation manually for now. diff --git a/modules/workbench/main.tf b/modules/workbench/main.tf index fcaeed2..776c769 100644 --- a/modules/workbench/main.tf +++ b/modules/workbench/main.tf @@ -1,4 +1,4 @@ -# Module for creating an instance of the scratch AoU RW Environment +# Workbench Analytics Reporting Subsystem module "reporting" { source = "./modules/reporting" diff --git a/modules/workbench/modules/reporting/main.tf b/modules/workbench/modules/reporting/main.tf index fc00bce..5119f78 100644 --- a/modules/workbench/modules/reporting/main.tf +++ b/modules/workbench/modules/reporting/main.tf @@ -58,9 +58,9 @@ locals { timeseries_view_template_filenames = fileset("${path.module}/assets/views/timeseries", "*.sql") # expanded to fully qualified path, e.g. ["/repos/workbench/terraform/modules/reporting/views/latest_users.sql", ...] timeseries_view_template_paths = [for file_name in local.timeseries_view_template_filenames : - pathexpand("${path.module}/assets/views/timeseries/${file_name}")] + pathexpand("${path.module}/assets/views/timeseries/${file_name}")] - live_view_tables = [for table_input in local.table_inputs : table_input["table_id"] ] + live_view_tables = [for table_input in local.table_inputs : table_input["table_id"]] live_view_template_path = pathexpand("${path.module}/assets/views/live/live_table.sql") # All live views (live_user, live_cohort, etc) depend on the tables being created first, so we need to make sure @@ -68,13 +68,13 @@ locals { # table (I think) but this should solve the dependency problem of trying to create the view before # its table. 
https://stackoverflow.com/q/64795896/12345554 live_views = [for table_name in module.main.table_names : - merge({ - view_id = "live_${table_name}" - query = templatefile(local.live_view_template_path, { - project = var.project_id - dataset = var.reporting_dataset_id - table_name = table_name - }) + merge({ + view_id = "live_${table_name}" + query = templatefile(local.live_view_template_path, { + project = var.project_id + dataset = var.reporting_dataset_id + table_name = table_name + }) }, local.VIEW_CONSTANTS)] # Create views for each .sql file in the views directory. There is no Terraform @@ -107,7 +107,7 @@ module "main" { description = "Daily output of relational tables and time series views for analysis. Views are provided for general ad-hoc analysis." tables = local.tables - views = local.views + views = local.views dataset_labels = { subsystem = "reporting"