Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
saliceti committed Oct 11, 2024
1 parent 398fda7 commit 6505cf7
Show file tree
Hide file tree
Showing 8 changed files with 443 additions and 0 deletions.
13 changes: 13 additions & 0 deletions aks/dfe_analytics/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# DfE Analytics

Creates resources in Google Cloud BigQuery and provides the required variables to applications so that they can send events.

## GCP provider - Command line

## GCP provider - Github actions

## Create or reuse existing resources

## Examples

## How to configure application
11 changes: 11 additions & 0 deletions aks/dfe_analytics/data.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Loads the sibling cluster_data module for the cluster named by var.cluster.
# Its configuration_map output supplies the resource group used in this file.
module "cluster_data" {
  source = "../cluster_data"

  name = var.cluster
}

# Configuration of the AzureRM provider currently in use.
# NOTE(review): not referenced in the files visible here; presumably consumed
# by locals defined elsewhere in the module. Confirm before removing.
data "azurerm_client_config" "current" {
}

# Looks up an existing user-assigned managed identity. Its name is derived from
# the Azure resource prefix, the cluster and the namespace, and it lives in the
# resource group reported by the cluster_data module.
data "azurerm_user_assigned_identity" "gcp_wif" {
  resource_group_name = module.cluster_data.configuration_map.resource_group_name
  name                = "${var.azure_resource_prefix}-gcp-wif-${var.cluster}-${var.namespace}-id"
}
145 changes: 145 additions & 0 deletions aks/dfe_analytics/files/events.json.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
[
{
"description": "The timestamp at which the event occurred in the application.",
"mode": "REQUIRED",
"name": "occurred_at",
"type": "TIMESTAMP"
},
{
"description": "The type of the event, for example web_request. This determines the schema of the data which will be included in the data field.",
"mode": "REQUIRED",
"name": "event_type",
"type": "STRING"
},
{
"description": "If a user was logged in when they sent a web request event that is this event, then this is the UID of this user.",
"name": "user_id",
"type": "STRING"
},
{
"description": "Unique ID of the web request, if this event is a web request event",
"name": "request_uuid",
"type": "STRING"
},
{
"description": "Whether this web request was a GET or POST request, if this event is a web request event.",
"name": "request_method",
"type": "STRING"
},
{
"description": "The path, starting with a / and excluding any query parameters, of this web request, if this event is a web request",
"name": "request_path",
"type": "STRING"
},
{
"description": "The user agent of this web request, if this event is a web request. Allows a user's browser and operating system to be identified",
"name": "request_user_agent",
"type": "STRING"
},
{
"description": "The URL of any page the user was viewing when they initiated this web request, if this event is a web request. This is the full URL, including protocol (https://) and any query parameters, if the browser shared these with our application as part of the web request. It is very common for this referer to be truncated for referrals from external sites.",
"name": "request_referer",
"type": "STRING"
},
{
"description": "ARRAY of STRUCTs, each with a key and a value. Contains any query parameters that were sent to the application as part of this web request, if this event is a web request.",
"fields": [
{
"description": "Name of the query parameter e.g. if the URL ended ?foo=bar then this will be foo.",
"mode": "REQUIRED",
"name": "key",
"type": "STRING"
},
{
"description": "Contents of the query parameter e.g. if the URL ended ?foo=bar then this will be bar.",
"mode": "REPEATED",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "request_query",
"type": "RECORD"
},
{
"description": "Content type of any data that was returned to the browser following this web request, if this event is a web request. For example, 'text/html; charset=utf-8'. Image views, for example, may have a non-text/html content type.",
"name": "response_content_type",
"type": "STRING"
},
{
"description": "HTTP response code returned by the application in response to this web request, if this event is a web request. See https://developer.mozilla.org/en-US/docs/Web/HTTP/Status.",
"name": "response_status",
"type": "STRING"
},
{
"description": "ARRAY of STRUCTs, each with a key and a value. Contains a set of data points appropriate to the event_type of this event. For example, if this event was an entity create, update, delete or import event, data will contain the values of each field in the database after this event took place - according to the settings in the analytics.yml configured for this instance of dfe-analytics. Values may be anonymised as a one way hash, depending on configuration settings.",
"fields": [
{
"description": "Name of the field in the entity_table_name table in the database after it was created or updated, or just before it was imported or destroyed.",
"mode": "REQUIRED",
"name": "key",
"type": "STRING"
},
{
"description": "Contents of the field in the database after it was created or updated, or just before it was imported or destroyed.",
"mode": "REPEATED",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "DATA",
"type": "RECORD"
},
{
"description": "If event_type was an entity create, update, delete or import event, the name of the table in the database that this entity is stored in. NULL otherwise.",
"name": "entity_table_name",
"type": "STRING"
},
{
"description": "Currently left blank for future use.",
"mode": "REPEATED",
"name": "event_tags",
"type": "STRING"
},
{
"description": "One way hash of a combination of the user's IP address and user agent, if this event is a web request. Can be used to identify the user anonymously, even when user_id is not set. Cannot be used to identify the user over a time period of longer than about a month, because of IP address changes and browser updates.",
"name": "anonymised_user_agent_and_ip",
"type": "STRING"
},
{
"description": "The application environment that the event was streamed from.",
"name": "environment",
"type": "STRING"
},
{
"description": "The namespace of the instance of dfe-analytics that streamed this event. For example this might identify the name of the service that streamed the event.",
"name": "namespace",
"type": "STRING"
},
{
"description": "Defined in the same way as the DATA ARRAY of STRUCTs, except containing fields configured to be hidden in analytics_hidden_pii.yml",
"fields": [
{
"description": "Name of the field in the entity_table_name table in the database after it was created or updated, or just before it was imported or destroyed.",
"mode": "REQUIRED",
"name": "KEY",
"type": "STRING"
},
{
"description": "Contents of the field in the database after it was created or updated, or just before it was imported or destroyed.",
"mode": "REPEATED",
"name": "value",
"policyTags": {
"names": [
"${policy_tag_name}"
]
},
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "hidden_DATA",
"type": "RECORD"
}
]
25 changes: 25 additions & 0 deletions aks/dfe_analytics/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Passes the gcp_project_id input straight through to callers.
output "bigquery_project_id" {
  value       = var.gcp_project_id
  description = "ID of the Google cloud project e.g. 'rugged-abacus-218110', 'apply-for-qts-in-england'..."
}
# Name of the events table, as computed in local.gcp_table_name.
output "bigquery_table_name" {
  description = "BigQuery events table name"
  value       = local.gcp_table_name
}
# Name of the dataset, as computed in local.gcp_dataset_name.
output "bigquery_dataset" {
  value       = local.gcp_dataset_name
  description = "Bigquery dataset name"
}
# Exposes local.gcp_credentials to callers.
# NOTE(review): the local is defined outside the files visible here; confirm it
# holds no secret material, otherwise this output should be marked sensitive.
output "google_cloud_credentials" {
  value       = local.gcp_credentials
  description = "Credentials for Google workload identity federation"
}
# Bundles the four values above into a single map keyed by environment
# variable name, so callers can merge it into their application configuration.
output "dfe_analytics_variables_map" {
  description = "Map of environment variables required for dfe-analytics. Merge with application configuration secrets."

  value = {
    BIGQUERY_DATASET         = local.gcp_dataset_name
    BIGQUERY_PROJECT_ID      = var.gcp_project_id
    BIGQUERY_TABLE_NAME      = local.gcp_table_name
    GOOGLE_CLOUD_CREDENTIALS = local.gcp_credentials
  }
}
13 changes: 13 additions & 0 deletions aks/dfe_analytics/provider.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Provider requirements for this module.
terraform {
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "6.6.0"
    }

    # The module reads azurerm data sources (client config, user-assigned
    # identity), so the provider is declared explicitly instead of relying on
    # Terraform's implicit hashicorp/* lookup. No version constraint is set so
    # that the calling configuration stays in control of the azurerm version.
    azurerm = {
      source = "hashicorp/azurerm"
    }
  }
}

# Default Google provider configuration: project from the gcp_project_id input,
# region from local.gcp_region.
# NOTE(review): a provider block inside a module prevents callers from using
# count / for_each / depends_on on the module call. Consider moving this to the
# root configuration.
provider "google" {
  region  = local.gcp_region
  project = var.gcp_project_id
}
87 changes: 87 additions & 0 deletions aks/dfe_analytics/resources.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Service account named after the service short name and environment. It is
# granted access to the dataset further down in this file.
resource "google_service_account" "appender" {
  display_name = "Service Account appender to ${var.service_short} in ${var.environment} environment"
  account_id   = "appender-${var.service_short}-${var.environment}"
}

# Grants roles/iam.workloadIdentityUser on the appender service account to the
# principal held in local.gcp_principal_with_subject.
resource "google_service_account_iam_binding" "appender" {
  role               = "roles/iam.workloadIdentityUser"
  service_account_id = google_service_account.appender.name

  members = [local.gcp_principal_with_subject]
}

# KMS key ring. Only created when the caller did not pass an existing one
# through var.gcp_keyring.
resource "google_kms_key_ring" "bigquery" {
  count = var.gcp_keyring == null ? 1 : 0

  location = local.gcp_region
  name     = local.gcp_key_ring
}

# Create key if it doesn't exist (i.e. var.gcp_key was not provided).
resource "google_kms_crypto_key" "bigquery" {
  count = var.gcp_key == null ? 1 : 0

  name = local.gcp_key

  # The key ring resource only exists when var.gcp_keyring is null. When an
  # existing key ring is supplied, indexing google_kms_key_ring.bigquery[0]
  # fails at plan time, so the key ring ID is built from its name instead.
  # NOTE(review): assumes the existing key ring lives in var.gcp_project_id and
  # local.gcp_region, like the one this module would create. Confirm.
  key_ring = (
    var.gcp_keyring == null
    ? google_kms_key_ring.bigquery[0].id
    : "projects/${var.gcp_project_id}/locations/${local.gcp_region}/keyRings/${var.gcp_keyring}"
  )
}

# Default BigQuery service account of the provider's project.
data "google_bigquery_default_service_account" "main" {
}

# When this module created the key (var.gcp_key is null), allow the default
# BigQuery service account to encrypt and decrypt with it.
resource "google_kms_crypto_key_iam_member" "bigquery" {
  count = var.gcp_key == null ? 1 : 0

  member        = "serviceAccount:${data.google_bigquery_default_service_account.main.email}"
  role          = "roles/cloudkms.cryptoKeyEncrypterDecrypter"
  crypto_key_id = google_kms_crypto_key.bigquery[0].id
}

# Create dataset if it doesn't exist (i.e. var.gcp_dataset was not provided).
resource "google_bigquery_dataset" "main" {
  count = var.gcp_dataset == null ? 1 : 0

  dataset_id = local.gcp_dataset_name
  location   = local.gcp_region

  default_encryption_configuration {
    # When the module creates the key, reference it through the IAM member so
    # the dataset is only created after BigQuery has been granted access.
    # When an existing key is supplied (var.gcp_key set), the IAM member has
    # count 0 and indexing [0] fails at plan time, so the key ID is built from
    # the supplied names instead.
    # NOTE(review): assumes the existing key sits in var.gcp_keyring within
    # var.gcp_project_id / local.gcp_region and that BigQuery already has
    # encrypt/decrypt permission on it. Confirm.
    kms_key_name = (
      var.gcp_key == null
      ? google_kms_crypto_key_iam_member.bigquery[0].crypto_key_id
      : "projects/${var.gcp_project_id}/locations/${local.gcp_region}/keyRings/${var.gcp_keyring}/cryptoKeys/${var.gcp_key}"
    )
  }
}

# Add service account permission to dataset, whether we create it or it already exists.
#
# A non-authoritative iam_member is used instead of iam_binding: a binding owns
# the full member list for the role, so applying it to a pre-existing (possibly
# shared) dataset would remove every other member already holding this role.
resource "google_bigquery_dataset_iam_member" "appender" {
  dataset_id = var.gcp_dataset == null ? google_bigquery_dataset.main[0].dataset_id : var.gcp_dataset
  role       = "projects/${var.gcp_project_id}/roles/bigquery_appender_custom"
  member     = "serviceAccount:${google_service_account.appender.email}"
}

# Create table if dataset doesn't exist.
# The table is only managed here when this module also creates the dataset.
resource "google_bigquery_table" "events" {
  count = var.gcp_dataset == null ? 1 : 0

  dataset_id               = google_bigquery_dataset.main[0].dataset_id
  table_id                 = local.gcp_table_name
  description              = "Events streamed into the BigQuery from the application"
  clustering               = ["event_type"]
  deletion_protection      = var.gcp_table_deletion_protection
  require_partition_filter = false

  encryption_configuration {
    # Same key selection as the dataset: use the module-created key (through
    # the IAM member, to keep the ordering dependency) or build the ID of the
    # existing key. Indexing the IAM member with [0] when var.gcp_key is set
    # fails at plan time because that resource then has count 0.
    # NOTE(review): assumes an existing key sits in var.gcp_keyring within
    # var.gcp_project_id / local.gcp_region. Confirm.
    kms_key_name = (
      var.gcp_key == null
      ? google_kms_crypto_key_iam_member.bigquery[0].crypto_key_id
      : "projects/${var.gcp_project_id}/locations/${local.gcp_region}/keyRings/${var.gcp_keyring}/cryptoKeys/${var.gcp_key}"
    )
  }

  # Daily partitions keyed on the event timestamp column.
  time_partitioning {
    type  = "DAY"
    field = "occurred_at"
  }

  # https://github.com/DFE-Digital/dfe-analytics/blob/main/docs/create-events-table.sql
  # The template is committed under files/ (not file/); the wrong directory
  # makes templatefile() fail because the path does not exist.
  schema = templatefile(
    "${path.module}/files/events.json.tmpl",
    { policy_tag_name = local.gcp_policy_tag_name }
  )
}
62 changes: 62 additions & 0 deletions aks/dfe_analytics/tfdocs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
## Requirements

| Name | Version |
|------|---------|
| <a name="requirement_google"></a> [google](#requirement\_google) | 6.6.0 |

## Providers

| Name | Version |
|------|---------|
| <a name="provider_azurerm"></a> [azurerm](#provider\_azurerm) | n/a |
| <a name="provider_google"></a> [google](#provider\_google) | 6.6.0 |

## Modules

| Name | Source | Version |
|------|--------|---------|
| <a name="module_cluster_data"></a> [cluster\_data](#module\_cluster\_data) | ../cluster_data | n/a |

## Resources

| Name | Type |
|------|------|
| [google_bigquery_dataset.main](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/bigquery_dataset) | resource |
| [google_bigquery_dataset_iam_binding.appender](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/bigquery_dataset_iam_binding) | resource |
| [google_bigquery_table.events](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/bigquery_table) | resource |
| [google_kms_crypto_key.bigquery](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/kms_crypto_key) | resource |
| [google_kms_crypto_key_iam_member.bigquery](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/kms_crypto_key_iam_member) | resource |
| [google_kms_key_ring.bigquery](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/kms_key_ring) | resource |
| [google_service_account.appender](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/service_account) | resource |
| [google_service_account_iam_binding.appender](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/service_account_iam_binding) | resource |
| [azurerm_client_config.current](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/client_config) | data source |
| [azurerm_user_assigned_identity.gcp_wif](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/user_assigned_identity) | data source |
| [google_bigquery_default_service_account.main](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/data-sources/bigquery_default_service_account) | data source |

## Inputs

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_azure_resource_prefix"></a> [azure\_resource\_prefix](#input\_azure\_resource\_prefix) | Prefix of Azure resources for the service | `string` | n/a | yes |
| <a name="input_cluster"></a> [cluster](#input\_cluster) | AKS cluster name e.g. test, production... | `string` | n/a | yes |
| <a name="input_environment"></a> [environment](#input\_environment) | Service environment name e.g. production, test, pr-1234... | `string` | n/a | yes |
| <a name="input_gcp_dataset"></a> [gcp\_dataset](#input\_gcp\_dataset) | Name of an existing dataset. Optional: if not provided, create a new dataset | `string` | `null` | no |
| <a name="input_gcp_key"></a> [gcp\_key](#input\_gcp\_key) | Name of an existing customer-managed encryption key (CMEK). Optional: if not provided, create a new key | `string` | `null` | no |
| <a name="input_gcp_keyring"></a> [gcp\_keyring](#input\_gcp\_keyring) | Name of an existing keyring. Optional: if not provided, create a new keyring | `string` | `null` | no |
| <a name="input_gcp_policy_tag_id"></a> [gcp\_policy\_tag\_id](#input\_gcp\_policy\_tag\_id) | Policy tag ID | `number` | n/a | yes |
| <a name="input_gcp_project_id"></a> [gcp\_project\_id](#input\_gcp\_project\_id) | ID of the Google cloud project e.g. 'rugged-abacus-218110', 'apply-for-qts-in-england'... | `string` | n/a | yes |
| <a name="input_gcp_project_number"></a> [gcp\_project\_number](#input\_gcp\_project\_number) | Google cloud project number | `number` | n/a | yes |
| <a name="input_gcp_table_deletion_protection"></a> [gcp\_table\_deletion\_protection](#input\_gcp\_table\_deletion\_protection) | Prevents deletion of the event table. Default: true | `bool` | `true` | no |
| <a name="input_gcp_taxonomy_id"></a> [gcp\_taxonomy\_id](#input\_gcp\_taxonomy\_id) | Policy tags taxonomy ID | `number` | n/a | yes |
| <a name="input_namespace"></a> [namespace](#input\_namespace) | AKS Namespace where the service is deployed to | `string` | n/a | yes |
| <a name="input_service_short"></a> [service\_short](#input\_service\_short) | Short name for the service e.g. att, aytq... | `string` | n/a | yes |

## Outputs

| Name | Description |
|------|-------------|
| <a name="output_bigquery_dataset"></a> [bigquery\_dataset](#output\_bigquery\_dataset) | Bigquery dataset name |
| <a name="output_bigquery_project_id"></a> [bigquery\_project\_id](#output\_bigquery\_project\_id) | ID of the Google cloud project e.g. 'rugged-abacus-218110', 'apply-for-qts-in-england'... |
| <a name="output_bigquery_table_name"></a> [bigquery\_table\_name](#output\_bigquery\_table\_name) | BigQuery events table name |
| <a name="output_dfe_analytics_variables_map"></a> [dfe\_analytics\_variables\_map](#output\_dfe\_analytics\_variables\_map) | Map of environment variables required for dfe-analytics. Merge with application configuration secrets. |
| <a name="output_google_cloud_credentials"></a> [google\_cloud\_credentials](#output\_google\_cloud\_credentials) | Credentials for Google workload identity federation |
Loading

0 comments on commit 6505cf7

Please sign in to comment.