diff --git a/.github/workflows/databricks.yml b/.github/workflows/databricks.yml index 7cd0b68f..40f585c4 100644 --- a/.github/workflows/databricks.yml +++ b/.github/workflows/databricks.yml @@ -6,11 +6,11 @@ on: - main paths: - '.github/workflows/databricks.yml' - - 'terraform/databricks/**' + - 'terraform/databricks/databricks-workspace/**' # - '.github/actions/**' env: - terraform_workingdir: "terraform/databricks" + terraform_workingdir: "terraform/databricks/databricks-workspace" GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} jobs: terraform-lint: diff --git a/terraform/databricks/README.md b/terraform/databricks/databricks-workspace/README.md similarity index 51% rename from terraform/databricks/README.md rename to terraform/databricks/databricks-workspace/README.md index 2b36711f..8707a4d0 100644 --- a/terraform/databricks/README.md +++ b/terraform/databricks/databricks-workspace/README.md @@ -21,20 +21,33 @@ | [public\_subnet\_name](#input\_public\_subnet\_name) | The name of the Public (Host) Subnet within the Virtual Network. | `string` | `null` | no | | [private\_subnet\_name](#input\_private\_subnet\_name) | The name of the Private (Container) Subnet within the Virtual Network. | `string` | `null` | no | | [virtual\_network\_id](#input\_virtual\_network\_id) | The ID of a Virtual Network where this Databricks Cluster should be created. | `string` | `null` | no | +| [vnet\_address\_prefix](#input\_vnet\_address\_prefix) | Address prefix for Managed virtual network. Changing this forces a new resource to be created. | `string` | `"10.139"` | no | | [public\_subnet\_network\_security\_group\_association\_id](#input\_public\_subnet\_network\_security\_group\_association\_id) | The resource ID of the azurerm\_subnet\_network\_security\_group\_association resource which is referred to by the public\_subnet\_name field. 
| `string` | `null` | no | | [private\_subnet\_network\_security\_group\_association\_id](#input\_private\_subnet\_network\_security\_group\_association\_id) | The resource ID of the azurerm\_subnet\_network\_security\_group\_association resource which is referred to by the private\_subnet\_name field. | `string` | `null` | no | | [no\_public\_ip](#input\_no\_public\_ip) | Are public IP Addresses not allowed? | `bool` | `true` | no | | [public\_network\_access\_enabled](#input\_public\_network\_access\_enabled) | Should the Databricks Workspace be accesible through the public network? | `bool` | `false` | no | | [is\_private\_endpoint](#input\_is\_private\_endpoint) | Whether private endpoints are enabled to access the resource. | `bool` | `true` | no | -| [private\_link\_deployment\_type](#input\_private\_link\_deployment\_type) | Type of configuration for Private Link. In Standard configuration, there are separate private endpoints for frontend and backend. | `string` | `"simplified"` | no | +| [private\_link\_deployment\_type](#input\_private\_link\_deployment\_type) | Type of configuration for Private Link. In Standard configuration, there are separate private endpoints for frontend and backend. In Simplified configuration, there is a single private endpoint for UI and API. In Webauth configuration, only a webauth private endpoint is configured. | `string` | `"simplified"` | no | | [frontend\_private\_dns\_zone\_ids](#input\_frontend\_private\_dns\_zone\_ids) | Specifies the list of Private DNS Zones to include for the frontend. Must be provided when private\_link\_deployment\_type is standard | `list(string)` | `[]` | no | | [backend\_private\_dns\_zone\_ids](#input\_backend\_private\_dns\_zone\_ids) | Specifies the list of Private DNS Zones to include for the backend. 
Must be provided when is\_private\_endpoint is true | `list(string)` | `[]` | no | +| [backend\_dbfs\_private\_dns\_zone\_ids](#input\_backend\_dbfs\_private\_dns\_zone\_ids) | Specifies the list of Private DNS Zones to include for the backend connection to the Databricks File System Storage Account. Must be provided when is\_private\_endpoint is true | `list(string)` | `[]` | no | | [frontend\_subnet\_id](#input\_frontend\_subnet\_id) | The ID of the subnet from which private IP addresses will be allocated for the user access Private Endpoints. Must be provided when private\_link\_deployment\_type is standard | `string` | `null` | no | | [backend\_subnet\_id](#input\_backend\_subnet\_id) | The ID of the subnet from which private IP addresses will be allocated for the backend Private Endpoint. Must be provided when is\_private\_endpoint is true | `string` | `null` | no | -| [private\_web\_auth\_workspace](#input\_private\_web\_auth\_workspace) | Azure Databricks Workspace Instance Resource identifier for Private Endpoint Web Authentication. Defaults to the created workspace if not provided | `string` | `null` | no | +| [is\_web\_auth\_workspace](#input\_is\_web\_auth\_workspace) | Should this Azure Databricks Workspace Instance be used for Private Endpoint Web Authentication? There must be only one per region. | `string` | `false` | no | | [enable\_ip\_access\_list](#input\_enable\_ip\_access\_list) | Enable IP access lists. | `bool` | `false` | no | | [allow\_ip\_list](#input\_allow\_ip\_list) | Specifies the list of IPs allowed to the workspace. | `list(string)` | `[]` | no | | [block\_ip\_list](#input\_block\_ip\_list) | Specifies the list of IPs blocked to the workspace. | `list(string)` | `[]` | no | +| [machine\_learning\_workspace\_id](#input\_machine\_learning\_workspace\_id) | The ID of a Azure Machine Learning workspace to link with Databricks workspace. Changing this forces a new resource to be created. 
| `string` | `null` | no | +| [storage\_account\_name](#input\_storage\_account\_name) | Default Databricks File Storage account name. Defaults to a randomized name(e.g. dbstoragel6mfeghoe5kxu). Changing this forces a new resource to be created. | `string` | `null` | no | +| [storage\_account\_sku\_name](#input\_storage\_account\_sku\_name) | Storage account SKU name. Possible values include Standard\_LRS, Standard\_GRS, Standard\_RAGRS, Standard\_GZRS, Standard\_RAGZRS, Standard\_ZRS, Premium\_LRS or Premium\_ZRS. Changing this forces a new resource to be created. | `string` | `"Standard_GRS"` | no | +| [infrastructure\_encryption\_enabled](#input\_infrastructure\_encryption\_enabled) | Is the Databricks File System root file system enabled with a secondary layer of encryption with platform managed keys? This field is only valid if the Databricks Workspace sku is set to premium. Changing this forces a new resource to be created. | `bool` | `false` | no | +| [customer\_managed\_key\_enabled](#input\_customer\_managed\_key\_enabled) | Is the workspace enabled for customer managed key encryption? If true this enables the Managed Identity for the managed storage account. This field is only valid if the Databricks Workspace sku is set to premium. | `bool` | `false` | no | +| [managed\_services\_cmk\_key\_vault\_key\_id](#input\_managed\_services\_cmk\_key\_vault\_key\_id) | Customer managed encryption properties for the Databricks Workspace managed resources(e.g. Notebooks and Artifacts). | `string` | `null` | no | +| [managed\_disk\_cmk\_key\_vault\_key\_id](#input\_managed\_disk\_cmk\_key\_vault\_key\_id) | Customer managed encryption properties for the Databricks Workspace managed disks. | `string` | `null` | no | +| [managed\_disk\_cmk\_rotation\_to\_latest\_version\_enabled](#input\_managed\_disk\_cmk\_rotation\_to\_latest\_version\_enabled) | Whether customer managed keys for disk encryption will automatically be rotated to the latest version. 
| `bool` | `false` | no | +| [load\_balancer\_backend\_address\_pool\_id](#input\_load\_balancer\_backend\_address\_pool\_id) | Resource ID of the Outbound Load balancer Backend Address Pool for Secure Cluster Connectivity (No Public IP) workspace. Changing this forces a new resource to be created. | `string` | `null` | no | +| [nat\_gateway\_name](#input\_nat\_gateway\_name) | Name of the NAT gateway for Secure Cluster Connectivity (No Public IP) workspace subnets. Changing this forces a new resource to be created. | `string` | `"nat-gateway"` | no | +| [public\_ip\_name](#input\_public\_ip\_name) | Name of the Public IP for No Public IP workspace with managed vNet. Changing this forces a new resource to be created. | `string` | `"nat-gw-public-ip"` | no | ## Outputs @@ -42,6 +55,11 @@ |------|-------------| | [id](#output\_id) | Resource identifier of the instance of Azure Databricks. | | [workspace\_url](#output\_workspace\_url) | The URL used to connect to the workspace of the Azure Databricks. | +| [workspace\_id](#output\_workspace\_id) | The unique identifier of the databricks workspace in Databricks control plane. | | [name](#output\_name) | The name of the instance of Azure Databricks. | | [resource\_group\_name](#output\_resource\_group\_name) | Resource Group where the instance of Azure Databricks exists. | +| [managed\_resource\_group\_id](#output\_managed\_resource\_group\_id) | The ID of the Managed Resource Group created by the Databricks Workspace. | +| [storage\_account\_identity](#output\_storage\_account\_identity) | Block exports the principal\_id, tenant\_id and type of the internal databricks storage account identity for enabling Customer Managed Keys. | +| [managed\_disk\_identity](#output\_managed\_disk\_identity) | Block exports the principal\_id, tenant\_id and type of the internal databricks disks identity for enabling Customer Managed Keys. 
| +| [disk\_encryption\_set\_id](#output\_disk\_encryption\_set\_id) | The ID of Managed Disk Encryption Set created by the Databricks Workspace. | \ No newline at end of file diff --git a/terraform/databricks/main.tf b/terraform/databricks/databricks-workspace/main.tf similarity index 59% rename from terraform/databricks/main.tf rename to terraform/databricks/databricks-workspace/main.tf index aa82bd86..fd59e53e 100644 --- a/terraform/databricks/main.tf +++ b/terraform/databricks/databricks-workspace/main.tf @@ -3,21 +3,32 @@ # https://learn.microsoft.com/en-us/azure/databricks/administration-guide/cloud-configurations/azure/private-link resource "azurerm_databricks_workspace" "adl_databricks" { - name = "adb-${var.basename}" - resource_group_name = var.resource_group_name - location = var.location - sku = var.sku - - managed_resource_group_name = "${var.resource_group_name}-adb-managed" - public_network_access_enabled = var.public_network_access_enabled - network_security_group_rules_required = var.is_private_endpoint ? "NoAzureDatabricksRules" : "AllRules" + name = "adb-${var.basename}" + resource_group_name = var.resource_group_name + location = var.location + sku = var.sku + managed_resource_group_name = "rg-${var.basename}-adb-managed" + infrastructure_encryption_enabled = var.infrastructure_encryption_enabled + customer_managed_key_enabled = var.customer_managed_key_enabled + managed_services_cmk_key_vault_key_id = var.managed_services_cmk_key_vault_key_id + managed_disk_cmk_key_vault_key_id = var.managed_disk_cmk_key_vault_key_id + managed_disk_cmk_rotation_to_latest_version_enabled = var.managed_disk_cmk_rotation_to_latest_version_enabled + public_network_access_enabled = var.public_network_access_enabled + network_security_group_rules_required = var.is_private_endpoint ? 
"NoAzureDatabricksRules" : "AllRules" + load_balancer_backend_address_pool_id = var.load_balancer_backend_address_pool_id custom_parameters { + nat_gateway_name = var.nat_gateway_name + public_ip_name = var.public_ip_name no_public_ip = var.no_public_ip public_subnet_name = var.public_subnet_name private_subnet_name = var.private_subnet_name + vnet_address_prefix = var.vnet_address_prefix virtual_network_id = var.virtual_network_id public_subnet_network_security_group_association_id = var.public_subnet_network_security_group_association_id private_subnet_network_security_group_association_id = var.private_subnet_network_security_group_association_id + machine_learning_workspace_id = var.machine_learning_workspace_id + storage_account_name = var.storage_account_name + storage_account_sku_name = var.storage_account_sku_name } tags = var.tags @@ -33,6 +44,10 @@ resource "azurerm_databricks_workspace" "adl_databricks" { condition = (!var.is_private_endpoint || var.no_public_ip) error_message = "Private link endpoint requires No Public IP (no_public_ip set to 'true')" } + precondition { + condition = var.is_private_endpoint ? 
var.storage_account_name != null : true + error_message = "Private link endpoint requires Storage Account Name to be provided (storage_account_name not null)" + } precondition { condition = (!var.enable_ip_access_list || var.public_network_access_enabled) @@ -80,7 +95,7 @@ resource "databricks_ip_access_list" "adb_ws_block-list" { # Private Endpoint configuration module "adb_be_pe" { - source = "../private-endpoint" + source = "../../private-endpoint" basename = "${azurerm_databricks_workspace.adl_databricks[0].name}-databricks-be" resource_group_name = var.resource_group_name location = var.location @@ -90,11 +105,25 @@ module "adb_be_pe" { is_manual_connection = false private_dns_zone_ids = var.backend_private_dns_zone_ids tags = var.tags - module_enabled = var.module_enabled && var.is_private_endpoint + module_enabled = var.module_enabled && var.is_private_endpoint && (var.private_link_deployment_type != "webauth") +} + +module "adb_dbfs_pe" { + source = "../../private-endpoint" + basename = "${azurerm_databricks_workspace.adl_databricks[0].name}-databricks-dbfs" + resource_group_name = var.resource_group_name + location = var.location + subnet_id = var.backend_subnet_id + private_connection_resource_id = var.storage_account_name == null ? 
null : join("", [azurerm_databricks_workspace.adl_databricks[0].managed_resource_group_id, "/providers/Microsoft.Storage/storageAccounts/${var.storage_account_name}"]) + subresource_names = ["dfs"] + is_manual_connection = false + private_dns_zone_ids = var.backend_dbfs_private_dns_zone_ids + tags = var.tags + module_enabled = var.module_enabled && var.is_private_endpoint && (var.private_link_deployment_type != "webauth") } module "adb_fe_pe" { - source = "../private-endpoint" + source = "../../private-endpoint" basename = "${azurerm_databricks_workspace.adl_databricks[0].name}-databricks-fe" resource_group_name = var.resource_group_name location = var.location @@ -108,15 +137,15 @@ module "adb_fe_pe" { } module "adb_sso_pe" { - source = "../private-endpoint" + source = "../../private-endpoint" basename = "${azurerm_databricks_workspace.adl_databricks[0].name}-databricks-sso" resource_group_name = var.resource_group_name location = var.location - subnet_id = var.private_link_deployment_type == "standard" ? var.frontend_subnet_id : var.backend_subnet_id - private_connection_resource_id = (var.private_web_auth_workspace != null) ? var.private_web_auth_workspace : azurerm_databricks_workspace.adl_databricks[0].id + subnet_id = var.private_link_deployment_type == "simplified" ? 
var.backend_subnet_id : var.frontend_subnet_id + private_connection_resource_id = azurerm_databricks_workspace.adl_databricks[0].id subresource_names = ["browser_authentication"] is_manual_connection = false private_dns_zone_ids = var.frontend_private_dns_zone_ids tags = var.tags - module_enabled = var.module_enabled && var.is_private_endpoint + module_enabled = var.module_enabled && var.is_private_endpoint && (var.is_web_auth_workspace || var.private_link_deployment_type == "webauth") } \ No newline at end of file diff --git a/terraform/databricks/databricks-workspace/outputs.tf b/terraform/databricks/databricks-workspace/outputs.tf new file mode 100644 index 00000000..b82b7b7d --- /dev/null +++ b/terraform/databricks/databricks-workspace/outputs.tf @@ -0,0 +1,71 @@ +output "id" { + value = ( + length(azurerm_databricks_workspace.adl_databricks) > 0 ? + azurerm_databricks_workspace.adl_databricks[0].id : null + ) + description = "Resource identifier of the instance of Azure Databricks." +} + +output "workspace_url" { + value = ( + length(azurerm_databricks_workspace.adl_databricks) > 0 ? + azurerm_databricks_workspace.adl_databricks[0].workspace_url : null + ) + description = "The URL used to connect to the workspace of the Azure Databricks." +} + +output "workspace_id" { + value = ( + length(azurerm_databricks_workspace.adl_databricks) > 0 ? + azurerm_databricks_workspace.adl_databricks[0].workspace_id : null + ) + description = "The unique identifier of the databricks workspace in Databricks control plane." +} + +output "name" { + value = ( + length(azurerm_databricks_workspace.adl_databricks) > 0 ? + azurerm_databricks_workspace.adl_databricks[0].name : null + ) + description = "The name of the instance of Azure Databricks." +} + +output "resource_group_name" { + value = ( + length(azurerm_databricks_workspace.adl_databricks) > 0 ? 
+ azurerm_databricks_workspace.adl_databricks[0].resource_group_name : null + ) + description = "Resource Group where the instance of Azure Databricks exists." +} + +output "managed_resource_group_id" { + value = ( + length(azurerm_databricks_workspace.adl_databricks) > 0 ? + azurerm_databricks_workspace.adl_databricks[0].managed_resource_group_id : null + ) + description = "The ID of the Managed Resource Group created by the Databricks Workspace." +} + +output "storage_account_identity" { + value = ( + length(azurerm_databricks_workspace.adl_databricks) > 0 ? + azurerm_databricks_workspace.adl_databricks[0].storage_account_identity : [] + ) + description = "Block exports the principal_id, tenant_id and type of the internal databricks storage account identity for enabling Customer Managed Keys." +} + +output "managed_disk_identity" { + value = ( + length(azurerm_databricks_workspace.adl_databricks) > 0 ? + azurerm_databricks_workspace.adl_databricks[0].managed_disk_identity : [] + ) + description = "Block exports the principal_id, tenant_id and type of the internal databricks disks identity for enabling Customer Managed Keys." +} + +output "disk_encryption_set_id" { + value = ( + length(azurerm_databricks_workspace.adl_databricks) > 0 ? + azurerm_databricks_workspace.adl_databricks[0].disk_encryption_set_id : null + ) + description = "The ID of Managed Disk Encryption Set created by the Databricks Workspace." 
+} \ No newline at end of file diff --git a/terraform/databricks/providers.tf b/terraform/databricks/databricks-workspace/providers.tf similarity index 100% rename from terraform/databricks/providers.tf rename to terraform/databricks/databricks-workspace/providers.tf diff --git a/terraform/databricks/databricks-workspace/test/databricks.tf b/terraform/databricks/databricks-workspace/test/databricks.tf new file mode 100644 index 00000000..41c41d65 --- /dev/null +++ b/terraform/databricks/databricks-workspace/test/databricks.tf @@ -0,0 +1,254 @@ +module "databricks" { + source = "../" + basename = random_string.postfix.result + resource_group_name = module.local_rg.name + location = var.location + virtual_network_id = module.local_vnet.id + storage_account_name = "st${random_string.postfix.result}" + storage_account_sku_name = "Standard_LRS" + infrastructure_encryption_enabled = true + customer_managed_key_enabled = true + managed_services_cmk_key_vault_key_id = azurerm_key_vault_key.adl_adb_ws_cmk.id + managed_disk_cmk_key_vault_key_id = azurerm_key_vault_key.adl_adb_ws_cmk.id + managed_disk_cmk_rotation_to_latest_version_enabled = false + is_private_endpoint = true + private_link_deployment_type = "simplified" + is_web_auth_workspace = true + backend_subnet_id = module.local_snet_default.id + backend_private_dns_zone_ids = [module.local_pdnsz_adb.list[local.dns_databricks].id] + backend_dbfs_private_dns_zone_ids = [module.local_pdnsz_dbfs.list[local.dns_dbfs_dfs].id] + public_subnet_name = module.local_snet_public.name + private_subnet_name = module.local_snet_private.name + public_subnet_network_security_group_association_id = module.local_snet_nsg_association_public.id + private_subnet_network_security_group_association_id = module.local_snet_nsg_association_private.id + public_network_access_enabled = false + tags = {} +} + +resource "time_sleep" "time_sleep" { + depends_on = [ + module.databricks + ] + create_duration = "40s" +} + +resource 
"azurerm_databricks_workspace_customer_managed_key" "adl_adb_ws_cmk" { + depends_on = [ + azurerm_key_vault_access_policy.databricks, + time_sleep.time_sleep + ] + + workspace_id = module.databricks.id + key_vault_key_id = azurerm_key_vault_key.adl_adb_ws_cmk.id +} + +# Modules dependencies + +data "http" "ip" { + url = "https://ifconfig.me" +} + +module "local_rg" { + source = "../../../resource-group" + basename = random_string.postfix.result + location = var.location + tags = local.tags +} + +module "local_vnet" { + source = "../../../virtual-network" + resource_group_name = module.local_rg.name + basename = random_string.postfix.result + location = var.location + address_space = ["10.0.0.0/16"] +} + +module "local_snet_default" { + source = "../../../subnet" + resource_group_name = module.local_rg.name + name = "vnet-${random_string.postfix.result}-adb-default" + vnet_name = module.local_vnet.name + address_prefixes = ["10.0.6.0/24"] +} + +module "local_snet_public" { + source = "../../../subnet" + resource_group_name = module.local_rg.name + name = "vnet-${random_string.postfix.result}-adb-public" + vnet_name = module.local_vnet.name + address_prefixes = ["10.0.4.0/24"] + subnet_delegation = { + databricks-del-pub = [ + { + name = "Microsoft.Databricks/workspaces" + actions = [ + "Microsoft.Network/virtualNetworks/subnets/join/action", + "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action", + "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action" + ] + } + ] + } +} + +module "local_snet_private" { + source = "../../../subnet" + resource_group_name = module.local_rg.name + name = "vnet-${random_string.postfix.result}-adb-private" + vnet_name = module.local_vnet.name + address_prefixes = ["10.0.5.0/24"] + subnet_delegation = { + databricks-del-pri = [ + { + name = "Microsoft.Databricks/workspaces" + actions = [ + "Microsoft.Network/virtualNetworks/subnets/join/action", + 
"Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action", + "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action" + ] + } + ] + } +} + +module "local_nsg" { + source = "../../../network-security-group" + basename = random_string.postfix.result + resource_group_name = module.local_rg.name + location = var.location +} + +module "local_snet_nsg_association_public" { + source = "../../../subnet-network-security-group-association" + subnet_id = module.local_snet_public.id + network_security_group_id = module.local_nsg.id +} + +module "local_snet_nsg_association_private" { + source = "../../../subnet-network-security-group-association" + subnet_id = module.local_snet_private.id + network_security_group_id = module.local_nsg.id +} + +module "local_pdnsz_adb" { + source = "../../../private-dns-zone" + resource_group_name = module.local_rg.name + dns_zones = [local.dns_databricks] + vnet_id = module.local_vnet.id +} + +module "local_pdnsz_dbfs" { + source = "../../../private-dns-zone" + resource_group_name = module.local_rg.name + dns_zones = [local.dns_dbfs_dfs] + vnet_id = module.local_vnet.id +} + +module "local_pdnsz_kv" { + source = "../../../private-dns-zone" + resource_group_name = module.local_rg.name + dns_zones = [local.dns_key_vault] + vnet_id = module.local_vnet.id +} + +# Key Vault + +module "key_vault" { + source = "../../../key-vault" + basename = random_string.postfix.result + resource_group_name = module.local_rg.name + location = var.location + subnet_id = module.local_snet_default.id + private_dns_zone_ids = [module.local_pdnsz_kv.list[local.dns_key_vault].id] + sku_name = "premium" + public_network_access_enabled = true + is_private_endpoint = false + firewall_default_action = "Allow" + firewall_ip_rules = ["${data.http.ip.body}/32"] + tags = {} +} + +resource "azurerm_key_vault_key" "adl_adb_ws_cmk" { + depends_on = [ + azurerm_key_vault_access_policy.terraform, + azurerm_key_vault_access_policy.managed_services + 
] + + name = "databricks-cmk-certificate" + key_vault_id = module.key_vault.id + key_type = "RSA" + key_size = 2048 + + key_opts = [ + "decrypt", + "encrypt", + "sign", + "unwrapKey", + "verify", + "wrapKey", + ] +} + +data "azurerm_client_config" "current" {} + +resource "azurerm_key_vault_access_policy" "terraform" { + key_vault_id = module.key_vault.id + tenant_id = data.azurerm_client_config.current.tenant_id + object_id = data.azurerm_client_config.current.object_id + + key_permissions = [ + "Create", + "Delete", + "Get", + "Purge", + "Recover", + "Update", + "List", + "Decrypt", + "Sign", + "GetRotationPolicy", + ] +} + +resource "azurerm_key_vault_access_policy" "databricks" { + + key_vault_id = module.key_vault.id + tenant_id = module.databricks.storage_account_identity.0.tenant_id + object_id = module.databricks.storage_account_identity.0.principal_id + + key_permissions = [ + "Get", + "UnwrapKey", + "WrapKey", + ] +} + +resource "azurerm_key_vault_access_policy" "managed_disks" { + + key_vault_id = module.key_vault.id + tenant_id = module.databricks.managed_disk_identity.0.tenant_id + object_id = module.databricks.managed_disk_identity.0.principal_id + + key_permissions = [ + "Get", + "UnwrapKey", + "WrapKey", + ] +} + +# AzureDatabricks application which retrieves the keys for managed services encryption +#data "azuread_application" "azuredatabricks" { +# application_id = "2ff814a6-3304-4ab8-85cb-cd0e6f879c1d" +#} + +resource "azurerm_key_vault_access_policy" "managed_services" { + key_vault_id = module.key_vault.id + tenant_id = data.azurerm_client_config.current.tenant_id + object_id = "446abf6e-a8d1-49fe-9df7-a8a429c98c41" #data.azuread_application.azuredatabricks.object_id + + key_permissions = [ + "Get", + "UnwrapKey", + "WrapKey", + ] +} \ No newline at end of file diff --git a/terraform/databricks/test/locals.tf b/terraform/databricks/databricks-workspace/test/locals.tf similarity index 63% rename from terraform/databricks/test/locals.tf rename to 
terraform/databricks/databricks-workspace/test/locals.tf index ac5881e7..3c858cf6 100644 --- a/terraform/databricks/test/locals.tf +++ b/terraform/databricks/databricks-workspace/test/locals.tf @@ -6,4 +6,6 @@ locals { } dns_databricks = "privatelink.azuredatabricks.net" + dns_dbfs_dfs = "privatelink.dfs.core.windows.net" + dns_key_vault = "privatelink.vaultcore.azure.net" } \ No newline at end of file diff --git a/terraform/databricks/test/outputs.tf b/terraform/databricks/databricks-workspace/test/outputs.tf similarity index 71% rename from terraform/databricks/test/outputs.tf rename to terraform/databricks/databricks-workspace/test/outputs.tf index fa5a220d..873960b8 100644 --- a/terraform/databricks/test/outputs.tf +++ b/terraform/databricks/databricks-workspace/test/outputs.tf @@ -12,4 +12,8 @@ output "name" { output "resource_group_name" { value = module.databricks.resource_group_name +} + +output "cmk_id" { + value = azurerm_databricks_workspace_customer_managed_key.adl_adb_ws_cmk.id } \ No newline at end of file diff --git a/terraform/databricks/test/providers.tf b/terraform/databricks/databricks-workspace/test/providers.tf similarity index 100% rename from terraform/databricks/test/providers.tf rename to terraform/databricks/databricks-workspace/test/providers.tf diff --git a/terraform/databricks/test/unit_test.go b/terraform/databricks/databricks-workspace/test/unit_test.go similarity index 100% rename from terraform/databricks/test/unit_test.go rename to terraform/databricks/databricks-workspace/test/unit_test.go diff --git a/terraform/databricks/test/variables.tf b/terraform/databricks/databricks-workspace/test/variables.tf similarity index 100% rename from terraform/databricks/test/variables.tf rename to terraform/databricks/databricks-workspace/test/variables.tf diff --git a/terraform/databricks/variables.tf b/terraform/databricks/databricks-workspace/variables.tf similarity index 58% rename from terraform/databricks/variables.tf rename to 
terraform/databricks/databricks-workspace/variables.tf index ea27a3fc..cbb31fdd 100644 --- a/terraform/databricks/variables.tf +++ b/terraform/databricks/databricks-workspace/variables.tf @@ -69,6 +69,12 @@ variable "virtual_network_id" { default = null } +variable "vnet_address_prefix" { + type = string + description = "Address prefix for Managed virtual network. Changing this forces a new resource to be created." + default = "10.139" +} + variable "public_subnet_network_security_group_association_id" { type = string description = "The resource ID of the azurerm_subnet_network_security_group_association resource which is referred to by the public_subnet_name field." @@ -101,10 +107,10 @@ variable "is_private_endpoint" { variable "private_link_deployment_type" { type = string - description = "Type of configuration for Private Link. In Standard configuration, there are separate private endpoints for frontend and backend." + description = "Type of configuration for Private Link. In Standard configuration, there are separate private endpoints for frontend and backend. In Simplified configuration, there is a single private endpoint for UI and API. In Webauth configuration, only a webauth private endpoint is configured." validation { - condition = contains(["standard", "simplified"], lower(var.private_link_deployment_type)) - error_message = "Valid values for private_link_deployment_type are \"standard\" or \"simplified\"." + condition = contains(["standard", "simplified", "webauth"], lower(var.private_link_deployment_type)) + error_message = "Valid values for private_link_deployment_type are \"standard\", \"simplified\" or \"webauth\"." } default = "simplified" } @@ -120,6 +126,11 @@ variable "backend_private_dns_zone_ids" { description = "Specifies the list of Private DNS Zones to include for the backend. 
Must be provided when is_private_endpoint is true" default = [] } +variable "backend_dbfs_private_dns_zone_ids" { + type = list(string) + description = "Specifies the list of Private DNS Zones to include for the backend connection to the Databricks File System Storage Account. Must be provided when is_private_endpoint is true" + default = [] +} variable "frontend_subnet_id" { type = string @@ -133,10 +144,10 @@ variable "backend_subnet_id" { default = null } -variable "private_web_auth_workspace" { +variable "is_web_auth_workspace" { - type = string + type = bool - description = "Azure Databricks Workspace Instance Resource identifier for Private Endpoint Web Authentication. Defaults to the created workspace if not provided" - default = null + description = "Should this Azure Databricks Workspace Instance be used for Private Endpoint Web Authentication? There must be only one per region." + default = false } variable "enable_ip_access_list" { @@ -163,4 +174,74 @@ variable "block_ip_list" { error_message = "Invalid IP or IP range in CIDR format found in the list." } default = [] +} + +variable "machine_learning_workspace_id" { + type = string + description = "The ID of a Azure Machine Learning workspace to link with Databricks workspace. Changing this forces a new resource to be created." + default = null +} + +variable "storage_account_name" { + type = string + description = "Default Databricks File Storage account name. Defaults to a randomized name(e.g. dbstoragel6mfeghoe5kxu). Changing this forces a new resource to be created." + default = null +} + +variable "storage_account_sku_name" { + type = string + description = "Storage account SKU name. Possible values include Standard_LRS, Standard_GRS, Standard_RAGRS, Standard_GZRS, Standard_RAGZRS, Standard_ZRS, Premium_LRS or Premium_ZRS. Changing this forces a new resource to be created." 
+ validation { + condition = contains(["Standard_LRS", "Standard_GRS", "Standard_RAGRS", "Standard_GZRS", "Standard_RAGZRS", "Standard_ZRS", "Premium_LRS", "Premium_ZRS"], var.storage_account_sku_name) + error_message = "Valid values for storage_account_sku_name include Standard_LRS, Standard_GRS, Standard_RAGRS, Standard_GZRS, Standard_RAGZRS, Standard_ZRS, Premium_LRS or Premium_ZRS." + } + default = "Standard_GRS" +} + +variable "infrastructure_encryption_enabled" { + type = bool + description = "Is the Databricks File System root file system enabled with a secondary layer of encryption with platform managed keys? This field is only valid if the Databricks Workspace sku is set to premium. Changing this forces a new resource to be created." + default = false +} + +variable "customer_managed_key_enabled" { + type = bool + description = "Is the workspace enabled for customer managed key encryption? If true this enables the Managed Identity for the managed storage account. This field is only valid if the Databricks Workspace sku is set to premium." + default = false +} + +variable "managed_services_cmk_key_vault_key_id" { + type = string + description = "Customer managed encryption properties for the Databricks Workspace managed resources(e.g. Notebooks and Artifacts)." + default = null +} + +variable "managed_disk_cmk_key_vault_key_id" { + type = string + description = "Customer managed encryption properties for the Databricks Workspace managed disks." + default = null +} + +variable "managed_disk_cmk_rotation_to_latest_version_enabled" { + type = bool + description = "Whether customer managed keys for disk encryption will automatically be rotated to the latest version." + default = false +} + +variable "load_balancer_backend_address_pool_id" { + type = string + description = "Resource ID of the Outbound Load balancer Backend Address Pool for Secure Cluster Connectivity (No Public IP) workspace. Changing this forces a new resource to be created." 
+ default = null +} + +variable "nat_gateway_name" { + type = string + description = "Name of the NAT gateway for Secure Cluster Connectivity (No Public IP) workspace subnets. Changing this forces a new resource to be created." + default = "nat-gateway" +} + +variable "public_ip_name" { + type = string + description = "Name of the Public IP for No Public IP workspace with managed vNet. Changing this forces a new resource to be created." + default = "nat-gw-public-ip" } \ No newline at end of file diff --git a/terraform/databricks/outputs.tf b/terraform/databricks/outputs.tf deleted file mode 100644 index 64a07a52..00000000 --- a/terraform/databricks/outputs.tf +++ /dev/null @@ -1,31 +0,0 @@ -output "id" { - value = ( - length(azurerm_databricks_workspace.adl_databricks) > 0 ? - azurerm_databricks_workspace.adl_databricks[0].id : "" - ) - description = "Resource identifier of the instance of Azure Databricks." -} - -output "workspace_url" { - value = ( - length(azurerm_databricks_workspace.adl_databricks) > 0 ? - azurerm_databricks_workspace.adl_databricks[0].workspace_url : "" - ) - description = "The URL used to connect to the workspace of the Azure Databricks." -} - -output "name" { - value = ( - length(azurerm_databricks_workspace.adl_databricks) > 0 ? - azurerm_databricks_workspace.adl_databricks[0].name : "" - ) - description = "The name of the instance of Azure Databricks." -} - -output "resource_group_name" { - value = ( - length(azurerm_databricks_workspace.adl_databricks) > 0 ? - azurerm_databricks_workspace.adl_databricks[0].resource_group_name : "" - ) - description = "Resource Group where the instance of Azure Databricks exists." 
-} diff --git a/terraform/databricks/test/databricks.tf b/terraform/databricks/test/databricks.tf deleted file mode 100644 index 44c301d6..00000000 --- a/terraform/databricks/test/databricks.tf +++ /dev/null @@ -1,114 +0,0 @@ -module "databricks" { - source = "../" - basename = random_string.postfix.result - resource_group_name = module.local_rg.name - location = var.location - virtual_network_id = module.local_vnet.id - is_private_endpoint = true - private_link_deployment_type = "simplified" - backend_subnet_id = module.local_snet_default.id - backend_private_dns_zone_ids = [module.local_pdnsz_adb.list[local.dns_databricks].id] - public_subnet_name = module.local_snet_public.name - private_subnet_name = module.local_snet_private.name - public_subnet_network_security_group_association_id = module.local_snet_nsg_association_public.id - private_subnet_network_security_group_association_id = module.local_snet_nsg_association_private.id - public_network_access_enabled = true - enable_ip_access_list = true - allow_ip_list = ["${data.http.ip.body}/32"] - tags = {} -} - -# Modules dependencies - -data "http" "ip" { - url = "https://ifconfig.me" -} - -module "local_rg" { - source = "../../resource-group" - basename = random_string.postfix.result - location = var.location - tags = local.tags -} - -module "local_vnet" { - source = "../../virtual-network" - resource_group_name = module.local_rg.name - basename = random_string.postfix.result - location = var.location - address_space = ["10.0.0.0/16"] -} - -module "local_snet_default" { - source = "../../subnet" - resource_group_name = module.local_rg.name - name = "vnet-${random_string.postfix.result}-adb-default" - vnet_name = module.local_vnet.name - address_prefixes = ["10.0.6.0/24"] -} - -module "local_snet_public" { - source = "../../subnet" - resource_group_name = module.local_rg.name - name = "vnet-${random_string.postfix.result}-adb-public" - vnet_name = module.local_vnet.name - address_prefixes = ["10.0.4.0/24"] - 
subnet_delegation = { - databricks-del-pub = [ - { - name = "Microsoft.Databricks/workspaces" - actions = [ - "Microsoft.Network/virtualNetworks/subnets/join/action", - "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action", - "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action" - ] - } - ] - } -} - -module "local_snet_private" { - source = "../../subnet" - resource_group_name = module.local_rg.name - name = "vnet-${random_string.postfix.result}-adb-private" - vnet_name = module.local_vnet.name - address_prefixes = ["10.0.5.0/24"] - subnet_delegation = { - databricks-del-pri = [ - { - name = "Microsoft.Databricks/workspaces" - actions = [ - "Microsoft.Network/virtualNetworks/subnets/join/action", - "Microsoft.Network/virtualNetworks/subnets/prepareNetworkPolicies/action", - "Microsoft.Network/virtualNetworks/subnets/unprepareNetworkPolicies/action" - ] - } - ] - } -} - -module "local_nsg" { - source = "../../network-security-group" - basename = random_string.postfix.result - resource_group_name = module.local_rg.name - location = var.location -} - -module "local_snet_nsg_association_public" { - source = "../../subnet-network-security-group-association" - subnet_id = module.local_snet_public.id - network_security_group_id = module.local_nsg.id -} - -module "local_snet_nsg_association_private" { - source = "../../subnet-network-security-group-association" - subnet_id = module.local_snet_private.id - network_security_group_id = module.local_nsg.id -} - -module "local_pdnsz_adb" { - source = "../../private-dns-zone" - resource_group_name = module.local_rg.name - dns_zones = [local.dns_databricks] - vnet_id = module.local_vnet.id -} \ No newline at end of file