From 7bb2a933f59ab445225ab8760d300ac647d289b5 Mon Sep 17 00:00:00 2001 From: Chad Kittel Date: Thu, 6 Apr 2023 23:14:33 +0000 Subject: [PATCH 1/6] register feature add storage account add vault add extension update AKS version add FW rules API version updates --- 01-prerequisites.md | 3 +- acr-stamp.bicep | 198 ++++++++++++++++++++++++++++++++--- cluster-stamp.bicep | 113 +++++++++++++++++--- networking/hub-regionA.bicep | 60 +++++++++++ 4 files changed, 344 insertions(+), 30 deletions(-) diff --git a/01-prerequisites.md b/01-prerequisites.md index cfe56c88..c6ceb7ba 100644 --- a/01-prerequisites.md +++ b/01-prerequisites.md @@ -40,9 +40,10 @@ This is the starting point for the instructions on deploying the [AKS baseline r az feature register --namespace "Microsoft.ContainerService" -n "AKS-AzureDefender" az feature register --namespace "Microsoft.ContainerService" -n "EnableWorkloadIdentityPreview" az feature register --namespace "Microsoft.ContainerService" -n "EnableImageCleanerPreview" + az feature register --namespace "Microsoft.ContainerService" -n "TrustedAccessPreview" # Keep running until all say "Registered." (This may take up to 20 minutes.) 
- az feature list -o table --query "[?name=='Microsoft.ContainerService/AKS-AzureDefender' || name=='Microsoft.ContainerService/EnableWorkloadIdentityPreview' || name=='Microsoft.ContainerService/EnableImageCleanerPreview'].{Name:name,State:properties.state}" + az feature list -o table --query "[?name=='Microsoft.ContainerService/AKS-AzureDefender' || name=='Microsoft.ContainerService/EnableWorkloadIdentityPreview' || name=='Microsoft.ContainerService/EnableImageCleanerPreview' || name=='Microsoft.ContainerService/TrustedAccessPreview'].{Name:name,State:properties.state}" # When all say "Registered" then re-register the AKS resource provider az provider register --namespace Microsoft.ContainerService diff --git a/acr-stamp.bicep b/acr-stamp.bicep index f57ada3a..eaa9d96a 100644 --- a/acr-stamp.bicep +++ b/acr-stamp.bicep @@ -58,16 +58,16 @@ var subRgUniqueString = uniqueString('aks', subscription().subscriptionId, resou /*** EXISTING RESOURCES ***/ -resource spokeResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = { +resource spokeResourceGroup 'Microsoft.Resources/resourceGroups@2022-09-01' existing = { scope: subscription() - name: '${split(targetVnetResourceId,'/')[4]}' + name: split(targetVnetResourceId,'/')[4] } -resource spokeVirtualNetwork 'Microsoft.Network/virtualNetworks@2021-05-01' existing = { +resource spokeVirtualNetwork 'Microsoft.Network/virtualNetworks@2022-09-01' existing = { scope: spokeResourceGroup - name: '${last(split(targetVnetResourceId,'/'))}' + name: last(split(targetVnetResourceId,'/')) - resource snetPrivateLinkEndpoints 'subnets@2021-05-01' existing = { + resource snetPrivateLinkEndpoints 'subnets' existing = { name: 'snet-privatelinkendpoints' } } @@ -75,7 +75,7 @@ resource spokeVirtualNetwork 'Microsoft.Network/virtualNetworks@2021-05-01' exis /*** RESOURCES ***/ // This Log Analytics workspace will be the log sink for all resources in the cluster resource group. 
This includes ACR, the AKS cluster, Key Vault, etc. It also is the Container Insights log sink for the AKS cluster. -resource laAks 'Microsoft.OperationalInsights/workspaces@2021-06-01' = { +resource laAks 'Microsoft.OperationalInsights/workspaces@2022-10-01' = { name: 'la-aks-${subRgUniqueString}' location: location properties: { @@ -88,7 +88,7 @@ resource laAks 'Microsoft.OperationalInsights/workspaces@2021-06-01' = { // Apply the built-in 'Container registries should have anonymous authentication disabled' policy. Azure RBAC only is allowed. var pdAnonymousContainerRegistryAccessDisallowedId = tenantResourceId('Microsoft.Authorization/policyDefinitions', '9f2dea28-e834-476c-99c5-3507b4728395') -resource paAnonymousContainerRegistryAccessDisallowed 'Microsoft.Authorization/policyAssignments@2021-06-01' = { +resource paAnonymousContainerRegistryAccessDisallowed 'Microsoft.Authorization/policyAssignments@2022-06-01' = { name: guid(resourceGroup().id, pdAnonymousContainerRegistryAccessDisallowedId) location: 'global' scope: resourceGroup() @@ -107,7 +107,7 @@ resource paAnonymousContainerRegistryAccessDisallowed 'Microsoft.Authorization/p // Apply the built-in 'Container registries should have local admin account disabled' policy. Azure RBAC only is allowed. 
var pdAdminAccountContainerRegistryAccessDisallowedId = tenantResourceId('Microsoft.Authorization/policyDefinitions', 'dc921057-6b28-4fbe-9b83-f7bec05db6c2') -resource paAdminAccountContainerRegistryAccessDisallowed 'Microsoft.Authorization/policyAssignments@2021-06-01' = { +resource paAdminAccountContainerRegistryAccessDisallowed 'Microsoft.Authorization/policyAssignments@2022-06-01' = { name: guid(resourceGroup().id, pdAdminAccountContainerRegistryAccessDisallowedId) location: 'global' scope: resourceGroup() @@ -130,12 +130,12 @@ resource dnsPrivateZoneAcr 'Microsoft.Network/privateDnsZones@2020-06-01' = { location: 'global' properties: {} - resource dnsVnetLinkAcrToSpoke 'virtualNetworkLinks@2020-06-01' = { + resource dnsVnetLinkAcrToSpoke 'virtualNetworkLinks' = { name: 'to_${spokeVirtualNetwork.name}' location: 'global' properties: { virtualNetwork: { - id: targetVnetResourceId + id: spokeVirtualNetwork.id } registrationEnabled: false } @@ -209,8 +209,8 @@ resource acrAks_diagnosticsSettings 'Microsoft.Insights/diagnosticSettings@2021- } } -// Expose Azure Container Registry via Private Link, into the cluster nodes subnet. -resource privateEndpointAcrToVnet 'Microsoft.Network/privateEndpoints@2021-05-01' = { +// Expose Azure Container Registry via Private Link, into the cluster nodes virtual network. 
+resource privateEndpointAcrToVnet 'Microsoft.Network/privateEndpoints@2022-09-01' = { name: 'pe-${acrAks.name}' location: location dependsOn: [ @@ -233,7 +233,7 @@ resource privateEndpointAcrToVnet 'Microsoft.Network/privateEndpoints@2021-05-01 ] } - resource privateDnsZoneGroupAcr 'privateDnsZoneGroups@2021-05-01' = { + resource privateDnsZoneGroupAcr 'privateDnsZoneGroups' = { name: 'default' properties: { privateDnsZoneConfigs: [ @@ -248,6 +248,178 @@ resource privateEndpointAcrToVnet 'Microsoft.Network/privateEndpoints@2021-05-01 } } +// AKS Backup is configured and managed via a backup vault in the same region +// This ideally wouldn't be tied to the individual cluster stamp, as it would +// exist longer than the lifecycle of the cluster, just log sinks. We are +// representing that by place this resource creation into this pre-cluster +// deployment file. +resource bvAksBackupVault 'Microsoft.DataProtection/backupVaults@2023-01-01' = { + name: 'bvAksBackupVault' + location: location + properties: { + storageSettings: [ + { + datastoreType: 'VaultStore' + type: 'GeoRedundant' + } + ] + securitySettings: { + immutabilitySettings: { + state: 'Disabled' + } + softDeleteSettings: { + state: 'On' + retentionDurationInDays: 14 + } + } + featureSettings: { + crossSubscriptionRestoreSettings: { + state: 'Disabled' + } + } + } + + // Daily UTC midnight Kubernetes backup policy as an example. Configure policy as needed. 
+ resource aksPolicy 'backupPolicies' = { + name: 'bp-aks-default-daily' + properties: { + objectType: 'BackupPolicy' + datasourceTypes: [ + 'Microsoft.ContainerService/managedClusters' + ] + policyRules: [ + { + objectType: 'AzureBackupRule' + name: 'BackupDaily' + backupParameters: { + objectType: 'AzureBackupParams' + backupType: 'Incremental' + } + dataStore: { + objectType: 'DataStoreInfoBase' + dataStoreType: 'OperationalStore' + } + trigger: { + objectType: 'ScheduleBasedTriggerContext' + schedule: { + timeZone: 'UTC' + repeatingTimeIntervals: [ + 'R/2023-04-06T0:0:00+00:00/P1D' + ] + } + taggingCriteria: [] + } + } + ] + } + } +} + +// Backup vault logging +resource bvAksBackupVault_diagnosticsSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { + name: 'default' + scope: bvAksBackupVault + properties: { + workspaceId: laAks.id + logs: [ + { + category: 'AzureBackupReport' + enabled: true + } + ] + } +} + +// This stores AKS Backup content, to be used by all clusters +resource storageAksBackups 'Microsoft.Storage/storageAccounts@2022-09-01' = { + name: 'stbackup${subRgUniqueString}' + location: location + sku: { + name: 'Standard_GRS' + } + kind: 'StorageV2' + properties: { + allowSharedKeyAccess: false + dnsEndpointType: 'Default' + defaultToOAuthAuthentication: false + publicNetworkAccess: 'Disabled' + allowCrossTenantReplication: false + allowBlobPublicAccess: false + minimumTlsVersion: 'TLS1_2' + isHnsEnabled: false + isLocalUserEnabled: false + isSftpEnabled: false + routingPreference: { + publishInternetEndpoints: false + publishMicrosoftEndpoints: true + routingChoice: 'MicrosoftRouting' + } + networkAcls: { + bypass: 'None' + virtualNetworkRules: [] + ipRules: [] + defaultAction: 'Deny' + } + supportsHttpsTrafficOnly: true + accessTier: 'Hot' + } +} + +// Private DNS Zone for our AKS Backup storage account +resource dnsPrivateZoneBlob 'Microsoft.Network/privateDnsZones@2020-06-01' = { + name: 'privatelink.blob.core.windows.net' + 
location: 'global' + properties: {} + + + // Enabling Storage Account Private Link on cluster virtual network. + resource vnetlnk 'virtualNetworkLinks' = { + name: 'to_${spokeVirtualNetwork.name}' + location: 'global' + properties: { + virtualNetwork: { + id: spokeVirtualNetwork.id + } + registrationEnabled: false + } + } +} + +resource peAksBackupStorage 'Microsoft.Network/privateEndpoints@2022-07-01' = { + name: 'pe-${storageAksBackups.name}' + location: location + properties: { + subnet: { + id: spokeVirtualNetwork::snetPrivateLinkEndpoints.id + } + privateLinkServiceConnections: [ + { + name: 'to_${spokeVirtualNetwork.name}' + properties: { + privateLinkServiceId: storageAksBackups.id + groupIds: [ + 'blob' + ] + } + } + ] + } + + resource pdnszg 'privateDnsZoneGroups' = { + name: 'default' + properties: { + privateDnsZoneConfigs: [ + { + name: 'privatelink-blob-core-windows-net' + properties: { + privateDnsZoneId: dnsPrivateZoneBlob.id + } + } + ] + } + } +} + /*** OUTPUTS ***/ output containerRegistryName string = acrAks.name diff --git a/cluster-stamp.bicep b/cluster-stamp.bicep index 17411846..8b683c69 100644 --- a/cluster-stamp.bicep +++ b/cluster-stamp.bicep @@ -43,7 +43,7 @@ param clusterAuthorizedIPRanges array = [] 'southeastasia' ]) param location string = 'eastus2' -param kubernetesVersion string = '1.25.5' +param kubernetesVersion string = '1.26.0' @description('Domain name to use for App Gateway and AKS ingress.') param domainName string = 'contoso.com' @@ -60,6 +60,7 @@ param gitOpsBootstrappingRepoBranch string = 'main' var subRgUniqueString = uniqueString('aks', subscription().subscriptionId, resourceGroup().id) var clusterName = 'aks-${subRgUniqueString}' +var backupStorageAccountName = 'stbackup${subRgUniqueString}' var agwName = 'apw-${clusterName}' var aksIngressDomainName = 'aks-ingress.${domainName}' @@ -171,6 +172,12 @@ resource keyVaultSecretsUserRole 'Microsoft.Authorization/roleDefinitions@2018-0 scope: subscription() } +// Built-in 
Azure RBAC role that is applied to the AKS backup managed identity to allow it to write data to storage. +resource storageBlobDataContributorRole 'Microsoft.Authorization/roleDefinitions@2018-01-01-preview' existing = { + name: 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' + scope: subscription() +} + /*** EXISTING RESOURCE GROUP RESOURCES ***/ // Azure Container Registry @@ -185,6 +192,22 @@ resource la 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' existi name: 'la-${clusterName}' } +// Backup Vault (for AKS Backup) +resource bvAksBackupVault 'Microsoft.DataProtection/backupVaults@2023-01-01' existing = { + scope: resourceGroup() + name: 'bvAksBackupVault' +} + +// The existing storage account for backups +resource storageAksBackups 'Microsoft.Storage/storageAccounts@2022-09-01' existing = { + scope: resourceGroup() + name: 'stbackup${subRgUniqueString}' + + resource blobService 'blobServices' existing = { + name: 'default' + } +} + // Kubernetes namespace: a0008 -- this doesn't technically exist prior to deployment, but is required as a resource reference later in the template // to support Azure RBAC-managed API Server access, scoped to the namespace level. #disable-next-line BCP081 // this namespaces child type doesn't have a defined bicep type yet. 
@@ -198,13 +221,13 @@ resource nsA0008 'Microsoft.ContainerService/managedClusters/namespaces@2022-01- // Spoke resource group resource targetResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = { scope: subscription() - name: '${split(targetVnetResourceId,'/')[4]}' + name: split(targetVnetResourceId, '/')[4] } // Spoke virtual network resource targetVirtualNetwork 'Microsoft.Network/virtualNetworks@2022-05-01' existing = { scope: targetResourceGroup - name: '${last(split(targetVnetResourceId,'/'))}' + name: last(split(targetVnetResourceId, '/')) // Spoke virutual network's subnet for the cluster nodes resource snetClusterNodes 'subnets' existing = { @@ -971,6 +994,7 @@ resource paAKSLinuxRestrictive 'Microsoft.Authorization/policyAssignments@2021-0 'gatekeeper-system' 'azure-arc' 'flux-system' + 'dataprotection-microsoft' // Known violations // K8sAzureAllowedSeccomp @@ -1061,6 +1085,7 @@ resource paRoRootFilesystem 'Microsoft.Authorization/policyAssignments@2021-06-0 'gatekeeper-system' 'azure-arc' 'flux-system' + 'dataprotection-microsoft' ] } excludedContainers: { @@ -1100,6 +1125,7 @@ resource paEnforceResourceLimits 'Microsoft.Authorization/policyAssignments@2021 'gatekeeper-system' 'azure-arc' 'flux-system' + 'dataprotection-microsoft' ] } effect: { @@ -1129,6 +1155,7 @@ resource paEnforceImageSource 'Microsoft.Authorization/policyAssignments@2021-06 'kube-system' 'gatekeeper-system' 'azure-arc' + 'dataprotection-microsoft' ] } effect: { @@ -1154,6 +1181,7 @@ resource paAllowedHostPaths 'Microsoft.Authorization/policyAssignments@2021-06-0 'gatekeeper-system' 'azure-arc' 'flux-system' + 'dataprotection-microsoft' ] } allowedHostPaths: { @@ -1184,6 +1212,7 @@ resource paAllowedExternalIPs 'Microsoft.Authorization/policyAssignments@2021-06 'kube-system' 'gatekeeper-system' 'azure-arc' + 'dataprotection-microsoft' ] } allowedExternalIPs: { @@ -1213,6 +1242,7 @@ resource paDisallowEndpointEditPermissions 'Microsoft.Authorization/policyAssign 
'kube-system' 'gatekeeper-system' 'azure-arc' + 'dataprotection-microsoft' ] } effect: { @@ -1238,6 +1268,7 @@ resource paDisallowNamespaceUsage 'Microsoft.Authorization/policyAssignments@202 'kube-system' 'gatekeeper-system' 'azure-arc' + 'dataprotection-microsoft' ] } namespaces: { @@ -1488,7 +1519,7 @@ resource kv 'Microsoft.KeyVault/vaults@2021-11-01-preview' = { } } - resource kvsGatewayPublicCert 'secrets' = { + resource kvsGatewayPublicCert 'secrets' = { name: 'gateway-public-cert' properties: { value: appGatewayListenerCertificate @@ -1496,7 +1527,7 @@ resource kv 'Microsoft.KeyVault/vaults@2021-11-01-preview' = { } } -resource kv_diagnosticSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { +resource kv_diagnosticSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { scope: kv name: 'default' properties: { @@ -1768,7 +1799,6 @@ resource mc 'Microsoft.ContainerService/managedClusters@2022-09-02-preview' = { loadBalancerProfile: json('null') serviceCidr: '172.16.0.0/16' dnsServiceIP: '172.16.0.10' - dockerBridgeCidr: '172.18.0.1/16' } aadProfile: { managed: true @@ -1810,23 +1840,23 @@ resource mc 'Microsoft.ContainerService/managedClusters@2022-09-02-preview' = { enabled: false // This is for the AKS-PrometheusAddonPreview, which is not enabled in this cluster as Container Insights is already collecting. 
} } - storageProfile: { // By default, do not support native state storage, enable as needed to support workloads that require state + storageProfile: { // By default, do not support native state storage, enable as needed to support workloads that require state blobCSIDriver: { enabled: false // Azure Blobs } diskCSIDriver: { - enabled: false // Azure Disk + enabled: true // Azure Disk } fileCSIDriver: { - enabled: false // Azure Files + enabled: false // Azure Files } snapshotController: { - enabled: false // CSI Snapshotter: https://github.com/kubernetes-csi/external-snapshotter + enabled: true // CSI Snapshotter: https://github.com/kubernetes-csi/external-snapshotter } } workloadAutoScalerProfile: { keda: { - enabled: false // Enable if using KEDA to scale workloads + enabled: false // Enable if using KEDA to scale workloads } } disableLocalAccounts: true @@ -1902,6 +1932,10 @@ resource mc 'Microsoft.ContainerService/managedClusters@2022-09-02-preview' = { paRbacEnabled paManagedIdentitiesEnabled + // Logical dependency, our backup source should exist before cluster creation, as the cluster will be + // bootstrapped with backup configured. 
+ storageAksBackups + peKv kvPodMiIngressControllerKeyVaultReader_roleAssignment kvPodMiIngressControllerSecretsUserRole_roleAssignment @@ -1974,7 +2008,7 @@ resource maAadA0008ReaderGroupServiceClusterUserRole_roleAssignment 'Microsoft.A } } -resource mc_diagnosticSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { +resource mc_diagnosticSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { scope: mc name: 'default' properties: { @@ -2073,6 +2107,53 @@ resource mc_fluxConfiguration 'Microsoft.KubernetesConfiguration/fluxConfigurati ] } +// New storage container in the existing storage account specifically for this cluster +resource backupContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2022-09-01' = { + parent: storageAksBackups::blobService + name: toLower('backup-${clusterName}') + properties: { + publicAccess: 'None' + } +} + +// Ensures that data protection (AKS Backup) is installed. +resource mc_dataProtectionExtension 'Microsoft.KubernetesConfiguration/extensions@2022-11-01' = { + scope: mc + name: 'dataProtection' + properties: { + extensionType: 'microsoft.dataprotection.kubernetes' + autoUpgradeMinorVersion: true + aksAssignedIdentity: { + type: 'SystemAssigned' + } + releaseTrain: 'Stable' + scope: { + cluster: { + releaseNamespace: 'dataprotection-microsoft' + } + } + configurationSettings: { + 'configuration.backupStorageLocation.config.subscriptionId': split(storageAksBackups.id, '/')[2] + 'configuration.backupStorageLocation.config.resourceGroup': split(storageAksBackups.id, '/')[4] + 'configuration.backupStorageLocation.config.storageAccount': storageAksBackups.name + 'configuration.backupStorageLocation.bucket': backupContainer.name + 'credentials.tenantId': mc.identity.tenantId + } + configurationProtectedSettings: {} + } +} + +// Grant the Data Protection extension write access this cluster's backup container in the storage account. 
+resource dataProtetionExtensionStorageContainer_roleAssignment 'Microsoft.Authorization/roleAssignments@2020-10-01-preview' = { + scope: backupContainer + name: guid(backupContainer.id, 'mi-dataProtection-extension', storageBlobDataContributorRole.id) + properties: { + roleDefinitionId: storageBlobDataContributorRole.id + principalId: mc_dataProtectionExtension.properties.aksAssignedIdentity.principalId + principalType: 'ServicePrincipal' + } +} + module ndEnsureClusterUserAssignedHasRbacToManageVMSS 'nested_EnsureClusterUserAssignedHasRbacToManageVMSS.bicep' = { name: 'EnsureClusterUserAssignedHasRbacToManageVMSS' scope: nodeResourceGroup @@ -2090,7 +2171,7 @@ resource st 'Microsoft.EventGrid/systemTopics@2021-12-01' = { } } -resource st_diagnosticSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { +resource st_diagnosticSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { scope: st name: 'default' properties: { @@ -2122,9 +2203,9 @@ resource wafPolicy 'Microsoft.Network/ApplicationGatewayWebApplicationFirewallPo managedRules: { managedRuleSets: [ { - ruleSetType: 'OWASP' - ruleSetVersion: '3.2' - ruleGroupOverrides: [] + ruleSetType: 'OWASP' + ruleSetVersion: '3.2' + ruleGroupOverrides: [] } { ruleSetType: 'Microsoft_BotManagerRuleSet' diff --git a/networking/hub-regionA.bicep b/networking/hub-regionA.bicep index 12605d16..4ce307c3 100644 --- a/networking/hub-regionA.bicep +++ b/networking/hub-regionA.bicep @@ -666,6 +666,66 @@ resource fwPolicy 'Microsoft.Network/firewallPolicies@2021-05-01' = { } ] } + { + ruleCollectionType: 'FirewallPolicyFilterRuleCollection' + name: 'AksBackup-Traffic' + priority: 350 + action: { + type: 'Allow' + } + rules: [ + { + ruleType: 'ApplicationRule' + name: 'container-origin' + description: 'Supports pulling AKS backup images.' 
+ protocols: [ + { + protocolType: 'Https' + port: 443 + } + ] + fqdnTags: [] + webCategories: [] + targetFqdns: [ + 'kubernetesbcdrextensionacr.azurecr.io' + 'pipelineagent.azurecr.io' + ] + targetUrls: [] + destinationAddresses: [] + terminateTLS: false + sourceAddresses: [] + sourceIpGroups: [ + ipgNodepoolSubnet.id + ] + } + { + ruleType: 'ApplicationRule' + name: 'cert-requirements' + description: 'Supports cert validation required by the AKS backup agent.' + protocols: [ + { + protocolType: 'Http' + port: 80 + } + ] + fqdnTags: [] + webCategories: [] + targetFqdns: [ + 'oneocsp.microsoft.com' + 'ocsp.digicert.com' + 'crl3.digicert.com' + 'www.microsoft.com' + ] + targetUrls: [] + destinationAddresses: [] + terminateTLS: false + sourceAddresses: [] + sourceIpGroups: [ + ipgNodepoolSubnet.id + ] + } + ] + } ] } } From 08227ca6b47d17896f39be2abd4594dda7855866 Mon Sep 17 00:00:00 2001 From: Chad Kittel Date: Fri, 7 Apr 2023 14:09:21 +0000 Subject: [PATCH 2/6] Remove old preview feature, fix more linter issues --- 01-prerequisites.md | 9 ++++----- 03-aad.md | 2 +- 05-bootstrap-prep.md | 6 +++++- networking/hub-regionA.bicep | 5 +++-- networking/spoke-BU0001A0008.bicep | 4 ++-- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/01-prerequisites.md b/01-prerequisites.md index c6ceb7ba..e88be4aa 100644 --- a/01-prerequisites.md +++ b/01-prerequisites.md @@ -30,20 +30,19 @@ This is the starting point for the instructions on deploying the [AKS baseline r 1. While the following feature(s) are still in _preview_, please enable them in your target subscription. - 1. [Register the Defender for Containers preview feature = `AKS-AzureDefender`](https://learn.microsoft.com/azure/defender-for-cloud/defender-for-containers-enable?pivots=defender-for-container-aks&tabs=k8s-deploy-cli%2Ck8s-deploy-asc%2Ck8s-verify-asc%2Ck8s-remove-arc%2Caks-removeprofile-api#deploy-the-defender-profile) - 1. 
[Register the Workload Identity preview feature = `EnableWorkloadIdentityPreview`](https://learn.microsoft.com/azure/aks/workload-identity-deploy-cluster#register-the-enableworkloadidentitypreview-feature-flag) - 1. Register the ImageCleaner (Earser) preview feature = `EnableImageCleanerPreview`](https://learn.microsoft.com/azure/aks/image-cleaner#prerequisites) + 1. [Register the ImageCleaner (Eraser) preview feature = `EnableImageCleanerPreview`](https://learn.microsoft.com/azure/aks/image-cleaner#prerequisites) + + 1. [Register the Trusted Access preview feature = `TrustedAccessPreview`](https://learn.microsoft.com/azure/backup/azure-kubernetes-service-cluster-manage-backups#register-the-trusted-access) to support AKS Backup. ```bash - az feature register --namespace "Microsoft.ContainerService" -n "AKS-AzureDefender" az feature register --namespace "Microsoft.ContainerService" -n "EnableWorkloadIdentityPreview" az feature register --namespace "Microsoft.ContainerService" -n "EnableImageCleanerPreview" az feature register --namespace "Microsoft.ContainerService" -n "TrustedAccessPreview" # Keep running until all say "Registered." (This may take up to 20 minutes.) 
- az feature list -o table --query "[?name=='Microsoft.ContainerService/AKS-AzureDefender' || name=='Microsoft.ContainerService/EnableWorkloadIdentityPreview' || name=='Microsoft.ContainerService/EnableImageCleanerPreview' || name=='Microsoft.ContainerService/TrustedAccessPreview'].{Name:name,State:properties.state}" + az feature list -o table --query "[?name=='Microsoft.ContainerService/EnableWorkloadIdentityPreview' || name=='Microsoft.ContainerService/EnableImageCleanerPreview' || name=='Microsoft.ContainerService/TrustedAccessPreview'].{Name:name,State:properties.state}" # When all say "Registered" then re-register the AKS resource provider az provider register --namespace Microsoft.ContainerService diff --git a/03-aad.md b/03-aad.md index 96e54e9c..f1fd605c 100644 --- a/03-aad.md +++ b/03-aad.md @@ -29,7 +29,7 @@ This does not configure anything related to workload identity. This configuratio 1. Playing the role as the Contoso Bicycle Azure AD team, login into the tenant where Kubernetes Cluster API authorization will be associated with. - > :bulb: Skip the `az login` command if you plan to use your current user account's Azure AD tenant for Kubernetes authorization. + > :bulb: Skip the `az login` command if you plan to use your current user account's Azure AD tenant for Kubernetes authorization. _Using the same tenant is common._ ```bash az login -t --allow-no-subscriptions diff --git a/05-bootstrap-prep.md b/05-bootstrap-prep.md index d4d607d1..968cf325 100644 --- a/05-bootstrap-prep.md +++ b/05-bootstrap-prep.md @@ -12,10 +12,14 @@ Container registries often have a lifecycle that extends beyond the scope of a s The role of this pre-existing ACR instance is made more prominant when we think about cluster bootstrapping. That is the process that happens after Azure resource deployment of the cluster, but before your first workload lands in the cluster. 
The cluster will be bootstrapped _immedately and automatically_ after resource deployment, which means you'll need ACR in place to act as your official OCI artifact repository for required images and Helm charts used in that bootstrapping process. -### Method +### Bootstrapping method We'll be bootstrapping this cluster with the Flux GitOps agent as installed as an AKS extension. This specific choice does not imply that Flux, or GitOps in general, is the only approach to bootstrapping. Consider your organizational familiarity and acceptance of tooling like this and decide if cluster bootstrapping should be performed with GitOps or via your deployment pipelines. If you are running a fleet of clusters, a GitOps approach is highly recommended for uniformity and easier governance. When running only a few clusters, GitOps might be seen as "too much" and you might instead opt for integrating that process into one or more deployment pipelines to ensure bootstrapping takes place. No matter which way you go, you'll need your bootstrapping artifacts ready to go before you start your cluster deployment so that you can minimize the time between cluster deployment and bootstrapping. Using the Flux AKS extension allows your cluster to start already bootstrapped and sets you up with a solid management foundation going forward. +### Additional resources + +In addition to ACR being deployed to support bootstrapping, this is where any other resources that are considered not tied to the lifecycle of an individual cluster are deployed. ACR is one example, as discussed above. Another example in this implementation is the AKS Backup Vault and backup artifacts storage account, which likely would exist prior to and after any individual AKS cluster's existence. When designing your pipelines, be sure to isolate components by their lifecycle, and watch for singletons in an architecture. These are typically resources like regional logging sinks, supporting global routing infrastructure, etc. 
This is in contrast to potentially transient/replaceable components, like the AKS cluster itself. _This implementation does not represent a complete separation of stamp vs regional resources, but is fairly close. Deviations are strictly for ease of deployment in this walkthrough instead of as examples of guidance._ + ## Steps 1. Create the AKS cluster resource group. diff --git a/networking/hub-regionA.bicep b/networking/hub-regionA.bicep index 4ce307c3..bf38573d 100644 --- a/networking/hub-regionA.bicep +++ b/networking/hub-regionA.bicep @@ -647,8 +647,8 @@ resource fwPolicy 'Microsoft.Network/firewallPolicies@2021-05-01' = { targetFqdns: [ '${location}.dp.kubernetesconfiguration.azure.com' 'mcr.microsoft.com' - '${split(environment().resourceManager, '/')[2]}' // Prevent the linter from getting upset at management.azure.com - https://github.com/Azure/bicep/issues/3080 - '${split(environment().authentication.loginEndpoint, '/')[2]}' // Prevent the linter from getting upset at login.microsoftonline.com + split(environment().resourceManager, '/')[2] // Prevent the linter from getting upset at management.azure.com - https://github.com/Azure/bicep/issues/3080 + split(environment().authentication.loginEndpoint, '/')[2] // Prevent the linter from getting upset at login.microsoftonline.com '*.blob.${environment().suffixes.storage}' // required for the extension installer to download the helm chart install flux. This storage account is not predictable, but does look like eusreplstore196 for example. 'azurearcfork8s.azurecr.io' // required for a few of the images installed by the extension. '*.docker.io' // Only required if you use the default bootstrapping manifests included in this repo. 
@@ -687,6 +687,7 @@ resource fwPolicy 'Microsoft.Network/firewallPolicies@2021-05-01' = { fqdnTags: [] webCategories: [] targetFqdns: [ + 'mcr.microsoft.com' 'kubernetesbcdrextensionacr.azurecr.io' 'pipelineagent.azurecr.io' ] diff --git a/networking/spoke-BU0001A0008.bicep b/networking/spoke-BU0001A0008.bicep index 1448781c..e57c526a 100644 --- a/networking/spoke-BU0001A0008.bicep +++ b/networking/spoke-BU0001A0008.bicep @@ -35,12 +35,12 @@ var clusterVNetName = 'vnet-spoke-${orgAppId}-00' // This is 'rg-enterprise-networking-hubs' if using the default values in the walkthrough resource hubResourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' existing = { scope: subscription() - name: '${split(hubVnetResourceId,'/')[4]}' + name: split(hubVnetResourceId,'/')[4] } resource hubVirtualNetwork 'Microsoft.Network/virtualNetworks@2021-05-01' existing = { scope: hubResourceGroup - name: '${last(split(hubVnetResourceId,'/'))}' + name: last(split(hubVnetResourceId,'/')) } // This is the firewall that was deployed in 'hub-default.bicep' From dc65adc4219bdc399adfcfacc240d67b183394b4 Mon Sep 17 00:00:00 2001 From: Chad Kittel Date: Fri, 7 Apr 2023 15:10:15 +0000 Subject: [PATCH 3/6] Add private DNS message Fixup backup vault Add storage diagnostics config --- 04-networking.md | 6 +++ 05-bootstrap-prep.md | 2 +- acr-stamp.bicep | 106 ++++++++++++++++++++++++++++++------------- 3 files changed, 82 insertions(+), 32 deletions(-) diff --git a/04-networking.md b/04-networking.md index 14415eaf..fa1742f7 100644 --- a/04-networking.md +++ b/04-networking.md @@ -104,6 +104,12 @@ The following two resource groups will be created and populated with networking > > Hubs and spokes are controlled by the networking team's GitHub Actions workflows. This automation is not included in this reference implementation as this body of work is focused on the AKS baseline and not the networking team's CI/CD practices. 
+## Private DNS Zones + +Private DNS zones in this reference implementation are implemented directly at the spoke level, meaning the workload team creates the private link DNS zones & records for the resources needed; furthermore, the workload is directly using Azure DNS for resolution. Your networking topology might support this decentralized model or instead DNS & DNS zones for Private Link might be handled at the regional hub or in a [VWAN virtual hub extension](https://learn.microsoft.com/azure/architecture/guide/networking/private-link-vwan-dns-virtual-hub-extension-pattern) by your networking team. + +If your organization operates a centralized DNS model, you'll need to adapt how DNS zone records are managed in this implementation to fit your existing enterprise networking DNS zone strategy. Since this reference implementation is expected to be deployed isolated from existing infrastructure, this is not something you need to address now, but it will be something to understand and address when taking your solution to production. + ### Next step :arrow_forward: [Prep for cluster bootstrapping](./05-bootstrap-prep.md) diff --git a/05-bootstrap-prep.md b/05-bootstrap-prep.md index 968cf325..41415f88 100644 --- a/05-bootstrap-prep.md +++ b/05-bootstrap-prep.md @@ -42,7 +42,7 @@ In addition to ACR being deployed to support bootstrapping, this is where any ot echo RESOURCEID_VNET_CLUSTERSPOKE_AKS_BASELINE: $RESOURCEID_VNET_CLUSTERSPOKE_AKS_BASELINE ``` -1. Deploy the container registry template. +1. Deploy the container registry and non-stamp resources template. ```bash # [This takes about four minutes.] 
diff --git a/acr-stamp.bicep b/acr-stamp.bicep index eaa9d96a..394491e8 100644 --- a/acr-stamp.bicep +++ b/acr-stamp.bicep @@ -248,19 +248,18 @@ resource privateEndpointAcrToVnet 'Microsoft.Network/privateEndpoints@2022-09-01 } } -// AKS Backup is configured and managed via a backup vault in the same region -// This ideally wouldn't be tied to the individual cluster stamp, as it would -// exist longer than the lifecycle of the cluster, just log sinks. We are -// representing that by place this resource creation into this pre-cluster -// deployment file. +// Supports configuring the AKS Backup extension. resource bvAksBackupVault 'Microsoft.DataProtection/backupVaults@2023-01-01' = { name: 'bvAksBackupVault' location: location + identity: { + type: 'SystemAssigned' + } properties: { storageSettings: [ { datastoreType: 'VaultStore' - type: 'GeoRedundant' + type: 'ZoneRedundant' } ] securitySettings: { @@ -272,11 +271,7 @@ resource bvAksBackupVault 'Microsoft.DataProtection/backupVaults@2023-01-01' = { retentionDurationInDays: 14 } } - featureSettings: { - crossSubscriptionRestoreSettings: { - state: 'Disabled' - } - } + featureSettings: {} } // Daily UTC midnight Kubernetes backup policy as an example. Configure policy as needed. 
@@ -304,32 +299,43 @@ resource bvAksBackupVault 'Microsoft.DataProtection/backupVaults@2023-01-01' = { schedule: { timeZone: 'UTC' repeatingTimeIntervals: [ - 'R/2023-04-06T0:0:00+00:00/P1D' + 'R/2023-01-01T00:00:00+00:00/P1D' ] } - taggingCriteria: [] + taggingCriteria: [ + { + tagInfo: { + tagName: 'Default' + } + taggingPriority: 99 + isDefault: true + } + ] } } + { + objectType: 'AzureRetentionRule' + name: 'Default' + isDefault: true + lifecycles: [ + { + deleteAfter: { + objectType: 'AbsoluteDeleteOption' + duration: 'P7D' + } + targetDataStoreCopySettings: [] + sourceDataStore: { + dataStoreType: 'OperationalStore' + objectType: 'DataStoreInfoBase' + } + } + ] + } ] } } } -// Backup vault logging -resource bvAksBackupVault_diagnosticsSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { - name: 'default' - scope: bvAksBackupVault - properties: { - workspaceId: laAks.id - logs: [ - { - category: 'AzureBackupReport' - enabled: true - } - ] - } -} - // This stores AKS Backup content, to be used by all clusters resource storageAksBackups 'Microsoft.Storage/storageAccounts@2022-09-01' = { name: 'stbackup${subRgUniqueString}' @@ -340,8 +346,7 @@ resource storageAksBackups 'Microsoft.Storage/storageAccounts@2022-09-01' = { kind: 'StorageV2' properties: { allowSharedKeyAccess: false - dnsEndpointType: 'Default' - defaultToOAuthAuthentication: false + defaultToOAuthAuthentication: true publicNetworkAccess: 'Disabled' allowCrossTenantReplication: false allowBlobPublicAccess: false @@ -350,7 +355,7 @@ resource storageAksBackups 'Microsoft.Storage/storageAccounts@2022-09-01' = { isLocalUserEnabled: false isSftpEnabled: false routingPreference: { - publishInternetEndpoints: false + publishInternetEndpoints: true publishMicrosoftEndpoints: true routingChoice: 'MicrosoftRouting' } @@ -360,9 +365,26 @@ resource storageAksBackups 'Microsoft.Storage/storageAccounts@2022-09-01' = { ipRules: [] defaultAction: 'Deny' } + encryption: { + keySource: 
'Microsoft.Storage' + services: { + file: { + keyType: 'Account' + enabled: true + } + blob: { + keyType: 'Account' + enabled: true + } + } + } supportsHttpsTrafficOnly: true accessTier: 'Hot' } + + resource blobservice 'blobServices' = { + name: 'default' + } } // Private DNS Zone for our AKS Backup storage account @@ -385,6 +407,28 @@ resource dnsPrivateZoneBlob 'Microsoft.Network/privateDnsZones@2020-06-01' = { } } +resource storageAksBackups_diagnosticsSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = { + name: 'default' + scope: storageAksBackups::blobservice + properties: { + workspaceId: laAks.id + logs: [ + { + category: 'StorageRead' + enabled: true + } + { + category: 'StorageWrite' + enabled: true + } + { + category: 'StorageDelete' + enabled: true + } + ] + } +} + resource peAksBackupStorage 'Microsoft.Network/privateEndpoints@2022-07-01' = { name: 'pe-${storageAksBackups.name}' location: location From 7235d323f39a0e3f55e8a2409dadd1c60cd3103f Mon Sep 17 00:00:00 2001 From: Chad Kittel Date: Fri, 7 Apr 2023 16:19:13 +0000 Subject: [PATCH 4/6] Enable trusted access Update alerts to use configured thresholds Add daily cap threshold alert Update to BotManager 1.0 rules in WAF --- acr-stamp.bicep | 41 +++++++++++++++++++++++++++++++ cluster-stamp.bicep | 59 +++++++++++++++++++++++++++++++-------------- 2 files changed, 82 insertions(+), 18 deletions(-) diff --git a/acr-stamp.bicep b/acr-stamp.bicep index 394491e8..6e3c53fa 100644 --- a/acr-stamp.bicep +++ b/acr-stamp.bicep @@ -83,6 +83,47 @@ resource laAks 'Microsoft.OperationalInsights/workspaces@2022-10-01' = { name: 'PerGB2018' } retentionInDays: 30 + publicNetworkAccessForIngestion: 'Enabled' + publicNetworkAccessForQuery: 'Enabled' + workspaceCapping: { + dailyQuotaGb: -1 // No daily cap (configure alert below if enabled) + } + } +} + +// Add an alert rule if the log analytics workspace daily data cap has been reached. 
+// Logging costs can be a significant part of any architecture, and putting a cap on +// a logging sink (none of which are applied here), can help keep costs in check but +// you run a risk of losing critical data. +resource sqrDailyDataCapBreach 'Microsoft.Insights/scheduledQueryRules@2018-04-16' = { + name: 'Daily data cap breached for workspace ${laAks.name} CIQ-1' + location: location + properties: { + description: 'This alert monitors daily data cap defined on a workspace and fires when the daily data cap is breached.' + displayName: 'Daily data cap breached for workspace ${laAks.name} CIQ-1' + enabled: 'true' + source: { + dataSourceId: laAks.id + queryType: 'ResultCount' + authorizedResources: [] + query: '_LogOperation | where Operation == "Data collection Status" | where Detail contains "OverQuota"' + } + schedule: { + frequencyInMinutes: 5 + timeWindowInMinutes: 5 + } + action: { + 'odata.type': 'Microsoft.WindowsAzure.Management.Monitoring.Alerts.Models.Microsoft.AppInsights.Nexus.DataContracts.Resources.ScheduledQueryRules.AlertingAction' + severity: '1' + aznsAction: { + actionGroup: [] + } + throttlingInMin: 1440 + trigger: { + threshold: 0 + thresholdOperator: 'GreaterThan' + } + } } } diff --git a/cluster-stamp.bicep b/cluster-stamp.bicep index 8b683c69..63178d93 100644 --- a/cluster-stamp.bicep +++ b/cluster-stamp.bicep @@ -60,7 +60,6 @@ param gitOpsBootstrappingRepoBranch string = 'main' var subRgUniqueString = uniqueString('aks', subscription().subscriptionId, resourceGroup().id) var clusterName = 'aks-${subRgUniqueString}' -var backupStorageAccountName = 'stbackup${subRgUniqueString}' var agwName = 'apw-${clusterName}' var aksIngressDomainName = 'aks-ingress.${domainName}' @@ -180,6 +179,11 @@ resource storageBlobDataContributorRole 'Microsoft.Authorization/roleDefinitions /*** EXISTING RESOURCE GROUP RESOURCES ***/ +// Useful to think of these as resources that are not tied to the lifecycle of any individual +// cluster. 
Logging sinks, container registries, backup destinations, etc are typical +// resources that would exist before & after any individual cluster is deployed or is removed +// from the solution. + // Azure Container Registry resource acr 'Microsoft.ContainerRegistry/registries@2021-12-01-preview' existing = { scope: resourceGroup() @@ -198,7 +202,7 @@ resource bvAksBackupVault 'Microsoft.DataProtection/backupVaults@2023-01-01' exi name: 'bvAksBackupVault' } -// The existing storage account for backups +// Storage Account for backups resource storageAksBackups 'Microsoft.Storage/storageAccounts@2022-09-01' existing = { scope: resourceGroup() name: 'stbackup${subRgUniqueString}' @@ -473,7 +477,7 @@ resource maJobsCompletedMoreThan6HoursAgo 'Microsoft.Insights/metricAlerts@2018- } resource maHighContainerCPUUsage 'Microsoft.Insights/metricAlerts@2018-03-01' = { - name: 'Container CPU usage high for ${clusterName} CI-9' + name: 'Container CPU usage violates the configured threshold for ${clusterName} CI-19' location: 'global' properties: { autoMitigate: true @@ -498,18 +502,18 @@ resource maHighContainerCPUUsage 'Microsoft.Insights/metricAlerts@2018-03-01' = ] } ] - metricName: 'cpuExceededPercentage' + metricName: 'cpuThresholdViolated' metricNamespace: 'Insights.Container/containers' name: 'Metric1' operator: 'GreaterThan' - threshold: 90 + threshold: 0 // This threshold is defined in the container-azm-ms-agentconfig.yaml file. timeAggregation: 'Average' skipMetricValidation: true } ] 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' } - description: 'This alert monitors container CPU utilization.' + description: 'This alert monitors container CPU usage. It uses the threshold defined in the config map.' 
enabled: true evaluationFrequency: 'PT1M' scopes: [ @@ -525,7 +529,7 @@ resource maHighContainerCPUUsage 'Microsoft.Insights/metricAlerts@2018-03-01' = } resource maHighContainerWorkingSetMemoryUsage 'Microsoft.Insights/metricAlerts@2018-03-01' = { - name: 'Container working set memory usage high for ${clusterName} CI-10' + name: 'Container working set memory usage violates the configured threshold for ${clusterName} CI-20' location: 'global' properties: { autoMitigate: true @@ -550,18 +554,18 @@ resource maHighContainerWorkingSetMemoryUsage 'Microsoft.Insights/metricAlerts@2 ] } ] - metricName: 'memoryWorkingSetExceededPercentage' + metricName: 'memoryWorkingSetThresholdViolated' metricNamespace: 'Insights.Container/containers' name: 'Metric1' operator: 'GreaterThan' - threshold: 90 + threshold: 0 // This threshold is defined in the container-azm-ms-agentconfig.yaml file. timeAggregation: 'Average' skipMetricValidation: true } ] 'odata.type': 'Microsoft.Azure.Monitor.SingleResourceMultipleMetricCriteria' } - description: 'This alert monitors container working set memory utilization.' + description: 'This alert monitors container working set memory usage. It uses the threshold defined in the config map.' 
enabled: true evaluationFrequency: 'PT1M' scopes: [ @@ -1682,7 +1686,7 @@ resource pdzAksIngress 'Microsoft.Network/privateDnsZones@2020-06-01' = { } } -resource mc 'Microsoft.ContainerService/managedClusters@2022-09-02-preview' = { +resource mc 'Microsoft.ContainerService/managedClusters@2023-02-02-preview' = { name: clusterName location: location tags: { @@ -1886,6 +1890,11 @@ resource mc 'Microsoft.ContainerService/managedClusters@2022-09-02-preview' = { enabled: true } enableNamespaceResources: false + ingressProfile: { + webAppRouting: { + enabled: false + } + } } identity: { type: 'UserAssigned' @@ -1894,8 +1903,8 @@ resource mc 'Microsoft.ContainerService/managedClusters@2022-09-02-preview' = { } } sku: { - name: 'Basic' - tier: 'Paid' + name: 'Base' + tier: 'Standard' } dependsOn: [ sci @@ -1932,14 +1941,26 @@ resource mc 'Microsoft.ContainerService/managedClusters@2022-09-02-preview' = { paRbacEnabled paManagedIdentitiesEnabled - // Logical dependency, our backup source should exist before cluster creation, as the cluster will be + // Logical dependency, our backup destination should exist before cluster creation, as the cluster will be // bootstrapped with backup configured. 
- storageAksBackups + backupContainer peKv kvPodMiIngressControllerKeyVaultReader_roleAssignment kvPodMiIngressControllerSecretsUserRole_roleAssignment ] + + // Grant managed identity access from our Backup Vault to this cluster to support + // AKS Backup + resource trustedAccess 'trustedAccessRoleBindings' = { + name: 'ta-aks-backup' + properties: { + roles: [ + 'Microsoft.DataProtection/backupVaults/backup-operator' + ] + sourceResourceId: bvAksBackupVault.id + } + } } resource acrKubeletAcrPullRole_roleAssignment 'Microsoft.Authorization/roleAssignments@2020-10-01-preview' = { @@ -2107,7 +2128,9 @@ resource mc_fluxConfiguration 'Microsoft.KubernetesConfiguration/fluxConfigurati ] } -// New storage container in the existing storage account specifically for this cluster +// New storage container in the existing storage account specifically for this cluster. +// All clusters could back up to a single container or you can follow a container-per-cluster +// model like shown here. resource backupContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2022-09-01' = { parent: storageAksBackups::blobService name: toLower('backup-${clusterName}') @@ -2137,7 +2160,7 @@ resource mc_dataProtectionExtension 'Microsoft.KubernetesConfiguration/extension 'configuration.backupStorageLocation.config.resourceGroup': split(storageAksBackups.id, '/')[4] 'configuration.backupStorageLocation.config.storageAccount': storageAksBackups.name 'configuration.backupStorageLocation.bucket': backupContainer.name - 'credentials.tenantId': mc.identity.tenantId + 'credentials.tenantId': subscription().tenantId } configurationProtectedSettings: {} } @@ -2209,7 +2232,7 @@ resource wafPolicy 'Microsoft.Network/ApplicationGatewayWebApplicationFirewallPo } { ruleSetType: 'Microsoft_BotManagerRuleSet' - ruleSetVersion: '0.1' + ruleSetVersion: '1.0' ruleGroupOverrides: [] } ] From 1c46d0c33d30956be655a6ec5132a85840ac46cf Mon Sep 17 00:00:00 2001 From: Chad Kittel Date: Fri, 7 Apr 2023 
17:32:24 +0000 Subject: [PATCH 5/6] Apply missing role assignments --- 06-aks-cluster.md | 2 +- cluster-stamp.bicep | 38 ++++++++++++++++++++++++++++++++++++ networking/hub-regionA.bicep | 2 ++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/06-aks-cluster.md b/06-aks-cluster.md index d097b55e..85058c36 100644 --- a/06-aks-cluster.md +++ b/06-aks-cluster.md @@ -20,7 +20,7 @@ Now that your [ACR instance is deployed and ready to support cluster bootstrappi :exclamation: By default, this deployment will allow unrestricted access to your cluster's API Server. You can limit access to the API Server to a set of well-known IP addresses (i.,e. a jump box subnet (connected to by Azure Bastion), build agents, or any other networks you'll administer the cluster from) by setting the `clusterAuthorizedIPRanges` parameter in all deployment options. This setting will also impact traffic originating from within the cluster trying to use the API server, so you will also need to include _all_ of the public IPs used by your egress Azure Firewall. For more information, see [Secure access to the API server using authorized IP address ranges](https://learn.microsoft.com/azure/aks/api-server-authorized-ip-ranges#create-an-aks-cluster-with-api-server-authorized-ip-ranges-enabled). ```bash - # [This takes about 18 minutes.] + # [This takes about 25 minutes.] 
az deployment group create -g rg-bu0001a0008 -f cluster-stamp.bicep -p targetVnetResourceId=${RESOURCEID_VNET_CLUSTERSPOKE_AKS_BASELINE} clusterAdminAadGroupObjectId=${AADOBJECTID_GROUP_CLUSTERADMIN_AKS_BASELINE} a0008NamespaceReaderAadGroupObjectId=${AADOBJECTID_GROUP_A0008_READER_AKS_BASELINE} k8sControlPlaneAuthorizationTenantId=${TENANTID_K8SRBAC_AKS_BASELINE} appGatewayListenerCertificate=${APP_GATEWAY_LISTENER_CERTIFICATE_AKS_BASELINE} aksIngressControllerCertificate=${AKS_INGRESS_CONTROLLER_CERTIFICATE_BASE64_AKS_BASELINE} domainName=${DOMAIN_NAME_AKS_BASELINE} gitOpsBootstrappingRepoHttpsUrl=${GITOPS_REPOURL} gitOpsBootstrappingRepoBranch=${GITOPS_CURRENT_BRANCH_NAME} location=eastus2 ``` diff --git a/cluster-stamp.bicep b/cluster-stamp.bicep index 63178d93..4cf5c8f5 100644 --- a/cluster-stamp.bicep +++ b/cluster-stamp.bicep @@ -177,6 +177,18 @@ resource storageBlobDataContributorRole 'Microsoft.Authorization/roleDefinitions scope: subscription() } +// Built-in Azure RBAC "Reader" role. Used by Backup Vault to see the AKS cluster. +resource readerRole 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: 'acdd72a7-3385-48ef-bd42-f606fba81ae7' + scope: subscription() +} + +// Disk Snapshot Contributor (for AKS Backup) +resource diskSnapshotContributorRole 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { + name: '7efff54f-a5b4-42b5-a1c5-5411624893ce' + scope: subscription() +} + /*** EXISTING RESOURCE GROUP RESOURCES ***/ // Useful to think of these as resources that are not tied to the lifecycle of any individual @@ -1910,6 +1922,7 @@ resource mc 'Microsoft.ContainerService/managedClusters@2023-02-02-preview' = { sci ndEnsureClusterIdentityHasRbacToSelfManagedResources + mcDiskSnapshotSupport_roleAssignment // Azure Policy for Kubernetes policies that we'd want in place before pods start showing up // in the cluster. 
The are not technically a dependency from the resource provider perspective, @@ -2160,12 +2173,25 @@ resource mc_dataProtectionExtension 'Microsoft.KubernetesConfiguration/extension 'configuration.backupStorageLocation.config.resourceGroup': split(storageAksBackups.id, '/')[4] 'configuration.backupStorageLocation.config.storageAccount': storageAksBackups.name 'configuration.backupStorageLocation.bucket': backupContainer.name + 'configuration.backupStorageLocation.prefix': '' + 'configuration.volumeSnapshotLocation.config.resourceGroup': resourceGroup().id // Using the cluster resource group, if you use another RG, RBAC needs to be adjusted. + 'configuration.volumeSnapshotLocation.config.incremental': 'false' 'credentials.tenantId': subscription().tenantId } configurationProtectedSettings: {} } } +// Kubelet needs access to the resource group for AKS backup snapshots +resource mcDiskSnapshotSupport_roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + name: guid(resourceGroup().id, diskSnapshotContributorRole.id, miClusterControlPlane.id) + properties: { + roleDefinitionId: diskSnapshotContributorRole.id + principalId: miClusterControlPlane.properties.principalId + principalType: 'ServicePrincipal' + } +} + // Grant the Data Protection extension write access this cluster's backup container in the storage account. 
resource dataProtetionExtensionStorageContainer_roleAssignment 'Microsoft.Authorization/roleAssignments@2020-10-01-preview' = { scope: backupContainer @@ -2177,6 +2203,18 @@ resource dataProtetionExtensionStorageContainer_roleAssignment 'Microsoft.Author } } +// Allows the backup vault's identity to see the cluster and the snapshots +// Details: https://learn.microsoft.com/azure/backup/azure-kubernetes-service-cluster-backup-concept#required-roles-and-permissions +resource backupVaultReadClusterAndSnapshots_roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + scope: resourceGroup() // Covers both the cluster and the snapshot resource group + name: guid(bvAksBackupVault.id, mc_dataProtectionExtension.id, readerRole.id) + properties: { + roleDefinitionId: readerRole.id + principalId: bvAksBackupVault.identity.principalId + principalType: 'ServicePrincipal' + } +} + module ndEnsureClusterUserAssignedHasRbacToManageVMSS 'nested_EnsureClusterUserAssignedHasRbacToManageVMSS.bicep' = { name: 'EnsureClusterUserAssignedHasRbacToManageVMSS' scope: nodeResourceGroup diff --git a/networking/hub-regionA.bicep b/networking/hub-regionA.bicep index bf38573d..306ab860 100644 --- a/networking/hub-regionA.bicep +++ b/networking/hub-regionA.bicep @@ -647,6 +647,7 @@ resource fwPolicy 'Microsoft.Network/firewallPolicies@2021-05-01' = { targetFqdns: [ '${location}.dp.kubernetesconfiguration.azure.com' 'mcr.microsoft.com' + 'raw.githubusercontent.com' split(environment().resourceManager, '/')[2] // Prevent the linter from getting upset at management.azure.com - https://github.com/Azure/bicep/issues/3080 split(environment().authentication.loginEndpoint, '/')[2] // Prevent the linter from getting upset at login.microsoftonline.com '*.blob.${environment().suffixes.storage}' // required for the extension installer to download the helm chart install flux. This storage account is not predictable, but does look like eusreplstore196 for example. 
@@ -690,6 +691,7 @@ resource fwPolicy 'Microsoft.Network/firewallPolicies@2021-05-01' = { 'mcr.microsoft.com' 'kubernetesbcdrextensionacr.azurecr.io' 'pipelineagent.azurecr.io' + 'search.maven.org' ] targetUrls: [] destinationAddresses: [] From ed982f528db04c6895c5a95b507018dda4959a71 Mon Sep 17 00:00:00 2001 From: Chad Kittel Date: Fri, 7 Apr 2023 19:44:29 +0000 Subject: [PATCH 6/6] enroll a backup instance --- cluster-stamp.bicep | 82 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/cluster-stamp.bicep b/cluster-stamp.bicep index 4cf5c8f5..b763fd81 100644 --- a/cluster-stamp.bicep +++ b/cluster-stamp.bicep @@ -177,6 +177,12 @@ resource storageBlobDataContributorRole 'Microsoft.Authorization/roleDefinitions scope: subscription() } +// Built-in Azure RBAC role that is applied to the AKS backup managed identity to allow it to write data to storage. +resource storageAccountContributorRole 'Microsoft.Authorization/roleDefinitions@2018-01-01-preview' existing = { + name: '17d1049b-9a84-46fb-8f53-869881c3d3ab' + scope: subscription() +} + // Built-in Azure RBAC "Reader" role. Used by Backup Vault to see the AKS cluster. resource readerRole 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = { name: 'acdd72a7-3385-48ef-bd42-f606fba81ae7' @@ -212,6 +218,10 @@ resource la 'Microsoft.OperationalInsights/workspaces@2021-12-01-preview' existi resource bvAksBackupVault 'Microsoft.DataProtection/backupVaults@2023-01-01' existing = { scope: resourceGroup() name: 'bvAksBackupVault' + + resource defaultPolicy 'backupPolicies' existing = { + name: 'bp-aks-default-daily' + } } // Storage Account for backups @@ -2155,7 +2165,7 @@ resource backupContainer 'Microsoft.Storage/storageAccounts/blobServices/contain // Ensures that data protection (AKS Backup) is installed. 
resource mc_dataProtectionExtension 'Microsoft.KubernetesConfiguration/extensions@2022-11-01' = { scope: mc - name: 'dataProtection' + name: 'azure-aks-backup' properties: { extensionType: 'microsoft.dataprotection.kubernetes' autoUpgradeMinorVersion: true @@ -2203,6 +2213,76 @@ resource dataProtetionExtensionStorageContainer_roleAssignment 'Microsoft.Author } } +// Grant the Data Protection extension storage contributor access to this cluster's backup storage account. +resource dataProtetionExtensionStorage_roleAssignment 'Microsoft.Authorization/roleAssignments@2020-10-01-preview' = { + scope: storageAksBackups + name: guid(backupContainer.id, mc_dataProtectionExtension.id, storageAccountContributorRole.id) + properties: { + roleDefinitionId: storageAccountContributorRole.id + principalId: mc_dataProtectionExtension.properties.aksAssignedIdentity.principalId + principalType: 'ServicePrincipal' + } +} + +// Enable daily backups +// Covers all current and future namespaces +// +resource backupInstance 'Microsoft.DataProtection/backupVaults/backupInstances@2023-01-01' = { + parent: bvAksBackupVault + name: 'bi-${mc.name}' + properties: { + friendlyName: 'bi-${clusterName}' + objectType: 'BackupInstance' + dataSourceSetInfo: { + objectType: 'DatasourceSet' + resourceID: mc.id + resourceType: 'Microsoft.ContainerService/managedClusters' + resourceLocation: location + resourceName: mc.name + resourceUri: mc.id + datasourceType: 'Microsoft.ContainerService/managedClusters' + } + dataSourceInfo: { + objectType: 'Datasource' + resourceID: mc.id + resourceType: 'Microsoft.ContainerService/managedClusters' + resourceLocation: location + resourceName: mc.name + resourceUri: mc.id + datasourceType: 'Microsoft.ContainerService/managedClusters' + } + policyInfo: { + policyId: bvAksBackupVault::defaultPolicy.id + policyParameters: { + dataStoreParametersList: [ + { + objectType: 'AzureOperationalStoreParameters' + dataStoreType: 'OperationalStore' + resourceGroupId: resourceGroup().id + } + ] + 
backupDatasourceParametersList: [ + { + objectType: 'KubernetesClusterBackupDatasourceParameters' + includeClusterScopeResources: true + snapshotVolumes: false + labelSelectors: null + includedNamespaces: null + excludedNamespaces: null + includedResourceTypes: null + excludedResourceTypes: [ + 'v1/Secret' + ] + } + ] + } + } + datasourceAuthCredentials: null + validationType: null + } +} + + // Allows the backup vault's identity to see the cluster and the snapshots // Details: https://learn.microsoft.com/azure/backup/azure-kubernetes-service-cluster-backup-concept#required-roles-and-permissions resource backupVaultReadClusterAndSnapshots_roleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {