From 30724f9430c4368bf511cf6abf875e15becc7b1b Mon Sep 17 00:00:00 2001 From: Rafael Fonseca Date: Mon, 2 Dec 2024 15:11:02 +0100 Subject: [PATCH] OCPBUGS-45218: aws: fix perm requirement for edge nodes If an edge machine pool is specified without an instance type, the installer needs the `ec2:DescribeInstanceTypeOfferings` permission to derive the correct instance type available according to the local/wavelength zones being used. Before this change, the permission was optional and its absence would result in the installer picking a hard-coded non-edge pool instance type which can cause unsupported configuration issues in mapi's output: ``` providerStatus: conditions: - lastTransitionTime: "2024-11-28T15:32:09Z" message: 'error launching instance: The requested configuration is currently not supported. Please check the documentation for supported configurations.' reason: MachineCreationFailed status: "False" type: MachineCreation ``` --- pkg/asset/installconfig/aws/permissions.go | 25 +++++++ .../installconfig/aws/permissions_test.go | 65 +++++++++++++++++++ pkg/asset/machines/worker.go | 7 +- 3 files changed, 93 insertions(+), 4 deletions(-) diff --git a/pkg/asset/installconfig/aws/permissions.go b/pkg/asset/installconfig/aws/permissions.go index f8a764c3f59..8e3ec300d09 100644 --- a/pkg/asset/installconfig/aws/permissions.go +++ b/pkg/asset/installconfig/aws/permissions.go @@ -75,6 +75,10 @@ const ( // PermissionCarrierGateway is a permission set required when an edge compute pool with WL zones is set in the install-config. PermissionCarrierGateway PermissionGroup = "permission-create-carrier-gateway" + // PermissionEdgeDefaultInstance is a permission set required when an edge compute pool is set without an instance + // type in the install-config. + PermissionEdgeDefaultInstance PermissionGroup = "permission-edge-default-instance" + // PermissionMintCreds is a permission set required when minting credentials. PermissionMintCreds PermissionGroup = "permission-mint-creds" @@ -347,6 +351,10 @@ var permissions = map[PermissionGroup][]string{ // Needed to delete Carrier Gateways. "ec2:DeleteCarrierGateway", }, + PermissionEdgeDefaultInstance: { + // Needed to filter zones by instance type + "ec2:DescribeInstanceTypeOfferings", + }, // From: https://github.com/openshift/cloud-credential-operator/blob/master/pkg/aws/utils.go // TODO: export these in CCO so we don't have to duplicate them here. PermissionMintCreds: { @@ -542,6 +550,10 @@ func RequiredPermissionGroups(ic *types.InstallConfig) []PermissionGroup { permissionGroups = append(permissionGroups, PermissionCarrierGateway) } + if includesEdgeDefaultInstanceType(ic) { + permissionGroups = append(permissionGroups, PermissionEdgeDefaultInstance) + } + return permissionGroups } @@ -736,3 +748,16 @@ func includesWavelengthZones(installConfig *types.InstallConfig) bool { return false } + +// includesEdgeDefaultInstanceType checks if any edge machine pool is specified without an instance type. +func includesEdgeDefaultInstanceType(installConfig *types.InstallConfig) bool { + for _, mpool := range installConfig.Compute { + if mpool.Name != types.MachinePoolEdgeRoleName { + continue + } + if mpool.Platform.AWS == nil || len(mpool.Platform.AWS.InstanceType) == 0 { + return true + } + } + return false +} diff --git a/pkg/asset/installconfig/aws/permissions_test.go b/pkg/asset/installconfig/aws/permissions_test.go index 193af85b9ed..2c8b0305873 100644 --- a/pkg/asset/installconfig/aws/permissions_test.go +++ b/pkg/asset/installconfig/aws/permissions_test.go @@ -857,3 +857,68 @@ func TestIncludesWavelengthZones(t *testing.T) { }) }) } + +func TestIncludesEdgeDefaultInstance(t *testing.T) { + t.Run("Should be true when at least one edge compute pool specified", func(t *testing.T) { + t.Run("without platform", func(t *testing.T) { + ic := validInstallConfig() + ic.Compute = append(ic.Compute, types.MachinePool{ + Name: "edge", + }) + ic.Compute = append(ic.Compute, types.MachinePool{ + Name: "edge", + Platform: types.MachinePoolPlatform{ + AWS: &aws.MachinePool{ + Zones: []string{"us-west-2-pdx-1a", "us-west-2-wl1-sea-wlz-1"}, + InstanceType: "m6a.xlarge", + }, + }, + }) + requiredPerms := RequiredPermissionGroups(ic) + assert.Contains(t, requiredPerms, PermissionEdgeDefaultInstance) + }) + t.Run("without instance type", func(t *testing.T) { + ic := validInstallConfig() + ic.Compute = append(ic.Compute, types.MachinePool{ + Name: "edge", + Platform: types.MachinePoolPlatform{ + AWS: &aws.MachinePool{ + Zones: []string{"us-west-2-pdx-1a", "us-west-2-wl1-sea-wlz-1"}, + }, + }, + }) + ic.Compute = append(ic.Compute, types.MachinePool{ + Name: "edge", + Platform: types.MachinePoolPlatform{ + AWS: &aws.MachinePool{ + Zones: []string{"us-west-2-pdx-1a", "us-west-2-wl1-sea-wlz-1"}, + InstanceType: "m6a.xlarge", + }, + }, + }) + requiredPerms := RequiredPermissionGroups(ic) + assert.Contains(t, requiredPerms, PermissionEdgeDefaultInstance) + }) + }) + t.Run("Should be false when", func(t *testing.T) { + t.Run("edge compute specified with instance type", func(t *testing.T) { + ic := validInstallConfig() + ic.Compute = append(ic.Compute, types.MachinePool{ + Name: "edge", + Platform: types.MachinePoolPlatform{ + AWS: &aws.MachinePool{ + Zones: []string{"us-west-1a", "us-west-2-pdx-1a"}, + InstanceType: "m6a.xlarge", + }, + }, + }) + requiredPerms := RequiredPermissionGroups(ic) + assert.NotContains(t, requiredPerms, PermissionEdgeDefaultInstance) + }) + t.Run("edge compute not specified", func(t *testing.T) { + ic := validInstallConfig() + requiredPerms := RequiredPermissionGroups(ic) + assert.NotContains(t, requiredPerms, PermissionEdgeDefaultInstance) + }) + }) +} diff --git a/pkg/asset/machines/worker.go b/pkg/asset/machines/worker.go index 136a5a87440..b4cbb61732c 100644 --- a/pkg/asset/machines/worker.go +++ b/pkg/asset/machines/worker.go @@ -405,10 +405,9 @@ func (w *Worker) Generate(ctx context.Context, dependencies asset.Parents) error instanceTypes := awsdefaults.InstanceTypes(installConfig.Config.Platform.AWS.Region, arch, configv1.HighlyAvailableTopologyMode) switch pool.Name { case types.MachinePoolEdgeRoleName: - ok := awsSetPreferredInstanceByEdgeZone(ctx, instanceTypes, installConfig.AWS, zones) - if !ok { - logrus.Warnf("failed to find preferred instance type for one or more zones in the %s pool, using default: %s", pool.Name, instanceTypes[0]) - mpool.InstanceType = instanceTypes[0] + if !awsSetPreferredInstanceByEdgeZone(ctx, instanceTypes, installConfig.AWS, zones) { + // Using the default instance type from the non-edge pool often fails. + return fmt.Errorf("failed to find instance type for one or more zones in the %s pool. Please specify an instance type in the install-config.yaml", pool.Name) } default: mpool.InstanceType, err = aws.PreferredInstanceType(ctx, installConfig.AWS, instanceTypes, mpool.Zones)