From b674f3dd0c9f9dc238b2bbd5518fade662d12077 Mon Sep 17 00:00:00 2001 From: Alper Rifat Ulucinar Date: Thu, 14 Dec 2023 15:07:37 +0300 Subject: [PATCH] Cache the Terraform instance state returned from schema.Resource.Apply in external-client's Create even if the returned diagnostics contain errors. - In most cases, the Terraform plugin SDK's create implementation for a resource comprises multiple steps (with the creation of the external resource being the very first step). In case, the creation succeeds but any of the subsequent steps fail, then upjet's TF plugin SDK-based external client will not record this state losing the only opportunity to associate the MR with the newly provisioned external resource in some cases. We now put this initial state into the upjet's in-memory state cache so that it's now available for the external- client's next observe call. Signed-off-by: Alper Rifat Ulucinar --- pkg/controller/external_nofork.go | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/pkg/controller/external_nofork.go b/pkg/controller/external_nofork.go index 6197b220..1ae2e28c 100644 --- a/pkg/controller/external_nofork.go +++ b/pkg/controller/external_nofork.go @@ -564,8 +564,36 @@ func (n *noForkExternal) Create(ctx context.Context, mg xpresource.Managed) (man start := time.Now() newState, diag := n.resourceSchema.Apply(ctx, n.opTracker.GetTfState(), n.instanceDiff, n.ts.Meta) metrics.ExternalAPITime.WithLabelValues("create").Observe(time.Since(start).Seconds()) - // diag := n.resourceSchema.CreateWithoutTimeout(ctx, n.resourceData, n.ts.Meta) if diag != nil && diag.HasError() { + // we need to store the Terraform state from the downstream create call if + // one is available even if the diagnostics has reported errors. + // The downstream create call comprises multiple external API calls such as + // the external resource create call, expected state assertion calls + // (external resource state reads) and external resource state refresh + // calls, etc. Any of these steps can fail and if the initial + // external resource create call succeeds, then the TF plugin SDK makes the + // state (together with the TF ID associated with the external resource) + // available reporting any encountered issues in the returned diagnostics. + // If we don't record the returned state from the successful create call, + // then we may hit issues for resources whose Crossplane identifiers cannot + // be computed solely from spec parameters and provider configs, i.e., + // those that contain a random part generated by the CSP. Please see: + // https://github.com/upbound/provider-aws/issues/1010, or + // https://github.com/upbound/provider-aws/issues/1018, which both involve + // MRs with config.IdentifierFromProvider external-name configurations. + // NOTE: The safe (and thus the proper) thing to do in this situation from + // the Crossplane provider's perspective is to set the MR's + // `crossplane.io/external-create-failed` annotation because the provider + // does not know the exact state the external resource is in and a manual + // intervention may be required. But at the time we are introducing this + // fix, we believe associating the external-resource with the MR will just + // provide a better UX although the external resource may not be in the + // expected/desired state yet. We are also planning for improvements on the + // crossplane-runtime's managed reconciler to better support upjet's async + // operations in this regard. + if !n.opTracker.HasState() { // we do not expect a previous state here but just being defensive + n.opTracker.SetTfState(newState) + } return managed.ExternalCreation{}, errors.Errorf("failed to create the resource: %v", diag) }