KUBESAW-187: Adjust ksctl adm restart command to use rollout-restart (#79)

* KUBESAW-187: Adjust ksctl adm restart command to use rollout-restart
* some checking
* golint
* few changes to the logic
* t cases
* eview comments
* Review comments
* check the args
* adding unit test cases
* Change in test cases
* minor change in unit test
* unregister-member test
* unit test case for restart
* test case for delete
* Rc1
* golint
* changes to the logic of restart
* review comments-2
* restart-test changes
* CI
* golang ci
* adding tc
* some addition to test cases
* some changes
* adding some comments
* autoscalling buffer test case
* Modification of test cases
* Go lint
* Test case of status
* Linter
* test of unregister_member
* phase-3 rc
* code cov
* some changes to status func
* leftovers
* merge conflict
* some changes as per rc
* go version fix
* extra left overs
* linter

---------

Signed-off-by: Feny Mehta <[email protected]>
Co-authored-by: Matous Jobanek <[email protected]>
Co-authored-by: Francisc Munteanu <[email protected]>
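Judging from the new cobra spec in the diff below, the -t/--target-cluster flag and the deployment-name argument are gone: a typical invocation now looks like "ksctl adm restart host" (where "host" is a placeholder cluster name from the ksctl configuration), and the command restarts all operator deployments in that cluster's operator namespace.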
1 parent 5a14398, commit a89cb52
Showing 9 changed files with 553 additions and 306 deletions.
@@ -1,157 +1,227 @@
package adm

import (
    "context"
    "fmt"
    "os"
    "time"

    "github.com/kubesaw/ksctl/pkg/client"
    "github.com/kubesaw/ksctl/pkg/cmd/flags"
    "github.com/kubesaw/ksctl/pkg/configuration"
    clicontext "github.com/kubesaw/ksctl/pkg/context"
    "github.com/kubesaw/ksctl/pkg/ioutils"

    "github.com/spf13/cobra"
    appsv1 "k8s.io/api/apps/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/apimachinery/pkg/util/wait"
    corev1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/cli-runtime/pkg/genericclioptions"
    "k8s.io/cli-runtime/pkg/genericiooptions"
    kubectlrollout "k8s.io/kubectl/pkg/cmd/rollout"
    cmdutil "k8s.io/kubectl/pkg/cmd/util"
    runtimeclient "sigs.k8s.io/controller-runtime/pkg/client"
)

type (
    RolloutRestartFunc       func(ctx *clicontext.CommandContext, deployment appsv1.Deployment) error
    RolloutStatusCheckerFunc func(ctx *clicontext.CommandContext, deployment appsv1.Deployment) error
)

// NewRestartCmd() is a function to restart the whole operator, it relies on the target cluster and fetches the cluster config
// 1. If the command is run for host operator, it restart the whole host operator.(it deletes olm based pods(host-operator pods),
// waits for the new pods to come up, then uses rollout-restart command for non-olm based - registration-service)
// 2. If the command is run for member operator, it restart the whole member operator.(it deletes olm based pods(member-operator pods),
// waits for the new pods to come up, then uses rollout-restart command for non-olm based deployments - webhooks)
func NewRestartCmd() *cobra.Command {
    var targetCluster string
    command := &cobra.Command{
        Use:   "restart -t <cluster-name> <deployment-name>",
        Short: "Restarts a deployment",
        Long: `Restarts the deployment with the given name in the operator namespace.
If no deployment name is provided, then it lists all existing deployments in the namespace.`,
        Args: cobra.RangeArgs(0, 1),
        Use:   "restart <cluster-name>",
        Short: "Restarts an operator",
        Long: `Restarts the whole operator, it relies on the target cluster and fetches the cluster config
1. If the command is run for host operator, it restarts the whole host operator.
(it deletes olm based pods(host-operator pods),waits for the new pods to
come up, then uses rollout-restart command for non-olm based deployments - registration-service)
2. If the command is run for member operator, it restarts the whole member operator.
(it deletes olm based pods(member-operator pods),waits for the new pods
to come up, then uses rollout-restart command for non-olm based deployments - webhooks)`,
        Args: cobra.ExactArgs(1),
        RunE: func(cmd *cobra.Command, args []string) error {
            term := ioutils.NewTerminal(cmd.InOrStdin, cmd.OutOrStdout)
            ctx := clicontext.NewCommandContext(term, client.DefaultNewClient)
            return restart(ctx, targetCluster, args...)
            return restart(ctx, args[0])
        },
    }
    command.Flags().StringVarP(&targetCluster, "target-cluster", "t", "", "The target cluster")
    flags.MustMarkRequired(command, "target-cluster")
    return command
}

func restart(ctx *clicontext.CommandContext, clusterName string, deployments ...string) error {
func restart(ctx *clicontext.CommandContext, clusterName string) error {
    kubeConfigFlags := genericclioptions.NewConfigFlags(true).WithDeprecatedPasswordFlag()
    ioStreams := genericiooptions.IOStreams{
        In:     os.Stdin,
        Out:    os.Stdout,
        ErrOut: os.Stderr,
    }
    kubeConfigFlags.ClusterName = nil  // `cluster` flag is redefined for our own purpose
    kubeConfigFlags.AuthInfoName = nil // unused here, so we can hide it
    kubeConfigFlags.Context = nil      // unused here, so we can hide it

    cfg, err := configuration.LoadClusterConfig(ctx, clusterName)
    if err != nil {
        return err
    }
    cl, err := ctx.NewClient(cfg.Token, cfg.ServerAPI)
    kubeConfigFlags.Namespace = &cfg.OperatorNamespace
    kubeConfigFlags.APIServer = &cfg.ServerAPI
    kubeConfigFlags.BearerToken = &cfg.Token
    kubeconfig, err := client.EnsureKsctlConfigFile()
    if err != nil {
        return err
    }

    if len(deployments) == 0 {
        err := printExistingDeployments(ctx.Terminal, cl, cfg.OperatorNamespace)
        if err != nil {
            ctx.Terminal.Printlnf("\nERROR: Failed to list existing deployments\n :%s", err.Error())
        }
        return fmt.Errorf("at least one deployment name is required, include one or more of the above deployments to restart")
    }
    deploymentName := deployments[0]
    kubeConfigFlags.KubeConfig = &kubeconfig
    factory := cmdutil.NewFactory(cmdutil.NewMatchVersionFlags(kubeConfigFlags))

    if !ctx.AskForConfirmation(
        ioutils.WithMessagef("restart the deployment '%s' in namespace '%s'", deploymentName, cfg.OperatorNamespace)) {
        ioutils.WithMessagef("restart all the deployments in the cluster '%s' and namespace '%s' \n", clusterName, cfg.OperatorNamespace)) {
        return nil
    }
    return restartDeployment(ctx, cl, cfg.OperatorNamespace, deploymentName)
}

func restartDeployment(ctx *clicontext.CommandContext, cl runtimeclient.Client, ns string, deploymentName string) error {
    namespacedName := types.NamespacedName{
        Namespace: ns,
        Name:      deploymentName,
    cl, err := ctx.NewClient(cfg.Token, cfg.ServerAPI)
    if err != nil {
        return err
    }

    originalReplicas, err := scaleToZero(cl, namespacedName)
    return restartDeployments(ctx, cl, cfg.OperatorNamespace, func(ctx *clicontext.CommandContext, deployment appsv1.Deployment) error {
        return checkRolloutStatus(ctx, factory, ioStreams, deployment)
    }, func(ctx *clicontext.CommandContext, deployment appsv1.Deployment) error {
        return restartNonOlmDeployments(ctx, deployment, factory, ioStreams)
    })
}

// This function has the whole logic of getting the list of olm and non-olm based deployment, then proceed on restarting/deleting accordingly
func restartDeployments(ctx *clicontext.CommandContext, cl runtimeclient.Client, ns string, checker RolloutStatusCheckerFunc, restarter RolloutRestartFunc) error {

    ctx.Printlnf("Fetching the current OLM and non-OLM deployments of the operator in %s namespace", ns)
    olmDeploymentList, nonOlmDeploymentList, err := getExistingDeployments(ctx, cl, ns)
    if err != nil {
        if apierrors.IsNotFound(err) {
            ctx.Printlnf("\nERROR: The given deployment '%s' wasn't found.", deploymentName)
            return printExistingDeployments(ctx, cl, ns)
        }
        return err
    }
    ctx.Println("The deployment was scaled to 0")
    if err := scaleBack(ctx, cl, namespacedName, originalReplicas); err != nil {
        ctx.Printlnf("Scaling the deployment '%s' in namespace '%s' back to '%d' replicas wasn't successful", originalReplicas)
        ctx.Println("Please, try to contact administrators to scale the deployment back manually")
        return err
    //if there is no olm operator deployment, no need for restart
    if len(olmDeploymentList.Items) == 0 {
        return fmt.Errorf("no operator deployment found in namespace %s , it is required for the operator deployment to be running so the command can proceed with restarting the KubeSaw components", ns)
    }
    //Deleting the pods of the olm based operator deployment and then checking the status
    for _, olmOperatorDeployment := range olmDeploymentList.Items {
        ctx.Printlnf("Proceeding to delete the Pods of %v", olmOperatorDeployment.Name)

        if err := deleteDeploymentPods(ctx, cl, olmOperatorDeployment); err != nil {
            return err
        }
        //sleeping here so that when the status is called we get the correct status
        time.Sleep(1 * time.Second)

        ctx.Printlnf("Checking the status of the deleted pod's deployment %v", olmOperatorDeployment.Name)
        //check the rollout status
        if err := checker(ctx, olmOperatorDeployment); err != nil {
            return err
        }
    }

    //Non-Olm deployments like reg-svc,to be restarted
    //if no Non-OL deployment found it should just return with a message
    if len(nonOlmDeploymentList.Items) == 0 {
        // if there are no non-olm deployments
        ctx.Printlnf("No Non-OLM deployment found in namespace %s, hence no restart happened", ns)
        return nil
    }
    // if there is a Non-olm deployment found use rollout-restart command
    for _, nonOlmDeployment := range nonOlmDeploymentList.Items {
        //it should only use rollout restart for the deployments which are NOT autoscaling-buffer
        if nonOlmDeployment.Name != "autoscaling-buffer" {
            ctx.Printlnf("Proceeding to restart the non-olm deployment %v", nonOlmDeployment.Name)
            //using rollout-restart
            if err := restarter(ctx, nonOlmDeployment); err != nil {
                return err
            }
            //check the rollout status
            ctx.Printlnf("Checking the status of the rolled out deployment %v", nonOlmDeployment.Name)
            if err := checker(ctx, nonOlmDeployment); err != nil {
                return err
            }
            //if the deployment is not auto-scaling buffer, it should return from the function and not go to print the message for autoscaling buffer
            //We do not expect more than 1 non-olm deployment for each OLM deployment and hence returning here
            return nil
        }
        //message if there is a autoscaling buffer, it shouldn't be restarted but successfully exit
        ctx.Printlnf("Found only autoscaling-buffer deployment in namespace %s , which is not required to be restarted", ns)
    }

    ctx.Printlnf("The deployment was scaled back to '%d'", originalReplicas)
    return nil
}

func restartHostOperator(ctx *clicontext.CommandContext, hostClient runtimeclient.Client, hostNamespace string) error {
    deployments := &appsv1.DeploymentList{}
    if err := hostClient.List(context.TODO(), deployments,
        runtimeclient.InNamespace(hostNamespace),
        runtimeclient.MatchingLabels{"olm.owner.namespace": "toolchain-host-operator"}); err != nil {
func deleteDeploymentPods(ctx *clicontext.CommandContext, cl runtimeclient.Client, deployment appsv1.Deployment) error {
    //get pods by label selector from the deployment
    pods := corev1.PodList{}
    selector, _ := metav1.LabelSelectorAsSelector(deployment.Spec.Selector)
    if err := cl.List(ctx, &pods,
        runtimeclient.MatchingLabelsSelector{Selector: selector},
        runtimeclient.InNamespace(deployment.Namespace)); err != nil {
        return err
    }
    if len(deployments.Items) != 1 {
        return fmt.Errorf("there should be a single deployment matching the label olm.owner.namespace=toolchain-host-operator in %s ns, but %d was found. "+
            "It's not possible to restart the Host Operator deployment", hostNamespace, len(deployments.Items))

    //delete pods
    for _, pod := range pods.Items {
        pod := pod // TODO We won't need it after upgrading to go 1.22: https://go.dev/blog/loopvar-preview
        ctx.Printlnf("Deleting pod: %s", pod.Name)
        if err := cl.Delete(ctx, &pod); err != nil {
            return err
        }
    }

    return restartDeployment(ctx, hostClient, hostNamespace, deployments.Items[0].Name)
    return nil

}

func printExistingDeployments(term ioutils.Terminal, cl runtimeclient.Client, ns string) error {
    deployments := &appsv1.DeploymentList{}
    if err := cl.List(context.TODO(), deployments, runtimeclient.InNamespace(ns)); err != nil {
func restartNonOlmDeployments(ctx *clicontext.CommandContext, deployment appsv1.Deployment, f cmdutil.Factory, ioStreams genericclioptions.IOStreams) error {

    o := kubectlrollout.NewRolloutRestartOptions(ioStreams)

    if err := o.Complete(f, nil, []string{"deployment/" + deployment.Name}); err != nil {
        return err
    }
    deploymentList := "\n"
    for _, deployment := range deployments.Items {
        deploymentList += fmt.Sprintf("%s\n", deployment.Name)

    if err := o.Validate(); err != nil {
        return err
    }
    term.PrintContextSeparatorWithBodyf(deploymentList, "Existing deployments in %s namespace", ns)
    return nil
    ctx.Printlnf("Running the rollout restart command for non-Olm deployment %v", deployment.Name)
    return o.RunRestart()
}

func scaleToZero(cl runtimeclient.Client, namespacedName types.NamespacedName) (int32, error) {
    // get the deployment
    deployment := &appsv1.Deployment{}
    if err := cl.Get(context.TODO(), namespacedName, deployment); err != nil {
        return 0, err
func checkRolloutStatus(ctx *clicontext.CommandContext, f cmdutil.Factory, ioStreams genericclioptions.IOStreams, deployment appsv1.Deployment) error {

    cmd := kubectlrollout.NewRolloutStatusOptions(ioStreams)

    if err := cmd.Complete(f, []string{"deployment/" + deployment.Name}); err != nil {
        return err
    }
    // keep original number of replicas so we can bring it back
    originalReplicas := *deployment.Spec.Replicas
    zero := int32(0)
    deployment.Spec.Replicas = &zero

    // update the deployment so it scales to zero
    return originalReplicas, cl.Update(context.TODO(), deployment)
    if err := cmd.Validate(); err != nil {
        return err
    }
    ctx.Printlnf("Running the Rollout status to check the status of the deployment")
    return cmd.Run()
}

func scaleBack(term ioutils.Terminal, cl runtimeclient.Client, namespacedName types.NamespacedName, originalReplicas int32) error {
    return wait.PollUntilContextTimeout(context.TODO(), 500*time.Millisecond, 10*time.Second, false, func(ctx context.Context) (done bool, err error) {
        term.Println("")
        term.Printlnf("Trying to scale the deployment back to '%d'", originalReplicas)
        // get the updated
        deployment := &appsv1.Deployment{}
        if err := cl.Get(context.TODO(), namespacedName, deployment); err != nil {
            return false, err
        }
        // check if the replicas number wasn't already reset by a controller
        if *deployment.Spec.Replicas == originalReplicas {
            return true, nil
        }
        // set the original
        deployment.Spec.Replicas = &originalReplicas
        // and update to scale back
        if err := cl.Update(context.TODO(), deployment); err != nil {
            term.Printlnf("error updating Deployment '%s': %s. Will retry again...", namespacedName.Name, err.Error())
            return false, nil
        }
        return true, nil
    })
func getExistingDeployments(ctx *clicontext.CommandContext, cl runtimeclient.Client, ns string) (*appsv1.DeploymentList, *appsv1.DeploymentList, error) {

    olmDeployments := &appsv1.DeploymentList{}
    if err := cl.List(ctx, olmDeployments,
        runtimeclient.InNamespace(ns),
        runtimeclient.MatchingLabels{"kubesaw-control-plane": "kubesaw-controller-manager"}); err != nil {
        return nil, nil, err
    }

    nonOlmDeployments := &appsv1.DeploymentList{}
    if err := cl.List(ctx, nonOlmDeployments,
        runtimeclient.InNamespace(ns),
        runtimeclient.MatchingLabels{"toolchain.dev.openshift.com/provider": "codeready-toolchain"}); err != nil {
        return nil, nil, err
    }

    return olmDeployments, nonOlmDeployments, nil
}
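For orientation, here is a condensed, standalone sketch of the rollout-restart and rollout-status flow that the new code drives for the non-OLM deployments. It is not the repository's code: it is a hypothetical main that uses only the kubectl/cli-runtime calls appearing in the diff, with the default kubeconfig context, a placeholder deployment name, and error handling reduced to panics.

package main

import (
    "os"

    "k8s.io/cli-runtime/pkg/genericclioptions"
    "k8s.io/cli-runtime/pkg/genericiooptions"
    kubectlrollout "k8s.io/kubectl/pkg/cmd/rollout"
    cmdutil "k8s.io/kubectl/pkg/cmd/util"
)

func main() {
    // Build a kubectl factory from the current kubeconfig context.
    // (The ksctl command instead injects the API server, bearer token and
    // operator namespace from its own cluster configuration.)
    kubeConfigFlags := genericclioptions.NewConfigFlags(true).WithDeprecatedPasswordFlag()
    factory := cmdutil.NewFactory(cmdutil.NewMatchVersionFlags(kubeConfigFlags))
    ioStreams := genericiooptions.IOStreams{In: os.Stdin, Out: os.Stdout, ErrOut: os.Stderr}

    name := "registration-service" // placeholder deployment name

    // Equivalent of `kubectl rollout restart deployment/<name>`.
    restart := kubectlrollout.NewRolloutRestartOptions(ioStreams)
    if err := restart.Complete(factory, nil, []string{"deployment/" + name}); err != nil {
        panic(err)
    }
    if err := restart.Validate(); err != nil {
        panic(err)
    }
    if err := restart.RunRestart(); err != nil {
        panic(err)
    }

    // Equivalent of `kubectl rollout status deployment/<name>`, which waits
    // until the restarted deployment's new pods are ready.
    status := kubectlrollout.NewRolloutStatusOptions(ioStreams)
    if err := status.Complete(factory, []string{"deployment/" + name}); err != nil {
        panic(err)
    }
    if err := status.Validate(); err != nil {
        panic(err)
    }
    if err := status.Run(); err != nil {
        panic(err)
    }
}

In the command itself these two option sets are reached through the checker and restarter callbacks (RolloutStatusCheckerFunc and RolloutRestartFunc), presumably so that the unit tests added in this commit can substitute their own implementations.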