Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KUBESAW-12: Convert the health-check goroutine into ToolchainCluster controller #386

Merged
merged 16 commits into from
Apr 22, 2024
Merged
177 changes: 0 additions & 177 deletions controllers/toolchainclustercache/healthchecker_test.go

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,92 +1,32 @@
package toolchainclustercache
package toolchainclusterhealth

import (
"context"
"fmt"
"strings"
"time"

toolchainv1alpha1 "github.com/codeready-toolchain/api/api/v1alpha1"
"github.com/codeready-toolchain/toolchain-common/pkg/cluster"
"github.com/go-logr/logr"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
kubeclientset "k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
)

// logger is the package-level logger used by the periodic health-check loop.
var logger = logf.Log.WithName("toolchaincluster_healthcheck")

// Human-readable messages recorded on ToolchainCluster status conditions
// after a health probe (see the healthz* and cluster*Msg usages elsewhere
// in this package).
const (
	healthzOk              = "/healthz responded with ok"
	healthzNotOk           = "/healthz responded without ok"
	clusterNotReachableMsg = "cluster is not reachable"
	clusterReachableMsg    = "cluster is reachable"
)

// StartHealthChecks launches a background loop that refreshes the status of
// every ToolchainCluster in the given namespace once per period. The loop
// terminates when ctx is cancelled.
func StartHealthChecks(ctx context.Context, mgr manager.Manager, namespace string, period time.Duration) {
	logger.Info("starting health checks", "period", period)
	runOnce := func() {
		updateClusterStatuses(ctx, namespace, mgr.GetClient())
	}
	go wait.Until(runOnce, period, ctx.Done())
}

// HealthChecker bundles the clients needed to probe one remote (member)
// cluster and record the result on the local ToolchainCluster resource.
type HealthChecker struct {
	// localClusterClient is the client for the host cluster, used to update
	// the ToolchainCluster status.
	localClusterClient client.Client
	// remoteClusterClient is the controller-runtime client for the member cluster.
	remoteClusterClient client.Client
	// remoteClusterClientset is a raw clientset for the member cluster —
	// presumably used for the /healthz probe (see the healthz* constants);
	// the probing code is outside this view, confirm there.
	remoteClusterClientset *kubeclientset.Clientset
	logger                 logr.Logger
}

// updateClusterStatuses lists every ToolchainCluster in the namespace and
// refreshes each one's status based on a health probe of the remote cluster.
// Failures are logged per cluster; the loop always proceeds to the next item.
func updateClusterStatuses(ctx context.Context, namespace string, cl client.Client) {
	toolchainClusters := &toolchainv1alpha1.ToolchainClusterList{}
	if err := cl.List(ctx, toolchainClusters, client.InNamespace(namespace)); err != nil {
		logger.Error(err, "unable to list existing ToolchainClusters")
		return
	}
	if len(toolchainClusters.Items) == 0 {
		logger.Info("no ToolchainCluster found")
	}

	for i := range toolchainClusters.Items {
		tc := toolchainClusters.Items[i].DeepCopy()
		clusterLog := logger.WithValues("cluster-name", tc.Name)

		cachedCluster, found := cluster.GetCachedToolchainCluster(tc.Name)
		if !found {
			// Without cached connection details the cluster cannot be probed:
			// mark it offline and move on.
			clusterLog.Error(fmt.Errorf("cluster %s not found in cache", tc.Name), "failed to retrieve stored data for cluster")
			tc.Status.Conditions = []toolchainv1alpha1.ToolchainClusterCondition{clusterOfflineCondition()}
			if err := cl.Status().Update(ctx, tc); err != nil {
				clusterLog.Error(err, "failed to update the status of ToolchainCluster")
			}
			continue
		}

		clientSet, err := kubeclientset.NewForConfig(cachedCluster.RestConfig)
		if err != nil {
			clusterLog.Error(err, "cannot create ClientSet for a ToolchainCluster")
			continue
		}

		checker := &HealthChecker{
			localClusterClient:     cl,
			remoteClusterClient:    cachedCluster.Client,
			remoteClusterClientset: clientSet,
			logger:                 clusterLog,
		}
		if err := checker.updateIndividualClusterStatus(ctx, tc); err != nil {
			clusterLog.Error(err, "unable to update cluster status of ToolchainCluster")
		}
	}
}

func (hc *HealthChecker) updateIndividualClusterStatus(ctx context.Context, toolchainCluster *toolchainv1alpha1.ToolchainCluster) error {

currentClusterStatus := hc.getClusterHealthStatus(ctx)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package toolchainclusterhealth

import (
"context"
"fmt"
"time"

toolchainv1alpha1 "github.com/codeready-toolchain/api/api/v1alpha1"
"github.com/codeready-toolchain/toolchain-common/pkg/cluster"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
kubeclientset "k8s.io/client-go/kubernetes"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// NewReconciler returns a new Reconciler
func NewReconciler(mgr manager.Manager, namespace string, timeout time.Duration, requeAfter time.Duration) *Reconciler {
cacheLog := log.Log.WithName("toolchaincluster_health")
clusterCacheService := cluster.NewToolchainClusterService(mgr.GetClient(), cacheLog, namespace, timeout)
return &Reconciler{
client: mgr.GetClient(),
scheme: mgr.GetScheme(),
clusterCacheService: clusterCacheService,
requeAfter: requeAfter,

Check warning on line 28 in controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go#L21-L28

Added lines #L21 - L28 were not covered by tests
}
}

// SetupWithManager sets up the controller with the Manager.
func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&toolchainv1alpha1.ToolchainCluster{}).
Complete(r)

Check warning on line 36 in controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go#L33-L36

Added lines #L33 - L36 were not covered by tests
}

// Reconciler reconciles a ToolchainCluster object
type Reconciler struct {
client client.Client
scheme *runtime.Scheme
clusterCacheService cluster.ToolchainClusterService
fbm3307 marked this conversation as resolved.
Show resolved Hide resolved
requeAfter time.Duration
}

// Reconcile reads that state of the cluster for a ToolchainCluster object and makes changes based on the state read
// and what is in the ToolchainCluster.Spec. It updates the status of the individual cluster
// Note:
// The Controller will requeue the Request to be processed again if the returned error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.Result, error) {
reqLogger := log.FromContext(ctx).WithName("health")
reqLogger.Info("Reconciling ToolchainCluster")

// Fetch the ToolchainCluster instance
toolchainCluster := &toolchainv1alpha1.ToolchainCluster{}
err := r.client.Get(ctx, request.NamespacedName, toolchainCluster)
if err != nil {
if errors.IsNotFound(err) {
// Stop monitoring the toolchain cluster as it is deleted
return reconcile.Result{}, nil
}
// Error reading the object - requeue the request.
return reconcile.Result{}, err
}

cachedCluster, ok := cluster.GetCachedToolchainCluster(toolchainCluster.Name)
if !ok {
err := fmt.Errorf("cluster %s not found in cache", toolchainCluster.Name)
toolchainCluster.Status.Conditions = []toolchainv1alpha1.ToolchainClusterCondition{clusterOfflineCondition()}
if err := r.client.Status().Update(ctx, toolchainCluster); err != nil {
reqLogger.Error(err, "failed to update the status of ToolchainCluster")

Check warning on line 73 in controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go#L70-L73

Added lines #L70 - L73 were not covered by tests
}
return reconcile.Result{}, err

Check warning on line 75 in controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go#L75

Added line #L75 was not covered by tests
}

clientSet, err := kubeclientset.NewForConfig(cachedCluster.RestConfig)
if err != nil {
return reconcile.Result{}, err

Check warning on line 80 in controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go#L80

Added line #L80 was not covered by tests
}

healthChecker := &HealthChecker{
localClusterClient: r.client,
remoteClusterClient: cachedCluster.Client,
remoteClusterClientset: clientSet,
logger: reqLogger,
}

//update the status of the individual cluster.
if err := healthChecker.updateIndividualClusterStatus(ctx, toolchainCluster); err != nil {
reqLogger.Error(err, "unable to update cluster status of ToolchainCluster")
return reconcile.Result{}, err

Check warning on line 93 in controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go

View check run for this annotation

Codecov / codecov/patch

controllers/toolchainclusterhealth/toolchaincluster_healthcheck_controller.go#L92-L93

Added lines #L92 - L93 were not covered by tests
}

return reconcile.Result{RequeueAfter: r.requeAfter}, nil
}
Loading
Loading