diff --git a/Makefile b/Makefile
index 8190b0a..9f924ab 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,12 @@
 # Image URL to use all building/pushing image targets
 #IMG_TAG ?= $(shell git rev-parse --short HEAD)
-IMG_TAG ?= 0.1.2
+IMG_TAG ?= 0.2.0
 IMG_NAME ?= rekuberate-io-sleepcycles
 DOCKER_HUB_NAME ?= $(shell docker info | sed '/Username:/!d;s/.* //')
 IMG ?= $(DOCKER_HUB_NAME)/$(IMG_NAME):$(IMG_TAG)
+RUNNERS_IMG_NAME ?= rekuberate-io-sleepcycles-runners
+KO_DOCKER_REPO = $(DOCKER_HUB_NAME)/$(RUNNERS_IMG_NAME)
 
 # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
 ENVTEST_K8S_VERSION = 1.24.2
@@ -144,4 +146,14 @@ $(HELMIFY): $(LOCALBIN)
 	test -s $(LOCALBIN)/helmify || GOBIN=$(LOCALBIN) go install github.com/arttor/helmify/cmd/helmify@latest
 
 helm: manifests kustomize helmify
-	$(KUSTOMIZE) build config/default | $(HELMIFY) charts/sleepcycles
\ No newline at end of file
+	$(KUSTOMIZE) build config/default | $(HELMIFY) charts/sleepcycles
+
+KO ?= $(LOCALBIN)/ko
+
+.PHONY: ko
+ko: $(KO) ## Download ko locally if necessary.
+$(KO): $(LOCALBIN)
+	test -s $(LOCALBIN)/ko || GOBIN=$(LOCALBIN) go install github.com/google/ko@latest
+
+ko-build-runner: ko
+	cd runners && ko build --bare .
diff --git a/README.md b/README.md
index b06cd67..4540eca 100644
--- a/README.md
+++ b/README.md
@@ -1,51 +1,124 @@
 ![rekuberate-sleepcycle-banner.png](docs/images/rekuberate-sleepcycle-banner.png)
 
-Define sleep & wake up cycles for your Kubernetes resources. Automatically schedule to shutdown Deployments, CronJobs, StatefulSets and HorizontalPodAutoscalers that occupy resources in your cluster and wake them up only when you need them, reducing that way the overall power consumption.
+Define sleep & wake up cycles for your Kubernetes resources. Automatically schedule the shutdown of **Deployments**, **CronJobs**,
+**StatefulSets** and **HorizontalPodAutoscalers** that occupy resources in your cluster, and wake them up **only** when you need them;
+that way you can:
 
-> [!NOTE]
-> You can read more in medium article [rekuberate-io/sleepcycles: an automated way to reclaim your unused Kubernetes resources](https://medium.com/@akyriako/rekuberate-io-sleepcycles-an-automated-way-to-reclaim-your-unused-kubernetes-resources-852e8db313ec).
+- _schedule_ resource-hungry workloads (migrations, synchronizations, replications) during hours that do not impact your daily business
+- _depressurize_ your cluster
+- _decrease_ your costs
+- _reduce_ your power consumption
+- _lower_ your carbon footprint
 
 ## Getting Started
-You’ll need a Kubernetes cluster to run against. You can use [KIND](https://sigs.k8s.io/kind) or [K3D](https://k3d.io) to get a local cluster for testing, or run against a remote cluster.
-**Note:** Your controller will automatically use the current context in your kubeconfig file (i.e. whatever cluster `kubectl cluster-info` shows).
+You’ll need a Kubernetes cluster to run against. You can use [KIND](https://sigs.k8s.io/kind) or [K3D](https://k3d.io) to get a local cluster for testing,
+or run against a remote cluster.
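+
+For example, you can spin up a throwaway local cluster with KIND (the cluster name below is arbitrary):
+
+```sh
+kind create cluster --name sleepcycles-demo
+kubectl cluster-info --context kind-sleepcycles-demo
+```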
+
+> [!CAUTION]
+> Earliest compatible Kubernetes version is **1.25**
+
+### Samples
+
+Under `config/samples` you will find a set of manifests that you can use to test the sleepcycles controller on your cluster:
 
-Under `config/samples` you will find an example manifest that you can use to test this controller:
+#### SleepCycles
+
+* _core_v1alpha1_sleepcycle_app_x.yaml_, manifests to deploy 2 `SleepCycle` resources in namespaces `app-1` and `app-2`
 
 ```yaml
 apiVersion: core.rekuberate.io/v1alpha1
 kind: SleepCycle
 metadata:
-  name: sleepcycle-sample
+  name: sleepcycle-app-1
+  namespace: app-1
 spec:
-  shutdown: "0 20 * * *"
+  shutdown: "1/2 * * * *"
   shutdownTimeZone: "Europe/Athens"
-  wakeup: "30 7 * * 1-5"
+  wakeup: "*/2 * * * *"
   wakeupTimeZone: "Europe/Dublin"
   enabled: true
 ```
 
-You need to provide to every `SleepCycle` the `shutdown` (mandatory) and `wakeup` (non-mandatory) policies via Cron expressions (**do not include seconds or timezone**).
-Additionally you can provide schedules on different timezones via the (non-mandatory) fields `shutdownTimeZone` and `wakeupTimeZone`. If they're not provided they default to **UTC**.
-The example above will set a `SleepCycle` schedule shutting down your workloads **every day at 20:00 Athens local time** and waking them up **every weekday at 07:30 Dublin local time**.
+> [!NOTE]
+> The cron expressions of the samples are tailored so that you can run a quick demo. The `shutdown` expression schedules
+> the deployment to scale down on _odd_ minutes and the `wakeup` expression schedules it to scale up on _even_ minutes.
+
+Every `SleepCycle` has the following **mandatory** properties:
+
+- `shutdown`: cron expression for your shutdown schedule
+- `enabled`: whether this sleepcycle policy is enabled
 
-`SleepCycle` is a **Namespaced Custom Resource**, and the controller will monitor all the resources in the Namespace you installed the
-`SleepCycle` manifest and they are marked with a `Label` that has as key `rekuberate.io/sleepcycle:` and as value the `name` of the manifest you created:
+and the following **non-mandatory** properties:
+
+- `shutdownTimeZone`: the timezone for your shutdown schedule, defaults to `UTC`
+- `wakeup`: cron expression for your wake-up schedule
+- `wakeupTimeZone`: the timezone for your wake-up schedule, defaults to `UTC`
+- `successfulJobsHistoryLimit`: how many _completed_ CronJob Runner Pods to retain for debugging reasons, defaults to `1`
+- `failedJobsHistoryLimit`: how many _failed_ CronJob Runner Pods to retain for debugging reasons, defaults to `1`
+- `runnerImage`: the image to use when spawning CronJob Runner pods, defaults to `akyriako78/rekuberate-io-sleepcycles-runners`
+
+> [!IMPORTANT]
+> DO **NOT** ADD **seconds** or **timezone** information to your cron expressions.
+
+#### Demo workloads
+
+* _whoami-app-1_x-deployment.yaml_, manifests to deploy 2 `Deployments` that provision _traefik/whoami_ in namespace `app-1`
+* _whoami-app-2_x-deployment.yaml_, manifest to deploy a `Deployment` that provisions _traefik/whoami_ in namespace `app-2`
+
+`SleepCycle` is a namespace-scoped custom resource; the controller will monitor all the resources in that namespace that
+are marked with a label whose key is `rekuberate.io/sleepcycle` and whose value is the `name` of the manifest you created:
 
 ```yaml
 apiVersion: apps/v1
 kind: Deployment
 metadata:
+  name: app-2
+  namespace: app-2
   labels:
-    app: nginx-demo
-    rekuberate.io/sleepcycle: sleepcycle-sample
-  name: nginx-demo
-  namespace: default
+    app: app-2
+    rekuberate.io/sleepcycle: sleepcycle-app-2
 spec:
-  ...
-  ...
+  replicas: 9
+  selector:
+    matchLabels:
+      app: app-2
+  template:
+    metadata:
+      name: app-2
+      labels:
+        app: app-2
+    spec:
+      containers:
+        - name: app-2
+          image: traefik/whoami
+          imagePullPolicy: IfNotPresent
 ```
-### Running on the cluster
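+
+Equivalently, you can mark an existing workload from the command line (the deployment and sleepcycle names below are the ones used in the samples):
+
+```sh
+kubectl -n app-2 label deployment app-2 rekuberate.io/sleepcycle=sleepcycle-app-2
+```
+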
+> [!IMPORTANT]
+> Any workload in namespace `kube-system` marked with `rekuberate.io/sleepcycle` will be ignored by the controller **by design**.
+
+## How it works
+
+The diagram below describes how `rekuberate.io/sleepcycles` handles the scheduling of a `Deployment`:
+
+1. The `sleepcycle-controller` **watches** all the `SleepCycle` custom resources for changes (in **all** namespaces), polling every 1min.
+2. The controller, for **every** `SleepCycle` resource within the namespace `app-1`, collects all the resources that have been marked with the label `rekuberate.io/sleepcycle: sleepcycle-app1`.
+3. It provisions, for **every** workload - in this case deployment `deployment-app1` - a `CronJob` for the shutdown schedule and, optionally, a second `CronJob` if a wake-up schedule is provided.
+4. It provisions a `ServiceAccount`, a `Role` and a `RoleBinding` **per namespace**, in order to make it possible for runner pods to update the resources' specs.
+5. The `Runner` pods will be created automatically by the cron jobs and are responsible for scaling the resources up or down.
+
+
+![SCR-20240527-q9y.png](docs/images/SCR-20240527-qei.png)
+
+> [!NOTE]
+> The diagram depicts how `rekuberate.io/sleepcycles` scales a `Deployment`. The same steps apply to a
+> `StatefulSet` and a `HorizontalPodAutoscaler`. There are two exceptions though:
+> - a `HorizontalPodAutoscaler` will scale down to `1` replica and not to `0`, as is the case for a `Deployment` or a `StatefulSet`.
+> - a `CronJob` has no replicas to scale up or down; it is enabled or suspended respectively.
+
+## Deploy
+
+### From sources
 
 1. Build and push your image to the location specified by `IMG` in `Makefile`:
 
@@ -55,82 +128,90 @@ IMG_TAG ?= $(shell git rev-parse --short HEAD)
 IMG_NAME ?= rekuberate-io-sleepcycles
 DOCKER_HUB_NAME ?= $(shell docker info | sed '/Username:/!d;s/.* //')
 IMG ?= $(DOCKER_HUB_NAME)/$(IMG_NAME):$(IMG_TAG)
+RUNNERS_IMG_NAME ?= rekuberate-io-sleepcycles-runners
+KO_DOCKER_REPO ?= $(DOCKER_HUB_NAME)/$(RUNNERS_IMG_NAME)
 ```
 
 ```sh
 make docker-build docker-push
 ```
 
-2. Deploy the controller to the cluster with the image using `IMG`:
+2. Deploy the controller to the cluster using the image defined in `IMG`:
 
 ```sh
 make deploy
 ```
 
-or
-
-3. Deploy the controller to the cluster with the image using a **Helm chart**:
+and then deploy the samples:
 
 ```sh
-helm repo add sleepcycles https://rekuberate-io.github.io/sleepcycles/
-helm repo update
-
-helm upgrade --install sleepcycles sleepcycles/sleepcycles -n rekuberate-system --create-namespace
+kubectl create namespace app-1
+kubectl create namespace app-2
+kubectl apply -f config/samples
 ```
 
-4. Install Instances of Custom Resources:
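+
+You can then verify that the controller has picked the sleepcycles up; the columns come from the CRD's printer columns and the values below are illustrative:
+
+```sh
+kubectl get sleepcycles -A
+
+# NAMESPACE   NAME               ENABLED   STATE   TARGETS   SHUTDOWN SCHEDULE   ...
+# app-1       sleepcycle-app-1   true      Ready   3/3       1/2 * * * *         ...
+# app-2       sleepcycle-app-2   true      Ready   3/3       1/2 * * * *         ...
+```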
+
+#### Uninstall
 
 ```sh
-kubectl apply -f config/samples/
+make undeploy
 ```
 
-### Uninstall CRDs
-To delete the CRDs from the cluster:
+
+### Using Helm (from sources)
+
+If you are working in a development environment, you can quickly test & deploy the controller to the cluster
+using a **Helm chart** directly from `config/helm`:
 
 ```sh
-make uninstall
+helm install rekuberate-io-sleepcycles config/helm/ -n <namespace> --create-namespace
 ```
 
-### Undeploy controller
-UnDeploy the controller to the cluster:
+and then deploy the samples:
 
 ```sh
-make undeploy
+kubectl create namespace app-1
+kubectl create namespace app-2
+kubectl apply -f config/samples
 ```
 
-or if you have installed via Helm:
+#### Uninstall
 
 ```shell
-helm uninstall rekuberate-io-sleepcycles
+helm uninstall rekuberate-io-sleepcycles -n <namespace>
 ```
 
-## Contributing
-Please refer to our [Contributing Guidelines](CONTRIBUTING.md)
+### Using Helm (from repo)
 
-### How it works
-This project aims to follow the Kubernetes [Operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/)
+On the other hand, if you are deploying to a production environment, it is **highly recommended** to deploy the
+controller to the cluster using a **Helm chart** from its repo:
 
-It uses [Controllers](https://kubernetes.io/docs/concepts/architecture/controller/)
-which provides a reconcile function responsible for synchronizing resources untile the desired state is reached on the cluster
-
-### Test It Out
-1. Install the CRDs into the cluster:
 ```sh
-make install
+helm repo add sleepcycles https://rekuberate-io.github.io/sleepcycles/
+helm repo update
+
+helm upgrade --install sleepcycles sleepcycles/sleepcycles -n rekuberate-system --create-namespace
 ```
 
-2. Run your controller (this will run in the foreground, so switch to a new terminal if you want to leave it running):
+and then deploy the samples:
 
 ```sh
-make run
+kubectl create namespace app-1
+kubectl create namespace app-2
+kubectl apply -f config/samples
 ```
+
+#### Uninstall
 
-![debugging the controller](docs/images/SCR-20221222-hij.png)
+```shell
+helm uninstall rekuberate-io-sleepcycles -n <namespace>
+```
 
-> [!TIP]
-> You can also run this in one step by running: `make install run`
+## Develop
+
+This project aims to follow the Kubernetes [Operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/). It uses [Controllers](https://kubernetes.io/docs/concepts/architecture/controller/)
+which provide a reconcile function responsible for synchronizing resources until the desired state is reached on the cluster.
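+
+Conceptually, the reconcile contract boils down to the following (a minimal sketch, **not** this project's actual reconciler, which lives in `controllers/sleepcycle_controller.go`):
+
+```go
+package controllers
+
+import (
+	"context"
+	"time"
+
+	ctrl "sigs.k8s.io/controller-runtime"
+)
+
+// demoReconciler illustrates the contract only: observe the requested object,
+// move the cluster toward its desired state, then schedule the next pass.
+type demoReconciler struct{}
+
+func (r *demoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	// 1. fetch the object identified by req.NamespacedName
+	// 2. compare its actual state against the desired state and act on the diff
+	// 3. requeue so that the loop keeps converging
+	return ctrl.Result{RequeueAfter: time.Minute}, nil
+}
+```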
-### Modifying the API definitions
+### Controller
+
+#### Modifying the API definitions
 
 If you are editing the API definitions, generate the manifests such as CRs or CRDs using:
 
 ```sh
@@ -145,31 +226,42 @@ make install
 ```
 
 > [!TIP]
-> You can debug the controller in the IDE of your choice by hooking to the `main.go` or you can start
-> the controller without debugging with:
+> You can debug the controller in the IDE of your choice by hooking into `main.go` **or** you can start
+> the controller _without_ debugging with:
 
 ```sh
 make run
 ```
 
 > [!TIP]
-> Run `make --help` for more information on all potential `make` targets
+> Run `make --help` for more information on all potential `make` targets.
+> More information can be found via the [Kubebuilder Documentation](https://book.kubebuilder.io/introduction.html)
+
+#### Build
+
+You always need to build a new container image and push it to your repository:
+
+```sh
+make docker-build docker-push
+```
 
-More information can be found via the [Kubebuilder Documentation](https://book.kubebuilder.io/introduction.html)
+> [!IMPORTANT]
+> In this case you will need to adjust your Helm chart values to use your repository and container image.
 
-## License
+### Runner
 
-Copyright 2022.
+#### Build
 
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
+```sh
+make ko-build-runner
+```
 
-    http://www.apache.org/licenses/LICENSE-2.0
+> [!IMPORTANT]
+> In this case you will need to adjust the `runnerImage` of your `SleepCycle` manifest to use your own Runner image.
 
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
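+
+For example (the image reference below is a placeholder for your own registry and account):
+
+```yaml
+apiVersion: core.rekuberate.io/v1alpha1
+kind: SleepCycle
+metadata:
+  name: sleepcycle-app-1
+  namespace: app-1
+spec:
+  shutdown: "0 20 * * *"
+  shutdownTimeZone: "Europe/Athens"
+  enabled: true
+  runnerImage: <your-registry>/<your-account>/rekuberate-io-sleepcycles-runners:latest
+```
+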
+### Uninstall CRDs +To delete the CRDs from the cluster: +```sh +make uninstall +``` diff --git a/api/v1alpha1/sleepcycle_types.go b/api/v1alpha1/sleepcycle_types.go index f7eabc2..0418a76 100644 --- a/api/v1alpha1/sleepcycle_types.go +++ b/api/v1alpha1/sleepcycle_types.go @@ -35,7 +35,7 @@ type SleepCycleSpec struct { // +kubebuilder:validation:Pattern:=`(^((\*\/)?([0-5]?[0-9])((\,|\-|\/)([0-5]?[0-9]))*|\*)\s+((\*\/)?((2[0-3]|1[0-9]|[0-9]|00))((\,|\-|\/)(2[0-3]|1[0-9]|[0-9]|00))*|\*)\s+((\*\/)?([1-9]|[12][0-9]|3[01])((\,|\-|\/)([1-9]|[12][0-9]|3[01]))*|\*)\s+((\*\/)?([1-9]|1[0-2])((\,|\-|\/)([1-9]|1[0-2]))*|\*|(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|des))\s+((\*\/)?[0-6]((\,|\-|\/)[0-6])*|\*|00|(sun|mon|tue|wed|thu|fri|sat))\s*$)|@(annually|yearly|monthly|weekly|daily|hourly|reboot)` // +kubebuilder:validation:Type=string - WakeUp string `json:"wakeup,omitempty"` + WakeUp *string `json:"wakeup,omitempty"` // +kubebuilder:validation:Optional // +kubebuilder:default:="UTC" @@ -44,31 +44,48 @@ type SleepCycleSpec struct { // +kubebuilder:validation:default:=true // +kubebuilder:validation:Type=boolean Enabled bool `json:"enabled"` + + // +optional + // +kubebuilder:default=1 + // +kubebuilder:validation:Type=integer + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=3 + // +kubebuilder:validation:ExclusiveMinimum=false + // +kubebuilder:validation:ExclusiveMaximum=false + SuccessfulJobsHistoryLimit int32 `json:"successfulJobsHistoryLimit,omitempty"` + + // +optional + // +kubebuilder:default=1 + // +kubebuilder:validation:Type=integer + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=3 + // +kubebuilder:validation:ExclusiveMinimum=false + // +kubebuilder:validation:ExclusiveMaximum=false + FailedJobsHistoryLimit int32 `json:"failedJobsHistoryLimit,omitempty"` + + // +kubebuilder:validation:Optional + // +kubebuilder:default:="akyriako78/rekuberate-io-sleepcycles-runners" + RunnerImage string `json:"runnerImage,omitempty"` } // SleepCycleStatus defines the observed state of SleepCycle type SleepCycleStatus struct { - UsedBy map[string]int `json:"usedBy,omitempty"` - Enabled bool `json:"enabled,omitempty"` - NextScheduledShutdownTime *metav1.Time `json:"nextScheduledShutdown,omitempty"` - NextScheduledWakeupTime *metav1.Time `json:"nextScheduledWakeUp,omitempty"` - NextScheduledOp string `json:"nextScheduledOp,omitempty"` - LastRunTime *metav1.Time `json:"lastRunTime,omitempty"` - LastRunOperation string `json:"lastRunOperation,omitempty"` - LastRunWasSuccessful bool `json:"lastRunWasSuccessful,omitempty"` + Enabled bool `json:"enabled,omitempty"` + State string `json:"state,omitempty"` + Targets string `json:"targets,omitempty"` } //+kubebuilder:object:root=true //+kubebuilder:subresource:status // SleepCycle is the Schema for the sleepcycles API +// +kubebuilder:printcolumn:name="Enabled",type=boolean,JSONPath=`.spec.enabled` +// +kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.state` +// +kubebuilder:printcolumn:name="Targets",type=string,JSONPath=`.status.targets` // +kubebuilder:printcolumn:name="Shutdown Schedule",type=string,JSONPath=`.spec.shutdown` // +kubebuilder:printcolumn:name="Shutdown Timezone",type=string,JSONPath=`.spec.shutdownTimeZone` // +kubebuilder:printcolumn:name="Wakeup Schedule",type=string,JSONPath=`.spec.wakeup` // +kubebuilder:printcolumn:name="Wakeup Timezone",type=string,JSONPath=`.spec.wakeupTimeZone` -// +kubebuilder:printcolumn:name="Enabled",type=boolean,JSONPath=`.spec.enabled` -// 
+kubebuilder:printcolumn:name="Success",type=boolean,JSONPath=`.status.lastRunWasSuccessful` -// +kubebuilder:printcolumn:name="Last Op",type=string,JSONPath=`.status.lastRunOperation` type SleepCycle struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index d5d48a9..21a25b4 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -31,7 +31,7 @@ func (in *SleepCycle) DeepCopyInto(out *SleepCycle) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) + out.Status = in.Status } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SleepCycle. @@ -92,6 +92,11 @@ func (in *SleepCycleSpec) DeepCopyInto(out *SleepCycleSpec) { *out = new(string) **out = **in } + if in.WakeUp != nil { + in, out := &in.WakeUp, &out.WakeUp + *out = new(string) + **out = **in + } if in.WakeupTimeZone != nil { in, out := &in.WakeupTimeZone, &out.WakeupTimeZone *out = new(string) @@ -112,25 +117,6 @@ func (in *SleepCycleSpec) DeepCopy() *SleepCycleSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SleepCycleStatus) DeepCopyInto(out *SleepCycleStatus) { *out = *in - if in.UsedBy != nil { - in, out := &in.UsedBy, &out.UsedBy - *out = make(map[string]int, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } - if in.NextScheduledShutdownTime != nil { - in, out := &in.NextScheduledShutdownTime, &out.NextScheduledShutdownTime - *out = (*in).DeepCopy() - } - if in.NextScheduledWakeupTime != nil { - in, out := &in.NextScheduledWakeupTime, &out.NextScheduledWakeupTime - *out = (*in).DeepCopy() - } - if in.LastRunTime != nil { - in, out := &in.LastRunTime, &out.LastRunTime - *out = (*in).DeepCopy() - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SleepCycleStatus. diff --git a/charts/sleepcycles/Chart.yaml b/charts/sleepcycles/Chart.yaml index 641af71..f680433 100644 --- a/charts/sleepcycles/Chart.yaml +++ b/charts/sleepcycles/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: sleepcycles -description: A Helm chart for Kubernetes +description: Define sleep and wake up cycles for your Kubernetes resources # A chart can be either an 'application' or a 'library' chart. # # Application charts are a collection of templates that can be packaged into versioned archives @@ -13,9 +13,9 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.2 +version: 0.2.0 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "0.1.2" +appVersion: "0.2.0" \ No newline at end of file diff --git a/charts/sleepcycles/templates/manager-rbac.yaml b/charts/sleepcycles/templates/manager-rbac.yaml index 5c85492..7598e83 100644 --- a/charts/sleepcycles/templates/manager-rbac.yaml +++ b/charts/sleepcycles/templates/manager-rbac.yaml @@ -84,6 +84,18 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - core.rekuberate.io resources: diff --git a/charts/sleepcycles/templates/metrics-service.yaml b/charts/sleepcycles/templates/metrics-service.yaml index 750c1eb..525749d 100644 --- a/charts/sleepcycles/templates/metrics-service.yaml +++ b/charts/sleepcycles/templates/metrics-service.yaml @@ -11,4 +11,4 @@ spec: control-plane: controller-manager {{- include "sleepcycles.selectorLabels" . | nindent 4 }} ports: - {{- .Values.metricsService.ports | toYaml | nindent 2 }} \ No newline at end of file + {{- .Values.metricsService.ports | toYaml | nindent 2 -}} \ No newline at end of file diff --git a/charts/sleepcycles/templates/sleepcycle-crd.yaml b/charts/sleepcycles/templates/sleepcycle-crd.yaml index 4a91aa5..a5a6664 100644 --- a/charts/sleepcycles/templates/sleepcycle-crd.yaml +++ b/charts/sleepcycles/templates/sleepcycle-crd.yaml @@ -16,6 +16,15 @@ spec: scope: Namespaced versions: - additionalPrinterColumns: + - jsonPath: .spec.enabled + name: Enabled + type: boolean + - jsonPath: .status.state + name: State + type: string + - jsonPath: .status.targets + name: Targets + type: string - jsonPath: .spec.shutdown name: Shutdown Schedule type: string @@ -28,15 +37,6 @@ spec: - jsonPath: .spec.wakeupTimeZone name: Wakeup Timezone type: string - - jsonPath: .spec.enabled - name: Enabled - type: boolean - - jsonPath: .status.lastRunWasSuccessful - name: Success - type: boolean - - jsonPath: .status.lastRunOperation - name: Last Op - type: string name: v1alpha1 schema: openAPIV3Schema: @@ -59,12 +59,27 @@ spec: properties: enabled: type: boolean + failedJobsHistoryLimit: + default: 1 + format: int32 + maximum: 3 + minimum: 1 + type: integer + runnerImage: + default: akyriako78/rekuberate-io-sleepcycles-runners + type: string shutdown: pattern: (^((\*\/)?([0-5]?[0-9])((\,|\-|\/)([0-5]?[0-9]))*|\*)\s+((\*\/)?((2[0-3]|1[0-9]|[0-9]|00))((\,|\-|\/)(2[0-3]|1[0-9]|[0-9]|00))*|\*)\s+((\*\/)?([1-9]|[12][0-9]|3[01])((\,|\-|\/)([1-9]|[12][0-9]|3[01]))*|\*)\s+((\*\/)?([1-9]|1[0-2])((\,|\-|\/)([1-9]|1[0-2]))*|\*|(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|des))\s+((\*\/)?[0-6]((\,|\-|\/)[0-6])*|\*|00|(sun|mon|tue|wed|thu|fri|sat))\s*$)|@(annually|yearly|monthly|weekly|daily|hourly|reboot) type: string shutdownTimeZone: default: UTC type: string + successfulJobsHistoryLimit: + default: 1 + format: int32 + maximum: 3 + minimum: 1 + type: integer wakeup: pattern: (^((\*\/)?([0-5]?[0-9])((\,|\-|\/)([0-5]?[0-9]))*|\*)\s+((\*\/)?((2[0-3]|1[0-9]|[0-9]|00))((\,|\-|\/)(2[0-3]|1[0-9]|[0-9]|00))*|\*)\s+((\*\/)?([1-9]|[12][0-9]|3[01])((\,|\-|\/)([1-9]|[12][0-9]|3[01]))*|\*)\s+((\*\/)?([1-9]|1[0-2])((\,|\-|\/)([1-9]|1[0-2]))*|\*|(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|des))\s+((\*\/)?[0-6]((\,|\-|\/)[0-6])*|\*|00|(sun|mon|tue|wed|thu|fri|sat))\s*$)|@(annually|yearly|monthly|weekly|daily|hourly|reboot) type: string @@ -80,25 +95,10 @@ spec: properties: enabled: type: boolean - lastRunOperation: - type: string - lastRunTime: - format: date-time - type: string - lastRunWasSuccessful: - type: boolean - nextScheduledOp: - 
type: string - nextScheduledShutdown: - format: date-time + state: type: string - nextScheduledWakeUp: - format: date-time + targets: type: string - usedBy: - additionalProperties: - type: integer - type: object type: object type: object served: true diff --git a/charts/sleepcycles/values.yaml b/charts/sleepcycles/values.yaml index 89d6c2f..dcb79a7 100644 --- a/charts/sleepcycles/values.yaml +++ b/charts/sleepcycles/values.yaml @@ -32,7 +32,7 @@ controllerManager: - ALL image: repository: akyriako78/rekuberate-io-sleepcycles - tag: 0.1.2 + tag: 0.2.0 resources: limits: cpu: 500m diff --git a/config/crd/bases/core.rekuberate.io_sleepcycles.yaml b/config/crd/bases/core.rekuberate.io_sleepcycles.yaml index 4179450..489c7ab 100644 --- a/config/crd/bases/core.rekuberate.io_sleepcycles.yaml +++ b/config/crd/bases/core.rekuberate.io_sleepcycles.yaml @@ -16,6 +16,15 @@ spec: scope: Namespaced versions: - additionalPrinterColumns: + - jsonPath: .spec.enabled + name: Enabled + type: boolean + - jsonPath: .status.state + name: State + type: string + - jsonPath: .status.targets + name: Targets + type: string - jsonPath: .spec.shutdown name: Shutdown Schedule type: string @@ -28,15 +37,6 @@ spec: - jsonPath: .spec.wakeupTimeZone name: Wakeup Timezone type: string - - jsonPath: .spec.enabled - name: Enabled - type: boolean - - jsonPath: .status.lastRunWasSuccessful - name: Success - type: boolean - - jsonPath: .status.lastRunOperation - name: Last Op - type: string name: v1alpha1 schema: openAPIV3Schema: @@ -59,12 +59,27 @@ spec: properties: enabled: type: boolean + failedJobsHistoryLimit: + default: 1 + format: int32 + maximum: 3 + minimum: 1 + type: integer + runnerImage: + default: akyriako78/rekuberate-io-sleepcycles-runners + type: string shutdown: pattern: (^((\*\/)?([0-5]?[0-9])((\,|\-|\/)([0-5]?[0-9]))*|\*)\s+((\*\/)?((2[0-3]|1[0-9]|[0-9]|00))((\,|\-|\/)(2[0-3]|1[0-9]|[0-9]|00))*|\*)\s+((\*\/)?([1-9]|[12][0-9]|3[01])((\,|\-|\/)([1-9]|[12][0-9]|3[01]))*|\*)\s+((\*\/)?([1-9]|1[0-2])((\,|\-|\/)([1-9]|1[0-2]))*|\*|(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|des))\s+((\*\/)?[0-6]((\,|\-|\/)[0-6])*|\*|00|(sun|mon|tue|wed|thu|fri|sat))\s*$)|@(annually|yearly|monthly|weekly|daily|hourly|reboot) type: string shutdownTimeZone: default: UTC type: string + successfulJobsHistoryLimit: + default: 1 + format: int32 + maximum: 3 + minimum: 1 + type: integer wakeup: pattern: (^((\*\/)?([0-5]?[0-9])((\,|\-|\/)([0-5]?[0-9]))*|\*)\s+((\*\/)?((2[0-3]|1[0-9]|[0-9]|00))((\,|\-|\/)(2[0-3]|1[0-9]|[0-9]|00))*|\*)\s+((\*\/)?([1-9]|[12][0-9]|3[01])((\,|\-|\/)([1-9]|[12][0-9]|3[01]))*|\*)\s+((\*\/)?([1-9]|1[0-2])((\,|\-|\/)([1-9]|1[0-2]))*|\*|(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|des))\s+((\*\/)?[0-6]((\,|\-|\/)[0-6])*|\*|00|(sun|mon|tue|wed|thu|fri|sat))\s*$)|@(annually|yearly|monthly|weekly|daily|hourly|reboot) type: string @@ -80,25 +95,10 @@ spec: properties: enabled: type: boolean - lastRunOperation: - type: string - lastRunTime: - format: date-time - type: string - lastRunWasSuccessful: - type: boolean - nextScheduledOp: - type: string - nextScheduledShutdown: - format: date-time + state: type: string - nextScheduledWakeUp: - format: date-time + targets: type: string - usedBy: - additionalProperties: - type: integer - type: object type: object type: object served: true diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 329ffa7..10452d2 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -13,4 +13,4 @@ kind: Kustomization images: - 
name: controller newName: akyriako78/rekuberate-io-sleepcycles - newTag: 0.1.2 + newTag: 0.2.0 diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index ec56128..cce0d36 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -84,6 +84,18 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - core.rekuberate.io resources: diff --git a/config/samples/apache-hpa.yaml b/config/samples/apache-hpa.yaml new file mode 100644 index 0000000..1b3a0f9 --- /dev/null +++ b/config/samples/apache-hpa.yaml @@ -0,0 +1,59 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: php-apache + namespace: app-2 +spec: + selector: + matchLabels: + run: php-apache + template: + metadata: + labels: + run: php-apache + spec: + containers: + - name: php-apache + image: registry.k8s.io/hpa-example + ports: + - containerPort: 80 + resources: + limits: + cpu: 500m + requests: + cpu: 200m +--- +apiVersion: v1 +kind: Service +metadata: + name: php-apache + namespace: app-2 + labels: + run: php-apache +spec: + ports: + - port: 80 + selector: + run: php-apache +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: php-apache + namespace: app-2 + labels: + rekuberate.io/sleepcycle: sleepcycle-app-2 +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: php-apache + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 50 diff --git a/config/samples/busybox-cronjob.yaml b/config/samples/busybox-cronjob.yaml new file mode 100644 index 0000000..54ddb30 --- /dev/null +++ b/config/samples/busybox-cronjob.yaml @@ -0,0 +1,23 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: hello-busybox + namespace: app-1 + labels: + app: hello-busybox + rekuberate.io/sleepcycle: sleepcycle-app-1 +spec: + schedule: "* * * * *" # Run every minute + jobTemplate: + spec: + template: + spec: + containers: + - name: hello-busybox + image: busybox:latest + imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + - date; echo Hello! 
+ restartPolicy: OnFailure \ No newline at end of file diff --git a/config/samples/core_v1alpha1_sleepcycle.yaml b/config/samples/core_v1alpha1_sleepcycle.yaml deleted file mode 100644 index b780c66..0000000 --- a/config/samples/core_v1alpha1_sleepcycle.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: core.rekuberate.io/v1alpha1 -kind: SleepCycle -metadata: - name: sleepcycle-sample -spec: - shutdown: "*/1 * * * *" - shutdownTimeZone: "Europe/Athens" - wakeup: "*/2 * * * *" - wakeupTimeZone: "Europe/Dublin" - enabled: true \ No newline at end of file diff --git a/config/samples/core_v1alpha1_sleepcycle_app_1.yaml b/config/samples/core_v1alpha1_sleepcycle_app_1.yaml index 982d504..d990955 100644 --- a/config/samples/core_v1alpha1_sleepcycle_app_1.yaml +++ b/config/samples/core_v1alpha1_sleepcycle_app_1.yaml @@ -4,7 +4,7 @@ metadata: name: sleepcycle-app-1 namespace: app-1 spec: - shutdown: "*/1 * * * *" + shutdown: "1/2 * * * *" shutdownTimeZone: "Europe/Athens" wakeup: "*/2 * * * *" wakeupTimeZone: "Europe/Dublin" diff --git a/config/samples/core_v1alpha1_sleepcycle_app_2.yaml b/config/samples/core_v1alpha1_sleepcycle_app_2.yaml index c883430..56fdd6b 100644 --- a/config/samples/core_v1alpha1_sleepcycle_app_2.yaml +++ b/config/samples/core_v1alpha1_sleepcycle_app_2.yaml @@ -4,7 +4,7 @@ metadata: name: sleepcycle-app-2 namespace: app-2 spec: - shutdown: "*/1 * * * *" + shutdown: "1/2 * * * *" shutdownTimeZone: "Europe/Athens" wakeup: "*/2 * * * *" wakeupTimeZone: "Europe/Dublin" diff --git a/config/samples/nginx-statefulset.yaml b/config/samples/nginx-statefulset.yaml new file mode 100644 index 0000000..f340cd0 --- /dev/null +++ b/config/samples/nginx-statefulset.yaml @@ -0,0 +1,50 @@ +apiVersion: v1 +kind: Service +metadata: + name: nginx + namespace: app-2 + labels: + app: nginx +spec: + ports: + - port: 80 + name: web + clusterIP: None + selector: + app: nginx +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: web + namespace: app-2 + labels: + rekuberate.io/sleepcycle: sleepcycle-app-2 +spec: + serviceName: "nginx" + replicas: 2 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: registry.k8s.io/nginx-slim:0.8 + ports: + - containerPort: 80 + name: web + volumeMounts: + - name: www + mountPath: /usr/share/nginx/html + volumeClaimTemplates: + - metadata: + name: www + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 1Gi diff --git a/controllers/sleepcycle_controller.go b/controllers/sleepcycle_controller.go index ec6ce85..e9bfc91 100644 --- a/controllers/sleepcycle_controller.go +++ b/controllers/sleepcycle_controller.go @@ -20,32 +20,31 @@ import ( "context" "fmt" "github.com/go-logr/logr" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/builder" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/predicate" - "strings" - "time" - + "github.com/hashicorp/go-multierror" corev1alpha1 "github.com/rekuberate-io/sleepcycles/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" autoscalingv1 "k8s.io/api/autoscaling/v1" batchv1 "k8s.io/api/batch/v1" - corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" 
"sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "strings" + "time" ) const ( - SleepCycleLabel = "rekuberate.io/sleepcycle" - SleepCycleFinalizer = "sleepcycle.core.rekuberate.io/finalizer" - TimeWindowToleranceInSeconds int = 30 - SleepCycleStatusUpdateFailure string = "failed to update SleepCycle Status" - SleepCycleFinalizerFailure string = "finalizer failed" + SleepCycleLabel = "rekuberate.io/sleepcycle" + SleepCycleFinalizer = "sleepcycle.core.rekuberate.io/finalizer" + TimeWindowToleranceInSeconds int = 30 + requeueAfter time.Duration = 60 * time.Second + SleepCycleStatusUpdateFailure string = "failed to update status" + SleepCycleFinalizerFailure string = "finalizer failed" ) // SleepCycleReconciler reconciles a SleepCycle object @@ -59,10 +58,8 @@ type SleepCycleReconciler struct { type runtimeObjectReconciler func( ctx context.Context, req ctrl.Request, - original *corev1alpha1.SleepCycle, - desired *corev1alpha1.SleepCycle, - op SleepCycleOperation, -) (ctrl.Result, error) + sleepcycle *corev1alpha1.SleepCycle, +) (int, int, error) type runtimeObjectFinalizer func( ctx context.Context, @@ -97,6 +94,7 @@ var ( //+kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=autoscaling,resources=horizontalpodautoscalers,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups="",resources=events,verbs=create;patch +//+kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch;create;update;patch;delete // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. @@ -110,128 +108,108 @@ var ( func (r *SleepCycleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { r.logger = log.Log.WithValues("namespace", req.Namespace, "sleepcycle", req.Name) + r.logger.Info("reconciling sleepcycle") + + nsks := "kube-system" + if req.Namespace == nsks { + r.logger.Info(fmt.Sprintf("setting sleepcycle schedule on resources in namespace %s is not supported", nsks)) + return ctrl.Result{}, nil + } + + provisioned := 0 + total := 0 + var original corev1alpha1.SleepCycle if err := r.Get(ctx, req.NamespacedName, &original); err != nil { if apierrors.IsNotFound(err) { - //r.logger.Error(err, "🛑️ unable to find SleepCycle") return ctrl.Result{}, nil } - r.logger.Error(err, "🛑 unable to fetch SleepCycle") + r.logger.Error(err, "unable to fetch sleepcycle") return ctrl.Result{}, err } - sleepCycleFullName := fmt.Sprintf("%v/%v", original.Namespace, original.Name) - - //TODO: Bug finalizer creates a deadlock when removing or upgrading - - //if original.ObjectMeta.DeletionTimestamp.IsZero() { - // // The object is not being deleted, so if it does not have our finalizer, - // // then lets add the finalizer and update the object. 
- // if !containsString(original.ObjectMeta.Finalizers, SleepCycleFinalizer) { - // original.ObjectMeta.Finalizers = append(original.ObjectMeta.Finalizers, SleepCycleFinalizer) - // if err := r.Update(ctx, &original); err != nil { - // r.logger.Error(err, fmt.Sprintf("🛑️ %s", SleepCycleStatusUpdateFailure), "sleepcycle", sleepCycleFullName) - // r.Recorder.Event(&original, corev1.EventTypeWarning, "SleepCycleStatus", strings.ToLower(SleepCycleStatusUpdateFailure)) - // return ctrl.Result{}, err - // } - // } - //} else { - // // The object is being deleted - // if containsString(original.ObjectMeta.Finalizers, SleepCycleFinalizer) { - // // our finalizer is present, so lets handle our external dependency - // finalizers := []runtimeObjectFinalizer{r.FinalizeDeployments, r.FinalizeCronJobs, r.FinalizeStatefulSets, r.FinalizeHorizontalPodAutoscalers} - // for _, finalizer := range finalizers { - // result, err := finalizer(ctx, req, &original) - // if err != nil { - // r.logger.Error(err, fmt.Sprintf("🛑️ %s", SleepCycleFinalizerFailure), "sleepcycle", sleepCycleFullName) - // r.Recorder.Event(&original, corev1.EventTypeWarning, "SleepCycleFinalizerFailure", fmt.Sprintf( - // "%s: %s", - // SleepCycleFinalizerFailure, - // err.Error(), - // )) - // - // return result, err - // } - // } - // - // // remove our finalizer from the list and update it. - // original.ObjectMeta.Finalizers = removeString(original.ObjectMeta.Finalizers, SleepCycleFinalizer) - // if err := r.Update(ctx, &original); err != nil { - // r.logger.Error(err, fmt.Sprintf("🛑️ %s", SleepCycleStatusUpdateFailure), "sleepcycle", sleepCycleFullName) - // r.Recorder.Event(&original, corev1.EventTypeWarning, "SleepCycleStatus", strings.ToLower(SleepCycleStatusUpdateFailure)) - // return ctrl.Result{}, err - // } - // } - // - // // Our finalizer has finished, so the reconciler can do nothing. 
- // return reconcile.Result{}, nil - //} - - if !original.Spec.Enabled { - return ctrl.Result{}, nil + err := r.reconcileRbac(ctx, &original) + if err != nil { + r.recordEvent(&original, fmt.Sprintf("unable to create rbac resources in %s", req.Namespace), false) + return ctrl.Result{}, err } - currentOperation := r.getCurrentScheduledOperation(original) + if err := r.Get(ctx, req.NamespacedName, &original); err != nil { + if apierrors.IsNotFound(err) { + return ctrl.Result{}, nil + } - desired := *original.DeepCopy() - desired.Status.LastRunOperation = currentOperation.String() - if desired.Status.UsedBy == nil { - usedBy := make(map[string]int) - desired.Status.UsedBy = usedBy + r.logger.Error(err, "unable to fetch sleepcycle") + return ctrl.Result{}, err } + reconcilers := []runtimeObjectReconciler{r.ReconcileDeployments, r.ReconcileCronJobs, r.ReconcileStatefulSets, r.ReconcileHorizontalPodAutoscalers} + var errors error - r.logger = r.logger.WithValues("op", currentOperation.String()) - - if currentOperation != Watch { - reconcilers := []runtimeObjectReconciler{r.ReconcileDeployments, r.ReconcileCronJobs, r.ReconcileStatefulSets, r.ReconcileHorizontalPodAutoscalers} - - for _, reconciler := range reconcilers { - result, err := reconciler(ctx, req, &original, &desired, currentOperation) - if err != nil { - if currentOperation != Watch { - desired.Status.LastRunTime = &metav1.Time{Time: time.Now()} - desired.Status.LastRunWasSuccessful = false - } + for _, reconciler := range reconcilers { + p, t, err := reconciler(ctx, req, &original) + if err != nil { + errors = multierror.Append(errors, err) + } - if err := r.Status().Update(ctx, &desired); err != nil { - r.logger.Error(err, fmt.Sprintf("🛑️ %s", SleepCycleStatusUpdateFailure), "sleepcycle", sleepCycleFullName) - r.Recorder.Event(&original, corev1.EventTypeWarning, "SleepCycleStatus", strings.ToLower(SleepCycleStatusUpdateFailure)) - return ctrl.Result{}, err - } + provisioned += p + total += t + } - return result, err + if errors != nil { + if merr, ok := errors.(*multierror.Error); ok { + for _, rerr := range merr.Errors { + r.logger.Error(rerr, "failed to reconcile") } } - - desired.Status.LastRunTime = &metav1.Time{Time: time.Now()} - desired.Status.LastRunWasSuccessful = true } - nextScheduledShutdown, nextScheduledWakeup := r.getSchedulesTime(original, false) - if nextScheduledWakeup != nil { - tz := r.getTimeZone(original.Spec.WakeupTimeZone) - t := nextScheduledWakeup.In(tz) - desired.Status.NextScheduledWakeupTime = &metav1.Time{Time: t} - } else { - desired.Status.NextScheduledWakeupTime = nil + state := "Ready" + if provisioned != 0 && provisioned < total { + state = "Warning" + } else if provisioned == 0 && total != 0 { + state = "NotReady" } - tz := r.getTimeZone(original.Spec.ShutdownTimeZone) - t := nextScheduledShutdown.In(tz) - desired.Status.NextScheduledShutdownTime = &metav1.Time{Time: t} - if err := r.Status().Update(ctx, &desired); err != nil { - r.logger.Error(err, fmt.Sprintf("🛑️ %s", SleepCycleStatusUpdateFailure), "sleepcycle", sleepCycleFullName) - r.Recorder.Event(&original, corev1.EventTypeWarning, "SleepCycleStatus", strings.ToLower(SleepCycleStatusUpdateFailure)) + err = r.UpdateStatus(ctx, &original, state, []int{provisioned, total}) + if err != nil { return ctrl.Result{}, err } - nextOperation, requeueAfter := r.getNextScheduledOperation(original, ¤tOperation) + return ctrl.Result{RequeueAfter: requeueAfter}, errors +} + +func (r *SleepCycleReconciler) UpdateStatus(ctx context.Context, sleepcycle 
*corev1alpha1.SleepCycle, state string, targets []int) error { + patch := client.MergeFrom(sleepcycle.DeepCopy()) + sleepcycle.Status.State = state + sleepcycle.Status.Targets = fmt.Sprintf("%d/%d", targets[0], targets[1]) - r.logger.Info("Requeue", "next-op", nextOperation.String(), "after", requeueAfter) - return ctrl.Result{RequeueAfter: requeueAfter}, nil + err := r.Status().Patch(ctx, sleepcycle, patch) + if err != nil { + r.logger.Error(err, "unable to patch sleepcycle status") + return err + } + + return nil } +//if !original.Spec.Enabled { +// return ctrl.Result{}, nil +//} +//desired := *original.DeepCopy() +//reconcilers := []runtimeObjectReconciler{r.ReconcileDeployments, r.ReconcileCronJobs, r.ReconcileStatefulSets, r.ReconcileHorizontalPodAutoscalers} + +//r.logger = r.logger.WithValues("op", currentOperation.String()) +//tz := r.getTimeZone(original.Spec.ShutdownTimeZone) +//t := nextScheduledShutdown.In(tz) +//desired.Status.NextScheduledShutdownTime = &metav1.Time{Time: t} + +//if err := r.Status().Update(ctx, &desired); err != nil { +// r.logger.Error(err, fmt.Sprintf("🛑️ %s", SleepCycleStatusUpdateFailure), "sleepcycle", sleepCycleFullName) +// r.Recorder.Event(&original, corev1.EventTypeWarning, "SleepCycleStatus", strings.ToLower(SleepCycleStatusUpdateFailure)) +// return ctrl.Result{}, err +//} + func (r *SleepCycleReconciler) ScaleDeployment(ctx context.Context, deployment appsv1.Deployment, replicas int32) error { deepCopy := *deployment.DeepCopy() *deepCopy.Spec.Replicas = replicas @@ -302,21 +280,3 @@ func (r *SleepCycleReconciler) SetupWithManager(mgr ctrl.Manager) error { For(&corev1alpha1.SleepCycle{}, eventFilters). Complete(r) } - -func (r *SleepCycleReconciler) RecordEvent(sleepCycle corev1alpha1.SleepCycle, isError bool, namespacedName string, operation SleepCycleOperation, extra ...string) { - eventType := corev1.EventTypeNormal - reason := "SleepCycleOpSuccess" - message := fmt.Sprintf("%s on %s succeeded", operation.String(), namespacedName) - - if isError { - eventType = corev1.EventTypeWarning - reason = "SleepCycleOpFailure" - message = fmt.Sprintf("%s on %s failed", operation.String(), namespacedName) - } - - for _, s := range extra { - message = message + ". 
" + s - } - - r.Recorder.Event(&sleepCycle, eventType, reason, strings.ToLower(message)) -} diff --git a/controllers/sleepcycle_operation.go b/controllers/sleepcycle_operation.go deleted file mode 100644 index a13a7a3..0000000 --- a/controllers/sleepcycle_operation.go +++ /dev/null @@ -1,16 +0,0 @@ -package controllers - -type SleepCycleOperation int - -const ( - Watch SleepCycleOperation = iota - Shutdown - WakeUp -) - -func (sco SleepCycleOperation) String() string { - var values []string = []string{"Watch", "Shutdown", "Wakeup"} - name := values[sco] - - return name -} diff --git a/controllers/sleepcycle_reconcilers.go b/controllers/sleepcycle_reconcilers.go index 33403bf..c3e8408 100644 --- a/controllers/sleepcycle_reconcilers.go +++ b/controllers/sleepcycle_reconcilers.go @@ -2,7 +2,7 @@ package controllers import ( "context" - "fmt" + "github.com/hashicorp/go-multierror" corev1alpha1 "github.com/rekuberate-io/sleepcycles/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" autoscalingv1 "k8s.io/api/autoscaling/v1" @@ -18,257 +18,358 @@ const ( func (r *SleepCycleReconciler) ReconcileDeployments( ctx context.Context, req ctrl.Request, - original *corev1alpha1.SleepCycle, - desired *corev1alpha1.SleepCycle, - op SleepCycleOperation, -) (ctrl.Result, error) { + sleepcycle *corev1alpha1.SleepCycle, +) (int, int, error) { + provisioned := 0 + total := 0 + deploymentList := appsv1.DeploymentList{} if err := r.List(ctx, &deploymentList, &client.ListOptions{Namespace: req.NamespacedName.Namespace}); err != nil { - return ctrl.Result{}, err + return 0, 0, err } if len(deploymentList.Items) == 0 { - r.refreshLabelsDeployments(original, desired, deploymentList) - return ctrl.Result{}, nil + return 0, 0, nil } - r.logger.Info("📚 Processing Deployments") - + var errors error for _, deployment := range deploymentList.Items { - hasSleepCycle := r.hasLabel(&deployment.ObjectMeta, original.Name) + logger := r.logger.WithValues("deployment", deployment.Name) - if hasSleepCycle { - deploymentFullName := fmt.Sprintf(UsedByLabelKey, deployment.Kind, deployment.Namespace, deployment.Name) - desired.Status.Enabled = original.Spec.Enabled + kind := deployment.TypeMeta.Kind + meta := deployment.ObjectMeta + replicas := *deployment.Spec.Replicas - currentReplicas := int(deployment.Status.Replicas) - val, ok := desired.Status.UsedBy[deploymentFullName] - if !ok || (val != currentReplicas && currentReplicas > 0) { - desired.Status.UsedBy[deploymentFullName] = currentReplicas - } + hasSleepCycle := r.hasLabel(&meta, sleepcycle.Name) + if hasSleepCycle { + total += 1 + provisioned += 1 - switch op { - case Watch: - case Shutdown: - if deployment.Status.Replicas != 0 { - err := r.ScaleDeployment(ctx, deployment, 0) - if err != nil { - r.logger.Error(err, "🛑️ Scaling Deployment failed", "deployment", deploymentFullName) - r.RecordEvent(*original, true, deploymentFullName, op, []string{err.Error()}...) - return ctrl.Result{}, err - } - - r.RecordEvent(*original, false, deploymentFullName, op, []string{fmt.Sprintf("Scaled from %d to %d replicas", currentReplicas, 0)}...) 
- r.logger.Info("🌙 Scaled Down Deployment", "deployment", deploymentFullName, "targetReplicas", 0) - } - case WakeUp: - targetReplicas := int32(desired.Status.UsedBy[deploymentFullName]) - - if deployment.Status.Replicas != targetReplicas { - err := r.ScaleDeployment(ctx, deployment, targetReplicas) - if err != nil { - r.logger.Error(err, "🛑️ Scaling Deployment failed", "deployment", deploymentFullName) - r.RecordEvent(*original, true, deploymentFullName, op, []string{err.Error()}...) - return ctrl.Result{}, err - } - - r.RecordEvent(*original, false, deploymentFullName, op, []string{fmt.Sprintf("Scaled from %d to %d replicas", currentReplicas, targetReplicas)}...) - r.logger.Info("☀️ Scaled Up Deployment", "deployment", deploymentFullName, "targetReplicas", targetReplicas) - } + err := r.reconcile(ctx, logger, sleepcycle, kind, meta, replicas) + if err != nil { + provisioned -= 1 + errors = multierror.Append(errors, err) } } } - r.refreshLabelsDeployments(original, desired, deploymentList) - return ctrl.Result{}, nil + return provisioned, total, errors } -func (r *SleepCycleReconciler) ReconcileCronJobs(ctx context.Context, +func (r *SleepCycleReconciler) ReconcileCronJobs( + ctx context.Context, req ctrl.Request, - original *corev1alpha1.SleepCycle, - desired *corev1alpha1.SleepCycle, - op SleepCycleOperation, -) (ctrl.Result, error) { + sleepcycle *corev1alpha1.SleepCycle, +) (int, int, error) { + provisioned := 0 + total := 0 + cronJobList := batchv1.CronJobList{} if err := r.List(ctx, &cronJobList, &client.ListOptions{Namespace: req.NamespacedName.Namespace}); err != nil { - return ctrl.Result{}, err + return 0, 0, err } if len(cronJobList.Items) == 0 { - return ctrl.Result{}, nil + return 0, 0, nil } - r.logger.Info("🕑 Processing CronJobs") + var errors error + for _, cronjob := range cronJobList.Items { + logger := r.logger.WithValues("cronjob", cronjob.Name) + + kind := cronjob.TypeMeta.Kind + meta := cronjob.ObjectMeta - for _, cronJob := range cronJobList.Items { - hasSleepCycle := r.hasLabel(&cronJob.ObjectMeta, original.Name) + replicas := int32(1) + if *cronjob.Spec.Suspend { + replicas = int32(0) + } + hasSleepCycle := r.hasLabel(&meta, sleepcycle.Name) if hasSleepCycle { - cronJobFullName := fmt.Sprintf(UsedByLabelKey, cronJob.Kind, cronJob.Namespace, cronJob.Name) - - switch op { - case Watch: - case Shutdown: - if !*cronJob.Spec.Suspend { - err := r.SuspendCronJob(ctx, cronJob, true) - if err != nil { - r.logger.Error(err, "🛑️️ Suspending CronJob failed", "cronJob", cronJobFullName) - r.RecordEvent(*original, true, cronJobFullName, op, []string{err.Error()}...) - return ctrl.Result{}, err - } - - r.logger.Info("🌙 Suspended CronJob", "cronJob", cronJobFullName) - } - case WakeUp: - if *cronJob.Spec.Suspend { - err := r.SuspendCronJob(ctx, cronJob, false) - if err != nil { - r.logger.Error(err, "🛑️️ Suspending CronJob failed", "cronJob", cronJobFullName) - r.RecordEvent(*original, true, cronJobFullName, op, []string{err.Error()}...) 
- return ctrl.Result{}, err - } - - r.logger.Info("☀️ Enabled Cronjob", "cronJob", cronJobFullName) - } + total += 1 + provisioned += 1 + + err := r.reconcile(ctx, logger, sleepcycle, kind, meta, replicas) + if err != nil { + provisioned -= 1 + errors = multierror.Append(errors, err) } } } - return ctrl.Result{}, nil + return provisioned, total, errors } func (r *SleepCycleReconciler) ReconcileStatefulSets( ctx context.Context, req ctrl.Request, - original *corev1alpha1.SleepCycle, - desired *corev1alpha1.SleepCycle, - op SleepCycleOperation, -) (ctrl.Result, error) { + sleepcycle *corev1alpha1.SleepCycle, +) (int, int, error) { + provisioned := 0 + total := 0 + statefulSetList := appsv1.StatefulSetList{} if err := r.List(ctx, &statefulSetList, &client.ListOptions{Namespace: req.NamespacedName.Namespace}); err != nil { - return ctrl.Result{}, err + return 0, 0, err } if len(statefulSetList.Items) == 0 { - r.refreshLabelsStatefulSets(original, desired, statefulSetList) - return ctrl.Result{}, nil + return 0, 0, nil } - r.logger.Info("📦 Processing StatefulSets") - + var errors error for _, statefulSet := range statefulSetList.Items { - hasSleepCycle := r.hasLabel(&statefulSet.ObjectMeta, original.Name) + logger := r.logger.WithValues("statefulset", statefulSet.Name) - if hasSleepCycle { - statefulSetFullName := fmt.Sprintf(UsedByLabelKey, statefulSet.Kind, statefulSet.Namespace, statefulSet.Name) - desired.Status.Enabled = original.Spec.Enabled + kind := statefulSet.TypeMeta.Kind + meta := statefulSet.ObjectMeta + replicas := *statefulSet.Spec.Replicas - currentReplicas := int(statefulSet.Status.Replicas) - val, ok := desired.Status.UsedBy[statefulSetFullName] - if !ok || (val != currentReplicas && currentReplicas > 0) { - desired.Status.UsedBy[statefulSetFullName] = currentReplicas - } + hasSleepCycle := r.hasLabel(&meta, sleepcycle.Name) + if hasSleepCycle { + total += 1 + provisioned += 1 - switch op { - case Watch: - case Shutdown: - if statefulSet.Status.Replicas != 0 { - err := r.ScaleStatefulSet(ctx, statefulSet, 0) - if err != nil { - r.logger.Error(err, "🛑️ Scaling StatefulSet failed", "statefulSet", statefulSetFullName) - r.RecordEvent(*original, true, statefulSetFullName, op, []string{err.Error()}...) - return ctrl.Result{}, err - } - - r.RecordEvent(*original, false, statefulSetFullName, op, []string{fmt.Sprintf("Scaled from %d to %d replicas", currentReplicas, 0)}...) - r.logger.Info("🌙 Scaled Down StatefulSet", "statefulSet", statefulSetFullName, "targetReplicas", 0) - } - case WakeUp: - targetReplicas := int32(desired.Status.UsedBy[statefulSetFullName]) - - if statefulSet.Status.Replicas != targetReplicas { - err := r.ScaleStatefulSet(ctx, statefulSet, targetReplicas) - if err != nil { - r.logger.Error(err, "🛑️ Scaling StatefulSet failed", "statefulSet", statefulSetFullName) - r.RecordEvent(*original, true, statefulSetFullName, op, []string{err.Error()}...) - return ctrl.Result{}, err - } - - r.RecordEvent(*original, false, statefulSetFullName, op, []string{fmt.Sprintf("Scaled from %d to %d replicas", currentReplicas, targetReplicas)}...) 
- r.logger.Info("☀️ Scaled Up StatefulSet", "statefulSet", statefulSetFullName, "targetReplicas", targetReplicas) - } + err := r.reconcile(ctx, logger, sleepcycle, kind, meta, replicas) + if err != nil { + provisioned -= 1 + errors = multierror.Append(errors, err) } } } - r.refreshLabelsStatefulSets(original, desired, statefulSetList) - return ctrl.Result{}, nil + return provisioned, total, errors } func (r *SleepCycleReconciler) ReconcileHorizontalPodAutoscalers( ctx context.Context, req ctrl.Request, - original *corev1alpha1.SleepCycle, - desired *corev1alpha1.SleepCycle, - op SleepCycleOperation, -) (ctrl.Result, error) { + sleepcycle *corev1alpha1.SleepCycle, +) (int, int, error) { + provisioned := 0 + total := 0 + hpaList := autoscalingv1.HorizontalPodAutoscalerList{} if err := r.List(ctx, &hpaList, &client.ListOptions{Namespace: req.NamespacedName.Namespace}); err != nil { - return ctrl.Result{}, err + return 0, 0, err } if len(hpaList.Items) == 0 { - r.refreshLabelsHorizontalPodAutoscalers(original, desired, hpaList) - return ctrl.Result{}, nil + return 0, 0, nil } - r.logger.Info("📈 Processing HorizontalPodAutoscalers") - + var errors error for _, hpa := range hpaList.Items { - hasSleepCycle := r.hasLabel(&hpa.ObjectMeta, original.Name) + logger := r.logger.WithValues("hpa", hpa.Name) - if hasSleepCycle { - hpaFullName := fmt.Sprintf(UsedByLabelKey, hpa.Kind, hpa.Namespace, hpa.Name) - desired.Status.Enabled = original.Spec.Enabled + kind := hpa.TypeMeta.Kind + meta := hpa.ObjectMeta + replicas := hpa.Spec.MaxReplicas - maxReplicas := int(hpa.Spec.MaxReplicas) - val, ok := desired.Status.UsedBy[hpaFullName] - if !ok || (val != maxReplicas && maxReplicas > 0) { - desired.Status.UsedBy[hpaFullName] = maxReplicas - } + hasSleepCycle := r.hasLabel(&meta, sleepcycle.Name) + if hasSleepCycle { + total += 1 + provisioned += 1 - switch op { - case Watch: - case Shutdown: - if hpa.Spec.MaxReplicas != 1 { - err := r.ScaleHorizontalPodAutoscaler(ctx, hpa, 1) - if err != nil { - r.logger.Error(err, "🛑️ Scaling HorizontalPodAutoscaler failed", "hpa", hpaFullName) - r.RecordEvent(*original, true, hpaFullName, op, []string{err.Error()}...) - return ctrl.Result{}, err - } - - r.RecordEvent(*original, false, hpaFullName, op, []string{fmt.Sprintf("Scaled from %d to %d max replicas", maxReplicas, 1)}...) - r.logger.Info("🌙 Scaled Down HorizontalPodAutoscaler", "hpa", hpaFullName, "maxReplicas", 1) - } - case WakeUp: - targetReplicas := int32(desired.Status.UsedBy[hpaFullName]) - - if hpa.Spec.MaxReplicas != targetReplicas { - err := r.ScaleHorizontalPodAutoscaler(ctx, hpa, targetReplicas) - if err != nil { - r.logger.Error(err, "🛑️ Scaling HorizontalPodAutoscaler failed", "hpa", hpaFullName) - r.RecordEvent(*original, true, hpaFullName, op, []string{err.Error()}...) - return ctrl.Result{}, err - } - - r.RecordEvent(*original, false, hpaFullName, op, []string{fmt.Sprintf("Scaled from %d to %d max replicas", maxReplicas, targetReplicas)}...) 
- r.logger.Info("☀️ Scaled Up HorizontalPodAutoscaler", "hpa", hpaFullName, "maxReplicas", targetReplicas) - } + err := r.reconcile(ctx, logger, sleepcycle, kind, meta, replicas) + if err != nil { + provisioned -= 1 + errors = multierror.Append(errors, err) } } } - r.refreshLabelsHorizontalPodAutoscalers(original, desired, hpaList) - return ctrl.Result{}, nil + return provisioned, total, errors } + +// +//func (r *SleepCycleReconciler) ReconcileCronJobs(ctx context.Context, +// req ctrl.Request, +// original *corev1alpha1.SleepCycle, +// desired *corev1alpha1.SleepCycle, +// op SleepCycleOperation, +//) (ctrl.Result, error) { +// cronJobList := batchv1.CronJobList{} +// if err := r.List(ctx, &cronJobList, &client.ListOptions{Namespace: req.NamespacedName.Namespace}); err != nil { +// return ctrl.Result{}, err +// } +// +// if len(cronJobList.Items) == 0 { +// return ctrl.Result{}, nil +// } +// +// r.logger.Info("🕑 Processing CronJobs") +// +// for _, cronJob := range cronJobList.Items { +// hasSleepCycle := r.hasLabel(&cronJob.ObjectMeta, original.Name) +// +// if hasSleepCycle { +// cronJobFullName := fmt.Sprintf(UsedByLabelKey, cronJob.Kind, cronJob.Namespace, cronJob.Name) +// +// switch op { +// case Watch: +// case Shutdown: +// if !*cronJob.Spec.Suspend { +// err := r.SuspendCronJob(ctx, cronJob, true) +// if err != nil { +// r.logger.Error(err, "🛑️️ Suspending CronJob failed", "cronJob", cronJobFullName) +// r.RecordEvent(*original, true, cronJobFullName, op, []string{err.Error()}...) +// return ctrl.Result{}, err +// } +// +// r.logger.Info("🌙 Suspended CronJob", "cronJob", cronJobFullName) +// } +// case WakeUp: +// if *cronJob.Spec.Suspend { +// err := r.SuspendCronJob(ctx, cronJob, false) +// if err != nil { +// r.logger.Error(err, "🛑️️ Suspending CronJob failed", "cronJob", cronJobFullName) +// r.RecordEvent(*original, true, cronJobFullName, op, []string{err.Error()}...) 
+// r.logger.Info("☀️ Scaled Up StatefulSet", "statefulSet", statefulSetFullName, "targetReplicas", targetReplicas) +// } +// } +// } +// } +// +// r.refreshLabelsStatefulSets(original, desired, statefulSetList) +// return ctrl.Result{}, nil +//} +// +//func (r *SleepCycleReconciler) ReconcileHorizontalPodAutoscalers( +// ctx context.Context, +// req ctrl.Request, +// original *corev1alpha1.SleepCycle, +// desired *corev1alpha1.SleepCycle, +// op SleepCycleOperation, +//) (ctrl.Result, error) { +// hpaList := autoscalingv1.HorizontalPodAutoscalerList{} +// if err := r.List(ctx, &hpaList, &client.ListOptions{Namespace: req.NamespacedName.Namespace}); err != nil { +// return ctrl.Result{}, err +// } +// +// if len(hpaList.Items) == 0 { +// r.refreshLabelsHorizontalPodAutoscalers(original, desired, hpaList) +// return ctrl.Result{}, nil +// } +// +// r.logger.Info("📈 Processing HorizontalPodAutoscalers") +// +// for _, hpa := range hpaList.Items { +// hasSleepCycle := r.hasLabel(&hpa.ObjectMeta, original.Name) +// +// if hasSleepCycle { +// hpaFullName := fmt.Sprintf(UsedByLabelKey, hpa.Kind, hpa.Namespace, hpa.Name) +// desired.Status.Enabled = original.Spec.Enabled +// +// maxReplicas := int(hpa.Spec.MaxReplicas) +// val, ok := desired.Status.UsedBy[hpaFullName] +// if !ok || (val != maxReplicas && maxReplicas > 0) { +// desired.Status.UsedBy[hpaFullName] = maxReplicas +// } +// +// switch op { +// case Watch: +// case Shutdown: +// if hpa.Spec.MaxReplicas != 1 { +// err := r.ScaleHorizontalPodAutoscaler(ctx, hpa, 1) +// if err != nil { +// r.logger.Error(err, "🛑️ Scaling HorizontalPodAutoscaler failed", "hpa", hpaFullName) +// r.RecordEvent(*original, true, hpaFullName, op, []string{err.Error()}...) +// return ctrl.Result{}, err +// } +// +// r.RecordEvent(*original, false, hpaFullName, op, []string{fmt.Sprintf("Scaled from %d to %d max replicas", maxReplicas, 1)}...) +// r.logger.Info("🌙 Scaled Down HorizontalPodAutoscaler", "hpa", hpaFullName, "maxReplicas", 1) +// } +// case WakeUp: +// targetReplicas := int32(desired.Status.UsedBy[hpaFullName]) +// +// if hpa.Spec.MaxReplicas != targetReplicas { +// err := r.ScaleHorizontalPodAutoscaler(ctx, hpa, targetReplicas) +// if err != nil { +// r.logger.Error(err, "🛑️ Scaling HorizontalPodAutoscaler failed", "hpa", hpaFullName) +// r.RecordEvent(*original, true, hpaFullName, op, []string{err.Error()}...) +// return ctrl.Result{}, err +// } +// +// r.RecordEvent(*original, false, hpaFullName, op, []string{fmt.Sprintf("Scaled from %d to %d max replicas", maxReplicas, targetReplicas)}...) 
+// r.logger.Info("☀️ Scaled Up HorizontalPodAutoscaler", "hpa", hpaFullName, "maxReplicas", targetReplicas) +// } +// } +// } +// } +// +// r.refreshLabelsHorizontalPodAutoscalers(original, desired, hpaList) +// return ctrl.Result{}, nil +//} diff --git a/controllers/sleepcycle_runners_cronjobs.go b/controllers/sleepcycle_runners_cronjobs.go new file mode 100644 index 0000000..c2f7e3d --- /dev/null +++ b/controllers/sleepcycle_runners_cronjobs.go @@ -0,0 +1,273 @@ +package controllers + +import ( + "context" + "fmt" + "github.com/go-logr/logr" + corev1alpha1 "github.com/rekuberate-io/sleepcycles/api/v1alpha1" + batchv1 "k8s.io/api/batch/v1" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "strconv" +) + +var ( + startingDeadlineSeconds int64 = 15 +) + +const ( + OwnedBy = "rekuberate.io/owned-by" + Target = "rekuberate.io/target" + TargetKind = "rekuberate.io/target-kind" + TargetTimezone = "rekuberate.io/target-tz" + Replicas = "rekuberate.io/replicas" +) + +func (r *SleepCycleReconciler) getCronJob(ctx context.Context, objKey client.ObjectKey) (*batchv1.CronJob, error) { + var job batchv1.CronJob + if err := r.Get(ctx, objKey, &job); err != nil { + if apierrors.IsNotFound(err) { + return nil, nil + } + + return nil, err + } + + return &job, nil +} + +func (r *SleepCycleReconciler) createCronJob( + ctx context.Context, + logger logr.Logger, + sleepcycle *corev1alpha1.SleepCycle, + cronObjectKey client.ObjectKey, + targetKind string, + targetMeta metav1.ObjectMeta, + targetReplicas int32, + isShutdownOp bool, +) (*batchv1.CronJob, error) { + + logger.Info("creating runner", "cronjob", cronObjectKey) + backOffLimit := int32(0) + + schedule := sleepcycle.Spec.Shutdown + tz := sleepcycle.Spec.ShutdownTimeZone + suspend := !sleepcycle.Spec.Enabled + + if !isShutdownOp { + schedule = *sleepcycle.Spec.WakeUp + tz = sleepcycle.Spec.WakeupTimeZone + } + + labels := make(map[string]string) + labels[OwnedBy] = fmt.Sprintf("%s", sleepcycle.Name) + labels[Target] = fmt.Sprintf("%s", targetMeta.Name) + labels[TargetKind] = targetKind + + annotations := make(map[string]string) + annotations[TargetTimezone] = *tz + + if targetKind != "CronJob" { + annotations[Replicas] = fmt.Sprint(targetReplicas) + + if targetReplicas == 0 { + annotations[Replicas] = strconv.FormatInt(1, 10) + } + } + + job := &batchv1.CronJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: cronObjectKey.Name, + Namespace: cronObjectKey.Namespace, + Labels: labels, + Annotations: annotations, + }, + Spec: batchv1.CronJobSpec{ + SuccessfulJobsHistoryLimit: &sleepcycle.Spec.SuccessfulJobsHistoryLimit, + FailedJobsHistoryLimit: &sleepcycle.Spec.FailedJobsHistoryLimit, + Schedule: schedule, + TimeZone: tz, + StartingDeadlineSeconds: &startingDeadlineSeconds, + ConcurrencyPolicy: batchv1.ForbidConcurrent, + Suspend: &suspend, + JobTemplate: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + Template: v1.PodTemplateSpec{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: cronObjectKey.Name, + Image: sleepcycle.Spec.RunnerImage, + Env: []v1.EnvVar{ + { + Name: "MY_POD_NAME", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }}, + }, + { + Name: "MY_POD_NAMESPACE", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{ + FieldPath: "metadata.namespace", + }}, + }, + { + Name: "MY_CRONJOB_NAME", + Value: 
cronObjectKey.Name, + }, + }, + }, + }, + RestartPolicy: v1.RestartPolicyOnFailure, + ServiceAccountName: "rekuberate-runner", + }, + }, + BackoffLimit: &backOffLimit, + }, + }, + }, + } + + err := ctrl.SetControllerReference(sleepcycle, job, r.Scheme) + if err != nil { + logger.Error(err, "unable to set controller reference for runner", "cronjob", cronObjectKey.Name) + return nil, err + } + + err = r.Create(ctx, job) + if err != nil { + r.recordEvent(sleepcycle, fmt.Sprintf("unable to create runner %s/%s", cronObjectKey.Namespace, cronObjectKey.Name), true) + logger.Error(err, "unable to create runner", "cronjob", cronObjectKey.Name) + return nil, err + } + + r.recordEvent(sleepcycle, fmt.Sprintf("created runner %s/%s", cronObjectKey.Namespace, cronObjectKey.Name), false) + return job, nil +} + +func (r *SleepCycleReconciler) updateCronJob( + ctx context.Context, + logger logr.Logger, + sleepcycle *corev1alpha1.SleepCycle, + cronJob *batchv1.CronJob, + kind string, + schedule string, + timezone string, + suspend bool, + replicas int32, +) error { + deepCopy := cronJob.DeepCopy() + deepCopy.Spec.Schedule = schedule + *deepCopy.Spec.TimeZone = timezone + *deepCopy.Spec.Suspend = suspend + + if kind != "CronJob" { + if replicas != 0 { + deepCopy.Annotations[Replicas] = fmt.Sprint(replicas) + } + } + + if err := r.Update(ctx, deepCopy); err != nil { + r.recordEvent(sleepcycle, fmt.Sprintf("unable to update runner %s/%s", cronJob.Namespace, cronJob.Name), true) + logger.Error(err, "unable to update runner", "cronjob", cronJob.Name) + return err + } + + r.recordEvent(sleepcycle, fmt.Sprintf("updated runner %s/%s", cronJob.Namespace, cronJob.Name), false) + return nil +} + +func (r *SleepCycleReconciler) deleteCronJob(ctx context.Context, sleepcycle *corev1alpha1.SleepCycle, cronJob *batchv1.CronJob) error { + if err := r.Delete(ctx, cronJob); err != nil { + r.recordEvent(sleepcycle, fmt.Sprintf("unable to delete runner %s/%s", cronJob.Namespace, cronJob.Name), true) + return err + } + + r.recordEvent(sleepcycle, fmt.Sprintf("deleted runner %s/%s", cronJob.Namespace, cronJob.Name), false) + return nil +} + +func (r *SleepCycleReconciler) reconcileCronJob( + ctx context.Context, + logger logr.Logger, + sleepcycle *corev1alpha1.SleepCycle, + targetKind string, + targetMeta metav1.ObjectMeta, + targetReplicas int32, + isShutdownOp bool, +) error { + suffix := "shutdown" + if !isShutdownOp { + suffix = "wakeup" + } + + cronObjectKey := client.ObjectKey{ + Name: fmt.Sprintf("%s-%s-%s", sleepcycle.Name, targetMeta.Name, suffix), + Namespace: sleepcycle.Namespace, + } + cronjob, err := r.getCronJob(ctx, cronObjectKey) + if err != nil { + logger.Error(err, "unable to fetch runner", "cronjob", cronObjectKey.Name) + return err + } + + if cronjob == nil { + _, err := r.createCronJob(ctx, logger, sleepcycle, cronObjectKey, targetKind, targetMeta, targetReplicas, isShutdownOp) + if err != nil { + return err + } + } + + if cronjob != nil { + if !isShutdownOp && sleepcycle.Spec.WakeUp == nil { + err := r.deleteCronJob(ctx, sleepcycle, cronjob) + if err != nil { + return err + } + + return nil // the wakeup schedule was removed: stop here, otherwise we would update the runner we just deleted and dereference a nil WakeUp below + } + + schedule := sleepcycle.Spec.Shutdown + tz := sleepcycle.Spec.ShutdownTimeZone + suspend := !sleepcycle.Spec.Enabled + if !isShutdownOp { + schedule = *sleepcycle.Spec.WakeUp + tz = sleepcycle.Spec.WakeupTimeZone + } + + err := r.updateCronJob(ctx, logger, sleepcycle, cronjob, targetKind, schedule, *tz, suspend, targetReplicas) + if err != nil { + logger.Error(err, "failed to update runner", "name", cronObjectKey.Name) + return
err + } + } + + return nil +} + +func (r *SleepCycleReconciler) reconcile( + ctx context.Context, + logger logr.Logger, + sleepcycle *corev1alpha1.SleepCycle, + targetKind string, + targetMeta metav1.ObjectMeta, + targetReplicas int32, +) error { + err := r.reconcileCronJob(ctx, logger, sleepcycle, targetKind, targetMeta, targetReplicas, true) + if err != nil { + return err + } + + if sleepcycle.Spec.WakeUp != nil { + err := r.reconcileCronJob(ctx, logger, sleepcycle, targetKind, targetMeta, targetReplicas, false) + if err != nil { + return err + } + } + + return nil +} diff --git a/controllers/sleepcycle_utils.go b/controllers/sleepcycle_utils.go index 668b854..dda780b 100644 --- a/controllers/sleepcycle_utils.go +++ b/controllers/sleepcycle_utils.go @@ -1,154 +1,14 @@ package controllers import ( - "fmt" - "github.com/gorhill/cronexpr" + "encoding/base64" corev1alpha1 "github.com/rekuberate-io/sleepcycles/api/v1alpha1" - appsv1 "k8s.io/api/apps/v1" - autoscalingv1 "k8s.io/api/autoscaling/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "crypto/rand" "strings" - "time" ) -func (r *SleepCycleReconciler) getCurrentScheduledOperation(sleepCycle corev1alpha1.SleepCycle) SleepCycleOperation { - nextScheduledShutdown, nextScheduledWakeup := r.getSchedulesTime(sleepCycle, true) - nextScheduledShutdownTimeWindow := r.getScheduleTimeWindow(nextScheduledShutdown) - nextScheduledWakeupTimeWindow := r.getScheduleTimeWindow(nextScheduledWakeup) - - var isWithinScheduleForShutdown, isWithinScheduleForWakeup = false, false - isWithinScheduleForShutdown = nextScheduledShutdownTimeWindow.IsScheduleWithinWindow(time.Now()) - - if nextScheduledWakeup == nil { - if !isWithinScheduleForShutdown { - return Watch - } - - return Shutdown - } - - isWithinScheduleForWakeup = nextScheduledWakeupTimeWindow.IsScheduleWithinWindow(time.Now()) - - if nextScheduledShutdown.Before(*nextScheduledWakeup) && isWithinScheduleForShutdown { - return Shutdown - } - - if nextScheduledWakeup.Before(*nextScheduledShutdown) && isWithinScheduleForWakeup { - return WakeUp - } - - if isWithinScheduleForShutdown && isWithinScheduleForWakeup { - return WakeUp - } - - return Watch -} - -func (r *SleepCycleReconciler) getNextScheduledOperation(sleepCycle corev1alpha1.SleepCycle, currentOperation *SleepCycleOperation) (SleepCycleOperation, time.Duration) { - var requeueAfter time.Duration - - if currentOperation == nil { - *currentOperation = r.getCurrentScheduledOperation(sleepCycle) - } - - nextScheduledShutdown, nextScheduledWakeup := r.getSchedulesTime(sleepCycle, false) - var nextOperation SleepCycleOperation - - switch *currentOperation { - case Watch: - if nextScheduledWakeup == nil { - nextOperation = Shutdown - requeueAfter = time.Until(*nextScheduledShutdown) - } else { - if nextScheduledShutdown.Before(*nextScheduledWakeup) { - nextOperation = Shutdown - requeueAfter = time.Until(*nextScheduledShutdown) - } else { - nextOperation = WakeUp - requeueAfter = time.Until(*nextScheduledWakeup) - } - } - case Shutdown: - if nextScheduledWakeup == nil { - nextOperation = Shutdown - requeueAfter = time.Until(*nextScheduledShutdown) - } else { - nextOperation = WakeUp - requeueAfter = time.Until(*nextScheduledWakeup) - } - case WakeUp: - nextOperation = Shutdown - requeueAfter = time.Until(*nextScheduledShutdown) - } - - return nextOperation, requeueAfter -} - -func (r *SleepCycleReconciler) getScheduleTimeWindow(timestamp *time.Time) *TimeWindow { - if timestamp != nil { - return NewTimeWindow(*timestamp) -
} - - return nil -} - -func (r *SleepCycleReconciler) getSchedulesTime(sleepCycle corev1alpha1.SleepCycle, useStatus bool) (shutdown *time.Time, wakeup *time.Time) { - shutdown = nil - wakeup = nil - - if !useStatus { - shutdown = r.getTimeFromCronExpression(sleepCycle.Spec.Shutdown, sleepCycle.Spec.ShutdownTimeZone) - wakeup = r.getTimeFromCronExpression(sleepCycle.Spec.WakeUp, sleepCycle.Spec.WakeupTimeZone) - } else { - if sleepCycle.Status.NextScheduledWakeupTime != nil { - wakeupTimeWindow := NewTimeWindow(sleepCycle.Status.NextScheduledWakeupTime.Time) - - if wakeupTimeWindow.Right.Before(time.Now()) { - wakeup = r.getTimeFromCronExpression(sleepCycle.Spec.WakeUp, sleepCycle.Spec.WakeupTimeZone) - } else { - wakeup = &sleepCycle.Status.NextScheduledWakeupTime.Time - } - } else { - wakeup = r.getTimeFromCronExpression(sleepCycle.Spec.WakeUp, sleepCycle.Spec.WakeupTimeZone) - } - - if sleepCycle.Status.NextScheduledShutdownTime != nil { - shutdownTimeWindow := NewTimeWindow(sleepCycle.Status.NextScheduledShutdownTime.Time) - - if shutdownTimeWindow.Right.Before(time.Now()) { - shutdown = r.getTimeFromCronExpression(sleepCycle.Spec.Shutdown, sleepCycle.Spec.ShutdownTimeZone) - } else { - shutdown = &sleepCycle.Status.NextScheduledShutdownTime.Time - } - } else { - shutdown = r.getTimeFromCronExpression(sleepCycle.Spec.Shutdown, sleepCycle.Spec.ShutdownTimeZone) - } - } - - return shutdown, wakeup -} - -func (r *SleepCycleReconciler) getTimeFromCronExpression(cronexp string, timezone *string) *time.Time { - tz := r.getTimeZone(timezone) - - cronExpression, err := cronexpr.Parse(cronexp) - if err == nil { - t := cronExpression.Next(time.Now().In(tz)) - return &t - } - return nil -} - -func (r *SleepCycleReconciler) getTimeZone(timezone *string) *time.Location { - tz, err := time.LoadLocation(*timezone) - if err != nil { - r.logger.Info(fmt.Sprintf("no valid timezone, reverting to UTC: %s", err.Error())) - tz, _ = time.LoadLocation("UTC") - } - - return tz -} - func (r *SleepCycleReconciler) hasLabel(obj *metav1.ObjectMeta, tag string) bool { val, ok := obj.GetLabels()[SleepCycleLabel] @@ -159,94 +19,26 @@ func (r *SleepCycleReconciler) hasLabel(obj *metav1.ObjectMeta, tag string) bool return false } -func (r *SleepCycleReconciler) removeLabel(obj *metav1.ObjectMeta, tag string) bool { - val, ok := obj.GetLabels()[SleepCycleLabel] - - if ok && val == tag { - return true - } - - return false -} - -func (r *SleepCycleReconciler) refreshLabelsHorizontalPodAutoscalers(original *corev1alpha1.SleepCycle, desired *corev1alpha1.SleepCycle, hpas autoscalingv1.HorizontalPodAutoscalerList) { - usedBy := original.Status.UsedBy - if usedBy != nil { - for od, _ := range usedBy { - contains := false - if strings.HasPrefix(od, "(HorizontalPodAutoscaler)") { - for _, ed := range hpas.Items { - if od == fmt.Sprintf(UsedByLabelKey, ed.Kind, ed.Namespace, ed.Name) { - contains = true - break - } - } - - if !contains { - delete(desired.Status.UsedBy, od) - } - } - } +func (r *SleepCycleReconciler) generateToken() (string, error) { + token := make([]byte, 256) + _, err := rand.Read(token) + if err != nil { + r.logger.Error(err, "error while generating the secret token") + return "", err } -} - -func (r *SleepCycleReconciler) refreshLabelsStatefulSets(original *corev1alpha1.SleepCycle, desired *corev1alpha1.SleepCycle, statefulSets appsv1.StatefulSetList) { - usedBy := original.Status.UsedBy - if usedBy != nil { - for od, _ := range usedBy { - contains := false - if strings.HasPrefix(od, "(StatefulSet)") { - for 
_, ed := range statefulSets.Items { - if od == fmt.Sprintf(UsedByLabelKey, ed.Kind, ed.Namespace, ed.Name) { - contains = true - break - } - } - if !contains { - delete(desired.Status.UsedBy, od) - } - } - } - } + base64EncodedToken := base64.StdEncoding.EncodeToString(token) + return base64EncodedToken, nil } -func (r *SleepCycleReconciler) refreshLabelsDeployments(original *corev1alpha1.SleepCycle, desired *corev1alpha1.SleepCycle, deployments appsv1.DeploymentList) { - usedBy := original.Status.UsedBy - if usedBy != nil { - for od, _ := range usedBy { - contains := false - if strings.HasPrefix(od, "(Deployment)") { - for _, ed := range deployments.Items { - if od == fmt.Sprintf(UsedByLabelKey, ed.Kind, ed.Namespace, ed.Name) { - contains = true - break - } - } +func (r *SleepCycleReconciler) recordEvent(sleepCycle *corev1alpha1.SleepCycle, message string, isError bool) { + eventType := corev1.EventTypeNormal + reason := "SuccessfulSleepCycleReconcile" - if !contains { - delete(desired.Status.UsedBy, od) - } - } - } + if isError { + eventType = corev1.EventTypeWarning + reason = "FailedSleepCycleReconcile" } -} -func containsString(slice []string, s string) bool { - for _, item := range slice { - if item == s { - return true - } - } - return false -} - -func removeString(slice []string, s string) (result []string) { - for _, item := range slice { - if item == s { - continue - } - result = append(result, item) - } - return + r.Recorder.Event(sleepCycle, eventType, reason, strings.ToLower(message)) } diff --git a/controllers/sleepcycles_rbac.go b/controllers/sleepcycles_rbac.go new file mode 100644 index 0000000..ae7006a --- /dev/null +++ b/controllers/sleepcycles_rbac.go @@ -0,0 +1,122 @@ +package controllers + +import ( + "context" + "fmt" + corev1alpha1 "github.com/rekuberate-io/sleepcycles/api/v1alpha1" + v1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + serviceAccountName = "rekuberate-runner" +) + +func (r *SleepCycleReconciler) reconcileRbac(ctx context.Context, sleepcycle *corev1alpha1.SleepCycle) error { + createServiceAccount := false + serviceAccountObjectKey := client.ObjectKey{ + Namespace: sleepcycle.Namespace, + Name: serviceAccountName, + } + var serviceAccount v1.ServiceAccount + if err := r.Get(ctx, serviceAccountObjectKey, &serviceAccount); err != nil { + if apierrors.IsNotFound(err) { + createServiceAccount = true + } else { + r.logger.Error(err, "unable to fetch service account") + return err + } + } + + if !createServiceAccount { + return nil + } + + r.logger.Info("creating service account", "account", serviceAccountName) + newServiceAccount := &v1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceAccountObjectKey.Name, + Namespace: serviceAccountObjectKey.Namespace, + }, + } + err := r.Create(ctx, newServiceAccount) + if err != nil { + return err + } + + token, err := r.generateToken() + if err != nil { + return err + } + + r.logger.Info("creating secret", "secret", fmt.Sprintf("%s-secret", serviceAccountName)) + secret := &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-secret", serviceAccountName), + Namespace: sleepcycle.Namespace, + Annotations: map[string]string{ + "kubernetes.io/service-account.name": serviceAccountName, + }, + }, + Type: v1.SecretTypeServiceAccountToken, + Data: map[string][]byte{ + "token": []byte(token), + }, + } + + err = r.Create(ctx, secret) + 
if err != nil { + return err + } + + r.logger.Info("creating role", "role", fmt.Sprintf("%s-role", serviceAccountName)) + role := &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-role", serviceAccountName), + Namespace: sleepcycle.Namespace, + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{"", "apps", "batch", "core", "autoscaling"}, + Resources: []string{"*"}, + Verbs: []string{"get", "list", "watch", "create", "update", "patch", "delete"}, + }, + }, + } + + err = r.Create(ctx, role) + if err != nil { + return err + } + + r.logger.Info("creating role binding", "role", fmt.Sprintf("%s-rolebinding", serviceAccountName)) + roleBinding := &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-rolebinding", serviceAccountName), + Namespace: sleepcycle.Namespace, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "Role", + Name: fmt.Sprintf("%s-role", serviceAccountName), + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: serviceAccountName, + Namespace: sleepcycle.Namespace, + }, + }, + } + + err = r.Create(ctx, roleBinding) + if err != nil { + return err + } + + r.recordEvent(sleepcycle, fmt.Sprintf("created rbac resources in %s", sleepcycle.Namespace), false) + return nil +} diff --git a/controllers/timewindow.go b/controllers/timewindow.go deleted file mode 100644 index 3523b7a..0000000 --- a/controllers/timewindow.go +++ /dev/null @@ -1,28 +0,0 @@ -package controllers - -import ( - "time" -) - -func NewTimeWindow(schedule time.Time) (timeWindow *TimeWindow) { - left := schedule.Add(-(time.Duration(TimeWindowToleranceInSeconds) * time.Second)) - right := schedule.Add(time.Duration(TimeWindowToleranceInSeconds) * time.Second) - - return &TimeWindow{ - Left: left, - Right: right, - } -} - -type TimeWindow struct { - Left time.Time - Right time.Time -} - -func (tw TimeWindow) IsScheduleWithinWindow(schedule time.Time) bool { - if schedule.After(tw.Left) && schedule.Before(tw.Right) { - return true - } - - return false -} diff --git a/docs/images/SCR-20240527-q9y.png b/docs/images/SCR-20240527-q9y.png new file mode 100644 index 0000000..3b39449 Binary files /dev/null and b/docs/images/SCR-20240527-q9y.png differ diff --git a/docs/images/SCR-20240527-qei.png b/docs/images/SCR-20240527-qei.png new file mode 100644 index 0000000..2ea9c64 Binary files /dev/null and b/docs/images/SCR-20240527-qei.png differ diff --git a/go.mod b/go.mod index 70565aa..1302384 100644 --- a/go.mod +++ b/go.mod @@ -3,9 +3,11 @@ module github.com/rekuberate-io/sleepcycles go 1.18 require ( - github.com/gorhill/cronexpr v0.0.0-20180427100037-88b0669f7d75 + github.com/go-logr/logr v1.2.0 + github.com/hashicorp/go-multierror v1.0.0 github.com/onsi/ginkgo v1.16.5 github.com/onsi/gomega v1.18.1 + go.uber.org/zap v1.19.1 k8s.io/api v0.24.2 k8s.io/apimachinery v0.24.2 k8s.io/client-go v0.24.2 @@ -29,7 +31,6 @@ require ( github.com/evanphx/json-patch v4.12.0+incompatible // indirect github.com/form3tech-oss/jwt-go v3.2.3+incompatible // indirect github.com/fsnotify/fsnotify v1.5.1 // indirect - github.com/go-logr/logr v1.2.0 // indirect github.com/go-logr/zapr v1.2.0 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect github.com/go-openapi/jsonreference v0.19.5 // indirect @@ -41,6 +42,7 @@ require ( github.com/google/go-cmp v0.5.5 // indirect github.com/google/gofuzz v1.1.0 // indirect github.com/google/uuid v1.1.2 // indirect + github.com/hashicorp/errwrap v1.0.0 // indirect github.com/imdario/mergo v0.3.12 //
indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -58,7 +60,6 @@ require ( github.com/spf13/pflag v1.0.5 // indirect go.uber.org/atomic v1.7.0 // indirect go.uber.org/multierr v1.6.0 // indirect - go.uber.org/zap v1.19.1 // indirect golang.org/x/crypto v0.0.0-20220214200702-86341886e292 // indirect golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd // indirect golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect diff --git a/go.sum b/go.sum index 85c5531..c0f4c5e 100644 --- a/go.sum +++ b/go.sum @@ -256,8 +256,6 @@ github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/gorhill/cronexpr v0.0.0-20180427100037-88b0669f7d75 h1:f0n1xnMSmBLzVfsMMvriDyA75NB/oBgILX2GcHXIQzY= -github.com/gorhill/cronexpr v0.0.0-20180427100037-88b0669f7d75/go.mod h1:g2644b03hfBX9Ov0ZBDgXXens4rxSxmqFBbhvKv2yVA= github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= @@ -268,10 +266,12 @@ github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= +github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-multierror v1.0.0 h1:iVjPR7a6H0tWELX5NxNe7bYopibicUzc7uPribsnS6o= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= diff --git a/runners/runner.go b/runners/runner.go new file mode 100644 index 0000000..46a72a8 --- /dev/null +++ b/runners/runner.go @@ -0,0 +1,325 @@ +package main + +import ( + "context" + "flag" + "fmt" + "github.com/go-logr/logr" + "github.com/pkg/errors" + "go.uber.org/zap/zapcore" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes" + typedv1core "k8s.io/client-go/kubernetes/typed/core/v1" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/record" + "os" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "strconv" + "strings" + "time" +) + +var ( + logger logr.Logger + config *rest.Config + clientSet *kubernetes.Clientset + ctx 
context.Context + + envVarErr = "environment variable %s not found" + podEnvVar = "MY_POD_NAME" + namespaceEnvVar = "MY_POD_NAMESPACE" + cronjobEnvVar = "MY_CRONJOB_NAME" + + eventRecorder record.EventRecorder +) + +func init() { + opts := zap.Options{ + Development: true, + TimeEncoder: zapcore.ISO8601TimeEncoder, + StacktraceLevel: zapcore.DPanicLevel, + } + opts.BindFlags(flag.CommandLine) + + logger = zap.New(zap.UseFlagOptions(&opts)) + config = ctrl.GetConfigOrDie() + ctx = context.Background() +} + +func main() { + pd, ok := os.LookupEnv(podEnvVar) + if !ok { + logger.Error(fmt.Errorf(envVarErr, podEnvVar), "failed to load environment variable") + os.Exit(78) + } + + ns, ok := os.LookupEnv(namespaceEnvVar) + if !ok { + logger.Error(fmt.Errorf(envVarErr, namespaceEnvVar), "failed to load environment variable") + os.Exit(78) + } + + cj, ok := os.LookupEnv(cronjobEnvVar) + if !ok { + logger.Error(fmt.Errorf(envVarErr, cronjobEnvVar), "failed to load environment variable") + os.Exit(78) + } + + logger.Info("starting runner", "namespace", ns, "cronjob", cj, "pod", pd) + cs, err := kubernetes.NewForConfig(config) + if err != nil { + logger.Error(err, "failed to create clientset") + os.Exit(1) + } + clientSet = cs + + scheme := runtime.NewScheme() + _ = batchv1.AddToScheme(scheme) + + eventBroadcaster := record.NewBroadcaster() + defer eventBroadcaster.Shutdown() + + eventBroadcaster.StartStructuredLogging(4) + eventBroadcaster.StartRecordingToSink(&typedv1core.EventSinkImpl{Interface: clientSet.CoreV1().Events("")}) + eventRecorder = eventBroadcaster.NewRecorder(scheme, corev1.EventSource{Component: "rekuberate-io/sleepcycles-runner"}) + + cronjob, err := clientSet.BatchV1().CronJobs(ns).Get(ctx, cj, metav1.GetOptions{}) + if err != nil { + logger.Error(err, "failed to get runner cronjob") + os.Exit(1) + } + + isShutdownOp := true + if !strings.HasSuffix(cronjob.Name, "shutdown") { + isShutdownOp = false + } + + target := cronjob.Labels["rekuberate.io/target"] + kind := cronjob.Labels["rekuberate.io/target-kind"] + + replicas := int64(1) + if kind != "CronJob" { + replicas, err = strconv.ParseInt(cronjob.Annotations["rekuberate.io/replicas"], 10, 32) + if err != nil { + logger.Error(err, "failed to get rekuberate.io/replicas value") + } + } + + if err == nil { + err := run(ns, cronjob, target, kind, replicas, isShutdownOp) + if err != nil { + recordEvent(cronjob, err.Error(), true) + logger.Error(err, "runner failed", "target", target, "kind", kind) + } else { + action := "up" + if isShutdownOp { + action = "down" + } + recordEvent(cronjob, fmt.Sprintf("runner scaled %s %s", action, target), false) + } + } +} + +func run(ns string, cronjob *batchv1.CronJob, target string, kind string, targetReplicas int64, shutdown bool) error { + smsg := "scaling failed" + var serr error + + switch kind { + case "Deployment": + if shutdown { + targetReplicas = 0 + } + err := scaleDeployment(ctx, ns, cronjob, target, int32(targetReplicas)) + if err != nil { + serr = errors.Wrap(err, smsg) + } + case "StatefulSet": + if shutdown { + targetReplicas = 0 + } + err := scaleStatefulSets(ctx, ns, cronjob, target, int32(targetReplicas)) + if err != nil { + serr = errors.Wrap(err, smsg) + } + case "CronJob": + if shutdown { + targetReplicas = 0 + } + err := scaleCronJob(ctx, ns, cronjob, target, int32(targetReplicas)) + if err != nil { + serr = errors.Wrap(err, smsg) + } + case "HorizontalPodAutoscaler": + if shutdown { + targetReplicas = 1 + } + err := scaleHorizontalPodAutoscalers(ctx, ns, cronjob,
target, int32(targetReplicas)) + if err != nil { + serr = errors.Wrap(err, smsg) + } + default: + err := fmt.Errorf("not supported kind: %s", kind) + serr = errors.Wrap(err, smsg) + } + + return serr +} + +// syncReplicas stores the target's last observed non-zero replica count in the runner cronjob's annotations, so a later wake-up can restore it. +func syncReplicas(ctx context.Context, namespace string, cronjob *batchv1.CronJob, currentReplicas int32, targetReplicas int32) error { + if currentReplicas != targetReplicas && currentReplicas > 0 { + cronjob.Annotations["rekuberate.io/replicas"] = fmt.Sprint(currentReplicas) + _, err := clientSet.BatchV1().CronJobs(namespace).Update(ctx, cronjob, metav1.UpdateOptions{}) + if err != nil { + return err + } + } + + return nil +} + +func scaleDeployment(ctx context.Context, namespace string, cronjob *batchv1.CronJob, target string, targetReplicas int32) error { + deployment, err := clientSet.AppsV1().Deployments(namespace).Get(ctx, target, metav1.GetOptions{}) + if err != nil { + return err + } + + currentReplicas := *deployment.Spec.Replicas + err = syncReplicas(ctx, namespace, cronjob, currentReplicas, targetReplicas) + if err != nil { + return err + } + + if currentReplicas != targetReplicas { + deployment.Spec.Replicas = &targetReplicas + _, err = clientSet.AppsV1().Deployments(namespace).Update(ctx, deployment, metav1.UpdateOptions{}) + if err != nil { + return err + } + + action := "down" + if targetReplicas > 0 { + action = "up" + } + + logger.Info(fmt.Sprintf("scaled %s deployment", action), "namespace", namespace, "deployment", target, "replicas", targetReplicas) + return nil + } + + logger.Info("deployment already in desired state", "namespace", namespace, "deployment", target, "replicas", targetReplicas) + + return nil +} + +func scaleCronJob(ctx context.Context, namespace string, cronjob *batchv1.CronJob, target string, targetReplicas int32) error { + cj, err := clientSet.BatchV1().CronJobs(namespace).Get(ctx, target, metav1.GetOptions{}) + if err != nil { + return err + } + + suspend := targetReplicas <= 0 + if suspend != *cj.Spec.Suspend { + cj.Spec.Suspend = &suspend + _, err = clientSet.BatchV1().CronJobs(namespace).Update(ctx, cj, metav1.UpdateOptions{}) + if err != nil { + return err + } + + action := "resumed" + if suspend { + action = "suspended" + } + + logger.Info(fmt.Sprintf("cronjob %s", action), "namespace", namespace, "cronjob", target) + return nil + } + + logger.Info("cronjob already in desired state", "namespace", namespace, "cronjob", target, "suspended", suspend) + return nil +} + +func scaleStatefulSets(ctx context.Context, namespace string, cronjob *batchv1.CronJob, target string, targetReplicas int32) error { + statefulSet, err := clientSet.AppsV1().StatefulSets(namespace).Get(ctx, target, metav1.GetOptions{}) + if err != nil { + return err + } + + currentReplicas := *statefulSet.Spec.Replicas + err = syncReplicas(ctx, namespace, cronjob, currentReplicas, targetReplicas) + if err != nil { + return err + } + + if currentReplicas != targetReplicas { + statefulSet.Spec.Replicas = &targetReplicas + _, err = clientSet.AppsV1().StatefulSets(namespace).Update(ctx, statefulSet, metav1.UpdateOptions{}) + if err != nil { + return err + } + + action := "down" + if targetReplicas > 0 { + action = "up" + } + + logger.Info(fmt.Sprintf("scaled %s statefulset", action), "namespace", namespace, "statefulset", target, "replicas", targetReplicas) + return nil + } + + logger.Info("statefulset already in desired state", "namespace", namespace, "statefulset", target, "replicas", targetReplicas) + + return nil
+} + +func scaleHorizontalPodAutoscalers(ctx context.Context, namespace string, cronjob *batchv1.CronJob, target string, targetReplicas int32) error { + hpa, err := clientSet.AutoscalingV1().HorizontalPodAutoscalers(namespace).Get(ctx, target, metav1.GetOptions{}) + if err != nil { + return err + } + + currentReplicas := hpa.Spec.MaxReplicas + err = syncReplicas(ctx, namespace, cronjob, currentReplicas, targetReplicas) + if err != nil { + return err + } + + if currentReplicas != targetReplicas { + hpa.Spec.MaxReplicas = targetReplicas + _, err = clientSet.AutoscalingV1().HorizontalPodAutoscalers(namespace).Update(ctx, hpa, metav1.UpdateOptions{}) + if err != nil { + return err + } + + action := "down" + if targetReplicas > currentReplicas { // a shutdown pins max replicas to 1, which is still > 0, so compare against the previous value + action = "up" + } + + logger.Info(fmt.Sprintf("scaled max replicas %s", action), "namespace", namespace, "hpa", target, "replicas", targetReplicas) + return nil + } + + logger.Info("horizontal pod autoscaler already in desired state", "namespace", namespace, "hpa", target, "replicas", targetReplicas) + + return nil +} + +func recordEvent(cronjob *batchv1.CronJob, message string, isError bool) { + eventType := corev1.EventTypeNormal + reason := "SuccessfulSleepCycleScale" + + if isError { + eventType = corev1.EventTypeWarning + reason = "FailedSleepCycleScale" + } + + eventRecorder.Event(cronjob, eventType, reason, strings.ToLower(message)) + + // give the event broadcaster a moment to flush the event before this short-lived runner pod exits + time.Sleep(2 * time.Second) +}
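Since the runner only talks to the API server through the package-level `clientSet`, its scaling helpers can be exercised against a fake clientset. A hypothetical test sketch, assuming `clientSet` were declared as `kubernetes.Interface` (the fake clientset does not satisfy `*kubernetes.Clientset`) and the `ctrl.GetConfigOrDie()` call were moved out of `init()` so the test binary can start without a kubeconfig:

```go
package main

import (
	"context"
	"testing"

	appsv1 "k8s.io/api/apps/v1"
	batchv1 "k8s.io/api/batch/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/fake"
)

func TestScaleDeploymentToZero(t *testing.T) {
	replicas := int32(2)
	deployment := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{Name: "app-1", Namespace: "app-1"},
		Spec:       appsv1.DeploymentSpec{Replicas: &replicas},
	}
	runner := &batchv1.CronJob{
		ObjectMeta: metav1.ObjectMeta{
			Name:        "sleepcycle-app-1-app-1-shutdown",
			Namespace:   "app-1",
			Annotations: map[string]string{"rekuberate.io/replicas": "2"},
		},
	}

	// inject a fake clientset pre-populated with the target and its runner cronjob
	clientSet = fake.NewSimpleClientset(deployment, runner)

	if err := scaleDeployment(context.Background(), "app-1", runner, "app-1", 0); err != nil {
		t.Fatal(err)
	}

	got, err := clientSet.AppsV1().Deployments("app-1").Get(context.Background(), "app-1", metav1.GetOptions{})
	if err != nil {
		t.Fatal(err)
	}
	if *got.Spec.Replicas != 0 {
		t.Fatalf("expected 0 replicas, got %d", *got.Spec.Replicas)
	}
}
```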