Skip to content

Commit

Permalink
[issue-464] Create a Prometheus ServiceMonitor object that can captur…
Browse files Browse the repository at this point in the history
…e/collect metrics from deployed SonataFlow instances (apache#540)
  • Loading branch information
jianrongzhang89 authored and rgdoliveira committed Oct 24, 2024
1 parent 4eea1f9 commit b99f958
Show file tree
Hide file tree
Showing 38 changed files with 823 additions and 66 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ env:
PYTHON_VERSION: "3.10"
KIND_VERSION: v0.20.0
KNATIVE_VERSION: v1.12.5
PROMETHEUS_VERSION: v0.70.0
OPERATOR_IMAGE_NAME: "127.0.0.1:5001/kogito-serverless-operator:0.0.1"

jobs:
Expand Down Expand Up @@ -68,6 +69,9 @@ jobs:
- name: Deploy Knative Eventing and Serving
run: make KNATIVE_VERSION=${{ env.KNATIVE_VERSION }} deploy-knative

- name: Deploy Prometheus
run: make PROMETHEUS_VERSION=${{ env.PROMETHEUS_VERSION }} deploy-prometheus

- name: Set OPERATOR_IMAGE_NAME to Point to Kind's Local Registry
run: echo "OPERATOR_IMAGE_NAME=${{ env.OPERATOR_IMAGE_NAME }}" >> $GITHUB_ENV

Expand All @@ -92,6 +96,9 @@ jobs:
- name: Run E2E Tests for Persistent Flows
run: make test-e2e label=flows-persistence

- name: Run E2E Tests for Workflow Monitoring
run: make test-e2e label=flows-monitoring

- name: Run E2E Tests for Platform
run: make test-e2e label=platform

Expand Down
26 changes: 22 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,7 @@ test: manifests generate envtest test-api ## Run tests.
@$(MAKE) vet
@$(MAKE) fmt
@echo "🔍 Running controller tests..."
@KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \
go test $(shell go list ./... | grep -v /test/) -coverprofile cover.out > /dev/null 2>&1
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $(shell go list ./... | grep -v /test/) -coverprofile cover.out
@echo "✅ Tests completed successfully. Coverage report generated: cover.out."

.PHONY: test-api
Expand Down Expand Up @@ -264,6 +263,8 @@ GOLANGCI_LINT_VERSION ?= v1.57.2
KIND_VERSION ?= v0.20.0
KNATIVE_VERSION ?= v1.13.2
TIMEOUT_SECS ?= 180s
PROMETHEUS_VERSION ?= v0.70.0
GRAFANA_VERSION ?= v5.13.0

KNATIVE_SERVING_PREFIX ?= "https://github.com/knative/serving/releases/download/knative-$(KNATIVE_VERSION)"
KNATIVE_EVENTING_PREFIX ?= "https://github.com/knative/eventing/releases/download/knative-$(KNATIVE_VERSION)"
Expand Down Expand Up @@ -402,7 +403,7 @@ generate-all: generate generate-deploy bundle
@$(MAKE) fmt

.PHONY: test-e2e # You will need to have a Minikube/Kind cluster up and running to run this target, and run container-builder before the test
label = "flows-ephemeral" # possible values are flows-ephemeral, flows-persistence, platform, cluster
label = "flows-ephemeral" # possible values are flows-ephemeral, flows-persistence, flows-monitoring, platform, cluster
test-e2e:
ifeq ($(label), cluster)
@echo "🌐 Running e2e tests for cluster..."
Expand All @@ -424,8 +425,13 @@ else ifeq ($(label), flows-persistence)
go test ./test/e2e/e2e_suite_test.go ./test/e2e/helpers.go ./test/e2e/workflow_test.go \
-v -ginkgo.v -ginkgo.no-color -ginkgo.github-output -ginkgo.label-filter=$(label) \
-ginkgo.junit-report=./e2e-test-report-workflow_test.xml -timeout 60m KUSTOMIZE=$(KUSTOMIZE);
else ifeq ($(label), flows-monitoring)
@echo "🔁 Running e2e tests for flows-monitoring..."
go test ./test/e2e/e2e_suite_test.go ./test/e2e/helpers.go ./test/e2e/workflow_test.go \
-v -ginkgo.v -ginkgo.no-color -ginkgo.github-output -ginkgo.label-filter=$(label) \
-ginkgo.junit-report=./e2e-test-report-workflow_test.xml -timeout 60m KUSTOMIZE=$(KUSTOMIZE);
else
@echo "❌ Invalid label. Please use one of: cluster, platform, flows-ephemeral, flows-persistence"
@echo "❌ Invalid label. Please use one of: cluster, platform, flows-ephemeral, flows-persistence, flows-monitoring"
endif


Expand All @@ -450,6 +456,18 @@ deploy-knative:
kubectl wait --for=condition=Ready=True KnativeServing/knative-serving -n knative-serving --timeout=$(TIMEOUT_SECS)
kubectl wait --for=condition=Ready=True KnativeEventing/knative-eventing -n knative-eventing --timeout=$(TIMEOUT_SECS)

.PHONY: deploy-prometheus
# Installs the Prometheus operator release $(PROMETHEUS_VERSION) and the test
# Prometheus instance from ./test/testdata/prometheus.yaml into the `default`
# namespace, waiting up to $(TIMEOUT_SECS) for each to report Available.
# The operator bundle is applied server-side so the target can be re-run
# against an existing cluster: `kubectl create` fails with AlreadyExists on a
# second run, and a client-side `kubectl apply` would have to store the large
# CRDs in the last-applied-configuration annotation.
deploy-prometheus: create-cluster
	kubectl apply --server-side -f https://github.com/prometheus-operator/prometheus-operator/releases/download/$(PROMETHEUS_VERSION)/bundle.yaml
	kubectl wait --for=condition=Available=True deploy/prometheus-operator -n default --timeout=$(TIMEOUT_SECS)
	kubectl apply -f ./test/testdata/prometheus.yaml -n default
	kubectl wait --for=condition=Available=True prometheus/prometheus -n default --timeout=$(TIMEOUT_SECS)

.PHONY: deploy-grafana
# Installs the cluster-scoped Grafana operator release $(GRAFANA_VERSION) and
# waits up to $(TIMEOUT_SECS) for its controller manager deployment in the
# `grafana` namespace to report Available.
# The manifest is applied server-side so the target can be re-run against an
# existing cluster; `kubectl create` fails with AlreadyExists on a second run.
deploy-grafana: create-cluster
	kubectl apply --server-side -f https://github.com/grafana/grafana-operator/releases/download/$(GRAFANA_VERSION)/kustomize-cluster_scoped.yaml
	kubectl wait --for=condition=Available=True deploy/grafana-operator-controller-manager -n grafana --timeout=$(TIMEOUT_SECS)

.PHONY: delete-cluster
delete-cluster: install-kind
kind delete cluster && $(BUILDER) rm -f kind-registry
12 changes: 12 additions & 0 deletions api/v1alpha08/sonataflowplatform_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ type SonataFlowPlatformSpec struct {
// These properties MAY NOT be propagated to a SonataFlowClusterPlatform since PropertyVarSource can only refer local context sources.
// +optional
Properties *PropertyPlatformSpec `json:"properties,omitempty"`
// Settings for Prometheus monitoring
// +optional
Monitoring *PlatformMonitoringOptionsSpec `json:"monitoring,omitempty"`
}

// PlatformEventingSpec specifies the Knative Eventing integration details in the platform.
Expand All @@ -74,6 +77,15 @@ type PlatformEventingSpec struct {
Broker *duckv1.Destination `json:"broker,omitempty"`
}

// NOTE(review): the generated CRD schema for `monitoring.enabled` carries no
// `default` entry, so the `+default: false` marker on the Enabled field looks
// informational only; an unset field still reads as false because that is the
// Go zero value for bool. Confirm whether `+kubebuilder:default=false` was
// intended. This note is kept detached from the doc comments so it does not
// leak into generated descriptions.

// PlatformMonitoringOptionsSpec specifies the settings for monitoring
// +k8s:openapi-gen=true
type PlatformMonitoringOptionsSpec struct {
	// Enabled indicates whether monitoring with Prometheus metrics is enabled
	// +optional
	// +default: false
	Enabled bool `json:"enabled,omitempty"`
}

// PlatformCluster is the kind of orchestration cluster the platform is installed into
// +kubebuilder:validation:Enum=kubernetes;openshift
type PlatformCluster string
Expand Down
20 changes: 20 additions & 0 deletions api/v1alpha08/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions bundle/manifests/sonataflow.org_sonataflowplatforms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,14 @@ spec:
type: string
type: object
type: object
monitoring:
description: Settings for Prometheus monitoring
properties:
enabled:
description: Enabled indicates whether monitoring with Prometheus
metrics is enabled
type: boolean
type: object
persistence:
description: |-
Persistence defines the platform persistence configuration. When this field is set,
Expand Down
5 changes: 3 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ import (
"github.com/apache/incubator-kie-kogito-serverless-operator/internal/controller"
"github.com/apache/incubator-kie-kogito-serverless-operator/internal/controller/cfg"
"github.com/apache/incubator-kie-kogito-serverless-operator/version"
prometheus "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"k8s.io/klog/v2/klogr"
eventingv1 "knative.dev/eventing/pkg/apis/eventing/v1"
sourcesv1 "knative.dev/eventing/pkg/apis/sources/v1"
servingv1 "knative.dev/serving/pkg/apis/serving/v1"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"

"k8s.io/klog/v2/klogr"

"k8s.io/klog/v2"

"github.com/apache/incubator-kie-kogito-serverless-operator/utils"
Expand Down Expand Up @@ -66,6 +66,7 @@ func init() {
utilruntime.Must(sourcesv1.AddToScheme(scheme))
utilruntime.Must(eventingv1.AddToScheme(scheme))
utilruntime.Must(servingv1.AddToScheme(scheme))
utilruntime.Must(prometheus.AddToScheme(scheme))
//+kubebuilder:scaffold:scheme
}

Expand Down
8 changes: 8 additions & 0 deletions config/crd/bases/sonataflow.org_sonataflowplatforms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,14 @@ spec:
type: string
type: object
type: object
monitoring:
description: Settings for Prometheus monitoring
properties:
enabled:
description: Enabled indicates whether monitoring with Prometheus
metrics is enabled
type: boolean
type: object
persistence:
description: |-
Persistence defines the platform persistence configuration. When this field is set,
Expand Down
11 changes: 11 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@ kind: ClusterRole
metadata:
name: manager-role
rules:
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
verbs:
- create
- delete
- get
- list
- update
- watch
- apiGroups:
- sonataflow.org
resources:
Expand Down
2 changes: 2 additions & 0 deletions go.work.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2247,6 +2247,7 @@ github.com/google/go-containerregistry/pkg/authn/kubernetes v0.0.0-2023020916533
github.com/google/go-github v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY=
github.com/google/go-github/v27 v27.0.6 h1:oiOZuBmGHvrGM1X9uNUAUlLgp5r1UUO/M/KnbHnLRlQ=
github.com/google/go-github/v27 v27.0.6/go.mod h1:/0Gr8pJ55COkmv+S/yPKCczSkUPIM/LnFyubufRNIS0=
github.com/google/go-jsonnet v0.18.0/go.mod h1:C3fTzyVJDslXdiTqw/bTFk7vSGyCtH3MGRbDfvEwGd0=
github.com/google/go-pkcs11 v0.2.1-0.20230907215043-c6f79328ddf9 h1:OF1IPgv+F4NmqmJ98KTjdN97Vs1JxDPB3vbmYzV2dpk=
github.com/google/go-pkcs11 v0.2.1-0.20230907215043-c6f79328ddf9/go.mod h1:6eQoGcuNJpa7jnd5pMGdkSaQpNDYvPlXWMcjXXThLlY=
github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk=
Expand Down Expand Up @@ -2588,6 +2589,7 @@ github.com/openzipkin/zipkin-go v0.3.0/go.mod h1:4c3sLeE8xjNqehmF5RpAFLPLJxXscc0
github.com/openzipkin/zipkin-go v0.4.2 h1:zjqfqHjUpPmB3c1GlCvvgsM1G4LkvqQbBDueDOCg/jA=
github.com/openzipkin/zipkin-go v0.4.2/go.mod h1:ZeVkFjuuBiSy13y8vpSDCjMi9GoI3hPpCJSBx/EYFhY=
github.com/operator-framework/api v0.1.1 h1:DbfxRJUPMQlQW6nbfoNzWLxv1rIv13Gt8GbsF2aglFk=
github.com/operator-framework/operator-lib v0.11.0/go.mod h1:RpyKhFAoG6DmKTDIwMuO6pI3LRc8IE9rxEYWy476o6g=
github.com/operator-framework/operator-registry v1.6.1 h1:Ow0Ko9DRIZ4xvH55vFAslcTy6A9FhlIeXvm+FhyRd84=
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw=
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
Expand Down
19 changes: 1 addition & 18 deletions internal/controller/knative/knative.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/discovery"
"k8s.io/client-go/rest"
eventingv1 "knative.dev/eventing/pkg/apis/eventing/v1"
sourcesv1 "knative.dev/eventing/pkg/apis/sources/v1"
Expand All @@ -44,7 +43,6 @@ import (

var servingClient clientservingv1.ServingV1Interface
var eventingClient clienteventingv1.EventingV1Interface
var discoveryClient discovery.DiscoveryInterface

type Availability struct {
Eventing bool
Expand Down Expand Up @@ -92,23 +90,8 @@ func NewKnativeEventingClient(cfg *rest.Config) (*clienteventingv1.EventingV1Cli
return clienteventingv1.NewForConfig(cfg)
}

func getDiscoveryClient(cfg *rest.Config) (discovery.DiscoveryInterface, error) {
if discoveryClient == nil {
if cli, err := discovery.NewDiscoveryClientForConfig(cfg); err != nil {
return nil, err
} else {
discoveryClient = cli
}
}
return discoveryClient, nil
}

func SetDiscoveryClient(cli discovery.DiscoveryInterface) {
discoveryClient = cli
}

func GetKnativeAvailability(cfg *rest.Config) (*Availability, error) {
if cli, err := getDiscoveryClient(cfg); err != nil {
if cli, err := utils.GetDiscoveryClient(cfg); err != nil {
return nil, err
} else {
apiList, err := cli.ServerGroups()
Expand Down
52 changes: 52 additions & 0 deletions internal/controller/monitoring/monitoring.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package monitoring

import (
operatorapi "github.com/apache/incubator-kie-kogito-serverless-operator/api/v1alpha08"
"github.com/apache/incubator-kie-kogito-serverless-operator/utils"
"k8s.io/client-go/rest"
)

const (
prometheusGroup = "monitoring.coreos.com"
)

// GetPrometheusAvailability reports whether the API server reachable through cfg
// advertises the Prometheus operator API group (prometheusGroup).
// It returns an error when the discovery client cannot be obtained or when the
// server groups cannot be listed.
func GetPrometheusAvailability(cfg *rest.Config) (bool, error) {
	discoveryCli, err := utils.GetDiscoveryClient(cfg)
	if err != nil {
		return false, err
	}

	serverGroups, err := discoveryCli.ServerGroups()
	if err != nil {
		return false, err
	}

	// Walk the advertised API groups by index and stop at the first match.
	for i := range serverGroups.Groups {
		if serverGroups.Groups[i].Name == prometheusGroup {
			return true, nil
		}
	}
	return false, nil
}

// IsMonitoringEnabled tells whether the given platform has Prometheus monitoring switched on.
// A nil platform or a platform without a monitoring section counts as disabled.
func IsMonitoringEnabled(pl *operatorapi.SonataFlowPlatform) bool {
	if pl == nil || pl.Spec.Monitoring == nil {
		return false
	}
	return pl.Spec.Monitoring.Enabled
}
33 changes: 32 additions & 1 deletion internal/controller/profiles/common/object_creators.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
cncfmodel "github.com/serverlessworkflow/sdk-go/v2/model"

"github.com/imdario/mergo"
prometheus "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -64,6 +65,8 @@ const (
deploymentKind = "Deployment"
k8sServiceAPIVersion = "v1"
k8sServiceKind = "Service"
k8sServicePortName = "web"
metricsServicePortPath = "/q/metrics"
)

// ObjectCreator is the func that creates the initial reference object, if the object doesn't exist in the cluster, this one is created.
Expand Down Expand Up @@ -262,6 +265,7 @@ func ServiceCreator(workflow *operatorapi.SonataFlow) (client.Object, error) {
Spec: corev1.ServiceSpec{
Selector: lbl,
Ports: []corev1.ServicePort{{
Name: k8sServicePortName,
Protocol: corev1.ProtocolTCP,
Port: defaultHTTPServicePort,
TargetPort: variables.DefaultHTTPWorkflowPortIntStr,
Expand Down Expand Up @@ -439,10 +443,37 @@ func UserPropsConfigMapCreator(workflow *operatorapi.SonataFlow) (client.Object,

// ManagedPropsConfigMapCreator builds the ConfigMap that carries the managed application properties
// of the given workflow. The properties are computed from the workflow and the platform; if that
// computation fails, the error is returned and no object is created.
func ManagedPropsConfigMapCreator(workflow *operatorapi.SonataFlow, platform *operatorapi.SonataFlowPlatform) (client.Object, error) {
	managedProps, err := properties.ApplicationManagedProperties(workflow, platform)
	if err != nil {
		return nil, err
	}
	configMap := workflowproj.CreateNewManagedPropsConfigMap(workflow, managedProps)
	return configMap, nil
}

// ServiceMonitorCreator is an ObjectCreator that builds the Prometheus ServiceMonitor for the workflow service.
// The monitor takes the workflow's name and namespace, carries the workflow's merged labels, and its
// selector matches the workflow name and workflow namespace labels. Its single endpoint points at the
// named service port (k8sServicePortName) and the metrics path (metricsServicePortPath).
func ServiceMonitorCreator(workflow *operatorapi.SonataFlow) (client.Object, error) {
	workflowSelector := metav1.LabelSelector{
		MatchLabels: map[string]string{
			workflowproj.LabelWorkflow:          workflow.Name,
			workflowproj.LabelWorkflowNamespace: workflow.Namespace,
		},
	}
	metricsEndpoint := prometheus.Endpoint{
		Port: k8sServicePortName,
		Path: metricsServicePortPath,
	}

	return &prometheus.ServiceMonitor{
		ObjectMeta: metav1.ObjectMeta{
			Name:      workflow.Name,
			Namespace: workflow.Namespace,
			Labels:    workflowproj.GetMergedLabels(workflow),
		},
		Spec: prometheus.ServiceMonitorSpec{
			Selector:  workflowSelector,
			Endpoints: []prometheus.Endpoint{metricsEndpoint},
		},
	}, nil
}
Loading

0 comments on commit b99f958

Please sign in to comment.