Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PWX-38372 Cherry-picking diags code from master #1638

Merged
merged 4 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions cmd/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"github.com/libopenstorage/operator/drivers/storage"
_ "github.com/libopenstorage/operator/drivers/storage/portworx"
"github.com/libopenstorage/operator/pkg/apis"
"github.com/libopenstorage/operator/pkg/controller/portworxdiag"
"github.com/libopenstorage/operator/pkg/controller/storagecluster"
"github.com/libopenstorage/operator/pkg/controller/storagenode"
_ "github.com/libopenstorage/operator/pkg/log"
Expand All @@ -52,6 +53,7 @@ const (
flagEnableProfiling = "pprof"
flagDisableCacheFor = "disable-cache-for"
defaultLockObjectName = "openstorage-operator"
flagEnableDiagController = "diag-controller"
defaultResyncPeriod = 30 * time.Second
defaultMetricsPort = 8999
defaultPprofPort = 6060
Expand Down Expand Up @@ -110,6 +112,10 @@ func main() {
Name: flagEnableProfiling,
Usage: "Enable Portworx Operator profiling using pprof (default: false)",
},
cli.BoolFlag{
Name: flagEnableDiagController,
Usage: "Enable Portworx Diag Controller (default: false)",
},

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can default be true in your branch?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we can do that

cli.StringFlag{
Name: flagDisableCacheFor,
Usage: "Comma separated object types to disable from cache to reduce memory usage, for example \"Pod,ConfigMap,Deployment,PersistentVolume\"",
Expand Down Expand Up @@ -146,6 +152,8 @@ func run(c *cli.Context) {
}()
}

diagControllerEnabled := c.Bool(flagEnableDiagController)

config, err := rest.InClusterConfig()
if err != nil {
log.Fatalf("Error getting cluster config: %v", err)
Expand Down Expand Up @@ -183,6 +191,15 @@ func run(c *cli.Context) {
log.Fatalf("Error registering CRD's for StorageNode controller: %v", err)
}

var diagController portworxdiag.Controller
if diagControllerEnabled {
diagController = portworxdiag.Controller{Driver: d}
err = diagController.RegisterCRD()
if err != nil {
log.Fatalf("Error registering CRDs for PortworxDiag controller: %v", err)
}
}

// TODO: Don't move createManager above register CRD section. This part will be refactored because of a bug,
// similar to https://github.com/kubernetes-sigs/controller-runtime/issues/321
mgr, err := createManager(c, config)
Expand Down Expand Up @@ -256,6 +273,12 @@ func run(c *cli.Context) {
log.Fatalf("Error initializing storage node controller: %v", err)
}

if diagControllerEnabled {
if err := diagController.Init(mgr); err != nil {
log.Fatalf("Error initializing portworx diag controller: %v", err)
}
}

if err := storageClusterController.StartWatch(); err != nil {
log.Fatalf("Error start watch on storage cluster controller: %v", err)
}
Expand All @@ -264,6 +287,12 @@ func run(c *cli.Context) {
log.Fatalf("Error starting watch on storage node controller: %v", err)
}

if diagControllerEnabled {
if err := diagController.StartWatch(); err != nil {
log.Fatalf("Error starting watch on portworx diag controller: %v", err)
}
}

if c.BoolT(flagMigration) {
log.Info("Migration is enabled")
migrationHandler := migration.New(&storageClusterController)
Expand Down
131 changes: 131 additions & 0 deletions deploy/crds/portworx.io_portworxdiags.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  annotations:
    controller-gen.kubebuilder.io/version: v0.11.3
  creationTimestamp: null
  name: portworxdiags.portworx.io
spec:
  group: portworx.io
  names:
    kind: PortworxDiag
    listKind: PortworxDiagList
    plural: portworxdiags
    shortNames:
    - pxdiag
    singular: portworxdiag
  scope: Namespaced
  versions:
  - additionalPrinterColumns:
    - description: Status of the Portworx diag collection.
      jsonPath: .status.phase
      name: Status
      type: string
    - description: Age of the diag resource.
      jsonPath: .metadata.creationTimestamp
      name: Age
      type: date
    name: v1
    schema:
      openAPIV3Schema:
        description: PortworxDiag represents a portworx diag
        properties:
          apiVersion:
            description: 'APIVersion defines the versioned schema of this representation
              of an object. Servers should convert recognized schemas to the latest
              internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
            type: string
          kind:
            description: 'Kind is a string value representing the REST resource this
              object represents. Servers may infer this from the endpoint the client
              submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
            type: string
          metadata:
            type: object
          spec:
            description: PortworxDiagSpec is the spec used to define a portworx diag.
            properties:
              portworx:
                description: Configuration for diags collection of the main Portworx
                  component.
                properties:
                  generateCore:
                    description: Generates the core dump as well when collecting the
                      diags. Could be useful to analyze the current state of the system.
                    type: boolean
                  nodes:
                    description: Nodes for which the diags need to be collected. If
                      a volume selector is also specified, then both the selectors
                      will be honored and the selected nodes will be a union of both
                      selectors.
                    properties:
                      all:
                        description: Select all nodes in the Portworx cluster. If
                          set to true, other selectors are ignored.
                        type: boolean
                      ids:
                        description: Ids of the nodes to be selected.
                        items:
                          type: string
                        type: array
                      labels:
                        additionalProperties:
                          type: string
                        description: Labels of the nodes to be selected.
                        type: object
                    type: object
                  volumes:
                    description: Volumes for which the diags need to be collected.
                    properties:
                      ids:
                        description: Ids of the volumes to be selected.
                        items:
                          type: string
                        type: array
                      labels:
                        additionalProperties:
                          type: string
                        description: Labels of the volumes to be selected.
                        type: object
                    type: object
                type: object
            type: object
          status:
            description: PortworxDiagStatus is the status of a portworx diag.
            properties:
              clusterUuid:
                description: UUID of the Portworx cluster. This is useful to find
                  the uploaded diags.
                type: string
              message:
                description: Optional message used to give the reason for any failure.
                type: string
              nodes:
                description: Status of the diags collection from all the selected
                  nodes.
                items:
                  description: Status of the diags collection from a single node.
                  properties:
                    message:
                      description: Optional message used to give the reason for any
                        failure.
                      type: string
                    nodeId:
                      description: ID of the node for which the diag status is reported.
                      type: string
                    status:
                      description: One word status of the diags collection on the
                        node.
                      type: string
                  type: object
                type: array
              phase:
                description: One word status of the entire diags collection job.
                type: string
            type: object
        type: object
    served: true
    storage: true
    subresources:
      status: {}
5 changes: 5 additions & 0 deletions drivers/storage/portworx/component/portworx_basic.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,11 @@ func (c *portworxBasic) createClusterRole() error {
Resources: []string{"*"},
Verbs: []string{"*"},
},
{
APIGroups: []string{"portworx.io"},
Resources: []string{"portworxdiags", "portworxdiags/status"},
Verbs: []string{"get", "list", "watch", "create", "update", "patch", "delete"},
},
{
APIGroups: []string{"security.openshift.io"},
Resources: []string{"securitycontextconstraints"},
Expand Down
4 changes: 2 additions & 2 deletions drivers/storage/portworx/portworx.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import (

const (
storkDriverName = "pxd"
defaultPortworxImage = "portworx/oci-monitor"
DefaultPortworxImage = "portworx/oci-monitor"
defaultSecretsProvider = "k8s"
defaultTokenLifetime = "24h"
defaultSelfSignedIssuer = "operator.portworx.io"
Expand Down Expand Up @@ -375,7 +375,7 @@ func (p *portworx) SetDefaultsOnStorageCluster(toUpdate *corev1.StorageCluster)

if toUpdate.Spec.Version == "" && pxEnabled {
if toUpdate.Spec.Image == "" {
toUpdate.Spec.Image = defaultPortworxImage
toUpdate.Spec.Image = DefaultPortworxImage
}
toUpdate.Spec.Image = toUpdate.Spec.Image + ":" + release.PortworxVersion
toUpdate.Spec.Version = release.PortworxVersion
Expand Down
4 changes: 2 additions & 2 deletions drivers/storage/portworx/portworx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1295,7 +1295,7 @@ func TestSetDefaultsOnStorageCluster(t *testing.T) {
require.NoError(t, err)

// Use default image from release manifest when spec.image is not set
require.Equal(t, defaultPortworxImage+":2.10.0", cluster.Spec.Image)
require.Equal(t, DefaultPortworxImage+":2.10.0", cluster.Spec.Image)
require.Equal(t, "2.10.0", cluster.Spec.Version)
require.Equal(t, "2.10.0", cluster.Status.Version)
require.True(t, cluster.Spec.Kvdb.Internal)
Expand All @@ -1309,7 +1309,7 @@ func TestSetDefaultsOnStorageCluster(t *testing.T) {
cluster.Spec.Version = " "
err = driver.SetDefaultsOnStorageCluster(cluster)
require.NoError(t, err)
require.Equal(t, defaultPortworxImage+":2.10.0", cluster.Spec.Image)
require.Equal(t, DefaultPortworxImage+":2.10.0", cluster.Spec.Image)
require.Equal(t, "2.10.0", cluster.Spec.Version)
require.Equal(t, "2.10.0", cluster.Status.Version)

Expand Down
3 changes: 3 additions & 0 deletions drivers/storage/portworx/testspec/portworxClusterRole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ rules:
- apiGroups: ["core.libopenstorage.org"]
resources: ["*"]
verbs: ["*"]
- apiGroups: ["portworx.io"]
resources: ["portworxdiags", "portworxdiags/status"]
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
- apiGroups: ["security.openshift.io"]
resources: ["securitycontextconstraints"]
resourceNames: ["portworx"]
Expand Down
87 changes: 87 additions & 0 deletions drivers/storage/portworx/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -1251,6 +1251,93 @@
return storageNodesCount, nil
}

func getStorageNodeMappingFromRPC(
cluster *corev1.StorageCluster,
sdkConn *grpc.ClientConn,
k8sClient client.Client,
) (map[string]string, map[string]string, error) {
nodeClient := api.NewOpenStorageNodeClient(sdkConn)
ctx, err := SetupContextWithToken(context.Background(), cluster, k8sClient, false)
if err != nil {
return nil, nil, err

Check warning on line 1262 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1258-L1262

Added lines #L1258 - L1262 were not covered by tests
}

nodeEnumerateResponse, err := nodeClient.EnumerateWithFilters(
ctx,
&api.SdkNodeEnumerateWithFiltersRequest{},
)
if err != nil {
return nil, nil, fmt.Errorf("failed to enumerate nodes: %v", err)

Check warning on line 1270 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1265-L1270

Added lines #L1265 - L1270 were not covered by tests
}

nodeNameToNodeID := map[string]string{}
nodeIDToNodeName := map[string]string{}

Check warning on line 1274 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1273-L1274

Added lines #L1273 - L1274 were not covered by tests

// Loop through all storage nodes
for _, n := range nodeEnumerateResponse.Nodes {
if n.SchedulerNodeName == "" {
continue

Check warning on line 1279 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1277-L1279

Added lines #L1277 - L1279 were not covered by tests
}

nodeNameToNodeID[n.SchedulerNodeName] = n.Id
nodeIDToNodeName[n.Id] = n.SchedulerNodeName

Check warning on line 1283 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1282-L1283

Added lines #L1282 - L1283 were not covered by tests
}

return nodeNameToNodeID, nodeIDToNodeName, nil

Check warning on line 1286 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1286

Added line #L1286 was not covered by tests
}

func getStorageNodeMappingFromK8s(
cluster *corev1.StorageCluster,
k8sClient client.Client,
) (map[string]string, map[string]string, error) {
nodes := &corev1.StorageNodeList{}
err := k8sClient.List(context.TODO(), nodes, &client.ListOptions{Namespace: cluster.Namespace})
if err != nil {
return nil, nil, fmt.Errorf("failed to list StorageNodes: %v", err)

Check warning on line 1296 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1292-L1296

Added lines #L1292 - L1296 were not covered by tests
}

nodeNameToNodeID := map[string]string{}
nodeIDToNodeName := map[string]string{}

Check warning on line 1300 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1299-L1300

Added lines #L1299 - L1300 were not covered by tests

// Loop through all storage nodes
for _, n := range nodes.Items {
if n.Status.NodeUID == "" {
continue

Check warning on line 1305 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1303-L1305

Added lines #L1303 - L1305 were not covered by tests
}

nodeNameToNodeID[n.Name] = n.Status.NodeUID
nodeIDToNodeName[n.Status.NodeUID] = n.Name

Check warning on line 1309 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1308-L1309

Added lines #L1308 - L1309 were not covered by tests
}

return nodeNameToNodeID, nodeIDToNodeName, nil

Check warning on line 1312 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1312

Added line #L1312 was not covered by tests
}

// GetStorageNodeMapping returns a mapping of node name to node ID, as well as the inverse mapping.
// If sdkConn is nil, it will fall back to k8s API which may not be up to date.
// If both fail then the error will be returned.
func GetStorageNodeMapping(
cluster *corev1.StorageCluster,
sdkConn *grpc.ClientConn,
k8sClient client.Client,
) (map[string]string, map[string]string, error) {
var nodeNameToNodeID, nodeIDToNodeName map[string]string
var rpcErr, k8sErr error

Check warning on line 1324 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1322-L1324

Added lines #L1322 - L1324 were not covered by tests

if sdkConn != nil {
nodeNameToNodeID, nodeIDToNodeName, rpcErr = getStorageNodeMappingFromRPC(cluster, sdkConn, k8sClient)
if rpcErr == nil {
return nodeNameToNodeID, nodeIDToNodeName, nil

Check warning on line 1329 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1326-L1329

Added lines #L1326 - L1329 were not covered by tests
}
logrus.WithError(rpcErr).Warn("Failed to get storage node mapping from RPC, falling back to k8s API which may not be up to date")

Check warning on line 1331 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1331

Added line #L1331 was not covered by tests
}

nodeNameToNodeID, nodeIDToNodeName, k8sErr = getStorageNodeMappingFromK8s(cluster, k8sClient)
if k8sErr != nil {
return nil, nil, fmt.Errorf("failed to get storage node mapping from both RPC and k8s. RPC err: %v. k8s err: %v", rpcErr, k8sErr)

Check warning on line 1336 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1334-L1336

Added lines #L1334 - L1336 were not covered by tests
}
return nodeNameToNodeID, nodeIDToNodeName, nil

Check warning on line 1338 in drivers/storage/portworx/util/util.go

View check run for this annotation

Codecov / codecov/patch

drivers/storage/portworx/util/util.go#L1338

Added line #L1338 was not covered by tests
}

func CleanupObject(obj client.Object) {
obj.SetGenerateName("")
obj.SetUID("")
Expand Down
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ require (
k8s.io/utils v0.0.0-20230505201702-9f6742963106
sigs.k8s.io/cluster-api v0.2.11
sigs.k8s.io/controller-runtime v0.14.5
sigs.k8s.io/controller-tools v0.11.3
sigs.k8s.io/yaml v1.4.0
)

Expand All @@ -62,6 +63,7 @@ require (
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
github.com/gobuffalo/flect v0.3.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.3 // indirect
Expand All @@ -78,6 +80,7 @@ require (
github.com/hashicorp/golang-lru v1.0.2 // indirect
github.com/hashicorp/serf v0.10.1 // indirect
github.com/imdario/mergo v0.3.13 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
Expand All @@ -102,6 +105,7 @@ require (
github.com/prometheus/common v0.42.0 // indirect
github.com/prometheus/procfs v0.9.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/spf13/cobra v1.7.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.0 // indirect
golang.org/x/crypto v0.18.0 // indirect
Expand Down
Loading
Loading