Skip to content

Commit

Permalink
Use external MGS from a pool of PersistentStorageInstance MGSs (#219)
Browse files Browse the repository at this point in the history
* Use external MGS from a pool of PersistentStorageInstance MGSs

 - Add an NnfStorageProfile option "standaloneMgtPoolName" to create a Lustre file system
   that only has an MGT. This option only works with the "create_persistent" directive.
 - Apply a label to the PersistentStorageInstance with the value of the "standaloneMgtPoolName"
   option. This adds the PersistentStorageInstance to a pool of ExternalMGSs.
 - Change the externalMgs option in the NnfStorageProfile to also accept "pool:[poolname]" where
   "poolname" is the name of an MGS pool.
 - Modify the Setup phase to pick an MGS from the pool and add a reference to the PersistentStorageInstance.

Signed-off-by: Matt Richerson <[email protected]>

* review comments

Signed-off-by: Matt Richerson <[email protected]>

---------

Signed-off-by: Matt Richerson <[email protected]>
  • Loading branch information
matthew-richerson authored Aug 30, 2023
1 parent 31c55db commit f3eb87a
Show file tree
Hide file tree
Showing 13 changed files with 262 additions and 35 deletions.
5 changes: 5 additions & 0 deletions api/v1alpha1/nnf_storage_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ package v1alpha1
import (
dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2"
"github.com/HewlettPackard/dws/utils/updater"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)
Expand Down Expand Up @@ -56,6 +57,10 @@ type NnfStorageLustreSpec struct {
// ExternalMgsNid is the NID of the MGS when a pre-existing MGS is
// provided by the DataWarp directive (#DW).
ExternalMgsNid string `json:"externalMgsNid,omitempty"`

// PersistentMgsReference is a reference to a persistent storage that is providing
// the external MGS.
PersistentMgsReference corev1.ObjectReference `json:"persistentMgsReference,omitempty"`
}

// NnfStorageAllocationSetSpec defines the details for an allocation set
Expand Down
13 changes: 9 additions & 4 deletions api/v1alpha1/nnfstorageprofile_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ type NnfStorageProfileLustreData struct {
// +kubebuilder:default:=false
CombinedMGTMDT bool `json:"combinedMgtMdt,omitempty"`

// ExternalMGS contains the NIDs of a pre-existing MGS that should be used
// ExternalMGS specifies the use of an existing MGS rather than creating one. This can
// be either the NID(s) of a pre-existing MGS that should be used, or it can be an NNF Persistent
// Instance that was created with the "StandaloneMGTPoolName" option. In the latter case, the format
// is "pool:poolName" where "poolName" is the argument from "StandaloneMGTPoolName". A single MGS will
// be picked from the pool.
ExternalMGS string `json:"externalMgs,omitempty"`

// CapacityMGT specifies the size of the MGT device.
Expand All @@ -83,9 +87,10 @@ type NnfStorageProfileLustreData struct {
// +kubebuilder:default:=false
ExclusiveMDT bool `json:"exclusiveMdt,omitempty"`

// StandAloneMGT creates only a Lustre MGT without an MDT or OST
// +kubebuilder:default:=false
StandaloneMGT bool `json:"standaloneMgt,omitempty"`
// StandaloneMGTPoolName creates a Lustre MGT without a MDT or OST. This option can only be used when creating
// a persistent Lustre instance. The MGS is placed into a named pool that can be used by the "ExternalMGS" option.
// Multiple pools can be created.
StandaloneMGTPoolName string `json:"standaloneMgtPoolName,omitempty"`

// MgtCmdLines contains commands to create an MGT target.
MgtCmdLines NnfStorageProfileLustreCmdLines `json:"mgtCommandlines,omitempty"`
Expand Down
8 changes: 4 additions & 4 deletions api/v1alpha1/nnfstorageprofile_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ func (r *NnfStorageProfile) validateContentLustre() error {
return fmt.Errorf("cannot set both combinedMgtMdt and externalMgs")
}

if r.Data.LustreStorage.StandaloneMGT && len(r.Data.LustreStorage.ExternalMGS) > 0 {
return fmt.Errorf("cannot set both standaloneMgt and externalMgs")
if len(r.Data.LustreStorage.StandaloneMGTPoolName) > 0 && len(r.Data.LustreStorage.ExternalMGS) > 0 {
return fmt.Errorf("cannot set both standaloneMgtPoolName and externalMgs")
}

if r.Data.LustreStorage.StandaloneMGT && r.Data.LustreStorage.CombinedMGTMDT {
return fmt.Errorf("cannot set standaloneMgt and combinedMgtMdt")
if len(r.Data.LustreStorage.StandaloneMGTPoolName) > 0 && r.Data.LustreStorage.CombinedMGTMDT {
return fmt.Errorf("cannot set standaloneMgtPoolName and combinedMgtMdt")
}

for _, target := range []string{"mgt", "mdt", "mgtmdt", "ost"} {
Expand Down
12 changes: 6 additions & 6 deletions api/v1alpha1/nnfstorageprofile_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ var _ = Describe("NnfStorageProfile Webhook", func() {
Expect(newProfile.Data.Default).ToNot(BeTrue())
})

It("should accept standaloneMgt", func() {
nnfProfile.Data.LustreStorage.StandaloneMGT = true
It("should accept standaloneMgtPoolName", func() {
nnfProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool"
Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed())
Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile)).To(Succeed())
Expect(newProfile.Data.Default).ToNot(BeTrue())
Expand Down Expand Up @@ -150,15 +150,15 @@ var _ = Describe("NnfStorageProfile Webhook", func() {
nnfProfile = nil
})

It("should not accept standaloneMgt with externalMgs", func() {
nnfProfile.Data.LustreStorage.StandaloneMGT = true
It("should not accept standaloneMgtPoolName with externalMgs", func() {
nnfProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool"
nnfProfile.Data.LustreStorage.ExternalMGS = "10.0.0.1@tcp"
Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed())
nnfProfile = nil
})

It("should not accept standaloneMgt with combinedMgtMdt", func() {
nnfProfile.Data.LustreStorage.StandaloneMGT = true
It("should not accept standaloneMgtPoolName with combinedMgtMdt", func() {
nnfProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool"
nnfProfile.Data.LustreStorage.CombinedMGTMDT = true
Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed())
nnfProfile = nil
Expand Down
4 changes: 4 additions & 0 deletions api/v1alpha1/workflow_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,8 @@ const (
// PinnedContainerProfileLabelNameSpace is a label applied to NnfStorage objects to show
// which pinned container profile is being used.
PinnedContainerProfileLabelNameSpace = "nnf.cray.hpe.com/pinned_container_profile_namespace"

// StandaloneMGTLabel is a label applied to the PersistentStorageInstance to show that
// it is for a Lustre MGT only. The value for the label is the pool name.
StandaloneMGTLabel = "nnf.cray.hpe.com/standalone_mgt"
)
1 change: 1 addition & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 13 additions & 7 deletions config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,13 @@ spec:
colocated with any other target on the chosen server.
type: boolean
externalMgs:
description: ExternalMGS contains the NIDs of a pre-existing MGS
that should be used
description: ExternalMGS specifies the use of an existing MGS
rather than creating one. This can be either the NID(s) of a
pre-existing MGS that should be used, or it can be an NNF Persistent
Instance that was created with the "StandaloneMGTPoolName" option.
In the latter case, the format is "pool:poolName" where "poolName"
is the argument from "StandaloneMGTPoolName". A single MGS will
be picked from the pool.
type: string
mdtCommandlines:
description: MdtCmdLines contains commands to create an MDT target.
Expand Down Expand Up @@ -337,11 +342,12 @@ spec:
required:
- colocateComputes
type: object
standaloneMgt:
default: false
description: StandAloneMGT creates only a Lustre MGT without an
MDT or OST
type: boolean
standaloneMgtPoolName:
description: StandaloneMGTPoolName creates a Lustre MGT without
a MDT or OST. This option can only be used when creating a persistent
Lustre instance. The MGS is placed into a named pool that can
be used by the "ExternalMGS" option. Multiple pools can be created.
type: string
type: object
pinned:
default: false
Expand Down
39 changes: 39 additions & 0 deletions config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,45 @@ spec:
- name
type: object
type: array
persistentMgsReference:
description: PersistentMgsReference is a reference to a persistent
storage that is providing the external MGS.
properties:
apiVersion:
description: API version of the referent.
type: string
fieldPath:
description: 'If referring to a piece of an object instead
of an entire object, this string should contain a valid
JSON/Go field access statement, such as desiredState.manifest.containers[2].
For example, if the object reference is to a container
within a pod, this would take on a value like: "spec.containers{name}"
(where "name" refers to the name of the container that
triggered the event) or if no container name is specified
"spec.containers[2]" (container with index 2 in this pod).
This syntax is chosen only to have some well-defined way
of referencing a part of an object. TODO: this design
is not final and this field is subject to change in the
future.'
type: string
kind:
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
namespace:
description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
type: string
resourceVersion:
description: 'Specific resourceVersion to which this reference
is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency'
type: string
uid:
description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
type: string
type: object
x-kubernetes-map-type: atomic
targetType:
description: TargetType is the type of Lustre target to be created.
enum:
Expand Down
4 changes: 2 additions & 2 deletions controllers/directivebreakdown_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -504,9 +504,9 @@ func (r *DirectiveBreakdownReconciler) populateStorageBreakdown(ctx context.Cont
lustreComponents = append(lustreComponents, lustreComponentType{dwsv1alpha2.AllocateAcrossServers, mdtCapacity, "mgtmdt", useKey})
} else if len(nnfStorageProfile.Data.LustreStorage.ExternalMGS) > 0 {
lustreComponents = append(lustreComponents, lustreComponentType{dwsv1alpha2.AllocateAcrossServers, mdtCapacity, "mdt", mdtKey})
} else if nnfStorageProfile.Data.LustreStorage.StandaloneMGT {
} else if len(nnfStorageProfile.Data.LustreStorage.StandaloneMGTPoolName) > 0 {
if argsMap["command"] != "create_persistent" {
return dwsv1alpha2.NewResourceError("").WithUserMessage("standaloneMgt option can only be used with 'create_persistent' directive").WithFatal().WithUser()
return dwsv1alpha2.NewResourceError("").WithUserMessage("standaloneMgtPoolName option can only be used with 'create_persistent' directive").WithFatal().WithUser()
}

lustreComponents = []lustreComponentType{lustreComponentType{dwsv1alpha2.AllocateSingleServer, mgtCapacity, "mgt", mgtKey}}
Expand Down
18 changes: 9 additions & 9 deletions controllers/directivebreakdown_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,11 @@ var _ = Describe("DirectiveBreakdown test", func() {
}).ShouldNot(Succeed())
})

It("Creates a DirectiveBreakdown with a lustre jobdw and standaloneMgt", func() {
By("Setting standaloneMgt in the storage profile")
It("Creates a DirectiveBreakdown with a lustre jobdw and standaloneMgtPoolName", func() {
By("Setting standaloneMgtPoolName in the storage profile")
Eventually(func(g Gomega) error {
g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed())
storageProfile.Data.LustreStorage.StandaloneMGT = true
storageProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool"
return k8sClient.Update(context.TODO(), storageProfile)
}).Should(Succeed())

Expand All @@ -166,11 +166,11 @@ var _ = Describe("DirectiveBreakdown test", func() {
}).ShouldNot(BeNil())
})

It("Creates a DirectiveBreakdown with an xfs jobdw and standaloneMgt", func() {
By("Setting standaloneMgt in the storage profile")
It("Creates a DirectiveBreakdown with an xfs jobdw and standaloneMgtPoolName", func() {
By("Setting standaloneMgtPoolName in the storage profile")
Eventually(func(g Gomega) error {
g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed())
storageProfile.Data.LustreStorage.StandaloneMGT = true
storageProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool"
return k8sClient.Update(context.TODO(), storageProfile)
}).Should(Succeed())

Expand All @@ -193,11 +193,11 @@ var _ = Describe("DirectiveBreakdown test", func() {
}).Should(BeTrue())
})

It("Creates a DirectiveBreakdown with a create_persistent and standaloneMgt", func() {
By("Setting standaloneMgt in the storage profile")
It("Creates a DirectiveBreakdown with a create_persistent and standaloneMgtPoolName", func() {
By("Setting standaloneMgtPoolName in the storage profile")
Eventually(func(g Gomega) error {
g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed())
storageProfile.Data.LustreStorage.StandaloneMGT = true
storageProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool"
return k8sClient.Update(context.TODO(), storageProfile)
}).Should(Succeed())

Expand Down
16 changes: 16 additions & 0 deletions controllers/nnf_persistentstorageinstance_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,22 @@ func (r *PersistentStorageReconciler) Reconcile(ctx context.Context, req ctrl.Re
return ctrl.Result{Requeue: true}, nil
}

// If this PersistentStorageInstance is for a standalone MGT, add a label so it can be easily found
if argsMap["type"] == "lustre" && len(pinnedProfile.Data.LustreStorage.StandaloneMGTPoolName) > 0 {
labels := persistentStorage.GetLabels()
if _, ok := labels[nnfv1alpha1.StandaloneMGTLabel]; !ok {
labels[nnfv1alpha1.StandaloneMGTLabel] = pinnedProfile.Data.LustreStorage.StandaloneMGTPoolName
persistentStorage.SetLabels(labels)
if err := r.Update(ctx, persistentStorage); err != nil {
if !apierrors.IsConflict(err) {
return ctrl.Result{}, err
}

return ctrl.Result{Requeue: true}, nil
}
}
}

// Create the Servers resource
servers, err := r.createServers(ctx, persistentStorage)
if err != nil {
Expand Down
82 changes: 81 additions & 1 deletion controllers/nnf_storage_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ package controllers

import (
"context"
"reflect"
"runtime"
"strconv"
"time"

"github.com/go-logr/logr"

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kruntime "k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -166,7 +168,14 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request)
storage.Status.Error = nil

// For each allocation, create the NnfNodeStorage resources to fan out to the Rabbit nodes
for i := range storage.Spec.AllocationSets {
for i, allocationSet := range storage.Spec.AllocationSets {
// Add a reference to the external MGS PersistentStorageInstance if necessary
if allocationSet.NnfStorageLustreSpec.PersistentMgsReference != (corev1.ObjectReference{}) {
if err := r.addPersistentStorageReference(ctx, storage, allocationSet.NnfStorageLustreSpec.PersistentMgsReference); err != nil {
return ctrl.Result{}, err
}
}

res, err := r.createNodeStorage(ctx, storage, i)
if err != nil {
return ctrl.Result{}, err
Expand Down Expand Up @@ -220,6 +229,69 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request)
return ctrl.Result{}, nil
}

func (r *NnfStorageReconciler) addPersistentStorageReference(ctx context.Context, nnfStorage *nnfv1alpha1.NnfStorage, persistentMgsReference corev1.ObjectReference) error {
persistentStorage := &dwsv1alpha2.PersistentStorageInstance{
ObjectMeta: metav1.ObjectMeta{
Name: persistentMgsReference.Name,
Namespace: persistentMgsReference.Namespace,
},
}

if err := r.Get(ctx, client.ObjectKeyFromObject(persistentStorage), persistentStorage); err != nil {
return dwsv1alpha2.NewResourceError("").WithUserMessage("PersistentStorage '%v' not found", client.ObjectKeyFromObject(persistentStorage)).WithMajor()
}

if persistentStorage.Status.State != dwsv1alpha2.PSIStateActive {
return dwsv1alpha2.NewResourceError("").WithUserMessage("PersistentStorage is not active").WithFatal()
}

// Add a consumer reference to the persistent storage for this directive
reference := corev1.ObjectReference{
Name: nnfStorage.Name,
Namespace: nnfStorage.Namespace,
Kind: reflect.TypeOf(nnfv1alpha1.NnfStorage{}).Name(),
}

for _, existingReference := range persistentStorage.Spec.ConsumerReferences {
if existingReference == reference {
return nil
}
}

persistentStorage.Spec.ConsumerReferences = append(persistentStorage.Spec.ConsumerReferences, reference)

return r.Update(ctx, persistentStorage)
}

func (r *NnfStorageReconciler) removePersistentStorageReference(ctx context.Context, nnfStorage *nnfv1alpha1.NnfStorage, persistentMgsReference corev1.ObjectReference) error {
persistentStorage := &dwsv1alpha2.PersistentStorageInstance{
ObjectMeta: metav1.ObjectMeta{
Name: persistentMgsReference.Name,
Namespace: persistentMgsReference.Namespace,
},
}

if err := r.Get(ctx, client.ObjectKeyFromObject(persistentStorage), persistentStorage); err != nil {
return client.IgnoreNotFound(err)
}

// remove the consumer reference on the persistent storage for this directive
reference := corev1.ObjectReference{
Name: nnfStorage.Name,
Namespace: nnfStorage.Namespace,
Kind: reflect.TypeOf(nnfv1alpha1.NnfStorage{}).Name(),
}

for i, existingReference := range persistentStorage.Spec.ConsumerReferences {
if existingReference == reference {
persistentStorage.Spec.ConsumerReferences = append(persistentStorage.Spec.ConsumerReferences[:i], persistentStorage.Spec.ConsumerReferences[i+1:]...)
return r.Update(ctx, persistentStorage)
}
}

return nil
}

// Create an NnfNodeStorage if it doesn't exist, or update it if it requires updating. Each
// Rabbit node gets an NnfNodeStorage, and there may be multiple allocations requested in it.
// This limits the number of resources that have to be broadcast to the Rabbits.
Expand Down Expand Up @@ -433,6 +505,14 @@ func (r *NnfStorageReconciler) teardownStorage(ctx context.Context, storage *nnf
return nodeStoragesExist, nil
}

for _, allocationSet := range storage.Spec.AllocationSets {
if allocationSet.NnfStorageLustreSpec.PersistentMgsReference != (corev1.ObjectReference{}) {
if err := r.removePersistentStorageReference(ctx, storage, allocationSet.NnfStorageLustreSpec.PersistentMgsReference); err != nil {
return nodeStoragesExist, err
}
}
}

return nodeStoragesDeleted, nil
}

Expand Down
Loading

0 comments on commit f3eb87a

Please sign in to comment.