Skip to content

Commit

Permalink
File system and block device management in nnf-sos
Browse files Browse the repository at this point in the history
This commit moves the file system, LVM, and zpool management out of nnf-ec and
into nnf-sos. There are many parts to this change:

- Create a package for running the LVM commands (e.g., pvcreate)
- Create a package for managing block devices. This presents as the blockdevice interface.
Implementations include lvm, zpool, and mock.
- Create a package for managing file systems. This presents as the filesystem interface.
Implementations include simple (for xfs, gfs2, and bind mounts), lustre, mock, and kind.
- Create an NnfNodeBlockStorage resource that interfaces with nnf-ec to manage the
storage pools and storage groups.
- Change NnfNodeStorage to use the blockdevice and filesystem interfaces to create and delete
file systems on the Rabbit.
- Change the ClientMount reconciler to also use the blockdevice and filesystem interfaces to
activate/mount and unmount/deactivate filesystems and blockdevices.
- Move the clientmount daemon from DWS to nnf-sos. The ClientMount reconciler is now shared
between compute and Rabbit nodes (run as clientmountd on the computes and inside nnf-node-manager
on the Rabbits), and it is NNF specific (not generic for DWS).
- Add command line arguments to the NnfStorageProfile for activating/deactivating single LVs
- Use a shared VG with locking for all LVM VGs since they're always shared between multiple nodes.
This causes LVM to enforce a single node activating a non-shared LV at a time. In the future we can
remove the internal locking nnf-sos does in the NnfAccess reconciler.
- Use the ClientMount resource to mount the Lustre file system after creation to set the GID/UID
- Try to consistently use MgsAddress as the resource field for the MGS LNet Address
- Turn NnfNodeStorage Status section into a subresource

Signed-off-by: Matt Richerson <[email protected]>
  • Loading branch information
matthew-richerson committed Dec 18, 2023
1 parent 91601a9 commit eb48874
Show file tree
Hide file tree
Showing 100 changed files with 10,988 additions and 1,635 deletions.
73 changes: 73 additions & 0 deletions .github/workflows/rpm_build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Workflow that builds the nnf-clientmount binary and source RPMs and uploads
# them as workflow artifacts.
name: RPM Build
# Triggered by pushes to branches matching '*' and by tags matching 'v*'.
on:
push:
branches:
- '*'
tags:
- 'v*'

jobs:
# Computes the repository version by running ./git-version-gen and exposes it
# to other jobs through the `version_output` job output.
repo_version:
runs-on: ubuntu-latest
outputs:
version_output: ${{ steps.step1.outputs.version }}
steps:
# Log the triggering ref and ref type to aid debugging.
- name: Verify context
run: |
echo "ref is ${{ github.ref }}"
echo "ref_type is ${{ github.ref_type }}"
# fetch-depth: 0 requests the full history; presumably git-version-gen needs
# tags/history to derive the version (TODO: confirm).
# NOTE(review): this workflow only triggers on `push`, where
# github.event.pull_request is not expected to be populated, so `ref` likely
# evaluates to an empty string here -- confirm this is intended.
- uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
# Publish the computed version as the step output named `version`.
- name: Get Version
id: step1
run: echo "version=$(./git-version-gen)" >> $GITHUB_OUTPUT

# Builds the RPMs inside a centos:8 container once repo_version has produced
# the version string.
# NOTE(review): NODE_ENV and port 80 are not referenced by any step visible in
# this job -- confirm they are needed.
rpm_build:
runs-on: ubuntu-latest
needs: repo_version
container:
image: centos:8
env:
NODE_ENV: development
ports:
- 80
options: --cpus 1
steps:
# Log the ref information and the version handed over from repo_version.
- name: "Build context"
env:
VERSION_OUTPUT: ${{ needs.repo_version.outputs.version_output }}
run: |
echo "ref is ${{ github.ref }}"
echo "ref_type is ${{ github.ref_type }}"
echo "head.sha is ${{ github.event.pull_request.head.sha }}"
echo "git-version-gen is $VERSION_OUTPUT"
- name: checkout
uses: actions/checkout@v3
# Switch the container's repos to CentOS Stream, install the build toolchain,
# record the version in .rpmversion, and create the source tarball in the
# rpmbuild SOURCES directory.
- name: environment setup
env:
VERSION_OUTPUT: ${{ needs.repo_version.outputs.version_output }}
run: |
dnf -y --disablerepo '*' --enablerepo=extras swap centos-linux-repos centos-stream-repos
dnf -y distro-sync
dnf -y makecache --refresh
dnf install -y rpm-build rpmdevtools git make
dnf module -y install go-toolset
rpmdev-setuptree
echo $VERSION_OUTPUT > .rpmversion
cat .rpmversion
tar -czf /github/home/rpmbuild/SOURCES/nnf-clientmount-1.0.tar.gz --transform 's,^,nnf-clientmount-1.0/,' .
# Build the packages described by clientmount.spec.
- name: build rpms
run: rpmbuild -ba clientmount.spec
# Publish the built packages as workflow artifacts.
# NOTE(review): the artifact names and paths hard-code version 1.0-1 while the
# computed version is only written to .rpmversion -- confirm the spec file
# produces packages with exactly these names.
- name: upload rpms
uses: actions/upload-artifact@v3
with:
name: nnf-clientmount-1.0-1.el8.x86_64.rpm
path: /github/home/rpmbuild/RPMS/x86_64/nnf-clientmount-1.0-1.el8.x86_64.rpm
- name: upload srpms
uses: actions/upload-artifact@v3
with:
name: nnf-clientmount-1.0-1.el8.src.rpm
path: /github/home/rpmbuild/SRPMS/nnf-clientmount-1.0-1.el8.src.rpm
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ testbin/*
commands.log
kind-config.yaml
standalone-playground
mount-daemon/clientmount
nnf-sos
.version
config/begin/*
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ COPY go.sum go.sum
COPY cmd/ cmd/
COPY api/ api/
COPY internal/ internal/
COPY pkg/ pkg/
COPY vendor/ vendor/
COPY config/ config/

Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,10 @@ test: manifests generate fmt vet envtest ## Run tests.
done

##@ Build
# build-daemon builds the standalone clientmount daemon as bin/clientmountd with
# GOOS=linux GOARCH=amd64. The linker flag -X sets $(PACKAGE).version to
# RPM_VERSION, which defaults to the output of ./git-version-gen unless the
# caller overrides it.
build-daemon: RPM_VERSION ?= $(shell ./git-version-gen)
build-daemon: PACKAGE = github.com/NearNodeFlash/nnf-sos/mount-daemon/version
build-daemon: manifests generate fmt vet ## Build standalone clientMount daemon
GOOS=linux GOARCH=amd64 go build -ldflags="-X '$(PACKAGE).version=$(RPM_VERSION)'" -o bin/clientmountd mount-daemon/main.go

build: generate fmt vet ## Build manager binary.
go build -o bin/manager cmd/main.go
Expand Down
9 changes: 9 additions & 0 deletions PROJECT
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ resources:
kind: NnfNodeStorage
path: github.com/NearNodeFlash/nnf-sos/api/v1alpha1
version: v1alpha1
- api:
crdVersion: v1
namespaced: true
controller: true
domain: cray.hpe.com
group: nnf
kind: NnfNodeBlockStorage
path: github.com/NearNodeFlash/nnf-sos/api/v1alpha1
version: v1alpha1
- controller: true
domain: github.io
group: dataworkflowservices
Expand Down
127 changes: 127 additions & 0 deletions api/v1alpha1/nnf_node_block_storage_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Copyright 2021-2023 Hewlett Packard Enterprise Development LP
* Other additional copyright holders may be indicated within.
*
* The entirety of this work is licensed under the Apache License,
* Version 2.0 (the "License"); you may not use this file except
* in compliance with the License.
*
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package v1alpha1

import (
dwsv1alpha2 "github.com/DataWorkflowServices/dws/api/v1alpha2"
"github.com/DataWorkflowServices/dws/utils/updater"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// NnfNodeBlockStorageAllocationSpec describes a single requested block storage
// allocation: the capacity to allocate and the nodes that need device access to it.
type NnfNodeBlockStorageAllocationSpec struct {
// Aggregate capacity of the block devices for each allocation
Capacity int64 `json:"capacity,omitempty"`

// List of nodes where /dev devices should be created
Access []string `json:"access,omitempty"`
}

// NnfNodeBlockStorageSpec defines the desired storage attributes on a NNF Node.
// Storage specs are created at the behest of the user and fulfilled by the NNF Node Controller.
type NnfNodeBlockStorageSpec struct {
// Allocations is the list of storage allocations to make
Allocations []NnfNodeBlockStorageAllocationSpec `json:"allocations,omitempty"`
}

// NnfNodeBlockStorageStatus defines the observed state of NnfNodeBlockStorage.
type NnfNodeBlockStorageStatus struct {
// Allocations is the list of storage allocations that were made
Allocations []NnfNodeBlockStorageAllocationStatus `json:"allocations,omitempty"`

// ResourceError is embedded inline so its fields are serialized directly in the status
dwsv1alpha2.ResourceError `json:",inline"`

// Ready is always serialized (no omitempty). Presumably it indicates that the
// requested allocations have been fulfilled (TODO: confirm against the reconciler).
Ready bool `json:"ready"`
}

// NnfNodeBlockStorageDeviceStatus identifies one NVMe namespace used by an
// allocation and reports the capacity allocated on it.
type NnfNodeBlockStorageDeviceStatus struct {
// NQN of the base NVMe device
NQN string `json:"NQN"`

// Id of the Namespace on the NVMe device (e.g., "2")
NamespaceId string `json:"namespaceId"`

// Total capacity allocated for the storage. This may differ from the requested storage
// capacity as the system may round up the requested capacity to satisfy underlying
// storage requirements (i.e. block size / stripe size).
CapacityAllocated int64 `json:"capacityAllocated,omitempty"`
}

// NnfNodeBlockStorageAccessStatus reports the block device paths and the Redfish
// storage group for one entry in an allocation's Accesses map.
type NnfNodeBlockStorageAccessStatus struct {
// /dev paths for each of the block devices
DevicePaths []string `json:"devicePaths,omitempty"`

// Redfish ID for the storage group
StorageGroupId string `json:"storageGroupId,omitempty"`
}

// NnfNodeBlockStorageAllocationStatus reports the result of one storage allocation:
// the per-entry access status, the NVMe namespaces backing it, the capacity
// actually allocated, and the Redfish storage pool.
type NnfNodeBlockStorageAllocationStatus struct {
// Accesses holds the access status keyed by string. Presumably the key is a node
// name from the allocation spec's Access list (TODO: confirm against the reconciler).
Accesses map[string]NnfNodeBlockStorageAccessStatus `json:"accesses,omitempty"`

// List of NVMe namespaces used by this allocation
Devices []NnfNodeBlockStorageDeviceStatus `json:"devices,omitempty"`

// Total capacity allocated for the storage. This may differ from the requested storage
// capacity as the system may round up the requested capacity to satisfy underlying
// storage requirements (i.e. block size / stripe size).
CapacityAllocated int64 `json:"capacityAllocated,omitempty"`

// Redfish ID for the storage pool
StoragePoolId string `json:"storagePoolId,omitempty"`
}

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="READY",type="string",JSONPath=".status.ready"
// +kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity"
// +kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp"
// NnfNodeBlockStorage is the root API object pairing a requested set of block
// storage allocations (Spec) with their observed state (Status).
type NnfNodeBlockStorage struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

Spec NnfNodeBlockStorageSpec `json:"spec,omitempty"`
Status NnfNodeBlockStorageStatus `json:"status,omitempty"`
}

// GetStatus returns a pointer to the resource's Status section as an
// updater.Status, giving callers access to the live status rather than a copy.
func (ns *NnfNodeBlockStorage) GetStatus() updater.Status[*NnfNodeBlockStorageStatus] {
	status := &ns.Status

	return status
}

// +kubebuilder:object:root=true

// NnfNodeBlockStorageList contains a list of NnfNodeBlockStorage resources
type NnfNodeBlockStorageList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
Items []NnfNodeBlockStorage `json:"items"`
}

// GetObjectList returns a client.Object pointer for every item in the list.
//
// Each pointer refers to the element stored in n.Items (not a copy), so changes
// made through a returned object are visible in the list. The returned slice is
// never nil, even when the list has no items.
func (n *NnfNodeBlockStorageList) GetObjectList() []client.Object {
	// The final length is known up front, so allocate once instead of growing on append.
	objectList := make([]client.Object, 0, len(n.Items))

	// Index into Items so the address of the stored element is taken, not of a loop copy.
	for i := range n.Items {
		objectList = append(objectList, &n.Items[i])
	}

	return objectList
}

// init registers the NnfNodeBlockStorage resource and its list type with the
// package's SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		new(NnfNodeBlockStorage),
		new(NnfNodeBlockStorageList),
	)
}
Loading

0 comments on commit eb48874

Please sign in to comment.