Skip to content

Commit

Permalink
Merge pull request #174 from NearNodeFlash/release-v0.1.1
Browse files Browse the repository at this point in the history
Release v0.1.1
  • Loading branch information
roehrich-hpe authored May 1, 2024
2 parents 7d3e905 + 409290a commit b11a220
Show file tree
Hide file tree
Showing 31 changed files with 10,965 additions and 11,721 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# These ARGs must be before the first FROM. This allows them to be valid for
# use in FROM instructions.
ARG NNFMFU_TAG_BASE=ghcr.io/nearnodeflash/nnf-mfu
ARG NNFMFU_VERSION=0.0.3
ARG NNFMFU_VERSION=0.1.0

# Build the manager binary
FROM golang:1.19-alpine as builder
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ IMAGE_TAG_BASE ?= ghcr.io/nearnodeflash/nnf-dm

# The NNF-MFU container image to use in NNFContainerProfile resources.
NNFMFU_TAG_BASE ?= ghcr.io/nearnodeflash/nnf-mfu
NNFMFU_VERSION ?= 0.0.3
NNFMFU_VERSION ?= 0.1.0

DOCKER_BUILDARGS=--build-arg NNFMFU_TAG_BASE=$(NNFMFU_TAG_BASE) --build-arg NNFMFU_VERSION=$(NNFMFU_VERSION)

Expand Down
4 changes: 2 additions & 2 deletions config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ kind: Kustomization
images:
- name: controller
newName: ghcr.io/nearnodeflash/nnf-dm
newTag: 0.1.0
newTag: 0.1.1
- name: nnf-mfu
newName: ghcr.io/nearnodeflash/nnf-mfu
newTag: 0.0.3
newTag: 0.1.0
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ spec:
maxUnavailable: '25%'
template:
spec:
serviceAccountName: nnf-dm-controller-manager # not sure why 'controller-manager' isn't patched to include the 'nnf-dm-' prefix
serviceAccountName: nnf-dm-node-controller
tolerations:
- key: 'cray.nnf.node'
operator: 'Equal'
Expand Down
7 changes: 7 additions & 0 deletions config/rbac/daemon_role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ rules:
- get
- list
- watch
- apiGroups:
- dataworkflowservices.github.io
resources:
- workflows
verbs:
- get
- list
- apiGroups:
- dataworkflowservices.github.io
resources:
Expand Down
8 changes: 8 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,14 @@ rules:
- get
- patch
- update
- apiGroups:
- nnf.cray.hpe.com
resources:
- nnfnodestorages
verbs:
- get
- list
- watch
- apiGroups:
- nnf.cray.hpe.com
resources:
Expand Down
15 changes: 14 additions & 1 deletion config/rbac/role_binding.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,17 @@ roleRef:
subjects:
- kind: ServiceAccount
name: controller-manager
namespace: system
namespace: system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: node-manager-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: manager-role
subjects:
- kind: ServiceAccount
name: node-controller
namespace: system
18 changes: 17 additions & 1 deletion config/rbac/service_account.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,20 @@ metadata:
annotations:
kubernetes.io/service-account.name: controller-manager
kubernetes.io/service-account.namespace: system
type: kubernetes.io/service-account-token
type: kubernetes.io/service-account-token
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: node-controller
namespace: system
---
apiVersion: v1
kind: Secret
metadata:
name: node-controller
namespace: system
annotations:
kubernetes.io/service-account.name: node-controller
kubernetes.io/service-account.namespace: system
type: kubernetes.io/service-account-token
18 changes: 11 additions & 7 deletions daemons/compute/client-go/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,20 +181,23 @@ func main() {
// and couldn't recover the data movement request). The NNF Data Movement Workflow ensures that these requests are
// deleted.
if !*skipDelete {
// Use List to cleanup and delete requests
for _, uid := range listResponse.GetUids() {
// Delete each DM this client created
for _, resp := range responses {
uid := resp.GetUid()
log.Printf("Deleting request: %v", uid)

deleteResponse, err := deleteRequest(ctx, c, *workflow, *namespace, uid)
if err != nil {
log.Fatalf("could not delete data movement request: %s", err)
}

if deleteResponse.Status != pb.DataMovementDeleteResponse_SUCCESS {
log.Fatalf("data movement delete failed: %+v", deleteResponse)
if deleteResponse.Status == pb.DataMovementDeleteResponse_NOT_FOUND {
log.Printf("Data movement request deleted (not found): %v %v", uid, deleteResponse.String())
} else if deleteResponse.Status != pb.DataMovementDeleteResponse_SUCCESS {
log.Fatalf("data movement delete failed: %v %+v", uid, deleteResponse)
} else {
log.Printf("Data movement request deleted: %v %v", uid, deleteResponse.String())
}

log.Printf("Data movement request deleted: %v %v", uid, deleteResponse.String())
}

// Print out the list again to verify deletes
Expand Down Expand Up @@ -288,7 +291,8 @@ func deleteRequest(ctx context.Context, client pb.DataMoverClient, workflow stri
Uid: uid,
})

if err != nil {
// It's already been deleted if it's not found
if err != nil && rsp.Status != pb.DataMovementDeleteResponse_NOT_FOUND {
return nil, err
}

Expand Down
38 changes: 27 additions & 11 deletions daemons/compute/server/servers/server_default.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,25 @@ func (*defaultServer) Version(context.Context, *emptypb.Empty) (*pb.DataMovement

func (s *defaultServer) Create(ctx context.Context, req *pb.DataMovementCreateRequest) (*pb.DataMovementCreateResponse, error) {

// Ensure the workflow exists and is in the PreRun state with a 'Completed' status
workflow := &dwsv1alpha2.Workflow{ObjectMeta: metav1.ObjectMeta{
Name: req.Workflow.Name,
Namespace: req.Workflow.Namespace,
}}
if err := s.client.Get(ctx, client.ObjectKeyFromObject(workflow), workflow); err != nil {
return &pb.DataMovementCreateResponse{
Status: pb.DataMovementCreateResponse_FAILED,
Message: "Could find matching workflow: " + err.Error(),
}, nil
}

if workflow.Status.State != dwsv1alpha2.StatePreRun || workflow.Status.Status != "Completed" {
return &pb.DataMovementCreateResponse{
Status: pb.DataMovementCreateResponse_FAILED,
Message: fmt.Sprintf("Workflow must be in '%s' state and 'Completed' status", dwsv1alpha2.StatePreRun),
}, nil
}

computeClientMount, computeMountInfo, err := s.findComputeMountInfo(ctx, req)
if err != nil {
return &pb.DataMovementCreateResponse{
Expand All @@ -313,6 +332,9 @@ func (s *defaultServer) Create(ctx context.Context, req *pb.DataMovementCreateRe
dm, err = s.createNnfNodeDataMovement(ctx, req, computeMountInfo)
dmFunc = "createNnfNodeDataMovement()"
default:
// xfs is not supported since it can only be mounted in one location at a time. It is
// already mounted on the compute node when copy offload occurs (PreRun/PostRun), therefore
// it cannot be mounted on the rabbit to perform data movement.
return &pb.DataMovementCreateResponse{
Status: pb.DataMovementCreateResponse_INVALID,
Message: fmt.Sprintf("filesystem not supported: '%s'", computeMountInfo.Type),
Expand All @@ -338,16 +360,9 @@ func (s *defaultServer) Create(ctx context.Context, req *pb.DataMovementCreateRe
dm.Spec.UserId = userId
dm.Spec.GroupId = groupId

// We don't have the actual NnfDataMovement parent available, but we know the name
// and the namespace because they will match the workflow's name and namespace.
parentDm := &nnfv1alpha1.NnfDataMovement{
ObjectMeta: metav1.ObjectMeta{
Name: req.Workflow.Name,
Namespace: req.Workflow.Namespace,
},
}

dwsv1alpha2.AddOwnerLabels(dm, parentDm)
// Add appropriate workflow labels so this is cleaned up
dwsv1alpha2.AddWorkflowLabels(dm, workflow)
dwsv1alpha2.AddOwnerLabels(dm, workflow)

// Label the NnfDataMovement with a teardown state of "post_run" so the NNF workflow
// controller can identify compute initiated data movements.
Expand Down Expand Up @@ -470,7 +485,8 @@ func (s *defaultServer) createNnfNodeDataMovement(ctx context.Context, req *pb.D
},
Spec: nnfv1alpha1.NnfDataMovementSpec{
Source: &nnfv1alpha1.NnfDataMovementSpecSourceDestination{
Path: source,
Path: source,
StorageReference: computeMountInfo.Device.DeviceReference.ObjectReference,
},
Destination: &nnfv1alpha1.NnfDataMovementSpecSourceDestination{
Path: req.Destination,
Expand Down
10 changes: 5 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ module github.com/NearNodeFlash/nnf-dm
go 1.19

require (
github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240220213720-51597bca637d
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240221214302-e7989177289a
github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240326175906-15cfe803227d
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240501135550-002d992157a9
github.com/onsi/ginkgo/v2 v2.11.0
github.com/onsi/gomega v1.27.10
github.com/prometheus/client_golang v1.16.0
Expand All @@ -22,12 +22,12 @@ require (
)

require (
github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f // indirect
github.com/NearNodeFlash/nnf-ec v0.0.1-0.20240318141758-e8ded5e13eb8 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-logr/logr v1.2.4
github.com/go-logr/zapr v1.2.4 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
Expand Down Expand Up @@ -73,7 +73,7 @@ require (
)

require (
github.com/DataWorkflowServices/dws v0.0.1-0.20240221183421-1a123a9274b6
github.com/DataWorkflowServices/dws v0.0.1-0.20240423152131-d92c9aadede8
go.openly.dev/pointy v1.3.0
)

Expand Down
16 changes: 8 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
github.com/DataWorkflowServices/dws v0.0.1-0.20240221183421-1a123a9274b6 h1:LYKIIoawsuo+1ByvQaIpgl8vZc2KrE0q7AE7t0YumrI=
github.com/DataWorkflowServices/dws v0.0.1-0.20240221183421-1a123a9274b6/go.mod h1:vSTBLWbsFjMYxx+sjMDyZpMXLY9m5Bp73cjnmAL30WU=
github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240220213720-51597bca637d h1:AP1TgQlneYZT/AxkYFyvJp1j86+7MTYOoo3I1Zw3L2E=
github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240220213720-51597bca637d/go.mod h1:qBcz9p8sXm1qhDf8WUmhxTlD1NCMEjoAD7NoHbQvMiI=
github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f h1:aWtSSQLLk9mUZj94mowirQeVw9saf80gVe10X0rZe8o=
github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f/go.mod h1:oxdwMqfttOF9dabJhqrWlirCnMk8/8eyLMwl+hducjk=
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240221214302-e7989177289a h1:m3lPHiWObITk+zp3GAuOeawkaMr+U4aKlHcblYnsP58=
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240221214302-e7989177289a/go.mod h1:4kJuGEwS46EYIt24NmNweaMryYp8M6RabNc8Nd+GIBE=
github.com/DataWorkflowServices/dws v0.0.1-0.20240423152131-d92c9aadede8 h1:Ic1rErr2VFbIfGYx8loePH7HRqPMQEW5ZIDiOP2CMLk=
github.com/DataWorkflowServices/dws v0.0.1-0.20240423152131-d92c9aadede8/go.mod h1:vSTBLWbsFjMYxx+sjMDyZpMXLY9m5Bp73cjnmAL30WU=
github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240326175906-15cfe803227d h1:gqAZOwvFrsgsAQYLqSnMBU4aALOH7+QpqYruhhI6MZU=
github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240326175906-15cfe803227d/go.mod h1:qBcz9p8sXm1qhDf8WUmhxTlD1NCMEjoAD7NoHbQvMiI=
github.com/NearNodeFlash/nnf-ec v0.0.1-0.20240318141758-e8ded5e13eb8 h1:Ja+ZQVGl/+buQXGqKFsM5mDwX4ReiI5UkputLsaiRSo=
github.com/NearNodeFlash/nnf-ec v0.0.1-0.20240318141758-e8ded5e13eb8/go.mod h1:oxdwMqfttOF9dabJhqrWlirCnMk8/8eyLMwl+hducjk=
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240501135550-002d992157a9 h1:IS42BQNDBmdFDreU+LpMcjj/Cai4iqDEmSKgadUc9Q0=
github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240501135550-002d992157a9/go.mod h1:/q/cLHgca7wBy0QAbcj80ANWuWjmNLrLD39xbEwi0tQ=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
Expand Down
Loading

0 comments on commit b11a220

Please sign in to comment.