Skip to content

Commit

Permalink
Merge branch 'master' into release-v0.0.5
Browse files Browse the repository at this point in the history
  • Loading branch information
matthew-richerson committed Sep 7, 2023
2 parents b0a8863 + 5877923 commit bdb258e
Show file tree
Hide file tree
Showing 76 changed files with 4,355 additions and 1,591 deletions.
4 changes: 1 addition & 3 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,10 @@
"mode": "test",
"program": "${relativeFileDirname}",
"args": [
"-v=4",
"-ginkgo.v",
"-ginkgo.progress"
],
"env": {
"KUBEBUILDER_ASSETS": "${workspaceFolder}/bin/k8s/1.25.0-darwin-amd64",
"KUBEBUILDER_ASSETS": "${workspaceFolder}/bin/k8s/1.26.0-darwin-amd64",
"GOMEGA_DEFAULT_EVENTUALLY_TIMEOUT": "10m",
"GOMEGA_DEFAULT_EVENTUALLY_POLLING_INTERVAL": "100ms"
},
Expand Down
21 changes: 15 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,12 @@ LOCALBIN ?= $(shell pwd)/bin
$(LOCALBIN):
mkdir -p $(LOCALBIN)

# Delete the local tool directory $(LOCALBIN) if it exists. The tree is made
# user-writable first (chmod -R u+w), presumably so that read-only content does
# not block the following rm -rf.
# NOTE(review): [[ ... ]] is a bash construct; this assumes SHELL is set to bash
# elsewhere in this Makefile — confirm.
.PHONY: clean-bin
clean-bin:
if [[ -d $(LOCALBIN) ]]; then \
chmod -R u+w $(LOCALBIN) && rm -rf $(LOCALBIN); \
fi

## Tool Binaries
KUSTOMIZE ?= $(LOCALBIN)/kustomize
CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
Expand All @@ -293,14 +299,17 @@ CONTROLLER_TOOLS_VERSION ?= v0.12.0

KUSTOMIZE_INSTALL_SCRIPT ?= "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh"
.PHONY: kustomize
kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary.
$(KUSTOMIZE): $(LOCALBIN)
test -s $(LOCALBIN)/kustomize || { curl -s $(KUSTOMIZE_INSTALL_SCRIPT) | bash -s -- $(subst v,,$(KUSTOMIZE_VERSION)) $(LOCALBIN); }
# (Re)install kustomize into $(LOCALBIN) when the binary is missing or empty, or
# when the version parsed from `kustomize version` differs from
# $(KUSTOMIZE_VERSION). The parse keeps the first whitespace-separated field and
# then its second "/"-separated field.
# NOTE(review): that assumes output shaped like "...kustomize/vX.Y.Z ..."; if the
# pinned release prints a bare version string the comparison never matches and
# kustomize is reinstalled on every run — confirm against the pinned release.
# The install script is given the version with every "v" removed ($(subst v,,...)).
kustomize: $(LOCALBIN) ## Download kustomize locally if necessary.
if [[ ! -s $(LOCALBIN)/kustomize || $$($(LOCALBIN)/kustomize version | awk '{print $$1}' | awk -F/ '{print $$2}') != $(KUSTOMIZE_VERSION) ]]; then \
rm -f $(LOCALBIN)/kustomize && \
{ curl -s $(KUSTOMIZE_INSTALL_SCRIPT) | bash -s -- $(subst v,,$(KUSTOMIZE_VERSION)) $(LOCALBIN); }; \
fi

.PHONY: controller-gen
controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary.
$(CONTROLLER_GEN): $(LOCALBIN)
test -s $(LOCALBIN)/controller-gen || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_TOOLS_VERSION)
# (Re)install controller-gen into $(LOCALBIN) with `go install` when the binary
# is missing or empty, or when the second whitespace-separated field printed by
# `controller-gen --version` differs from $(CONTROLLER_TOOLS_VERSION). The stale
# binary is removed before the install runs.
controller-gen: $(LOCALBIN) ## Download controller-gen locally if necessary.
if [[ ! -s $(LOCALBIN)/controller-gen || $$($(LOCALBIN)/controller-gen --version | awk '{print $$2}') != $(CONTROLLER_TOOLS_VERSION) ]]; then \
rm -f $(LOCALBIN)/controller-gen && GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_TOOLS_VERSION); \
fi

.PHONY: envtest
envtest: $(ENVTEST) ## Download envtest-setup locally if necessary.
Expand Down
3 changes: 2 additions & 1 deletion api/v1alpha1/nnf_access_types.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2021, 2022 Hewlett Packard Enterprise Development LP
* Copyright 2021-2023 Hewlett Packard Enterprise Development LP
* Other additional copyright holders may be indicated within.
*
* The entirety of this work is licensed under the Apache License,
Expand Down Expand Up @@ -83,6 +83,7 @@ type NnfAccessStatus struct {
//+kubebuilder:printcolumn:name="DESIREDSTATE",type="string",JSONPath=".spec.desiredState",description="The desired state"
//+kubebuilder:printcolumn:name="STATE",type="string",JSONPath=".status.state",description="The current state"
//+kubebuilder:printcolumn:name="READY",type="boolean",JSONPath=".status.ready",description="Whether the state has been achieved"
//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity"
//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp"

// NnfAccess is the Schema for the nnfaccesses API
Expand Down
5 changes: 4 additions & 1 deletion api/v1alpha1/nnf_datamovement_types.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2021, 2022 Hewlett Packard Enterprise Development LP
* Copyright 2021-2023 Hewlett Packard Enterprise Development LP
* Other additional copyright holders may be indicated within.
*
* The entirety of this work is licensed under the Apache License,
Expand Down Expand Up @@ -153,6 +153,8 @@ type NnfDataMovementStatus struct {
// as it executes. The command status is polled at a certain frequency to avoid excessive
// updates to the Data Movement resource.
CommandStatus *NnfDataMovementCommandStatus `json:"commandStatus,omitempty"`

dwsv1alpha2.ResourceError `json:",inline"`
}

// Types describing the various data movement status conditions.
Expand All @@ -175,6 +177,7 @@ const (
//+kubebuilder:subresource:status
//+kubebuilder:printcolumn:name="STATE",type="string",JSONPath=".status.state",description="Current state"
//+kubebuilder:printcolumn:name="STATUS",type="string",JSONPath=".status.status",description="Status of current state"
//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity"
//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp"

// NnfDataMovement is the Schema for the datamovements API
Expand Down
6 changes: 3 additions & 3 deletions api/v1alpha1/nnf_node_storage_types.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2021, 2022 Hewlett Packard Enterprise Development LP
* Copyright 2021-2023 Hewlett Packard Enterprise Development LP
* Other additional copyright holders may be indicated within.
*
* The entirety of this work is licensed under the Apache License,
Expand Down Expand Up @@ -169,8 +169,6 @@ type NnfNodeStorageAllocationStatus struct {
StoragePool NnfResourceStatus `json:"storagePool,omitempty"`

FileSystem NnfResourceStatus `json:"fileSystem,omitempty"`

Conditions []metav1.Condition `json:"conditions,omitempty"`
}

// LustreStorageStatus describes the Lustre target created here.
Expand All @@ -196,6 +194,8 @@ func (ns *NnfNodeStorage) GetStatus() updater.Status[*NnfNodeStorageStatus] {
}

//+kubebuilder:object:root=true
//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity"
//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp"

// NnfNodeStorageList contains a list of NnfNodeStorage resources
type NnfNodeStorageList struct {
Expand Down
7 changes: 6 additions & 1 deletion api/v1alpha1/nnf_port_manager_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,13 @@ type NnfPortManagerSpec struct {
// AllocationStatus is the current status of a port requestor. A port that is in use by the respective owner
// will have a status of "InUse". A port that is freed by the owner but not yet reclaimed by the port manager
// will have a status of "Free". Any other status value indicates a failure of the port allocation.
// +kubebuilder:validation:Enum:=InUse;Free;InvalidConfiguration;InsufficientResources
// +kubebuilder:validation:Enum:=InUse;Free;Cooldown;InvalidConfiguration;InsufficientResources
type NnfPortManagerAllocationStatusStatus string

const (
NnfPortManagerAllocationStatusInUse NnfPortManagerAllocationStatusStatus = "InUse"
NnfPortManagerAllocationStatusFree NnfPortManagerAllocationStatusStatus = "Free"
NnfPortManagerAllocationStatusCooldown NnfPortManagerAllocationStatusStatus = "Cooldown"
NnfPortManagerAllocationStatusInvalidConfiguration NnfPortManagerAllocationStatusStatus = "InvalidConfiguration"
NnfPortManagerAllocationStatusInsufficientResources NnfPortManagerAllocationStatusStatus = "InsufficientResources"
// NOTE: You must ensure any new value is added to the above kubebuilder validation enum
Expand All @@ -82,6 +83,10 @@ type NnfPortManagerAllocationStatus struct {

// Status is the ownership status of the port.
Status NnfPortManagerAllocationStatusStatus `json:"status"`

// TimeUnallocated is when the port was unallocated. This is to ensure the proper cooldown
// duration.
TimeUnallocated *metav1.Time `json:"timeUnallocated,omitempty"`
}

// PortManagerStatus is the current status of the port manager.
Expand Down
12 changes: 8 additions & 4 deletions api/v1alpha1/nnf_storage_types.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2021, 2022 Hewlett Packard Enterprise Development LP
* Copyright 2021-2023 Hewlett Packard Enterprise Development LP
* Other additional copyright holders may be indicated within.
*
* The entirety of this work is licensed under the Apache License,
Expand All @@ -22,6 +22,7 @@ package v1alpha1
import (
dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2"
"github.com/HewlettPackard/dws/utils/updater"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)
Expand Down Expand Up @@ -56,6 +57,10 @@ type NnfStorageLustreSpec struct {
// ExternalMgsNid is the NID of the MGS when a pre-existing MGS is
// provided by the DataWarp directive (#DW).
ExternalMgsNid string `json:"externalMgsNid,omitempty"`

// PersistentMgsReference is a reference to a persistent storage that is providing
// the external MGS.
PersistentMgsReference corev1.ObjectReference `json:"persistentMgsReference,omitempty"`
}

// NnfStorageAllocationSetSpec defines the details for an allocation set
Expand Down Expand Up @@ -106,9 +111,6 @@ type NnfStorageAllocationSetStatus struct {
// Health reflects the health of this allocation set
Health NnfResourceHealthType `json:"health,omitempty"`

// Error is the human readable error string
Error string `json:"error,omitempty"`

// AllocationCount is the total number of allocations that currently
// exist
AllocationCount int `json:"allocationCount"`
Expand All @@ -135,6 +137,8 @@ type NnfStorageStatus struct {

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status
//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp"
//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity"

// NnfStorage is the Schema for the storages API
type NnfStorage struct {
Expand Down
58 changes: 49 additions & 9 deletions api/v1alpha1/nnfcontainerprofile_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,26 +40,61 @@ type NnfContainerProfileData struct {
// List of possible filesystems supported by this container profile
Storages []NnfContainerProfileStorage `json:"storages,omitempty"`

// Stop any containers after X seconds once a workflow has transitioned to PostRun. Defaults to
// 0. A value of 0 disables this behavior.
// Containers are launched in the PreRun state. Allow this many seconds for the containers to
// start before declaring an error to the workflow.
// Defaults to 60 if not set. A value of 0 disables this behavior.
// +kubebuilder:default:=60
// +kubebuilder:validation:Minimum:=0
PostRunTimeoutSeconds int64 `json:"postRunTimeoutSeconds,omitempty"`
PreRunTimeoutSeconds *int64 `json:"preRunTimeoutSeconds,omitempty"`

// Containers are expected to complete in the PostRun State. Allow this many seconds for the
// containers to exit before declaring an error to the workflow.
// Defaults to 60 if not set. A value of 0 disables this behavior.
// +kubebuilder:default:=60
// +kubebuilder:validation:Minimum:=0
PostRunTimeoutSeconds *int64 `json:"postRunTimeoutSeconds,omitempty"`

// Specifies the number of times a container will be retried upon a failure. A new pod is
// deployed on each retry. Defaults to 6 by kubernetes itself and must be set. A value of 0
// deployed on each retry. Defaults to 6 by kubernetes itself and must be set. A value of 0
// disables retries.
// +kubebuilder:validation:Minimum:=0
// +kubebuilder:default:=6
RetryLimit int32 `json:"retryLimit"`

// Spec to define the containers created from container profile. This is used for non-MPI
// containers.
// UserID specifies the user ID that is allowed to use this profile. If this is specified, only
// Workflows that have a matching user ID can select this profile.
UserID *uint32 `json:"userID,omitempty"`

// GroupID specifies the group ID that is allowed to use this profile. If this is specified,
// only Workflows that have a matching group ID can select this profile.
GroupID *uint32 `json:"groupID,omitempty"`

// Number of ports to open for communication with the user container. These ports are opened on
// the targeted NNF nodes and can be accessed outside of the k8s cluster (e.g. compute nodes).
// The requested ports are made available as environment variables inside the container and in
// the DWS workflow (NNF_CONTAINER_PORTS).
NumPorts int32 `json:"numPorts,omitempty"`

// Spec to define the containers created from this profile. This is used for non-MPI containers.
// Refer to the K8s documentation for `PodSpec` for more definition:
// https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec
// Either this or MPISpec must be provided, but not both.
Spec *corev1.PodSpec `json:"spec,omitempty"`

// MPIJobSpec to define the containers created from container profile. This is used for MPI
// containers via MPIJobs. See mpi-operator for more details.
// MPIJobSpec to define the MPI containers created from this profile. This functionality is
// provided via mpi-operator, a 3rd party tool to assist in running MPI applications across
// worker containers.
// Either this or Spec must be provided, but not both.
//
// All the fields defined drive mpi-operator behavior. See the type definition of MPISpec for
// more detail:
// https://github.com/kubeflow/mpi-operator/blob/v0.4.0/pkg/apis/kubeflow/v2beta1/types.go#L137
//
// Note: most of these fields are fully customizable with a few exceptions. These fields are
// overridden by NNF software to ensure proper behavior to interface with the DWS workflow
// - Replicas
// - RunPolicy.BackoffLimit (this is set above by `RetryLimit`)
// - Worker/Launcher.RestartPolicy
MPISpec *mpiv2beta1.MPIJobSpec `json:"mpiSpec,omitempty"`
}

Expand All @@ -73,6 +108,11 @@ type NnfContainerProfileStorage struct {
// the user not supplying this filesystem in the #DW directives
//+kubebuilder:default:=false
Optional bool `json:"optional"`

// For DW_GLOBAL_ (global lustre) storages, the access mode must match what is configured in
// the LustreFilesystem resource for the namespace. Defaults to `ReadWriteMany` for global
// lustre, otherwise empty.
PVCMode corev1.PersistentVolumeAccessMode `json:"pvcMode,omitempty"`
}

// +kubebuilder:object:root=true
Expand All @@ -82,7 +122,7 @@ type NnfContainerProfile struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

Data NnfContainerProfileData `json:"data,omitempty"`
Data NnfContainerProfileData `json:"data"`
}

// +kubebuilder:object:root=true
Expand Down
Loading

0 comments on commit bdb258e

Please sign in to comment.