From 7ae2b6ba95052d537c5a5fadc0dd1c454ea6334c Mon Sep 17 00:00:00 2001 From: Adam Bozanich Date: Fri, 23 Feb 2024 20:18:20 +0000 Subject: [PATCH 1/6] feat(cluster/kube/builder): `"ram"` storage class * Implement `"ram"` storage class with "empty dir" memory-backed volumes. * No changes to resource accounting - service memory size must include size allocated to ram storage. refs akash-network/support#179 Signed-off-by: Adam Bozanich --- cluster/kube/builder/deployment.go | 1 + cluster/kube/builder/statefulset.go | 1 + cluster/kube/builder/workload.go | 36 +++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/cluster/kube/builder/deployment.go b/cluster/kube/builder/deployment.go index f337a36b..5769d76a 100644 --- a/cluster/kube/builder/deployment.go +++ b/cluster/kube/builder/deployment.go @@ -54,6 +54,7 @@ func (b *deployment) Create() (*appsv1.Deployment, error) { // nolint:golint,unp AutomountServiceAccountToken: &falseValue, Containers: []corev1.Container{b.container()}, ImagePullSecrets: b.imagePullSecrets(), + Volumes: b.volumes(), }, }, }, diff --git a/cluster/kube/builder/statefulset.go b/cluster/kube/builder/statefulset.go index c5fcda50..0b43be11 100644 --- a/cluster/kube/builder/statefulset.go +++ b/cluster/kube/builder/statefulset.go @@ -54,6 +54,7 @@ func (b *statefulSet) Create() (*appsv1.StatefulSet, error) { // nolint:golint,u AutomountServiceAccountToken: &falseValue, Containers: []corev1.Container{b.container()}, ImagePullSecrets: b.imagePullSecrets(), + Volumes: b.volumes(), }, }, VolumeClaimTemplates: b.persistentVolumeClaims(), diff --git a/cluster/kube/builder/workload.go b/cluster/kube/builder/workload.go index 48683f28..ad3d0879 100644 --- a/cluster/kube/builder/workload.go +++ b/cluster/kube/builder/workload.go @@ -154,6 +154,42 @@ func (b *Workload) container() corev1.Container { return kcontainer } +// Return RAM volumes +func (b *Workload) volumes() []corev1.Volume { + var volumes []corev1.Volume // nolint:prealloc + + service := &b.deployment.ManifestGroup().Services[b.serviceIdx] + + for _, storage := range service.Resources.Storage { + + // Only RAM volumes + sclass, ok := storage.Attributes.Find(sdl.StorageAttributeClass).AsString() + if !ok || sclass != sdl.StorageClassRAM { + continue + } + + // No persistent volumes + persistent, ok := storage.Attributes.Find(sdl.StorageAttributePersistent).AsBool() + if !ok || persistent { + continue + } + + size := resource.NewQuantity(storage.Quantity.Val.Int64(), resource.DecimalSI).DeepCopy() + + volumes = append(volumes, corev1.Volume{ + Name: fmt.Sprintf("%s-%s", service.Name, storage.Name), + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{ + Medium: corev1.StorageMediumMemory, + SizeLimit: &size, + }, + }, + }) + } + + return volumes +} + func (b *Workload) persistentVolumeClaims() []corev1.PersistentVolumeClaim { var pvcs []corev1.PersistentVolumeClaim // nolint:prealloc From be9c3476ce50dfcb04b00d569908369c24a27338 Mon Sep 17 00:00:00 2001 From: Artur Troian Date: Tue, 19 Mar 2024 10:35:54 +0100 Subject: [PATCH 2/6] chore(deps): bump deps (#213) - akash-api v0.0.60 - node v0.32.2 Signed-off-by: Artur Troian --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 8aa10097..8485197e 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,8 @@ module github.com/akash-network/provider go 1.21 require ( - github.com/akash-network/akash-api v0.0.56 - github.com/akash-network/node v0.31.0-rc0 + github.com/akash-network/akash-api v0.0.60 + github.com/akash-network/node v0.32.0 github.com/avast/retry-go/v4 v4.5.0 github.com/blang/semver/v4 v4.0.0 github.com/boz/go-lifecycle v0.1.1 diff --git a/go.sum b/go.sum index 8a8f2012..2e49c72f 100644 --- a/go.sum +++ b/go.sum @@ -197,16 +197,16 @@ github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia github.com/agnivade/levenshtein v1.0.1/go.mod h1:CURSv5d9Uaml+FovSIICkLbAUZ9S4RqaHDIsdSBg7lM= github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY= github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= -github.com/akash-network/akash-api v0.0.56 h1:dUiDR7DXeMXgh9xyxCik4gznTsjSJhi8Pq3iPcZoqSc= -github.com/akash-network/akash-api v0.0.56/go.mod h1:pNr61L4+0sheol7ZK0HjgK3rxpIAbYBGq1w1oH4B0+M= +github.com/akash-network/akash-api v0.0.60 h1:SIhOB8jUt3cX9thf4GTjouEqwX7sNU4Oqx+hEFdPzbA= +github.com/akash-network/akash-api v0.0.60/go.mod h1:pNr61L4+0sheol7ZK0HjgK3rxpIAbYBGq1w1oH4B0+M= github.com/akash-network/cometbft v0.34.27-akash h1:V1dApDOr8Ee7BJzYyQ7Z9VBtrAul4+baMeA6C49dje0= github.com/akash-network/cometbft v0.34.27-akash/go.mod h1:BcCbhKv7ieM0KEddnYXvQZR+pZykTKReJJYf7YC7qhw= github.com/akash-network/ledger-go v0.14.3 h1:LCEFkTfgGA2xFMN2CtiKvXKE7dh0QSM77PJHCpSkaAo= github.com/akash-network/ledger-go v0.14.3/go.mod h1:NfsjfFvno9Kaq6mfpsKz4sqjnAVVEsVsnBJfKB4ueAs= github.com/akash-network/ledger-go/cosmos v0.14.4 h1:h3WiXmoKKs9wkj1LHcJ12cLjXXg6nG1fp+UQ5+wu/+o= github.com/akash-network/ledger-go/cosmos v0.14.4/go.mod h1:SjAfheQTE4rWk0ir+wjbOWxwj8nc8E4AZ08NdsvYG24= -github.com/akash-network/node v0.31.0-rc0 h1:Ls54ebvbHvBGoWEj1M6VatZmBHgUl2XkZL2XE4S0XZQ= -github.com/akash-network/node v0.31.0-rc0/go.mod h1:wGWR5LewA1nq+O/Sib5kYfBCLVCAqS83oWZ/RuHGYDY= +github.com/akash-network/node v0.32.0 h1:xcHjZaVES7O9zp/2CodOIpOaEouq2Jqy1ZpWVJuYY+w= +github.com/akash-network/node v0.32.0/go.mod h1:wGWR5LewA1nq+O/Sib5kYfBCLVCAqS83oWZ/RuHGYDY= github.com/alecthomas/participle/v2 v2.0.0-alpha7 h1:cK4vjj0VSgb3lN1nuKA5F7dw+1s1pWBe5bx7nNCnN+c= github.com/alecthomas/participle/v2 v2.0.0-alpha7/go.mod h1:NumScqsC42o9x+dGj8/YqsIfhrIQjFEOFovxotbBirA= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= From f1573dbf32cbc93c09736d4746fb4e25203ca26c Mon Sep 17 00:00:00 2001 From: Artur Troian Date: Tue, 19 Mar 2024 11:32:40 +0100 Subject: [PATCH 3/6] fix(e2e/tests): use default deposit value from onchain params (#214) Signed-off-by: Artur Troian --- integration/container2container_test.go | 3 +-- integration/e2e_test.go | 24 ++++++++++++------------ integration/escrow_monitor_test.go | 3 +-- integration/persistentstorage_test.go | 4 ++-- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/integration/container2container_test.go b/integration/container2container_test.go index 5dc09515..d6c5a34f 100644 --- a/integration/container2container_test.go +++ b/integration/container2container_test.go @@ -37,8 +37,7 @@ func (s *E2EContainerToContainer) TestE2EContainerToContainer() { s.validator.ClientCtx, s.keyTenant.GetAddress(), deploymentPath, - cliGlobalFlags(deploymentUAktDeposit, - fmt.Sprintf("--dseq=%v", deploymentID.DSeq))..., + cliGlobalFlags(fmt.Sprintf("--dseq=%v", deploymentID.DSeq))..., ) s.Require().NoError(err) s.Require().NoError(s.waitForBlocksCommitted(7)) diff --git a/integration/e2e_test.go b/integration/e2e_test.go index 547a0f64..f345087d 100644 --- a/integration/e2e_test.go +++ b/integration/e2e_test.go @@ -64,6 +64,8 @@ type IntegrationTestSuite struct { ctx context.Context ctxCancel context.CancelFunc + deploymentMinDeposit sdk.DecCoin + appHost string appPort string @@ -71,15 +73,14 @@ type IntegrationTestSuite struct { } const ( - defaultGasPrice = "0.03uakt" - defaultGasAdjustment = "1.4" - uaktMinDeposit = "5000000uakt" - axlUSDCDenom = "ibc/12C6A0C374171B595A0A9E18B83FA09D295FB1F2D8C6DAA3AC28683471752D84" - axlUSCDMinDeposit = "5000000" + axlUSDCDenom + defaultGasPrice = "0.03uakt" + defaultGasAdjustment = "1.4" + axlUSDCDenom = "ibc/12C6A0C374171B595A0A9E18B83FA09D295FB1F2D8C6DAA3AC28683471752D84" + axlUSCDMinDepositAmount = 5000000 ) var ( - deploymentUAktDeposit = fmt.Sprintf("--deposit=%s", uaktMinDeposit) + axlUSCDMinDeposit = fmt.Sprintf("%d%s", axlUSCDMinDepositAmount, axlUSDCDenom) deploymentAxlUSDCDeposit = fmt.Sprintf("--deposit=%s", axlUSCDMinDeposit) ) @@ -142,7 +143,7 @@ func (s *IntegrationTestSuite) SetupSuite() { // Send coins value sendTokens := sdk.Coins{ sdk.NewCoin(s.cfg.BondDenom, mtypes.DefaultBidMinDeposit.Amount.MulRaw(4)), - sdk.NewCoin(axlUSDCDenom, mtypes.DefaultBidMinDeposit.Amount.MulRaw(4)), + sdk.NewCoin(axlUSDCDenom, sdk.NewInt(axlUSCDMinDepositAmount*4)), } // Setup a Provider key @@ -259,10 +260,8 @@ func (s *IntegrationTestSuite) SetupSuite() { context.Background(), s.validator.ClientCtx, s.keyTenant.GetAddress(), - fmt.Sprintf("--%s=true", flags.FlagSkipConfirmation), - fmt.Sprintf("--%s=%s", flags.FlagBroadcastMode, flags.BroadcastBlock), - fmt.Sprintf("--%s=%s", flags.FlagFees, sdk.NewCoins(sdk.NewCoin(s.cfg.BondDenom, sdk.NewInt(10))).String()), - fmt.Sprintf("--gas=%d", flags.DefaultGasLimit), + cliGlobalFlags(fmt.Sprintf("--%s=true", flags.FlagSkipConfirmation), + fmt.Sprintf("--%s=%s", flags.FlagBroadcastMode, flags.BroadcastBlock))..., ) s.Require().NoError(err) @@ -482,7 +481,8 @@ func (s *IntegrationTestSuite) closeDeployments() int { res, err := deploycli.TxCloseDeploymentExec( s.validator.ClientCtx, keyTenant.GetAddress(), - cliGlobalFlags(fmt.Sprintf("--owner=%s", createdDep.Groups[0].GroupID.Owner), + cliGlobalFlags( + fmt.Sprintf("--owner=%s", createdDep.Groups[0].GroupID.Owner), fmt.Sprintf("--dseq=%v", createdDep.Deployment.DeploymentID.DSeq))..., ) s.Require().NoError(err) diff --git a/integration/escrow_monitor_test.go b/integration/escrow_monitor_test.go index b3ef30c2..989c7931 100644 --- a/integration/escrow_monitor_test.go +++ b/integration/escrow_monitor_test.go @@ -39,8 +39,7 @@ func (s *E2EEscrowMonitor) TestE2EEscrowMonitor() { s.validator.ClientCtx, s.keyTenant.GetAddress(), deploymentPath, - cliGlobalFlags(deploymentUAktDeposit, - fmt.Sprintf("--dseq=%v", deploymentID.DSeq))..., + cliGlobalFlags(fmt.Sprintf("--dseq=%v", deploymentID.DSeq))..., ) s.Require().NoError(err) s.Require().NoError(s.waitForBlocksCommitted(7)) diff --git a/integration/persistentstorage_test.go b/integration/persistentstorage_test.go index 23af0795..8e12f9a0 100644 --- a/integration/persistentstorage_test.go +++ b/integration/persistentstorage_test.go @@ -50,7 +50,7 @@ func (s *E2EPersistentStorageDefault) TestDefaultStorageClass() { s.validator.ClientCtx, s.keyTenant.GetAddress(), deploymentPath, - cliGlobalFlags(deploymentUAktDeposit, fmt.Sprintf("--dseq=%v", deploymentID.DSeq))..., + cliGlobalFlags(fmt.Sprintf("--dseq=%v", deploymentID.DSeq))..., ) s.Require().NoError(err) s.Require().NoError(s.waitForBlocksCommitted(7)) @@ -151,7 +151,7 @@ func (s *E2EPersistentStorageBeta2) TestDedicatedStorageClass() { s.validator.ClientCtx, s.keyTenant.GetAddress(), deploymentPath, - cliGlobalFlags(deploymentUAktDeposit, fmt.Sprintf("--dseq=%v", deploymentID.DSeq))..., + cliGlobalFlags(fmt.Sprintf("--dseq=%v", deploymentID.DSeq))..., ) s.Require().NoError(err) s.Require().NoError(s.waitForBlocksCommitted(7)) From cc03eab2c7757b358f8b030b5148fc537ecbf73d Mon Sep 17 00:00:00 2001 From: Artur Troian Date: Wed, 20 Mar 2024 14:03:31 +0100 Subject: [PATCH 4/6] feat(inventory): count emptydir into total memory (#215) Signed-off-by: Artur Troian --- .../operators/clients/inventory/inventory.go | 18 +-- operator/inventory/node-discovery.go | 117 +++++++++++------- operator/inventory/nodes.go | 4 +- 3 files changed, 84 insertions(+), 55 deletions(-) diff --git a/cluster/kube/operators/clients/inventory/inventory.go b/cluster/kube/operators/clients/inventory/inventory.go index 71c213da..a862eb91 100644 --- a/cluster/kube/operators/clients/inventory/inventory.go +++ b/cluster/kube/operators/clients/inventory/inventory.go @@ -53,7 +53,7 @@ func (inv *inventory) tryAdjust(node int, res *types.Resources) (*crd.SchedulerP return nil, false, true } - if !tryAdjustMemory(&nd.Resources.Memory.Quantity, res.Memory) { + if !nd.Resources.Memory.Quantity.SubNLZ(res.Memory.Quantity) { return nil, false, true } @@ -66,9 +66,17 @@ func (inv *inventory) tryAdjust(node int, res *types.Resources) (*crd.SchedulerP } if !attrs.Persistent { - if !tryAdjustEphemeralStorage(&nd.Resources.EphemeralStorage, &res.Storage[i]) { - return nil, false, true + if attrs.Class == "ram" { + if !nd.Resources.Memory.Quantity.SubNLZ(storage.Quantity) { + return nil, false, true + } + } else { + // ephemeral storage + if !tryAdjustEphemeralStorage(&nd.Resources.EphemeralStorage, &res.Storage[i]) { + return nil, false, true + } } + continue } @@ -197,10 +205,6 @@ func tryAdjustGPU(rp *inventoryV1.GPU, res *types.GPU, sparams *crd.SchedulerPar return false } -func tryAdjustMemory(rp *inventoryV1.ResourcePair, res *types.Memory) bool { - return rp.SubNLZ(res.Quantity) -} - func tryAdjustEphemeralStorage(rp *inventoryV1.ResourcePair, res *types.Storage) bool { return rp.SubNLZ(res.Quantity) } diff --git a/operator/inventory/node-discovery.go b/operator/inventory/node-discovery.go index 476c907f..a1da597b 100644 --- a/operator/inventory/node-discovery.go +++ b/operator/inventory/node-discovery.go @@ -32,6 +32,8 @@ import ( var ( errWorkerExit = errors.New("worker finished") + + labelNvidiaComGPUPresent = fmt.Sprintf("%s.present", builder.ResourceGPUNvidia) ) type k8sPatch struct { @@ -367,7 +369,8 @@ func (dp *nodeDiscovery) monitor() error { var podsWatch watch.Interface var cfg Config var sc storageClasses - var lastPubState nodeStateEnum + + lastPubState := nodeStateRemoved gpusIDs := make(RegistryGPUVendors) currLabels := make(map[string]string) @@ -402,7 +405,7 @@ func (dp *nodeDiscovery) monitor() error { knode, err := dp.kc.CoreV1().Nodes().Get(ctx, dp.name, metav1.GetOptions{}) if err == nil { - currLabels = copyAkashLabels(knode.Labels) + currLabels = copyManagedLabels(knode.Labels) } node, err := dp.initNodeInfo(gpusIDs) @@ -431,17 +434,13 @@ func (dp *nodeDiscovery) monitor() error { } for name, pod := range currPods { - for _, container := range pod.Spec.Containers { - subAllocatedResources(&node, container.Resources.Requests) - } + subPodAllocatedResources(&node, &pod) delete(currPods, name) } for _, pod := range pods.Items { - for _, container := range pod.Spec.Containers { - addAllocatedResources(&node, container.Resources.Requests) - } + addPodAllocatedResources(&node, &pod) currPods[pod.Name] = *pod.DeepCopy() } @@ -531,9 +530,7 @@ func (dp *nodeDiscovery) monitor() error { case watch.Added: if _, exists := currPods[obj.Name]; !exists { currPods[obj.Name] = *obj.DeepCopy() - for _, container := range obj.Spec.Containers { - addAllocatedResources(&node, container.Resources.Requests) - } + addPodAllocatedResources(&node, obj) } else { currPodsInitCount-- } @@ -544,9 +541,7 @@ func (dp *nodeDiscovery) monitor() error { break } - for _, container := range pod.Spec.Containers { - subAllocatedResources(&node, container.Resources.Requests) - } + subPodAllocatedResources(&node, &pod) if currPodsInitCount > 0 { currPodsInitCount-- @@ -582,9 +577,9 @@ func (dp *nodeDiscovery) monitor() error { } if !reflect.DeepEqual(labels, currLabels) { - currLabels = copyAkashLabels(labels) + currLabels = copyManagedLabels(labels) - for key, val := range removeAkashLabels(knode.Labels) { + for key, val := range removeManagedLabels(knode.Labels) { labels[key] = val } @@ -661,45 +656,70 @@ func (dp *nodeDiscovery) initNodeInfo(gpusIds RegistryGPUVendors) (v1.Node, erro return res, nil } -func addAllocatedResources(node *v1.Node, rl corev1.ResourceList) { - for name, quantity := range rl { - switch name { - case corev1.ResourceCPU: - node.Resources.CPU.Quantity.Allocated.Add(quantity) - case corev1.ResourceMemory: - node.Resources.Memory.Quantity.Allocated.Add(quantity) - case corev1.ResourceEphemeralStorage: - node.Resources.EphemeralStorage.Allocated.Add(quantity) - case builder.ResourceGPUNvidia: - fallthrough - case builder.ResourceGPUAMD: - node.Resources.GPU.Quantity.Allocated.Add(quantity) +func addPodAllocatedResources(node *v1.Node, pod *corev1.Pod) { + for _, container := range pod.Spec.Containers { + for name, quantity := range container.Resources.Requests { + switch name { + case corev1.ResourceCPU: + node.Resources.CPU.Quantity.Allocated.Add(quantity) + case corev1.ResourceMemory: + node.Resources.Memory.Quantity.Allocated.Add(quantity) + case corev1.ResourceEphemeralStorage: + node.Resources.EphemeralStorage.Allocated.Add(quantity) + case builder.ResourceGPUNvidia: + fallthrough + case builder.ResourceGPUAMD: + node.Resources.GPU.Quantity.Allocated.Add(quantity) + } + } + + for _, vol := range pod.Spec.Volumes { + if vol.EmptyDir == nil || vol.EmptyDir.Medium != corev1.StorageMediumMemory || vol.EmptyDir.SizeLimit == nil { + continue + } + + node.Resources.Memory.Quantity.Allocated.Add(*vol.EmptyDir.SizeLimit) } } + } -func subAllocatedResources(node *v1.Node, rl corev1.ResourceList) { - for name, quantity := range rl { - switch name { - case corev1.ResourceCPU: - node.Resources.CPU.Quantity.Allocated.Sub(quantity) - case corev1.ResourceMemory: - node.Resources.Memory.Quantity.Allocated.Sub(quantity) - case corev1.ResourceEphemeralStorage: - node.Resources.EphemeralStorage.Allocated.Sub(quantity) - case builder.ResourceGPUNvidia: - fallthrough - case builder.ResourceGPUAMD: - node.Resources.GPU.Quantity.Allocated.Sub(quantity) +func subPodAllocatedResources(node *v1.Node, pod *corev1.Pod) { + for _, container := range pod.Spec.Containers { + for name, quantity := range container.Resources.Requests { + switch name { + case corev1.ResourceCPU: + node.Resources.CPU.Quantity.Allocated.Sub(quantity) + case corev1.ResourceMemory: + node.Resources.Memory.Quantity.Allocated.Sub(quantity) + case corev1.ResourceEphemeralStorage: + node.Resources.EphemeralStorage.Allocated.Sub(quantity) + case builder.ResourceGPUNvidia: + fallthrough + case builder.ResourceGPUAMD: + node.Resources.GPU.Quantity.Allocated.Sub(quantity) + } + } + + for _, vol := range pod.Spec.Volumes { + if vol.EmptyDir == nil || vol.EmptyDir.Medium != corev1.StorageMediumMemory || vol.EmptyDir.SizeLimit == nil { + continue + } + + node.Resources.Memory.Quantity.Allocated.Sub(*vol.EmptyDir.SizeLimit) } } } -func copyAkashLabels(in map[string]string) map[string]string { +func isLabelManaged(key string) bool { + return strings.HasPrefix(key, builder.AkashManagedLabelName) || key == labelNvidiaComGPUPresent +} + +func copyManagedLabels(in map[string]string) map[string]string { out := make(map[string]string, len(in)) for key, val := range in { - if !strings.HasPrefix(key, builder.AkashManagedLabelName) { + if !isLabelManaged(key) { continue } @@ -709,11 +729,11 @@ func copyAkashLabels(in map[string]string) map[string]string { return out } -func removeAkashLabels(in map[string]string) map[string]string { +func removeManagedLabels(in map[string]string) map[string]string { out := make(map[string]string) for key, val := range in { - if strings.HasPrefix(key, builder.AkashManagedLabelName) { + if isLabelManaged(key) { continue } @@ -764,6 +784,11 @@ func generateLabels(cfg Config, knode *corev1.Node, node v1.Node, sc storageClas node.Capabilities.StorageClasses = allowedSc for _, info := range node.Resources.GPU.Info { + // nvidia device plugin requires nodes to be labeled with "nvidia.com/gpu.present" + if info.Vendor == "nvidia" && res[labelNvidiaComGPUPresent] != "true" { + res[labelNvidiaComGPUPresent] = "true" + } + key := fmt.Sprintf("%s.vendor.%s.model.%s", builder.AkashServiceCapabilityGPU, info.Vendor, info.Name) if val, exists := res[key]; exists { nval, _ := strconv.ParseUint(val, 10, 32) diff --git a/operator/inventory/nodes.go b/operator/inventory/nodes.go index 11488c56..8ef95b4b 100644 --- a/operator/inventory/nodes.go +++ b/operator/inventory/nodes.go @@ -22,8 +22,8 @@ import ( type nodeStateEnum int const ( - nodeStateUpdated nodeStateEnum = iota - nodeStateRemoved + nodeStateRemoved nodeStateEnum = iota + nodeStateUpdated ) type nodeState struct { From 230b475b157dd17ec4c1489bd9ca61020f6f9100 Mon Sep 17 00:00:00 2001 From: Artur Troian Date: Wed, 20 Mar 2024 15:08:22 +0100 Subject: [PATCH 5/6] chore: bump deps (#216) - akash-api v0.0.60 - node v0.32.2 Signed-off-by: Artur Troian --- go.mod | 3 ++- go.sum | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 8485197e..f1d55892 100644 --- a/go.mod +++ b/go.mod @@ -4,13 +4,14 @@ go 1.21 require ( github.com/akash-network/akash-api v0.0.60 - github.com/akash-network/node v0.32.0 + github.com/akash-network/node v0.32.2 github.com/avast/retry-go/v4 v4.5.0 github.com/blang/semver/v4 v4.0.0 github.com/boz/go-lifecycle v0.1.1 github.com/cosmos/cosmos-sdk v0.45.16 github.com/desertbit/timer v0.0.0-20180107155436-c41aec40b27f github.com/fsnotify/fsnotify v1.7.0 + github.com/go-andiamo/splitter v1.2.5 github.com/go-kit/kit v0.12.0 github.com/go-logr/logr v1.2.4 github.com/go-logr/zapr v1.2.4 diff --git a/go.sum b/go.sum index 2e49c72f..4145690e 100644 --- a/go.sum +++ b/go.sum @@ -205,8 +205,8 @@ github.com/akash-network/ledger-go v0.14.3 h1:LCEFkTfgGA2xFMN2CtiKvXKE7dh0QSM77P github.com/akash-network/ledger-go v0.14.3/go.mod h1:NfsjfFvno9Kaq6mfpsKz4sqjnAVVEsVsnBJfKB4ueAs= github.com/akash-network/ledger-go/cosmos v0.14.4 h1:h3WiXmoKKs9wkj1LHcJ12cLjXXg6nG1fp+UQ5+wu/+o= github.com/akash-network/ledger-go/cosmos v0.14.4/go.mod h1:SjAfheQTE4rWk0ir+wjbOWxwj8nc8E4AZ08NdsvYG24= -github.com/akash-network/node v0.32.0 h1:xcHjZaVES7O9zp/2CodOIpOaEouq2Jqy1ZpWVJuYY+w= -github.com/akash-network/node v0.32.0/go.mod h1:wGWR5LewA1nq+O/Sib5kYfBCLVCAqS83oWZ/RuHGYDY= +github.com/akash-network/node v0.32.2 h1:/o/hgOYUwTmAOp2ENpx0E/L2qaSCdw3Sh6TBXEhKLTE= +github.com/akash-network/node v0.32.2/go.mod h1:Rwfi2uCMQr+3dsBMl8Mcd4p4nlG+1sVOM8TDbbeMXsE= github.com/alecthomas/participle/v2 v2.0.0-alpha7 h1:cK4vjj0VSgb3lN1nuKA5F7dw+1s1pWBe5bx7nNCnN+c= github.com/alecthomas/participle/v2 v2.0.0-alpha7/go.mod h1:NumScqsC42o9x+dGj8/YqsIfhrIQjFEOFovxotbBirA= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= @@ -628,6 +628,8 @@ github.com/globalsign/mgo v0.0.0-20180905125535-1ca0a4f7cbcb/go.mod h1:xkRDCp4j0 github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= +github.com/go-andiamo/splitter v1.2.5 h1:P3NovWMY2V14TJJSolXBvlOmGSZo3Uz+LtTl2bsV/eY= +github.com/go-andiamo/splitter v1.2.5/go.mod h1:8WHU24t9hcMKU5FXDQb1hysSEC/GPuivIp0uKY1J8gw= github.com/go-asn1-ber/asn1-ber v1.3.1/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= github.com/go-chi/chi/v5 v5.0.0/go.mod h1:BBug9lr0cqtdAhsu6R4AAdvufI0/XBzAQSsUqJpoZOs= From fb2b2137cc9fce5fcd1774edfa2e4575720d8acc Mon Sep 17 00:00:00 2001 From: Artur Troian Date: Wed, 20 Mar 2024 11:32:23 +0100 Subject: [PATCH 6/6] feat(shm): add e2e tests Signed-off-by: Artur Troian --- _run/kube/provider.yaml | 5 + integration/e2e_test.go | 1 + integration/storageclassram_test.go | 136 ++++++++++++++++++ integration/test_helpers.go | 4 + script/usd_pricing_oracle.sh | 1 + .../deployment/deployment-v2-storage-ram.yaml | 41 ++++++ 6 files changed, 188 insertions(+) create mode 100644 integration/storageclassram_test.go create mode 100644 testdata/deployment/deployment-v2-storage-ram.yaml diff --git a/_run/kube/provider.yaml b/_run/kube/provider.yaml index 57371ebf..1052c528 100644 --- a/_run/kube/provider.yaml +++ b/_run/kube/provider.yaml @@ -11,3 +11,8 @@ attributes: value: true - key: capabilities/storage/2/class value: beta2 + - key: capabilities/storage/3/class + value: ram + - key: capabilities/storage/3/persistent + value: false + diff --git a/integration/e2e_test.go b/integration/e2e_test.go index f345087d..94a48ebf 100644 --- a/integration/e2e_test.go +++ b/integration/e2e_test.go @@ -579,6 +579,7 @@ func TestIntegrationTestSuite(t *testing.T) { suite.Run(t, new(E2EPersistentStorageDefault)) suite.Run(t, new(E2EPersistentStorageBeta2)) suite.Run(t, new(E2EPersistentStorageDeploymentUpdate)) + suite.Run(t, new(E2EStorageClassRam)) suite.Run(t, new(E2EMigrateHostname)) suite.Run(t, new(E2EJWTServer)) suite.Run(t, new(E2ECustomCurrency)) diff --git a/integration/storageclassram_test.go b/integration/storageclassram_test.go new file mode 100644 index 00000000..a6a48b18 --- /dev/null +++ b/integration/storageclassram_test.go @@ -0,0 +1,136 @@ +//go:build e2e + +package integration + +import ( + "context" + "fmt" + "path/filepath" + "time" + + "github.com/cosmos/cosmos-sdk/client/flags" + sdktest "github.com/cosmos/cosmos-sdk/testutil" + "github.com/gyuho/linux-inspect/df" + + dtypes "github.com/akash-network/akash-api/go/node/deployment/v1beta3" + mtypes "github.com/akash-network/akash-api/go/node/market/v1beta4" + clitestutil "github.com/akash-network/node/testutil/cli" + deploycli "github.com/akash-network/node/x/deployment/client/cli" + mcli "github.com/akash-network/node/x/market/client/cli" + + ptestutil "github.com/akash-network/provider/testutil/provider" +) + +type E2EStorageClassRam struct { + IntegrationTestSuite +} + +type dfOutput struct { + Mount string `json:"mount"` + Spacetotal string `json:"spacetotal"` +} + +type dfResult []dfOutput + +func (s *E2EStorageClassRam) TestRAM() { + deploymentPath, err := filepath.Abs("../testdata/deployment/deployment-v2-storage-ram.yaml") + s.Require().NoError(err) + + deploymentID := dtypes.DeploymentID{ + Owner: s.keyTenant.GetAddress().String(), + DSeq: uint64(100), + } + + // Create Deployments + res, err := deploycli.TxCreateDeploymentExec( + s.validator.ClientCtx, + s.keyTenant.GetAddress(), + deploymentPath, + cliGlobalFlags(fmt.Sprintf("--dseq=%v", deploymentID.DSeq))..., + ) + s.Require().NoError(err) + s.Require().NoError(s.waitForBlocksCommitted(7)) + clitestutil.ValidateTxSuccessful(s.T(), s.validator.ClientCtx, res.Bytes()) + + bidID := mtypes.MakeBidID( + mtypes.MakeOrderID(dtypes.MakeGroupID(deploymentID, 1), 1), + s.keyProvider.GetAddress(), + ) + + _, err = mcli.QueryBidExec(s.validator.ClientCtx, bidID) + s.Require().NoError(err) + + _, err = mcli.TxCreateLeaseExec( + s.validator.ClientCtx, + bidID, + s.keyTenant.GetAddress(), + cliGlobalFlags()..., + ) + s.Require().NoError(err) + s.Require().NoError(s.waitForBlocksCommitted(2)) + clitestutil.ValidateTxSuccessful(s.T(), s.validator.ClientCtx, res.Bytes()) + + lid := bidID.LeaseID() + + // Send Manifest to Provider ---------------------------------------------- + _, err = ptestutil.TestSendManifest( + s.validator.ClientCtx.WithOutputFormat("json"), + lid.BidID(), + deploymentPath, + fmt.Sprintf("--%s=%s", flags.FlagFrom, s.keyTenant.GetAddress().String()), + fmt.Sprintf("--%s=%s", flags.FlagHome, s.validator.ClientCtx.HomeDir), + ) + s.Require().NoError(err) + s.Require().NoError(s.waitForBlocksCommitted(2)) + + var out sdktest.BufferWriter + leaseShellCtx, cancel := context.WithTimeout(s.ctx, time.Minute) + defer cancel() + + extraArgs := []string{ + fmt.Sprintf("--%s=%s", flags.FlagFrom, s.keyTenant.GetAddress().String()), + fmt.Sprintf("--%s=%s", flags.FlagHome, s.validator.ClientCtx.HomeDir), + } + + logged := make(map[string]struct{}) + + cmd := `df --all --sync --block-size=1024 --output=source,target,fstype,file,itotal,iavail,iused,ipcent,size,avail,used,pcent` + + // Loop until we get a shell or the context times out + for { + select { + case <-leaseShellCtx.Done(): + s.T().Fatalf("context is done while trying to run lease-shell: %v", leaseShellCtx.Err()) + return + default: + } + out, err = ptestutil.TestLeaseShell(leaseShellCtx, s.validator.ClientCtx.WithOutputFormat("json"), extraArgs, lid, 0, false, false, "web", cmd) + if err != nil { + _, hasBeenLogged := logged[err.Error()] + if !hasBeenLogged { + // Don't spam an error message in a test, that is very annoying + s.T().Logf("encountered %v, waiting before next attempt", err) + logged[err.Error()] = struct{}{} + } + time.Sleep(100 * time.Millisecond) + continue // Try again until the context times out + } + s.Require().NotNil(s.T(), out) + break + } + + dfRes, err := df.Parse(out.String()) + s.Require().NoError(err) + + var found *df.Row + + for i := range dfRes { + if dfRes[i].MountedOn == "/dev/shm" { + found = &dfRes[i] + break + } + } + + s.Require().NotNil(found) + s.Require().Equal(int64(131072), found.TotalBlocks) +} diff --git a/integration/test_helpers.go b/integration/test_helpers.go index e794b43e..2ded90c4 100644 --- a/integration/test_helpers.go +++ b/integration/test_helpers.go @@ -30,6 +30,10 @@ attributes: value: true - key: capabilities/storage/2/class value: beta2 + - key: capabilities/storage/3/persistent + value: false + - key: capabilities/storage/3/class + value: ram ` ) diff --git a/script/usd_pricing_oracle.sh b/script/usd_pricing_oracle.sh index 71da020f..291733b7 100755 --- a/script/usd_pricing_oracle.sh +++ b/script/usd_pricing_oracle.sh @@ -50,6 +50,7 @@ STORAGE_USD_SCALE[default]=0.02 STORAGE_USD_SCALE[beta1]=0.02 STORAGE_USD_SCALE[beta2]=0.03 STORAGE_USD_SCALE[beta3]=0.04 +STORAGE_USD_SCALE[ram]=0.02 # ram storage class is for tmp disks like /dev/shm, making assumption for now pricing is same of for regular RAM # used later for validation MAX_INT64=9223372036854775807 diff --git a/testdata/deployment/deployment-v2-storage-ram.yaml b/testdata/deployment/deployment-v2-storage-ram.yaml new file mode 100644 index 00000000..a5ba34d4 --- /dev/null +++ b/testdata/deployment/deployment-v2-storage-ram.yaml @@ -0,0 +1,41 @@ +--- +version: "2.0" +services: + web: + image: ghcr.io/akash-network/e2e-test + expose: + - port: 8080 + as: 80 + to: + - global: true + accept: + - webdistest.localhost + params: + storage: + shm: + mount: /dev/shm +profiles: + compute: + web: + resources: + cpu: + units: "0.01" + memory: + size: "128Mi" + storage: + - size: "512Mi" + - name: shm + size: "256Mi" + attributes: + class: ram + placement: + global: + pricing: + web: + denom: uakt + amount: 10 +deployment: + web: + global: + profile: web + count: 1