Skip to content

Commit

Permalink
SKS-2194: Fix labeling virtual machines (#163)
Browse files Browse the repository at this point in the history
  • Loading branch information
haijianyang authored Dec 8, 2023
1 parent b2fb22f commit a30b311
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 9 deletions.
32 changes: 24 additions & 8 deletions controllers/elfmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,10 +448,8 @@ func (r *ElfMachineReconciler) reconcileNormal(ctx *context.MachineContext) (rec
}

// Reconcile the ElfMachine's Labels using the cluster info
if len(vm.Labels) == 0 {
if ok, err := r.reconcileLabels(ctx, vm); !ok {
return reconcile.Result{}, errors.Wrapf(err, "failed to reconcile labels")
}
if ok, err := r.reconcileLabels(ctx, vm); !ok {
return reconcile.Result{}, errors.Wrapf(err, "failed to reconcile labels")
}

// Reconcile the ElfMachine's providerID using the VM's UUID.
Expand Down Expand Up @@ -1188,10 +1186,26 @@ func (r *ElfMachineReconciler) getBootstrapData(ctx *context.MachineContext) (st
}

func (r *ElfMachineReconciler) reconcileLabels(ctx *context.MachineContext, vm *models.VM) (bool, error) {
creatorLabel, err := ctx.VMService.UpsertLabel(towerresources.GetVMLabelManaged(), "true")
if err != nil {
return false, errors.Wrapf(err, "failed to upsert label "+towerresources.GetVMLabelManaged())
capeManagedLabelKey := towerresources.GetVMLabelManaged()
capeManagedLabel := getLabelFromCache(capeManagedLabelKey)
if capeManagedLabel == nil {
var err error
capeManagedLabel, err = ctx.VMService.UpsertLabel(capeManagedLabelKey, "true")
if err != nil {
return false, errors.Wrapf(err, "failed to upsert label "+towerresources.GetVMLabelManaged())
}

setLabelInCache(capeManagedLabel)
}

// If the virtual machine has been labeled with managed label,
// it is considered that all labels have been labeled.
for i := 0; i < len(vm.Labels); i++ {
if *vm.Labels[i].ID == *capeManagedLabel.ID {
return true, nil
}
}

namespaceLabel, err := ctx.VMService.UpsertLabel(towerresources.GetVMLabelNamespace(), ctx.ElfMachine.Namespace)
if err != nil {
return false, errors.Wrapf(err, "failed to upsert label "+towerresources.GetVMLabelNamespace())
Expand All @@ -1209,13 +1223,15 @@ func (r *ElfMachineReconciler) reconcileLabels(ctx *context.MachineContext, vm *
}
}

labelIDs := []string{*namespaceLabel.ID, *clusterNameLabel.ID, *creatorLabel.ID}
labelIDs := []string{*namespaceLabel.ID, *clusterNameLabel.ID, *capeManagedLabel.ID}
if machineutil.IsControlPlaneMachine(ctx.ElfMachine) {
labelIDs = append(labelIDs, *vipLabel.ID)
}
r.Logger.V(3).Info("Upsert labels", "labelIds", labelIDs)
_, err = ctx.VMService.AddLabelsToVM(*vm.ID, labelIDs)
if err != nil {
delLabelCache(capeManagedLabelKey)

return false, err
}
return true, nil
Expand Down
53 changes: 53 additions & 0 deletions controllers/elfmachine_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3447,6 +3447,59 @@ var _ = Describe("ElfMachineReconciler", func() {
Expect(err).ToNot(HaveOccurred())
})
})

// Exercises reconcileLabels through three consecutive scenarios that share
// the same VM and label cache: a failing AddLabelsToVM call, a successful
// labeling pass, and a VM that already carries the managed label.
Context("reconcileLabels", func() {
It("should add labels to the VM", func() {
// Label fixtures returned by the mocked UpsertLabel calls below.
capeManagedLabel := &models.Label{
ID: service.TowerString("managed-label"),
Key: service.TowerString(towerresources.GetVMLabelManaged()),
Value: service.TowerString("true"),
}
namespaceLabel := &models.Label{
ID: service.TowerString("namespace-label"),
Key: service.TowerString(towerresources.GetVMLabelNamespace()),
Value: service.TowerString(elfMachine.Namespace),
}
clusterNameLabel := &models.Label{
ID: service.TowerString("cluster-label"),
Key: service.TowerString(towerresources.GetVMLabelClusterName()),
Value: service.TowerString(elfCluster.Name),
}

vm := fake.NewTowerVMFromElfMachine(elfMachine)
ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md)
machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService)
machineContext.VMService = mockVMService

// Scenario 1: the managed label is pre-seeded in the cache, so no
// UpsertLabel expectation is registered for it. AddLabelsToVM fails,
// the error must be returned unchanged, and the cached managed label
// must have been removed.
unexpectedError := errors.New("unexpected error")
setLabelInCache(capeManagedLabel)
mockVMService.EXPECT().UpsertLabel(*namespaceLabel.Key, *namespaceLabel.Value).Return(namespaceLabel, nil)
mockVMService.EXPECT().UpsertLabel(*clusterNameLabel.Key, *clusterNameLabel.Value).Return(clusterNameLabel, nil)
mockVMService.EXPECT().AddLabelsToVM(*vm.ID, gomock.InAnyOrder([]string{*capeManagedLabel.ID, *namespaceLabel.ID, *clusterNameLabel.ID})).Return(nil, unexpectedError)
reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService}
ok, err := reconciler.reconcileLabels(machineContext, vm)
Expect(ok).To(BeFalse())
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(Equal(unexpectedError.Error()))
Expect(getLabelFromCache(*capeManagedLabel.Key)).To(BeNil())

// Scenario 2: the cache entry is gone, so the managed label is expected
// to be upserted again. AddLabelsToVM succeeds and the managed label
// must be back in the cache afterwards.
mockVMService.EXPECT().UpsertLabel(*capeManagedLabel.Key, *capeManagedLabel.Value).Return(capeManagedLabel, nil)
mockVMService.EXPECT().UpsertLabel(*namespaceLabel.Key, *namespaceLabel.Value).Return(namespaceLabel, nil)
mockVMService.EXPECT().UpsertLabel(*clusterNameLabel.Key, *clusterNameLabel.Value).Return(clusterNameLabel, nil)
mockVMService.EXPECT().AddLabelsToVM(*vm.ID, gomock.InAnyOrder([]string{*capeManagedLabel.ID, *namespaceLabel.ID, *clusterNameLabel.ID})).Return(nil, nil)
reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService}
ok, err = reconciler.reconcileLabels(machineContext, vm)
Expect(ok).To(BeTrue())
Expect(err).ToNot(HaveOccurred())
Expect(getLabelFromCache(*capeManagedLabel.Key)).To(Equal(capeManagedLabel))

// Scenario 3: the VM already carries the managed label. No further mock
// expectations are registered, so reconcileLabels is expected to succeed
// without calling the VM service.
vm.Labels = []*models.NestedLabel{{ID: capeManagedLabel.ID}}
reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService}
ok, err = reconciler.reconcileLabels(machineContext, vm)
Expect(ok).To(BeTrue())
Expect(err).ToNot(HaveOccurred())
})
})
})

func waitStaticIPAllocationSpec(mockNewVMService func(ctx goctx.Context, auth infrav1.Tower, logger logr.Logger) (service.VMService, error),
Expand Down
34 changes: 33 additions & 1 deletion controllers/tower_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ func getKeyForPGCache(pgName string) string {
// setPGCache saves the specified placement group to the memory,
// which can reduce access to the Tower service.
//
// The placement group is stored by value under the key derived from its name
// via getKeyForPGCache, with pgCacheDuration passed as the entry's lifespan.
// pg and pg.Name are dereferenced without a nil check, so both must be non-nil.
func setPGCache(pg *models.VMPlacementGroup) {
	// Store exactly once, using the placement-group lifespan rather than the
	// unrelated GPU cache lifespan.
	inMemoryCache.Set(getKeyForPGCache(*pg.Name), *pg, pgCacheDuration)
}

// delPGCaches deletes the specified placement group caches.
Expand All @@ -200,6 +200,38 @@ func getPGFromCache(pgName string) *models.VMPlacementGroup {
return nil
}

// labelCacheDuration is the lifespan of label cache.
// It is the duration passed to inMemoryCache.Set when setLabelInCache stores a label.
const labelCacheDuration = 10 * time.Minute

// getKeyForLabelCache builds the in-memory cache key for the label
// identified by labelKey, in the form "label:<labelKey>:cache".
func getKeyForLabelCache(labelKey string) string {
	const keyFormat = "label:%s:cache"

	return fmt.Sprintf(keyFormat, labelKey)
}

// setLabelInCache stores a copy of the given label in the in-memory cache,
// keyed by the label's key, so later lookups can avoid a Tower request.
// The entry is written with labelCacheDuration as its lifespan.
func setLabelInCache(label *models.Label) {
	cacheKey := getKeyForLabelCache(*label.Key)
	inMemoryCache.Set(cacheKey, *label, labelCacheDuration)
}

// delLabelCache deletes the specified label cache.
func delLabelCache(labelKey string) {
inMemoryCache.Delete(getKeyForLabelCache(labelKey))
}

// getLabelFromCache looks up the label identified by labelKey in the
// in-memory cache. It returns a pointer to a copy of the cached label, or nil
// when there is no entry or the entry is not a models.Label.
func getLabelFromCache(labelKey string) *models.Label {
	cacheKey := getKeyForLabelCache(labelKey)

	cached, found := inMemoryCache.Get(cacheKey)
	if !found {
		return nil
	}

	label, isLabel := cached.(models.Label)
	if !isLabel {
		// The entry holds unexpected data; evict it so it is not hit again.
		inMemoryCache.Delete(cacheKey)

		return nil
	}

	return &label
}

/* GPU */

// gpuCacheDuration is the lifespan of gpu cache.
Expand Down
14 changes: 14 additions & 0 deletions controllers/tower_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,20 @@ var _ = Describe("TowerCache", func() {
Expect(getPGFromCache(pgName)).To(BeNil())
})

// Verifies the label cache round trip: miss before set, hit after set,
// and miss again after delete.
It("Label Cache", func() {
	label := &models.Label{
		ID:    service.TowerString("label-id"),
		Key:   service.TowerString("label-key"),
		Value: service.TowerString("label-name"),
	}

	// The precondition must query the label cache itself; querying the
	// placement-group cache here would pass regardless of the label cache state.
	Expect(getLabelFromCache(*label.Key)).To(BeNil())
	setLabelInCache(label)
	Expect(getLabelFromCache(*label.Key)).To(Equal(label))
	delLabelCache(*label.Key)
	Expect(getLabelFromCache(*label.Key)).To(BeNil())
})

It("GPU Cache", func() {
gpuID := "gpu"
gpuVMInfo := models.GpuVMInfo{ID: service.TowerString(gpuID)}
Expand Down

0 comments on commit a30b311

Please sign in to comment.