From f996239c644099903dbc581cac32c203bc71f6b6 Mon Sep 17 00:00:00 2001 From: Manikandan R Date: Thu, 5 Sep 2024 13:58:01 +0530 Subject: [PATCH] Addressed review comments --- pkg/common/errors.go | 5 + pkg/scheduler/objects/allocation.go | 36 ---- pkg/scheduler/objects/application.go | 15 +- pkg/scheduler/objects/events/ask_events.go | 36 ---- .../objects/events/ask_events_test.go | 58 ------ pkg/scheduler/objects/preemption.go | 32 +-- pkg/scheduler/objects/preemption_test.go | 182 +++++++----------- 7 files changed, 78 insertions(+), 286 deletions(-) diff --git a/pkg/common/errors.go b/pkg/common/errors.go index 27ca0c74a..18c37cde9 100644 --- a/pkg/common/errors.go +++ b/pkg/common/errors.go @@ -24,4 +24,9 @@ import "errors" var ( // InvalidQueueName returned when queue name is invalid InvalidQueueName = errors.New("invalid queue name, max 64 characters consisting of alphanumeric characters and '-', '_', '#', '@', '/', ':' allowed") + + PreemptionPreconditionsFailed = errors.New("PreemptionPreconditionsFailed") + PreemptionDoesNotGuarantee = errors.New("PreemptionQueueGuaranteesCheckFailed") + PreemptionShortfall = errors.New("PreemptionHelpedButShortfallOfResources") + PreemptionDoesNotHelp = errors.New("PreemptionDoesNotHelp") ) diff --git a/pkg/scheduler/objects/allocation.go b/pkg/scheduler/objects/allocation.go index d2c56e985..8f2450e9c 100644 --- a/pkg/scheduler/objects/allocation.go +++ b/pkg/scheduler/objects/allocation.go @@ -573,39 +573,3 @@ func (a *Allocation) setUserQuotaCheckPassed() { a.askEvents.SendRequestFitsInUserQuota(a.allocationKey, a.applicationID, a.allocatedResource) } } - -func (a *Allocation) setPreemptionPreConditionsCheckPassed() { - a.Lock() - defer a.Unlock() - if a.preemptionPreConditionsCheckFailed { - a.preemptionPreConditionsCheckFailed = false - a.askEvents.SendPreemptionPreConditionsCheckPassed(a.allocationKey, a.applicationID, a.allocatedResource) - } -} - -func (a *Allocation) setPreemptionPreConditionsCheckFailed() { - a.Lock() - defer a.Unlock() - if !a.preemptionPreConditionsCheckFailed { - a.preemptionPreConditionsCheckFailed = true - a.askEvents.SendPreemptionPreConditionsCheckFailed(a.allocationKey, a.applicationID, a.allocatedResource) - } -} - -func (a *Allocation) setPreemptionQueueGuaranteesCheckPassed() { - a.Lock() - defer a.Unlock() - if a.preemptionQueueGuaranteesCheckFailed { - a.preemptionQueueGuaranteesCheckFailed = false - a.askEvents.SendPreemptionQueueGuaranteesCheckPassed(a.allocationKey, a.applicationID, a.allocatedResource) - } -} - -func (a *Allocation) setPreemptionQueueGuaranteesCheckFailed() { - a.Lock() - defer a.Unlock() - if !a.preemptionQueueGuaranteesCheckFailed { - a.preemptionQueueGuaranteesCheckFailed = true - a.askEvents.SendPreemptionQueueGuaranteesCheckFailed(a.allocationKey, a.applicationID, a.allocatedResource) - } -} diff --git a/pkg/scheduler/objects/application.go b/pkg/scheduler/objects/application.go index 5430e49cc..3da921b8e 100644 --- a/pkg/scheduler/objects/application.go +++ b/pkg/scheduler/objects/application.go @@ -60,9 +60,8 @@ const ( Soft string = "Soft" Hard string = "Hard" - NotEnoughUserQuota = "Not enough user quota" - NotEnoughQueueQuota = "Not enough queue quota" - PreemptionDoesNotHelp = "Preemption does not help" + NotEnoughUserQuota = "Not enough user quota" + NotEnoughQueueQuota = "Not enough queue quota" ) type PlaceholderData struct { @@ -1057,7 +1056,7 @@ func (sa *Application) tryAllocate(headRoom *resources.Resource, allowPreemption // preemption occurred, and possibly reservation return result } - request.LogAllocationFailure(PreemptionDoesNotHelp, true) + request.LogAllocationFailure(common.PreemptionDoesNotHelp.Error(), true) } } request.LogAllocationFailure(NotEnoughQueueQuota, true) // error message MUST be constant! @@ -1124,7 +1123,7 @@ func (sa *Application) tryAllocate(headRoom *resources.Resource, allowPreemption return result } } - request.LogAllocationFailure(PreemptionDoesNotHelp, true) + request.LogAllocationFailure(common.PreemptionDoesNotHelp.Error(), true) } } } @@ -1400,13 +1399,9 @@ func (sa *Application) tryPreemption(headRoom *resources.Resource, preemptionDel // validate prerequisites for preemption of an ask and mark ask for preemption if successful if !preemptor.CheckPreconditions() { - ask.setPreemptionPreConditionsCheckFailed() - log.Log(log.SchedApplication).Debug("Preemption Preconditions check failed", - zap.String("applicationID", sa.ApplicationID), - zap.String("ask", ask.GetAllocationKey())) + ask.LogAllocationFailure(common.PreemptionPreconditionsFailed.Error(), true) return nil, false } - ask.setPreemptionPreConditionsCheckPassed() // track time spent trying preemption tryPreemptionStart := time.Now() diff --git a/pkg/scheduler/objects/events/ask_events.go b/pkg/scheduler/objects/events/ask_events.go index 7b7417bbb..64265c6dc 100644 --- a/pkg/scheduler/objects/events/ask_events.go +++ b/pkg/scheduler/objects/events/ask_events.go @@ -94,42 +94,6 @@ func (ae *AskEvents) SendPredicatesFailed(allocKey, appID string, predicateError ae.eventSystem.AddEvent(event) } -func (ae *AskEvents) SendPreemptionPreConditionsCheckPassed(allocKey, appID string, allocatedResource *resources.Resource) { - if !ae.eventSystem.IsEventTrackingEnabled() { - return - } - message := fmt.Sprintf("Preemption pre conditions check passed for ask %s belongs to app %s", allocKey, appID) - event := events.CreateRequestEventRecord(allocKey, appID, message, allocatedResource) - ae.eventSystem.AddEvent(event) -} - -func (ae *AskEvents) SendPreemptionPreConditionsCheckFailed(allocKey, appID string, allocatedResource *resources.Resource) { - if !ae.eventSystem.IsEventTrackingEnabled() { - return - } - message := fmt.Sprintf("Preemption pre conditions check failed for ask %s belongs to app %s", allocKey, appID) - event := events.CreateRequestEventRecord(allocKey, appID, message, allocatedResource) - ae.eventSystem.AddEvent(event) -} - -func (ae *AskEvents) SendPreemptionQueueGuaranteesCheckPassed(allocKey, appID string, allocatedResource *resources.Resource) { - if !ae.eventSystem.IsEventTrackingEnabled() { - return - } - message := fmt.Sprintf("Preemption queue guarantees check passed for ask %s belongs to app %s", allocKey, appID) - event := events.CreateRequestEventRecord(allocKey, appID, message, allocatedResource) - ae.eventSystem.AddEvent(event) -} - -func (ae *AskEvents) SendPreemptionQueueGuaranteesCheckFailed(allocKey, appID string, allocatedResource *resources.Resource) { - if !ae.eventSystem.IsEventTrackingEnabled() { - return - } - message := fmt.Sprintf("Preemption queue guarantees check failed for ask %s belongs to app %s", allocKey, appID) - event := events.CreateRequestEventRecord(allocKey, appID, message, allocatedResource) - ae.eventSystem.AddEvent(event) -} - func NewAskEvents(evt events.EventSystem) *AskEvents { return newAskEventsWithRate(evt, 15*time.Second, 1) } diff --git a/pkg/scheduler/objects/events/ask_events_test.go b/pkg/scheduler/objects/events/ask_events_test.go index fcad45bd7..add2fc539 100644 --- a/pkg/scheduler/objects/events/ask_events_test.go +++ b/pkg/scheduler/objects/events/ask_events_test.go @@ -112,64 +112,6 @@ func TestRequestFitsInUserQuotaEvent(t *testing.T) { assert.Equal(t, "Request 'alloc-0' fits in the available user quota", event.Message) } -func TestPreemptionPreConditionsCheckEvents(t *testing.T) { - eventSystem := mock.NewEventSystemDisabled() - events := NewAskEvents(eventSystem) - events.SendPreemptionPreConditionsCheckFailed(allocKey, appID, requestResource) - assert.Equal(t, 0, len(eventSystem.Events)) - - eventSystem = mock.NewEventSystem() - events = NewAskEvents(eventSystem) - events.SendPreemptionPreConditionsCheckFailed(allocKey, appID, requestResource) - assert.Equal(t, 1, len(eventSystem.Events)) - event := eventSystem.Events[0] - assert.Equal(t, "alloc-0", event.ObjectID) - assert.Equal(t, appID, event.ReferenceID) - assert.Equal(t, si.EventRecord_REQUEST, event.Type) - assert.Equal(t, si.EventRecord_NONE, event.EventChangeType) - assert.Equal(t, si.EventRecord_DETAILS_NONE, event.EventChangeDetail) - assert.Equal(t, "Preemption pre conditions check failed for ask alloc-0 belongs to app app-0", event.Message) - - events.SendPreemptionPreConditionsCheckPassed(allocKey, appID, requestResource) - assert.Equal(t, 2, len(eventSystem.Events)) - event = eventSystem.Events[1] - assert.Equal(t, "alloc-0", event.ObjectID) - assert.Equal(t, appID, event.ReferenceID) - assert.Equal(t, si.EventRecord_REQUEST, event.Type) - assert.Equal(t, si.EventRecord_NONE, event.EventChangeType) - assert.Equal(t, si.EventRecord_DETAILS_NONE, event.EventChangeDetail) - assert.Equal(t, "Preemption pre conditions check passed for ask alloc-0 belongs to app app-0", event.Message) -} - -func TestPreemptionQueueGuaranteesCheckEvents(t *testing.T) { - eventSystem := mock.NewEventSystemDisabled() - events := NewAskEvents(eventSystem) - events.SendPreemptionQueueGuaranteesCheckFailed(allocKey, appID, requestResource) - assert.Equal(t, 0, len(eventSystem.Events)) - - eventSystem = mock.NewEventSystem() - events = NewAskEvents(eventSystem) - events.SendPreemptionQueueGuaranteesCheckFailed(allocKey, appID, requestResource) - assert.Equal(t, 1, len(eventSystem.Events)) - event := eventSystem.Events[0] - assert.Equal(t, "alloc-0", event.ObjectID) - assert.Equal(t, appID, event.ReferenceID) - assert.Equal(t, si.EventRecord_REQUEST, event.Type) - assert.Equal(t, si.EventRecord_NONE, event.EventChangeType) - assert.Equal(t, si.EventRecord_DETAILS_NONE, event.EventChangeDetail) - assert.Equal(t, "Preemption queue guarantees check failed for ask alloc-0 belongs to app app-0", event.Message) - - events.SendPreemptionQueueGuaranteesCheckPassed(allocKey, appID, requestResource) - assert.Equal(t, 2, len(eventSystem.Events)) - event = eventSystem.Events[1] - assert.Equal(t, "alloc-0", event.ObjectID) - assert.Equal(t, appID, event.ReferenceID) - assert.Equal(t, si.EventRecord_REQUEST, event.Type) - assert.Equal(t, si.EventRecord_NONE, event.EventChangeType) - assert.Equal(t, si.EventRecord_DETAILS_NONE, event.EventChangeDetail) - assert.Equal(t, "Preemption queue guarantees check passed for ask alloc-0 belongs to app app-0", event.Message) -} - func TestPredicateFailedEvents(t *testing.T) { resource := resources.NewResourceFromMap(map[string]resources.Quantity{"first": 1}) eventSystem := mock.NewEventSystemDisabled() diff --git a/pkg/scheduler/objects/preemption.go b/pkg/scheduler/objects/preemption.go index 45cf45992..42436a417 100644 --- a/pkg/scheduler/objects/preemption.go +++ b/pkg/scheduler/objects/preemption.go @@ -26,6 +26,7 @@ import ( "go.uber.org/zap" + "github.com/apache/yunikorn-core/pkg/common" "github.com/apache/yunikorn-core/pkg/common/resources" "github.com/apache/yunikorn-core/pkg/log" "github.com/apache/yunikorn-core/pkg/plugins" @@ -177,12 +178,6 @@ func (p *Preemptor) initWorkingState() { p.allocationsByNode = allocationsByNode p.queueByAlloc = queueByAlloc p.nodeAvailableMap = nodeAvailableMap - log.Log(log.SchedPreemption).Info("Preemption triggered. Working state ", - zap.String("ask", p.ask.GetAllocationKey()), - zap.String("ask queue path", p.queuePath), - zap.Int("allocationsByNode", len(p.allocationsByNode)), - zap.Int("queueByAlloc", len(p.queueByAlloc)), - zap.Int("nodeAvailableMap", len(p.nodeAvailableMap))) } // checkPreemptionQueueGuarantees verifies that it's possible to free enough resources to fit the given ask @@ -536,11 +531,6 @@ func (p *Preemptor) tryNodes() (string, []*Allocation, bool) { } // identify which victims and in which order should be tried if idx, victims := p.calculateVictimsByNode(nodeAvailable, allocations); victims != nil { - log.Log(log.SchedPreemption).Debug("Node wise Potential victims collected for preemption", - zap.String("ask", p.ask.GetAllocationKey()), - zap.String("ask queue path", p.queuePath), - zap.String("node", nodeID), - zap.Int("victims count", len(victims))) victimsByNode[nodeID] = victims keys := make([]string, 0) for _, victim := range victims { @@ -568,12 +558,9 @@ func (p *Preemptor) tryNodes() (string, []*Allocation, bool) { func (p *Preemptor) TryPreemption() (*AllocationResult, bool) { // validate that sufficient capacity can be freed if !p.checkPreemptionQueueGuarantees() { - p.ask.setPreemptionQueueGuaranteesCheckFailed() - log.Log(log.SchedPreemption).Debug("Preemption doesn't guarantee to free up resources", zap.String("ask", p.ask.GetAllocationKey()), - zap.String("ask queue path", p.queuePath)) + p.ask.LogAllocationFailure(common.PreemptionDoesNotGuarantee.Error(), true) return nil, false } - p.ask.setPreemptionQueueGuaranteesCheckPassed() // ensure required data structures are populated p.initWorkingState() @@ -584,10 +571,6 @@ func (p *Preemptor) TryPreemption() (*AllocationResult, bool) { // no preemption possible return nil, false } - log.Log(log.SchedPreemption).Debug("Node candidate selected for preemption", zap.String("ask", p.ask.GetAllocationKey()), - zap.String("ask queue path", p.queuePath), - zap.String("node", nodeID), - zap.Int("victims count", len(victims))) // look for additional victims in case we have not yet made enough capacity in the queue extraVictims, ok := p.calculateAdditionalVictims(victims) @@ -595,10 +578,6 @@ func (p *Preemptor) TryPreemption() (*AllocationResult, bool) { // not enough resources were preempted return nil, false } - log.Log(log.SchedPreemption).Debug("Additional victims chosen for preemption", zap.String("ask", p.ask.GetAllocationKey()), - zap.String("ask queue path", p.queuePath), - zap.String("node", nodeID), - zap.Int("additional victims count", len(extraVictims))) victims = append(victims, extraVictims...) if len(victims) == 0 { return nil, false @@ -637,12 +616,7 @@ func (p *Preemptor) TryPreemption() (*AllocationResult, bool) { if p.ask.GetAllocatedResource().StrictlyGreaterThanOnlyExisting(victimsTotalResource) { // there is shortfall, so preemption doesn't help - log.Log(log.SchedPreemption).Debug("preemption has freed up some resources, but not good enough to run the ask", - zap.String("ask", p.ask.GetAllocationKey()), - zap.String("ask queue path", p.queuePath), - zap.String("node", nodeID), - zap.String("ask resources", p.ask.GetAllocatedResource().String()), - zap.String("freed resources", victimsTotalResource.String())) + p.ask.LogAllocationFailure(common.PreemptionShortfall.Error(), true) return nil, false } diff --git a/pkg/scheduler/objects/preemption_test.go b/pkg/scheduler/objects/preemption_test.go index 7b1b6fbd5..9c16f34c4 100644 --- a/pkg/scheduler/objects/preemption_test.go +++ b/pkg/scheduler/objects/preemption_test.go @@ -25,18 +25,16 @@ import ( "gotest.tools/v3/assert" + "github.com/apache/yunikorn-core/pkg/common" "github.com/apache/yunikorn-core/pkg/common/resources" - EventsMock "github.com/apache/yunikorn-core/pkg/events/mock" "github.com/apache/yunikorn-core/pkg/mock" "github.com/apache/yunikorn-core/pkg/plugins" - Events "github.com/apache/yunikorn-core/pkg/scheduler/objects/events" "github.com/apache/yunikorn-scheduler-interface/lib/go/si" ) const alloc = "alloc" func TestCheckPreconditions(t *testing.T) { - eventSystem := EventsMock.NewEventSystem() node := newNode("node1", map[string]resources.Quantity{"first": 5}) iterator := getNodeIteratorFn(node) rootQ, err := createRootQueue(map[string]string{"first": "5"}) @@ -47,7 +45,6 @@ func TestCheckPreconditions(t *testing.T) { app.SetQueue(childQ) childQ.applications[appID1] = app ask := newAllocationAsk("alloc1", appID1, resources.NewResourceFromMap(map[string]resources.Quantity{"first": 5})) - ask.askEvents = Events.NewAskEvents(eventSystem) ask.allowPreemptOther = true ask.createTime = time.Now().Add(-1 * time.Minute) err = app.AddAllocationAsk(ask) @@ -93,36 +90,27 @@ func TestCheckPreconditions(t *testing.T) { getNode := func(nodeID string) *Node { return nodeMap[nodeID] } - preemptionAttemptsRemaining := 2 + preemptionAttemptsRemaining := 1 result := app.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 2}), true, 1*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode) assert.Check(t, result == nil, "unexpected result") - assert.Equal(t, 2, len(eventSystem.Events)) - event := eventSystem.Events[0] - assert.Equal(t, "alloc1", event.ObjectID) - assert.Equal(t, appID1, event.ReferenceID) - assert.Equal(t, si.EventRecord_REQUEST, event.Type) - assert.Equal(t, si.EventRecord_NONE, event.EventChangeType) - assert.Equal(t, si.EventRecord_DETAILS_NONE, event.EventChangeDetail) - assert.Equal(t, "Preemption pre conditions check failed for ask alloc1 belongs to app app-1", event.Message) - + log := ask.GetAllocationLog() + preemptionPreconditionsFailed := false + PreemptionDoesNotHelp := false + for _, l := range log { + if l.Message == common.PreemptionPreconditionsFailed.Error() { + preemptionPreconditionsFailed = true + } else if l.Message == common.PreemptionDoesNotHelp.Error() { + PreemptionDoesNotHelp = true + } + } + assert.Assert(t, preemptionPreconditionsFailed, true) + assert.Assert(t, PreemptionDoesNotHelp, true) ask.preemptCheckTime = time.Now().Add(-1 * time.Minute) assert.Assert(t, preemptor.CheckPreconditions(), "preconditions failed") - ask.preemptCheckTime = time.Now().Add(-1 * time.Minute) - result = app.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 2}), true, 1*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode) - assert.Check(t, result == nil, "unexpected result") - assert.Equal(t, 4, len(eventSystem.Events)) - event = eventSystem.Events[2] - assert.Equal(t, "alloc1", event.ObjectID) - assert.Equal(t, appID1, event.ReferenceID) - assert.Equal(t, si.EventRecord_REQUEST, event.Type) - assert.Equal(t, si.EventRecord_NONE, event.EventChangeType) - assert.Equal(t, si.EventRecord_DETAILS_NONE, event.EventChangeDetail) - assert.Equal(t, "Preemption pre conditions check passed for ask alloc1 belongs to app app-1", event.Message) assert.Assert(t, !preemptor.CheckPreconditions(), "preconditions succeeded on successive run") } func TestCheckPreemptionQueueGuarantees(t *testing.T) { - eventSystem := EventsMock.NewEventSystem() node := newNode("node1", map[string]resources.Quantity{"first": 20}) iterator := getNodeIteratorFn(node) rootQ, err := createRootQueue(map[string]string{"first": "20"}) @@ -151,7 +139,6 @@ func TestCheckPreemptionQueueGuarantees(t *testing.T) { app2.SetQueue(childQ2) childQ2.applications[appID2] = app2 ask3 := newAllocationAsk("alloc3", appID2, resources.NewResourceFromMap(map[string]resources.Quantity{"first": 5})) - ask3.askEvents = Events.NewAskEvents(eventSystem) assert.NilError(t, app2.AddAllocationAsk(ask3)) childQ2.incPendingResource(ask3.GetAllocatedResource()) headRoom := resources.NewResourceFromMap(map[string]resources.Quantity{"first": 10}) @@ -159,35 +146,13 @@ func TestCheckPreemptionQueueGuarantees(t *testing.T) { // positive case assert.Assert(t, preemptor.checkPreemptionQueueGuarantees(), "queue guarantees fail") - assert.Equal(t, 0, len(eventSystem.Events)) // verify too large of a resource will not succeed ask3.allocatedResource = resources.NewResourceFromMap(map[string]resources.Quantity{"first": 25}) assert.Assert(t, !preemptor.checkPreemptionQueueGuarantees(), "queue guarantees did not fail") result, _ := preemptor.TryPreemption() assert.Assert(t, result == nil, "no result") - assert.Equal(t, 1, len(eventSystem.Events)) - event := eventSystem.Events[0] - assert.Equal(t, "alloc3", event.ObjectID) - assert.Equal(t, appID2, event.ReferenceID) - assert.Equal(t, si.EventRecord_REQUEST, event.Type) - assert.Equal(t, si.EventRecord_NONE, event.EventChangeType) - assert.Equal(t, si.EventRecord_DETAILS_NONE, event.EventChangeDetail) - assert.Equal(t, "Preemption queue guarantees check failed for ask alloc3 belongs to app app-2", event.Message) - - // again positive case - ask3.allocatedResource = resources.NewResourceFromMap(map[string]resources.Quantity{"first": 5}) - assert.Assert(t, preemptor.checkPreemptionQueueGuarantees(), "queue guarantees fail") - result, _ = preemptor.TryPreemption() - assert.Assert(t, result != nil, "result is nil") - assert.Equal(t, 2, len(eventSystem.Events)) - event = eventSystem.Events[1] - assert.Equal(t, "alloc3", event.ObjectID) - assert.Equal(t, appID2, event.ReferenceID) - assert.Equal(t, si.EventRecord_REQUEST, event.Type) - assert.Equal(t, si.EventRecord_NONE, event.EventChangeType) - assert.Equal(t, si.EventRecord_DETAILS_NONE, event.EventChangeDetail) - assert.Equal(t, "Preemption queue guarantees check passed for ask alloc3 belongs to app app-2", event.Message) + assert.Equal(t, ask3.GetAllocationLog()[0].Message, common.PreemptionDoesNotGuarantee.Error()) } func TestCheckPreemptionQueueGuaranteesWithNoGuaranteedResources(t *testing.T) { @@ -237,7 +202,16 @@ func TestCheckPreemptionQueueGuaranteesWithNoGuaranteedResources(t *testing.T) { childQ2.incPendingResource(ask3.GetAllocatedResource()) headRoom := resources.NewResourceFromMap(map[string]resources.Quantity{"first": 10}) preemptor := NewPreemptor(app2, headRoom, 30*time.Second, ask3, iterator(), false) - assert.Equal(t, tt.expected, preemptor.checkPreemptionQueueGuarantees(), "unexpected resultType") + result, ok := preemptor.TryPreemption() + assert.Equal(t, tt.expected, ok, "unexpected resultType") + if tt.expected { + assert.Equal(t, result != nil, true, "unexpected resultType") + assert.Equal(t, len(ask3.GetAllocationLog()), 0) + } else { + assert.Equal(t, result == nil, true, "unexpected resultType") + assert.Equal(t, len(ask3.GetAllocationLog()), 1) + assert.Equal(t, ask3.GetAllocationLog()[0].Message, common.PreemptionDoesNotGuarantee.Error()) + } }) } } @@ -295,6 +269,7 @@ func TestTryPreemption(t *testing.T) { assert.Equal(t, "alloc3", result.Request.GetAllocationKey(), "wrong alloc") assert.Check(t, alloc1.IsPreempted(), "alloc1 not preempted") assert.Check(t, !alloc2.IsPreempted(), "alloc2 preempted") + assert.Equal(t, len(ask3.GetAllocationLog()), 0) } // TestTryPreemptionOnNode Test try preemption on node with simple queue hierarchy. Since Node doesn't have enough resources to accomodate, preemption happens because of node resource constraint. @@ -360,6 +335,7 @@ func TestTryPreemptionOnNode(t *testing.T) { assert.Equal(t, nodeID2, result.NodeID, "wrong node") assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") + assert.Equal(t, len(ask3.GetAllocationLog()), 0) } // TestTryPreemption_NodeWithCapacityLesserThanAsk Test try preemption on node whose capacity is lesser than ask resource requirements with simple queue hierarchy. Since Node won't accommodate the ask even after preempting all allocations, there is no use in considering the node. @@ -414,6 +390,7 @@ func TestTryPreemption_NodeWithCapacityLesserThanAsk(t *testing.T) { assert.Equal(t, ok, false, "no victims found") assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, !alloc2.IsPreempted(), "alloc2 preempted") + assert.Equal(t, len(ask3.GetAllocationLog()), 0) } // TestTryPreemptionOnNodeWithOGParentAndUGPreemptor Test try preemption on node with simple queue hierarchy. Since Node doesn't have enough resources to accomodate, preemption happens because of node resource constraint. @@ -481,6 +458,7 @@ func TestTryPreemptionOnNodeWithOGParentAndUGPreemptor(t *testing.T) { assert.Equal(t, "alloc7", result.Request.allocationKey, "wrong alloc") assert.Equal(t, nodeID2, result.NodeID, "wrong node") assert.Check(t, node2.GetAllocation("alloc1").IsPreempted(), "alloc1 preempted") + assert.Equal(t, len(ask3.GetAllocationLog()), 0) } // TestTryPreemptionOnQueue Test try preemption on queue with simple queue hierarchy. Since Node has enough resources to accomodate, preemption happens because of queue resource constraint. @@ -545,6 +523,7 @@ func TestTryPreemptionOnQueue(t *testing.T) { assert.Equal(t, nodeID2, result.NodeID, "wrong node") assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") + assert.Equal(t, len(ask3.GetAllocationLog()), 0) } // TestTryPreemption_VictimsAvailable_InsufficientResource Test try preemption on queue with simple queue hierarchy. Since Node has enough resources to accomodate, preemption happens because of queue resource constraint. @@ -600,6 +579,8 @@ func TestTryPreemption_VictimsAvailable_InsufficientResource(t *testing.T) { assert.Equal(t, ok, false, "no victims found") assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, !alloc2.IsPreempted(), "alloc2 preempted") + log := ask3.GetAllocationLog() + assert.Equal(t, log[0].Message, common.PreemptionShortfall.Error()) } // TestTryPreemption_VictimsOnDifferentNodes_InsufficientResource Test try preemption on queue with simple queue hierarchy. Since Node doesn't have enough resources to accomodate, preemption happens because of node resource constraint. @@ -665,6 +646,8 @@ func TestTryPreemption_VictimsOnDifferentNodes_InsufficientResource(t *testing.T assert.Equal(t, ok, false, "no victims found") assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, !alloc2.IsPreempted(), "alloc2 preempted") + log := ask3.GetAllocationLog() + assert.Equal(t, log[0].Message, common.PreemptionShortfall.Error()) } // TestTryPreemption_VictimsAvailableOnDifferentNodes Test try preemption on queue with simple queue hierarchy. Since Node doesn't have enough resources to accomodate, preemption happens because of node resource constraint. @@ -729,6 +712,8 @@ func TestTryPreemption_VictimsAvailableOnDifferentNodes(t *testing.T) { assert.Equal(t, ok, false, "no victims found") assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, !alloc2.IsPreempted(), "alloc2 preempted") + log := ask3.GetAllocationLog() + assert.Equal(t, log[0].Message, common.PreemptionShortfall.Error()) } // TestTryPreemption_OnQueue_VictimsOnDifferentNodes Test try preemption on queue with simple queue hierarchy. Since Node has enough resources to accomodate, preemption happens because of queue resource constraint.xw @@ -817,6 +802,7 @@ func TestTryPreemption_OnQueue_VictimsOnDifferentNodes(t *testing.T) { assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") assert.Check(t, alloc4.IsPreempted(), "alloc2 not preempted") + assert.Equal(t, len(ask3.GetAllocationLog()), 0) } // TestTryPreemption_OnQueue_VictimsAvailable_LowerPriority Test try preemption on queue with simple queue hierarchy. Since Node has enough resources to accomodate, preemption happens because of queue resource constraint. @@ -906,6 +892,7 @@ func TestTryPreemption_OnQueue_VictimsAvailable_LowerPriority(t *testing.T) { assert.Check(t, alloc1.IsPreempted(), "alloc1 not preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") assert.Check(t, !alloc4.IsPreempted(), "alloc4 preempted") + assert.Equal(t, len(ask3.GetAllocationLog()), 0) } // TestTryPreemption_AskResTypesDifferent_GuaranteedSetOnPreemptorSide Test try preemption with 2 level queue hierarchy. @@ -978,6 +965,7 @@ func TestTryPreemption_AskResTypesDifferent_GuaranteedSetOnPreemptorSide(t *test assert.Equal(t, nodeID1, alloc2.nodeID, "wrong node") assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") + assert.Equal(t, len(ask3.GetAllocationLog()), 0) } // TestTryPreemption_OnNode_AskResTypesDifferent_GuaranteedSetOnPreemptorSide Test try preemption with 2 level queue hierarchy. Since Node doesn't have enough resources to accomodate, preemption happens because of node resource constraint. @@ -1049,6 +1037,7 @@ func TestTryPreemption_OnNode_AskResTypesDifferent_GuaranteedSetOnPreemptorSide( assert.Equal(t, nodeID1, alloc2.nodeID, "wrong node") assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") + assert.Equal(t, len(ask3.GetAllocationLog()), 0) } // TestTryPreemption_AskResTypesDifferent_GuaranteedSetOnVictimAndPreemptorSides Test try preemption with 2 level queue hierarchy. @@ -1082,17 +1071,7 @@ func TestTryPreemption_AskResTypesDifferent_GuaranteedSetOnVictimAndPreemptorSid assert.NilError(t, err) _, err = createManagedQueueGuaranteed(parentQ2, "child3", false, nil, nil) assert.NilError(t, err) - - app1 := newApplication(appID1, "default", "root.parent.parent2.child2") - app1.SetQueue(childQ2) - childQ2.applications[appID1] = app1 - app2 := newApplication(appID2, "default", "root.parent.parent2.child2") - app2.SetQueue(childQ2) - childQ2.applications[appID2] = app2 - app3 := newApplication(appID3, "default", "root.parent.parent2.child2") - app3.SetQueue(childQ2) - childQ2.applications[appID3] = app3 - + app1, app2, app3 := createVictimApplications(childQ2) for i := 5; i < 8; i++ { askN := newAllocationAsk(alloc+strconv.Itoa(i), appID1, resources.NewResourceFromMap(map[string]resources.Quantity{"mem": 100})) askN.createTime = time.Now().Add(-2 * time.Minute) @@ -1157,6 +1136,7 @@ func TestTryPreemption_AskResTypesDifferent_GuaranteedSetOnVictimAndPreemptorSid assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") assert.Check(t, alloc3.IsPreempted(), "alloc3 not preempted") + assert.Equal(t, len(ask4.GetAllocationLog()), 0) } // TestTryPreemption_OnNode_AskResTypesDifferent_GuaranteedSetOnVictimAndPreemptorSides Test try preemption with 2 level queue hierarchy. Since Node doesn't have enough resources to accomodate, preemption happens because of node resource constraint. @@ -1188,17 +1168,7 @@ func TestTryPreemption_OnNode_AskResTypesDifferent_GuaranteedSetOnVictimAndPreem assert.NilError(t, err) _, err = createManagedQueueGuaranteed(parentQ2, "child3", false, nil, nil) assert.NilError(t, err) - - app1 := newApplication(appID1, "default", "root.parent.parent2.child2") - app1.SetQueue(childQ2) - childQ2.applications[appID1] = app1 - app2 := newApplication(appID2, "default", "root.parent.parent2.child2") - app2.SetQueue(childQ2) - childQ2.applications[appID2] = app2 - app3 := newApplication(appID3, "default", "root.parent.parent2.child2") - app3.SetQueue(childQ2) - childQ2.applications[appID3] = app3 - + app1, app2, app3 := createVictimApplications(childQ2) for i := 5; i < 8; i++ { askN := newAllocationAsk(alloc+strconv.Itoa(i), appID1, resources.NewResourceFromMap(map[string]resources.Quantity{"mem": 100})) askN.createTime = time.Now().Add(-2 * time.Minute) @@ -1262,6 +1232,20 @@ func TestTryPreemption_OnNode_AskResTypesDifferent_GuaranteedSetOnVictimAndPreem assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") assert.Check(t, alloc3.IsPreempted(), "alloc3 not preempted") + assert.Equal(t, len(ask4.GetAllocationLog()), 0) +} + +func createVictimApplications(childQ2 *Queue) (*Application, *Application, *Application) { + app1 := newApplication(appID1, "default", "root.parent.parent2.child2") + app1.SetQueue(childQ2) + childQ2.applications[appID1] = app1 + app2 := newApplication(appID2, "default", "root.parent.parent2.child2") + app2.SetQueue(childQ2) + childQ2.applications[appID2] = app2 + app3 := newApplication(appID3, "default", "root.parent.parent2.child2") + app3.SetQueue(childQ2) + childQ2.applications[appID3] = app3 + return app1, app2, app3 } // TestTryPreemption_AskResTypesSame_GuaranteedSetOnPreemptorSide Test try preemption with 2 level queue hierarchy. Since Node doesn't have enough resources to accomodate, preemption happens because of node resource constraint. @@ -1295,17 +1279,7 @@ func TestTryPreemption_AskResTypesSame_GuaranteedSetOnPreemptorSide(t *testing.T assert.NilError(t, err) _, err = createManagedQueueGuaranteed(parentQ2, "child3", false, nil, nil) assert.NilError(t, err) - - app1 := newApplication(appID1, "default", "root.parent.parent2.child2") - app1.SetQueue(childQ2) - childQ2.applications[appID1] = app1 - app2 := newApplication(appID2, "default", "root.parent.parent2.child2") - app2.SetQueue(childQ2) - childQ2.applications[appID2] = app2 - app3 := newApplication(appID3, "default", "root.parent.parent2.child2") - app3.SetQueue(childQ2) - childQ2.applications[appID3] = app3 - + app1, app2, app3 := createVictimApplications(childQ2) for i := 5; i < 8; i++ { askN := newAllocationAsk(alloc+strconv.Itoa(i), appID1, resources.NewResourceFromMap(map[string]resources.Quantity{"gpu": 100})) askN.createTime = time.Now().Add(-2 * time.Minute) @@ -1370,6 +1344,7 @@ func TestTryPreemption_AskResTypesSame_GuaranteedSetOnPreemptorSide(t *testing.T assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") assert.Check(t, alloc3.IsPreempted(), "alloc3 not preempted") + assert.Equal(t, len(ask4.GetAllocationLog()), 0) } // TestTryPreemption_OnNode_AskResTypesSame_GuaranteedSetOnPreemptorSide Test try preemption with 2 level queue hierarchy. Since Node doesn't have enough resources to accomodate, preemption happens because of node resource constraint. @@ -1401,17 +1376,7 @@ func TestTryPreemption_OnNode_AskResTypesSame_GuaranteedSetOnPreemptorSide(t *te assert.NilError(t, err) _, err = createManagedQueueGuaranteed(parentQ2, "child3", false, nil, nil) assert.NilError(t, err) - - app1 := newApplication(appID1, "default", "root.parent.parent2.child2") - app1.SetQueue(childQ2) - childQ2.applications[appID1] = app1 - app2 := newApplication(appID2, "default", "root.parent.parent2.child2") - app2.SetQueue(childQ2) - childQ2.applications[appID2] = app2 - app3 := newApplication(appID3, "default", "root.parent.parent2.child2") - app3.SetQueue(childQ2) - childQ2.applications[appID3] = app3 - + app1, app2, app3 := createVictimApplications(childQ2) for i := 5; i < 8; i++ { askN := newAllocationAsk(alloc+strconv.Itoa(i), appID1, resources.NewResourceFromMap(map[string]resources.Quantity{"gpu": 100})) askN.createTime = time.Now().Add(-2 * time.Minute) @@ -1475,6 +1440,7 @@ func TestTryPreemption_OnNode_AskResTypesSame_GuaranteedSetOnPreemptorSide(t *te assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") assert.Check(t, alloc3.IsPreempted(), "alloc3 not preempted") + assert.Equal(t, len(ask4.GetAllocationLog()), 0) } // TestTryPreemption_AskResTypesSame_GuaranteedSetOnVictimAndPreemptorSides Test try preemption with 2 level queue hierarchy. @@ -1508,17 +1474,7 @@ func TestTryPreemption_AskResTypesSame_GuaranteedSetOnVictimAndPreemptorSides(t assert.NilError(t, err) _, err = createManagedQueueGuaranteed(parentQ2, "child3", false, nil, nil) assert.NilError(t, err) - - app1 := newApplication(appID1, "default", "root.parent.parent2.child2") - app1.SetQueue(childQ2) - childQ2.applications[appID1] = app1 - app2 := newApplication(appID2, "default", "root.parent.parent2.child2") - app2.SetQueue(childQ2) - childQ2.applications[appID2] = app2 - app3 := newApplication(appID3, "default", "root.parent.parent2.child2") - app3.SetQueue(childQ2) - childQ2.applications[appID3] = app3 - + app1, app2, app3 := createVictimApplications(childQ2) for i := 5; i < 8; i++ { askN := newAllocationAsk(alloc+strconv.Itoa(i), appID1, resources.NewResourceFromMap(map[string]resources.Quantity{"gpu": 100})) askN.createTime = time.Now().Add(-2 * time.Minute) @@ -1584,6 +1540,7 @@ func TestTryPreemption_AskResTypesSame_GuaranteedSetOnVictimAndPreemptorSides(t assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") assert.Check(t, alloc3.IsPreempted(), "alloc3 not preempted") + assert.Equal(t, len(ask4.GetAllocationLog()), 0) } // TestTryPreemption_OnNode_AskResTypesSame_GuaranteedSetOnVictimAndPreemptorSides Test try preemption with 2 level queue hierarchy. Since Node doesn't have enough resources to accomodate, preemption happens because of node resource constraint. @@ -1615,17 +1572,7 @@ func TestTryPreemption_OnNode_AskResTypesSame_GuaranteedSetOnVictimAndPreemptorS assert.NilError(t, err) _, err = createManagedQueueGuaranteed(parentQ2, "child3", false, nil, nil) assert.NilError(t, err) - - app1 := newApplication(appID1, "default", "root.parent.parent2.child2") - app1.SetQueue(childQ2) - childQ2.applications[appID1] = app1 - app2 := newApplication(appID2, "default", "root.parent.parent2.child2") - app2.SetQueue(childQ2) - childQ2.applications[appID2] = app2 - app3 := newApplication(appID3, "default", "root.parent.parent2.child2") - app3.SetQueue(childQ2) - childQ2.applications[appID3] = app3 - + app1, app2, app3 := createVictimApplications(childQ2) for i := 5; i < 8; i++ { askN := newAllocationAsk(alloc+strconv.Itoa(i), appID1, resources.NewResourceFromMap(map[string]resources.Quantity{"gpu": 100})) askN.createTime = time.Now().Add(-2 * time.Minute) @@ -1690,6 +1637,7 @@ func TestTryPreemption_OnNode_AskResTypesSame_GuaranteedSetOnVictimAndPreemptorS assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted") assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted") assert.Check(t, alloc3.IsPreempted(), "alloc3 not preempted") + assert.Equal(t, len(ask4.GetAllocationLog()), 0) } // TestTryPreemption_OnNode_UGParent_With_UGPreemptorChild_GNotSetOnVictimChild_As_Siblings Test try preemption with 2 level queue hierarchy. Since Node doesn't have enough resources to accomodate, preemption happens because of node resource constraint.