Skip to content

Commit

Permalink
[YUNIKORN-2837] Log & Send Events, Improve logging (#957)
Browse files Browse the repository at this point in the history
Closes: #957

Signed-off-by: Manikandan R <[email protected]>
  • Loading branch information
manirajv06 committed Sep 10, 2024
1 parent 7c51e82 commit 3a712d4
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 95 deletions.
12 changes: 7 additions & 5 deletions pkg/common/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ package common

import "errors"

// Common errors
var (
// InvalidQueueName returned when queue name is invalid
InvalidQueueName = errors.New("invalid queue name, max 64 characters consisting of alphanumeric characters and '-', '_', '#', '@', '/', ':' allowed")
)
// InvalidQueueName returned when queue name is invalid
var InvalidQueueName = errors.New("invalid queue name, max 64 characters consisting of alphanumeric characters and '-', '_', '#', '@', '/', ':' allowed")

const PreemptionPreconditionsFailed = "Preemption preconditions failed"
const PreemptionDoesNotGuarantee = "Preemption queue guarantees check failed"
const PreemptionShortfall = "Preemption helped but short of resources"
const PreemptionDoesNotHelp = "Preemption does not help"
3 changes: 3 additions & 0 deletions pkg/scheduler/objects/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -1056,6 +1056,7 @@ func (sa *Application) tryAllocate(headRoom *resources.Resource, allowPreemption
// preemption occurred, and possibly reservation
return result
}
request.LogAllocationFailure(common.PreemptionDoesNotHelp, true)
}
}
request.LogAllocationFailure(NotEnoughQueueQuota, true) // error message MUST be constant!
Expand Down Expand Up @@ -1122,6 +1123,7 @@ func (sa *Application) tryAllocate(headRoom *resources.Resource, allowPreemption
return result
}
}
request.LogAllocationFailure(common.PreemptionDoesNotHelp, true)
}
}
}
Expand Down Expand Up @@ -1394,6 +1396,7 @@ func (sa *Application) tryPreemption(headRoom *resources.Resource, preemptionDel

// validate prerequisites for preemption of an ask and mark ask for preemption if successful
if !preemptor.CheckPreconditions() {
ask.LogAllocationFailure(common.PreemptionPreconditionsFailed, true)
return nil, false
}

Expand Down
48 changes: 26 additions & 22 deletions pkg/scheduler/objects/application_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2009,6 +2009,7 @@ func TestTryAllocatePreemptQueue(t *testing.T) {
result3 := app2.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 0}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
assert.Assert(t, result3 == nil, "result3 not expected")
assert.Assert(t, !alloc2.IsPreempted(), "alloc2 should not have been preempted")
assertAllocationLog(t, ask3)

// pass the time and try again
ask3.createTime = ask3.createTime.Add(-30 * time.Second)
Expand Down Expand Up @@ -2068,28 +2069,25 @@ func TestTryAllocatePreemptNode(t *testing.T) {
preemptionAttemptsRemaining := 10

// consume capacity with 'unlimited' app
result00 := app0.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 40}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
assert.Assert(t, result00 != nil, "result00 expected")
alloc00 := result00.Request
assert.Assert(t, alloc00 != nil, "alloc00 expected")
alloc00.SetNodeID(result00.NodeID)
result01 := app0.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 39}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
assert.Assert(t, result01 != nil, "result01 expected")
alloc01 := result01.Request
assert.Assert(t, alloc01 != nil, "alloc01 expected")
alloc01.SetNodeID(result01.NodeID)
for _, r := range []*resources.Resource{resources.NewResourceFromMap(map[string]resources.Quantity{"first": 40}), resources.NewResourceFromMap(map[string]resources.Quantity{"first": 39})} {
result0 := app0.tryAllocate(r, true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
assert.Assert(t, result0 != nil, "result0 expected")
alloc0 := result0.Request
assert.Assert(t, alloc0 != nil, "alloc0 expected")
alloc0.SetNodeID(result0.NodeID)
}

// consume remainder of space but not quota
result1 := app1.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 28}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
assert.Assert(t, result1 != nil, "result1 expected")
alloc1 := result1.Request
assert.Assert(t, alloc1 != nil, "alloc1 expected")
alloc1.SetNodeID(result1.NodeID)
result2 := app1.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 23}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
assert.Assert(t, result2 != nil, "result2 expected")
alloc2 := result2.Request
assert.Assert(t, alloc2 != nil, "alloc2 expected")
alloc2.SetNodeID(result2.NodeID)
allocs := make([]*Allocation, 0)
for _, r := range []*resources.Resource{resources.NewResourceFromMap(map[string]resources.Quantity{"first": 28}), resources.NewResourceFromMap(map[string]resources.Quantity{"first": 23})} {
var alloc1 *Allocation
result1 := app1.tryAllocate(r, true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
assert.Assert(t, result1 != nil, "result1 expected")
alloc1 = result1.Request
assert.Assert(t, result1.Request != nil, "alloc1 expected")
alloc1.SetNodeID(result1.NodeID)
allocs = append(allocs, alloc1)
}

// on first attempt, should see a reservation since we're after the reservation timeout
ask3.createTime = ask3.createTime.Add(-10 * time.Second)
Expand All @@ -2099,18 +2097,24 @@ func TestTryAllocatePreemptNode(t *testing.T) {
assert.Assert(t, alloc3 != nil, "alloc3 not expected")
assert.Equal(t, "node1", result3.NodeID, "wrong node assignment")
assert.Equal(t, Reserved, result3.ResultType, "expected reservation")
assert.Assert(t, !alloc2.IsPreempted(), "alloc2 should not have been preempted")
assert.Assert(t, !allocs[1].IsPreempted(), "alloc2 should not have been preempted")
err = node1.Reserve(app2, ask3)
assert.NilError(t, err)

// preemption delay not yet passed, so preemption should fail
result3 = app2.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 18}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
assert.Assert(t, result3 == nil, "result3 expected")
assert.Assert(t, !allocs[1].IsPreempted(), "alloc1 should have been preempted")
assertAllocationLog(t, ask3)

// pass the time and try again
ask3.createTime = ask3.createTime.Add(-30 * time.Second)
result3 = app2.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 18}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
assert.Assert(t, result3 != nil, "result3 expected")
assert.Equal(t, Reserved, result3.ResultType, "expected reservation")
alloc3 = result3.Request
assert.Assert(t, alloc3 != nil, "alloc3 expected")
assert.Assert(t, alloc1.IsPreempted(), "alloc1 should have been preempted")
assert.Assert(t, allocs[0].IsPreempted(), "alloc1 should have been preempted")
}

func TestMaxAskPriority(t *testing.T) {
Expand Down
4 changes: 3 additions & 1 deletion pkg/scheduler/objects/preemption.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"go.uber.org/zap"

"github.com/apache/yunikorn-core/pkg/common"
"github.com/apache/yunikorn-core/pkg/common/resources"
"github.com/apache/yunikorn-core/pkg/log"
"github.com/apache/yunikorn-core/pkg/plugins"
Expand Down Expand Up @@ -203,7 +204,6 @@ func (p *Preemptor) checkPreemptionQueueGuarantees() bool {
}
}
}

return false
}

Expand Down Expand Up @@ -558,6 +558,7 @@ func (p *Preemptor) tryNodes() (string, []*Allocation, bool) {
func (p *Preemptor) TryPreemption() (*AllocationResult, bool) {
// validate that sufficient capacity can be freed
if !p.checkPreemptionQueueGuarantees() {
p.ask.LogAllocationFailure(common.PreemptionDoesNotGuarantee, true)
return nil, false
}

Expand Down Expand Up @@ -615,6 +616,7 @@ func (p *Preemptor) TryPreemption() (*AllocationResult, bool) {

if p.ask.GetAllocatedResource().StrictlyGreaterThanOnlyExisting(victimsTotalResource) {
// there is shortfall, so preemption doesn't help
p.ask.LogAllocationFailure(common.PreemptionShortfall, true)
return nil, false
}

Expand Down
Loading

0 comments on commit 3a712d4

Please sign in to comment.