Skip to content

Commit

Permalink
NETOBSERV-559: Using batchAPIs to help with CPU and memory resources
Browse files Browse the repository at this point in the history
Signed-off-by: Mohamed Mahmoud <[email protected]>
  • Loading branch information
msherif1234 committed Jan 26, 2024
1 parent 726bdb7 commit 781299c
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ type ebpfFlowFetcher interface {
io.Closer
Register(iface ifaces.Interface) error

LookupAndDeleteMap() map[ebpf.BpfFlowId][]ebpf.BpfFlowMetrics
LookupAndDeleteMap(forceGC bool) map[ebpf.BpfFlowId][]ebpf.BpfFlowMetrics
DeleteMapsStaleEntries(timeOut time.Duration)
ReadRingBuf() (ringbuf.Record, error)
}
Expand Down
34 changes: 29 additions & 5 deletions pkg/ebpf/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"io/fs"
"os"
"runtime"
"strconv"
"strings"
"syscall"
Expand Down Expand Up @@ -395,17 +396,40 @@ func (m *FlowFetcher) ReadRingBuf() (ringbuf.Record, error) {
// This way we avoid missing packets that could be updated on the
// ebpf side while we process/aggregate them here
// Changing this method invocation by BatchLookupAndDelete could improve performance
// TODO: detect whether BatchLookupAndDelete is supported (Kernel>=5.6) and use it selectively
// Supported Lookup/Delete operations by kernel: https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md
// Race conditions here causes that some flows are lost in high-load scenarios
func (m *FlowFetcher) LookupAndDeleteMap() map[BpfFlowId][]BpfFlowMetrics {
func (m *FlowFetcher) LookupAndDeleteMap(forceGC bool) map[BpfFlowId][]BpfFlowMetrics {
flowMap := m.objects.AggregatedFlows

iterator := flowMap.Iterate()
var flows = make(map[BpfFlowId][]BpfFlowMetrics, m.cacheMaxSize)
var metrics = make([]BpfFlowMetrics, m.cacheMaxSize*ebpf.MustPossibleCPU())
var id BpfFlowId
var metrics []BpfFlowMetrics
var ids = make([]BpfFlowId, m.cacheMaxSize)
var cursor = ebpf.BatchCursor{}

for {
count, err := flowMap.BatchLookupAndDelete(&cursor, ids, metrics, nil)
if err == nil || errors.Is(err, ebpf.ErrKeyNotExist) {
for i, id := range ids[:count] {
for j := 0; j < ebpf.MustPossibleCPU(); j++ {
flows[id] = append(flows[id], metrics[i*ebpf.MustPossibleCPU()+j])
}
}

if forceGC {
runtime.KeepAlive(flows) // Keeps a reference to flow map so that map isn't collected
runtime.GC()
}
return flows
}
if err != nil || count == 0 {
log.Debugf("failed to use BatchLookupAndDelete api: %v fall back to use iterate and delete api", err)
break
}
}

// fallback to iterate and delete if the BatchLookupAndDelete() not supported,
// reinitialize iterator to start from the beginning of the map
iterator := flowMap.Iterate()
// Changing Iterate+Delete by LookupAndDelete would prevent some possible race conditions
// TODO: detect whether LookupAndDelete is supported (Kernel>=4.20) and use it selectively
for iterator.Next(&id, &metrics) {
Expand Down
4 changes: 2 additions & 2 deletions pkg/flow/tracer_map.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ type MapTracer struct {
}

type mapFetcher interface {
LookupAndDeleteMap() map[ebpf.BpfFlowId][]ebpf.BpfFlowMetrics
LookupAndDeleteMap(forceGC bool) map[ebpf.BpfFlowId][]ebpf.BpfFlowMetrics
DeleteMapsStaleEntries(timeOut time.Duration)
}

Expand Down Expand Up @@ -95,7 +95,7 @@ func (m *MapTracer) evictFlows(ctx context.Context, forceGC bool, forwardFlows c

var forwardingFlows []*Record
laterFlowNs := uint64(0)
for flowKey, flowMetrics := range m.mapFetcher.LookupAndDeleteMap() {
for flowKey, flowMetrics := range m.mapFetcher.LookupAndDeleteMap(forceGC) {
aggregatedMetrics := m.aggregate(flowMetrics)
// we ignore metrics that haven't been aggregated (e.g. all the mapped values are ignored)
if aggregatedMetrics.EndMonoTimeTs == 0 {
Expand Down
2 changes: 1 addition & 1 deletion pkg/test/tracer_fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (m *TracerFake) Register(iface ifaces.Interface) error {
return nil
}

func (m *TracerFake) LookupAndDeleteMap() map[ebpf.BpfFlowId][]ebpf.BpfFlowMetrics {
func (m *TracerFake) LookupAndDeleteMap(_ bool) map[ebpf.BpfFlowId][]ebpf.BpfFlowMetrics {
select {
case r := <-m.mapLookups:
return r
Expand Down

0 comments on commit 781299c

Please sign in to comment.