Skip to content

Commit

Permalink
Merge pull request snabbco#1212 from Igalia/isolcpus-check
Browse files Browse the repository at this point in the history
Add more system performance diagnostics
  • Loading branch information
wingo authored Mar 5, 2019
2 parents 9f37939 + a27d8b3 commit 89e855c
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 74 deletions.
23 changes: 22 additions & 1 deletion src/lib/README.numa.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@ for some reason the current process is not bound to a NUMA node. See
[../doc/performance-tuning.md] for more notes on getting good
performance out of your Snabb program.

— Function **bind_to_cpu** *cpu*
— Function **bind_to_cpu** *cpu* *skip_perf_checks*
Bind the current process to *cpu*, arranging for it to only ever be
run on that CPU. Additionally, call **bind_to_numa_node** on the NUMA
node corresponding to *cpu*.

Unless the optional argument *skip_perf_checks* is true, also run some
basic checks to verify that the given core is suitable for processing
low-latency network traffic: that the CPU has the `performance` scaling
governor, that it has been reserved from the kernel scheduler, and so
on, printing out any problems to `stderr`.

— Function **bind_to_numa_node** *node*
Bind the current process to NUMA node *node*, arranging for it to only
ever allocate memory local to that NUMA node. Additionally, migrate
Expand Down Expand Up @@ -55,3 +61,18 @@ node bound by **bind_to_numa_node**, if present, and in any case that
all *addrs* are on the same NUMA node. If *require_affinity* is true
(not the default), then error if a problem is detected, otherwise just
print a warning to the console.

— Function **parse_cpuset** *cpus*
A helper function to parse a CPU set from a string. A CPU set is either
the number of a CPU, a range of CPUs, or two or more CPU sets joined by
commas. The result is a table whose keys are the CPUs and whose values
are true (a set). For example, `parse_cpuset("1-3,5")` will return a
table with keys 1, 2, 3, and 5 bound to `true`.

— Function **node_cpus** *node*
Return a set of CPUs belonging to NUMA node *node*, in the same format
as in **parse_cpuset**.

— Function **isolated_cpus**
Return a set of CPUs that have been "isolated" away from the kernel at
boot via the `isolcpus` kernel boot parameter.
63 changes: 3 additions & 60 deletions src/lib/cpuset.lua
Original file line number Diff line number Diff line change
Expand Up @@ -19,53 +19,7 @@ do
end
end

-- Return STR with all leading and trailing whitespace removed.
--
-- The patterns use `%s+` so that whole runs of whitespace are stripped
-- (e.g. a file ending in "\n\n"), not just a single character per side.
local function trim (str)
   -- The outer parentheses truncate gsub's (string, count) result pair
   -- so callers receive exactly one value: the trimmed string.
   return (str:gsub("^%s+", ""):gsub("%s+$", ""))
end

-- Parse a CPU list string such as "0-5,7" into an array of CPU numbers.
--
-- CPUS is a comma-separated sequence of entries, each either a single
-- integer or an inclusive "lo-hi" range with lo < hi.  An empty (after
-- trimming) string yields an empty array.  Malformed entries raise an
-- error via assert.
-- NOTE(review): `cpus:split` is not a stock Lua string method; it is
-- presumably installed on the string metatable elsewhere in the project.
local function parse_cpulist (cpus)
   local list = {}
   cpus = trim(cpus)
   if #cpus == 0 then return list end
   for range in cpus:split(',') do
      -- Try the "lo-hi" form first, then fall back to a lone number.
      local first, last = range:match("^%s*([^%-]*)%s*-%s*([^%-%s]*)%s*$")
      if first == nil then first = range:match("^%s*([^%-]*)%s*$") end
      assert(first ~= nil, 'invalid range: '..range)
      first = assert(tonumber(first), 'invalid range begin: '..first)
      assert(first == math.floor(first), 'invalid range begin: '..first)
      if last == nil then
         -- Single CPU: treat it as a one-element range.
         last = first
      else
         last = assert(tonumber(last), 'invalid range end: '..last)
         assert(last == math.floor(last), 'invalid range end: '..last)
         assert(first < last, 'invalid range: '..range)
      end
      for cpu = first, last do list[#list + 1] = cpu end
   end
   return list
end

-- Read the whole file at PATH and parse its contents as a CPU list.
--
-- Raises an error if the file cannot be opened (assert on io.open) or
-- if its contents are not a valid CPU list.  Returns whatever
-- parse_cpulist returns for the file contents.
local function parse_cpulist_from_file (path)
   -- assert raises when io.open fails, so no nil check is needed after it.
   local fd = assert(io.open(path))
   local contents = fd:read("*all")
   -- Close before parsing so the handle is released even when the
   -- parser raises on malformed contents.
   fd:close()
   local ret = parse_cpulist(contents)
   return ret
end

local function available_cpus (node)
local function set (t)
local ret = {}
for _,v in pairs(t) do ret[tostring(v)] = true end
return ret
end
local function cpus_in_node (node)
local node_path = '/sys/devices/system/node/node'..node
return set(parse_cpulist_from_file(node_path..'/cpulist'))
end
local function isolated_cpus ()
return set(parse_cpulist_from_file('/sys/devices/system/cpu/isolated'))
end
local function subtract (s, t)
local ret = {}
for k,_ in pairs(s) do
Expand All @@ -75,7 +29,7 @@ local function available_cpus (node)
return ret
end
-- XXX: Add sched_getaffinity cpus.
return subtract(cpus_in_node(node), isolated_cpus())
return subtract(numa.node_cpus(node), numa.isolated_cpus())
end

function CPUSet:bind_to_numa_node()
Expand All @@ -87,7 +41,7 @@ function CPUSet:bind_to_numa_node()
numa.bind_to_numa_node(nodes[1])
local cpus = available_cpus(nodes[1])
assert(#cpus > 0, 'Not available CPUs')
numa.bind_to_cpu(cpus)
numa.bind_to_cpu(cpus, 'skip-perf-checks')
print(("Bound main process to NUMA node: %s (CPU %s)"):format(nodes[1], cpus[1]))
else
print("CPUs available from multiple NUMA nodes: "..table.concat(nodes, ","))
Expand All @@ -96,7 +50,7 @@ function CPUSet:bind_to_numa_node()
end

function CPUSet:add_from_string(cpus)
for _, cpu in ipairs(parse_cpulist(cpus)) do
for cpu,_ in pairs(numa.parse_cpuset(cpus)) do
self:add(cpu)
end
end
Expand Down Expand Up @@ -157,14 +111,3 @@ function CPUSet:release(cpu)
end
error('CPU not found on NUMA node: '..cpu..', '..node)
end

-- Self-test for the CPU list parser: checks a range-plus-single list,
-- a single CPU, and inputs that should parse to an empty list.
function selftest ()
   print('selftest: cpuset')
   -- "0-5,7" expands to 0,1,2,3,4,5,7: seven entries in order.
   local parsed = parse_cpulist("0-5,7")
   assert(#parsed == 7)
   assert(parsed[6] == 5)
   assert(parsed[7] == 7)
   local single = parse_cpulist("1")
   assert(#single == 1)
   assert(single[1] == 1)
   -- Blank inputs produce no CPUs.
   for _, blank in ipairs({"\n", ""}) do
      assert(#parse_cpulist(blank) == 0)
   end
   print('selftest: ok')
end
110 changes: 97 additions & 13 deletions src/lib/numa.lua
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,57 @@ local bound_numa_node
local node_path = '/sys/devices/system/node/node'
local MAX_CPU = 1023

-- Write a warning to stderr and flush it immediately.
--
-- FMT and the remaining arguments are passed to string.format; the
-- message is prefixed with "Warning: " and terminated with a newline.
local function warn(fmt, ...)
   local message = string.format("Warning: ".. fmt .. "\n", ...)
   local err = io.stderr
   err:write(message)
   err:flush()
end

-- Raise an error whose message is FMT formatted with the remaining
-- arguments (string.format semantics).  Never returns.
local function die(fmt, ...)
   local message = string.format(fmt, ...)
   error(message)
end

-- Return STR with all leading and trailing whitespace removed.
--
-- The patterns use `%s+` so that whole runs of whitespace are stripped
-- (e.g. a file ending in "\n\n"), not just a single character per side.
local function trim (str)
   -- The outer parentheses truncate gsub's (string, count) result pair
   -- so callers receive exactly one value: the trimmed string.
   return (str:gsub("^%s+", ""):gsub("%s+$", ""))
end

-- Parse a CPU set from the string CPUS.
--
-- A CPU set is a comma-separated sequence of entries, each either a
-- single integer CPU number or an inclusive "lo-hi" range with lo < hi.
-- Returns a table whose keys are the CPU numbers and whose values are
-- true; an empty (after trimming) string yields an empty table.
-- Malformed entries raise an error via assert.
--
-- The set is filled in directly rather than collecting a list and
-- passing it through unpack, so the size of a range is not bounded by
-- the number of values unpack can return.
-- NOTE(review): `cpus:split` is not a stock Lua string method; it is
-- presumably installed on the string metatable elsewhere in the project.
function parse_cpuset (cpus)
   local ret = {}
   cpus = trim(cpus)
   if #cpus == 0 then return ret end
   for range in cpus:split(',') do
      -- Try the "lo-hi" form first, then fall back to a lone number.
      local lo, hi = range:match("^%s*([^%-]*)%s*-%s*([^%-%s]*)%s*$")
      if lo == nil then lo = range:match("^%s*([^%-]*)%s*$") end
      assert(lo ~= nil, 'invalid range: '..range)
      lo = assert(tonumber(lo), 'invalid range begin: '..lo)
      assert(lo == math.floor(lo), 'invalid range begin: '..lo)
      if hi ~= nil then
         hi = assert(tonumber(hi), 'invalid range end: '..hi)
         assert(hi == math.floor(hi), 'invalid range end: '..hi)
         assert(lo < hi, 'invalid range: '..range)
      else
         -- Single CPU: treat it as a one-element range.
         hi = lo
      end
      for cpu=lo,hi do ret[cpu] = true end
   end
   return ret
end

-- Read the whole file at PATH and parse its contents as a CPU set.
--
-- Raises an error if the file cannot be opened (assert on io.open) or
-- if its contents are not a valid CPU set.  Returns whatever
-- parse_cpuset returns for the file contents.
local function parse_cpuset_from_file (path)
   -- assert raises when io.open fails, so no nil check is needed after it.
   local fd = assert(io.open(path))
   local contents = fd:read("*all")
   -- Close before parsing so the handle is released even when the
   -- parser raises on malformed contents.
   fd:close()
   local ret = parse_cpuset(contents)
   return ret
end

-- Return the CPU set parsed from the `cpulist` file of NUMA node NODE
-- under the sysfs node directory (node_path .. NODE).
-- Raises if that file cannot be opened or parsed.
function node_cpus (node)
   local cpulist_path = node_path..node..'/cpulist'
   return parse_cpuset_from_file(cpulist_path)
end

-- Return the CPU set parsed from /sys/devices/system/cpu/isolated.
-- Raises if that file cannot be opened or parsed.
--
-- Takes no arguments: the set is system-wide, not per NUMA node.  Any
-- argument passed by an existing caller is simply ignored.
function isolated_cpus ()
   return parse_cpuset_from_file('/sys/devices/system/cpu/isolated')
end

function cpu_get_numa_node (cpu)
local node = 0
while true do
Expand Down Expand Up @@ -62,10 +113,10 @@ function choose_numa_node_for_pci_addresses (addrs, require_affinity)
chosen_node = node
chosen_because_of_addr = addr
else
local msg = string.format(
"PCI devices %s and %s have different NUMA node affinities",
chosen_because_of_addr, addr)
if require_affinity then error(msg) else print('Warning: '..msg) end
local warn = warn
if require_affinity then warn = die end
warn("PCI devices %s and %s have different NUMA node affinities",
chosen_because_of_addr, addr)
end
end
return chosen_node
Expand All @@ -75,17 +126,17 @@ function check_affinity_for_pci_addresses (addrs)
local policy = S.get_mempolicy()
if policy.mode == S.c.MPOL_MODE['default'] then
if has_numa() then
print('Warning: No NUMA memory affinity.')
print('Pass --cpu to bind to a CPU and its NUMA node.')
warn('No NUMA memory affinity.\n'..
'Pass --cpu to bind to a CPU and its NUMA node.')
end
elseif (policy.mode ~= S.c.MPOL_MODE['bind'] and
policy.mode ~= S.c.MPOL_MODE['preferred']) then
print("Warning: NUMA memory policy already in effect, but it's not --membind or --preferred.")
warn("NUMA memory policy already in effect, but it's not --membind or --preferred.")
else
local node = S.getcpu().node
local node_for_pci = choose_numa_node_for_pci_addresses(addrs)
if node_for_pci and node ~= node_for_pci then
print("Warning: Bound NUMA node does not have affinity with PCI devices.")
warn("Bound NUMA node does not have affinity with PCI devices.")
end
end
end
Expand All @@ -98,7 +149,7 @@ function unbind_cpu ()
bound_cpu = nil
end

function bind_to_cpu (cpu)
function bind_to_cpu (cpu, skip_perf_checks)
local function contains (t, e)
for k,v in ipairs(t) do
if tonumber(v) == tonumber(e) then return true end
Expand All @@ -117,6 +168,14 @@ function bind_to_cpu (cpu)
bound_cpu = cpu_and_node.cpu

bind_to_numa_node (cpu_and_node.node)

if not skip_perf_checks then
local ok, err = pcall(check_cpu_performance_tuning, bound_cpu)
if not ok then
warn("Error checking performance tuning on CPU %s: %s",
bound_cpu, tostring(err))
end
end
end

function unbind_numa_node ()
Expand All @@ -138,9 +197,8 @@ function bind_to_numa_node (node, policy)
local from_mask = assert(S.get_mempolicy(nil, nil, nil, 'mems_allowed')).mask
local ok, err = S.migrate_pages(0, from_mask, node)
if not ok then
io.stderr:write(
string.format("Warning: Failed to migrate pages to NUMA node %d: %s\n",
node, tostring(err)))
warn("Failed to migrate pages to NUMA node %d: %s\n",
node, tostring(err))
end
end

Expand All @@ -152,11 +210,37 @@ function prevent_preemption(priority)
'Failed to enable real-time scheduling. Try running as root.')
end

-- Check that CPU looks tuned for dedicated, low-latency work.
--
-- Two checks are made:
--   * the CPU's cpufreq scaling governor (read from sysfs) mentions
--     "performance";
--   * the CPU is a member of the set returned by isolated_cpus().
--
-- Each failed check is reported through warn, or through die (raising
-- an error) when STRICT is true.  An error is also raised if the
-- governor file or the isolated-CPU file cannot be opened.
function check_cpu_performance_tuning (cpu, strict)
   -- Shadow the reporter so that strict mode turns warnings into errors.
   local warn = warn
   if strict then warn = die end
   local path = '/sys/devices/system/cpu/cpu'..cpu..'/cpufreq/scaling_governor'
   local fd = assert(io.open(path))
   local gov = fd:read()
   -- Close explicitly rather than leaving the handle to the garbage
   -- collector.
   fd:close()
   -- read() returns nil for an empty file; report that as a mismatch
   -- instead of failing on a nil index.
   if not (gov and gov:match('performance')) then
      warn('Expected performance scaling governor for CPU %s, but got "%s"',
           cpu, tostring(gov))
   end

   -- The isolated set is keyed by number, so normalise CPU in case it
   -- was supplied as a numeric string.
   if not isolated_cpus()[tonumber(cpu)] then
      warn('Expected dedicated core, but CPU %s is not in isolcpus set', cpu)
   end
end

function selftest ()

local cpus = parse_cpuset("0-5,7")
for i=0,5 do assert(cpus[i]) end
assert(not cpus[6])
assert(cpus[7])
do
local count = 0
for k,v in pairs(cpus) do count = count + 1 end
assert(count == 7)
end
assert(parse_cpuset("1")[1])

function test_cpu(cpu)
local node = cpu_get_numa_node(cpu)
bind_to_cpu(cpu)
bind_to_cpu(cpu, 'skip-perf-checks')
assert(bound_cpu == cpu)
assert(bound_numa_node == node)
assert(S.getcpu().cpu == cpu)
Expand Down

0 comments on commit 89e855c

Please sign in to comment.