Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip] snabb top --yang: dump RFC7223 interface stats as JSON #886

Merged
merged 21 commits into from
Jul 13, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
b34c3ee
engine: Set shm path to "app/$name"
lukego Feb 22, 2016
94ff234
Amendments to #766:
eugeneia Apr 12, 2016
aac0c8c
Merge PR #766 (engine: Set shm path to "app/$name") into yang
eugeneia Apr 12, 2016
fad0f43
core.counter: Qualify counter names using `shm.resolve'.
eugeneia Apr 13, 2016
7ed4ed0
snabb top: add `--app' option to print app counters.
eugeneia Apr 12, 2016
eb9005b
snabb top: unlink own shm tree to avoid clutter.
eugeneia Apr 13, 2016
5fbe0d6
vhost_user: Add RFC 7223 app counters.
eugeneia Apr 14, 2016
8bb3215
Intel_app: Add RFC 7223 app counters.
eugeneia Apr 14, 2016
7a55478
snabb top: Add --link parameter to list link counters.
eugeneia Apr 14, 2016
dde5da2
core.app: Put app counters under "counters/<app>", update snabb top.
eugeneia Apr 14, 2016
924ff4e
lib.json: Import JSON4Lua 1.0.0, include encode functionality.
eugeneia Apr 18, 2016
8e34093
lib.macaddress: Support numeric initialization; add method to get num…
eugeneia Apr 18, 2016
5f9efd2
core.link: Create “discontinuity-time” counters.
eugeneia Apr 18, 2016
7b39148
snabb top: add `--yang' option to print YANG model as JSON.
eugeneia Apr 19, 2016
8984741
snabb top --yang: Represent uint64_t as decimal string.
eugeneia Apr 20, 2016
ee00d16
[core.lib] Generalize `timer' to optionally accept 'repeating'
eugeneia Apr 28, 2016
45490b8
Revert "Intel_app: Add RFC 7223 app counters."
eugeneia Apr 28, 2016
f0ed10b
intel_app: expose per-pciaddress statistics in `counters/<pciaddress>'.
eugeneia Apr 28, 2016
aca8064
Merge branch 'master' into yang-local
eugeneia May 13, 2016
c186591
lib.protocol.ethernet: Add n_mcast, branch-free Multicast predicate.
eugeneia Apr 25, 2016
b09e843
Fix for f0ed10b: require macaddress module.
eugeneia May 27, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -543,10 +543,18 @@ Returns a table that acts as a bounds checked wrapper around a C array of
ctype and the caller must ensure that the allocated memory region at
*base*/*offset* is at least `sizeof(type)*size` bytes long.

— Function **lib.timer** *s*
— Function **lib.timer** *duration*, *mode*, *timefun*

Returns a closure that will return `false` until *duration* has elapsed. If
*mode* is `'repeating'` the timer will reset itself after returning `true`,
thus implementing an interval timer. *Timefun* is used to get a monotonic time.
*Timefun* defaults to `C.get_time_ns`.

The “deadline” for a given *duration* is computed by adding *duration* to the
result of calling *timefun*, and is saved in the resulting closure. A
*duration* has elapsed when its deadline is less than or equal the value
obtained using *timefun* when calling the closure.

Returns a function that accepts no parameters and acts as a predicate to
test if *ns* nanoseconds have elapsed.

— Function **lib.waitfor** *condition*

Expand Down
70 changes: 65 additions & 5 deletions src/apps/intel/intel_app.lua
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ local zone = require("jit.zone")
local basic_apps = require("apps.basic.basic_apps")
local ffi = require("ffi")
local lib = require("core.lib")
local shm = require("core.shm")
local counter = require("core.counter")
local pci = require("lib.hardware.pci")
local register = require("lib.hardware.register")
local macaddress = require("lib.macaddress")
local intel10g = require("apps.intel.intel10g")
local receive, transmit, full, empty = link.receive, link.transmit, link.full, link.empty
Intel82599 = {}
Expand Down Expand Up @@ -35,21 +38,49 @@ end
-- Create an Intel82599 App for the device with 'pciaddress'.
function Intel82599:new (arg)
local conf = config.parse_app_arg(arg)
local self = {}

if conf.vmdq then
if devices[conf.pciaddr] == nil then
devices[conf.pciaddr] = {pf=intel10g.new_pf(conf):open(), vflist={}}
local pf = intel10g.new_pf(conf):open()
devices[conf.pciaddr] = {pf=pf, vflist={}, stats={register=pf.s}}
end
local dev = devices[conf.pciaddr]
local poolnum = firsthole(dev.vflist)-1
local vf = dev.pf:new_vf(poolnum)
dev.vflist[poolnum+1] = vf
return setmetatable({dev=vf:open(conf)}, Intel82599)
self.dev = vf:open(conf)
self.stats = devices[conf.pciaddr].stats
else
local dev = intel10g.new_sf(conf):open()
if not dev then return null end
return setmetatable({dev=dev, zone="intel"}, Intel82599)
self.dev = assert(intel10g.new_sf(conf):open(), "Can not open device.")
self.stats = { register = self.dev.s }
self.zone = "intel"
end
if not self.stats.counters then
local counters = {}
local path = "/counters/"..conf.pciaddr.."/"
counters['type'] = counter.open(path..'type')
counters['discontinuity-time'] = counter.open(path..'discontinuity-time')
counters['in-octets'] = counter.open(path..'in-octets')
counters['in-multicast'] = counter.open(path..'in-multicast')
counters['in-broadcast'] = counter.open(path..'in-broadcast')
counters['in-discards'] = counter.open(path..'in-discards')
counters['out-octets'] = counter.open(path..'out-octets')
counters['out-multicast'] = counter.open(path..'out-multicast')
counters['out-broadcast'] = counter.open(path..'out-broadcast')
counters['out-discards'] = counter.open(path..'out-discards')
counter.set(counters['type'], 0x1000) -- Hardware interface
counter.set(counters['discontinuity-time'], C.get_unix_time())
if not conf.vmdq and conf.macaddr then
counters['phys-address'] = counter.open(path..'phys-address')
counter.set(counters['phys-address'],
macaddress:new(conf.macaddr):int())
end
self.stats.counters = counters
self.stats.path = path
self.stats.sync_timer = lib.timer(0.001, 'repeating', engine.now)
end
return setmetatable(self, Intel82599)
end

function Intel82599:stop()
Expand All @@ -70,6 +101,12 @@ function Intel82599:stop()
if close_pf then
close_pf:close()
end
if not self.dev.pf or close_pf then
for name, _ in pairs(self.stats.counters) do
counter.delete(self.stats.path..name)
end
shm.unlink(self.stats.path)
end
end


Expand All @@ -78,6 +115,11 @@ function Intel82599:reconfig(arg)
assert((not not self.dev.pf) == (not not conf.vmdq), "Can't reconfig from VMDQ to single-port or viceversa")

self.dev:reconfig(conf)

if not self.dev.pf and conf.macaddr then
counter.set(self.stats.counters['phys-address'],
macaddress:new(conf.macaddr):int())
end
end

-- Allocate receive buffers from the given freelist.
Expand All @@ -95,6 +137,9 @@ function Intel82599:pull ()
transmit(l, self.dev:receive())
end
self:add_receive_buffers()
if self.stats.sync_timer() then
self:sync_stats()
end
end

function Intel82599:add_receive_buffers ()
Expand All @@ -104,6 +149,21 @@ function Intel82599:add_receive_buffers ()
end
end

-- Synchronize self.stats.register a and self.stats.counters.
function Intel82599:sync_stats ()
--- XXX - it is questionable if I choose the right register to counter
--- mapping
local counters, register = self.stats.counters, self.stats.register
counter.set(counters['in-octets'], register.GORC64())
counter.set(counters['in-multicast'], register.MPRC())
counter.set(counters['in-broadcast'], register.BPRC())
counter.set(counters['in-discards'], register.TPR() - register.GPRC())
counter.set(counters['out-octets'], register.GOTC64())
counter.set(counters['out-multicast'], register.MPTC())
counter.set(counters['out-broadcast'], register.BPTC())
counter.set(counters['out-discards'], register.TPT() - register.GPTC())
end

-- Push packets from our 'rx' link onto the network.
function Intel82599:push ()
local l = self.input.rx
Expand Down
32 changes: 32 additions & 0 deletions src/apps/vhost/vhost_user.lua
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ local lib = require("core.lib")
local link = require("core.link")
local main = require("core.main")
local memory = require("core.memory")
local counter = require("core.counter")
local pci = require("lib.hardware.pci")
local ethernet = require("lib.protocol.ethernet")
local net_device= require("lib.virtio.net_device")
local timer = require("core.timer")
local ffi = require("ffi")
Expand Down Expand Up @@ -52,6 +54,15 @@ function VhostUser:new (args)
else
self.qemu_connect = self.client_connect
end
-- initialize counters
self.counters = {}
for _, name in ipairs({'type', 'discontinuity-time',
'in-octets', 'in-unicast', 'in-multicast', 'in-discards',
'out-octets', 'out-unicast', 'out-multicast'}) do
self.counters[name] = counter.open(name)
end
counter.set(self.counters['type'], 0x1001) -- Virtual interface
counter.set(self.counters['discontinuity-time'], C.get_unix_time())
return self
end

Expand All @@ -68,6 +79,9 @@ function VhostUser:stop()
self:free_mem_table()

if self.link_down_proc then self.link_down_proc() end

-- delete counters
for name, _ in pairs(self.counters) do counter.delete(name) end
end

function VhostUser:pull ()
Expand All @@ -86,6 +100,24 @@ function VhostUser:push ()
end
end

function VhostUser:tx_callback (p)
counter.add(self.counters['out-octets'], packet.length(p))
local mcast = ethernet:n_mcast(packet.data(p))
counter.add(self.counters['out-multicast'], mcast)
counter.add(self.counters['out-unicast'], 1 - mcast)
end

function VhostUser:rx_callback (p)
counter.add(self.counters['in-octets'], packet.length(p))
local mcast = ethernet:n_mcast(packet.data(p))
counter.add(self.counters['in-multicast'], mcast)
counter.add(self.counters['in-unicast'], 1 - mcast)
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it safe to introduce this potentially unbiased branch onto the "fast path" for Virtio-net?

The risk I see is that on a workload with 50/50 mix of unicast/multicast traffic you will take the penalty of both a LuaJIT side-trace and also a CPU branch-misprediction half of the time. This could be significant and we don't currently have performance test coverage for such a workload.

One alternative would be to write this branch-free (using arithmetic, bitwise operators, and min/max). Sketch:

-- Set unicast and multicast to 0 and 1 as appropriate.
local multicast = bit.band(packet.data[0], 1)
local unicast = 1 - multicast
counter.add(self.counters['in-multicast'], multicast)
counter.add(self.counters['in-unicast'], unicast)

This way the same instructions would execute every time and only the values would change.


function VhostUser:rxdrop_callback (p)
counter.add(self.counters['in-discards'])
end

-- Try to connect to QEMU.
function VhostUser:client_connect ()
return C.vhost_user_connect(self.socket_path)
Expand Down
44 changes: 43 additions & 1 deletion src/core/app.lua
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ local lib = require("core.lib")
local link = require("core.link")
local config = require("core.config")
local timer = require("core.timer")
local shm = require("core.shm")
local histogram = require('core.histogram')
local counter = require("core.counter")
local zone = require("jit.zone")
Expand Down Expand Up @@ -68,6 +69,8 @@ end
-- Run app:methodname() in protected mode (pcall). If it throws an
-- error app will be marked as dead and restarted eventually.
local function with_restart (app, method)
local oldshm = shm.path
shm.path = app.shmpath
if use_restart then
-- Run fn in protected mode using pcall.
local status, err = pcall(method, app)
Expand All @@ -78,6 +81,7 @@ local function with_restart (app, method)
else
method(app)
end
shm.path = oldshm
end

-- Restart dead apps.
Expand Down Expand Up @@ -155,7 +159,13 @@ function apply_config_actions (actions, conf)
-- Table of functions that execute config actions
local ops = {}
function ops.stop (name)
if app_table[name].stop then app_table[name]:stop() end
if app_table[name].stop then
local shmorig = shm.path
shm.path = app_table[name].shmpath
app_table[name]:stop()
shm.path = shmorig
shm.unlink(app_table[name].shmpath)
end
end
function ops.keep (name)
new_app_table[name] = app_table[name]
Expand All @@ -165,7 +175,10 @@ function apply_config_actions (actions, conf)
function ops.start (name)
local class = conf.apps[name].class
local arg = conf.apps[name].arg
local shmpath, shmorig = "counters/"..name, shm.path
shm.path = shmpath
local app = class:new(arg)
shm.path = shmorig
if type(app) ~= 'table' then
error(("bad return value from app '%s' start() method: %s"):format(
name, tostring(app)))
Expand All @@ -174,6 +187,7 @@ function apply_config_actions (actions, conf)
app.appname = name
app.output = {}
app.input = {}
app.shmpath = shmpath
new_app_table[name] = app
table.insert(new_app_array, app)
app_name_to_index[name] = #new_app_array
Expand Down Expand Up @@ -493,6 +507,34 @@ function selftest ()
assert(app_table.app3 == orig_app3) -- should be the same
main({duration = 4, report = {showapps = true}})
assert(app_table.app3 ~= orig_app3) -- should be restarted
-- Test shm.path management
print("shm.path management")
local S = require("syscall")
local App4 = {zone="test"}
function App4:new ()
local c = counter.open('test')
counter.set(c, 42)
counter.commit()
return setmetatable({test_counter = c},
{__index = App4})
end
function App4:pull ()
assert(counter.read(self.test_counter) == 42, "Invalid counter value")
counter.add(self.test_counter)
end
function App4:stop ()
assert(counter.read(self.test_counter) == 43, "Invalid counter value")
counter.delete('test')
end
local c_counter = config.new()
config.app(c_counter, "App4", App4)
configure(c_counter)
main({done = function () return app_table.App4.test_counter end})
assert(S.stat(shm.root.."/"..shm.resolve("counters/App4/test")),
"Missing : counters/App4/test")
configure(config.new())
assert(not S.stat(shm.root.."/"..shm.resolve("counters/App4")),
"Failed to unlink counters/App4")
print("OK")
end

Expand Down
12 changes: 7 additions & 5 deletions src/core/counter.lua
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ local private = {}
local numbers = {} -- name -> number

function open (name, readonly)
if numbers[name] then return private[numbers[name]] end
local qname = shm.resolve(name)
if numbers[qname] then return private[numbers[qname]] end
local n = #public+1
if readonly then
public[n] = shm.open(name, counter_t, readonly)
Expand All @@ -53,21 +54,22 @@ function open (name, readonly)
public[n] = shm.create(name, counter_t)
private[n] = ffi.new(counter_t)
end
numbers[name] = n
numbers[qname] = n
return private[n]
end

function delete (name)
local number = numbers[name]
if not number then error("counter not found for deletion: " .. name) end
local qname = shm.resolve(name)
local number = numbers[qname]
if not number then error("counter not found for deletion: " .. qname) end
-- Free shm object
shm.unmap(public[number])
-- If we "own" the counter for writing then we unlink it too.
if public[number] ~= private[number] then
shm.unlink(name)
end
-- Free local state
numbers[name] = false
numbers[qname] = false
public[number] = false
private[number] = false
end
Expand Down
21 changes: 17 additions & 4 deletions src/core/lib.lua
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,23 @@ function bounds_checked (type, base, offset, size)
return wrap(ffi.cast(tptr, ffi.cast("uint8_t *", base) + offset))
end

-- Return a function that will return false until NS nanoseconds have elapsed.
function timer (ns)
local deadline = C.get_time_ns() + ns
return function () return C.get_time_ns() >= deadline end
-- Return a function that will return false until duration has elapsed.
-- If mode is 'repeating' the timer will reset itself after returning true,
-- thus implementing an interval timer. Timefun defaults to `C.get_time_ns'.
function timer (duration, mode, timefun)
timefun = timefun or C.get_time_ns
local deadline = timefun() + duration
local function oneshot ()
return timefun() >= deadline
end
local function repeating ()
if timefun() >= deadline then
deadline = deadline + duration
return true
else return false end
end
if mode == 'repeating' then return repeating
else return oneshot end
end

-- Loop until the function `condition` returns true.
Expand Down
3 changes: 3 additions & 0 deletions src/core/link.lua
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,16 @@ function new (name)
for _, c in ipairs(counternames) do
r.stats[c] = counter.open("counters/"..name.."/"..c)
end
counter.set(counter.open("counters/"..name.."/discontinuity-time"),
C.get_unix_time())
return r
end

function free (r, name)
for _, c in ipairs(counternames) do
counter.delete("counters/"..name.."/"..c)
end
counter.delete("counters/"..name.."/discontinuity-time")
shm.unmap(r)
shm.unlink("links/"..name)
end
Expand Down
Loading