From 25007d8ed06e0df0c1feb8881ea0b146ed0a2c2d Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Sun, 6 Mar 2016 04:30:26 +0000 Subject: [PATCH 1/3] snabbmark: Add 'mp-ring' multiprocess benchmark This benchmark measures the throughput of Snabb processes that are circularly connected together in a ring. --- src/program/snabbmark/mp.lua | 57 +++++++++++++++++++++++++++++ src/program/snabbmark/snabbmark.lua | 2 + 2 files changed, 59 insertions(+) create mode 100644 src/program/snabbmark/mp.lua diff --git a/src/program/snabbmark/mp.lua b/src/program/snabbmark/mp.lua new file mode 100644 index 0000000000..4b41370cc8 --- /dev/null +++ b/src/program/snabbmark/mp.lua @@ -0,0 +1,57 @@ +-- Multiprocess benchmarks +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +module(..., package.seeall) + +local ffi = require("ffi") +local C = ffi.C +local S = require("syscall") + +-- Ring benchmark: +-- See how quickly packets are cycled through a ring of processes. +-- +-- Each process copies packets from its input to its output link. Each +-- link is populated with an initial "burst" of packets. 
+function mp_ring (nprocesses, totalpackets, burstpackets) + nprocesses = tonumber(nprocesses) + totalpackets = tonumber(totalpackets) + burstpackets = tonumber(burstpackets) + links = {} + -- Create links to connect the processes in a loop + for i = 0, nprocesses-1 do + links[i] = link.new(tostring(i)) + for j = 1, burstpackets do + link.transmit(links[i], packet.allocate()) + end + end + -- Create per-process counters + local counters = ffi.cast("uint64_t *", + memory.dma_alloc(nprocesses*ffi.sizeof("uint64_t"))) + -- Start child processes + local start = C.get_time_ns() + for i = 0, nprocesses-1 do + if S.fork() == 0 then + -- terminate when parent does + S.prctl("set_pdeathsig", "hup") + local input = links[i] + local output = links[(i+1) % nprocesses] + while counters[i] < totalpackets do + if not link.empty(input) and not link.full(output) then + link.transmit(output, link.receive(input)) + counters[i] = counters[i] + 1 + end + -- Sync registers with memory + core.lib.compiler_barrier() + end + os.exit(0) + end + end + -- Spin until enough packets have been processed + while counters[0] < totalpackets do + core.lib.compiler_barrier() + end + local finish = C.get_time_ns() + local seconds = tonumber(finish-start)/1e9 + local packets = tonumber(counters[0]) + print(("%7.2f Mpps ring throughput per process"):format(packets/seconds/1e6)) +end diff --git a/src/program/snabbmark/snabbmark.lua b/src/program/snabbmark/snabbmark.lua index 8a554ffa8b..e1e0391cfc 100644 --- a/src/program/snabbmark/snabbmark.lua +++ b/src/program/snabbmark/snabbmark.lua @@ -22,6 +22,8 @@ function run (args) solarflare(unpack(args)) elseif command == 'intel1g' and #args >= 2 and #args <= 3 then intel1g(unpack(args)) + elseif command == 'mp-ring' then + require("program.snabbmark.mp").mp_ring(unpack(args)) else print(usage) main.exit(1) From e4ec563a930cc1f4a12fe0d3386fab32e19f2e1e Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Sun, 6 Mar 2016 05:15:18 +0000 Subject: [PATCH 2/3] 
snabbmark: Add CPU affinity to mp-ring Process i uses core i. --- src/program/snabbmark/mp.lua | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/program/snabbmark/mp.lua b/src/program/snabbmark/mp.lua index 4b41370cc8..56056f7aaf 100644 --- a/src/program/snabbmark/mp.lua +++ b/src/program/snabbmark/mp.lua @@ -31,6 +31,8 @@ function mp_ring (nprocesses, totalpackets, burstpackets) local start = C.get_time_ns() for i = 0, nprocesses-1 do if S.fork() == 0 then + -- Child has affinity to CPU core + S.sched_setaffinity(0, i) -- terminate when parent does S.prctl("set_pdeathsig", "hup") local input = links[i] From d45f80874d3e0cfb450dbf024b4ebd70b262b673 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Sun, 6 Mar 2016 06:11:42 +0000 Subject: [PATCH 3/3] snabbmark: Add proper command-line syntax to mp-ring --- src/program/snabbmark/mp.lua | 87 +++++++++++++++++++++++------ src/program/snabbmark/snabbmark.lua | 2 +- 2 files changed, 72 insertions(+), 17 deletions(-) diff --git a/src/program/snabbmark/mp.lua b/src/program/snabbmark/mp.lua index 56056f7aaf..3448d21b87 100644 --- a/src/program/snabbmark/mp.lua +++ b/src/program/snabbmark/mp.lua @@ -6,50 +6,105 @@ module(..., package.seeall) local ffi = require("ffi") local C = ffi.C local S = require("syscall") +local pmu = require("lib.pmu") +local lib = require("core.lib") -- Ring benchmark: -- See how quickly packets are cycled through a ring of processes. -- -- Each process copies packets from its input to its output link. Each -- link is populated with an initial "burst" of packets. 
-function mp_ring (nprocesses, totalpackets, burstpackets) - nprocesses = tonumber(nprocesses) - totalpackets = tonumber(totalpackets) - burstpackets = tonumber(burstpackets) +function mp_ring (args) + local function usage () + print(require("program.snabbmark.README_mp_inc")) + os.exit(1) + end + local long_opts = { + help = "h", + mode = "m", + processes = "n", + packets = "p", + burst = "b", + events = "e", + read = "r", + write = "w" + } + local c = { + mode = "basic", + processes = 2, + packets = 100e6, + burst = 100, + pmuevents = false, + readbytes = 0, + writebytes = 0 + } + local opt = {} + function opt.m (arg) c.mode = arg end + function opt.n (arg) c.processes = tonumber(arg) end + function opt.p (arg) c.packets = tonumber(arg) end + function opt.b (arg) c.burst = tonumber(arg) end + function opt.e (arg) c.pmuevents = arg end + function opt.r (arg) c.readbytes = tonumber(arg) end + function opt.w (arg) c.writebytes = tonumber(arg) end + function opt.h (arg) usage() end + local leftover = lib.dogetopt(args, opt, "hn:p:b:e:r:w:", long_opts) + if #leftover > 0 then usage () end + -- Print summary of configuration + print("Benchmark configuration:") + for k, v in pairs(c) do + print(("%12s: %s"):format(k,v)) + end links = {} -- Create links to connect the processes in a loop - for i = 0, nprocesses-1 do + for i = 0, c.processes-1 do links[i] = link.new(tostring(i)) - for j = 1, burstpackets do + for j = 1, c.burst do link.transmit(links[i], packet.allocate()) end end -- Create per-process counters local counters = ffi.cast("uint64_t *", - memory.dma_alloc(nprocesses*ffi.sizeof("uint64_t"))) + memory.dma_alloc(c.processes*ffi.sizeof("uint64_t"))) -- Start child processes + if c.pmuevents then error("PMU support NYI") end local start = C.get_time_ns() - for i = 0, nprocesses-1 do + for i = 0, c.processes-1 do if S.fork() == 0 then -- Child has affinity to CPU core S.sched_setaffinity(0, i) -- terminate when parent does S.prctl("set_pdeathsig", "hup") local 
input = links[i] - local output = links[(i+1) % nprocesses] - while counters[i] < totalpackets do - if not link.empty(input) and not link.full(output) then - link.transmit(output, link.receive(input)) - counters[i] = counters[i] + 1 + local output = links[(i+1) % c.processes] + if c.mode == "basic" then + -- Simple reference implementation in idiomatic Lua. + local acc = ffi.new("uint8_t[1]") + while counters[i] < c.packets do + if not link.empty(input) and not link.full(output) then + local p = link.receive(input) + -- Read some packet data + for j = 0, c.readbytes do + acc[0] = acc[0] + p.data[j] + end + -- Write some packet data + for j = 0, c.writebytes do + p.data[j] = i + end + link.transmit(output, p) + counters[i] = counters[i] + 1 + end + -- Sync registers with memory + core.lib.compiler_barrier() end - -- Sync registers with memory - core.lib.compiler_barrier() + else + print("mode not recognized: " .. c.mode) + os.exit(1) end os.exit(0) end end -- Spin until enough packets have been processed - while counters[0] < totalpackets do + while counters[0] < c.packets do core.lib.compiler_barrier() end local finish = C.get_time_ns() diff --git a/src/program/snabbmark/snabbmark.lua b/src/program/snabbmark/snabbmark.lua index e1e0391cfc..17d9c8126e 100644 --- a/src/program/snabbmark/snabbmark.lua +++ b/src/program/snabbmark/snabbmark.lua @@ -23,7 +23,7 @@ function run (args) elseif command == 'intel1g' and #args >= 2 and #args <= 3 then intel1g(unpack(args)) elseif command == 'mp-ring' then - require("program.snabbmark.mp").mp_ring(unpack(args)) + require("program.snabbmark.mp").mp_ring(args) else print(usage) main.exit(1)