Skip to content

Commit

Permalink
Use new checksum function by default
Browse files Browse the repository at this point in the history
The change required supporting a third argument ('initial'), returning the
checksum in host byte order, and adapting some selftests.
  • Loading branch information
dpino committed Aug 23, 2018
1 parent 7d5eeab commit 24ee9fb
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 43 deletions.
31 changes: 12 additions & 19 deletions src/lib/newchecksum.dasl → src/arch/checksum.dasl
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ local function gen_checksum ()
| push rbp
| mov rbp, rsp
-- Accumulative sum.
| xor rax, rax -- Clear out rax. Stores accumulated sum.
| mov rax, rdx -- Dx (3rd argument: initial).
| xchg al, ah -- Swap to convert to host-bytes order.
| mov rcx, rsi -- Rsi (2nd argument; size).
| xor r9, r9 -- Clear out r9. Stores value of array.
| xor r8, r8 -- Clear out r8. Stores array index.
| mov rcx, rsi -- Rsi (2nd argument; size). Assign rsi to rcx.
| 1:
| cmp rcx, 32 -- If rcx (byte count taken from size) is less than 32.
| jl >2 -- Jump to branch '2'.
Expand Down Expand Up @@ -99,6 +100,8 @@ local function gen_checksum ()
-- One's complement.
| not rax -- One-complement of rax.
| and rax, 0xffff -- Clear out higher part of rax.
-- Swap.
| xchg al, ah
-- Epilogue.
| mov rsp, rbp
| pop rbp
Expand All @@ -107,8 +110,6 @@ local function gen_checksum ()
end
end

local newchecksum = assemble("newchecksum", "uint32_t(*)(uint8_t*, uint32_t)", gen_checksum())

-- Reference implementation in Lua.
local function checksum_lua (data, size)
local function r16 (data)
Expand All @@ -132,12 +133,10 @@ local function checksum_lua (data, size)
return bit.band(bit.bnot(csum), 0xffff)
end

checksum = assemble("checksum", "uint32_t(*)(uint8_t*, uint32_t, uint16_t)", gen_checksum())

function selftest ()
require("lib.checksum_h")
local cpuinfo = lib.readfile("/proc/cpuinfo", "*a")
assert(cpuinfo, "failed to read /proc/cpuinfo for hardware check")
local have_avx2 = cpuinfo:match("avx2")
local have_sse2 = cpuinfo:match("sse2")
local function create_packet (size)
local pkt = {
data = ffi.new("uint8_t[?]", size),
Expand Down Expand Up @@ -165,14 +164,8 @@ function selftest ()
local pkt = create_packet(size)
print(mpps.."M; "..size.." bytes")
-- Benchmark for different architectures.
print("Gen: ", benchmark(function() return C.cksum_generic(pkt.data, pkt.length, 0), pkt end, times))
if have_sse2 then
print("SSE2: ", benchmark(function() return C.cksum_sse2(pkt.data, pkt.length, 0), pkt end, times))
end
if have_avx2 then
print("AVX2: ", benchmark(function() return C.cksum_avx2(pkt.data, pkt.length, 0), pkt end, times))
end
print("New: ", benchmark(function() return newchecksum(pkt.data, pkt.length), pkt end, times))
print("C: ", benchmark(function() return C.cksum_generic(pkt.data, pkt.length, 0), pkt end, times))
print("ASM: ", benchmark(function() return checksum(pkt.data, pkt.length, 0), pkt end, times))
end
local function verify_correctness ()
local function hex (num)
Expand All @@ -181,12 +174,12 @@ function selftest ()
local ntohs = lib.ntohs
for size=44,1500 do
local pkt = create_packet(size)
assert(hex(ntohs(newchecksum(pkt.data, pkt.length))) == hex(ntohs(checksum_lua(pkt.data, pkt.length))))
assert(hex(ntohs(newchecksum(pkt.data, pkt.length))) == hex(C.cksum_generic(pkt.data, pkt.length, 0)))
assert(hex(checksum(pkt.data, pkt.length, 0)) == hex(ntohs(checksum_lua(pkt.data, pkt.length))))
assert(hex(checksum(pkt.data, pkt.length, 0)) == hex(C.cksum_generic(pkt.data, pkt.length, 0)))
end
end

print("selftest: newchecksum")
print("selftest: checksum")
verify_correctness()
benchmark_report(44, 14.4)
benchmark_report(550, 2)
Expand Down
27 changes: 3 additions & 24 deletions src/lib/checksum.lua
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,7 @@ local ffi = require("ffi")
local C = ffi.C
local band, lshift = bit.band, bit.lshift

-- Select ipsum(pointer, len, initial) function based on hardware
-- capability.
local cpuinfo = lib.readfile("/proc/cpuinfo", "*a")
assert(cpuinfo, "failed to read /proc/cpuinfo for hardware check")
local have_avx2 = cpuinfo:match("avx2")
local have_sse2 = cpuinfo:match("sse2")

if have_avx2 then ipsum = C.cksum_avx2
elseif have_sse2 then ipsum = C.cksum_sse2
else ipsum = C.cksum_generic end

ipsum = require("arch.checksum").checksum

function finish_packet (buf, len, offset)
ffi.cast('uint16_t *', buf+offset)[0] = lib.htons(ipsum(buf, len, 0))
Expand Down Expand Up @@ -105,24 +95,13 @@ function selftest ()
local tests = 1000
local n = 1000000
local array = ffi.new("char[?]", n)
for i = 0, n-1 do array[i] = i end
local avx2ok, sse2ok = 0, 0
for i = 0, n-1 do array[i] = i end
for i = 1, tests do
local initial = math.random(0, 0xFFFF)
local ref = C.cksum_generic(array+i*2, i*10+i, initial)
if have_avx2 and C.cksum_avx2(array+i*2, i*10+i, initial) == ref then
avx2ok = avx2ok + 1
end
if have_sse2 and C.cksum_sse2(array+i*2, i*10+i, initial) == ref then
sse2ok = sse2ok + 1
end
local ref = C.cksum_generic(array+i*2, i*10+i, initial)
assert(ipsum(array+i*2, i*10+i, initial) == ref, "API function check")
end
if have_avx2 then print("avx2: "..avx2ok.."/"..tests) else print("no avx2") end
if have_sse2 then print("sse2: "..sse2ok.."/"..tests) else print("no sse2") end
selftest_ipv4_tcp()
assert(not have_avx2 or avx2ok == tests, "AVX2 test failed")
assert(not have_sse2 or sse2ok == tests, "SSE2 test failed")
print("selftest: ok")
end

Expand Down

0 comments on commit 24ee9fb

Please sign in to comment.