diff --git a/src/lib/newchecksum.dasl b/src/arch/checksum.dasl similarity index 84% rename from src/lib/newchecksum.dasl rename to src/arch/checksum.dasl index 4651bc56c1..d247edf2ee 100644 --- a/src/lib/newchecksum.dasl +++ b/src/arch/checksum.dasl @@ -35,10 +35,11 @@ local function gen_checksum () | push rbp | mov rbp, rsp -- Accumulative sum. - | xor rax, rax -- Clear out rax. Stores accumulated sum. + | mov rax, rdx -- Dx (3rd argument: initial). + | xchg al, ah -- Swap to convert to host-bytes order. + | mov rcx, rsi -- Rsi (2nd argument; size). | xor r9, r9 -- Clear out r9. Stores value of array. | xor r8, r8 -- Clear out r8. Stores array index. - | mov rcx, rsi -- Rsi (2nd argument; size). Assign rsi to rcx. | 1: | cmp rcx, 32 -- If index is less than 16. | jl >2 -- Jump to branch '2'. @@ -99,6 +100,8 @@ local function gen_checksum () -- One's complement. | not rax -- One-complement of rax. | and rax, 0xffff -- Clear out higher part of rax. + -- Swap. + | xchg al, ah -- Epilogue. | mov rsp, rbp | pop rbp @@ -107,8 +110,6 @@ local function gen_checksum () end end -local newchecksum = assemble("newchecksum", "uint32_t(*)(uint8_t*, uint32_t)", gen_checksum()) - -- Reference implementation in Lua. local function checksum_lua (data, size) local function r16 (data) @@ -132,12 +133,10 @@ local function checksum_lua (data, size) return bit.band(bit.bnot(csum), 0xffff) end +checksum = assemble("checksum", "uint32_t(*)(uint8_t*, uint32_t, uint16_t)", gen_checksum()) + function selftest () require("lib.checksum_h") - local cpuinfo = lib.readfile("/proc/cpuinfo", "*a") - assert(cpuinfo, "failed to read /proc/cpuinfo for hardware check") - local have_avx2 = cpuinfo:match("avx2") - local have_sse2 = cpuinfo:match("sse2") local function create_packet (size) local pkt = { data = ffi.new("uint8_t[?]", size), @@ -165,14 +164,8 @@ function selftest () local pkt = create_packet(size) print(mpps.."M; "..size.." bytes") -- Benchmark for different architectures. - print("Gen: ", benchmark(function() return C.cksum_generic(pkt.data, pkt.length, 0), pkt end, times)) - if have_sse2 then - print("SSE2: ", benchmark(function() return C.cksum_sse2(pkt.data, pkt.length, 0), pkt end, times)) - end - if have_avx2 then - print("AVX2: ", benchmark(function() return C.cksum_avx2(pkt.data, pkt.length, 0), pkt end, times)) - end - print("New: ", benchmark(function() return newchecksum(pkt.data, pkt.length), pkt end, times)) + print("C: ", benchmark(function() return C.cksum_generic(pkt.data, pkt.length, 0), pkt end, times)) + print("ASM: ", benchmark(function() return checksum(pkt.data, pkt.length, 0), pkt end, times)) end local function verify_correctness () local function hex (num) @@ -181,12 +174,12 @@ function selftest () local ntohs = lib.ntohs for size=44,1500 do local pkt = create_packet(size) - assert(hex(ntohs(newchecksum(pkt.data, pkt.length))) == hex(ntohs(checksum_lua(pkt.data, pkt.length)))) - assert(hex(ntohs(newchecksum(pkt.data, pkt.length))) == hex(C.cksum_generic(pkt.data, pkt.length, 0))) + assert(hex(checksum(pkt.data, pkt.length, 0)) == hex(ntohs(checksum_lua(pkt.data, pkt.length)))) + assert(hex(checksum(pkt.data, pkt.length, 0)) == hex(C.cksum_generic(pkt.data, pkt.length, 0))) end end - print("selftest: newchecksum") + print("selftest: checksum") verify_correctness() benchmark_report(44, 14.4) benchmark_report(550, 2) diff --git a/src/lib/checksum.lua b/src/lib/checksum.lua index 05ebc07849..4eb3d801a7 100644 --- a/src/lib/checksum.lua +++ b/src/lib/checksum.lua @@ -10,17 +10,7 @@ local ffi = require("ffi") local C = ffi.C local band, lshift = bit.band, bit.lshift --- Select ipsum(pointer, len, initial) function based on hardware --- capability. -local cpuinfo = lib.readfile("/proc/cpuinfo", "*a") -assert(cpuinfo, "failed to read /proc/cpuinfo for hardware check") -local have_avx2 = cpuinfo:match("avx2") -local have_sse2 = cpuinfo:match("sse2") - -if have_avx2 then ipsum = C.cksum_avx2 -elseif have_sse2 then ipsum = C.cksum_sse2 -else ipsum = C.cksum_generic end - +ipsum = require("arch.checksum").checksum function finish_packet (buf, len, offset) ffi.cast('uint16_t *', buf+offset)[0] = lib.htons(ipsum(buf, len, 0)) @@ -105,24 +95,13 @@ function selftest () local tests = 1000 local n = 1000000 local array = ffi.new("char[?]", n) - for i = 0, n-1 do array[i] = i end - local avx2ok, sse2ok = 0, 0 + for i = 0, n-1 do array[i] = i end for i = 1, tests do local initial = math.random(0, 0xFFFF) - local ref = C.cksum_generic(array+i*2, i*10+i, initial) - if have_avx2 and C.cksum_avx2(array+i*2, i*10+i, initial) == ref then - avx2ok = avx2ok + 1 - end - if have_sse2 and C.cksum_sse2(array+i*2, i*10+i, initial) == ref then - sse2ok = sse2ok + 1 - end + local ref = C.cksum_generic(array+i*2, i*10+i, initial) assert(ipsum(array+i*2, i*10+i, initial) == ref, "API function check") end - if have_avx2 then print("avx2: "..avx2ok.."/"..tests) else print("no avx2") end - if have_sse2 then print("sse2: "..sse2ok.."/"..tests) else print("no sse2") end selftest_ipv4_tcp() - assert(not have_avx2 or avx2ok == tests, "AVX2 test failed") - assert(not have_sse2 or sse2ok == tests, "SSE2 test failed") print("selftest: ok") end