Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip] IP checksum in AVX2 assembler (prototype rewrite) #899

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ PFLUASRC = $(shell cd ../lib/pflua/src && \
CSRC = $(shell find . -regex '[^\#]*\.c' -not -regex './arch/.*' -printf '%P ')
CHDR = $(shell find . -regex '[^\#]*\.h' -printf '%P ')
ASM = $(shell find . -regex '[^\#]*\.dasl' -printf '%P ')
ARCHSRC= $(shell find . -regex '^./arch/[^\#]*\.c' -printf '%P ')
RMSRC = $(shell find . -name '*.md' -not -regex './obj.*' -printf '%P ')
# regexp is to include program/foo but not program/foo/bar
PROGRAM = $(shell find program -regex '^[^/]+/[^/]+' -type d -printf '%P ')
Expand All @@ -26,7 +25,7 @@ LUAOBJ := $(patsubst %.lua,obj/%_lua.o,$(LUASRC))
PFLUAOBJ := $(patsubst %.lua,obj/%_lua.o,$(PFLUASRC))
COBJ := $(patsubst %.c,obj/%_c.o, $(CSRC))
HOBJ := $(patsubst %.h,obj/%_h.o, $(CHDR))
ARCHOBJ:= $(patsubst %.c,obj/%_c.o, $(ARCHSRC))
ARCHOBJ:= obj/arch/checksum_c.o obj/arch/checksum_avx2_c.o
ASMOBJ := $(patsubst %.dasl,obj/%_dasl.o, $(ASM))
JITOBJS:= $(patsubst %,obj/jit_%.o,$(JITSRC))
EXTRAOBJS := obj/jit_tprof.o obj/jit_vmprof.o obj/strict.o
Expand Down Expand Up @@ -126,13 +125,13 @@ $(COBJ): obj/%_c.o: %.c $(CHDR) Makefile | $(OBJDIR)
$(E) "C $@"
$(Q) gcc $(DEBUG) -Wl,-E -I ../lib/luajit/src -I . -include $(CURDIR)/../gcc-preinclude.h -c -Wall -Werror -o $@ $<

obj/arch/avx2_c.o: arch/avx2.c Makefile
obj/arch/checksum_avx2_c.o: arch/checksum.c Makefile
$(E) "C(AVX2) $@"
$(Q) gcc -O2 -mavx2 $(DEBUG) -Wl,-E -I ../lib/luajit/src -I . -include $(CURDIR)/../gcc-preinclude.h -c -Wall -Werror -o $@ $<
$(Q) gcc -Dcksum=cksum_avx2 -O3 -mavx2 $(DEBUG) -Wl,-E -I ../lib/luajit/src -I . -include $(CURDIR)/../gcc-preinclude.h -c -Wall -Werror -o $@ $<

obj/arch/sse2_c.o: arch/sse2.c Makefile
$(E) "C(SSE2) $@"
$(Q) gcc -O2 -msse2 $(DEBUG) -Wl,-E -I ../lib/luajit/src -I . -include $(CURDIR)/../gcc-preinclude.h -c -Wall -Werror -o $@ $<
obj/arch/checksum_c.o: arch/checksum.c Makefile
$(E) "C(SSE4) $@"
$(Q) gcc -msse4 -O3 $(DEBUG) -Wl,-E -I ../lib/luajit/src -I . -include $(CURDIR)/../gcc-preinclude.h -c -Wall -Werror -o $@ $<

$(HOBJ): obj/%_h.o: %.h Makefile | $(OBJDIR)
$(E) "H $@"
Expand Down
84 changes: 0 additions & 84 deletions src/arch/avx2.c

This file was deleted.

42 changes: 42 additions & 0 deletions src/arch/checksum.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/* Use of this source code is governed by the Apache 2.0 license; see COPYING. */
/* IP checksum routines. */

#include <arpa/inet.h>
#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/time.h>

/*
 * Compute the 16-bit ones'-complement Internet checksum of a buffer.
 *
 *   p       - first byte of the data to sum; no alignment is required.
 *   len     - number of bytes to sum (may be zero or odd).
 *   initial - value folded into the sum before the data is added. It is
 *             passed through htons() on entry, mirroring the ntohs()
 *             applied to the result.
 *
 * Returns the complemented, folded sum after ntohs().
 *
 * Words are loaded with memcpy() rather than by casting p to a wider
 * pointer type: the buffer is a byte array at an arbitrary offset, so a
 * direct uint32_t/uint16_t dereference would be an unaligned,
 * aliasing-violating access that an optimizing compiler is free to
 * miscompile. Compilers turn these fixed-size copies into plain loads.
 */
uint16_t cksum(unsigned char *p, size_t len, uint16_t initial)
{
  uint64_t sum = htons(initial);
  uint64_t sum1 = 0;
  uint32_t w0, w1;
  uint16_t h;

  /* Main loop: two independent accumulators, eight bytes per iteration. */
  while (len >= 2 * sizeof(uint32_t)) {
    memcpy(&w0, p, sizeof(w0));
    memcpy(&w1, p + sizeof(w0), sizeof(w1));
    sum  += w0;
    sum1 += w1;
    p    += 2 * sizeof(uint32_t);
    len  -= 2 * sizeof(uint32_t);
  }
  sum += sum1;

  /* Remaining whole 16-bit words (at most three). */
  while (len >= sizeof(h)) {
    memcpy(&h, p, sizeof(h));
    sum += h;
    p   += sizeof(h);
    len -= sizeof(h);
  }

  /* Odd trailing byte: treat it as a 16-bit word padded with a zero byte,
     laid out in memory order so the value is right on either endianness. */
  if (len == 1) {
    unsigned char tail[2] = { *p, 0 };
    memcpy(&h, tail, sizeof(h));
    sum += h;
  }

  /* Fold the carries back in until the sum fits in 16 bits. */
  while (sum >> 16)
    sum = (sum & 0xFFFF) + (sum >> 16);
  return ntohs((uint16_t)~sum);
}


94 changes: 0 additions & 94 deletions src/arch/sse2.c

This file was deleted.

9 changes: 2 additions & 7 deletions src/lib/checksum.h
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
/* Use of this source code is governed by the Apache 2.0 license; see COPYING. */

// Calculate IP checksum using SSE2 instructions.
// (This will crash if you call it on a CPU that does not support SSE.)
uint16_t cksum_sse2(unsigned char *p, size_t n, uint16_t initial);
// Calculate IP checksum.
uint16_t cksum(unsigned char *p, size_t n, uint16_t initial);

// Calculate IP checksum using AVX2 instructions.
// (This will crash if you call it on a CPU that does not support AVX2.)
uint16_t cksum_avx2(unsigned char *p, size_t n, uint16_t initial);

// Calculate IP checksum using portable C code.
// This works on all hardware.
uint16_t cksum_generic(unsigned char *p, size_t n, uint16_t initial);

// Incrementally update checksum when modifying a 16-bit value.
void checksum_update_incremental_16(uint16_t* checksum_cell,
uint16_t* value_cell,
Expand Down
17 changes: 4 additions & 13 deletions src/lib/checksum.lua
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,8 @@ local band, lshift = bit.band, bit.lshift
local cpuinfo = lib.readfile("/proc/cpuinfo", "*a")
assert(cpuinfo, "failed to read /proc/cpuinfo for hardware check")
local have_avx2 = cpuinfo:match("avx2")
local have_sse2 = cpuinfo:match("sse2")

if have_avx2 then ipsum = C.cksum_avx2
elseif have_sse2 then ipsum = C.cksum_sse2
else ipsum = C.cksum_generic end

if have_avx2 then ipsum = C.cksum_avx2 else ipsum = C.cksum end

function finish_packet (buf, len, offset)
ffi.cast('uint16_t *', buf+offset)[0] = lib.htons(ipsum(buf, len, 0))
Expand Down Expand Up @@ -102,27 +98,22 @@ end

function selftest ()
print("selftest: checksum")
local tests = 1000
local tests = 10000
local n = 1000000
local array = ffi.new("char[?]", n)
for i = 0, n-1 do array[i] = i end
for i = 0, n-1 do array[i] = math.random(256) end
local avx2ok, sse2ok = 0, 0
for i = 1, tests do
local initial = math.random(0, 0xFFFF)
local ref = C.cksum_generic(array+i*2, i*10+i, initial)
local ref = C.cksum(array+i*2, i*10+i, initial)
if have_avx2 and C.cksum_avx2(array+i*2, i*10+i, initial) == ref then
avx2ok = avx2ok + 1
end
if have_sse2 and C.cksum_sse2(array+i*2, i*10+i, initial) == ref then
sse2ok = sse2ok + 1
end
assert(ipsum(array+i*2, i*10+i, initial) == ref, "API function check")
end
if have_avx2 then print("avx2: "..avx2ok.."/"..tests) else print("no avx2") end
if have_sse2 then print("sse2: "..sse2ok.."/"..tests) else print("no sse2") end
selftest_ipv4_tcp()
assert(not have_avx2 or avx2ok == tests, "AVX2 test failed")
assert(not have_sse2 or sse2ok == tests, "SSE2 test failed")
print("selftest: ok")
end

Expand Down
Loading