Skip to content

Commit

Permalink
[cases] add rvv_bench
Browse files Browse the repository at this point in the history
  • Loading branch information
SharzyL committed May 20, 2024
1 parent 00969c5 commit 3e4bfc0
Show file tree
Hide file tree
Showing 32 changed files with 3,443 additions and 1 deletion.
3 changes: 2 additions & 1 deletion tests/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,12 @@ let
asm = casesSelf.callPackage ./asm { };
perf = casesSelf.callPackage ./perf { };
codegen = casesSelf.callPackage ./codegen { };
rvv_bench = casesSelf.callPackage ./rvv_bench { };
}));

# remove non-case attributes in scope
scopeStripped = {
inherit (scope) mlir intrinsic asm perf codegen;
inherit (scope) mlir intrinsic asm perf codegen rvv_bench;
};

all =
Expand Down
170 changes: 170 additions & 0 deletions tests/rvv_bench/_include/bench.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#include "config.h"
#include "nolibc.h"

#ifndef BENCH_NEXT
#define BENCH_NEXT NEXT
#endif

#define MX(f, F) f(F##_m1) f(F##_m2) f(F##_m4) f(F##_m8)
#define STR(x) STR_(x)
#define STR_(x) #x

#define ROTL(x, n) (((x) << (n)) | ((x) >> (8 * sizeof(x) - (n))))

#if defined(__clang__) || defined(__GNUC__) || defined(__INTEL_COMPILER)

#define BENCH_CLOBBER() ({ __asm volatile("" ::: "memory"); })
#define BENCH_VOLATILE(x) \
({ __asm volatile("" : "+g"(x) : "g"(x) : "memory"); })
#define BENCH_VOLATILE_REG(x) \
({ __asm volatile("" : "+r"(x) : "r"(x) : "memory"); })
#define BENCH_VOLATILE_MEM(x) \
({ __asm volatile("" : "+m"(x) : "m"(x) : "memory"); })
#define BENCH_FENCE() ({ __asm volatile("fence.i"); })

#define BENCH_MAY_ALIAS __attribute__((__may_alias__))

#else

#define BENCH_CLOBBER()
#define BENCH_CLOBBER_WITH(x) (bench__use_ptr(&(x)), BENCH_CLOBBER())
#define BENCH_CLOBBER_WITH_REG(x) (bench__use_ptr(&(x)), BENCH_CLOBBER())
#define BENCH_CLOBBER_WITH_MEM(x) (bench__use_ptr(&(x)), BENCH_CLOBBER())
static void bench_use_ptr(char const volatile *x) {}

#define BENCH_MAY_ALIAS

#endif

static int compare_ux(void const *a, void const *b) {
ux A = *(ux *)a, B = *(ux *)b;
return A < B ? -1 : A > B ? 1 : 0;
}

typedef struct {
ux x, y, z;
} RandState;
static RandState randState = {123, 456, 789};

/* RomuDuoJr, see https://romu-random.org/ */
static ux urand(void) {
ux xp = randState.x, yp = randState.y, zp = randState.z;
randState.x = 3323815723u * zp;
randState.y = ROTL(yp - xp, 6);
randState.z = ROTL(zp - yp, 22);
return xp;
}

typedef struct {
char const *name;
void *func;
} Impl;
typedef struct {
size_t N;
char const *name;
ux (*func)(void *, size_t);
} Bench;

static unsigned char *mem = 0;

void bench_main(void);
ux checksum(size_t n);
void init(void);

static void memrand(void *ptr, size_t n) {
unsigned char *p = ptr;
#ifdef __GNUC__
typedef ux __attribute__((__may_alias__)) uxa;
for (; n && (uintptr_t)p % sizeof(uxa); --n)
*p++ = urand();
uxa *px = (uxa *)p;
for (; n > sizeof(ux); n -= sizeof(ux))
*px++ = urand();
p = (unsigned char *)px;
#endif
while (n--)
*p++ = urand();
}

#if __STDC_HOSTED__
#include <stdlib.h>
#else
static ux heap[1 + MAX_MEM / sizeof(ux)];
#endif

int test(void) {

#if __STDC_HOSTED__
mem = malloc(MAX_MEM);
#else
mem = (unsigned char *)heap;
#endif

size_t x;
randState.x ^= rv_cycles() * 7;
randState.y += rv_cycles() ^ (uintptr_t)&x + 666 * (uintptr_t)mem;

/* initialize memory */
memrand(mem, MAX_MEM);

init();
bench_main();
#if __STDC_HOSTED__
free(mem);
#endif
return 0;
}

static fx bench_time(size_t n, Impl impl, Bench bench) {
static ux arr[MAX_REPEATS];
size_t total = 0, repeats = 0;
for (; repeats < MAX_REPEATS; ++repeats) {
total += arr[repeats] = bench.func(impl.func, n);
if (repeats > MIN_REPEATS && total > STOP_CYCLES)
break;
}
#if MAX_REPEATS > 4
qsort(arr, repeats, sizeof *arr, compare_ux);
ux sum = 0, count = 0;
for (size_t i = repeats * 0.2f; i < repeats * 0.8f; ++i, ++count)
sum += arr[i];
#else
ux sum = 0, count = repeats;
for (size_t i = 0; i < repeats; ++i)
sum += arr[i];
#endif
return n / ((fx)sum / count);
}

static void bench_run(size_t nImpls, Impl *impls, size_t nBenches,
Bench *benches) {
for (Bench *b = benches; b != benches + nBenches; ++b) {
size_t N = b->N;
for (Impl *i = impls; i != impls + nImpls; ++i) {
printf("[");
for (size_t n = 1; n < N; n = BENCH_NEXT(n)) {
ux si = 0, s0 = 0;
printf("%f, ", bench_time(n, *i, *b));
}
printf("],\n");
}
printf("]\n},\n");
}
}

#define TIME \
for (ux beg = rv_cycles(), _once = 1; _once; \
BENCH_FENCE(), _cycles += rv_cycles() - beg, _once = 0)

#define BENCH(name) \
ux bench_##name(void *_func, size_t n) { \
Func *f = _func; \
ux _cycles = 0;
#define BENCH_END \
return _cycles; \
}

#define BENCH_MAIN(impls, benches) \
void bench_main(void) { \
bench_run(ARR_LEN(impls), impls, ARR_LEN(benches), benches); \
}
25 changes: 25 additions & 0 deletions tests/rvv_bench/_include/config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/* processor specific configs */
#define HAS_E64 (__riscv_v_elen >= 64)
#define HAS_F16 0

/* the maximum number of bytes to allocate, minimum of 4096 */
#define MAX_MEM (4096 * 8)
/* the byte count for the next run */
#define NEXT(c) (c + c / 3 + 3)

/* minimum number of repeats, to sample median from */
#define MIN_REPEATS 1
/* maxium number of repeats, executed until more than STOP_TIME has elapsed */
#define MAX_REPEATS 1

/* stop repeats early afer this many cycles have elapsed */
#define STOP_CYCLES (1024 * 1024 * 500)

/* custom scaling factors for benchmarks, these are used to make sure each
* benchmark approximately takes the same amount of time. */

#define SCALE_mandelbrot(N) ((N) / 10)
#define SCALE_mergelines(N) ((N) / 10)

/* benchmark specific configurations */
#define mandelbrot_ITER 100
80 changes: 80 additions & 0 deletions tests/rvv_bench/_include/nolibc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#pragma once

#include <float.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if __riscv_xlen == 32
typedef uint32_t ux;
typedef float fx;
#define IF64(...)
#elif __riscv_xlen == 64
typedef uint64_t ux;
typedef double fx;
#define IF64(...) __VA_ARGS__
#else
#error "unsupported XLEN"
#endif
#define ARR_LEN(x) (sizeof x / sizeof *(x))

static void memwrite(void const *ptr, size_t len) {
fwrite(ptr, 1, len, stdout);
}

static size_t memread(void *ptr, size_t len) {
return fread(ptr, 1, len, stdin);
}

static inline ux rv_cycles(void) {
ux cycle;
__asm volatile("csrr %0, mcycle" : "=r"(cycle));
return cycle;
}

static void memswap(void *a, void *b, size_t size) {
unsigned char *A = (unsigned char *)a, *B = (unsigned char *)b;
unsigned char *aEnd = A + size;
while (A < aEnd) {
unsigned char temp = *A;
*A++ = *B;
*B++ = temp;
}
}

static ux usqrt(ux y) {
ux L = 0, R = y + 1;
while (L != R - 1) {
ux M = (L + R) / 2;
if (M * M <= y)
L = M;
else
R = M;
}
return L;
}

static ux uhash(ux x) {
#if __riscv_xlen == 32
/* MurmurHash3 32-bit finalizer */
x ^= x >> 16;
x *= 0x85ebca6b;
x ^= x >> 13;
x *= 0xc2b2ae35;
x ^= x >> 16;
#else
/* splitmix64 finalizer */
x ^= x >> 30;
x *= 0xbf58476d1ce4e5b9U;
x ^= x >> 27;
x *= 0x94d049bb133111ebU;
x ^= x >> 31;
#endif
return x;
}

#define IFHOSTED(...) __VA_ARGS__
80 changes: 80 additions & 0 deletions tests/rvv_bench/_include/template.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#define HAS_RVV_1_0 1
#include "config.h"
.text
.balign 8

#define CAT_(a,b) a##b
#define CAT(a,b) CAT_(a,b)

#define STR(x) #x
#define STRe(x) STR(x)

#define MX_N 0
#include STRe(INC)

#undef MX_N

#define MX_N 1
#define MX8(x) x##m8
#define MX4(x) x##m4
#define MX2(x) x##m2
#define MX(x) x##m1
#if HAS_RVV_1_0
#define MXf2(x) x##mf2
#define MXf4(x) x##mf4
# define MXf8(x) x##mf8
#endif
#include STRe(INC)

#undef MX_N
#undef MX8
#undef MX4
#undef MX2
#undef MX
#undef MXf2
#undef MXf4
#undef MXf8

#define MX_N 2
#define MX4(x) x##m8
#define MX2(x) x##m4
#define MX(x) x##m2
#define MXf2(x) x##m1
#if HAS_RVV_1_0
#define MXf4(x) x##mf2
# define MXf8(x) x##mf4
#endif
#include STRe(INC)

#undef MX_N
#undef MX4
#undef MX2
#undef MX
#undef MXf2
#undef MXf4
#undef MXf8

#define MX_N 4
#define MX2(x) x##m8
#define MX(x) x##m4
#define MXf2(x) x##m2
#define MXf4(x) x##m1
#if HAS_RVV_1_0
# define MXf8(x) x##mf2
#endif
#include STRe(INC)

#undef MX_N
#undef MX2
#undef MX
#undef MXf2
#undef MXf4
#undef MXf8

#define MX_N 8
#define MX(x) x##m8
#define MXf2(x) x##m4
#define MXf4(x) x##m2
#define MXf8(x) x##m1
#include STRe(INC)

Loading

0 comments on commit 3e4bfc0

Please sign in to comment.