Skip to content

Commit

Permalink
An attempt at implementing wasi sdk's llvm "wasm assemble" codegen fo…
Browse files Browse the repository at this point in the history
…r retest.

This is super awkward, especially around setting up the import for linear memory. I think importing the memory is the proper thing to do here, since it's part of the externally-facing ABI for the wasm program. And really I'd cut corners with the wat-based codegen by not using an import there. So perhaps we ought to output the same definition for both; I'm not sure.
  • Loading branch information
katef committed Oct 21, 2024
1 parent 905736a commit 520e173
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 34 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ jobs:
cc: [ clang, gcc ]
make: [ bmake ]
debug: [ DEBUG, RELEASE ] # RELEASE=1 is a no-op
lang: [ "vm -x v1", "vm -x v2", asm, c, rust, vmc, vmops, go, goasm, llvm, wasm2c ]
lang: [ "vm -x v1", "vm -x v2", asm, c, rust, vmc, vmops, go, goasm, llvm, wasm2c, wat2c ]
exclude:
- os: macos
cc: gcc # it's clang anyway
Expand Down Expand Up @@ -513,7 +513,7 @@ jobs:
go version
- name: Dependencies (Ubuntu/Wabt)
if: matrix.os == 'ubuntu' && matrix.lang == 'wasm2c'
if: matrix.os == 'ubuntu' && (matrix.lang == 'wasm2c' || matrix.lang == 'wat2c')
run: |
uname -a
sudo apt-get install wabt
Expand All @@ -534,6 +534,7 @@ jobs:

- name: Run PCRE suite (${{ matrix.lang }})
run: CC=${{ matrix.cc }} ./${{ env.build }}/bin/retest -O1 -l ${{ matrix.lang }} ${{ env.cvtpcre }}/*.tst
# TODO: pass WASM2C=/opt/wasi-sdk/bin/clang and install wasi-sdk (there is a .deb package i think)

docs:
name: Documentation
Expand Down
4 changes: 3 additions & 1 deletion src/libfsm/print/wasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -499,10 +499,11 @@ fsm_print_wasm(FILE *f,
fprintf(f, ".global %smatch\n", prefix);
fprintf(f, ".hidden %smatch\n", prefix);
fprintf(f, ".type %smatch,@function\n", prefix);
// fprintf(f, "(memory 1 1)\n"); // TODO(dgryski): I guess we don't need this line?
// XXX: i think we do because without it clang introduces w2c_env_0x5F_linear_memory and i *think* that's what we want, but named .M0 in the generated C
fprintf(f, "%smatch:\n", prefix);
fprintf(f, ".functype %smatch (i32) -> (i32)\n", prefix);
fprintf(f, ".local i32, i32\n");
// fprintf(f, "// (memory 1 1)\n"); // TODO(dgryski): I guess we don't need this line?
break;

case DIALECT_WAT:
Expand Down Expand Up @@ -621,6 +622,7 @@ fsm_print_wasm(FILE *f,
* we're able to use multi-memory.
*/
// TODO: i could at least consolidate runs and use i32.ge_u/i32.le_u
// would prefer to do that as range consolidation in the vm ir
for (i = 0; i < ir->n; i++) {
if (!ir->states[i].isend) {
continue;
Expand Down
2 changes: 2 additions & 0 deletions src/retest/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1290,6 +1290,8 @@ main(int argc, char *argv[])
impl = IMPL_VMOPS;
} else if (strcmp(optarg, "wasm2c") == 0) {
impl = IMPL_WASM2C;
} else if (strcmp(optarg, "wat2c") == 0) {
impl = IMPL_WAT2C;
} else {
fprintf(stderr, "unknown argument to -l: %s\n", optarg);
usage();
Expand Down
2 changes: 2 additions & 0 deletions src/retest/reperf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1174,6 +1174,8 @@ main(int argc, char *argv[])
impl = IMPL_VMOPS;
} else if (strcmp(optarg, "wasm2c") == 0) {
impl = IMPL_WASM2C;
} else if (strcmp(optarg, "wat2c") == 0) {
impl = IMPL_WAT2C;
} else {
fprintf(stderr, "unknown argument to -l: %s\n", optarg);
usage();
Expand Down
149 changes: 119 additions & 30 deletions src/retest/runner.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ print(const struct fsm *fsm,
case IMPL_GOASM: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_AMD64_GO); break;
case IMPL_VMASM: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_AMD64_ATT); break;
case IMPL_GO: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_GO); break;
case IMPL_WASM2C: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_WAT); break;
case IMPL_WASM2C: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_WASM_S); break;
case IMPL_WAT2C: e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_WAT); break;

case IMPL_VMOPS:
e = fsm_print(f, fsm, opt, hooks, FSM_PRINT_VMOPS_H)
Expand Down Expand Up @@ -280,30 +281,63 @@ compile(enum implementation impl,
break;
}

case IMPL_WASM2C: {
case IMPL_WASM2C:
case IMPL_WAT2C: {
char tmp_c[] = "/tmp/fsmcompile-XXXXXX.c";
int fd_c;

{
const char *wat2wasm;
const char *wasm2c, *wasm2cflags, *wasmrt_impl;
const char *wasi_sdk, *wasi_sdkflags;

wat2wasm = getenv("WAT2WASM");

wasm2c = getenv("WASM2C");
wasm2cflags = getenv("WASM2CFLAGS");
wasi_sdk = getenv("WASISDK");
wasi_sdkflags = getenv("WASISDKFLAGS");
wasmrt_impl = getenv("WASMRTIMPL");

fd_c = xmkstemps(tmp_c);

if (0 != systemf("%s --enable-multi-memory %s -o /dev/stdout | %s %s - >> %s",
wat2wasm ? wat2wasm : "wat2wasm",
tmp_src,
wasm2c ? wasm2c : "wasm2c",
wasm2cflags ? wasm2cflags : "",
tmp_c, tmp_src))
{
return 0;
//XXX: for wasi-sdk, IMPL_WASISDK2C we'd use LANG_WASM_S and /opt/wasi-sdk/bin/clang-18 x.s -o x.wasm to produce a wasm binary. then wasm2c per usual.
//unsure if we need an equivalent to --enable-multi-memory for wasi-sdk
// we could do this in two steps and produce an intermediate file. but who likes intermediate files?

// TODO: which of -g, -Werror etc are relevant for wasi-sdk's clang?
// TODO: grep out
// (import "env" "__linear_memory" (memory (;0;) 0))
// ...somehow
switch (impl) {
case IMPL_WASM2C:
if (0 != systemf("%s -c %s %s -o /dev/stdout | %s %s - >> %s",
wasi_sdk ? wasi_sdk : "clang",
wasi_sdkflags ? wasi_sdkflags : "",
tmp_src,
wasm2c ? wasm2c : "wasm2c",
wasm2cflags ? wasm2cflags : "",
tmp_c, tmp_src))
{
return 0;
}
break;

case IMPL_WAT2C:
if (0 != systemf("%s --enable-multi-memory %s -o /dev/stdout | %s %s - >> %s",
wat2wasm ? wat2wasm : "wat2wasm",
tmp_src,
wasm2c ? wasm2c : "wasm2c",
wasm2cflags ? wasm2cflags : "",
tmp_c, tmp_src))
{
return 0;
}
break;

default:
assert(!"unreached");
abort();
}

/* append trampoline */
Expand All @@ -323,13 +357,29 @@ compile(enum implementation impl,
fprintf(f, "#include <stdio.h>\n");
fprintf(f, "\n");

if (impl == IMPL_WASM2C) {
fprintf(f, "struct w2c_env { wasm_rt_memory_t mem; };\n");
fprintf(f, "wasm_rt_memory_t *w2c_env_0x5F_linear_memory(struct w2c_env *env) { return &env->mem; }\n");
fprintf(f, "\n");
}

fprintf(f, "int retest_trampoline(const char *s) {\n");
fprintf(f, "#if HAVE_WASM_INSTANTIATE\n");
fprintf(f, " struct w2c_ ctx;\n");
if (impl == IMPL_WASM2C) {
fprintf(f, " struct w2c_env env;\n");
}
fprintf(f, "#endif\n");
fprintf(f, " size_t n;\n");
fprintf(f, "\n");

/* copies just for convenience of naming */
// XXX: explain we have two layers of generated code (wasi-sdk's wasm binary, and wasm2c). both introduce their own identifiers or name mangling. here we're scraping everything under one interface
// this gets confusing because some differences apply during compilation of the generated C (i.e. output as #ifdefs here), and some apply at retest(1) invocation time (i.e. as switches on impl). and on top of that, we attempt to support backwards compatibility for wasm2c as its generated API changes between versions.
fprintf(f, " uint64_t mem_size;\n");
fprintf(f, " uint8_t *mem_data;\n");
fprintf(f, "\n");

fprintf(f, " assert(s != NULL);\n");
fprintf(f, "\n");

Expand All @@ -338,32 +388,66 @@ compile(enum implementation impl,
fprintf(f, "#endif\n");
fprintf(f, "\n");

fprintf(f, "#if HAVE_WASM_INSTANTIATE\n");
fprintf(f, " wasm2c__instantiate(&ctx);\n");
fprintf(f, "#else\n");
fprintf(f, " fsm_init();\n");
fprintf(f, "#endif\n");
fprintf(f, "\n");
{
	/*
	 * Select the C expressions (as text) that name the size and data
	 * pointer of the module's linear memory in the wasm2c-generated code.
	 * These strings are spliced into the emitted trampoline below, either
	 * prefixed by "ctx." (instantiate-style API) or used bare.
	 */
	const char *M0_size = NULL;
	const char *M0_data = NULL;

	switch (impl) {
	case IMPL_WASM2C:
		/*
		 * wasi-sdk clang generates (in the binary wasm):
		 *
		 * (import "env" "__linear_memory" (memory (;0;) 0))
		 *
		 * which wasm2c compiles to the following identifier.
		 */
		// instance->w2c_env_0x5F_linear_memory depends on init_instance_import() to get initialised.
		// and where are we even importing from? i think this is wasi-sdk's generated "env" struct
		// i would love to not have the import, and therefore no "env". meanwhile i'm working with it
		M0_size = "w2c_env_0x5F_linear_memory->size";
		M0_data = "w2c_env_0x5F_linear_memory->data";
		break;

	case IMPL_WAT2C:
		/* the wat codegen defines its own memory rather than importing one */
		M0_size = "w2c_M0.size";
		M0_data = "w2c_M0.data";
		break;

	default:
		/* abort even under NDEBUG, so the names are never used unset */
		assert(!"unreached");
		abort();
	}

	/*
	 * The imported memory must exist before instantiation.
	 * NOTE(review): the emitted "env" variable is declared only under
	 * HAVE_WASM_INSTANTIATE, but this call is emitted unconditionally;
	 * confirm the non-instantiate path is never built for IMPL_WASM2C.
	 */
	if (impl == IMPL_WASM2C) {
		fprintf(f, " wasm_rt_allocate_memory(&env.mem, 1, 1, false);\n");
	}

	fprintf(f, "#if HAVE_WASM_INSTANTIATE\n");
	fprintf(f, " wasm2c__instantiate(&ctx");
	if (impl == IMPL_WASM2C) {
		/* the imported "env" instance is passed as an extra argument */
		fprintf(f,", &env");
	}
	fprintf(f, ");\n");
	fprintf(f, " mem_size = ctx.%s;\n", M0_size);
	fprintf(f, " mem_data = ctx.%s;\n", M0_data);
	fprintf(f, "#else\n");
	fprintf(f, " fsm_init();\n");
	fprintf(f, " mem_size = %s;\n", M0_size);
	fprintf(f, " mem_data = %s;\n", M0_data);
	fprintf(f, "#endif\n");
	fprintf(f, "\n");
}

/* TODO: we could grow the memory region to size.
* but a page is 64kB, probably enough for our test strings */
fprintf(f, " n = strlen(s);\n");
fprintf(f, "#if HAVE_WASM_INSTANTIATE\n");
fprintf(f, " if (n + 1 > ctx.w2c_M0.size) {\n");
fprintf(f, "#else\n");
fprintf(f, " if (n + 1 > w2c_M0.size) {\n");
fprintf(f, "#endif\n");
fprintf(f, " if (n + 1 > mem_size) {\n");
fprintf(f, " fprintf(stderr, \"overflow\");\n");
fprintf(f, " abort();\n");
fprintf(f, " }\n");
fprintf(f, "\n");

/* XXX: placeholder */
fprintf(f, "#if HAVE_WASM_INSTANTIATE\n");
fprintf(f, " memcpy(ctx.w2c_M0.data, s, n);\n");
fprintf(f, "#else\n");
fprintf(f, " memcpy(w2c_M0.data, s, n);\n");
fprintf(f, "#endif\n");
fprintf(f, " memcpy(mem_data, s, n);\n");

/* TODO: would deal with different IO APIs here */
fprintf(f, " u32 p = 0;\n");
Expand All @@ -380,7 +464,7 @@ compile(enum implementation impl,
fprintf(f, "#if HAVE_WASM_FREE\n");
fprintf(f, " wasm2c__free(&ctx);\n");
fprintf(f, "#else\n");
fprintf(f, " free(w2c_M0.data);\n");
fprintf(f, " free(mem_data);\n");
fprintf(f, "#endif\n");
fprintf(f, "\n");

Expand Down Expand Up @@ -495,9 +579,10 @@ runner_init_compiled(struct fsm *fsm,
case IMPL_RUST: tmp_src = tmp_src_rs; break;
case IMPL_LLVM: tmp_src = tmp_src_ll; break;
case IMPL_GOASM:
case IMPL_VMASM: tmp_src = tmp_src_s; break;
case IMPL_GO: tmp_src = tmp_src_go; break;
case IMPL_WASM2C: tmp_src = tmp_src_wat; break;
case IMPL_VMASM: tmp_src = tmp_src_s; break;
case IMPL_GO: tmp_src = tmp_src_go; break;
case IMPL_WASM2C: tmp_src = tmp_src_s; break;
case IMPL_WAT2C: tmp_src = tmp_src_wat; break;

case IMPL_INTERPRET:
assert(!"unreached");
Expand Down Expand Up @@ -581,6 +666,7 @@ runner_init_compiled(struct fsm *fsm,
break;

case IMPL_WASM2C:
case IMPL_WAT2C:
r->u.impl_wasm2c.h = h;
r->u.impl_wasm2c.func = (int (*)(const unsigned char *)) (uintptr_t) dlsym(h, "retest_trampoline");
break;
Expand Down Expand Up @@ -616,6 +702,7 @@ fsm_runner_initialize(struct fsm *fsm, const struct fsm_options *opt,
case IMPL_GO:
case IMPL_GOASM:
case IMPL_WASM2C:
case IMPL_WAT2C:
return runner_init_compiled(fsm, opt, r, impl);

case IMPL_INTERPRET:
Expand Down Expand Up @@ -678,6 +765,7 @@ fsm_runner_finalize(struct fsm_runner *r)
break;

case IMPL_WASM2C:
case IMPL_WAT2C:
if (r->u.impl_wasm2c.h != NULL) {
dlclose(r->u.impl_wasm2c.h);
}
Expand Down Expand Up @@ -741,6 +829,7 @@ fsm_runner_run(const struct fsm_runner *r, const char *s, size_t n)
*/

case IMPL_WASM2C:
case IMPL_WAT2C:
assert(r->u.impl_wasm2c.func != NULL);
return r->u.impl_wasm2c.func((const unsigned char *) s);
}
Expand Down
3 changes: 2 additions & 1 deletion src/retest/runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ enum implementation {
IMPL_VMASM,
IMPL_VMC,
IMPL_VMOPS,
IMPL_WASM2C,
IMPL_WASM2C, /* FSM_PRINT_WASM_S and wasm2c */
IMPL_WAT2C, /* FSM_PRINT_WAT and wasm2c */
};

struct fsm_runner {
Expand Down

0 comments on commit 520e173

Please sign in to comment.