From 815c9b5942d44b43b2c59790212afd4a83efff4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Tokodi?= Date: Fri, 13 Sep 2024 17:19:00 +0200 Subject: [PATCH] Jit: Threads: Add Fence, Wait, and Notify MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a few test cases with non-zero offsets Signed-off-by: Máté Tokodi mate.tokodi@szteszoftver.hu --- .github/workflows/actions.yml | 1 + src/interpreter/ByteCode.h | 111 +++++++-------- src/interpreter/Interpreter.cpp | 7 + src/jit/Backend.cpp | 12 ++ src/jit/ByteCodeParser.cpp | 42 +++++- src/jit/Compiler.h | 4 + src/jit/MemoryInl.h | 116 ++++++++++++++++ src/jit/MemoryUtilInl.h | 9 ++ src/parser/WASMParser.cpp | 2 +- src/runtime/JITExec.cpp | 3 + src/runtime/JITExec.h | 1 + src/runtime/Memory.h | 17 ++- .../atomic_wait_notify_with_offsets.wast | 24 ++++ .../extended/threads/atomic_with_offsets.wast | 130 ++++++++++++++++++ third_party/sljit | 2 +- 15 files changed, 416 insertions(+), 65 deletions(-) create mode 100644 test/extended/threads/atomic_wait_notify_with_offsets.wast create mode 100644 test/extended/threads/atomic_with_offsets.wast diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index a883ed6f5..c1fef49ff 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -271,6 +271,7 @@ jobs: - name: Run Tests run: | $RUNNER --engine="$GITHUB_WORKSPACE/out/extended/walrus" wasm-test-extended + $RUNNER --jit --engine="$GITHUB_WORKSPACE/out/extended/walrus" wasm-test-extended build-test-performance: runs-on: ubuntu-latest diff --git a/src/interpreter/ByteCode.h b/src/interpreter/ByteCode.h index 5e040e390..e70059970 100644 --- a/src/interpreter/ByteCode.h +++ b/src/interpreter/ByteCode.h @@ -586,7 +586,8 @@ class FunctionType; #define FOR_EACH_BYTECODE_ATOMIC_OTHER(F) \ F(MemoryAtomicNotify) \ F(MemoryAtomicWait32) \ - F(MemoryAtomicWait64) + F(MemoryAtomicWait64) \ + F(AtomicFence) #else // Extended Features #define FOR_EACH_BYTECODE_ATOMIC_LOAD_OP(F) #define FOR_EACH_BYTECODE_ATOMIC_STORE_OP(F) @@ -827,6 +828,33 @@ class Const128 : public ByteCode { uint32_t m_value[4]; }; +// dummy ByteCode for 3-input 1-output operation with offset +class TernaryOperationOffset : public ByteCode { +public: + TernaryOperationOffset(Opcode opcode, uint32_t offset, ByteCodeStackOffset src0, ByteCodeStackOffset src1, ByteCodeStackOffset src2, ByteCodeStackOffset dst) + : ByteCode(opcode) + , m_offset(offset) + , m_src0Offset(src0) + , m_src1Offset(src1) + , m_src2Offset(src2) + , m_dstOffset(dst) + { + } + + uint32_t offset() const { return m_offset; } + ByteCodeStackOffset src0Offset() const { return m_src0Offset; } + ByteCodeStackOffset src1Offset() const { return m_src1Offset; } + ByteCodeStackOffset src2Offset() const { return m_src2Offset; } + ByteCodeStackOffset dstOffset() const { return m_dstOffset; } + +protected: + uint32_t m_offset; + ByteCodeStackOffset m_src0Offset; + ByteCodeStackOffset m_src1Offset; + ByteCodeStackOffset m_src2Offset; + ByteCodeStackOffset m_dstOffset; +}; + // dummy ByteCode for binary operation class BinaryOperation : public ByteCodeOffset3 { public: @@ -1741,99 +1769,48 @@ class AtomicRmw : public ByteCode { ByteCodeStackOffset m_dstOffset; }; -class AtomicRmwCmpxchg : public ByteCode { +class AtomicRmwCmpxchg : public TernaryOperationOffset { public: AtomicRmwCmpxchg(Opcode opcode, uint32_t offset, ByteCodeStackOffset src0, ByteCodeStackOffset src1, ByteCodeStackOffset src2, ByteCodeStackOffset dst) - : ByteCode(opcode) - , 
m_offset(offset) - , m_src0Offset(src0) - , m_src1Offset(src1) - , m_src2Offset(src2) - , m_dstOffset(dst) + : TernaryOperationOffset(opcode, offset, src0, src1, src2, dst) + { } - uint32_t offset() const { return m_offset; } - ByteCodeStackOffset src0Offset() const { return m_src0Offset; } - ByteCodeStackOffset src1Offset() const { return m_src1Offset; } - ByteCodeStackOffset src2Offset() const { return m_src2Offset; } - ByteCodeStackOffset dstOffset() const { return m_dstOffset; } - #if !defined(NDEBUG) void dump(size_t pos) { } #endif -protected: - uint32_t m_offset; - ByteCodeStackOffset m_src0Offset; - ByteCodeStackOffset m_src1Offset; - ByteCodeStackOffset m_src2Offset; - ByteCodeStackOffset m_dstOffset; }; -class MemoryAtomicWait32 : public ByteCode { +class MemoryAtomicWait32 : public TernaryOperationOffset { public: MemoryAtomicWait32(uint32_t offset, ByteCodeStackOffset src0, ByteCodeStackOffset src1, ByteCodeStackOffset src2, ByteCodeStackOffset dst) - : ByteCode(Opcode::MemoryAtomicWait32Opcode) - , m_offset(offset) - , m_src0Offset(src0) - , m_src1Offset(src1) - , m_src2Offset(src2) - , m_dstOffset(dst) + : TernaryOperationOffset(Opcode::MemoryAtomicWait32Opcode, offset, src0, src1, src2, dst) { } - - uint32_t offset() const { return m_offset; } - ByteCodeStackOffset src0Offset() const { return m_src0Offset; } - ByteCodeStackOffset src1Offset() const { return m_src1Offset; } - ByteCodeStackOffset src2Offset() const { return m_src2Offset; } - ByteCodeStackOffset dstOffset() const { return m_dstOffset; } - #if !defined(NDEBUG) void dump(size_t pos) { printf("MemoryAtomicWait32 src0: %" PRIu32 " src1: %" PRIu32 " src2: %" PRIu32 " dst: %" PRIu32 " offset: %" PRIu32, (uint32_t)m_src0Offset, (uint32_t)m_src1Offset, (uint32_t)m_src2Offset, (uint32_t)m_dstOffset, (uint32_t)m_offset); } #endif -protected: - uint32_t m_offset; - ByteCodeStackOffset m_src0Offset; - ByteCodeStackOffset m_src1Offset; - ByteCodeStackOffset m_src2Offset; - ByteCodeStackOffset m_dstOffset; }; -class MemoryAtomicWait64 : public ByteCode { +class MemoryAtomicWait64 : public TernaryOperationOffset { public: MemoryAtomicWait64(uint32_t offset, ByteCodeStackOffset src0, ByteCodeStackOffset src1, ByteCodeStackOffset src2, ByteCodeStackOffset dst) - : ByteCode(Opcode::MemoryAtomicWait64Opcode) - , m_offset(offset) - , m_src0Offset(src0) - , m_src1Offset(src1) - , m_src2Offset(src2) - , m_dstOffset(dst) + : TernaryOperationOffset(Opcode::MemoryAtomicWait64Opcode, offset, src0, src1, src2, dst) { } - uint32_t offset() const { return m_offset; } - ByteCodeStackOffset src0Offset() const { return m_src0Offset; } - ByteCodeStackOffset src1Offset() const { return m_src1Offset; } - ByteCodeStackOffset src2Offset() const { return m_src2Offset; } - ByteCodeStackOffset dstOffset() const { return m_dstOffset; } - #if !defined(NDEBUG) void dump(size_t pos) { printf("MemoryAtomicWait64 src0: %" PRIu32 " src1: %" PRIu32 " src2: %" PRIu32 " dst: %" PRIu32 " offset: %" PRIu32, (uint32_t)m_src0Offset, (uint32_t)m_src1Offset, (uint32_t)m_src2Offset, (uint32_t)m_dstOffset, (uint32_t)m_offset); } #endif -protected: - uint32_t m_offset; - ByteCodeStackOffset m_src0Offset; - ByteCodeStackOffset m_src1Offset; - ByteCodeStackOffset m_src2Offset; - ByteCodeStackOffset m_dstOffset; }; class MemoryAtomicNotify : public ByteCode { @@ -1864,6 +1841,22 @@ class MemoryAtomicNotify : public ByteCode { ByteCodeStackOffset m_src1Offset; ByteCodeStackOffset m_dstOffset; }; + +class AtomicFence : public ByteCode { +public: + AtomicFence() + : 
ByteCode(Opcode::AtomicFenceOpcode) + { + } + +#if !defined(NDEBUG) + void dump(size_t pos) + { + } +#endif +protected: + uint32_t m_offset; +}; #endif #if !defined(NDEBUG) diff --git a/src/interpreter/Interpreter.cpp b/src/interpreter/Interpreter.cpp index 3e617a46d..f3af061f5 100644 --- a/src/interpreter/Interpreter.cpp +++ b/src/interpreter/Interpreter.cpp @@ -1077,6 +1077,13 @@ ByteCodeStackOffset* Interpreter::interpret(ExecutionState& state, ADD_PROGRAM_COUNTER(MemoryAtomicNotify); NEXT_INSTRUCTION(); } + DEFINE_OPCODE(AtomicFence) + : + { + // FIXME do nothing + ADD_PROGRAM_COUNTER(AtomicFence); + NEXT_INSTRUCTION(); + } #endif // FOR_EACH_BYTECODE_SIMD_ETC_OP diff --git a/src/jit/Backend.cpp b/src/jit/Backend.cpp index 6c3207abc..257635568 100644 --- a/src/jit/Backend.cpp +++ b/src/jit/Backend.cpp @@ -1243,6 +1243,18 @@ void JITCompiler::compileFunction(JITFunction* jitFunc, bool isExternal) emitAtomic(m_compiler, item->asInstruction()); break; } + case Instruction::AtomicFence: { + emitAtomicFence(m_compiler); + break; + } + case Instruction::AtomicWait: { + emitAtomicWait(m_compiler, item->asInstruction()); + break; + } + case Instruction::AtomicNotify: { + emitAtomicNotify(m_compiler, item->asInstruction()); + break; + } #endif /* ENABLE_EXTENDED_FEATURES */ default: { switch (item->asInstruction()->opcode()) { diff --git a/src/jit/ByteCodeParser.cpp b/src/jit/ByteCodeParser.cpp index 947bd25cd..1a9ee02de 100644 --- a/src/jit/ByteCodeParser.cpp +++ b/src/jit/ByteCodeParser.cpp @@ -235,7 +235,10 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module) OL5(OTAtomicRmwI32, /* SSDTT */ I32, I32, I32 | TMP, PTR, I32 | S1) \ OL5(OTAtomicRmwI64, /* SSDTT */ I32, I64, I64 | TMP, PTR, I64 | S1) \ OL6(OTAtomicRmwCmpxchgI32, /* SSSDTT */ I32, I32, I32, I32 | TMP, PTR, I32 | S1) \ - OL6(OTAtomicRmwCmpxchgI64, /* SSSDTT */ I32, I64, I64, I64 | TMP, PTR, I64 | S1) + OL6(OTAtomicRmwCmpxchgI64, /* SSSDTT */ I32, I64, I64, I64 | TMP, PTR, I64 | S1) \ + OL6(OTAtomicWaitI32, /* SSSDTT */ I32, I32, I64, I32 | TMP, PTR, I32 | S0) \ + OL6(OTAtomicWaitI64, /* SSSDTT */ I32, I64, I64, I32 | TMP, PTR, I64 | S0) \ + OL5(OTAtomicNotify, /* SSDTT */ I32, I32, I32 | TMP, PTR, I32 | S0) #else /* !ENABLE_EXTENDED_FEATURES */ #define OPERAND_TYPE_LIST_EXTENDED #endif /* ENABLE_EXTENDED_FEATURES */ @@ -1343,6 +1346,12 @@ static void compileFunction(JITCompiler* compiler) instr->addInfo(Instruction::kIsCallback); break; } +#if defined(ENABLE_EXTENDED_FEATURES) + case ByteCode::AtomicFenceOpcode: { + group = Instruction::AtomicFence; + FALLTHROUGH; + } +#endif /* ENABLE_EXTENDED_FEATURES */ case ByteCode::UnreachableOpcode: { compiler->append(byteCode, group, opcode, 0, 0); break; } @@ -1958,6 +1967,37 @@ static void compileFunction(JITCompiler* compiler) operands[3] = STACK_OFFSET(atomicRmwCmpxchg->dstOffset()); break; } + case ByteCode::MemoryAtomicWait64Opcode: { + requiredInit = OTAtomicWaitI64; + FALLTHROUGH; + } + case ByteCode::MemoryAtomicWait32Opcode: { + Instruction* instr = compiler->append(byteCode, Instruction::AtomicWait, opcode, 3, 1); + instr->addInfo(Instruction::kIsCallback); + + TernaryOperationOffset* memoryAtomicWait = reinterpret_cast<TernaryOperationOffset*>(byteCode); + Operand* operands = instr->operands(); + instr->setRequiredRegsDescriptor(requiredInit != OTNone ? 
requiredInit : OTAtomicWaitI32); + + operands[0] = STACK_OFFSET(memoryAtomicWait->src0Offset()); + operands[1] = STACK_OFFSET(memoryAtomicWait->src1Offset()); + operands[2] = STACK_OFFSET(memoryAtomicWait->src2Offset()); + operands[3] = STACK_OFFSET(memoryAtomicWait->dstOffset()); + break; + } + case ByteCode::MemoryAtomicNotifyOpcode: { + Instruction* instr = compiler->append(byteCode, Instruction::AtomicNotify, opcode, 2, 1); + instr->addInfo(Instruction::kIsCallback); + + MemoryAtomicNotify* memoryAtomicNotify = reinterpret_cast<MemoryAtomicNotify*>(byteCode); + Operand* operands = instr->operands(); + instr->setRequiredRegsDescriptor(OTAtomicNotify); + + operands[0] = STACK_OFFSET(memoryAtomicNotify->src0Offset()); + operands[1] = STACK_OFFSET(memoryAtomicNotify->src1Offset()); + operands[2] = STACK_OFFSET(memoryAtomicNotify->dstOffset()); + break; + } #endif /* ENABLE_EXTENDED_FEATURES */ default: { ASSERT_NOT_REACHED(); diff --git a/src/jit/Compiler.h b/src/jit/Compiler.h index 3bf40d012..c57fc1b40 100644 --- a/src/jit/Compiler.h +++ b/src/jit/Compiler.h @@ -109,6 +109,10 @@ class InstructionListItem { #if defined(ENABLE_EXTENDED_FEATURES) // Atomic memory operations (e.g. I32AtomicRmwAdd, I64AtomicRmw16OrU) Atomic, + // Special types for thread synchronization operations + AtomicFence, + AtomicWait, + AtomicNotify, #endif /* ENABLE_EXTENDED_FEATURES */ }; diff --git a/src/jit/MemoryInl.h b/src/jit/MemoryInl.h index 03deeda28..524330f0c 100644 --- a/src/jit/MemoryInl.h +++ b/src/jit/MemoryInl.h @@ -1543,4 +1543,120 @@ static void emitAtomic(sljit_compiler* compiler, Instruction* instr) #undef OP_XCHG #undef OP_CMPXCHG +static sljit_s32 atomicWaitCallback(ExecutionContext* context, uint8_t* address, sljit_s32 size) +{ + Instance* instance = context->instance; + + if (!instance->memory(0)->isShared()) { + return ExecutionContext::ExpectedSharedMemError; + } + + uint32_t result = 0; + int64_t timeout = context->tmp2[0]; + int64_t expect = context->tmp1[0]; + + if (size == 8) { + instance->memory(0)->atomicWait(context->state, instance->module()->store(), address, expect, timeout, &result); + } else { + instance->memory(0)->atomicWait(context->state, instance->module()->store(), address, (int32_t)expect, timeout, &result); + } + + context->tmp2[0] = result; + return ExecutionContext::NoError; +} + +static void emitAtomicWait(sljit_compiler* compiler, Instruction* instr) +{ + CompileContext* context = CompileContext::get(compiler); + sljit_s32 size = (instr->opcode() == ByteCode::MemoryAtomicWait64Opcode ? 
8 : 4); + + TernaryOperationOffset* atomicWaitOperation = reinterpret_cast<TernaryOperationOffset*>(instr->byteCode()); + sljit_s32 offset = atomicWaitOperation->offset(); + + Operand* operands = instr->operands(); + MemAddress addr(MemAddress::CheckNaturalAlignment | MemAddress::AbsoluteAddress, instr->requiredReg(0), instr->requiredReg(1), instr->requiredReg(2)); + addr.check(compiler, operands, offset, size); + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + JITArgPair expectedPair; +#endif /* SLJIT_32BIT_ARCHITECTURE */ + JITArg expected; + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (instr->opcode() == ByteCode::MemoryAtomicWait64Opcode) { + expectedPair = JITArgPair(operands + 1); + } else { + expected = JITArg(operands + 1); + } + JITArgPair timeout(operands + 2); +#else /* !SLJIT_32BIT_ARCHITECTURE */ + expected = JITArg(operands + 1); + JITArg timeout(operands + 2); +#endif /* SLJIT_32BIT_ARCHITECTURE */ + JITArg dst(operands + 3); + + struct sljit_jump* memoryShared; + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (instr->opcode() == ByteCode::MemoryAtomicWait64Opcode) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kContextReg), OffsetOfContextField(tmp1) + WORD_LOW_OFFSET, expectedPair.arg1, expectedPair.arg1w); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kContextReg), OffsetOfContextField(tmp1) + WORD_HIGH_OFFSET, expectedPair.arg2, expectedPair.arg2w); + } else { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kContextReg), OffsetOfContextField(tmp1), expected.arg, expected.argw); + } +#else /* !SLJIT_32BIT_ARCHITECTURE */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kContextReg), OffsetOfContextField(tmp1), expected.arg, expected.argw); +#endif /* SLJIT_32BIT_ARCHITECTURE */ + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kContextReg), OffsetOfContextField(tmp2) + WORD_LOW_OFFSET, timeout.arg1, timeout.arg1w); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kContextReg), OffsetOfContextField(tmp2) + WORD_HIGH_OFFSET, timeout.arg2, timeout.arg2w); +#else /* !SLJIT_32BIT_ARCHITECTURE */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kContextReg), OffsetOfContextField(tmp2), timeout.arg, timeout.argw); +#endif /* SLJIT_32BIT_ARCHITECTURE */ + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_EXTRACT_REG(addr.memArg.arg), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, kContextReg, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, size); + + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(W, P, W, W), SLJIT_IMM, GET_FUNC_ADDR(sljit_sw, atomicWaitCallback)); + + memoryShared = sljit_emit_cmp(compiler, SLJIT_EQUAL, SLJIT_IMM, ExecutionContext::NoError, SLJIT_R0, 0); + context->appendTrapJump(ExecutionContext::ExpectedSharedMemError, sljit_emit_jump(compiler, SLJIT_JUMP)); + sljit_set_label(memoryShared, sljit_emit_label(compiler)); + + sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, SLJIT_MEM1(kContextReg), OffsetOfContextField(tmp2)); +} + +static sljit_s32 atomicNotifyCallback(ExecutionContext* context, uint8_t* address) +{ + Instance* instance = context->instance; + uint32_t result = 0; + int32_t count = context->tmp1[0]; + instance->memory(0)->atomicNotify(instance->module()->store(), address, count, &result); + return result; +} + +static void emitAtomicNotify(sljit_compiler* compiler, Instruction* instr) +{ + MemoryAtomicNotify* atomicNotifyOperation = reinterpret_cast<MemoryAtomicNotify*>(instr->byteCode()); + 
sljit_s32 offset = atomicNotifyOperation->offset(); + + Operand* operands = instr->operands(); + MemAddress addr(MemAddress::CheckNaturalAlignment | MemAddress::AbsoluteAddress, instr->requiredReg(0), instr->requiredReg(1), instr->requiredReg(2)); + addr.check(compiler, operands, offset, 4); + + JITArg count(operands + 1); + JITArg dst(operands + 2); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kContextReg), OffsetOfContextField(tmp1), count.arg, count.argw); + + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_EXTRACT_REG(addr.memArg.arg), 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, kContextReg, 0); + + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, P, W), SLJIT_IMM, GET_FUNC_ADDR(sljit_sw, atomicNotifyCallback)); + + sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, SLJIT_R0, 0); +} + #endif /* ENABLE_EXTENDED_FEATURES */ diff --git a/src/jit/MemoryUtilInl.h b/src/jit/MemoryUtilInl.h index 9f912cb7f..2dbc587fc 100644 --- a/src/jit/MemoryUtilInl.h +++ b/src/jit/MemoryUtilInl.h @@ -147,3 +147,12 @@ static void emitDataDrop(sljit_compiler* compiler, Instruction* instr) sljit_sw addr = GET_FUNC_ADDR(sljit_sw, dropData); sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2V(32, W), SLJIT_IMM, addr); } + +#if defined(ENABLE_EXTENDED_FEATURES) + +static void emitAtomicFence(sljit_compiler* compiler) +{ + sljit_emit_op0(compiler, SLJIT_MEMORY_BARRIER); +} + +#endif /* ENABLE_EXTENDED_FEATURES */ diff --git a/src/parser/WASMParser.cpp b/src/parser/WASMParser.cpp index d7114559c..b4e56105e 100644 --- a/src/parser/WASMParser.cpp +++ b/src/parser/WASMParser.cpp @@ -2275,7 +2275,7 @@ class WASMBinaryReader : public wabt::WASMBinaryReaderDelegate { virtual void OnAtomicFenceExpr(uint32_t consistency_model) override { - // FIXME do nothing + pushByteCode(Walrus::AtomicFence(), WASMOpcode::AtomicFenceOpcode); } virtual void OnAtomicNotifyExpr(int opcode, Index memidx, Address alignmentLog2, Address offset) override diff --git a/src/runtime/JITExec.cpp b/src/runtime/JITExec.cpp index ca58add83..6d6ecc4c1 100644 --- a/src/runtime/JITExec.cpp +++ b/src/runtime/JITExec.cpp @@ -83,6 +83,9 @@ ByteCodeStackOffset* JITFunction::call(ExecutionState& state, Instance* instance case ExecutionContext::UnalignedAtomicError: Trap::throwException(state, "unaligned atomic"); return resultOffsets; + case ExecutionContext::ExpectedSharedMemError: + Trap::throwException(state, "expected shared memory"); + return resultOffsets; #endif /* ENABLE_EXTENDED_FEATURES */ default: Trap::throwException(state, "unknown exception"); diff --git a/src/runtime/JITExec.h b/src/runtime/JITExec.h index 9e6a745d2..06b112fbd 100644 --- a/src/runtime/JITExec.h +++ b/src/runtime/JITExec.h @@ -54,6 +54,7 @@ struct ExecutionContext { UnreachableError, #if defined(ENABLE_EXTENDED_FEATURES) UnalignedAtomicError, + ExpectedSharedMemError, #endif /* ENABLE_EXTENDED_FEATURES */ // These three in this order must be the last items of the list. 
diff --git a/src/runtime/Memory.h b/src/runtime/Memory.h index 4c2558012..5783a2d2c 100644 --- a/src/runtime/Memory.h +++ b/src/runtime/Memory.h @@ -211,15 +211,21 @@ class Memory : public Extern { throwUnsharedMemoryException(state); } + atomicWait(state, store, m_buffer + (offset + addend), expect, timeOut, out); + } + + template <typename T> + void atomicWait(ExecutionState& state, Store* store, uint8_t* absoluteAddress, const T& expect, int64_t timeOut, uint32_t* out) const + { T read; - atomicLoad(state, offset, addend, &read); + atomicLoad(state, absoluteAddress - m_buffer, 0, &read); if (read != expect) { // "not-equal", the loaded value did not match the expected value *out = 1; } else { // wait process bool notified = false; - Waiter* waiter = store->getWaiter(static_cast<void*>(m_buffer + (offset + addend))); + Waiter* waiter = store->getWaiter(static_cast<void*>(absoluteAddress)); // lock waiter std::unique_lock<std::mutex> lock(waiter->m_mutex); @@ -259,7 +265,12 @@ class Memory : public Extern { return; } - Waiter* waiter = store->getWaiter(static_cast<void*>(m_buffer + (offset + addend))); + atomicNotify(store, m_buffer + (offset + addend), count, out); + } + + void atomicNotify(Store* store, uint8_t* absoluteAddress, const uint32_t& count, uint32_t* out) const + { + Waiter* waiter = store->getWaiter(static_cast<void*>(absoluteAddress)); waiter->m_mutex.lock(); uint32_t realCount = std::min(waiter->m_waiterItemList.size(), (size_t)count); diff --git a/test/extended/threads/atomic_wait_notify_with_offsets.wast b/test/extended/threads/atomic_wait_notify_with_offsets.wast new file mode 100644 index 000000000..32725f22b --- /dev/null +++ b/test/extended/threads/atomic_wait_notify_with_offsets.wast @@ -0,0 +1,24 @@ +;; wait/notify with non-zero offsets +(module + (memory 1 1 shared) + + (func (export "initOffset") (param $value i64) (param $offset i32) (i64.store offset=0 (local.get $offset) (local.get $value))) + + (func (export "memory.atomic.notify") (param $addr i32) (param $count i32) (result i32) + (memory.atomic.notify offset=245 (local.get 0) (local.get 1))) + (func (export "memory.atomic.wait32") (param $addr i32) (param $expected i32) (param $timeout i64) (result i32) + (memory.atomic.wait32 offset=57822 (local.get 0) (local.get 1) (local.get 2))) + (func (export "memory.atomic.wait64") (param $addr i32) (param $expected i64) (param $timeout i64) (result i32) + (memory.atomic.wait64 offset=32456 (local.get 0) (local.get 1) (local.get 2))) +) + +;; non-zero offsets + +(invoke "initOffset" (i64.const 0xffffffffffff) (i32.const 368)) +(assert_return (invoke "memory.atomic.notify" (i32.const 123) (i32.const 10)) (i32.const 0)) + +(invoke "initOffset" (i64.const 0xffffffffffff) (i32.const 57944)) +(assert_return (invoke "memory.atomic.wait32" (i32.const 122) (i32.const 0) (i64.const 0)) (i32.const 1)) + +(invoke "initOffset" (i64.const 0xffffffffffff) (i32.const 32584)) +(assert_return (invoke "memory.atomic.wait64" (i32.const 128) (i64.const 0xffffffffffff) (i64.const 10)) (i32.const 2)) diff --git a/test/extended/threads/atomic_with_offsets.wast b/test/extended/threads/atomic_with_offsets.wast new file mode 100644 index 000000000..55a92ab15 --- /dev/null +++ b/test/extended/threads/atomic_with_offsets.wast @@ -0,0 +1,130 @@ +;; atomic operations with non-zero offsets + +(module + (memory 1 1 shared) + + (func (export "initOffset") (param $value i64) (param $offset i32) (i64.store (local.get $offset) (local.get $value))) + + (func (export "i32.atomic.load") (param $addr i32) (result i32) (i32.atomic.load offset=20 (local.get 
$addr))) + (func (export "i64.atomic.load") (param $addr i32) (result i64) (i64.atomic.load offset=20 (local.get $addr))) + (func (export "i32.atomic.load8_u") (param $addr i32) (result i32) (i32.atomic.load8_u offset=20 (local.get $addr))) + (func (export "i32.atomic.load16_u") (param $addr i32) (result i32) (i32.atomic.load16_u offset=20 (local.get $addr))) + (func (export "i64.atomic.load8_u") (param $addr i32) (result i64) (i64.atomic.load8_u offset=20 (local.get $addr))) + (func (export "i64.atomic.load16_u") (param $addr i32) (result i64) (i64.atomic.load16_u offset=20 (local.get $addr))) + (func (export "i64.atomic.load32_u") (param $addr i32) (result i64) (i64.atomic.load32_u offset=20 (local.get $addr))) + + (func (export "i32.atomic.store") (param $addr i32) (param $value i32) (i32.atomic.store offset=71 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.store") (param $addr i32) (param $value i64) (i64.atomic.store offset=71 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.store8") (param $addr i32) (param $value i32) (i32.atomic.store8 offset=71 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.store16") (param $addr i32) (param $value i32) (i32.atomic.store16 offset=71 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.store8") (param $addr i32) (param $value i64) (i64.atomic.store8 offset=71 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.store16") (param $addr i32) (param $value i64) (i64.atomic.store16 offset=71 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.store32") (param $addr i32) (param $value i64) (i64.atomic.store32 offset=71 (local.get $addr) (local.get $value))) + + (func (export "i32.atomic.rmw.add") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw.add offset=32 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw.add") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw.add offset=32 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw8.add_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw8.add_u offset=32 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw16.add_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw16.add_u offset=32 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw8.add_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw8.add_u offset=32 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw16.add_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw16.add_u offset=32 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw32.add_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw32.add_u offset=32 (local.get $addr) (local.get $value))) + + (func (export "i32.atomic.rmw.sub") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw.sub offset=579 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw.sub") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw.sub offset=579 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw8.sub_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw8.sub_u offset=579 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw16.sub_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw16.sub_u offset=579 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw8.sub_u") (param 
$addr i32) (param $value i64) (result i64) (i64.atomic.rmw8.sub_u offset=579 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw16.sub_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw16.sub_u offset=579 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw32.sub_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw32.sub_u offset=579 (local.get $addr) (local.get $value))) + + (func (export "i32.atomic.rmw.and") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw.and offset=1234 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw.and") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw.and offset=1234 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw8.and_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw8.and_u offset=1234 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw16.and_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw16.and_u offset=1234 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw8.and_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw8.and_u offset=1234 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw16.and_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw16.and_u offset=1234 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw32.and_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw32.and_u offset=1234 (local.get $addr) (local.get $value))) + + (func (export "i32.atomic.rmw.or") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw.or offset=43523 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw.or") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw.or offset=43523 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw8.or_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw8.or_u offset=43523 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw16.or_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw16.or_u offset=43523 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw8.or_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw8.or_u offset=43523 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw16.or_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw16.or_u offset=43523 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw32.or_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw32.or_u offset=43523 (local.get $addr) (local.get $value))) + + (func (export "i32.atomic.rmw.xor") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw.xor offset=5372 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw.xor") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw.xor offset=5372 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw8.xor_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw8.xor_u offset=5372 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw16.xor_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw16.xor_u offset=5372 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw8.xor_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw8.xor_u offset=5372 (local.get $addr) (local.get 
$value))) + (func (export "i64.atomic.rmw16.xor_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw16.xor_u offset=5372 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw32.xor_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw32.xor_u offset=5372 (local.get $addr) (local.get $value))) + + (func (export "i32.atomic.rmw.xchg") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw.xchg offset=63821 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw.xchg") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw.xchg offset=63821 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw8.xchg_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw8.xchg_u offset=63821 (local.get $addr) (local.get $value))) + (func (export "i32.atomic.rmw16.xchg_u") (param $addr i32) (param $value i32) (result i32) (i32.atomic.rmw16.xchg_u offset=63821 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw8.xchg_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw8.xchg_u offset=63821 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw16.xchg_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw16.xchg_u offset=63821 (local.get $addr) (local.get $value))) + (func (export "i64.atomic.rmw32.xchg_u") (param $addr i32) (param $value i64) (result i64) (i64.atomic.rmw32.xchg_u offset=63821 (local.get $addr) (local.get $value))) + + (func (export "i32.atomic.rmw.cmpxchg") (param $addr i32) (param $expected i32) (param $value i32) (result i32) (i32.atomic.rmw.cmpxchg offset=2831 (local.get $addr) (local.get $expected) (local.get $value))) + (func (export "i64.atomic.rmw.cmpxchg") (param $addr i32) (param $expected i64) (param $value i64) (result i64) (i64.atomic.rmw.cmpxchg offset=2831 (local.get $addr) (local.get $expected) (local.get $value))) + (func (export "i32.atomic.rmw8.cmpxchg_u") (param $addr i32) (param $expected i32) (param $value i32) (result i32) (i32.atomic.rmw8.cmpxchg_u offset=2831 (local.get $addr) (local.get $expected) (local.get $value))) + (func (export "i32.atomic.rmw16.cmpxchg_u") (param $addr i32) (param $expected i32) (param $value i32) (result i32) (i32.atomic.rmw16.cmpxchg_u offset=2831 (local.get $addr) (local.get $expected) (local.get $value))) + (func (export "i64.atomic.rmw8.cmpxchg_u") (param $addr i32) (param $expected i64) (param $value i64) (result i64) (i64.atomic.rmw8.cmpxchg_u offset=2831 (local.get $addr) (local.get $expected) (local.get $value))) + (func (export "i64.atomic.rmw16.cmpxchg_u") (param $addr i32) (param $expected i64) (param $value i64) (result i64) (i64.atomic.rmw16.cmpxchg_u offset=2831 (local.get $addr) (local.get $expected) (local.get $value))) + (func (export "i64.atomic.rmw32.cmpxchg_u") (param $addr i32) (param $expected i64) (param $value i64) (result i64) (i64.atomic.rmw32.cmpxchg_u offset=2831 (local.get $addr) (local.get $expected) (local.get $value))) + +) + +;; various non-zero offsets + +(invoke "initOffset" (i64.const 0x0706050403020100) (i32.const 20)) +(assert_return (invoke "i32.atomic.load16_u" (i32.const 0)) (i32.const 0x0100)) +(assert_return (invoke "i32.atomic.load16_u" (i32.const 6)) (i32.const 0x0706)) + +(invoke "initOffset" (i64.const 0x0000000000000000) (i32.const 71)) +(assert_return (invoke "i64.atomic.store32" (i32.const 5) (i64.const 0xdeadbeef))) +(assert_return (invoke "i64.atomic.load" (i32.const 52)) (i64.const 0xdeadbeef00000000)) + 
+(invoke "initOffset" (i64.const 0x1111111111111111) (i32.const 32)) +(assert_return (invoke "i64.atomic.rmw8.add_u" (i32.const 0) (i64.const 0x4242424242424242)) (i64.const 0x11)) +(assert_return (invoke "i64.atomic.load" (i32.const 12)) (i64.const 0x1111111111111153)) + +(invoke "initOffset" (i64.const 0x1111111111111111) (i32.const 584)) +(assert_return (invoke "i32.atomic.rmw8.sub_u" (i32.const 10) (i32.const 0xcdcdcdcd)) (i32.const 0x11)) +(assert_return (invoke "i64.atomic.load" (i32.const 564)) (i64.const 0x1111441111111111)) + +(invoke "initOffset" (i64.const 0x1111111111111111) (i32.const 1296)) +(assert_return (invoke "i64.atomic.rmw16.and_u" (i32.const 66) (i64.const 0xbeefbeefbeefbeef)) (i64.const 0x1111)) +(assert_return (invoke "i64.atomic.load" (i32.const 1276)) (i64.const 0x1111100111111111)) + +(invoke "initOffset" (i64.const 0x1111111111111111) (i32.const 44280)) +(assert_return (invoke "i32.atomic.rmw.or" (i32.const 757) (i32.const 0x12345678)) (i32.const 0x11111111)) +(assert_return (invoke "i64.atomic.load" (i32.const 44260)) (i64.const 0x1111111113355779)) + +(invoke "initOffset" (i64.const 0x1111111111111111) (i32.const 5472)) +(assert_return (invoke "i64.atomic.rmw.xor" (i32.const 100) (i64.const 0x0101010102020202)) (i64.const 0x1111111111111111)) +(assert_return (invoke "i64.atomic.load" (i32.const 5452)) (i64.const 0x1010101013131313)) + +(invoke "initOffset" (i64.const 0x1111111111111111) (i32.const 63848)) +(assert_return (invoke "i64.atomic.rmw16.xchg_u" (i32.const 31) (i64.const 0xbeefbeefbeefbeef)) (i64.const 0x1111)) +(assert_return (invoke "i64.atomic.load" (i32.const 63828)) (i64.const 0x1111beef11111111)) + +(invoke "initOffset" (i64.const 0x1111111111111111) (i32.const 2872)) +(assert_return (invoke "i32.atomic.rmw16.cmpxchg_u" (i32.const 47) (i32.const 0x11111111) (i32.const 0xcafecafe)) (i32.const 0x1111)) +(assert_return (invoke "i64.atomic.load" (i32.const 2852)) (i64.const 0x1111111111111111)) + +(invoke "initOffset" (i64.const 0x1111111111111111) (i32.const 8216)) +(assert_return (invoke "i64.atomic.rmw8.cmpxchg_u" (i32.const 5389) (i64.const 0) (i64.const 0x4242424242424242)) (i64.const 0x11)) +(assert_return (invoke "i64.atomic.load" (i32.const 8196)) (i64.const 0x1111111111111111)) + +(invoke "initOffset" (i64.const 0x1111111111111111) (i32.const 16096)) +(assert_return (invoke "i64.atomic.rmw.cmpxchg" (i32.const 13265) (i64.const 0x1111111111111111) (i64.const 0x0101010102020202)) (i64.const 0x1111111111111111)) +(assert_return (invoke "i64.atomic.load" (i32.const 16076)) (i64.const 0x0101010102020202)) + +(invoke "initOffset" (i64.const 0x1111111111111111) (i32.const 24000)) +(assert_return (invoke "i64.atomic.rmw16.cmpxchg_u" (i32.const 21169) (i64.const 0x1111) (i64.const 0xbeefbeefbeefbeef)) (i64.const 0x1111)) +(assert_return (invoke "i64.atomic.load" (i32.const 23980)) (i64.const 0x111111111111beef)) diff --git a/third_party/sljit b/third_party/sljit index f9f512c08..2c105e246 160000 --- a/third_party/sljit +++ b/third_party/sljit @@ -1 +1 @@ -Subproject commit f9f512c0809dc9b29a580ca6b0204ea96620d237 +Subproject commit 2c105e2461b0d5b6c9c632753522457ca442f9dd