diff --git a/README.md b/README.md
index ef70e01b9f..44f3297ba3 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,7 @@ is the master line of development.
[packetblaster](src/program/packetblaster/) generates load by
replaying a [pcap format](http://en.wikipedia.org/wiki/Pcap) trace
-file onto any number of Intel 82599 10-Gigabit network
+file or synthesizing customizable packets onto any number of Intel 82599 10-Gigabit network
interfaces. This is very efficient: only a small % of one core per CPU
is required even for hundreds of Gbps of traffic. Because so little
CPU resources are required you can run packetblaster on a small server
@@ -96,7 +96,7 @@ For example, to install on the local machine and use as a load generator:
```
$ cp src/snabb /usr/local/bin/
-$ sudo snabb packetblaster capture.pcap 0000:01:00.0
+$ sudo snabb packetblaster replay capture.pcap 01:00.0
```
## How do I get involved?
diff --git a/lib/luajit/doc/changes.html b/lib/luajit/doc/changes.html
index 125b58b4ca..826cd2436b 100644
--- a/lib/luajit/doc/changes.html
+++ b/lib/luajit/doc/changes.html
@@ -113,6 +113,7 @@
LuaJIT 2.1.0-beta1 — 2015-08-25
x64: Add separate port of the interpreter to LJ_GC64 mode.
x86/x64: Drop internal x87 math functions. Use libm functions.
x86: Remove x87 support from interpreter. SSE2 is mandatory now.
+x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.
PPC/e500: Drop support for this architecture.
FFI library:
@@ -123,6 +124,7 @@ LuaJIT 2.1.0-beta1 — 2015-08-25
FFI: Compile lightuserdata to void * conversion.
FFI: Compile ffi.gc(cdata, nil), too.
FFI: Add ffi.typeinfo().
+FFI: Add ssize_t declaration.
diff --git a/lib/luajit/doc/ext_ffi_semantics.html b/lib/luajit/doc/ext_ffi_semantics.html
index 889d44d823..f65fe8f36d 100644
--- a/lib/luajit/doc/ext_ffi_semantics.html
+++ b/lib/luajit/doc/ext_ffi_semantics.html
@@ -185,6 +185,8 @@ C Language Support
uint16_t, uint32_t, uint64_t,
intptr_t, uintptr_t.
+From <unistd.h> (POSIX): ssize_t.
+
You're encouraged to use these types in preference to
diff --git a/lib/luajit/doc/install.html b/lib/luajit/doc/install.html
index b5df697b67..a4cc721512 100644
--- a/lib/luajit/doc/install.html
+++ b/lib/luajit/doc/install.html
@@ -114,30 +114,30 @@
Installation
x86 (32 bit) |
-GCC 4.x GCC 3.4 |
-GCC 4.x GCC 3.4 |
-GCC 4.x GCC 3.4 |
+GCC 4.2+ |
+GCC 4.2+ |
+XCode 5.0+ Clang |
MSVC, MSVC/EE WinSDK MinGW, Cygwin |
x64 (64 bit) |
-GCC 4.x |
+GCC 4.2+ |
ORBIS (PS4) |
-GCC 4.x |
+XCode 5.0+ Clang |
MSVC + SDK v7.0 WinSDK v7.0 Durango (Xbox One) |
ARMv5+ ARM9E+ |
GCC 4.2+ |
GCC 4.2+ PSP2 (PS VITA) |
-GCC 4.2+ |
+XCode 5.0+ Clang |
|
ARM64 |
GCC 4.8+ |
|
-Clang 3.5+ |
+XCode 6.0+ Clang 3.5+ |
|
@@ -442,8 +442,7 @@ Cross-compiling LuaJIT
make HOST_CC="gcc -m32" CROSS=$NDKP TARGET_FLAGS="$NDKF"
-You can cross-compile for iOS 3.0+ (iPhone/iPad) using the » iOS SDK.
-The environment variables need to match the iOS SDK version:
+You can cross-compile for iOS 3.0+ (iPhone/iPad) using the » iOS SDK:
Note: the JIT compiler is disabled for iOS, because regular iOS Apps
@@ -453,13 +452,18 @@
Cross-compiling LuaJIT
Or use Android. :-p
-IXCODE=`xcode-select -print-path`
-ISDK=$IXCODE/Platforms/iPhoneOS.platform/Developer
-ISDKVER=iPhoneOS6.0.sdk
-ISDKP=$ISDK/usr/bin/
-ISDKF="-arch armv7 -isysroot $ISDK/SDKs/$ISDKVER"
-make HOST_CC="gcc -m32 -arch i386" CROSS=$ISDKP TARGET_FLAGS="$ISDKF" \
- TARGET_SYS=iOS
+# iOS/ARM (32 bit)
+ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+ICC=$(xcrun --sdk iphoneos --find clang)
+ISDKF="-arch armv7 -isysroot $ISDKP"
+make HOST_CC="clang -m32 -arch i386" CROSS="$(dirname $ICC)/" \
+ TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
+
+# iOS/ARM64
+ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+ICC=$(xcrun --sdk iphoneos --find clang)
+ISDKF="-arch arm64 -isysroot $ISDKP"
+make CROSS="$(dirname $ICC)/" TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
Cross-compiling for consoles
diff --git a/lib/luajit/dynasm/dasm_arm.lua b/lib/luajit/dynasm/dasm_arm.lua
index 90a259c5c3..6a1d1d5195 100644
--- a/lib/luajit/dynasm/dasm_arm.lua
+++ b/lib/luajit/dynasm/dasm_arm.lua
@@ -9,9 +9,9 @@
local _info = {
arch = "arm",
description = "DynASM ARM module",
- version = "1.3.0",
- vernum = 10300,
- release = "2011-05-05",
+ version = "1.4.0",
+ vernum = 10400,
+ release = "2015-10-18",
author = "Mike Pall",
license = "MIT",
}
diff --git a/lib/luajit/dynasm/dasm_arm64.lua b/lib/luajit/dynasm/dasm_arm64.lua
index 9766e475b0..c1e3a81b11 100644
--- a/lib/luajit/dynasm/dasm_arm64.lua
+++ b/lib/luajit/dynasm/dasm_arm64.lua
@@ -9,9 +9,9 @@
local _info = {
arch = "arm",
description = "DynASM ARM64 module",
- version = "1.3.0",
- vernum = 10300,
- release = "2014-12-03",
+ version = "1.4.0",
+ vernum = 10400,
+ release = "2015-10-18",
author = "Mike Pall",
license = "MIT",
}
diff --git a/lib/luajit/dynasm/dasm_mips.lua b/lib/luajit/dynasm/dasm_mips.lua
index ae0dbd7a9b..ef383431cd 100644
--- a/lib/luajit/dynasm/dasm_mips.lua
+++ b/lib/luajit/dynasm/dasm_mips.lua
@@ -9,9 +9,9 @@
local _info = {
arch = "mips",
description = "DynASM MIPS module",
- version = "1.3.0",
- vernum = 10300,
- release = "2012-01-23",
+ version = "1.4.0",
+ vernum = 10400,
+ release = "2015-10-18",
author = "Mike Pall",
license = "MIT",
}
diff --git a/lib/luajit/dynasm/dasm_ppc.lua b/lib/luajit/dynasm/dasm_ppc.lua
index 278f09526d..1e9bccaeb8 100644
--- a/lib/luajit/dynasm/dasm_ppc.lua
+++ b/lib/luajit/dynasm/dasm_ppc.lua
@@ -11,9 +11,9 @@
local _info = {
arch = "ppc",
description = "DynASM PPC module",
- version = "1.3.0",
- vernum = 10300,
- release = "2015-01-14",
+ version = "1.4.0",
+ vernum = 10400,
+ release = "2015-10-18",
author = "Mike Pall",
license = "MIT",
}
diff --git a/lib/luajit/dynasm/dasm_proto.h b/lib/luajit/dynasm/dasm_proto.h
index a8bc6fd285..93ca06533c 100644
--- a/lib/luajit/dynasm/dasm_proto.h
+++ b/lib/luajit/dynasm/dasm_proto.h
@@ -10,8 +10,8 @@
#include
#include
-#define DASM_IDENT "DynASM 1.3.0"
-#define DASM_VERSION 10300 /* 1.3.0 */
+#define DASM_IDENT "DynASM 1.4.0"
+#define DASM_VERSION 10400 /* 1.4.0 */
#ifndef Dst_DECL
#define Dst_DECL dasm_State **Dst
diff --git a/lib/luajit/dynasm/dasm_x86.h b/lib/luajit/dynasm/dasm_x86.h
index 652e8c99b0..be9c289f02 100644
--- a/lib/luajit/dynasm/dasm_x86.h
+++ b/lib/luajit/dynasm/dasm_x86.h
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
dasm_State *D = Dst_REF;
dasm_ActList p = D->actionlist + start;
dasm_Section *sec = D->section;
- int pos = sec->pos, ofs = sec->ofs, mrm = 4;
+ int pos = sec->pos, ofs = sec->ofs, mrm = -1;
int *b;
if (pos >= sec->epos) {
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...)
b[pos++] = n;
switch (action) {
case DASM_DISP:
- if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
+ if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
case DASM_IMM_D: ofs += 4; break;
@@ -203,10 +203,17 @@ void dasm_put(Dst_DECL, int start, ...)
case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
case DASM_SPACE: p++; ofs += n; break;
case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
- case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
- if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue;
+ case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
+ if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
+ if (*p < 0x20 && (n&7) == 4) ofs++;
+ switch ((*p++ >> 3) & 3) {
+ case 3: n |= b[pos-3];
+ case 2: n |= b[pos-2];
+ case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
+ }
+ continue;
}
- mrm = 4;
+ mrm = -1;
} else {
int *pl, n;
switch (action) {
@@ -391,7 +398,27 @@ int dasm_encode(Dst_DECL, void *buffer)
case DASM_IMM_D: wd: dasmd(n); break;
case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
case DASM_IMM_W: dasmw(n); break;
- case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
+ case DASM_VREG: {
+ int t = *p++;
+ unsigned char *ex = cp - (t&7);
+ if ((n & 8) && t < 0xa0) {
+ if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
+ n &= 7;
+ } else if (n & 0x10) {
+ if (*ex & 0x80) {
+ *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
+ }
+ while (++ex < cp) ex[-1] = *ex;
+ if (mark) mark--;
+ cp--;
+ n &= 7;
+ }
+ if (t >= 0xc0) n <<= 4;
+ else if (t >= 0x40) n <<= 3;
+ else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
+ cp[-1] ^= n;
+ break;
+ }
case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
b++; n = (int)(ptrdiff_t)D->globals[-n];
case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
diff --git a/lib/luajit/dynasm/dasm_x86.lua b/lib/luajit/dynasm/dasm_x86.lua
index 7ca061d22f..60f5211a33 100644
--- a/lib/luajit/dynasm/dasm_x86.lua
+++ b/lib/luajit/dynasm/dasm_x86.lua
@@ -11,9 +11,9 @@ local x64 = x64
local _info = {
arch = x64 and "x64" or "x86",
description = "DynASM x86/x64 module",
- version = "1.3.0",
- vernum = 10300,
- release = "2011-05-05",
+ version = "1.4.0",
+ vernum = 10400,
+ release = "2015-10-18",
author = "Mike Pall",
license = "MIT",
}
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
-local concat, sort = table.concat, table.sort
+local concat, sort, remove = table.concat, table.sort, table.remove
local bit = bit or require("bit")
-local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
-- Inherited tables and callbacks.
local g_opt, g_arch
@@ -41,7 +41,7 @@ local action_names = {
-- int arg, 1 buffer pos:
"DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
-- action arg (1 byte), int arg, 1 buffer pos (reg/num):
- "VREG", "SPACE", -- !x64: VREG support NYI.
+ "VREG", "SPACE",
-- ptrdiff_t arg, 1 buffer pos (address): !x64
"SETLABEL", "REL_A",
-- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
-- Current number of section buffer positions for dasm_put().
local secpos = 1
+-- VREG kind encodings, pre-shifted by 5 bits (wvreg adds shrink count * 8 and prefix size).
+local map_vreg = {
+ ["modrm.rm.m"] = 0x00,
+ ["modrm.rm.r"] = 0x20,
+ ["opcode"] = 0x20,
+ ["sib.base"] = 0x20,
+ ["sib.index"] = 0x40, -- dasm_put rejects register 4 for this kind (RANGE_VREG).
+ ["modrm.reg"] = 0x80,
+ ["vex.v"] = 0xa0,
+ ["imm.hi"] = 0xc0, -- dasm_encode shifts the register number left by 4.
+}
+
+-- Current number of VREG actions contributing to REX/VEX shrinkage.
+local vreg_shrink_count = 0
+
------------------------------------------------------------------------------
-- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
if a or num then secpos = secpos + (num or 1) end
end
+-- Optionally add a VREG action: kind byte = map_vreg[kind] + shrink count * 8 + psz.
+local function wvreg(kind, vreg, psz, sk, defer)
+  if not vreg then return end -- No variable register given: nothing to emit.
+  waction("VREG", vreg)
+  local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'")
+  if b < (sk or 0) then -- Kinds below the sk threshold count towards shrinkage.
+    vreg_shrink_count = vreg_shrink_count + 1
+  end
+  if not defer then -- No further VREG follows: flush the accumulated count.
+    b = b + vreg_shrink_count * 8
+    vreg_shrink_count = 0
+  end
+  wputxb(b + (psz or 0))
+end
+
-- Add call to embedded DynASM C code.
local function wcall(func, args)
wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
if needrex then map_reg_needrex[iname] = true end
local name
- if sz == "o" then name = format("xmm%d", i)
+ if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
elseif sz == "f" then name = format("st%d", i)
else name = format("r%d%s", i, sz == addrsize and "" or sz) end
map_archdef[name] = iname
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
map_reg_valid_index[map_archdef.esp] = false
if x64 then map_reg_valid_index[map_archdef.rsp] = false end
+if x64 then map_reg_needrex[map_archdef.Rb] = true end
map_archdef["Ra"] = "@"..addrsize
-- FP registers (internally tword sized, but use "f" as operand size).
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
-- SSE registers (oword sized, but qword and dword accessible).
mkrmap("o", "xmm")
+-- AVX registers (yword sized, but oword, qword and dword accessible).
+mkrmap("y", "ymm")
+
-- Operand size prefixes to codes.
local map_opsize = {
- byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
- aword = addrsize,
+ byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
+ tword = "t", aword = addrsize,
}
-- Operand size code to number.
local map_opsizenum = {
- b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
+ b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
}
-- Operand size code to name.
local map_opsizename = {
- b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
- f = "fpword",
+ b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
+ t = "tword", f = "fpword",
}
-- Valid index register scale factors.
@@ -460,9 +494,45 @@ local function wputszarg(sz, n)
end
-- Put multi-byte opcode with operand-size dependent modifications.
-local function wputop(sz, op, rex)
+local function wputop(sz, op, rex, vex, vregr, vregxb)
+ local psz, sk = 0, nil
+ if vex then
+ local tail
+ if vex.m == 1 and band(rex, 11) == 0 then
+ if x64 and vregxb then
+ sk = map_vreg["modrm.reg"]
+ else
+ wputb(0xc5)
+ tail = shl(bxor(band(rex, 4), 4), 5)
+ psz = 3
+ end
+ end
+ if not tail then
+ wputb(0xc4)
+ wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
+ tail = shl(band(rex, 8), 4)
+ psz = 4
+ end
+ local reg, vreg = 0, nil
+ if vex.v then
+ reg = vex.v.reg
+ if not reg then werror("bad vex operand") end
+ if reg < 0 then reg = 0; vreg = vex.v.vreg end
+ end
+ if sz == "y" or vex.l then tail = tail + 4 end
+ wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
+ wvreg("vex.v", vreg)
+ rex = 0
+ if op >= 256 then werror("bad vex opcode") end
+ else
+ if rex ~= 0 then
+ if not x64 then werror("bad operand size") end
+ elseif (vregr or vregxb) and x64 then
+ rex = 0x10
+ sk = map_vreg["vex.v"]
+ end
+ end
local r
- if rex ~= 0 and not x64 then werror("bad operand size") end
if sz == "w" then wputb(102) end
-- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -471,20 +541,20 @@ local function wputop(sz, op, rex)
if rex ~= 0 then
local opc3 = band(op, 0xffff00)
if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
- wputb(64 + band(rex, 15)); rex = 0
+ wputb(64 + band(rex, 15)); rex = 0; psz = 2
end
end
- wputb(shr(op, 16)); op = band(op, 0xffff)
+ wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
end
if op >= 256 then
local b = shr(op, 8)
- if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
- wputb(b)
- op = band(op, 255)
+ if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
+ wputb(b); op = band(op, 255); psz = psz + 1
end
- if rex ~= 0 then wputb(64 + band(rex, 15)) end
+ if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
if sz == "b" then op = op - 1 end
wputb(op)
+ return psz, sk
end
-- Put ModRM or SIB formatted byte.
@@ -494,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
end
-- Put ModRM/SIB plus optional displacement.
-local function wputmrmsib(t, imark, s, vsreg)
+local function wputmrmsib(t, imark, s, vsreg, psz, sk)
local vreg, vxreg
local reg, xreg = t.reg, t.xreg
if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -504,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg)
-- Register mode.
if sub(t.mode, 1, 1) == "r" then
wputmodrm(3, s, reg)
- if vsreg then waction("VREG", vsreg); wputxb(2) end
- if vreg then waction("VREG", vreg); wputxb(0) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+ wvreg("modrm.rm.r", vreg, psz+1, sk)
return
end
@@ -519,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg)
-- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
wputmodrm(0, s, 4)
if imark == "I" then waction("MARK") end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
wputmodrm(t.xsc, xreg, 5)
- if vxreg then waction("VREG", vxreg); wputxb(3) end
+ wvreg("sib.index", vxreg, psz+2, sk)
else
-- Pure 32 bit displacement.
if x64 and tdisp ~= "table" then
wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
+ wvreg("modrm.reg", vsreg, psz+1, sk)
if imark == "I" then waction("MARK") end
wputmodrm(0, 4, 5)
else
riprel = x64
wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
+ wvreg("modrm.reg", vsreg, psz+1, sk)
if imark == "I" then waction("MARK") end
end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
end
if riprel then -- Emit rip-relative displacement.
if match("UWSiI", imark) then
@@ -561,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg)
if xreg or band(reg, 7) == 4 then
wputmodrm(m or 2, s, 4) -- ModRM.
if m == nil or imark == "I" then waction("MARK") end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
- if vxreg then waction("VREG", vxreg); wputxb(3) end
- if vreg then waction("VREG", vreg); wputxb(1) end
+ wvreg("sib.index", vxreg, psz+2, sk, vreg)
+ wvreg("sib.base", vreg, psz+2, sk)
else
wputmodrm(m or 2, s, reg) -- ModRM.
if (imark == "I" and (m == 1 or m == 2)) or
(m == nil and (vsreg or vreg)) then waction("MARK") end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
- if vreg then waction("VREG", vreg); wputxb(1) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+ wvreg("modrm.rm.m", vreg, psz+1, sk)
end
-- Put displacement.
@@ -881,9 +952,15 @@ end
-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
-- The spare 3 bits are either filled with the last hex digit or
-- the result from a previous "r"/"R". The opcode is restored.
+-- "u" Use VEX encoding, vvvv unused.
+-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
+-- removed from the list used by future characters).
+-- "L" Force VEX.L (the bit otherwise set only for yword operands).
--
-- All of the following characters force a flush of the opcode:
-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
+-- "s" stores a 4 bit immediate from the last register operand,
+-- followed by 4 zero bits.
-- "S" stores a signed 8 bit immediate from the last operand.
-- "U" stores an unsigned 8 bit immediate from the last operand.
-- "W" stores an unsigned 16 bit immediate from the last operand.
@@ -1081,10 +1158,11 @@ local map_op = {
btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",
- shld_3 = "mriqdw:0FA4RmU|mrCqdw:0FA5Rm",
- shrd_3 = "mriqdw:0FACRmU|mrCqdw:0FADRm",
+ shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
+ shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
rdtsc_0 = "0F31", -- P1+
+ rdpmc_0 = "0F33", -- P6+
cpuid_0 = "0FA2", -- P1+
-- floating point ops
@@ -1190,7 +1268,7 @@ local map_op = {
cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
- cvtss2si_2 = "rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:",
+ cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
cvttpd2dq_2 = "rmo:660FE6rM",
cvttps2dq_2 = "rmo:F30F5BrM",
cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
@@ -1225,46 +1303,14 @@ local map_op = {
movups_2 = "rmo:0F10rM|mro:0F11Rm",
orpd_2 = "rmo:660F56rM",
orps_2 = "rmo:0F56rM",
- packssdw_2 = "rmo:660F6BrM",
- packsswb_2 = "rmo:660F63rM",
- packuswb_2 = "rmo:660F67rM",
- paddb_2 = "rmo:660FFCrM",
- paddd_2 = "rmo:660FFErM",
- paddq_2 = "rmo:660FD4rM",
- paddsb_2 = "rmo:660FECrM",
- paddsw_2 = "rmo:660FEDrM",
- paddusb_2 = "rmo:660FDCrM",
- paddusw_2 = "rmo:660FDDrM",
- paddw_2 = "rmo:660FFDrM",
- pand_2 = "rmo:660FDBrM",
- pandn_2 = "rmo:660FDFrM",
pause_0 = "F390",
- pavgb_2 = "rmo:660FE0rM",
- pavgw_2 = "rmo:660FE3rM",
- pcmpeqb_2 = "rmo:660F74rM",
- pcmpeqd_2 = "rmo:660F76rM",
- pcmpeqw_2 = "rmo:660F75rM",
- pcmpgtb_2 = "rmo:660F64rM",
- pcmpgtd_2 = "rmo:660F66rM",
- pcmpgtw_2 = "rmo:660F65rM",
- pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
+ pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
- pmaddwd_2 = "rmo:660FF5rM",
- pmaxsw_2 = "rmo:660FEErM",
- pmaxub_2 = "rmo:660FDErM",
- pminsw_2 = "rmo:660FEArM",
- pminub_2 = "rmo:660FDArM",
pmovmskb_2 = "rr/do:660FD7rM",
- pmulhuw_2 = "rmo:660FE4rM",
- pmulhw_2 = "rmo:660FE5rM",
- pmullw_2 = "rmo:660FD5rM",
- pmuludq_2 = "rmo:660FF4rM",
- por_2 = "rmo:660FEBrM",
prefetchnta_1 = "xb:n0F180m",
prefetcht0_1 = "xb:n0F181m",
prefetcht1_1 = "xb:n0F182m",
prefetcht2_1 = "xb:n0F183m",
- psadbw_2 = "rmo:660FF6rM",
pshufd_3 = "rmio:660F70rMU",
pshufhw_3 = "rmio:F30F70rMU",
pshuflw_3 = "rmio:F20F70rMU",
@@ -1278,23 +1324,6 @@ local map_op = {
psrldq_2 = "rio:660F733mU",
psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
- psubb_2 = "rmo:660FF8rM",
- psubd_2 = "rmo:660FFArM",
- psubq_2 = "rmo:660FFBrM",
- psubsb_2 = "rmo:660FE8rM",
- psubsw_2 = "rmo:660FE9rM",
- psubusb_2 = "rmo:660FD8rM",
- psubusw_2 = "rmo:660FD9rM",
- psubw_2 = "rmo:660FF9rM",
- punpckhbw_2 = "rmo:660F68rM",
- punpckhdq_2 = "rmo:660F6ArM",
- punpckhqdq_2 = "rmo:660F6DrM",
- punpckhwd_2 = "rmo:660F69rM",
- punpcklbw_2 = "rmo:660F60rM",
- punpckldq_2 = "rmo:660F62rM",
- punpcklqdq_2 = "rmo:660F6CrM",
- punpcklwd_2 = "rmo:660F61rM",
- pxor_2 = "rmo:660FEFrM",
rcpps_2 = "rmo:0F53rM",
rcpss_2 = "rro:F30F53rM|rx/od:",
rsqrtps_2 = "rmo:0F52rM",
@@ -1352,7 +1381,7 @@ local map_op = {
dpps_3 = "rmio:660F3A40rMU",
extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
insertps_3 = "rrio:660F3A41rMU|rxi/od:",
- movntdqa_2 = "rmo:660F382ArM",
+ movntdqa_2 = "rxo:660F382ArM",
mpsadbw_3 = "rmio:660F3A42rMU",
packusdw_2 = "rmo:660F382BrM",
pblendvb_3 = "rmRo:660F3810rM",
@@ -1412,6 +1441,238 @@ local map_op = {
movntsd_2 = "xr/qo:nF20F2BRm",
movntss_2 = "xr/do:F30F2BRm",
-- popcnt is also in SSE4.2
+
+ -- AES-NI
+ aesdec_2 = "rmo:660F38DErM",
+ aesdeclast_2 = "rmo:660F38DFrM",
+ aesenc_2 = "rmo:660F38DCrM",
+ aesenclast_2 = "rmo:660F38DDrM",
+ aesimc_2 = "rmo:660F38DBrM",
+ aeskeygenassist_3 = "rmio:660F3ADFrMU",
+ pclmulqdq_3 = "rmio:660F3A44rMU",
+
+ -- AVX FP ops
+ vaddsubpd_3 = "rrmoy:660FVD0rM",
+ vaddsubps_3 = "rrmoy:F20FVD0rM",
+ vandpd_3 = "rrmoy:660FV54rM",
+ vandps_3 = "rrmoy:0FV54rM",
+ vandnpd_3 = "rrmoy:660FV55rM",
+ vandnps_3 = "rrmoy:0FV55rM",
+ vblendpd_4 = "rrmioy:660F3AV0DrMU",
+ vblendps_4 = "rrmioy:660F3AV0CrMU",
+ vblendvpd_4 = "rrmroy:660F3AV4BrMs",
+ vblendvps_4 = "rrmroy:660F3AV4ArMs",
+ vbroadcastf128_2 = "rx/yo:660F38u1ArM",
+ vcmppd_4 = "rrmioy:660FVC2rMU",
+ vcmpps_4 = "rrmioy:0FVC2rMU",
+ vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
+ vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
+ vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
+ vcomiss_2 = "rro:0Fu2FrM|rx/od:",
+ vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
+ vcvtdq2ps_2 = "rmoy:0Fu5BrM",
+ vcvtpd2dq_2 = "rmoy:F20FuE6rM",
+ vcvtpd2ps_2 = "rmoy:660Fu5ArM",
+ vcvtps2dq_2 = "rmoy:660Fu5BrM",
+ vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
+ vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
+ vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
+ vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
+ vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
+ vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
+ vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
+ vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
+ vcvttps2dq_2 = "rmoy:F30Fu5BrM",
+ vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
+ vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
+ vdppd_4 = "rrmio:660F3AV41rMU",
+ vdpps_4 = "rrmioy:660F3AV40rMU",
+ vextractf128_3 = "mri/oy:660F3AuL19RmU",
+ vextractps_3 = "mri/do:660F3Au17RmU",
+ vhaddpd_3 = "rrmoy:660FV7CrM",
+ vhaddps_3 = "rrmoy:F20FV7CrM",
+ vhsubpd_3 = "rrmoy:660FV7DrM",
+ vhsubps_3 = "rrmoy:F20FV7DrM",
+ vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
+ vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
+ vldmxcsr_1 = "xd:0FuAE2m",
+ vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
+ vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
+ vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
+ vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
+ vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
+ vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
+ vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
+ vmovhlps_3 = "rrro:0FV12rM",
+ vmovhpd_2 = "xr/qo:660Fu17Rm",
+ vmovhpd_3 = "rrx/ooq:660FV16rM",
+ vmovhps_2 = "xr/qo:0Fu17Rm",
+ vmovhps_3 = "rrx/ooq:0FV16rM",
+ vmovlhps_3 = "rrro:0FV16rM",
+ vmovlpd_2 = "xr/qo:660Fu13Rm",
+ vmovlpd_3 = "rrx/ooq:660FV12rM",
+ vmovlps_2 = "xr/qo:0Fu13Rm",
+ vmovlps_3 = "rrx/ooq:0FV12rM",
+ vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
+ vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
+ vmovntpd_2 = "xroy:660Fu2BRm",
+ vmovntps_2 = "xroy:0Fu2BRm",
+ vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
+ vmovsd_3 = "rrro:F20FV10rM",
+ vmovshdup_2 = "rmoy:F30Fu16rM",
+ vmovsldup_2 = "rmoy:F30Fu12rM",
+ vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
+ vmovss_3 = "rrro:F30FV10rM",
+ vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
+ vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
+ vorpd_3 = "rrmoy:660FV56rM",
+ vorps_3 = "rrmoy:0FV56rM",
+ vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
+ vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
+ vperm2f128_4 = "rrmiy:660F3AV06rMU",
+ vptestpd_2 = "rmoy:660F38u0FrM",
+ vptestps_2 = "rmoy:660F38u0ErM",
+ vrcpps_2 = "rmoy:0Fu53rM",
+ vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
+ vrsqrtps_2 = "rmoy:0Fu52rM",
+ vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
+ vroundpd_3 = "rmioy:660F3Au09rMU", -- vvvv unused: dst, src/mem, imm8.
+ vroundps_3 = "rmioy:660F3Au08rMU", -- vvvv unused: dst, src/mem, imm8.
+ vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
+ vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
+ vshufpd_4 = "rrmioy:660FVC6rMU",
+ vshufps_4 = "rrmioy:0FVC6rMU",
+ vsqrtps_2 = "rmoy:0Fu51rM",
+ vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
+ vsqrtpd_2 = "rmoy:660Fu51rM",
+ vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
+ vstmxcsr_1 = "xd:0FuAE3m",
+ vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
+ vucomiss_2 = "rro:0Fu2ErM|rx/od:",
+ vunpckhpd_3 = "rrmoy:660FV15rM",
+ vunpckhps_3 = "rrmoy:0FV15rM",
+ vunpcklpd_3 = "rrmoy:660FV14rM",
+ vunpcklps_3 = "rrmoy:0FV14rM",
+ vxorpd_3 = "rrmoy:660FV57rM",
+ vxorps_3 = "rrmoy:0FV57rM",
+ vzeroall_0 = "0FuL77",
+ vzeroupper_0 = "0Fu77",
+
+ -- AVX2 FP ops
+ vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
+ vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
+ -- *vgather* (!vsib)
+ vpermpd_3 = "rmiy:660F3AuX01rMU",
+ vpermps_3 = "rrmy:660F38V16rM",
+
+ -- AVX, AVX2 integer ops
+ -- In general, xmm requires AVX, ymm requires AVX2.
+ vaesdec_3 = "rrmo:660F38VDErM",
+ vaesdeclast_3 = "rrmo:660F38VDFrM",
+ vaesenc_3 = "rrmo:660F38VDCrM",
+ vaesenclast_3 = "rrmo:660F38VDDrM",
+ vaesimc_2 = "rmo:660F38uDBrM",
+ vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
+ vlddqu_2 = "rxoy:F20FuF0rM",
+ vmaskmovdqu_2 = "rro:660FuF7rM",
+ vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
+ vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
+ vmovntdq_2 = "xroy:660FuE7Rm",
+ vmovntdqa_2 = "rxoy:660F38u2ArM",
+ vmpsadbw_4 = "rrmioy:660F3AV42rMU",
+ vpabsb_2 = "rmoy:660F38u1CrM",
+ vpabsd_2 = "rmoy:660F38u1ErM",
+ vpabsw_2 = "rmoy:660F38u1DrM",
+ vpackusdw_3 = "rrmoy:660F38V2BrM",
+ vpalignr_4 = "rrmioy:660F3AV0FrMU",
+ vpblendvb_4 = "rrmroy:660F3AV4CrMs",
+ vpblendw_4 = "rrmioy:660F3AV0ErMU",
+ vpclmulqdq_4 = "rrmio:660F3AV44rMU",
+ vpcmpeqq_3 = "rrmoy:660F38V29rM",
+ vpcmpestri_3 = "rmio:660F3Au61rMU",
+ vpcmpestrm_3 = "rmio:660F3Au60rMU",
+ vpcmpgtq_3 = "rrmoy:660F38V37rM",
+ vpcmpistri_3 = "rmio:660F3Au63rMU",
+ vpcmpistrm_3 = "rmio:660F3Au62rMU",
+ vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
+ vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
+ vpextrd_3 = "mri/do:660F3Au16RmU",
+ vpextrq_3 = "mri/qo:660F3Au16RmU",
+ vphaddw_3 = "rrmoy:660F38V01rM",
+ vphaddd_3 = "rrmoy:660F38V02rM",
+ vphaddsw_3 = "rrmoy:660F38V03rM",
+ vphminposuw_2 = "rmo:660F38u41rM",
+ vphsubw_3 = "rrmoy:660F38V05rM",
+ vphsubd_3 = "rrmoy:660F38V06rM",
+ vphsubsw_3 = "rrmoy:660F38V07rM",
+ vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
+ vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
+ vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
+ vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
+ vpmaddubsw_3 = "rrmoy:660F38V04rM",
+ vpmaxsb_3 = "rrmoy:660F38V3CrM",
+ vpmaxsd_3 = "rrmoy:660F38V3DrM",
+ vpmaxuw_3 = "rrmoy:660F38V3ErM",
+ vpmaxud_3 = "rrmoy:660F38V3FrM",
+ vpminsb_3 = "rrmoy:660F38V38rM",
+ vpminsd_3 = "rrmoy:660F38V39rM",
+ vpminuw_3 = "rrmoy:660F38V3ArM",
+ vpminud_3 = "rrmoy:660F38V3BrM",
+ vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
+ vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
+ vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
+ vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
+ vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
+ vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
+ vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
+ vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
+ vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
+ vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
+ vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
+ vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
+ vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
+ vpmuldq_3 = "rrmoy:660F38V28rM",
+ vpmulhrsw_3 = "rrmoy:660F38V0BrM",
+ vpmulld_3 = "rrmoy:660F38V40rM",
+ vpshufb_3 = "rrmoy:660F38V00rM",
+ vpshufd_3 = "rmioy:660Fu70rMU",
+ vpshufhw_3 = "rmioy:F30Fu70rMU",
+ vpshuflw_3 = "rmioy:F20Fu70rMU",
+ vpsignb_3 = "rrmoy:660F38V08rM",
+ vpsignw_3 = "rrmoy:660F38V09rM",
+ vpsignd_3 = "rrmoy:660F38V0ArM",
+ vpslldq_3 = "rrioy:660Fv737mU",
+ vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
+ vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
+ vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
+ vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
+ vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
+ vpsrldq_3 = "rrioy:660Fv733mU",
+ vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
+ vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
+ vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
+ vptest_2 = "rmoy:660F38u17rM",
+
+ -- AVX2 integer ops
+ vbroadcasti128_2 = "rx/yo:660F38u5ArM",
+ vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
+ vextracti128_3 = "mri/oy:660F3AuL39RmU",
+ vpblendd_4 = "rrmioy:660F3AV02rMU",
+ vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
+ vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
+ vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
+ vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
+ vpermd_3 = "rrmy:660F38V36rM",
+ vpermq_3 = "rmiy:660F3AuX00rMU",
+ -- *vpgather* (!vsib)
+ vperm2i128_4 = "rrmiy:660F3AV46rMU",
+ vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
+ vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
+ vpsllvd_3 = "rrmoy:660F38V47rM",
+ vpsllvq_3 = "rrmoy:660F38VX47rM",
+ vpsravd_3 = "rrmoy:660F38V46rM",
+ vpsrlvd_3 = "rrmoy:660F38V45rM",
+ vpsrlvq_3 = "rrmoy:660F38VX45rM",
}
------------------------------------------------------------------------------
@@ -1462,28 +1723,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
end
--- SSE FP arithmetic ops.
+-- SSE / AVX FP arithmetic ops.
for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
sub = 12, min = 13, div = 14, max = 15 } do
map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
+ if n ~= 1 then -- Skip sqrt (n == 1): no 3-operand v* template is generated for it here.
+ map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n) -- "V" after 0F: VEX-encode (see map_vexarg).
+ map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n) -- F3 prefix, as in the ss_2 form.
+ map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n) -- 66 prefix, as in the pd_2 form.
+ map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n) -- F2 prefix, as in the sd_2 form.
+ end
+end
+
+-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
+for name,n in pairs{
+ paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
+ paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
+ packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
+ paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
+ pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
+ pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
+ pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
+ pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
+ pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
+ pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
+ psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
+ psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
+ punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
+ punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
+ punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
+} do
+ map_op[name.."_2"] = format("rmo:660F%02XrM", n) -- Legacy 2-operand form: 66 0F <n>.
+ map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n) -- VEX form: same opcode byte, one extra register operand.
end
------------------------------------------------------------------------------
+local map_vexarg = { u = false, v = 1, V = 2 } -- Pattern char -> index of the arg dopattern removes into vex.v (false: none removed).
+
-- Process pattern string.
local function dopattern(pat, args, sz, op, needrex)
- local digit, addin
+ local digit, addin, vex
local opcode = 0
local szov = sz
local narg = 1
local rex = 0
-- Limit number of section buffer positions used by a single dasm_put().
- -- A single opcode needs a maximum of 5 positions.
- if secpos+5 > maxsecpos then wflush() end
+ -- A single opcode needs a maximum of 6 positions.
+ if secpos+6 > maxsecpos then wflush() end
-- Process each character.
for c in gmatch(pat.."|", ".") do
@@ -1497,6 +1788,8 @@ local function dopattern(pat, args, sz, op, needrex)
szov = nil
elseif c == "X" then -- Force REX.W.
rex = 8
+ elseif c == "L" then -- Force VEX.L.
+ vex.l = true
elseif c == "r" then -- Merge 1st operand regno. into opcode.
addin = args[1]; opcode = opcode + (addin.reg % 8)
if narg < 2 then narg = 2 end
@@ -1520,21 +1813,42 @@ local function dopattern(pat, args, sz, op, needrex)
if t.xreg and t.xreg > 7 then rex = rex + 2 end
if s > 7 then rex = rex + 4 end
if needrex then rex = rex + 16 end
- wputop(szov, opcode, rex); opcode = nil
+ local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
+ opcode = nil
local imark = sub(pat, -1) -- Force a mark (ugly).
-- Put ModRM/SIB with regno/last digit as spare.
- wputmrmsib(t, imark, s, addin and addin.vreg)
+ wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
addin = nil
+ elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
+ local b = band(opcode, 255); opcode = shr(opcode, 8) -- Pop the lowest accumulated opcode byte.
+ local m = 1 -- Opcode map selector: 1 = 0F, 2 = 0F38, 3 = 0F3A.
+ if b == 0x38 then m = 2
+ elseif b == 0x3a then m = 3 end
+ if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end -- 38/3A must itself follow 0F; pop that byte too.
+ if b ~= 0x0f then
+ werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
+ "' in pattern `"..pat.."' for `"..op.."'")
+ end
+ local v = map_vexarg[c] -- false for "u", else the index of the operand to pull out.
+ if v then v = remove(args, v) end -- Later pattern chars then see args without that operand.
+ b = band(opcode, 255)
+ local p = 0 -- Prefix selector: 0 = none, 1 = 66, 2 = F3, 3 = F2.
+ if b == 0x66 then p = 1
+ elseif b == 0xf3 then p = 2
+ elseif b == 0xf2 then p = 3 end
+ if p ~= 0 then opcode = shr(opcode, 8) end -- The prefix byte is folded into p, not emitted separately.
+ if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end -- Flush any bytes still pending, without VEX.
+ vex = { m = m, p = p, v = v } -- Handed to wputop when the opcode is flushed; "L" may later set vex.l.
else
if opcode then -- Flush opcode.
if szov == "q" and rex == 0 then rex = rex + 8 end
if needrex then rex = rex + 16 end
if addin and addin.reg == -1 then
- wputop(szov, opcode - 7, rex)
- waction("VREG", addin.vreg); wputxb(0)
+ local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
+ wvreg("opcode", addin.vreg, psz, sk)
else
if addin and addin.reg > 7 then rex = rex + 1 end
- wputop(szov, opcode, rex)
+ wputop(szov, opcode, rex, vex)
end
opcode = nil
end
@@ -1571,6 +1885,14 @@ local function dopattern(pat, args, sz, op, needrex)
else
wputlabel("REL_", imm, 2)
end
+ elseif c == "s" then
+ local reg = a.reg
+ if reg < 0 then
+ wputb(0)
+ wvreg("imm.hi", a.vreg)
+ else
+ wputb(shl(reg, 4))
+ end
else
werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
end
@@ -1647,11 +1969,14 @@ map_op[".template__"] = function(params, template, nparams)
if pat == "" then pat = lastpat else lastpat = pat end
if matchtm(tm, args) then
local prefix = sub(szm, 1, 1)
- if prefix == "/" then -- Match both operand sizes.
- if args[1].opsize == sub(szm, 2, 2) and
- args[2].opsize == sub(szm, 3, 3) then
- dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
- return
+ if prefix == "/" then -- Exactly match leading operand sizes.
+ for i = #szm,1,-1 do
+ if i == 1 then
+ dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
+ return
+ elseif args[i-1].opsize ~= sub(szm, i, i) then
+ break
+ end
end
else -- Match common operand size.
local szp = sz
@@ -1716,8 +2041,8 @@ if x64 then
rex = a.reg > 7 and 9 or 8
end
end
- wputop(sz, opcode, rex)
- if vreg then waction("VREG", vreg); wputxb(0) end
+ local psz, sk = wputop(sz, opcode, rex, nil, vreg)
+ wvreg("opcode", vreg, psz, sk)
waction("IMM_D", format("(unsigned int)(%s)", op64))
waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
end
diff --git a/lib/luajit/dynasm/dynasm.lua b/lib/luajit/dynasm/dynasm.lua
index fffda7513c..145fb0cc6d 100644
--- a/lib/luajit/dynasm/dynasm.lua
+++ b/lib/luajit/dynasm/dynasm.lua
@@ -10,9 +10,9 @@
local _info = {
name = "DynASM",
description = "A dynamic assembler for code generation engines",
- version = "1.3.0",
- vernum = 10300,
- release = "2011-05-05",
+ version = "1.4.0",
+ vernum = 10400,
+ release = "2015-10-18",
author = "Mike Pall",
url = "http://luajit.org/dynasm.html",
license = "MIT",
diff --git a/lib/luajit/src/Makefile b/lib/luajit/src/Makefile
index 532da6e94d..6d9a1053ec 100644
--- a/lib/luajit/src/Makefile
+++ b/lib/luajit/src/Makefile
@@ -24,11 +24,13 @@ NODOTABIVER= 51
# removing the '#' in front of them. Make sure you force a full recompile
# with "make clean", followed by "make" if you change any options.
#
+DEFAULT_CC = gcc
+#
# LuaJIT builds as a native 32 or 64 bit binary by default.
-CC= gcc
+CC= $(DEFAULT_CC)
#
# Use this if you want to force a 32 bit build on a 64 bit multilib OS.
-#CC= gcc -m32
+#CC= $(DEFAULT_CC) -m32
#
# Since the assembler part does NOT maintain a frame pointer, it's pointless
# to slow down the C part by not omitting it. Debugging, tracebacks and
@@ -147,6 +149,29 @@ XCFLAGS=
# You probably don't need to change anything below this line!
##############################################################################
+##############################################################################
+# Host system detection.
+##############################################################################
+
+ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM))
+ HOST_SYS= Windows
+ HOST_RM= del
+else
+ HOST_SYS:= $(shell uname -s)
+ ifneq (,$(findstring MINGW,$(HOST_SYS)))
+ HOST_SYS= Windows
+ HOST_MSYS= mingw
+ endif
+ ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
+ HOST_SYS= Windows
+ HOST_MSYS= cygwin
+ endif
+ # Use Clang for OSX host.
+ ifeq (Darwin,$(HOST_SYS))
+ DEFAULT_CC= clang
+ endif
+endif
+
##############################################################################
# Flags and options for host and target.
##############################################################################
@@ -268,24 +293,9 @@ ifneq (,$(LMULTILIB))
endif
##############################################################################
-# System detection.
+# Target system detection.
##############################################################################
-ifeq (Windows,$(findstring Windows,$(OS))$(MSYSTEM)$(TERM))
- HOST_SYS= Windows
- HOST_RM= del
-else
- HOST_SYS:= $(shell uname -s)
- ifneq (,$(findstring MINGW,$(HOST_SYS)))
- HOST_SYS= Windows
- HOST_MSYS= mingw
- endif
- ifneq (,$(findstring CYGWIN,$(HOST_SYS)))
- HOST_SYS= Windows
- HOST_MSYS= cygwin
- endif
-endif
-
TARGET_SYS?= $(HOST_SYS)
ifeq (Windows,$(TARGET_SYS))
TARGET_STRIP+= --strip-unneeded
@@ -612,7 +622,7 @@ $(MINILUA_T): $(MINILUA_O)
$(E) "HOSTLINK $@"
$(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
-host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP)
+host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua
$(E) "DYNASM $@"
$(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
diff --git a/lib/luajit/src/host/buildvm_asm.c b/lib/luajit/src/host/buildvm_asm.c
index 9b7ae53a26..9b1194259a 100644
--- a/lib/luajit/src/host/buildvm_asm.c
+++ b/lib/luajit/src/host/buildvm_asm.c
@@ -261,11 +261,20 @@ void emit_asm(BuildCtx *ctx)
#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
/* This should really be moved into buildvm_arm.dasc. */
+#if LJ_ARCH_HASFPU
+ fprintf(ctx->fp,
+ ".fnstart\n"
+ ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n"
+ ".vsave {d8-d15}\n"
+ ".save {r4}\n"
+ ".pad #28\n");
+#else
fprintf(ctx->fp,
".fnstart\n"
".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
".pad #28\n");
#endif
+#endif
#if LJ_TARGET_MIPS
fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
#endif
diff --git a/lib/luajit/src/jit/dis_x86.lua b/lib/luajit/src/jit/dis_x86.lua
index 6bc38066fe..a7c05ed6d5 100644
--- a/lib/luajit/src/jit/dis_x86.lua
+++ b/lib/luajit/src/jit/dis_x86.lua
@@ -15,13 +15,12 @@
-- Intel and AMD manuals. The supported instruction set is quite extensive
-- and reflects what a current generation Intel or AMD CPU implements in
-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
--- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
--- instructions.
+-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor
+-- (VMX/SVM) instructions.
--
-- Notes:
-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
-- * No attempt at optimization has been made -- it's fast enough for my needs.
--- * The public API may change when more architectures are added.
------------------------------------------------------------------------------
local type = type
@@ -78,7 +77,7 @@ local map_opc1_32 = {
"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
--Cx
-"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
+"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi",
"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
--Dx
"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
@@ -103,7 +102,7 @@ local map_opc1_64 = setmetatable({
[0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb",
[0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb",
[0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
- [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false,
+ [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false,
[0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
}, { __index = map_opc1_32 })
@@ -114,12 +113,12 @@ local map_opc2 = {
[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
--1x
-"movupsXrm|movssXrm|movupdXrm|movsdXrm",
-"movupsXmr|movssXmr|movupdXmr|movsdXmr",
+"movupsXrm|movssXrvm|movupdXrm|movsdXrvm",
+"movupsXmr|movssXmvr|movupdXmr|movsdXmvr",
"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
"movlpsXmr||movlpdXmr",
-"unpcklpsXrm||unpcklpdXrm",
-"unpckhpsXrm||unpckhpdXrm",
+"unpcklpsXrvm||unpcklpdXrvm",
+"unpckhpsXrvm||unpckhpdXrvm",
"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
"movhpsXmr||movhpdXmr",
"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
@@ -128,7 +127,7 @@ local map_opc2 = {
"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
"movapsXrm||movapdXrm",
"movapsXmr||movapdXmr",
-"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt",
+"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt",
"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
@@ -144,27 +143,27 @@ local map_opc2 = {
"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
--5x
"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
-"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
-"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
-"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
-"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
-"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
+"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm",
+"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm",
+"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm",
+"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm",
+"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm",
"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
-"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
-"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
+"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm",
+"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm",
--6x
-"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
-"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
-"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
-"||punpcklqdqXrm","||punpckhqdqXrm",
+"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm",
+"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm",
+"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm",
+"||punpcklqdqXrvm","||punpckhqdqXrvm",
"movPrVSm","movqMrm|movdquXrm|movdqaXrm",
--7x
-"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
-"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
-"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
+"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu",
+"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu",
+"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
nil,nil,
-"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
+"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm",
"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
--8x
"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
@@ -182,27 +181,27 @@ nil,nil,
"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
--Cx
"xaddBmr","xaddVmr",
-"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
-"pinsrwPrWmu","pextrwDrPmu",
-"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
+"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|",
+"pinsrwPrvWmu","pextrwDrPmu",
+"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp",
"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
--Dx
-"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
-"paddqPrm","pmullwPrm",
+"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm",
+"paddqPrvm","pmullwPrvm",
"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
-"psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
-"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
+"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm",
+"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm",
--Ex
-"pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
-"pmulhuwPrm","pmulhwPrm",
+"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm",
+"pmulhuwPrvm","pmulhwPrvm",
"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
-"psubsbPrm","psubswPrm","pminswPrm","porPrm",
-"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
+"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm",
+"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm",
--Fx
-"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
-"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
-"psubbPrm","psubwPrm","psubdPrm","psubqPrm",
-"paddbPrm","paddwPrm","padddPrm","ud",
+"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm",
+"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$",
+"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm",
+"paddbPrvm","paddwPrvm","padddPrvm","ud",
}
assert(map_opc2[255] == "ud")
@@ -210,49 +209,70 @@ assert(map_opc2[255] == "ud")
local map_opc3 = {
["38"] = { -- [66] 0f 38 xx
--0x
-[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
-"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
-"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
-nil,nil,nil,nil,
+[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm",
+"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm",
+"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm",
+"||permilpsXrvm","||permilpdXrvm",nil,nil,
--1x
"||pblendvbXrma",nil,nil,nil,
-"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
-nil,nil,nil,nil,
+"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm",
+"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil,
"pabsbPrm","pabswPrm","pabsdPrm",nil,
--2x
"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
-"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
-nil,nil,nil,nil,
+"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm",
+"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr",
--3x
"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
-"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
-"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
-"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
+"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm",
+"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm",
+"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm",
--4x
-"||pmulddXrm","||phminposuwXrm",
+"||pmulddXrvm","||phminposuwXrm",nil,nil,
+nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
+--5x
+[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm",
+[0x5a] = "||broadcasti128XrlXm",
+--7x
+[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm",
+--8x
+[0x8c] = "||pmaskmovXrvVSm",
+[0x8e] = "||pmaskmovVSmXvr",
+--Dx
+[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
+[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
--Fx
[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
},
["3a"] = { -- [66] 0f 3a xx
--0x
-[0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
-"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
-"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
+[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil,
+"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil,
+"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu",
+"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu",
--1x
nil,nil,nil,nil,
"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
-nil,nil,nil,nil,nil,nil,nil,nil,
+"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil,
+nil,nil,nil,nil,
--2x
-"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
+"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil,
+--3x
+[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru",
--4x
-[0x40] = "||dppsXrmu",
-[0x41] = "||dppdXrmu",
-[0x42] = "||mpsadbwXrmu",
+[0x40] = "||dppsXrvmu",
+[0x41] = "||dppdXrvmu",
+[0x42] = "||mpsadbwXrvmu",
+[0x44] = "||pclmulqdqXrvmu",
+[0x46] = "||perm2i128Xrvmu",
+[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
+[0x4c] = "||pblendvbXrvmb",
--6x
[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
+[0xdf] = "||aeskeygenassistXrmu",
},
}
@@ -356,17 +376,19 @@ local map_regs = {
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
+ Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
+ "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" },
}
local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
-- Maps for size names.
local map_sz2n = {
- B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16,
+ B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32,
}
local map_sz2prefix = {
B = "byte", W = "word", D = "dword",
Q = "qword",
- M = "qword", X = "xword",
+ M = "qword", X = "xword", Y = "yword",
F = "dword", G = "qword", -- No need for sizes/register names for these two.
}
@@ -389,10 +411,13 @@ local function putop(ctx, text, operands)
if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
if ctx.rex then
local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
- (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")
- if t ~= "" then text = "rex."..t.." "..text end
+ (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
+ (ctx.vexl and "l" or "")
+ if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end
+ if t ~= "" then text = ctx.rex.."."..t.." "..text
+ elseif ctx.rex == "vex" then text = "v"..text end
ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
- ctx.rex = false
+ ctx.rex = false; ctx.vexl = false; ctx.vexv = false
end
if ctx.seg then
local text2, n = gsub(text, "%[", "["..ctx.seg..":")
@@ -407,6 +432,7 @@ local function putop(ctx, text, operands)
end
ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text))
ctx.mrm = false
+ ctx.vexv = false
ctx.start = pos
ctx.imm = nil
end
@@ -415,7 +441,7 @@ end
local function clearprefixes(ctx)
ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
- ctx.rex = false; ctx.a32 = false
+ ctx.rex = false; ctx.a32 = false; ctx.vexl = false
end
-- Fallback for incomplete opcodes at the end.
@@ -452,9 +478,9 @@ end
-- Process pattern string and generate the operands.
local function putpat(ctx, name, pat)
local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
- local code, pos, stop = ctx.code, ctx.pos, ctx.stop
+ local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
- -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
+ -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
for p in gmatch(pat, ".") do
local x = nil
if p == "V" or p == "U" then
@@ -469,11 +495,13 @@ local function putpat(ctx, name, pat)
elseif p == "B" then
sz = "B"
regs = ctx.rex and map_regs.B64 or map_regs.B
- elseif match(p, "[WDQMXFG]") then
+ elseif match(p, "[WDQMXYFG]") then
sz = p
+ if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
regs = map_regs[sz]
elseif p == "P" then
sz = ctx.o16 and "X" or "M"; ctx.o16 = false
+ if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
regs = map_regs[sz]
elseif p == "S" then
name = name..lower(sz)
@@ -486,6 +514,10 @@ local function putpat(ctx, name, pat)
local imm = getimm(ctx, pos, 1); if not imm then return end
x = format("0x%02x", imm)
pos = pos+1
+ elseif p == "b" then
+ local imm = getimm(ctx, pos, 1); if not imm then return end
+ x = regs[imm/16+1]
+ pos = pos+1
elseif p == "w" then
local imm = getimm(ctx, pos, 2); if not imm then return end
x = format("0x%x", imm)
@@ -618,8 +650,13 @@ local function putpat(ctx, name, pat)
else
x = "CR"..sp
end
+ elseif p == "v" then
+ if ctx.vexv then
+ x = regs[ctx.vexv+1]; ctx.vexv = false
+ end
elseif p == "y" then x = "DR"..sp
elseif p == "z" then x = "TR"..sp
+ elseif p == "l" then vexl = false
elseif p == "t" then
else
error("bad pattern `"..pat.."'")
@@ -694,7 +731,7 @@ map_act = {
B = putpat, W = putpat, D = putpat, Q = putpat,
V = putpat, U = putpat, T = putpat,
M = putpat, X = putpat, P = putpat,
- F = putpat, G = putpat,
+ F = putpat, G = putpat, Y = putpat,
-- Collect prefixes.
[":"] = function(ctx, name, pat)
@@ -755,15 +792,68 @@ map_act = {
-- REX prefix.
rex = function(ctx, name, pat)
- if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed.
+ if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
for p in gmatch(pat, ".") do ctx["rex"..p] = true end
- ctx.rex = true
+ ctx.rex = "rex"
+ end,
+
+ -- VEX prefix: decode the 2-byte (pat "2") or 3-byte (pat "3") form into ctx flags, then dispatch on the selected opcode map.
+ vex = function(ctx, name, pat)
+ if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
+ ctx.rex = "vex"
+ local pos = ctx.pos
+ if ctx.mrm then -- NOTE(review): presumably a ModRM byte was already fetched to tell VEX from les/lds -- confirm.
+ ctx.mrm = nil
+ pos = pos-1 -- Step back so that byte is re-read as the first VEX payload byte.
+ end
+ local b = byte(ctx.code, pos, pos) -- First payload byte.
+ if not b then return incomplete(ctx) end
+ pos = pos+1
+ if b < 128 then ctx.rexr = true end -- Bit 7 is stored inverted: clear means R is set.
+ local m = 1 -- Opcode map; the 2-byte form always uses map 1 (0F).
+ if pat == "3" then
+ m = b%32; b = (b-m)/32 -- Low 5 bits select the opcode map.
+ local nb = b%2; b = (b-nb)/2 -- Bit 5: inverted B.
+ if nb == 0 then ctx.rexb = true end
+ local nx = b%2; b = (b-nx)/2 -- Bit 6: inverted X.
+ if nx == 0 then ctx.rexx = true end
+ b = byte(ctx.code, pos, pos) -- Second payload byte (3-byte form only).
+ if not b then return incomplete(ctx) end
+ pos = pos+1
+ if b >= 128 then ctx.rexw = true end -- Bit 7 of the second payload byte is W (not inverted).
+ end
+ ctx.pos = pos
+ local map
+ if m == 1 then map = map_opc2
+ elseif m == 2 then map = map_opc3["38"]
+ elseif m == 3 then map = map_opc3["3a"]
+ else return unknown(ctx) end
+ local p = b%4; b = (b-p)/4 -- Low 2 bits: implied legacy prefix.
+ if p == 1 then ctx.o16 = "o16"
+ elseif p == 2 then ctx.rep = "rep"
+ elseif p == 3 then ctx.rep = "repne" end
+ local l = b%2; b = (b-l)/2 -- Bit 2: L flag.
+ if l ~= 0 then ctx.vexl = true end
+ ctx.vexv = (-1-b)%16 -- Next 4 bits hold the extra register number, stored inverted.
+ return dispatchmap(ctx, map)
+ end,
-- Special case for nop with REX prefix.
nop = function(ctx, name, pat)
return dispatch(ctx, ctx.rex and pat or "nop")
end,
+
+ -- Special case for 0F 77: emms without VEX; with VEX the L flag picks zeroall or zeroupper.
+ emms = function(ctx, name, pat)
+ if ctx.rex ~= "vex" then
+ return putop(ctx, "emms")
+ elseif ctx.vexl then
+ ctx.vexl = false -- Consume L here so putop does not also print it as a prefix flag.
+ return putop(ctx, "zeroall") -- putop prepends "v" when a plain VEX prefix is pending.
+ else
+ return putop(ctx, "zeroupper")
+ end
+ end,
}
------------------------------------------------------------------------------
diff --git a/lib/luajit/src/jit/dump.lua b/lib/luajit/src/jit/dump.lua
index c52d0f217e..071d396ee3 100644
--- a/lib/luajit/src/jit/dump.lua
+++ b/lib/luajit/src/jit/dump.lua
@@ -575,6 +575,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
end
if dumpmode.H then out:write("\n\n") else out:write("\n") end
else
+ if what == "flush" then symtab, nexitsym = {}, 0 end
out:write("---- TRACE ", what, "\n\n")
end
out:flush()
diff --git a/lib/luajit/src/lib_base.c b/lib/luajit/src/lib_base.c
index 887fea7a58..ca268b1d07 100644
--- a/lib/luajit/src/lib_base.c
+++ b/lib/luajit/src/lib_base.c
@@ -435,13 +435,13 @@ LJLIB_CF(gcinfo)
LJLIB_CF(collectgarbage)
{
int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */
- "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul");
+ "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning");
int32_t data = lj_lib_optint(L, 2, 0);
if (opt == LUA_GCCOUNT) {
setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
} else {
int res = lua_gc(L, opt, data);
- if (opt == LUA_GCSTEP)
+ if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING)
setboolV(L->top, res);
else
setintV(L->top, res);
diff --git a/lib/luajit/src/lib_ffi.c b/lib/luajit/src/lib_ffi.c
index b2b2d37ff7..7be624b42d 100644
--- a/lib/luajit/src/lib_ffi.c
+++ b/lib/luajit/src/lib_ffi.c
@@ -505,10 +505,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
}
if (sz == CTSIZE_INVALID)
lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE);
- if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
- cd = lj_cdata_new(cts, id, sz);
- else
- cd = lj_cdata_newv(L, id, sz, ctype_align(info));
+ cd = lj_cdata_newx(cts, id, sz, info);
setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */
lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
o, (MSize)(L->top - o)); /* Initialize cdata. */
diff --git a/lib/luajit/src/lib_os.c b/lib/luajit/src/lib_os.c
index 7b5873a518..37d7d5be61 100644
--- a/lib/luajit/src/lib_os.c
+++ b/lib/luajit/src/lib_os.c
@@ -39,7 +39,7 @@
LJLIB_CF(os_execute)
{
-#if LJ_TARGET_CONSOLE
+#if LJ_NO_SYSTEM
#if LJ_52
errno = ENOSYS;
return luaL_fileresult(L, 0, NULL);
diff --git a/lib/luajit/src/lj.supp b/lib/luajit/src/lj.supp
index 411f261700..acb9e789d0 100644
--- a/lib/luajit/src/lj.supp
+++ b/lib/luajit/src/lj.supp
@@ -24,3 +24,18 @@
Memcheck:Cond
fun:lj_str_new
}
+{
+ Optimized string compare
+ Memcheck:Addr4
+ fun:lj_str_fastcmp
+}
+{
+ Optimized string compare
+ Memcheck:Addr1
+ fun:lj_str_fastcmp
+}
+{
+ Optimized string compare
+ Memcheck:Cond
+ fun:lj_str_fastcmp
+}
diff --git a/lib/luajit/src/lj_alloc.c b/lib/luajit/src/lj_alloc.c
index 0aad826d36..ddd50cae4f 100644
--- a/lib/luajit/src/lj_alloc.c
+++ b/lib/luajit/src/lj_alloc.c
@@ -196,7 +196,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
return ptr;
}
-#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__)
+#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || defined(__CYGWIN__)
/* OSX and FreeBSD mmap() use a naive first-fit linear search.
** That's perfect for us. Except that -pagezero_size must be set for OSX,
diff --git a/lib/luajit/src/lj_api.c b/lib/luajit/src/lj_api.c
index 1f09284f99..042b0d9c8d 100644
--- a/lib/luajit/src/lj_api.c
+++ b/lib/luajit/src/lj_api.c
@@ -1188,6 +1188,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
res = (int)(g->gc.stepmul);
g->gc.stepmul = (MSize)data;
break;
+ case LUA_GCISRUNNING:
+ res = (g->gc.threshold != LJ_MAX_MEM);
+ break;
default:
res = -1; /* Invalid option. */
}
diff --git a/lib/luajit/src/lj_arch.h b/lib/luajit/src/lj_arch.h
index f1e7d7f45c..a114bdda53 100644
--- a/lib/luajit/src/lj_arch.h
+++ b/lib/luajit/src/lj_arch.h
@@ -155,7 +155,11 @@
#define LJ_ARCH_NAME "x64"
#define LJ_ARCH_BITS 64
#define LJ_ARCH_ENDIAN LUAJIT_LE
-#define LJ_ABI_WIN LJ_TARGET_WINDOWS
+#if LJ_TARGET_WINDOWS || __CYGWIN__
+#define LJ_ABI_WIN 1
+#else
+#define LJ_ABI_WIN 0
+#endif
#define LJ_TARGET_X64 1
#define LJ_TARGET_X86ORX64 1
#define LJ_TARGET_EHRETREG 0
@@ -300,6 +304,13 @@
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE
+#if !defined(LJ_ARCH_HASFPU) && defined(__mips_soft_float)
+#define LJ_ARCH_HASFPU 0
+#endif
+#if !defined(LJ_ABI_SOFTFP) && defined(__mips_soft_float)
+#define LJ_ABI_SOFTFP 1
+#endif
+
#if _MIPS_ARCH_MIPS32R2
#define LJ_ARCH_VERSION 20
#else
@@ -382,9 +393,6 @@
#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#endif
#elif LJ_TARGET_MIPS
-#if defined(__mips_soft_float)
-#error "No support for MIPS CPUs without FPU"
-#endif
#if defined(_LP64)
#error "No support for MIPS64"
#endif
@@ -494,6 +502,9 @@
#if defined(__symbian__) || LJ_TARGET_WINDOWS
#define LUAJIT_NO_EXP2
#endif
+#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
+#define LJ_NO_SYSTEM 1
+#endif
#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
#define LJ_NO_UNWIND 1
diff --git a/lib/luajit/src/lj_ccall.c b/lib/luajit/src/lj_ccall.c
index 5ab5b60daa..2dda540510 100644
--- a/lib/luajit/src/lj_ccall.c
+++ b/lib/luajit/src/lj_ccall.c
@@ -418,6 +418,18 @@
/* Complex values are returned in 1 or 2 FPRs. */ \
cc->retref = 0;
+#if LJ_ABI_SOFTFP
+#define CCALL_HANDLE_COMPLEXRET2 \
+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
+ ((intptr_t *)dp)[0] = cc->gpr[0]; \
+ ((intptr_t *)dp)[1] = cc->gpr[1]; \
+ } else { /* Copy complex double from GPRs. */ \
+ ((intptr_t *)dp)[0] = cc->gpr[0]; \
+ ((intptr_t *)dp)[1] = cc->gpr[1]; \
+ ((intptr_t *)dp)[2] = cc->gpr[2]; \
+ ((intptr_t *)dp)[3] = cc->gpr[3]; \
+ }
+#else
#define CCALL_HANDLE_COMPLEXRET2 \
if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
((float *)dp)[0] = cc->fpr[0].f; \
@@ -426,6 +438,7 @@
((double *)dp)[0] = cc->fpr[0].d; \
((double *)dp)[1] = cc->fpr[1].d; \
}
+#endif
#define CCALL_HANDLE_STRUCTARG \
/* Pass all structs by value in registers and/or on the stack. */
@@ -433,6 +446,22 @@
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex by value in 2 or 4 GPRs. */
+#define CCALL_HANDLE_GPR \
+ if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
+ ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
+ if (ngpr < maxgpr) { \
+ dp = &cc->gpr[ngpr]; \
+ if (ngpr + n > maxgpr) { \
+ nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
+ if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
+ ngpr = maxgpr; \
+ } else { \
+ ngpr += n; \
+ } \
+ goto done; \
+ }
+
+#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */
#define CCALL_HANDLE_REGARG \
if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \
/* Try to pass argument in FPRs. */ \
@@ -441,24 +470,18 @@
goto done; \
} else { /* Try to pass argument in GPRs. */ \
nfpr = CCALL_NARG_FPR; \
- if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
- ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
- if (ngpr < maxgpr) { \
- dp = &cc->gpr[ngpr]; \
- if (ngpr + n > maxgpr) { \
- nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
- if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
- ngpr = maxgpr; \
- } else { \
- ngpr += n; \
- } \
- goto done; \
- } \
+ CCALL_HANDLE_GPR \
}
+#else /* MIPS32 soft-float */
+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
+#endif
+#if !LJ_ABI_SOFTFP
+/* On MIPS64 soft-float, position of float return values is endian-dependent. */
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)&cc->fpr[0].f;
+#endif
#else
#error "Missing calling convention definitions for this architecture"
diff --git a/lib/luajit/src/lj_ccall.h b/lib/luajit/src/lj_ccall.h
index 91983feebd..8b0e796bfc 100644
--- a/lib/luajit/src/lj_ccall.h
+++ b/lib/luajit/src/lj_ccall.h
@@ -98,9 +98,9 @@ typedef double FPRArg;
#elif LJ_TARGET_MIPS
#define CCALL_NARG_GPR 4
-#define CCALL_NARG_FPR 2
+#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2)
#define CCALL_NRET_GPR 2
-#define CCALL_NRET_FPR 2
+#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
#define CCALL_SPS_EXTRA 7
#define CCALL_SPS_FREE 1
diff --git a/lib/luajit/src/lj_ccallback.c b/lib/luajit/src/lj_ccallback.c
index 065c329fa7..539c9e3da4 100644
--- a/lib/luajit/src/lj_ccallback.c
+++ b/lib/luajit/src/lj_ccallback.c
@@ -427,6 +427,15 @@ void lj_ccallback_mcode_free(CTState *cts)
#elif LJ_TARGET_MIPS
+#define CALLBACK_HANDLE_GPR \
+ if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
+ if (ngpr + n <= maxgpr) { \
+ sp = &cts->cb.gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ }
+
+#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */
#define CALLBACK_HANDLE_REGARG \
if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \
sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \
@@ -434,13 +443,13 @@ void lj_ccallback_mcode_free(CTState *cts)
goto done; \
} else { /* Try to pass argument in GPRs. */ \
nfpr = CCALL_NARG_FPR; \
- if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
- if (ngpr + n <= maxgpr) { \
- sp = &cts->cb.gpr[ngpr]; \
- ngpr += n; \
- goto done; \
- } \
+ CALLBACK_HANDLE_GPR \
}
+#else /* MIPS32 soft-float */
+#define CALLBACK_HANDLE_REGARG \
+ CALLBACK_HANDLE_GPR \
+ UNUSED(isfp);
+#endif
#define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
diff --git a/lib/luajit/src/lj_cdata.c b/lib/luajit/src/lj_cdata.c
index 5cd2c1140e..30d788e4c9 100644
--- a/lib/luajit/src/lj_cdata.c
+++ b/lib/luajit/src/lj_cdata.c
@@ -49,6 +49,15 @@ GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
return cd;
}
+/* Allocate arbitrary C data object: lj_cdata_new() unless VLA or over-aligned. */
+GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info)
+{
+ if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
+ return lj_cdata_new(cts, id, sz); /* Non-VLA with alignment <= CT_MEMALIGN. */
+ else
+ return lj_cdata_newv(cts->L, id, sz, ctype_align(info)); /* Pass alignment on. */
+}
+
/* Free a C data object. */
void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
{
diff --git a/lib/luajit/src/lj_cdata.h b/lib/luajit/src/lj_cdata.h
index c8975be1c9..0891c33c80 100644
--- a/lib/luajit/src/lj_cdata.h
+++ b/lib/luajit/src/lj_cdata.h
@@ -60,6 +60,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz)
LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id);
LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz,
CTSize align);
+LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz,
+ CTInfo info);
LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd);
LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj,
diff --git a/lib/luajit/src/lj_cparse.c b/lib/luajit/src/lj_cparse.c
index f212bd3682..c5e16fd7bc 100644
--- a/lib/luajit/src/lj_cparse.c
+++ b/lib/luajit/src/lj_cparse.c
@@ -1744,6 +1744,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline)
}
}
+/* Handle line number directive: skip rest of line 'hashline', then set new line. */
+static void cp_line(CPState *cp, BCLine hashline)
+{
+ BCLine newline = cp->val.u32; /* Captured up front, before the cp_next() calls below. */
+ /* TODO: Handle file name and include it in error messages. */
+ while (cp->tok != CTOK_EOF && cp->linenumber == hashline)
+ cp_next(cp); /* Discard tokens while still on the directive's line. */
+ cp->linenumber = newline;
+}
+
/* Parse multiple C declarations of types or extern identifiers. */
static void cp_decl_multi(CPState *cp)
{
@@ -1756,12 +1766,23 @@ static void cp_decl_multi(CPState *cp)
continue;
}
if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */
- BCLine pragmaline = cp->linenumber;
- if (!(cp_next(cp) == CTOK_IDENT &&
- cp->str->hash == H_(f5e6b4f8,1d509107))) /* pragma */
+ BCLine hashline = cp->linenumber;
+ CPToken tok = cp_next(cp);
+ if (tok == CTOK_INTEGER) { /* "# <num>" form. */
+ cp_line(cp, hashline);
+ continue;
+ } else if (tok == CTOK_IDENT &&
+ cp->str->hash == H_(187aab88,fcb60b42)) { /* line */
+ if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, CTOK_INTEGER); /* Report the token we expected. */
+ cp_line(cp, hashline);
+ continue;
+ } else if (tok == CTOK_IDENT &&
+ cp->str->hash == H_(f5e6b4f8,1d509107)) { /* pragma */
+ cp_pragma(cp, hashline);
+ continue;
+ } else {
cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);
- cp_pragma(cp, pragmaline);
- continue;
+ }
}
scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC);
if ((cp->tok == ';' || cp->tok == CTOK_EOF) &&
diff --git a/lib/luajit/src/lj_ctype.c b/lib/luajit/src/lj_ctype.c
index 2e23c994bb..eda070ce1e 100644
--- a/lib/luajit/src/lj_ctype.c
+++ b/lib/luajit/src/lj_ctype.c
@@ -38,6 +38,8 @@
_("uint64_t", UINT64) \
_("intptr_t", INT_PSZ) \
_("uintptr_t", UINT_PSZ) \
+ /* From POSIX. */ \
+ _("ssize_t", INT_PSZ) \
/* End of typedef list. */
/* Keywords (only the ones we actually care for). */
diff --git a/lib/luajit/src/lj_dispatch.h b/lib/luajit/src/lj_dispatch.h
index 1e247e3828..73d00ec00c 100644
--- a/lib/luajit/src/lj_dispatch.h
+++ b/lib/luajit/src/lj_dispatch.h
@@ -14,6 +14,21 @@
#if LJ_TARGET_MIPS
/* Need our own global offset table for the dreaded MIPS calling conventions. */
+#if LJ_SOFTFP /* Soft-float helper routines that get their own GOT slots. */
+extern double __adddf3(double a, double b);
+extern double __subdf3(double a, double b);
+extern double __muldf3(double a, double b);
+extern double __divdf3(double a, double b);
+extern int __ledf2(double a, double b); /* libgcc comparison helper returns int, not void. */
+extern double __floatsidf(int32_t a);
+extern int32_t __fixdfsi(double a);
+
+#define SFGOTDEF(_) \
+ _(lj_num2bit) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) _(__ledf2) \
+ _(__floatsidf) _(__fixdfsi)
+#else
+#define SFGOTDEF(_)
+#endif
#if LJ_HASJIT
#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot)
#else
@@ -39,7 +54,8 @@
_(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
_(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
_(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
- _(lj_buf_putstr_upper) _(lj_buf_tostr) JITGOTDEF(_) FFIGOTDEF(_)
+ _(lj_buf_putstr_upper) _(lj_buf_tostr) \
+ JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_)
enum {
#define GOTENUM(name) LJ_GOT_##name,
diff --git a/lib/luajit/src/lj_err.c b/lib/luajit/src/lj_err.c
index 2e20c2c0f8..d641735e9f 100644
--- a/lib/luajit/src/lj_err.c
+++ b/lib/luajit/src/lj_err.c
@@ -183,20 +183,13 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
/* -- External frame unwinding -------------------------------------------- */
-#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_TARGET_WINDOWS
+#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN
/*
** We have to use our own definitions instead of the mandatory (!) unwind.h,
** since various OS, distros and compilers mess up the header installation.
*/
-typedef struct _Unwind_Exception
-{
- uint64_t exclass;
- void (*excleanup)(int, struct _Unwind_Exception *);
- uintptr_t p1, p2;
-} __attribute__((__aligned__)) _Unwind_Exception;
-
typedef struct _Unwind_Context _Unwind_Context;
#define _URC_OK 0
@@ -206,8 +199,20 @@ typedef struct _Unwind_Context _Unwind_Context;
#define _URC_CONTINUE_UNWIND 8
#define _URC_FAILURE 9
+#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */
+#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c))
+#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff)
+#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff))
+
#if !LJ_TARGET_ARM
+typedef struct _Unwind_Exception
+{
+ uint64_t exclass;
+ void (*excleanup)(int, struct _Unwind_Exception *);
+ uintptr_t p1, p2;
+} __attribute__((__aligned__)) _Unwind_Exception;
+
extern uintptr_t _Unwind_GetCFA(_Unwind_Context *);
extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t);
extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t);
@@ -219,11 +224,6 @@ extern int _Unwind_RaiseException(_Unwind_Exception *);
#define _UA_HANDLER_FRAME 4
#define _UA_FORCE_UNWIND 8
-#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */
-#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c))
-#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff)
-#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff))
-
/* DWARF2 personality handler referenced from interpreter .eh_frame. */
LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
@@ -302,10 +302,23 @@ static void err_raise_ext(int errcode)
}
#endif
-#else
+#else /* LJ_TARGET_ARM */
+
+#define _US_VIRTUAL_UNWIND_FRAME 0
+#define _US_UNWIND_FRAME_STARTING 1
+#define _US_ACTION_MASK 3
+#define _US_FORCE_UNWIND 8
+
+typedef struct _Unwind_Control_Block _Unwind_Control_Block;
+typedef struct _Unwind_Context _Unwind_Context;
-extern void _Unwind_DeleteException(void *);
-extern int __gnu_unwind_frame (void *, _Unwind_Context *);
+struct _Unwind_Control_Block {
+ uint64_t exclass;
+ uint32_t misc[20];
+};
+
+extern int _Unwind_RaiseException(_Unwind_Control_Block *);
+extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *);
extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *);
extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *);
@@ -321,35 +334,58 @@ static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v)
_Unwind_VRS_Set(ctx, 0, r, 0, &v);
}
-#define _US_VIRTUAL_UNWIND_FRAME 0
-#define _US_UNWIND_FRAME_STARTING 1
-#define _US_ACTION_MASK 3
-#define _US_FORCE_UNWIND 8
+extern void lj_vm_unwind_ext(void);
/* ARM unwinder personality handler referenced from interpreter .ARM.extab. */
-LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx)
+LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,
+ _Unwind_Context *ctx)
{
void *cf = (void *)_Unwind_GetGR(ctx, 13);
lua_State *L = cframe_L(cf);
- if ((state & _US_ACTION_MASK) == _US_VIRTUAL_UNWIND_FRAME) {
- setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+ int errcode;
+
+ switch ((state & _US_ACTION_MASK)) {
+ case _US_VIRTUAL_UNWIND_FRAME:
+ if ((state & _US_FORCE_UNWIND)) break;
return _URC_HANDLER_FOUND;
- }
- if ((state&(_US_ACTION_MASK|_US_FORCE_UNWIND)) == _US_UNWIND_FRAME_STARTING) {
- _Unwind_DeleteException(ucb);
- _Unwind_SetGR(ctx, 15, (uint32_t)(void *)lj_err_throw);
- _Unwind_SetGR(ctx, 0, (uint32_t)L);
- _Unwind_SetGR(ctx, 1, (uint32_t)LUA_ERRRUN);
+ case _US_UNWIND_FRAME_STARTING:
+ if (LJ_UEXCLASS_CHECK(ucb->exclass)) {
+ errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass);
+ } else {
+ errcode = LUA_ERRRUN;
+ setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+ }
+ cf = err_unwind(L, cf, errcode);
+ if ((state & _US_FORCE_UNWIND) || cf == NULL) break;
+ _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext);
+ _Unwind_SetGR(ctx, 0, (uint32_t)ucb);
+ _Unwind_SetGR(ctx, 1, (uint32_t)errcode);
+ _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ?
+ (uint32_t)lj_vm_unwind_ff_eh :
+ (uint32_t)lj_vm_unwind_c_eh);
return _URC_INSTALL_CONTEXT;
+ default:
+ return _URC_FAILURE;
}
if (__gnu_unwind_frame(ucb, ctx) != _URC_OK)
return _URC_FAILURE;
return _URC_CONTINUE_UNWIND;
}
+#if LJ_UNWIND_EXT
+static __thread _Unwind_Control_Block static_uex; /* Thread-local block, re-initialized on every raise. */
+
+static void err_raise_ext(int errcode) /* Raise errcode through the external unwinder. */
+{
+ memset(&static_uex, 0, sizeof(static_uex)); /* Clear state left by a previous raise. */
+ static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); /* Error code is OR'ed into LJ_UEXCLASS. */
+ _Unwind_RaiseException(&static_uex);
+}
#endif
-#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS
+#endif /* LJ_TARGET_ARM */
+
+#elif LJ_TARGET_X64 && LJ_ABI_WIN
/*
** Someone in Redmond owes me several days of my life. A lot of this is
@@ -414,7 +450,9 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
if (cf2) { /* We catch it, so start unwinding the upper frames. */
if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
rec->ExceptionCode == LJ_GCC_EXCODE) {
+#if LJ_TARGET_WINDOWS
__DestructExceptionObject(rec, 1);
+#endif
setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
} else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
/* Don't catch access violations etc. */
diff --git a/lib/luajit/src/lj_ffrecord.c b/lib/luajit/src/lj_ffrecord.c
index 6cc05a24f7..281f017856 100644
--- a/lib/luajit/src/lj_ffrecord.c
+++ b/lib/luajit/src/lj_ffrecord.c
@@ -435,11 +435,12 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
{
- if (!(LJ_52 && recff_metacall(J, rd, MM_ipairs))) {
- TRef tab = J->base[0];
- if (tref_istab(tab)) {
+ TRef tr = J->base[0];
+ if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) &&
+ recff_metacall(J, rd, MM_pairs + rd->data))) {
+ if (tref_istab(tr)) {
J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
- J->base[1] = tab;
+ J->base[1] = tr;
J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL;
rd->nres = 3;
} /* else: Interpreter will throw. */
diff --git a/lib/luajit/src/lj_frame.h b/lib/luajit/src/lj_frame.h
index a86c36be7e..aa3ab20bbf 100644
--- a/lib/luajit/src/lj_frame.h
+++ b/lib/luajit/src/lj_frame.h
@@ -218,6 +218,7 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_SHIFT_MULTRES 3
#endif
#elif LJ_TARGET_MIPS
+#if LJ_ARCH_HASFPU
#define CFRAME_OFS_ERRF 124
#define CFRAME_OFS_NRES 120
#define CFRAME_OFS_PREV 116
@@ -227,6 +228,16 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
#define CFRAME_SIZE 112
#define CFRAME_SHIFT_MULTRES 3
#else
+#define CFRAME_OFS_ERRF 100
+#define CFRAME_OFS_NRES 96
+#define CFRAME_OFS_PREV 92
+#define CFRAME_OFS_L 88
+#define CFRAME_OFS_PC 44
+#define CFRAME_OFS_MULTRES 16
+#define CFRAME_SIZE 88
+#define CFRAME_SHIFT_MULTRES 3
+#endif
+#else
#error "Missing CFRAME_* definitions for this architecture"
#endif
diff --git a/lib/luajit/src/lj_ircall.h b/lib/luajit/src/lj_ircall.h
index 84e41ecfcc..1f44b03d67 100644
--- a/lib/luajit/src/lj_ircall.h
+++ b/lib/luajit/src/lj_ircall.h
@@ -270,6 +270,22 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
#define fp64_f2l __aeabi_f2lz
#define fp64_f2ul __aeabi_f2ulz
#endif
+#elif LJ_TARGET_MIPS
+#define softfp_add __adddf3
+#define softfp_sub __subdf3
+#define softfp_mul __muldf3
+#define softfp_div __divdf3
+#define softfp_cmp __ledf2
+#define softfp_i2d __floatsidf
+#define softfp_d2i __fixdfsi
+#define softfp_ui2d __floatunsidf
+#define softfp_f2d __extendsfdf2
+#define softfp_d2ui __fixunsdfsi
+#define softfp_d2f __truncdfsf2
+#define softfp_i2f __floatsisf
+#define softfp_ui2f __floatunsisf
+#define softfp_f2i __fixsfsi
+#define softfp_f2ui __fixunssfsi
#else
#error "Missing soft-float definitions for target architecture"
#endif
diff --git a/lib/luajit/src/lj_opt_split.c b/lib/luajit/src/lj_opt_split.c
index 81ded6c0a0..4652c73786 100644
--- a/lib/luajit/src/lj_opt_split.c
+++ b/lib/luajit/src/lj_opt_split.c
@@ -596,7 +596,8 @@ static void split_ir(jit_State *J)
}
#endif
else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */
- /* Drop cast, since assembler doesn't care. */
+ /* Drop cast, since assembler doesn't care. But fwd both parts. */
+ hi = hiref;
goto fwdlo;
} else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */
IRRef k31 = lj_ir_kint(J, 31);
diff --git a/lib/luajit/src/lj_record.c b/lib/luajit/src/lj_record.c
index dc5f2d547a..480c80c170 100644
--- a/lib/luajit/src/lj_record.c
+++ b/lib/luajit/src/lj_record.c
@@ -1236,12 +1236,14 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
}
/* Record indexed key lookup. */
-static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref)
+static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
+ IRType1 *rbguard)
{
TRef key;
GCtab *t = tabV(&ix->tabv);
ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */
*rbref = 0;
+ rbguard->irt = 0;
/* Integer keys are looked up in the array part first. */
key = ix->key;
@@ -1293,6 +1295,7 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref)
hslot <= 65535*(MSize)sizeof(Node)) {
TRef node, kslot, hm;
*rbref = J->cur.nins; /* Mark possible rollback point. */
+ *rbguard = J->guardemit;
hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE);
@@ -1327,6 +1330,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
TRef xref;
IROp xrefop, loadop;
IRRef rbref;
+ IRType1 rbguard;
cTValue *oldv;
while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
@@ -1373,7 +1377,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
}
/* Record the key lookup. */
- xref = rec_idx_key(J, ix, &rbref);
+ xref = rec_idx_key(J, ix, &rbref, &rbguard);
xrefop = IR(tref_ref(xref))->o;
loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD;
/* The lj_meta_tset() inconsistency is gone, but better play safe. */
@@ -1388,8 +1392,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
} else {
res = emitir(IRTG(loadop, t), xref, 0);
}
- if (tref_ref(res) < rbref) /* HREFK + load forwarded? */
+ if (tref_ref(res) < rbref) { /* HREFK + load forwarded? */
lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
+ J->guardemit = rbguard;
+ }
if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index))
goto handlemm;
if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */
@@ -1397,8 +1403,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
} else { /* Indexed store. */
GCtab *mt = tabref(tabV(&ix->tabv)->metatable);
int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val);
- if (tref_ref(xref) < rbref) /* HREFK forwarded? */
+ if (tref_ref(xref) < rbref) { /* HREFK forwarded? */
lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
+ J->guardemit = rbguard;
+ }
if (tvisnil(oldv)) { /* Previous value was nil? */
/* Need to duplicate the hasmm check for the early guards. */
int hasmm = 0;
diff --git a/lib/luajit/src/lj_snap.c b/lib/luajit/src/lj_snap.c
index fa9abb7475..368bad33a9 100644
--- a/lib/luajit/src/lj_snap.c
+++ b/lib/luajit/src/lj_snap.c
@@ -26,9 +26,6 @@
#include "lj_cdata.h"
#endif
-/* Some local macros to save typing. Undef'd at the end. */
-#define IR(ref) (&J->cur.ir[(ref)])
-
/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
@@ -73,7 +70,7 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
IRRef ref = tref_ref(tr);
if (ref) {
SnapEntry sn = SNAP_TR(s, tr);
- IRIns *ir = IR(ref);
+ IRIns *ir = &J->cur.ir[ref];
if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
/* No need to snapshot unmodified non-inherited slots. */
@@ -148,8 +145,8 @@ void lj_snap_add(jit_State *J)
MSize nsnap = J->cur.nsnap;
MSize nsnapmap = J->cur.nsnapmap;
/* Merge if no ins. inbetween or if requested and no guard inbetween. */
- if (J->mergesnap ? !irt_isguard(J->guardemit) :
- (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
+ if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
+ (J->mergesnap && !irt_isguard(J->guardemit))) {
if (nsnap == 1) { /* But preserve snap #0 PC. */
emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
goto nomerge;
@@ -407,24 +404,24 @@ static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
}
/* Check whether a sunk store corresponds to an allocation. Slow path. */
-static int snap_sunk_store2(jit_State *J, IRIns *ira, IRIns *irs)
+static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
{
if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
- IRIns *irk = IR(irs->op1);
+ IRIns *irk = &T->ir[irs->op1];
if (irk->o == IR_AREF || irk->o == IR_HREFK)
- irk = IR(irk->op1);
- return (IR(irk->op1) == ira);
+ irk = &T->ir[irk->op1];
+ return (&T->ir[irk->op1] == ira);
}
return 0;
}
/* Check whether a sunk store corresponds to an allocation. Fast path. */
-static LJ_AINLINE int snap_sunk_store(jit_State *J, IRIns *ira, IRIns *irs)
+static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
{
if (irs->s != 255)
return (ira + irs->s == irs); /* Fast check. */
- return snap_sunk_store2(J, ira, irs);
+ return snap_sunk_store2(T, ira, irs);
}
/* Replay snapshot state to setup side trace. */
@@ -487,7 +484,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
} else {
IRIns *irs;
for (irs = ir+1; irs < irlast; irs++)
- if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
+ if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
@@ -521,13 +518,13 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
snap_pref(J, T, map, nent, seen, (ir+1)->op2));
}
- J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
+ J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
} else {
IRIns *irs;
TRef tr = emitir(ir->ot, op1, op2);
J->slot[snap_slot(sn)] = tr;
for (irs = ir+1; irs < irlast; irs++)
- if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
+ if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
IRIns *irr = &T->ir[irs->op1];
TRef val, key = irr->op2, tmp = tr;
if (irr->o != IR_FREF) {
@@ -714,8 +711,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
CTState *cts = ctype_cts(J->L);
CTypeID id = (CTypeID)T->ir[ir->op1].i;
- CTSize sz = lj_ctype_size(cts, id);
- GCcdata *cd = lj_cdata_new(cts, id, sz);
+ CTSize sz;
+ CTInfo info = lj_ctype_info(cts, id, &sz);
+ GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
setcdataV(J->L, o, cd);
if (ir->o == IR_CNEWI) {
uint8_t *p = (uint8_t *)cdataptr(cd);
@@ -729,7 +727,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
} else {
IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
for (irs = ir+1; irs < irlast; irs++)
- if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
+ if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
IRIns *iro = &T->ir[T->ir[irs->op1].op2];
uint8_t *p = (uint8_t *)cd;
CTSize szs;
@@ -762,7 +760,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
settabV(J->L, o, t);
irlast = &T->ir[T->snap[snapno].ref];
for (irs = ir+1; irs < irlast; irs++)
- if (irs->r == RID_SINK && snap_sunk_store(J, ir, irs)) {
+ if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
IRIns *irk = &T->ir[irs->op1];
TValue tmp, *val;
lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
@@ -863,7 +861,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
return pc;
}
-#undef IR
#undef emitir_raw
#undef emitir
diff --git a/lib/luajit/src/lj_vm.h b/lib/luajit/src/lj_vm.h
index b31e22f70f..cb76d7a700 100644
--- a/lib/luajit/src/lj_vm.h
+++ b/lib/luajit/src/lj_vm.h
@@ -50,7 +50,7 @@ LJ_ASMF void lj_vm_exit_handler(void);
LJ_ASMF void lj_vm_exit_interp(void);
/* Internal math helper functions. */
-#if LJ_TARGET_PPC || LJ_TARGET_ARM64
+#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
#define lj_vm_floor floor
#define lj_vm_ceil ceil
#else
diff --git a/lib/luajit/src/lua.h b/lib/luajit/src/lua.h
index c83fd3bbe7..352d29f3cd 100644
--- a/lib/luajit/src/lua.h
+++ b/lib/luajit/src/lua.h
@@ -226,6 +226,7 @@ LUA_API int (lua_status) (lua_State *L);
#define LUA_GCSTEP 5
#define LUA_GCSETPAUSE 6
#define LUA_GCSETSTEPMUL 7
+#define LUA_GCISRUNNING 9
LUA_API int (lua_gc) (lua_State *L, int what, int data);
diff --git a/lib/luajit/src/vm_arm.dasc b/lib/luajit/src/vm_arm.dasc
index af722f9eac..acc0853bb7 100644
--- a/lib/luajit/src/vm_arm.dasc
+++ b/lib/luajit/src/vm_arm.dasc
@@ -372,6 +372,17 @@ static void build_subroutines(BuildCtx *ctx)
| str CARG1, [BASE, #-4] // Prepend false to error message.
| st_vmstate CARG2
| b ->vm_returnc
+ |
+ |->vm_unwind_ext: // Complete external unwind.
+#if !LJ_NO_UNWIND
+ | push {r0, r1, r2, lr}
+ | bl extern _Unwind_Complete
+ | ldr r0, [sp]
+ | bl extern _Unwind_DeleteException
+ | pop {r0, r1, r2, lr}
+ | mov r0, r1
+ | bx r2
+#endif
|
|//-----------------------------------------------------------------------
|//-- Grow stack for calls -----------------------------------------------
diff --git a/lib/luajit/src/vm_mips.dasc b/lib/luajit/src/vm_mips.dasc
index 134ed569e8..0dba129316 100644
--- a/lib/luajit/src/vm_mips.dasc
+++ b/lib/luajit/src/vm_mips.dasc
@@ -1,6 +1,9 @@
|// Low-level VM code for MIPS CPUs.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
+|//
+|// MIPS soft-float support contributed by Djordje Kovacevic and
+|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
|
|.arch mips
|.section code_op, code_sub
@@ -18,6 +21,12 @@
|// Fixed register assignments for the interpreter.
|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
|
+|.macro .FPU, a, b
+|.if FPU
+| a, b
+|.endif
+|.endmacro
+|
|// The following must be C callee-save (but BASE is often refetched).
|.define BASE, r16 // Base of current Lua stack frame.
|.define KBASE, r17 // Constants of current Lua function.
@@ -31,7 +40,9 @@
|
|// Constants for type-comparisons, stores and conversions. C callee-save.
|.define TISNIL, r30
+|.if FPU
|.define TOBIT, f30 // 2^52 + 2^51.
+|.endif
|
|// The following temporaries are not saved across C calls, except for RA.
|.define RA, r23 // Callee-save.
@@ -46,6 +57,13 @@
|.define TMP2, r14
|.define TMP3, r15
|
+|.if not FPU
+|.define SFT1, r2
+|.define SFT2, r3
+|.define SFT3, r4
+|.define SFT4, r5
+|.endif
+|
|// Calling conventions.
|.define CFUNCADDR, r25
|.define CARG1, r4
@@ -56,13 +74,16 @@
|.define CRET1, r2
|.define CRET2, r3
|
+|.if FPU
|.define FARG1, f12
|.define FARG2, f14
|
|.define FRET1, f0
|.define FRET2, f2
+|.endif
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
+|.if FPU // MIPS32 hard-float.
|.define CFRAME_SPACE, 112 // Delta for sp.
|
|.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
@@ -83,43 +104,76 @@
|.define ARG5_OFS, 16
|.define SAVE_MULTRES, ARG5
|
+|//-----------------------------------------------------------------------
+|.else // MIPS32 soft-float.
+|
+|.define CFRAME_SPACE, 88 // Delta for sp.
+|
+|.define SAVE_ERRF, 100(sp) // 32 bit C frame info.
+|.define SAVE_NRES, 96(sp)
+|.define SAVE_CFRAME, 92(sp)
+|.define SAVE_L, 88(sp)
+|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
+|.define SAVE_GPR_, 48 // .. 48+10*4: 32 bit GPR saves.
+|.define SAVE_PC, 44(sp)
+|.define TEMP_SAVE_6, 40(sp)
+|.define TEMP_SAVE_5, 36(sp)
+|.define TEMP_SAVE_4, 32(sp)
+|.define TEMP_SAVE_3, 28(sp)
+|.define TEMP_SAVE_2, 24(sp)
+|.define TEMP_SAVE_1, 20(sp)
+|//----- 8 byte aligned, ^^^^ 24 byte register save area, owned by caller.
+|.define ARG5, 16(sp)
+|.define CSAVE_4, 12(sp)
+|.define CSAVE_3, 8(sp)
+|.define CSAVE_2, 4(sp)
+|.define CSAVE_1, 0(sp)
+|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by callee.
+|
+|.define ARG5_OFS, 16
+|.define SAVE_MULTRES, ARG5
+|
+|.endif
+|
+|//-----------------------------------------------------------------------
+|
|.macro saveregs
| addiu sp, sp, -CFRAME_SPACE
| sw ra, SAVE_GPR_+9*4(sp)
| sw r30, SAVE_GPR_+8*4(sp)
-| sdc1 f30, SAVE_FPR_+5*8(sp)
+| .FPU sdc1 f30, SAVE_FPR_+5*8(sp)
| sw r23, SAVE_GPR_+7*4(sp)
| sw r22, SAVE_GPR_+6*4(sp)
-| sdc1 f28, SAVE_FPR_+4*8(sp)
+| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
| sw r21, SAVE_GPR_+5*4(sp)
| sw r20, SAVE_GPR_+4*4(sp)
-| sdc1 f26, SAVE_FPR_+3*8(sp)
+| .FPU sdc1 f26, SAVE_FPR_+3*8(sp)
| sw r19, SAVE_GPR_+3*4(sp)
| sw r18, SAVE_GPR_+2*4(sp)
-| sdc1 f24, SAVE_FPR_+2*8(sp)
+| .FPU sdc1 f24, SAVE_FPR_+2*8(sp)
| sw r17, SAVE_GPR_+1*4(sp)
| sw r16, SAVE_GPR_+0*4(sp)
-| sdc1 f22, SAVE_FPR_+1*8(sp)
-| sdc1 f20, SAVE_FPR_+0*8(sp)
+| .FPU sdc1 f22, SAVE_FPR_+1*8(sp)
+| .FPU sdc1 f20, SAVE_FPR_+0*8(sp)
|.endmacro
|
|.macro restoreregs_ret
| lw ra, SAVE_GPR_+9*4(sp)
| lw r30, SAVE_GPR_+8*4(sp)
-| ldc1 f30, SAVE_FPR_+5*8(sp)
+| .FPU ldc1 f30, SAVE_FPR_+5*8(sp)
| lw r23, SAVE_GPR_+7*4(sp)
| lw r22, SAVE_GPR_+6*4(sp)
-| ldc1 f28, SAVE_FPR_+4*8(sp)
+| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
| lw r21, SAVE_GPR_+5*4(sp)
| lw r20, SAVE_GPR_+4*4(sp)
-| ldc1 f26, SAVE_FPR_+3*8(sp)
+| .FPU ldc1 f26, SAVE_FPR_+3*8(sp)
| lw r19, SAVE_GPR_+3*4(sp)
| lw r18, SAVE_GPR_+2*4(sp)
-| ldc1 f24, SAVE_FPR_+2*8(sp)
+| .FPU ldc1 f24, SAVE_FPR_+2*8(sp)
| lw r17, SAVE_GPR_+1*4(sp)
| lw r16, SAVE_GPR_+0*4(sp)
-| ldc1 f22, SAVE_FPR_+1*8(sp)
-| ldc1 f20, SAVE_FPR_+0*8(sp)
+| .FPU ldc1 f22, SAVE_FPR_+1*8(sp)
+| .FPU ldc1 f20, SAVE_FPR_+0*8(sp)
| jr ra
| addiu sp, sp, CFRAME_SPACE
|.endmacro
@@ -270,6 +324,61 @@
|.macro call_extern; jalr CFUNCADDR; .endmacro
|.macro jmp_extern; jr CFUNCADDR; .endmacro
|
+|// Converts int from given reg to double, result in CRET1 and CRET2 regs.
+|.if not FPU
+|.macro cvti2d, arg
+| load_got __floatsidf
+| call_extern
+|. move CARG1, arg
+|.endmacro
+|.endif
+|
+|// Loads a double-word floating-point value.
+|.macro load_double, fpr, gpr1, gpr2, src
+|.if FPU
+| ldc1 fpr, src
+|.else
+| lw gpr1, src
+| lw gpr2, 4+src
+|.endif
+|.endmacro
+|
+|// Stores a double-word floating-point value.
+|.macro store_double, fpr, gpr1, gpr2, dst
+|.if FPU
+| sdc1 fpr, dst
+|.else
+| sw gpr1, dst
+| sw gpr2, 4+dst
+|.endif
+|.endmacro
+|
+|// Loads the first double-word floating-point argument.
+|.macro load_farg1, src
+| load_double FARG1, CARG1, CARG2, src
+|.endmacro
+|
+|// Loads the second double-word floating-point argument.
+|.macro load_farg2, src
+| load_double FARG2, CARG3, CARG4, src
+|.endmacro
+|
+|.macro load_double1, src
+| load_double f0, SFT1, SFT2, src
+|.endmacro
+|
+|.macro store_double1, dst
+| store_double f0, SFT1, SFT2, dst
+|.endmacro
+|
+|.macro load_double2, src
+| load_double f2, SFT3, SFT4, src
+|.endmacro
+|
+|.macro store_double2, dst
+| store_double f2, SFT3, SFT4, dst
+|.endmacro
+|
|.macro hotcheck, delta, target
| srl TMP1, PC, 1
| andi TMP1, TMP1, 126
@@ -354,9 +463,9 @@ static void build_subroutines(BuildCtx *ctx)
|. sll TMP2, TMP2, 3
|1:
| addiu TMP1, TMP1, -8
- | ldc1 f0, 0(RA)
+ | load_double1 0(RA)
| addiu RA, RA, 8
- | sdc1 f0, 0(BASE)
+ | store_double1 0(BASE)
| bnez TMP1, <1
|. addiu BASE, BASE, 8
|
@@ -425,15 +534,15 @@ static void build_subroutines(BuildCtx *ctx)
| and sp, CARG1, AT
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
| lw L, SAVE_L
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| li TISNIL, LJ_TNIL
| lw BASE, L->base
| lw DISPATCH, L->glref // Setup pointer to dispatch table.
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| li TMP1, LJ_TFALSE
| li_vmstate INTERP
| lw PC, FRAME_PC(BASE) // Fetch PC of previous frame.
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| addiu RA, BASE, -8 // Results start at BASE-8.
| addiu DISPATCH, DISPATCH, GG_G2DISP
| sw TMP1, HI(RA) // Prepend false to error message.
@@ -498,11 +607,11 @@ static void build_subroutines(BuildCtx *ctx)
| lw BASE, L->base
| lw TMP1, L->top
| lw PC, FRAME_PC(BASE)
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| subu RD, TMP1, BASE
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| sb r0, L->status
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| li_vmstate INTERP
| addiu RD, RD, 8
| st_vmstate
@@ -540,13 +649,13 @@ static void build_subroutines(BuildCtx *ctx)
|3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
| sw L, DISPATCH_GL(cur_L)(DISPATCH)
| lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| lw TMP1, L->top
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| addu PC, PC, BASE
| subu NARGS8:RC, TMP1, BASE
| subu PC, PC, TMP2 // PC = frame delta + frame type
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| li_vmstate INTERP
| li TISNIL, LJ_TNIL
| st_vmstate
@@ -628,7 +737,7 @@ static void build_subroutines(BuildCtx *ctx)
|->cont_cat: // RA = resultptr, RB = meta base
| lw INS, -4(PC)
| addiu CARG2, RB, -16
- | ldc1 f0, 0(RA)
+ | load_double1 0(RA)
| decode_RB8a MULTRES, INS
| decode_RA8a RA, INS
| decode_RB8b MULTRES
@@ -636,11 +745,21 @@ static void build_subroutines(BuildCtx *ctx)
| addu TMP1, BASE, MULTRES
| sw BASE, L->base
| subu CARG3, CARG2, TMP1
+ |.if FPU
| bne TMP1, CARG2, ->BC_CAT_Z
|. sdc1 f0, 0(CARG2)
| addu RA, BASE, RA
| b ->cont_nop
|. sdc1 f0, 0(RA)
+ |.else
+ | sw SFT1, 0(CARG2)
+ | bne TMP1, CARG2, ->BC_CAT_Z
+ |. sw SFT2, 4(CARG2)
+ | addu RA, BASE, RA
+ | sw SFT1, 0(RA)
+ | b ->cont_nop
+ |. sw SFT2, 4(RA)
+ |.endif
|
|//-- Table indexing metamethods -----------------------------------------
|
@@ -663,10 +782,19 @@ static void build_subroutines(BuildCtx *ctx)
|. sw TMP1, HI(CARG3)
|
|->vmeta_tgetb: // TMP0 = index
+ |.if FPU
| mtc1 TMP0, f0
| cvt.d.w f0, f0
| addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
| sdc1 f0, 0(CARG3)
+ |.else
+ | sw CARG2, TEMP_SAVE_1 // Spill CARG2 across the cvti2d call; it is reloaded below because lj_meta_tget still needs it.
+ | cvti2d TMP0 // Index as double in CRET1/CRET2.
+ | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
+ | sw CRET1, 0(CARG3) // Write the converted key into g->tmptv.
+ | sw CRET2, 4(CARG3)
+ | lw CARG2, TEMP_SAVE_1
+ |.endif
|
|->vmeta_tgetv:
|1:
@@ -678,9 +806,9 @@ static void build_subroutines(BuildCtx *ctx)
| // Returns TValue * (finished) or NULL (metamethod).
| beqz CRET1, >3
|. addiu TMP1, BASE, -FRAME_CONT
- | ldc1 f0, 0(CRET1)
+ | load_double2 0(CRET1)
| ins_next1
- | sdc1 f0, 0(RA)
+ | store_double2 0(RA)
| ins_next2
|
|3: // Call __index metamethod.
@@ -699,8 +827,14 @@ static void build_subroutines(BuildCtx *ctx)
| // Returns cTValue * or NULL.
| beqz CRET1, >1
|. nop
+ |.if FPU
| b ->BC_TGETR_Z
|. ldc1 f0, 0(CRET1)
+ |.else
+ | lw SFT1, 0(CRET1)
+ | b ->BC_TGETR_Z
+ |. lw SFT2, 4(CRET1)
+ |.endif
|
|//-----------------------------------------------------------------------
|
@@ -723,10 +857,19 @@ static void build_subroutines(BuildCtx *ctx)
|. sw TMP1, HI(CARG3)
|
|->vmeta_tsetb: // TMP0 = index
+ |.if FPU
| mtc1 TMP0, f0
| cvt.d.w f0, f0
| addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
| sdc1 f0, 0(CARG3)
+ |.else
+ | sw CARG2, TEMP_SAVE_1
+ | cvti2d TMP0
+ | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
+ | sw CRET1, 0(CARG3)
+ | sw CRET2, 4(CARG3)
+ | lw CARG2, TEMP_SAVE_1
+ |.endif
|
|->vmeta_tsetv:
|1:
@@ -736,11 +879,17 @@ static void build_subroutines(BuildCtx *ctx)
| call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
|. move CARG1, L
| // Returns TValue * (finished) or NULL (metamethod).
+ |.if FPU
| beqz CRET1, >3
- |. ldc1 f0, 0(RA)
+ |. ldc1 f2, 0(RA)
+ |.else
+ | lw SFT3, 0(RA)
+ | beqz CRET1, >3
+ |. lw SFT4, 4(RA)
+ |.endif
| // NOBARRIER: lj_meta_tset ensures the table is not black.
| ins_next1
- | sdc1 f0, 0(CRET1)
+ | store_double2 0(CRET1)
| ins_next2
|
|3: // Call __newindex metamethod.
@@ -750,7 +899,7 @@ static void build_subroutines(BuildCtx *ctx)
| sw PC, -16+HI(BASE) // [cont|PC]
| subu PC, BASE, TMP1
| lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | sdc1 f0, 16(BASE) // Copy value to third argument.
+ | store_double2 16(BASE) // Copy value to third argument.
| b ->vm_call_dispatch_f
|. li NARGS8:RC, 24 // 3 args for func(t, k, v)
|
@@ -793,11 +942,17 @@ static void build_subroutines(BuildCtx *ctx)
|
|->cont_ra: // RA = resultptr
| lbu TMP1, -4+OFS_RA(PC)
- | ldc1 f0, 0(RA)
+ | load_double1 0(RA)
| sll TMP1, TMP1, 3
| addu TMP1, BASE, TMP1
+ |.if FPU
| b ->cont_nop
|. sdc1 f0, 0(TMP1)
+ |.else
+ | sw SFT1, 0(TMP1)
+ | b ->cont_nop
+ |. sw SFT2, 4(TMP1)
+ |.endif
|
|->cont_condt: // RA = resultptr
| lw TMP0, HI(RA)
@@ -852,7 +1007,22 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Arithmetic metamethods ---------------------------------------------
|
|->vmeta_unm:
- | move CARG4, CARG3
+ | b ->vmeta_arith
+ |. move CARG4, CARG3
+ |
+ |->vmeta_arith_vn:
+ | addu CARG3, BASE, RB
+ | b ->vmeta_arith
+ |. addu CARG4, KBASE, RC
+ |
+ |->vmeta_arith_nv:
+ | addu CARG4, BASE, RB
+ | b ->vmeta_arith
+ |. addu CARG3, KBASE, RC
+ |
+ |->vmeta_arith_vv:
+ | addu CARG3, BASE, RB
+ | addu CARG4, BASE, RC
|
|->vmeta_arith:
| load_got lj_meta_arith
@@ -985,9 +1155,9 @@ static void build_subroutines(BuildCtx *ctx)
|.macro .ffunc_n, name // Caveat: has delay slot!
|->ff_ .. name:
| lw CARG3, HI(BASE)
+ | load_farg1 0(BASE)
| beqz NARGS8:RC, ->fff_fallback
- |. ldc1 FARG1, 0(BASE)
- | sltiu AT, CARG3, LJ_TISNUM
+ |. sltiu AT, CARG3, LJ_TISNUM
| beqz AT, ->fff_fallback
|.endmacro
|
@@ -997,10 +1167,10 @@ static void build_subroutines(BuildCtx *ctx)
| lw CARG3, HI(BASE)
| bnez AT, ->fff_fallback
|. lw CARG4, 8+HI(BASE)
- | ldc1 FARG1, 0(BASE)
- | ldc1 FARG2, 8(BASE)
| sltiu TMP0, CARG3, LJ_TISNUM
| sltiu TMP1, CARG4, LJ_TISNUM
+ | load_farg1 0(BASE)
+ | load_farg2 8(BASE)
| and TMP0, TMP0, TMP1
| beqz TMP0, ->fff_fallback
|.endmacro
@@ -1027,8 +1197,8 @@ static void build_subroutines(BuildCtx *ctx)
| beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
|. sw CARG1, LO(RA)
|1:
- | ldc1 f0, 0(TMP1)
- | sdc1 f0, -8(TMP1)
+ | load_double1 0(TMP1)
+ | store_double1 -8(TMP1)
| bne TMP1, TMP2, <1
|. addiu TMP1, TMP1, 8
| b ->fff_res
@@ -1043,8 +1213,14 @@ static void build_subroutines(BuildCtx *ctx)
| not TMP1, TMP1
| sll TMP1, TMP1, 3
| addu TMP1, CFUNC:RB, TMP1
+ |.if HFABI
| b ->fff_resn
|. ldc1 FRET1, CFUNC:TMP1->upvalue
+ |.else
+ | lw CRET1, CFUNC:TMP1->upvalue[0].u32.hi
+ | b ->fff_resn
+ |. lw CRET2, CFUNC:TMP1->upvalue[0].u32.lo
+ |.endif
|
|//-- Base library: getters and setters ---------------------------------
|
@@ -1125,8 +1301,14 @@ static void build_subroutines(BuildCtx *ctx)
| call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
|. move CARG1, L
| // Returns cTValue *.
+ |.if HFABI
| b ->fff_resn
|. ldc1 FRET1, 0(CRET1)
+ |.else
+ | lw CRET2, 4(CRET1)
+ | b ->fff_resn
+ |. lw CRET1, 0(CRET1)
+ |.endif
|
|//-- Base library: conversions ------------------------------------------
|
@@ -1136,8 +1318,14 @@ static void build_subroutines(BuildCtx *ctx)
| xori AT, NARGS8:RC, 8
| sltiu CARG1, CARG1, LJ_TISNUM
| movn CARG1, r0, AT
+ |.if HFABI
| beqz CARG1, ->fff_fallback // Exactly one number argument.
|. ldc1 FRET1, 0(BASE)
+ |.else
+ | lw CRET1, 0(BASE)
+ | beqz CARG1, ->fff_fallback // Exactly one number argument.
+ |. lw CRET2, 4(BASE)
+ |.endif
| b ->fff_resn
|. nop
|
@@ -1185,13 +1373,13 @@ static void build_subroutines(BuildCtx *ctx)
| // Returns 0 at end of traversal.
| beqz CRET1, ->fff_restv // End of traversal: return nil.
|. li CARG3, LJ_TNIL
- | ldc1 f0, 8(BASE) // Copy key and value to results.
+ | load_double1 8(BASE)
| addiu RA, BASE, -8
- | ldc1 f2, 16(BASE)
- | li RD, (2+1)*8
- | sdc1 f0, 0(RA)
+ | load_double2 16(BASE)
+ | store_double1 0(RA)
+ | store_double2 8(RA)
| b ->fff_res
- |. sdc1 f2, 8(RA)
+ |. li RD, (2+1)*8
|
|.ffunc_1 pairs
| li AT, LJ_TTAB
@@ -1199,16 +1387,32 @@ static void build_subroutines(BuildCtx *ctx)
|. lw PC, FRAME_PC(BASE)
#if LJ_52
| lw TAB:TMP2, TAB:CARG1->metatable
+ |.if FPU
| ldc1 f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
+ | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| bnez TAB:TMP2, ->fff_fallback
#else
+ |.if FPU
| ldc1 f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
+ | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
#endif
|. addiu RA, BASE, -8
| sw TISNIL, 8+HI(BASE)
| li RD, (3+1)*8
+ |.if FPU
| b ->fff_res
|. sdc1 f0, 0(RA)
+ |.else
+ | sw SFT1, 0(RA)
+ | b ->fff_res
+ |. sw SFT2, 4(RA)
+ |.endif
|
|.ffunc ipairs_aux
| sltiu AT, NARGS8:RC, 16
@@ -1216,35 +1420,55 @@ static void build_subroutines(BuildCtx *ctx)
| lw TAB:CARG1, LO(BASE)
| lw CARG4, 8+HI(BASE)
| bnez AT, ->fff_fallback
- |. ldc1 FARG2, 8(BASE)
- | addiu CARG3, CARG3, -LJ_TTAB
+ |. addiu CARG3, CARG3, -LJ_TTAB
| sltiu AT, CARG4, LJ_TISNUM
| li TMP0, 1
| movn AT, r0, CARG3
- | mtc1 TMP0, FARG1
| beqz AT, ->fff_fallback
|. lw PC, FRAME_PC(BASE)
+ |.if FPU
+ | ldc1 FARG2, 8(BASE)
+ | mtc1 TMP0, FARG1
| trunc.w.d FRET1, FARG2
| cvt.d.w FARG1, FARG1
- | lw TMP0, TAB:CARG1->asize
- | lw TMP1, TAB:CARG1->array
| mfc1 TMP2, FRET1
- | addiu RA, BASE, -8
| add.d FARG2, FARG2, FARG1
+ |.else
+ | sw CARG1, TEMP_SAVE_1 // Table pointer must survive the C calls below.
+ | cvti2d TMP0
+ | sw CRET1, TEMP_SAVE_2 // Store result CRET1/CRET2=1 (double).
+ | sw CRET2, TEMP_SAVE_3
+ | lw CARG2, 8+4(BASE)
+ | load_got __fixdfsi
+ | call_extern // CRET1 = (int)key.
+ |. lw CARG1, 8(BASE)
+ | sw CRET1, TEMP_SAVE_4
+ | load_got __adddf3
+ | lw CARG2, TEMP_SAVE_3
+ | lw CARG3, 8(BASE)
+ | lw CARG4, 8+4(BASE)
+ | call_extern // CRET1/CRET2 = key + 1.0 (the next key).
+ |. lw CARG1, TEMP_SAVE_2
+ | lw TMP2, TEMP_SAVE_4
+ | lw CARG1, TEMP_SAVE_1
+ |.endif
+ | lw TMP0, TAB:CARG1->asize
+ | lw TMP1, TAB:CARG1->array
| addiu TMP2, TMP2, 1
| sltu AT, TMP2, TMP0
+ | addiu RA, BASE, -8
+ | store_double FARG2, CRET1, CRET2, 0(RA) // Store the next key BEFORE branching: the >2 (hash) path also returns it via <1.
+ | beqz AT, >2 // Not in array part? The following sll sits in the delay slot and only sets TMP3.
| sll TMP3, TMP2, 3
| addu TMP3, TMP1, TMP3
- | beqz AT, >2 // Not in array part?
- |. sdc1 FARG2, 0(RA)
| lw TMP2, HI(TMP3)
- | ldc1 f0, 0(TMP3)
+ | load_double1 0(TMP3)
|1:
| beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results.
|. li RD, (0+1)*8
- | li RD, (2+1)*8
+ | store_double1 8(RA)
| b ->fff_res
- |. sdc1 f0, 8(RA)
+ |. li RD, (2+1)*8
|2: // Check for empty hash part first. Otherwise call C function.
| lw TMP0, TAB:CARG1->hmask
| load_got lj_tab_getinth
@@ -1256,8 +1480,14 @@ static void build_subroutines(BuildCtx *ctx)
| beqz CRET1, ->fff_res
|. li RD, (0+1)*8
| lw TMP2, HI(CRET1)
+ |.if FPU
| b <1
|. ldc1 f0, 0(CRET1)
+ |.else
+ | lw SFT2, 4(CRET1)
+ | b <1
+ |. lw SFT1, 0(CRET1)
+ |.endif
|
|.ffunc_1 ipairs
| li AT, LJ_TTAB
@@ -1265,17 +1495,33 @@ static void build_subroutines(BuildCtx *ctx)
|. lw PC, FRAME_PC(BASE)
#if LJ_52
| lw TAB:TMP2, TAB:CARG1->metatable
+ |.if FPU
| ldc1 f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
+ | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| bnez TAB:TMP2, ->fff_fallback
#else
+ |.if FPU
| ldc1 f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lw SFT1, CFUNC:RB->upvalue[0].u32.hi
+ | lw SFT2, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
#endif
|. addiu RA, BASE, -8
| sw r0, 8+HI(BASE)
| sw r0, 8+LO(BASE)
| li RD, (3+1)*8
+ |.if FPU
| b ->fff_res
|. sdc1 f0, 0(RA)
+ |.else
+ | sw SFT1, 0(RA)
+ | b ->fff_res
+ |. sw SFT2, 4(RA)
+ |.endif
|
|//-- Base library: catch errors ----------------------------------------
|
@@ -1295,8 +1541,12 @@ static void build_subroutines(BuildCtx *ctx)
| sltiu AT, NARGS8:RC, 16
| lw CARG4, 8+HI(BASE)
| bnez AT, ->fff_fallback
+ |.if FPU
|. ldc1 FARG2, 8(BASE)
- | ldc1 FARG1, 0(BASE)
+ |.else
+ |. lw CARG3, 8+LO(BASE)
+ |.endif
+ | load_double FARG1, CARG1, CARG2, 0(BASE)
| lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
| li AT, LJ_TFUNC
| move TMP2, BASE
@@ -1304,9 +1554,14 @@ static void build_subroutines(BuildCtx *ctx)
| addiu BASE, BASE, 16
| // Remember active hook before pcall.
| srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
+ |.if FPU
| sdc1 FARG2, 0(TMP2) // Swap function and traceback.
+ |.else
+ | sw CARG3, LO(TMP2)
+ | sw CARG4, HI(TMP2)
+ |.endif
| andi TMP3, TMP3, 1
- | sdc1 FARG1, 8(TMP2)
+ | store_double FARG1, CARG1, CARG2, 8(TMP2)
| addiu PC, TMP3, 16+FRAME_PCALL
| b ->vm_call_dispatch
|. addiu NARGS8:RC, NARGS8:RC, -16
@@ -1350,11 +1605,11 @@ static void build_subroutines(BuildCtx *ctx)
| move CARG3, CARG2
| sw BASE, L->top
|2: // Move args to coroutine.
- | ldc1 f0, 0(BASE)
+ | load_double1 0(BASE)
| sltu AT, BASE, TMP1
| beqz AT, >3
|. addiu BASE, BASE, 8
- | sdc1 f0, 0(CARG3)
+ | store_double1 0(CARG3)
| b <2
|. addiu CARG3, CARG3, 8
|3:
@@ -1380,10 +1635,10 @@ static void build_subroutines(BuildCtx *ctx)
| sw TMP2, L:RA->top // Clear coroutine stack.
| move TMP1, BASE
|5: // Move results from coroutine.
- | ldc1 f0, 0(TMP2)
+ | load_double1 0(TMP2)
| addiu TMP2, TMP2, 8
| sltu AT, TMP2, TMP3
- | sdc1 f0, 0(TMP1)
+ | store_double1 0(TMP1)
| bnez AT, <5
|. addiu TMP1, TMP1, 8
|6:
@@ -1408,12 +1663,12 @@ static void build_subroutines(BuildCtx *ctx)
|.if resume
| addiu TMP3, TMP3, -8
| li TMP1, LJ_TFALSE
- | ldc1 f0, 0(TMP3)
+ | load_double1 0(TMP3)
| sw TMP3, L:RA->top // Remove error from coroutine stack.
| li RD, (2+1)*8
| sw TMP1, -8+HI(BASE) // Prepend false to results.
| addiu RA, BASE, -8
- | sdc1 f0, 0(BASE) // Copy error message.
+ | store_double1 0(BASE) // Copy error message.
| b <7
|. andi TMP0, PC, FRAME_TYPE
|.else
@@ -1449,13 +1704,33 @@ static void build_subroutines(BuildCtx *ctx)
|
|//-- Math library -------------------------------------------------------
|
- |.ffunc_n math_abs
+ |.ffunc_1 math_abs
+ | load_farg1 0(BASE)
+ | sltiu AT, CARG3, LJ_TISNUM
+ | beqz AT, ->fff_fallback
+ |. nop
+ |.if FPU
|. abs.d FRET1, FARG1
+ |.else
+ |. lui TMP1, 0x8000
+ | and AT, CARG1, TMP1
+ | move CRET2, CARG2
+ | beqz AT, ->fff_resn
+ |. move CRET1, CARG1
+ | xor CRET1, CARG1, TMP1
+ |.endif
+ |
|->fff_resn:
| lw PC, FRAME_PC(BASE)
| addiu RA, BASE, -8
+ |.if HFABI
| b ->fff_res1
|. sdc1 FRET1, -8(BASE)
+ |.else
+ | sw CRET1, -8(BASE)
+ | b ->fff_res1
+ |. sw CRET2, -8+4(BASE)
+ |.endif
|
|->fff_restv:
| // CARG3/CARG1 = TValue result.
@@ -1498,8 +1773,14 @@ static void build_subroutines(BuildCtx *ctx)
| sltiu AT, CARG3, LJ_TISNUM
| beqz AT, ->fff_fallback
|. nop
+ |.if HFABI
| call_extern
|. ldc1 FARG1, 0(BASE)
+ |.else
+ | lw CARG1, 0(BASE)
+ | call_extern
+ |. lw CARG2, 4(BASE)
+ |.endif
| b ->fff_resn
|. nop
|.endmacro
@@ -1526,15 +1807,20 @@ static void build_subroutines(BuildCtx *ctx)
| math_round ceil
|
|.ffunc math_log
- | lw CARG3, HI(BASE)
| li AT, 8
| bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
- |. load_got log
+ |. lw CARG3, HI(BASE)
| sltiu AT, CARG3, LJ_TISNUM
| beqz AT, ->fff_fallback
- |. nop
+ |. load_got log
+ |.if HFABI
| call_extern
|. ldc1 FARG1, 0(BASE)
+ |.else
+ | lw CARG1, 0(BASE)
+ | call_extern
+ |. lw CARG2, 4(BASE)
+ |.endif
| b ->fff_resn
|. nop
|
@@ -1553,17 +1839,40 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern2 atan2
| math_extern2 fmod
|
+ |.if FPU
|.ffunc_n math_sqrt
|. sqrt.d FRET1, FARG1
| b ->fff_resn
|. nop
+ |.else
+ | math_extern sqrt
+ |.endif
|
- |.ffunc_nn math_ldexp
+ |.ffunc_2 math_ldexp
+ | sltiu TMP0, CARG3, LJ_TISNUM
+ | sltiu TMP1, CARG4, LJ_TISNUM
+ | load_farg1 0(BASE)
+ | load_farg2 8(BASE)
+ | and TMP0, TMP0, TMP1
+ | beqz TMP0, ->fff_fallback
+ |.if FPU
+ | load_got ldexp
| trunc.w.d FARG2, FARG2
+ | call_extern
+ |. mfc1 CARG3, FARG2
+ |.else
+ | sw CARG1, TEMP_SAVE_1
+ | sw CARG2, TEMP_SAVE_2
+ | load_got __fixdfsi
+ | move CARG1, CARG3
+ | call_extern
+ |. move CARG2, CARG4
+ | lw CARG1, TEMP_SAVE_1
| load_got ldexp
- | mfc1 CARG3, FARG2
+ | lw CARG2, TEMP_SAVE_2
| call_extern
- |. nop
+ |. move CARG3, CRET1
+ |.endif
| b ->fff_resn
|. nop
|
@@ -1574,10 +1883,14 @@ static void build_subroutines(BuildCtx *ctx)
|. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
| lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
| addiu RA, BASE, -8
+ | store_double FRET1, CRET1, CRET2, 0(RA)
+ |.if FPU
| mtc1 TMP1, FARG2
- | sdc1 FRET1, 0(RA)
| cvt.d.w FARG2, FARG2
- | sdc1 FARG2, 8(RA)
+ |.else
+ | cvti2d TMP1
+ |.endif
+ | store_double FARG2, CRET1, CRET2, 8(RA)
| b ->fff_res
|. li RD, (2+1)*8
|
@@ -1587,7 +1900,12 @@ static void build_subroutines(BuildCtx *ctx)
| call_extern
|. addiu CARG3, BASE, -8
| addiu RA, BASE, -8
+ |.if HFABI
| sdc1 FRET1, 0(BASE)
+ |.else
+ | sw CRET1, 0(BASE)
+ | sw CRET2, 4(BASE)
+ |.endif
| b ->fff_res
|. li RD, (2+1)*8
|
@@ -1595,25 +1913,73 @@ static void build_subroutines(BuildCtx *ctx)
|->ff_ .. name:
| lw CARG3, HI(BASE)
| beqz NARGS8:RC, ->fff_fallback
- |. ldc1 FRET1, 0(BASE)
- | sltiu AT, CARG3, LJ_TISNUM
+ |. sltiu AT, CARG3, LJ_TISNUM
| beqz AT, ->fff_fallback
|. addu TMP2, BASE, NARGS8:RC
| addiu TMP1, BASE, 8
+ |.if HFABI
+ | ldc1 FRET1, 0(BASE)
| beq TMP1, TMP2, ->fff_resn
+ |.else
+ | lw CRET1, 0(BASE)
+ | lw CRET2, 4(BASE)
+ | beq TMP1, TMP2, ->fff_resn
+ |.endif
|1:
|. lw CARG3, HI(TMP1)
+ |.if HFABI
| ldc1 FARG1, 0(TMP1)
- | addiu TMP1, TMP1, 8
+ |.else
+ | lw CARG1, 0(TMP1)
+ | lw CARG2, 4(TMP1)
+ |.endif
| sltiu AT, CARG3, LJ_TISNUM
| beqz AT, ->fff_fallback
+ |. addiu TMP1, TMP1, 8
+ |.if FPU
|.if ismax
- |. c.olt.d FARG1, FRET1
+ | c.olt.d FARG1, FRET1
|.else
- |. c.olt.d FRET1, FARG1
+ | c.olt.d FRET1, FARG1
|.endif
| bne TMP1, TMP2, <1
|. movf.d FRET1, FARG1
+ |.else
+ | load_got __ledf2
+ | sw TMP1, TEMP_SAVE_1 // Spill loop pointers and both operands: the C call may clobber them.
+ | sw TMP2, TEMP_SAVE_2
+ | sw CARG1, TEMP_SAVE_3
+ | sw CARG2, TEMP_SAVE_4
+ | sw CRET1, TEMP_SAVE_5
+ | sw CRET2, TEMP_SAVE_6
+ | move CARG3, CRET1
+ | call_extern // CRET1 = __ledf2(new value in CARG1/2, current result in CARG3/4). NOTE(review): ->fff_fallback is reachable on the next iteration but RB/RC are not spilled here (string_char spills them) - confirm.
+ |. move CARG4, CRET2
+ | lw CARG4, TEMP_SAVE_6 // After the reloads: CARG3/4 = current result, CARG1/2 = new value.
+ | lw CARG3, TEMP_SAVE_5
+ | lw CARG2, TEMP_SAVE_4
+ | lw CARG1, TEMP_SAVE_3
+ | lw TMP2, TEMP_SAVE_2
+ | lw TMP1, TEMP_SAVE_1
+ |.if ismax
+ | beqz CRET1, >2 // farg1==fret1
+ |. nop
+ | bgtz CRET1, >2 // farg1>fret1 or unordered: __ledf2 only promises a value > 0, not exactly 1.
+ |. nop
+ |.else
+ | blez CRET1, >2 // NOTE(review): an unordered (NaN) result is > 0 and keeps the old value here, while the FPU path (c.olt.d + movf.d) takes the new one - confirm intended.
+ |. nop
+ |.endif
+ | move CRET1, CARG3 // Keep the value.
+ | b >3
+ |. move CRET2, CARG4
+ |2:
+ | move CRET1, CARG1 // Set new value.
+ | move CRET2, CARG2
+ |3:
+ | bne TMP1, TMP2, <1
+ |. nop
+ |.endif
| b ->fff_resn
|. nop
|.endmacro
@@ -1632,32 +1998,52 @@ static void build_subroutines(BuildCtx *ctx)
| bnez AT, ->fff_fallback // Need exactly 1 string argument.
|. nop
| lw TMP0, STR:CARG1->len
- | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
| addiu RA, BASE, -8
| sltu RD, r0, TMP0
- | mtc1 TMP1, f0
+ | lw PC, FRAME_PC(BASE)
| addiu RD, RD, 1
+ | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
+ |.if FPU
+ | mtc1 TMP1, f0
| cvt.d.w f0, f0
- | lw PC, FRAME_PC(BASE)
- | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
+ | sdc1 f0, 0(RA)
+ |.else
+ | sw RD, TEMP_SAVE_1
+ | cvti2d TMP1
+ | sw CRET1, 0(RA)
+ | sw CRET2, 4(RA)
+ | lw RD, TEMP_SAVE_1
+ |.endif
| b ->fff_res
- |. sdc1 f0, 0(RA)
+ |. sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
|
|.ffunc string_char // Only handle the 1-arg case here.
| ffgccheck
| lw CARG3, HI(BASE)
- | ldc1 FARG1, 0(BASE)
| li AT, 8
| bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
|. sltiu AT, CARG3, LJ_TISNUM
| beqz AT, ->fff_fallback
|. li CARG3, 1
- | trunc.w.d FARG1, FARG1
- | addiu CARG2, sp, ARG5_OFS
- | sltiu AT, TMP0, 256
- | mfc1 TMP0, FARG1
- | beqz AT, ->fff_fallback
- |. sw TMP0, ARG5
+ | load_farg1 0(BASE)
+ |.if FPU
+ | trunc.w.d FARG1, FARG1
+ | mfc1 TMP0, FARG1 // TMP0 = (int)arg.
+ |.else
+ | load_got __fixdfsi
+ | sw RB, TEMP_SAVE_1 // RB/RC/CARG3 are spilled and restored around the C call.
+ | sw RC, TEMP_SAVE_2
+ | call_extern
+ |. sw CARG3, TEMP_SAVE_3
+ | lw CARG3, TEMP_SAVE_3
+ | lw RC, TEMP_SAVE_2
+ | lw RB, TEMP_SAVE_1
+ | move TMP0, CRET1 // TMP0 = (int)arg.
+ |.endif
+ | sltiu AT, TMP0, 256 // Range check must follow the conversion: before it, TMP0 does not hold the argument.
+ | beqz AT, ->fff_fallback
+ |. addiu CARG2, sp, ARG5_OFS
+ | sb TMP0, ARG5 // Byte store: the character lands exactly at the address passed in CARG2 on either endianness.
|->fff_newstr:
| load_got lj_str_new
| sw BASE, L->base
@@ -1674,27 +2060,52 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc string_sub
| ffgccheck
| addiu AT, NARGS8:RC, -16
+ |.if FPU
+ | ldc1 f0, 16(BASE) // Third argument is converted eagerly; the result is only consumed after the argument checks below.
+ | trunc.w.d f0, f0
+ |.else
+ | lw CARG1, 16(BASE)
+ | load_got __fixdfsi
+ | sw AT, TEMP_SAVE_1 // AT (nargs*8 - 16) must survive the C call for the checks below.
+ | call_extern // NOTE(review): string_char spills RB/RC around this same call; here only AT is kept although ->fff_fallback is still reachable below - confirm RB/RC survive.
+ |. lw CARG2, 16+4(BASE)
+ | lw AT, TEMP_SAVE_1
+ |.endif
| lw CARG3, 16+HI(BASE)
- | ldc1 f0, 16(BASE)
| lw TMP0, HI(BASE)
| lw STR:CARG1, LO(BASE)
| bltz AT, ->fff_fallback
- | lw CARG2, 8+HI(BASE)
- | ldc1 f2, 8(BASE)
+ |. lw CARG2, 8+HI(BASE)
| beqz AT, >1
|. li CARG4, -1
- | trunc.w.d f0, f0
| sltiu AT, CARG3, LJ_TISNUM
| beqz AT, ->fff_fallback
+ |.if FPU
|. mfc1 CARG4, f0
+ |.else
+ |. move CARG4, CRET1
+ |.endif
|1:
| sltiu AT, CARG2, LJ_TISNUM
| beqz AT, ->fff_fallback
|. li AT, LJ_TSTR
- | trunc.w.d f2, f2
| bne TMP0, AT, ->fff_fallback
- |. lw CARG2, STR:CARG1->len
+ |.if FPU
+ |. ldc1 f2, 8(BASE)
+ | trunc.w.d f2, f2
| mfc1 CARG3, f2
+ |.else
+ |. sw CARG1, TEMP_SAVE_1
+ | sw CARG4, TEMP_SAVE_2
+ | lw CARG2, 8+4(BASE)
+ | load_got __fixdfsi
+ | call_extern
+ |. lw CARG1, 8(BASE)
+ | lw CARG1, TEMP_SAVE_1
+ | lw CARG4, TEMP_SAVE_2
+ | move CARG3, CRET1
+ |.endif
+ | lw CARG2, STR:CARG1->len
| // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
| slt AT, CARG4, r0
| addiu TMP0, CARG2, 1
@@ -1749,10 +2160,58 @@ static void build_subroutines(BuildCtx *ctx)
|
|//-- Bit library --------------------------------------------------------
|
+ |.if not FPU
+ |// FP number to bit conversion for soft-float. In: CARG1 = word with sign/exponent, CARG2 = other mantissa word. Out: CRET1. Clobbers TMP0, TMP3, CARG2, CARG4.
+ |->vm_tobit:
+ | sll TMP0, CARG1, 1 // Drop the sign bit: exponent field now in bits 31..21.
+ | lui TMP3, 0x0020 // 1 << 21, i.e. one unit of the exponent field.
+ | addu TMP0, TMP0, TMP3 // Exponent + 1.
+ | slt TMP3, TMP0, r0 // TMP3 = 1 iff bit 31 is set, i.e. 0x3ff <= biased exponent <= 0x7fe.
+ | movz CARG2, r0, TMP3 // Otherwise the result is 0.
+ | beqz TMP3, >2
+ |. li CARG4, 0x3e0
+ | not CARG4, CARG4 // CARG4 = ~0x3e0 = -993.
+ | sra TMP0, TMP0, 21 // (exponent + 1) - 2048, sign-extended (bit 31 is set here).
+ | subu TMP0, CARG4, TMP0 // TMP0 = 1054 - exponent = 31 - unbiased exponent: the right-shift count.
+ | slt TMP3, TMP0, r0
+ | bnez TMP3, >1 // Unbiased exponent > 31: handled at 1: below.
+ |. sll CARG4, CARG1, 11 // Upper mantissa bits moved up to bit 30 (executed on both paths).
+ | lui TMP3, 0x8000
+ | or CARG4, CARG4, TMP3 // Insert the implicit leading 1 at bit 31.
+ | srl TMP3, CARG2, 21
+ | or CARG4, CARG4, TMP3 // Append the top 11 bits of the other mantissa word.
+ | slt TMP3, CARG1, r0 // Negative input?
+ | beqz TMP3, >2
+ |. srlv CARG2, CARG4, TMP0 // Integer magnitude (fraction bits shifted out).
+ | subu CARG2, r0, CARG2 // Negate for negative inputs.
+ |2:
+ | jr ra
+ |. move CRET1, CARG2
+ |1: // Large exponent: assemble the low 32 bits from both mantissa words. NOTE(review): srlv/sllv use only the low 5 bits of the count - confirm behaviour for exponents >= 52.
+ | addiu TMP0, TMP0, 21 // TMP0 = 52 - unbiased exponent.
+ | srlv CARG4, CARG2, TMP0
+ | li TMP3, 20
+ | subu TMP0, TMP3, TMP0 // TMP0 = unbiased exponent - 32.
+ | sll CARG2, CARG1, 12 // Strip sign and exponent from the upper word.
+ | sllv TMP3, CARG2, TMP0
+ | or CARG2, CARG4, TMP3
+ | slt TMP3, CARG1, r0 // Negative input?
+ | beqz TMP3, <2
+ |. nop
+ | jr ra
+ |. subu CRET1, r0, CARG2
+ |.endif
+ |
|.macro .ffunc_bit, name
| .ffunc_n bit_..name
+ |.if FPU
|. add.d FARG1, FARG1, TOBIT
| mfc1 CRET1, FARG1
+ |.else
+ |. nop
+ | bal ->vm_tobit
+ |. nop
+ |.endif
|.endmacro
|
|.macro .ffunc_bit_op, name, ins
@@ -1760,14 +2219,27 @@ static void build_subroutines(BuildCtx *ctx)
| addiu TMP1, BASE, 8
| addu TMP2, BASE, NARGS8:RC
|1:
+ | move CRET2, CRET1
| lw CARG4, HI(TMP1)
+ |.if FPU
| beq TMP1, TMP2, ->fff_resi
|. ldc1 FARG1, 0(TMP1)
+ |.else
+ | lw CARG1, 0(TMP1)
+ | beq TMP1, TMP2, ->fff_resi
+ |. lw CARG2, 4(TMP1)
+ |.endif
| sltiu AT, CARG4, LJ_TISNUM
| beqz AT, ->fff_fallback
- | add.d FARG1, FARG1, TOBIT
- | mfc1 CARG2, FARG1
- | ins CRET1, CRET1, CARG2
+ |.if FPU
+ |. add.d FARG1, FARG1, TOBIT
+ | mfc1 CRET1, FARG1
+ |.else
+ |. nop
+ | bal ->vm_tobit
+ |. nop
+ |.endif
+ | ins CRET1, CRET2, CRET1
| b <1
|. addiu TMP1, TMP1, 8
|.endmacro
@@ -1794,10 +2266,22 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro .ffunc_bit_sh, name, ins, shmod
| .ffunc_nn bit_..name
+ |.if FPU
|. add.d FARG1, FARG1, TOBIT
| add.d FARG2, FARG2, TOBIT
| mfc1 CARG1, FARG1
| mfc1 CARG2, FARG2
+ |.else
+ |. sw CARG4, TEMP_SAVE_1
+ | bal ->vm_tobit
+ |. nop
+ | move CRET2, CRET1
+ | lw CARG2, TEMP_SAVE_1
+ | bal ->vm_tobit
+ |. move CARG1, CARG3
+ | move CARG2, CRET1
+ | move CARG1, CRET2
+ |.endif
|.if shmod == 1
| li AT, 32
| subu TMP0, AT, CARG2
@@ -1822,9 +2306,19 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc_bit tobit
|->fff_resi:
+ | lw PC, FRAME_PC(BASE)
+ | addiu RA, BASE, -8
+ |.if HFABI
| mtc1 CRET1, FRET1
- | b ->fff_resn
- |. cvt.d.w FRET1, FRET1
+ | cvt.d.w FRET1, FRET1
+ | b ->fff_res1
+ |. sdc1 FRET1, -8(BASE)
+ |.else // Result already in CRET1.
+ | cvti2d CRET1
+ | sw CRET1, -8(BASE)
+ | b ->fff_res1
+ |. sw CRET2, -8+4(BASE)
+ |.endif
|
|//-----------------------------------------------------------------------
|
@@ -2082,14 +2576,23 @@ static void build_subroutines(BuildCtx *ctx)
|//-----------------------------------------------------------------------
|
|.macro savex_, a, b
+ |.if FPU
| sdc1 f..a, 16+a*8(sp)
| sw r..a, 16+32*8+a*4(sp)
| sw r..b, 16+32*8+b*4(sp)
+ |.else
+ | sw r..a, 16+a*4(sp)
+ | sw r..b, 16+b*4(sp)
+ |.endif
|.endmacro
|
|->vm_exit_handler:
|.if JIT
+ |.if FPU
| addiu sp, sp, -(16+32*8+32*4)
+ |.else
+ | addiu sp, sp, -(16+32*4)
+ |.endif
| savex_ 0, 1
| savex_ 2, 3
| savex_ 4, 5
@@ -2104,17 +2607,25 @@ static void build_subroutines(BuildCtx *ctx)
| savex_ 22, 23
| savex_ 24, 25
| savex_ 26, 27
+ |.if FPU
| sdc1 f28, 16+28*8(sp)
- | sw r28, 16+32*8+28*4(sp)
| sdc1 f30, 16+30*8(sp)
+ | sw r28, 16+32*8+28*4(sp)
| sw r30, 16+32*8+30*4(sp)
| sw r0, 16+32*8+31*4(sp) // Clear RID_TMP.
+ | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
+ | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP
+ |.else
+ | sw r28, 16+28*4(sp)
+ | sw r30, 16+30*4(sp)
+ | sw r0, 16+31*4(sp) // Clear RID_TMP.
+ | addiu TMP2, sp, 16+32*4 // Recompute original value of sp.
+ | sw TMP2, 16+29*4(sp) // Store sp in RID_SP
+ |.endif
| li_vmstate EXIT
- | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
| addiu DISPATCH, JGL, -GG_DISP2G-32768
| lw TMP1, 0(TMP2) // Load exit number.
| st_vmstate
- | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP.
| lw L, DISPATCH_GL(cur_L)(DISPATCH)
| lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
| load_got lj_trace_exit
@@ -2144,15 +2655,15 @@ static void build_subroutines(BuildCtx *ctx)
|1:
| bltz CRET1, >9 // Check for error from exit.
|. lw LFUNC:RB, FRAME_FUNC(BASE)
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| sll MULTRES, CRET1, 3
| li TISNIL, LJ_TNIL
| sw MULTRES, SAVE_MULTRES
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| lw TMP1, LFUNC:RB->pc
| sw r0, DISPATCH_GL(jit_base)(DISPATCH)
| lw KBASE, PC2PROTO(k)(TMP1)
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| // Modified copy of ins_next which handles function header dispatch, too.
| lw INS, 0(PC)
| addiu PC, PC, 4
@@ -2160,7 +2671,7 @@ static void build_subroutines(BuildCtx *ctx)
| sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
| decode_OP4a TMP1, INS
| decode_OP4b TMP1
- | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header?
+ | sltiu TMP2, TMP1, BC_FUNCF*4
| addu TMP0, DISPATCH, TMP1
| decode_RD8a RD, INS
| lw AT, 0(TMP0)
@@ -2202,7 +2713,7 @@ static void build_subroutines(BuildCtx *ctx)
|//-----------------------------------------------------------------------
|
|// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
- |.macro vm_round, func
+ |.macro vm_round_hf, func
| lui TMP0, 0x4330 // Hiword of 2^52 (double).
| mtc1 r0, f4
| mtc1 TMP0, f5
@@ -2244,6 +2755,25 @@ static void build_subroutines(BuildCtx *ctx)
|. mov.d FRET1, FARG1
|.endmacro
|
+ |.macro vm_round_sf, func // Soft-float rounding: tail of ->vm_floor/->vm_ceil, forwards to the C function `func`.
+ | addiu sp, sp, -24 // 16 bytes of o32 argument home area for the callee + 8 bytes for ra (keeps sp 8-byte aligned).
+ | load_got func
+ | sw ra, 16(sp) // Save ra ABOVE the home area: the callee may legally overwrite 0..15(sp).
+ | call_extern
+ |. nop
+ | lw ra, 16(sp)
+ | jr ra
+ |. addiu sp, sp, 24
+ |.endmacro
+ |
+ |.macro vm_round, func
+ |.if FPU
+ | vm_round_hf, func
+ |.else
+ | vm_round_sf, func
+ |.endif
+ |.endmacro
+ |
|->vm_floor:
| vm_round floor
|->vm_ceil:
@@ -2272,10 +2802,10 @@ static void build_subroutines(BuildCtx *ctx)
| sw r1, CTSTATE->cb.slot
| sw CARG1, CTSTATE->cb.gpr[0]
| sw CARG2, CTSTATE->cb.gpr[1]
- | sdc1 FARG1, CTSTATE->cb.fpr[0]
+ | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
| sw CARG3, CTSTATE->cb.gpr[2]
| sw CARG4, CTSTATE->cb.gpr[3]
- | sdc1 FARG2, CTSTATE->cb.fpr[1]
+ | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
| addiu TMP0, sp, CFRAME_SPACE+16
| sw TMP0, CTSTATE->cb.stack
| sw r0, SAVE_PC // Any value outside of bytecode is ok.
@@ -2286,14 +2816,14 @@ static void build_subroutines(BuildCtx *ctx)
| lw BASE, L:CRET1->base
| lw RC, L:CRET1->top
| move L, CRET1
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| lw LFUNC:RB, FRAME_FUNC(BASE)
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| li_vmstate INTERP
| li TISNIL, LJ_TNIL
| subu RC, RC, BASE
| st_vmstate
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| ins_callt
|.endif
|
@@ -2307,11 +2837,11 @@ static void build_subroutines(BuildCtx *ctx)
| move CARG2, RA
| call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
|. move CARG1, CTSTATE
+ | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
| lw CRET1, CTSTATE->cb.gpr[0]
- | ldc1 FRET1, CTSTATE->cb.fpr[0]
- | lw CRET2, CTSTATE->cb.gpr[1]
+ | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
| b ->vm_leave_unw
- |. ldc1 FRET2, CTSTATE->cb.fpr[1]
+ |. lw CRET2, CTSTATE->cb.gpr[1]
|.endif
|
|->vm_ffi_call: // Call C function via FFI.
@@ -2343,8 +2873,8 @@ static void build_subroutines(BuildCtx *ctx)
| lw CARG2, CCSTATE->gpr[1]
| lw CARG3, CCSTATE->gpr[2]
| lw CARG4, CCSTATE->gpr[3]
- | ldc1 FARG1, CCSTATE->fpr[0]
- | ldc1 FARG2, CCSTATE->fpr[1]
+ | .FPU ldc1 FARG1, CCSTATE->fpr[0]
+ | .FPU ldc1 FARG2, CCSTATE->fpr[1]
| jalr CFUNCADDR
|. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
| lw CCSTATE:TMP1, -12(r16)
@@ -2352,8 +2882,10 @@ static void build_subroutines(BuildCtx *ctx)
| lw ra, -4(r16)
| sw CRET1, CCSTATE:TMP1->gpr[0]
| sw CRET2, CCSTATE:TMP1->gpr[1]
- | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
- | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
+ | .FPU sdc1 FRET1, CCSTATE:TMP1->fpr[0]
+ | .FPU sdc1 FRET2, CCSTATE:TMP1->fpr[1]
+ | sw CARG1, CCSTATE:TMP1->gpr[2] // MIPS32 soft-float.
+ | sw CARG2, CCSTATE:TMP1->gpr[3] // Complex doubles are returned in v0, v1, a0, a1.
| move sp, r16
| jr ra
|. move r16, TMP2
@@ -2381,8 +2913,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addu CARG3, BASE, RD
| lw TMP0, HI(CARG2)
| lw TMP1, HI(CARG3)
- | ldc1 f0, 0(CARG2)
- | ldc1 f2, 0(CARG3)
| sltiu TMP0, TMP0, LJ_TISNUM
| sltiu TMP1, TMP1, LJ_TISNUM
| lhu TMP2, OFS_RD(PC)
@@ -2390,8 +2920,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addiu PC, PC, 4
| beqz TMP0, ->vmeta_comp
|. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535)
+ | load_double f0, CARG1, CARG2, 0(CARG2)
+ |.if FPU
+ | ldc1 f2, 0(CARG3)
+ |.else
+ | lw CARG4, 4(CARG3)
+ | lw CARG3, 0(CARG3)
+ |.endif
| decode_RD4b TMP2
| addu TMP2, TMP2, TMP1
+ |.if FPU
if (op == BC_ISLT || op == BC_ISGE) {
| c.olt.d f0, f2
} else {
@@ -2402,8 +2940,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
} else {
| movt TMP2, r0
}
- | addu PC, PC, TMP2
+ |.else
+ | load_got __ledf2
+ | sw RD, TEMP_SAVE_1
+ | sw TMP1, TEMP_SAVE_2
+ | call_extern //CRET1 = f0<=f2
+ |. sw TMP2, TEMP_SAVE_3
+ | lw TMP2, TEMP_SAVE_3
+ | lw TMP1, TEMP_SAVE_2
+ if (op == BC_ISLT) {
+ | bltz CRET1, >1
+ } else if (op == BC_ISLE) {
+ | blez CRET1, >1
+ } else if (op == BC_ISGT) {
+ | bgtz CRET1, >1
+ } else {
+ | bgez CRET1, >1
+ }
+ |. lw RD, TEMP_SAVE_1
+ | move TMP2, r0
|1:
+ |.endif
+ | addu PC, PC, TMP2
| ins_next
break;
@@ -2413,24 +2971,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addu RA, BASE, RA
| addiu PC, PC, 4
| lw TMP0, HI(RA)
- | ldc1 f0, 0(RA)
| addu RD, BASE, RD
| lhu TMP2, -4+OFS_RD(PC)
- | lw TMP1, HI(RD)
- | ldc1 f2, 0(RD)
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | lw TMP1, HI(RD)
+ | decode_RD4b TMP2
| sltiu AT, TMP0, LJ_TISNUM
| sltiu CARG1, TMP1, LJ_TISNUM
- | decode_RD4b TMP2
+ | load_double f2, CARG3, CARG4, 0(RD)
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
| and AT, AT, CARG1
+ | load_double f0, CARG1, CARG2, 0(RA)
| beqz AT, >5
|. addu TMP2, TMP2, TMP3
+ |.if FPU
| c.eq.d f0, f2
if (vk) {
| movf TMP2, r0
} else {
| movt TMP2, r0
}
+ |.else
+ | load_got __ledf2
+ | sw RD, TEMP_SAVE_1
+ | call_extern
+ |. sw TMP2, TEMP_SAVE_2
+ | lw RD, TEMP_SAVE_1
+ | lw TMP2, TEMP_SAVE_2
+ if (vk) {
+ | beqz CRET1, >4
+ |. nop
+ } else {
+ | bnez CRET1, >4
+ |. nop
+ }
+ | move TMP2, r0
+ |4:
+ |.endif
|1:
| addu PC, PC, TMP2
| ins_next
@@ -2507,10 +3084,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addu RA, BASE, RA
| addiu PC, PC, 4
| lw TMP0, HI(RA)
- | ldc1 f0, 0(RA)
+ | load_double f0, CARG1, CARG2, 0(RA)
| addu RD, KBASE, RD
| lhu TMP2, -4+OFS_RD(PC)
- | ldc1 f2, 0(RD)
+ | load_double f2, CARG3, CARG4, 0(RD)
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
| sltiu AT, TMP0, LJ_TISNUM
| decode_RD4b TMP2
@@ -2520,6 +3097,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beqz AT, >1
|.endif
|. addu TMP2, TMP2, TMP3
+ |.if FPU
| c.eq.d f0, f2
if (vk) {
| movf TMP2, r0
@@ -2530,6 +3108,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|1:
| addu PC, PC, TMP2
}
+ |.else
+ | load_got __ledf2
+ | sw RD, TEMP_SAVE_1
+ | call_extern
+ |. sw TMP2, TEMP_SAVE_2
+ | lw RD, TEMP_SAVE_1
+ | lw TMP2, TEMP_SAVE_2
+ if (vk) {
+ | beqz CRET1, >4
+ |. nop
+ | move TMP2, r0
+ |4:
+ | addu PC, PC, TMP2
+ |1:
+ } else {
+ | bnez CRET1, >1
+ |. nop
+ | move TMP2, r0
+ |1:
+ | addu PC, PC, TMP2
+ }
+ |.endif
| ins_next
|.if FFI
|5:
@@ -2588,7 +3188,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addu PC, PC, TMP2
} else {
| sltiu TMP0, TMP0, LJ_TISTRUECOND
- | ldc1 f0, 0(RD)
+ | load_double1 0(RD)
if (op == BC_ISTC) {
| beqz TMP0, >1
} else {
@@ -2598,7 +3198,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| decode_RD4b TMP2
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
| addu TMP2, TMP2, TMP3
- | sdc1 f0, 0(RA)
+ | store_double1 0(RA)
| addu PC, PC, TMP2
|1:
}
@@ -2631,9 +3231,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = dst*8, RD = src*8
| addu RD, BASE, RD
| addu RA, BASE, RA
- | ldc1 f0, 0(RD)
+ | load_double1 0(RD)
| ins_next1
- | sdc1 f0, 0(RA)
+ | store_double1 0(RA)
| ins_next2
break;
case BC_NOT:
@@ -2653,12 +3253,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addu CARG3, BASE, RD
| addu RA, BASE, RA
| lw TMP0, HI(CARG3)
- | ldc1 f0, 0(CARG3)
| sltiu AT, TMP0, LJ_TISNUM
+ | load_double f0, CARG1, CARG2, 0(CARG3)
+ |.if FPU
| beqz AT, ->vmeta_unm
|. neg.d f0, f0
+ |.else
+ | lui TMP1, 0x8000
+ | xor CRET1, TMP1, CARG1
+ | beqz AT, ->vmeta_unm
+ |. move CRET2, CARG2
+ |.endif
| ins_next1
- | sdc1 f0, 0(RA)
+ | store_double f0, CRET1, CRET2, 0(RA)
| ins_next2
break;
case BC_LEN:
@@ -2672,10 +3279,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|. li AT, LJ_TTAB
| lw CRET1, STR:CARG1->len
|1:
+ |.if FPU
| mtc1 CRET1, f0
| cvt.d.w f0, f0
+ |.else
+ | cvti2d CRET1
+ |.endif
| ins_next1
- | sdc1 f0, 0(RA)
+ | store_double f0, CRET1, CRET2, 0(RA)
| ins_next2
|2:
| bne TMP0, AT, ->vmeta_len
@@ -2717,72 +3328,142 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addu CARG3, BASE, RB
| addu CARG4, KBASE, RC
| lw TMP1, HI(CARG3)
- | ldc1 f20, 0(CARG3)
- | ldc1 f22, 0(CARG4)
- | sltiu AT, TMP1, LJ_TISNUM
+ | sltiu AT, TMP1, LJ_TISNUM
+ | load_double f20, CARG1, CARG2, 0(CARG3)
+ | load_double f22, CARG3, CARG4, 0(CARG4)
+ |.if FPU
+ | beqz AT, ->vmeta_arith
+ |.else
+ | beqz AT, ->vmeta_arith_vn
+ |.endif
+ |. addu RA, BASE, RA
|| break;
||case 1:
| addu CARG4, BASE, RB
| addu CARG3, KBASE, RC
| lw TMP1, HI(CARG4)
- | ldc1 f22, 0(CARG4)
- | ldc1 f20, 0(CARG3)
- | sltiu AT, TMP1, LJ_TISNUM
+ | sltiu AT, TMP1, LJ_TISNUM
+ | load_double f20, CARG1, CARG2, 0(CARG3)
+ | load_double f22, CARG3, CARG4, 0(CARG4)
+ |.if FPU
+ | beqz AT, ->vmeta_arith
+ |.else
+ | beqz AT, ->vmeta_arith_nv
+ |.endif
+ |. addu RA, BASE, RA
|| break;
||default:
| addu CARG3, BASE, RB
| addu CARG4, BASE, RC
| lw TMP1, HI(CARG3)
| lw TMP2, HI(CARG4)
- | ldc1 f20, 0(CARG3)
- | ldc1 f22, 0(CARG4)
- | sltiu AT, TMP1, LJ_TISNUM
- | sltiu TMP0, TMP2, LJ_TISNUM
- | and AT, AT, TMP0
+ | sltiu AT, TMP1, LJ_TISNUM
+ | sltiu TMP0, TMP2, LJ_TISNUM
+ | and AT, AT, TMP0
+ | load_double f20, CARG1, CARG2, 0(CARG3)
+ | load_double f22, CARG3, CARG4, 0(CARG4)
+ |.if FPU
+ | beqz AT, ->vmeta_arith
+ |.else
+ | beqz AT, ->vmeta_arith_vv
+ |.endif
+ |. addu RA, BASE, RA
|| break;
||}
- | beqz AT, ->vmeta_arith
- |. addu RA, BASE, RA
|.endmacro
|
+ |.macro ins_arithfallback
+ ||switch (vk) {
+ ||case 0:
+ | b ->vmeta_arith_vn
+ |. nop
+ || break;
+ ||case 1:
+ | b ->vmeta_arith_nv
+ |. nop
+ || break;
+ ||default:
+ | b ->vmeta_arith_vv
+ |. nop
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.if FPU
|.macro fpmod, a, b, c
|->BC_MODVN_Z:
- | bal ->vm_floor // floor(b/c)
+ | bal ->vm_floor // floor(b/c)
|. div.d FARG1, b, c
| mul.d a, FRET1, c
- | sub.d a, b, a // b - floor(b/c)*c
+ | sub.d a, b, a // b - floor(b/c)*c
|.endmacro
+ |.else
|
- |.macro ins_arith, ins
+ |.macro sfpmod
+ |->BC_MODVN_Z:
+ | load_got __divdf3
+ | sw CARG1, TEMP_SAVE_1 // Keep first operand (b) in CARG1/CARG2 for the final subtraction.
+ | sw CARG2, TEMP_SAVE_2
+ | sw CARG3, TEMP_SAVE_3 // Keep second operand (c) in CARG3/CARG4 for the multiplication.
+ | call_extern // CRET1/CRET2 = b/c
+ |. sw CARG4, TEMP_SAVE_4
+ | move CARG1, CRET1
+ | bal ->vm_floor // floor(b/c)
+ |. move CARG2, CRET2
+ | load_got __muldf3
+ | move CARG1, CRET1
+ | move CARG2, CRET2
+ | lw CARG3, TEMP_SAVE_3
+ | call_extern // floor(b/c)*c
+ |. lw CARG4, TEMP_SAVE_4
+ | load_got __subdf3
+ | lw CARG1, TEMP_SAVE_1
+ | lw CARG2, TEMP_SAVE_2
+ | move CARG3, CRET1
+ | call_extern // b - floor(b/c)*c, same formula as the FPU fpmod macro.
+ |. move CARG4, CRET2
+ |.endmacro
+ |.endif
+ |
+ |.macro ins_arith, intins, fpins, fpcall
| ins_arithpre
- |.if "ins" == "fpmod_"
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
+ |.if "fpins" == "fpmod_"
+ | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
|. nop
|.else
- | ins f0, f20, f22
+ |.if FPU
+ | fpins f0, f20, f22
+ |.else
+ |.if "fpcall" == "sfpmod"
+ | sfpmod
+ |.else
+ | load_got fpcall
+ | call_extern
+ |. nop
+ |.endif
+ |.endif
| ins_next1
- | sdc1 f0, 0(RA)
+ | store_double1 0(RA)
| ins_next2
|.endif
|.endmacro
case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
- | ins_arith add.d
+ | ins_arith addu, add.d, __adddf3
break;
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
- | ins_arith sub.d
+ | ins_arith subu, sub.d, __subdf3
break;
case BC_MULVN: case BC_MULNV: case BC_MULVV:
- | ins_arith mul.d
+ | ins_arith mult, mul.d, __muldf3
break;
case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
- | ins_arith div.d
+ | ins_arith div, div.d, __divdf3
break;
case BC_MODVN:
- | ins_arith fpmod
+ | ins_arith modi, fpmod, sfpmod // Emits the shared ->BC_MODVN_Z body (fpmod/sfpmod define the label).
break;
case BC_MODNV: case BC_MODVV:
- | ins_arith fpmod_
+ | ins_arith modi, fpmod_, sfpmod // fpmod_ only loads operands and branches to ->BC_MODVN_Z.
break;
case BC_POW:
| decode_RB8a RB, INS
@@ -2792,18 +3473,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addu CARG4, BASE, RC
| lw TMP1, HI(CARG3)
| lw TMP2, HI(CARG4)
- | ldc1 FARG1, 0(CARG3)
- | ldc1 FARG2, 0(CARG4)
| sltiu AT, TMP1, LJ_TISNUM
| sltiu TMP0, TMP2, LJ_TISNUM
| and AT, AT, TMP0
| load_got pow
| beqz AT, ->vmeta_arith
|. addu RA, BASE, RA
+ | load_farg1 0(CARG3)
+ | load_farg2 0(CARG4)
| call_extern
|. nop
| ins_next1
+ |.if HFABI
| sdc1 FRET1, 0(RA)
+ |.else
+ | sw CRET1, 0(RA)
+ | sw CRET2, 4(RA)
+ |.endif
| ins_next2
break;
@@ -2826,10 +3512,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bnez CRET1, ->vmeta_binop
|. lw BASE, L->base
| addu RB, BASE, MULTRES
- | ldc1 f0, 0(RB)
+ | load_double1 0(RB)
| addu RA, BASE, RA
| ins_next1
- | sdc1 f0, 0(RA) // Copy result from RB to RA.
+ | store_double1 0(RA)
| ins_next2
break;
@@ -2864,20 +3550,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_KSHORT:
| // RA = dst*8, RD = int16_literal*8
| sra RD, INS, 16
- | mtc1 RD, f0
| addu RA, BASE, RA
+ |.if FPU
+ | mtc1 RD, f0
| cvt.d.w f0, f0
+ |.else
+ | cvti2d RD
+ |.endif
| ins_next1
- | sdc1 f0, 0(RA)
+ | store_double f0, CRET1, CRET2, 0(RA)
| ins_next2
break;
case BC_KNUM:
| // RA = dst*8, RD = num_const*8
| addu RD, KBASE, RD
| addu RA, BASE, RA
- | ldc1 f0, 0(RD)
+ | load_double1 0(RD)
| ins_next1
- | sdc1 f0, 0(RA)
+ | store_double1 0(RA)
| ins_next2
break;
case BC_KPRI:
@@ -2913,9 +3603,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw UPVAL:RB, LFUNC:RD->uvptr
| ins_next1
| lw TMP1, UPVAL:RB->v
- | ldc1 f0, 0(TMP1)
+ | load_double1 0(TMP1)
| addu RA, BASE, RA
- | sdc1 f0, 0(RA)
+ | store_double1 0(RA)
| ins_next2
break;
case BC_USETV:
@@ -2924,14 +3614,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| srl RA, RA, 1
| addu RD, BASE, RD
| addu RA, RA, LFUNC:RB
- | ldc1 f0, 0(RD)
+ | load_double1 0(RD)
| lw UPVAL:RB, LFUNC:RA->uvptr
| lbu TMP3, UPVAL:RB->marked
| lw CARG2, UPVAL:RB->v
| andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
| lbu TMP0, UPVAL:RB->closed
| lw TMP2, HI(RD)
- | sdc1 f0, 0(CARG2)
+ | store_double1 0(CARG2)
| li AT, LJ_GC_BLACK|1
| or TMP3, TMP3, TMP0
| beq TMP3, AT, >2 // Upvalue is closed and black?
@@ -2991,11 +3681,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| srl RA, RA, 1
| addu RD, KBASE, RD
| addu RA, RA, LFUNC:RB
- | ldc1 f0, 0(RD)
+ | load_double1 0(RD)
| lw UPVAL:RB, LFUNC:RA->uvptr
| ins_next1
| lw TMP1, UPVAL:RB->v
- | sdc1 f0, 0(TMP1)
+ | store_double1 0(TMP1)
| ins_next2
break;
case BC_USETP:
@@ -3126,13 +3816,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw TMP2, HI(CARG3)
| lw TAB:RB, LO(CARG2)
| li AT, LJ_TTAB
- | ldc1 f0, 0(CARG3)
| bne TMP1, AT, ->vmeta_tgetv
|. addu RA, BASE, RA
| sltiu AT, TMP2, LJ_TISNUM
| beqz AT, >5
|. li AT, LJ_TSTR
- |
+ |.if FPU
+ | ldc1 f0, 0(CARG3)
| // Convert number key to integer, check for integerness and range.
| cvt.w.d f2, f0
| lw TMP0, TAB:RB->asize
@@ -3148,9 +3838,51 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw TMP0, HI(TMP2)
| beq TMP0, TISNIL, >2
|. ldc1 f0, 0(TMP2)
+ |.else
+ | sw RB, TEMP_SAVE_1
+ | sw CARG2, TEMP_SAVE_3
+ | load_got __fixdfsi
+ | lw CARG1, 0(CARG3)
+ | lw CARG2, 4(CARG3)
+ | call_extern // cvt.w.d f2, f0
+ |. sw RC, TEMP_SAVE_2
+ | sw CRET1, TEMP_SAVE_4
+ | cvti2d CRET1 // cvt.d.w f4, f2
+ | load_got __ledf2
+ | lw RC, TEMP_SAVE_2
+ | addu CARG3, BASE, RC
+ | lw CARG1, 0(CARG3)
+ | lw CARG2, 4(CARG3)
+ | move CARG3, CRET1
+ | move CARG4, CRET2
+ | call_extern // c.eq.d f0, f4
+ |. nop
+ | lw CARG3, TEMP_SAVE_3
+ | lw RC, TEMP_SAVE_2
+ | lw RB, TEMP_SAVE_1
+ | lw TMP0, TAB:RB->asize
+ | lw TMP1, TAB:RB->array
+ | lw TMP2, TEMP_SAVE_4
+ | lw CARG2, TEMP_SAVE_3 // Restore old CARG2 and CARG3.
+ | addu CARG3, BASE, RC
+ | bnez CRET1, >3
+ |. sltu AT, TMP2, TMP0
+ | b >4
+ |. nop
+ |3:
+ | move AT, r0
+ |4:
+ | sll TMP2, TMP2, 3
+ | beqz AT, ->vmeta_tgetv // Integer key and in array part?
+ |. addu TMP2, TMP1, TMP2
+ | lw TMP0, HI(TMP2)
+ | lw SFT2, 4(TMP2)
+ | beq TMP0, TISNIL, >2
+ |. lw SFT1, 0(TMP2)
+ |.endif
|1:
| ins_next1
- | sdc1 f0, 0(RA)
+ | store_double1 0(RA)
| ins_next2
|
|2: // Check for __index if table value is nil.
@@ -3246,10 +3978,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|. addu RC, TMP2, RC
| lw TMP1, HI(RC)
| beq TMP1, TISNIL, >5
- |. ldc1 f0, 0(RC)
+ |. nop
|1:
+ | load_double1 0(RC)
| ins_next1
- | sdc1 f0, 0(RA)
+ | store_double1 0(RA)
| ins_next2
|
|5: // Check for __index if table value is nil.
@@ -3271,20 +4004,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addu CARG2, BASE, RB
| addu CARG3, BASE, RC
| lw TAB:CARG1, LO(CARG2)
+ | lw TMP0, TAB:CARG1->asize
+ | lw TMP1, TAB:CARG1->array
+ |.if FPU
| ldc1 f0, 0(CARG3)
| trunc.w.d f2, f0
- | lw TMP0, TAB:CARG1->asize
| mfc1 CARG2, f2
- | lw TMP1, TAB:CARG1->array
+ |.else
+ | load_got __fixdfsi
+ | lw CARG1, 0(CARG3)
+ | call_extern
+ |. lw CARG2, 4(CARG3)
+ | move CARG2, CRET1
+ |.endif
| sltu AT, CARG2, TMP0
| sll TMP2, CARG2, 3
| beqz AT, ->vmeta_tgetr // In array part?
|. addu TMP2, TMP1, TMP2
- | ldc1 f0, 0(TMP2)
+ | load_double1 0(TMP2)
|->BC_TGETR_Z:
| addu RA, BASE, RA
| ins_next1
- | sdc1 f0, 0(RA)
+ | store_double1 0(RA)
| ins_next2
break;
@@ -3299,13 +4040,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw TMP2, HI(CARG3)
| lw TAB:RB, LO(CARG2)
| li AT, LJ_TTAB
- | ldc1 f0, 0(CARG3)
| bne TMP1, AT, ->vmeta_tsetv
|. addu RA, BASE, RA
| sltiu AT, TMP2, LJ_TISNUM
| beqz AT, >5
|. li AT, LJ_TSTR
- |
+ |.if FPU
+ | ldc1 f0, 0(CARG3)
| // Convert number key to integer, check for integerness and range.
| cvt.w.d f2, f0
| lw TMP0, TAB:RB->asize
@@ -3326,6 +4067,52 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| andi AT, TMP3, LJ_GC_BLACK // isblack(table)
| bnez AT, >7
|. sdc1 f0, 0(TMP1)
+ |.else
+ | sw RB, TEMP_SAVE_1
+ | sw RC, TEMP_SAVE_2
+ | sw CARG2, TEMP_SAVE_3
+ | load_got __fixdfsi
+ | lw CARG1, 0(CARG3)
+ | call_extern // cvt.w.d f2, f0
+ |. lw CARG2, 4(CARG3)
+ | sw CRET1, TEMP_SAVE_4
+ | cvti2d CRET1 // cvt.d.w f4, f2
+ | load_got __ledf2
+ | lw RC, TEMP_SAVE_2
+ | addu CARG3, BASE, RC
+ | lw CARG1, 0(CARG3)
+ | lw CARG2, 4(CARG3)
+ | move CARG3, CRET1
+ | call_extern // c.eq.d f0, f4
+ |. move CARG4, CRET2
+ | lw RC, TEMP_SAVE_2
+ | lw RB, TEMP_SAVE_1
+ | lw TMP0, TAB:RB->asize
+ | lw TMP1, TAB:RB->array
+ | lw TMP2, TEMP_SAVE_4
+ | lw CARG2, TEMP_SAVE_3 // Restore old CARG2 and CARG3.
+ | addu CARG3, BASE, RC
+ | bnez CRET1, >4 // NaN?
+ |. sltu AT, TMP2, TMP0
+ | b >6
+ |. nop
+ |4:
+ | move AT, r0
+ |6:
+ | sll TMP2, TMP2, 3
+ | beqz AT, ->vmeta_tsetv // Integer key and in array part?
+ |. addu TMP1, TMP1, TMP2
+ | lbu TMP3, TAB:RB->marked
+ | lw TMP0, HI(TMP1)
+ | lw SFT1, 0(RA)
+ | beq TMP0, TISNIL, >3
+ |. lw SFT2, 4(RA)
+ |1:
+ | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | sw SFT1, 0(TMP1)
+ | bnez AT, >7
+ |. sw SFT2, 4(TMP1)
+ |.endif
|2:
| ins_next
|
@@ -3374,7 +4161,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sll TMP1, TMP1, 3
| subu TMP1, TMP0, TMP1
| addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- | ldc1 f20, 0(RA)
+ | load_double f20, SFT1, SFT2, 0(RA)
|1:
| lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
| lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
@@ -3388,8 +4175,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|. lw TAB:TMP0, TAB:RB->metatable
|2:
| andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| bnez AT, >7
|. sdc1 f20, NODE:TMP2->val
+ |.else
+ | sw SFT1, NODE:TMP2->val.u32.hi
+ | bnez AT, >7
+ |. sw SFT2, NODE:TMP2->val.u32.lo
+ |.endif
|3:
| ins_next
|
@@ -3417,6 +4210,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
|. li AT, LJ_TSTR
|6:
+ |.if not FPU
+ | sw SFT1, TEMP_SAVE_1
+ | sw SFT2, TEMP_SAVE_2
+ |.endif
| load_got lj_tab_newkey
| sw STR:RC, LO(CARG3)
| sw AT, HI(CARG3)
@@ -3427,8 +4224,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|. move CARG1, L
| // Returns TValue *.
| lw BASE, L->base
+ |.if FPU
| b <3 // No 2nd write barrier needed.
|. sdc1 f20, 0(CRET1)
+ |.else
+ | lw SFT2, TEMP_SAVE_1
+ | lw SFT3, TEMP_SAVE_2
+ | sw SFT2, 0(CRET1)
+ | b <3
+ |. sw SFT3, 4(CRET1)
+ |.endif
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
| barrierback TAB:RB, TMP3, TMP0, <3
@@ -3453,11 +4258,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw TMP1, HI(RC)
| lbu TMP3, TAB:RB->marked
| beq TMP1, TISNIL, >5
- |. ldc1 f0, 0(RA)
|1:
- | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ |. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | load_double1 0(RA)
+ |.if FPU
| bnez AT, >7
|. sdc1 f0, 0(RC)
+ |.else
+ | sw SFT1, 0(RC)
+ | bnez AT, >7
+ |. sw SFT2, 4(RC)
+ |.endif
|2:
| ins_next
|
@@ -3482,12 +4293,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| decode_RDtoRC8 RC, RD
| addu CARG1, BASE, RB
| addu CARG3, BASE, RC
- | lw TAB:CARG2, LO(CARG1)
+ |.if FPU
| ldc1 f0, 0(CARG3)
| trunc.w.d f2, f0
+ | mfc1 CARG3, f2
+ |.else
+ | load_got __fixdfsi
+ | sw CARG1, TEMP_SAVE_1
+ | lw CARG1, 0(CARG3)
+ | call_extern
+ |. lw CARG2, 4(CARG3)
+ | lw CARG1, TEMP_SAVE_1
+ | move CARG3, CRET1
+ |.endif
+ | lw TAB:CARG2, LO(CARG1)
| lbu TMP3, TAB:CARG2->marked
| lw TMP0, TAB:CARG2->asize
- | mfc1 CARG3, f2
| lw TMP1, TAB:CARG2->array
| andi AT, TMP3, LJ_GC_BLACK // isblack(table)
| bnez AT, >7
@@ -3495,12 +4316,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|2:
| sltu AT, CARG3, TMP0
| sll TMP2, CARG3, 3
+ |.if FPU
| beqz AT, ->vmeta_tsetr // In array part?
|. ldc1 f20, 0(RA)
| addu CRET1, TMP1, TMP2
|->BC_TSETR_Z:
+ |.else
+ | lw TMP0, 0(RA)
+ | lw TMP3, 4(RA)
+ | sw TMP0, TEMP_SAVE_1
+ | beqz AT, ->vmeta_tsetr // In array part?
+ |. sw TMP3, TEMP_SAVE_2
+ | addu CRET1, TMP1, TMP2
+ |->BC_TSETR_Z:
+ | lw TMP0, TEMP_SAVE_1
+ | lw TMP3, TEMP_SAVE_2
+ |.endif
| ins_next1
- | sdc1 f20, 0(CRET1)
+ | store_double f20, TMP0, TMP3, 0(CRET1)
| ins_next2
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -3529,10 +4362,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addu TMP1, TMP1, CARG1
| andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
|3: // Copy result slots to table.
- | ldc1 f0, 0(RA)
+ | load_double1 0(RA)
| addiu RA, RA, 8
| sltu AT, RA, TMP2
- | sdc1 f0, 0(TMP1)
+ | store_double1 0(TMP1)
| bnez AT, <3
|. addiu TMP1, TMP1, 8
| bnez TMP0, >7
@@ -3607,10 +4440,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beqz NARGS8:RC, >3
|. move TMP3, NARGS8:RC
|2:
- | ldc1 f0, 0(RA)
+ | load_double1 0(RA)
| addiu RA, RA, 8
| addiu TMP3, TMP3, -8
- | sdc1 f0, 0(TMP2)
+ | store_double1 0(TMP2)
| bnez TMP3, <2
|. addiu TMP2, TMP2, 8
|3:
@@ -3647,12 +4480,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| li AT, LJ_TFUNC
| lw TMP1, -24+HI(BASE)
| lw LFUNC:RB, -24+LO(BASE)
- | ldc1 f2, -8(BASE)
- | ldc1 f0, -16(BASE)
+ | load_double1 -8(BASE)
+ | load_double2 -16(BASE)
| sw TMP1, HI(BASE) // Copy callable.
| sw LFUNC:RB, LO(BASE)
- | sdc1 f2, 16(BASE) // Copy control var.
- | sdc1 f0, 8(BASE) // Copy state.
+ | store_double1 16(BASE) // Copy control var.
+ | store_double2 8(BASE) // Copy state.
| addiu BASE, BASE, 8
| bne TMP1, AT, ->vmeta_call
|. li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -3676,19 +4509,29 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|. sll TMP3, RC, 3
| addu TMP3, TMP1, TMP3
| lw TMP2, HI(TMP3)
- | ldc1 f0, 0(TMP3)
+ | load_double1 0(TMP3)
+ |.if FPU
| mtc1 RC, f2
+ |.else
+ | move CARG1, RC
+ |.endif
| lhu RD, -4+OFS_RD(PC)
| beq TMP2, TISNIL, <1 // Skip holes in array part.
|. addiu RC, RC, 1
+ | store_double1 8(RA)
+ |.if FPU
| cvt.d.w f2, f2
+ |.else
+ | load_got __floatsidf
+ | call_extern
+ |. nop
+ |.endif
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
- | sdc1 f0, 8(RA)
+ | store_double f2, CRET1, CRET2, 0(RA)
| decode_RD4b RD
| addu RD, RD, TMP3
| sw RC, -8+LO(RA) // Update control var.
| addu PC, PC, RD
- | sdc1 f2, 0(RA)
|3:
| ins_next
|
@@ -3704,17 +4547,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| subu TMP3, TMP3, RB
| addu NODE:TMP3, TMP3, TMP2
| lw RB, HI(NODE:TMP3)
- | ldc1 f0, 0(NODE:TMP3)
+ | load_double1 0(NODE:TMP3)
| lhu RD, -4+OFS_RD(PC)
| beq RB, TISNIL, <6 // Skip holes in hash part.
|. addiu RC, RC, 1
+ |.if FPU
| ldc1 f2, NODE:TMP3->key
+ |.else
+ | lw SFT3, NODE:TMP3->key.u32.hi
+ | lw SFT4, NODE:TMP3->key.u32.lo
+ |.endif
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
- | sdc1 f0, 8(RA)
+ | store_double1 8(RA)
| addu RC, RC, TMP0
| decode_RD4b RD
| addu RD, RD, TMP3
- | sdc1 f2, 0(RA)
+ | store_double2 0(RA)
| addu PC, PC, RD
| b <3
|. sw RC, -8+LO(RA) // Update control var.
@@ -3794,9 +4642,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bnez AT, >7
|. addiu MULTRES, TMP1, 8
|6:
- | ldc1 f0, 0(RC)
+ | load_double1 0(RC)
| addiu RC, RC, 8
- | sdc1 f0, 0(RA)
+ | store_double1 0(RA)
| sltu AT, RC, TMP3
| bnez AT, <6 // More vararg slots?
|. addiu RA, RA, 8
@@ -3852,10 +4700,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beqz RC, >3
|. subu BASE, TMP2, TMP0
|2:
- | ldc1 f0, 0(RA)
+ | load_double1 0(RA)
| addiu RA, RA, 8
| addiu RC, RC, -8
- | sdc1 f0, 0(TMP2)
+ | store_double1 0(TMP2)
| bnez RC, <2
|. addiu TMP2, TMP2, 8
|3:
@@ -3896,14 +4744,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw INS, -4(PC)
| addiu TMP2, BASE, -8
if (op == BC_RET1) {
- | ldc1 f0, 0(RA)
+ | load_double1 0(RA)
}
| decode_RB8a RB, INS
| decode_RA8a RA, INS
| decode_RB8b RB
| decode_RA8b RA
if (op == BC_RET1) {
- | sdc1 f0, 0(TMP2)
+ | store_double1 0(TMP2)
}
| subu BASE, TMP2, RA
|5:
@@ -3928,6 +4776,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
/* -- Loops and branches ------------------------------------------------ */
+ |.macro cmp_res, gt
+ |.if gt == 1
+ |.if FPU
+ | movf TMP1, r0, 0 // f0>f2: TMP1=0
+ | movf TMP2, r0, 1 // f2>f0: TMP2=0
+ |.else
+ | li SFT2, 1 // Soft-float: CRET1 holds the result of the preceding __ledf2 call.
+ | bne CRET1, SFT2, >1
+ |. nop
+ | b >2
+ |. move TMP1, r0 // CRET1 == 1 (f0>f2): TMP1=0
+ |1:
+ | li SFT2, -1
+ | bne CRET1, SFT2, >2
+ |. nop
+ | move TMP2, r0 // CRET1 == -1 (f0<f2): TMP2=0
+ |2:
+ |.endif
+ |.else
+ |.if FPU
+ | movt TMP1, r0, 0 // f0<=f2: TMP1=0
+ | movt TMP2, r0, 1 // f2<=f0: TMP2=0
+ |.else
+ | bltz CRET1, >3 // f0<f2: TMP1=0
+ |. nop // NOTE(review): delay slot reconstructed as nop; confirm against the upstream patch.
+ | beqz CRET1, >2 // f0==f2: TMP1=TMP2=0
+ |. li SFT2, 1
+ | bne SFT2, CRET1, >4 // f0>f2: TMP2=0
+ |. nop
+ | b >4
+ |2:
+ |. move TMP2, r0 // Delay slot of the branch above and entry point for f0==f2.
+ |3:
+ | move TMP1, r0
+ |4:
+ |.endif
+ |.endif
+ |.endmacro
+
case BC_FORL:
|.if JIT
| hotloop
@@ -3946,12 +4833,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = (op == BC_IFORL || op == BC_JFORL);
| addu RA, BASE, RA
if (vk) {
+ |.if FPU
| ldc1 f0, FORL_IDX*8(RA)
| ldc1 f4, FORL_STEP*8(RA)
| ldc1 f2, FORL_STOP*8(RA)
| lw TMP3, FORL_STEP*8+HI(RA)
| add.d f0, f0, f4
| sdc1 f0, FORL_IDX*8(RA)
+ |.else
+ | load_got __adddf3
+ | load_farg1 FORL_IDX*8(RA)
+ | load_farg2 FORL_STEP*8(RA)
+ | call_extern
+ |. sw RD, TEMP_SAVE_1 //save RD
+ | sw CRET1, FORL_IDX*8(RA)
+ | sw CRET2, FORL_IDX*8+4(RA)
+ | load_farg1 FORL_IDX*8(RA)
+ | load_farg2 FORL_STOP*8(RA) // f0 and f2
+ | lw TMP3, FORL_STEP*8+HI(RA)
+ | lw RD, TEMP_SAVE_1
+ |.endif
} else {
| lw TMP1, FORL_IDX*8+HI(RA)
| lw TMP3, FORL_STEP*8+HI(RA)
@@ -3961,25 +4862,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sltiu TMP2, TMP2, LJ_TISNUM
| and TMP1, TMP1, TMP0
| and TMP1, TMP1, TMP2
+ |.if FPU
| ldc1 f0, FORL_IDX*8(RA)
| beqz TMP1, ->vmeta_for
|. ldc1 f2, FORL_STOP*8(RA)
+ |.else
+ | beqz TMP1, ->vmeta_for
+ | load_farg1 FORL_IDX*8(RA)
+ | load_farg2 FORL_STOP*8(RA)
+ |.endif
}
if (op != BC_JFORL) {
| srl RD, RD, 1
| lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535)
}
+ | store_double f0, CARG1, CARG2, FORL_EXT*8(RA)
+ |.if FPU
| c.le.d 0, f0, f2
| c.le.d 1, f2, f0
- | sdc1 f0, FORL_EXT*8(RA)
+ |.else
+ | sw RD, TEMP_SAVE_1
+ | load_got __ledf2 // f0<=f2
+ | call_extern
+ |. sw TMP0, TEMP_SAVE_2
+ | lw TMP0, TEMP_SAVE_2
+ | lw RD, TEMP_SAVE_1
+ | lw TMP3, FORL_STEP*8+HI(RA) // Restored step.
+ |.endif
+ |
if (op == BC_JFORI) {
| li TMP1, 1
| li TMP2, 1
| addu TMP0, RD, TMP0
| slt TMP3, TMP3, r0
- | movf TMP1, r0, 0
+ | cmp_res 1
| addu PC, PC, TMP0
- | movf TMP2, r0, 1
| lhu RD, -4+OFS_RD(PC)
| movn TMP1, TMP2, TMP3
| bnez TMP1, =>BC_JLOOP
@@ -3988,8 +4905,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| li TMP1, 1
| li TMP2, 1
| slt TMP3, TMP3, r0
- | movf TMP1, r0, 0
- | movf TMP2, r0, 1
+ | cmp_res 1
| movn TMP1, TMP2, TMP3
| bnez TMP1, =>BC_JLOOP
|. nop
@@ -3998,11 +4914,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| slt TMP3, TMP3, r0
| move TMP2, TMP1
if (op == BC_FORI) {
- | movt TMP1, r0, 0
- | movt TMP2, r0, 1
+ | cmp_res 0
} else {
- | movf TMP1, r0, 0
- | movf TMP2, r0, 1
+ | cmp_res 1
}
| movn TMP1, TMP2, TMP3
| addu PC, PC, TMP1
@@ -4256,8 +5170,10 @@ static void emit_asm_debug(BuildCtx *ctx)
fcofs, CFRAME_SIZE);
for (i = 23; i >= 16; i--)
fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
+#if !LJ_SOFTFP
for (i = 30; i >= 20; i -= 2)
fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
+#endif
fprintf(ctx->fp,
"\t.align 2\n"
".LEFDE0:\n\n");
@@ -4275,6 +5191,7 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.align 2\n"
".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
#endif
+#if !LJ_NO_UNWIND
fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
fprintf(ctx->fp,
"\t.globl lj_err_unwind_dwarf\n"
@@ -4342,6 +5259,7 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.byte 0xd\n\t.uleb128 0x10\n"
"\t.align 2\n"
".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
#endif
break;
default:
diff --git a/lib/luajit/src/vm_x64.dasc b/lib/luajit/src/vm_x64.dasc
index e7e990ae27..bba89aaf1b 100644
--- a/lib/luajit/src/vm_x64.dasc
+++ b/lib/luajit/src/vm_x64.dasc
@@ -531,7 +531,7 @@ static void build_subroutines(BuildCtx *ctx)
| jmp >2
|
|->vm_growstack_v: // Grow stack for vararg Lua function.
- | sub RD, 8
+ | sub RD, 16 // LJ_FR2
| jmp >1
|
|->vm_growstack_f: // Grow stack for fixarg Lua function.
diff --git a/src/apps/basic/README.md b/src/apps/basic/README.md
index 9d430a8eda..d5943c77da 100644
--- a/src/apps/basic/README.md
+++ b/src/apps/basic/README.md
@@ -5,9 +5,9 @@ functionality for use in you app networks.
## Source
-The `Source` app is a synthetic packet generator. On each breath it
-outputs 1,000 new packets to each attached output port. The packet
-data is uninitialized garbage and each packet is 60 bytes long.
+The `Source` app is a synthetic packet generator. On each breath it fills
+each attached output link with new packets. The packet data is
+uninitialized garbage and each packet is 60 bytes long.
![Source](.images/Source.png)
diff --git a/src/apps/basic/README.md.src b/src/apps/basic/README.md.src
index 2521c35171..7473b17abf 100644
--- a/src/apps/basic/README.md.src
+++ b/src/apps/basic/README.md.src
@@ -5,9 +5,9 @@ functionality for use in you app networks.
## Source
-The `Source` app is a synthetic packet generator. On each breath it
-outputs 1,000 new packets to each attached output port. The packet
-data is uninitialized garbage and each packet is 60 bytes long.
+The `Source` app is a synthetic packet generator. On each breath it fills
+each attached output link with new packets. The packet data is
+uninitialized garbage and each packet is 60 bytes long.
DIAGRAM: Source
+--------+
diff --git a/src/apps/test/synth.lua b/src/apps/test/synth.lua
new file mode 100644
index 0000000000..36fe231902
--- /dev/null
+++ b/src/apps/test/synth.lua
@@ -0,0 +1,94 @@
+module(...,package.seeall)
+
+local ffi = require("ffi")
+local ethernet = require("lib.protocol.ethernet")
+local datagram = require("lib.protocol.datagram")
+local transmit, receive = link.transmit, link.receive
+
+-- Synth: synthetic packet generator app. It builds one Ethernet
+-- template packet per configured size and transmits clones of those
+-- templates on every output link.
+Synth = {}
+
+-- Create a Synth app. The optional arg is parsed with
+-- config.parse_app_arg and may provide:
+--   sizes     array of total packet sizes in bytes, Ethernet header
+--             included (default {64})
+--   src, dst  Ethernet addresses in the textual form passed to
+--             ethernet:pton (default '00:00:00:00:00:00')
+function Synth:new (arg)
+   local conf = arg and config.parse_app_arg(arg) or {}
+   conf.sizes = conf.sizes or {64}
+   assert(#conf.sizes >= 1, "Needs at least one size.")
+   conf.src = conf.src or '00:00:00:00:00:00'
+   conf.dst = conf.dst or '00:00:00:00:00:00'
+   local packets = {}
+   for i, size in ipairs(conf.sizes) do
+      local ether = ethernet:new({ src = ethernet:pton(conf.src),
+                                   dst = ethernet:pton(conf.dst) })
+      local payload_size = size - ethernet:sizeof()
+      -- Fail early with a clear message instead of handing a negative
+      -- length to ffi.new.
+      assert(payload_size >= 0,
+             "Packet size "..tostring(size)..
+             " is smaller than the Ethernet header.")
+      local data = ffi.new("char[?]", payload_size)
+      local dgram = datagram:new(packet.from_pointer(data, payload_size))
+      dgram:push(ether)
+      packets[i] = dgram:packet()
+   end
+   return setmetatable({packets=packets}, {__index=Synth})
+end
+
+-- On each output link, transmit one clone of every template packet,
+-- repeated link.nwritable(o) times.
+-- NOTE(review): with more than one size this offers
+-- nwritable * #packets clones per link, i.e. more than the value
+-- nwritable reported; presumably link.transmit discards the surplus
+-- -- confirm.
+function Synth:pull ()
+   for _, o in ipairs(self.output) do
+      for i = 1, link.nwritable(o) do
+         for _, p in ipairs(self.packets) do
+            transmit(o, packet.clone(p))
+         end
+      end
+   end
+end
+
+-- Free the template packets created by Synth:new.
+function Synth:stop ()
+   for _, p in ipairs(self.packets) do
+      packet.free(p)
+   end
+end
+
+-- Return the whole contents of the file at path. Raises an error that
+-- names the file if it cannot be opened; closes the handle after reading.
+local function read_file (path)
+   local f = assert(io.open(path, "rb"))
+   local contents = f:read('*a')
+   f:close()
+   return contents
+end
+
+-- Run Synth into a PcapWriter and compare the written capture with the
+-- reference file apps/test/synth.pcap.
+function selftest ()
+   local pcap = require("apps.pcap.pcap")
+   local c = config.new()
+   config.app(c, "synth", Synth, { sizes = {32, 64, 128},
+                                   src = "11:11:11:11:11:11",
+                                   dst = "22:22:22:22:22:22" })
+   config.app(c, "writer", pcap.PcapWriter, "apps/test/synth.pcap.output")
+   config.link(c, "synth.output->writer.input")
+   engine.configure(c)
+   engine.main({ duration = 0.00000001, -- hack: one breath.
+                 report = { showlinks = true } })
+
+   if read_file("apps/test/synth.pcap") ~=
+      read_file("apps/test/synth.pcap.output")
+   then
+      error("synth.pcap and synth.pcap.output differ.")
+   end
+end
diff --git a/src/apps/test/synth.pcap b/src/apps/test/synth.pcap
new file mode 100644
index 0000000000..4dd3dad5ed
Binary files /dev/null and b/src/apps/test/synth.pcap differ
diff --git a/src/bench/packetblaster-64 b/src/bench/packetblaster-64
index 4520673084..112a3b3852 100755
--- a/src/bench/packetblaster-64
+++ b/src/bench/packetblaster-64
@@ -2,7 +2,6 @@
set -e
[ ! -z "$SNABB_PCI_INTEL0" ] || exit 1
-
out=$(./snabb packetblaster replay --duration 1 \
program/snabbnfv/test_fixtures/pcap/64.pcap \
"$SNABB_PCI_INTEL0")
diff --git a/src/bench/packetblaster-synth-64 b/src/bench/packetblaster-synth-64
new file mode 100755
index 0000000000..301e588720
--- /dev/null
+++ b/src/bench/packetblaster-synth-64
@@ -0,0 +1,10 @@
+#!/bin/bash
+set -e
+
+[ ! -z "$SNABB_PCI_INTEL0" ] || exit 1
+
+out=$(./snabb packetblaster synth --src 11:11:11:11:11:11 --dst 22:22:22:22:22:22 --sizes 64 --duration 1 \
+ "$SNABB_PCI_INTEL0")
+# Extract floating point Mpps number from output.
+pps=$(echo "$out" | grep TXDGPC | cut -f 3 | sed s/,//g)
+echo "scale=2; $pps / 1000000" | bc
diff --git a/src/core/app.lua b/src/core/app.lua
index f5f2cc397a..35a5305631 100644
--- a/src/core/app.lua
+++ b/src/core/app.lua
@@ -163,6 +163,10 @@ function apply_config_actions (actions, conf)
local class = conf.apps[name].class
local arg = conf.apps[name].arg
local app = class:new(arg)
+ if type(app) ~= 'table' then -- app is indexed and given fields below, so class:new must return a table.
+ error(("bad return value from app '%s' new() method: %s"):format(
+ name, tostring(app)))
+ end
local zone = app.zone or getfenv(class.new)._NAME or name
app.appname = name
app.output = {}
diff --git a/src/dasm.lua b/src/dasm.lua
index 448eab9cf9..acf8587e5f 100644
--- a/src/dasm.lua
+++ b/src/dasm.lua
@@ -1,5 +1,5 @@
---binding to the DynASM encoding engine.
+--Binding to the DynASM encoding engine.
--Written by Cosmin Apreutesei. Public Domain.
local ffi = require'ffi'
diff --git a/src/dasm_x64.lua b/src/dasm_x64.lua
index 24efbae866..c22ddcfda8 100644
--- a/src/dasm_x64.lua
+++ b/src/dasm_x64.lua
@@ -9,10 +9,11 @@
------------------------------------------------------------------------------
--unload dasm_x86 if it's already loaded.
+if not package then package = {loaded = {}} end --for compat. with minilua
local dasm_x86 = package.loaded.dasm_x86
package.loaded.dasm_x86 = nil
-rawset(_G, 'x64', true) -- Using a global is an ugly, but effective solution.
+x64 = true -- Using a global is an ugly, but effective solution.
local dasm_x64 = require("dasm_x86")
package.loaded.dasm_x86 = dasm_x86 --put it back
diff --git a/src/dasm_x86.h b/src/dasm_x86.h
index 175febe0ca..be9c289f02 100644
--- a/src/dasm_x86.h
+++ b/src/dasm_x86.h
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
dasm_State *D = Dst_REF;
dasm_ActList p = D->actionlist + start;
dasm_Section *sec = D->section;
- int pos = sec->pos, ofs = sec->ofs, mrm = 4;
+ int pos = sec->pos, ofs = sec->ofs, mrm = -1;
int *b;
if (pos >= sec->epos) {
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...)
b[pos++] = n;
switch (action) {
case DASM_DISP:
- if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
+ if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
case DASM_IMM_D: ofs += 4; break;
@@ -203,10 +203,17 @@ void dasm_put(Dst_DECL, int start, ...)
case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
case DASM_SPACE: p++; ofs += n; break;
case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
- case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
- if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue;
+ case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
+ if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
+ if (*p < 0x20 && (n&7) == 4) ofs++;
+ switch ((*p++ >> 3) & 3) {
+ case 3: n |= b[pos-3];
+ case 2: n |= b[pos-2];
+ case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
+ }
+ continue;
}
- mrm = 4;
+ mrm = -1;
} else {
int *pl, n;
switch (action) {
@@ -393,7 +400,22 @@ int dasm_encode(Dst_DECL, void *buffer)
case DASM_IMM_W: dasmw(n); break;
case DASM_VREG: {
int t = *p++;
- if (t >= 5) n <<= 4; else if (t >= 2) n <<= 3;
+ unsigned char *ex = cp - (t&7);
+ if ((n & 8) && t < 0xa0) {
+ if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
+ n &= 7;
+ } else if (n & 0x10) {
+ if (*ex & 0x80) {
+ *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
+ }
+ while (++ex < cp) ex[-1] = *ex;
+ if (mark) mark--;
+ cp--;
+ n &= 7;
+ }
+ if (t >= 0xc0) n <<= 4;
+ else if (t >= 0x40) n <<= 3;
+ else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
cp[-1] ^= n;
break;
}
diff --git a/src/dasm_x86.lua b/src/dasm_x86.lua
index e7563d477f..0c11f020ec 100644
--- a/src/dasm_x86.lua
+++ b/src/dasm_x86.lua
@@ -44,7 +44,7 @@ local action_names = {
-- int arg, 1 buffer pos:
"DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
-- action arg (1 byte), int arg, 1 buffer pos (reg/num):
- "VREG", "SPACE", -- !x64: VREG support NYI.
+ "VREG", "SPACE",
-- ptrdiff_t arg, 1 buffer pos (address): !x64
"SETLABEL", "REL_A",
-- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -92,6 +92,21 @@ local function init_actionlist()
secpos = 1
end
+-- VREG kind encodings, pre-shifted by 5 bits; wvreg adds the shrink count * 8 and psz to form the action byte.
+local map_vreg = {
+ ["modrm.rm.m"] = 0x00,
+ ["modrm.rm.r"] = 0x20,
+ ["opcode"] = 0x20,
+ ["sib.base"] = 0x20,
+ ["sib.index"] = 0x40,
+ ["modrm.reg"] = 0x80,
+ ["vex.v"] = 0xa0,
+ ["imm.hi"] = 0xc0,
+}
+
+-- Current number of VREG actions contributing to REX/VEX shrinkage (bumped by wvreg, reset on a non-deferred VREG).
+local vreg_shrink_count = 0
+
------------------------------------------------------------------------------
-- Compute action numbers for action names.
@@ -151,6 +166,21 @@ local function waction(action, a, num)
if a or num then secpos = secpos + (num or 1) end
end
+-- Optionally add a VREG action (no-op when vreg is nil); kind must be a map_vreg key, psz is added to the emitted byte.
+local function wvreg(kind, vreg, psz, sk, defer)
+ if not vreg then return end
+ waction("VREG", vreg)
+ local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'") -- report the bad kind, not the vreg
+ if b < (sk or 0) then -- kinds encoded below the sk threshold count towards shrinkage
+ vreg_shrink_count = vreg_shrink_count + 1
+ end
+ if not defer then -- fold the pending count into this action byte and start over
+ b = b + vreg_shrink_count * 8
+ vreg_shrink_count = 0
+ end
+ wputxb(b + (psz or 0))
+end
+
-- Add call to embedded DynASM C code.
local function wcall(func, args)
if luamode then
@@ -390,6 +420,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
map_reg_valid_index[map_archdef.esp] = false
if x64 then map_reg_valid_index[map_archdef.rsp] = false end
+if x64 then map_reg_needrex[map_archdef.Rb] = true end
map_archdef["Ra"] = "@"..addrsize
-- FP registers (internally tword sized, but use "f" as operand size).
@@ -527,16 +558,24 @@ local function wputszarg(sz, n)
end
-- Put multi-byte opcode with operand-size dependent modifications.
-local function wputop(sz, op, rex, vex)
+local function wputop(sz, op, rex, vex, vregr, vregxb)
+ local psz, sk = 0, nil
if vex then
local tail
if vex.m == 1 and band(rex, 11) == 0 then
- wputb(0xc5)
+ if x64 and vregxb then
+ sk = map_vreg["modrm.reg"]
+ else
+ wputb(0xc5)
tail = shl(bxor(band(rex, 4), 4), 5)
- else
+ psz = 3
+ end
+ end
+ if not tail then
wputb(0xc4)
wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
tail = shl(band(rex, 8), 4)
+ psz = 4
end
local reg, vreg = 0, nil
if vex.v then
@@ -546,12 +585,18 @@ local function wputop(sz, op, rex, vex)
end
if sz == "y" or vex.l then tail = tail + 4 end
wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
- if vreg then waction("VREG", vreg); wputxb(4) end
+ wvreg("vex.v", vreg)
rex = 0
if op >= 256 then werror("bad vex opcode") end
+ else
+ if rex ~= 0 then
+ if not x64 then werror("bad operand size") end
+ elseif (vregr or vregxb) and x64 then
+ rex = 0x10
+ sk = map_vreg["vex.v"]
+ end
end
local r
- if rex ~= 0 and not x64 then werror("bad operand size") end
if sz == "w" then wputb(102) end
-- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -560,20 +605,20 @@ local function wputop(sz, op, rex, vex)
if rex ~= 0 then
local opc3 = band(op, 0xffff00)
if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
- wputb(64 + band(rex, 15)); rex = 0
+ wputb(64 + band(rex, 15)); rex = 0; psz = 2
end
end
- wputb(shr(op, 16)); op = band(op, 0xffff)
+ wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
end
if op >= 256 then
local b = shr(op, 8)
- if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
- wputb(b)
- op = band(op, 255)
+ if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
+ wputb(b); op = band(op, 255); psz = psz + 1
end
- if rex ~= 0 then wputb(64 + band(rex, 15)) end
+ if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
if sz == "b" then op = op - 1 end
wputb(op)
+ return psz, sk
end
-- Put ModRM or SIB formatted byte.
@@ -583,7 +628,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
end
-- Put ModRM/SIB plus optional displacement.
-local function wputmrmsib(t, imark, s, vsreg)
+local function wputmrmsib(t, imark, s, vsreg, psz, sk)
local vreg, vxreg
local reg, xreg = t.reg, t.xreg
if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -593,8 +638,8 @@ local function wputmrmsib(t, imark, s, vsreg)
-- Register mode.
if sub(t.mode, 1, 1) == "r" then
wputmodrm(3, s, reg)
- if vsreg then waction("VREG", vsreg); wputxb(2) end
- if vreg then waction("VREG", vreg); wputxb(0) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+ wvreg("modrm.rm.r", vreg, psz+1, sk)
return
end
@@ -608,21 +653,22 @@ local function wputmrmsib(t, imark, s, vsreg)
-- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
wputmodrm(0, s, 4)
if imark == "I" then waction("MARK") end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
wputmodrm(t.xsc, xreg, 5)
- if vxreg then waction("VREG", vxreg); wputxb(3) end
+ wvreg("sib.index", vxreg, psz+2, sk)
else
-- Pure 32 bit displacement.
if x64 and tdisp ~= "table" then
wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
+ wvreg("modrm.reg", vsreg, psz+1, sk)
if imark == "I" then waction("MARK") end
wputmodrm(0, 4, 5)
else
riprel = x64
wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
+ wvreg("modrm.reg", vsreg, psz+1, sk)
if imark == "I" then waction("MARK") end
end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
end
if riprel then -- Emit rip-relative displacement.
if match("UWSiI", imark) then
@@ -650,16 +696,16 @@ local function wputmrmsib(t, imark, s, vsreg)
if xreg or band(reg, 7) == 4 then
wputmodrm(m or 2, s, 4) -- ModRM.
if m == nil or imark == "I" then waction("MARK") end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
- if vxreg then waction("VREG", vxreg); wputxb(3) end
- if vreg then waction("VREG", vreg); wputxb(1) end
+ wvreg("sib.index", vxreg, psz+2, sk, vreg)
+ wvreg("sib.base", vreg, psz+2, sk)
else
wputmodrm(m or 2, s, reg) -- ModRM.
if (imark == "I" and (m == 1 or m == 2)) or
(m == nil and (vsreg or vreg)) then waction("MARK") end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
- if vreg then waction("VREG", vreg); wputxb(1) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+ wvreg("modrm.rm.m", vreg, psz+1, sk)
end
-- Put displacement.
@@ -1184,7 +1230,7 @@ local map_op = {
shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
rdtsc_0 = "0F31", -- P1+
- rdpmc_0 = "0F33",
+ rdpmc_0 = "0F33", -- P6+
cpuid_0 = "0FA2", -- P1+
-- floating point ops
@@ -1327,46 +1373,14 @@ local map_op = {
movups_2 = "rmo:0F10rM|mro:0F11Rm",
orpd_2 = "rmo:660F56rM",
orps_2 = "rmo:0F56rM",
- packssdw_2 = "rmo:660F6BrM",
- packsswb_2 = "rmo:660F63rM",
- packuswb_2 = "rmo:660F67rM",
- paddb_2 = "rmo:660FFCrM",
- paddd_2 = "rmo:660FFErM",
- paddq_2 = "rmo:660FD4rM",
- paddsb_2 = "rmo:660FECrM",
- paddsw_2 = "rmo:660FEDrM",
- paddusb_2 = "rmo:660FDCrM",
- paddusw_2 = "rmo:660FDDrM",
- paddw_2 = "rmo:660FFDrM",
- pand_2 = "rmo:660FDBrM",
- pandn_2 = "rmo:660FDFrM",
pause_0 = "F390",
- pavgb_2 = "rmo:660FE0rM",
- pavgw_2 = "rmo:660FE3rM",
- pcmpeqb_2 = "rmo:660F74rM",
- pcmpeqd_2 = "rmo:660F76rM",
- pcmpeqw_2 = "rmo:660F75rM",
- pcmpgtb_2 = "rmo:660F64rM",
- pcmpgtd_2 = "rmo:660F66rM",
- pcmpgtw_2 = "rmo:660F65rM",
pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
- pmaddwd_2 = "rmo:660FF5rM",
- pmaxsw_2 = "rmo:660FEErM",
- pmaxub_2 = "rmo:660FDErM",
- pminsw_2 = "rmo:660FEArM",
- pminub_2 = "rmo:660FDArM",
pmovmskb_2 = "rr/do:660FD7rM",
- pmulhuw_2 = "rmo:660FE4rM",
- pmulhw_2 = "rmo:660FE5rM",
- pmullw_2 = "rmo:660FD5rM",
- pmuludq_2 = "rmo:660FF4rM",
- por_2 = "rmo:660FEBrM",
prefetchnta_1 = "xb:n0F180m",
prefetcht0_1 = "xb:n0F181m",
prefetcht1_1 = "xb:n0F182m",
prefetcht2_1 = "xb:n0F183m",
- psadbw_2 = "rmo:660FF6rM",
pshufd_3 = "rmio:660F70rMU",
pshufhw_3 = "rmio:F30F70rMU",
pshuflw_3 = "rmio:F20F70rMU",
@@ -1380,23 +1394,6 @@ local map_op = {
psrldq_2 = "rio:660F733mU",
psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
- psubb_2 = "rmo:660FF8rM",
- psubd_2 = "rmo:660FFArM",
- psubq_2 = "rmo:660FFBrM",
- psubsb_2 = "rmo:660FE8rM",
- psubsw_2 = "rmo:660FE9rM",
- psubusb_2 = "rmo:660FD8rM",
- psubusw_2 = "rmo:660FD9rM",
- psubw_2 = "rmo:660FF9rM",
- punpckhbw_2 = "rmo:660F68rM",
- punpckhdq_2 = "rmo:660F6ArM",
- punpckhqdq_2 = "rmo:660F6DrM",
- punpckhwd_2 = "rmo:660F69rM",
- punpcklbw_2 = "rmo:660F60rM",
- punpckldq_2 = "rmo:660F62rM",
- punpcklqdq_2 = "rmo:660F6CrM",
- punpcklwd_2 = "rmo:660F61rM",
- pxor_2 = "rmo:660FEFrM",
rcpps_2 = "rmo:0F53rM",
rcpss_2 = "rro:F30F53rM|rx/od:",
rsqrtps_2 = "rmo:0F52rM",
@@ -1640,6 +1637,12 @@ local map_op = {
-- AVX, AVX2 integer ops
-- In general, xmm requires AVX, ymm requires AVX2.
+ vaesdec_3 = "rrmo:660F38VDErM",
+ vaesdeclast_3 = "rrmo:660F38VDFrM",
+ vaesenc_3 = "rrmo:660F38VDCrM",
+ vaesenclast_3 = "rrmo:660F38VDDrM",
+ vaesimc_2 = "rmo:660F38uDBrM",
+ vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
vlddqu_2 = "rxoy:F20FuF0rM",
vmaskmovdqu_2 = "rro:660FuF7rM",
vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
@@ -1880,10 +1883,11 @@ local function dopattern(pat, args, sz, op, needrex)
if t.xreg and t.xreg > 7 then rex = rex + 2 end
if s > 7 then rex = rex + 4 end
if needrex then rex = rex + 16 end
- wputop(szov, opcode, rex, vex); opcode = nil
+ local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
+ opcode = nil
local imark = sub(pat, -1) -- Force a mark (ugly).
-- Put ModRM/SIB with regno/last digit as spare.
- wputmrmsib(t, imark, s, addin and addin.vreg)
+ wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
addin = nil
elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
local b = band(opcode, 255); opcode = shr(opcode, 8)
@@ -1910,8 +1914,8 @@ local function dopattern(pat, args, sz, op, needrex)
if szov == "q" and rex == 0 then rex = rex + 8 end
if needrex then rex = rex + 16 end
if addin and addin.reg == -1 then
- wputop(szov, opcode - 7, rex, vex)
- waction("VREG", addin.vreg); wputxb(0)
+ local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
+ wvreg("opcode", addin.vreg, psz, sk)
else
if addin and addin.reg > 7 then rex = rex + 1 end
wputop(szov, opcode, rex, vex)
@@ -1955,7 +1959,7 @@ local function dopattern(pat, args, sz, op, needrex)
local reg = a.reg
if reg < 0 then
wputb(0)
- waction("VREG", a.vreg); wputxb(5)
+ wvreg("imm.hi", a.vreg)
else
wputb(shl(reg, 4))
end
@@ -2107,8 +2111,8 @@ if x64 then
rex = a.reg > 7 and 9 or 8
end
end
- wputop(sz, opcode, rex)
- if vreg then waction("VREG", vreg); wputxb(0) end
+ local psz, sk = wputop(sz, opcode, rex, nil, vreg)
+ wvreg("opcode", vreg, psz, sk)
if luamode then
waction("IMM_D", format("ffi.cast(\"uintptr_t\", %s) %% 2^32", op64))
waction("IMM_D", format("ffi.cast(\"uintptr_t\", %s) / 2^32", op64))
diff --git a/src/dynasm.lua b/src/dynasm.lua
index 10d93c0f8f..586e2a13dd 100644
--- a/src/dynasm.lua
+++ b/src/dynasm.lua
@@ -1141,14 +1141,13 @@ local function setlang(infile)
g_opt.comment = "--|"
g_opt.endcomment = ""
end
+ -- Set initial defines only available in Lua mode.
+ local ffi = require("ffi")
+ map_def.ARCH = ffi.arch --for `.arch ARCH`
+ map_def[upper(ffi.arch)] = 1 --for `.if X86 ...`
+ map_def.OS = ffi.os --for `.if OS == 'Windows'`
+ map_def[upper(ffi.os)] = 1 --for `.if WINDOWS ...`
end
-
- -- Set initial defines only available in Lua mode.
- local ffi = require'ffi'
- map_def.ARCH = ffi.arch --for `.arch ARCH`
- map_def[upper(ffi.arch)] = 1 --for `.if X86 ...`
- map_def.OS = ffi.os --for `.if OS == 'Windows'`
- map_def[upper(ffi.os)] = 1 --for `.if WINDOWS ...`
end
-- Parse arguments.
diff --git a/src/program/packetblaster/README b/src/program/packetblaster/README
index 6df251bd3e..034c9303d0 100644
--- a/src/program/packetblaster/README
+++ b/src/program/packetblaster/README
@@ -1,14 +1,7 @@
-Usage: packetblaster replay [OPTIONS] ...
+Usage:
+ packetblaster replay
+ packetblaster synth
- -D DURATION, --duration DURATION
- Run for DURATION seconds.
- -h, --help
- Print usage information.
-
-Transmit packets from PCAP-FILE continuously to one or more network
-adapters. The PCI arguments are Lua pattern strings that are used to
-match the network adapters to use.
-
-Examples:
- packetblaster replay myfile.cap 0000:01:00.0
- packetblaster replay myfile.cap 01:00
+Use --help for per-command usage.
+Example:
+ packetblaster synth --help
diff --git a/src/program/packetblaster/packetblaster.lua b/src/program/packetblaster/packetblaster.lua
index 4deb70d3f9..e1e4124b37 100644
--- a/src/program/packetblaster/packetblaster.lua
+++ b/src/program/packetblaster/packetblaster.lua
@@ -9,33 +9,67 @@ local intel_app = require("apps.intel.intel_app")
local basic_apps = require("apps.basic.basic_apps")
local main = require("core.main")
local PcapReader= require("apps.pcap.pcap").PcapReader
+local Synth = require("apps.test.synth").Synth
local LoadGen = require("apps.intel.loadgen").LoadGen
local lib = require("core.lib")
local ffi = require("ffi")
local C = ffi.C
local usage = require("program.packetblaster.README_inc")
+local usage_replay = require("program.packetblaster.replay.README_inc")
+local usage_synth = require("program.packetblaster.synth.README_inc")
local long_opts = {
duration = "D",
- help = "h"
+ help = "h",
+ src = "s",
+ dst = "d",
+ sizes = "S"
}
function run (args)
local opt = {}
+ local mode = table.remove(args, 1)
local duration
- function opt.D (arg) duration = tonumber(arg) end
- function opt.h (arg) print(usage) main.exit(1) end
- if #args < 3 or table.remove(args, 1) ~= 'replay' then opt.h() end
- args = lib.dogetopt(args, opt, "hD:", long_opts)
- local filename = table.remove(args, 1)
- local patterns = args
local c = config.new()
- config.app(c, "pcap", PcapReader, filename)
- config.app(c, "loop", basic_apps.Repeater)
- config.app(c, "tee", basic_apps.Tee)
- config.link(c, "pcap.output -> loop.input")
- config.link(c, "loop.output -> tee.input")
+ function opt.D (arg)
+ duration = assert(tonumber(arg), "duration is not a number!")
+ end
+ function opt.h (arg)
+ if mode == 'replay' then print(usage_replay)
+ elseif mode == 'synth' then print(usage_synth)
+ else print(usage) end
+ main.exit(1)
+ end
+ if mode == 'replay' and #args > 1 then
+ args = lib.dogetopt(args, opt, "hD:", long_opts)
+ local filename = table.remove(args, 1)
+ config.app(c, "pcap", PcapReader, filename)
+ config.app(c, "loop", basic_apps.Repeater)
+ config.app(c, "source", basic_apps.Tee)
+ config.link(c, "pcap.output -> loop.input")
+ config.link(c, "loop.output -> source.input")
+ elseif mode == 'synth' and #args >= 1 then
+ local source
+ local destination
+ local sizes
+ function opt.s (arg) source = arg end
+ function opt.d (arg) destination = arg end
+ function opt.S (arg)
+ sizes = {}
+ for size in string.gmatch(arg, "%d+") do
+ sizes[#sizes+1] = tonumber(size)
+ end
+ end
+
+ args = lib.dogetopt(args, opt, "hD:s:d:S:", long_opts)
+ config.app(c, "source", Synth, { sizes = sizes,
+ src = source,
+ dst = destination })
+ else
+ opt.h()
+ end
+ local patterns = args
local nics = 0
pci.scan_devices()
for _,device in ipairs(pci.devices) do
@@ -43,7 +77,7 @@ function run (args)
nics = nics + 1
local name = "nic"..nics
config.app(c, name, LoadGen, device.pciaddress)
- config.link(c, "tee."..tostring(nics).."->"..name..".input")
+ config.link(c, "source."..tostring(nics).."->"..name..".input")
end
end
assert(nics > 0, " matches no suitable devices.")
@@ -74,4 +108,3 @@ function is_device_suitable (pcidev, patterns)
end
end
-
diff --git a/src/program/packetblaster/replay/README b/src/program/packetblaster/replay/README
new file mode 100644
index 0000000000..4bc2a3fe40
--- /dev/null
+++ b/src/program/packetblaster/replay/README
@@ -0,0 +1,14 @@
+Usage: packetblaster replay [OPTIONS] <PCAPFILE> <PCI>...
+
+ -D DURATION, --duration DURATION
+ Run for DURATION seconds.
+ Default: unlimited
+ -h, --help
+ Print usage information.
+
+packetblaster transmits packets continuously to one or more network adapters.
+The PCI arguments are Lua pattern strings that are used to match the network
+adapters to use. The packets are extracted from PCAPFILE.
+
+Examples:
+ packetblaster replay myfile.cap 0000:01:00.0
diff --git a/src/program/packetblaster/replay/README.inc b/src/program/packetblaster/replay/README.inc
new file mode 120000
index 0000000000..100b93820a
--- /dev/null
+++ b/src/program/packetblaster/replay/README.inc
@@ -0,0 +1 @@
+README
\ No newline at end of file
diff --git a/src/program/packetblaster/selftest.sh b/src/program/packetblaster/selftest.sh
index 0784d0b28d..881f9fa0d8 100755
--- a/src/program/packetblaster/selftest.sh
+++ b/src/program/packetblaster/selftest.sh
@@ -7,10 +7,17 @@ if [ -z "${PCIADDR}" ]; then
echo "selftest: skipping test - SNABB_PCI_INTEL0/SNABB_PCI0 not set"
exit 43
fi
-
+
# Simple test: Just make sure packetblaster runs for a period of time
# (doesn't crash on startup).
-timeout 5 ./snabb packetblaster replay program/snabbnfv/test_fixtures/pcap/64.pcap ${SNABB_PCI0}
+timeout 5 ./snabb packetblaster replay program/snabbnfv/test_fixtures/pcap/64.pcap ${PCIADDR}
+status=$?
+if [ $status != 124 ]; then
+ echo "Error: expected timeout (124) but got ${status}"
+ exit 1
+fi
+
+timeout 5 ./snabb packetblaster synth --src 11:11:11:11:11:11 --dst 22:22:22:22:22:22 --sizes 64,128,256 ${PCIADDR}
status=$?
if [ $status != 124 ]; then
echo "Error: expected timeout (124) but got ${status}"
diff --git a/src/program/packetblaster/synth/README b/src/program/packetblaster/synth/README
new file mode 100644
index 0000000000..77630edd71
--- /dev/null
+++ b/src/program/packetblaster/synth/README
@@ -0,0 +1,25 @@
+Usage: packetblaster synth [OPTIONS] <PCI>...
+
+ -s SOURCE, --src SOURCE
+ Source MAC-Address.
+ Default: 00:00:00:00:00:00
+ -d DESTINATION, --dst DESTINATION
+ Destination MAC-Address.
+ Default: 00:00:00:00:00:00
+ -S SIZES, --sizes SIZES
+ A comma separated list of numbers. Send packets of
+ SIZES bytes.
+ Default: 64
+ -D DURATION, --duration DURATION
+ Run for DURATION seconds.
+ Default: unlimited
+ -h, --help
+ Print usage information.
+
+packetblaster transmits packets continuously to one or more network adapters.
+The PCI arguments are Lua pattern strings that are used to match the network
+adapters to use. The packets are synthesized according to SOURCE,
+DESTINATION and SIZES.
+
+Examples:
+ packetblaster synth -d 22:22:22:22:22:22 -S 32,64,128 01:00.0
diff --git a/src/program/packetblaster/synth/README.inc b/src/program/packetblaster/synth/README.inc
new file mode 120000
index 0000000000..100b93820a
--- /dev/null
+++ b/src/program/packetblaster/synth/README.inc
@@ -0,0 +1 @@
+README
\ No newline at end of file