Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/v2.1' into v2.1
Browse files: browse the repository at this point in the history
  • Loading branch information
xrSimpodin committed Dec 2, 2023
2 parents (3209fd6 + 43d0a19); commit 60d81a5
Show file tree
Hide file tree
Showing 19 changed files with 229 additions and 83 deletions.
3 changes: 1 addition & 2 deletions src/jit/dis_arm64.lua
Original file line number Diff line number Diff line change
Expand Up @@ -985,8 +985,7 @@ local function disass_ins(ctx)
x = x.."]"
end
elseif p == "P" then
local opcv, sh = rshift(op, 26), 2
if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1))
local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
local rn = map_regs.x[band(rshift(op, 5), 31)]
local ind = band(rshift(op, 23), 3)
Expand Down
28 changes: 18 additions & 10 deletions src/lj_asm_arm.h
Original file line number Diff line number Diff line change
Expand Up @@ -969,24 +969,32 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, ARMI_LDR, dest, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
emit_opk(as, ARMI_ADD, dest, uv,
}
if (ir->o == IR_UREFC)
emit_opk(as, ARMI_ADD, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
else
emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, ARMI_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else {
emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
emit_lso(as, ARMI_LDR, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}

Expand Down
20 changes: 14 additions & 6 deletions src/lj_asm_arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -931,22 +931,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, A64I_LDRx, dest, v);
} else {
if (ir->o == IR_UREFC) {
asm_guardcnb(as, A64I_CBZ, RID_TMP);
if (guarded)
asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP);
if (ir->o == IR_UREFC)
emit_opk(as, A64I_ADDx, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
else
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_lso(as, A64I_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadu64(as, dest, k);
} else {
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
}
emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
}
}

Expand Down
27 changes: 17 additions & 10 deletions src/lj_asm_mips.h
Original file line number Diff line number Diff line change
Expand Up @@ -1207,22 +1207,29 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
if (guarded)
asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
if (ir->o == IR_UREFC)
emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, dest, o);
} else {
emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}

Expand Down
27 changes: 17 additions & 10 deletions src/lj_asm_ppc.h
Original file line number Diff line number Diff line change
Expand Up @@ -840,23 +840,30 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
}
if (ir->o == IR_UREFC)
emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
else
emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
if (guarded)
emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loadi(as, dest, k);
} else {
emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
emit_tai(as, PPCI_LWZ, uv, func,
(int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}

Expand Down
49 changes: 31 additions & 18 deletions src/lj_asm_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
/* Check if there's no conflicting instruction between curins and ref.
** Also avoid fusing loads if there are multiple references.
*/
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
{
IRIns *ir = as->ir;
IRRef i = as->curins;
Expand All @@ -118,7 +118,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
while (--i > ref) {
if (ir[i].o == conflict)
return 0; /* Conflict found. */
else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS))
return 0;
else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
return 0;
}
return 1; /* Ok, no conflict. */
Expand All @@ -134,7 +136,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
/* We can avoid the FLOAD of t->array for colocated arrays. */
if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
return irb->op1; /* Table obj. */
}
Expand Down Expand Up @@ -456,7 +458,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
if (ir->o == IR_SLOAD) {
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
noconflict(as, ref, IR_RETF, 0) &&
noconflict(as, ref, IR_RETF, 2) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
Expand All @@ -467,12 +469,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
} else if (ir->o == IR_FLOAD) {
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
noconflict(as, ref, IR_FSTORE, 0)) {
noconflict(as, ref, IR_FSTORE, 2)) {
asm_fusefref(as, ir, xallow);
return RID_MRM;
}
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM;
Expand All @@ -482,7 +484,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
noconflict(as, ref, IR_XSTORE, 0)) {
noconflict(as, ref, IR_XSTORE, 2)) {
asm_fusexref(as, ir->op1, xallow);
return RID_MRM;
}
Expand Down Expand Up @@ -815,6 +817,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
emit_rr(as, XO_UCOMISD, left, tmp);
emit_rr(as, XO_CVTSI2SD, tmp, dest);
emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
checkmclim(as);
emit_rr(as, XO_CVTTSD2SI, dest, left);
/* Can't fuse since left is needed twice. */
}
Expand Down Expand Up @@ -857,6 +860,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
emit_rma(as, XO_MOVSD, bias, k);
checkmclim(as);
emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
return;
} else { /* Integer to FP conversion. */
Expand Down Expand Up @@ -1173,6 +1177,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
asm_guardcc(as, CC_E);
else
emit_sjcc(as, CC_E, l_end);
checkmclim(as);
if (irt_isnum(kt)) {
if (isk) {
/* Assumes -0.0 is already canonicalized to +0.0. */
Expand Down Expand Up @@ -1232,7 +1237,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
#endif
}
emit_sfixup(as, l_loop);
checkmclim(as);
#if LJ_GC64
if (!isk && irt_isaddr(kt)) {
emit_rr(as, XO_OR, tmp|REX_64, key);
Expand All @@ -1259,6 +1263,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
checkmclim(as);
emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
Expand All @@ -1276,7 +1281,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
} else {
emit_rr(as, XO_MOV, tmp, key);
#if LJ_GC64
checkmclim(as);
emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
if ((as->flags & JIT_F_BMI2)) {
emit_i8(as, 32);
Expand Down Expand Up @@ -1373,24 +1377,31 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_rma(as, XO_MOV, dest|REX_GC64, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
if (ir->o == IR_UREFC)
emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
asm_guardcc(as, CC_NE);
emit_i8(as, 1);
else
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
if (guarded) {
asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
emit_i8(as, 0);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
}
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
emit_loada(as, uv, o);
} else {
emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
emit_rmro(as, XO_MOV, uv|REX_GC64, func,
(int32_t)offsetof(GCfuncL, uvptr) +
(int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}

Expand Down Expand Up @@ -1547,6 +1558,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1);
if (ra_hasreg(dest)) {
checkmclim(as);
asm_fuseahuref(as, ir->op1, RSET_GPR);
if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
Expand Down Expand Up @@ -1594,6 +1606,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
"bad load type %d", irt_type(ir->t));
checkmclim(as);
#if LJ_GC64
emit_u32(as, LJ_TISNUM << 15);
#else
Expand Down
4 changes: 3 additions & 1 deletion src/lj_cparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -1766,9 +1766,11 @@ static void cp_pragma(CPState *cp, BCLine pragmaline)
cp_check(cp, '(');
if (cp->tok == CTOK_IDENT) {
if (cp_str_is(cp->str, "push")) {
if (cp->curpack < CPARSE_MAX_PACKSTACK) {
if (cp->curpack < CPARSE_MAX_PACKSTACK-1) {
cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
cp->curpack++;
} else {
cp_errmsg(cp, cp->tok, LJ_ERR_XLEVELS);
}
} else if (cp_str_is(cp->str, "pop")) {
if (cp->curpack > 0) cp->curpack--;
Expand Down
11 changes: 7 additions & 4 deletions src/lj_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,12 +259,8 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
#else
unsigned char _BitScanForward(unsigned long *, unsigned long);
unsigned char _BitScanReverse(unsigned long *, unsigned long);
unsigned char _BitScanForward64(unsigned long *, uint64_t);
unsigned char _BitScanReverse64(unsigned long *, uint64_t);
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)

static LJ_AINLINE uint32_t lj_ffs(uint32_t x)
{
Expand All @@ -276,6 +272,12 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r;
}

#if defined(_M_X64) || defined(_M_ARM64)
unsigned char _BitScanForward64(unsigned long *, uint64_t);
unsigned char _BitScanReverse64(unsigned long *, uint64_t);
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)

static LJ_AINLINE uint32_t lj_ffs64(uint64_t x)
{
unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r;
Expand All @@ -286,6 +288,7 @@ static LJ_AINLINE uint32_t lj_fls64(uint64_t x)
unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r;
}
#endif
#endif

unsigned long _byteswap_ulong(unsigned long);
uint64_t _byteswap_uint64(uint64_t);
Expand Down
Loading

0 comments on commit 60d81a5

Please sign in to comment.