Skip to content

Commit

Permalink
ISLE: aarch64: fix narrow sdiv on intmin/-1 (#9541)
Browse files Browse the repository at this point in the history
* Fix a missing trap on narrow sdiv of intmin/-1 by left-shifting before checking for intmin

Co-authored-by: Michael McLoughlin <[email protected]>

* Remove non-upstreamed verifier attributes

---------

Co-authored-by: Michael McLoughlin <[email protected]>
  • Loading branch information
avanhatt and mmcloughlin authored Nov 1, 2024
1 parent 1499840 commit 025fea0
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 15 deletions.
39 changes: 26 additions & 13 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,12 @@
(Size64 64)
(Size128 128)))

;; Difference (32 - ty), useful for narrow calculations with 32-bit
;; instructions.
(decl diff_from_32 (Type) u8)
(rule (diff_from_32 $I8) 24)
(rule (diff_from_32 $I16) 16)

(type ScalarSize extern
(enum Size8
Size16
Expand Down Expand Up @@ -3664,24 +3670,18 @@

;; Check for signed overflow. The only case is min_value / -1.
;; The following checks must be done in 32-bit or 64-bit, depending
;; on the input type.
(spec (trap_if_div_overflow ty x y)
(provide (= x result)
(if (= ty 32)
(not (and (= #x00000000 (extract 31 0 y))
(= #x80000000 (extract 31 0 y))))
(not (and (= #x0000000000000000 y)
(= #x8000000000000000 y))))))
(decl trap_if_div_overflow (Type Reg Reg) Reg)
(rule (trap_if_div_overflow ty x y)
;; on the input type. For 8- and 16- bit, the check for x == min_value
;; must use a possibly-shifted value, xcheck, to overflow as expected.
(decl trap_if_div_overflow (Type Reg Reg Reg) Reg)
(rule (trap_if_div_overflow ty xcheck x y)
(let (
;; Check RHS is -1.
(_ Unit (emit (MInst.AluRRImm12 (ALUOp.AddS) (operand_size ty) (writable_zero_reg) y (u8_into_imm12 1))))

;; Check LHS is min_value, by subtracting 1 and branching if
;; there is overflow.
;; Check LHS is min_value, by subtracting 1 from the possibly-shifted
;; value and branching if there is overflow.
(_ Unit (emit (MInst.CCmpImm (size_from_ty ty)
x
xcheck
(u8_into_uimm5 1)
(nzcv $false $false $false $false)
(Cond.Eq))))
Expand All @@ -3690,6 +3690,19 @@
)
x))

;; In the cases narrower than a register width, subtracting 1 from the
;; min_value will not cause overflow (e.g., I8's min_value of -128 stored in
;; a 32-bit register produces -129 with no overflow). However, if we left shift
;; x by (32 - ty), we then produce the 32-bit min_value for the respective min
;; values of I8 and I16.
;; E.g., I8's 0x00000080 left-shifted by 24 is 0x80000000, which overflows.
(decl intmin_check (Type Reg) Reg)
(rule intmin_check_fits_in_16 (intmin_check (fits_in_16 ty) x)
(alu_rr_imm_shift (ALUOp.Lsl) ty x (imm_shift_from_u8 (diff_from_32 ty))))

;; In the I32 or I64 case, checking x itself against the min_value is fine.
(rule -1 (intmin_check ty x) x)

;; Check for unsigned overflow.
(decl trap_if_overflow (ProducesFlags TrapCode) Reg)
(rule (trap_if_overflow producer tc)
Expand Down
8 changes: 6 additions & 2 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,7 @@
;; TODO: Add SDiv32 to implement 32-bit directly, rather
;; than extending the input.
;;
;; The sequence of checks here should look like:
;; The sequence of checks here should look like this for 32 or 64 bits:
;;
;; cbnz rm, #8
;; udf ; divide by zero
Expand All @@ -1064,6 +1064,9 @@
;; b.vc #8
;; udf ; signed overflow
;;
;; In the narrow 8 or 16 bit case, we need to insert an additional left-shift
;; to check for the minimum value using the 32-bit ccmp instruction.
;;
;; Note The div instruction does not trap on divide by zero or overflow, so
;; checks need to be manually inserted.
;;
Expand All @@ -1072,7 +1075,8 @@
(rule sdiv_base_case (lower (has_type (fits_in_64 ty) (sdiv x y)))
(let ((x64 Reg (put_in_reg_sext64 x))
(y64 Reg (put_nonzero_in_reg_sext64 y))
(valid_x64 Reg (trap_if_div_overflow ty x64 y64))
(intmin_check_x Reg (intmin_check ty x64))
(valid_x64 Reg (trap_if_div_overflow ty intmin_check_x x64 y64))
(result Reg (a64_sdiv $I64 valid_x64 y64)))
result))

Expand Down
123 changes: 123 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/trap_sdiv.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
test compile precise-output
target aarch64

function %div8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = sdiv v0, v1
return v2
}

; VCode:
; block0:
; sxtb x3, w0
; sxtb x5, w1
; cbz x5, #trap=int_divz
; lsl w8, w3, #24
; adds wzr, w5, #1
; ccmp w8, #1, #nzcv, eq
; b.vs #trap=int_ovf
; sdiv x0, x3, x5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sxtb x3, w0
; sxtb x5, w1
; cbz x5, #0x24
; lsl w8, w3, #0x18
; cmn w5, #1
; ccmp w8, #1, #0, eq
; b.vs #0x28
; sdiv x0, x3, x5
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_divz
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf

function %div16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = sdiv v0, v1
return v2
}

; VCode:
; block0:
; sxth x3, w0
; sxth x5, w1
; cbz x5, #trap=int_divz
; lsl w8, w3, #16
; adds wzr, w5, #1
; ccmp w8, #1, #nzcv, eq
; b.vs #trap=int_ovf
; sdiv x0, x3, x5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sxth x3, w0
; sxth x5, w1
; cbz x5, #0x24
; lsl w8, w3, #0x10
; cmn w5, #1
; ccmp w8, #1, #0, eq
; b.vs #0x28
; sdiv x0, x3, x5
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_divz
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf

function %div32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = sdiv v0, v1
return v2
}

; VCode:
; block0:
; sxtw x3, w0
; sxtw x5, w1
; cbz x5, #trap=int_divz
; adds wzr, w5, #1
; ccmp w3, #1, #nzcv, eq
; b.vs #trap=int_ovf
; sdiv x0, x3, x5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sxtw x3, w0
; sxtw x5, w1
; cbz x5, #0x20
; cmn w5, #1
; ccmp w3, #1, #0, eq
; b.vs #0x24
; sdiv x0, x3, x5
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_divz
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf

function %div64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = sdiv v0, v1
return v2
}

; VCode:
; block0:
; cbz x1, #trap=int_divz
; adds xzr, x1, #1
; ccmp x0, #1, #nzcv, eq
; b.vs #trap=int_ovf
; sdiv x0, x0, x1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; cbz x1, #0x18
; cmn x1, #1
; ccmp x0, #1, #0, eq
; b.vs #0x1c
; sdiv x0, x0, x1
; ret
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_divz
; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf

0 comments on commit 025fea0

Please sign in to comment.