Skip to content

Commit

Permalink
pulley: Add a multiply-and-add macro instruction (bytecodealliance#10081
Browse files Browse the repository at this point in the history
)

Multiply-and-add is present in the riscv64 and aarch64 native ISAs and showed
up in a benchmark I was looking at, so let's add a macro-op for it as well to
help cases where this pattern crops up in the wild.
  • Loading branch information
alexcrichton authored Jan 22, 2025
1 parent ca95576 commit 2f27a10
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 8 deletions.
23 changes: 15 additions & 8 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -195,27 +195,27 @@
(rule 1 (lower (has_type $I64 (iadd a b))) (pulley_xadd64 a b))

;; Fold constants into the instruction if possible
(rule 2 (lower (has_type (ty_int (fits_in_32 _)) (iadd a (u32_from_iconst b))))
(rule 10 (lower (has_type (ty_int (fits_in_32 _)) (iadd a (u32_from_iconst b))))
(pulley_xadd32_u32 a b))
(rule 3 (lower (has_type (ty_int (fits_in_32 _)) (iadd a (u8_from_iconst b))))
(rule 11 (lower (has_type (ty_int (fits_in_32 _)) (iadd a (u8_from_iconst b))))
(pulley_xadd32_u8 a b))
(rule 4 (lower (has_type $I64 (iadd a (u32_from_iconst b))))
(rule 12 (lower (has_type $I64 (iadd a (u32_from_iconst b))))
(pulley_xadd64_u32 a b))
(rule 5 (lower (has_type $I64 (iadd a (u8_from_iconst b))))
(rule 13 (lower (has_type $I64 (iadd a (u8_from_iconst b))))
(pulley_xadd64_u8 a b))

;; If the rhs is a constant and the negated version can fit within a smaller
;; constant then switch this to a subtraction with the negated constant.
(rule 6 (lower (has_type (ty_int (fits_in_32 _)) (iadd a b)))
(rule 14 (lower (has_type (ty_int (fits_in_32 _)) (iadd a b)))
(if-let c (u32_from_negated_iconst b))
(pulley_xsub32_u32 a c))
(rule 7 (lower (has_type $I64 (iadd a b)))
(rule 15 (lower (has_type $I64 (iadd a b)))
(if-let c (u32_from_negated_iconst b))
(pulley_xsub64_u32 a c))
(rule 8 (lower (has_type (ty_int (fits_in_32 _)) (iadd a b)))
(rule 16 (lower (has_type (ty_int (fits_in_32 _)) (iadd a b)))
(if-let c (u8_from_negated_iconst b))
(pulley_xsub32_u8 a c))
(rule 9 (lower (has_type $I64 (iadd a b)))
(rule 17 (lower (has_type $I64 (iadd a b)))
(if-let c (u8_from_negated_iconst b))
(pulley_xsub64_u8 a c))

Expand Down Expand Up @@ -258,6 +258,13 @@
(rule 1 (lower (has_type $I16X8 (sadd_sat a b))) (pulley_vaddi16x8_sat a b))
(rule 1 (lower (has_type $I16X8 (uadd_sat a b))) (pulley_vaddu16x8_sat a b))

;; Specialized lowerings for multiply-and-add
;;
;; An `iadd` with an `imul` as one of its operands is lowered to a single
;; `pulley_xmadd32`/`pulley_xmadd64`. Each type has two rules so the multiply is
;; matched whether it is the first or the second operand of the add; in both
;; forms `a` and `b` are the multiply operands and `c` is the addend.
;;
;; The two forms use different priorities (2 and 3). Both patterns match when
;; both operands of the add are multiplies; in that case the priority-3 rule
;; (multiply as second operand) is presumably the one selected -- confirm
;; against the ISLE priority semantics.

(rule 2 (lower (has_type $I32 (iadd (imul a b) c))) (pulley_xmadd32 a b c))
(rule 3 (lower (has_type $I32 (iadd c (imul a b)))) (pulley_xmadd32 a b c))
(rule 2 (lower (has_type $I64 (iadd (imul a b) c))) (pulley_xmadd64 a b c))
(rule 3 (lower (has_type $I64 (iadd c (imul a b)))) (pulley_xmadd64 a b c))

;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I16X8 (iadd_pairwise a b))) (pulley_vaddpairwisei16x8_s a b))
Expand Down
16 changes: 16 additions & 0 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1520,6 +1520,22 @@ impl OpVisitor for Interpreter<'_> {
ControlFlow::Continue(())
}

/// 32-bit multiply-and-add.
///
/// Reads `src1`, `src2` and `src3` as `u32`, computes
/// `src1 * src2 + src3` with wrapping arithmetic, and writes the result to
/// `dst` through `set_u32`. Always continues to the next instruction.
fn xmadd32(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
    let product = self.state[src1]
        .get_u32()
        .wrapping_mul(self.state[src2].get_u32());
    let sum = product.wrapping_add(self.state[src3].get_u32());
    self.state[dst].set_u32(sum);
    ControlFlow::Continue(())
}

/// 64-bit multiply-and-add.
///
/// Reads `src1`, `src2` and `src3` as `u64`, computes
/// `src1 * src2 + src3` with wrapping arithmetic, and writes the result to
/// `dst` through `set_u64`. Always continues to the next instruction.
fn xmadd64(&mut self, dst: XReg, src1: XReg, src2: XReg, src3: XReg) -> ControlFlow<Done> {
    let product = self.state[src1]
        .get_u64()
        .wrapping_mul(self.state[src2].get_u64());
    let sum = product.wrapping_add(self.state[src3].get_u64());
    self.state[dst].set_u64(sum);
    ControlFlow::Continue(())
}

fn xsub32(&mut self, operands: BinaryOperands<XReg>) -> ControlFlow<Done> {
let a = self.state[operands.src1].get_u32();
let b = self.state[operands.src2].get_u32();
Expand Down
5 changes: 5 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,11 @@ macro_rules! for_each_op {
/// Same as `xadd64` but `src2` is a zero-extended 32-bit immediate.
xadd64_u32 = Xadd64U32 { dst: XReg, src1: XReg, src2: u32 };

/// 32-bit wrapping multiply-and-add:
/// `low32(dst) = low32(src1) * low32(src2) + low32(src3)`.
xmadd32 = Xmadd32 { dst: XReg, src1: XReg, src2: XReg, src3: XReg };
/// 64-bit wrapping multiply-and-add: `dst = src1 * src2 + src3`.
xmadd64 = Xmadd64 { dst: XReg, src1: XReg, src2: XReg, src3: XReg };

/// 32-bit wrapping subtraction: `low32(dst) = low32(src1) - low32(src2)`.
///
/// The upper 32-bits of `dst` are unmodified.
Expand Down
25 changes: 25 additions & 0 deletions tests/disas/pulley/madd.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
;;! target = "pulley32"
;;! test = "compile"

(module
;; Computes `(param0 * param1) + param2` on i32 values. The expected
;; disassembly at the end of this file shows a single `xmadd32` for it.
(func $madd32 (param i32 i32 i32) (result i32)
(i32.add
(i32.mul (local.get 0) (local.get 1))
(local.get 2)))

;; Same shape as `$madd32` but on i64 values; the expected disassembly shows
;; a single `xmadd64`.
(func $madd64 (param i64 i64 i64) (result i64)
(i64.add
(i64.mul (local.get 0) (local.get 1))
(local.get 2)))
)
;; wasm[0]::function[0]::madd32:
;; push_frame
;; xmadd32 x0, x2, x3, x4
;; pop_frame
;; ret
;;
;; wasm[0]::function[1]::madd64:
;; push_frame
;; xmadd64 x0, x2, x3, x4
;; pop_frame
;; ret

0 comments on commit 2f27a10

Please sign in to comment.