From 7d78789deb16ac5d6b077fcf614733d3987b54ae Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 22 Jan 2025 14:53:11 -0600 Subject: [PATCH] pulley: Slightly optimize bounds checks (#10080) * pulley: Slightly optimize bounds checks While profiling a module I noticed that the previous `xbc32_bound_trap` instruction wasn't being used when I expected. Investigation revealed that the load of the bound itself was GVN'd and deduplicated (yay!) but it meant that the load was used in two locations, meaning it didn't pass checks for `sinkable_load`. This commit fixes this by repurposing `xbc32_bound_trap` for "the bound is in a register" and renaming the previous instruction to `xbc32_boundne_trap`. This helps cut down on the number of opcodes in this benchmark and improves performance slightly. At the same time this tightens up "sinkable loads" to require native endianness since that's what the bound of memory is stored as. In addition to recognizing and optimizing `a < b`, this now also optimizes `b > a`, which is the same condition with the arguments swapped. 
* Fix some copy/paste typos --- .../codegen/src/isa/pulley_shared/inst.isle | 14 ++- .../codegen/src/isa/pulley_shared/lower.isle | 40 ++++-- .../src/isa/pulley_shared/lower/isle.rs | 4 + .../filetests/filetests/isa/pulley32/xbc.clif | 104 ++++++++++++++++ .../filetests/filetests/isa/pulley64/xbc.clif | 114 ++++++++++++++++++ pulley/src/interp.rs | 14 ++- pulley/src/lib.rs | 11 +- tests/disas/pulley/pulley32_memory32.wat | 28 ++--- tests/disas/pulley/pulley64_memory32.wat | 43 ++++--- 9 files changed, 329 insertions(+), 43 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/pulley32/xbc.clif create mode 100644 cranelift/filetests/filetests/isa/pulley64/xbc.clif diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index 19dbc961396e..8aacf382e228 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -178,14 +178,26 @@ (extern constructor endianness endianness) (type Endianness extern (enum Little Big)) +(decl pure is_native_endianness (Endianness) bool) +(extern constructor is_native_endianness is_native_endianness) + ;; Partial constructor and type representing a "sinkable load" which can be ;; moved into another instruction. Note that `SinkableLoad` should not be used ;; as-is and should instead be converted to a `SunkLoad`. +;; +;; To be a sinkable load the load must pass: +;; +;; * The `is_sinkable_inst` shared amongst backends test must be `true` +;; * The load must be in "native endianness" +;; * The static offset must fit in an unsigned 8-bit integer. +;; +;; If the last two requirements here are too restrictive then multiple helpers +;; might be needed in the future. 
(type SinkableLoad (enum (Load (inst Inst) (ty Type) (addr Value) (offset u8)))) (decl pure partial sinkable_load (Value) SinkableLoad) (rule (sinkable_load value @ (value_type ty)) (if-let inst @ (load flags addr (offset32 offset)) (is_sinkable_inst value)) - (if-let (Endianness.Little) (endianness flags)) + (if-let true (is_native_endianness (endianness flags))) (if-let offset8 (u8_try_from_i32 offset)) (SinkableLoad.Load inst ty addr offset8)) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 7f0d88e17352..b08626cbb4f4 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -123,23 +123,41 @@ ;; Each of these translates to a single "xbc" (x-register bounds check) ;; instruction. ;; -;; Note that there are two cases here, one for 32-bit hosts and one for 64-bit -;; hosts. They lower to the same `xbc32_bound_trap` instruction which has -;; different semantics on 32/64-bit but uses the 32-bit address as an argument -;; on both platforms. +;; Currently there's a 2x2 matrix here. One axis is 32-bit hosts and 64-bit +;; hosts while the other axis is `a < b` vs `a > b`. These all bottom out +;; in the `emit_xbc32` helper below. 
(rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan) a @ (value_type $I32) (isub b (u8_from_iconst size))) code)) (if-let (PointerWidth.PointerWidth32) (pointer_width)) - (if-let load (sinkable_load b)) - (side_effect (emit_xbc32 a load size code))) + (side_effect (emit_xbc32 a b size code))) + +(rule 1 (lower (trapnz (icmp (IntCC.UnsignedLessThan) (isub b (u8_from_iconst size)) a @ (value_type $I32)) code)) + (if-let (PointerWidth.PointerWidth32) (pointer_width)) + (side_effect (emit_xbc32 a b size code))) (rule 1 (lower (trapnz (icmp (IntCC.UnsignedGreaterThan) (uextend a @ (value_type $I32)) (isub b (u8_from_iconst size))) code)) (if-let (PointerWidth.PointerWidth64) (pointer_width)) - (if-let load (sinkable_load b)) - (side_effect (emit_xbc32 a load size code))) + (side_effect (emit_xbc32 a b size code))) -(decl emit_xbc32 (Value SunkLoad u8 TrapCode) SideEffectNoResult) -(rule (emit_xbc32 a (SunkLoad.Load _ bound_addr bound_off) size code) - (pulley_xbc32_bound_trap a bound_addr bound_off size code)) +(rule 1 (lower (trapnz (icmp (IntCC.UnsignedLessThan) (isub b (u8_from_iconst size)) (uextend a @ (value_type $I32))) code)) + (if-let (PointerWidth.PointerWidth64) (pointer_width)) + (side_effect (emit_xbc32 a b size code))) + +;; Helper to emit a bounds check which traps if the first value is greater than +;; the second value minus the immediate size provided here. +;; +;; This helper will see if the second value is a sinkable load in which case +;; it can fold the load directly into the "xbc" instruction. Otherwise a +;; simpler "xbc" instruction is used. 
+(decl emit_xbc32 (Value Value u8 TrapCode) SideEffectNoResult) +(rule 0 (emit_xbc32 a bound size code) + (pulley_xbc32_bound_trap a bound size code)) +(rule 1 (emit_xbc32 a bound size code) + (if-let load (sinkable_load bound)) + (emit_xbc32_sunk a load size code)) + +(decl emit_xbc32_sunk (Value SunkLoad u8 TrapCode) SideEffectNoResult) +(rule (emit_xbc32_sunk a (SunkLoad.Load _ bound_addr bound_off) size code) + (pulley_xbc32_boundne_trap a bound_addr bound_off size code)) ;;;; Rules for `get_stack_pointer` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs index 8e2e62f052e7..a92c63aa4213 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs +++ b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs @@ -133,6 +133,10 @@ where flags.endianness(self.backend.isa_flags.endianness()) } + fn is_native_endianness(&mut self, endianness: &Endianness) -> bool { + *endianness == self.backend.isa_flags.endianness() + } + fn pointer_width(&mut self) -> PointerWidth { P::pointer_width() } diff --git a/cranelift/filetests/filetests/isa/pulley32/xbc.clif b/cranelift/filetests/filetests/isa/pulley32/xbc.clif new file mode 100644 index 000000000000..7f9534736b59 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/xbc.clif @@ -0,0 +1,104 @@ +test compile precise-output +target pulley32 + +function %simple(i32, i32) { +block0(v0: i32, v1: i32): + v2 = load.i32 v0+16 + v3 = iconst.i32 24 + v4 = isub v2, v3 + v5 = icmp ugt v1, v4 + trapnz v5, user1 + return +} + +; VCode: +; block0: +; xbc32_boundne_trap x1, x0, 16, 24 // trap=TrapCode(1) +; ret +; +; Disassembled: +; xbc32_boundne_trap x1, x0, 16, 24 +; ret + +function %swapped_args(i32, i32) { +block0(v0: i32, v1: i32): + v2 = load.i32 v0+16 + v3 = iconst.i32 24 + v4 = isub v2, v3 + v5 = icmp ult v4, v1 + trapnz v5, user1 + return +} + +; VCode: +; block0: +; xbc32_boundne_trap x1, x0, 16, 
24 // trap=TrapCode(1) +; ret +; +; Disassembled: +; xbc32_boundne_trap x1, x0, 16, 24 +; ret + +function %twice(i32, i32, i32) { +block0(v0: i32, v1: i32, v2: i32): + ;; load the bound & calculate what to check against + v3 = load.i32 v0+16 + v4 = iconst.i32 24 + v5 = isub v3, v4 + + ;; check v1 + v6 = icmp ugt v1, v5 + trapnz v6, user1 + + ;; check v2 + v7 = icmp ugt v2, v5 + trapnz v7, user1 + + return +} + +; VCode: +; block0: +; x4 = xload32 x0+16 // flags = +; xbc32_bound_trap x1, x4, 24 // trap=TrapCode(1) +; xbc32_bound_trap x2, x4, 24 // trap=TrapCode(1) +; ret +; +; Disassembled: +; xload32le_offset8 x4, x0, 16 +; xbc32_bound_trap x1, x4, 24 +; xbc32_bound_trap x2, x4, 24 +; ret + + +function %twice_swapped(i32, i32, i32) { +block0(v0: i32, v1: i32, v2: i32): + ;; load the bound & calculate what to check against + v3 = load.i32 v0+16 + v4 = iconst.i32 24 + v5 = isub v3, v4 + + ;; check v1 + v6 = icmp ult v5, v1 + trapnz v6, user1 + + ;; check v2 + v7 = icmp ult v5, v1 + trapnz v7, user1 + + return +} + +; VCode: +; block0: +; x3 = xload32 x0+16 // flags = +; xbc32_bound_trap x1, x3, 24 // trap=TrapCode(1) +; xbc32_bound_trap x1, x3, 24 // trap=TrapCode(1) +; ret +; +; Disassembled: +; xload32le_offset8 x3, x0, 16 +; xbc32_bound_trap x1, x3, 24 +; xbc32_bound_trap x1, x3, 24 +; ret + diff --git a/cranelift/filetests/filetests/isa/pulley64/xbc.clif b/cranelift/filetests/filetests/isa/pulley64/xbc.clif new file mode 100644 index 000000000000..2177b752733c --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/xbc.clif @@ -0,0 +1,114 @@ +test compile precise-output +target pulley64 + +function %simple(i64, i32) { +block0(v0: i64, v1: i32): + v2 = load.i64 v0+16 + v3 = uextend.i64 v1 + v4 = iconst.i64 24 + v5 = isub v2, v4 + v6 = icmp ugt v3, v5 + trapnz v6, user1 + return +} + +; VCode: +; block0: +; xbc32_boundne_trap x1, x0, 16, 24 // trap=TrapCode(1) +; ret +; +; Disassembled: +; xbc32_boundne_trap x1, x0, 16, 24 +; ret + +function 
%swapped_args(i64, i32) { +block0(v0: i64, v1: i32): + v2 = load.i64 v0+16 + v3 = uextend.i64 v1 + v4 = iconst.i64 24 + v5 = isub v2, v4 + v6 = icmp ult v5, v3 + trapnz v6, user1 + return +} + +; VCode: +; block0: +; xbc32_boundne_trap x1, x0, 16, 24 // trap=TrapCode(1) +; ret +; +; Disassembled: +; xbc32_boundne_trap x1, x0, 16, 24 +; ret + +function %twice(i64, i32, i32) { +block0(v0: i64, v1: i32, v2: i32): + ;; load the bound & calculate what to check against + v3 = load.i64 v0+16 + v4 = iconst.i64 24 + v5 = isub v3, v4 + + ;; check v1 + v6 = uextend.i64 v1 + v7 = icmp ugt v6, v5 + trapnz v7, user1 + + ;; check v2 + v8 = uextend.i64 v2 + v9 = icmp ugt v8, v5 + trapnz v9, user1 + + return +} + +; VCode: +; block0: +; x4 = xload64 x0+16 // flags = +; xbc32_bound_trap x1, x4, 24 // trap=TrapCode(1) +; xbc32_bound_trap x2, x4, 24 // trap=TrapCode(1) +; ret +; +; Disassembled: +; xload64le_offset8 x4, x0, 16 +; xbc32_bound_trap x1, x4, 24 +; xbc32_bound_trap x2, x4, 24 +; ret + +function %twice_swapped(i64, i32, i32) { +block0(v0: i64, v1: i32, v2: i32): + ;; load the bound & calculate what to check against + v3 = load.i64 v0+16 + v4 = iconst.i64 24 + v5 = isub v3, v4 + + ;; check v1 + v6 = uextend.i64 v1 + v7 = icmp ult v5, v6 + trapnz v7, user1 + + ;; check v2 + v8 = uextend.i64 v2 + v9 = icmp ugt v5, v8 + trapnz v9, user1 + + return +} + +; VCode: +; block0: +; x7 = xload64 x0+16 // flags = +; xsub64_u8 x6, x7, 24 +; xbc32_bound_trap x1, x7, 24 // trap=TrapCode(1) +; zext32 x7, x2 +; trap_if_xult64 x7, x6 // code = TrapCode(1) +; ret +; +; Disassembled: +; xload64le_offset8 x7, x0, 16 +; xsub64_u8 x6, x7, 24 +; xbc32_bound_trap x1, x7, 24 +; zext32 x7, x2 +; br_if_xult64 x7, x6, 0x8 // target = 0x17 +; ret +; trap + diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 7c21593c3d44..5a12ee67ff1b 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -2475,7 +2475,17 @@ impl OpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } - fn 
xbc32_bound_trap( + fn xbc32_bound_trap(&mut self, addr: XReg, bound: XReg, size: u8) -> ControlFlow<Done> { + let bound = self.state[bound].get_u64() as usize; + let addr = self.state[addr].get_u32() as usize; + if addr > bound.wrapping_sub(usize::from(size)) { + self.done_trap::<crate::XBc32BoundTrap>() + } else { + ControlFlow::Continue(()) + } + } + + fn xbc32_boundne_trap( &mut self, addr: XReg, bound_ptr: XReg, @@ -2485,7 +2495,7 @@ impl OpVisitor for Interpreter<'_> { let bound = unsafe { self.load::<usize>(bound_ptr, bound_off.into()) }; let addr = self.state[addr].get_u32() as usize; if addr > bound.wrapping_sub(usize::from(size)) { - self.done_trap::<crate::XBc32BoundTrap>() + self.done_trap::<crate::XBc32BoundNeTrap>() } else { ControlFlow::Continue(()) } diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index ed95bd5162a6..a765947cc605 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -617,8 +617,17 @@ macro_rules! for_each_op { /// `dst = low32(cond) ? if_nonzero : if_zero` xselect64 = XSelect64 { dst: XReg, cond: XReg, if_nonzero: XReg, if_zero: XReg }; - /// `trapif(addr > *(bound_ptr + bound_off) - size)` (unsigned) + /// `trapif(addr > bound - size)` (unsigned) xbc32_bound_trap = XBc32BoundTrap { + addr: XReg, + bound: XReg, + size: u8 + }; + /// `trapif(addr > *(bound_ptr + bound_off) - size)` (unsigned) + /// + /// Note that the `bound_ptr + bound_off` load loads a + /// host-native-endian pointer-sized value. 
+ xbc32_boundne_trap = XBc32BoundNeTrap { addr: XReg, bound_ptr: XReg, bound_off: u8, diff --git a/tests/disas/pulley/pulley32_memory32.wat b/tests/disas/pulley/pulley32_memory32.wat index 9369080fa86e..bf6ebdc5e80e 100644 --- a/tests/disas/pulley/pulley32_memory32.wat +++ b/tests/disas/pulley/pulley32_memory32.wat @@ -64,7 +64,7 @@ ;; ;; wasm[0]::function[1]::load16: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 2 +;; xbc32_boundne_trap x2, x0, 52, 2 ;; xload32le_offset8 x5, x0, 48 ;; xload16le_u32_g32 x0, x2, x5, 0 ;; pop_frame @@ -72,7 +72,7 @@ ;; ;; wasm[0]::function[2]::load32: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 4 +;; xbc32_boundne_trap x2, x0, 52, 4 ;; xload32le_offset8 x5, x0, 48 ;; xload32le_g32 x0, x2, x5, 0 ;; pop_frame @@ -80,7 +80,7 @@ ;; ;; wasm[0]::function[3]::load64: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 8 +;; xbc32_boundne_trap x2, x0, 52, 8 ;; xload32le_offset8 x5, x0, 48 ;; xload64le_g32 x0, x2, x5, 0 ;; pop_frame @@ -98,7 +98,7 @@ ;; ;; wasm[0]::function[5]::store16: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 2 +;; xbc32_boundne_trap x2, x0, 52, 2 ;; xload32le_offset8 x5, x0, 48 ;; xstore16le_g32 x2, x5, 0, x3 ;; pop_frame @@ -106,7 +106,7 @@ ;; ;; wasm[0]::function[6]::store32: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 4 +;; xbc32_boundne_trap x2, x0, 52, 4 ;; xload32le_offset8 x5, x0, 48 ;; xstore32le_g32 x2, x5, 0, x3 ;; pop_frame @@ -114,7 +114,7 @@ ;; ;; wasm[0]::function[7]::store64: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 8 +;; xbc32_boundne_trap x2, x0, 52, 8 ;; xload32le_offset8 x5, x0, 48 ;; xstore64le_g32 x2, x5, 0, x3 ;; pop_frame @@ -122,7 +122,7 @@ ;; ;; wasm[0]::function[8]::load8_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 33 +;; xbc32_boundne_trap x2, x0, 52, 33 ;; xload32le_offset8 x5, x0, 48 ;; xload8_u32_g32 x0, x2, x5, 32 ;; pop_frame @@ -130,7 +130,7 @@ ;; ;; wasm[0]::function[9]::load16_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 34 +;; xbc32_boundne_trap x2, x0, 
52, 34 ;; xload32le_offset8 x5, x0, 48 ;; xload16le_u32_g32 x0, x2, x5, 32 ;; pop_frame @@ -138,7 +138,7 @@ ;; ;; wasm[0]::function[10]::load32_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 36 +;; xbc32_boundne_trap x2, x0, 52, 36 ;; xload32le_offset8 x5, x0, 48 ;; xload32le_g32 x0, x2, x5, 32 ;; pop_frame @@ -146,7 +146,7 @@ ;; ;; wasm[0]::function[11]::load64_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 40 +;; xbc32_boundne_trap x2, x0, 52, 40 ;; xload32le_offset8 x5, x0, 48 ;; xload64le_g32 x0, x2, x5, 32 ;; pop_frame @@ -154,7 +154,7 @@ ;; ;; wasm[0]::function[12]::store8_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 9 +;; xbc32_boundne_trap x2, x0, 52, 9 ;; xload32le_offset8 x5, x0, 48 ;; xstore8_g32 x2, x5, 8, x3 ;; pop_frame @@ -162,7 +162,7 @@ ;; ;; wasm[0]::function[13]::store16_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 10 +;; xbc32_boundne_trap x2, x0, 52, 10 ;; xload32le_offset8 x5, x0, 48 ;; xstore16le_g32 x2, x5, 8, x3 ;; pop_frame @@ -170,7 +170,7 @@ ;; ;; wasm[0]::function[14]::store32_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 12 +;; xbc32_boundne_trap x2, x0, 52, 12 ;; xload32le_offset8 x5, x0, 48 ;; xstore32le_g32 x2, x5, 8, x3 ;; pop_frame @@ -178,7 +178,7 @@ ;; ;; wasm[0]::function[15]::store64_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 52, 16 +;; xbc32_boundne_trap x2, x0, 52, 16 ;; xload32le_offset8 x5, x0, 48 ;; xstore64le_g32 x2, x5, 8, x3 ;; pop_frame diff --git a/tests/disas/pulley/pulley64_memory32.wat b/tests/disas/pulley/pulley64_memory32.wat index eeb016c317c0..5c1e2f1a2248 100644 --- a/tests/disas/pulley/pulley64_memory32.wat +++ b/tests/disas/pulley/pulley64_memory32.wat @@ -51,6 +51,10 @@ (func $store64_offset (param i32 i64) (i64.store offset=8 (local.get 0) (local.get 1))) + + (func $load16_two (param i32 i32) (result i32 i32) + (i32.load16_u (local.get 0)) + (i32.load16_u (local.get 1))) ) ;; wasm[0]::function[0]::load8: ;; push_frame @@ -65,7 +69,7 @@ ;; ;; 
wasm[0]::function[1]::load16: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 2 +;; xbc32_boundne_trap x2, x0, 104, 2 ;; xload64le_offset8 x5, x0, 96 ;; xload16le_u32_g32 x0, x5, x2, 0 ;; pop_frame @@ -73,7 +77,7 @@ ;; ;; wasm[0]::function[2]::load32: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 4 +;; xbc32_boundne_trap x2, x0, 104, 4 ;; xload64le_offset8 x5, x0, 96 ;; xload32le_g32 x0, x5, x2, 0 ;; pop_frame @@ -81,7 +85,7 @@ ;; ;; wasm[0]::function[3]::load64: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 8 +;; xbc32_boundne_trap x2, x0, 104, 8 ;; xload64le_offset8 x5, x0, 96 ;; xload64le_g32 x0, x5, x2, 0 ;; pop_frame @@ -100,7 +104,7 @@ ;; ;; wasm[0]::function[5]::store16: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 2 +;; xbc32_boundne_trap x2, x0, 104, 2 ;; xload64le_offset8 x5, x0, 96 ;; xstore16le_g32 x5, x2, 0, x3 ;; pop_frame @@ -108,7 +112,7 @@ ;; ;; wasm[0]::function[6]::store32: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 4 +;; xbc32_boundne_trap x2, x0, 104, 4 ;; xload64le_offset8 x5, x0, 96 ;; xstore32le_g32 x5, x2, 0, x3 ;; pop_frame @@ -116,7 +120,7 @@ ;; ;; wasm[0]::function[7]::store64: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 8 +;; xbc32_boundne_trap x2, x0, 104, 8 ;; xload64le_offset8 x5, x0, 96 ;; xstore64le_g32 x5, x2, 0, x3 ;; pop_frame @@ -124,7 +128,7 @@ ;; ;; wasm[0]::function[8]::load8_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 33 +;; xbc32_boundne_trap x2, x0, 104, 33 ;; xload64le_offset8 x5, x0, 96 ;; xload8_u32_g32 x0, x5, x2, 32 ;; pop_frame @@ -132,7 +136,7 @@ ;; ;; wasm[0]::function[9]::load16_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 34 +;; xbc32_boundne_trap x2, x0, 104, 34 ;; xload64le_offset8 x5, x0, 96 ;; xload16le_u32_g32 x0, x5, x2, 32 ;; pop_frame @@ -140,7 +144,7 @@ ;; ;; wasm[0]::function[10]::load32_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 36 +;; xbc32_boundne_trap x2, x0, 104, 36 ;; xload64le_offset8 x5, x0, 96 ;; xload32le_g32 x0, x5, x2, 32 ;; pop_frame @@ 
-148,7 +152,7 @@ ;; ;; wasm[0]::function[11]::load64_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 40 +;; xbc32_boundne_trap x2, x0, 104, 40 ;; xload64le_offset8 x5, x0, 96 ;; xload64le_g32 x0, x5, x2, 32 ;; pop_frame @@ -156,7 +160,7 @@ ;; ;; wasm[0]::function[12]::store8_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 9 +;; xbc32_boundne_trap x2, x0, 104, 9 ;; xload64le_offset8 x5, x0, 96 ;; xstore8_g32 x5, x2, 8, x3 ;; pop_frame @@ -164,7 +168,7 @@ ;; ;; wasm[0]::function[13]::store16_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 10 +;; xbc32_boundne_trap x2, x0, 104, 10 ;; xload64le_offset8 x5, x0, 96 ;; xstore16le_g32 x5, x2, 8, x3 ;; pop_frame @@ -172,7 +176,7 @@ ;; ;; wasm[0]::function[14]::store32_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 12 +;; xbc32_boundne_trap x2, x0, 104, 12 ;; xload64le_offset8 x5, x0, 96 ;; xstore32le_g32 x5, x2, 8, x3 ;; pop_frame @@ -180,8 +184,19 @@ ;; ;; wasm[0]::function[15]::store64_offset: ;; push_frame -;; xbc32_bound_trap x2, x0, 104, 16 +;; xbc32_boundne_trap x2, x0, 104, 16 ;; xload64le_offset8 x5, x0, 96 ;; xstore64le_g32 x5, x2, 8, x3 ;; pop_frame ;; ret +;; +;; wasm[0]::function[16]::load16_two: +;; push_frame +;; xload64le_offset8 x7, x0, 104 +;; xbc32_bound_trap x2, x7, 2 +;; xload64le_offset8 x8, x0, 96 +;; xload16le_u32_g32 x0, x8, x2, 0 +;; xbc32_bound_trap x3, x7, 2 +;; xload16le_u32_g32 x1, x8, x3, 0 +;; pop_frame +;; ret