Skip to content

Commit b5757cb

Browse files
committed
[AArch64] Remove redundant FMOV for zero-extended i32/i16 loads to f64
Previously, a load of an i32 or i16 value that was zero-extended to i64 and then bitcast to f64 was lowered with a separate FMOV instruction to move the result into the floating-point register. This patch adds TableGen patterns that select a load directly into the FP register (`ldr s0` / `ldr h0`), so the FMOV is no longer needed. Tests added: load_u64_from_u32.ll and load_u64_from_u16.ll.
1 parent f242360 commit b5757cb

File tree

3 files changed

+38
-1
lines changed

3 files changed

+38
-1
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3913,6 +3913,14 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
39133913
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
39143914
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
39153915

3916+
// f64 <- bitcast (i64 zext (i32 load)), unsigned scaled-offset addressing.
// Select the load directly into an S register and present it as the ssub
// part of the f64 result via SUBREG_TO_REG, so that no separate FMOV from a
// general-purpose register is required.
def : Pat<(f64 (bitconvert
               (i64 (zextloadi32
                        (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
          (SUBREG_TO_REG (i64 0),
                         (LDRSui GPR64sp:$Rn, uimm12s4:$offset),
                         ssub)>;
3919+
3920+
// f64 <- bitcast (i64 zext (i16 load)), unsigned scaled-offset addressing.
// Select the load directly into an H register and present it as the hsub
// part of the f64 result via SUBREG_TO_REG, so that no separate FMOV from a
// general-purpose register is required.
//
// The address must be matched with am_indexed16: its scaled immediate is the
// byte offset divided by 2, which is what uimm12s2 and LDRHui expect. Matching
// with am_indexed32 would divide the byte offset by 4 instead, so any non-zero
// offset folded into the load would address the wrong halfword.
def : Pat<(f64 (bitconvert
               (i64 (zextloadi16
                        (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
          (SUBREG_TO_REG (i64 0),
                         (LDRHui GPR64sp:$Rn, uimm12s2:$offset),
                         hsub)>;
3923+
39163924
// Pre-fetch.
39173925
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
39183926
[(AArch64Prefetch timm:$Rt,
@@ -10986,4 +10994,4 @@ defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
1098610994
include "AArch64InstrAtomics.td"
1098710995
include "AArch64SVEInstrInfo.td"
1098810996
include "AArch64SMEInstrInfo.td"
10989-
include "AArch64InstrGISel.td"
10997+
include "AArch64InstrGISel.td"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
3+
4+
; Loads a 16-bit value, zero-extends it to i64 and reinterprets the bits as a
; double. The expected code is a single halfword load into h0 followed by ret,
; with no FMOV in between.
define double @_Z9load_u64_from_u16_testPj(ptr %n) {
; CHECK-LABEL: _Z9load_u64_from_u16_testPj:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ret
entry:
  %half = load i16, ptr %n, align 2
  %wide = zext i16 %half to i64
  %as.f64 = bitcast i64 %wide to double
  ret double %as.f64
}
15+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
3+
4+
; Loads a 32-bit value, zero-extends it to i64 and reinterprets the bits as a
; double. The expected code is a single word load into s0 followed by ret,
; with no FMOV in between.
define double @_Z9load_u64_from_u32_testPj(ptr %n) {
; CHECK-LABEL: _Z9load_u64_from_u32_testPj:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
entry:
  %word = load i32, ptr %n, align 4
  %wide = zext i32 %word to i64
  %as.f64 = bitcast i64 %wide to double
  ret double %as.f64
}

0 commit comments

Comments
 (0)