Skip to content

[LLVM][DAGCombiner][SVE] Fold vselect into merge_pasthru_op. #146917

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from

Conversation

paulwalker-arm
Copy link
Collaborator

vselect A, (merge_pasthru_op all_active, B,{Bn,} -), C
vselect A, (merge_pasthru_op -, B,{Bn,} undef), C
vselect A, (merge_pasthru_op A, B,{Bn,} -), C
-> merge_pasthru_op A, B,{Bn,} C

  vselect A, (merge_pasthru_op all_active, B,{Bn,} -), C
  vselect A, (merge_pasthru_op -, B,{Bn,} undef), C
  vselect A, (merge_pasthru_op A, B,{Bn,} -), C
    -> merge_pasthru_op A, B,{Bn,} C
@llvmbot
Copy link
Member

llvmbot commented Jul 3, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

Changes

vselect A, (merge_pasthru_op all_active, B,{Bn,} -), C
vselect A, (merge_pasthru_op -, B,{Bn,} undef), C
vselect A, (merge_pasthru_op A, B,{Bn,} -), C
-> merge_pasthru_op A, B,{Bn,} C


Patch is 75.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146917.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+19-3)
  • (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+9)
  • (modified) llvm/test/CodeGen/AArch64/sve-merging-unary.ll (+141-344)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fb8bd81c033af..0d388fc3c787d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25523,6 +25523,9 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
     return SwapResult;
 
   SDValue N0 = N->getOperand(0);
+  SDValue IfTrue = N->getOperand(1);
+  SDValue IfFalse = N->getOperand(2);
+  EVT ResVT = N->getValueType(0);
   EVT CCVT = N0.getValueType();
 
   if (isAllActivePredicate(DAG, N0))
@@ -25531,6 +25534,22 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
   if (isAllInactivePredicate(N0))
     return N->getOperand(2);
 
+  if (isMergePassthruOpcode(IfTrue.getOpcode()) && IfTrue.hasOneUse()) {
+    // vselect A, (merge_pasthru_op all_active, B,{Bn,} -), C
+    // vselect A, (merge_pasthru_op -, B,{Bn,} undef), C
+    // vselect A, (merge_pasthru_op A, B,{Bn,} -), C
+    //   -> merge_pasthru_op A, B,{Bn,} C
+    if (isAllActivePredicate(DAG, IfTrue->getOperand(0)) ||
+        IfTrue->getOperand(IfTrue.getNumOperands() - 1).isUndef() ||
+        IfTrue->getOperand(0) == N0) {
+      SmallVector<SDValue, 4> Ops(IfTrue->op_values());
+      Ops[0] = N0;
+      Ops[IfTrue.getNumOperands() - 1] = IfFalse;
+
+      return DAG.getNode(IfTrue.getOpcode(), SDLoc(N), ResVT, Ops);
+    }
+  }
+
   // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
   // into (OR (ASR lhs, N-1), 1), which requires less instructions for the
   // supported types.
@@ -25570,14 +25589,11 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
       CmpVT.getVectorElementType().isFloatingPoint())
     return SDValue();
 
-  EVT ResVT = N->getValueType(0);
   // Only combine when the result type is of the same size as the compared
   // operands.
   if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
     return SDValue();
 
-  SDValue IfTrue = N->getOperand(1);
-  SDValue IfFalse = N->getOperand(2);
   SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
                        N0.getOperand(0), N0.getOperand(1),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 91a9d21fa7b2c..261df563bb2a9 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2445,14 +2445,23 @@ let Predicates = [HasSVE_or_SME] in {
   defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag,                     AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
 
   //These patterns exist to improve the code quality of conversions on unpacked types.
+  def : Pat<(nxv2f32 (AArch64fcvte_mt nxv2i1:$Pg, nxv2f16:$Zs, nxv2f32:$Zd)),
+            (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
   def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive:$Pg)), nxv2f16:$Zs, nxv2f32:$Zd)),
             (FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
   // FP_ROUND has an additional 'precise' flag which indicates the type of rounding.
   // This is ignored by the pattern below where it is matched by (i64 timm0_1)
+  def : Pat<(nxv2f16 (AArch64fcvtr_mt nxv2i1:$Pg, nxv2f32:$Zs, (i64 timm0_1), nxv2f16:$Zd)),
+            (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
   def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive:$Pg)), nxv2f32:$Zs, (i64 timm0_1), nxv2f16:$Zd)),
             (FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
 
+  def : Pat<(nxv4f32 (AArch64fcvte_mt nxv4i1:$Pg, nxv4bf16:$Zs, nxv4f32:$Zd)),
+            (SEL_ZPZZ_S $Pg, (LSL_ZZI_S $Zs, (i32 16)), $Zd)>;
+  def : Pat<(nxv2f32 (AArch64fcvte_mt nxv2i1:$Pg, nxv2bf16:$Zs, nxv2f32:$Zd)),
+            (SEL_ZPZZ_D $Pg, (LSL_ZZI_S $Zs, (i32 16)), $Zd)>;
+
   def : Pat<(nxv4f32 (AArch64fcvte_mt (SVEAnyPredicate), nxv4bf16:$op, undef)),
             (LSL_ZZI_S $op, (i32 16))>;
   def : Pat<(nxv2f32 (AArch64fcvte_mt (SVEAnyPredicate), nxv2bf16:$op, undef)),
diff --git a/llvm/test/CodeGen/AArch64/sve-merging-unary.ll b/llvm/test/CodeGen/AArch64/sve-merging-unary.ll
index 9e331a69bcf7c..eec111d3285fe 100644
--- a/llvm/test/CodeGen/AArch64/sve-merging-unary.ll
+++ b/llvm/test/CodeGen/AArch64/sve-merging-unary.ll
@@ -7,9 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <vscale x 16 x i8> @abs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: abs_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    abs z1.b, p1/m, z1.b
-; CHECK-NEXT:    mov z0.b, p0/m, z1.b
+; CHECK-NEXT:    abs z0.b, p0/m, z1.b
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %b, i1 0)
   %res = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b.op, <vscale x 16 x i8> %a
@@ -19,9 +17,7 @@ define <vscale x 16 x i8> @abs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 define <vscale x 8 x i16> @abs_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: abs_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    abs z1.h, p1/m, z1.h
-; CHECK-NEXT:    mov z0.h, p0/m, z1.h
+; CHECK-NEXT:    abs z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %b, i1 0)
   %res = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b.op, <vscale x 8 x i16> %a
@@ -31,9 +27,7 @@ define <vscale x 8 x i16> @abs_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 define <vscale x 4 x i32> @abs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: abs_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    abs z1.s, p1/m, z1.s
-; CHECK-NEXT:    mov z0.s, p0/m, z1.s
+; CHECK-NEXT:    abs z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %b, i1 0)
   %res = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b.op, <vscale x 4 x i32> %a
@@ -43,9 +37,7 @@ define <vscale x 4 x i32> @abs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 define <vscale x 2 x i64> @abs_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: abs_nxv2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    abs z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z0.d, p0/m, z1.d
+; CHECK-NEXT:    abs z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %b, i1 0)
   %res = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b.op, <vscale x 2 x i64> %a
@@ -55,9 +47,7 @@ define <vscale x 2 x i64> @abs_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 define <vscale x 16 x i8> @clz_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: clz_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    clz z1.b, p1/m, z1.b
-; CHECK-NEXT:    mov z0.b, p0/m, z1.b
+; CHECK-NEXT:    clz z0.b, p0/m, z1.b
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %b)
   %res = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b.op, <vscale x 16 x i8> %a
@@ -67,9 +57,7 @@ define <vscale x 16 x i8> @clz_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 define <vscale x 8 x i16> @clz_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: clz_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    clz z1.h, p1/m, z1.h
-; CHECK-NEXT:    mov z0.h, p0/m, z1.h
+; CHECK-NEXT:    clz z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %b)
   %res = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b.op, <vscale x 8 x i16> %a
@@ -79,9 +67,7 @@ define <vscale x 8 x i16> @clz_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 define <vscale x 4 x i32> @clz_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: clz_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    clz z1.s, p1/m, z1.s
-; CHECK-NEXT:    mov z0.s, p0/m, z1.s
+; CHECK-NEXT:    clz z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %b)
   %res = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b.op, <vscale x 4 x i32> %a
@@ -91,9 +77,7 @@ define <vscale x 4 x i32> @clz_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 define <vscale x 2 x i64> @clz_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: clz_nxv2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    clz z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z0.d, p0/m, z1.d
+; CHECK-NEXT:    clz z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> %b)
   %res = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b.op, <vscale x 2 x i64> %a
@@ -103,9 +87,7 @@ define <vscale x 2 x i64> @clz_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 define <vscale x 16 x i8> @cnt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cnt_nxv16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    cnt z1.b, p1/m, z1.b
-; CHECK-NEXT:    mov z0.b, p0/m, z1.b
+; CHECK-NEXT:    cnt z0.b, p0/m, z1.b
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> %b)
   %res = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b.op, <vscale x 16 x i8> %a
@@ -115,9 +97,7 @@ define <vscale x 16 x i8> @cnt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8
 define <vscale x 8 x i16> @cnt_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: cnt_nxv8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    cnt z1.h, p1/m, z1.h
-; CHECK-NEXT:    mov z0.h, p0/m, z1.h
+; CHECK-NEXT:    cnt z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> %b)
   %res = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b.op, <vscale x 8 x i16> %a
@@ -127,9 +107,7 @@ define <vscale x 8 x i16> @cnt_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 define <vscale x 4 x i32> @cnt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cnt_nxv4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    cnt z1.s, p1/m, z1.s
-; CHECK-NEXT:    mov z0.s, p0/m, z1.s
+; CHECK-NEXT:    cnt z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> %b)
   %res = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b.op, <vscale x 4 x i32> %a
@@ -139,9 +117,7 @@ define <vscale x 4 x i32> @cnt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 define <vscale x 2 x i64> @cnt_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: cnt_nxv2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    cnt z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z0.d, p0/m, z1.d
+; CHECK-NEXT:    cnt z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> %b)
   %res = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b.op, <vscale x 2 x i64> %a
@@ -151,9 +127,7 @@ define <vscale x 2 x i64> @cnt_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 define <vscale x 2 x half> @fabs_nxv2f16(<vscale x 2 x i1> %pg, <vscale x 2 x half> %a, <vscale x 2 x half> %b) {
 ; CHECK-LABEL: fabs_nxv2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    fabs z1.h, p1/m, z1.h
-; CHECK-NEXT:    mov z0.d, p0/m, z1.d
+; CHECK-NEXT:    fabs z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %b)
   %res = select <vscale x 2 x i1> %pg, <vscale x 2 x half> %b.op, <vscale x 2 x half> %a
@@ -163,9 +137,7 @@ define <vscale x 2 x half> @fabs_nxv2f16(<vscale x 2 x i1> %pg, <vscale x 2 x ha
 define <vscale x 4 x half> @fabs_nxv4f16(<vscale x 4 x i1> %pg, <vscale x 4 x half> %a, <vscale x 4 x half> %b) {
 ; CHECK-LABEL: fabs_nxv4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    fabs z1.h, p1/m, z1.h
-; CHECK-NEXT:    mov z0.s, p0/m, z1.s
+; CHECK-NEXT:    fabs z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %b)
   %res = select <vscale x 4 x i1> %pg, <vscale x 4 x half> %b.op, <vscale x 4 x half> %a
@@ -175,9 +147,7 @@ define <vscale x 4 x half> @fabs_nxv4f16(<vscale x 4 x i1> %pg, <vscale x 4 x ha
 define <vscale x 8 x half> @fabs_nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: fabs_nxv8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    fabs z1.h, p1/m, z1.h
-; CHECK-NEXT:    mov z0.h, p0/m, z1.h
+; CHECK-NEXT:    fabs z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %b)
   %res = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %b.op, <vscale x 8 x half> %a
@@ -187,9 +157,7 @@ define <vscale x 8 x half> @fabs_nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x ha
 define <vscale x 2 x float> @fabs_nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a, <vscale x 2 x float> %b) {
 ; CHECK-LABEL: fabs_nxv2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    fabs z1.s, p1/m, z1.s
-; CHECK-NEXT:    mov z0.d, p0/m, z1.d
+; CHECK-NEXT:    fabs z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> %b)
   %res = select <vscale x 2 x i1> %pg, <vscale x 2 x float> %b.op, <vscale x 2 x float> %a
@@ -199,9 +167,7 @@ define <vscale x 2 x float> @fabs_nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x f
 define <vscale x 4 x float> @fabs_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fabs_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    fabs z1.s, p1/m, z1.s
-; CHECK-NEXT:    mov z0.s, p0/m, z1.s
+; CHECK-NEXT:    fabs z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %b)
   %res = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %b.op, <vscale x 4 x float> %a
@@ -211,9 +177,7 @@ define <vscale x 4 x float> @fabs_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x f
 define <vscale x 2 x double> @fabs_nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: fabs_nxv2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    fabs z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z0.d, p0/m, z1.d
+; CHECK-NEXT:    fabs z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
   %b.op = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %b)
   %res = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %b.op, <vscale x 2 x double> %a
@@ -256,9 +220,7 @@ define <vscale x 8 x bfloat> @fabs_nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x
 define <vscale x 2 x float> @fcvt_nxv2f16_to_nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a, <vscale x 2 x half> %b) {
 ; CHECK-LABEL: fcvt_nxv2f16_to_nxv2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    fcvt z1.s, p1/m, z1.h
-; CHECK-NEXT:    mov z0.d, p0/m, z1.d
+; CHECK-NEXT:    fcvt z0.s, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %b.op = fpext <vscale x 2 x half> %b to <vscale x 2 x float>
   %res = select <vscale x 2 x i1> %pg, <vscale x 2 x float> %b.op, <vscale x 2 x float> %a
@@ -268,9 +230,7 @@ define <vscale x 2 x float> @fcvt_nxv2f16_to_nxv2f32(<vscale x 2 x i1> %pg, <vsc
 define <vscale x 2 x double> @fcvt_nxv2f16_to_nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x half> %b) {
 ; CHECK-LABEL: fcvt_nxv2f16_to_nxv2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    fcvt z1.d, p1/m, z1.h
-; CHECK-NEXT:    mov z0.d, p0/m, z1.d
+; CHECK-NEXT:    fcvt z0.d, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %b.op = fpext <vscale x 2 x half> %b to <vscale x 2 x double>
   %res = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %b.op, <vscale x 2 x double> %a
@@ -280,9 +240,7 @@ define <vscale x 2 x double> @fcvt_nxv2f16_to_nxv2f64(<vscale x 2 x i1> %pg, <vs
 define <vscale x 4 x float> @fcvt_nxv4f16_to_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x half> %b) {
 ; CHECK-LABEL: fcvt_nxv4f16_to_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    fcvt z1.s, p1/m, z1.h
-; CHECK-NEXT:    mov z0.s, p0/m, z1.s
+; CHECK-NEXT:    fcvt z0.s, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %b.op = fpext <vscale x 4 x half> %b to <vscale x 4 x float>
   %res = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %b.op, <vscale x 4 x float> %a
@@ -292,9 +250,7 @@ define <vscale x 4 x float> @fcvt_nxv4f16_to_nxv4f32(<vscale x 4 x i1> %pg, <vsc
 define <vscale x 2 x half> @fcvt_nxv2f32_to_nxv2f16(<vscale x 2 x i1> %pg, <vscale x 2 x half> %a, <vscale x 2 x float> %b) {
 ; CHECK-LABEL: fcvt_nxv2f32_to_nxv2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    fcvt z1.h, p1/m, z1.s
-; CHECK-NEXT:    mov z0.d, p0/m, z1.d
+; CHECK-NEXT:    fcvt z0.h, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %b.op = fptrunc <vscale x 2 x float> %b to <vscale x 2 x half>
   %res = select <vscale x 2 x i1> %pg, <vscale x 2 x half> %b.op, <vscale x 2 x half> %a
@@ -304,9 +260,7 @@ define <vscale x 2 x half> @fcvt_nxv2f32_to_nxv2f16(<vscale x 2 x i1> %pg, <vsca
 define <vscale x 2 x double> @fcvt_nxv2f32_to_nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x float> %b) {
 ; CHECK-LABEL: fcvt_nxv2f32_to_nxv2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    fcvt z1.d, p1/m, z1.s
-; CHECK-NEXT:    mov z0.d, p0/m, z1.d
+; CHECK-NEXT:    fcvt z0.d, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %b.op = fpext <vscale x 2 x float> %b to <vscale x 2 x double>
   %res = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %b.op, <vscale x 2 x double> %a
@@ -316,9 +270,7 @@ define <vscale x 2 x double> @fcvt_nxv2f32_to_nxv2f64(<vscale x 2 x i1> %pg, <vs
 define <vscale x 2 x bfloat> @fcvt_nxv2f32_to_nxv2bf16(<vscale x 2 x i1> %pg, <vscale x 2 x bfloat> %a, <vscale x 2 x float> %b) {
 ; CHECK-LABEL: fcvt_nxv2f32_to_nxv2bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    bfcvt z1.h, p1/m, z1.s
-; CHECK-NEXT:    mov z0.d, p0/m, z1.d
+; CHECK-NEXT:    bfcvt z0.h, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %b.op = fptrunc <vscale x 2 x float> %b to <vscale x 2 x bfloat>
   %res = select <vscale x 2 x i1> %pg, <vscale x 2 x bfloat> %b.op, <vscale x 2 x bfloat> %a
@@ -328,9 +280,7 @@ define <vscale x 2 x bfloat> @fcvt_nxv2f32_to_nxv2bf16(<vscale x 2 x i1> %pg, <v
 define <vscale x 4 x half> @fcvt_nxv4f32_to_nxv4f16(<vscale x 4 x i1> %pg, <vscale x 4 x half> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fcvt_nxv4f32_to_nxv4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    fcvt z1.h, p1/m, z1.s
-; CHECK-NEXT:    mov z0.s, p0/m, z1.s
+; CHECK-NEXT:    fcvt z0.h, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %b.op = fptrunc <vscale x 4 x float> %b to <vscale x 4 x half>
   %res = select <vscale x 4 x i1> %pg, <vscale x 4 x half> %b.op, <vscale x 4 x half> %a
@@ -340,9 +290,7 @@ define <vscale x 4 x half> @fcvt_nxv4f32_to_nxv4f16(<vscale x 4 x i1> %pg, <vsca
 define <vscale x 4 x bfloat> @fcvt_nxv4f32_to_nxv4bf16(<vscale x 4 x i1> %pg, <vscale x 4 x bfloat> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: fcvt_nxv4f32_to_nxv4bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    bfcvt z1.h, p1/m, z1.s
-; CHECK-NEXT:    mov z0.s, p0/m, z1.s
+; CHECK-NEXT:    bfcvt z0.h, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %b.op = fptrunc <vscale x 4 x float> %b to <vscale x 4 x bfloat>
   %res = select <vscale x 4 x i1> %pg, <vscale x 4 x bfloat> %b.op, <vscale x 4 x bfloat> %a
@@ -352,9 +300,7 @@ define <vscale x 4 x bfloat> @fcvt_nxv4f32_to_nxv4bf16(<vscale x 4 x i1> %pg, <v
 define <vscale x 2 x half> @fcvt_nxv2f64_to_nxv2f16(<vscale x 2 x i1> %pg, <vscale x 2 x half> %a...
[truncated]

// -> merge_pasthru_op A, B,{Bn,} C
if (isAllActivePredicate(DAG, IfTrue->getOperand(0)) ||
IfTrue->getOperand(IfTrue.getNumOperands() - 1).isUndef() ||
IfTrue->getOperand(0) == N0) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any tests where isAllActivePredicate(DAG, IfTrue->getOperand(0)) is not true? From what I can tell, all of the affected tests in sve-merging-unary.ll were using ptrue before this change & still pass if I try removing the other conditions.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whilst striving to extend the isel test coverage I've forgotten to test the DAG combine itself. I'll add tests to trigger the three cases.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants