-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[LLVM][DAGCombiner][SVE] Fold vselect into merge_pasthru_op. #146917
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[LLVM][DAGCombiner][SVE] Fold vselect into merge_pasthru_op. #146917
Conversation
vselect A, (merge_pasthru_op all_active, B,{Bn,} -), C
vselect A, (merge_pasthru_op -, B,{Bn,} undef), C
vselect A, (merge_pasthru_op A, B,{Bn,} -), C
  -> merge_pasthru_op A, B,{Bn,} C
@llvm/pr-subscribers-backend-aarch64 Author: Paul Walker (paulwalker-arm) Changes: vselect A, (merge_pasthru_op all_active, B,{Bn,} -), C Patch is 75.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146917.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fb8bd81c033af..0d388fc3c787d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25523,6 +25523,9 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
return SwapResult;
SDValue N0 = N->getOperand(0);
+ SDValue IfTrue = N->getOperand(1);
+ SDValue IfFalse = N->getOperand(2);
+ EVT ResVT = N->getValueType(0);
EVT CCVT = N0.getValueType();
if (isAllActivePredicate(DAG, N0))
@@ -25531,6 +25534,22 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
if (isAllInactivePredicate(N0))
return N->getOperand(2);
+ if (isMergePassthruOpcode(IfTrue.getOpcode()) && IfTrue.hasOneUse()) {
+ // vselect A, (merge_pasthru_op all_active, B,{Bn,} -), C
+ // vselect A, (merge_pasthru_op -, B,{Bn,} undef), C
+ // vselect A, (merge_pasthru_op A, B,{Bn,} -), C
+ // -> merge_pasthru_op A, B,{Bn,} C
+ if (isAllActivePredicate(DAG, IfTrue->getOperand(0)) ||
+ IfTrue->getOperand(IfTrue.getNumOperands() - 1).isUndef() ||
+ IfTrue->getOperand(0) == N0) {
+ SmallVector<SDValue, 4> Ops(IfTrue->op_values());
+ Ops[0] = N0;
+ Ops[IfTrue.getNumOperands() - 1] = IfFalse;
+
+ return DAG.getNode(IfTrue.getOpcode(), SDLoc(N), ResVT, Ops);
+ }
+ }
+
// Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires less instructions for the
// supported types.
@@ -25570,14 +25589,11 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
CmpVT.getVectorElementType().isFloatingPoint())
return SDValue();
- EVT ResVT = N->getValueType(0);
// Only combine when the result type is of the same size as the compared
// operands.
if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
return SDValue();
- SDValue IfTrue = N->getOperand(1);
- SDValue IfFalse = N->getOperand(2);
SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 91a9d21fa7b2c..261df563bb2a9 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2445,14 +2445,23 @@ let Predicates = [HasSVE_or_SME] in {
defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
//These patterns exist to improve the code quality of conversions on unpacked types.
+ def : Pat<(nxv2f32 (AArch64fcvte_mt nxv2i1:$Pg, nxv2f16:$Zs, nxv2f32:$Zd)),
+ (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive:$Pg)), nxv2f16:$Zs, nxv2f32:$Zd)),
(FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
// FP_ROUND has an additional 'precise' flag which indicates the type of rounding.
// This is ignored by the pattern below where it is matched by (i64 timm0_1)
+ def : Pat<(nxv2f16 (AArch64fcvtr_mt nxv2i1:$Pg, nxv2f32:$Zs, (i64 timm0_1), nxv2f16:$Zd)),
+ (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive:$Pg)), nxv2f32:$Zs, (i64 timm0_1), nxv2f16:$Zd)),
(FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
+ def : Pat<(nxv4f32 (AArch64fcvte_mt nxv4i1:$Pg, nxv4bf16:$Zs, nxv4f32:$Zd)),
+ (SEL_ZPZZ_S $Pg, (LSL_ZZI_S $Zs, (i32 16)), $Zd)>;
+ def : Pat<(nxv2f32 (AArch64fcvte_mt nxv2i1:$Pg, nxv2bf16:$Zs, nxv2f32:$Zd)),
+ (SEL_ZPZZ_D $Pg, (LSL_ZZI_S $Zs, (i32 16)), $Zd)>;
+
def : Pat<(nxv4f32 (AArch64fcvte_mt (SVEAnyPredicate), nxv4bf16:$op, undef)),
(LSL_ZZI_S $op, (i32 16))>;
def : Pat<(nxv2f32 (AArch64fcvte_mt (SVEAnyPredicate), nxv2bf16:$op, undef)),
diff --git a/llvm/test/CodeGen/AArch64/sve-merging-unary.ll b/llvm/test/CodeGen/AArch64/sve-merging-unary.ll
index 9e331a69bcf7c..eec111d3285fe 100644
--- a/llvm/test/CodeGen/AArch64/sve-merging-unary.ll
+++ b/llvm/test/CodeGen/AArch64/sve-merging-unary.ll
@@ -7,9 +7,7 @@ target triple = "aarch64-unknown-linux-gnu"
define <vscale x 16 x i8> @abs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: abs_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: abs z1.b, p1/m, z1.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: abs z0.b, p0/m, z1.b
; CHECK-NEXT: ret
%b.op = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %b, i1 0)
%res = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b.op, <vscale x 16 x i8> %a
@@ -19,9 +17,7 @@ define <vscale x 16 x i8> @abs_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8
define <vscale x 8 x i16> @abs_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: abs_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: abs z1.h, p1/m, z1.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: abs z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%b.op = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %b, i1 0)
%res = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b.op, <vscale x 8 x i16> %a
@@ -31,9 +27,7 @@ define <vscale x 8 x i16> @abs_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
define <vscale x 4 x i32> @abs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: abs_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: abs z1.s, p1/m, z1.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: abs z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%b.op = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %b, i1 0)
%res = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b.op, <vscale x 4 x i32> %a
@@ -43,9 +37,7 @@ define <vscale x 4 x i32> @abs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
define <vscale x 2 x i64> @abs_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: abs_nxv2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: abs z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: abs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%b.op = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %b, i1 0)
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b.op, <vscale x 2 x i64> %a
@@ -55,9 +47,7 @@ define <vscale x 2 x i64> @abs_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
define <vscale x 16 x i8> @clz_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clz_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: clz z1.b, p1/m, z1.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: clz z0.b, p0/m, z1.b
; CHECK-NEXT: ret
%b.op = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %b)
%res = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b.op, <vscale x 16 x i8> %a
@@ -67,9 +57,7 @@ define <vscale x 16 x i8> @clz_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8
define <vscale x 8 x i16> @clz_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clz_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: clz z1.h, p1/m, z1.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: clz z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%b.op = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %b)
%res = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b.op, <vscale x 8 x i16> %a
@@ -79,9 +67,7 @@ define <vscale x 8 x i16> @clz_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
define <vscale x 4 x i32> @clz_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clz_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: clz z1.s, p1/m, z1.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: clz z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%b.op = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %b)
%res = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b.op, <vscale x 4 x i32> %a
@@ -91,9 +77,7 @@ define <vscale x 4 x i32> @clz_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
define <vscale x 2 x i64> @clz_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clz_nxv2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: clz z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: clz z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%b.op = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> %b)
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b.op, <vscale x 2 x i64> %a
@@ -103,9 +87,7 @@ define <vscale x 2 x i64> @clz_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
define <vscale x 16 x i8> @cnt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cnt_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: cnt z1.b, p1/m, z1.b
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: cnt z0.b, p0/m, z1.b
; CHECK-NEXT: ret
%b.op = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> %b)
%res = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b.op, <vscale x 16 x i8> %a
@@ -115,9 +97,7 @@ define <vscale x 16 x i8> @cnt_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8
define <vscale x 8 x i16> @cnt_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: cnt_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: cnt z1.h, p1/m, z1.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: cnt z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%b.op = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> %b)
%res = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b.op, <vscale x 8 x i16> %a
@@ -127,9 +107,7 @@ define <vscale x 8 x i16> @cnt_nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
define <vscale x 4 x i32> @cnt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cnt_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: cnt z1.s, p1/m, z1.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: cnt z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%b.op = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> %b)
%res = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b.op, <vscale x 4 x i32> %a
@@ -139,9 +117,7 @@ define <vscale x 4 x i32> @cnt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
define <vscale x 2 x i64> @cnt_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cnt_nxv2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: cnt z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: cnt z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%b.op = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> %b)
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b.op, <vscale x 2 x i64> %a
@@ -151,9 +127,7 @@ define <vscale x 2 x i64> @cnt_nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
define <vscale x 2 x half> @fabs_nxv2f16(<vscale x 2 x i1> %pg, <vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fabs_nxv2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: fabs z1.h, p1/m, z1.h
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%b.op = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %b)
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x half> %b.op, <vscale x 2 x half> %a
@@ -163,9 +137,7 @@ define <vscale x 2 x half> @fabs_nxv2f16(<vscale x 2 x i1> %pg, <vscale x 2 x ha
define <vscale x 4 x half> @fabs_nxv4f16(<vscale x 4 x i1> %pg, <vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fabs_nxv4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: fabs z1.h, p1/m, z1.h
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%b.op = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %b)
%res = select <vscale x 4 x i1> %pg, <vscale x 4 x half> %b.op, <vscale x 4 x half> %a
@@ -175,9 +147,7 @@ define <vscale x 4 x half> @fabs_nxv4f16(<vscale x 4 x i1> %pg, <vscale x 4 x ha
define <vscale x 8 x half> @fabs_nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fabs_nxv8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: fabs z1.h, p1/m, z1.h
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
; CHECK-NEXT: ret
%b.op = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %b)
%res = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %b.op, <vscale x 8 x half> %a
@@ -187,9 +157,7 @@ define <vscale x 8 x half> @fabs_nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x ha
define <vscale x 2 x float> @fabs_nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fabs_nxv2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: fabs z1.s, p1/m, z1.s
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: fabs z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%b.op = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> %b)
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x float> %b.op, <vscale x 2 x float> %a
@@ -199,9 +167,7 @@ define <vscale x 2 x float> @fabs_nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x f
define <vscale x 4 x float> @fabs_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fabs_nxv4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: fabs z1.s, p1/m, z1.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: fabs z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%b.op = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %b)
%res = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %b.op, <vscale x 4 x float> %a
@@ -211,9 +177,7 @@ define <vscale x 4 x float> @fabs_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x f
define <vscale x 2 x double> @fabs_nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fabs_nxv2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: fabs z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: fabs z0.d, p0/m, z1.d
; CHECK-NEXT: ret
%b.op = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %b)
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %b.op, <vscale x 2 x double> %a
@@ -256,9 +220,7 @@ define <vscale x 8 x bfloat> @fabs_nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x
define <vscale x 2 x float> @fcvt_nxv2f16_to_nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fcvt_nxv2f16_to_nxv2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: fcvt z1.s, p1/m, z1.h
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: fcvt z0.s, p0/m, z1.h
; CHECK-NEXT: ret
%b.op = fpext <vscale x 2 x half> %b to <vscale x 2 x float>
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x float> %b.op, <vscale x 2 x float> %a
@@ -268,9 +230,7 @@ define <vscale x 2 x float> @fcvt_nxv2f16_to_nxv2f32(<vscale x 2 x i1> %pg, <vsc
define <vscale x 2 x double> @fcvt_nxv2f16_to_nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fcvt_nxv2f16_to_nxv2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: fcvt z1.d, p1/m, z1.h
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: fcvt z0.d, p0/m, z1.h
; CHECK-NEXT: ret
%b.op = fpext <vscale x 2 x half> %b to <vscale x 2 x double>
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %b.op, <vscale x 2 x double> %a
@@ -280,9 +240,7 @@ define <vscale x 2 x double> @fcvt_nxv2f16_to_nxv2f64(<vscale x 2 x i1> %pg, <vs
define <vscale x 4 x float> @fcvt_nxv4f16_to_nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fcvt_nxv4f16_to_nxv4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: fcvt z1.s, p1/m, z1.h
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: fcvt z0.s, p0/m, z1.h
; CHECK-NEXT: ret
%b.op = fpext <vscale x 4 x half> %b to <vscale x 4 x float>
%res = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %b.op, <vscale x 4 x float> %a
@@ -292,9 +250,7 @@ define <vscale x 4 x float> @fcvt_nxv4f16_to_nxv4f32(<vscale x 4 x i1> %pg, <vsc
define <vscale x 2 x half> @fcvt_nxv2f32_to_nxv2f16(<vscale x 2 x i1> %pg, <vscale x 2 x half> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fcvt_nxv2f32_to_nxv2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: fcvt z1.h, p1/m, z1.s
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: fcvt z0.h, p0/m, z1.s
; CHECK-NEXT: ret
%b.op = fptrunc <vscale x 2 x float> %b to <vscale x 2 x half>
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x half> %b.op, <vscale x 2 x half> %a
@@ -304,9 +260,7 @@ define <vscale x 2 x half> @fcvt_nxv2f32_to_nxv2f16(<vscale x 2 x i1> %pg, <vsca
define <vscale x 2 x double> @fcvt_nxv2f32_to_nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fcvt_nxv2f32_to_nxv2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: fcvt z1.d, p1/m, z1.s
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: fcvt z0.d, p0/m, z1.s
; CHECK-NEXT: ret
%b.op = fpext <vscale x 2 x float> %b to <vscale x 2 x double>
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %b.op, <vscale x 2 x double> %a
@@ -316,9 +270,7 @@ define <vscale x 2 x double> @fcvt_nxv2f32_to_nxv2f64(<vscale x 2 x i1> %pg, <vs
define <vscale x 2 x bfloat> @fcvt_nxv2f32_to_nxv2bf16(<vscale x 2 x i1> %pg, <vscale x 2 x bfloat> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fcvt_nxv2f32_to_nxv2bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: bfcvt z1.h, p1/m, z1.s
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
; CHECK-NEXT: ret
%b.op = fptrunc <vscale x 2 x float> %b to <vscale x 2 x bfloat>
%res = select <vscale x 2 x i1> %pg, <vscale x 2 x bfloat> %b.op, <vscale x 2 x bfloat> %a
@@ -328,9 +280,7 @@ define <vscale x 2 x bfloat> @fcvt_nxv2f32_to_nxv2bf16(<vscale x 2 x i1> %pg, <v
define <vscale x 4 x half> @fcvt_nxv4f32_to_nxv4f16(<vscale x 4 x i1> %pg, <vscale x 4 x half> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcvt_nxv4f32_to_nxv4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: fcvt z1.h, p1/m, z1.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: fcvt z0.h, p0/m, z1.s
; CHECK-NEXT: ret
%b.op = fptrunc <vscale x 4 x float> %b to <vscale x 4 x half>
%res = select <vscale x 4 x i1> %pg, <vscale x 4 x half> %b.op, <vscale x 4 x half> %a
@@ -340,9 +290,7 @@ define <vscale x 4 x half> @fcvt_nxv4f32_to_nxv4f16(<vscale x 4 x i1> %pg, <vsca
define <vscale x 4 x bfloat> @fcvt_nxv4f32_to_nxv4bf16(<vscale x 4 x i1> %pg, <vscale x 4 x bfloat> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcvt_nxv4f32_to_nxv4bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: bfcvt z1.h, p1/m, z1.s
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
; CHECK-NEXT: ret
%b.op = fptrunc <vscale x 4 x float> %b to <vscale x 4 x bfloat>
%res = select <vscale x 4 x i1> %pg, <vscale x 4 x bfloat> %b.op, <vscale x 4 x bfloat> %a
@@ -352,9 +300,7 @@ define <vscale x 4 x bfloat> @fcvt_nxv4f32_to_nxv4bf16(<vscale x 4 x i1> %pg, <v
define <vscale x 2 x half> @fcvt_nxv2f64_to_nxv2f16(<vscale x 2 x i1> %pg, <vscale x 2 x half> %a...
[truncated]
|
// -> merge_pasthru_op A, B,{Bn,} C
if (isAllActivePredicate(DAG, IfTrue->getOperand(0)) ||
    IfTrue->getOperand(IfTrue.getNumOperands() - 1).isUndef() ||
    IfTrue->getOperand(0) == N0) {
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there any tests where isAllActivePredicate(DAG, IfTrue->getOperand(0))
is not true? From what I can tell, all of the affected tests in sve-merging-unary.ll were using ptrue
before this change & still pass if I try removing the other conditions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Whilst striving to extend the isel test coverage I forgot to test the DAG combine itself. I'll add tests to trigger the three cases.
vselect A, (merge_pasthru_op all_active, B,{Bn,} -), C
vselect A, (merge_pasthru_op -, B,{Bn,} undef), C
vselect A, (merge_pasthru_op A, B,{Bn,} -), C
-> merge_pasthru_op A, B,{Bn,} C