-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[SLP][REVEC] Fix insertelement legality checks #146921
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Gaëtan Bossu (gbossu) ChangesThe current code assumes that all the values in VL are valid instructions, while it is possible to get poison. Full diff: https://github.com/llvm/llvm-project/pull/146921.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0941bf61953f1..921668d0d9c43 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9060,6 +9060,10 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
// different vectors.
ValueSet SourceVectors;
for (Value *V : VL) {
+ if (isa<PoisonValue>(V)) {
+ LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement/poison vector.\n");
+ return TreeEntry::NeedToGather;
+ }
SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
assert(getElementIndex(V) != std::nullopt &&
"Non-constant or undef index?");
diff --git a/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll
new file mode 100644
index 0000000000000..374d9ed6efe98
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-1000 --debug %s | FileCheck %s
+
+; The 4 stores can be re-vectorised, make sure the poison sources
+; are safely handled when trying to vectorise [ %0, poison, poison, %1 ]
+define void @test_missing_lanes_1_2(ptr %ptr, i32 %val0, i32 %val1) {
+; CHECK-LABEL: @test_missing_lanes_1_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
+; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
+; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
+; CHECK-NEXT: store <8 x i32> poison, ptr [[GETELEMENTPTR1]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i32, ptr [[PTR]], i64 12
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[GETELEMENTPTR3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
+ %1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
+
+ %getelementptr0 = getelementptr i32, ptr %ptr, i64 0
+ store <4 x i32> %0, ptr %getelementptr0, align 4
+ %getelementptr1 = getelementptr i32, ptr %ptr, i64 4
+ store <4 x i32> poison, ptr %getelementptr1, align 4
+ %getelementptr2 = getelementptr i32, ptr %ptr, i64 8
+ store <4 x i32> poison, ptr %getelementptr2, align 4
+ %getelementptr3 = getelementptr i32, ptr %ptr, i64 12
+ store <4 x i32> %1, ptr %getelementptr3, align 4
+
+ ret void
+}
+
+; The 4 stores can be re-vectorised, make sure the poison sources
+; are safely handled when trying to vectorise [ %0, poison, %1, poison ]
+define void @test_missing_lanes_1_3(ptr %ptr, i32 %val0, i32 %val1) {
+; CHECK-LABEL: @test_missing_lanes_1_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
+; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
+; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP1]], i64 4)
+; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[GETELEMENTPTR1]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i32, ptr [[PTR]], i64 12
+; CHECK-NEXT: store <4 x i32> poison, ptr [[GETELEMENTPTR3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
+ %1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
+
+ %getelementptr0 = getelementptr i32, ptr %ptr, i64 0
+ store <4 x i32> %0, ptr %getelementptr0, align 4
+ %getelementptr1 = getelementptr i32, ptr %ptr, i64 4
+ store <4 x i32> poison, ptr %getelementptr1, align 4
+ %getelementptr2 = getelementptr i32, ptr %ptr, i64 8
+ store <4 x i32> %1, ptr %getelementptr2, align 4
+ %getelementptr3 = getelementptr i32, ptr %ptr, i64 12
+ store <4 x i32> poison, ptr %getelementptr3, align 4
+
+ ret void
+}
+
+; This could be re-vectorised to use a store <8 x i32> instruction.
+define void @test_valid_value_operands(ptr %ptr, i32 %val0, i32 %val1) {
+; CHECK-LABEL: @test_valid_value_operands(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL0:%.*]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[VAL1:%.*]], i32 0
+; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0
+; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4
+; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[GETELEMENTPTR1]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val0, i32 0
+ %1 = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 %val1, i32 0
+
+ %getelementptr0 = getelementptr i32, ptr %ptr, i64 0
+ store <4 x i32> %0, ptr %getelementptr0, align 4
+ %getelementptr1 = getelementptr i32, ptr %ptr, i64 4
+ store <4 x i32> %1, ptr %getelementptr1, align 4
+ ret void
+}
|
|
The current code assumes that all the values in VL are valid instructions, while it is possible to get poison.
61f35e0
to
b2bbc9d
Compare
Hi @HanKuanChen , thanks for the review! Should I try and apply your suggestion? So far I just created a |
No. As you said, SLP assumes that VL consists of InsertElement and comes from the same source. In addition, |
The current code assumes that all the values in VL are valid instructions, while it is possible to get poison.