Skip to content

Commit 8201e09

Browse files
MiloszSkobejko, igcbot
authored and committed
WaveShuffleIndexSinking pass: form ShuffleGroups per basic block
The WaveShuffleIndexSinking pass did not consider program control flow, so it could produce invalid IR in which an instruction does not dominate all of its users. Because it merged instructions globally across the function, a merged value could be used while still undefined if basic blocks were entered in a different order than the pass assumed, leading to incorrect calculations. ShuffleGroups are now formed by channel and only within the same basic block.
1 parent 176b097 commit 8201e09

File tree

2 files changed

+55
-7
lines changed

2 files changed

+55
-7
lines changed

IGC/Compiler/Optimizer/WaveShuffleIndexSinking.cpp

+9-7
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ namespace IGC
367367
static bool isHoistable( BinaryOperator* inst );
368368
static bool isHoistableOverAnchor( BinaryOperator* instToHoist, BinaryOperator* anchorInst );
369369
Function& F;
370-
DenseMap<Value*, SmallVector<ShuffleGroup, 4>> ShuffleGroupMap;
370+
DenseMap<std::pair<BasicBlock*, Value*>, SmallVector<ShuffleGroup, 4>> ShuffleGroupMap;
371371
DenseSet<WaveShuffleIndexIntrinsic*> Visited;
372372
};
373373

@@ -451,7 +451,7 @@ bool WaveShuffleIndexSinkingImpl::splitWaveShuffleIndexes()
451451
bool WaveShuffleIndexSinkingImpl::mergeWaveShuffleIndexes()
452452
{
453453
// Map from (BasicBlock, Source) to (Map from Lane to list of duplicate instructions)
454-
DenseMap<Value*, DenseMap<ConstantInt*, SmallVector<WaveShuffleIndexIntrinsic*>>> mergeMap;
454+
DenseMap<std::pair<BasicBlock*, Value*>, DenseMap<ConstantInt*, SmallVector<WaveShuffleIndexIntrinsic*>>> mergeMap;
455455
for( auto& BB : F )
456456
{
457457
for( auto& I : BB )
@@ -460,7 +460,7 @@ bool WaveShuffleIndexSinkingImpl::mergeWaveShuffleIndexes()
460460
{
461461
if( auto* constantChannel = dyn_cast<ConstantInt>( waveShuffleInst->getChannel() ) )
462462
{
463-
mergeMap[ waveShuffleInst->getSrc() ][ constantChannel ].push_back( waveShuffleInst );
463+
mergeMap[ {&BB, waveShuffleInst->getSrc()} ][ constantChannel ].push_back( waveShuffleInst );
464464
}
465465
}
466466
}
@@ -505,11 +505,13 @@ void WaveShuffleIndexSinkingImpl::gatherShuffleGroups()
505505
// Save compute and do not re-process/ create a new ShuffleGroup
506506
continue;
507507
}
508-
if( ShuffleGroupMap.count( waveShuffleInst->getSrc() ) )
508+
509+
std::pair<BasicBlock*, Value*> bbShuffleGroup = { &BB, waveShuffleInst->getSrc() };
510+
if ( ShuffleGroupMap.count( bbShuffleGroup ) )
509511
{
510512
// Found existing group(s) with the same basic block and source, try to match with one of the groups
511513
bool match = false;
512-
for( auto& shuffleGroup : ShuffleGroupMap[ waveShuffleInst->getSrc() ] )
514+
for (auto& shuffleGroup : ShuffleGroupMap[ bbShuffleGroup ] )
513515
{
514516
if( shuffleGroup.match( waveShuffleInst ) )
515517
{
@@ -521,13 +523,13 @@ void WaveShuffleIndexSinkingImpl::gatherShuffleGroups()
521523
// create new ShuffleGroup since no suitable match was found
522524
if( !match )
523525
{
524-
ShuffleGroupMap[ waveShuffleInst->getSrc() ].emplace_back( waveShuffleInst );
526+
ShuffleGroupMap[ {&BB, waveShuffleInst->getSrc()} ].emplace_back( waveShuffleInst );
525527
}
526528
}
527529
else
528530
{
529531
// create new ShuffleGroup for broadcast operations
530-
ShuffleGroupMap[ waveShuffleInst->getSrc() ].emplace_back( waveShuffleInst );
532+
ShuffleGroupMap[ {&BB, waveShuffleInst->getSrc()} ].emplace_back( waveShuffleInst );
531533
}
532534
}
533535
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; RUN: igc_opt --typed-pointers -igc-wave-shuffle-index-sinking -S < %s | FileCheck %s
9+
; ------------------------------------------------
10+
; WaveShuffleIndexSinking
11+
;
12+
; This test checks that identical WaveShuffleIndex instructions are not merged when they are
13+
; in separate basic blocks. Merging them could produce invalid LLVM IR, because the single
14+
; remaining instruction would not dominate all of its users.
15+
; ------------------------------------------------
16+
17+
define void @test_compare_cases(i32* %dst0, i32* %dst1, i1 %condition) {
18+
entry:
19+
br i1 %condition, label %bb0, label %bb1
20+
21+
bb0:
22+
; CHECK: [[SHUFFLE0:%.*]] = call i8 @llvm.genx.GenISA.WaveShuffleIndex.i8(i8 1, i32 0, i32 0)
23+
%simdShuffle0 = call i8 @llvm.genx.GenISA.WaveShuffleIndex.i8(i8 1, i32 0, i32 0)
24+
%and0 = and i8 %simdShuffle0, 1
25+
%cmp0 = icmp eq i8 %and0, 0
26+
%zxt0 = zext i1 %cmp0 to i32
27+
store i32 %zxt0, i32* %dst0
28+
br label %exit
29+
30+
bb1:
31+
; CHECK: [[SHUFFLE1:%.*]] = call i8 @llvm.genx.GenISA.WaveShuffleIndex.i8(i8 1, i32 0, i32 0)
32+
%simdShuffle1 = call i8 @llvm.genx.GenISA.WaveShuffleIndex.i8(i8 1, i32 0, i32 0)
33+
%and1 = and i8 %simdShuffle1, 1
34+
%cmp1 = icmp eq i8 %and1, 0
35+
%zxt1 = zext i1 %cmp1 to i32
36+
store i32 %zxt1, i32* %dst1
37+
br label %exit
38+
39+
exit:
40+
ret void
41+
}
42+
43+
; Function Attrs: convergent nounwind readnone
44+
declare i8 @llvm.genx.GenISA.WaveShuffleIndex.i8(i8, i32, i32) #0
45+
46+
attributes #0 = { convergent nounwind readnone }

0 commit comments

Comments
 (0)