From fea1d8e00ecdda2f0ead3c64b27a66fecd35b2b4 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Wed, 27 Nov 2024 15:59:46 +0100
Subject: [PATCH 1/2] JIT: Allow strength reducing to GCD of IVs

This adds support for strength reduction to create a new primary IV that
is the GCD of several IVs found in the loop. When the same index is used
to access arrays of different sizes we will often see the IV being
multiplied by different values; however, it is usually still profitable
to strength reduce to the GCD of the step values and then "recover" the
final IV by scaling.

Example:
```csharp
public static void Foo()
{
    string puzzle = "003020600900305001001806400008102900700000008006708200002609500800203009005010300";
    int[] board = new int[81];
    for (int i = 0; i < puzzle.Length; i++)
    {
        board[i] = puzzle[i] - '0';
    }
}
```

Codegen diff for loop:
```diff
        xor ecx, ecx
+       mov edx, 81
 G_M24659_IG03:
-       mov edx, ecx
-       movzx r8, word ptr [rbx+2*rdx+0x10]
+       movzx r8, word ptr [rbx+rcx+0x10]
        add r8d, -48
-       mov dword ptr [rax+4*rdx+0x10], r8d
-       inc ecx
-       cmp ecx, 81
-       jl SHORT G_M24659_IG03
-       ;; size=24 bbWeight=3.96 PerfScore 19.80
+       mov dword ptr [rax+2*rcx+0x10], r8d
+       add rcx, 2
+       dec edx
+       jne SHORT G_M24659_IG03
+       ;; size=23 bbWeight=3.96 PerfScore 18.81
```

A similar diff in
``System.Linq.Enumerable+EnumerableSorter`2[System.__Canon,System.Decimal]:ComputeKeys(System.__Canon[],int)``:
```diff
+       xor edx, edx
 G_M57524_IG05:
-       mov edx, r15d
-       mov r8, gword ptr [rbx+8*rdx+0x10]
+       mov r8, gword ptr [rbx+rdx+0x10]
        vmovups xmm0, xmmword ptr [r8+0x20]
        vmovups xmmword ptr [rsp+0x28], xmm0
-       shl rdx, 4
        vmovups xmm0, xmmword ptr [rsp+0x28]
-       vmovups xmmword ptr [r14+rdx+0x10], xmm0
-       inc r15d
-       cmp r13d, r15d
-       jg SHORT G_M57524_IG05
-       ;; size=45 bbWeight=75.73 PerfScore 1079.10
+       vmovups xmmword ptr [r14+2*rdx+0x10], xmm0
+       add rdx, 8
+       dec r13d
+       jne SHORT G_M57524_IG05
+       ;; size=39 bbWeight=75.73 PerfScore 1022.31
```

Fix #102068
Fix #105241
---
src/coreclr/jit/inductionvariableopts.cpp | 186 +++++++++++++++++++++-
 1 file changed, 183 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp
index 06cae5799a0da..8a3859c47dd29 100644
--- a/src/coreclr/jit/inductionvariableopts.cpp
+++ b/src/coreclr/jit/inductionvariableopts.cpp
@@ -1376,6 +1376,10 @@ class StrengthReductionContext
     void AdvanceCursors(ArrayStack<CursorInfo>* cursors, ArrayStack<CursorInfo>* nextCursors);
     void ExpandStoredCursors(ArrayStack<CursorInfo>* cursors, ArrayStack<CursorInfo>* otherCursors);
     bool CheckAdvancedCursors(ArrayStack<CursorInfo>* cursors, ScevAddRec** nextIV);
+    ScevAddRec* ComputeRephrasableIV(ScevAddRec* iv1, ScevAddRec* iv2);
+    template <typename T>
+    ScevAddRec* ComputeRephrasableIVWithDifferentStep(ScevAddRec* iv1, ScevAddRec* iv2);
+    GenTree* RephraseIV(ScevAddRec* iv, ScevAddRec* sourceIV, GenTree* sourceTree);
     bool StaysWithinManagedObject(ArrayStack<CursorInfo>* cursors, ScevAddRec* addRec);
     bool TryReplaceUsesWithNewPrimaryIV(ArrayStack<CursorInfo>* cursors, ScevAddRec* iv);
     BasicBlock* FindUpdateInsertionPoint(ArrayStack<CursorInfo>* cursors, Statement** afterStmt);
@@ -1509,6 +1513,10 @@ bool StrengthReductionContext::TryStrengthReduce()
                 break;
             }
 
+            JITDUMP(" Next IV is: ");
+            DBEXEC(VERBOSE, nextIV->Dump(m_comp));
+            JITDUMP("\n");
+
             assert(nextIV != nullptr);
 
             if (varTypeIsGC(nextIV->Type) && !StaysWithinManagedObject(nextCursors, nextIV))
@@ -1950,6 +1958,30 @@ void StrengthReductionContext::ExpandStoredCursors(ArrayStack<CursorInfo>* curso
     }
 }
 
+//------------------------------------------------------------------------
+// Gcd: Compute the greatest common divisor of two values.
+//
+// Parameters:
+//   a - First value
+//   b - Second value
+//
+// Returns:
+//   Greatest common divisor.
+//
+template <typename T>
+static T Gcd(T a, T b)
+{
+    while (a != 0)
+    {
+        T newA = b % a;
+        T newB = a;
+        a = newA;
+        b = newB;
+    }
+
+    return b;
+}
+
 //------------------------------------------------------------------------
 // CheckAdvancedCursors: Check whether the specified advanced cursors still
 // represent a valid set of cursors to introduce a new primary IV for.
@@ -1975,10 +2007,20 @@ bool StrengthReductionContext::CheckAdvancedCursors(ArrayStack<CursorInfo>* curs
     {
         CursorInfo& cursor = cursors->BottomRef(i);
 
-        if ((cursor.IV != nullptr) && ((*nextIV == nullptr) || Scev::Equals(cursor.IV, *nextIV)))
+        if (cursor.IV != nullptr)
         {
-            *nextIV = cursor.IV;
-            continue;
+            if (*nextIV == nullptr)
+            {
+                *nextIV = cursor.IV;
+                continue;
+            }
+
+            ScevAddRec* rephrasableAddRec = ComputeRephrasableIV(cursor.IV, *nextIV);
+            if (rephrasableAddRec != nullptr)
+            {
+                *nextIV = rephrasableAddRec;
+                continue;
+            }
         }
 
         JITDUMP(" [%d] does not match; will not advance\n", i);
@@ -1988,6 +2030,143 @@ bool StrengthReductionContext::CheckAdvancedCursors(ArrayStack<CursorInfo>* curs
     return *nextIV != nullptr;
 }
 
+//------------------------------------------------------------------------
+// ComputeRephrasableIVWithDifferentStep:
+//   Compute an IV that both "iv1" and "iv2" can be rephrased in terms of, when
+//   their step values do not match.
+//
+// Parameters:
+//   iv1 - First IV
+//   iv2 - Second IV
+//
+// Returns:
+//   The IV, or nullptr if no IV could be computed.
+//
+template <typename T>
+ScevAddRec* StrengthReductionContext::ComputeRephrasableIVWithDifferentStep(ScevAddRec* iv1, ScevAddRec* iv2)
+{
+    // To rephrase the IVs we will need to scale them up. This requires the
+    // start value to be 0 since that starting value will be scaled too.
+    int64_t start;
+    if (!iv1->Start->GetConstantValue(m_comp, &start) || ((T)start != 0) ||
+        !iv2->Start->GetConstantValue(m_comp, &start) || ((T)start != 0))
+    {
+        return nullptr;
+    }
+
+    int64_t iv1Step;
+    int64_t iv2Step;
+    if (!iv1->Step->GetConstantValue(m_comp, &iv1Step) || !iv2->Step->GetConstantValue(m_comp, &iv2Step))
+    {
+        return nullptr;
+    }
+
+    T gcd = Gcd((T)iv1Step, (T)iv2Step);
+
+    // Commonly one step value divides the other.
+    if (gcd == (T)iv1Step)
+    {
+        return iv1;
+    }
+    if (gcd == (T)iv2Step)
+    {
+        return iv2;
+    }
+    if ((gcd == 1) || (gcd == -1))
+    {
+        return nullptr;
+    }
+
+    return m_scevContext.NewAddRec(iv1->Start, m_scevContext.NewConstant(iv1->Type, gcd));
+}
+
+//------------------------------------------------------------------------
+// ComputeRephrasableIV:
+//   Compute an IV that both "iv1" and "iv2" can be rephrased in terms of.
+//
+// Parameters:
+//   iv1 - First IV
+//   iv2 - Second IV
+//
+// Returns:
+//   The IV, or nullptr if no IV could be computed.
+//
+ScevAddRec* StrengthReductionContext::ComputeRephrasableIV(ScevAddRec* iv1, ScevAddRec* iv2)
+{
+    if (!Scev::Equals(iv1->Start, iv2->Start))
+    {
+        return nullptr;
+    }
+
+    if (Scev::Equals(iv1->Step, iv2->Step))
+    {
+        return iv1;
+    }
+
+    // Steps are not equal. However, if they have gcd > 1 it is still expected
+    // to be profitable to rewrite in terms of such a new IV.
+    if (iv1->Type == TYP_INT)
+    {
+        return ComputeRephrasableIVWithDifferentStep<int32_t>(iv1, iv2);
+    }
+
+    if (iv1->Type == TYP_LONG)
+    {
+        return ComputeRephrasableIVWithDifferentStep<int64_t>(iv1, iv2);
+    }
+
+    return nullptr;
+}
+
+//------------------------------------------------------------------------
+// RephraseIV:
+//   Given an IV and a source IV with a tree that computes that source IV,
+//   compute a tree that calculates "iv" based on the source IV. Requires the
+//   source IV to have been computed via ComputeRephrasableIV.
+//
+// Parameters:
+//   iv         - IV to rephrase in terms of the source IV
+//   sourceIV   - Source IV
+//   sourceTree - Tree computing the source IV
+//
+// Returns:
+//   A tree computing "iv" via "sourceTree".
+//
+GenTree* StrengthReductionContext::RephraseIV(ScevAddRec* iv, ScevAddRec* sourceIV, GenTree* sourceTree)
+{
+    assert(Scev::Equals(iv->Start, sourceIV->Start));
+
+    if (Scev::Equals(iv->Step, sourceIV->Step))
+    {
+        return sourceTree;
+    }
+
+    int64_t ivStep = 0;
+    int64_t sourceIVStep = 0;
+    if (!iv->Step->GetConstantValue(m_comp, &ivStep) || !sourceIV->Step->GetConstantValue(m_comp, &sourceIVStep))
+    {
+        unreached();
+    }
+
+    assert(iv->Type == sourceIV->Type);
+
+    if (iv->Type == TYP_INT)
+    {
+        assert((int32_t)ivStep % (int32_t)sourceIVStep == 0);
+        int32_t scale = (int32_t)ivStep / (int32_t)sourceIVStep;
+        return m_comp->gtNewOperNode(GT_MUL, TYP_INT, sourceTree, m_comp->gtNewIconNode(scale));
+    }
+
+    if (iv->Type == TYP_LONG)
+    {
+        assert(ivStep % sourceIVStep == 0);
+        int64_t scale = ivStep / sourceIVStep;
+        return m_comp->gtNewOperNode(GT_MUL, TYP_LONG, sourceTree, m_comp->gtNewIconNode(scale, TYP_LONG));
+    }
+
+    unreached();
+}
+
 //------------------------------------------------------------------------
 // StaysWithinManagedObject: Check whether the specified GC-pointer add-rec can
 // be guaranteed to be inside the same managed object for the whole loop.
@@ -2211,6 +2390,7 @@ bool StrengthReductionContext::TryReplaceUsesWithNewPrimaryIV(ArrayStack<CursorI
     {
         CursorInfo& cursor = cursors->BottomRef(i);
         GenTree* newUse = m_comp->gtNewLclVarNode(newPrimaryIV, iv->Type);
+        newUse = RephraseIV(cursor.IV, iv, newUse);
 
         JITDUMP(" Replacing use [%06u] with [%06u]. Before:\n", Compiler::dspTreeID(cursor.Tree),
                 Compiler::dspTreeID(newUse));

From d407ba2d1cc9e87cfc652812f3b55f0ed5d0580b Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Wed, 27 Nov 2024 16:58:33 +0100
Subject: [PATCH 2/2] Fix x86 build

---
 src/coreclr/jit/inductionvariableopts.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp
index 8a3859c47dd29..c714faebdea49 100644
--- a/src/coreclr/jit/inductionvariableopts.cpp
+++ b/src/coreclr/jit/inductionvariableopts.cpp
@@ -938,7 +938,7 @@ bool Compiler::optWidenPrimaryIV(FlowGraphNaturalLoop* loop,
     GenTree* initVal;
     if (initToConstant)
     {
-        initVal = gtNewIconNode((int64_t)(uint32_t)startConstant, TYP_LONG);
+        initVal = gtNewLconNode((int64_t)(uint32_t)startConstant);
     }
     else
     {
@@ -2161,7 +2161,7 @@ GenTree* StrengthReductionContext::RephraseIV(ScevAddRec* iv, ScevAddRec* source
     {
         assert(ivStep % sourceIVStep == 0);
         int64_t scale = ivStep / sourceIVStep;
-        return m_comp->gtNewOperNode(GT_MUL, TYP_LONG, sourceTree, m_comp->gtNewIconNode(scale, TYP_LONG));
+        return m_comp->gtNewOperNode(GT_MUL, TYP_LONG, sourceTree, m_comp->gtNewLconNode(scale));
     }
 
     unreached();