Make kernels depend on each other
mehmetyusufoglu committed Nov 7, 2024
1 parent 8fefd70 commit 3777a36
Showing 1 changed file with 23 additions and 19 deletions.
42 changes: 23 additions & 19 deletions benchmarks/babelstream/src/babelStreamMainTest.cpp
@@ -22,7 +22,7 @@
  * Can be run with custom arguments as well as catch2 arguments
  * Run with Custom arguments:
  * ./babelstream --array-size=33554432 --number-runs=100
- * Runt with default array size and num runs:
+ * Run with default array size and num runs:
  * ./babelstream
  * Run with Catch2 arguments and defaul arrary size and num runs:
  * ./babelstream --success
@@ -76,12 +76,12 @@ struct CopyKernel
     //! \tparam T The data type
     //! \param acc The accelerator to be executed on.
     //! \param a Pointer for vector a
-    //! \param b Pointer for vector b
+    //! \param c Pointer for vector c
     template<typename TAcc, typename T>
-    ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T* b) const
+    ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T* c) const
     {
         auto const [index] = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
-        b[index] = a[index];
+        c[index] = a[index];
     }
 };

@@ -92,14 +92,14 @@ struct MultKernel
     //! \tparam TAcc The accelerator environment to be executed on.
     //! \tparam T The data type
     //! \param acc The accelerator to be executed on.
-    //! \param a Pointer for vector a
+    //! \param c Pointer for vector c
     //! \param b Pointer for result vector b
     template<typename TAcc, typename T>
-    ALPAKA_FN_ACC void operator()(TAcc const& acc, T* const a, T* b) const
+    ALPAKA_FN_ACC void operator()(TAcc const& acc, T* const c, T* b) const
     {
         const T scalar = static_cast<T>(scalarVal);
         auto const [i] = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
-        b[i] = scalar * a[i];
+        b[i] = scalar * c[i];
     }
 };

@@ -132,11 +132,11 @@ struct TriadKernel
     //! \param b Pointer for vector b
     //! \param c Pointer for result vector c
     template<typename TAcc, typename T>
-    ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T const* b, T* c) const
+    ALPAKA_FN_ACC void operator()(TAcc const& acc, T* a, T const* b, T const* c) const
     {
         const T scalar = static_cast<T>(scalarVal);
         auto const [i] = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
-        c[i] = a[i] + scalar * b[i];
+        a[i] = b[i] + scalar * c[i];
     }
 };
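Taken together, the Copy, Mult, and Triad changes make each kernel consume the output of an earlier one: Copy writes C from A, Mult writes B from C, Add writes C from A and B, and Triad now writes its result back into A. A minimal standalone sketch of that per-element data flow (plain C++, not taken from the changed file; valA and scalarVal are hypothetical stand-ins for the test's actual constants):

#include <cassert>

int main()
{
    // Hypothetical initial values, not the benchmark's real constants.
    double const valA = 1.0;
    double const scalarVal = 2.0;

    double a = valA, b = 0.0, c = 0.0;
    c = a;                 // CopyKernel:  c[i] = a[i]
    b = scalarVal * c;     // MultKernel:  b[i] = scalar * c[i]
    c = a + b;             // AddKernel:   c[i] = a[i] + b[i]
    a = b + scalarVal * c; // TriadKernel: a[i] = b[i] + scalar * c[i]

    // With valA = 1 and scalar = 2: b = 2, c = 3, a = 2 + 2 * 3 = 8.
    assert(b == 2.0 && c == 3.0 && a == 8.0);
    return 0;
}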

@@ -151,6 +151,7 @@ struct DotKernel
     //! \param a Pointer for vector a
     //! \param b Pointer for vector b
     //! \param sum Pointer for result vector consisting sums for each block
+    //! \param arraySize the size of the array
     template<typename TAcc, typename T>
     ALPAKA_FN_ACC void operator()(TAcc const& acc, T const* a, T const* b, T* sum, alpaka::Idx<TAcc> arraySize) const
     {
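DotKernel produces one partial sum per block, which the host later reduces with std::reduce, so the quantity the dot test ultimately checks is the plain inner product of a and b. A host-side reference sketch in standard C++ (the kernel's block-level reduction itself is not shown in this hunk):

#include <numeric>
#include <vector>

// Reference for the value the dot test checks: sum over i of a[i] * b[i].
// The device kernel computes per-block partial sums of the same quantity.
template<typename T>
T dotReference(std::vector<T> const& a, std::vector<T> const& b)
{
    return std::transform_reduce(a.begin(), a.end(), b.begin(), T{0});
}

int main()
{
    std::vector<double> const a(4, 8.0); // uniform values, as after the kernel chain
    std::vector<double> const b(4, 2.0);
    return dotReference(a, b) == 4.0 * 8.0 * 2.0 ? 0 : 1;
}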
@@ -316,23 +317,23 @@ void testKernels()
         },
         "InitKernel");

-    // Test the copy-kernel. Copy A one by one to B.
+    // Test the copy-kernel. Copy A one by one to C.
     measureKernelExec(
-        [&]() { alpaka::exec<Acc>(queue, workDivCopy, CopyKernel(), bufAccInputAPtr, bufAccInputBPtr); },
+        [&]() { alpaka::exec<Acc>(queue, workDivCopy, CopyKernel(), bufAccInputAPtr, bufAccOutputCPtr); },
         "CopyKernel");

-    // Test the scaling-kernel. Calculate B=scalar*A.
+    // Test the scaling-kernel. Calculate B=scalar*C. Where C = A.
     measureKernelExec(
-        [&]() { alpaka::exec<Acc>(queue, workDivMult, MultKernel(), bufAccInputAPtr, bufAccInputBPtr); },
+        [&]() { alpaka::exec<Acc>(queue, workDivMult, MultKernel(), bufAccOutputCPtr, bufAccInputBPtr); },
         "MultKernel");

-    // Test the addition-kernel. Calculate C=A+B. Where B=scalar*A.
+    // Test the addition-kernel. Calculate C=A+B. Where B=scalar*C or B=scalar*A.
     measureKernelExec(
         [&]()
         { alpaka::exec<Acc>(queue, workDivAdd, AddKernel(), bufAccInputAPtr, bufAccInputBPtr, bufAccOutputCPtr); },
         "AddKernel");

-    // Test the Triad-kernel. Calculate C=A+scalar*B where B=scalar*A.
+    // Test the Triad-kernel. Calculate A=B+scalar*C. Where C is A+scalar*A.
     measureKernelExec(
         [&]()
         { alpaka::exec<Acc>(queue, workDivTriad, TriadKernel(), bufAccInputAPtr, bufAccInputBPtr, bufAccOutputCPtr); },
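Each launch above is wrapped in measureKernelExec, whose definition lies outside this diff. Timing an asynchronously enqueued kernel requires waiting on the queue before stopping the clock; the sketch below shows that general pattern only and is an assumed stand-in, not the file's actual helper:

#include <chrono>
#include <iostream>
#include <string>

#include <alpaka/alpaka.hpp>

// Illustrative timing helper: runs a callable that enqueues work, then waits
// on the queue so the measurement covers the device execution time.
template<typename TQueue, typename TLaunch>
double timeKernel(TQueue& queue, TLaunch&& launch, std::string const& name)
{
    auto const start = std::chrono::high_resolution_clock::now();
    launch();            // e.g. [&] { alpaka::exec<Acc>(queue, workDiv, Kernel{}, ...); }
    alpaka::wait(queue); // block until the enqueued kernel has finished
    auto const stop = std::chrono::high_resolution_clock::now();
    double const seconds = std::chrono::duration<double>(stop - start).count();
    std::cout << name << ": " << seconds << " s\n";
    return seconds;
}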
@@ -350,9 +351,9 @@ void testKernels()
     DataType initVal{static_cast<DataType>(0.0)};
     DataType sumErrC{initVal}, sumErrB{initVal}, sumErrA{initVal};

-    auto const expectedC = static_cast<DataType>(valA + scalarVal * scalarVal * valA);
+    auto const expectedC = static_cast<DataType>(valA + scalarVal * valA);
     auto const expectedB = static_cast<DataType>(scalarVal * valA);
-    auto const expectedA = static_cast<DataType>(valA);
+    auto const expectedA = static_cast<DataType>(expectedB + static_cast<DataType>(scalarVal) * expectedC);

     // sum of the errors for each array
     for(Idx i = 0; i < arraySize; ++i)
@@ -363,6 +364,7 @@
     }

     // Normalize and compare sum of the errors
+    // Use a different equality check if floating point errors exceed precision of FuzzyEqual function
     REQUIRE(FuzzyEqual(sumErrC / static_cast<DataType>(arraySize) / expectedC, static_cast<DataType>(0.0)));
     REQUIRE(FuzzyEqual(sumErrB / static_cast<DataType>(arraySize) / expectedB, static_cast<DataType>(0.0)));
     REQUIRE(FuzzyEqual(sumErrA / static_cast<DataType>(arraySize) / expectedA, static_cast<DataType>(0.0)));
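The three REQUIRE lines normalize the accumulated absolute error by the array size and by the expected value before comparing against zero. A compact sketch of that check (the loop body that fills sumErrA/B/C is outside this hunk; the sketch assumes it sums absolute differences):

#include <cmath>
#include <vector>

// Mean relative error of a uniform-valued array against its expected value;
// the test requires this to be (fuzzily) zero for each of A, B and C.
template<typename T>
T meanRelativeError(std::vector<T> const& x, T expected)
{
    T sumErr{0};
    for(T const v : x)
        sumErr += std::fabs(v - expected); // assumed shape of the loop body above
    return sumErr / static_cast<T>(x.size()) / expected;
}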
@@ -401,8 +403,10 @@ void testKernels()

     DataType const* sumPtr = std::data(bufHostSumPerBlock);
     auto const result = std::reduce(sumPtr, sumPtr + gridBlockExtent, DataType{0});
-    // Since vector values are 1, dot product should be identical to arraySize
-    REQUIRE(FuzzyEqual(static_cast<DataType>(result), static_cast<DataType>(arraySize * 2)));
+
+    // dot product should be identical to arraySize*valA*valB
+    // Use a different equality check if floating point errors exceed precision of FuzzyEqual function
+    REQUIRE(FuzzyEqual(static_cast<DataType>(result), static_cast<DataType>(arraySize) * expectedA * expectedB));
     // Add workdiv to the list of workdivs to print later
     metaData.setItem(BMInfoDataType::WorkDivDot, workDivDot);
     }
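Since every element of a and b holds the same value once the chained kernels have run, the dot product collapses to arraySize * expectedA * expectedB, which is exactly what the new REQUIRE checks. Continuing the hypothetical numbers from the earlier sketch (valA = 1, scalar = 2, so expectedA = 8 and expectedB = 2):

#include <cassert>
#include <cstddef>

int main()
{
    std::size_t const arraySize = 4; // hypothetical; the benchmark's default is much larger
    double const expectedA = 8.0, expectedB = 2.0;

    double dot = 0.0;
    for(std::size_t i = 0; i < arraySize; ++i)
        dot += expectedA * expectedB; // a[i] * b[i] with uniform element values
    assert(dot == static_cast<double>(arraySize) * expectedA * expectedB); // 4 * 8 * 2 = 64
    return 0;
}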