@@ -121,20 +121,20 @@ template <int Nominal4ByteBlockThreads, int Nominal4ByteItemsPerThread, typename
121
121
struct RegBoundScaling
{
  // Rescales a tuning expressed for 4-byte items to the actual item type T.
  //
  // ITEMS_PER_THREAD: the nominal count multiplied by 4 / max(4, sizeof(T)),
  // so types of up to 4 bytes keep the nominal count and larger types get
  // proportionally fewer items. The result is never less than 1.
  //
  // The calls to min/max are written as (::cuda::std::max)(...) on purpose:
  // wrapping the function name in parentheses prevents a function-like
  // `min`/`max` macro (e.g. the ones <windows.h> defines unless NOMINMAX is
  // set) from being expanded at the call site.
  static constexpr int ITEMS_PER_THREAD =
    (::cuda::std::max)(1, Nominal4ByteItemsPerThread * 4 / (::cuda::std::max)(4, int{sizeof(T)}));

  // BLOCK_THREADS: the nominal thread count, capped by
  // detail::max_smem_per_block / (sizeof(T) * ITEMS_PER_THREAD) rounded up to
  // the next multiple of 32.
  // NOTE(review): detail::max_smem_per_block is defined outside this block; by
  // its name it is presumably the per-block shared-memory budget in bytes, and
  // 32 is presumably the warp size -- confirm.
  static constexpr int BLOCK_THREADS = (::cuda::std::min)(
    Nominal4ByteBlockThreads,
    ::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32);
};
129
129
130
130
// Rescales a tuning expressed for 4-byte items to the actual item type T.
//
// Template parameters:
//   Nominal4ByteBlockThreads   - thread count of the 4-byte tuning
//   Nominal4ByteItemsPerThread - items per thread of the 4-byte tuning
//   T                          - item type the tuning is rescaled for
template <int Nominal4ByteBlockThreads, int Nominal4ByteItemsPerThread, typename T>
struct MemBoundScaling
{
  // ITEMS_PER_THREAD: the nominal count multiplied by 4 / sizeof(T), so
  // smaller types get more items and larger types fewer. The result is capped
  // at twice the nominal count and is never less than 1.
  //
  // The calls to min/max are written as (::cuda::std::max)(...) on purpose:
  // wrapping the function name in parentheses prevents a function-like
  // `min`/`max` macro (e.g. the ones <windows.h> defines unless NOMINMAX is
  // set) from being expanded at the call site.
  static constexpr int ITEMS_PER_THREAD = (::cuda::std::max)(
    1, (::cuda::std::min)(Nominal4ByteItemsPerThread * 4 / int{sizeof(T)}, Nominal4ByteItemsPerThread * 2));

  // BLOCK_THREADS: the nominal thread count, capped by
  // detail::max_smem_per_block / (sizeof(T) * ITEMS_PER_THREAD) rounded up to
  // the next multiple of 32.
  // NOTE(review): detail::max_smem_per_block is defined outside this block; by
  // its name it is presumably the per-block shared-memory budget in bytes, and
  // 32 is presumably the warp size -- confirm.
  static constexpr int BLOCK_THREADS = (::cuda::std::min)(
    Nominal4ByteBlockThreads,
    ::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32);
};
139
139
140
140
#endif // Do not document
0 commit comments