Skip to content

Commit

Permalink
Merge branch 'staging/ctk_12.0' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
alliepiper committed Dec 13, 2022
2 parents b583868 + f31d712 commit 1651ba1
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 19 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ See the [changelog](CHANGELOG.md) for details about specific releases.
| CUB Release | Included In |
| ------------------------- | --------------------------------------- |
| 2.0.1 | CUDA Toolkit 12.0 |
| 2.0.0 | TBD |
| 1.17.2 | TBD |
| 1.17.1 | TBD |
Expand Down
2 changes: 1 addition & 1 deletion cmake/CubCudaConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ enable_language(CUDA)
# Architecture options:
#

set(all_archs 35 37 50 52 53 60 61 62 70 72 75 80 86)
set(all_archs 35 37 50 52 53 60 61 62 70 72 75 80 86 90)
set(arch_message "CUB: Explicitly enabled compute architectures:")

# Thrust sets up the architecture flags in CMAKE_CUDA_FLAGS already. Just
Expand Down
9 changes: 9 additions & 0 deletions cub/detail/detect_cuda_runtime.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,15 @@ namespace detail

#endif // CUB_RUNTIME_FUNCTION predefined

// Selects which CDP flavor the rest of CUB should assume by defining exactly
// one of CUB_DETAIL_CDPv1 / CUB_DETAIL_CDPv2. When CUB_RDC_ENABLED is not
// defined, neither macro is defined and code testing them takes its
// "no CDP" path.
#ifdef CUB_RDC_ENABLED
// Detect available version of CDP:
// - CDPv1 when __CUDACC_VER_MAJOR__ is below 12, or when the user opts in by
//   defining CUDA_FORCE_CDP1_IF_SUPPORTED.
// - CDPv2 otherwise.
#if __CUDACC_VER_MAJOR__ < 12 || defined(CUDA_FORCE_CDP1_IF_SUPPORTED)
#define CUB_DETAIL_CDPv1
#else
#define CUB_DETAIL_CDPv2
#endif // CDP version
#endif // CUB_RDC_ENABLED

#endif // Do not document

} // namespace detail
Expand Down
18 changes: 13 additions & 5 deletions cub/detail/device_synchronize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,26 @@ namespace detail
CUB_EXEC_CHECK_DISABLE
CUB_RUNTIME_FUNCTION inline cudaError_t device_synchronize()
{
cudaError_t result = cudaErrorUnknown;
cudaError_t result = cudaErrorNotSupported;

#if defined(__CUDACC__) && \
((__CUDACC_VER_MAJOR__ > 11) || \
((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 6)))
// Device-side sync is only available under CDPv1:
#if defined(CUB_DETAIL_CDPv1)

#if ((__CUDACC_VER_MAJOR__ > 11) || \
((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 6)))
// CUDA >= 11.6
#define CUB_TMP_DEVICE_SYNC_IMPL \
result = __cudaDeviceSynchronizeDeprecationAvoidance();
#else // CUDA < 11.6
#else // CUDA < 11.6:
#define CUB_TMP_DEVICE_SYNC_IMPL result = cudaDeviceSynchronize();
#endif

#else // CDPv2 or no CDP:

#define CUB_TMP_DEVICE_SYNC_IMPL /* unavailable */

#endif // CDP version

NV_IF_TARGET(NV_IS_HOST,
(result = cudaDeviceSynchronize();),
(CUB_TMP_DEVICE_SYNC_IMPL));
Expand Down
47 changes: 34 additions & 13 deletions cub/util_device.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ CUB_RUNTIME_FUNCTION inline cudaError_t SmVersion(int &sm_version,
*/
CUB_RUNTIME_FUNCTION inline cudaError_t SyncStream(cudaStream_t stream)
{
cudaError_t result = cudaErrorUnknown;
cudaError_t result = cudaErrorNotSupported;

NV_IF_TARGET(NV_IS_HOST,
(result = CubDebug(cudaStreamSynchronize(stream));),
Expand Down Expand Up @@ -532,21 +532,42 @@ namespace detail
CUB_RUNTIME_FUNCTION inline cudaError_t DebugSyncStream(cudaStream_t stream)
{
// Debug-only synchronization of `stream`. Unless
// CUB_DETAIL_DEBUG_ENABLE_SYNC is defined this is a no-op that reports
// cudaSuccess; otherwise it logs and forwards to SyncStream(stream) wherever
// the current target can synchronize, and logs a warning and returns
// cudaSuccess where it cannot.
#ifndef CUB_DETAIL_DEBUG_ENABLE_SYNC
// `stream` is not otherwise used on this path.
(void)stream;

return cudaSuccess;
#else
// NOTE(review): with the condition hard-coded to 1, the #else arm below is
// never compiled.
#if 1 // All valid targets currently support device-side synchronization
_CubLog("%s\n", "Synchronizing...");
return SyncStream(stream);
#else
(void)stream;
_CubLog("%s\n",
"WARNING: Skipping CUB `debug_synchronous` synchronization "
"(unsupported target).");
return cudaSuccess;
#endif
#endif

#else // CUB_DETAIL_DEBUG_ENABLE_SYNC:

// Statement sequence for targets that can synchronize: log, then return the
// result of SyncStream(stream).
#define CUB_TMP_SYNC_AVAILABLE \
_CubLog("%s\n", "Synchronizing..."); \
return SyncStream(stream)

// Statement sequence for targets that cannot synchronize from device code:
// log a warning and return cudaSuccess without touching the stream.
#define CUB_TMP_DEVICE_SYNC_UNAVAILABLE \
(void)stream; \
_CubLog("WARNING: Skipping CUB `debug_synchronous` synchronization (%s).\n", \
"device-side sync requires <sm_90, RDC, and CDPv1"); \
return cudaSuccess

// Both helper macros expand to code ending in a `return`, so each
// NV_IF_TARGET arm below returns from the function.
#ifdef CUB_DETAIL_CDPv1

// Can sync everywhere but SM_90+
NV_IF_TARGET(NV_PROVIDES_SM_90,
(CUB_TMP_DEVICE_SYNC_UNAVAILABLE;),
(CUB_TMP_SYNC_AVAILABLE;));

#else // CDPv2 or no CDP:

// Can only sync on host
NV_IF_TARGET(NV_IS_HOST,
(CUB_TMP_SYNC_AVAILABLE;),
(CUB_TMP_DEVICE_SYNC_UNAVAILABLE;));

#endif // CDP version

// The helper macros are local to this function; remove them so they do not
// leak into the rest of the header.
#undef CUB_TMP_DEVICE_SYNC_UNAVAILABLE
#undef CUB_TMP_SYNC_AVAILABLE

#endif // CUB_DETAIL_DEBUG_ENABLE_SYNC
}

/** \brief Gets whether the current device supports unified addressing */
Expand Down

0 comments on commit 1651ba1

Please sign in to comment.