Skip to content

Commit

Permalink
Enable gfx950 build target (#685)
Browse files Browse the repository at this point in the history
* gfx950 support

* Enable slow fence workaround for gfx950; fix potential correctness issue with fence

* Update to vmcnt 0

* Fix typo

* skip texture_cache on gfx95x (#11)

* skip texture_cache on gfx95x

* included warning for gfx950

* updated warning message

* Add changelog entry

---------

Co-authored-by: thansen-amd <[email protected]>
Co-authored-by: Nguyen, Zee <[email protected]>
  • Loading branch information
3 people authored Feb 13, 2025
1 parent 7e9391c commit 34da354
Show file tree
Hide file tree
Showing 8 changed files with 16 additions and 11 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

Full documentation for rocPRIM is available at [https://rocm.docs.amd.com/projects/rocPRIM/en/latest/](https://rocm.docs.amd.com/projects/rocPRIM/en/latest/).

## rocPRIM 3.5.0 for ROCm 6.5.0

### Added
* gfx950 support

## rocPRIM 3.4.0 for ROCm 6.4.0

### Added
Expand Down
5 changes: 2 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,13 @@ else()
if(BUILD_ADDRESS_SANITIZER)
# ASAN builds require xnack
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+"
TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx942:xnack+;gfx950:xnack+"
)
else()
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"
TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201"
)
endif()

set(GPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "GPU architectures to compile for" FORCE)
endif()
endif()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
// cache.
#ifndef ROCPRIM_DETAIL_LOOKBACK_SCAN_STATE_WITHOUT_SLOW_FENCES
#if defined(__HIP_DEVICE_COMPILE__) \
&& (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
&& (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx950__))
#define ROCPRIM_DETAIL_LOOKBACK_SCAN_STATE_WITHOUT_SLOW_FENCES 1
#else
#define ROCPRIM_DETAIL_LOOKBACK_SCAN_STATE_WITHOUT_SLOW_FENCES 0
Expand Down
1 change: 1 addition & 0 deletions rocprim/include/rocprim/intrinsics/atomic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ namespace detail
/// developers that know what they are doing.
ROCPRIM_DEVICE ROCPRIM_INLINE void atomic_fence_acquire_order_only()
{
// Issue an s_waitcnt before the fence. The immediate is assembled from three
// counter fields, as annotated inline: vmcnt = 0 in the low bits, exp_cnt = 0x7
// shifted to bit 4, and lgkmcnt = 0xf shifted to bit 8.
// NOTE(review): presumably this waits for outstanding vector-memory operations
// (vmcnt 0) while leaving exp_cnt and lgkmcnt at their maximum, i.e. not waited
// on. The s_waitcnt bit layout is architecture-specific -- confirm the encoding
// is correct for every target this function is compiled for.
__builtin_amdgcn_s_waitcnt(/*vmcnt*/ 0 | (/*exp_cnt*/ 0x7 << 4) | (/*lgkmcnt*/ 0xf << 8));
// Acquire fence at "workgroup" scope, issued after the wait above.
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
}
}
Expand Down
6 changes: 3 additions & 3 deletions rocprim/include/rocprim/iterator/texture_cache_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,9 @@ class texture_cache_iterator
#else
texture_type words[multiple];

#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx1200__) || defined(__gfx1201__)
#pragma message "Texture cache iterator is not supported on gfx94x or gfx120x as the texture fetch functions in HIP are not available."
ROCPRIM_PRINT_ERROR_ONCE("WARNING: Usage of texture_cache_iterator on gfx94x or gfx120x devices is not supported and will not produce valid results.")
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) || defined(__gfx950__) || defined(__gfx1200__) || defined(__gfx1201__)
#pragma message "Texture cache iterator is not supported on gfx94x, gfx120x or gfx95x as the texture fetch functions in HIP are not available."
ROCPRIM_PRINT_ERROR_ONCE("WARNING: Usage of texture_cache_iterator on gfx94x, gfx120x or gfx95x devices is not supported and will not produce valid results.")
#else
ROCPRIM_UNROLL
for(unsigned int i = 0; i < multiple; i++)
Expand Down
2 changes: 1 addition & 1 deletion rocprim/include/rocprim/thread/thread_load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ ROCPRIM_ASM_THREAD_LOAD_GROUP(load_ca, "sc0", "s_waitcnt", "");
ROCPRIM_ASM_THREAD_LOAD_GROUP(load_cg, "sc1", "s_waitcnt", "");
ROCPRIM_ASM_THREAD_LOAD_GROUP(load_cv, "sc0 sc1", "s_waitcnt", "vmcnt");
ROCPRIM_ASM_THREAD_LOAD_GROUP(load_volatile, "sc0 sc1", "s_waitcnt", "vmcnt");
#elif defined(__gfx942__)
#elif defined(__gfx942__) || defined(__gfx950__)
ROCPRIM_ASM_THREAD_LOAD_GROUP(load_ca, "sc0", "s_waitcnt", "");
ROCPRIM_ASM_THREAD_LOAD_GROUP(load_cg, "sc0 nt", "s_waitcnt", "");
ROCPRIM_ASM_THREAD_LOAD_GROUP(load_cv, "sc0", "s_waitcnt", "vmcnt");
Expand Down
2 changes: 1 addition & 1 deletion rocprim/include/rocprim/thread/thread_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ ROCPRIM_ASM_THREAD_STORE_GROUP(store_wb, "sc0 sc1", "s_waitcnt", ""); // TODO: g
ROCPRIM_ASM_THREAD_STORE_GROUP(store_cg, "sc0 sc1", "s_waitcnt", "");
ROCPRIM_ASM_THREAD_STORE_GROUP(store_wt, "sc0 sc1", "s_waitcnt", "vmcnt");
ROCPRIM_ASM_THREAD_STORE_GROUP(store_volatile, "sc0 sc1", "s_waitcnt", "vmcnt");
#elif defined(__gfx942__)
#elif defined(__gfx942__) || defined(__gfx950__)
ROCPRIM_ASM_THREAD_STORE_GROUP(store_wb, "sc0", "s_waitcnt", "");
ROCPRIM_ASM_THREAD_STORE_GROUP(store_cg, "sc0 nt", "s_waitcnt", "");
ROCPRIM_ASM_THREAD_STORE_GROUP(store_wt, "sc0", "s_waitcnt", "vmcnt");
Expand Down
4 changes: 2 additions & 2 deletions test/rocprim/test_texture_cache_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ TYPED_TEST(RocprimTextureCacheIteratorTests, Transform)
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, device_id));
std::string deviceName = std::string(props.gcnArchName);
if (deviceName.rfind("gfx94", 0) == 0 || deviceName.rfind("gfx120") == 0) {
if (deviceName.rfind("gfx94", 0) == 0 || deviceName.rfind("gfx120") == 0 || deviceName.rfind("gfx95") == 0) {
// This is a gfx94x, gfx120x or gfx95x device, so skip this test
GTEST_SKIP() << "Test not run on gfx94x or gfx120x as texture cache API is not supported";
GTEST_SKIP() << "Test not run on gfx94x, gfx120x or gfx95x as texture cache API is not supported";
}

HIP_CHECK(hipSetDevice(device_id));
Expand Down

0 comments on commit 34da354

Please sign in to comment.