Skip to content

Commit

Permalink
5.5 cherry pick (#413)
Browse files Browse the repository at this point in the history
* HIP SDK compatibility (#398)

* HIP SDK compatibility

* Update cmake_path function in rmake.py

* Fix typo

* Updating copyright year

* double to half conversion workaround for block_reduce ReduceMultiplies test case (#401)

* Directly use __builtin_amdgcn_fence (#403)

This was relying on an implementation detail of the
HIP headers which has been removed.

Co-authored-by: Matt Arsenault <[email protected]>

* Fix toolchain-windows.cmake for HIP SDK (#404)

* HIP SDK compatibility

* Update cmake_path function in rmake.py

* Fix typo

* Updating copyright year

* Removing unnecessary cmake compiler flags

* Removing old comments, cleanup

* Take python3 by default

* gfx90a tuning merge-sort and radix-sort (#397)

and special case tuning for merge-sort char,double value_type

* Escape right brace in regex (#407)

* Escape right brace in regex

* Escape parentheses

* Fix package name on Windows (#411)

* Add rocm-cmake path to CMAKE_PREFIX_PATH

* Fall back to rocm-cmake master (match other libs)

* Update changelog for 5.5 cherry pick

* Revert "gfx90a tuning merge-sort and radix-sort (#397)"

This reverts commit cc92f03.

---------

Co-authored-by: Matt Arsenault <[email protected]>
Co-authored-by: Matt Arsenault <[email protected]>
Co-authored-by: Vincent van Heertum <[email protected]>
Co-authored-by: Lauren Wrubleski <[email protected]>
  • Loading branch information
5 people authored Mar 6, 2023
1 parent 5a6724d commit 629acf4
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 36 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Full documentation for rocPRIM is available at [https://codedocs.xyz/ROCmSoftwar
- Improved the performance of `block_radix_sort` and `device_radix_sort`.
### Known Issues
- Disabled GPU error messages relating to incorrect warp operation usage with Navi GPUs on Windows, due to GPU printf performance issues on Windows.
### Fixed
- Fixed benchmark build on Windows.

## [rocPRIM-2.12.0 for ROCm 5.4.0]
### Changed
Expand Down
3 changes: 1 addition & 2 deletions benchmark/benchmark_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,8 +561,7 @@ struct bench_naming
static std::string format_name(std::string string)
{
format format = get_format();
std::regex r("([A-z0-9]*):\\s*((?:custom_type<[A-z0-9,]*>)|[A-z:().<>\\s0-9]*)(}*)");

std::regex r("([A-z0-9]*):\\s*((?:custom_type<[A-z0-9,]*>)|[A-z:\\(\\)\\.<>\\s0-9]*)(\\}*)");
// First we perform some checks
bool checks[4] = {false};
for(std::sregex_iterator i = std::sregex_iterator(string.begin(), string.end(), r);
Expand Down
5 changes: 4 additions & 1 deletion cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,17 @@ if(BUILD_BENCHMARK)
endif(BUILD_BENCHMARK)

if(NOT DEPENDENCIES_FORCE_DOWNLOAD)
set(CMAKE_FIND_DEBUG_MODE TRUE)
find_package(ROCM 0.7.3 CONFIG QUIET PATHS /opt/rocm)
set(CMAKE_FIND_DEBUG_MODE FALSE)
endif()
if(NOT ROCM_FOUND)
if(NOT EXISTS "${FETCHCONTENT_BASE_DIR}/rocm-cmake-src")
message(STATUS "ROCm CMake not found. Fetching...")
set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download")
FetchContent_Declare(
rocm-cmake
URL https://github.com/RadeonOpenCompute/rocm-cmake/archive/refs/tags/rocm-5.2.0.tar.gz
URL https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.tar.gz
)
FetchContent_MakeAvailable(rocm-cmake)
endif()
Expand Down
18 changes: 14 additions & 4 deletions rmake.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/python3
""" Copyright (c) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
""" Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved.
Manage build and installation"""

import re
Expand Down Expand Up @@ -38,7 +38,7 @@ def parse_args():
parser.add_argument( '--cmake-darg', required=False, dest='cmake_dargs', action='append', default=[],
help='List of additional cmake defines for builds (e.g. CMAKE_CXX_COMPILER_LAUNCHER=ccache)')
parser.add_argument('-a', '--architecture', dest='gpu_architecture', required=False, default="gfx906;gfx1030;gfx1100;gfx1101;gfx1102", #:sramecc+:xnack-" ) #gfx1030" ) #gfx906" ) # gfx1030" )
help='Set GPU architectures, e.g. all, gfx000, gfx803, gfx906:xnack-;gfx1030;gfx1100 (optional, default: all)')
help='Set GPU architectures, e.g. all, gfx000, gfx803, gfx906:xnack-;gfx1030;gfx1100 (optional, default: all)')
parser.add_argument('-v', '--verbose', required=False, default=False, action='store_true',
help='Verbose build (default: False)')
return parser.parse_args()
Expand Down Expand Up @@ -75,6 +75,12 @@ def delete_dir(dir_path) :
#print( linux_path )
run_cmd( "rm" , f"-rf {linux_path}")

def cmake_path(os_path):
    """Return ``os_path`` in the form this script hands to CMake.

    When ``OS_info["ID"]`` is ``"windows"`` every backslash in the path is
    replaced with a forward slash and the path is otherwise left as given.
    For any other OS ID the path is resolved with ``os.path.realpath``.
    """
    if OS_info["ID"] != "windows":
        return os.path.realpath(os_path)
    # Windows: only the separators are swapped; the path is not resolved.
    return os_path.replace("\\", "/")

def config_cmd():
global args
global OS_info
Expand All @@ -87,17 +93,21 @@ def config_cmd():
cmake_platform_opts = []
if (OS_info["ID"] == 'windows'):
# we don't have ROCM on windows but have hip, ROCM can be downloaded if required
rocm_path = os.getenv( 'ROCM_PATH', "C:/hipsdk/rocm-cmake-master") #C:/hip") # rocm/Utils/cmake-rocm4.2.0"
# CMAKE_PREFIX_PATH set to rocm_path and HIP_PATH set BY SDK Installer
raw_rocm_path = cmake_path(os.getenv('HIP_PATH', "C:/hip"))
rocm_path = f'"{raw_rocm_path}"' # guard against spaces in path
cmake_executable = "cmake.exe"
toolchain = os.path.join( src_path, "toolchain-windows.cmake" )
#set CPACK_PACKAGING_INSTALL_PREFIX= defined as blank as it is appended to end of path for archive creation
cmake_platform_opts.append( f"-DWIN32=ON -DCPACK_PACKAGING_INSTALL_PREFIX=") #" -DCPACK_PACKAGING_INSTALL_PREFIX={rocm_path}"
cmake_platform_opts.append( f"-DCMAKE_INSTALL_PREFIX=\"C:/hipSDK\"" )
rocm_cmake_path = '"' + cmake_path(os.getenv("ROCM_CMAKE_PATH", "C:/hipSDK")) + '"'
generator = f"-G Ninja"
# "-G \"Visual Studio 16 2019\" -A x64" # -G NMake ") #
cmake_options.append( generator )
else:
rocm_path = os.getenv( 'ROCM_PATH', "/opt/rocm")
rocm_cmake_path = '"' + rocm_path + '"'
if (OS_info["ID"] in ['centos', 'rhel']):
cmake_executable = "cmake3"
else:
Expand Down Expand Up @@ -127,7 +137,7 @@ def config_cmd():
deps_dir = os.path.abspath(os.path.join(build_dir, 'deps')).replace('\\','/')
else:
deps_dir = args.deps_dir
cmake_base_options = f"-DROCM_PATH={rocm_path} -DCMAKE_PREFIX_PATH:PATH={rocm_path}" # -DCMAKE_INSTALL_PREFIX=rocmath-install" #-DCMAKE_INSTALL_LIBDIR=
cmake_base_options = f"-DROCM_PATH={rocm_path} -DCMAKE_PREFIX_PATH:PATH={rocm_path[:-1]};{rocm_cmake_path[1:]}" # -DCMAKE_INSTALL_PREFIX=rocmath-install" #-DCMAKE_INSTALL_LIBDIR=
cmake_options.append( cmake_base_options )

print( cmake_options )
Expand Down
9 changes: 3 additions & 6 deletions rocprim/include/rocprim/intrinsics/thread.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,10 @@ void syncthreads()
ROCPRIM_DEVICE ROCPRIM_INLINE
void wave_barrier()
{
__atomic_work_item_fence(__CLK_LOCAL_MEM_FENCE,
__memory_order_release,
__memory_scope_sub_group);
__builtin_amdgcn_fence(__ATOMIC_RELEASE, "wavefront");
__builtin_amdgcn_wave_barrier();
__atomic_work_item_fence(__CLK_LOCAL_MEM_FENCE,
__memory_order_acquire,
__memory_scope_sub_group);
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "wavefront");

}

namespace detail
Expand Down
5 changes: 3 additions & 2 deletions test/rocprim/test_block_reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ typed_test_def(suite_name_single, name_suffix, ReduceMultiplies)
using T = typename TestFixture::input_type;
using binary_op_type = rocprim::multiplies<T>;
constexpr size_t block_size = TestFixture::block_size;

using cast_type = typename test_utils::select_plus_operator_host<T>::cast_type;

// Given block size not supported
if(block_size > test_utils::get_max_block_size())
{
Expand Down Expand Up @@ -137,7 +138,7 @@ typed_test_def(suite_name_single, name_suffix, ReduceMultiplies)
auto idx = i * block_size + j;
value *= static_cast<double>(output[idx]);
}
expected_reductions[i] = static_cast<T>(value);
expected_reductions[i] = static_cast<cast_type>(value);
}

// Preparing device
Expand Down
31 changes: 10 additions & 21 deletions toolchain-windows.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,36 @@
# Ninja doesn't support platform
#set(CMAKE_GENERATOR_PLATFORM x64)

if (DEFINED ENV{HIP_DIR})
if (DEFINED ENV{HIP_PATH})
file(TO_CMAKE_PATH "$ENV{HIP_PATH}" HIP_DIR)
set(rocm_bin "${HIP_DIR}/bin")
elseif (DEFINED ENV{HIP_DIR})
file(TO_CMAKE_PATH "$ENV{HIP_DIR}" HIP_DIR)
set(rocm_bin "${HIP_DIR}/bin")
else()
set(HIP_DIR "C:/hip")
set(rocm_bin "C:/hip/bin")
endif()

#set(CMAKE_CXX_COMPILER "${rocm_bin}/hipcc.bat")
#set(CMAKE_C_COMPILER "${rocm_bin}/hipcc.bat")
set(CMAKE_CXX_COMPILER "${rocm_bin}/clang++.exe")
set(CMAKE_C_COMPILER "${rocm_bin}/clang.exe")

#set(CMAKE_CXX_LINKER "${rocm_bin}/hipcc.bat" )

# TODO remove, just to speed up slow cmake
set(CMAKE_C_COMPILER_WORKS 1)
set(CMAKE_CXX_COMPILER_WORKS 1)
#
if (NOT python)
set(python "python3") # take default for windows
endif()

#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -IC:/hip/include -IC:/hip/lib/clang/12.0.0 -DWIN32 -D_CRT_SECURE_NO_WARNINGS")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_DIR}/include -DWIN32 -D_CRT_SECURE_NO_WARNINGS")
# our usage flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWIN32 -D_CRT_SECURE_NO_WARNINGS")

# flags for clang direct use
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fms-extensions -fms-compatibility")
# -Wno-ignored-attributes to avoid warning: __declspec attribute 'dllexport' is not supported [-Wignored-attributes] which is used by msvc compiler
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fms-extensions -fms-compatibility -Wno-ignored-attributes")

# flags for clang direct use with hip
# -x hip causes linker error
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -x hip -IC:/hip/include/hip -D__HIP_PLATFORM_HCC__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_DIR}/include/hip -D__HIP_PLATFORM_HCC__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")

if (DEFINED ENV{VCPKG_PATH})
file(TO_CMAKE_PATH "$ENV{VCPKG_PATH}" VCPKG_PATH)
else()
set(VCPKG_PATH "C:/github/vcpkg")
endif()
include("${VCPKG_PATH}/scripts/buildsystems/vcpkg.cmake")
# set(GTEST_DIR "C:/rocm/Utils/GTestMSVC")
# set(GTEST_INCLUDE_DIR "${GTEST_DIR}/include")
# set(GTEST_LIBRARY "${GTEST_DIR}/lib/Release/gtest.lib")
# set(GTEST_MAIN_LIBRARY "${GTEST_DIR}/lib/Release/gtest_main.lib")
# set(GTEST_LIBRARIES "${GTEST_DIR}/lib/Release/gtest.lib;${GTEST_DIR}/lib/Release/gtest_main.lib")

0 comments on commit 629acf4

Please sign in to comment.