Skip to content

Commit

Permalink
Merge pull request #882 from LLNL/v0.12.0-rc
Browse files Browse the repository at this point in the history
V0.12.0 rc
  • Loading branch information
rhornung67 authored Sep 1, 2020
2 parents 0502b9b + d214be3 commit 32d92e3
Show file tree
Hide file tree
Showing 574 changed files with 46,530 additions and 14,868 deletions.
9 changes: 7 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
*.a
*.exe
*.gch
build/
build-*/
/*.sublime-*
/build/
/build_*/
/build-*/
/install/
/install_*/
/install-*/
/Debug/
17 changes: 3 additions & 14 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,17 @@ matrix:
env:
- COMPILER=g++
- IMG=gcc8
- CMAKE_EXTRA_FLAGS="-DENABLE_WARNINGS=On -DENABLE_TBB=On"
- CMAKE_EXTRA_FLAGS="-DENABLE_WARNINGS=On -DENABLE_TBB=On -DRAJA_ENABLE_BOUNDS_CHECK=ON"
- compiler: clang9
env:
- COMPILER=clang++-9
- IMG=clang9
- CMAKE_EXTRA_FLAGS="-DCMAKE_CXX_FLAGS=-fmodules -DENABLE_TBB=On"
- compiler: clang5
env:
- COMPILER=clang++
- IMG=clang5
- CMAKE_EXTRA_FLAGS="-DCMAKE_CXX_FLAGS=-fmodules -DENABLE_TBB=On"
- compiler: intel18
env:
- COMPILER=/opt/intel/bin/icpc
- IMG=icc18
- CMAKE_EXTRA_FLAGS="-DENABLE_TBB=On"
- compiler: nvcc9
env:
- COMPILER=g++
- IMG=nvcc9
- CMAKE_EXTRA_FLAGS="-DENABLE_CUDA=On -DENABLE_TBB=On"
- DO_TEST=no
- CMAKE_EXTRA_FLAGS="-DENABLE_FORCEINLINE_RECURSIVE=Off -DENABLE_TBB=On"
- compiler: nvcc10.2
env:
- COMPILER=g++
Expand Down Expand Up @@ -86,7 +75,7 @@ matrix:
- COMPILER=g++
- IMG=hip
- HCC_AMDGPU_TARGET=gfx900
- CMAKE_EXTRA_FLAGS="-DENABLE_HIP=On -DENABLE_OPENMP=Off -DENABLE_CUDA=Off -DENABLE_WARNINGS_AS_ERRORS=Off"
- CMAKE_EXTRA_FLAGS="-DENABLE_HIP=On -DENABLE_OPENMP=Off -DENABLE_CUDA=Off -DENABLE_WARNINGS_AS_ERRORS=Off -DHIP_HIPCC_FLAGS=-fPIC"
- DO_TEST=no


Expand Down
62 changes: 33 additions & 29 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@
cmake_policy(SET CMP0042 NEW)
cmake_policy(SET CMP0048 NEW)

# Policy CMP0025 NEW: CMake reports Apple's Clang with compiler id
# "AppleClang" rather than "Clang". Only set on Apple platforms.
if (APPLE)
cmake_policy(SET CMP0025 NEW)
endif()

# Set version number
set(RAJA_VERSION_MAJOR 0)
set(RAJA_VERSION_MINOR 11)
set(RAJA_VERSION_MINOR 12)
set(RAJA_VERSION_PATCHLEVEL 0)

if (RAJA_LOADED AND (NOT RAJA_LOADED STREQUAL "${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}"))
Expand Down Expand Up @@ -47,7 +51,6 @@ set(ENABLE_GTEST_DEATH_TESTS On CACHE BOOL "Enable tests asserting failure.")
set(RAJA_CXX_STANDARD_FLAG "default" CACHE STRING "Specific c++ standard flag to use, default attempts to autodetect the highest available")

option(ENABLE_TBB "Build TBB support" Off)
option(ENABLE_CHAI "Build CHAI support" Off)
option(ENABLE_TARGET_OPENMP "Build OpenMP on target device support" Off)
option(ENABLE_CLANG_CUDA "Use Clang's native CUDA support" Off)
option(ENABLE_EXTERNAL_CUB "Use an external cub for scans" Off)
Expand All @@ -64,6 +67,7 @@ option(ENABLE_FORCEINLINE_RECURSIVE "Enable Forceinline recursive (only supporte
option(ENABLE_BENCHMARKS "Build benchmarks" Off)
option(RAJA_DEPRECATED_TESTS "Test deprecated features" Off)
option(RAJA_ENABLE_BOUNDS_CHECK "Enable bounds checking in RAJA::Views/Layouts" Off)
option(RAJA_TEST_EXHAUSTIVE "Build RAJA exhaustive tests" Off)

set(TEST_DRIVER "" CACHE STRING "driver used to wrap test commands")

Expand All @@ -72,7 +76,7 @@ cmake_minimum_required(VERSION 3.9)
if (ENABLE_CUDA)
if (DEFINED CUDA_ARCH)
if (CUDA_ARCH MATCHES "^sm_*")
if ("${CUDA_ARCH}" STRLESS "sm_35")
if ("${CUDA_ARCH}" STRLESS "sm_35")
message( FATAL_ERROR "RAJA requires minimum CUDA compute architecture of sm_35")
endif()
endif()
Expand All @@ -85,7 +89,7 @@ if (ENABLE_CUDA)
message(STATUS "CUDA compute architecture set to RAJA default sm_35 since it was not specified")
set(CUDA_ARCH "sm_35" CACHE STRING "Set CUDA_ARCH to RAJA minimum supported" FORCE)
endif()
if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
if ( (CMAKE_CXX_COMPILER_ID MATCHES GNU) AND (CMAKE_SYSTEM_PROCESSOR MATCHES ppc64le) )
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
set (CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -mno-float128")
endif ()
Expand Down Expand Up @@ -157,7 +161,9 @@ set (raja_sources
src/LockFreeIndexSetBuilders.cpp
src/MemUtils_CUDA.cpp
src/MemUtils_HIP.cpp
src/PluginStrategy.cpp)
src/PluginStrategy.cpp
src/RuntimePluginLoader.cpp
src/KokkosPluginLoader.cpp)

set (raja_depends)

Expand All @@ -182,7 +188,7 @@ if (ENABLE_CUDA)
if (CUB_FOUND)
blt_register_library(
NAME cub
INCLUDES ${CUB_INCLUDE_DIRS})
INCLUDES $<BUILD_INTERFACE:${CUB_INCLUDE_DIRS}>)
set(raja_depends
${raja_depends}
cub)
Expand Down Expand Up @@ -216,38 +222,34 @@ if (ENABLE_HIP)
endif ()
endif ()

if (ENABLE_CHAI)
set (raja_depends
${raja_depends}
chai)
endif ()

if (ENABLE_TBB)
set(raja_depends
${raja_depends}
tbb)
endif ()

set(EXTERNAL_CAMP_SOURCE_DIR "" CACHE FILEPATH "build with a specific external
if (NOT TARGET camp)
set(EXTERNAL_CAMP_SOURCE_DIR "" CACHE FILEPATH "build with a specific external
camp source repository")
if (EXTERNAL_CAMP_SOURCE_DIR)
message(STATUS "Using external source CAMP from: " ${EXTERNAL_CAMP_SOURCE_DIR})
add_subdirectory(${EXTERNAL_CAMP_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}/tpl/camp)
else (EXTERNAL_CAMP_SOURCE_DIR)
find_package(camp QUIET)
if (NOT camp_FOUND)
message(STATUS "Using RAJA CAMP submodule.")
add_subdirectory(tpl/camp)
else (NOT camp_FOUND)
message(STATUS "Using installed CAMP from: ${camp_INSTALL_PREFIX}")
endif(NOT camp_FOUND)
endif (EXTERNAL_CAMP_SOURCE_DIR)
if (EXTERNAL_CAMP_SOURCE_DIR)
message(STATUS "Using external source CAMP from: " ${EXTERNAL_CAMP_SOURCE_DIR})
add_subdirectory(${EXTERNAL_CAMP_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}/tpl/camp)
else (EXTERNAL_CAMP_SOURCE_DIR)
find_package(camp QUIET)
if (NOT camp_FOUND)
message(STATUS "Using RAJA CAMP submodule.")
add_subdirectory(tpl/camp)
else (NOT camp_FOUND)
message(STATUS "Using installed CAMP from: ${camp_INSTALL_PREFIX}")
endif(NOT camp_FOUND)
endif (EXTERNAL_CAMP_SOURCE_DIR)
endif (NOT TARGET camp)

blt_add_library(
NAME RAJA
SOURCES ${raja_sources}
DEPENDS_ON ${raja_depends} camp)
DEPENDS_ON ${raja_depends} camp ${CMAKE_DL_LIBS})

install(TARGETS RAJA
EXPORT RAJA
Expand All @@ -262,9 +264,11 @@ target_include_directories(RAJA
PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tpl/cub>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tpl/rocPRIM/rocprim/include>
$<INSTALL_INTERFACE:include>)
target_include_directories(RAJA SYSTEM
PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tpl/cub>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tpl/rocPRIM/rocprim/include>)

install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN *.hpp)
if(NOT ENABLE_EXTERNAL_CUB)
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ submodule or as an installed library.
User Documentation
-------------------

The [**RAJA User Guide and Tutorial**](http://raja.readthedocs.io/en/master/)
The [**RAJA User Guide and Tutorial**](http://raja.readthedocs.io/en/main/)
is the best place to start learning about RAJA and how to use it.

To cite RAJA, please use the following references:

* RAJA Performance Portability Layer. https://github.com/LLNL/RAJA

* D. A. Beckingsale, J. Burmark, R. Hornung, H. Jones, W. Killian, A. J. Kunen, O. Pearce, P. Robinson, B. S. Ryujin, T. R. W. Scogland, "RAJA: Porrtable Performance for Large-Scale Scientific Applications", 2019 IEEE/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC). [Download here](https://conferences.computer.org/sc19w/2019/#!/toc/14)
* D. A. Beckingsale, J. Burmark, R. Hornung, H. Jones, W. Killian, A. J. Kunen, O. Pearce, P. Robinson, B. S. Ryujin, T. R. W. Scogland, "RAJA: Portable Performance for Large-Scale Scientific Applications", 2019 IEEE/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC). [Download here](https://conferences.computer.org/sc19w/2019/#!/toc/14)

Related Software
--------------------
Expand Down Expand Up @@ -114,7 +114,7 @@ The RAJA team follows the [GitFlow](http://nvie.com/posts/a-successful-git-branc
include their work in a feature branch created from the RAJA `develop` branch.
Then, create a pull request with the `develop` branch as the destination. That
branch contains the latest work in RAJA. Periodically, we will merge the
develop branch into the `master` branch and tag a new release.
develop branch into the `main` branch and tag a new release.

Authors
-----------
Expand Down
106 changes: 105 additions & 1 deletion RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,110 @@
Version vxx.yy.zz -- Release date 20yy-mm-dd
============================================

Version v0.12.0 -- Release date 2020-09-03
============================================

This release contains new features, notable changes, and bug fixes. Please
see the RAJA user guide for more information about items in this release.

Notable changes include:

* Notable repository change:
* The 'master' branch in the RAJA git repo has been renamed to 'main'.

* New features:
* New RAJA "work group" capability added. This allows multiple GPU
kernels to be fused into one kernel launch, greatly reducing the
run time overhead of launching CUDA kernels.
* Added support for dynamic plug-ins in RAJA, which enable tools such as
the Kokkos Performance Profiling Tools to be used with RAJA
(https://github.com/kokkos/kokkos-tools).
* Added ability to pass a resource object to RAJA::forall methods to
enable asynchronous execution for CUDA and HIP back-ends.
* Added "Multi-view" that works like a regular view, except that it
can wrap multiple arrays so their accesses can share index arithmetic.
* Multiple sort algorithms added. This provides portable parallel sort
operations, which are basic parallel algorithm building blocks.
* Introduced RAJA "Teams" concept as an experimental feature. This
enables hierarchical parallelism and additional nested loop patterns
beyond what RAJA::kernel supports. Please note that this is very much
a work-in-progress and is not yet documented in the user guide.
* Added initial support for dynamic loop tiling.
* New OpenMP execution policies added to support static, dynamic, and
guided scheduling.
* Added support for const iterators to be used with RAJA scans.
* Support for bitwise AND and OR reductions has been added.
* The RAJA::kernel interface has been expanded to allow only segment
index arguments used in a lambda to be passed to the lambda. In
previous versions of RAJA, every lambda invoked in a kernel had to
accept an index argument for every segment in the segment tuple passed
to RAJA::kernel execution templates, even if not all segment indices
were used in a lambda. This release still allows that usage pattern.
The new capability requires an additional template parameter to be
passed to the RAJA::statement::Lambda type, which identifies the segment
indices that will be passed and in which order.

* API Changes:
* The RAJA 'VarOps' namespace has been removed. All entities previously
in that namespace are now in the 'RAJA' namespace.
* RAJA span is now public for users to access and has been made more like
std::span.
* RAJA::statement::tile_fixed has been moved to RAJA::tile_fixed
(namespace change).
* RAJA::statement::{Segs, Offsets, Params, ValuesT} have been moved to
RAJA::{Segs, Offsets, Params, ValuesT} (namespace change).
* RAJA ListSegment constructors have been expanded to accept a camp
Resource object. This enables run time specification of the memory
space where the data for list segment indices will live. In earlier
RAJA versions, the space in which list segment index data lived was a
compile-time choice based on whether CUDA or HIP was enabled and the
data resided in unified memory for either case. This is still supported
in this release, but is marked as a DEPRECATED FEATURE. In the next RAJA
release, ListSegment construction will require a camp Resource object.
When compiling RAJA with your application, you will see deprecation
warnings if you are using the deprecated ListSegment constructor.
* A reset method was added to OpenMP target offload reduction classes
so they contain the same functionality as reductions for all other
back-ends.

* Build changes/improvements:
* The BLT, camp, CUB, and rocPRIM submodules have all been updated to
more recent versions. Please note that RAJA now requires rocm version
3.5 or newer to use the HIP back-end.
* Build for clang9 on macosx has been fixed.
* Build for Intel19 on Windows has been fixed.
* Host/device annotations have been added to reduction operations to
eliminate compiler warnings for certain use cases.
* Several warnings generated by the MSVC compiler have been eliminated.
* A couple of PGI compiler warnings have been removed.
* CMake improvements to make it easier to use an external camp or
CUB library with RAJA.
* Note that the RAJA tests are undergoing a substantial overhaul. Users
who choose to build and run RAJA tests should know that many tests
are now being generated in the build space directory structure which
mimics the RAJA source directory structure. As a result, only some
test executables appear in the top-level 'test' subdirectory of the
build directory; others can be found in lower-level directories. The
reason for this change is to reduce test build times for certain
compilers.

* Bug fixes:
* An issue with SIMD privatization with the Intel compiler, required
to generate correct code, has been fixed.
* An issue with the atomicExchange() operation for the RAJA HIP back-end
has been fixed.
* A type issue in the RAJA::kernel implementation involving RAJA span
usage has been fixed.
* Checks for iterator ranges and container sizes have been added to
RAJA scans, which fixes an issue when users attempted to run a
scan over a range of size zero.
* Several type errors in the Layout.hpp header file have been fixed.
* Several fixes have been made in the Layout and Static Layout types.
* Several fixes have been made to the OpenMP target offload back-end
to address host-device memory issues.
* A variety of RAJA User Guide issues have been addressed, as well as
issues in RAJA example codes.

Version v0.11.0 -- Release date 2020-01-29
==========================================

Expand Down Expand Up @@ -85,7 +189,7 @@ Notable changes include:
* Added a bounds checking option to RAJA Layout types as a debugging
feature. This is a compile-time option that will report user errors
when given View or Layout indices are out-of-bounds. See View/Layout
section in the RAjA User Guide for instructions on enabling this and
section in the RAJA User Guide for instructions on enabling this and
how this feature works.
* We've added a RAJA Template Project on GitHub, which shows how to
use RAJA in an application, either as a Git submodule or as an
Expand Down
44 changes: 44 additions & 0 deletions cmake/RAJAMacros.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,50 @@ macro(raja_add_executable)
)
endmacro(raja_add_executable)

# ------------------------------------------------------------------------------
# raja_add_plugin_library(NAME       <target name>
#                         SHARED     <value forwarded to blt_add_library>
#                         SOURCES    <source file>...
#                         DEPENDS_ON <dependency>...)
#
# Adds a plugin library target by forwarding to blt_add_library.
#
#   NAME        (required) name of the library target to create.
#   SHARED      (optional) single value passed through unchanged as the
#               SHARED argument of blt_add_library.
#   SOURCES     source files for the library.
#   DEPENDS_ON  additional dependencies. RAJA is always appended, followed by
#               openmp, cuda, hip and tbb for each of ENABLE_OPENMP,
#               ENABLE_CUDA, ENABLE_HIP and ENABLE_TBB that is enabled.
#
# Because this is a macro, the parsed arg_* variables are set in the
# caller's scope.
# ------------------------------------------------------------------------------
macro(raja_add_plugin_library)
  set(options )
  set(singleValueArgs NAME SHARED)
  set(multiValueArgs SOURCES DEPENDS_ON)

  cmake_parse_arguments(arg
    "${options}" "${singleValueArgs}" "${multiValueArgs}" ${ARGN})

  # Fail early with a message that names this macro instead of letting an
  # empty NAME reach blt_add_library.
  if (NOT DEFINED arg_NAME)
    message(FATAL_ERROR "raja_add_plugin_library() requires a NAME argument")
  endif ()

  # Every plugin links against RAJA itself.
  list(APPEND arg_DEPENDS_ON RAJA)

  # Add a dependency for each enabled programming-model back-end.
  if (ENABLE_OPENMP)
    list (APPEND arg_DEPENDS_ON openmp)
  endif ()

  if (ENABLE_CUDA)
    list (APPEND arg_DEPENDS_ON cuda)
  endif ()

  if (ENABLE_HIP)
    list (APPEND arg_DEPENDS_ON hip)
  endif ()

  if (ENABLE_TBB)
    list (APPEND arg_DEPENDS_ON tbb)
  endif ()

  blt_add_library(
    NAME ${arg_NAME}
    SOURCES ${arg_SOURCES}
    DEPENDS_ON ${arg_DEPENDS_ON}
    SHARED ${arg_SHARED}
    )

endmacro(raja_add_plugin_library)

macro(raja_add_test)
set(options )
set(singleValueArgs NAME)
Expand Down
Loading

0 comments on commit 32d92e3

Please sign in to comment.