From b66a51711d3ea8f2a232d7dc0f6e1a9f0e7ab472 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Fri, 1 Sep 2023 12:05:41 +0200 Subject: [PATCH] Fix and update a few things in the documentation --- docs/source/basic/cheatsheet.rst | 45 +++++++++++++------------------- docs/source/basic/example.rst | 29 ++++++-------------- docs/source/basic/install.rst | 9 ++----- docs/source/basic/intro.rst | 6 ++--- docs/source/basic/library.rst | 2 +- docs/source/index.rst | 5 ++-- 6 files changed, 35 insertions(+), 61 deletions(-) diff --git a/docs/source/basic/cheatsheet.rst b/docs/source/basic/cheatsheet.rst index 601c4a297033..e769cc6d9e25 100644 --- a/docs/source/basic/cheatsheet.rst +++ b/docs/source/basic/cheatsheet.rst @@ -40,6 +40,10 @@ Define accelerator type (CUDA, OpenMP,etc.) .. code-block:: c++ AccGpuCudaRt, + AccGpuHipRt, + AccCpuSycl, + AccFpgaSyclIntel, + AccGpuSyclIntel, AccCpuOmp2Blocks, AccCpuOmp2Threads, AccCpuOmp4, @@ -126,9 +130,9 @@ Create a view to host memory represented by a pointer .. code-block:: c++ using Dim = alpaka::DimInt<1u>; - Vec extent = value; - DataType* date = new DataType[extent[0]]; - auto hostView = createView(devHost, data, extent); + Vec extent = size; + DataType* ptr = ...; + auto hostView = createView(devHost, ptr, extent); Create a view to host std::vector .. code-block:: c++ @@ -139,7 +143,7 @@ Create a view to host std::vector Create a view to host std::array .. code-block:: c++ - std::vector array = {42u, 23}; + std::array array = {42u, 23}; auto hostView = createView(devHost, array); Get a raw pointer to a buffer or view initialization, etc. @@ -148,11 +152,6 @@ Get a raw pointer to a buffer or view initialization, etc. DataType* raw = view::getPtrNative(bufHost); DataType* rawViewPtr = view::getPtrNative(hostView); -Get an accessor to a buffer and the accessor's type (experimental) - .. code-block:: c++ - - experimental::BufferAccessor a = experimental::access(buffer); - Allocate a buffer in device memory .. 
code-block:: c++ @@ -230,21 +229,14 @@ Access multi-dimensional indices and extents of blocks, threads, and elements auto idx = getIdx(acc); auto extent = getWorkDiv(acc); + // Origin: Grid, Block, Thread + // Unit: Blocks, Threads, Elems - Origin: - .. code-block:: c++ - - Grid, Block, Thread - - Unit: - .. code-block:: c++ - - Blocks, Threads, Elems - -Access components of multi-dimensional indices and extents +Access components of and destructure multi-dimensional indices and extents .. code-block:: c++ auto idxX = idx[0]; + auto [z, y, x] = extent3D; Linearize multi-dimensional vectors .. code-block:: c++ @@ -258,7 +250,8 @@ Linearize multi-dimensional vectors Allocate static shared memory variable .. code-block:: c++ - Type & var = declareSharedVar(acc); + Type& var = declareSharedVar(acc); // scalar + auto& arr = declareSharedVar(acc); // array Get dynamic shared memory pool, requires the kernel to specialize .. code-block:: c++ @@ -275,12 +268,10 @@ Atomic operations .. code-block:: c++ auto result = atomicOp(acc, arguments); - - Operations: - .. code-block:: c++ - - AtomicAdd, AtomicSub, AtomicMin, AtomicMax, AtomicExch, - AtomicInc, AtomicDec, AtomicAnd, AtomicOr, AtomicXor, AtomicCas + // Operation: AtomicAdd, AtomicSub, AtomicMin, AtomicMax, AtomicExch, + // AtomicInc, AtomicDec, AtomicAnd, AtomicOr, AtomicXor, AtomicCas + // Also dedicated functions available, e.g.: + auto old = atomicAdd(acc, ptr, 1); Memory fences on block-, grid- or device level (guarantees LoadLoad and StoreStore ordering) .. 
code-block:: c++ diff --git a/docs/source/basic/example.rst b/docs/source/basic/example.rst index a14290f63e52..6903d866982e 100644 --- a/docs/source/basic/example.rst +++ b/docs/source/basic/example.rst @@ -27,16 +27,11 @@ The following example shows a minimal example of a ``CMakeLists.txt`` that uses :caption: CMakeLists.txt cmake_minimum_required(VERSION 3.22) - - set(_TARGET_NAME myProject) - project(${_TARGET_NAME}) + project("myexample" CXX) find_package(alpaka REQUIRED) - - alpaka_add_executable(${_TARGET_NAME} helloWorld.cpp) - target_link_libraries( - ${_TARGET_NAME} - PUBLIC alpaka::alpaka) + alpaka_add_executable(${PROJECT_NAME} helloWorld.cpp) + target_link_libraries(${PROJECT_NAME} PUBLIC alpaka::alpaka) In the CMake configuration phase of the project, you must activate the accelerator you want to use: @@ -44,16 +39,13 @@ In the CMake configuration phase of the project, you must activate the accelerat cd mkdir build && cd build - # enable the CUDA accelerator cmake .. -Dalpaka_ACC_GPU_CUDA_ENABLE=ON - # compile and link cmake --build . - # execute application - ./myProject + ./myexample A complete list of CMake flags for the accelerator can be found :doc:`here `. -If the configuration was successful and CMake found the CUDA SDK, the C++ template accelerator type ``alpaka::acc::AccGpuCudaRt`` is available. +If the configuration was successful and CMake found the CUDA SDK, the C++ template accelerator type ``alpaka::AccGpuCudaRt`` is available. Use alpaka via ``add_subdirectory`` ----------------------------------- @@ -64,15 +56,10 @@ The ``add_subdirectory`` method does not require alpaka to be installed. 
Instead :caption: CMakeLists.txt cmake_minimum_required(VERSION 3.22) - - set(_TARGET_NAME myProject) - project(${_TARGET_NAME}) + project("myexample" CXX) add_subdirectory(thirdParty/alpaka) - - alpaka_add_executable(${_TARGET_NAME} helloWorld.cpp) - target_link_libraries( - ${_TARGET_NAME} - PUBLIC alpaka::alpaka) + alpaka_add_executable(${PROJECT_NAME} helloWorld.cpp) + target_link_libraries(${PROJECT_NAME} PUBLIC alpaka::alpaka) The CMake configure and build commands are the same as for the ``find_package`` approach. diff --git a/docs/source/basic/install.rst b/docs/source/basic/install.rst index 5aef1b428902..1f716fafc1f3 100644 --- a/docs/source/basic/install.rst +++ b/docs/source/basic/install.rst @@ -44,12 +44,7 @@ By default, no accelerator is enabled because some combinations of compilers and .. code-block:: - # create build folder - mkdir build && cd build - # run cmake configure with enable CUDA backend - cmake -Dalpaka_ACC_GPU_CUDA_ENABLE=ON .. - # compile source code - cmake --build . + cmake -Dalpaka_ACC_GPU_CUDA_ENABLE=ON ... In the overview of :doc:`cmake arguments ` you will find all CMake flags for activating the different accelerators. How to select an accelerator in the source code is described on the :doc:`example page `. @@ -60,4 +55,4 @@ In the overview of :doc:`cmake arguments ` you will find all CM .. hint:: - When the test or examples are activated, the alpaka build system automatically activates the ``serial backend``, as it is needed for many tests. Therefore, the tests are run with the ``serial backend`` by default. If you want to test another backend, you have to activate it at CMake configuration time, for example the ``HIP`` backend: ``cmake .. -DBUILD_TESTING=ON -Dalpaka_ACC_GPU_HIP_ENABLE=ON``. The alpaka tests use a selector algorithm to choose a specific accelerator for the test cases. The selector works with accelerator priorities. 
Therefore, it is recommended to enable only one accelerator for a build to make sure that the right one is used. + When the test or examples are activated, the alpaka build system automatically activates the ``serial backend``, as it is needed for many tests. Therefore, the tests are run with the ``serial backend`` by default. If you want to test another backend, you have to activate it at CMake configuration time, for example the ``HIP`` backend: ``cmake .. -DBUILD_TESTING=ON -Dalpaka_ACC_GPU_HIP_ENABLE=ON``. Some alpaka tests use a selector algorithm to choose a specific accelerator for the test cases. The selector works with accelerator priorities. Therefore, it is recommended to enable only one accelerator for a build to make sure that the right one is used. diff --git a/docs/source/basic/intro.rst b/docs/source/basic/intro.rst index 76c219ff53a8..02852466d579 100644 --- a/docs/source/basic/intro.rst +++ b/docs/source/basic/intro.rst @@ -4,9 +4,9 @@ Introduction The *alpaka* library defines and implements an abstract interface for the *hierarchical redundant parallelism* model. This model exploits task- and data-parallelism as well as memory hierarchies at all levels of current multi-core architectures. This allows to achieve performance portability across various types of accelerators by ignoring specific unsupported levels and utilizing only the ones supported on a specific accelerator. -All hardware types (multi- and many-core CPUs, GPUs and other accelerators) are treated and can be programmed in the same way. -The *alpaka* library provides back-ends for *CUDA*, *OpenMP*, *HIP* and other methods. -The policy-based C++ template interface provided allows for straightforward user-defined extension of the library to support other accelerators. +All hardware types (CPUs, GPUs and other accelerators) are treated and can be programmed in the same way. +The *alpaka* library provides back-ends for *CUDA*, *OpenMP*, *HIP*, *SYCL* and other technologies. 
+The trait-based C++ template interface provided allows for straightforward user-defined extension of the library to support other accelerators. The library name *alpaka* is an acronym standing for **A**\ bstraction **L**\ ibrary for **Pa**\ rallel **K**\ ernel **A**\ cceleration. diff --git a/docs/source/basic/library.rst b/docs/source/basic/library.rst index 7818101ff7f7..d9083f028819 100644 --- a/docs/source/basic/library.rst +++ b/docs/source/basic/library.rst @@ -117,7 +117,7 @@ Kernels can also be defined via lambda expressions. } .. attention:: - The Nvidia ``nvcc`` does not support generic lambdas which are marked with `__device__`, which is what `ALPAKA_FN_ACC` expands to (among others) when the CUDA backend is active. + NVIDIA's ``nvcc`` compiler does not support generic lambdas which are marked with `__device__`, which is what `ALPAKA_FN_ACC` expands to (among others) when the CUDA backend is active. Therefore, a workaround is required. The type of the ``acc`` must be defined outside the lambda. .. code-block:: cpp diff --git a/docs/source/index.rst b/docs/source/index.rst index 2efb180d46c7..88151199d263 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,10 +10,11 @@ *alpaka - An Abstraction Library for Parallel Kernel Acceleration* -The alpaka library is a header-only C++14 abstraction library for accelerator development. Its aim is to provide performance portability across accelerators through the abstraction (not hiding!) of the underlying levels of parallelism. +The alpaka library is a header-only C++17 abstraction library for accelerator development. +Its aim is to provide performance portability across accelerators through the abstraction (not hiding!) of the underlying levels of parallelism. .. CAUTION:: - The readthedocs pages are work in progress and contain outdated sections. + The readthedocs pages are provided with best effort, but may contain outdated sections. 
alpaka - How to Read This Document ----------------------------------