From acf5193285e6e13d3da539ce543e811dcf27ad44 Mon Sep 17 00:00:00 2001
From: Bernhard Manfred Gruber <bernhardmgruber@gmail.com>
Date: Fri, 1 Sep 2023 12:05:41 +0200
Subject: [PATCH] Fix and update a few things in the documentation

---
 docs/source/basic/cheatsheet.rst | 46 +++++++++++++-------------------
 docs/source/basic/example.rst    | 29 ++++++--------------
 docs/source/basic/install.rst    |  9 ++-----
 docs/source/basic/intro.rst      |  6 ++---
 docs/source/basic/library.rst    |  2 +-
 docs/source/index.rst            |  5 ++--
 6 files changed, 35 insertions(+), 62 deletions(-)

diff --git a/docs/source/basic/cheatsheet.rst b/docs/source/basic/cheatsheet.rst
index 601c4a297033..3b22aa7a9094 100644
--- a/docs/source/basic/cheatsheet.rst
+++ b/docs/source/basic/cheatsheet.rst
@@ -40,9 +40,12 @@ Define accelerator type (CUDA, OpenMP,etc.)
      .. code-block:: c++
 
 	AccGpuCudaRt,
+	AccGpuHipRt,
+	AccCpuSycl,
+	AccFpgaSyclIntel,
+	AccGpuSyclIntel,
 	AccCpuOmp2Blocks,
 	AccCpuOmp2Threads,
-	AccCpuOmp4,
 	AccCpuTbbBlocks,
 	AccCpuThreads,
 	AccCpuSerial
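+	// e.g. selecting one (Dim and Idx as defined further down this page):
+	// using Acc = AccGpuCudaRt<Dim, Idx>;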
@@ -126,9 +129,9 @@ Create a view to host memory represented by a pointer
   .. code-block:: c++
 
      using Dim = alpaka::DimInt<1u>;
-     Vec<Dim, Idx> extent = value;
-     DataType* date = new DataType[extent[0]];
-     auto hostView = createView(devHost, data, extent);
+     Vec<Dim, Idx> extent = size;
+     DataType* ptr = ...;
+     auto hostView = createView(devHost, ptr, extent);
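+     // note: the view does not take ownership of the memory behind ptr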
 
 Create a view to host std::vector
    .. code-block:: c++
@@ -139,7 +142,7 @@ Create a view to host std::vector
 Create a view to host std::array
    .. code-block:: c++
 
-     std::vector<DataType, 2> array = {42u, 23};
+     std::array<DataType, 2> array = {42u, 23};
      auto hostView = createView(devHost, array);
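+     // the extent is deduced from the container's size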
 
 Get a raw pointer to a buffer or view initialization, etc.
@@ -148,11 +151,6 @@ Get a raw pointer to a buffer or view initialization, etc.
      DataType* raw = view::getPtrNative(bufHost);
      DataType* rawViewPtr = view::getPtrNative(hostView);
 
-Get an accessor to a buffer and the accessor's type (experimental)
-  .. code-block:: c++
-
-     experimental::BufferAccessor<Acc, Elem, N, AccessTag> a = experimental::access(buffer);
-
 Allocate a buffer in device memory
   .. code-block:: c++
 
@@ -230,21 +228,14 @@ Access multi-dimensional indices and extents of blocks, threads, and elements
 
      auto idx = getIdx<Origin, Unit>(acc);
      auto extent = getWorkDiv<Origin, Unit>(acc);
+     // Origin: Grid, Block, Thread
+     // Unit: Blocks, Threads, Elems
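+     // e.g. the global thread index and the grid extent in threads:
+     auto globalThreadIdx    = getIdx<Grid, Threads>(acc);
+     auto globalThreadExtent = getWorkDiv<Grid, Threads>(acc);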
 
-  Origin:
-     .. code-block:: c++
-
-	Grid, Block, Thread
-
-  Unit:
-     .. code-block:: c++
-
-	Blocks, Threads, Elems
-
-Access components of multi-dimensional indices and extents
+Access components of, and destructure, multi-dimensional indices and extents
   .. code-block:: c++
 
      auto idxX = idx[0];
+     auto [z, y, x] = extent3D;
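+     // (extent3D here is assumed to be a 3-dimensional Vec)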
 
 Linearize multi-dimensional vectors
   .. code-block:: c++
@@ -258,7 +249,8 @@ Linearize multi-dimensional vectors
 Allocate static shared memory variable
   .. code-block:: c++
 
-     Type & var = declareSharedVar<Type, __COUNTER__>(acc);
+     Type& var = declareSharedVar<Type, __COUNTER__>(acc);       // scalar
+     auto& arr = declareSharedVar<float[256], __COUNTER__>(acc); // array
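+     // the __COUNTER__ macro provides the unique id required for each shared variable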
 
 Get dynamic shared memory pool, requires the kernel to specialize
   .. code-block:: c++
@@ -275,12 +267,10 @@ Atomic operations
   .. code-block:: c++
 
      auto result = atomicOp<Operation>(acc, arguments);
-
-  Operations:
-     .. code-block:: c++
-
-         AtomicAdd, AtomicSub, AtomicMin, AtomicMax, AtomicExch,
-         AtomicInc, AtomicDec, AtomicAnd, AtomicOr, AtomicXor, AtomicCas
+     // Operation: AtomicAdd, AtomicSub, AtomicMin, AtomicMax, AtomicExch,
+     //            AtomicInc, AtomicDec, AtomicAnd, AtomicOr, AtomicXor, AtomicCas
+     // Dedicated functions are also available, e.g.:
+     auto old = atomicAdd(acc, ptr, 1);
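+     // an optional hierarchy argument narrows the atomic's scope, e.g. to the block:
+     auto old2 = atomicAdd(acc, ptr, 1, hierarchy::Threads{});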
 
 Memory fences on block-, grid- or device level (guarantees LoadLoad and StoreStore ordering)
   .. code-block:: c++
diff --git a/docs/source/basic/example.rst b/docs/source/basic/example.rst
index a14290f63e52..6903d866982e 100644
--- a/docs/source/basic/example.rst
+++ b/docs/source/basic/example.rst
@@ -27,16 +27,11 @@ The following example shows a minimal example of a ``CMakeLists.txt`` that uses
    :caption: CMakeLists.txt
 
    cmake_minimum_required(VERSION 3.22)
-
-   set(_TARGET_NAME myProject)
-   project(${_TARGET_NAME})
+   project("myexample" CXX)
 
    find_package(alpaka REQUIRED)
-
-   alpaka_add_executable(${_TARGET_NAME} helloWorld.cpp)
-   target_link_libraries(
-     ${_TARGET_NAME}
-     PUBLIC alpaka::alpaka)
+   alpaka_add_executable(${PROJECT_NAME} helloWorld.cpp)
+   target_link_libraries(${PROJECT_NAME} PUBLIC alpaka::alpaka)
 
 In the CMake configuration phase of the project, you must activate the accelerator you want to use:
 
@@ -44,16 +39,13 @@ In the CMake configuration phase of the project, you must activate the accelerat
 
     cd <path/to/the/project/root>
     mkdir build && cd build
-    # enable the CUDA accelerator
     cmake .. -Dalpaka_ACC_GPU_CUDA_ENABLE=ON
-    # compile and link
     cmake --build .
-    # execute application
-    ./myProject
+    ./myexample
 
-A complete list of CMake flags for the  accelerator can be found :doc:`here </advanced/cmake>`.
+A complete list of CMake flags for the accelerators can be found :doc:`here </advanced/cmake>`.
 
-If the configuration was successful and CMake found the CUDA SDK, the C++ template accelerator type ``alpaka::acc::AccGpuCudaRt`` is available.
+If the configuration was successful and CMake found the CUDA SDK, the C++ template accelerator type ``alpaka::AccGpuCudaRt`` is available.
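+For example, the accelerator can then be selected in the source code as follows (the ``Dim`` and ``Idx`` choices here are only an illustration):
+
+.. code-block:: c++
+
+   using Dim = alpaka::DimInt<1u>;
+   using Idx = std::size_t;
+   using Acc = alpaka::AccGpuCudaRt<Dim, Idx>;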
 
 Use alpaka via ``add_subdirectory``
 -----------------------------------
@@ -64,15 +56,10 @@ The ``add_subdirectory`` method does not require alpaka to be installed. Instead
    :caption: CMakeLists.txt
 
    cmake_minimum_required(VERSION 3.22)
-
-   set(_TARGET_NAME myProject)
-   project(${_TARGET_NAME})
+   project("myexample" CXX)
 
    add_subdirectory(thirdParty/alpaka)
-
-   alpaka_add_executable(${_TARGET_NAME} helloWorld.cpp)
-   target_link_libraries(
-     ${_TARGET_NAME}
-     PUBLIC alpaka::alpaka)
+   alpaka_add_executable(${PROJECT_NAME} helloWorld.cpp)
+   target_link_libraries(${PROJECT_NAME} PUBLIC alpaka::alpaka)
 
 The CMake configure and build commands are the same as for the ``find_package`` approach.
diff --git a/docs/source/basic/install.rst b/docs/source/basic/install.rst
index 5aef1b428902..1f716fafc1f3 100644
--- a/docs/source/basic/install.rst
+++ b/docs/source/basic/install.rst
@@ -44,12 +44,7 @@ By default, no accelerator is enabled because some combinations of compilers and
 
 .. code-block::
 
-  # create build folder
-  mkdir build && cd build
-  # run cmake configure with enable CUDA backend
-  cmake -Dalpaka_ACC_GPU_CUDA_ENABLE=ON ..
-  # compile source code
-  cmake --build .
+  cmake -Dalpaka_ACC_GPU_CUDA_ENABLE=ON ...
 
 In the overview of :doc:`cmake arguments </advanced/cmake>` you will find all CMake flags for activating the different accelerators. How to select an accelerator in the source code is described on the :doc:`example page </basic/example>`.
 
@@ -60,4 +55,4 @@ In the overview of :doc:`cmake arguments </advanced/cmake>` you will find all CM
 
 .. hint::
 
-  When the test or examples are activated, the alpaka build system automatically activates the ``serial backend``, as it is needed for many tests. Therefore, the tests are run with the ``serial backend`` by default. If you want to test another backend, you have to activate it at CMake configuration time, for example the ``HIP`` backend: ``cmake .. -DBUILD_TESTING=ON -Dalpaka_ACC_GPU_HIP_ENABLE=ON``. The alpaka tests use a selector algorithm to choose a specific accelerator for the test cases. The selector works with accelerator priorities. Therefore, it is recommended to enable only one accelerator for a build to make sure that the right one is used.
+  When the tests or examples are activated, the alpaka build system automatically activates the ``serial backend``, as it is needed for many tests. Therefore, the tests are run with the ``serial backend`` by default. If you want to test another backend, you have to activate it at CMake configuration time, for example the ``HIP`` backend: ``cmake .. -DBUILD_TESTING=ON -Dalpaka_ACC_GPU_HIP_ENABLE=ON``. Some alpaka tests use a selector algorithm to choose a specific accelerator for the test cases. The selector works with accelerator priorities. Therefore, it is recommended to enable only one accelerator for a build to make sure that the right one is used.
diff --git a/docs/source/basic/intro.rst b/docs/source/basic/intro.rst
index 76c219ff53a8..02852466d579 100644
--- a/docs/source/basic/intro.rst
+++ b/docs/source/basic/intro.rst
@@ -4,9 +4,9 @@ Introduction
 The *alpaka* library defines and implements an abstract interface for the *hierarchical redundant parallelism* model.
 This model exploits task- and data-parallelism as well as memory hierarchies at all levels of current multi-core architectures.
-This allows to achieve performance portability across various types of accelerators by ignoring specific unsupported levels and utilizing only the ones supported on a specific accelerator.
+This allows achieving performance portability across various types of accelerators by ignoring specific unsupported levels and utilizing only the ones supported on a specific accelerator.
-All hardware types (multi- and many-core CPUs, GPUs and other accelerators) are treated and can be programmed in the same way.
-The *alpaka* library provides back-ends for *CUDA*, *OpenMP*, *HIP* and other methods.
-The policy-based C++ template interface provided allows for straightforward user-defined extension of the library to support other accelerators.
+All hardware types (CPUs, GPUs and other accelerators) are treated, and can be programmed, in the same way.
+The *alpaka* library provides back-ends for *CUDA*, *OpenMP*, *HIP*, *SYCL* and other technologies.
+The trait-based C++ template interface provided allows for straightforward user-defined extension of the library to support other accelerators.
 
 The library name *alpaka* is an acronym standing for **A**\ bstraction **L**\ ibrary for **Pa**\ rallel **K**\ ernel **A**\ cceleration.
 
diff --git a/docs/source/basic/library.rst b/docs/source/basic/library.rst
index 7818101ff7f7..d9083f028819 100644
--- a/docs/source/basic/library.rst
+++ b/docs/source/basic/library.rst
@@ -117,7 +117,7 @@ Kernels can also be defined via lambda expressions.
    }
 
 .. attention::
-   The Nvidia ``nvcc`` does not support generic lambdas which are marked with `__device__`, which is what `ALPAKA_FN_ACC` expands to (among others) when the CUDA backend is active.
+   NVIDIA's ``nvcc`` compiler does not support generic lambdas that are marked with ``__device__``, which is what ``ALPAKA_FN_ACC`` expands to (among others) when the CUDA backend is active.
    Therefore, a workaround is required. The type of the ``acc`` must be defined outside the lambda.
 
    .. code-block:: cpp
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 2efb180d46c7..88151199d263 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -10,10 +10,11 @@
 
 *alpaka - An Abstraction Library for Parallel Kernel Acceleration*
 
-The alpaka library is a header-only C++14 abstraction library for accelerator development. Its aim is to provide performance portability across accelerators through the abstraction (not hiding!) of the underlying levels of parallelism.
+The alpaka library is a header-only C++17 abstraction library for accelerator development.
+Its aim is to provide performance portability across accelerators through the abstraction (not hiding!) of the underlying levels of parallelism.
 
 .. CAUTION::
-   The readthedocs pages are work in progress and contain outdated sections.
+   The readthedocs pages are maintained on a best-effort basis and may contain outdated sections.
 
 alpaka - How to Read This Document
 ----------------------------------