From 38983ebc42de5683e212562c931aa0789c6eefe7 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Thu, 30 Jan 2025 16:40:27 +0100 Subject: [PATCH 1/2] PTX: Add tcgen05 instructions (#3607) * ptx: Add tcgen05.alloc * ptx: Add tcgen05.commit * ptx: Add tcgen05.cp * ptx: Add tcgen05.fence * ptx: Add tcgen05.ld * ptx: Add tcgen05.mma * ptx: Add tcgen05.mma.ws * ptx: Add tcgen05.shift * ptx: Add tcgen05.st * ptx: Add tcgen05.wait * fix docs --------- Co-authored-by: Allard Hendriksen --- docs/libcudacxx/ptx/instructions.rst | 10 +++++ .../ptx/instructions/tcgen05_alloc.rst | 9 +++++ .../ptx/instructions/tcgen05_commit.rst | 9 +++++ .../ptx/instructions/tcgen05_cp.rst | 9 +++++ .../ptx/instructions/tcgen05_fence.rst | 9 +++++ .../ptx/instructions/tcgen05_ld.rst | 9 +++++ .../ptx/instructions/tcgen05_mma.rst | 9 +++++ .../ptx/instructions/tcgen05_mma_ws.rst | 9 +++++ .../ptx/instructions/tcgen05_shift.rst | 9 +++++ .../ptx/instructions/tcgen05_st.rst | 9 +++++ .../ptx/instructions/tcgen05_wait.rst | 9 +++++ .../cuda/__ptx/instructions/tcgen05_alloc.h | 37 +++++++++++++++++++ .../cuda/__ptx/instructions/tcgen05_commit.h | 37 +++++++++++++++++++ .../cuda/__ptx/instructions/tcgen05_cp.h | 37 +++++++++++++++++++ .../cuda/__ptx/instructions/tcgen05_fence.h | 37 +++++++++++++++++++ .../cuda/__ptx/instructions/tcgen05_ld.h | 37 +++++++++++++++++++ .../cuda/__ptx/instructions/tcgen05_mma.h | 37 +++++++++++++++++++ .../cuda/__ptx/instructions/tcgen05_mma_ws.h | 37 +++++++++++++++++++ .../cuda/__ptx/instructions/tcgen05_shift.h | 37 +++++++++++++++++++ .../cuda/__ptx/instructions/tcgen05_st.h | 37 +++++++++++++++++++ .../cuda/__ptx/instructions/tcgen05_wait.h | 37 +++++++++++++++++++ libcudacxx/include/cuda/ptx | 10 +++++ .../ptx/ptx.tcgen05.alloc.compile.pass.cpp | 22 +++++++++++ .../ptx/ptx.tcgen05.commit.compile.pass.cpp | 22 +++++++++++ .../cuda/ptx/ptx.tcgen05.cp.compile.pass.cpp | 22 +++++++++++ .../ptx/ptx.tcgen05.fence.compile.pass.cpp | 22 +++++++++++ .../cuda/ptx/ptx.tcgen05.ld.compile.pass.cpp | 22 +++++++++++ .../cuda/ptx/ptx.tcgen05.mma.compile.pass.cpp | 22 +++++++++++ .../ptx/ptx.tcgen05.mma.ws.compile.pass.cpp | 22 +++++++++++ .../ptx/ptx.tcgen05.shift.compile.pass.cpp | 22 +++++++++++ .../cuda/ptx/ptx.tcgen05.st.compile.pass.cpp | 22 +++++++++++ .../ptx/ptx.tcgen05.wait.compile.pass.cpp | 22 +++++++++++ 32 files changed, 700 insertions(+) create mode 100644 docs/libcudacxx/ptx/instructions/tcgen05_alloc.rst create mode 100644 docs/libcudacxx/ptx/instructions/tcgen05_commit.rst create mode 100644 docs/libcudacxx/ptx/instructions/tcgen05_cp.rst create mode 100644 docs/libcudacxx/ptx/instructions/tcgen05_fence.rst create mode 100644 docs/libcudacxx/ptx/instructions/tcgen05_ld.rst create mode 100644 docs/libcudacxx/ptx/instructions/tcgen05_mma.rst create mode 100644 docs/libcudacxx/ptx/instructions/tcgen05_mma_ws.rst create mode 100644 docs/libcudacxx/ptx/instructions/tcgen05_shift.rst create mode 100644 docs/libcudacxx/ptx/instructions/tcgen05_st.rst create mode 100644 docs/libcudacxx/ptx/instructions/tcgen05_wait.rst create mode 100644 libcudacxx/include/cuda/__ptx/instructions/tcgen05_alloc.h create mode 100644 libcudacxx/include/cuda/__ptx/instructions/tcgen05_commit.h create mode 100644 libcudacxx/include/cuda/__ptx/instructions/tcgen05_cp.h create mode 100644 libcudacxx/include/cuda/__ptx/instructions/tcgen05_fence.h create mode 100644 libcudacxx/include/cuda/__ptx/instructions/tcgen05_ld.h create mode 100644 libcudacxx/include/cuda/__ptx/instructions/tcgen05_mma.h create mode 100644 libcudacxx/include/cuda/__ptx/instructions/tcgen05_mma_ws.h create mode 100644 libcudacxx/include/cuda/__ptx/instructions/tcgen05_shift.h create mode 100644 libcudacxx/include/cuda/__ptx/instructions/tcgen05_st.h create mode 100644 libcudacxx/include/cuda/__ptx/instructions/tcgen05_wait.h create mode 100644 libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.alloc.compile.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.commit.compile.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.cp.compile.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.fence.compile.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.ld.compile.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.mma.compile.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.mma.ws.compile.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.shift.compile.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.st.compile.pass.cpp create mode 100644 libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.wait.compile.pass.cpp diff --git a/docs/libcudacxx/ptx/instructions.rst b/docs/libcudacxx/ptx/instructions.rst index 87ccc82b5b1..136dfb81fc3 100644 --- a/docs/libcudacxx/ptx/instructions.rst +++ b/docs/libcudacxx/ptx/instructions.rst @@ -29,6 +29,16 @@ PTX Instructions instructions/red_async instructions/st_async instructions/st_bulk + instructions/tcgen05_alloc + instructions/tcgen05_commit + instructions/tcgen05_cp + instructions/tcgen05_fence + instructions/tcgen05_ld + instructions/tcgen05_mma + instructions/tcgen05_mma_ws + instructions/tcgen05_shift + instructions/tcgen05_st + instructions/tcgen05_wait instructions/tensormap_replace instructions/tensormap_cp_fenceproxy instructions/special_registers diff --git a/docs/libcudacxx/ptx/instructions/tcgen05_alloc.rst b/docs/libcudacxx/ptx/instructions/tcgen05_alloc.rst new file mode 100644 index 00000000000..a30f2a2560c --- /dev/null +++ b/docs/libcudacxx/ptx/instructions/tcgen05_alloc.rst @@ -0,0 +1,9 @@ +.. _libcudacxx-ptx-instructions-tcgen05-alloc: + +tcgen05.alloc +============= + +- PTX ISA: + `tcgen05.alloc `__ + +.. include:: generated/tcgen05_alloc.rst diff --git a/docs/libcudacxx/ptx/instructions/tcgen05_commit.rst b/docs/libcudacxx/ptx/instructions/tcgen05_commit.rst new file mode 100644 index 00000000000..a431350dea8 --- /dev/null +++ b/docs/libcudacxx/ptx/instructions/tcgen05_commit.rst @@ -0,0 +1,9 @@ +.. _libcudacxx-ptx-instructions-tcgen05-commit: + +tcgen05.commit +============== + +- PTX ISA: + `tcgen05.commit `__ + +.. include:: generated/tcgen05_commit.rst diff --git a/docs/libcudacxx/ptx/instructions/tcgen05_cp.rst b/docs/libcudacxx/ptx/instructions/tcgen05_cp.rst new file mode 100644 index 00000000000..5a220536d6e --- /dev/null +++ b/docs/libcudacxx/ptx/instructions/tcgen05_cp.rst @@ -0,0 +1,9 @@ +.. _libcudacxx-ptx-instructions-tcgen05-cp: + +tcgen05.cp +========== + +- PTX ISA: + `tcgen05.cp `__ + +.. include:: generated/tcgen05_cp.rst diff --git a/docs/libcudacxx/ptx/instructions/tcgen05_fence.rst b/docs/libcudacxx/ptx/instructions/tcgen05_fence.rst new file mode 100644 index 00000000000..6635131f707 --- /dev/null +++ b/docs/libcudacxx/ptx/instructions/tcgen05_fence.rst @@ -0,0 +1,9 @@ +.. _libcudacxx-ptx-instructions-tcgen05-fence: + +tcgen05.fence +============= + +- PTX ISA: + `tcgen05.fence `__ + +.. include:: generated/tcgen05_fence.rst diff --git a/docs/libcudacxx/ptx/instructions/tcgen05_ld.rst b/docs/libcudacxx/ptx/instructions/tcgen05_ld.rst new file mode 100644 index 00000000000..165b8eb935a --- /dev/null +++ b/docs/libcudacxx/ptx/instructions/tcgen05_ld.rst @@ -0,0 +1,9 @@ +.. _libcudacxx-ptx-instructions-tcgen05-ld: + +tcgen05.ld +========== + +- PTX ISA: + `tcgen05.ld `__ + +.. include:: generated/tcgen05_ld.rst diff --git a/docs/libcudacxx/ptx/instructions/tcgen05_mma.rst b/docs/libcudacxx/ptx/instructions/tcgen05_mma.rst new file mode 100644 index 00000000000..9672ae0d0a1 --- /dev/null +++ b/docs/libcudacxx/ptx/instructions/tcgen05_mma.rst @@ -0,0 +1,9 @@ +.. _libcudacxx-ptx-instructions-tcgen05-mma: + +tcgen05.mma +=========== + +- PTX ISA: + `tcgen05.mma `__ + +.. include:: generated/tcgen05_mma.rst diff --git a/docs/libcudacxx/ptx/instructions/tcgen05_mma_ws.rst b/docs/libcudacxx/ptx/instructions/tcgen05_mma_ws.rst new file mode 100644 index 00000000000..e22066298ac --- /dev/null +++ b/docs/libcudacxx/ptx/instructions/tcgen05_mma_ws.rst @@ -0,0 +1,9 @@ +.. _libcudacxx-ptx-instructions-tcgen05-mma-ws: + +tcgen05.mma.ws +============== + +- PTX ISA: + `tcgen05.mma.ws `__ + +.. include:: generated/tcgen05_mma_ws.rst diff --git a/docs/libcudacxx/ptx/instructions/tcgen05_shift.rst b/docs/libcudacxx/ptx/instructions/tcgen05_shift.rst new file mode 100644 index 00000000000..eef04ae4d5e --- /dev/null +++ b/docs/libcudacxx/ptx/instructions/tcgen05_shift.rst @@ -0,0 +1,9 @@ +.. _libcudacxx-ptx-instructions-tcgen05-shift: + +tcgen05.shift +============= + +- PTX ISA: + `tcgen05.shift `__ + +.. include:: generated/tcgen05_shift.rst diff --git a/docs/libcudacxx/ptx/instructions/tcgen05_st.rst b/docs/libcudacxx/ptx/instructions/tcgen05_st.rst new file mode 100644 index 00000000000..f101149481f --- /dev/null +++ b/docs/libcudacxx/ptx/instructions/tcgen05_st.rst @@ -0,0 +1,9 @@ +.. _libcudacxx-ptx-instructions-tcgen05-st: + +tcgen05.st +========== + +- PTX ISA: + `tcgen05.st `__ + +.. include:: generated/tcgen05_st.rst diff --git a/docs/libcudacxx/ptx/instructions/tcgen05_wait.rst b/docs/libcudacxx/ptx/instructions/tcgen05_wait.rst new file mode 100644 index 00000000000..cb149e5c9a1 --- /dev/null +++ b/docs/libcudacxx/ptx/instructions/tcgen05_wait.rst @@ -0,0 +1,9 @@ +.. _libcudacxx-ptx-instructions-tcgen05-wait: + +tcgen05.wait +============ + +- PTX ISA: + `tcgen05.wait `__ + +.. include:: generated/tcgen05_wait.rst diff --git a/libcudacxx/include/cuda/__ptx/instructions/tcgen05_alloc.h b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_alloc.h new file mode 100644 index 00000000000..743ee4306ee --- /dev/null +++ b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_alloc.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_PTX_TCGEN05_ALLOC_H_ +#define _CUDA_PTX_TCGEN05_ALLOC_H_ + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include // __CUDA_MINIMUM_ARCH__ and friends + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX + +#include + +_LIBCUDACXX_END_NAMESPACE_CUDA_PTX + +#endif // _CUDA_PTX_TCGEN05_ALLOC_H_ diff --git a/libcudacxx/include/cuda/__ptx/instructions/tcgen05_commit.h b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_commit.h new file mode 100644 index 00000000000..ca06ec6b97d --- /dev/null +++ b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_commit.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_PTX_TCGEN05_COMMIT_H_ +#define _CUDA_PTX_TCGEN05_COMMIT_H_ + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include // __CUDA_MINIMUM_ARCH__ and friends + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX + +#include + +_LIBCUDACXX_END_NAMESPACE_CUDA_PTX + +#endif // _CUDA_PTX_TCGEN05_COMMIT_H_ diff --git a/libcudacxx/include/cuda/__ptx/instructions/tcgen05_cp.h b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_cp.h new file mode 100644 index 00000000000..e0c6ebf74ad --- /dev/null +++ b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_cp.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_PTX_TCGEN05_CP_H_ +#define _CUDA_PTX_TCGEN05_CP_H_ + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include // __CUDA_MINIMUM_ARCH__ and friends + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX + +#include + +_LIBCUDACXX_END_NAMESPACE_CUDA_PTX + +#endif // _CUDA_PTX_TCGEN05_CP_H_ diff --git a/libcudacxx/include/cuda/__ptx/instructions/tcgen05_fence.h b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_fence.h new file mode 100644 index 00000000000..a36847cd0f3 --- /dev/null +++ b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_fence.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_PTX_TCGEN05_FENCE_H_ +#define _CUDA_PTX_TCGEN05_FENCE_H_ + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include // __CUDA_MINIMUM_ARCH__ and friends + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX + +#include + +_LIBCUDACXX_END_NAMESPACE_CUDA_PTX + +#endif // _CUDA_PTX_TCGEN05_FENCE_H_ diff --git a/libcudacxx/include/cuda/__ptx/instructions/tcgen05_ld.h b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_ld.h new file mode 100644 index 00000000000..782ba20e804 --- /dev/null +++ b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_ld.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_PTX_TCGEN05_LD_H_ +#define _CUDA_PTX_TCGEN05_LD_H_ + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include // __CUDA_MINIMUM_ARCH__ and friends + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX + +#include + +_LIBCUDACXX_END_NAMESPACE_CUDA_PTX + +#endif // _CUDA_PTX_TCGEN05_LD_H_ diff --git a/libcudacxx/include/cuda/__ptx/instructions/tcgen05_mma.h b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_mma.h new file mode 100644 index 00000000000..ff9d159930b --- /dev/null +++ b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_mma.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_PTX_TCGEN05_MMA_H_ +#define _CUDA_PTX_TCGEN05_MMA_H_ + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include // __CUDA_MINIMUM_ARCH__ and friends + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX + +#include + +_LIBCUDACXX_END_NAMESPACE_CUDA_PTX + +#endif // _CUDA_PTX_TCGEN05_MMA_H_ diff --git a/libcudacxx/include/cuda/__ptx/instructions/tcgen05_mma_ws.h b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_mma_ws.h new file mode 100644 index 00000000000..5d0bd5b8b5a --- /dev/null +++ b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_mma_ws.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_PTX_TCGEN05_MMA_WS_H_ +#define _CUDA_PTX_TCGEN05_MMA_WS_H_ + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include // __CUDA_MINIMUM_ARCH__ and friends + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX + +#include + +_LIBCUDACXX_END_NAMESPACE_CUDA_PTX + +#endif // _CUDA_PTX_TCGEN05_MMA_WS_H_ diff --git a/libcudacxx/include/cuda/__ptx/instructions/tcgen05_shift.h b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_shift.h new file mode 100644 index 00000000000..aab5cbe27b8 --- /dev/null +++ b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_shift.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_PTX_TCGEN05_SHIFT_H_ +#define _CUDA_PTX_TCGEN05_SHIFT_H_ + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include // __CUDA_MINIMUM_ARCH__ and friends + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX + +#include + +_LIBCUDACXX_END_NAMESPACE_CUDA_PTX + +#endif // _CUDA_PTX_TCGEN05_SHIFT_H_ diff --git a/libcudacxx/include/cuda/__ptx/instructions/tcgen05_st.h b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_st.h new file mode 100644 index 00000000000..94c86614b1e --- /dev/null +++ b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_st.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_PTX_TCGEN05_ST_H_ +#define _CUDA_PTX_TCGEN05_ST_H_ + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include // __CUDA_MINIMUM_ARCH__ and friends + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX + +#include + +_LIBCUDACXX_END_NAMESPACE_CUDA_PTX + +#endif // _CUDA_PTX_TCGEN05_ST_H_ diff --git a/libcudacxx/include/cuda/__ptx/instructions/tcgen05_wait.h b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_wait.h new file mode 100644 index 00000000000..1684d9afd65 --- /dev/null +++ b/libcudacxx/include/cuda/__ptx/instructions/tcgen05_wait.h @@ -0,0 +1,37 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef _CUDA_PTX_TCGEN05_WAIT_H_ +#define _CUDA_PTX_TCGEN05_WAIT_H_ + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include // __CUDA_MINIMUM_ARCH__ and friends + +_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX + +#include + +_LIBCUDACXX_END_NAMESPACE_CUDA_PTX + +#endif // _CUDA_PTX_TCGEN05_WAIT_H_ diff --git a/libcudacxx/include/cuda/ptx b/libcudacxx/include/cuda/ptx index 0d699b2e2ca..971288b456c 100644 --- a/libcudacxx/include/cuda/ptx +++ b/libcudacxx/include/cuda/ptx @@ -91,6 +91,16 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.alloc.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.alloc.compile.pass.cpp new file mode 100644 index 00000000000..49f9df928e9 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.alloc.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: libcpp-has-no-threads + +// + +#include +#include + +#include "generated/tcgen05_alloc.h" + +int main(int, char**) +{ + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.commit.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.commit.compile.pass.cpp new file mode 100644 index 00000000000..73ea1851bec --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.commit.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: libcpp-has-no-threads + +// + +#include +#include + +#include "generated/tcgen05_commit.h" + +int main(int, char**) +{ + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.cp.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.cp.compile.pass.cpp new file mode 100644 index 00000000000..85ddc17efe4 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.cp.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: libcpp-has-no-threads + +// + +#include +#include + +#include "generated/tcgen05_cp.h" + +int main(int, char**) +{ + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.fence.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.fence.compile.pass.cpp new file mode 100644 index 00000000000..fda57b348de --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.fence.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: libcpp-has-no-threads + +// + +#include +#include + +#include "generated/tcgen05_fence.h" + +int main(int, char**) +{ + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.ld.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.ld.compile.pass.cpp new file mode 100644 index 00000000000..8da8e54f18d --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.ld.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: libcpp-has-no-threads + +// + +#include +#include + +#include "generated/tcgen05_ld.h" + +int main(int, char**) +{ + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.mma.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.mma.compile.pass.cpp new file mode 100644 index 00000000000..098cbbfa896 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.mma.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: libcpp-has-no-threads + +// + +#include +#include + +#include "generated/tcgen05_mma.h" + +int main(int, char**) +{ + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.mma.ws.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.mma.ws.compile.pass.cpp new file mode 100644 index 00000000000..350c964d749 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.mma.ws.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: libcpp-has-no-threads + +// + +#include +#include + +#include "generated/tcgen05_mma_ws.h" + +int main(int, char**) +{ + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.shift.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.shift.compile.pass.cpp new file mode 100644 index 00000000000..5ecfff7ff3b --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.shift.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: libcpp-has-no-threads + +// + +#include +#include + +#include "generated/tcgen05_shift.h" + +int main(int, char**) +{ + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.st.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.st.compile.pass.cpp new file mode 100644 index 00000000000..92a49224f0e --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.st.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: libcpp-has-no-threads + +// + +#include +#include + +#include "generated/tcgen05_st.h" + +int main(int, char**) +{ + return 0; +} diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.wait.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.wait.compile.pass.cpp new file mode 100644 index 00000000000..4bb3156ed12 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tcgen05.wait.compile.pass.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// +// UNSUPPORTED: libcpp-has-no-threads + +// + +#include +#include + +#include "generated/tcgen05_wait.h" + +int main(int, char**) +{ + return 0; +} From cea61a3410fdea796154dcd9157e010659aab837 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Thu, 30 Jan 2025 16:48:09 +0100 Subject: [PATCH 2/2] Use a differrent implementation for `tuple_of_iterator_references` to tuple conversion (#3609) --- .../include/cuda/std/detail/libcxx/include/tuple | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/tuple b/libcudacxx/include/cuda/std/detail/libcxx/include/tuple index 6ff1039e61b..47f8b16222b 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/tuple +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/tuple @@ -891,10 +891,19 @@ public: enable_if_t<__is_tuple_of_iterator_references<_TupleOfIteratorReferences>::value, int> = 0, enable_if_t<(tuple_size<_TupleOfIteratorReferences>::value == sizeof...(_Tp)), int> = 0> _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 tuple(_TupleOfIteratorReferences&& __t) - : tuple(_CUDA_VSTD::forward<_TupleOfIteratorReferences>(__t).template __to_tuple<_Tp...>( - __make_tuple_indices_t())) + : tuple(_CUDA_VSTD::forward<_TupleOfIteratorReferences>(__t), + typename __make_tuple_indices::type{}) {} +private: + template ::value, int> = 0> + _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 tuple(_TupleOfIteratorReferences&& __t, __tuple_indices<_Indices...>) + : tuple(_CUDA_VSTD::get<_Indices>(_CUDA_VSTD::forward<_TupleOfIteratorReferences>(__t))...) + {} + +public: template , enable_if_t::value, int> = 0,