1
+ ..
2
+ This file was automatically generated. Do not edit.
3
+
1
4
cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes
2
5
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3
6
.. code:: cuda
4
7
5
8
// cp.async.bulk.tensor.1d.dst.src.tile.mbarrier::complete_tx::bytes [dstMem], [tensorMap, tensorCoords], [smem_bar];// 1a. PTX ISA 80, SM_90
6
9
// .dst = { .shared::cluster }
7
10
// .src = { .global }
8
- template <typename= void>
11
+ template <typename = void>
9
12
__device__ static inline void cp_async_bulk_tensor(
10
13
cuda::ptx::space_cluster_t,
11
14
cuda::ptx::space_global_t,
@@ -21,7 +24,7 @@ cp.async.bulk.tensor.1d.global.shared::cta.tile.bulk_group
21
24
// cp.async.bulk.tensor.1d.dst.src.tile.bulk_group [tensorMap, tensorCoords], [srcMem]; // 3a. PTX ISA 80, SM_90
22
25
// .dst = { .global }
23
26
// .src = { .shared::cta }
24
- template <typename= void>
27
+ template <typename = void>
25
28
__device__ static inline void cp_async_bulk_tensor(
26
29
cuda::ptx::space_global_t,
27
30
cuda::ptx::space_shared_t,
@@ -36,7 +39,7 @@ cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes
36
39
// cp.async.bulk.tensor.2d.dst.src.tile.mbarrier::complete_tx::bytes [dstMem], [tensorMap, tensorCoords], [smem_bar];// 1b. PTX ISA 80, SM_90
37
40
// .dst = { .shared::cluster }
38
41
// .src = { .global }
39
- template <typename= void>
42
+ template <typename = void>
40
43
__device__ static inline void cp_async_bulk_tensor(
41
44
cuda::ptx::space_cluster_t,
42
45
cuda::ptx::space_global_t,
@@ -52,7 +55,7 @@ cp.async.bulk.tensor.2d.global.shared::cta.tile.bulk_group
52
55
// cp.async.bulk.tensor.2d.dst.src.tile.bulk_group [tensorMap, tensorCoords], [srcMem]; // 3b. PTX ISA 80, SM_90
53
56
// .dst = { .global }
54
57
// .src = { .shared::cta }
55
- template <typename= void>
58
+ template <typename = void>
56
59
__device__ static inline void cp_async_bulk_tensor(
57
60
cuda::ptx::space_global_t,
58
61
cuda::ptx::space_shared_t,
@@ -67,7 +70,7 @@ cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes
67
70
// cp.async.bulk.tensor.3d.dst.src.tile.mbarrier::complete_tx::bytes [dstMem], [tensorMap, tensorCoords], [smem_bar];// 1c. PTX ISA 80, SM_90
68
71
// .dst = { .shared::cluster }
69
72
// .src = { .global }
70
- template <typename= void>
73
+ template <typename = void>
71
74
__device__ static inline void cp_async_bulk_tensor(
72
75
cuda::ptx::space_cluster_t,
73
76
cuda::ptx::space_global_t,
@@ -83,7 +86,7 @@ cp.async.bulk.tensor.3d.global.shared::cta.tile.bulk_group
83
86
// cp.async.bulk.tensor.3d.dst.src.tile.bulk_group [tensorMap, tensorCoords], [srcMem]; // 3c. PTX ISA 80, SM_90
84
87
// .dst = { .global }
85
88
// .src = { .shared::cta }
86
- template <typename= void>
89
+ template <typename = void>
87
90
__device__ static inline void cp_async_bulk_tensor(
88
91
cuda::ptx::space_global_t,
89
92
cuda::ptx::space_shared_t,
@@ -98,7 +101,7 @@ cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes
98
101
// cp.async.bulk.tensor.4d.dst.src.tile.mbarrier::complete_tx::bytes [dstMem], [tensorMap, tensorCoords], [smem_bar];// 1d. PTX ISA 80, SM_90
99
102
// .dst = { .shared::cluster }
100
103
// .src = { .global }
101
- template <typename= void>
104
+ template <typename = void>
102
105
__device__ static inline void cp_async_bulk_tensor(
103
106
cuda::ptx::space_cluster_t,
104
107
cuda::ptx::space_global_t,
@@ -114,7 +117,7 @@ cp.async.bulk.tensor.4d.global.shared::cta.tile.bulk_group
114
117
// cp.async.bulk.tensor.4d.dst.src.tile.bulk_group [tensorMap, tensorCoords], [srcMem]; // 3d. PTX ISA 80, SM_90
115
118
// .dst = { .global }
116
119
// .src = { .shared::cta }
117
- template <typename= void>
120
+ template <typename = void>
118
121
__device__ static inline void cp_async_bulk_tensor(
119
122
cuda::ptx::space_global_t,
120
123
cuda::ptx::space_shared_t,
@@ -129,7 +132,7 @@ cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes
129
132
// cp.async.bulk.tensor.5d.dst.src.tile.mbarrier::complete_tx::bytes [dstMem], [tensorMap, tensorCoords], [smem_bar];// 1e. PTX ISA 80, SM_90
130
133
// .dst = { .shared::cluster }
131
134
// .src = { .global }
132
- template <typename= void>
135
+ template <typename = void>
133
136
__device__ static inline void cp_async_bulk_tensor(
134
137
cuda::ptx::space_cluster_t,
135
138
cuda::ptx::space_global_t,
@@ -145,7 +148,7 @@ cp.async.bulk.tensor.5d.global.shared::cta.tile.bulk_group
145
148
// cp.async.bulk.tensor.5d.dst.src.tile.bulk_group [tensorMap, tensorCoords], [srcMem]; // 3e. PTX ISA 80, SM_90
146
149
// .dst = { .global }
147
150
// .src = { .shared::cta }
148
- template <typename= void>
151
+ template <typename = void>
149
152
__device__ static inline void cp_async_bulk_tensor(
150
153
cuda::ptx::space_global_t,
151
154
cuda::ptx::space_shared_t,
0 commit comments