@@ -9,7 +9,8 @@ or [`NCCL.avg`](@ref)), writing the result to `recvbuf` on all ranks.
9
9
# External links
10
10
- [`ncclAllReduce`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/colls.html#ncclallreduce)
11
11
"""
12
- function Allreduce! (sendbuf, recvbuf, op, comm:: Communicator ; stream:: CuStream = default_device_stream (comm))
12
+ function Allreduce! (sendbuf, recvbuf, op, comm:: Communicator ;
13
+ stream:: CuStream = default_device_stream (comm))
13
14
count = length (recvbuf)
14
15
@assert length (sendbuf) == count
15
16
data_type = ncclDataType_t (eltype (recvbuf))
27
28
Reduce the array `sendrecvbuf` using `op` (one of `+`, `*`, `min`, `max`,
28
29
or [`NCCL.avg`](@ref)), writing the result in place to all ranks.
29
30
"""
30
- Allreduce! (sendrecvbuf, op, comm:: Communicator ; stream:: CuStream = default_device_stream (comm) ) =
31
+ function Allreduce! (sendrecvbuf, op, comm:: Communicator ;
32
+ stream:: CuStream = default_device_stream (comm))
31
33
Allreduce! (sendrecvbuf, sendrecvbuf, op, comm; stream)
34
+ end
32
35
33
36
"""
34
37
NCCL.Broadcast!(
@@ -41,14 +44,17 @@ Copies the array `sendbuf` on rank `root` to `recvbuf` on all ranks.
41
44
# External links
42
45
- [`ncclBroadcast`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/colls.html#ncclbroadcast)
43
46
"""
44
- function Broadcast! (sendbuf, recvbuf, comm:: Communicator ; root:: Integer = 0 , stream:: CuStream = default_device_stream (comm))
47
+ function Broadcast! (sendbuf, recvbuf, comm:: Communicator ; root:: Integer = 0 ,
48
+ stream:: CuStream = default_device_stream (comm))
45
49
data_type = ncclDataType_t (eltype (recvbuf))
46
50
count = length (recvbuf)
47
51
ncclBroadcast (sendbuf, recvbuf, count, data_type, root, comm, stream)
48
52
return recvbuf
49
53
end
50
- Broadcast! (sendrecvbuf, comm:: Communicator ; root:: Integer = 0 , stream:: CuStream = default_device_stream (comm)) =
54
+ function Broadcast! (sendrecvbuf, comm:: Communicator ; root:: Integer = 0 ,
55
+ stream:: CuStream = default_device_stream (comm))
51
56
Broadcast! (sendrecvbuf, sendrecvbuf, comm; root, stream)
57
+ end
52
58
53
59
54
60
"""
@@ -63,15 +69,18 @@ or [`NCCL.avg`](@ref)), writing the result to `recvbuf` on rank `root`.
63
69
# External links
64
70
- [`ncclReduce`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/colls.html#ncclreduce)
65
71
"""
66
- function Reduce! (sendbuf, recvbuf, op, comm:: Communicator ; root:: Integer = 0 , stream:: CuStream = default_device_stream (comm))
72
+ function Reduce! (sendbuf, recvbuf, op, comm:: Communicator ; root:: Integer = 0 ,
73
+ stream:: CuStream = default_device_stream (comm))
67
74
data_type = ncclDataType_t (eltype (recvbuf))
68
75
count = length (recvbuf)
69
76
_op = ncclRedOp_t (op)
70
77
ncclReduce (sendbuf, recvbuf, count, data_type, _op, root, comm, stream)
71
78
return recvbuf
72
79
end
73
- Reduce! (sendrecvbuf, op, comm:: Communicator ; root:: Integer = 0 , stream:: CuStream = default_device_stream (comm)) =
80
+ function Reduce! (sendrecvbuf, op, comm:: Communicator ; root:: Integer = 0 ,
81
+ stream:: CuStream = default_device_stream (comm))
74
82
Reduce! (sendrecvbuf, sendrecvbuf, op, comm; root, stream)
83
+ end
75
84
76
85
"""
77
86
NCCL.Allgather!(
@@ -84,7 +93,8 @@ Concatenate `sendbuf` from each rank into `recvbuf` on all ranks.
84
93
# External links
85
94
- [`ncclAllGather`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/colls.html#ncclallgather)
86
95
"""
87
- function Allgather! (sendbuf, recvbuf, comm:: Communicator ; stream:: CuStream = default_device_stream (comm))
96
+ function Allgather! (sendbuf, recvbuf, comm:: Communicator ;
97
+ stream:: CuStream = default_device_stream (comm))
88
98
data_type = ncclDataType_t (eltype (recvbuf))
89
99
sendcount = length (sendbuf)
90
100
@assert length (recvbuf) == sendcount * size (comm)
@@ -105,7 +115,8 @@ scattered over the devices such that `recvbuf` on each rank will contain the
105
115
# External links
106
116
- [`ncclReduceScatter`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/api/colls.html#ncclreducescatter)
107
117
"""
108
- function ReduceScatter! (sendbuf, recvbuf, op, comm:: Communicator ; stream:: CuStream = default_device_stream (comm) )
118
+ function ReduceScatter! (sendbuf, recvbuf, op, comm:: Communicator ;
119
+ stream:: CuStream = default_device_stream (comm))
109
120
recvcount = length (recvbuf)
110
121
@assert length (sendbuf) == recvcount * size (comm)
111
122
data_type = ncclDataType_t (eltype (recvbuf))
0 commit comments