
Commit e9a5a78

Merge tag 'for-6.8/io_uring-2024-01-18' of git://git.kernel.dk/linux
Pull io_uring fixes from Jens Axboe:
 "Nothing major in here, just a few fixes and cleanups that arrived
  after the initial merge window pull request got finalized, as well as
  a fix for a patch that got merged earlier"

* tag 'for-6.8/io_uring-2024-01-18' of git://git.kernel.dk/linux:
  io_uring: combine cq_wait_nr checks
  io_uring: clean *local_work_add var naming
  io_uring: clean up local tw add-wait sync
  io_uring: adjust defer tw counting
  io_uring/register: guard compat syscall with CONFIG_COMPAT
  io_uring/rsrc: improve code generation for fixed file assignment
  io_uring/rw: cleanup io_rw_done()
2 parents 6f36250 + b4bc35c commit e9a5a78

File tree — 4 files changed, +86 −47 lines

  io_uring/io_uring.c   +45 −18
  io_uring/register.c    +5 −3
  io_uring/rsrc.h        +9 −5
  io_uring/rw.c         +27 −21

io_uring/io_uring.c

+45 −18

@@ -137,6 +137,14 @@ struct io_defer_entry {
 #define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL)
 #define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK)
 
+/*
+ * No waiters. It's larger than any valid value of the tw counter
+ * so that tests against ->cq_wait_nr would fail and skip wake_up().
+ */
+#define IO_CQ_WAKE_INIT (-1U)
+/* Forced wake up if there is a waiter regardless of ->cq_wait_nr */
+#define IO_CQ_WAKE_FORCE (IO_CQ_WAKE_INIT >> 1)
+
 static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
                                          struct task_struct *task,
                                          bool cancel_all);
@@ -303,6 +311,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
                 goto err;
 
         ctx->flags = p->flags;
+        atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
         init_waitqueue_head(&ctx->sqo_sq_wait);
         INIT_LIST_HEAD(&ctx->sqd_list);
         INIT_LIST_HEAD(&ctx->cq_overflow_list);
@@ -1304,16 +1313,23 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
 {
         struct io_ring_ctx *ctx = req->ctx;
         unsigned nr_wait, nr_tw, nr_tw_prev;
-        struct llist_node *first;
+        struct llist_node *head;
+
+        /* See comment above IO_CQ_WAKE_INIT */
+        BUILD_BUG_ON(IO_CQ_WAKE_FORCE <= IORING_MAX_CQ_ENTRIES);
 
+        /*
+         * We don't know how many reuqests is there in the link and whether
+         * they can even be queued lazily, fall back to non-lazy.
+         */
         if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK))
                 flags &= ~IOU_F_TWQ_LAZY_WAKE;
 
-        first = READ_ONCE(ctx->work_llist.first);
+        head = READ_ONCE(ctx->work_llist.first);
         do {
                 nr_tw_prev = 0;
-                if (first) {
-                        struct io_kiocb *first_req = container_of(first,
+                if (head) {
+                        struct io_kiocb *first_req = container_of(head,
                                                         struct io_kiocb,
                                                         io_task_work.node);
                         /*
@@ -1322,32 +1338,42 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
                          */
                         nr_tw_prev = READ_ONCE(first_req->nr_tw);
                 }
+
+                /*
+                 * Theoretically, it can overflow, but that's fine as one of
+                 * previous adds should've tried to wake the task.
+                 */
                 nr_tw = nr_tw_prev + 1;
-                /* Large enough to fail the nr_wait comparison below */
                 if (!(flags & IOU_F_TWQ_LAZY_WAKE))
-                        nr_tw = -1U;
+                        nr_tw = IO_CQ_WAKE_FORCE;
 
                 req->nr_tw = nr_tw;
-                req->io_task_work.node.next = first;
-        } while (!try_cmpxchg(&ctx->work_llist.first, &first,
+                req->io_task_work.node.next = head;
+        } while (!try_cmpxchg(&ctx->work_llist.first, &head,
                               &req->io_task_work.node));
 
-        if (!first) {
+        /*
+         * cmpxchg implies a full barrier, which pairs with the barrier
+         * in set_current_state() on the io_cqring_wait() side. It's used
+         * to ensure that either we see updated ->cq_wait_nr, or waiters
+         * going to sleep will observe the work added to the list, which
+         * is similar to the wait/wawke task state sync.
+         */
+
+        if (!head) {
                 if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
                         atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
                 if (ctx->has_evfd)
                         io_eventfd_signal(ctx);
         }
 
         nr_wait = atomic_read(&ctx->cq_wait_nr);
-        /* no one is waiting */
-        if (!nr_wait)
+        /* not enough or no one is waiting */
+        if (nr_tw < nr_wait)
                 return;
-        /* either not enough or the previous add has already woken it up */
-        if (nr_wait > nr_tw || nr_tw_prev >= nr_wait)
+        /* the previous add has already woken it up */
+        if (nr_tw_prev >= nr_wait)
                 return;
-        /* pairs with set_current_state() in io_cqring_wait() */
-        smp_mb__after_atomic();
         wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
 }
 
@@ -2000,9 +2026,10 @@ inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
                 goto out;
         fd = array_index_nospec(fd, ctx->nr_user_files);
         slot = io_fixed_file_slot(&ctx->file_table, fd);
-        file = io_slot_file(slot);
+        if (!req->rsrc_node)
+                __io_req_set_rsrc_node(req, ctx);
         req->flags |= io_slot_flags(slot);
-        io_req_set_rsrc_node(req, ctx, 0);
+        file = io_slot_file(slot);
 out:
         io_ring_submit_unlock(ctx, issue_flags);
         return file;
@@ -2613,7 +2640,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 
                 ret = io_cqring_wait_schedule(ctx, &iowq);
                 __set_current_state(TASK_RUNNING);
-                atomic_set(&ctx->cq_wait_nr, 0);
+                atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
 
                 /*
                  * Run task_work after scheduling and before io_should_wake().
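The io_req_local_work_add() hunks above change how deferred task_work decides when to wake the waiting task: each queued request carries a running count (nr_tw), the waiter publishes how many completions it needs in ->cq_wait_nr (now initialized to IO_CQ_WAKE_INIT, which no real count can reach), and non-lazy adds force a wake-up via IO_CQ_WAKE_FORCE. The standalone C sketch below models only that counting decision; the llist manipulation, memory barriers and wake_up_state() are out of scope, and the names mirror the kernel's purely for readability. It is an illustration, not kernel code.

/*
 * Standalone userspace sketch of the wake decision in the diff above.
 * It only models the counting; locking, barriers and real wakeups are
 * deliberately omitted.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define IO_CQ_WAKE_INIT  (-1U)                  /* no waiters */
#define IO_CQ_WAKE_FORCE (IO_CQ_WAKE_INIT >> 1) /* wake regardless of threshold */

static _Atomic unsigned cq_wait_nr = IO_CQ_WAKE_INIT;

/* Returns true when the producer should wake the waiting task. */
static bool should_wake(unsigned nr_tw_prev, bool lazy)
{
        unsigned nr_tw = lazy ? nr_tw_prev + 1 : IO_CQ_WAKE_FORCE;
        unsigned nr_wait = atomic_load(&cq_wait_nr);

        if (nr_tw < nr_wait)        /* not enough work queued, or no waiter */
                return false;
        if (nr_tw_prev >= nr_wait)  /* a previous add already woke the waiter */
                return false;
        return true;
}

int main(void)
{
        /* Nobody waits: IO_CQ_WAKE_INIT is larger than any tw count. */
        printf("%d\n", should_wake(0, true));   /* 0 */

        /* A waiter asks for 2 completions. */
        atomic_store(&cq_wait_nr, 2);
        printf("%d\n", should_wake(0, true));   /* 0: only 1 item queued   */
        printf("%d\n", should_wake(1, true));   /* 1: threshold reached    */
        printf("%d\n", should_wake(2, true));   /* 0: already woken before */
        printf("%d\n", should_wake(0, false));  /* 1: non-lazy forces wake */
        return 0;
}

The point of the combined check is that a single nr_tw < nr_wait test now covers both "no one is waiting" and "not enough queued": IO_CQ_WAKE_INIT is larger than any real count, and IO_CQ_WAKE_FORCE is larger than any legal wait threshold, which is exactly what the BUILD_BUG_ON in the diff guards.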

io_uring/register.c

+5 −3

@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/nospec.h>
+#include <linux/compat.h>
 #include <linux/io_uring.h>
 #include <linux/io_uring_types.h>
 
@@ -278,13 +279,14 @@ static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
         if (len > cpumask_size())
                 len = cpumask_size();
 
-        if (in_compat_syscall()) {
+#ifdef CONFIG_COMPAT
+        if (in_compat_syscall())
                 ret = compat_get_bitmap(cpumask_bits(new_mask),
                                         (const compat_ulong_t __user *)arg,
                                         len * 8 /* CHAR_BIT */);
-        } else {
+        else
+#endif
                 ret = copy_from_user(new_mask, arg, len);
-        }
 
         if (ret) {
                 free_cpumask_var(new_mask);
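The register.c change compiles the compat bitmap path only when CONFIG_COMPAT is enabled, with the new #include <linux/compat.h> supplying the declarations; the else hanging across the #ifdef keeps the native copy_from_user() path shared by both configurations. Below is a minimal userspace sketch of the same guard shape. HAVE_COMPAT, is_compat_caller() and the copy_*() helpers are made-up stand-ins for illustration, not kernel interfaces.

/*
 * Userspace sketch of the compile-time guard above: the compat branch
 * disappears entirely when the feature is absent, while the generic
 * branch is shared by both builds.
 */
#include <stdio.h>
#include <string.h>

#define HAVE_COMPAT 1

static int is_compat_caller(void) { return 0; }

static int copy_compat_bitmap(unsigned long *dst, const void *src, size_t len)
{
        memcpy(dst, src, len);  /* a real version would widen 32-bit words */
        return 0;
}

static int copy_native_bitmap(unsigned long *dst, const void *src, size_t len)
{
        memcpy(dst, src, len);
        return 0;
}

static int get_cpumask(unsigned long *mask, const void *arg, size_t len)
{
        int ret;

#ifdef HAVE_COMPAT
        if (is_compat_caller())
                ret = copy_compat_bitmap(mask, arg, len);
        else
#endif
                ret = copy_native_bitmap(mask, arg, len);

        return ret;
}

int main(void)
{
        unsigned long in[2] = { 0xff, 0x1 };
        unsigned long mask[2] = { 0, 0 };
        int ret = get_cpumask(mask, in, sizeof(in));

        printf("ret=%d mask0=%#lx\n", ret, mask[0]);
        return 0;
}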

io_uring/rsrc.h

+9 −5

@@ -102,17 +102,21 @@ static inline void io_charge_rsrc_node(struct io_ring_ctx *ctx,
         node->refs++;
 }
 
+static inline void __io_req_set_rsrc_node(struct io_kiocb *req,
+                                          struct io_ring_ctx *ctx)
+{
+        lockdep_assert_held(&ctx->uring_lock);
+        req->rsrc_node = ctx->rsrc_node;
+        io_charge_rsrc_node(ctx, ctx->rsrc_node);
+}
+
 static inline void io_req_set_rsrc_node(struct io_kiocb *req,
                                         struct io_ring_ctx *ctx,
                                         unsigned int issue_flags)
 {
         if (!req->rsrc_node) {
                 io_ring_submit_lock(ctx, issue_flags);
-
-                lockdep_assert_held(&ctx->uring_lock);
-
-                req->rsrc_node = ctx->rsrc_node;
-                io_charge_rsrc_node(ctx, ctx->rsrc_node);
+                __io_req_set_rsrc_node(req, ctx);
                 io_ring_submit_unlock(ctx, issue_flags);
         }
 }
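The rsrc.h hunk factors the "uring_lock already held" body out into __io_req_set_rsrc_node(), so io_file_get_fixed(), which already sits inside io_ring_submit_lock()/io_ring_submit_unlock() as the io_uring.c hunk above shows, can attach the rsrc node without going through the locking wrapper again; that is the code-generation improvement the commit message refers to. A rough userspace sketch of the same double-underscore convention follows, assuming a pthread mutex in place of the ring lock; the struct and field names are illustrative only, not io_uring's.

/*
 * Userspace sketch of the refactor above: the "__" helper assumes the
 * caller already holds the lock, while the plain helper wraps it with
 * lock/unlock for callers that do not.
 */
#include <pthread.h>
#include <stdio.h>

struct ring {
        pthread_mutex_t lock;
        int rsrc_refs;
};

struct request {
        struct ring *node;  /* NULL until a resource node is attached */
};

static void __req_set_rsrc_node(struct request *req, struct ring *ring)
{
        /* caller must hold ring->lock (lockdep_assert_held() in the kernel) */
        req->node = ring;
        ring->rsrc_refs++;
}

static void req_set_rsrc_node(struct request *req, struct ring *ring)
{
        if (!req->node) {
                pthread_mutex_lock(&ring->lock);
                __req_set_rsrc_node(req, ring);
                pthread_mutex_unlock(&ring->lock);
        }
}

int main(void)
{
        struct ring ring = { .lock = PTHREAD_MUTEX_INITIALIZER, .rsrc_refs = 0 };
        struct request req = { .node = NULL };

        req_set_rsrc_node(&req, &ring);   /* slow path: takes the lock itself */

        pthread_mutex_lock(&ring.lock);   /* hot path: lock is already held */
        if (!req.node)
                __req_set_rsrc_node(&req, &ring);
        pthread_mutex_unlock(&ring.lock);

        printf("refs=%d\n", ring.rsrc_refs);  /* 1: second call saw node set */
        return 0;
}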

io_uring/rw.c

+27 −21

@@ -168,27 +168,6 @@ void io_readv_writev_cleanup(struct io_kiocb *req)
         kfree(io->free_iovec);
 }
 
-static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
-{
-        switch (ret) {
-        case -EIOCBQUEUED:
-                break;
-        case -ERESTARTSYS:
-        case -ERESTARTNOINTR:
-        case -ERESTARTNOHAND:
-        case -ERESTART_RESTARTBLOCK:
-                /*
-                 * We can't just restart the syscall, since previously
-                 * submitted sqes may already be in progress. Just fail this
-                 * IO with EINTR.
-                 */
-                ret = -EINTR;
-                fallthrough;
-        default:
-                kiocb->ki_complete(kiocb, ret);
-        }
-}
-
 static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
 {
         struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
@@ -371,6 +350,33 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
         smp_store_release(&req->iopoll_completed, 1);
 }
 
+static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
+{
+        /* IO was queued async, completion will happen later */
+        if (ret == -EIOCBQUEUED)
+                return;
+
+        /* transform internal restart error codes */
+        if (unlikely(ret < 0)) {
+                switch (ret) {
+                case -ERESTARTSYS:
+                case -ERESTARTNOINTR:
+                case -ERESTARTNOHAND:
+                case -ERESTART_RESTARTBLOCK:
+                        /*
+                         * We can't just restart the syscall, since previously
+                         * submitted sqes may already be in progress. Just fail
+                         * this IO with EINTR.
+                         */
+                        ret = -EINTR;
+                        break;
+                }
+        }
+
+        INDIRECT_CALL_2(kiocb->ki_complete, io_complete_rw_iopoll,
+                        io_complete_rw, kiocb, ret);
+}
+
 static int kiocb_done(struct io_kiocb *req, ssize_t ret,
                       unsigned int issue_flags)
 {
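The rewritten io_rw_done() takes the common -EIOCBQUEUED case out of the switch, maps the restart error codes to -EINTR, and completes through INDIRECT_CALL_2(), which compares ->ki_complete against the two expected targets so that, with retpolines enabled, the usual case becomes a direct call instead of an indirect one. The userspace sketch below imitates that dispatch; the simplified INDIRECT_CALL_2 macro and the dummy completion functions are stand-ins and do not reproduce the kernel header's retpoline-specific behaviour.

/*
 * Userspace sketch of the INDIRECT_CALL_2() idea used in the new
 * io_rw_done(): compare the function pointer against its most likely
 * targets and call them directly, falling back to the indirect call.
 */
#include <stdio.h>

typedef void (*complete_fn)(long res);

static void complete_poll(long res) { printf("poll done: %ld\n", res); }
static void complete_irq(long res)  { printf("irq done: %ld\n", res); }

/* Simplified stand-in for the kernel macro (two likely targets). */
#define INDIRECT_CALL_2(f, f2, f1, ...)                 \
        do {                                            \
                if ((f) == (f2))                        \
                        f2(__VA_ARGS__);                \
                else if ((f) == (f1))                   \
                        f1(__VA_ARGS__);                \
                else                                    \
                        (f)(__VA_ARGS__);               \
        } while (0)

static void rw_done(complete_fn complete, long ret)
{
        if (ret == -1)          /* stand-in for -EIOCBQUEUED */
                return;         /* async, completed later */

        INDIRECT_CALL_2(complete, complete_poll, complete_irq, ret);
}

int main(void)
{
        rw_done(complete_poll, 4096);   /* direct call to complete_poll */
        rw_done(complete_irq, 512);     /* direct call to complete_irq  */
        rw_done(complete_poll, -1);     /* queued: nothing printed      */
        return 0;
}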
