Skip to content

Commit 9d1694d

Browse files
committed
Merge tag 'for-6.8/block-2024-01-18' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe: - NVMe pull request via Keith: - tcp, fc, and rdma target fixes (Maurizio, Daniel, Hannes, Christoph) - discard fixes and improvements (Christoph) - timeout debug improvements (Keith, Max) - various cleanups (Daniel, Max, Giuxen) - trace event string fixes (Arnd) - shadow doorbell setup on reset fix (William) - a write zeroes quirk for SK Hynix (Jim) - MD pull request via Song: - Sparse warning since v6.0 (Bart) - /proc/mdstat regression since v6.7 (Yu Kuai) - Use symbolic error value (Christian) - IO Priority documentation update (Christian) - Fix for accessing queue limits without having entered the queue (Christoph, me) - Fix for loop dio support (Christoph) - Move null_blk off deprecated ida interface (Christophe) - Ensure nbd initializes full msghdr (Eric) - Fix for a regression with the folio conversion, which is now easier to hit because of an unrelated change (Matthew) - Remove redundant check in virtio-blk (Li) - Fix for a potential hang in sbitmap (Ming) - Fix for partial zone appending (Damien) - Misc changes and fixes (Bart, me, Kemeng, Dmitry) * tag 'for-6.8/block-2024-01-18' of git://git.kernel.dk/linux: (45 commits) Documentation: block: ioprio: Update schedulers loop: fix the the direct I/O support check when used on top of block devices blk-mq: Remove the hctx 'run' debugfs attribute nbd: always initialize struct msghdr completely block: Fix iterating over an empty bio with bio_for_each_folio_all block: bio-integrity: fix kcalloc() arguments order virtio_blk: remove duplicate check if queue is broken in virtblk_done sbitmap: remove stale comment in sbq_calc_wake_batch block: Correct a documentation comment in blk-cgroup.c null_blk: Remove usage of the deprecated ida_simple_xx() API block: ensure we hold a queue reference when using queue limits blk-mq: rename blk_mq_can_use_cached_rq block: print symbolic error name instead of error code blk-mq: fix IO hang from sbitmap wakeup race nvmet-rdma: avoid circular locking dependency on install_queue() nvmet-tcp: avoid circular locking dependency on install_queue() nvme-pci: set doorbell config before unquiescing block: fix partial zone append completion handling in req_bio_endio() block/iocost: silence warning on 'last_period' potentially being unused md/raid1: Use blk_opf_t for read and write operations ...
2 parents e9a5a78 + b2e792a commit 9d1694d

34 files changed

+287
-224
lines changed

Documentation/block/ioprio.rst

+6-7
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,16 @@ Block io priorities
66
Intro
77
-----
88

9-
With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
10-
priorities are supported for reads on files. This enables users to io nice
11-
processes or process groups, similar to what has been possible with cpu
12-
scheduling for ages. This document mainly details the current possibilities
13-
with cfq; other io schedulers do not support io priorities thus far.
9+
The io priority feature enables users to io nice processes or process groups,
10+
similar to what has been possible with cpu scheduling for ages. Support for io
11+
priorities is io scheduler dependent and currently supported by bfq and
12+
mq-deadline.
1413

1514
Scheduling classes
1615
------------------
1716

18-
CFQ implements three generic scheduling classes that determine how io is
19-
served for a process.
17+
Three generic scheduling classes are implemented for io priorities that
18+
determine how io is served for a process.
2019

2120
IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
2221
higher priority than any other in the system, processes from this class are

block/bio-integrity.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
336336
if (nr_vecs > BIO_MAX_VECS)
337337
return -E2BIG;
338338
if (nr_vecs > UIO_FASTIOV) {
339-
bvec = kcalloc(sizeof(*bvec), nr_vecs, GFP_KERNEL);
339+
bvec = kcalloc(nr_vecs, sizeof(*bvec), GFP_KERNEL);
340340
if (!bvec)
341341
return -ENOMEM;
342342
pages = NULL;

block/blk-cgroup.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
300300
* @disk: gendisk the new blkg is associated with
301301
* @gfp_mask: allocation mask to use
302302
*
303-
* Allocate a new blkg assocating @blkcg and @q.
303+
* Allocate a new blkg associating @blkcg and @disk.
304304
*/
305305
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
306306
gfp_t gfp_mask)

block/blk-iocost.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1261,7 +1261,7 @@ static void weight_updated(struct ioc_gq *iocg, struct ioc_now *now)
12611261
static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
12621262
{
12631263
struct ioc *ioc = iocg->ioc;
1264-
u64 last_period, cur_period;
1264+
u64 __maybe_unused last_period, cur_period;
12651265
u64 vtime, vtarget;
12661266
int i;
12671267

block/blk-mq-debugfs.c

-18
Original file line numberDiff line numberDiff line change
@@ -479,23 +479,6 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m)
479479
return res;
480480
}
481481

482-
static int hctx_run_show(void *data, struct seq_file *m)
483-
{
484-
struct blk_mq_hw_ctx *hctx = data;
485-
486-
seq_printf(m, "%lu\n", hctx->run);
487-
return 0;
488-
}
489-
490-
static ssize_t hctx_run_write(void *data, const char __user *buf, size_t count,
491-
loff_t *ppos)
492-
{
493-
struct blk_mq_hw_ctx *hctx = data;
494-
495-
hctx->run = 0;
496-
return count;
497-
}
498-
499482
static int hctx_active_show(void *data, struct seq_file *m)
500483
{
501484
struct blk_mq_hw_ctx *hctx = data;
@@ -624,7 +607,6 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
624607
{"tags_bitmap", 0400, hctx_tags_bitmap_show},
625608
{"sched_tags", 0400, hctx_sched_tags_show},
626609
{"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show},
627-
{"run", 0600, hctx_run_show, hctx_run_write},
628610
{"active", 0400, hctx_active_show},
629611
{"dispatch_busy", 0400, hctx_dispatch_busy_show},
630612
{"type", 0400, hctx_type_show},

block/blk-mq-sched.c

-2
Original file line numberDiff line numberDiff line change
@@ -324,8 +324,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
324324
if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
325325
return;
326326

327-
hctx->run++;
328-
329327
/*
330328
* A return of -EAGAIN is an indication that hctx->dispatch is not
331329
* empty and we must run again in order to avoid starving flushes.

block/blk-mq.c

+39-11
Original file line numberDiff line numberDiff line change
@@ -772,11 +772,16 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
772772
/*
773773
* Partial zone append completions cannot be supported as the
774774
* BIO fragments may end up not being written sequentially.
775+
* For such case, force the completed nbytes to be equal to
776+
* the BIO size so that bio_advance() sets the BIO remaining
777+
* size to 0 and we end up calling bio_endio() before returning.
775778
*/
776-
if (bio->bi_iter.bi_size != nbytes)
779+
if (bio->bi_iter.bi_size != nbytes) {
777780
bio->bi_status = BLK_STS_IOERR;
778-
else
781+
nbytes = bio->bi_iter.bi_size;
782+
} else {
779783
bio->bi_iter.bi_sector = rq->__sector;
784+
}
780785
}
781786

782787
bio_advance(bio, nbytes);
@@ -1859,6 +1864,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
18591864
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
18601865
__add_wait_queue(wq, wait);
18611866

1867+
/*
1868+
* Add one explicit barrier since blk_mq_get_driver_tag() may
1869+
* not imply barrier in case of failure.
1870+
*
1871+
* Order adding us to wait queue and allocating driver tag.
1872+
*
1873+
* The pair is the one implied in sbitmap_queue_wake_up() which
1874+
* orders clearing sbitmap tag bits and waitqueue_active() in
1875+
* __sbitmap_queue_wake_up(), since waitqueue_active() is lockless
1876+
*
1877+
* Otherwise, re-order of adding wait queue and getting driver tag
1878+
* may cause __sbitmap_queue_wake_up() to wake up nothing because
1879+
* the waitqueue_active() may not observe us in wait queue.
1880+
*/
1881+
smp_mb();
1882+
18621883
/*
18631884
* It's possible that a tag was freed in the window between the
18641885
* allocation failure and adding the hardware queue to the wait
@@ -2891,8 +2912,11 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
28912912
return NULL;
28922913
}
28932914

2894-
/* return true if this @rq can be used for @bio */
2895-
static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
2915+
/*
2916+
* Check if we can use the passed on request for submitting the passed in bio,
2917+
* and remove it from the request list if it can be used.
2918+
*/
2919+
static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
28962920
struct bio *bio)
28972921
{
28982922
enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
@@ -2952,12 +2976,6 @@ void blk_mq_submit_bio(struct bio *bio)
29522976
blk_status_t ret;
29532977

29542978
bio = blk_queue_bounce(bio, q);
2955-
if (bio_may_exceed_limits(bio, &q->limits)) {
2956-
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
2957-
if (!bio)
2958-
return;
2959-
}
2960-
29612979
bio_set_ioprio(bio);
29622980

29632981
if (plug) {
@@ -2966,16 +2984,26 @@ void blk_mq_submit_bio(struct bio *bio)
29662984
rq = NULL;
29672985
}
29682986
if (rq) {
2987+
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
2988+
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
2989+
if (!bio)
2990+
return;
2991+
}
29692992
if (!bio_integrity_prep(bio))
29702993
return;
29712994
if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
29722995
return;
2973-
if (blk_mq_can_use_cached_rq(rq, plug, bio))
2996+
if (blk_mq_use_cached_rq(rq, plug, bio))
29742997
goto done;
29752998
percpu_ref_get(&q->q_usage_counter);
29762999
} else {
29773000
if (unlikely(bio_queue_enter(bio)))
29783001
return;
3002+
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
3003+
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
3004+
if (!bio)
3005+
goto fail;
3006+
}
29793007
if (!bio_integrity_prep(bio))
29803008
goto fail;
29813009
}

block/ioprio.c

-26
Original file line numberDiff line numberDiff line change
@@ -139,32 +139,6 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
139139
return ret;
140140
}
141141

142-
/*
143-
* If the task has set an I/O priority, use that. Otherwise, return
144-
* the default I/O priority.
145-
*
146-
* Expected to be called for current task or with task_lock() held to keep
147-
* io_context stable.
148-
*/
149-
int __get_task_ioprio(struct task_struct *p)
150-
{
151-
struct io_context *ioc = p->io_context;
152-
int prio;
153-
154-
if (p != current)
155-
lockdep_assert_held(&p->alloc_lock);
156-
if (ioc)
157-
prio = ioc->ioprio;
158-
else
159-
prio = IOPRIO_DEFAULT;
160-
161-
if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE)
162-
prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p),
163-
task_nice_ioprio(p));
164-
return prio;
165-
}
166-
EXPORT_SYMBOL_GPL(__get_task_ioprio);
167-
168142
static int get_task_ioprio(struct task_struct *p)
169143
{
170144
int ret;

block/partitions/core.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -562,8 +562,8 @@ static bool blk_add_partition(struct gendisk *disk,
562562
part = add_partition(disk, p, from, size, state->parts[p].flags,
563563
&state->parts[p].info);
564564
if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) {
565-
printk(KERN_ERR " %s: p%d could not be added: %ld\n",
566-
disk->disk_name, p, -PTR_ERR(part));
565+
printk(KERN_ERR " %s: p%d could not be added: %pe\n",
566+
disk->disk_name, p, part);
567567
return true;
568568
}
569569

drivers/block/loop.c

+25-27
Original file line numberDiff line numberDiff line change
@@ -165,39 +165,37 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file)
165165
return get_size(lo->lo_offset, lo->lo_sizelimit, file);
166166
}
167167

168+
/*
169+
* We support direct I/O only if lo_offset is aligned with the logical I/O size
170+
* of backing device, and the logical block size of loop is bigger than that of
171+
* the backing device.
172+
*/
173+
static bool lo_bdev_can_use_dio(struct loop_device *lo,
174+
struct block_device *backing_bdev)
175+
{
176+
unsigned short sb_bsize = bdev_logical_block_size(backing_bdev);
177+
178+
if (queue_logical_block_size(lo->lo_queue) < sb_bsize)
179+
return false;
180+
if (lo->lo_offset & (sb_bsize - 1))
181+
return false;
182+
return true;
183+
}
184+
168185
static void __loop_update_dio(struct loop_device *lo, bool dio)
169186
{
170187
struct file *file = lo->lo_backing_file;
171-
struct address_space *mapping = file->f_mapping;
172-
struct inode *inode = mapping->host;
173-
unsigned short sb_bsize = 0;
174-
unsigned dio_align = 0;
188+
struct inode *inode = file->f_mapping->host;
189+
struct block_device *backing_bdev = NULL;
175190
bool use_dio;
176191

177-
if (inode->i_sb->s_bdev) {
178-
sb_bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
179-
dio_align = sb_bsize - 1;
180-
}
192+
if (S_ISBLK(inode->i_mode))
193+
backing_bdev = I_BDEV(inode);
194+
else if (inode->i_sb->s_bdev)
195+
backing_bdev = inode->i_sb->s_bdev;
181196

182-
/*
183-
* We support direct I/O only if lo_offset is aligned with the
184-
* logical I/O size of backing device, and the logical block
185-
* size of loop is bigger than the backing device's.
186-
*
187-
* TODO: the above condition may be loosed in the future, and
188-
* direct I/O may be switched runtime at that time because most
189-
* of requests in sane applications should be PAGE_SIZE aligned
190-
*/
191-
if (dio) {
192-
if (queue_logical_block_size(lo->lo_queue) >= sb_bsize &&
193-
!(lo->lo_offset & dio_align) &&
194-
(file->f_mode & FMODE_CAN_ODIRECT))
195-
use_dio = true;
196-
else
197-
use_dio = false;
198-
} else {
199-
use_dio = false;
200-
}
197+
use_dio = dio && (file->f_mode & FMODE_CAN_ODIRECT) &&
198+
(!backing_bdev || lo_bdev_can_use_dio(lo, backing_bdev));
201199

202200
if (lo->use_dio == use_dio)
203201
return;

drivers/block/nbd.c

+1-5
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
508508
struct iov_iter *iter, int msg_flags, int *sent)
509509
{
510510
int result;
511-
struct msghdr msg;
511+
struct msghdr msg = {} ;
512512
unsigned int noreclaim_flag;
513513

514514
if (unlikely(!sock)) {
@@ -524,10 +524,6 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
524524
do {
525525
sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
526526
sock->sk->sk_use_task_frag = false;
527-
msg.msg_name = NULL;
528-
msg.msg_namelen = 0;
529-
msg.msg_control = NULL;
530-
msg.msg_controllen = 0;
531527
msg.msg_flags = msg_flags | MSG_NOSIGNAL;
532528

533529
if (send)

drivers/block/null_blk/main.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -1840,7 +1840,7 @@ static void null_del_dev(struct nullb *nullb)
18401840

18411841
dev = nullb->dev;
18421842

1843-
ida_simple_remove(&nullb_indexes, nullb->index);
1843+
ida_free(&nullb_indexes, nullb->index);
18441844

18451845
list_del_init(&nullb->list);
18461846

@@ -2174,7 +2174,7 @@ static int null_add_dev(struct nullb_device *dev)
21742174
blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
21752175

21762176
mutex_lock(&lock);
2177-
rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
2177+
rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
21782178
if (rv < 0) {
21792179
mutex_unlock(&lock);
21802180
goto out_cleanup_zone;

drivers/block/virtio_blk.c

-2
Original file line numberDiff line numberDiff line change
@@ -367,8 +367,6 @@ static void virtblk_done(struct virtqueue *vq)
367367
blk_mq_complete_request(req);
368368
req_done = true;
369369
}
370-
if (unlikely(virtqueue_is_broken(vq)))
371-
break;
372370
} while (!virtqueue_enable_cb(vq));
373371

374372
/* In case queue is stopped waiting for more buffers. */

0 commit comments

Comments (0)