Skip to content

Commit 35a4474

Browse files
committed
Merge tag 'bcachefs-2024-01-21' of https://evilpiepirate.org/git/bcachefs
Pull more bcachefs updates from Kent Overstreet:

"Some fixes, some refactoring, some minor features:

 - Assorted prep work for disk space accounting rewrite

 - BTREE_TRIGGER_ATOMIC: after combining our trigger callbacks, this makes our trigger context more explicit

 - A few fixes to avoid excessive transaction restarts on multithreaded workloads: fstests (in addition to ktest tests) are now checking slowpath counters, and that's shaking out a few bugs

 - Assorted tracepoint improvements

 - Starting to break up bcachefs_format.h and move on-disk types so they're with the code they belong to; this will make room to start documenting the on-disk format better.

 - A few minor fixes"

* tag 'bcachefs-2024-01-21' of https://evilpiepirate.org/git/bcachefs: (46 commits)
  bcachefs: Improve inode_to_text()
  bcachefs: logged_ops_format.h
  bcachefs: reflink_format.h
  bcachefs; extents_format.h
  bcachefs: ec_format.h
  bcachefs: subvolume_format.h
  bcachefs: snapshot_format.h
  bcachefs: alloc_background_format.h
  bcachefs: xattr_format.h
  bcachefs: dirent_format.h
  bcachefs: inode_format.h
  bcachefs; quota_format.h
  bcachefs: sb-counters_format.h
  bcachefs: counters.c -> sb-counters.c
  bcachefs: comment bch_subvolume
  bcachefs: bch_snapshot::btime
  bcachefs: add missing __GFP_NOWARN
  bcachefs: opts->compression can now also be applied in the background
  bcachefs: Prep work for variable size btree node buffers
  bcachefs: grab s_umount only if snapshotting
  ...
2 parents 4fbbed7 + 249f441 commit 35a4474

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

78 files changed

+1629
-1426
lines changed

fs/bcachefs/Makefile

+1-1
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@ bcachefs-y := \
2727
checksum.o \
2828
clock.o \
2929
compress.o \
30-
counters.o \
3130
darray.o \
3231
debug.o \
3332
dirent.o \
@@ -71,6 +70,7 @@ bcachefs-y := \
7170
reflink.o \
7271
replicas.o \
7372
sb-clean.o \
73+
sb-counters.o \
7474
sb-downgrade.o \
7575
sb-errors.o \
7676
sb-members.o \

fs/bcachefs/alloc_background.c

+45-44
Original file line number | Diff line number | Diff line change
@@ -273,7 +273,7 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
273273
bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v),
274274
c, err, alloc_key_dirty_sectors_0,
275275
"data_type %s but dirty_sectors==0",
276-
bch2_data_types[a.v->data_type]);
276+
bch2_data_type_str(a.v->data_type));
277277
break;
278278
case BCH_DATA_cached:
279279
bkey_fsck_err_on(!a.v->cached_sectors ||
@@ -321,16 +321,12 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
321321
{
322322
struct bch_alloc_v4 _a;
323323
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
324-
unsigned i;
325324

326325
prt_newline(out);
327326
printbuf_indent_add(out, 2);
328327

329-
prt_printf(out, "gen %u oldest_gen %u data_type %s",
330-
a->gen, a->oldest_gen,
331-
a->data_type < BCH_DATA_NR
332-
? bch2_data_types[a->data_type]
333-
: "(invalid data type)");
328+
prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
329+
bch2_prt_data_type(out, a->data_type);
334330
prt_newline(out);
335331
prt_printf(out, "journal_seq %llu", a->journal_seq);
336332
prt_newline(out);
@@ -353,23 +349,6 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
353349
prt_printf(out, "fragmentation %llu", a->fragmentation_lru);
354350
prt_newline(out);
355351
prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a));
356-
prt_newline(out);
357-
358-
if (BCH_ALLOC_V4_NR_BACKPOINTERS(a)) {
359-
struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k);
360-
const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v);
361-
362-
prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v));
363-
printbuf_indent_add(out, 2);
364-
365-
for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v); i++) {
366-
prt_newline(out);
367-
bch2_backpointer_to_text(out, &bps[i]);
368-
}
369-
370-
printbuf_indent_sub(out, 2);
371-
}
372-
373352
printbuf_indent_sub(out, 2);
374353
}
375354

@@ -839,7 +818,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
839818
}
840819
}
841820

842-
if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) {
821+
if ((flags & BTREE_TRIGGER_ATOMIC) && (flags & BTREE_TRIGGER_INSERT)) {
843822
struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
844823
u64 journal_seq = trans->journal_res.seq;
845824
u64 bucket_journal_seq = new_a->journal_seq;
@@ -1625,13 +1604,36 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
16251604
return ret;
16261605
}
16271606

1607+
struct discard_buckets_state {
1608+
u64 seen;
1609+
u64 open;
1610+
u64 need_journal_commit;
1611+
u64 discarded;
1612+
struct bch_dev *ca;
1613+
u64 need_journal_commit_this_dev;
1614+
};
1615+
1616+
static void discard_buckets_next_dev(struct bch_fs *c, struct discard_buckets_state *s, struct bch_dev *ca)
1617+
{
1618+
if (s->ca == ca)
1619+
return;
1620+
1621+
if (s->ca && s->need_journal_commit_this_dev >
1622+
bch2_dev_usage_read(s->ca).d[BCH_DATA_free].buckets)
1623+
bch2_journal_flush_async(&c->journal, NULL);
1624+
1625+
if (s->ca)
1626+
percpu_ref_put(&s->ca->ref);
1627+
if (ca)
1628+
percpu_ref_get(&ca->ref);
1629+
s->ca = ca;
1630+
s->need_journal_commit_this_dev = 0;
1631+
}
1632+
16281633
static int bch2_discard_one_bucket(struct btree_trans *trans,
16291634
struct btree_iter *need_discard_iter,
16301635
struct bpos *discard_pos_done,
1631-
u64 *seen,
1632-
u64 *open,
1633-
u64 *need_journal_commit,
1634-
u64 *discarded)
1636+
struct discard_buckets_state *s)
16351637
{
16361638
struct bch_fs *c = trans->c;
16371639
struct bpos pos = need_discard_iter->pos;
@@ -1643,20 +1645,24 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
16431645
int ret = 0;
16441646

16451647
ca = bch_dev_bkey_exists(c, pos.inode);
1648+
16461649
if (!percpu_ref_tryget(&ca->io_ref)) {
16471650
bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
16481651
return 0;
16491652
}
16501653

1654+
discard_buckets_next_dev(c, s, ca);
1655+
16511656
if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
1652-
(*open)++;
1657+
s->open++;
16531658
goto out;
16541659
}
16551660

16561661
if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
16571662
c->journal.flushed_seq_ondisk,
16581663
pos.inode, pos.offset)) {
1659-
(*need_journal_commit)++;
1664+
s->need_journal_commit++;
1665+
s->need_journal_commit_this_dev++;
16601666
goto out;
16611667
}
16621668

@@ -1732,9 +1738,9 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
17321738
goto out;
17331739

17341740
count_event(c, bucket_discard);
1735-
(*discarded)++;
1741+
s->discarded++;
17361742
out:
1737-
(*seen)++;
1743+
s->seen++;
17381744
bch2_trans_iter_exit(trans, &iter);
17391745
percpu_ref_put(&ca->io_ref);
17401746
printbuf_exit(&buf);
@@ -1744,7 +1750,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
17441750
static void bch2_do_discards_work(struct work_struct *work)
17451751
{
17461752
struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
1747-
u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0;
1753+
struct discard_buckets_state s = {};
17481754
struct bpos discard_pos_done = POS_MAX;
17491755
int ret;
17501756

@@ -1756,19 +1762,14 @@ static void bch2_do_discards_work(struct work_struct *work)
17561762
ret = bch2_trans_run(c,
17571763
for_each_btree_key(trans, iter,
17581764
BTREE_ID_need_discard, POS_MIN, 0, k,
1759-
bch2_discard_one_bucket(trans, &iter, &discard_pos_done,
1760-
&seen,
1761-
&open,
1762-
&need_journal_commit,
1763-
&discarded)));
1764-
1765-
if (need_journal_commit * 2 > seen)
1766-
bch2_journal_flush_async(&c->journal, NULL);
1765+
bch2_discard_one_bucket(trans, &iter, &discard_pos_done, &s)));
17671766

1768-
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
1767+
discard_buckets_next_dev(c, &s, NULL);
17691768

1770-
trace_discard_buckets(c, seen, open, need_journal_commit, discarded,
1769+
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
17711770
bch2_err_str(ret));
1771+
1772+
bch2_write_ref_put(c, BCH_WRITE_REF_discard);
17721773
}
17731774

17741775
void bch2_do_discards(struct bch_fs *c)

fs/bcachefs/alloc_background_format.h

+92
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,92 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#ifndef _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H
3+
#define _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H
4+
5+
struct bch_alloc {
6+
struct bch_val v;
7+
__u8 fields;
8+
__u8 gen;
9+
__u8 data[];
10+
} __packed __aligned(8);
11+
12+
#define BCH_ALLOC_FIELDS_V1() \
13+
x(read_time, 16) \
14+
x(write_time, 16) \
15+
x(data_type, 8) \
16+
x(dirty_sectors, 16) \
17+
x(cached_sectors, 16) \
18+
x(oldest_gen, 8) \
19+
x(stripe, 32) \
20+
x(stripe_redundancy, 8)
21+
22+
enum {
23+
#define x(name, _bits) BCH_ALLOC_FIELD_V1_##name,
24+
BCH_ALLOC_FIELDS_V1()
25+
#undef x
26+
};
27+
28+
struct bch_alloc_v2 {
29+
struct bch_val v;
30+
__u8 nr_fields;
31+
__u8 gen;
32+
__u8 oldest_gen;
33+
__u8 data_type;
34+
__u8 data[];
35+
} __packed __aligned(8);
36+
37+
#define BCH_ALLOC_FIELDS_V2() \
38+
x(read_time, 64) \
39+
x(write_time, 64) \
40+
x(dirty_sectors, 32) \
41+
x(cached_sectors, 32) \
42+
x(stripe, 32) \
43+
x(stripe_redundancy, 8)
44+
45+
struct bch_alloc_v3 {
46+
struct bch_val v;
47+
__le64 journal_seq;
48+
__le32 flags;
49+
__u8 nr_fields;
50+
__u8 gen;
51+
__u8 oldest_gen;
52+
__u8 data_type;
53+
__u8 data[];
54+
} __packed __aligned(8);
55+
56+
LE32_BITMASK(BCH_ALLOC_V3_NEED_DISCARD,struct bch_alloc_v3, flags, 0, 1)
57+
LE32_BITMASK(BCH_ALLOC_V3_NEED_INC_GEN,struct bch_alloc_v3, flags, 1, 2)
58+
59+
struct bch_alloc_v4 {
60+
struct bch_val v;
61+
__u64 journal_seq;
62+
__u32 flags;
63+
__u8 gen;
64+
__u8 oldest_gen;
65+
__u8 data_type;
66+
__u8 stripe_redundancy;
67+
__u32 dirty_sectors;
68+
__u32 cached_sectors;
69+
__u64 io_time[2];
70+
__u32 stripe;
71+
__u32 nr_external_backpointers;
72+
__u64 fragmentation_lru;
73+
} __packed __aligned(8);
74+
75+
#define BCH_ALLOC_V4_U64s_V0 6
76+
#define BCH_ALLOC_V4_U64s (sizeof(struct bch_alloc_v4) / sizeof(__u64))
77+
78+
BITMASK(BCH_ALLOC_V4_NEED_DISCARD, struct bch_alloc_v4, flags, 0, 1)
79+
BITMASK(BCH_ALLOC_V4_NEED_INC_GEN, struct bch_alloc_v4, flags, 1, 2)
80+
BITMASK(BCH_ALLOC_V4_BACKPOINTERS_START,struct bch_alloc_v4, flags, 2, 8)
81+
BITMASK(BCH_ALLOC_V4_NR_BACKPOINTERS, struct bch_alloc_v4, flags, 8, 14)
82+
83+
#define KEY_TYPE_BUCKET_GENS_BITS 8
84+
#define KEY_TYPE_BUCKET_GENS_NR (1U << KEY_TYPE_BUCKET_GENS_BITS)
85+
#define KEY_TYPE_BUCKET_GENS_MASK (KEY_TYPE_BUCKET_GENS_NR - 1)
86+
87+
struct bch_bucket_gens {
88+
struct bch_val v;
89+
u8 gens[KEY_TYPE_BUCKET_GENS_NR];
90+
} __packed __aligned(8);
91+
92+
#endif /* _BCACHEFS_ALLOC_BACKGROUND_FORMAT_H */

fs/bcachefs/alloc_foreground.c

+4-3
Original file line number | Diff line number | Diff line change
@@ -1525,10 +1525,11 @@ static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, str
15251525
unsigned data_type = ob->data_type;
15261526
barrier(); /* READ_ONCE() doesn't work on bitfields */
15271527

1528-
prt_printf(out, "%zu ref %u %s %u:%llu gen %u allocated %u/%u",
1528+
prt_printf(out, "%zu ref %u ",
15291529
ob - c->open_buckets,
1530-
atomic_read(&ob->pin),
1531-
data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type",
1530+
atomic_read(&ob->pin));
1531+
bch2_prt_data_type(out, data_type);
1532+
prt_printf(out, " %u:%llu gen %u allocated %u/%u",
15321533
ob->dev, ob->bucket, ob->gen,
15331534
ca->mi.bucket_size - ob->sectors_free, ca->mi.bucket_size);
15341535
if (ob->ec)

0 commit comments

Comments
 (0)