Skip to content

Commit 556e2d1

Browse files
committed
Merge tag 'ceph-for-6.8-rc1' of https://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
"Assorted CephFS fixes and cleanups with nothing standing out"

* tag 'ceph-for-6.8-rc1' of https://github.com/ceph/ceph-client:
- ceph: get rid of passing callbacks in __dentry_leases_walk()
- ceph: d_obtain_{alias,root}(ERR_PTR(...)) will do the right thing
- ceph: fix invalid pointer access if get_quota_realm return ERR_PTR
- ceph: remove duplicated code in ceph_netfs_issue_read()
- ceph: send oldest_client_tid when renewing caps
- ceph: rename create_session_open_msg() to create_session_full_msg()
- ceph: select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
- ceph: fix deadlock or deadcode of misusing dget()
- ceph: try to allocate a smaller extent map for sparse read
- libceph: remove MAX_EXTENTS check for sparse reads
- ceph: reinitialize mds feature bit even when session in open
- ceph: skip reconnecting if MDS is not ready
2 parents ec2d264 + 2a965d1 commit 556e2d1

File tree

11 files changed

+98
-63
lines changed

11 files changed

+98
-63
lines changed

fs/ceph/Kconfig

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ config CEPH_FS
77
select CRYPTO_AES
88
select CRYPTO
99
select NETFS_SUPPORT
10+
select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
1011
default n
1112
help
1213
Choose Y or M here to include support for mounting the

fs/ceph/addr.c

+5-3
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
337337
u64 len = subreq->len;
338338
bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
339339
u64 off = subreq->start;
340+
int extent_cnt;
340341

341342
if (ceph_inode_is_shutdown(inode)) {
342343
err = -EIO;
@@ -350,16 +351,17 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
350351

351352
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
352353
off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
353-
CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
354-
NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
354+
CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq,
355+
ci->i_truncate_size, false);
355356
if (IS_ERR(req)) {
356357
err = PTR_ERR(req);
357358
req = NULL;
358359
goto out;
359360
}
360361

361362
if (sparse) {
362-
err = ceph_alloc_sparse_ext_map(&req->r_ops[0]);
363+
extent_cnt = __ceph_sparse_read_ext_count(inode, len);
364+
err = ceph_alloc_sparse_ext_map(&req->r_ops[0], extent_cnt);
363365
if (err)
364366
goto out;
365367
}

fs/ceph/caps.c

+3-6
Original file line numberDiff line numberDiff line change
@@ -4887,13 +4887,15 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
48874887
struct inode *dir,
48884888
int mds, int drop, int unless)
48894889
{
4890-
struct dentry *parent = NULL;
48914890
struct ceph_mds_request_release *rel = *p;
48924891
struct ceph_dentry_info *di = ceph_dentry(dentry);
48934892
struct ceph_client *cl;
48944893
int force = 0;
48954894
int ret;
48964895

4896+
/* This shouldn't happen */
4897+
BUG_ON(!dir);
4898+
48974899
/*
48984900
* force a record for the directory caps if we have a dentry lease.
48994901
* this is racy (can't take i_ceph_lock and d_lock together), but it
@@ -4903,14 +4905,9 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
49034905
spin_lock(&dentry->d_lock);
49044906
if (di->lease_session && di->lease_session->s_mds == mds)
49054907
force = 1;
4906-
if (!dir) {
4907-
parent = dget(dentry->d_parent);
4908-
dir = d_inode(parent);
4909-
}
49104908
spin_unlock(&dentry->d_lock);
49114909

49124910
ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
4913-
dput(parent);
49144911

49154912
cl = ceph_inode_to_client(dir);
49164913
spin_lock(&dentry->d_lock);

fs/ceph/dir.c

+13-8
Original file line numberDiff line numberDiff line change
@@ -1593,10 +1593,12 @@ struct ceph_lease_walk_control {
15931593
unsigned long dir_lease_ttl;
15941594
};
15951595

1596+
static int __dir_lease_check(const struct dentry *, struct ceph_lease_walk_control *);
1597+
static int __dentry_lease_check(const struct dentry *);
1598+
15961599
static unsigned long
15971600
__dentry_leases_walk(struct ceph_mds_client *mdsc,
1598-
struct ceph_lease_walk_control *lwc,
1599-
int (*check)(struct dentry*, void*))
1601+
struct ceph_lease_walk_control *lwc)
16001602
{
16011603
struct ceph_dentry_info *di, *tmp;
16021604
struct dentry *dentry, *last = NULL;
@@ -1624,7 +1626,10 @@ __dentry_leases_walk(struct ceph_mds_client *mdsc,
16241626
goto next;
16251627
}
16261628

1627-
ret = check(dentry, lwc);
1629+
if (lwc->dir_lease)
1630+
ret = __dir_lease_check(dentry, lwc);
1631+
else
1632+
ret = __dentry_lease_check(dentry);
16281633
if (ret & TOUCH) {
16291634
/* move it into tail of dir lease list */
16301635
__dentry_dir_lease_touch(mdsc, di);
@@ -1681,7 +1686,7 @@ __dentry_leases_walk(struct ceph_mds_client *mdsc,
16811686
return freed;
16821687
}
16831688

1684-
static int __dentry_lease_check(struct dentry *dentry, void *arg)
1689+
static int __dentry_lease_check(const struct dentry *dentry)
16851690
{
16861691
struct ceph_dentry_info *di = ceph_dentry(dentry);
16871692
int ret;
@@ -1696,9 +1701,9 @@ static int __dentry_lease_check(struct dentry *dentry, void *arg)
16961701
return DELETE;
16971702
}
16981703

1699-
static int __dir_lease_check(struct dentry *dentry, void *arg)
1704+
static int __dir_lease_check(const struct dentry *dentry,
1705+
struct ceph_lease_walk_control *lwc)
17001706
{
1701-
struct ceph_lease_walk_control *lwc = arg;
17021707
struct ceph_dentry_info *di = ceph_dentry(dentry);
17031708

17041709
int ret = __dir_lease_try_check(dentry);
@@ -1737,7 +1742,7 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc)
17371742

17381743
lwc.dir_lease = false;
17391744
lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2;
1740-
freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check);
1745+
freed = __dentry_leases_walk(mdsc, &lwc);
17411746
if (!lwc.nr_to_scan) /* more invalid leases */
17421747
return -EAGAIN;
17431748

@@ -1747,7 +1752,7 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc)
17471752
lwc.dir_lease = true;
17481753
lwc.expire_dir_lease = freed < count;
17491754
lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ;
1750-
freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check);
1755+
freed +=__dentry_leases_walk(mdsc, &lwc);
17511756
if (!lwc.nr_to_scan) /* more to check */
17521757
return -EAGAIN;
17531758

fs/ceph/export.c

-2
Original file line numberDiff line numberDiff line change
@@ -286,8 +286,6 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb,
286286
doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino,
287287
vino.snap, sfh->parent_ino, sfh->hash, err);
288288
}
289-
if (IS_ERR(inode))
290-
return ERR_CAST(inode);
291289
/* see comments in ceph_get_parent() */
292290
return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
293291
}

fs/ceph/file.c

+6-2
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
10291029
struct ceph_osd_req_op *op;
10301030
u64 read_off = off;
10311031
u64 read_len = len;
1032+
int extent_cnt;
10321033

10331034
/* determine new offset/length if encrypted */
10341035
ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
@@ -1068,7 +1069,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
10681069

10691070
op = &req->r_ops[0];
10701071
if (sparse) {
1071-
ret = ceph_alloc_sparse_ext_map(op);
1072+
extent_cnt = __ceph_sparse_read_ext_count(inode, read_len);
1073+
ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
10721074
if (ret) {
10731075
ceph_osdc_put_request(req);
10741076
break;
@@ -1465,6 +1467,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
14651467
ssize_t len;
14661468
struct ceph_osd_req_op *op;
14671469
int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ;
1470+
int extent_cnt;
14681471

14691472
if (write)
14701473
size = min_t(u64, size, fsc->mount_options->wsize);
@@ -1528,7 +1531,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
15281531
osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
15291532
op = &req->r_ops[0];
15301533
if (sparse) {
1531-
ret = ceph_alloc_sparse_ext_map(op);
1534+
extent_cnt = __ceph_sparse_read_ext_count(inode, size);
1535+
ret = ceph_alloc_sparse_ext_map(op, extent_cnt);
15321536
if (ret) {
15331537
ceph_osdc_put_request(req);
15341538
break;

fs/ceph/mds_client.c

+25-10
Original file line numberDiff line numberDiff line change
@@ -1534,7 +1534,8 @@ static int encode_metric_spec(void **p, void *end)
15341534
* session message, specialization for CEPH_SESSION_REQUEST_OPEN
15351535
* to include additional client metadata fields.
15361536
*/
1537-
static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq)
1537+
static struct ceph_msg *
1538+
create_session_full_msg(struct ceph_mds_client *mdsc, int op, u64 seq)
15381539
{
15391540
struct ceph_msg *msg;
15401541
struct ceph_mds_session_head *h;
@@ -1578,6 +1579,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
15781579
size = METRIC_BYTES(count);
15791580
extra_bytes += 2 + 4 + 4 + size;
15801581

1582+
/* flags, mds auth caps and oldest_client_tid */
1583+
extra_bytes += 4 + 4 + 8;
1584+
15811585
/* Allocate the message */
15821586
msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
15831587
GFP_NOFS, false);
@@ -1589,16 +1593,16 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
15891593
end = p + msg->front.iov_len;
15901594

15911595
h = p;
1592-
h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN);
1596+
h->op = cpu_to_le32(op);
15931597
h->seq = cpu_to_le64(seq);
15941598

15951599
/*
15961600
* Serialize client metadata into waiting buffer space, using
15971601
* the format that userspace expects for map<string, string>
15981602
*
1599-
* ClientSession messages with metadata are v4
1603+
* ClientSession messages with metadata are v7
16001604
*/
1601-
msg->hdr.version = cpu_to_le16(4);
1605+
msg->hdr.version = cpu_to_le16(7);
16021606
msg->hdr.compat_version = cpu_to_le16(1);
16031607

16041608
/* The write pointer, following the session_head structure */
@@ -1634,6 +1638,15 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
16341638
return ERR_PTR(ret);
16351639
}
16361640

1641+
/* version == 5, flags */
1642+
ceph_encode_32(&p, 0);
1643+
1644+
/* version == 6, mds auth caps */
1645+
ceph_encode_32(&p, 0);
1646+
1647+
/* version == 7, oldest_client_tid */
1648+
ceph_encode_64(&p, mdsc->oldest_tid);
1649+
16371650
msg->front.iov_len = p - msg->front.iov_base;
16381651
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
16391652

@@ -1663,7 +1676,8 @@ static int __open_session(struct ceph_mds_client *mdsc,
16631676
session->s_renew_requested = jiffies;
16641677

16651678
/* send connect message */
1666-
msg = create_session_open_msg(mdsc, session->s_seq);
1679+
msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_OPEN,
1680+
session->s_seq);
16671681
if (IS_ERR(msg))
16681682
return PTR_ERR(msg);
16691683
ceph_con_send(&session->s_con, msg);
@@ -2028,10 +2042,10 @@ static int send_renew_caps(struct ceph_mds_client *mdsc,
20282042

20292043
doutc(cl, "to mds%d (%s)\n", session->s_mds,
20302044
ceph_mds_state_name(state));
2031-
msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
2045+
msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_RENEWCAPS,
20322046
++session->s_renew_seq);
2033-
if (!msg)
2034-
return -ENOMEM;
2047+
if (IS_ERR(msg))
2048+
return PTR_ERR(msg);
20352049
ceph_con_send(&session->s_con, msg);
20362050
return 0;
20372051
}
@@ -4128,12 +4142,12 @@ static void handle_session(struct ceph_mds_session *session,
41284142
pr_info_client(cl, "mds%d reconnect success\n",
41294143
session->s_mds);
41304144

4145+
session->s_features = features;
41314146
if (session->s_state == CEPH_MDS_SESSION_OPEN) {
41324147
pr_notice_client(cl, "mds%d is already opened\n",
41334148
session->s_mds);
41344149
} else {
41354150
session->s_state = CEPH_MDS_SESSION_OPEN;
4136-
session->s_features = features;
41374151
renewed_caps(mdsc, session, 0);
41384152
if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT,
41394153
&session->s_features))
@@ -5870,7 +5884,8 @@ static void mds_peer_reset(struct ceph_connection *con)
58705884

58715885
pr_warn_client(mdsc->fsc->client, "mds%d closed our session\n",
58725886
s->s_mds);
5873-
if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO)
5887+
if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO &&
5888+
ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) >= CEPH_MDS_STATE_RECONNECT)
58745889
send_mds_reconnect(mdsc, s);
58755890
}
58765891

fs/ceph/quota.c

+22-17
Original file line numberDiff line numberDiff line change
@@ -197,10 +197,10 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
197197
}
198198

199199
/*
200-
* This function walks through the snaprealm for an inode and returns the
201-
* ceph_snap_realm for the first snaprealm that has quotas set (max_files,
200+
* This function walks through the snaprealm for an inode and sets the
201+
* realmp to the first snaprealm that has quotas set (max_files,
202202
* max_bytes, or any, depending on the 'which_quota' argument). If the root is
203-
* reached, return the root ceph_snap_realm instead.
203+
* reached, realmp is set to the root ceph_snap_realm instead.
204204
*
205205
* Note that the caller is responsible for calling ceph_put_snap_realm() on the
206206
* realm stored in realmp.
@@ -211,19 +211,20 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
211211
* this function will return -EAGAIN; otherwise, the snaprealms walk-through
212212
* will be restarted.
213213
*/
214-
static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
215-
struct inode *inode,
216-
enum quota_get_realm which_quota,
217-
bool retry)
214+
static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode,
215+
enum quota_get_realm which_quota,
216+
struct ceph_snap_realm **realmp, bool retry)
218217
{
219218
struct ceph_client *cl = mdsc->fsc->client;
220219
struct ceph_inode_info *ci = NULL;
221220
struct ceph_snap_realm *realm, *next;
222221
struct inode *in;
223222
bool has_quota;
224223

224+
if (realmp)
225+
*realmp = NULL;
225226
if (ceph_snap(inode) != CEPH_NOSNAP)
226-
return NULL;
227+
return 0;
227228

228229
restart:
229230
realm = ceph_inode(inode)->i_snap_realm;
@@ -250,7 +251,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
250251
break;
251252
ceph_put_snap_realm(mdsc, realm);
252253
if (!retry)
253-
return ERR_PTR(-EAGAIN);
254+
return -EAGAIN;
254255
goto restart;
255256
}
256257

@@ -259,8 +260,11 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
259260
iput(in);
260261

261262
next = realm->parent;
262-
if (has_quota || !next)
263-
return realm;
263+
if (has_quota || !next) {
264+
if (realmp)
265+
*realmp = realm;
266+
return 0;
267+
}
264268

265269
ceph_get_snap_realm(mdsc, next);
266270
ceph_put_snap_realm(mdsc, realm);
@@ -269,14 +273,15 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
269273
if (realm)
270274
ceph_put_snap_realm(mdsc, realm);
271275

272-
return NULL;
276+
return 0;
273277
}
274278

275279
bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
276280
{
277281
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
278282
struct ceph_snap_realm *old_realm, *new_realm;
279283
bool is_same;
284+
int ret;
280285

281286
restart:
282287
/*
@@ -286,9 +291,9 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
286291
* dropped and we can then restart the whole operation.
287292
*/
288293
down_read(&mdsc->snap_rwsem);
289-
old_realm = get_quota_realm(mdsc, old, QUOTA_GET_ANY, true);
290-
new_realm = get_quota_realm(mdsc, new, QUOTA_GET_ANY, false);
291-
if (PTR_ERR(new_realm) == -EAGAIN) {
294+
get_quota_realm(mdsc, old, QUOTA_GET_ANY, &old_realm, true);
295+
ret = get_quota_realm(mdsc, new, QUOTA_GET_ANY, &new_realm, false);
296+
if (ret == -EAGAIN) {
292297
up_read(&mdsc->snap_rwsem);
293298
if (old_realm)
294299
ceph_put_snap_realm(mdsc, old_realm);
@@ -492,8 +497,8 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
492497
bool is_updated = false;
493498

494499
down_read(&mdsc->snap_rwsem);
495-
realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root),
496-
QUOTA_GET_MAX_BYTES, true);
500+
get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES,
501+
&realm, true);
497502
up_read(&mdsc->snap_rwsem);
498503
if (!realm)
499504
return false;

0 commit comments

Comments
 (0)