From b50a9a3d676bf20e62c8b0b23c2c1e7aa69d84b0 Mon Sep 17 00:00:00 2001 From: Alex Hudspith Date: Mon, 6 Nov 2023 09:17:38 +0000 Subject: [PATCH 1/3] proc_fuse: Fix get_swap_info typo swtotal == 0 -> *swtotal == 0 Signed-off-by: Alex Hudspith --- src/proc_fuse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/proc_fuse.c b/src/proc_fuse.c index 1d253bba..cb2fca2e 100644 --- a/src/proc_fuse.c +++ b/src/proc_fuse.c @@ -375,7 +375,7 @@ static void get_swap_info(const char *cgroup, uint64_t memlimit, *swtotal = 0; else *swtotal = (memswlimit - memlimit) / 1024; - if (memusage > memswusage || swtotal == 0) + if (memusage > memswusage || *swtotal == 0) *swusage = 0; else *swusage = (memswusage - memusage) / 1024; From f496e62cdbeb01215af34fed266fc5a98d25feeb Mon Sep 17 00:00:00 2001 From: Alex Hudspith Date: Mon, 6 Nov 2023 09:17:38 +0000 Subject: [PATCH 2/3] proc: Fix swap handling for cgroups v2 (can_use_swap) On cgroups v2, there are no swap current/max files at the cgroup root, so can_use_swap must look lower in the hierarchy to determine if swap accounting is enabled. To also account for memory accounting being turned off at some level, walk the hierarchy upwards from lxcfs' own cgroup. Signed-off-by: Alex Hudspith [ added check cgroup pointer is not NULL in lxcfs_init() ] Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 4 +++- src/cgroups/cgfsng.c | 33 ++++++++++++--------------------- src/cgroups/cgroup.h | 2 +- src/proc_fuse.c | 9 +++++---- 4 files changed, 21 insertions(+), 27 deletions(-) diff --git a/src/bindings.c b/src/bindings.c index 27c08c38..dc0550ca 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -866,6 +866,7 @@ static void __attribute__((constructor)) lxcfs_init(void) { __do_close int init_ns = -EBADF, root_fd = -EBADF, pidfd = -EBADF; + __do_free char *cgroup = NULL; int i = 0; pid_t pid; struct hierarchy *hierarchy; @@ -920,7 +921,8 @@ static void __attribute__((constructor)) lxcfs_init(void) lxcfs_info("Kernel supports pidfds"); } - can_use_swap = cgroup_ops->can_use_swap(cgroup_ops); + cgroup = get_pid_cgroup(pid, "memory"); + can_use_swap = cgroup && cgroup_ops->can_use_swap(cgroup_ops, cgroup); if (can_use_swap) lxcfs_info("Kernel supports swap accounting"); else diff --git a/src/cgroups/cgfsng.c b/src/cgroups/cgfsng.c index 2d583c67..7b732926 100644 --- a/src/cgroups/cgfsng.c +++ b/src/cgroups/cgfsng.c @@ -631,34 +631,25 @@ static int cgfsng_get_memory_slabinfo_fd(struct cgroup_ops *ops, const char *cgr return openat(h->fd, path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW); } -static bool cgfsng_can_use_swap(struct cgroup_ops *ops) +static bool cgfsng_can_use_swap(struct cgroup_ops *ops, const char *cgroup) { - bool has_swap = false; + __do_free char *cgroup_rel = NULL, *junk_value = NULL; + const char *file; struct hierarchy *h; h = ops->get_hierarchy(ops, "memory"); if (!h) return false; - if (is_unified_hierarchy(h)) { - if (faccessat(h->fd, "memory.swap.max", F_OK, 0)) - return false; - - if (faccessat(h->fd, "memory.swap.current", F_OK, 0)) - return false; - - has_swap = true; - } else { - if (faccessat(h->fd, "memory.memsw.limit_in_bytes", F_OK, 0)) - return false; - - if (faccessat(h->fd, "memory.memsw.usage_in_bytes", F_OK, 0)) - return false; - - has_swap = true; - } - - return has_swap; + cgroup_rel = must_make_path_relative(cgroup, NULL); + file = is_unified_hierarchy(h) ? "memory.swap.current" : "memory.memsw.usage_in_bytes"; + /* For v2, we need to look at the lower levels of the hierarchy because + * no 'memory.swap.current' file exists at the root. We must search + * upwards in the hierarchy in case memory accounting is disabled via + * cgroup.subtree_control for the given cgroup itself. + */ + int ret = cgroup_walkup_to_root(ops->cgroup2_root_fd, h->fd, cgroup_rel, file, &junk_value); + return ret == 0; } static int cgfsng_get_memory_stats(struct cgroup_ops *ops, const char *cgroup, diff --git a/src/cgroups/cgroup.h b/src/cgroups/cgroup.h index 122e8ebf..afa7db2e 100644 --- a/src/cgroups/cgroup.h +++ b/src/cgroups/cgroup.h @@ -148,7 +148,7 @@ struct cgroup_ops { char **value); int (*get_memory_slabinfo_fd)(struct cgroup_ops *ops, const char *cgroup); - bool (*can_use_swap)(struct cgroup_ops *ops); + bool (*can_use_swap)(struct cgroup_ops *ops, const char *cgroup); /* cpuset */ int (*get_cpuset_cpus)(struct cgroup_ops *ops, const char *cgroup, diff --git a/src/proc_fuse.c b/src/proc_fuse.c index cb2fca2e..9dedc375 100644 --- a/src/proc_fuse.c +++ b/src/proc_fuse.c @@ -459,11 +459,13 @@ static int proc_swaps_read(char *buf, size_t size, off_t offset, } if (wants_swap) { - /* The total amount of swap is always reported to be the + /* For cgroups v1, the total amount of swap is always reported to be the lesser of the RAM+SWAP limit or the SWAP device size. This is because the kernel can swap as much as it wants and not only up to swtotal. */ - swtotal = memlimit / 1024 + swtotal; + if (!liblxcfs_memory_is_cgroupv2()) + swtotal = memlimit / 1024 + swtotal; + if (hostswtotal < swtotal) { swtotal = hostswtotal; } @@ -1359,11 +1361,10 @@ static int proc_meminfo_read(char *buf, size_t size, off_t offset, sscanf(line + STRLITERALLEN("SwapTotal:"), "%" PRIu64, &hostswtotal); - /* The total amount of swap is always reported to be the + /* In cgroups v1, the total amount of swap is always reported to be the lesser of the RAM+SWAP limit or the SWAP device size. This is because the kernel can swap as much as it wants and not only up to swtotal. */ - if (!liblxcfs_memory_is_cgroupv2()) swtotal += memlimit; From a6c309be27e266b310851ca101a1b08644b1d9d5 Mon Sep 17 00:00:00 2001 From: Alex Hudspith Date: Mon, 6 Nov 2023 09:17:38 +0000 Subject: [PATCH 3/3] proc: Fix swap handling for cgroups v2 (zero limits) Since memory.swap.max = 0 is valid under v2, limits of 0 must not be treated differently. Instead, use UINT64_MAX as the default limit. This aligns with cgroups v1 behaviour anyway since 'limit_in_bytes' files contain a large number for unspecified limits (2^63). Resolves: #534 Signed-off-by: Alex Hudspith --- src/proc_fuse.c | 114 ++++++++++++++++++++++++++++++------------------ 1 file changed, 72 insertions(+), 42 deletions(-) diff --git a/src/proc_fuse.c b/src/proc_fuse.c index 9dedc375..1049e72d 100644 --- a/src/proc_fuse.c +++ b/src/proc_fuse.c @@ -239,21 +239,37 @@ __lxcfs_fuse_ops int proc_release(const char *path, struct fuse_file_info *fi) return 0; } -static uint64_t get_memlimit(const char *cgroup, bool swap) +/** + * Gets a non-hierarchical memory controller limit, or UINT64_MAX if no limit is + * in place. If `swap` is true, reads 'swap' (v2) or 'memsw' (v1); otherwise + * reads the memory (RAM) limits. + * + * @returns 0 on success (and sets `*limit`), < 0 on error + */ +static int get_memlimit(const char *cgroup, bool swap, uint64_t *limit) { __do_free char *memlimit_str = NULL; - uint64_t memlimit = 0; + uint64_t memlimit = UINT64_MAX; int ret; if (swap) ret = cgroup_ops->get_memory_swap_max(cgroup_ops, cgroup, &memlimit_str); else ret = cgroup_ops->get_memory_max(cgroup_ops, cgroup, &memlimit_str); - if (ret > 0 && memlimit_str[0] && safe_uint64(memlimit_str, &memlimit, 10) < 0) - lxcfs_error("Failed to convert memory%s.max=%s for cgroup %s", - swap ? ".swap" : "", memlimit_str, cgroup); - return memlimit; + if (ret < 0) + return ret; + + if (memlimit_str[0]) { + ret = safe_uint64(memlimit_str, &memlimit, 10); + if (ret < 0) { + lxcfs_error("Failed to convert memory%s.max=%s for cgroup %s", + swap ? ".swap" : "", memlimit_str, cgroup); + return ret; + } + } + *limit = memlimit; + return 0; } /* @@ -318,31 +334,44 @@ static char *gnu_dirname(char *path) return path; } -static uint64_t get_min_memlimit(const char *cgroup, bool swap) +/** + * Gets a hierarchical memory controller limit, or UINT64_MAX if no limit is + * in place. If `swap` is true, reads 'swap' (v2) or 'memsw' (v1); otherwise + * reads the memory (RAM) limits. + * + * @returns 0 on success (and sets `*limit`), < 0 on error + */ +static int get_min_memlimit(const char *cgroup, bool swap, uint64_t *limit) { __do_free char *copy = NULL; - uint64_t memlimit = 0, retlimit = 0; + uint64_t memlimit = UINT64_MAX, retlimit = UINT64_MAX; + int ret; copy = strdup(cgroup); if (!copy) return log_error_errno(0, ENOMEM, "Failed to allocate memory"); - retlimit = get_memlimit(copy, swap); + ret = get_memlimit(copy, swap, &retlimit); + if (ret < 0) + return ret; /* * If the cgroup doesn't start with / (probably won't happen), dirname() * will terminate with "" instead of "/" */ - while (*copy && strcmp(copy, "/") != 0) { + while (retlimit != 0 && *copy && strcmp(copy, "/") != 0) { char *it = copy; it = gnu_dirname(it); - memlimit = get_memlimit(it, swap); - if (memlimit > 0 && memlimit < retlimit) + ret = get_memlimit(it, swap, &memlimit); + if (ret < 0) + return ret; + if (memlimit < retlimit) retlimit = memlimit; - }; + } - return retlimit; + *limit = retlimit; + return 0; } static inline bool startswith(const char *line, const char *pref) @@ -361,30 +390,30 @@ static void get_swap_info(const char *cgroup, uint64_t memlimit, *swtotal = *swusage = 0; *memswpriority = 1; - memswlimit = get_min_memlimit(cgroup, true); - if (memswlimit > 0) { - ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str); - if (ret < 0 || safe_uint64(memswusage_str, &memswusage, 10) != 0) - return; - - if (liblxcfs_memory_is_cgroupv2()) { - *swtotal = memswlimit / 1024; - *swusage = memswusage / 1024; - } else { - if (memlimit > memswlimit) - *swtotal = 0; - else - *swtotal = (memswlimit - memlimit) / 1024; - if (memusage > memswusage || *swtotal == 0) - *swusage = 0; - else - *swusage = (memswusage - memusage) / 1024; - } - - ret = cgroup_ops->get_memory_swappiness(cgroup_ops, cgroup, &memswpriority_str); - if (ret >= 0) - safe_uint64(memswpriority_str, memswpriority, 10); + ret = get_min_memlimit(cgroup, true, &memswlimit); + if (ret < 0) + return; + ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str); + if (ret < 0 || safe_uint64(memswusage_str, &memswusage, 10) < 0) + return; + + if (liblxcfs_memory_is_cgroupv2()) { + *swtotal = memswlimit / 1024; + *swusage = memswusage / 1024; + } else { + if (memlimit > memswlimit) + *swtotal = 0; + else + *swtotal = (memswlimit - memlimit) / 1024; + if (memusage > memswusage || *swtotal == 0) + *swusage = 0; + else + *swusage = (memswusage - memusage) / 1024; } + + ret = cgroup_ops->get_memory_swappiness(cgroup_ops, cgroup, &memswpriority_str); + if (ret >= 0) + safe_uint64(memswpriority_str, memswpriority, 10); } static int proc_swaps_read(char *buf, size_t size, off_t offset, @@ -432,12 +461,12 @@ static int proc_swaps_read(char *buf, size_t size, off_t offset, return read_file_fuse("/proc/swaps", buf, size, d); prune_init_slice(cgroup); - memlimit = get_min_memlimit(cgroup, false); - + ret = get_min_memlimit(cgroup, false, &memlimit); + if (ret < 0) + return 0; ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str); if (ret < 0) return 0; - if (safe_uint64(memusage_str, &memusage, 10) < 0) lxcfs_error("Failed to convert memusage %s", memusage_str); @@ -1320,8 +1349,9 @@ static int proc_meminfo_read(char *buf, size_t size, off_t offset, if (!cgroup_parse_memory_stat(cgroup, &mstat)) return read_file_fuse("/proc/meminfo", buf, size, d); - memlimit = get_min_memlimit(cgroup, false); - + ret = get_min_memlimit(cgroup, false, &memlimit); + if (ret < 0) + return read_file_fuse("/proc/meminfo", buf, size, d); /* * Following values are allowed to fail, because swapaccount might be * turned off for current kernel.