Skip to content

Commit

Permalink
cpumask: Add optimized helpers when NR_CPUS fits in a long
Browse files Browse the repository at this point in the history
When NR_CPUS fits in a long, it's possible to use compiler built-ins to
produce much faster code when operating on cpumasks compared to just using
the generic bitops APIs.

Therefore, add optimized helpers using compiler built-ins when NR_CPUS fits
in a long. This also turns nr_cpu_ids into a compile-time constant for
further optimization potential.

Note that, unlike the upstream cpumask rewrite that provides this feature,
these optimized helpers exactly preserve the semantics of the helpers they
replace. This change is also much smaller than the upstream version.

Signed-off-by: Sultan Alsawaf <[email protected]>
Signed-off-by: Tashfin Shakeer Rhythm <[email protected]>
  • Loading branch information
kerneltoast authored and Tashar02 committed Jan 7, 2024
1 parent 80af912 commit 82931da
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 0 deletions.
108 changes: 108 additions & 0 deletions include/linux/cpumask.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;

#if NR_CPUS == 1
/* Only one CPU is configured, so the id count is the constant 1. */
#define nr_cpu_ids 1U
#elif NR_CPUS <= BITS_PER_LONG
/* NR_CPUS fits in one long: nr_cpu_ids is the compile-time constant NR_CPUS. */
#define nr_cpu_ids ((unsigned int)NR_CPUS)
#else
/* Otherwise nr_cpu_ids is a real variable, defined outside this header. */
extern unsigned int nr_cpu_ids;
#endif
Expand Down Expand Up @@ -206,6 +208,95 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
/*
 * Single-iteration form of for_each_cpu_and(): the body runs exactly once
 * with (cpu) set to 0. mask and and are only evaluated as discarded (void)
 * expressions in the loop's step clause.
 */
#define for_each_cpu_and(cpu, mask, and) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
#else
#if NR_CPUS <= BITS_PER_LONG
/*
 * cpumask_first - get the first cpu in a cpumask (single-word fast path)
 * @srcp: the cpumask pointer
 *
 * Returns the index of the lowest set bit, or nr_cpumask_bits if no bit
 * below nr_cpumask_bits is set.
 */
static inline unsigned int cpumask_first(const struct cpumask *srcp)
{
	/*
	 * ffsl() is 1-based and yields 0 for an empty word, so the
	 * subtraction wraps to UINT_MAX and is clamped below.
	 */
	unsigned int first = __builtin_ffsl(*cpumask_bits(srcp)) - 1;

	if (first > nr_cpumask_bits)
		return nr_cpumask_bits;

	return first;
}

/*
 * cpumask_last - get the last cpu in a cpumask (single-word fast path)
 * @srcp: the cpumask pointer
 *
 * Returns the index of the highest set bit below nr_cpumask_bits, or
 * nr_cpumask_bits if there is none.
 */
static inline unsigned int cpumask_last(const struct cpumask *srcp)
{
	/*
	 * Only bits [0, nr_cpumask_bits) are meaningful. Discard anything
	 * above that range first, the way a size-bounded find_last_bit()
	 * does; otherwise a stray high bit would be picked by clzl() and the
	 * mask would be reported as empty even though valid CPUs are set.
	 *
	 * Assumes 1 <= nr_cpumask_bits <= BITS_PER_LONG in this branch
	 * (NR_CPUS <= BITS_PER_LONG), so the shift count is always smaller
	 * than the width of a long.
	 */
	unsigned long bits = *cpumask_bits(srcp) &
			     (~0UL >> (BITS_PER_LONG - nr_cpumask_bits));

	/* clzl() is undefined for 0, so handle the empty mask explicitly. */
	if (unlikely(!bits))
		return nr_cpumask_bits;

	/* After masking, the result is always below nr_cpumask_bits. */
	return BITS_PER_LONG - 1 - __builtin_clzl(bits);
}

/*
 * cpumask_next - get the next cpu in a cpumask (single-word fast path)
 * @n: the cpu prior to the place to search (ie. return will be > @n)
 * @srcp: the cpumask pointer
 *
 * Returns the next set bit after @n, or nr_cpumask_bits if there is none.
 */
static inline unsigned int cpumask_next(int n, const struct cpumask *srcp)
{
	unsigned int start, found;
	unsigned long word;

	/* -1 is a legal arg here. */
	if (n != -1)
		cpumask_check(n);

	start = n + 1;
	if (unlikely(start >= nr_cpumask_bits))
		return nr_cpumask_bits;

	/* Keep only the bits at positions >= start, then take the lowest. */
	word = *cpumask_bits(srcp) & (~0UL << start);
	found = __builtin_ffsl(word) - 1;
	if (found > nr_cpumask_bits)
		found = nr_cpumask_bits;

	return found;
}

/*
 * cpumask_next_zero - get the next unset cpu in a cpumask (single-word fast path)
 * @n: the cpu prior to the place to search (ie. return will be > @n)
 * @srcp: the cpumask pointer
 *
 * Returns the next clear bit after @n, or nr_cpumask_bits if there is none
 * below nr_cpumask_bits.
 */
static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
{
	unsigned int start, found;
	unsigned long inverted;

	/* -1 is a legal arg here. */
	if (n != -1)
		cpumask_check(n);

	start = n + 1;
	if (unlikely(start >= nr_cpumask_bits))
		return nr_cpumask_bits;

	/* Search the complement for its lowest set bit at or above start. */
	inverted = ~*cpumask_bits(srcp) & (~0UL << start);
	found = __builtin_ffsl(inverted) - 1;
	if (found > nr_cpumask_bits)
		found = nr_cpumask_bits;

	return found;
}

/*
 * cpumask_next_and - get the next cpu in *srcp & *andp (single-word fast path)
 * @n: the cpu prior to the place to search (ie. return will be > @n)
 * @srcp: the first cpumask pointer
 * @andp: the second cpumask pointer
 *
 * Returns the next bit after @n set in both masks, or nr_cpumask_bits if
 * there is none.
 */
static inline int cpumask_next_and(int n, const struct cpumask *srcp,
				   const struct cpumask *andp)
{
	unsigned int start, found;
	unsigned long common;

	/* -1 is a legal arg here. */
	if (n != -1)
		cpumask_check(n);

	start = n + 1;
	if (unlikely(start >= nr_cpumask_bits))
		return nr_cpumask_bits;

	/* Intersect the two words, then drop every bit below start. */
	common = *cpumask_bits(srcp) & *cpumask_bits(andp);
	common &= ~0UL << start;

	found = __builtin_ffsl(common) - 1;
	if (found > nr_cpumask_bits)
		found = nr_cpumask_bits;

	return found;
}

/*
 * cpumask_any_but - return a cpu in a cpumask other than @cpu (single-word fast path)
 * @mask: the cpumask to search
 * @cpu: the cpu to ignore
 *
 * Returns the lowest set bit that is not @cpu, or nr_cpumask_bits if there
 * is no such bit.
 */
static inline int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
{
	unsigned long candidates;
	unsigned int pick;

	cpumask_check(cpu);

	candidates = *cpumask_bits(mask);
	/* Only clear the excluded bit when it lies inside the mask. */
	if (likely(cpu < nr_cpumask_bits))
		candidates &= ~BIT(cpu);

	pick = __builtin_ffsl(candidates) - 1;
	if (pick > nr_cpumask_bits)
		pick = nr_cpumask_bits;

	return pick;
}
#else /* NR_CPUS > BITS_PER_LONG */
/**
* cpumask_first - get the first cpu in a cpumask
* @srcp: the cpumask pointer
Expand Down Expand Up @@ -247,6 +338,8 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)

int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
#endif /* NR_CPUS <= BITS_PER_LONG */

unsigned int cpumask_local_spread(unsigned int i, int node);

/**
Expand Down Expand Up @@ -397,7 +490,12 @@ static inline int cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask)
*/
static inline void cpumask_setall(struct cpumask *dstp)
{
/* bitmap_fill() isn't optimized for compile-time constants */
#if NR_CPUS <= BITS_PER_LONG
*cpumask_bits(dstp) = BIT(NR_CPUS) - 1;
#else
bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits);
#endif
}

/**
Expand All @@ -406,7 +504,12 @@ static inline void cpumask_setall(struct cpumask *dstp)
*/
static inline void cpumask_clear(struct cpumask *dstp)
{
#if NR_CPUS > BITS_PER_LONG
	bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits);
#else
	/*
	 * The whole mask is one word, so zero it with a plain store;
	 * bitmap_zero() isn't optimized for compile-time constants.
	 */
	*cpumask_bits(dstp) = 0UL;
#endif
}

/**
Expand Down Expand Up @@ -579,7 +682,12 @@ static inline void cpumask_shift_left(struct cpumask *dstp,
static inline void cpumask_copy(struct cpumask *dstp,
				const struct cpumask *srcp)
{
#if NR_CPUS > BITS_PER_LONG
	bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits);
#else
	/*
	 * The whole mask is one word, so copy it with a plain assignment;
	 * bitmap_copy() isn't optimized for compile-time constants.
	 */
	const unsigned long word = *cpumask_bits(srcp);

	*cpumask_bits(dstp) = word;
#endif
}

/**
Expand Down
6 changes: 6 additions & 0 deletions kernel/smp.c
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@ static int __init nosmp(char *str)

early_param("nosmp", nosmp);

#if NR_CPUS > BITS_PER_LONG
/* this is hard limit */
static int __init nrcpus(char *str)
{
Expand All @@ -542,6 +543,7 @@ static int __init nrcpus(char *str)
}

early_param("nr_cpus", nrcpus);
#endif

static int __init maxcpus(char *str)
{
Expand All @@ -567,14 +569,18 @@ static int __init boot_cpus(char *str)

early_param("boot_cpus", boot_cpus);

#if NR_CPUS > BITS_PER_LONG
/*
 * Setup number of possible processor ids.
 *
 * The variable only exists when NR_CPUS does not fit in a long; for smaller
 * NR_CPUS, nr_cpu_ids is a preprocessor constant and there is nothing to
 * define or export here.
 */
unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
EXPORT_SYMBOL(nr_cpu_ids);
#endif

/*
 * An arch may set nr_cpu_ids earlier if needed, so this would be redundant.
 *
 * When NR_CPUS <= BITS_PER_LONG the body is compiled out and this function
 * does nothing.
 */
void __init setup_nr_cpu_ids(void)
{
#if NR_CPUS > BITS_PER_LONG
	unsigned int last_possible;

	/* The id count is one past the highest possible CPU. */
	last_possible = find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS);
	nr_cpu_ids = last_possible + 1;
#endif
}

static inline bool boot_cpu(int cpu)
Expand Down
2 changes: 2 additions & 0 deletions lib/cpumask.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <linux/export.h>
#include <linux/bootmem.h>

#if NR_CPUS > BITS_PER_LONG
/**
* cpumask_next - get the next cpu in a cpumask
* @n: the cpu prior to the place to search (ie. return will be > @n)
Expand Down Expand Up @@ -60,6 +61,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
return i;
}
EXPORT_SYMBOL(cpumask_any_but);
#endif /* NR_CPUS > BITS_PER_LONG */

/**
* cpumask_next_wrap - helper to implement for_each_cpu_wrap
Expand Down

0 comments on commit 82931da

Please sign in to comment.