perf cpumap: Reduce cpu size from int to int16_t

Fewer than 32k logical CPUs are currently supported by perf. A cpumap
is indexed by an integer (see perf_cpu_map__cpu) yielding a perf_cpu
that wraps a 4-byte int for the logical CPU - the wrapping is done
deliberately to avoid confusing a logical CPU with an index into a
cpumap. A 4-byte int within perf_cpu is larger than required, so this
patch reduces it to a 2-byte int16_t. For a cpumap containing 16
entries this reduces the array size from 64 to 32 bytes. For very
large servers with many logical CPUs the size
savings will be greater.
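
For illustration only (not part of the patch): a minimal, standalone C
sketch of the space saving described above. The struct names here are
invented for the example; only the field types mirror the before/after
perf_cpu wrapper.

    #include <stdint.h>
    #include <stdio.h>

    /* Before: the wrapper holds a 4-byte int per logical CPU. */
    struct perf_cpu_before { int cpu; };

    /* After: a 2-byte int16_t is enough for fewer than 32k logical CPUs. */
    struct perf_cpu_after { int16_t cpu; };

    int main(void)
    {
            /* A 16-entry cpumap: 16 * 4 = 64 bytes before, 16 * 2 = 32 after. */
            printf("before: %zu bytes, after: %zu bytes\n",
                   16 * sizeof(struct perf_cpu_before),
                   16 * sizeof(struct perf_cpu_after));
            return 0;
    }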

Signed-off-by: Ian Rogers <irogers@google.com>
Reviewed-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250210191231.156294-1-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

@@ -185,7 +185,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
         while (isdigit(*cpu_list)) {
                 p = NULL;
                 start_cpu = strtoul(cpu_list, &p, 0);
-                if (start_cpu >= INT_MAX
+                if (start_cpu >= INT16_MAX
                     || (*p != '\0' && *p != ',' && *p != '-' && *p != '\n'))
                         goto invalid;
 
@@ -194,7 +194,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
                         p = NULL;
                         end_cpu = strtoul(cpu_list, &p, 0);
 
-                        if (end_cpu >= INT_MAX || (*p != '\0' && *p != ',' && *p != '\n'))
+                        if (end_cpu >= INT16_MAX || (*p != '\0' && *p != ',' && *p != '\n'))
                                 goto invalid;
 
                         if (end_cpu < start_cpu)
@@ -209,7 +209,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
                 for (; start_cpu <= end_cpu; start_cpu++) {
                         /* check for duplicates */
                         for (i = 0; i < nr_cpus; i++)
-                                if (tmp_cpus[i].cpu == (int)start_cpu)
+                                if (tmp_cpus[i].cpu == (int16_t)start_cpu)
                                         goto invalid;
 
                         if (nr_cpus == max_entries) {
@@ -219,7 +219,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
                                         goto invalid;
                                 tmp_cpus = tmp;
                         }
-                        tmp_cpus[nr_cpus++].cpu = (int)start_cpu;
+                        tmp_cpus[nr_cpus++].cpu = (int16_t)start_cpu;
                 }
                 if (*p)
                         ++p;

@@ -4,10 +4,11 @@
 #include <perf/core.h>
 #include <stdbool.h>
+#include <stdint.h>
 
 /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
 struct perf_cpu {
-        int cpu;
+        int16_t cpu;
 };
 
 struct perf_cache {

@@ -67,19 +67,23 @@ static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_m
         struct perf_cpu_map *map;
 
         map = perf_cpu_map__empty_new(data->cpus_data.nr);
-        if (map) {
-                unsigned i;
-
-                for (i = 0; i < data->cpus_data.nr; i++) {
+        if (!map)
+                return NULL;
+
+        for (unsigned int i = 0; i < data->cpus_data.nr; i++) {
                 /*
                  * Special treatment for -1, which is not real cpu number,
                  * and we need to use (int) -1 to initialize map[i],
                  * otherwise it would become 65535.
                  */
-                if (data->cpus_data.cpu[i] == (u16) -1)
+                if (data->cpus_data.cpu[i] == (u16) -1) {
                         RC_CHK_ACCESS(map)->map[i].cpu = -1;
-                else
-                        RC_CHK_ACCESS(map)->map[i].cpu = (int) data->cpus_data.cpu[i];
+                } else if (data->cpus_data.cpu[i] < INT16_MAX) {
+                        RC_CHK_ACCESS(map)->map[i].cpu = (int16_t) data->cpus_data.cpu[i];
+                } else {
+                        pr_err("Invalid cpumap entry %u\n", data->cpus_data.cpu[i]);
+                        perf_cpu_map__put(map);
+                        return NULL;
                 }
         }
@@ -106,8 +110,15 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_
                 int cpu;
 
                 perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
-                for_each_set_bit(cpu, local_copy, 64)
-                        RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
+                for_each_set_bit(cpu, local_copy, 64) {
+                        if (cpu + cpus_per_i < INT16_MAX) {
+                                RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
+                        } else {
+                                pr_err("Invalid cpumap entry %d\n", cpu + cpus_per_i);
+                                perf_cpu_map__put(map);
+                                return NULL;
+                        }
+                }
         }
 
         return map;
@@ -127,8 +138,15 @@ static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map
                 RC_CHK_ACCESS(map)->map[i++].cpu = -1;
 
         for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
-             i++, cpu++)
-                RC_CHK_ACCESS(map)->map[i].cpu = cpu;
+             i++, cpu++) {
+                if (cpu < INT16_MAX) {
+                        RC_CHK_ACCESS(map)->map[i].cpu = cpu;
+                } else {
+                        pr_err("Invalid cpumap entry %d\n", cpu);
+                        perf_cpu_map__put(map);
+                        return NULL;
+                }
+        }
 
         return map;
 }
@@ -427,7 +445,7 @@ static void set_max_cpu_num(void)
 {
         const char *mnt;
         char path[PATH_MAX];
-        int ret = -1;
+        int max, ret = -1;
 
         /* set up default */
         max_cpu_num.cpu = 4096;
@@ -444,10 +462,12 @@ static void set_max_cpu_num(void)
                 goto out;
         }
 
-        ret = get_max_num(path, &max_cpu_num.cpu);
+        ret = get_max_num(path, &max);
         if (ret)
                 goto out;
 
+        max_cpu_num.cpu = max;
+
         /* get the highest present cpu number for a sparse allocation */
         ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
         if (ret >= PATH_MAX) {
@@ -455,8 +475,14 @@ static void set_max_cpu_num(void)
                 goto out;
         }
 
-        ret = get_max_num(path, &max_present_cpu_num.cpu);
+        ret = get_max_num(path, &max);
+        if (!ret && max > INT16_MAX) {
+                pr_err("Read out of bounds max cpus of %d\n", max);
+                ret = -1;
+        }
+        if (!ret)
+                max_present_cpu_num.cpu = (int16_t)max;
 
 out:
         if (ret)
                 pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
@@ -606,7 +632,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
 #define COMMA first ? "" : ","
 
         for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
-                struct perf_cpu cpu = { .cpu = INT_MAX };
+                struct perf_cpu cpu = { .cpu = INT16_MAX };
                 bool last = i == perf_cpu_map__nr(map);
 
                 if (!last)

@@ -543,7 +543,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
                 for (i = 0; i < env->nr_numa_nodes; i++) {
                         nn = &env->numa_nodes[i];
-                        nr = max(nr, perf_cpu_map__max(nn->map).cpu);
+                        nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu);
                 }
 
                 nr++;