perf parse-events: Add "cpu" term to set the CPU an event is recorded on

The -C option allows the CPUs for a list of events to be specified, but
it's not possible to set the CPU for a single event. Add a term to
allow this. The term isn't a general CPU list because ',' is already
a special character in event parsing; instead, multiple cpu= terms may
be provided and they will be merged/unioned together.

An example of mixing different types of events counted on different CPUs:
```
$ perf stat -A -C 0,4-5,8 -e "instructions/cpu=0/,l1d-misses/cpu=4,cpu=5/,inst_retired.any/cpu=8/,cycles" -a sleep 0.1

 Performance counter stats for 'system wide':

CPU0            6,979,225      instructions/cpu=0/              #    0.89  insn per cycle
CPU4               75,138      cpu/l1d-misses/
CPU5            1,418,939      cpu/l1d-misses/
CPU8              797,553      cpu/inst_retired.any,cpu=8/
CPU0            7,845,302      cycles
CPU4            6,546,859      cycles
CPU5          185,915,438      cycles
CPU8            2,065,668      cycles

       0.112449242 seconds time elapsed
```

Committer testing:

  root@number:~# grep -m1 "model name" /proc/cpuinfo
  model name	: AMD Ryzen 9 9950X3D 16-Core Processor
  root@number:~# perf stat -A -e "instructions/cpu=0/,instructions,l1d-misses/cpu=4,cpu=5/,cycles" -a sleep 0.1

   Performance counter stats for 'system wide':

  CPU0    2,398,351   instructions/cpu=0/    #  0.44  insn per cycle
  CPU0    2,398,152   instructions           #  0.44  insn per cycle
  CPU1    1,265,634   instructions           #  0.49  insn per cycle
  CPU2      606,087   instructions           #  0.50  insn per cycle
  CPU3    4,025,752   instructions           #  0.52  insn per cycle
  CPU4    4,236,810   instructions           #  0.53  insn per cycle
  CPU5    3,984,832   instructions           #  0.66  insn per cycle
  CPU6      434,132   instructions           #  0.44  insn per cycle
  CPU7       65,752   instructions           #  0.41  insn per cycle
  CPU8      459,083   instructions           #  0.48  insn per cycle
  CPU9    6,464,161   instructions           #  1.31  insn per cycle
  <SNIP>
  root@number:~# perf stat -e "instructions/cpu=0/,instructions,l1d-misses/cpu=4,cpu=5/,cycles" -a sleep 0.

   Performance counter stats for 'system wide':

             144,822      instructions/cpu=0/              #    0.03  insn per cycle
           4,666,114      instructions                     #    0.93  insn per cycle
               2,583      l1d-misses
           4,993,633      cycles

         0.000868512 seconds time elapsed

  root@number:~#

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dominique Martinet <asmadeus@codewreck.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Link: https://lore.kernel.org/r/20250403194337.40202-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Ian Rogers 2025-04-03 12:43:37 -07:00 committed by Arnaldo Carvalho de Melo
parent 168c7b5091
commit 255f5b6d06
6 changed files with 76 additions and 17 deletions

View file

@ -289,6 +289,15 @@ Sums up the event counts for all hardware threads in a core, e.g.:
perf stat -e cpu/event=0,umask=0x3,percore=1/
cpu:
Specifies the CPU to open the event upon. The value may be repeated to
specify opening the event on multiple CPUs:
perf stat -e instructions/cpu=0,cpu=2/,cycles/cpu=1,cpu=2/ -a sleep 1
perf stat -e data_read/cpu=0/,data_write/cpu=1/ -a sleep 1
EVENT GROUPS
------------

View file

@ -48,6 +48,7 @@ struct evsel_config_term {
u32 aux_sample_size;
u64 cfg_chg;
char *str;
int cpu;
} val;
bool weak;
};

View file

@ -7,6 +7,7 @@
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include "cpumap.h"
#include "term.h"
#include "env.h"
#include "evlist.h"
@ -180,6 +181,26 @@ static char *get_config_name(const struct parse_events_terms *head_terms)
return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME);
}
/*
 * Collect every "cpu" term found in head_terms into a single CPU map.
 *
 * Each matching term contributes a one-CPU map built from its numeric value;
 * those maps are merged into the result one at a time.
 *
 * Returns NULL when head_terms is NULL or contains no cpu term. Otherwise the
 * merged map is returned and the caller drops it with perf_cpu_map__put().
 */
static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head_terms)
{
	struct perf_cpu_map *merged = NULL;
	struct parse_events_term *pos;

	if (head_terms == NULL)
		return merged;

	list_for_each_entry(pos, &head_terms->terms, list) {
		struct perf_cpu_map *single;

		/* Only cpu= terms contribute to the map. */
		if (pos->type_term != PARSE_EVENTS__TERM_TYPE_CPU)
			continue;

		single = perf_cpu_map__new_int(pos->val.num);
		perf_cpu_map__merge(&merged, single);
		/* The temporary one-CPU map is no longer needed after the merge. */
		perf_cpu_map__put(single);
	}

	return merged;
}
/**
* fix_raw - For each raw term see if there is an event (aka alias) in pmu that
* matches the raw's string value. If the string value matches an
@ -443,11 +464,12 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
bool found_supported = false;
const char *config_name = get_config_name(parsed_terms);
const char *metric_id = get_config_metric_id(parsed_terms);
struct perf_cpu_map *cpus = get_config_cpu(parsed_terms);
int ret = 0;
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
LIST_HEAD(config_terms);
struct perf_event_attr attr;
int ret;
if (parse_events__filter_pmu(parse_state, pmu))
continue;
@ -462,7 +484,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
perf_pmu__auto_merge_stats(pmu),
/*alternate_hw_config=*/PERF_COUNT_HW_MAX);
if (ret)
return ret;
goto out_err;
continue;
}
@ -482,21 +504,27 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
if (parsed_terms) {
if (config_attr(&attr, parsed_terms, parse_state->error,
config_term_common))
return -EINVAL;
if (get_config_terms(parsed_terms, &config_terms))
return -ENOMEM;
config_term_common)) {
ret = -EINVAL;
goto out_err;
}
if (get_config_terms(parsed_terms, &config_terms)) {
ret = -ENOMEM;
goto out_err;
}
}
if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name,
metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
/*cpu_list=*/NULL,
/*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL)
return -ENOMEM;
cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL)
ret = -ENOMEM;
free_config_terms(&config_terms);
if (ret)
goto out_err;
}
out_err:
perf_cpu_map__put(cpus);
return found_supported ? 0 : -EINVAL;
}
@ -815,6 +843,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type)
[PARSE_EVENTS__TERM_TYPE_RAW] = "raw",
[PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache",
[PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware",
[PARSE_EVENTS__TERM_TYPE_CPU] = "cpu",
};
if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR)
return "unknown term";
@ -844,6 +873,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er
case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
case PARSE_EVENTS__TERM_TYPE_PERCORE:
case PARSE_EVENTS__TERM_TYPE_CPU:
return true;
case PARSE_EVENTS__TERM_TYPE_USER:
case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
@ -991,6 +1021,15 @@ do { \
return -EINVAL;
}
break;
case PARSE_EVENTS__TERM_TYPE_CPU:
CHECK_TYPE_VAL(NUM);
if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
parse_events_error__handle(err, term->err_val,
strdup("too big"),
NULL);
return -EINVAL;
}
break;
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
case PARSE_EVENTS__TERM_TYPE_USER:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
@ -1118,6 +1157,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
case PARSE_EVENTS__TERM_TYPE_RAW:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
case PARSE_EVENTS__TERM_TYPE_CPU:
default:
if (err) {
parse_events_error__handle(err, term->err_term,
@ -1252,6 +1292,7 @@ do { \
case PARSE_EVENTS__TERM_TYPE_RAW:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
case PARSE_EVENTS__TERM_TYPE_CPU:
default:
break;
}
@ -1306,6 +1347,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head
case PARSE_EVENTS__TERM_TYPE_RAW:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
case PARSE_EVENTS__TERM_TYPE_CPU:
default:
break;
}
@ -1350,6 +1392,7 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state,
struct perf_event_attr attr;
LIST_HEAD(config_terms);
const char *name, *metric_id;
struct perf_cpu_map *cpus;
int ret;
memset(&attr, 0, sizeof(attr));
@ -1371,10 +1414,11 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state,
name = get_config_name(head_config);
metric_id = get_config_metric_id(head_config);
cpus = get_config_cpu(head_config);
ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name,
metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
/*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX
) == NULL ? -ENOMEM : 0;
metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) ? 0 : -ENOMEM;
perf_cpu_map__put(cpus);
free_config_terms(&config_terms);
return ret;
}
@ -1434,6 +1478,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
LIST_HEAD(config_terms);
struct parse_events_terms parsed_terms;
bool alias_rewrote_terms = false;
struct perf_cpu_map *term_cpu = NULL;
if (verbose > 1) {
struct strbuf sb;
@ -1528,11 +1573,12 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
return -EINVAL;
}
term_cpu = get_config_cpu(&parsed_terms);
evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true,
get_config_name(&parsed_terms),
get_config_metric_id(&parsed_terms), pmu,
&config_terms, auto_merge_stats, /*cpu_list=*/NULL,
alternate_hw_config);
&config_terms, auto_merge_stats, term_cpu, alternate_hw_config);
perf_cpu_map__put(term_cpu);
if (!evsel) {
parse_events_terms__exit(&parsed_terms);
return -ENOMEM;

View file

@ -80,7 +80,8 @@ enum parse_events__term_type {
PARSE_EVENTS__TERM_TYPE_RAW,
PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
PARSE_EVENTS__TERM_TYPE_HARDWARE,
#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_HARDWARE + 1)
PARSE_EVENTS__TERM_TYPE_CPU,
#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_CPU + 1)
};
struct parse_events_term {

View file

@ -335,6 +335,7 @@ aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); }
cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }

View file

@ -1470,7 +1470,7 @@ static int pmu_config_term(const struct perf_pmu *pmu,
break;
case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */
return -EINVAL;
case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE:
case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_CPU:
/* Skip non-config terms. */
break;
default:
@ -1852,6 +1852,7 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call
"aux-output",
"aux-action=(pause|resume|start-paused)",
"aux-sample-size=number",
"cpu=number",
};
struct perf_pmu_format *format;
int ret;