mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-11-01 01:03:52 +00:00
perf parse-events: Add "cpu" term to set the CPU an event is recorded on
The -C option allows the CPUs for a list of events to be specified, but
it's not possible to set the CPU for a single event. Add a term to
allow this. The term isn't a general CPU list because ',' is already
a special character in event parsing; instead, multiple cpu= terms may
be provided and they will be merged/unioned together.
An example of mixing different types of events counted on different CPUs:
```
$ perf stat -A -C 0,4-5,8 -e "instructions/cpu=0/,l1d-misses/cpu=4,cpu=5/,inst_retired.any/cpu=8/,cycles" -a sleep 0.1
Performance counter stats for 'system wide':
CPU0 6,979,225 instructions/cpu=0/ # 0.89 insn per cycle
CPU4 75,138 cpu/l1d-misses/
CPU5 1,418,939 cpu/l1d-misses/
CPU8 797,553 cpu/inst_retired.any,cpu=8/
CPU0 7,845,302 cycles
CPU4 6,546,859 cycles
CPU5 185,915,438 cycles
CPU8 2,065,668 cycles
0.112449242 seconds time elapsed
```
Committer testing:
root@number:~# grep -m1 "model name" /proc/cpuinfo
model name : AMD Ryzen 9 9950X3D 16-Core Processor
root@number:~# perf stat -A -e "instructions/cpu=0/,instructions,l1d-misses/cpu=4,cpu=5/,cycles" -a sleep 0.1
Performance counter stats for 'system wide':
CPU0 2,398,351 instructions/cpu=0/ # 0.44 insn per cycle
CPU0 2,398,152 instructions # 0.44 insn per cycle
CPU1 1,265,634 instructions # 0.49 insn per cycle
CPU2 606,087 instructions # 0.50 insn per cycle
CPU3 4,025,752 instructions # 0.52 insn per cycle
CPU4 4,236,810 instructions # 0.53 insn per cycle
CPU5 3,984,832 instructions # 0.66 insn per cycle
CPU6 434,132 instructions # 0.44 insn per cycle
CPU7 65,752 instructions # 0.41 insn per cycle
CPU8 459,083 instructions # 0.48 insn per cycle
CPU9 6,464,161 instructions # 1.31 insn per cycle
<SNIP>
root@number:~# perf stat -e "instructions/cpu=0/,instructions,l1d-misses/cpu=4,cpu=5/,cycles" -a sleep 0.
Performance counter stats for 'system wide':
144,822 instructions/cpu=0/ # 0.03 insn per cycle
4,666,114 instructions # 0.93 insn per cycle
2,583 l1d-misses
4,993,633 cycles
0.000868512 seconds time elapsed
root@number:~#
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dominique Martinet <asmadeus@codewreck.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Link: https://lore.kernel.org/r/20250403194337.40202-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
168c7b5091
commit
255f5b6d06
6 changed files with 76 additions and 17 deletions
|
|
@ -289,6 +289,15 @@ Sums up the event counts for all hardware threads in a core, e.g.:
|
|||
|
||||
perf stat -e cpu/event=0,umask=0x3,percore=1/
|
||||
|
||||
cpu:
|
||||
|
||||
Specifies the CPU to open the event upon. The value may be repeated to
|
||||
specify opening the event on multiple CPUs:
|
||||
|
||||
|
||||
perf stat -e instructions/cpu=0,cpu=2/,cycles/cpu=1,cpu=2/ -a sleep 1
|
||||
perf stat -e data_read/cpu=0/,data_write/cpu=1/ -a sleep 1
|
||||
|
||||
|
||||
EVENT GROUPS
|
||||
------------
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ struct evsel_config_term {
|
|||
u32 aux_sample_size;
|
||||
u64 cfg_chg;
|
||||
char *str;
|
||||
int cpu;
|
||||
} val;
|
||||
bool weak;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include <errno.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/param.h>
|
||||
#include "cpumap.h"
|
||||
#include "term.h"
|
||||
#include "env.h"
|
||||
#include "evlist.h"
|
||||
|
|
@ -180,6 +181,26 @@ static char *get_config_name(const struct parse_events_terms *head_terms)
|
|||
return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME);
|
||||
}
|
||||
|
||||
static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head_terms)
|
||||
{
|
||||
struct parse_events_term *term;
|
||||
struct perf_cpu_map *cpus = NULL;
|
||||
|
||||
if (!head_terms)
|
||||
return NULL;
|
||||
|
||||
list_for_each_entry(term, &head_terms->terms, list) {
|
||||
if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) {
|
||||
struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num);
|
||||
|
||||
perf_cpu_map__merge(&cpus, cpu);
|
||||
perf_cpu_map__put(cpu);
|
||||
}
|
||||
}
|
||||
|
||||
return cpus;
|
||||
}
|
||||
|
||||
/**
|
||||
* fix_raw - For each raw term see if there is an event (aka alias) in pmu that
|
||||
* matches the raw's string value. If the string value matches an
|
||||
|
|
@ -443,11 +464,12 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
|
|||
bool found_supported = false;
|
||||
const char *config_name = get_config_name(parsed_terms);
|
||||
const char *metric_id = get_config_metric_id(parsed_terms);
|
||||
struct perf_cpu_map *cpus = get_config_cpu(parsed_terms);
|
||||
int ret = 0;
|
||||
|
||||
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
|
||||
LIST_HEAD(config_terms);
|
||||
struct perf_event_attr attr;
|
||||
int ret;
|
||||
|
||||
if (parse_events__filter_pmu(parse_state, pmu))
|
||||
continue;
|
||||
|
|
@ -462,7 +484,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
|
|||
perf_pmu__auto_merge_stats(pmu),
|
||||
/*alternate_hw_config=*/PERF_COUNT_HW_MAX);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out_err;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -482,21 +504,27 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
|
|||
|
||||
if (parsed_terms) {
|
||||
if (config_attr(&attr, parsed_terms, parse_state->error,
|
||||
config_term_common))
|
||||
return -EINVAL;
|
||||
|
||||
if (get_config_terms(parsed_terms, &config_terms))
|
||||
return -ENOMEM;
|
||||
config_term_common)) {
|
||||
ret = -EINVAL;
|
||||
goto out_err;
|
||||
}
|
||||
if (get_config_terms(parsed_terms, &config_terms)) {
|
||||
ret = -ENOMEM;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name,
|
||||
metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
|
||||
/*cpu_list=*/NULL,
|
||||
/*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL)
|
||||
return -ENOMEM;
|
||||
cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL)
|
||||
ret = -ENOMEM;
|
||||
|
||||
free_config_terms(&config_terms);
|
||||
if (ret)
|
||||
goto out_err;
|
||||
}
|
||||
out_err:
|
||||
perf_cpu_map__put(cpus);
|
||||
return found_supported ? 0 : -EINVAL;
|
||||
}
|
||||
|
||||
|
|
@ -815,6 +843,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type)
|
|||
[PARSE_EVENTS__TERM_TYPE_RAW] = "raw",
|
||||
[PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache",
|
||||
[PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware",
|
||||
[PARSE_EVENTS__TERM_TYPE_CPU] = "cpu",
|
||||
};
|
||||
if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR)
|
||||
return "unknown term";
|
||||
|
|
@ -844,6 +873,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er
|
|||
case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
|
||||
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
|
||||
case PARSE_EVENTS__TERM_TYPE_PERCORE:
|
||||
case PARSE_EVENTS__TERM_TYPE_CPU:
|
||||
return true;
|
||||
case PARSE_EVENTS__TERM_TYPE_USER:
|
||||
case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
|
||||
|
|
@ -991,6 +1021,15 @@ do { \
|
|||
return -EINVAL;
|
||||
}
|
||||
break;
|
||||
case PARSE_EVENTS__TERM_TYPE_CPU:
|
||||
CHECK_TYPE_VAL(NUM);
|
||||
if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
|
||||
parse_events_error__handle(err, term->err_val,
|
||||
strdup("too big"),
|
||||
NULL);
|
||||
return -EINVAL;
|
||||
}
|
||||
break;
|
||||
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
|
||||
case PARSE_EVENTS__TERM_TYPE_USER:
|
||||
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
|
||||
|
|
@ -1118,6 +1157,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
|
|||
case PARSE_EVENTS__TERM_TYPE_RAW:
|
||||
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
|
||||
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
|
||||
case PARSE_EVENTS__TERM_TYPE_CPU:
|
||||
default:
|
||||
if (err) {
|
||||
parse_events_error__handle(err, term->err_term,
|
||||
|
|
@ -1252,6 +1292,7 @@ do { \
|
|||
case PARSE_EVENTS__TERM_TYPE_RAW:
|
||||
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
|
||||
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
|
||||
case PARSE_EVENTS__TERM_TYPE_CPU:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -1306,6 +1347,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head
|
|||
case PARSE_EVENTS__TERM_TYPE_RAW:
|
||||
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
|
||||
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
|
||||
case PARSE_EVENTS__TERM_TYPE_CPU:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -1350,6 +1392,7 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state,
|
|||
struct perf_event_attr attr;
|
||||
LIST_HEAD(config_terms);
|
||||
const char *name, *metric_id;
|
||||
struct perf_cpu_map *cpus;
|
||||
int ret;
|
||||
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
|
|
@ -1371,10 +1414,11 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state,
|
|||
|
||||
name = get_config_name(head_config);
|
||||
metric_id = get_config_metric_id(head_config);
|
||||
cpus = get_config_cpu(head_config);
|
||||
ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name,
|
||||
metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
|
||||
/*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX
|
||||
) == NULL ? -ENOMEM : 0;
|
||||
metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
|
||||
cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) ? 0 : -ENOMEM;
|
||||
perf_cpu_map__put(cpus);
|
||||
free_config_terms(&config_terms);
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -1434,6 +1478,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
|
|||
LIST_HEAD(config_terms);
|
||||
struct parse_events_terms parsed_terms;
|
||||
bool alias_rewrote_terms = false;
|
||||
struct perf_cpu_map *term_cpu = NULL;
|
||||
|
||||
if (verbose > 1) {
|
||||
struct strbuf sb;
|
||||
|
|
@ -1528,11 +1573,12 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
term_cpu = get_config_cpu(&parsed_terms);
|
||||
evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true,
|
||||
get_config_name(&parsed_terms),
|
||||
get_config_metric_id(&parsed_terms), pmu,
|
||||
&config_terms, auto_merge_stats, /*cpu_list=*/NULL,
|
||||
alternate_hw_config);
|
||||
&config_terms, auto_merge_stats, term_cpu, alternate_hw_config);
|
||||
perf_cpu_map__put(term_cpu);
|
||||
if (!evsel) {
|
||||
parse_events_terms__exit(&parsed_terms);
|
||||
return -ENOMEM;
|
||||
|
|
|
|||
|
|
@ -80,7 +80,8 @@ enum parse_events__term_type {
|
|||
PARSE_EVENTS__TERM_TYPE_RAW,
|
||||
PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
|
||||
PARSE_EVENTS__TERM_TYPE_HARDWARE,
|
||||
#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_HARDWARE + 1)
|
||||
PARSE_EVENTS__TERM_TYPE_CPU,
|
||||
#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_CPU + 1)
|
||||
};
|
||||
|
||||
struct parse_events_term {
|
||||
|
|
|
|||
|
|
@ -335,6 +335,7 @@ aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
|
|||
aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); }
|
||||
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
|
||||
metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
|
||||
cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); }
|
||||
cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
|
||||
stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
|
||||
stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
|
||||
|
|
|
|||
|
|
@ -1470,7 +1470,7 @@ static int pmu_config_term(const struct perf_pmu *pmu,
|
|||
break;
|
||||
case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */
|
||||
return -EINVAL;
|
||||
case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE:
|
||||
case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_CPU:
|
||||
/* Skip non-config terms. */
|
||||
break;
|
||||
default:
|
||||
|
|
@ -1852,6 +1852,7 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call
|
|||
"aux-output",
|
||||
"aux-action=(pause|resume|start-paused)",
|
||||
"aux-sample-size=number",
|
||||
"cpu=number",
|
||||
};
|
||||
struct perf_pmu_format *format;
|
||||
int ret;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue