2022-12-06 10:02:36 +05:30
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include <linux/list.h>
|
perf pmus: Sort pmus by name then suffix
Sort PMUs by name. If two PMUs have the same name but differ by
suffix, sort the suffixes numerically.
For example, "breakpoint" comes before "cpu",
"uncore_imc_free_running_0" comes before "uncore_imc_free_running_1".
Suffixes need to be treated specially as otherwise they will be ordered
like 0, 1, 10, 11, .., 2, 20, 21, .., etc. Only PMUs starting 'uncore_'
are considered to have a potential suffix.
Sorting of PMUs is done so that later patches can skip duplicate uncore
PMUs that differ only by their suffix.
Committer notes:
Used the more compact, intention revealing strstarts() function we got
from the kernel sources:
- if (strncmp(str, "uncore_", 7))
+ if (!strstarts(str, "uncore_"))
Also in pmus_cmp() the lhs_num and rhs_num variables may end up not
being set for non "uncore_" prefixed PMUs in pmu_name_len_no_suffix(),
or at least gcc 7.5 in some distros (opensuse 15.5, to be EOLed in
Dec/2024) thinks so, so initialize both to zero.
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230825135237.921058-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-08-25 06:52:36 -07:00
|
|
|
#include <linux/list_sort.h>
|
|
|
|
#include <linux/string.h>
|
2023-05-27 00:22:03 -07:00
|
|
|
#include <linux/zalloc.h>
|
2025-02-21 22:10:08 -08:00
|
|
|
#include <api/io_dir.h>
|
2023-05-27 00:22:03 -07:00
|
|
|
#include <subcmd/pager.h>
|
|
|
|
#include <sys/types.h>
|
perf pmus: Sort pmus by name then suffix
Sort PMUs by name. If two PMUs have the same name but differ by
suffix, sort the suffixes numerically.
For example, "breakpoint" comes before "cpu",
"uncore_imc_free_running_0" comes before "uncore_imc_free_running_1".
Suffixes need to be treated specially as otherwise they will be ordered
like 0, 1, 10, 11, .., 2, 20, 21, .., etc. Only PMUs starting 'uncore_'
are considered to have a potential suffix.
Sorting of PMUs is done so that later patches can skip duplicate uncore
PMUs that differ only by their suffix.
Committer notes:
Used the more compact, intention revealing strstarts() function we got
from the kernel sources:
- if (strncmp(str, "uncore_", 7))
+ if (!strstarts(str, "uncore_"))
Also in pmus_cmp() the lhs_num and rhs_num variables may end up not
being set for non "uncore_" prefixed PMUs in pmu_name_len_no_suffix(),
or at least gcc 7.5 in some distros (opensuse 15.5, to be EOLed in
Dec/2024) thinks so, so initialize both to zero.
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230825135237.921058-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-08-25 06:52:36 -07:00
|
|
|
#include <ctype.h>
|
2023-06-16 19:01:34 -03:00
|
|
|
#include <pthread.h>
|
2023-05-02 15:38:37 -07:00
|
|
|
#include <string.h>
|
2023-05-27 00:22:03 -07:00
|
|
|
#include <unistd.h>
|
2023-09-13 16:33:48 +01:00
|
|
|
#include "cpumap.h"
|
2023-05-27 00:22:03 -07:00
|
|
|
#include "debug.h"
|
perf drm_pmu: Add a tool like PMU to expose DRM information
DRM clients expose information through usage stats as documented in
Documentation/gpu/drm-usage-stats.rst (available online at
https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like
PMU, similar to the hwmon PMU, that exposes DRM information. For
example on a tigerlake laptop:
```
$ perf list drm
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
```
System wide data can be gathered:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0
1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,,
1.000904910,0,bytes,drm-active-system0,1,100.00,,
1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,,
1.000904910,0,ns,drm-engine-copy,1,100.00,,
1.000904910,1472970566175,ns,drm-engine-render,1,100.00,,
1.000904910,0,ns,drm-engine-video,1,100.00,,
1.000904910,0,ns,drm-engine-video-enhance,1,100.00,,
1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,,
1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,,
1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,,
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,,
1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,,
1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-total-system0,1,100.00,,
2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,,
```
Or for a particular process:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027
1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-active-system0,6,100.00,,
1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,,
1.001040274,0,ns,drm-engine-copy,6,100.00,,
1.001040274,1542300,ns,drm-engine-render,6,100.00,,
1.001040274,0,ns,drm-engine-video,6,100.00,,
1.001040274,0,ns,drm-engine-video-enhance,6,100.00,,
1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,,
1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,,
1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-resident-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-system0,6,100.00,,
1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-total-system0,6,100.00,,
2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,,
```
As with the hwmon PMU, high numbered PMU types are used to encode
multiple possible "DRM" PMUs. The appropriate fdinfo is found by
scanning /proc and filtering which fdinfos to read with stat. To avoid
some unneeded scanning, events not starting with "drm-" are
ignored. The patch builds on commit 57e13264dcea ("perf pmus:
Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only
if full wild carding is being done, the PMU starts with "drm_" or the
event starts with "drm-" will /proc be scanned. That is there should
be little to no cost in this PMU unless DRM events are requested.
Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-06-24 16:18:36 -07:00
|
|
|
#include "drm_pmu.h"
|
2023-05-27 00:22:03 -07:00
|
|
|
#include "evsel.h"
|
2023-05-02 15:38:37 -07:00
|
|
|
#include "pmus.h"
|
|
|
|
#include "pmu.h"
|
2024-11-08 16:37:57 -08:00
|
|
|
#include "hwmon_pmu.h"
|
2024-10-01 20:20:07 -07:00
|
|
|
#include "tool_pmu.h"
|
2023-05-27 00:22:03 -07:00
|
|
|
#include "print-events.h"
|
perf list: Give more details about raw event encodings
List all the PMUs, not just the first core one, and list real format
specifiers with value ranges.
Before:
$ perf list
...
rNNN [Raw hardware event descriptor]
cpu/t1=v1[,t2=v2,t3 ...]/modifier [Raw hardware event descriptor]
[(see 'man perf-list' on how to encode it)]
mem:<addr>[/len][:access] [Hardware breakpoint]
...
After:
$ perf list
...
rNNN [Raw event descriptor]
cpu/event=0..255,pc,edge,.../modifier [Raw event descriptor]
[(see 'man perf-list' or 'man perf-record' on how to encode it)]
breakpoint//modifier [Raw event descriptor]
cstate_core/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_pkg/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
i915/i915_eventid=0..0x1fffff/modifier [Raw event descriptor]
intel_bts//modifier [Raw event descriptor]
intel_pt/ptw,event,cyc_thresh=0..15,.../modifier [Raw event descriptor]
kprobe/retprobe/modifier [Raw event descriptor]
msr/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
power/event=0..255/modifier [Raw event descriptor]
software//modifier [Raw event descriptor]
tracepoint//modifier [Raw event descriptor]
uncore_arb/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_cbox/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_clock/event=0..255/modifier [Raw event descriptor]
uncore_imc_free_running/event=0..255,umask=0..255/modifier[Raw event descriptor]
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier[Raw event descriptor]
mem:<addr>[/len][:access] [Hardware breakpoint]
...
With '--details' provide more details on the formats encoding:
cpu/event=0..255,pc,edge,.../modifier [Raw event descriptor]
[(see 'man perf-list' or 'man perf-record' on how to encode it)]
cpu/event=0..255,pc,edge,offcore_rsp=0..0xffffffffffffffff,ldlat=0..0xffff,inv,
umask=0..255,frontend=0..0xffffff,cmask=0..255,config=0..0xffffffffffffffff,
config1=0..0xffffffffffffffff,config2=0..0xffffffffffffffff,config3=0..0xffffffffffffffff,
name=string,period=number,freq=number,branch_type=(u|k|hv|any|...),time,
call-graph=(fp|dwarf|lbr),stack-size=number,max-stack=number,nr=number,inherit,no-inherit,
overwrite,no-overwrite,percore,aux-output,aux-sample-size=number/modifier
breakpoint//modifier [Raw event descriptor]
breakpoint//modifier
cstate_core/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_core/event=0..0xffffffffffffffff/modifier
cstate_pkg/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_pkg/event=0..0xffffffffffffffff/modifier
i915/i915_eventid=0..0x1fffff/modifier [Raw event descriptor]
i915/i915_eventid=0..0x1fffff/modifier
intel_bts//modifier [Raw event descriptor]
intel_bts//modifier
intel_pt/ptw,event,cyc_thresh=0..15,.../modifier [Raw event descriptor]
intel_pt/ptw,event,cyc_thresh=0..15,pt,notnt,branch,tsc,pwr_evt,fup_on_ptw,cyc,noretcomp,
mtc,psb_period=0..15,mtc_period=0..15/modifier
kprobe/retprobe/modifier [Raw event descriptor]
kprobe/retprobe/modifier
msr/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
msr/event=0..0xffffffffffffffff/modifier
power/event=0..255/modifier [Raw event descriptor]
power/event=0..255/modifier
software//modifier [Raw event descriptor]
software//modifier
tracepoint//modifier [Raw event descriptor]
tracepoint//modifier
uncore_arb/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_arb/event=0..255,edge,inv,umask=0..255,cmask=0..31/modifier
uncore_cbox/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_cbox/event=0..255,edge,inv,umask=0..255,cmask=0..31/modifier
uncore_clock/event=0..255/modifier [Raw event descriptor]
uncore_clock/event=0..255/modifier
uncore_imc_free_running/event=0..255,umask=0..255/modifier[Raw event descriptor]
uncore_imc_free_running/event=0..255,umask=0..255/modifier
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier[Raw event descriptor]
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier
Committer notes:
Address this build error in various distros:
55 58.44 ubuntu:24.04 : FAIL gcc version 13.2.0 (Ubuntu 13.2.0-17ubuntu2)
util/pmu.c:1638:70: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
1638 | _Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 6);
| ^
| , ""
1 error generated.
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Link: https://lore.kernel.org/r/20240308001915.4060155-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-03-07 16:19:13 -08:00
|
|
|
#include "strbuf.h"
|
2025-06-24 16:18:35 -07:00
|
|
|
#include "string2.h"
|
2022-12-06 10:02:36 +05:30
|
|
|
|
2023-06-15 10:46:58 +05:30
|
|
|
/*
|
|
|
|
* core_pmus: A PMU belongs to core_pmus if its name is "cpu" or its sysfs
|
|
|
|
* directory contains "cpus" file. All PMUs belonging to core_pmus
|
|
|
|
* must have pmu->is_core=1. If there is more than one PMU in
|
|
|
|
* this list, perf interprets it as a heterogeneous platform.
|
|
|
|
* (FWIW, certain ARM platforms having heterogeneous cores use
|
|
|
|
* homogeneous PMU, and thus they are treated as homogeneous
|
|
|
|
* platform by perf because core_pmus will have only one entry)
|
|
|
|
* other_pmus: All other PMUs which are not part of core_pmus list. It doesn't
|
|
|
|
* matter whether PMU is present per SMT-thread or outside of the
|
|
|
|
* core in the hw. E.g., an instance of AMD ibs_fetch// and
|
|
|
|
* ibs_op// PMUs is present in each hw SMT thread, however they
|
|
|
|
* are captured under other_pmus. PMUs belonging to other_pmus
|
|
|
|
* must have pmu->is_core=0 but pmu->is_uncore could be 0 or 1.
|
|
|
|
*/
|
2023-05-27 00:22:04 -07:00
|
|
|
static LIST_HEAD(core_pmus);
|
|
|
|
static LIST_HEAD(other_pmus);
|
2025-01-31 23:43:17 -08:00
|
|
|
enum perf_tool_pmu_type {
|
|
|
|
PERF_TOOL_PMU_TYPE_PE_CORE,
|
|
|
|
PERF_TOOL_PMU_TYPE_PE_OTHER,
|
|
|
|
PERF_TOOL_PMU_TYPE_TOOL,
|
|
|
|
PERF_TOOL_PMU_TYPE_HWMON,
|
perf drm_pmu: Add a tool like PMU to expose DRM information
DRM clients expose information through usage stats as documented in
Documentation/gpu/drm-usage-stats.rst (available online at
https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like
PMU, similar to the hwmon PMU, that exposes DRM information. For
example on a tigerlake laptop:
```
$ perf list drm
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
```
System wide data can be gathered:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0
1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,,
1.000904910,0,bytes,drm-active-system0,1,100.00,,
1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,,
1.000904910,0,ns,drm-engine-copy,1,100.00,,
1.000904910,1472970566175,ns,drm-engine-render,1,100.00,,
1.000904910,0,ns,drm-engine-video,1,100.00,,
1.000904910,0,ns,drm-engine-video-enhance,1,100.00,,
1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,,
1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,,
1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,,
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,,
1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,,
1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-total-system0,1,100.00,,
2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,,
```
Or for a particular process:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027
1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-active-system0,6,100.00,,
1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,,
1.001040274,0,ns,drm-engine-copy,6,100.00,,
1.001040274,1542300,ns,drm-engine-render,6,100.00,,
1.001040274,0,ns,drm-engine-video,6,100.00,,
1.001040274,0,ns,drm-engine-video-enhance,6,100.00,,
1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,,
1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,,
1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-resident-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-system0,6,100.00,,
1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-total-system0,6,100.00,,
2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,,
```
As with the hwmon PMU, high numbered PMU types are used to encode
multiple possible "DRM" PMUs. The appropriate fdinfo is found by
scanning /proc and filtering which fdinfos to read with stat. To avoid
some unneeded scanning, events not starting with "drm-" are
ignored. The patch builds on commit 57e13264dcea ("perf pmus:
Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only
if full wild carding is being done, the PMU starts with "drm_" or the
event starts with "drm-" will /proc be scanned. That is there should
be little to no cost in this PMU unless DRM events are requested.
Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-06-24 16:18:36 -07:00
|
|
|
PERF_TOOL_PMU_TYPE_DRM,
|
2025-01-31 23:43:17 -08:00
|
|
|
|
|
|
|
#define PERF_TOOL_PMU_TYPE_PE_CORE_MASK (1 << PERF_TOOL_PMU_TYPE_PE_CORE)
|
|
|
|
#define PERF_TOOL_PMU_TYPE_PE_OTHER_MASK (1 << PERF_TOOL_PMU_TYPE_PE_OTHER)
|
|
|
|
#define PERF_TOOL_PMU_TYPE_TOOL_MASK (1 << PERF_TOOL_PMU_TYPE_TOOL)
|
|
|
|
#define PERF_TOOL_PMU_TYPE_HWMON_MASK (1 << PERF_TOOL_PMU_TYPE_HWMON)
|
perf drm_pmu: Add a tool like PMU to expose DRM information
DRM clients expose information through usage stats as documented in
Documentation/gpu/drm-usage-stats.rst (available online at
https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like
PMU, similar to the hwmon PMU, that exposes DRM information. For
example on a tigerlake laptop:
```
$ perf list drm
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
```
System wide data can be gathered:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0
1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,,
1.000904910,0,bytes,drm-active-system0,1,100.00,,
1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,,
1.000904910,0,ns,drm-engine-copy,1,100.00,,
1.000904910,1472970566175,ns,drm-engine-render,1,100.00,,
1.000904910,0,ns,drm-engine-video,1,100.00,,
1.000904910,0,ns,drm-engine-video-enhance,1,100.00,,
1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,,
1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,,
1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,,
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,,
1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,,
1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-total-system0,1,100.00,,
2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,,
```
Or for a particular process:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027
1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-active-system0,6,100.00,,
1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,,
1.001040274,0,ns,drm-engine-copy,6,100.00,,
1.001040274,1542300,ns,drm-engine-render,6,100.00,,
1.001040274,0,ns,drm-engine-video,6,100.00,,
1.001040274,0,ns,drm-engine-video-enhance,6,100.00,,
1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,,
1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,,
1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-resident-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-system0,6,100.00,,
1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-total-system0,6,100.00,,
2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,,
```
As with the hwmon PMU, high numbered PMU types are used to encode
multiple possible "DRM" PMUs. The appropriate fdinfo is found by
scanning /proc and filtering which fdinfos to read with stat. To avoid
some unneeded scanning, events not starting with "drm-" are
ignored. The patch builds on commit 57e13264dcea ("perf pmus:
Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only
if full wild carding is being done, the PMU starts with "drm_" or the
event starts with "drm-" will /proc be scanned. That is there should
be little to no cost in this PMU unless DRM events are requested.
Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-06-24 16:18:36 -07:00
|
|
|
#define PERF_TOOL_PMU_TYPE_DRM_MASK (1 << PERF_TOOL_PMU_TYPE_DRM)
|
2025-01-31 23:43:17 -08:00
|
|
|
|
|
|
|
#define PERF_TOOL_PMU_TYPE_ALL_MASK (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | \
|
|
|
|
PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | \
|
|
|
|
PERF_TOOL_PMU_TYPE_TOOL_MASK | \
|
perf drm_pmu: Add a tool like PMU to expose DRM information
DRM clients expose information through usage stats as documented in
Documentation/gpu/drm-usage-stats.rst (available online at
https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like
PMU, similar to the hwmon PMU, that exposes DRM information. For
example on a tigerlake laptop:
```
$ perf list drm
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
```
System wide data can be gathered:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0
1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,,
1.000904910,0,bytes,drm-active-system0,1,100.00,,
1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,,
1.000904910,0,ns,drm-engine-copy,1,100.00,,
1.000904910,1472970566175,ns,drm-engine-render,1,100.00,,
1.000904910,0,ns,drm-engine-video,1,100.00,,
1.000904910,0,ns,drm-engine-video-enhance,1,100.00,,
1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,,
1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,,
1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,,
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,,
1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,,
1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-total-system0,1,100.00,,
2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,,
```
Or for a particular process:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027
1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-active-system0,6,100.00,,
1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,,
1.001040274,0,ns,drm-engine-copy,6,100.00,,
1.001040274,1542300,ns,drm-engine-render,6,100.00,,
1.001040274,0,ns,drm-engine-video,6,100.00,,
1.001040274,0,ns,drm-engine-video-enhance,6,100.00,,
1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,,
1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,,
1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-resident-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-system0,6,100.00,,
1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-total-system0,6,100.00,,
2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,,
```
As with the hwmon PMU, high numbered PMU types are used to encode
multiple possible "DRM" PMUs. The appropriate fdinfo is found by
scanning /proc and filtering which fdinfos to read with stat. To avoid
some unneeded scanning, events not starting with "drm-" are
ignored. The patch builds on commit 57e13264dcea ("perf pmus:
Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only
if full wild carding is being done, the PMU starts with "drm_" or the
event starts with "drm-" will /proc be scanned. That is there should
be little to no cost in this PMU unless DRM events are requested.
Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-06-24 16:18:36 -07:00
|
|
|
PERF_TOOL_PMU_TYPE_HWMON_MASK | \
|
|
|
|
PERF_TOOL_PMU_TYPE_DRM_MASK)
|
2025-01-31 23:43:17 -08:00
|
|
|
};
|
|
|
|
/*
 * NOTE(review): presumably a bitmask of PERF_TOOL_PMU_TYPE_*_MASK bits
 * recording which PMU types have already been read; confirm against
 * pmu_read_sysfs().
 */
static unsigned int read_pmu_types;
|
|
|
|
|
|
|
|
static void pmu_read_sysfs(unsigned int to_read_pmus);
|
2023-09-24 23:23:23 -07:00
|
|
|
|
2024-05-14 23:01:13 -07:00
|
|
|
size_t pmu_name_len_no_suffix(const char *str)
|
perf pmus: Sort pmus by name then suffix
Sort PMUs by name. If two PMUs have the same name but differ by
suffix, sort the suffixes numerically.
For example, "breakpoint" comes before "cpu",
"uncore_imc_free_running_0" comes before "uncore_imc_free_running_1".
Suffixes need to be treated specially as otherwise they will be ordered
like 0, 1, 10, 11, .., 2, 20, 21, .., etc. Only PMUs starting 'uncore_'
are considered to have a potential suffix.
Sorting of PMUs is done so that later patches can skip duplicate uncore
PMUs that differ only by their suffix.
Committer notes:
Used the more compact, intention revealing strstarts() function we got
from the kernel sources:
- if (strncmp(str, "uncore_", 7))
+ if (!strstarts(str, "uncore_"))
Also in pmus_cmp() the lhs_num and rhs_num variables may end up not
being set for non "uncore_" prefixed PMUs in pmu_name_len_no_suffix(),
or at least gcc 7.5 in some distros (opensuse 15.5, to be EOLed in
Dec/2024) thins so, so initialize both to zero.
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230825135237.921058-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-08-25 06:52:36 -07:00
|
|
|
{
|
|
|
|
int orig_len, len;
|
2024-05-14 23:01:13 -07:00
|
|
|
bool has_hex_digits = false;
|
perf pmus: Sort pmus by name then suffix
Sort PMUs by name. If two PMUs have the same name but differ by
suffix, sort the suffixes numerically.
For example, "breakpoint" comes before "cpu",
"uncore_imc_free_running_0" comes before "uncore_imc_free_running_1".
Suffixes need to be treated specially as otherwise they will be ordered
like 0, 1, 10, 11, .., 2, 20, 21, .., etc. Only PMUs starting 'uncore_'
are considered to have a potential suffix.
Sorting of PMUs is done so that later patches can skip duplicate uncore
PMUs that differ only by there suffix.
Committer notes:
Used the more compact, intention revealing strstarts() function we got
from the kernel sources:
- if (strncmp(str, "uncore_", 7))
+ if (!strstarts(str, "uncore_"))
Also in pmus_cmp() the lhs_num and rhs_num variables may end up not
being set for non "uncore_" prefixed PMUs in pmu_name_len_no_suffix(),
or at least gcc 7.5 in some distros (opensuse 15.5, to be EOLed in
Dec/2024) thins so, so initialize both to zero.
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230825135237.921058-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-08-25 06:52:36 -07:00
|
|
|
|
|
|
|
orig_len = len = strlen(str);
|
|
|
|
|
2024-05-14 23:01:13 -07:00
|
|
|
/* Count trailing digits. */
|
|
|
|
while (len > 0 && isxdigit(str[len - 1])) {
|
|
|
|
if (!isdigit(str[len - 1]))
|
|
|
|
has_hex_digits = true;
|
perf pmus: Sort pmus by name then suffix
Sort PMUs by name. If two PMUs have the same name but differ by
suffix, sort the suffixes numerically.
For example, "breakpoint" comes before "cpu",
"uncore_imc_free_running_0" comes before "uncore_imc_free_running_1".
Suffixes need to be treated specially as otherwise they will be ordered
like 0, 1, 10, 11, .., 2, 20, 21, .., etc. Only PMUs starting 'uncore_'
are considered to have a potential suffix.
Sorting of PMUs is done so that later patches can skip duplicate uncore
PMUs that differ only by there suffix.
Committer notes:
Used the more compact, intention revealing strstarts() function we got
from the kernel sources:
- if (strncmp(str, "uncore_", 7))
+ if (!strstarts(str, "uncore_"))
Also in pmus_cmp() the lhs_num and rhs_num variables may end up not
being set for non "uncore_" prefixed PMUs in pmu_name_len_no_suffix(),
or at least gcc 7.5 in some distros (opensuse 15.5, to be EOLed in
Dec/2024) thins so, so initialize both to zero.
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230825135237.921058-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-08-25 06:52:36 -07:00
|
|
|
len--;
|
2024-05-14 23:01:13 -07:00
|
|
|
}
|
perf pmus: Sort pmus by name then suffix
Sort PMUs by name. If two PMUs have the same name but differ by
suffix, sort the suffixes numerically.
For example, "breakpoint" comes before "cpu",
"uncore_imc_free_running_0" comes before "uncore_imc_free_running_1".
Suffixes need to be treated specially as otherwise they will be ordered
like 0, 1, 10, 11, .., 2, 20, 21, .., etc. Only PMUs starting 'uncore_'
are considered to have a potential suffix.
Sorting of PMUs is done so that later patches can skip duplicate uncore
PMUs that differ only by there suffix.
Committer notes:
Used the more compact, intention revealing strstarts() function we got
from the kernel sources:
- if (strncmp(str, "uncore_", 7))
+ if (!strstarts(str, "uncore_"))
Also in pmus_cmp() the lhs_num and rhs_num variables may end up not
being set for non "uncore_" prefixed PMUs in pmu_name_len_no_suffix(),
or at least gcc 7.5 in some distros (opensuse 15.5, to be EOLed in
Dec/2024) thins so, so initialize both to zero.
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230825135237.921058-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-08-25 06:52:36 -07:00
|
|
|
|
|
|
|
if (len > 0 && len != orig_len && str[len - 1] == '_') {
|
2024-05-14 23:01:13 -07:00
|
|
|
/*
|
|
|
|
* There is a '_{num}' suffix. For decimal suffixes any length
|
|
|
|
* will do, for hexadecimal ensure more than 2 hex digits so
|
|
|
|
* that S390's cpum_cf PMU doesn't match.
|
|
|
|
*/
|
|
|
|
if (!has_hex_digits || (orig_len - len) > 2)
|
|
|
|
return len - 1;
|
perf pmus: Sort pmus by name then suffix
Sort PMUs by name. If two PMUs have the same name but differ by
suffix, sort the suffixes numerically.
For example, "breakpoint" comes before "cpu",
"uncore_imc_free_running_0" comes before "uncore_imc_free_running_1".
Suffixes need to be treated specially as otherwise they will be ordered
like 0, 1, 10, 11, .., 2, 20, 21, .., etc. Only PMUs starting 'uncore_'
are considered to have a potential suffix.
Sorting of PMUs is done so that later patches can skip duplicate uncore
PMUs that differ only by there suffix.
Committer notes:
Used the more compact, intention revealing strstarts() function we got
from the kernel sources:
- if (strncmp(str, "uncore_", 7))
+ if (!strstarts(str, "uncore_"))
Also in pmus_cmp() the lhs_num and rhs_num variables may end up not
being set for non "uncore_" prefixed PMUs in pmu_name_len_no_suffix(),
or at least gcc 7.5 in some distros (opensuse 15.5, to be EOLed in
Dec/2024) thins so, so initialize both to zero.
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230825135237.921058-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-08-25 06:52:36 -07:00
|
|
|
}
|
2024-05-14 23:01:13 -07:00
|
|
|
/* Use the full length. */
|
perf pmus: Sort pmus by name then suffix
Sort PMUs by name. If two PMUs have the same name but differ by
suffix, sort the suffixes numerically.
For example, "breakpoint" comes before "cpu",
"uncore_imc_free_running_0" comes before "uncore_imc_free_running_1".
Suffixes need to be treated specially as otherwise they will be ordered
like 0, 1, 10, 11, .., 2, 20, 21, .., etc. Only PMUs starting 'uncore_'
are considered to have a potential suffix.
Sorting of PMUs is done so that later patches can skip duplicate uncore
PMUs that differ only by there suffix.
Committer notes:
Used the more compact, intention revealing strstarts() function we got
from the kernel sources:
- if (strncmp(str, "uncore_", 7))
+ if (!strstarts(str, "uncore_"))
Also in pmus_cmp() the lhs_num and rhs_num variables may end up not
being set for non "uncore_" prefixed PMUs in pmu_name_len_no_suffix(),
or at least gcc 7.5 in some distros (opensuse 15.5, to be EOLed in
Dec/2024) thins so, so initialize both to zero.
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230825135237.921058-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-08-25 06:52:36 -07:00
|
|
|
return orig_len;
|
|
|
|
}
|
|
|
|
|
2024-05-14 23:01:13 -07:00
|
|
|
/*
 * Compare two PMU names, ordering first by the name with any "_<num>" suffix
 * removed (see pmu_name_len_no_suffix()) and then numerically by the suffix.
 *
 * Returns a value less than, equal to or greater than zero when
 * @lhs_pmu_name sorts before, the same as or after @rhs_pmu_name.
 */
int pmu_name_cmp(const char *lhs_pmu_name, const char *rhs_pmu_name)
{
	unsigned long long lhs_num = 0, rhs_num = 0;
	size_t lhs_pmu_name_len = pmu_name_len_no_suffix(lhs_pmu_name);
	size_t rhs_pmu_name_len = pmu_name_len_no_suffix(rhs_pmu_name);
	int ret = strncmp(lhs_pmu_name, rhs_pmu_name,
			lhs_pmu_name_len < rhs_pmu_name_len ? lhs_pmu_name_len : rhs_pmu_name_len);

	if (ret != 0)
		return ret;

	/*
	 * The names agree over the shorter length. If one name is a strict
	 * prefix of the other (e.g. "cpu" and "cpu_atom") they are different
	 * names, so order the shorter one first rather than reporting them
	 * as equal.
	 */
	if (lhs_pmu_name_len != rhs_pmu_name_len)
		return lhs_pmu_name_len < rhs_pmu_name_len ? -1 : 1;

	if (lhs_pmu_name_len == 0)
		return 0;

	/*
	 * Same name without the suffix, compare the suffixes. The suffix
	 * starts after the '_' and is parsed as hex, which also orders
	 * decimal suffixes correctly. A missing suffix counts as 0.
	 */
	if (lhs_pmu_name_len + 1 < strlen(lhs_pmu_name))
		lhs_num = strtoull(&lhs_pmu_name[lhs_pmu_name_len + 1], NULL, 16);
	if (rhs_pmu_name_len + 1 < strlen(rhs_pmu_name))
		rhs_num = strtoull(&rhs_pmu_name[rhs_pmu_name_len + 1], NULL, 16);

	return lhs_num < rhs_num ? -1 : (lhs_num > rhs_num ? 1 : 0);
}
|
|
|
|
|
2023-05-27 00:22:03 -07:00
|
|
|
void perf_pmus__destroy(void)
|
|
|
|
{
|
|
|
|
struct perf_pmu *pmu, *tmp;
|
|
|
|
|
2023-05-27 00:22:04 -07:00
|
|
|
list_for_each_entry_safe(pmu, tmp, &core_pmus, list) {
|
|
|
|
list_del(&pmu->list);
|
|
|
|
|
|
|
|
perf_pmu__delete(pmu);
|
|
|
|
}
|
|
|
|
list_for_each_entry_safe(pmu, tmp, &other_pmus, list) {
|
2023-05-27 00:22:03 -07:00
|
|
|
list_del(&pmu->list);
|
|
|
|
|
|
|
|
perf_pmu__delete(pmu);
|
|
|
|
}
|
2025-01-31 23:43:17 -08:00
|
|
|
read_pmu_types = 0;
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct perf_pmu *pmu_find(const char *name)
|
|
|
|
{
|
|
|
|
struct perf_pmu *pmu;
|
|
|
|
|
2023-05-27 00:22:04 -07:00
|
|
|
list_for_each_entry(pmu, &core_pmus, list) {
|
|
|
|
if (!strcmp(pmu->name, name) ||
|
|
|
|
(pmu->alias_name && !strcmp(pmu->alias_name, name)))
|
|
|
|
return pmu;
|
|
|
|
}
|
|
|
|
list_for_each_entry(pmu, &other_pmus, list) {
|
2023-05-27 00:22:03 -07:00
|
|
|
if (!strcmp(pmu->name, name) ||
|
|
|
|
(pmu->alias_name && !strcmp(pmu->alias_name, name)))
|
|
|
|
return pmu;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct perf_pmu *perf_pmus__find(const char *name)
|
|
|
|
{
|
|
|
|
struct perf_pmu *pmu;
|
|
|
|
int dirfd;
|
2023-05-27 00:22:06 -07:00
|
|
|
bool core_pmu;
|
2025-01-31 23:43:17 -08:00
|
|
|
unsigned int to_read_pmus = 0;
|
2023-05-27 00:22:03 -07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Once PMU is loaded it stays in the list,
|
|
|
|
* so we keep us from multiple reading/parsing
|
|
|
|
* the pmu format definitions.
|
|
|
|
*/
|
|
|
|
pmu = pmu_find(name);
|
|
|
|
if (pmu)
|
|
|
|
return pmu;
|
|
|
|
|
2025-01-31 23:43:17 -08:00
|
|
|
if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)
|
2023-05-27 00:22:06 -07:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
core_pmu = is_pmu_core(name);
|
2025-01-31 23:43:17 -08:00
|
|
|
if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK))
|
2023-05-27 00:22:06 -07:00
|
|
|
return NULL;
|
|
|
|
|
2023-05-27 00:22:03 -07:00
|
|
|
dirfd = perf_pmu__event_source_devices_fd();
|
2024-05-02 14:35:04 -07:00
|
|
|
pmu = perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name,
|
|
|
|
/*eager_load=*/false);
|
2023-05-27 00:22:03 -07:00
|
|
|
close(dirfd);
|
|
|
|
|
2025-01-31 23:43:17 -08:00
|
|
|
if (pmu)
|
|
|
|
return pmu;
|
|
|
|
|
|
|
|
/* Looking up an individual perf event PMU failed, check if a tool PMU should be read. */
|
|
|
|
if (!strncmp(name, "hwmon_", 6))
|
|
|
|
to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK;
|
perf drm_pmu: Add a tool like PMU to expose DRM information
DRM clients expose information through usage stats as documented in
Documentation/gpu/drm-usage-stats.rst (available online at
https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like
PMU, similar to the hwmon PMU, that exposes DRM information. For
example on a tigerlake laptop:
```
$ perf list drm
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
```
System wide data can be gathered:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0
1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,,
1.000904910,0,bytes,drm-active-system0,1,100.00,,
1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,,
1.000904910,0,ns,drm-engine-copy,1,100.00,,
1.000904910,1472970566175,ns,drm-engine-render,1,100.00,,
1.000904910,0,ns,drm-engine-video,1,100.00,,
1.000904910,0,ns,drm-engine-video-enhance,1,100.00,,
1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,,
1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,,
1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,,
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,,
1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,,
1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-total-system0,1,100.00,,
2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,,
```
Or for a particular process:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027
1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-active-system0,6,100.00,,
1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,,
1.001040274,0,ns,drm-engine-copy,6,100.00,,
1.001040274,1542300,ns,drm-engine-render,6,100.00,,
1.001040274,0,ns,drm-engine-video,6,100.00,,
1.001040274,0,ns,drm-engine-video-enhance,6,100.00,,
1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,,
1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,,
1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-resident-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-system0,6,100.00,,
1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-total-system0,6,100.00,,
2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,,
```
As with the hwmon PMU, high numbered PMU types are used to encode
multiple possible "DRM" PMUs. The appropriate fdinfo is found by
scanning /proc and filtering which fdinfos to read with stat. To avoid
some unneeding scanning, events not starting with "drm-" are
ignored. The patch builds on commit 57e13264dcea ("perf pmus:
Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only
if full wild carding is being done, the PMU starts with "drm_" or the
event starts with "drm-" will /proc be scanned. That is there should
be little to no cost in this PMU unless DRM events are requested.
Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-06-24 16:18:36 -07:00
|
|
|
else if (!strncmp(name, "drm_", 4))
|
|
|
|
to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK;
|
2025-01-31 23:43:17 -08:00
|
|
|
else if (!strcmp(name, "tool"))
|
|
|
|
to_read_pmus |= PERF_TOOL_PMU_TYPE_TOOL_MASK;
|
|
|
|
|
|
|
|
if (to_read_pmus) {
|
|
|
|
pmu_read_sysfs(to_read_pmus);
|
2023-09-24 23:23:23 -07:00
|
|
|
pmu = pmu_find(name);
|
2025-01-31 23:43:17 -08:00
|
|
|
if (pmu)
|
|
|
|
return pmu;
|
2023-09-24 23:23:23 -07:00
|
|
|
}
|
2025-01-31 23:43:17 -08:00
|
|
|
/* Read all necessary PMUs from sysfs and see if the PMU is found. */
|
|
|
|
to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK;
|
|
|
|
if (!core_pmu)
|
|
|
|
to_read_pmus |= PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
|
|
|
|
pmu_read_sysfs(to_read_pmus);
|
|
|
|
return pmu_find(name);
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
|
|
|
|
{
|
|
|
|
struct perf_pmu *pmu;
|
2023-05-27 00:22:06 -07:00
|
|
|
bool core_pmu;
|
2023-05-27 00:22:03 -07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Once PMU is loaded it stays in the list,
|
|
|
|
* so we keep us from multiple reading/parsing
|
|
|
|
* the pmu format definitions.
|
|
|
|
*/
|
|
|
|
pmu = pmu_find(name);
|
|
|
|
if (pmu)
|
|
|
|
return pmu;
|
|
|
|
|
2025-01-31 23:43:17 -08:00
|
|
|
if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)
|
2023-05-27 00:22:06 -07:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
core_pmu = is_pmu_core(name);
|
2025-01-31 23:43:17 -08:00
|
|
|
if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK))
|
2023-05-27 00:22:06 -07:00
|
|
|
return NULL;
|
|
|
|
|
2024-05-02 14:35:04 -07:00
|
|
|
return perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name,
|
|
|
|
/*eager_load=*/false);
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
|
|
|
|
perf pmus: Sort pmus by name then suffix
Sort PMUs by name. If two PMUs have the same name but differ by
suffix, sort the suffixes numerically.
For example, "breakpoint" comes before "cpu",
"uncore_imc_free_running_0" comes before "uncore_imc_free_running_1".
Suffixes need to be treated specially as otherwise they will be ordered
like 0, 1, 10, 11, .., 2, 20, 21, .., etc. Only PMUs starting 'uncore_'
are considered to have a potential suffix.
Sorting of PMUs is done so that later patches can skip duplicate uncore
PMUs that differ only by their suffix.
Committer notes:
Used the more compact, intention revealing strstarts() function we got
from the kernel sources:
- if (strncmp(str, "uncore_", 7))
+ if (!strstarts(str, "uncore_"))
Also in pmus_cmp() the lhs_num and rhs_num variables may end up not
being set for non "uncore_" prefixed PMUs in pmu_name_len_no_suffix(),
or at least gcc 7.5 in some distros (opensuse 15.5, to be EOLed in
Dec/2024) thinks so, so initialize both to zero.
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230825135237.921058-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-08-25 06:52:36 -07:00
|
|
|
/*
 * list_sort() comparator ordering PMUs with pmu_name_cmp(). A PMU without a
 * name is compared as if its name were the empty string.
 */
static int pmus_cmp(void *priv __maybe_unused,
		    const struct list_head *lhs, const struct list_head *rhs)
{
	const struct perf_pmu *lhs_pmu = container_of(lhs, struct perf_pmu, list);
	const struct perf_pmu *rhs_pmu = container_of(rhs, struct perf_pmu, list);
	const char *lhs_name = lhs_pmu->name ? lhs_pmu->name : "";
	const char *rhs_name = rhs_pmu->name ? rhs_pmu->name : "";

	return pmu_name_cmp(lhs_name, rhs_name);
}
|
|
|
|
|
2023-05-27 00:22:03 -07:00
|
|
|
/* Add all pmus in sysfs to pmu list: */
|
2025-01-31 23:43:17 -08:00
|
|
|
static void pmu_read_sysfs(unsigned int to_read_types)
|
2023-05-27 00:22:03 -07:00
|
|
|
{
|
2024-10-01 20:20:07 -07:00
|
|
|
struct perf_pmu *tool_pmu;
|
2023-05-27 00:22:03 -07:00
|
|
|
|
2025-01-31 23:43:17 -08:00
|
|
|
if ((read_pmu_types & to_read_types) == to_read_types) {
|
|
|
|
/* All requested PMU types have been read. */
|
2023-05-27 00:22:06 -07:00
|
|
|
return;
|
2025-01-31 23:43:17 -08:00
|
|
|
}
|
2023-05-27 00:22:06 -07:00
|
|
|
|
2025-01-31 23:43:17 -08:00
|
|
|
if (to_read_types & (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | PERF_TOOL_PMU_TYPE_PE_OTHER_MASK)) {
|
|
|
|
int fd = perf_pmu__event_source_devices_fd();
|
2025-02-21 22:10:08 -08:00
|
|
|
struct io_dir dir;
|
|
|
|
struct io_dirent64 *dent;
|
2025-01-31 23:43:17 -08:00
|
|
|
bool core_only = (to_read_types & PERF_TOOL_PMU_TYPE_PE_OTHER_MASK) == 0;
|
2023-05-27 00:22:03 -07:00
|
|
|
|
2025-01-31 23:43:17 -08:00
|
|
|
if (fd < 0)
|
|
|
|
goto skip_pe_pmus;
|
2023-05-27 00:22:03 -07:00
|
|
|
|
2025-02-21 22:10:08 -08:00
|
|
|
io_dir__init(&dir, fd);
|
2025-01-31 23:43:17 -08:00
|
|
|
|
2025-02-21 22:10:08 -08:00
|
|
|
while ((dent = io_dir__readdir(&dir)) != NULL) {
|
2025-01-31 23:43:17 -08:00
|
|
|
if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
|
|
|
|
continue;
|
|
|
|
if (core_only && !is_pmu_core(dent->d_name))
|
|
|
|
continue;
|
|
|
|
/* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
|
|
|
|
perf_pmu__find2(fd, dent->d_name);
|
|
|
|
}
|
2023-05-27 00:22:03 -07:00
|
|
|
|
2025-02-21 22:10:08 -08:00
|
|
|
close(fd);
|
2025-01-31 23:43:17 -08:00
|
|
|
}
|
|
|
|
skip_pe_pmus:
|
|
|
|
if ((to_read_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK) && list_empty(&core_pmus)) {
|
2023-07-06 11:37:04 -07:00
|
|
|
if (!perf_pmu__create_placeholder_core_pmu(&core_pmus))
|
|
|
|
pr_err("Failure to set up any core PMUs\n");
|
|
|
|
}
|
perf pmus: Sort pmus by name then suffix
Sort PMUs by name. If two PMUs have the same name but differ by
suffix, sort the suffixes numerically.
For example, "breakpoint" comes before "cpu",
"uncore_imc_free_running_0" comes before "uncore_imc_free_running_1".
Suffixes need to be treated specially as otherwise they will be ordered
like 0, 1, 10, 11, .., 2, 20, 21, .., etc. Only PMUs starting 'uncore_'
are considered to have a potential suffix.
Sorting of PMUs is done so that later patches can skip duplicate uncore
PMUs that differ only by there suffix.
Committer notes:
Used the more compact, intention revealing strstarts() function we got
from the kernel sources:
- if (strncmp(str, "uncore_", 7))
+ if (!strstarts(str, "uncore_"))
Also in pmus_cmp() the lhs_num and rhs_num variables may end up not
being set for non "uncore_" prefixed PMUs in pmu_name_len_no_suffix(),
or at least gcc 7.5 in some distros (opensuse 15.5, to be EOLed in
Dec/2024) thins so, so initialize both to zero.
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230825135237.921058-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-08-25 06:52:36 -07:00
|
|
|
list_sort(NULL, &core_pmus, pmus_cmp);
|
2025-01-31 23:43:17 -08:00
|
|
|
|
|
|
|
if ((to_read_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) != 0 &&
|
|
|
|
(read_pmu_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) == 0) {
|
2025-02-26 10:41:00 +00:00
|
|
|
tool_pmu = tool_pmu__new();
|
2025-03-19 13:28:20 +01:00
|
|
|
if (tool_pmu)
|
|
|
|
list_add_tail(&tool_pmu->list, &other_pmus);
|
2024-10-01 20:20:07 -07:00
|
|
|
}
|
2025-01-31 23:43:17 -08:00
|
|
|
if ((to_read_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) != 0 &&
|
|
|
|
(read_pmu_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) == 0)
|
|
|
|
perf_pmus__read_hwmon_pmus(&other_pmus);
|
|
|
|
|
perf drm_pmu: Add a tool like PMU to expose DRM information
DRM clients expose information through usage stats as documented in
Documentation/gpu/drm-usage-stats.rst (available online at
https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like
PMU, similar to the hwmon PMU, that exposes DRM information. For
example on a tigerlake laptop:
```
$ perf list drm
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
```
System wide data can be gathered:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0
1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,,
1.000904910,0,bytes,drm-active-system0,1,100.00,,
1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,,
1.000904910,0,ns,drm-engine-copy,1,100.00,,
1.000904910,1472970566175,ns,drm-engine-render,1,100.00,,
1.000904910,0,ns,drm-engine-video,1,100.00,,
1.000904910,0,ns,drm-engine-video-enhance,1,100.00,,
1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,,
1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,,
1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,,
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,,
1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,,
1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-total-system0,1,100.00,,
2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,,
```
Or for a particular process:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027
1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-active-system0,6,100.00,,
1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,,
1.001040274,0,ns,drm-engine-copy,6,100.00,,
1.001040274,1542300,ns,drm-engine-render,6,100.00,,
1.001040274,0,ns,drm-engine-video,6,100.00,,
1.001040274,0,ns,drm-engine-video-enhance,6,100.00,,
1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,,
1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,,
1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-resident-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-system0,6,100.00,,
1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-total-system0,6,100.00,,
2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,,
```
As with the hwmon PMU, high numbered PMU types are used to encode
multiple possible "DRM" PMUs. The appropriate fdinfo is found by
scanning /proc and filtering which fdinfos to read with stat. To avoid
some unneeding scanning, events not starting with "drm-" are
ignored. The patch builds on commit 57e13264dcea ("perf pmus:
Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only
if full wild carding is being done, the PMU starts with "drm_" or the
event starts with "drm-" will /proc be scanned. That is there should
be little to no cost in this PMU unless DRM events are requested.
Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-06-24 16:18:36 -07:00
|
|
|
if ((to_read_types & PERF_TOOL_PMU_TYPE_DRM_MASK) != 0 &&
|
|
|
|
(read_pmu_types & PERF_TOOL_PMU_TYPE_DRM_MASK) == 0)
|
|
|
|
perf_pmus__read_drm_pmus(&other_pmus);
|
|
|
|
|
perf pmus: Sort pmus by name then suffix
Sort PMUs by name. If two PMUs have the same name but differ by
suffix, sort the suffixes numerically.
For example, "breakpoint" comes before "cpu",
"uncore_imc_free_running_0" comes before "uncore_imc_free_running_1".
Suffixes need to be treated specially as otherwise they will be ordered
like 0, 1, 10, 11, .., 2, 20, 21, .., etc. Only PMUs starting 'uncore_'
are considered to have a potential suffix.
Sorting of PMUs is done so that later patches can skip duplicate uncore
PMUs that differ only by there suffix.
Committer notes:
Used the more compact, intention revealing strstarts() function we got
from the kernel sources:
- if (strncmp(str, "uncore_", 7))
+ if (!strstarts(str, "uncore_"))
Also in pmus_cmp() the lhs_num and rhs_num variables may end up not
being set for non "uncore_" prefixed PMUs in pmu_name_len_no_suffix(),
or at least gcc 7.5 in some distros (opensuse 15.5, to be EOLed in
Dec/2024) thinks so, so initialize both to zero.
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230825135237.921058-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-08-25 06:52:36 -07:00
|
|
|
list_sort(NULL, &other_pmus, pmus_cmp);
|
2025-01-31 23:43:17 -08:00
|
|
|
|
|
|
|
read_pmu_types |= to_read_types;
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
|
|
|
|
2023-05-27 00:22:07 -07:00
|
|
|
static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
|
2023-05-27 00:22:03 -07:00
|
|
|
{
|
|
|
|
struct perf_pmu *pmu;
|
|
|
|
|
2023-05-27 00:22:04 -07:00
|
|
|
list_for_each_entry(pmu, &core_pmus, list) {
|
2023-05-27 00:22:03 -07:00
|
|
|
if (pmu->type == type)
|
|
|
|
return pmu;
|
2023-05-27 00:22:04 -07:00
|
|
|
}
|
2023-05-27 00:22:07 -07:00
|
|
|
|
2023-05-27 00:22:04 -07:00
|
|
|
list_for_each_entry(pmu, &other_pmus, list) {
|
|
|
|
if (pmu->type == type)
|
|
|
|
return pmu;
|
|
|
|
}
|
2023-05-27 00:22:03 -07:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2023-05-27 00:22:07 -07:00
|
|
|
struct perf_pmu *perf_pmus__find_by_type(unsigned int type)
|
|
|
|
{
|
2025-01-31 23:43:17 -08:00
|
|
|
unsigned int to_read_pmus;
|
2023-05-27 00:22:07 -07:00
|
|
|
struct perf_pmu *pmu = __perf_pmus__find_by_type(type);
|
|
|
|
|
2025-01-31 23:43:17 -08:00
|
|
|
if (pmu || (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK))
|
2023-05-27 00:22:07 -07:00
|
|
|
return pmu;
|
|
|
|
|
2025-01-31 23:43:17 -08:00
|
|
|
if (type >= PERF_PMU_TYPE_PE_START && type <= PERF_PMU_TYPE_PE_END) {
|
|
|
|
to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
|
|
|
|
PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
|
perf drm_pmu: Add a tool like PMU to expose DRM information
DRM clients expose information through usage stats as documented in
Documentation/gpu/drm-usage-stats.rst (available online at
https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like
PMU, similar to the hwmon PMU, that exposes DRM information. For
example on a tigerlake laptop:
```
$ perf list drm
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
```
System wide data can be gathered:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0
1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,,
1.000904910,0,bytes,drm-active-system0,1,100.00,,
1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,,
1.000904910,0,ns,drm-engine-copy,1,100.00,,
1.000904910,1472970566175,ns,drm-engine-render,1,100.00,,
1.000904910,0,ns,drm-engine-video,1,100.00,,
1.000904910,0,ns,drm-engine-video-enhance,1,100.00,,
1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,,
1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,,
1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,,
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,,
1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,,
1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-total-system0,1,100.00,,
2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,,
```
Or for a particular process:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027
1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-active-system0,6,100.00,,
1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,,
1.001040274,0,ns,drm-engine-copy,6,100.00,,
1.001040274,1542300,ns,drm-engine-render,6,100.00,,
1.001040274,0,ns,drm-engine-video,6,100.00,,
1.001040274,0,ns,drm-engine-video-enhance,6,100.00,,
1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,,
1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,,
1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-resident-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-system0,6,100.00,,
1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-total-system0,6,100.00,,
2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,,
```
As with the hwmon PMU, high numbered PMU types are used to encode
multiple possible "DRM" PMUs. The appropriate fdinfo is found by
scanning /proc and filtering which fdinfos to read with stat. To avoid
some unneeding scanning, events not starting with "drm-" are
ignored. The patch builds on commit 57e13264dcea ("perf pmus:
Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only
if full wild carding is being done, the PMU starts with "drm_" or the
event starts with "drm-" will /proc be scanned. That is there should
be little to no cost in this PMU unless DRM events are requested.
Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-06-24 16:18:36 -07:00
|
|
|
} else if (type >= PERF_PMU_TYPE_DRM_START && type <= PERF_PMU_TYPE_DRM_END) {
|
|
|
|
to_read_pmus = PERF_TOOL_PMU_TYPE_DRM_MASK;
|
2025-01-31 23:43:17 -08:00
|
|
|
} else if (type >= PERF_PMU_TYPE_HWMON_START && type <= PERF_PMU_TYPE_HWMON_END) {
|
|
|
|
to_read_pmus = PERF_TOOL_PMU_TYPE_HWMON_MASK;
|
|
|
|
} else {
|
|
|
|
to_read_pmus = PERF_TOOL_PMU_TYPE_TOOL_MASK;
|
|
|
|
}
|
|
|
|
pmu_read_sysfs(to_read_pmus);
|
2023-05-27 00:22:07 -07:00
|
|
|
pmu = __perf_pmus__find_by_type(type);
|
|
|
|
return pmu;
|
|
|
|
}
|
|
|
|
|
2023-05-27 00:22:04 -07:00
|
|
|
/*
|
|
|
|
* pmu iterator: If pmu is NULL, we start at the begin, otherwise return the
|
|
|
|
* next pmu. Returns NULL on end.
|
|
|
|
*/
|
2023-05-27 00:22:03 -07:00
|
|
|
struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
|
|
|
|
{
|
2023-05-27 00:22:04 -07:00
|
|
|
bool use_core_pmus = !pmu || pmu->is_core;
|
|
|
|
|
2023-05-27 00:22:03 -07:00
|
|
|
if (!pmu) {
|
2025-01-31 23:43:17 -08:00
|
|
|
pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK);
|
2023-05-27 00:22:04 -07:00
|
|
|
pmu = list_prepare_entry(pmu, &core_pmus, list);
|
|
|
|
}
|
|
|
|
if (use_core_pmus) {
|
|
|
|
list_for_each_entry_continue(pmu, &core_pmus, list)
|
|
|
|
return pmu;
|
|
|
|
|
|
|
|
pmu = NULL;
|
|
|
|
pmu = list_prepare_entry(pmu, &other_pmus, list);
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
2023-05-27 00:22:04 -07:00
|
|
|
list_for_each_entry_continue(pmu, &other_pmus, list)
|
2023-05-27 00:22:03 -07:00
|
|
|
return pmu;
|
|
|
|
return NULL;
|
|
|
|
}
|
2023-05-02 15:38:37 -07:00
|
|
|
|
2023-05-27 00:22:05 -07:00
|
|
|
struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
|
|
|
|
{
|
|
|
|
if (!pmu) {
|
2025-01-31 23:43:17 -08:00
|
|
|
pmu_read_sysfs(PERF_TOOL_PMU_TYPE_PE_CORE_MASK);
|
2023-09-13 16:33:48 +01:00
|
|
|
return list_first_entry_or_null(&core_pmus, typeof(*pmu), list);
|
2023-05-27 00:22:05 -07:00
|
|
|
}
|
|
|
|
list_for_each_entry_continue(pmu, &core_pmus, list)
|
|
|
|
return pmu;
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2025-06-24 16:18:35 -07:00
|
|
|
struct perf_pmu *perf_pmus__scan_for_event(struct perf_pmu *pmu, const char *event)
|
|
|
|
{
|
|
|
|
bool use_core_pmus = !pmu || pmu->is_core;
|
|
|
|
|
|
|
|
if (!pmu) {
|
|
|
|
/* Hwmon filename values that aren't used. */
|
|
|
|
enum hwmon_type type;
|
|
|
|
int number;
|
|
|
|
/*
|
|
|
|
* Core PMUs, other sysfs PMUs and tool PMU can take all event
|
|
|
|
* types or aren't wother optimizing for.
|
|
|
|
*/
|
|
|
|
unsigned int to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
|
|
|
|
PERF_TOOL_PMU_TYPE_PE_OTHER_MASK |
|
|
|
|
PERF_TOOL_PMU_TYPE_TOOL_MASK;
|
|
|
|
|
|
|
|
/* Could the event be a hwmon event? */
|
|
|
|
if (parse_hwmon_filename(event, &type, &number, /*item=*/NULL, /*alarm=*/NULL))
|
|
|
|
to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK;
|
|
|
|
|
perf drm_pmu: Add a tool like PMU to expose DRM information
DRM clients expose information through usage stats as documented in
Documentation/gpu/drm-usage-stats.rst (available online at
https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like
PMU, similar to the hwmon PMU, that exposes DRM information. For
example on a tigerlake laptop:
```
$ perf list drm
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
```
System wide data can be gathered:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0
1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,,
1.000904910,0,bytes,drm-active-system0,1,100.00,,
1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,,
1.000904910,0,ns,drm-engine-copy,1,100.00,,
1.000904910,1472970566175,ns,drm-engine-render,1,100.00,,
1.000904910,0,ns,drm-engine-video,1,100.00,,
1.000904910,0,ns,drm-engine-video-enhance,1,100.00,,
1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,,
1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,,
1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,,
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,,
1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,,
1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-total-system0,1,100.00,,
2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,,
```
Or for a particular process:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027
1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-active-system0,6,100.00,,
1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,,
1.001040274,0,ns,drm-engine-copy,6,100.00,,
1.001040274,1542300,ns,drm-engine-render,6,100.00,,
1.001040274,0,ns,drm-engine-video,6,100.00,,
1.001040274,0,ns,drm-engine-video-enhance,6,100.00,,
1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,,
1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,,
1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-resident-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-system0,6,100.00,,
1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-total-system0,6,100.00,,
2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,,
```
As with the hwmon PMU, high numbered PMU types are used to encode
multiple possible "DRM" PMUs. The appropriate fdinfo is found by
scanning /proc and filtering which fdinfos to read with stat. To avoid
some unneeding scanning, events not starting with "drm-" are
ignored. The patch builds on commit 57e13264dcea ("perf pmus:
Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only
if full wild carding is being done, the PMU starts with "drm_" or the
event starts with "drm-" will /proc be scanned. That is there should
be little to no cost in this PMU unless DRM events are requested.
Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-06-24 16:18:36 -07:00
|
|
|
/* Could the event be a DRM event? */
|
|
|
|
if (strlen(event) > 4 && strncmp("drm-", event, 4) == 0)
|
|
|
|
to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK;
|
|
|
|
|
2025-06-24 16:18:35 -07:00
|
|
|
pmu_read_sysfs(to_read_pmus);
|
|
|
|
pmu = list_prepare_entry(pmu, &core_pmus, list);
|
|
|
|
}
|
|
|
|
if (use_core_pmus) {
|
|
|
|
list_for_each_entry_continue(pmu, &core_pmus, list)
|
|
|
|
return pmu;
|
|
|
|
|
|
|
|
pmu = NULL;
|
|
|
|
pmu = list_prepare_entry(pmu, &other_pmus, list);
|
|
|
|
}
|
|
|
|
list_for_each_entry_continue(pmu, &other_pmus, list)
|
|
|
|
return pmu;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct perf_pmu *perf_pmus__scan_matching_wildcard(struct perf_pmu *pmu, const char *wildcard)
|
|
|
|
{
|
|
|
|
bool use_core_pmus = !pmu || pmu->is_core;
|
|
|
|
|
|
|
|
if (!pmu) {
|
|
|
|
/*
|
|
|
|
* Core PMUs, other sysfs PMUs and tool PMU can have any name or
|
|
|
|
* aren't wother optimizing for.
|
|
|
|
*/
|
|
|
|
unsigned int to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
|
|
|
|
PERF_TOOL_PMU_TYPE_PE_OTHER_MASK |
|
|
|
|
PERF_TOOL_PMU_TYPE_TOOL_MASK;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Hwmon PMUs have an alias from a sysfs name like hwmon0,
|
|
|
|
* hwmon1, etc. or have a name of hwmon_<name>. They therefore
|
|
|
|
* can only have a wildcard match if the wildcard begins with
|
perf drm_pmu: Add a tool like PMU to expose DRM information
DRM clients expose information through usage stats as documented in
Documentation/gpu/drm-usage-stats.rst (available online at
https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like
PMU, similar to the hwmon PMU, that exposes DRM information. For
example on a tigerlake laptop:
```
$ perf list drm
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
```
System wide data can be gathered:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0
1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,,
1.000904910,0,bytes,drm-active-system0,1,100.00,,
1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,,
1.000904910,0,ns,drm-engine-copy,1,100.00,,
1.000904910,1472970566175,ns,drm-engine-render,1,100.00,,
1.000904910,0,ns,drm-engine-video,1,100.00,,
1.000904910,0,ns,drm-engine-video-enhance,1,100.00,,
1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,,
1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,,
1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,,
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,,
1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,,
1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-total-system0,1,100.00,,
2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,,
```
Or for a particular process:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027
1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-active-system0,6,100.00,,
1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,,
1.001040274,0,ns,drm-engine-copy,6,100.00,,
1.001040274,1542300,ns,drm-engine-render,6,100.00,,
1.001040274,0,ns,drm-engine-video,6,100.00,,
1.001040274,0,ns,drm-engine-video-enhance,6,100.00,,
1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,,
1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,,
1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-resident-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-system0,6,100.00,,
1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-total-system0,6,100.00,,
2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,,
```
As with the hwmon PMU, high numbered PMU types are used to encode
multiple possible "DRM" PMUs. The appropriate fdinfo is found by
scanning /proc and filtering which fdinfos to read with stat. To avoid
some unneeding scanning, events not starting with "drm-" are
ignored. The patch builds on commit 57e13264dcea ("perf pmus:
Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only
if full wild carding is being done, the PMU starts with "drm_" or the
event starts with "drm-" will /proc be scanned. That is there should
be little to no cost in this PMU unless DRM events are requested.
Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-06-24 16:18:36 -07:00
|
|
|
* "hwmon". Similarly drm PMUs must start "drm_", avoid reading
|
|
|
|
* such events unless the PMU could match.
|
2025-06-24 16:18:35 -07:00
|
|
|
*/
|
perf drm_pmu: Add a tool like PMU to expose DRM information
DRM clients expose information through usage stats as documented in
Documentation/gpu/drm-usage-stats.rst (available online at
https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like
PMU, similar to the hwmon PMU, that exposes DRM information. For
example on a tigerlake laptop:
```
$ perf list drm
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
```
System wide data can be gathered:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0
1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,,
1.000904910,0,bytes,drm-active-system0,1,100.00,,
1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,,
1.000904910,0,ns,drm-engine-copy,1,100.00,,
1.000904910,1472970566175,ns,drm-engine-render,1,100.00,,
1.000904910,0,ns,drm-engine-video,1,100.00,,
1.000904910,0,ns,drm-engine-video-enhance,1,100.00,,
1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,,
1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,,
1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,,
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,,
1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,,
1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-total-system0,1,100.00,,
2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,,
```
Or for a particular process:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027
1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-active-system0,6,100.00,,
1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,,
1.001040274,0,ns,drm-engine-copy,6,100.00,,
1.001040274,1542300,ns,drm-engine-render,6,100.00,,
1.001040274,0,ns,drm-engine-video,6,100.00,,
1.001040274,0,ns,drm-engine-video-enhance,6,100.00,,
1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,,
1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,,
1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-resident-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-system0,6,100.00,,
1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-total-system0,6,100.00,,
2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,,
```
As with the hwmon PMU, high numbered PMU types are used to encode
multiple possible "DRM" PMUs. The appropriate fdinfo is found by
scanning /proc and filtering which fdinfos to read with stat. To avoid
some unneeding scanning, events not starting with "drm-" are
ignored. The patch builds on commit 57e13264dcea ("perf pmus:
Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only
if full wild carding is being done, the PMU starts with "drm_" or the
event starts with "drm-" will /proc be scanned. That is there should
be little to no cost in this PMU unless DRM events are requested.
Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-06-24 16:18:36 -07:00
|
|
|
if (strisglob(wildcard)) {
|
|
|
|
to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK |
|
|
|
|
PERF_TOOL_PMU_TYPE_DRM_MASK;
|
|
|
|
} else if (strlen(wildcard) >= 4 && strncmp("drm_", wildcard, 4) == 0) {
|
|
|
|
to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK;
|
|
|
|
} else if (strlen(wildcard) >= 5 && strncmp("hwmon", wildcard, 5) == 0) {
|
2025-06-24 16:18:35 -07:00
|
|
|
to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK;
|
perf drm_pmu: Add a tool like PMU to expose DRM information
DRM clients expose information through usage stats as documented in
Documentation/gpu/drm-usage-stats.rst (available online at
https://docs.kernel.org/gpu/drm-usage-stats.html). Add a tool like
PMU, similar to the hwmon PMU, that exposes DRM information. For
example on a tigerlake laptop:
```
$ perf list drm
List of pre-defined events (to be used in -e or -M):
drm:
drm-active-stolen-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-active-system0
[Total memory active in one or more engines. Unit: drm_i915]
drm-engine-capacity-video
[Engine capacity. Unit: drm_i915]
drm-engine-copy
[Utilization in ns. Unit: drm_i915]
drm-engine-render
[Utilization in ns. Unit: drm_i915]
drm-engine-video
[Utilization in ns. Unit: drm_i915]
drm-engine-video-enhance
[Utilization in ns. Unit: drm_i915]
drm-purgeable-stolen-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-purgeable-system0
[Size of resident and purgeable memory bufers. Unit: drm_i915]
drm-resident-stolen-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-resident-system0
[Size of resident memory bufers. Unit: drm_i915]
drm-shared-stolen-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-shared-system0
[Size of shared memory bufers. Unit: drm_i915]
drm-total-stolen-system0
[Size of shared and private memory. Unit: drm_i915]
drm-total-system0
[Size of shared and private memory. Unit: drm_i915]
```
System wide data can be gathered:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0
1.000904910,0,bytes,drm-active-stolen-system0,1,100.00,,
1.000904910,0,bytes,drm-active-system0,1,100.00,,
1.000904910,36,capacity,drm-engine-capacity-video,1,100.00,,
1.000904910,0,ns,drm-engine-copy,1,100.00,,
1.000904910,1472970566175,ns,drm-engine-render,1,100.00,,
1.000904910,0,ns,drm-engine-video,1,100.00,,
1.000904910,0,ns,drm-engine-video-enhance,1,100.00,,
1.000904910,0,bytes,drm-purgeable-stolen-system0,1,100.00,,
1.000904910,38199296,bytes,drm-purgeable-system0,1,100.00,,
1.000904910,0,bytes,drm-resident-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-resident-system0,1,100.00,,
1.000904910,0,bytes,drm-shared-stolen-system0,1,100.00,,
1.000904910,1886871552,bytes,drm-shared-system0,1,100.00,,
1.000904910,0,bytes,drm-total-stolen-system0,1,100.00,,
1.000904910,4643196928,bytes,drm-total-system0,1,100.00,,
2.264426839,0,bytes,drm-active-stolen-system0,1,100.00,,
```
Or for a particular process:
```
$ perf stat -x, -I 1000 -e drm-active-stolen-system0,drm-active-system0,drm-engine-capacity-video,drm-engine-copy,drm-engine-render,drm-engine-video,drm-engine-video-enhance,drm-purgeable-stolen-system0,drm-purgeable-system0,drm-resident-stolen-system0,drm-resident-system0,drm-shared-stolen-system0,drm-shared-system0,drm-total-stolen-system0,drm-total-system0 -p 200027
1.001040274,0,bytes,drm-active-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-active-system0,6,100.00,,
1.001040274,12,capacity,drm-engine-capacity-video,6,100.00,,
1.001040274,0,ns,drm-engine-copy,6,100.00,,
1.001040274,1542300,ns,drm-engine-render,6,100.00,,
1.001040274,0,ns,drm-engine-video,6,100.00,,
1.001040274,0,ns,drm-engine-video-enhance,6,100.00,,
1.001040274,0,bytes,drm-purgeable-stolen-system0,6,100.00,,
1.001040274,13516800,bytes,drm-purgeable-system0,6,100.00,,
1.001040274,0,bytes,drm-resident-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-resident-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-stolen-system0,6,100.00,,
1.001040274,0,bytes,drm-shared-system0,6,100.00,,
1.001040274,0,bytes,drm-total-stolen-system0,6,100.00,,
1.001040274,27746304,bytes,drm-total-system0,6,100.00,,
2.016629075,0,bytes,drm-active-stolen-system0,6,100.00,,
```
As with the hwmon PMU, high numbered PMU types are used to encode
multiple possible "DRM" PMUs. The appropriate fdinfo is found by
scanning /proc and filtering which fdinfos to read with stat. To avoid
some unneeding scanning, events not starting with "drm-" are
ignored. The patch builds on commit 57e13264dcea ("perf pmus:
Restructure pmu_read_sysfs to scan fewer PMUs") and later so that only
if full wild carding is being done, the PMU starts with "drm_" or the
event starts with "drm-" will /proc be scanned. That is there should
be little to no cost in this PMU unless DRM events are requested.
Signed-off-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20250624231837.179536-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-06-24 16:18:36 -07:00
|
|
|
}
|
2025-06-24 16:18:35 -07:00
|
|
|
|
|
|
|
pmu_read_sysfs(to_read_pmus);
|
|
|
|
pmu = list_prepare_entry(pmu, &core_pmus, list);
|
|
|
|
}
|
|
|
|
if (use_core_pmus) {
|
|
|
|
list_for_each_entry_continue(pmu, &core_pmus, list) {
|
|
|
|
if (perf_pmu__wildcard_match(pmu, wildcard))
|
|
|
|
return pmu;
|
|
|
|
}
|
|
|
|
pmu = NULL;
|
|
|
|
pmu = list_prepare_entry(pmu, &other_pmus, list);
|
|
|
|
}
|
|
|
|
list_for_each_entry_continue(pmu, &other_pmus, list) {
|
|
|
|
if (perf_pmu__wildcard_match(pmu, wildcard))
|
|
|
|
return pmu;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2023-08-25 06:52:37 -07:00
|
|
|
static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu)
|
|
|
|
{
|
|
|
|
bool use_core_pmus = !pmu || pmu->is_core;
|
|
|
|
int last_pmu_name_len = 0;
|
|
|
|
const char *last_pmu_name = (pmu && pmu->name) ? pmu->name : "";
|
|
|
|
|
|
|
|
if (!pmu) {
|
2025-01-31 23:43:17 -08:00
|
|
|
pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK);
|
2023-08-25 06:52:37 -07:00
|
|
|
pmu = list_prepare_entry(pmu, &core_pmus, list);
|
|
|
|
} else
|
2024-05-14 23:01:13 -07:00
|
|
|
last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "");
|
2023-08-25 06:52:37 -07:00
|
|
|
|
|
|
|
if (use_core_pmus) {
|
|
|
|
list_for_each_entry_continue(pmu, &core_pmus, list) {
|
2024-05-14 23:01:13 -07:00
|
|
|
int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "");
|
2023-08-25 06:52:37 -07:00
|
|
|
|
|
|
|
if (last_pmu_name_len == pmu_name_len &&
|
|
|
|
!strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
return pmu;
|
|
|
|
}
|
|
|
|
pmu = NULL;
|
|
|
|
pmu = list_prepare_entry(pmu, &other_pmus, list);
|
|
|
|
}
|
|
|
|
list_for_each_entry_continue(pmu, &other_pmus, list) {
|
2024-05-14 23:01:13 -07:00
|
|
|
int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "");
|
2023-08-25 06:52:37 -07:00
|
|
|
|
|
|
|
if (last_pmu_name_len == pmu_name_len &&
|
|
|
|
!strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
return pmu;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2023-05-02 15:38:37 -07:00
|
|
|
const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str)
|
|
|
|
{
|
|
|
|
struct perf_pmu *pmu = NULL;
|
|
|
|
|
2023-05-27 00:22:03 -07:00
|
|
|
while ((pmu = perf_pmus__scan(pmu)) != NULL) {
|
2023-05-02 15:38:37 -07:00
|
|
|
if (!strcmp(pmu->name, str))
|
|
|
|
return pmu;
|
|
|
|
/* Ignore "uncore_" prefix. */
|
|
|
|
if (!strncmp(pmu->name, "uncore_", 7)) {
|
|
|
|
if (!strcmp(pmu->name + 7, str))
|
|
|
|
return pmu;
|
|
|
|
}
|
|
|
|
/* Ignore "cpu_" prefix on Intel hybrid PMUs. */
|
|
|
|
if (!strncmp(pmu->name, "cpu_", 4)) {
|
|
|
|
if (!strcmp(pmu->name + 4, str))
|
|
|
|
return pmu;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
2023-05-27 00:22:03 -07:00
|
|
|
|
|
|
|
/**
 * struct sevent - Struct for ordering events as output in perf list.
 *
 * The string members are heap copies made by
 * perf_pmus__print_pmu_events__callback() from the members of the same name in
 * struct pmu_event_info; each is NULL when the source string was NULL. They are
 * released by perf_pmus__print_pmu_events() once the event has been handled.
 */
struct sevent {
	/** PMU for event. */
	const struct perf_pmu *pmu;
	/** Event name, the final sort key in cmp_sevent(). */
	const char *name;
	/** Alias passed to the print_event callback. */
	const char *alias;
	/** Scale/unit string passed to the print_event callback. */
	const char *scale_unit;
	/** Description; entries that have one sort after entries without. */
	const char *desc;
	/** Long description passed to the print_event callback. */
	const char *long_desc;
	/** Encoding description passed to the print_event callback. */
	const char *encoding_desc;
	/** Topic, the sort key applied right after the description check. */
	const char *topic;
	/** PMU name, used for sorting and for duplicate detection. */
	const char *pmu_name;
	/** Event type description passed to the print_event callback. */
	const char *event_type_desc;
	/** Copied from pmu_event_info's deprecated flag. */
	bool deprecated;
};
|
|
|
|
|
|
|
|
static int cmp_sevent(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
const struct sevent *as = a;
|
|
|
|
const struct sevent *bs = b;
|
2023-08-23 21:13:14 -07:00
|
|
|
bool a_iscpu, b_iscpu;
|
2023-05-27 00:22:03 -07:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* Put extra events last. */
|
2023-08-23 21:13:14 -07:00
|
|
|
if (!!as->desc != !!bs->desc)
|
|
|
|
return !!as->desc - !!bs->desc;
|
2023-05-27 00:22:03 -07:00
|
|
|
|
|
|
|
/* Order by topics. */
|
2023-08-23 21:13:14 -07:00
|
|
|
ret = strcmp(as->topic ?: "", bs->topic ?: "");
|
2023-05-27 00:22:03 -07:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
/* Order CPU core events to be first */
|
2023-08-23 21:13:14 -07:00
|
|
|
a_iscpu = as->pmu ? as->pmu->is_core : true;
|
|
|
|
b_iscpu = bs->pmu ? bs->pmu->is_core : true;
|
|
|
|
if (a_iscpu != b_iscpu)
|
|
|
|
return a_iscpu ? -1 : 1;
|
2023-05-27 00:22:03 -07:00
|
|
|
|
|
|
|
/* Order by PMU name. */
|
|
|
|
if (as->pmu != bs->pmu) {
|
2023-08-23 21:13:14 -07:00
|
|
|
ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: "");
|
2023-05-27 00:22:03 -07:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Order by event name. */
|
2023-08-23 21:13:14 -07:00
|
|
|
return strcmp(as->name, bs->name);
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
|
|
|
|
2023-08-23 21:13:14 -07:00
|
|
|
static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b)
|
2023-05-27 00:22:03 -07:00
|
|
|
{
|
|
|
|
/* Different names -> never duplicates */
|
2023-08-23 21:13:14 -07:00
|
|
|
if (strcmp(a->name ?: "//", b->name ?: "//"))
|
2023-05-27 00:22:03 -07:00
|
|
|
return false;
|
|
|
|
|
|
|
|
/* Don't remove duplicates for different PMUs */
|
2023-08-23 21:13:14 -07:00
|
|
|
return strcmp(a->pmu_name, b->pmu_name) == 0;
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
|
|
|
|
2023-08-23 21:13:14 -07:00
|
|
|
/*
 * State handed to perf_pmus__print_pmu_events__callback() while the events of
 * each PMU are collected.
 */
struct events_callback_state {
	/* Array the callback fills in, one element per event. */
	struct sevent *aliases;
	/* Number of elements available in @aliases. */
	size_t aliases_len;
	/* Next element of @aliases to fill; advanced by the callback. */
	size_t index;
};
|
2023-05-27 00:22:03 -07:00
|
|
|
|
2023-08-23 21:13:14 -07:00
|
|
|
static int perf_pmus__print_pmu_events__callback(void *vstate,
|
|
|
|
struct pmu_event_info *info)
|
2023-05-27 00:22:03 -07:00
|
|
|
{
|
2023-08-23 21:13:14 -07:00
|
|
|
struct events_callback_state *state = vstate;
|
|
|
|
struct sevent *s;
|
2023-05-27 00:22:03 -07:00
|
|
|
|
2023-08-23 21:13:14 -07:00
|
|
|
if (state->index >= state->aliases_len) {
|
|
|
|
pr_err("Unexpected event %s/%s/\n", info->pmu->name, info->name);
|
|
|
|
return 1;
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
2024-10-01 20:20:12 -07:00
|
|
|
assert(info->pmu != NULL || info->name != NULL);
|
2023-08-23 21:13:14 -07:00
|
|
|
s = &state->aliases[state->index];
|
|
|
|
s->pmu = info->pmu;
|
|
|
|
#define COPY_STR(str) s->str = info->str ? strdup(info->str) : NULL
|
|
|
|
COPY_STR(name);
|
|
|
|
COPY_STR(alias);
|
|
|
|
COPY_STR(scale_unit);
|
|
|
|
COPY_STR(desc);
|
|
|
|
COPY_STR(long_desc);
|
|
|
|
COPY_STR(encoding_desc);
|
|
|
|
COPY_STR(topic);
|
|
|
|
COPY_STR(pmu_name);
|
2024-09-06 22:08:19 -07:00
|
|
|
COPY_STR(event_type_desc);
|
2023-08-23 21:13:14 -07:00
|
|
|
#undef COPY_STR
|
|
|
|
s->deprecated = info->deprecated;
|
|
|
|
state->index++;
|
|
|
|
return 0;
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
|
|
|
|
{
|
|
|
|
struct perf_pmu *pmu;
|
|
|
|
int printed = 0;
|
2023-08-23 21:13:14 -07:00
|
|
|
int len;
|
2023-05-27 00:22:03 -07:00
|
|
|
struct sevent *aliases;
|
2023-08-23 21:13:14 -07:00
|
|
|
struct events_callback_state state;
|
2023-08-25 06:52:37 -07:00
|
|
|
bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
|
|
|
|
struct perf_pmu *(*scan_fn)(struct perf_pmu *);
|
|
|
|
|
|
|
|
if (skip_duplicate_pmus)
|
|
|
|
scan_fn = perf_pmus__scan_skip_duplicates;
|
|
|
|
else
|
|
|
|
scan_fn = perf_pmus__scan;
|
2023-05-27 00:22:03 -07:00
|
|
|
|
|
|
|
pmu = NULL;
|
|
|
|
len = 0;
|
2023-08-25 06:52:37 -07:00
|
|
|
while ((pmu = scan_fn(pmu)) != NULL)
|
2023-08-23 21:13:14 -07:00
|
|
|
len += perf_pmu__num_events(pmu);
|
|
|
|
|
2023-05-27 00:22:03 -07:00
|
|
|
aliases = zalloc(sizeof(struct sevent) * len);
|
|
|
|
if (!aliases) {
|
|
|
|
pr_err("FATAL: not enough memory to print PMU events\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
pmu = NULL;
|
2023-08-23 21:13:14 -07:00
|
|
|
state = (struct events_callback_state) {
|
|
|
|
.aliases = aliases,
|
|
|
|
.aliases_len = len,
|
|
|
|
.index = 0,
|
|
|
|
};
|
2023-08-25 06:52:37 -07:00
|
|
|
while ((pmu = scan_fn(pmu)) != NULL) {
|
|
|
|
perf_pmu__for_each_event(pmu, skip_duplicate_pmus, &state,
|
|
|
|
perf_pmus__print_pmu_events__callback);
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
|
|
|
qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
|
2023-08-23 21:13:14 -07:00
|
|
|
for (int j = 0; j < len; j++) {
|
2023-05-27 00:22:03 -07:00
|
|
|
/* Skip duplicates */
|
2024-06-14 17:43:18 +08:00
|
|
|
if (j < len - 1 && pmu_alias_is_duplicate(&aliases[j], &aliases[j + 1]))
|
|
|
|
goto free;
|
2023-05-27 00:22:03 -07:00
|
|
|
|
|
|
|
print_cb->print_event(print_state,
|
2023-08-23 21:13:14 -07:00
|
|
|
aliases[j].topic,
|
perf list: Fix topic and pmu_name argument order
Fix function definitions to match header file declaration. Fix two
callers to pass the arguments in the right order.
On Intel Tigerlake, before:
```
$ perf list -j|grep "\"Topic\""|sort|uniq
"Topic": "cache",
"Topic": "cpu",
"Topic": "floating point",
"Topic": "frontend",
"Topic": "memory",
"Topic": "other",
"Topic": "pfm icl",
"Topic": "pfm ix86arch",
"Topic": "pfm perf_raw",
"Topic": "pipeline",
"Topic": "tool",
"Topic": "uncore interconnect",
"Topic": "uncore memory",
"Topic": "uncore other",
"Topic": "virtual memory",
$ perf list -j|grep "\"Unit\""|sort|uniq
"Unit": "cache",
"Unit": "cpu",
"Unit": "cstate_core",
"Unit": "cstate_pkg",
"Unit": "i915",
"Unit": "icl",
"Unit": "intel_bts",
"Unit": "intel_pt",
"Unit": "ix86arch",
"Unit": "msr",
"Unit": "perf_raw",
"Unit": "power",
"Unit": "tool",
"Unit": "uncore_arb",
"Unit": "uncore_clock",
"Unit": "uncore_imc_free_running_0",
"Unit": "uncore_imc_free_running_1",
```
After:
```
$ perf list -j|grep "\"Topic\""|sort|uniq
"Topic": "cache",
"Topic": "floating point",
"Topic": "frontend",
"Topic": "memory",
"Topic": "other",
"Topic": "pfm icl",
"Topic": "pfm ix86arch",
"Topic": "pfm perf_raw",
"Topic": "pipeline",
"Topic": "tool",
"Topic": "uncore interconnect",
"Topic": "uncore memory",
"Topic": "uncore other",
"Topic": "virtual memory",
$ perf list -j|grep "\"Unit\""|sort|uniq
"Unit": "cpu",
"Unit": "cstate_core",
"Unit": "cstate_pkg",
"Unit": "i915",
"Unit": "icl",
"Unit": "intel_bts",
"Unit": "intel_pt",
"Unit": "ix86arch",
"Unit": "msr",
"Unit": "perf_raw",
"Unit": "power",
"Unit": "tool",
"Unit": "uncore_arb",
"Unit": "uncore_clock",
"Unit": "uncore_imc_free_running_0",
"Unit": "uncore_imc_free_running_1",
```
Fixes: e5c6109f4813246a ("perf list: Reorganize to use callbacks to allow honouring command line options")
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Jean-Philippe Romain <jean-philippe.romain@foss.st.com>
Tested-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Junhao He <hejunhao3@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241109025801.560378-1-irogers@google.com
[ I fixed the two callers and added it to Jean-Phillippe's original change. ]
Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-11-08 18:58:01 -08:00
|
|
|
aliases[j].pmu_name,
|
2025-07-25 11:51:53 -07:00
|
|
|
aliases[j].pmu->type,
|
2023-08-23 21:13:14 -07:00
|
|
|
aliases[j].name,
|
|
|
|
aliases[j].alias,
|
|
|
|
aliases[j].scale_unit,
|
|
|
|
aliases[j].deprecated,
|
2024-09-06 22:08:19 -07:00
|
|
|
aliases[j].event_type_desc,
|
2023-08-23 21:13:14 -07:00
|
|
|
aliases[j].desc,
|
|
|
|
aliases[j].long_desc,
|
|
|
|
aliases[j].encoding_desc);
|
2024-06-14 17:43:18 +08:00
|
|
|
free:
|
2023-08-23 21:13:14 -07:00
|
|
|
zfree(&aliases[j].name);
|
|
|
|
zfree(&aliases[j].alias);
|
|
|
|
zfree(&aliases[j].scale_unit);
|
|
|
|
zfree(&aliases[j].desc);
|
|
|
|
zfree(&aliases[j].long_desc);
|
|
|
|
zfree(&aliases[j].encoding_desc);
|
|
|
|
zfree(&aliases[j].topic);
|
|
|
|
zfree(&aliases[j].pmu_name);
|
2024-09-06 22:08:19 -07:00
|
|
|
zfree(&aliases[j].event_type_desc);
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
|
|
|
if (printed && pager_in_use())
|
|
|
|
printf("\n");
|
|
|
|
|
|
|
|
zfree(&aliases);
|
|
|
|
}
|
|
|
|
|
perf list: Give more details about raw event encodings
List all the PMUs, not just the first core one, and list real format
specifiers with value ranges.
Before:
$ perf list
...
rNNN [Raw hardware event descriptor]
cpu/t1=v1[,t2=v2,t3 ...]/modifier [Raw hardware event descriptor]
[(see 'man perf-list' on how to encode it)]
mem:<addr>[/len][:access] [Hardware breakpoint]
...
After:
$ perf list
...
rNNN [Raw event descriptor]
cpu/event=0..255,pc,edge,.../modifier [Raw event descriptor]
[(see 'man perf-list' or 'man perf-record' on how to encode it)]
breakpoint//modifier [Raw event descriptor]
cstate_core/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_pkg/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
i915/i915_eventid=0..0x1fffff/modifier [Raw event descriptor]
intel_bts//modifier [Raw event descriptor]
intel_pt/ptw,event,cyc_thresh=0..15,.../modifier [Raw event descriptor]
kprobe/retprobe/modifier [Raw event descriptor]
msr/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
power/event=0..255/modifier [Raw event descriptor]
software//modifier [Raw event descriptor]
tracepoint//modifier [Raw event descriptor]
uncore_arb/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_cbox/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_clock/event=0..255/modifier [Raw event descriptor]
uncore_imc_free_running/event=0..255,umask=0..255/modifier[Raw event descriptor]
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier[Raw event descriptor]
mem:<addr>[/len][:access] [Hardware breakpoint]
...
With '--details' provide more details on the formats encoding:
cpu/event=0..255,pc,edge,.../modifier [Raw event descriptor]
[(see 'man perf-list' or 'man perf-record' on how to encode it)]
cpu/event=0..255,pc,edge,offcore_rsp=0..0xffffffffffffffff,ldlat=0..0xffff,inv,
umask=0..255,frontend=0..0xffffff,cmask=0..255,config=0..0xffffffffffffffff,
config1=0..0xffffffffffffffff,config2=0..0xffffffffffffffff,config3=0..0xffffffffffffffff,
name=string,period=number,freq=number,branch_type=(u|k|hv|any|...),time,
call-graph=(fp|dwarf|lbr),stack-size=number,max-stack=number,nr=number,inherit,no-inherit,
overwrite,no-overwrite,percore,aux-output,aux-sample-size=number/modifier
breakpoint//modifier [Raw event descriptor]
breakpoint//modifier
cstate_core/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_core/event=0..0xffffffffffffffff/modifier
cstate_pkg/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_pkg/event=0..0xffffffffffffffff/modifier
i915/i915_eventid=0..0x1fffff/modifier [Raw event descriptor]
i915/i915_eventid=0..0x1fffff/modifier
intel_bts//modifier [Raw event descriptor]
intel_bts//modifier
intel_pt/ptw,event,cyc_thresh=0..15,.../modifier [Raw event descriptor]
intel_pt/ptw,event,cyc_thresh=0..15,pt,notnt,branch,tsc,pwr_evt,fup_on_ptw,cyc,noretcomp,
mtc,psb_period=0..15,mtc_period=0..15/modifier
kprobe/retprobe/modifier [Raw event descriptor]
kprobe/retprobe/modifier
msr/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
msr/event=0..0xffffffffffffffff/modifier
power/event=0..255/modifier [Raw event descriptor]
power/event=0..255/modifier
software//modifier [Raw event descriptor]
software//modifier
tracepoint//modifier [Raw event descriptor]
tracepoint//modifier
uncore_arb/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_arb/event=0..255,edge,inv,umask=0..255,cmask=0..31/modifier
uncore_cbox/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_cbox/event=0..255,edge,inv,umask=0..255,cmask=0..31/modifier
uncore_clock/event=0..255/modifier [Raw event descriptor]
uncore_clock/event=0..255/modifier
uncore_imc_free_running/event=0..255,umask=0..255/modifier[Raw event descriptor]
uncore_imc_free_running/event=0..255,umask=0..255/modifier
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier[Raw event descriptor]
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier
Committer notes:
Address this build error in various distros:
55 58.44 ubuntu:24.04 : FAIL gcc version 13.2.0 (Ubuntu 13.2.0-17ubuntu2)
util/pmu.c:1638:70: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
1638 | _Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 6);
| ^
| , ""
1 error generated.
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Link: https://lore.kernel.org/r/20240308001915.4060155-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-03-07 16:19:13 -08:00
|
|
|
struct build_format_string_args {
|
|
|
|
struct strbuf short_string;
|
|
|
|
struct strbuf long_string;
|
|
|
|
int num_formats;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int build_format_string(void *state, const char *name, int config,
|
|
|
|
const unsigned long *bits)
|
|
|
|
{
|
|
|
|
struct build_format_string_args *args = state;
|
|
|
|
unsigned int num_bits;
|
|
|
|
int ret1, ret2 = 0;
|
|
|
|
|
|
|
|
(void)config;
|
|
|
|
args->num_formats++;
|
|
|
|
if (args->num_formats > 1) {
|
|
|
|
strbuf_addch(&args->long_string, ',');
|
|
|
|
if (args->num_formats < 4)
|
|
|
|
strbuf_addch(&args->short_string, ',');
|
|
|
|
}
|
|
|
|
num_bits = bits ? bitmap_weight(bits, PERF_PMU_FORMAT_BITS) : 0;
|
|
|
|
if (num_bits <= 1) {
|
|
|
|
ret1 = strbuf_addf(&args->long_string, "%s", name);
|
|
|
|
if (args->num_formats < 4)
|
|
|
|
ret2 = strbuf_addf(&args->short_string, "%s", name);
|
|
|
|
} else if (num_bits > 8) {
|
|
|
|
ret1 = strbuf_addf(&args->long_string, "%s=0..0x%llx", name,
|
|
|
|
ULLONG_MAX >> (64 - num_bits));
|
|
|
|
if (args->num_formats < 4) {
|
|
|
|
ret2 = strbuf_addf(&args->short_string, "%s=0..0x%llx", name,
|
|
|
|
ULLONG_MAX >> (64 - num_bits));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
ret1 = strbuf_addf(&args->long_string, "%s=0..%llu", name,
|
|
|
|
ULLONG_MAX >> (64 - num_bits));
|
|
|
|
if (args->num_formats < 4) {
|
|
|
|
ret2 = strbuf_addf(&args->short_string, "%s=0..%llu", name,
|
|
|
|
ULLONG_MAX >> (64 - num_bits));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret1 < 0 ? ret1 : (ret2 < 0 ? ret2 : 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, void *print_state)
|
|
|
|
{
|
|
|
|
bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
|
|
|
|
struct perf_pmu *(*scan_fn)(struct perf_pmu *);
|
|
|
|
struct perf_pmu *pmu = NULL;
|
|
|
|
|
|
|
|
if (skip_duplicate_pmus)
|
|
|
|
scan_fn = perf_pmus__scan_skip_duplicates;
|
|
|
|
else
|
|
|
|
scan_fn = perf_pmus__scan;
|
|
|
|
|
|
|
|
while ((pmu = scan_fn(pmu)) != NULL) {
|
|
|
|
struct build_format_string_args format_args = {
|
|
|
|
.short_string = STRBUF_INIT,
|
|
|
|
.long_string = STRBUF_INIT,
|
|
|
|
.num_formats = 0,
|
|
|
|
};
|
2024-05-14 23:01:13 -07:00
|
|
|
int len = pmu_name_len_no_suffix(pmu->name);
|
perf list: Give more details about raw event encodings
List all the PMUs, not just the first core one, and list real format
specifiers with value ranges.
Before:
$ perf list
...
rNNN [Raw hardware event descriptor]
cpu/t1=v1[,t2=v2,t3 ...]/modifier [Raw hardware event descriptor]
[(see 'man perf-list' on how to encode it)]
mem:<addr>[/len][:access] [Hardware breakpoint]
...
After:
$ perf list
...
rNNN [Raw event descriptor]
cpu/event=0..255,pc,edge,.../modifier [Raw event descriptor]
[(see 'man perf-list' or 'man perf-record' on how to encode it)]
breakpoint//modifier [Raw event descriptor]
cstate_core/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_pkg/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
i915/i915_eventid=0..0x1fffff/modifier [Raw event descriptor]
intel_bts//modifier [Raw event descriptor]
intel_pt/ptw,event,cyc_thresh=0..15,.../modifier [Raw event descriptor]
kprobe/retprobe/modifier [Raw event descriptor]
msr/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
power/event=0..255/modifier [Raw event descriptor]
software//modifier [Raw event descriptor]
tracepoint//modifier [Raw event descriptor]
uncore_arb/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_cbox/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_clock/event=0..255/modifier [Raw event descriptor]
uncore_imc_free_running/event=0..255,umask=0..255/modifier[Raw event descriptor]
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier[Raw event descriptor]
mem:<addr>[/len][:access] [Hardware breakpoint]
...
With '--details' provide more details on the formats encoding:
cpu/event=0..255,pc,edge,.../modifier [Raw event descriptor]
[(see 'man perf-list' or 'man perf-record' on how to encode it)]
cpu/event=0..255,pc,edge,offcore_rsp=0..0xffffffffffffffff,ldlat=0..0xffff,inv,
umask=0..255,frontend=0..0xffffff,cmask=0..255,config=0..0xffffffffffffffff,
config1=0..0xffffffffffffffff,config2=0..0xffffffffffffffff,config3=0..0xffffffffffffffff,
name=string,period=number,freq=number,branch_type=(u|k|hv|any|...),time,
call-graph=(fp|dwarf|lbr),stack-size=number,max-stack=number,nr=number,inherit,no-inherit,
overwrite,no-overwrite,percore,aux-output,aux-sample-size=number/modifier
breakpoint//modifier [Raw event descriptor]
breakpoint//modifier
cstate_core/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_core/event=0..0xffffffffffffffff/modifier
cstate_pkg/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_pkg/event=0..0xffffffffffffffff/modifier
i915/i915_eventid=0..0x1fffff/modifier [Raw event descriptor]
i915/i915_eventid=0..0x1fffff/modifier
intel_bts//modifier [Raw event descriptor]
intel_bts//modifier
intel_pt/ptw,event,cyc_thresh=0..15,.../modifier [Raw event descriptor]
intel_pt/ptw,event,cyc_thresh=0..15,pt,notnt,branch,tsc,pwr_evt,fup_on_ptw,cyc,noretcomp,
mtc,psb_period=0..15,mtc_period=0..15/modifier
kprobe/retprobe/modifier [Raw event descriptor]
kprobe/retprobe/modifier
msr/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
msr/event=0..0xffffffffffffffff/modifier
power/event=0..255/modifier [Raw event descriptor]
power/event=0..255/modifier
software//modifier [Raw event descriptor]
software//modifier
tracepoint//modifier [Raw event descriptor]
tracepoint//modifier
uncore_arb/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_arb/event=0..255,edge,inv,umask=0..255,cmask=0..31/modifier
uncore_cbox/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_cbox/event=0..255,edge,inv,umask=0..255,cmask=0..31/modifier
uncore_clock/event=0..255/modifier [Raw event descriptor]
uncore_clock/event=0..255/modifier
uncore_imc_free_running/event=0..255,umask=0..255/modifier[Raw event descriptor]
uncore_imc_free_running/event=0..255,umask=0..255/modifier
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier[Raw event descriptor]
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier
Committer notes:
Address this build error in various distros:
55 58.44 ubuntu:24.04 : FAIL gcc version 13.2.0 (Ubuntu 13.2.0-17ubuntu2)
util/pmu.c:1638:70: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
1638 | _Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 6);
| ^
| , ""
1 error generated.
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Link: https://lore.kernel.org/r/20240308001915.4060155-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-03-07 16:19:13 -08:00
|
|
|
const char *desc = "(see 'man perf-list' or 'man perf-record' on how to encode it)";
|
|
|
|
|
|
|
|
if (!pmu->is_core)
|
|
|
|
desc = NULL;
|
|
|
|
|
|
|
|
strbuf_addf(&format_args.short_string, "%.*s/", len, pmu->name);
|
|
|
|
strbuf_addf(&format_args.long_string, "%.*s/", len, pmu->name);
|
|
|
|
perf_pmu__for_each_format(pmu, &format_args, build_format_string);
|
|
|
|
|
|
|
|
if (format_args.num_formats > 3)
|
|
|
|
strbuf_addf(&format_args.short_string, ",.../modifier");
|
|
|
|
else
|
|
|
|
strbuf_addf(&format_args.short_string, "/modifier");
|
|
|
|
|
|
|
|
strbuf_addf(&format_args.long_string, "/modifier");
|
|
|
|
print_cb->print_event(print_state,
|
|
|
|
/*topic=*/NULL,
|
|
|
|
/*pmu_name=*/NULL,
|
2025-07-25 11:51:53 -07:00
|
|
|
pmu->type,
|
perf list: Give more details about raw event encodings
List all the PMUs, not just the first core one, and list real format
specifiers with value ranges.
Before:
$ perf list
...
rNNN [Raw hardware event descriptor]
cpu/t1=v1[,t2=v2,t3 ...]/modifier [Raw hardware event descriptor]
[(see 'man perf-list' on how to encode it)]
mem:<addr>[/len][:access] [Hardware breakpoint]
...
After:
$ perf list
...
rNNN [Raw event descriptor]
cpu/event=0..255,pc,edge,.../modifier [Raw event descriptor]
[(see 'man perf-list' or 'man perf-record' on how to encode it)]
breakpoint//modifier [Raw event descriptor]
cstate_core/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_pkg/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
i915/i915_eventid=0..0x1fffff/modifier [Raw event descriptor]
intel_bts//modifier [Raw event descriptor]
intel_pt/ptw,event,cyc_thresh=0..15,.../modifier [Raw event descriptor]
kprobe/retprobe/modifier [Raw event descriptor]
msr/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
power/event=0..255/modifier [Raw event descriptor]
software//modifier [Raw event descriptor]
tracepoint//modifier [Raw event descriptor]
uncore_arb/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_cbox/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_clock/event=0..255/modifier [Raw event descriptor]
uncore_imc_free_running/event=0..255,umask=0..255/modifier[Raw event descriptor]
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier[Raw event descriptor]
mem:<addr>[/len][:access] [Hardware breakpoint]
...
With '--details' provide more details on the formats encoding:
cpu/event=0..255,pc,edge,.../modifier [Raw event descriptor]
[(see 'man perf-list' or 'man perf-record' on how to encode it)]
cpu/event=0..255,pc,edge,offcore_rsp=0..0xffffffffffffffff,ldlat=0..0xffff,inv,
umask=0..255,frontend=0..0xffffff,cmask=0..255,config=0..0xffffffffffffffff,
config1=0..0xffffffffffffffff,config2=0..0xffffffffffffffff,config3=0..0xffffffffffffffff,
name=string,period=number,freq=number,branch_type=(u|k|hv|any|...),time,
call-graph=(fp|dwarf|lbr),stack-size=number,max-stack=number,nr=number,inherit,no-inherit,
overwrite,no-overwrite,percore,aux-output,aux-sample-size=number/modifier
breakpoint//modifier [Raw event descriptor]
breakpoint//modifier
cstate_core/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_core/event=0..0xffffffffffffffff/modifier
cstate_pkg/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
cstate_pkg/event=0..0xffffffffffffffff/modifier
i915/i915_eventid=0..0x1fffff/modifier [Raw event descriptor]
i915/i915_eventid=0..0x1fffff/modifier
intel_bts//modifier [Raw event descriptor]
intel_bts//modifier
intel_pt/ptw,event,cyc_thresh=0..15,.../modifier [Raw event descriptor]
intel_pt/ptw,event,cyc_thresh=0..15,pt,notnt,branch,tsc,pwr_evt,fup_on_ptw,cyc,noretcomp,
mtc,psb_period=0..15,mtc_period=0..15/modifier
kprobe/retprobe/modifier [Raw event descriptor]
kprobe/retprobe/modifier
msr/event=0..0xffffffffffffffff/modifier [Raw event descriptor]
msr/event=0..0xffffffffffffffff/modifier
power/event=0..255/modifier [Raw event descriptor]
power/event=0..255/modifier
software//modifier [Raw event descriptor]
software//modifier
tracepoint//modifier [Raw event descriptor]
tracepoint//modifier
uncore_arb/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_arb/event=0..255,edge,inv,umask=0..255,cmask=0..31/modifier
uncore_cbox/event=0..255,edge,inv,.../modifier [Raw event descriptor]
uncore_cbox/event=0..255,edge,inv,umask=0..255,cmask=0..31/modifier
uncore_clock/event=0..255/modifier [Raw event descriptor]
uncore_clock/event=0..255/modifier
uncore_imc_free_running/event=0..255,umask=0..255/modifier[Raw event descriptor]
uncore_imc_free_running/event=0..255,umask=0..255/modifier
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier[Raw event descriptor]
uprobe/ref_ctr_offset=0..0xffffffff,retprobe/modifier
Committer notes:
Address this build error in various distros:
55 58.44 ubuntu:24.04 : FAIL gcc version 13.2.0 (Ubuntu 13.2.0-17ubuntu2)
util/pmu.c:1638:70: error: '_Static_assert' with no message is a C2x extension [-Werror,-Wc2x-extensions]
1638 | _Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 6);
| ^
| , ""
1 error generated.
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Link: https://lore.kernel.org/r/20240308001915.4060155-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-03-07 16:19:13 -08:00
|
|
|
format_args.short_string.buf,
|
|
|
|
/*event_alias=*/NULL,
|
|
|
|
/*scale_unit=*/NULL,
|
|
|
|
/*deprecated=*/false,
|
|
|
|
"Raw event descriptor",
|
|
|
|
desc,
|
|
|
|
/*long_desc=*/NULL,
|
|
|
|
format_args.long_string.buf);
|
|
|
|
|
|
|
|
strbuf_release(&format_args.short_string);
|
|
|
|
strbuf_release(&format_args.long_string);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-27 00:22:03 -07:00
|
|
|
/*
 * Report whether the PMU named @pname exists and has an event called @name.
 *
 * Returns false when perf_pmus__find() finds no such PMU, otherwise the
 * result of perf_pmu__have_event() for that PMU.
 */
bool perf_pmus__have_event(const char *pname, const char *name)
{
	struct perf_pmu *pmu = perf_pmus__find(pname);

	if (!pmu)
		return false;

	return perf_pmu__have_event(pmu, name);
}
|
|
|
|
|
2023-05-27 00:22:08 -07:00
|
|
|
/*
 * Return the number of PMUs visited by perf_pmus__scan_core().
 *
 * A non-zero result is kept in a function-local static and returned directly
 * by later calls; while the stored count is zero every call scans again.
 */
int perf_pmus__num_core_pmus(void)
{
	static int count;
	struct perf_pmu *pmu;

	if (count)
		return count;

	for (pmu = perf_pmus__scan_core(NULL); pmu != NULL; pmu = perf_pmus__scan_core(pmu))
		count++;

	return count;
}
|
|
|
|
|
2023-06-16 19:01:34 -03:00
|
|
|
static bool __perf_pmus__supports_extended_type(void)
|
|
|
|
{
|
|
|
|
struct perf_pmu *pmu = NULL;
|
|
|
|
|
|
|
|
if (perf_pmus__num_core_pmus() <= 1)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
|
|
|
|
if (!is_event_supported(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES | ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT)))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Result of __perf_pmus__supports_extended_type(), stored by the initializer below. */
static bool perf_pmus__do_support_extended_type;

/*
 * Run the extended type probe and store its result; has the void(void)
 * signature required of a pthread_once() init routine.
 */
static void perf_pmus__init_supports_extended_type(void)
{
	const bool supported = __perf_pmus__supports_extended_type();

	perf_pmus__do_support_extended_type = supported;
}
|
|
|
|
|
2023-06-01 01:29:53 -07:00
|
|
|
bool perf_pmus__supports_extended_type(void)
|
|
|
|
{
|
2023-06-16 19:01:34 -03:00
|
|
|
static pthread_once_t extended_type_once = PTHREAD_ONCE_INIT;
|
|
|
|
|
|
|
|
pthread_once(&extended_type_once, perf_pmus__init_supports_extended_type);
|
|
|
|
|
|
|
|
return perf_pmus__do_support_extended_type;
|
2023-06-01 01:29:53 -07:00
|
|
|
}
|
|
|
|
|
2025-07-18 20:05:11 -07:00
|
|
|
struct perf_pmu *perf_pmus__find_by_attr(const struct perf_event_attr *attr)
|
2023-05-27 00:22:03 -07:00
|
|
|
{
|
2025-07-18 20:05:11 -07:00
|
|
|
struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
|
|
|
|
u32 type = attr->type;
|
|
|
|
bool legacy_core_type = type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE;
|
perf stat: Fix find_stat for mixed legacy/non-legacy events
Legacy events typically don't have a PMU when added leading to
mismatched legacy/non-legacy cases in find_stat. Use evsel__find_pmu
to make sure the evsel PMU is looked up. Update the evsel__find_pmu
code to look for the PMU using the extended config type or, for legacy
hardware/hw_cache events on non-hybrid systems, just use the core PMU.
Before:
```
$ perf stat -e cycles,cpu/instructions/ -a sleep 1
Performance counter stats for 'system wide':
215,309,764 cycles
44,326,491 cpu/instructions/
1.002555314 seconds time elapsed
```
After:
```
$ perf stat -e cycles,cpu/instructions/ -a sleep 1
Performance counter stats for 'system wide':
990,676,332 cycles
1,235,762,487 cpu/instructions/ # 1.25 insn per cycle
1.002667198 seconds time elapsed
```
Fixes: 3612ca8e2935 ("perf stat: Fix the hard-coded metrics calculation on the hybrid")
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Tested-by: Leo Yan <leo.yan@arm.com>
Tested-by: Atish Patra <atishp@rivosinc.com>
Link: https://lore.kernel.org/r/20250109222109.567031-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-01-09 14:21:07 -08:00
|
|
|
|
2025-05-07 14:59:39 -07:00
|
|
|
if (!pmu && legacy_core_type && perf_pmus__supports_extended_type()) {
|
2025-07-18 20:05:11 -07:00
|
|
|
type = attr->config >> PERF_PMU_TYPE_SHIFT;
|
perf stat: Fix find_stat for mixed legacy/non-legacy events
Legacy events typically don't have a PMU when added leading to
mismatched legacy/non-legacy cases in find_stat. Use evsel__find_pmu
to make sure the evsel PMU is looked up. Update the evsel__find_pmu
code to look for the PMU using the extended config type or, for legacy
hardware/hw_cache events on non-hybrid systems, just use the core PMU.
Before:
```
$ perf stat -e cycles,cpu/instructions/ -a sleep 1
Performance counter stats for 'system wide':
215,309,764 cycles
44,326,491 cpu/instructions/
1.002555314 seconds time elapsed
```
After:
```
$ perf stat -e cycles,cpu/instructions/ -a sleep 1
Performance counter stats for 'system wide':
990,676,332 cycles
1,235,762,487 cpu/instructions/ # 1.25 insn per cycle
1.002667198 seconds time elapsed
```
Fixes: 3612ca8e2935 ("perf stat: Fix the hard-coded metrics calculation on the hybrid")
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Tested-by: Leo Yan <leo.yan@arm.com>
Tested-by: Atish Patra <atishp@rivosinc.com>
Link: https://lore.kernel.org/r/20250109222109.567031-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-01-09 14:21:07 -08:00
|
|
|
|
2025-05-07 14:59:39 -07:00
|
|
|
pmu = perf_pmus__find_by_type(type);
|
|
|
|
}
|
2025-07-18 20:05:11 -07:00
|
|
|
if (!pmu && (legacy_core_type || type == PERF_TYPE_RAW)) {
|
2025-05-07 14:59:39 -07:00
|
|
|
/*
|
|
|
|
* For legacy events, if there was no extended type info then
|
|
|
|
* assume the PMU is the first core PMU.
|
|
|
|
*
|
|
|
|
* On architectures like ARM there is no sysfs PMU with type
|
|
|
|
* PERF_TYPE_RAW, assume the RAW events are going to be handled
|
|
|
|
* by the first core PMU.
|
|
|
|
*/
|
|
|
|
pmu = perf_pmus__find_core_pmu();
|
2023-05-27 00:22:03 -07:00
|
|
|
}
|
2025-07-18 20:05:11 -07:00
|
|
|
return pmu;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct perf_pmu *evsel__find_pmu(const struct evsel *evsel)
|
|
|
|
{
|
|
|
|
struct perf_pmu *pmu = evsel->pmu;
|
|
|
|
|
|
|
|
if (pmu)
|
|
|
|
return pmu;
|
|
|
|
|
|
|
|
pmu = perf_pmus__find_by_attr(&evsel->core.attr);
|
perf stat: Fix find_stat for mixed legacy/non-legacy events
Legacy events typically don't have a PMU when added leading to
mismatched legacy/non-legacy cases in find_stat. Use evsel__find_pmu
to make sure the evsel PMU is looked up. Update the evsel__find_pmu
code to look for the PMU using the extended config type or, for legacy
hardware/hw_cache events on non-hybrid systems, just use the core PMU.
Before:
```
$ perf stat -e cycles,cpu/instructions/ -a sleep 1
Performance counter stats for 'system wide':
215,309,764 cycles
44,326,491 cpu/instructions/
1.002555314 seconds time elapsed
```
After:
```
$ perf stat -e cycles,cpu/instructions/ -a sleep 1
Performance counter stats for 'system wide':
990,676,332 cycles
1,235,762,487 cpu/instructions/ # 1.25 insn per cycle
1.002667198 seconds time elapsed
```
Fixes: 3612ca8e2935 ("perf stat: Fix the hard-coded metrics calculation on the hybrid")
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Tested-by: Leo Yan <leo.yan@arm.com>
Tested-by: Atish Patra <atishp@rivosinc.com>
Link: https://lore.kernel.org/r/20250109222109.567031-3-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-01-09 14:21:07 -08:00
|
|
|
((struct evsel *)evsel)->pmu = pmu;
|
2023-05-27 00:22:03 -07:00
|
|
|
return pmu;
|
|
|
|
}
|
2023-09-13 16:33:48 +01:00
|
|
|
|
|
|
|
/*
 * Return the first PMU produced by perf_pmus__scan_core() when the scan is
 * started from the beginning (NULL), or NULL if the scan yields nothing.
 */
struct perf_pmu *perf_pmus__find_core_pmu(void)
{
	struct perf_pmu *first_core = perf_pmus__scan_core(NULL);

	return first_core;
}
|
2024-05-02 14:35:04 -07:00
|
|
|
|
|
|
|
struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Some PMU functions read from the sysfs mount point, so care is
|
|
|
|
* needed, hence passing the eager_load flag to load things like the
|
|
|
|
* format files.
|
|
|
|
*/
|
|
|
|
return perf_pmu__lookup(&other_pmus, test_sysfs_dirfd, name, /*eager_load=*/true);
|
|
|
|
}
|
2024-09-06 22:08:17 -07:00
|
|
|
|
2025-06-24 12:03:23 -07:00
|
|
|
struct perf_pmu *perf_pmus__add_test_hwmon_pmu(const char *hwmon_dir,
|
2024-11-08 16:37:58 -08:00
|
|
|
const char *sysfs_name,
|
|
|
|
const char *name)
|
|
|
|
{
|
|
|
|
return hwmon_pmu__new(&other_pmus, hwmon_dir, sysfs_name, name);
|
|
|
|
}
|
|
|
|
|
2024-09-06 22:08:17 -07:00
|
|
|
struct perf_pmu *perf_pmus__fake_pmu(void)
|
|
|
|
{
|
|
|
|
static struct perf_pmu fake = {
|
|
|
|
.name = "fake",
|
|
|
|
.type = PERF_PMU_TYPE_FAKE,
|
|
|
|
.format = LIST_HEAD_INIT(fake.format),
|
|
|
|
};
|
|
|
|
|
|
|
|
return &fake;
|
|
|
|
}
|