2024-10-01 20:20:07 -07:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
#include "cgroup.h"
|
|
|
|
#include "counts.h"
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
#include "cputopo.h"
|
2024-10-01 20:20:07 -07:00
|
|
|
#include "evsel.h"
|
|
|
|
#include "pmu.h"
|
|
|
|
#include "print-events.h"
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
#include "smt.h"
|
2024-10-01 20:20:07 -07:00
|
|
|
#include "time-utils.h"
|
|
|
|
#include "tool_pmu.h"
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
#include "tsc.h"
|
|
|
|
#include <api/fs/fs.h>
|
|
|
|
#include <api/io.h>
|
2024-10-01 20:20:07 -07:00
|
|
|
#include <internal/threadmap.h>
|
|
|
|
#include <perf/threadmap.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <strings.h>
|
|
|
|
|
2024-10-01 20:20:08 -07:00
|
|
|
static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = {
|
2024-10-01 20:20:07 -07:00
|
|
|
NULL,
|
|
|
|
"duration_time",
|
|
|
|
"user_time",
|
|
|
|
"system_time",
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
"has_pmem",
|
|
|
|
"num_cores",
|
|
|
|
"num_cpus",
|
|
|
|
"num_cpus_online",
|
|
|
|
"num_dies",
|
|
|
|
"num_packages",
|
|
|
|
"slots",
|
|
|
|
"smt_on",
|
|
|
|
"system_tsc_freq",
|
2024-10-01 20:20:07 -07:00
|
|
|
};
|
|
|
|
|
2024-10-01 20:20:12 -07:00
|
|
|
bool tool_pmu__skip_event(const char *name __maybe_unused)
|
2024-10-01 20:20:07 -07:00
|
|
|
{
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
#if !defined(__aarch64__)
|
2024-10-01 20:20:12 -07:00
|
|
|
/* The slots event should only appear on arm64. */
|
|
|
|
if (strcasecmp(name, "slots") == 0)
|
|
|
|
return true;
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
#endif
|
2024-10-01 20:20:12 -07:00
|
|
|
#if !defined(__i386__) && !defined(__x86_64__)
|
|
|
|
/* The system_tsc_freq event should only appear on x86. */
|
|
|
|
if (strcasecmp(name, "system_tsc_freq") == 0)
|
|
|
|
return true;
|
|
|
|
#endif
|
|
|
|
return false;
|
2024-10-01 20:20:07 -07:00
|
|
|
}
|
|
|
|
|
2024-10-01 20:20:12 -07:00
|
|
|
int tool_pmu__num_skip_events(void)
|
2024-10-01 20:20:07 -07:00
|
|
|
{
|
2024-10-01 20:20:12 -07:00
|
|
|
int num = 0;
|
2024-10-01 20:20:07 -07:00
|
|
|
|
2024-10-01 20:20:12 -07:00
|
|
|
#if !defined(__aarch64__)
|
|
|
|
num++;
|
|
|
|
#endif
|
|
|
|
#if !defined(__i386__) && !defined(__x86_64__)
|
|
|
|
num++;
|
|
|
|
#endif
|
|
|
|
return num;
|
2024-10-01 20:20:07 -07:00
|
|
|
}
|
|
|
|
|
2024-10-01 20:20:12 -07:00
|
|
|
const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
|
2024-10-01 20:20:07 -07:00
|
|
|
{
|
2025-02-07 07:28:44 -08:00
|
|
|
if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
|
|
|
|
!tool_pmu__skip_event(tool_pmu__event_names[ev]))
|
2024-10-01 20:20:12 -07:00
|
|
|
return tool_pmu__event_names[ev];
|
2024-10-01 20:20:07 -07:00
|
|
|
|
2024-10-01 20:20:12 -07:00
|
|
|
return NULL;
|
2024-10-01 20:20:07 -07:00
|
|
|
}
|
|
|
|
|
2024-10-01 20:20:12 -07:00
|
|
|
enum tool_pmu_event tool_pmu__str_to_event(const char *str)
|
2024-10-01 20:20:07 -07:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2024-10-01 20:20:12 -07:00
|
|
|
if (tool_pmu__skip_event(str))
|
|
|
|
return TOOL_PMU__EVENT_NONE;
|
|
|
|
|
2024-10-01 20:20:09 -07:00
|
|
|
tool_pmu__for_each_event(i) {
|
2024-10-01 20:20:12 -07:00
|
|
|
if (!strcasecmp(str, tool_pmu__event_names[i]))
|
|
|
|
return i;
|
2024-10-01 20:20:07 -07:00
|
|
|
}
|
2024-10-01 20:20:12 -07:00
|
|
|
return TOOL_PMU__EVENT_NONE;
|
2024-10-01 20:20:07 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
bool perf_pmu__is_tool(const struct perf_pmu *pmu)
|
|
|
|
{
|
|
|
|
return pmu && pmu->type == PERF_PMU_TYPE_TOOL;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool evsel__is_tool(const struct evsel *evsel)
|
|
|
|
{
|
|
|
|
return perf_pmu__is_tool(evsel->pmu);
|
|
|
|
}
|
|
|
|
|
2024-10-01 20:20:08 -07:00
|
|
|
enum tool_pmu_event evsel__tool_event(const struct evsel *evsel)
|
2024-10-01 20:20:07 -07:00
|
|
|
{
|
|
|
|
if (!evsel__is_tool(evsel))
|
2024-10-01 20:20:08 -07:00
|
|
|
return TOOL_PMU__EVENT_NONE;
|
2024-10-01 20:20:07 -07:00
|
|
|
|
2024-10-01 20:20:08 -07:00
|
|
|
return (enum tool_pmu_event)evsel->core.attr.config;
|
2024-10-01 20:20:07 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
const char *evsel__tool_pmu_event_name(const struct evsel *evsel)
|
|
|
|
{
|
2024-10-01 20:20:09 -07:00
|
|
|
return tool_pmu__event_to_str(evsel->core.attr.config);
|
2024-10-01 20:20:07 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool read_until_char(struct io *io, char e)
|
|
|
|
{
|
|
|
|
int c;
|
|
|
|
|
|
|
|
do {
|
|
|
|
c = io__get_char(io);
|
|
|
|
if (c == -1)
|
|
|
|
return false;
|
|
|
|
} while (c != e);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val)
|
|
|
|
{
|
|
|
|
char buf[256];
|
|
|
|
struct io io;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
io__init(&io, fd, buf, sizeof(buf));
|
|
|
|
|
|
|
|
/* Skip lines to relevant CPU. */
|
|
|
|
for (i = -1; i < cpu.cpu; i++) {
|
|
|
|
if (!read_until_char(&io, '\n'))
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/* Skip to "cpu". */
|
|
|
|
if (io__get_char(&io) != 'c') return -EINVAL;
|
|
|
|
if (io__get_char(&io) != 'p') return -EINVAL;
|
|
|
|
if (io__get_char(&io) != 'u') return -EINVAL;
|
|
|
|
|
|
|
|
/* Skip N of cpuN. */
|
|
|
|
if (!read_until_char(&io, ' '))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
i = 1;
|
|
|
|
while (true) {
|
|
|
|
if (io__get_dec(&io, val) != ' ')
|
|
|
|
break;
|
|
|
|
if (field == i)
|
|
|
|
return 0;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int read_pid_stat_field(int fd, int field, __u64 *val)
|
|
|
|
{
|
|
|
|
char buf[256];
|
|
|
|
struct io io;
|
|
|
|
int c, i;
|
|
|
|
|
|
|
|
io__init(&io, fd, buf, sizeof(buf));
|
|
|
|
if (io__get_dec(&io, val) != ' ')
|
|
|
|
return -EINVAL;
|
|
|
|
if (field == 1)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Skip comm. */
|
|
|
|
if (io__get_char(&io) != '(' || !read_until_char(&io, ')'))
|
|
|
|
return -EINVAL;
|
|
|
|
if (field == 2)
|
|
|
|
return -EINVAL; /* String can't be returned. */
|
|
|
|
|
|
|
|
/* Skip state */
|
|
|
|
if (io__get_char(&io) != ' ' || io__get_char(&io) == -1)
|
|
|
|
return -EINVAL;
|
|
|
|
if (field == 3)
|
|
|
|
return -EINVAL; /* String can't be returned. */
|
|
|
|
|
|
|
|
/* Loop over numeric fields*/
|
|
|
|
if (io__get_char(&io) != ' ')
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
i = 4;
|
|
|
|
while (true) {
|
|
|
|
c = io__get_dec(&io, val);
|
|
|
|
if (c == -1)
|
|
|
|
return -EINVAL;
|
|
|
|
if (c == -2) {
|
|
|
|
/* Assume a -ve was read */
|
|
|
|
c = io__get_dec(&io, val);
|
|
|
|
*val *= -1;
|
|
|
|
}
|
|
|
|
if (c != ' ')
|
|
|
|
return -EINVAL;
|
|
|
|
if (field == i)
|
|
|
|
return 0;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
int evsel__tool_pmu_prepare_open(struct evsel *evsel,
|
|
|
|
struct perf_cpu_map *cpus,
|
|
|
|
int nthreads)
|
|
|
|
{
|
2024-10-01 20:20:08 -07:00
|
|
|
if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
|
|
|
|
evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) &&
|
2024-10-01 20:20:07 -07:00
|
|
|
!evsel->start_times) {
|
|
|
|
evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus),
|
|
|
|
nthreads,
|
|
|
|
sizeof(__u64));
|
|
|
|
if (!evsel->start_times)
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
|
|
|
|
|
|
|
|
int evsel__tool_pmu_open(struct evsel *evsel,
|
|
|
|
struct perf_thread_map *threads,
|
|
|
|
int start_cpu_map_idx, int end_cpu_map_idx)
|
|
|
|
{
|
2024-10-01 20:20:08 -07:00
|
|
|
enum tool_pmu_event ev = evsel__tool_event(evsel);
|
2024-10-01 20:20:07 -07:00
|
|
|
int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno;
|
|
|
|
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
if (ev == TOOL_PMU__EVENT_NUM_CPUS)
|
|
|
|
return 0;
|
|
|
|
|
2024-10-01 20:20:08 -07:00
|
|
|
if (ev == TOOL_PMU__EVENT_DURATION_TIME) {
|
2024-10-01 20:20:07 -07:00
|
|
|
if (evsel->core.attr.sample_period) /* no sampling */
|
|
|
|
return -EINVAL;
|
|
|
|
evsel->start_time = rdclock();
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (evsel->cgrp)
|
|
|
|
pid = evsel->cgrp->fd;
|
|
|
|
|
|
|
|
nthreads = perf_thread_map__nr(threads);
|
|
|
|
for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
|
|
|
|
for (thread = 0; thread < nthreads; thread++) {
|
|
|
|
if (thread >= nthreads)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (!evsel->cgrp && !evsel->core.system_wide)
|
|
|
|
pid = perf_thread_map__pid(threads, thread);
|
|
|
|
|
2024-10-01 20:20:08 -07:00
|
|
|
if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) {
|
|
|
|
bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME;
|
2024-10-01 20:20:07 -07:00
|
|
|
__u64 *start_time = NULL;
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
if (evsel->core.attr.sample_period) {
|
|
|
|
/* no sampling */
|
|
|
|
err = -EINVAL;
|
|
|
|
goto out_close;
|
|
|
|
}
|
|
|
|
if (pid > -1) {
|
|
|
|
char buf[64];
|
|
|
|
|
|
|
|
snprintf(buf, sizeof(buf), "/proc/%d/stat", pid);
|
|
|
|
fd = open(buf, O_RDONLY);
|
|
|
|
evsel->pid_stat = true;
|
|
|
|
} else {
|
|
|
|
fd = open("/proc/stat", O_RDONLY);
|
|
|
|
}
|
|
|
|
FD(evsel, idx, thread) = fd;
|
|
|
|
if (fd < 0) {
|
|
|
|
err = -errno;
|
|
|
|
goto out_close;
|
|
|
|
}
|
|
|
|
start_time = xyarray__entry(evsel->start_times, idx, thread);
|
|
|
|
if (pid > -1) {
|
|
|
|
err = read_pid_stat_field(fd, system ? 15 : 14,
|
|
|
|
start_time);
|
|
|
|
} else {
|
|
|
|
struct perf_cpu cpu;
|
|
|
|
|
|
|
|
cpu = perf_cpu_map__cpu(evsel->core.cpus, idx);
|
|
|
|
err = read_stat_field(fd, cpu, system ? 3 : 1,
|
|
|
|
start_time);
|
|
|
|
}
|
|
|
|
if (err)
|
|
|
|
goto out_close;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
out_close:
|
|
|
|
if (err)
|
|
|
|
threads->err_thread = thread;
|
|
|
|
|
|
|
|
old_errno = errno;
|
|
|
|
do {
|
|
|
|
while (--thread >= 0) {
|
|
|
|
if (FD(evsel, idx, thread) >= 0)
|
|
|
|
close(FD(evsel, idx, thread));
|
|
|
|
FD(evsel, idx, thread) = -1;
|
|
|
|
}
|
|
|
|
thread = nthreads;
|
|
|
|
} while (--idx >= 0);
|
|
|
|
errno = old_errno;
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
#if !defined(__i386__) && !defined(__x86_64__)
|
|
|
|
u64 arch_get_tsc_freq(void)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(__aarch64__)
|
|
|
|
u64 tool_pmu__cpu_slots_per_cycle(void)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static bool has_pmem(void)
|
|
|
|
{
|
|
|
|
static bool has_pmem, cached;
|
|
|
|
const char *sysfs = sysfs__mountpoint();
|
|
|
|
char path[PATH_MAX];
|
|
|
|
|
|
|
|
if (!cached) {
|
|
|
|
snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs);
|
|
|
|
has_pmem = access(path, F_OK) == 0;
|
|
|
|
cached = true;
|
|
|
|
}
|
|
|
|
return has_pmem;
|
|
|
|
}
|
|
|
|
|
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-07-18 20:05:07 -07:00
|
|
|
bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result)
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
{
|
|
|
|
const struct cpu_topology *topology;
|
|
|
|
|
|
|
|
switch (ev) {
|
|
|
|
case TOOL_PMU__EVENT_HAS_PMEM:
|
|
|
|
*result = has_pmem() ? 1 : 0;
|
|
|
|
return true;
|
|
|
|
|
|
|
|
case TOOL_PMU__EVENT_NUM_CORES:
|
|
|
|
topology = online_topology();
|
|
|
|
*result = topology->core_cpus_lists;
|
|
|
|
return true;
|
|
|
|
|
|
|
|
case TOOL_PMU__EVENT_NUM_CPUS:
|
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-07-18 20:05:07 -07:00
|
|
|
if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
|
|
|
|
/* No evsel to be specific to. */
|
|
|
|
*result = cpu__max_present_cpu().cpu;
|
|
|
|
} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
|
|
|
|
/* Evsel just has specific CPUs. */
|
|
|
|
*result = perf_cpu_map__nr(evsel->core.cpus);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* "Any CPU" event that can be scheduled on any CPU in
|
|
|
|
* the PMU's cpumask. The PMU cpumask should be saved in
|
2025-07-18 20:05:08 -07:00
|
|
|
* pmu_cpus. If not present fall back to max.
|
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-07-18 20:05:07 -07:00
|
|
|
*/
|
2025-07-18 20:05:08 -07:00
|
|
|
if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus))
|
|
|
|
*result = perf_cpu_map__nr(evsel->core.pmu_cpus);
|
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-07-18 20:05:07 -07:00
|
|
|
else
|
|
|
|
*result = cpu__max_present_cpu().cpu;
|
|
|
|
}
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
return true;
|
|
|
|
|
|
|
|
case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
|
|
|
|
struct perf_cpu_map *online = cpu_map__online();
|
|
|
|
|
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-07-18 20:05:07 -07:00
|
|
|
if (!online)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
|
|
|
|
/* No evsel to be specific to. */
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
*result = perf_cpu_map__nr(online);
|
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-07-18 20:05:07 -07:00
|
|
|
} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
|
|
|
|
/* Evsel just has specific CPUs. */
|
|
|
|
struct perf_cpu_map *tmp =
|
|
|
|
perf_cpu_map__intersect(online, evsel->core.cpus);
|
|
|
|
|
|
|
|
*result = perf_cpu_map__nr(tmp);
|
|
|
|
perf_cpu_map__put(tmp);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* "Any CPU" event that can be scheduled on any CPU in
|
|
|
|
* the PMU's cpumask. The PMU cpumask should be saved in
|
2025-07-18 20:05:08 -07:00
|
|
|
* pmu_cpus, if not present then just the online cpu
|
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-07-18 20:05:07 -07:00
|
|
|
* mask.
|
|
|
|
*/
|
2025-07-18 20:05:08 -07:00
|
|
|
if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) {
|
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-07-18 20:05:07 -07:00
|
|
|
struct perf_cpu_map *tmp =
|
2025-07-18 20:05:08 -07:00
|
|
|
perf_cpu_map__intersect(online, evsel->core.pmu_cpus);
|
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-07-18 20:05:07 -07:00
|
|
|
|
|
|
|
*result = perf_cpu_map__nr(tmp);
|
|
|
|
perf_cpu_map__put(tmp);
|
|
|
|
} else {
|
|
|
|
*result = perf_cpu_map__nr(online);
|
|
|
|
}
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
}
|
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-07-18 20:05:07 -07:00
|
|
|
perf_cpu_map__put(online);
|
|
|
|
return true;
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
}
|
|
|
|
case TOOL_PMU__EVENT_NUM_DIES:
|
|
|
|
topology = online_topology();
|
|
|
|
*result = topology->die_cpus_lists;
|
|
|
|
return true;
|
|
|
|
|
|
|
|
case TOOL_PMU__EVENT_NUM_PACKAGES:
|
|
|
|
topology = online_topology();
|
|
|
|
*result = topology->package_cpus_lists;
|
|
|
|
return true;
|
|
|
|
|
|
|
|
case TOOL_PMU__EVENT_SLOTS:
|
|
|
|
*result = tool_pmu__cpu_slots_per_cycle();
|
|
|
|
return *result ? true : false;
|
|
|
|
|
|
|
|
case TOOL_PMU__EVENT_SMT_ON:
|
|
|
|
*result = smt_on() ? 1 : 0;
|
|
|
|
return true;
|
|
|
|
|
|
|
|
case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
|
|
|
|
*result = arch_get_tsc_freq();
|
|
|
|
return true;
|
|
|
|
|
|
|
|
case TOOL_PMU__EVENT_NONE:
|
|
|
|
case TOOL_PMU__EVENT_DURATION_TIME:
|
|
|
|
case TOOL_PMU__EVENT_USER_TIME:
|
|
|
|
case TOOL_PMU__EVENT_SYSTEM_TIME:
|
|
|
|
case TOOL_PMU__EVENT_MAX:
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-10-01 20:20:09 -07:00
|
|
|
int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
|
2024-10-01 20:20:07 -07:00
|
|
|
{
|
|
|
|
__u64 *start_time, cur_time, delta_start;
|
2024-10-14 10:34:17 -07:00
|
|
|
u64 val;
|
2024-10-01 20:20:07 -07:00
|
|
|
int fd, err = 0;
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
struct perf_counts_values *count, *old_count = NULL;
|
2024-10-01 20:20:07 -07:00
|
|
|
bool adjust = false;
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
enum tool_pmu_event ev = evsel__tool_event(evsel);
|
2024-10-01 20:20:07 -07:00
|
|
|
|
|
|
|
count = perf_counts(evsel->counts, cpu_map_idx, thread);
|
|
|
|
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
switch (ev) {
|
|
|
|
case TOOL_PMU__EVENT_HAS_PMEM:
|
|
|
|
case TOOL_PMU__EVENT_NUM_CORES:
|
|
|
|
case TOOL_PMU__EVENT_NUM_CPUS:
|
|
|
|
case TOOL_PMU__EVENT_NUM_CPUS_ONLINE:
|
|
|
|
case TOOL_PMU__EVENT_NUM_DIES:
|
|
|
|
case TOOL_PMU__EVENT_NUM_PACKAGES:
|
|
|
|
case TOOL_PMU__EVENT_SLOTS:
|
|
|
|
case TOOL_PMU__EVENT_SMT_ON:
|
|
|
|
case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ:
|
|
|
|
if (evsel->prev_raw_counts)
|
|
|
|
old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
|
|
|
|
val = 0;
|
|
|
|
if (cpu_map_idx == 0 && thread == 0) {
|
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-07-18 20:05:07 -07:00
|
|
|
if (!tool_pmu__read_event(ev, evsel, &val)) {
|
perf tool_pmu: Move expr literals to tool_pmu
Add the expr literals like "#smt_on" as tool events, this allows stat
events to give the values. On my laptop with hyperthreading enabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
16 num_cpus_online
1 num_dies
1 num_packages
1 smt_on
2,496,000,000 system_tsc_freq
0.001113637 seconds time elapsed
0.001218000 seconds user
0.000000000 seconds sys
```
And with hyperthreading disabled:
```
$ perf stat -e "has_pmem,num_cores,num_cpus,num_cpus_online,num_dies,num_packages,smt_on,system_tsc_freq" true
Performance counter stats for 'true':
0 has_pmem
8 num_cores
16 num_cpus
8 num_cpus_online
1 num_dies
1 num_packages
0 smt_on
2,496,000,000 system_tsc_freq
0.000802115 seconds time elapsed
0.000000000 seconds user
0.000806000 seconds sys
```
As zero matters for these values, in stat-display
should_skip_zero_counter only skip the zero value if it is not the
first aggregation index.
The tool event implementations are used in expr but not evaluated as
events for simplicity. Also core_wide isn't made a tool event as it
requires command line parameters.
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20241002032016.333748-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2024-10-01 20:20:10 -07:00
|
|
|
count->lost++;
|
|
|
|
val = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (old_count) {
|
|
|
|
count->val = old_count->val + val;
|
|
|
|
count->run = old_count->run + 1;
|
|
|
|
count->ena = old_count->ena + 1;
|
|
|
|
} else {
|
|
|
|
count->val = val;
|
|
|
|
count->run++;
|
|
|
|
count->ena++;
|
|
|
|
}
|
|
|
|
return 0;
|
2024-10-01 20:20:08 -07:00
|
|
|
case TOOL_PMU__EVENT_DURATION_TIME:
|
2024-10-01 20:20:07 -07:00
|
|
|
/*
|
|
|
|
* Pretend duration_time is only on the first CPU and thread, or
|
|
|
|
* else aggregation will scale duration_time by the number of
|
|
|
|
* CPUs/threads.
|
|
|
|
*/
|
|
|
|
start_time = &evsel->start_time;
|
|
|
|
if (cpu_map_idx == 0 && thread == 0)
|
|
|
|
cur_time = rdclock();
|
|
|
|
else
|
|
|
|
cur_time = *start_time;
|
|
|
|
break;
|
2024-10-01 20:20:08 -07:00
|
|
|
case TOOL_PMU__EVENT_USER_TIME:
|
|
|
|
case TOOL_PMU__EVENT_SYSTEM_TIME: {
|
|
|
|
bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME;
|
2024-10-01 20:20:07 -07:00
|
|
|
|
|
|
|
start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread);
|
|
|
|
fd = FD(evsel, cpu_map_idx, thread);
|
|
|
|
lseek(fd, SEEK_SET, 0);
|
|
|
|
if (evsel->pid_stat) {
|
|
|
|
/* The event exists solely on 1 CPU. */
|
|
|
|
if (cpu_map_idx == 0)
|
|
|
|
err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time);
|
|
|
|
else
|
|
|
|
cur_time = 0;
|
|
|
|
} else {
|
|
|
|
/* The event is for all threads. */
|
|
|
|
if (thread == 0) {
|
|
|
|
struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus,
|
|
|
|
cpu_map_idx);
|
|
|
|
|
|
|
|
err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time);
|
|
|
|
} else {
|
|
|
|
cur_time = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
adjust = true;
|
|
|
|
break;
|
|
|
|
}
|
2024-10-01 20:20:08 -07:00
|
|
|
case TOOL_PMU__EVENT_NONE:
|
|
|
|
case TOOL_PMU__EVENT_MAX:
|
2024-10-01 20:20:07 -07:00
|
|
|
default:
|
|
|
|
err = -EINVAL;
|
|
|
|
}
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
delta_start = cur_time - *start_time;
|
|
|
|
if (adjust) {
|
|
|
|
__u64 ticks_per_sec = sysconf(_SC_CLK_TCK);
|
|
|
|
|
|
|
|
delta_start *= 1000000000 / ticks_per_sec;
|
|
|
|
}
|
|
|
|
count->val = delta_start;
|
|
|
|
count->lost = 0;
|
2025-04-22 22:03:58 -07:00
|
|
|
/*
|
|
|
|
* The values of enabled and running must make a ratio of 100%. The
|
|
|
|
* exact values don't matter as long as they are non-zero to avoid
|
|
|
|
* issues with evsel__count_has_error.
|
|
|
|
*/
|
|
|
|
count->ena++;
|
|
|
|
count->run++;
|
2024-10-01 20:20:07 -07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2025-02-26 10:41:00 +00:00
|
|
|
struct perf_pmu *tool_pmu__new(void)
|
2024-10-01 20:20:07 -07:00
|
|
|
{
|
2025-02-26 10:41:00 +00:00
|
|
|
struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));
|
|
|
|
|
2025-03-19 13:28:20 +01:00
|
|
|
if (!tool)
|
2025-05-12 12:46:21 -07:00
|
|
|
return NULL;
|
2025-03-19 13:28:20 +01:00
|
|
|
|
2025-05-12 12:46:21 -07:00
|
|
|
if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
|
|
|
|
perf_pmu__delete(tool);
|
|
|
|
return NULL;
|
|
|
|
}
|
2025-02-26 10:41:00 +00:00
|
|
|
tool->events_table = find_core_events_table("common", "common");
|
|
|
|
return tool;
|
2024-10-01 20:20:07 -07:00
|
|
|
}
|