mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

Add a new summary mode to collect stats for each cgroup.

  $ sudo ./perf trace -as --bpf-summary --summary-mode=cgroup -- sleep 1

   Summary of events:

   cgroup /user.slice/user-657345.slice/user@657345.service/session.slice/org.gnome.Shell@x11.service, 535 events

     syscall            calls  errors     total       min       avg       max     stddev
                                         (msec)    (msec)    (msec)    (msec)        (%)
     --------------- --------  ------  -------- --------- --------- ---------     ------
     ppoll                 15       0   373.600     0.004    24.907   197.491     55.26%
     poll                  15       0     1.325     0.001     0.088     0.369     38.76%
     close                 66       0     0.567     0.007     0.009     0.026      3.55%
     write                150       0     0.471     0.001     0.003     0.010      3.29%
     recvmsg               94      83     0.290     0.000     0.003     0.037     16.39%
     ioctl                 26       0     0.237     0.001     0.009     0.096     50.13%
     timerfd_create        66       0     0.236     0.003     0.004     0.024      8.92%
     timerfd_settime       70       0     0.160     0.001     0.002     0.012      7.66%
     writev                10       0     0.118     0.001     0.012     0.019     18.17%
     read                   9       0     0.021     0.001     0.002     0.004     14.07%
     getpid                14       0     0.019     0.000     0.001     0.004     20.28%

   cgroup /system.slice/polkit.service, 94 events

     syscall            calls  errors     total       min       avg       max     stddev
                                         (msec)    (msec)    (msec)    (msec)        (%)
     --------------- --------  ------  -------- --------- --------- ---------     ------
     ppoll                 22       0    19.811     0.000     0.900     9.273     63.88%
     write                 30       0     0.040     0.001     0.001     0.003     12.09%
     recvmsg               12       0     0.018     0.001     0.002     0.006     28.15%
     read                  18       0     0.013     0.000     0.001     0.003     21.99%
     poll                  12       0     0.006     0.000     0.001     0.001      4.48%

   cgroup /user.slice/user-657345.slice/user@657345.service/app.slice/app-org.gnome.Terminal.slice/gnome-terminal-server.service, 21 events

     syscall            calls  errors     total       min       avg       max     stddev
                                         (msec)    (msec)    (msec)    (msec)        (%)
     --------------- --------  ------  -------- --------- --------- ---------     ------
     ppoll                  4       0    17.476     0.003     4.369    13.298     69.65%
     recvmsg               15      12     0.068     0.002     0.005     0.014     26.53%
     writev                 1       0     0.033     0.033     0.033     0.033      0.00%
     poll                   1       0     0.005     0.005     0.005     0.005      0.00%
   ...

It works only for --bpf-summary for now.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Howard Chu <howardchu95@gmail.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20250501225337.928470-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
153 lines
3.4 KiB
C
153 lines
3.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Trace raw_syscalls tracepoints to collect system call statistics.
|
|
*/
|
|
|
|
#include "vmlinux.h"
|
|
#include "syscall_summary.h"
|
|
|
|
#include <bpf/bpf_helpers.h>
|
|
#include <bpf/bpf_tracing.h>
|
|
#include <bpf/bpf_core_read.h>
|
|
|
|
/* This is to calculate a delta between sys-enter and sys-exit for each thread */
|
|
struct syscall_trace {
|
|
int nr; /* syscall number is only available at sys-enter */
|
|
int unused;
|
|
u64 timestamp;
|
|
};
|
|
|
|
/* max_entries used by both syscall_trace_map and syscall_stats_map. */
#define MAX_ENTRIES (128 * 1024)
|
|
|
|
struct syscall_trace_map {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__type(key, int); /* tid */
|
|
__type(value, struct syscall_trace);
|
|
__uint(max_entries, MAX_ENTRIES);
|
|
} syscall_trace_map SEC(".maps");
|
|
|
|
struct syscall_stats_map {
|
|
__uint(type, BPF_MAP_TYPE_HASH);
|
|
__type(key, struct syscall_key);
|
|
__type(value, struct syscall_stats);
|
|
__uint(max_entries, MAX_ENTRIES);
|
|
} syscall_stats_map SEC(".maps");
|
|
|
|
/* Both tracepoint handlers return immediately while this is zero. */
int enabled; /* controlled from userspace */
|
|
|
|
/*
 * Selects what sys_exit uses as the statistics key: the thread id
 * (SYSCALL_AGGR_THREAD), the cgroup id (SYSCALL_AGGR_CGROUP), or otherwise
 * the current CPU number.
 */
const volatile enum syscall_aggr_mode aggr_mode;
|
|
/*
 * Non-zero: get_current_cgroup_id() returns bpf_get_current_cgroup_id().
 * Zero: it reads the id from the task's perf_event subsystem cgroup instead.
 */
const volatile int use_cgroup_v2;
|
|
|
|
/*
 * Cached index of the perf_event cgroup subsystem.  -1 means "not resolved
 * yet"; get_current_cgroup_id() fills it in on first use.
 */
int perf_subsys_id = -1;
|
|
|
|
static inline __u64 get_current_cgroup_id(void)
|
|
{
|
|
struct task_struct *task;
|
|
struct cgroup *cgrp;
|
|
|
|
if (use_cgroup_v2)
|
|
return bpf_get_current_cgroup_id();
|
|
|
|
task = bpf_get_current_task_btf();
|
|
|
|
if (perf_subsys_id == -1) {
|
|
#if __has_builtin(__builtin_preserve_enum_value)
|
|
perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
|
|
perf_event_cgrp_id);
|
|
#else
|
|
perf_subsys_id = perf_event_cgrp_id;
|
|
#endif
|
|
}
|
|
|
|
cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup);
|
|
return BPF_CORE_READ(cgrp, kn, id);
|
|
}
|
|
|
|
/*
 * Fold one completed system call into syscall_stats_map.
 *
 * @cpu_or_tid: CPU number or thread id part of the key
 * @cgroup_id:  cgroup id part of the key
 * @nr:         syscall number part of the key
 * @duration:   time spent in the call; only positive values contribute to
 *              the time statistics
 * @ret:        syscall return value; a negative value is counted as an error
 *
 * The entry for the key is created zero-filled on first use.  If it still
 * cannot be found after the insert attempt, the sample is dropped.
 */
static void update_stats(int cpu_or_tid, u64 cgroup_id, int nr, s64 duration,
			 long ret)
{
	struct syscall_key key = {
		.cpu_or_tid = cpu_or_tid,
		.cgroup = cgroup_id,
		.nr = nr,
	};
	struct syscall_stats *stats;

	stats = bpf_map_lookup_elem(&syscall_stats_map, &key);
	if (stats == NULL) {
		struct syscall_stats zero = {};

		/*
		 * BPF_NOEXIST: never overwrite an entry that already exists
		 * (e.g. one created concurrently); the lookup below picks up
		 * whichever entry is in the map.
		 */
		bpf_map_update_elem(&syscall_stats_map, &key, &zero, BPF_NOEXIST);
		stats = bpf_map_lookup_elem(&syscall_stats_map, &key);
		if (stats == NULL)
			return;
	}

	__sync_fetch_and_add(&stats->count, 1);
	if (ret < 0)
		__sync_fetch_and_add(&stats->error, 1);

	if (duration <= 0)
		return;

	__sync_fetch_and_add(&stats->total_time, duration);
	/*
	 * Square in unsigned arithmetic: a signed 64-bit multiply overflows
	 * (undefined behaviour) for calls longer than roughly 3 seconds,
	 * whereas the unsigned product has well-defined wrap-around.
	 */
	__sync_fetch_and_add(&stats->squared_sum,
			     (u64)duration * (u64)duration);

	/*
	 * NOTE(review): unlike the counters above, min/max are updated with
	 * plain loads and stores.
	 */
	if (stats->max_time < duration)
		stats->max_time = duration;
	/* min_time == 0 means no duration has been recorded yet. */
	if (stats->min_time > duration || stats->min_time == 0)
		stats->min_time = duration;
}
|
|
|
|
/*
 * sys-enter handler: remember the syscall number and the entry time of the
 * current thread so that sys_exit can compute the elapsed time.
 */
SEC("tp_btf/sys_enter")
int sys_enter(u64 *ctx)
{
	struct syscall_trace entry;
	int tid;

	if (!enabled)
		return 0;

	entry.unused = 0;
	entry.nr = ctx[1]; /* syscall number */
	entry.timestamp = bpf_ktime_get_ns();

	/* Converting to int keeps the low 32 bits, used here as the tid. */
	tid = bpf_get_current_pid_tgid();

	/* BPF_ANY: replace whatever entry this thread may have left behind. */
	bpf_map_update_elem(&syscall_trace_map, &tid, &entry, BPF_ANY);
	return 0;
}
|
|
|
|
/*
 * sys-exit handler: pair the exit with the entry saved by sys_enter for this
 * thread, account the call in the statistics map, then drop the saved entry.
 * Exits without a saved entry are ignored.
 */
SEC("tp_btf/sys_exit")
int sys_exit(u64 *ctx)
{
	struct syscall_trace *entry;
	long ret = ctx[1]; /* return value of the syscall */
	u64 cgroup_id = 0;
	int stats_key = 0;
	s64 elapsed;
	int tid;

	if (!enabled)
		return 0;

	tid = bpf_get_current_pid_tgid();
	entry = bpf_map_lookup_elem(&syscall_trace_map, &tid);
	if (!entry)
		return 0;

	/* Pick the key component that matches the aggregation mode. */
	switch (aggr_mode) {
	case SYSCALL_AGGR_THREAD:
		stats_key = tid;
		break;
	case SYSCALL_AGGR_CGROUP:
		cgroup_id = get_current_cgroup_id();
		break;
	default:
		stats_key = bpf_get_smp_processor_id();
		break;
	}

	elapsed = bpf_ktime_get_ns() - entry->timestamp;
	update_stats(stats_key, cgroup_id, entry->nr, elapsed, ret);

	bpf_map_delete_elem(&syscall_trace_map, &tid);
	return 0;
}
|
|
|
|
/* License string for this BPF object, placed in the "license" section. */
char _license[] SEC("license") = "GPL";
|