linux/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
Namhyung Kim 13f35928a4 perf lock contention: Symbolize zone->lock using BTF
The struct zone is embedded in struct pglist_data, which can be allocated
for each NUMA node early in the boot process.  As it's neither a slab object
nor a global lock, it was not symbolized.

Since zone->lock is often contended, it would be nice to be able to
symbolize it.  On NUMA systems, the node_data array holds pointers to
struct pglist_data.  By following each pointer, perf can calculate the
address of each zone and its lock using BTF.  On UMA systems, it can just
use contig_page_data and its zones.

The following example shows the zone lock contention at the end.

  $ sudo ./perf lock con -abl -E 5 -- ./perf bench sched messaging
  # Running 'sched/messaging' benchmark:
  # 20 sender and receiver processes per group
  # 10 groups == 400 processes run

       Total time: 0.038 [sec]
   contended   total wait     max wait     avg wait            address   symbol

        5167     18.17 ms     10.27 us      3.52 us   ffff953340052d00   &kmem_cache_node (spinlock)
          38     11.75 ms    465.49 us    309.13 us   ffff95334060c480   &sock_inode_cache (spinlock)
        3916     10.13 ms     10.43 us      2.59 us   ffff953342aecb40   &kmem_cache_node (spinlock)
        2963     10.02 ms     13.75 us      3.38 us   ffff9533d2344098   &kmalloc-rnd-08-2k (spinlock)
         216      5.05 ms     99.49 us     23.39 us   ffff9542bf7d65d0   zone_lock (spinlock)

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Cc: linux-mm@kvack.org
Link: https://lore.kernel.org/r/20250401063055.7431-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-04-29 12:23:53 -03:00

215 lines
4.8 KiB
C

#ifndef __VMLINUX_H
#define __VMLINUX_H
#include <linux/stddef.h> // for define __always_inline
#include <linux/bpf.h>
#include <linux/types.h>
#include <linux/perf_event.h>
#include <stdbool.h>
// Non-UAPI kernel data structures, used in the .bpf.c BPF tool components.
// Only the fields used by these tools are declared, preserving the access
// index so that libbpf can fix up the offsets with the ones used in the
// kernel when loading the BPF bytecode, if they differ from what is used here.
/* Kernel-style short names for the fixed-width UAPI integer types. */
typedef __u8	u8;
typedef __u32	u32;
typedef __s32	s32;
typedef __u64	u64;
typedef __s64	s64;

/* Process ID and 64-bit time value types used by the declarations below. */
typedef int	pid_t;
typedef __s64	time64_t;
/*
 * Time value split into a seconds part and a nanoseconds part.
 * Declared without the preserve_access_index attribute.
 */
struct timespec64 {
	time64_t	tv_sec;
	long		tv_nsec;
};
/*
 * Only the perf_event cgroup subsystem ID is declared.
 * NOTE(review): the value 8 is hard-coded here; presumably the BPF code
 * resolves the running kernel's value at load time -- confirm in the users.
 */
enum cgroup_subsys_id {
	perf_event_cgrp_id = 8,
};
/*
 * Softirq vector numbers, counted up from zero; NR_SOFTIRQS is the number
 * of vectors declared before it.
 */
enum {
	HI_SOFTIRQ = 0,
	TIMER_SOFTIRQ,
	NET_TX_SOFTIRQ,
	NET_RX_SOFTIRQ,
	BLOCK_SOFTIRQ,
	IRQ_POLL_SOFTIRQ,
	TASKLET_SOFTIRQ,
	SCHED_SOFTIRQ,
	HRTIMER_SOFTIRQ,
	RCU_SOFTIRQ,	/* Preferably, RCU should always be the last softirq */

	NR_SOFTIRQS
};
/* 64-bit atomic counter: a struct wrapping a single s64 value. */
typedef struct {
	s64	counter;
} __attribute__((preserve_access_index)) atomic64_t;

/* atomic_long_t is declared here as an alias of the 64-bit atomic type. */
typedef atomic64_t atomic_long_t;
/*
 * Raw spinlock, reduced to a single int member; offsets are relocated by
 * libbpf through preserve_access_index.
 */
struct raw_spinlock {
	int	rawlock;
} __attribute__((preserve_access_index));

typedef struct raw_spinlock raw_spinlock_t;
/* Spinlock type: an anonymous struct wrapping one raw_spinlock member. */
typedef struct {
	struct raw_spinlock	rlock;
} __attribute__((preserve_access_index)) spinlock_t;
/* Partial sighand_struct: only the siglock spinlock is declared. */
struct sighand_struct {
	spinlock_t	siglock;
} __attribute__((preserve_access_index));
/* Partial rw_semaphore: only the atomic 'owner' word is declared. */
struct rw_semaphore {
	atomic_long_t	owner;
} __attribute__((preserve_access_index));
/* Partial mutex: only the atomic 'owner' word is declared. */
struct mutex {
	atomic_long_t	owner;
} __attribute__((preserve_access_index));
/* Partial kernfs_node: only the 64-bit 'id' member is declared. */
struct kernfs_node {
	u64	id;
} __attribute__((preserve_access_index));
/*
 * Partial cgroup: a pointer to its kernfs_node plus the integer 'level'.
 * Member offsets are relocated by libbpf through preserve_access_index.
 */
struct cgroup {
	struct kernfs_node	*kn;
	int			level;
} __attribute__((preserve_access_index));
/* Partial cgroup_subsys_state: only the pointer to its cgroup is declared. */
struct cgroup_subsys_state {
	struct cgroup	*cgroup;
} __attribute__((preserve_access_index));
/*
 * Partial css_set: an array of subsystem state pointers and the 'dfl_cgrp'
 * cgroup pointer.
 * NOTE(review): the array length is fixed at 13 here; presumably large
 * enough for the subsystem indices the tools use -- confirm.
 */
struct css_set {
	struct cgroup_subsys_state	*subsys[13];
	struct cgroup			*dfl_cgrp;
} __attribute__((preserve_access_index));
/* Partial mm_struct: only the embedded 'mmap_lock' rw_semaphore is declared. */
struct mm_struct {
	struct rw_semaphore	mmap_lock;
} __attribute__((preserve_access_index));
/*
 * Partial view of task_struct: just the members the BPF tools access.
 * Offsets are relocated by libbpf through preserve_access_index, so the
 * layout here does not have to match the running kernel.
 */
struct task_struct {
	unsigned int		flags;
	struct mm_struct	*mm;
	pid_t			pid;
	pid_t			tgid;
	char			comm[16];
	struct sighand_struct	*sighand;
	struct css_set		*cgroups;
} __attribute__((preserve_access_index));
/*
 * Header embedded as the first member ('ent') of every trace_event_raw_*
 * record declared in this file.
 */
struct trace_entry {
	unsigned short	type;
	unsigned char	flags;
	unsigned char	preempt_count;
	int		pid;
} __attribute__((preserve_access_index));
/*
 * Raw record for the irq_handler_entry trace event: common header, the irq
 * number, a __data_loc word for 'name', and trailing variable-length data.
 */
struct trace_event_raw_irq_handler_entry {
	struct trace_entry	ent;
	int			irq;
	u32			__data_loc_name;
	char			__data[];
} __attribute__((preserve_access_index));
/*
 * Raw record for the irq_handler_exit trace event: common header, the irq
 * number, the 'ret' value, and trailing variable-length data.
 */
struct trace_event_raw_irq_handler_exit {
	struct trace_entry	ent;
	int			irq;
	int			ret;
	char			__data[];
} __attribute__((preserve_access_index));
/*
 * Raw record for softirq trace events: common header, the vector number
 * 'vec', and trailing variable-length data.
 */
struct trace_event_raw_softirq {
	struct trace_entry	ent;
	unsigned int		vec;
	char			__data[];
} __attribute__((preserve_access_index));
/*
 * Raw record for the workqueue_execute_start trace event: common header,
 * the 'work' and 'function' pointers, and trailing variable-length data.
 */
struct trace_event_raw_workqueue_execute_start {
	struct trace_entry	ent;
	void			*work;
	void			*function;
	char			__data[];
} __attribute__((preserve_access_index));
/*
 * Raw record for the workqueue_execute_end trace event: common header,
 * the 'work' and 'function' pointers, and trailing variable-length data.
 */
struct trace_event_raw_workqueue_execute_end {
	struct trace_entry	ent;
	void			*work;
	void			*function;
	char			__data[];
} __attribute__((preserve_access_index));
/*
 * Raw record for the workqueue_activate_work trace event: common header,
 * the 'work' pointer, and trailing variable-length data.
 */
struct trace_event_raw_workqueue_activate_work {
	struct trace_entry	ent;
	void			*work;
	char			__data[];
} __attribute__((preserve_access_index));
/*
 * Partial perf_sample_data: the sample members the BPF tools access.
 * The type is declared 64-byte aligned; member offsets are relocated by
 * libbpf through preserve_access_index.
 */
struct perf_sample_data {
	u64				addr;
	u64				period;
	union perf_sample_weight	weight;
	u64				txn;
	union perf_mem_data_src		data_src;
	u64				ip;
	struct {
		u32			pid;
		u32			tid;
	} tid_entry;
	u64				time;
	u64				id;
	struct {
		u32			cpu;
	} cpu_entry;
	u64				phys_addr;
	u64				cgroup;
	u64				data_page_size;
	u64				code_page_size;
} __attribute__((__aligned__(64), preserve_access_index));
/* Partial perf_event: the 'parent' event pointer and the 64-bit 'id'. */
struct perf_event {
	struct perf_event	*parent;
	u64			id;
} __attribute__((preserve_access_index));
/*
 * Partial bpf_perf_event_data_kern: pointers to the sample data and to the
 * perf_event it belongs to.
 */
struct bpf_perf_event_data_kern {
	struct perf_sample_data	*data;
	struct perf_event	*event;
} __attribute__((preserve_access_index));
/*
 * If 'struct rq' isn't defined for lock_contention.bpf.c, for the sake of
 * rq___old and rq___new, then the type for the 'runqueue' variable ends up
 * being a forward declaration (BTF_KIND_FWD) while the kernel has it defined
 * (BTF_KIND_STRUCT). The definition appears in vmlinux.h rather than
 * lock_contention.bpf.c for consistency with a generated vmlinux.h.
 *
 * The body is intentionally empty: no members are declared here, the
 * definition only has to exist so the type is not a forward declaration.
 */
struct rq {};
/* Partial kmem_cache: only the 'name' string pointer is declared. */
struct kmem_cache {
	const char	*name;
} __attribute__((preserve_access_index));
/* Partial bpf_iter__kmem_cache: only the kmem_cache pointer 's' is declared. */
struct bpf_iter__kmem_cache {
	struct kmem_cache	*s;
} __attribute__((preserve_access_index));
/* Partial zone: only its 'lock' spinlock is declared. */
struct zone {
	spinlock_t	lock;
} __attribute__((preserve_access_index));
/*
 * Partial pglist_data: the embedded zone array and the 'nr_zones' count.
 * Member offsets are relocated by libbpf through preserve_access_index.
 */
struct pglist_data {
	struct zone	node_zones[6];	/* value for all possible config */
	int		nr_zones;
} __attribute__((preserve_access_index));
#endif // __VMLINUX_H