linux/tools/perf/util/bpf_lock_contention.c

853 lines
20 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0
#include "util/cgroup.h"
#include "util/debug.h"
#include "util/evlist.h"
perf lock contention: Resolve slab object name using BPF The bpf_get_kmem_cache() kfunc can return an address of the slab cache (kmem_cache). As it has the name of the slab cache from the iterator, we can use it to symbolize some dynamic kernel locks in a slab. Before: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 3.34 us 2.87 us 1.67 us ffff9d7800ad9600 (mutex) 2 2.16 us 1.93 us 1.08 us ffff9d7804b992d8 (mutex) 4 1.37 us 517 ns 343 ns ffff9d78036e6e00 (mutex) 1 1.27 us 1.27 us 1.27 us ffff9d7804b99378 (mutex) 2 845 ns 599 ns 422 ns ffffffff9e1c3620 delayed_uprobe_lock (mutex) 1 845 ns 845 ns 845 ns ffffffff9da0b280 jiffies_lock (spinlock) 2 377 ns 259 ns 188 ns ffffffff9e1cf840 pcpu_alloc_mutex (mutex) 1 305 ns 305 ns 305 ns ffffffff9e1b4cf8 tracepoint_srcu_srcu_usage (mutex) 1 295 ns 295 ns 295 ns ffffffff9e1c0940 pack_mutex (mutex) 1 232 ns 232 ns 232 ns ffff9d7804b7d8d8 (mutex) 1 180 ns 180 ns 180 ns ffffffff9e1b4c28 tracepoint_srcu_srcu_usage (mutex) 1 165 ns 165 ns 165 ns ffffffff9da8b3a0 text_mutex (mutex) After: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 1.95 us 1.77 us 975 ns ffff9d5e852d3498 &task_struct (mutex) 1 1.18 us 1.18 us 1.18 us ffff9d5e852d3538 &task_struct (mutex) 4 1.12 us 354 ns 279 ns ffff9d5e841ca800 &kmalloc-cg-512 (mutex) 2 859 ns 617 ns 429 ns ffffffffa41c3620 delayed_uprobe_lock (mutex) 3 691 ns 388 ns 230 ns ffffffffa41c0940 pack_mutex (mutex) 3 421 ns 164 ns 140 ns ffffffffa3a8b3a0 text_mutex (mutex) 1 409 ns 409 ns 409 ns ffffffffa41b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 362 ns 239 ns 181 ns ffffffffa41cf840 pcpu_alloc_mutex (mutex) 1 220 ns 220 ns 220 ns ffff9d5e82b534d8 &signal_cache (mutex) 1 215 ns 215 ns 215 ns ffffffffa41b4c28 tracepoint_srcu_srcu_usage (mutex) Note that the name starts with '&' sign for slab objects to inform they are dynamic locks. It won't give the accurate lock or type names but it's still useful. We may add type info to the slab cache later to get the exact name of the lock in the type later. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Ian Rogers <irogers@google.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:08 -08:00
#include "util/hashmap.h"
#include "util/machine.h"
#include "util/map.h"
#include "util/symbol.h"
#include "util/target.h"
#include "util/thread.h"
#include "util/thread_map.h"
#include "util/lock-contention.h"
#include <linux/zalloc.h>
perf lock contention: Show full callstack with -v option Currently it shows a caller function for each entry, but users need to see the full call stacks sometimes. Use -v/--verbose option to do that. # perf lock con -a -b -v sleep 3 Looking at the vmlinux_path (8 entries long) symsrc__init: cannot get elf header. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols contended total wait max wait avg wait type caller 1 10.74 us 10.74 us 10.74 us spinlock __bpf_trace_contention_begin+0xb 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffbb8b8e75 bpf_trace_run2+0x35 0xffffffffbb7eab9b __bpf_trace_contention_begin+0xb 0xffffffffbb7ebe75 queued_spin_lock_slowpath+0x1f5 0xffffffffbc1c26ff _raw_spin_lock+0x1f 0xffffffffbb841015 tick_do_update_jiffies64+0x25 0xffffffffbb8409ee tick_irq_enter+0x9e 1 7.70 us 7.70 us 7.70 us spinlock __bpf_trace_contention_begin+0xb 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffbb8b8e75 bpf_trace_run2+0x35 0xffffffffbb7eab9b __bpf_trace_contention_begin+0xb 0xffffffffbb7ebe75 queued_spin_lock_slowpath+0x1f5 0xffffffffbc1c26ff _raw_spin_lock+0x1f 0xffffffffbb7bc27e raw_spin_rq_lock_nested+0xe 0xffffffffbb7cef9c load_balance+0x66c Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <songliubraving@fb.com> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220912055314.744552-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2022-09-11 22:53:12 -07:00
#include <linux/string.h>
perf lock contention: Symbolize zone->lock using BTF The struct zone is embedded in struct pglist_data which can be allocated for each NUMA node early in the boot process. As it's not a slab object nor a global lock, this was not symbolized. Since the zone->lock is often contended, it'd be nice if we can symbolize it. On NUMA systems, node_data array will have pointers for struct pglist_data. By following the pointer, it can calculate the address of each zone and its lock using BTF. On UMA, it can just use contig_page_data and its zones. The following example shows the zone lock contention at the end. $ sudo ./perf lock con -abl -E 5 -- ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.038 [sec] contended total wait max wait avg wait address symbol 5167 18.17 ms 10.27 us 3.52 us ffff953340052d00 &kmem_cache_node (spinlock) 38 11.75 ms 465.49 us 309.13 us ffff95334060c480 &sock_inode_cache (spinlock) 3916 10.13 ms 10.43 us 2.59 us ffff953342aecb40 &kmem_cache_node (spinlock) 2963 10.02 ms 13.75 us 3.38 us ffff9533d2344098 &kmalloc-rnd-08-2k (spinlock) 216 5.05 ms 99.49 us 23.39 us ffff9542bf7d65d0 zone_lock (spinlock) Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: bpf@vger.kernel.org Cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20250401063055.7431-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-03-31 23:30:55 -07:00
#include <api/fs/fs.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <inttypes.h>
#include "bpf_skel/lock_contention.skel.h"
#include "bpf_skel/lock_data.h"
static struct lock_contention_bpf *skel;
static bool has_slab_iter;
perf lock contention: Resolve slab object name using BPF The bpf_get_kmem_cache() kfunc can return an address of the slab cache (kmem_cache). As it has the name of the slab cache from the iterator, we can use it to symbolize some dynamic kernel locks in a slab. Before: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 3.34 us 2.87 us 1.67 us ffff9d7800ad9600 (mutex) 2 2.16 us 1.93 us 1.08 us ffff9d7804b992d8 (mutex) 4 1.37 us 517 ns 343 ns ffff9d78036e6e00 (mutex) 1 1.27 us 1.27 us 1.27 us ffff9d7804b99378 (mutex) 2 845 ns 599 ns 422 ns ffffffff9e1c3620 delayed_uprobe_lock (mutex) 1 845 ns 845 ns 845 ns ffffffff9da0b280 jiffies_lock (spinlock) 2 377 ns 259 ns 188 ns ffffffff9e1cf840 pcpu_alloc_mutex (mutex) 1 305 ns 305 ns 305 ns ffffffff9e1b4cf8 tracepoint_srcu_srcu_usage (mutex) 1 295 ns 295 ns 295 ns ffffffff9e1c0940 pack_mutex (mutex) 1 232 ns 232 ns 232 ns ffff9d7804b7d8d8 (mutex) 1 180 ns 180 ns 180 ns ffffffff9e1b4c28 tracepoint_srcu_srcu_usage (mutex) 1 165 ns 165 ns 165 ns ffffffff9da8b3a0 text_mutex (mutex) After: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 1.95 us 1.77 us 975 ns ffff9d5e852d3498 &task_struct (mutex) 1 1.18 us 1.18 us 1.18 us ffff9d5e852d3538 &task_struct (mutex) 4 1.12 us 354 ns 279 ns ffff9d5e841ca800 &kmalloc-cg-512 (mutex) 2 859 ns 617 ns 429 ns ffffffffa41c3620 delayed_uprobe_lock (mutex) 3 691 ns 388 ns 230 ns ffffffffa41c0940 pack_mutex (mutex) 3 421 ns 164 ns 140 ns ffffffffa3a8b3a0 text_mutex (mutex) 1 409 ns 409 ns 409 ns ffffffffa41b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 362 ns 239 ns 181 ns ffffffffa41cf840 pcpu_alloc_mutex (mutex) 1 220 ns 220 ns 220 ns ffff9d5e82b534d8 &signal_cache (mutex) 1 215 ns 215 ns 215 ns ffffffffa41b4c28 tracepoint_srcu_srcu_usage (mutex) Note that the name starts with '&' sign for slab objects to inform they are dynamic locks. It won't give the accurate lock or type names but it's still useful. We may add type info to the slab cache later to get the exact name of the lock in the type later. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Ian Rogers <irogers@google.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:08 -08:00
static struct hashmap slab_hash;
static size_t slab_cache_hash(long key, void *ctx __maybe_unused)
{
return key;
}
static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused)
{
return key1 == key2;
}
static void check_slab_cache_iter(struct lock_contention *con)
{
s32 ret;
perf lock contention: Resolve slab object name using BPF The bpf_get_kmem_cache() kfunc can return an address of the slab cache (kmem_cache). As it has the name of the slab cache from the iterator, we can use it to symbolize some dynamic kernel locks in a slab. Before: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 3.34 us 2.87 us 1.67 us ffff9d7800ad9600 (mutex) 2 2.16 us 1.93 us 1.08 us ffff9d7804b992d8 (mutex) 4 1.37 us 517 ns 343 ns ffff9d78036e6e00 (mutex) 1 1.27 us 1.27 us 1.27 us ffff9d7804b99378 (mutex) 2 845 ns 599 ns 422 ns ffffffff9e1c3620 delayed_uprobe_lock (mutex) 1 845 ns 845 ns 845 ns ffffffff9da0b280 jiffies_lock (spinlock) 2 377 ns 259 ns 188 ns ffffffff9e1cf840 pcpu_alloc_mutex (mutex) 1 305 ns 305 ns 305 ns ffffffff9e1b4cf8 tracepoint_srcu_srcu_usage (mutex) 1 295 ns 295 ns 295 ns ffffffff9e1c0940 pack_mutex (mutex) 1 232 ns 232 ns 232 ns ffff9d7804b7d8d8 (mutex) 1 180 ns 180 ns 180 ns ffffffff9e1b4c28 tracepoint_srcu_srcu_usage (mutex) 1 165 ns 165 ns 165 ns ffffffff9da8b3a0 text_mutex (mutex) After: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 1.95 us 1.77 us 975 ns ffff9d5e852d3498 &task_struct (mutex) 1 1.18 us 1.18 us 1.18 us ffff9d5e852d3538 &task_struct (mutex) 4 1.12 us 354 ns 279 ns ffff9d5e841ca800 &kmalloc-cg-512 (mutex) 2 859 ns 617 ns 429 ns ffffffffa41c3620 delayed_uprobe_lock (mutex) 3 691 ns 388 ns 230 ns ffffffffa41c0940 pack_mutex (mutex) 3 421 ns 164 ns 140 ns ffffffffa3a8b3a0 text_mutex (mutex) 1 409 ns 409 ns 409 ns ffffffffa41b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 362 ns 239 ns 181 ns ffffffffa41cf840 pcpu_alloc_mutex (mutex) 1 220 ns 220 ns 220 ns ffff9d5e82b534d8 &signal_cache (mutex) 1 215 ns 215 ns 215 ns ffffffffa41b4c28 tracepoint_srcu_srcu_usage (mutex) Note that the name starts with '&' sign for slab objects to inform they are dynamic locks. It won't give the accurate lock or type names but it's still useful. We may add type info to the slab cache later to get the exact name of the lock in the type later. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Ian Rogers <irogers@google.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:08 -08:00
hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL);
perf lock contention: Symbolize zone->lock using BTF The struct zone is embedded in struct pglist_data which can be allocated for each NUMA node early in the boot process. As it's not a slab object nor a global lock, this was not symbolized. Since the zone->lock is often contended, it'd be nice if we can symbolize it. On NUMA systems, node_data array will have pointers for struct pglist_data. By following the pointer, it can calculate the address of each zone and its lock using BTF. On UMA, it can just use contig_page_data and its zones. The following example shows the zone lock contention at the end. $ sudo ./perf lock con -abl -E 5 -- ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.038 [sec] contended total wait max wait avg wait address symbol 5167 18.17 ms 10.27 us 3.52 us ffff953340052d00 &kmem_cache_node (spinlock) 38 11.75 ms 465.49 us 309.13 us ffff95334060c480 &sock_inode_cache (spinlock) 3916 10.13 ms 10.43 us 2.59 us ffff953342aecb40 &kmem_cache_node (spinlock) 2963 10.02 ms 13.75 us 3.38 us ffff9533d2344098 &kmalloc-rnd-08-2k (spinlock) 216 5.05 ms 99.49 us 23.39 us ffff9542bf7d65d0 zone_lock (spinlock) Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: bpf@vger.kernel.org Cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20250401063055.7431-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-03-31 23:30:55 -07:00
con->btf = btf__load_vmlinux_btf();
if (con->btf == NULL) {
pr_debug("BTF loading failed: %s\n", strerror(errno));
return;
}
perf lock contention: Symbolize zone->lock using BTF The struct zone is embedded in struct pglist_data which can be allocated for each NUMA node early in the boot process. As it's not a slab object nor a global lock, this was not symbolized. Since the zone->lock is often contended, it'd be nice if we can symbolize it. On NUMA systems, node_data array will have pointers for struct pglist_data. By following the pointer, it can calculate the address of each zone and its lock using BTF. On UMA, it can just use contig_page_data and its zones. The following example shows the zone lock contention at the end. $ sudo ./perf lock con -abl -E 5 -- ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.038 [sec] contended total wait max wait avg wait address symbol 5167 18.17 ms 10.27 us 3.52 us ffff953340052d00 &kmem_cache_node (spinlock) 38 11.75 ms 465.49 us 309.13 us ffff95334060c480 &sock_inode_cache (spinlock) 3916 10.13 ms 10.43 us 2.59 us ffff953342aecb40 &kmem_cache_node (spinlock) 2963 10.02 ms 13.75 us 3.38 us ffff9533d2344098 &kmalloc-rnd-08-2k (spinlock) 216 5.05 ms 99.49 us 23.39 us ffff9542bf7d65d0 zone_lock (spinlock) Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: bpf@vger.kernel.org Cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20250401063055.7431-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-03-31 23:30:55 -07:00
ret = btf__find_by_name_kind(con->btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT);
if (ret < 0) {
bpf_program__set_autoload(skel->progs.slab_cache_iter, false);
pr_debug("slab cache iterator is not available: %d\n", ret);
perf lock contention: Symbolize zone->lock using BTF The struct zone is embedded in struct pglist_data which can be allocated for each NUMA node early in the boot process. As it's not a slab object nor a global lock, this was not symbolized. Since the zone->lock is often contended, it'd be nice if we can symbolize it. On NUMA systems, node_data array will have pointers for struct pglist_data. By following the pointer, it can calculate the address of each zone and its lock using BTF. On UMA, it can just use contig_page_data and its zones. The following example shows the zone lock contention at the end. $ sudo ./perf lock con -abl -E 5 -- ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.038 [sec] contended total wait max wait avg wait address symbol 5167 18.17 ms 10.27 us 3.52 us ffff953340052d00 &kmem_cache_node (spinlock) 38 11.75 ms 465.49 us 309.13 us ffff95334060c480 &sock_inode_cache (spinlock) 3916 10.13 ms 10.43 us 2.59 us ffff953342aecb40 &kmem_cache_node (spinlock) 2963 10.02 ms 13.75 us 3.38 us ffff9533d2344098 &kmalloc-rnd-08-2k (spinlock) 216 5.05 ms 99.49 us 23.39 us ffff9542bf7d65d0 zone_lock (spinlock) Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: bpf@vger.kernel.org Cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20250401063055.7431-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-03-31 23:30:55 -07:00
return;
}
has_slab_iter = true;
bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries);
}
static void run_slab_cache_iter(void)
{
int fd;
char buf[256];
perf lock contention: Resolve slab object name using BPF The bpf_get_kmem_cache() kfunc can return an address of the slab cache (kmem_cache). As it has the name of the slab cache from the iterator, we can use it to symbolize some dynamic kernel locks in a slab. Before: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 3.34 us 2.87 us 1.67 us ffff9d7800ad9600 (mutex) 2 2.16 us 1.93 us 1.08 us ffff9d7804b992d8 (mutex) 4 1.37 us 517 ns 343 ns ffff9d78036e6e00 (mutex) 1 1.27 us 1.27 us 1.27 us ffff9d7804b99378 (mutex) 2 845 ns 599 ns 422 ns ffffffff9e1c3620 delayed_uprobe_lock (mutex) 1 845 ns 845 ns 845 ns ffffffff9da0b280 jiffies_lock (spinlock) 2 377 ns 259 ns 188 ns ffffffff9e1cf840 pcpu_alloc_mutex (mutex) 1 305 ns 305 ns 305 ns ffffffff9e1b4cf8 tracepoint_srcu_srcu_usage (mutex) 1 295 ns 295 ns 295 ns ffffffff9e1c0940 pack_mutex (mutex) 1 232 ns 232 ns 232 ns ffff9d7804b7d8d8 (mutex) 1 180 ns 180 ns 180 ns ffffffff9e1b4c28 tracepoint_srcu_srcu_usage (mutex) 1 165 ns 165 ns 165 ns ffffffff9da8b3a0 text_mutex (mutex) After: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 1.95 us 1.77 us 975 ns ffff9d5e852d3498 &task_struct (mutex) 1 1.18 us 1.18 us 1.18 us ffff9d5e852d3538 &task_struct (mutex) 4 1.12 us 354 ns 279 ns ffff9d5e841ca800 &kmalloc-cg-512 (mutex) 2 859 ns 617 ns 429 ns ffffffffa41c3620 delayed_uprobe_lock (mutex) 3 691 ns 388 ns 230 ns ffffffffa41c0940 pack_mutex (mutex) 3 421 ns 164 ns 140 ns ffffffffa3a8b3a0 text_mutex (mutex) 1 409 ns 409 ns 409 ns ffffffffa41b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 362 ns 239 ns 181 ns ffffffffa41cf840 pcpu_alloc_mutex (mutex) 1 220 ns 220 ns 220 ns ffff9d5e82b534d8 &signal_cache (mutex) 1 215 ns 215 ns 215 ns ffffffffa41b4c28 tracepoint_srcu_srcu_usage (mutex) Note that the name starts with '&' sign for slab objects to inform they are dynamic locks. It won't give the accurate lock or type names but it's still useful. We may add type info to the slab cache later to get the exact name of the lock in the type later. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Ian Rogers <irogers@google.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:08 -08:00
long key, *prev_key;
if (!has_slab_iter)
return;
fd = bpf_iter_create(bpf_link__fd(skel->links.slab_cache_iter));
if (fd < 0) {
pr_debug("cannot create slab cache iter: %d\n", fd);
return;
}
/* This will run the bpf program */
while (read(fd, buf, sizeof(buf)) > 0)
continue;
close(fd);
perf lock contention: Resolve slab object name using BPF The bpf_get_kmem_cache() kfunc can return an address of the slab cache (kmem_cache). As it has the name of the slab cache from the iterator, we can use it to symbolize some dynamic kernel locks in a slab. Before: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 3.34 us 2.87 us 1.67 us ffff9d7800ad9600 (mutex) 2 2.16 us 1.93 us 1.08 us ffff9d7804b992d8 (mutex) 4 1.37 us 517 ns 343 ns ffff9d78036e6e00 (mutex) 1 1.27 us 1.27 us 1.27 us ffff9d7804b99378 (mutex) 2 845 ns 599 ns 422 ns ffffffff9e1c3620 delayed_uprobe_lock (mutex) 1 845 ns 845 ns 845 ns ffffffff9da0b280 jiffies_lock (spinlock) 2 377 ns 259 ns 188 ns ffffffff9e1cf840 pcpu_alloc_mutex (mutex) 1 305 ns 305 ns 305 ns ffffffff9e1b4cf8 tracepoint_srcu_srcu_usage (mutex) 1 295 ns 295 ns 295 ns ffffffff9e1c0940 pack_mutex (mutex) 1 232 ns 232 ns 232 ns ffff9d7804b7d8d8 (mutex) 1 180 ns 180 ns 180 ns ffffffff9e1b4c28 tracepoint_srcu_srcu_usage (mutex) 1 165 ns 165 ns 165 ns ffffffff9da8b3a0 text_mutex (mutex) After: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 1.95 us 1.77 us 975 ns ffff9d5e852d3498 &task_struct (mutex) 1 1.18 us 1.18 us 1.18 us ffff9d5e852d3538 &task_struct (mutex) 4 1.12 us 354 ns 279 ns ffff9d5e841ca800 &kmalloc-cg-512 (mutex) 2 859 ns 617 ns 429 ns ffffffffa41c3620 delayed_uprobe_lock (mutex) 3 691 ns 388 ns 230 ns ffffffffa41c0940 pack_mutex (mutex) 3 421 ns 164 ns 140 ns ffffffffa3a8b3a0 text_mutex (mutex) 1 409 ns 409 ns 409 ns ffffffffa41b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 362 ns 239 ns 181 ns ffffffffa41cf840 pcpu_alloc_mutex (mutex) 1 220 ns 220 ns 220 ns ffff9d5e82b534d8 &signal_cache (mutex) 1 215 ns 215 ns 215 ns ffffffffa41b4c28 tracepoint_srcu_srcu_usage (mutex) Note that the name starts with '&' sign for slab objects to inform they are dynamic locks. It won't give the accurate lock or type names but it's still useful. We may add type info to the slab cache later to get the exact name of the lock in the type later. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Ian Rogers <irogers@google.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:08 -08:00
/* Read the slab cache map and build a hash with IDs */
fd = bpf_map__fd(skel->maps.slab_caches);
prev_key = NULL;
while (!bpf_map_get_next_key(fd, prev_key, &key)) {
struct slab_cache_data *data;
data = malloc(sizeof(*data));
if (data == NULL)
break;
if (bpf_map_lookup_elem(fd, &key, data) < 0)
break;
hashmap__add(&slab_hash, data->id, data);
prev_key = &key;
}
}
static void exit_slab_cache_iter(void)
{
struct hashmap_entry *cur;
unsigned bkt;
hashmap__for_each_entry(&slab_hash, cur, bkt)
free(cur->pvalue);
hashmap__clear(&slab_hash);
}
perf lock contention: Symbolize zone->lock using BTF The struct zone is embedded in struct pglist_data which can be allocated for each NUMA node early in the boot process. As it's not a slab object nor a global lock, this was not symbolized. Since the zone->lock is often contended, it'd be nice if we can symbolize it. On NUMA systems, node_data array will have pointers for struct pglist_data. By following the pointer, it can calculate the address of each zone and its lock using BTF. On UMA, it can just use contig_page_data and its zones. The following example shows the zone lock contention at the end. $ sudo ./perf lock con -abl -E 5 -- ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.038 [sec] contended total wait max wait avg wait address symbol 5167 18.17 ms 10.27 us 3.52 us ffff953340052d00 &kmem_cache_node (spinlock) 38 11.75 ms 465.49 us 309.13 us ffff95334060c480 &sock_inode_cache (spinlock) 3916 10.13 ms 10.43 us 2.59 us ffff953342aecb40 &kmem_cache_node (spinlock) 2963 10.02 ms 13.75 us 3.38 us ffff9533d2344098 &kmalloc-rnd-08-2k (spinlock) 216 5.05 ms 99.49 us 23.39 us ffff9542bf7d65d0 zone_lock (spinlock) Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: bpf@vger.kernel.org Cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20250401063055.7431-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-03-31 23:30:55 -07:00
static void init_numa_data(struct lock_contention *con)
{
struct symbol *sym;
struct map *kmap;
char *buf = NULL, *p;
size_t len;
long last = -1;
int ret;
/*
* 'struct zone' is embedded in 'struct pglist_data' as an array.
* As we may not have full information of the struct zone in the
* (fake) vmlinux.h, let's get the actual size from BTF.
*/
ret = btf__find_by_name_kind(con->btf, "zone", BTF_KIND_STRUCT);
if (ret < 0) {
pr_debug("cannot get type of struct zone: %d\n", ret);
return;
}
ret = btf__resolve_size(con->btf, ret);
if (ret < 0) {
pr_debug("cannot get size of struct zone: %d\n", ret);
return;
}
skel->rodata->sizeof_zone = ret;
/* UMA system doesn't have 'node_data[]' - just use contig_page_data. */
sym = machine__find_kernel_symbol_by_name(con->machine,
"contig_page_data",
&kmap);
if (sym) {
skel->rodata->contig_page_data_addr = map__unmap_ip(kmap, sym->start);
map__put(kmap);
return;
}
/*
* The 'node_data' is an array of pointers to struct pglist_data.
* It needs to follow the pointer for each node in BPF to get the
* address of struct pglist_data and its zones.
*/
sym = machine__find_kernel_symbol_by_name(con->machine,
"node_data",
&kmap);
if (sym == NULL)
return;
skel->rodata->node_data_addr = map__unmap_ip(kmap, sym->start);
map__put(kmap);
/* get the number of online nodes using the last node number + 1 */
ret = sysfs__read_str("devices/system/node/online", &buf, &len);
if (ret < 0) {
pr_debug("failed to read online node: %d\n", ret);
return;
}
p = buf;
while (p && *p) {
last = strtol(p, &p, 0);
if (p && (*p == ',' || *p == '-' || *p == '\n'))
p++;
}
skel->rodata->nr_nodes = last + 1;
free(buf);
}
int lock_contention_prepare(struct lock_contention *con)
{
int i, fd;
perf lock contention: Handle slab objects in -L/--lock-filter option This is to filter lock contention from specific slab objects only. Like in the lock symbol output, we can use '&' prefix to filter slab object names. root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 3 14.99 us 14.44 us 5.00 us ffffffff851c0940 pack_mutex (mutex) 2 2.75 us 2.56 us 1.38 us ffff98d7031fb498 &task_struct (mutex) 4 1.42 us 557 ns 355 ns ffff98d706311400 &kmalloc-cg-512 (mutex) 2 953 ns 714 ns 476 ns ffffffff851c3620 delayed_uprobe_lock (mutex) 1 929 ns 929 ns 929 ns ffff98d7031fb538 &task_struct (mutex) 3 561 ns 210 ns 187 ns ffffffff84a8b3a0 text_mutex (mutex) 1 479 ns 479 ns 479 ns ffffffff851b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 320 ns 195 ns 160 ns ffffffff851cf840 pcpu_alloc_mutex (mutex) 1 212 ns 212 ns 212 ns ffff98d7031784d8 &signal_cache (mutex) 1 177 ns 177 ns 177 ns ffffffff851b4c28 tracepoint_srcu_srcu_usage (mutex) With the filter, it can show contentions from the task_struct only. root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl -L '&task_struct' sleep 1 contended total wait max wait avg wait address symbol 2 1.97 us 1.71 us 987 ns ffff98d7032fd658 &task_struct (mutex) 1 1.20 us 1.20 us 1.20 us ffff98d7032fd6f8 &task_struct (mutex) It can work with other aggregation mode: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -ab -L '&task_struct' sleep 1 contended total wait max wait avg wait type caller 1 25.10 us 25.10 us 25.10 us mutex perf_event_exit_task+0x39 1 21.60 us 21.60 us 21.60 us mutex futex_exit_release+0x21 1 5.56 us 5.56 us 5.56 us mutex futex_exec_release+0x21 Committer testing: root@number:~# perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 1 20.80 us 20.80 us 20.80 us ffff9d417fbd65d0 (spinlock) 8 12.85 us 2.41 us 1.61 us ffff9d415eeb6a40 rq_lock (spinlock) 1 2.55 us 2.55 us 2.55 us ffff9d415f636a40 rq_lock (spinlock) 7 1.92 us 840 ns 274 ns ffff9d39c2cbc8c4 (spinlock) 1 1.23 us 1.23 us 1.23 us ffff9d415fb36a40 rq_lock (spinlock) 2 928 ns 738 ns 464 ns ffff9d39c1fa6660 &kmalloc-rnd-14-192 (rwlock) 4 788 ns 252 ns 197 ns ffffffffb8608a80 jiffies_lock (spinlock) 1 304 ns 304 ns 304 ns ffff9d39c2c979c4 (spinlock) 1 216 ns 216 ns 216 ns ffff9d3a0225c660 &kmalloc-rnd-14-192 (rwlock) 1 89 ns 89 ns 89 ns ffff9d3a0adbf3e0 &kmalloc-rnd-14-192 (rwlock) 1 61 ns 61 ns 61 ns ffff9d415f9b6a40 rq_lock (spinlock) root@number:~# uname -r 6.13.0-rc2 root@number:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:09 -08:00
int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1, nslabs = 1;
struct evlist *evlist = con->evlist;
struct target *target = con->target;
skel = lock_contention_bpf__open();
if (!skel) {
pr_err("Failed to open lock-contention BPF skeleton\n");
return -1;
}
perf lock contention: Allow to change stack depth and skip It needs stack traces to find callers of locks. To minimize the performance overhead it only collects up to 8 entries for each stack trace. And it skips first 3 entries as they came from BPF, tracepoint and lock functions which are not interested for most users. But it turned out that those numbers are different in some configuration. Using fixed number can result in non meaningful caller names. Let's make them adjustable with --stack-depth and --skip-stack options. On my setup, the default output is like below: # /perf lock con -ab -F contended,wait_total sleep 3 contended total wait type caller 28 4.55 ms rwlock:W __bpf_trace_contention_begin+0xb 33 1.67 ms rwlock:W __bpf_trace_contention_begin+0xb 12 580.28 us spinlock __bpf_trace_contention_begin+0xb 60 240.54 us rwsem:R __bpf_trace_contention_begin+0xb 27 64.45 us spinlock __bpf_trace_contention_begin+0xb If I change the stack skip to 5, the result will be like: # perf lock con -ab -F contended,wait_total --stack-skip 5 sleep 3 contended total wait type caller 32 715.45 us spinlock folio_lruvec_lock_irqsave+0x61 26 550.22 us spinlock folio_lruvec_lock_irqsave+0x61 15 486.93 us rwsem:R mmap_read_lock+0x13 12 139.66 us rwsem:W vm_mmap_pgoff+0x93 1 7.04 us spinlock tick_do_update_jiffies64+0x25 Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <songliubraving@fb.com> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220912055314.744552-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2022-09-11 22:53:13 -07:00
bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-02 18:13:24 -08:00
if (con->aggr_mode == LOCK_AGGR_TASK)
bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries);
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-02 18:13:24 -08:00
else
bpf_map__set_max_entries(skel->maps.task_data, 1);
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-02 18:13:24 -08:00
if (con->save_callstack) {
bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
if (con->owner) {
bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64));
bpf_map__set_key_size(skel->maps.owner_stacks,
con->max_stack * sizeof(u64));
bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries);
bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries);
bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries);
skel->rodata->max_stack = con->max_stack;
}
} else {
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-02 18:13:24 -08:00
bpf_map__set_max_entries(skel->maps.stacks, 1);
}
perf lock contention: Constify control data for BPF The control knobs set before loading BPF programs should be declared as 'const volatile' so that it can be optimized by the BPF core. Committer testing: root@x1:~# perf lock contention --use-bpf contended total wait max wait avg wait type caller 5 31.57 us 14.93 us 6.31 us mutex btrfs_delayed_update_inode+0x43 1 16.91 us 16.91 us 16.91 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 15.13 us 15.13 us 15.13 us spinlock btrfs_getattr+0xd1 1 6.65 us 6.65 us 6.65 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 4.34 us 4.34 us 4.34 us spinlock process_one_work+0x1a9 root@x1:~# root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf 0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8) = -1 EOPNOTSUPP (Operation not supported) 0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16 0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16 26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148) = 16 26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80) = 16 87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40) = 16 87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40) = 16 88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40) = 16 88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148) = 17 88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40) = 16 root@x1:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-09-02 13:05:14 -07:00
if (target__has_cpu(target)) {
skel->rodata->has_cpu = 1;
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
perf lock contention: Constify control data for BPF The control knobs set before loading BPF programs should be declared as 'const volatile' so that it can be optimized by the BPF core. Committer testing: root@x1:~# perf lock contention --use-bpf contended total wait max wait avg wait type caller 5 31.57 us 14.93 us 6.31 us mutex btrfs_delayed_update_inode+0x43 1 16.91 us 16.91 us 16.91 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 15.13 us 15.13 us 15.13 us spinlock btrfs_getattr+0xd1 1 6.65 us 6.65 us 6.65 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 4.34 us 4.34 us 4.34 us spinlock process_one_work+0x1a9 root@x1:~# root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf 0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8) = -1 EOPNOTSUPP (Operation not supported) 0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16 0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16 26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148) = 16 26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80) = 16 87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40) = 16 87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40) = 16 88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40) = 16 88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148) = 17 88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40) = 16 root@x1:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-09-02 13:05:14 -07:00
}
if (target__has_task(target)) {
skel->rodata->has_task = 1;
ntasks = perf_thread_map__nr(evlist->core.threads);
perf lock contention: Constify control data for BPF The control knobs set before loading BPF programs should be declared as 'const volatile' so that it can be optimized by the BPF core. Committer testing: root@x1:~# perf lock contention --use-bpf contended total wait max wait avg wait type caller 5 31.57 us 14.93 us 6.31 us mutex btrfs_delayed_update_inode+0x43 1 16.91 us 16.91 us 16.91 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 15.13 us 15.13 us 15.13 us spinlock btrfs_getattr+0xd1 1 6.65 us 6.65 us 6.65 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 4.34 us 4.34 us 4.34 us spinlock process_one_work+0x1a9 root@x1:~# root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf 0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8) = -1 EOPNOTSUPP (Operation not supported) 0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16 0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16 26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148) = 16 26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80) = 16 87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40) = 16 87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40) = 16 88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40) = 16 88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148) = 17 88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40) = 16 root@x1:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-09-02 13:05:14 -07:00
}
if (con->filters->nr_types) {
skel->rodata->has_type = 1;
ntypes = con->filters->nr_types;
perf lock contention: Constify control data for BPF The control knobs set before loading BPF programs should be declared as 'const volatile' so that it can be optimized by the BPF core. Committer testing: root@x1:~# perf lock contention --use-bpf contended total wait max wait avg wait type caller 5 31.57 us 14.93 us 6.31 us mutex btrfs_delayed_update_inode+0x43 1 16.91 us 16.91 us 16.91 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 15.13 us 15.13 us 15.13 us spinlock btrfs_getattr+0xd1 1 6.65 us 6.65 us 6.65 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 4.34 us 4.34 us 4.34 us spinlock process_one_work+0x1a9 root@x1:~# root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf 0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8) = -1 EOPNOTSUPP (Operation not supported) 0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16 0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16 26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148) = 16 26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80) = 16 87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40) = 16 87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40) = 16 88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40) = 16 88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148) = 17 88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40) = 16 root@x1:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-09-02 13:05:14 -07:00
}
if (con->filters->nr_cgrps) {
skel->rodata->has_cgroup = 1;
perf lock contention: Add -G/--cgroup-filter option The -G/--cgroup-filter is to limit lock contention collection on the tasks in the specific cgroups only. $ sudo ./perf lock con -abt -G /user.slice/.../vte-spawn-52221fb8-b33f-4a52-b5c3-e35d1e6fc0e0.scope \ ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.174 [sec] contended total wait max wait avg wait pid comm 4 114.45 us 60.06 us 28.61 us 214847 sched-messaging 2 111.40 us 60.84 us 55.70 us 214848 sched-messaging 2 106.09 us 59.42 us 53.04 us 214837 sched-messaging 1 81.70 us 81.70 us 81.70 us 214709 sched-messaging 68 78.44 us 6.83 us 1.15 us 214633 sched-messaging 69 73.71 us 2.69 us 1.07 us 214632 sched-messaging 4 72.62 us 60.83 us 18.15 us 214850 sched-messaging 2 71.75 us 67.60 us 35.88 us 214840 sched-messaging 2 69.29 us 67.53 us 34.65 us 214804 sched-messaging 2 69.00 us 68.23 us 34.50 us 214826 sched-messaging ... Export cgroup__new() function as it's needed from outside. Reviewed-by: Ian Rogers <irogers@google.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230906174903.346486-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-09-06 10:49:02 -07:00
ncgrps = con->filters->nr_cgrps;
perf lock contention: Constify control data for BPF The control knobs set before loading BPF programs should be declared as 'const volatile' so that it can be optimized by the BPF core. Committer testing: root@x1:~# perf lock contention --use-bpf contended total wait max wait avg wait type caller 5 31.57 us 14.93 us 6.31 us mutex btrfs_delayed_update_inode+0x43 1 16.91 us 16.91 us 16.91 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 15.13 us 15.13 us 15.13 us spinlock btrfs_getattr+0xd1 1 6.65 us 6.65 us 6.65 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 4.34 us 4.34 us 4.34 us spinlock process_one_work+0x1a9 root@x1:~# root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf 0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8) = -1 EOPNOTSUPP (Operation not supported) 0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16 0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16 26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148) = 16 26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80) = 16 87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40) = 16 87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40) = 16 88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40) = 16 88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148) = 17 88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40) = 16 root@x1:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-09-02 13:05:14 -07:00
}
/* resolve lock name filters to addr */
if (con->filters->nr_syms) {
struct symbol *sym;
struct map *kmap;
unsigned long *addrs;
for (i = 0; i < con->filters->nr_syms; i++) {
sym = machine__find_kernel_symbol_by_name(con->machine,
con->filters->syms[i],
&kmap);
if (sym == NULL) {
pr_warning("ignore unknown symbol: %s\n",
con->filters->syms[i]);
continue;
}
addrs = realloc(con->filters->addrs,
(con->filters->nr_addrs + 1) * sizeof(*addrs));
if (addrs == NULL) {
pr_warning("memory allocation failure\n");
continue;
}
perf map: Add helper for ->map_ip() and ->unmap_ip() Later changes will add reference count checking for struct map, add a helper function to invoke the map_ip and unmap_ip function pointers. The helper allows the reference count check to be in fewer places. Committer notes: Add missing conversions to: tools/perf/util/map.c tools/perf/util/cs-etm.c tools/perf/util/annotate.c tools/perf/arch/powerpc/util/sym-handling.c tools/perf/arch/s390/annotate/instructions.c Signed-off-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Cc: Darren Hart <dvhart@infradead.org> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Dmitriy Vyukov <dvyukov@google.com> Cc: Eric Dumazet <edumazet@google.com> Cc: German Gomez <german.gomez@arm.com> Cc: Hao Luo <haoluo@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: John Garry <john.g.garry@oracle.com> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Madhavan Srinivasan <maddy@linux.ibm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Miaoqian Lin <linmq006@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Riccardo Mancini <rickyman7@gmail.com> Cc: Shunsuke Nakamura <nakamura.shun@fujitsu.com> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Stephen Brennan <stephen.s.brennan@oracle.com> Cc: Steven Rostedt (VMware) <rostedt@goodmis.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Thomas Richter <tmricht@linux.ibm.com> Cc: Yury Norov <yury.norov@gmail.com> Link: https://lore.kernel.org/r/20230404205954.2245628-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-04-04 13:59:44 -07:00
addrs[con->filters->nr_addrs++] = map__unmap_ip(kmap, sym->start);
con->filters->addrs = addrs;
}
naddrs = con->filters->nr_addrs;
perf lock contention: Constify control data for BPF The control knobs set before loading BPF programs should be declared as 'const volatile' so that it can be optimized by the BPF core. Committer testing: root@x1:~# perf lock contention --use-bpf contended total wait max wait avg wait type caller 5 31.57 us 14.93 us 6.31 us mutex btrfs_delayed_update_inode+0x43 1 16.91 us 16.91 us 16.91 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 15.13 us 15.13 us 15.13 us spinlock btrfs_getattr+0xd1 1 6.65 us 6.65 us 6.65 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 4.34 us 4.34 us 4.34 us spinlock process_one_work+0x1a9 root@x1:~# root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf 0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8) = -1 EOPNOTSUPP (Operation not supported) 0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16 0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16 26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148) = 16 26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80) = 16 87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40) = 16 87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40) = 16 88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40) = 16 88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148) = 17 88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40) = 16 root@x1:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-09-02 13:05:14 -07:00
skel->rodata->has_addr = 1;
}
perf lock contention: Add -J/--inject-delay option This is to slow down lock acquistion (on contention locks) deliberately. A possible use case is to estimate impact on application performance by optimization of kernel locking behavior. By delaying the lock it can simulate the worse condition as a control group, and then compare with the current behavior as a optimized condition. The syntax is 'time@function' and the time can have unit suffix like "us" and "ms". For example, I ran a simple test like below. $ sudo perf lock con -abl -L tasklist_lock -- \ sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 92 1.18 ms 199.54 us 12.79 us ffffffff8a806080 tasklist_lock (rwlock) The contention count was 92 and the average wait time was around 10 us. But if I add 100 usec of delay to the tasklist_lock, $ sudo perf lock con -abl -L tasklist_lock -J 100us@tasklist_lock -- \ sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 190 15.67 ms 230.10 us 82.46 us ffffffff8a806080 tasklist_lock (rwlock) The contention count increased and the average wait time was up closed to 100 usec. If I increase the delay even more, $ sudo perf lock con -abl -L tasklist_lock -J 1ms@tasklist_lock -- \ sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 1002 2.80 s 3.01 ms 2.80 ms ffffffff8a806080 tasklist_lock (rwlock) Now every sleep process had contention and the wait time was more than 1 msec. This is on my 4 CPU laptop so I guess one CPU has the lock while other 3 are waiting for it mostly. For simplicity, it only supports global locks for now. Committer testing: root@number:~# grep -m1 'model name' /proc/cpuinfo model name : AMD Ryzen 9 9950X3D 16-Core Processor root@number:~# perf lock con -abl -L tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 142 453.85 us 25.39 us 3.20 us ffffffffae808080 tasklist_lock (rwlock) root@number:~# perf lock con -abl -L tasklist_lock -J 100us@tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 1040 2.39 s 3.11 ms 2.30 ms ffffffffae808080 tasklist_lock (rwlock) root@number:~# perf lock con -abl -L tasklist_lock -J 1ms@tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 1025 24.72 s 31.01 ms 24.12 ms ffffffffae808080 tasklist_lock (rwlock) root@number:~# Suggested-by: Stephane Eranian <eranian@google.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20250509171950.183591-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-05-09 10:19:50 -07:00
/* resolve lock name in delays */
if (con->nr_delays) {
struct symbol *sym;
struct map *kmap;
for (i = 0; i < con->nr_delays; i++) {
sym = machine__find_kernel_symbol_by_name(con->machine,
con->delays[i].sym,
&kmap);
if (sym == NULL) {
pr_warning("ignore unknown symbol: %s\n",
con->delays[i].sym);
continue;
}
con->delays[i].addr = map__unmap_ip(kmap, sym->start);
}
skel->rodata->lock_delay = 1;
bpf_map__set_max_entries(skel->maps.lock_delays, con->nr_delays);
}
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
bpf_map__set_max_entries(skel->maps.type_filter, ntypes);
bpf_map__set_max_entries(skel->maps.addr_filter, naddrs);
perf lock contention: Add -G/--cgroup-filter option The -G/--cgroup-filter is to limit lock contention collection on the tasks in the specific cgroups only. $ sudo ./perf lock con -abt -G /user.slice/.../vte-spawn-52221fb8-b33f-4a52-b5c3-e35d1e6fc0e0.scope \ ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.174 [sec] contended total wait max wait avg wait pid comm 4 114.45 us 60.06 us 28.61 us 214847 sched-messaging 2 111.40 us 60.84 us 55.70 us 214848 sched-messaging 2 106.09 us 59.42 us 53.04 us 214837 sched-messaging 1 81.70 us 81.70 us 81.70 us 214709 sched-messaging 68 78.44 us 6.83 us 1.15 us 214633 sched-messaging 69 73.71 us 2.69 us 1.07 us 214632 sched-messaging 4 72.62 us 60.83 us 18.15 us 214850 sched-messaging 2 71.75 us 67.60 us 35.88 us 214840 sched-messaging 2 69.29 us 67.53 us 34.65 us 214804 sched-messaging 2 69.00 us 68.23 us 34.50 us 214826 sched-messaging ... Export cgroup__new() function as it's needed from outside. Reviewed-by: Ian Rogers <irogers@google.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230906174903.346486-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-09-06 10:49:02 -07:00
bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
perf lock contention: Constify control data for BPF The control knobs set before loading BPF programs should be declared as 'const volatile' so that it can be optimized by the BPF core. Committer testing: root@x1:~# perf lock contention --use-bpf contended total wait max wait avg wait type caller 5 31.57 us 14.93 us 6.31 us mutex btrfs_delayed_update_inode+0x43 1 16.91 us 16.91 us 16.91 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 15.13 us 15.13 us 15.13 us spinlock btrfs_getattr+0xd1 1 6.65 us 6.65 us 6.65 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 4.34 us 4.34 us 4.34 us spinlock process_one_work+0x1a9 root@x1:~# root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf 0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8) = -1 EOPNOTSUPP (Operation not supported) 0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16 0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16 26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148) = 16 26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80) = 16 87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40) = 16 87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40) = 16 88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40) = 16 88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148) = 17 88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40) = 16 root@x1:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-09-02 13:05:14 -07:00
skel->rodata->stack_skip = con->stack_skip;
skel->rodata->aggr_mode = con->aggr_mode;
skel->rodata->needs_callstack = con->save_callstack;
skel->rodata->lock_owner = con->owner;
if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) {
if (cgroup_is_v2("perf_event"))
skel->rodata->use_cgroup_v2 = 1;
}
check_slab_cache_iter(con);
perf lock contention: Handle slab objects in -L/--lock-filter option This is to filter lock contention from specific slab objects only. Like in the lock symbol output, we can use '&' prefix to filter slab object names. root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 3 14.99 us 14.44 us 5.00 us ffffffff851c0940 pack_mutex (mutex) 2 2.75 us 2.56 us 1.38 us ffff98d7031fb498 &task_struct (mutex) 4 1.42 us 557 ns 355 ns ffff98d706311400 &kmalloc-cg-512 (mutex) 2 953 ns 714 ns 476 ns ffffffff851c3620 delayed_uprobe_lock (mutex) 1 929 ns 929 ns 929 ns ffff98d7031fb538 &task_struct (mutex) 3 561 ns 210 ns 187 ns ffffffff84a8b3a0 text_mutex (mutex) 1 479 ns 479 ns 479 ns ffffffff851b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 320 ns 195 ns 160 ns ffffffff851cf840 pcpu_alloc_mutex (mutex) 1 212 ns 212 ns 212 ns ffff98d7031784d8 &signal_cache (mutex) 1 177 ns 177 ns 177 ns ffffffff851b4c28 tracepoint_srcu_srcu_usage (mutex) With the filter, it can show contentions from the task_struct only. root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl -L '&task_struct' sleep 1 contended total wait max wait avg wait address symbol 2 1.97 us 1.71 us 987 ns ffff98d7032fd658 &task_struct (mutex) 1 1.20 us 1.20 us 1.20 us ffff98d7032fd6f8 &task_struct (mutex) It can work with other aggregation mode: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -ab -L '&task_struct' sleep 1 contended total wait max wait avg wait type caller 1 25.10 us 25.10 us 25.10 us mutex perf_event_exit_task+0x39 1 21.60 us 21.60 us 21.60 us mutex futex_exit_release+0x21 1 5.56 us 5.56 us 5.56 us mutex futex_exec_release+0x21 Committer testing: root@number:~# perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 1 20.80 us 20.80 us 20.80 us ffff9d417fbd65d0 (spinlock) 8 12.85 us 2.41 us 1.61 us ffff9d415eeb6a40 rq_lock (spinlock) 1 2.55 us 2.55 us 2.55 us ffff9d415f636a40 rq_lock (spinlock) 7 1.92 us 840 ns 274 ns ffff9d39c2cbc8c4 (spinlock) 1 1.23 us 1.23 us 1.23 us ffff9d415fb36a40 rq_lock (spinlock) 2 928 ns 738 ns 464 ns ffff9d39c1fa6660 &kmalloc-rnd-14-192 (rwlock) 4 788 ns 252 ns 197 ns ffffffffb8608a80 jiffies_lock (spinlock) 1 304 ns 304 ns 304 ns ffff9d39c2c979c4 (spinlock) 1 216 ns 216 ns 216 ns ffff9d3a0225c660 &kmalloc-rnd-14-192 (rwlock) 1 89 ns 89 ns 89 ns ffff9d3a0adbf3e0 &kmalloc-rnd-14-192 (rwlock) 1 61 ns 61 ns 61 ns ffff9d415f9b6a40 rq_lock (spinlock) root@number:~# uname -r 6.13.0-rc2 root@number:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:09 -08:00
if (con->filters->nr_slabs && has_slab_iter) {
skel->rodata->has_slab = 1;
nslabs = con->filters->nr_slabs;
}
bpf_map__set_max_entries(skel->maps.slab_filter, nslabs);
perf lock contention: Symbolize zone->lock using BTF The struct zone is embedded in struct pglist_data which can be allocated for each NUMA node early in the boot process. As it's not a slab object nor a global lock, this was not symbolized. Since the zone->lock is often contended, it'd be nice if we can symbolize it. On NUMA systems, node_data array will have pointers for struct pglist_data. By following the pointer, it can calculate the address of each zone and its lock using BTF. On UMA, it can just use contig_page_data and its zones. The following example shows the zone lock contention at the end. $ sudo ./perf lock con -abl -E 5 -- ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.038 [sec] contended total wait max wait avg wait address symbol 5167 18.17 ms 10.27 us 3.52 us ffff953340052d00 &kmem_cache_node (spinlock) 38 11.75 ms 465.49 us 309.13 us ffff95334060c480 &sock_inode_cache (spinlock) 3916 10.13 ms 10.43 us 2.59 us ffff953342aecb40 &kmem_cache_node (spinlock) 2963 10.02 ms 13.75 us 3.38 us ffff9533d2344098 &kmalloc-rnd-08-2k (spinlock) 216 5.05 ms 99.49 us 23.39 us ffff9542bf7d65d0 zone_lock (spinlock) Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: bpf@vger.kernel.org Cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20250401063055.7431-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-03-31 23:30:55 -07:00
init_numa_data(con);
if (lock_contention_bpf__load(skel) < 0) {
pr_err("Failed to load lock-contention BPF skeleton\n");
return -1;
}
if (target__has_cpu(target)) {
u32 cpu;
u8 val = 1;
fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) {
cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
}
}
if (target__has_task(target)) {
u32 pid;
u8 val = 1;
fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) {
pid = perf_thread_map__pid(evlist->core.threads, i);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}
}
if (target__none(target) && evlist->workload.pid > 0) {
u32 pid = evlist->workload.pid;
u8 val = 1;
fd = bpf_map__fd(skel->maps.task_filter);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}
if (con->filters->nr_types) {
u8 val = 1;
fd = bpf_map__fd(skel->maps.type_filter);
for (i = 0; i < con->filters->nr_types; i++)
bpf_map_update_elem(fd, &con->filters->types[i], &val, BPF_ANY);
}
if (con->filters->nr_addrs) {
u8 val = 1;
fd = bpf_map__fd(skel->maps.addr_filter);
for (i = 0; i < con->filters->nr_addrs; i++)
bpf_map_update_elem(fd, &con->filters->addrs[i], &val, BPF_ANY);
}
perf lock contention: Add -G/--cgroup-filter option The -G/--cgroup-filter is to limit lock contention collection on the tasks in the specific cgroups only. $ sudo ./perf lock con -abt -G /user.slice/.../vte-spawn-52221fb8-b33f-4a52-b5c3-e35d1e6fc0e0.scope \ ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.174 [sec] contended total wait max wait avg wait pid comm 4 114.45 us 60.06 us 28.61 us 214847 sched-messaging 2 111.40 us 60.84 us 55.70 us 214848 sched-messaging 2 106.09 us 59.42 us 53.04 us 214837 sched-messaging 1 81.70 us 81.70 us 81.70 us 214709 sched-messaging 68 78.44 us 6.83 us 1.15 us 214633 sched-messaging 69 73.71 us 2.69 us 1.07 us 214632 sched-messaging 4 72.62 us 60.83 us 18.15 us 214850 sched-messaging 2 71.75 us 67.60 us 35.88 us 214840 sched-messaging 2 69.29 us 67.53 us 34.65 us 214804 sched-messaging 2 69.00 us 68.23 us 34.50 us 214826 sched-messaging ... Export cgroup__new() function as it's needed from outside. Reviewed-by: Ian Rogers <irogers@google.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230906174903.346486-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-09-06 10:49:02 -07:00
if (con->filters->nr_cgrps) {
u8 val = 1;
fd = bpf_map__fd(skel->maps.cgroup_filter);
for (i = 0; i < con->filters->nr_cgrps; i++)
bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY);
}
perf lock contention: Add -J/--inject-delay option This is to slow down lock acquistion (on contention locks) deliberately. A possible use case is to estimate impact on application performance by optimization of kernel locking behavior. By delaying the lock it can simulate the worse condition as a control group, and then compare with the current behavior as a optimized condition. The syntax is 'time@function' and the time can have unit suffix like "us" and "ms". For example, I ran a simple test like below. $ sudo perf lock con -abl -L tasklist_lock -- \ sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 92 1.18 ms 199.54 us 12.79 us ffffffff8a806080 tasklist_lock (rwlock) The contention count was 92 and the average wait time was around 10 us. But if I add 100 usec of delay to the tasklist_lock, $ sudo perf lock con -abl -L tasklist_lock -J 100us@tasklist_lock -- \ sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 190 15.67 ms 230.10 us 82.46 us ffffffff8a806080 tasklist_lock (rwlock) The contention count increased and the average wait time was up closed to 100 usec. If I increase the delay even more, $ sudo perf lock con -abl -L tasklist_lock -J 1ms@tasklist_lock -- \ sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 1002 2.80 s 3.01 ms 2.80 ms ffffffff8a806080 tasklist_lock (rwlock) Now every sleep process had contention and the wait time was more than 1 msec. This is on my 4 CPU laptop so I guess one CPU has the lock while other 3 are waiting for it mostly. For simplicity, it only supports global locks for now. Committer testing: root@number:~# grep -m1 'model name' /proc/cpuinfo model name : AMD Ryzen 9 9950X3D 16-Core Processor root@number:~# perf lock con -abl -L tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 142 453.85 us 25.39 us 3.20 us ffffffffae808080 tasklist_lock (rwlock) root@number:~# perf lock con -abl -L tasklist_lock -J 100us@tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 1040 2.39 s 3.11 ms 2.30 ms ffffffffae808080 tasklist_lock (rwlock) root@number:~# perf lock con -abl -L tasklist_lock -J 1ms@tasklist_lock -- sh -c 'for i in $(seq 1000); do sleep 1 & done; wait' contended total wait max wait avg wait address symbol 1025 24.72 s 31.01 ms 24.12 ms ffffffffae808080 tasklist_lock (rwlock) root@number:~# Suggested-by: Stephane Eranian <eranian@google.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20250509171950.183591-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-05-09 10:19:50 -07:00
if (con->nr_delays) {
fd = bpf_map__fd(skel->maps.lock_delays);
for (i = 0; i < con->nr_delays; i++)
bpf_map_update_elem(fd, &con->delays[i].addr, &con->delays[i].time, BPF_ANY);
}
perf lock contention: Constify control data for BPF The control knobs set before loading BPF programs should be declared as 'const volatile' so that it can be optimized by the BPF core. Committer testing: root@x1:~# perf lock contention --use-bpf contended total wait max wait avg wait type caller 5 31.57 us 14.93 us 6.31 us mutex btrfs_delayed_update_inode+0x43 1 16.91 us 16.91 us 16.91 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 15.13 us 15.13 us 15.13 us spinlock btrfs_getattr+0xd1 1 6.65 us 6.65 us 6.65 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 4.34 us 4.34 us 4.34 us spinlock process_one_work+0x1a9 root@x1:~# root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf 0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8) = -1 EOPNOTSUPP (Operation not supported) 0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16 0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16 26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148) = 16 26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80) = 16 87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40) = 16 87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40) = 16 88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40) = 16 88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148) = 17 88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40) = 16 root@x1:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-09-02 13:05:14 -07:00
if (con->aggr_mode == LOCK_AGGR_CGROUP)
read_all_cgroups(&con->cgroups);
bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
lock_contention_bpf__attach(skel);
perf lock contention: Handle slab objects in -L/--lock-filter option This is to filter lock contention from specific slab objects only. Like in the lock symbol output, we can use '&' prefix to filter slab object names. root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 3 14.99 us 14.44 us 5.00 us ffffffff851c0940 pack_mutex (mutex) 2 2.75 us 2.56 us 1.38 us ffff98d7031fb498 &task_struct (mutex) 4 1.42 us 557 ns 355 ns ffff98d706311400 &kmalloc-cg-512 (mutex) 2 953 ns 714 ns 476 ns ffffffff851c3620 delayed_uprobe_lock (mutex) 1 929 ns 929 ns 929 ns ffff98d7031fb538 &task_struct (mutex) 3 561 ns 210 ns 187 ns ffffffff84a8b3a0 text_mutex (mutex) 1 479 ns 479 ns 479 ns ffffffff851b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 320 ns 195 ns 160 ns ffffffff851cf840 pcpu_alloc_mutex (mutex) 1 212 ns 212 ns 212 ns ffff98d7031784d8 &signal_cache (mutex) 1 177 ns 177 ns 177 ns ffffffff851b4c28 tracepoint_srcu_srcu_usage (mutex) With the filter, it can show contentions from the task_struct only. root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl -L '&task_struct' sleep 1 contended total wait max wait avg wait address symbol 2 1.97 us 1.71 us 987 ns ffff98d7032fd658 &task_struct (mutex) 1 1.20 us 1.20 us 1.20 us ffff98d7032fd6f8 &task_struct (mutex) It can work with other aggregation mode: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -ab -L '&task_struct' sleep 1 contended total wait max wait avg wait type caller 1 25.10 us 25.10 us 25.10 us mutex perf_event_exit_task+0x39 1 21.60 us 21.60 us 21.60 us mutex futex_exit_release+0x21 1 5.56 us 5.56 us 5.56 us mutex futex_exec_release+0x21 Committer testing: root@number:~# perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 1 20.80 us 20.80 us 20.80 us ffff9d417fbd65d0 (spinlock) 8 12.85 us 2.41 us 1.61 us ffff9d415eeb6a40 rq_lock (spinlock) 1 2.55 us 2.55 us 2.55 us ffff9d415f636a40 rq_lock (spinlock) 7 1.92 us 840 ns 274 ns ffff9d39c2cbc8c4 (spinlock) 1 1.23 us 1.23 us 1.23 us ffff9d415fb36a40 rq_lock (spinlock) 2 928 ns 738 ns 464 ns ffff9d39c1fa6660 &kmalloc-rnd-14-192 (rwlock) 4 788 ns 252 ns 197 ns ffffffffb8608a80 jiffies_lock (spinlock) 1 304 ns 304 ns 304 ns ffff9d39c2c979c4 (spinlock) 1 216 ns 216 ns 216 ns ffff9d3a0225c660 &kmalloc-rnd-14-192 (rwlock) 1 89 ns 89 ns 89 ns ffff9d3a0adbf3e0 &kmalloc-rnd-14-192 (rwlock) 1 61 ns 61 ns 61 ns ffff9d415f9b6a40 rq_lock (spinlock) root@number:~# uname -r 6.13.0-rc2 root@number:~# Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:09 -08:00
/* run the slab iterator after attaching */
run_slab_cache_iter();
if (con->filters->nr_slabs) {
u8 val = 1;
int cache_fd;
long key, *prev_key;
fd = bpf_map__fd(skel->maps.slab_filter);
/* Read the slab cache map and build a hash with its address */
cache_fd = bpf_map__fd(skel->maps.slab_caches);
prev_key = NULL;
while (!bpf_map_get_next_key(cache_fd, prev_key, &key)) {
struct slab_cache_data data;
if (bpf_map_lookup_elem(cache_fd, &key, &data) < 0)
break;
for (i = 0; i < con->filters->nr_slabs; i++) {
if (!strcmp(con->filters->slabs[i], data.name)) {
bpf_map_update_elem(fd, &key, &val, BPF_ANY);
break;
}
}
prev_key = &key;
}
}
return 0;
}
/*
* Run the BPF program directly using BPF_PROG_TEST_RUN to update the end
* timestamp in ktime so that it can calculate delta easily.
*/
static void mark_end_timestamp(void)
{
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.flags = BPF_F_TEST_RUN_ON_CPU,
);
int prog_fd = bpf_program__fd(skel->progs.end_timestamp);
bpf_prog_test_run_opts(prog_fd, &opts);
}
static void update_lock_stat(int map_fd, int pid, u64 end_ts,
enum lock_aggr_mode aggr_mode,
struct tstamp_data *ts_data)
{
u64 delta;
struct contention_key stat_key = {};
struct contention_data stat_data;
if (ts_data->timestamp >= end_ts)
return;
delta = end_ts - ts_data->timestamp;
switch (aggr_mode) {
case LOCK_AGGR_CALLER:
stat_key.stack_id = ts_data->stack_id;
break;
case LOCK_AGGR_TASK:
stat_key.pid = pid;
break;
case LOCK_AGGR_ADDR:
stat_key.lock_addr_or_cgroup = ts_data->lock;
break;
case LOCK_AGGR_CGROUP:
/* TODO */
return;
default:
return;
}
if (bpf_map_lookup_elem(map_fd, &stat_key, &stat_data) < 0)
return;
stat_data.total_time += delta;
stat_data.count++;
if (delta > stat_data.max_time)
stat_data.max_time = delta;
if (delta < stat_data.min_time)
stat_data.min_time = delta;
bpf_map_update_elem(map_fd, &stat_key, &stat_data, BPF_EXIST);
}
/*
* Account entries in the tstamp map (which didn't see the corresponding
* lock:contention_end tracepoint) using end_ts.
*/
static void account_end_timestamp(struct lock_contention *con)
{
int ts_fd, stat_fd;
int *prev_key, key;
u64 end_ts = skel->bss->end_ts;
int total_cpus;
enum lock_aggr_mode aggr_mode = con->aggr_mode;
struct tstamp_data ts_data, *cpu_data;
/* Iterate per-task tstamp map (key = TID) */
ts_fd = bpf_map__fd(skel->maps.tstamp);
stat_fd = bpf_map__fd(skel->maps.lock_stat);
prev_key = NULL;
while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) {
if (bpf_map_lookup_elem(ts_fd, &key, &ts_data) == 0) {
int pid = key;
if (aggr_mode == LOCK_AGGR_TASK && con->owner)
pid = ts_data.flags;
update_lock_stat(stat_fd, pid, end_ts, aggr_mode,
&ts_data);
}
prev_key = &key;
}
/* Now it'll check per-cpu tstamp map which doesn't have TID. */
if (aggr_mode == LOCK_AGGR_TASK || aggr_mode == LOCK_AGGR_CGROUP)
return;
total_cpus = cpu__max_cpu().cpu;
ts_fd = bpf_map__fd(skel->maps.tstamp_cpu);
cpu_data = calloc(total_cpus, sizeof(*cpu_data));
if (cpu_data == NULL)
return;
prev_key = NULL;
while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) {
if (bpf_map_lookup_elem(ts_fd, &key, cpu_data) < 0)
goto next;
for (int i = 0; i < total_cpus; i++) {
perf lock contention: Fix spinlock and rwlock accounting The spinlock and rwlock use a single-element per-cpu array to track current locks due to performance reason. But this means the key is always available and it cannot simply account lock stats in the array because some of them are invalid. In fact, the contention_end() program in the BPF invalidates the entry by setting the 'lock' value to 0 instead of deleting the entry for the hashmap. So it should skip entries with the lock value of 0 in the account_end_timestamp(). Otherwise, it'd have spurious high contention on an idle machine: $ sudo perf lock con -ab -Y spinlock sleep 3 contended total wait max wait avg wait type caller 8 4.72 s 1.84 s 590.46 ms spinlock rcu_core+0xc7 8 1.87 s 1.87 s 233.48 ms spinlock process_one_work+0x1b5 2 1.87 s 1.87 s 933.92 ms spinlock worker_thread+0x1a2 3 1.81 s 1.81 s 603.93 ms spinlock tmigr_update_events+0x13c 2 1.72 s 1.72 s 861.98 ms spinlock tick_do_update_jiffies64+0x25 6 42.48 us 13.02 us 7.08 us spinlock futex_q_lock+0x2a 1 13.03 us 13.03 us 13.03 us spinlock futex_wake+0xce 1 11.61 us 11.61 us 11.61 us spinlock rcu_core+0xc7 I don't believe it has contention on a spinlock longer than 1 second. After this change, it only reports some small contentions. $ sudo perf lock con -ab -Y spinlock sleep 3 contended total wait max wait avg wait type caller 4 133.51 us 43.29 us 33.38 us spinlock tick_do_update_jiffies64+0x25 4 69.06 us 31.82 us 17.27 us spinlock process_one_work+0x1b5 2 50.66 us 25.77 us 25.33 us spinlock rcu_core+0xc7 1 28.45 us 28.45 us 28.45 us spinlock rcu_core+0xc7 1 24.77 us 24.77 us 24.77 us spinlock tmigr_update_events+0x13c 1 23.34 us 23.34 us 23.34 us spinlock raw_spin_rq_lock_nested+0x15 Fixes: b5711042a1c8cc88 ("perf lock contention: Use per-cpu array map for spinlocks") Reported-by: Xi Wang <xii@google.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: bpf@vger.kernel.org Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Link: https://lore.kernel.org/r/20240828052953.1445862-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-08-27 22:29:53 -07:00
if (cpu_data[i].lock == 0)
continue;
update_lock_stat(stat_fd, -1, end_ts, aggr_mode,
&cpu_data[i]);
}
next:
prev_key = &key;
}
free(cpu_data);
}
int lock_contention_start(void)
{
skel->bss->enabled = 1;
return 0;
}
int lock_contention_stop(void)
{
skel->bss->enabled = 0;
mark_end_timestamp();
return 0;
}
static const char *lock_contention_get_name(struct lock_contention *con,
struct contention_key *key,
u64 *stack_trace, u32 flags)
{
int idx = 0;
u64 addr;
static char name_buf[KSYM_NAME_LEN];
struct symbol *sym;
struct map *kmap;
struct machine *machine = con->machine;
if (con->aggr_mode == LOCK_AGGR_TASK) {
struct contention_task_data task;
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-02 18:13:24 -08:00
int pid = key->pid;
int task_fd = bpf_map__fd(skel->maps.task_data);
/* do not update idle comm which contains CPU number */
if (pid) {
struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid);
perf lock: Report owner stack in usermode This patch parses `owner_lock_stat` into a RB tree, enabling ordered reporting of owner lock statistics with stack traces. It also updates the documentation for the `-o` option in contention mode, decouples `-o` from `-t`, and issues a warning to inform users about the new behavior of `-ov`. Example output: $ sudo ~/linux/tools/perf/perf lock con -abvo -Y mutex-spin -E3 perf bench sched pipe ... contended total wait max wait avg wait type caller 171 1.55 ms 20.26 us 9.06 us mutex pipe_read+0x57 0xffffffffac6318e7 pipe_read+0x57 0xffffffffac623862 vfs_read+0x332 0xffffffffac62434b ksys_read+0xbb 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 36 193.71 us 15.27 us 5.38 us mutex pipe_write+0x50 0xffffffffac631ee0 pipe_write+0x50 0xffffffffac6241db vfs_write+0x3bb 0xffffffffac6244ab ksys_write+0xbb 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 4 51.22 us 16.47 us 12.80 us mutex do_epoll_wait+0x24d 0xffffffffac691f0d do_epoll_wait+0x24d 0xffffffffac69249b do_epoll_pwait.part.0+0xb 0xffffffffac693ba5 __x64_sys_epoll_pwait+0x95 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 === owner stack trace === 3 31.24 us 15.27 us 10.41 us mutex pipe_read+0x348 0xffffffffac631bd8 pipe_read+0x348 0xffffffffac623862 vfs_read+0x332 0xffffffffac62434b ksys_read+0xbb 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 ... Signed-off-by: Chun-Tse Shao <ctshao@google.com> Tested-by: Athira Rajeev <atrajeev@linux.ibm.com> Link: https://lore.kernel.org/r/20250227003359.732948-5-ctshao@google.com Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-02-26 16:28:56 -08:00
if (t != NULL &&
!bpf_map_lookup_elem(task_fd, &pid, &task) &&
thread__set_comm(t, task.comm, /*timestamp=*/0)) {
snprintf(name_buf, sizeof(name_buf), "%s", task.comm);
return name_buf;
}
}
perf lock: Report owner stack in usermode This patch parses `owner_lock_stat` into a RB tree, enabling ordered reporting of owner lock statistics with stack traces. It also updates the documentation for the `-o` option in contention mode, decouples `-o` from `-t`, and issues a warning to inform users about the new behavior of `-ov`. Example output: $ sudo ~/linux/tools/perf/perf lock con -abvo -Y mutex-spin -E3 perf bench sched pipe ... contended total wait max wait avg wait type caller 171 1.55 ms 20.26 us 9.06 us mutex pipe_read+0x57 0xffffffffac6318e7 pipe_read+0x57 0xffffffffac623862 vfs_read+0x332 0xffffffffac62434b ksys_read+0xbb 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 36 193.71 us 15.27 us 5.38 us mutex pipe_write+0x50 0xffffffffac631ee0 pipe_write+0x50 0xffffffffac6241db vfs_write+0x3bb 0xffffffffac6244ab ksys_write+0xbb 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 4 51.22 us 16.47 us 12.80 us mutex do_epoll_wait+0x24d 0xffffffffac691f0d do_epoll_wait+0x24d 0xffffffffac69249b do_epoll_pwait.part.0+0xb 0xffffffffac693ba5 __x64_sys_epoll_pwait+0x95 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 === owner stack trace === 3 31.24 us 15.27 us 10.41 us mutex pipe_read+0x348 0xffffffffac631bd8 pipe_read+0x348 0xffffffffac623862 vfs_read+0x332 0xffffffffac62434b ksys_read+0xbb 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 ... Signed-off-by: Chun-Tse Shao <ctshao@google.com> Tested-by: Athira Rajeev <atrajeev@linux.ibm.com> Link: https://lore.kernel.org/r/20250227003359.732948-5-ctshao@google.com Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-02-26 16:28:56 -08:00
return "";
}
if (con->aggr_mode == LOCK_AGGR_ADDR) {
int lock_fd = bpf_map__fd(skel->maps.lock_syms);
perf lock contention: Resolve slab object name using BPF The bpf_get_kmem_cache() kfunc can return an address of the slab cache (kmem_cache). As it has the name of the slab cache from the iterator, we can use it to symbolize some dynamic kernel locks in a slab. Before: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 3.34 us 2.87 us 1.67 us ffff9d7800ad9600 (mutex) 2 2.16 us 1.93 us 1.08 us ffff9d7804b992d8 (mutex) 4 1.37 us 517 ns 343 ns ffff9d78036e6e00 (mutex) 1 1.27 us 1.27 us 1.27 us ffff9d7804b99378 (mutex) 2 845 ns 599 ns 422 ns ffffffff9e1c3620 delayed_uprobe_lock (mutex) 1 845 ns 845 ns 845 ns ffffffff9da0b280 jiffies_lock (spinlock) 2 377 ns 259 ns 188 ns ffffffff9e1cf840 pcpu_alloc_mutex (mutex) 1 305 ns 305 ns 305 ns ffffffff9e1b4cf8 tracepoint_srcu_srcu_usage (mutex) 1 295 ns 295 ns 295 ns ffffffff9e1c0940 pack_mutex (mutex) 1 232 ns 232 ns 232 ns ffff9d7804b7d8d8 (mutex) 1 180 ns 180 ns 180 ns ffffffff9e1b4c28 tracepoint_srcu_srcu_usage (mutex) 1 165 ns 165 ns 165 ns ffffffff9da8b3a0 text_mutex (mutex) After: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 1.95 us 1.77 us 975 ns ffff9d5e852d3498 &task_struct (mutex) 1 1.18 us 1.18 us 1.18 us ffff9d5e852d3538 &task_struct (mutex) 4 1.12 us 354 ns 279 ns ffff9d5e841ca800 &kmalloc-cg-512 (mutex) 2 859 ns 617 ns 429 ns ffffffffa41c3620 delayed_uprobe_lock (mutex) 3 691 ns 388 ns 230 ns ffffffffa41c0940 pack_mutex (mutex) 3 421 ns 164 ns 140 ns ffffffffa3a8b3a0 text_mutex (mutex) 1 409 ns 409 ns 409 ns ffffffffa41b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 362 ns 239 ns 181 ns ffffffffa41cf840 pcpu_alloc_mutex (mutex) 1 220 ns 220 ns 220 ns ffff9d5e82b534d8 &signal_cache (mutex) 1 215 ns 215 ns 215 ns ffffffffa41b4c28 tracepoint_srcu_srcu_usage (mutex) Note that the name starts with '&' sign for slab objects to inform they are dynamic locks. It won't give the accurate lock or type names but it's still useful. We may add type info to the slab cache later to get the exact name of the lock in the type later. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Ian Rogers <irogers@google.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:08 -08:00
struct slab_cache_data *slab_data;
/* per-process locks set upper bits of the flags */
if (flags & LCD_F_MMAP_LOCK)
return "mmap_lock";
if (flags & LCD_F_SIGHAND_LOCK)
return "siglock";
/* global locks with symbols */
sym = machine__find_kernel_symbol(machine, key->lock_addr_or_cgroup, &kmap);
if (sym)
return sym->name;
/* try semi-global locks collected separately */
if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) {
if (flags == LOCK_CLASS_RQLOCK)
return "rq_lock";
}
perf lock contention: Symbolize zone->lock using BTF The struct zone is embedded in struct pglist_data which can be allocated for each NUMA node early in the boot process. As it's not a slab object nor a global lock, this was not symbolized. Since the zone->lock is often contended, it'd be nice if we can symbolize it. On NUMA systems, node_data array will have pointers for struct pglist_data. By following the pointer, it can calculate the address of each zone and its lock using BTF. On UMA, it can just use contig_page_data and its zones. The following example shows the zone lock contention at the end. $ sudo ./perf lock con -abl -E 5 -- ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.038 [sec] contended total wait max wait avg wait address symbol 5167 18.17 ms 10.27 us 3.52 us ffff953340052d00 &kmem_cache_node (spinlock) 38 11.75 ms 465.49 us 309.13 us ffff95334060c480 &sock_inode_cache (spinlock) 3916 10.13 ms 10.43 us 2.59 us ffff953342aecb40 &kmem_cache_node (spinlock) 2963 10.02 ms 13.75 us 3.38 us ffff9533d2344098 &kmalloc-rnd-08-2k (spinlock) 216 5.05 ms 99.49 us 23.39 us ffff9542bf7d65d0 zone_lock (spinlock) Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: bpf@vger.kernel.org Cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20250401063055.7431-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-03-31 23:30:55 -07:00
if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) {
if (flags == LOCK_CLASS_ZONE_LOCK)
return "zone_lock";
}
perf lock contention: Resolve slab object name using BPF The bpf_get_kmem_cache() kfunc can return an address of the slab cache (kmem_cache). As it has the name of the slab cache from the iterator, we can use it to symbolize some dynamic kernel locks in a slab. Before: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 3.34 us 2.87 us 1.67 us ffff9d7800ad9600 (mutex) 2 2.16 us 1.93 us 1.08 us ffff9d7804b992d8 (mutex) 4 1.37 us 517 ns 343 ns ffff9d78036e6e00 (mutex) 1 1.27 us 1.27 us 1.27 us ffff9d7804b99378 (mutex) 2 845 ns 599 ns 422 ns ffffffff9e1c3620 delayed_uprobe_lock (mutex) 1 845 ns 845 ns 845 ns ffffffff9da0b280 jiffies_lock (spinlock) 2 377 ns 259 ns 188 ns ffffffff9e1cf840 pcpu_alloc_mutex (mutex) 1 305 ns 305 ns 305 ns ffffffff9e1b4cf8 tracepoint_srcu_srcu_usage (mutex) 1 295 ns 295 ns 295 ns ffffffff9e1c0940 pack_mutex (mutex) 1 232 ns 232 ns 232 ns ffff9d7804b7d8d8 (mutex) 1 180 ns 180 ns 180 ns ffffffff9e1b4c28 tracepoint_srcu_srcu_usage (mutex) 1 165 ns 165 ns 165 ns ffffffff9da8b3a0 text_mutex (mutex) After: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 1.95 us 1.77 us 975 ns ffff9d5e852d3498 &task_struct (mutex) 1 1.18 us 1.18 us 1.18 us ffff9d5e852d3538 &task_struct (mutex) 4 1.12 us 354 ns 279 ns ffff9d5e841ca800 &kmalloc-cg-512 (mutex) 2 859 ns 617 ns 429 ns ffffffffa41c3620 delayed_uprobe_lock (mutex) 3 691 ns 388 ns 230 ns ffffffffa41c0940 pack_mutex (mutex) 3 421 ns 164 ns 140 ns ffffffffa3a8b3a0 text_mutex (mutex) 1 409 ns 409 ns 409 ns ffffffffa41b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 362 ns 239 ns 181 ns ffffffffa41cf840 pcpu_alloc_mutex (mutex) 1 220 ns 220 ns 220 ns ffff9d5e82b534d8 &signal_cache (mutex) 1 215 ns 215 ns 215 ns ffffffffa41b4c28 tracepoint_srcu_srcu_usage (mutex) Note that the name starts with '&' sign for slab objects to inform they are dynamic locks. It won't give the accurate lock or type names but it's still useful. We may add type info to the slab cache later to get the exact name of the lock in the type later. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Ian Rogers <irogers@google.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:08 -08:00
/* look slab_hash for dynamic locks in a slab object */
if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) {
snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name);
return name_buf;
}
return "";
}
if (con->aggr_mode == LOCK_AGGR_CGROUP) {
u64 cgrp_id = key->lock_addr_or_cgroup;
struct cgroup *cgrp = __cgroup__find(&con->cgroups, cgrp_id);
if (cgrp)
return cgrp->name;
snprintf(name_buf, sizeof(name_buf), "cgroup:%" PRIu64 "", cgrp_id);
return name_buf;
}
/* LOCK_AGGR_CALLER: skip lock internal functions */
while (machine__is_lock_function(machine, stack_trace[idx]) &&
idx < con->max_stack - 1)
idx++;
addr = stack_trace[idx];
sym = machine__find_kernel_symbol(machine, addr, &kmap);
if (sym) {
unsigned long offset;
perf map: Add helper for ->map_ip() and ->unmap_ip() Later changes will add reference count checking for struct map, add a helper function to invoke the map_ip and unmap_ip function pointers. The helper allows the reference count check to be in fewer places. Committer notes: Add missing conversions to: tools/perf/util/map.c tools/perf/util/cs-etm.c tools/perf/util/annotate.c tools/perf/arch/powerpc/util/sym-handling.c tools/perf/arch/s390/annotate/instructions.c Signed-off-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Cc: Darren Hart <dvhart@infradead.org> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Dmitriy Vyukov <dvyukov@google.com> Cc: Eric Dumazet <edumazet@google.com> Cc: German Gomez <german.gomez@arm.com> Cc: Hao Luo <haoluo@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: John Garry <john.g.garry@oracle.com> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Madhavan Srinivasan <maddy@linux.ibm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Miaoqian Lin <linmq006@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Riccardo Mancini <rickyman7@gmail.com> Cc: Shunsuke Nakamura <nakamura.shun@fujitsu.com> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Stephen Brennan <stephen.s.brennan@oracle.com> Cc: Steven Rostedt (VMware) <rostedt@goodmis.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Thomas Richter <tmricht@linux.ibm.com> Cc: Yury Norov <yury.norov@gmail.com> Link: https://lore.kernel.org/r/20230404205954.2245628-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-04-04 13:59:44 -07:00
offset = map__map_ip(kmap, addr) - sym->start;
if (offset == 0)
return sym->name;
snprintf(name_buf, sizeof(name_buf), "%s+%#lx", sym->name, offset);
} else {
snprintf(name_buf, sizeof(name_buf), "%#lx", (unsigned long)addr);
}
return name_buf;
}
perf lock: Report owner stack in usermode This patch parses `owner_lock_stat` into a RB tree, enabling ordered reporting of owner lock statistics with stack traces. It also updates the documentation for the `-o` option in contention mode, decouples `-o` from `-t`, and issues a warning to inform users about the new behavior of `-ov`. Example output: $ sudo ~/linux/tools/perf/perf lock con -abvo -Y mutex-spin -E3 perf bench sched pipe ... contended total wait max wait avg wait type caller 171 1.55 ms 20.26 us 9.06 us mutex pipe_read+0x57 0xffffffffac6318e7 pipe_read+0x57 0xffffffffac623862 vfs_read+0x332 0xffffffffac62434b ksys_read+0xbb 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 36 193.71 us 15.27 us 5.38 us mutex pipe_write+0x50 0xffffffffac631ee0 pipe_write+0x50 0xffffffffac6241db vfs_write+0x3bb 0xffffffffac6244ab ksys_write+0xbb 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 4 51.22 us 16.47 us 12.80 us mutex do_epoll_wait+0x24d 0xffffffffac691f0d do_epoll_wait+0x24d 0xffffffffac69249b do_epoll_pwait.part.0+0xb 0xffffffffac693ba5 __x64_sys_epoll_pwait+0x95 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 === owner stack trace === 3 31.24 us 15.27 us 10.41 us mutex pipe_read+0x348 0xffffffffac631bd8 pipe_read+0x348 0xffffffffac623862 vfs_read+0x332 0xffffffffac62434b ksys_read+0xbb 0xfffffffface604b2 do_syscall_64+0x82 0xffffffffad00012f entry_SYSCALL_64_after_hwframe+0x76 ... Signed-off-by: Chun-Tse Shao <ctshao@google.com> Tested-by: Athira Rajeev <atrajeev@linux.ibm.com> Link: https://lore.kernel.org/r/20250227003359.732948-5-ctshao@google.com Signed-off-by: Namhyung Kim <namhyung@kernel.org>
2025-02-26 16:28:56 -08:00
struct lock_stat *pop_owner_stack_trace(struct lock_contention *con)
{
int stacks_fd, stat_fd;
u64 *stack_trace = NULL;
s32 stack_id;
struct contention_key ckey = {};
struct contention_data cdata = {};
size_t stack_size = con->max_stack * sizeof(*stack_trace);
struct lock_stat *st = NULL;
stacks_fd = bpf_map__fd(skel->maps.owner_stacks);
stat_fd = bpf_map__fd(skel->maps.owner_stat);
if (!stacks_fd || !stat_fd)
goto out_err;
stack_trace = zalloc(stack_size);
if (stack_trace == NULL)
goto out_err;
if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace))
goto out_err;
bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id);
ckey.stack_id = stack_id;
bpf_map_lookup_elem(stat_fd, &ckey, &cdata);
st = zalloc(sizeof(struct lock_stat));
if (!st)
goto out_err;
st->name = strdup(stack_trace[0] ? lock_contention_get_name(con, NULL, stack_trace, 0) :
"unknown");
if (!st->name)
goto out_err;
st->flags = cdata.flags;
st->nr_contended = cdata.count;
st->wait_time_total = cdata.total_time;
st->wait_time_max = cdata.max_time;
st->wait_time_min = cdata.min_time;
st->callstack = stack_trace;
if (cdata.count)
st->avg_wait_time = cdata.total_time / cdata.count;
bpf_map_delete_elem(stacks_fd, stack_trace);
bpf_map_delete_elem(stat_fd, &ckey);
return st;
out_err:
free(stack_trace);
free(st);
return NULL;
}
int lock_contention_read(struct lock_contention *con)
{
int fd, stack, err = 0;
struct contention_key *prev_key, key = {};
struct contention_data data = {};
struct lock_stat *st = NULL;
struct machine *machine = con->machine;
u64 *stack_trace;
size_t stack_size = con->max_stack * sizeof(*stack_trace);
fd = bpf_map__fd(skel->maps.lock_stat);
stack = bpf_map__fd(skel->maps.stacks);
con->fails.task = skel->bss->task_fail;
con->fails.stack = skel->bss->stack_fail;
con->fails.time = skel->bss->time_fail;
con->fails.data = skel->bss->data_fail;
stack_trace = zalloc(stack_size);
if (stack_trace == NULL)
return -1;
account_end_timestamp(con);
if (con->aggr_mode == LOCK_AGGR_TASK) {
struct thread *idle = machine__findnew_thread(machine,
/*pid=*/0,
/*tid=*/0);
thread__set_comm(idle, "swapper", /*timestamp=*/0);
}
if (con->aggr_mode == LOCK_AGGR_ADDR) {
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.flags = BPF_F_TEST_RUN_ON_CPU,
);
int prog_fd = bpf_program__fd(skel->progs.collect_lock_syms);
bpf_prog_test_run_opts(prog_fd, &opts);
}
/* make sure it loads the kernel map */
perf maps: Add maps__load_first() Avoid bpf_lock_contention_read touching the internal maps data structure by adding a helper function. As access is done directly on the map in maps, hold the read lock to stop it being removed. Signed-off-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Cc: Changbin Du <changbin.du@huawei.com> Cc: Colin Ian King <colin.i.king@gmail.com> Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez <german.gomez@arm.com> Cc: Guilherme Amadio <amadio@gentoo.org> Cc: Huacai Chen <chenhuacai@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: K Prateek Nayak <kprateek.nayak@amd.com> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Li Dong <lidong@vivo.com> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Miguel Ojeda <ojeda@kernel.org> Cc: Ming Wang <wangming01@loongson.cn> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Nick Terrell <terrelln@fb.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: Sandipan Das <sandipan.das@amd.com> Cc: Sean Christopherson <seanjc@google.com> Cc: Steinar H. Gunderson <sesse@google.com> Cc: Vincent Whitchurch <vincent.whitchurch@axis.com> Cc: Wenyu Liu <liuwenyu7@huawei.com> Cc: Yang Jihong <yangjihong1@huawei.com> Link: https://lore.kernel.org/r/20231207011722.1220634-20-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-12-06 17:16:53 -08:00
maps__load_first(machine->kmaps);
prev_key = NULL;
while (!bpf_map_get_next_key(fd, prev_key, &key)) {
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-02 18:13:24 -08:00
s64 ls_key;
const char *name;
/* to handle errors in the loop body */
err = -1;
bpf_map_lookup_elem(fd, &key, &data);
if (con->save_callstack) {
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-02 18:13:24 -08:00
bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);
if (!match_callstack_filter(machine, stack_trace, con->max_stack)) {
con->nr_filtered += data.count;
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-02 18:13:24 -08:00
goto next;
}
}
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-02 18:13:24 -08:00
switch (con->aggr_mode) {
case LOCK_AGGR_CALLER:
ls_key = key.stack_id;
break;
case LOCK_AGGR_TASK:
ls_key = key.pid;
break;
case LOCK_AGGR_ADDR:
case LOCK_AGGR_CGROUP:
ls_key = key.lock_addr_or_cgroup;
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-02 18:13:24 -08:00
break;
default:
goto next;
}
st = lock_stat_find(ls_key);
if (st != NULL) {
st->wait_time_total += data.total_time;
if (st->wait_time_max < data.max_time)
st->wait_time_max = data.max_time;
if (st->wait_time_min > data.min_time)
st->wait_time_min = data.min_time;
st->nr_contended += data.count;
if (st->nr_contended)
st->avg_wait_time = st->wait_time_total / st->nr_contended;
goto next;
}
name = lock_contention_get_name(con, &key, stack_trace, data.flags);
perf lock contention: Support filters for different aggregation It'd be useful to filter other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collected the information but it needs to check the condition again when processing the event. And it needs to change BPF to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ... contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-02-02 18:13:24 -08:00
st = lock_stat_findnew(ls_key, name, data.flags);
if (st == NULL)
break;
st->nr_contended = data.count;
st->wait_time_total = data.total_time;
st->wait_time_max = data.max_time;
st->wait_time_min = data.min_time;
if (data.count)
st->avg_wait_time = data.total_time / data.count;
perf lock contention: Revise needs_callstack() condition It needs callstacks for two reasons: * for stack aggregation mode, the map key is the stack id and it can also show the full stack traces when -v is used * for other aggregation modes, the stack filter can be used to limit lock contentions from known call paths The -v option is meaningful (in terms of stack trace) only for stack aggregation mode, so it should not set the save_callstack for other mode like with -t or -l options. I've noticed this with the following command line: $ sudo ./perf lock con -ablv -E 3 -M 16 -- ./perf bench sched messaging ... contended total wait max wait avg wait address symbol 88 4.59 ms 108.07 us 52.13 us ffff935757f46ec0 (spinlock) 33 905.22 us 73.67 us 27.43 us ffff935757f41700 (spinlock) 28 703.69 us 79.28 us 25.13 us ffff938a3d9b0c80 rq_lock (spinlock) === output for debug === bad: 12272, total: 12421 bad rate: 98.80 % histogram of failure reasons task: 8285 stack: 3987 <---------- here time: 0 data: 0 It should not have any failure on stacks since it doesn't use it. No functional change intended. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Hao Luo <haoluo@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230406210611.1622492-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2023-04-06 14:06:10 -07:00
if (con->aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
st->callstack = memdup(stack_trace, stack_size);
if (st->callstack == NULL)
break;
perf lock contention: Show full callstack with -v option Currently it shows a caller function for each entry, but users need to see the full call stacks sometimes. Use -v/--verbose option to do that. # perf lock con -a -b -v sleep 3 Looking at the vmlinux_path (8 entries long) symsrc__init: cannot get elf header. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols contended total wait max wait avg wait type caller 1 10.74 us 10.74 us 10.74 us spinlock __bpf_trace_contention_begin+0xb 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffbb8b8e75 bpf_trace_run2+0x35 0xffffffffbb7eab9b __bpf_trace_contention_begin+0xb 0xffffffffbb7ebe75 queued_spin_lock_slowpath+0x1f5 0xffffffffbc1c26ff _raw_spin_lock+0x1f 0xffffffffbb841015 tick_do_update_jiffies64+0x25 0xffffffffbb8409ee tick_irq_enter+0x9e 1 7.70 us 7.70 us 7.70 us spinlock __bpf_trace_contention_begin+0xb 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffbb8b8e75 bpf_trace_run2+0x35 0xffffffffbb7eab9b __bpf_trace_contention_begin+0xb 0xffffffffbb7ebe75 queued_spin_lock_slowpath+0x1f5 0xffffffffbc1c26ff _raw_spin_lock+0x1f 0xffffffffbb7bc27e raw_spin_rq_lock_nested+0xe 0xffffffffbb7cef9c load_balance+0x66c Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <songliubraving@fb.com> Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20220912055314.744552-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2022-09-11 22:53:12 -07:00
}
next:
prev_key = &key;
/* we're fine now, reset the error */
err = 0;
}
free(stack_trace);
return err;
}
int lock_contention_finish(struct lock_contention *con)
{
if (skel) {
skel->bss->enabled = 0;
lock_contention_bpf__destroy(skel);
}
while (!RB_EMPTY_ROOT(&con->cgroups)) {
struct rb_node *node = rb_first(&con->cgroups);
struct cgroup *cgrp = rb_entry(node, struct cgroup, node);
rb_erase(node, &con->cgroups);
cgroup__put(cgrp);
}
perf lock contention: Resolve slab object name using BPF The bpf_get_kmem_cache() kfunc can return an address of the slab cache (kmem_cache). As it has the name of the slab cache from the iterator, we can use it to symbolize some dynamic kernel locks in a slab. Before: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 3.34 us 2.87 us 1.67 us ffff9d7800ad9600 (mutex) 2 2.16 us 1.93 us 1.08 us ffff9d7804b992d8 (mutex) 4 1.37 us 517 ns 343 ns ffff9d78036e6e00 (mutex) 1 1.27 us 1.27 us 1.27 us ffff9d7804b99378 (mutex) 2 845 ns 599 ns 422 ns ffffffff9e1c3620 delayed_uprobe_lock (mutex) 1 845 ns 845 ns 845 ns ffffffff9da0b280 jiffies_lock (spinlock) 2 377 ns 259 ns 188 ns ffffffff9e1cf840 pcpu_alloc_mutex (mutex) 1 305 ns 305 ns 305 ns ffffffff9e1b4cf8 tracepoint_srcu_srcu_usage (mutex) 1 295 ns 295 ns 295 ns ffffffff9e1c0940 pack_mutex (mutex) 1 232 ns 232 ns 232 ns ffff9d7804b7d8d8 (mutex) 1 180 ns 180 ns 180 ns ffffffff9e1b4c28 tracepoint_srcu_srcu_usage (mutex) 1 165 ns 165 ns 165 ns ffffffff9da8b3a0 text_mutex (mutex) After: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 1.95 us 1.77 us 975 ns ffff9d5e852d3498 &task_struct (mutex) 1 1.18 us 1.18 us 1.18 us ffff9d5e852d3538 &task_struct (mutex) 4 1.12 us 354 ns 279 ns ffff9d5e841ca800 &kmalloc-cg-512 (mutex) 2 859 ns 617 ns 429 ns ffffffffa41c3620 delayed_uprobe_lock (mutex) 3 691 ns 388 ns 230 ns ffffffffa41c0940 pack_mutex (mutex) 3 421 ns 164 ns 140 ns ffffffffa3a8b3a0 text_mutex (mutex) 1 409 ns 409 ns 409 ns ffffffffa41b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 362 ns 239 ns 181 ns ffffffffa41cf840 pcpu_alloc_mutex (mutex) 1 220 ns 220 ns 220 ns ffff9d5e82b534d8 &signal_cache (mutex) 1 215 ns 215 ns 215 ns ffffffffa41b4c28 tracepoint_srcu_srcu_usage (mutex) Note that the name starts with '&' sign for slab objects to inform they are dynamic locks. It won't give the accurate lock or type names but it's still useful. We may add type info to the slab cache later to get the exact name of the lock in the type later. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Ian Rogers <irogers@google.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:08 -08:00
exit_slab_cache_iter();
perf lock contention: Symbolize zone->lock using BTF The struct zone is embedded in struct pglist_data which can be allocated for each NUMA node early in the boot process. As it's not a slab object nor a global lock, this was not symbolized. Since the zone->lock is often contended, it'd be nice if we can symbolize it. On NUMA systems, node_data array will have pointers for struct pglist_data. By following the pointer, it can calculate the address of each zone and its lock using BTF. On UMA, it can just use contig_page_data and its zones. The following example shows the zone lock contention at the end. $ sudo ./perf lock con -abl -E 5 -- ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.038 [sec] contended total wait max wait avg wait address symbol 5167 18.17 ms 10.27 us 3.52 us ffff953340052d00 &kmem_cache_node (spinlock) 38 11.75 ms 465.49 us 309.13 us ffff95334060c480 &sock_inode_cache (spinlock) 3916 10.13 ms 10.43 us 2.59 us ffff953342aecb40 &kmem_cache_node (spinlock) 2963 10.02 ms 13.75 us 3.38 us ffff9533d2344098 &kmalloc-rnd-08-2k (spinlock) 216 5.05 ms 99.49 us 23.39 us ffff9542bf7d65d0 zone_lock (spinlock) Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: bpf@vger.kernel.org Cc: linux-mm@kvack.org Link: https://lore.kernel.org/r/20250401063055.7431-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-03-31 23:30:55 -07:00
btf__free(con->btf);
perf lock contention: Resolve slab object name using BPF The bpf_get_kmem_cache() kfunc can return an address of the slab cache (kmem_cache). As it has the name of the slab cache from the iterator, we can use it to symbolize some dynamic kernel locks in a slab. Before: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 3.34 us 2.87 us 1.67 us ffff9d7800ad9600 (mutex) 2 2.16 us 1.93 us 1.08 us ffff9d7804b992d8 (mutex) 4 1.37 us 517 ns 343 ns ffff9d78036e6e00 (mutex) 1 1.27 us 1.27 us 1.27 us ffff9d7804b99378 (mutex) 2 845 ns 599 ns 422 ns ffffffff9e1c3620 delayed_uprobe_lock (mutex) 1 845 ns 845 ns 845 ns ffffffff9da0b280 jiffies_lock (spinlock) 2 377 ns 259 ns 188 ns ffffffff9e1cf840 pcpu_alloc_mutex (mutex) 1 305 ns 305 ns 305 ns ffffffff9e1b4cf8 tracepoint_srcu_srcu_usage (mutex) 1 295 ns 295 ns 295 ns ffffffff9e1c0940 pack_mutex (mutex) 1 232 ns 232 ns 232 ns ffff9d7804b7d8d8 (mutex) 1 180 ns 180 ns 180 ns ffffffff9e1b4c28 tracepoint_srcu_srcu_usage (mutex) 1 165 ns 165 ns 165 ns ffffffff9da8b3a0 text_mutex (mutex) After: root@virtme-ng:/home/namhyung/project/linux# tools/perf/perf lock con -abl sleep 1 contended total wait max wait avg wait address symbol 2 1.95 us 1.77 us 975 ns ffff9d5e852d3498 &task_struct (mutex) 1 1.18 us 1.18 us 1.18 us ffff9d5e852d3538 &task_struct (mutex) 4 1.12 us 354 ns 279 ns ffff9d5e841ca800 &kmalloc-cg-512 (mutex) 2 859 ns 617 ns 429 ns ffffffffa41c3620 delayed_uprobe_lock (mutex) 3 691 ns 388 ns 230 ns ffffffffa41c0940 pack_mutex (mutex) 3 421 ns 164 ns 140 ns ffffffffa3a8b3a0 text_mutex (mutex) 1 409 ns 409 ns 409 ns ffffffffa41b4cf8 tracepoint_srcu_srcu_usage (mutex) 2 362 ns 239 ns 181 ns ffffffffa41cf840 pcpu_alloc_mutex (mutex) 1 220 ns 220 ns 220 ns ffff9d5e82b534d8 &signal_cache (mutex) 1 215 ns 215 ns 215 ns ffffffffa41b4c28 tracepoint_srcu_srcu_usage (mutex) Note that the name starts with '&' sign for slab objects to inform they are dynamic locks. It won't give the accurate lock or type names but it's still useful. We may add type info to the slab cache later to get the exact name of the lock in the type later. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Ian Rogers <irogers@google.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Chun-Tse Shao <ctshao@google.com> Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Kees Cook <kees@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Song Liu <song@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Link: https://lore.kernel.org/r/20241220060009.507297-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-12-19 22:00:08 -08:00
return 0;
}