mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

We are starting to deploy mmap_lock tracepoint monitoring across our fleet, and the early results showed that these tracepoints consume a significant amount of CPU in kernfs_path_from_node when enabled. It seems the kernel is trying to resolve the cgroup path in the fast path of the locking code when the tracepoints are enabled. In addition, the metrics of some applications regress when monitoring is enabled. The cgroup path resolution can be slow and should not be done in the fast path. Most userspace tools, like bpftrace, provide functionality to get the cgroup path from the cgroup id, so let's just trace the cgroup id and let users use better tools to get the path in the slow path. Link: https://lkml.kernel.org/r/20241125171617.113892-1-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev> Reviewed-by: Yosry Ahmed <yosryahmed@google.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Roman Gushchin <roman.gushchin@linux.dev> Reviewed-by: Axel Rasmussen <axelrasmussen@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Matthew Wilcox <willy@infradead.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Muchun Song <muchun.song@linux.dev> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Suren Baghdasaryan <surenb@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
79 lines
1.7 KiB
C
79 lines
1.7 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#undef TRACE_SYSTEM
|
|
#define TRACE_SYSTEM mmap_lock
|
|
|
|
#if !defined(_TRACE_MMAP_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
|
|
#define _TRACE_MMAP_LOCK_H
|
|
|
|
#include <linux/memcontrol.h>
|
|
#include <linux/tracepoint.h>
|
|
#include <linux/types.h>
|
|
|
|
struct mm_struct;
|
|
|
|
DECLARE_EVENT_CLASS(mmap_lock,
|
|
|
|
TP_PROTO(struct mm_struct *mm, bool write),
|
|
|
|
TP_ARGS(mm, write),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct *, mm)
|
|
__field(u64, memcg_id)
|
|
__field(bool, write)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mm = mm;
|
|
__entry->memcg_id = cgroup_id_from_mm(mm);
|
|
__entry->write = write;
|
|
),
|
|
|
|
TP_printk(
|
|
"mm=%p memcg_id=%llu write=%s",
|
|
__entry->mm, __entry->memcg_id,
|
|
__entry->write ? "true" : "false"
|
|
)
|
|
);
|
|
|
|
/*
 * DEFINE_MMAP_LOCK_EVENT(name) - define event @name as a member of the
 * mmap_lock event class, using the class prototype (mm, write).
 *
 * Every continued line must end in a backslash with nothing after it;
 * any intervening line would terminate the macro definition early.
 */
#define DEFINE_MMAP_LOCK_EVENT(name)					\
	DEFINE_EVENT(mmap_lock, name,					\
		TP_PROTO(struct mm_struct *mm, bool write),		\
		TP_ARGS(mm, write))
|
|
|
|
/*
 * Two events built from the mmap_lock class; both take (mm, write) and
 * share the class's record layout and output format.
 */
DEFINE_MMAP_LOCK_EVENT(mmap_lock_start_locking);
DEFINE_MMAP_LOCK_EVENT(mmap_lock_released);
|
|
|
|
TRACE_EVENT(mmap_lock_acquire_returned,
|
|
|
|
TP_PROTO(struct mm_struct *mm, bool write, bool success),
|
|
|
|
TP_ARGS(mm, write, success),
|
|
|
|
TP_STRUCT__entry(
|
|
__field(struct mm_struct *, mm)
|
|
__field(u64, memcg_id)
|
|
__field(bool, write)
|
|
__field(bool, success)
|
|
),
|
|
|
|
TP_fast_assign(
|
|
__entry->mm = mm;
|
|
__entry->memcg_id = cgroup_id_from_mm(mm);
|
|
__entry->write = write;
|
|
__entry->success = success;
|
|
),
|
|
|
|
TP_printk(
|
|
"mm=%p memcg_id=%llu write=%s success=%s",
|
|
__entry->mm,
|
|
__entry->memcg_id,
|
|
__entry->write ? "true" : "false",
|
|
__entry->success ? "true" : "false"
|
|
)
|
|
);
|
|
|
|
#endif /* _TRACE_MMAP_LOCK_H */
|
|
|
|
/* This part must be outside protection */
|
|
#include <trace/define_trace.h>
|