mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

This is a breakdown of perf_mem_data_src.mem_dtlb values. It assumes PMU
drivers would set PERF_MEM_TLB_HIT bit with an appropriate level. And
having PERF_MEM_TLB_MISS means that it failed to find one in any levels
of TLB. For now, it doesn't use PERF_MEM_TLB_{WK,OS} bits.

Also it seems Intel machines don't distinguish L1 or L2 precisely. So I
added ANY_HIT (printed as "L?-Hit") to handle the case.

  $ perf mem report -F overhead,dtlb,dso --stdio
  ...
  #           --- D-TLB ----
  # Overhead   L?-Hit   Miss  Shared Object
  # ........  ..............  .................
  #
      67.03%    99.5%   0.5%  [unknown]
      31.23%    99.2%   0.8%  [kernel.kallsyms]
       1.08%    97.8%   2.2%  [i915]
       0.36%   100.0%   0.0%  [JIT] tid 6853
       0.12%   100.0%   0.0%  [drm]
       0.05%   100.0%   0.0%  [drm_kms_helper]
       0.05%   100.0%   0.0%  [ext4]
       0.02%   100.0%   0.0%  [aesni_intel]
       0.02%   100.0%   0.0%  [crc32c_intel]
       0.02%   100.0%   0.0%  [dm_crypt]
  ...

Committer testing:

  # perf report --header | grep cpudesc
  # cpudesc : AMD Ryzen 9 9950X3D 16-Core Processor
  # perf mem report -F overhead,dtlb,dso --stdio | head -20
  # To display the perf.data header info, please use --header/--header-only options.
  #
  #
  # Total Lost Samples: 0
  #
  # Samples: 2K of event 'cycles:P'
  # Total weight : 2637
  # Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc
  #
  #           ---------- D-TLB -----------
  # Overhead   L1-Hit L2-Hit   Miss  Other  Shared Object
  # ........  ............................  .................................
  #
      77.47%    18.4%   0.1%   0.6%  80.9%  [kernel.kallsyms]
       5.61%    36.5%   0.7%   1.4%  61.5%  libxul.so
       2.77%    39.7%   0.0%  12.3%  47.9%  libc.so.6
       2.01%    34.0%   1.9%   1.9%  62.3%  libglib-2.0.so.0.8400.1
       1.93%    31.4%   2.0%   2.0%  64.7%  [amdgpu]
       1.63%    48.8%   0.0%   0.0%  51.2%  [JIT] tid 60168
       1.14%     3.3%   0.0%   0.0%  96.7%  [vdso]
  #

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20250430205548.789750-12-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
149 lines
4.9 KiB
C
149 lines
4.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __PERF_MEM_EVENTS_H
|
|
#define __PERF_MEM_EVENTS_H
|
|
|
|
#include <stdbool.h>
|
|
#include <linux/types.h>
|
|
|
|
struct perf_mem_event {
|
|
bool supported;
|
|
bool ldlat;
|
|
u32 aux_event;
|
|
const char *tag;
|
|
const char *name;
|
|
const char *event_name;
|
|
};
|
|
|
|
/*
 * Indices of the memory event kinds.  PERF_MEM_EVENTS__MAX is the number of
 * kinds and is used as the length of the perf_mem_events[] and
 * perf_mem_record[] arrays declared in this header.
 */
enum {
	PERF_MEM_EVENTS__LOAD,
	PERF_MEM_EVENTS__STORE,
	PERF_MEM_EVENTS__LOAD_STORE,
	PERF_MEM_EVENTS__MAX,	/* count of entries above; keep last */
};
|
|
|
|
/*
 * Forward declarations: this header only passes pointers to these types,
 * so their full definitions are not needed here.
 */
struct evsel;
struct mem_info;
struct perf_pmu;
|
|
|
|
extern unsigned int perf_mem_events__loads_ldlat;
|
|
extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
|
|
extern bool perf_mem_record[PERF_MEM_EVENTS__MAX];
|
|
|
|
/*
 * Memory-event setup entry points.
 *
 * perf_pmu__mem_events_parse() takes a PMU and a string @str; by its name it
 * parses a mem-events selection for that PMU.  perf_pmu__mem_events_init()
 * takes no arguments.
 *
 * NOTE(review): both return int; presumably 0 on success and non-zero on
 * failure — the implementations are not visible here, confirm before relying
 * on it.
 */
int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str);
int perf_pmu__mem_events_init(void);
|
|
|
|
/*
 * PMU / event lookup helpers.
 *
 * NOTE(review): semantics below are read off the names and signatures only;
 * confirm against the implementation.
 *  - perf_pmu__mem_events_ptr(): returns a mem-event descriptor for @pmu and
 *    index @i (presumably a PERF_MEM_EVENTS__* value).
 *  - perf_mem_events_find_pmu(): returns a PMU, presumably one that supports
 *    memory events.
 *  - perf_pmu__mem_events_num_mem_pmus(): returns a count as int.
 *  - is_mem_loads_aux_event(): boolean predicate on a leader evsel.
 */
struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i);
struct perf_pmu *perf_mem_events_find_pmu(void);
int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu);
bool is_mem_loads_aux_event(struct evsel *leader);
|
|
|
|
/* By its name, lists the memory events of @pmu; returns nothing. */
void perf_pmu__mem_events_list(struct perf_pmu *pmu);

/*
 * Works on a record argument vector: @rec_argv is the vector, @argv_nr is
 * passed by pointer (presumably updated with the new argument count) and
 * @event_name_storage_out is an output pointer.
 *
 * NOTE(review): who owns/frees *@event_name_storage_out and the meaning of
 * the int return value are not visible in this header — confirm against the
 * implementation.
 */
int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
				 char **event_name_storage_out);
|
|
|
|
/*
 * Formatters for individual aspects of a memory access sample.  Each takes an
 * output buffer, its size and a read-only mem_info, and returns an int.
 *
 * NOTE(review): going by the names these cover the TLB, memory level, snoop,
 * lock and blocked information respectively, and presumably return the number
 * of characters written as scnprintf() does — confirm against the
 * implementation.
 */
int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__snp_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__lck_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__blk_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);

/* Same buffer/size/mem_info shape; by its name, the variant used by perf script. */
int perf_script__meminfo_scnprintf(char *bf, size_t size, const struct mem_info *mem_info);
|
|
|
|
struct c2c_stats {
|
|
u32 nr_entries;
|
|
|
|
u32 locks; /* count of 'lock' transactions */
|
|
u32 store; /* count of all stores in trace */
|
|
u32 st_uncache; /* stores to uncacheable address */
|
|
u32 st_noadrs; /* cacheable store with no address */
|
|
u32 st_l1hit; /* count of stores that hit L1D */
|
|
u32 st_l1miss; /* count of stores that miss L1D */
|
|
u32 st_na; /* count of stores with memory level is not available */
|
|
u32 load; /* count of all loads in trace */
|
|
u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */
|
|
u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */
|
|
u32 ld_uncache; /* loads to uncacheable address */
|
|
u32 ld_io; /* loads to io address */
|
|
u32 ld_miss; /* loads miss */
|
|
u32 ld_noadrs; /* cacheable load with no address */
|
|
u32 ld_fbhit; /* count of loads hitting Fill Buffer */
|
|
u32 ld_l1hit; /* count of loads that hit L1D */
|
|
u32 ld_l2hit; /* count of loads that hit L2D */
|
|
u32 ld_llchit; /* count of loads that hit LLC */
|
|
u32 lcl_hitm; /* count of loads with local HITM */
|
|
u32 rmt_hitm; /* count of loads with remote HITM */
|
|
u32 tot_hitm; /* count of loads with local and remote HITM */
|
|
u32 lcl_peer; /* count of loads with local peer cache */
|
|
u32 rmt_peer; /* count of loads with remote peer cache */
|
|
u32 tot_peer; /* count of loads with local and remote peer cache */
|
|
u32 rmt_hit; /* count of loads with remote hit clean; */
|
|
u32 lcl_dram; /* count of loads miss to local DRAM */
|
|
u32 rmt_dram; /* count of loads miss to remote DRAM */
|
|
u32 blk_data; /* count of loads blocked by data */
|
|
u32 blk_addr; /* count of loads blocked by address conflict */
|
|
u32 nomap; /* count of load/stores with no phys addrs */
|
|
u32 noparse; /* count of unparsable data sources */
|
|
};
|
|
|
|
/* Forward declaration; not referenced by any declaration visible in this header. */
struct hist_entry;

/*
 * c2c statistics helpers.
 *
 * NOTE(review): by name, c2c_decode_stats() classifies the sample described
 * by @mi into the counters of @stats, and c2c_add_stats() accumulates @add
 * into @stats.  The meaning of c2c_decode_stats()'s int return value is not
 * visible here — confirm against the implementation.
 */
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);
|
|
|
|
/*
 * Kinds of memory-access breakdown.  A value of this type is passed as the
 * first argument of mem_stat_index() and mem_stat_name(); by name, each kind
 * pairs with one of the enum mem_stat_{op,cache,memory,snoop,dtlb} types
 * declared in this header.
 */
enum mem_stat_type {
	PERF_MEM_STAT_OP,
	PERF_MEM_STAT_CACHE,
	PERF_MEM_STAT_MEMORY,
	PERF_MEM_STAT_SNOOP,
	PERF_MEM_STAT_DTLB,
};
|
|
|
|
/*
 * Printed width of one mem-stat value: 1 space, up to 5 characters for the
 * number (e.g. "100.0", decimal point included) and 1 percent sign.
 */
#define MEM_STAT_PRINT_LEN 7
|
|
|
|
/*
 * Buckets for the operation breakdown (PERF_MEM_STAT_OP).
 * NOTE(review): the values are presumably used as indices, so keep the order
 * stable and add new buckets before MEM_STAT_OP_OTHER only with care.
 */
enum mem_stat_op {
	MEM_STAT_OP_LOAD,
	MEM_STAT_OP_STORE,
	MEM_STAT_OP_LDST,
	MEM_STAT_OP_PFETCH,
	MEM_STAT_OP_EXEC,
	MEM_STAT_OP_OTHER,
};
|
|
|
|
/*
 * Buckets for the cache-level breakdown (PERF_MEM_STAT_CACHE).
 * NOTE(review): the values are presumably used as indices, so keep the order
 * stable.
 */
enum mem_stat_cache {
	MEM_STAT_CACHE_L1,
	MEM_STAT_CACHE_L2,
	MEM_STAT_CACHE_L3,
	MEM_STAT_CACHE_L4,
	MEM_STAT_CACHE_L1_BUF,
	MEM_STAT_CACHE_L2_BUF,
	MEM_STAT_CACHE_OTHER,
};
|
|
|
|
/*
 * Buckets for the memory-type breakdown (PERF_MEM_STAT_MEMORY).
 * NOTE(review): the values are presumably used as indices, so keep the order
 * stable.
 */
enum mem_stat_memory {
	MEM_STAT_MEMORY_RAM,
	MEM_STAT_MEMORY_MSC,
	MEM_STAT_MEMORY_UNC,
	MEM_STAT_MEMORY_CXL,
	MEM_STAT_MEMORY_IO,
	MEM_STAT_MEMORY_PMEM,
	MEM_STAT_MEMORY_OTHER,
};
|
|
|
|
/*
 * Buckets for the snoop-result breakdown (PERF_MEM_STAT_SNOOP).
 * NOTE(review): the values are presumably used as indices, so keep the order
 * stable.
 */
enum mem_stat_snoop {
	MEM_STAT_SNOOP_HIT,
	MEM_STAT_SNOOP_HITM,
	MEM_STAT_SNOOP_MISS,
	MEM_STAT_SNOOP_OTHER,
};
|
|
|
|
/*
 * Buckets for the data-TLB breakdown (PERF_MEM_STAT_DTLB), derived from
 * perf_mem_data_src.mem_dtlb.
 * NOTE(review): the values are presumably used as indices, so keep the order
 * stable.
 */
enum mem_stat_dtlb {
	MEM_STAT_DTLB_L1_HIT,
	MEM_STAT_DTLB_L2_HIT,
	MEM_STAT_DTLB_ANY_HIT,	/* hit at an unspecified level; printed as "L?-Hit" */
	MEM_STAT_DTLB_MISS,	/* translation not found in any TLB level */
	MEM_STAT_DTLB_OTHER,
};
|
|
|
|
/*
 * Mem-stat bucket helpers.
 *
 * NOTE(review): read off the signatures only — mem_stat_index() presumably
 * maps a sample's @data_src to a bucket index within breakdown @mst, and
 * mem_stat_name() presumably returns the printable name of bucket @idx of
 * breakdown @mst.  Behavior for out-of-range input is not visible here;
 * confirm against the implementation.
 */
int mem_stat_index(const enum mem_stat_type mst, const u64 data_src);
const char *mem_stat_name(const enum mem_stat_type mst, const int idx);
|
|
|
|
#endif /* __PERF_MEM_EVENTS_H */
|