linux/tools/perf/util/mem-events.h
Namhyung Kim f7458176a7 perf mem: Add 'dtlb' output field
This is a breakdown of perf_mem_data_src.mem_dtlb values.  It assumes
PMU drivers would set PERF_MEM_TLB_HIT bit with an appropriate level.

Having PERF_MEM_TLB_MISS set means that no entry was found in any level
of the TLB.  For now, the PERF_MEM_TLB_{WK,OS} bits are not used.

Also, it seems Intel machines don't distinguish precisely between L1 and
L2, so I added ANY_HIT (printed as "L?-Hit") to handle that case.

  $ perf mem report -F overhead,dtlb,dso --stdio
  ...
  #           --- D-TLB ----
  # Overhead   L?-Hit   Miss  Shared Object
  # ........  ..............  .................
  #
      67.03%    99.5%   0.5%  [unknown]
      31.23%    99.2%   0.8%  [kernel.kallsyms]
       1.08%    97.8%   2.2%  [i915]
       0.36%   100.0%   0.0%  [JIT] tid 6853
       0.12%   100.0%   0.0%  [drm]
       0.05%   100.0%   0.0%  [drm_kms_helper]
       0.05%   100.0%   0.0%  [ext4]
       0.02%   100.0%   0.0%  [aesni_intel]
       0.02%   100.0%   0.0%  [crc32c_intel]
       0.02%   100.0%   0.0%  [dm_crypt]
       ...

Committer testing:

  # perf report --header | grep cpudesc
  # cpudesc : AMD Ryzen 9 9950X3D 16-Core Processor
  # perf mem report -F overhead,dtlb,dso --stdio | head -20
  # To display the perf.data header info, please use --header/--header-only options.
  #
  #
  # Total Lost Samples: 0
  #
  # Samples: 2K of event 'cycles:P'
  # Total weight : 2637
  # Sort order   : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc
  #
  #           ---------- D-TLB -----------
  # Overhead   L1-Hit L2-Hit   Miss  Other  Shared Object
  # ........  ............................  .................................
  #
      77.47%    18.4%   0.1%   0.6%  80.9%  [kernel.kallsyms]
       5.61%    36.5%   0.7%   1.4%  61.5%  libxul.so
       2.77%    39.7%   0.0%  12.3%  47.9%  libc.so.6
       2.01%    34.0%   1.9%   1.9%  62.3%  libglib-2.0.so.0.8400.1
       1.93%    31.4%   2.0%   2.0%  64.7%  [amdgpu]
       1.63%    48.8%   0.0%   0.0%  51.2%  [JIT] tid 60168
       1.14%     3.3%   0.0%   0.0%  96.7%  [vdso]
  #

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20250430205548.789750-12-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-05-02 15:36:14 -03:00

149 lines
4.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PERF_MEM_EVENTS_H
#define __PERF_MEM_EVENTS_H
#include <stdbool.h>
#include <linux/types.h>
/*
 * Description of one kind of memory event.  The global perf_mem_events[]
 * array holds PERF_MEM_EVENTS__MAX of these.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * and from the declarations in this header - confirm against the code that
 * fills the table.
 */
struct perf_mem_event {
	bool		supported;	/* presumably: event is usable on this system */
	bool		ldlat;		/* presumably: event takes a load-latency value */
	u32		aux_event;	/* presumably: auxiliary event id, see is_mem_loads_aux_event() */
	const char	*tag;		/* string; exact role not visible here */
	const char	*name;		/* string; exact role not visible here */
	const char	*event_name;	/* string; exact role not visible here */
};
/*
 * Kinds of memory event.  PERF_MEM_EVENTS__MAX is the number of kinds and
 * sizes the perf_mem_events[] and perf_mem_record[] arrays.
 */
enum {
	PERF_MEM_EVENTS__LOAD = 0,
	PERF_MEM_EVENTS__STORE,
	PERF_MEM_EVENTS__LOAD_STORE,
	PERF_MEM_EVENTS__MAX,		/* keep last: count of the entries above */
};
/*
 * Forward declarations: this header only uses pointers to these types, so
 * their full definitions are not required here.
 */
struct evsel;
struct mem_info;
struct perf_pmu;
/* NOTE(review): presumably the load-latency threshold applied to load events - confirm at the definition. */
extern unsigned int perf_mem_events__loads_ldlat;
/* Table of memory event descriptions, one slot per PERF_MEM_EVENTS__* value. */
extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
/* One flag per PERF_MEM_EVENTS__* value; NOTE(review): presumably "record this event kind" - confirm. */
extern bool perf_mem_record[PERF_MEM_EVENTS__MAX];
/*
 * Memory event helpers.
 * NOTE(review): only the prototypes are visible in this header; the one-line
 * summaries below are inferred from names and signatures and must be
 * confirmed against the definitions.
 */
/* Presumably parses the event selection string @str for @pmu; returns an int status. */
int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str);
/* Presumably initializes/probes the memory events; returns an int status. */
int perf_pmu__mem_events_init(void);
/* Returns a pointer to a struct perf_mem_event for @pmu; @i is presumably a PERF_MEM_EVENTS__* value. */
struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i);
/* Returns a struct perf_pmu pointer; presumably the PMU providing memory events - TODO confirm NULL semantics. */
struct perf_pmu *perf_mem_events_find_pmu(void);
/* Presumably returns the number of PMUs that provide memory events. */
int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu);
/* Presumably tests whether @leader is the auxiliary event used with mem-loads - TODO confirm. */
bool is_mem_loads_aux_event(struct evsel *leader);
/* Presumably prints the memory events available for @pmu. */
void perf_pmu__mem_events_list(struct perf_pmu *pmu);
/*
 * Presumably fills @rec_argv with record arguments and updates *@argv_nr.
 * NOTE(review): ownership of *@event_name_storage_out is not visible here -
 * confirm who frees it.
 */
int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
char **event_name_storage_out);
/*
 * Formatters that take an output buffer (@out/@bf) with its size (@sz/@size)
 * and a read-only mem_info.
 * NOTE(review): going by the names, each presumably writes a textual form of
 * one aspect of the sample (TLB, memory level, snoop, lock, blocked) and
 * returns the number of characters written - confirm against the definitions.
 */
int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__snp_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__lck_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
int perf_mem__blk_scnprintf(char *out, size_t sz, const struct mem_info *mem_info);
/* NOTE(review): presumably the combined form used by perf script - confirm. */
int perf_script__meminfo_scnprintf(char *bf, size_t size, const struct mem_info *mem_info);
/*
 * Set of u32 counters describing decoded memory accesses.  Instances are
 * passed to c2c_decode_stats() and c2c_add_stats().
 */
struct c2c_stats {
	u32	nr_entries;	/* NOTE(review): undocumented; presumably entries counted - confirm */
	u32	locks;		/* number of 'lock' transactions */

	/* stores */
	u32	store;		/* all stores in the trace */
	u32	st_uncache;	/* stores to an uncacheable address */
	u32	st_noadrs;	/* cacheable stores with no address */
	u32	st_l1hit;	/* stores that hit L1D */
	u32	st_l1miss;	/* stores that miss L1D */
	u32	st_na;		/* stores whose memory level is not available */

	/* loads */
	u32	load;		/* all loads in the trace */
	u32	ld_excl;	/* exclusive loads, rmt/lcl DRAM - snp none/miss */
	u32	ld_shared;	/* shared loads, rmt/lcl DRAM - snp hit */
	u32	ld_uncache;	/* loads to an uncacheable address */
	u32	ld_io;		/* loads to an io address */
	u32	ld_miss;	/* loads that miss */
	u32	ld_noadrs;	/* cacheable loads with no address */
	u32	ld_fbhit;	/* loads hitting the Fill Buffer */
	u32	ld_l1hit;	/* loads that hit L1D */
	u32	ld_l2hit;	/* loads that hit L2D */
	u32	ld_llchit;	/* loads that hit LLC */

	/* HITM / peer / remote breakdown of loads */
	u32	lcl_hitm;	/* loads with local HITM */
	u32	rmt_hitm;	/* loads with remote HITM */
	u32	tot_hitm;	/* loads with local and remote HITM */
	u32	lcl_peer;	/* loads with local peer cache */
	u32	rmt_peer;	/* loads with remote peer cache */
	u32	tot_peer;	/* loads with local and remote peer cache */
	u32	rmt_hit;	/* loads with remote hit clean */
	u32	lcl_dram;	/* loads that miss to local DRAM */
	u32	rmt_dram;	/* loads that miss to remote DRAM */

	/* blocked loads */
	u32	blk_data;	/* loads blocked by data */
	u32	blk_addr;	/* loads blocked by address conflict */

	/* samples that could not be attributed */
	u32	nomap;		/* loads/stores with no phys addrs */
	u32	noparse;	/* unparsable data sources */
};
/* Forward declaration; no prototype in this header takes a struct hist_entry. */
struct hist_entry;
/*
 * NOTE(review): presumably decodes the sample described by @mi and updates
 * the counters in @stats; the meaning of the int return is not visible here -
 * confirm against the definition.
 */
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
/* NOTE(review): presumably accumulates the counters of @add into @stats - confirm. */
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);
/*
 * Selects which breakdown of a sample's data source is wanted; passed as the
 * 'mst' argument of mem_stat_index() and mem_stat_name().  Each value has a
 * matching enum mem_stat_<kind> below.
 */
enum mem_stat_type {
	PERF_MEM_STAT_OP = 0,		/* see enum mem_stat_op */
	PERF_MEM_STAT_CACHE,		/* see enum mem_stat_cache */
	PERF_MEM_STAT_MEMORY,		/* see enum mem_stat_memory */
	PERF_MEM_STAT_SNOOP,		/* see enum mem_stat_snoop */
	PERF_MEM_STAT_DTLB,		/* see enum mem_stat_dtlb */
};
/*
 * Width, in characters, of one printed mem_stat percentage field,
 * e.g. " 100.0%".
 */
#define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 characters for the number ("100.0") + 1 percent sign */
/*
 * Categories of the PERF_MEM_STAT_OP breakdown.
 * NOTE(review): presumably derived from the operation bits of the sample's
 * data source - confirm in mem_stat_index().
 */
enum mem_stat_op {
	MEM_STAT_OP_LOAD = 0,
	MEM_STAT_OP_STORE,
	MEM_STAT_OP_LDST,	/* presumably: load and store combined */
	MEM_STAT_OP_PFETCH,	/* presumably: prefetch */
	MEM_STAT_OP_EXEC,	/* presumably: instruction execution */
	MEM_STAT_OP_OTHER,
};
/*
 * Categories of the PERF_MEM_STAT_CACHE breakdown.
 * NOTE(review): presumably derived from the cache-level bits of the sample's
 * data source - confirm in mem_stat_index().
 */
enum mem_stat_cache {
	MEM_STAT_CACHE_L1 = 0,
	MEM_STAT_CACHE_L2,
	MEM_STAT_CACHE_L3,
	MEM_STAT_CACHE_L4,
	MEM_STAT_CACHE_L1_BUF,	/* presumably: buffer associated with L1 - confirm */
	MEM_STAT_CACHE_L2_BUF,	/* presumably: buffer associated with L2 - confirm */
	MEM_STAT_CACHE_OTHER,
};
/*
 * Categories of the PERF_MEM_STAT_MEMORY breakdown.
 * NOTE(review): the expansions of the abbreviations below are guesses from
 * the names - confirm in mem_stat_index() / mem_stat_name().
 */
enum mem_stat_memory {
	MEM_STAT_MEMORY_RAM = 0,
	MEM_STAT_MEMORY_MSC,	/* presumably: memory-side cache */
	MEM_STAT_MEMORY_UNC,	/* presumably: uncached memory */
	MEM_STAT_MEMORY_CXL,
	MEM_STAT_MEMORY_IO,
	MEM_STAT_MEMORY_PMEM,	/* presumably: persistent memory */
	MEM_STAT_MEMORY_OTHER,
};
/*
 * Categories of the PERF_MEM_STAT_SNOOP breakdown.
 * NOTE(review): presumably derived from the snoop bits of the sample's data
 * source - confirm in mem_stat_index().
 */
enum mem_stat_snoop {
	MEM_STAT_SNOOP_HIT = 0,
	MEM_STAT_SNOOP_HITM,
	MEM_STAT_SNOOP_MISS,
	MEM_STAT_SNOOP_OTHER,
};
/*
 * Categories of the PERF_MEM_STAT_DTLB breakdown, i.e. of the
 * perf_mem_data_src.mem_dtlb values.  The PERF_MEM_TLB_{WK,OS} bits are not
 * used for now.
 */
enum mem_stat_dtlb {
	MEM_STAT_DTLB_L1_HIT = 0,
	MEM_STAT_DTLB_L2_HIT,
	MEM_STAT_DTLB_ANY_HIT,	/* hit without a precise L1/L2 level; printed as "L?-Hit" */
	MEM_STAT_DTLB_MISS,	/* not found in any level of the TLB */
	MEM_STAT_DTLB_OTHER,
};
/*
 * NOTE(review): presumably maps the raw @data_src of a sample to a category
 * of the breakdown selected by @mst (a value of the matching
 * enum mem_stat_<kind>) - confirm against the definition.
 */
int mem_stat_index(const enum mem_stat_type mst, const u64 data_src);
/*
 * NOTE(review): presumably returns the column label for category @idx of the
 * breakdown selected by @mst (e.g. "L1-Hit", "Miss" for the D-TLB columns) -
 * confirm against the definition, including behaviour for out-of-range @idx.
 */
const char *mem_stat_name(const enum mem_stat_type mst, const int idx);
#endif /* __PERF_MEM_EVENTS_H */