mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00
perf ftrace latency: Introduce --bucket-range to ask for linear bucketing
In addition to showing it exponentially, using log2() to figure out the histogram index, allow for showing it linearly: The preexisting mode, the default: # perf ftrace latency --use-nsec --use-bpf \ -T switch_mm_irqs_off -a sleep 2 # DURATION | COUNT | GRAPH | 0 - 1 ns | 0 | | 1 - 2 ns | 0 | | 2 - 4 ns | 0 | | 4 - 8 ns | 0 | | 8 - 16 ns | 0 | | 16 - 32 ns | 0 | | 32 - 64 ns | 0 | | 64 - 128 ns | 238 | # | 128 - 256 ns | 1704 | ########## | 256 - 512 ns | 672 | ### | 512 - 1024 ns | 4458 | ########################## | 1 - 2 us | 677 | #### | 2 - 4 us | 5 | | 4 - 8 us | 0 | | 8 - 16 us | 0 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - ... ms | 0 | | # The new histogram mode: # perf ftrace latency --bucket-range=150 --use-nsec --use-bpf \ -T switch_mm_irqs_off -a sleep 2 # DURATION | COUNT | GRAPH | 0 - 1 ns | 0 | | 1 - 151 ns | 265 | # | 151 - 301 ns | 1797 | ########### | 301 - 451 ns | 258 | # | 451 - 601 ns | 289 | # | 601 - 751 ns | 2049 | ############# | 751 - 901 ns | 967 | ###### | 901 - 1051 ns | 513 | ### | 1.05 - 1.20 us | 114 | | 1.20 - 1.35 us | 559 | ### | 1.35 - 1.50 us | 189 | # | 1.50 - 1.65 us | 137 | | 1.65 - 1.80 us | 32 | | 1.80 - 1.95 us | 2 | | 1.95 - 2.10 us | 0 | | 2.10 - 2.25 us | 1 | | 2.25 - 2.40 us | 1 | | 2.40 - 2.55 us | 0 | | 2.55 - 2.70 us | 0 | | 2.70 - 2.85 us | 0 | | 2.85 - 3.00 us | 1 | | 3.00 - ... us | 4 | | # Co-developed-by: Gabriele Monaco <gmonaco@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Clark Williams <williams@redhat.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/r/20241112181214.1171244-3-acme@kernel.org Signed-off-by: Gabriele Monaco <gmonaco@redhat.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
12115c6037
commit
e8536dd47a
5 changed files with 73 additions and 13 deletions
|
@ -148,6 +148,9 @@ OPTIONS for 'perf ftrace latency'
|
||||||
--use-nsec::
|
--use-nsec::
|
||||||
Use nano-second instead of micro-second as a base unit of the histogram.
|
Use nano-second instead of micro-second as a base unit of the histogram.
|
||||||
|
|
||||||
|
--bucket-range=::
|
||||||
|
Bucket range in us or ns (according to -n/--use-nsec), default is log2() mode.
|
||||||
|
|
||||||
|
|
||||||
OPTIONS for 'perf ftrace profile'
|
OPTIONS for 'perf ftrace profile'
|
||||||
---------------------------------
|
---------------------------------
|
||||||
|
|
|
@ -777,9 +777,17 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
|
||||||
if (ftrace->use_nsec)
|
if (ftrace->use_nsec)
|
||||||
num *= 1000;
|
num *= 1000;
|
||||||
|
|
||||||
|
if (!ftrace->bucket_range) {
|
||||||
i = log2(num);
|
i = log2(num);
|
||||||
if (i < 0)
|
if (i < 0)
|
||||||
i = 0;
|
i = 0;
|
||||||
|
} else {
|
||||||
|
// Less than 1 unit (ms or ns), or, in the future,
|
||||||
|
// than the min latency desired.
|
||||||
|
i = 0;
|
||||||
|
if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ]
|
||||||
|
i = num / ftrace->bucket_range + 1;
|
||||||
|
}
|
||||||
if (i >= NUM_BUCKET)
|
if (i >= NUM_BUCKET)
|
||||||
i = NUM_BUCKET - 1;
|
i = NUM_BUCKET - 1;
|
||||||
|
|
||||||
|
@ -815,28 +823,58 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
|
||||||
" DURATION ", "COUNT", bar_total, "GRAPH");
|
" DURATION ", "COUNT", bar_total, "GRAPH");
|
||||||
|
|
||||||
bar_len = buckets[0] * bar_total / total;
|
bar_len = buckets[0] * bar_total / total;
|
||||||
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
|
|
||||||
|
printf(" %4d - %4d %s | %10d | %.*s%*s |\n",
|
||||||
0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
|
0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
|
||||||
|
|
||||||
for (i = 1; i < NUM_BUCKET - 1; i++) {
|
for (i = 1; i < NUM_BUCKET - 1; i++) {
|
||||||
int start = (1 << (i - 1));
|
int start, stop;
|
||||||
int stop = 1 << i;
|
|
||||||
const char *unit = use_nsec ? "ns" : "us";
|
const char *unit = use_nsec ? "ns" : "us";
|
||||||
|
|
||||||
|
if (!ftrace->bucket_range) {
|
||||||
|
start = (1 << (i - 1));
|
||||||
|
stop = 1 << i;
|
||||||
|
|
||||||
if (start >= 1024) {
|
if (start >= 1024) {
|
||||||
start >>= 10;
|
start >>= 10;
|
||||||
stop >>= 10;
|
stop >>= 10;
|
||||||
unit = use_nsec ? "us" : "ms";
|
unit = use_nsec ? "us" : "ms";
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
start = (i - 1) * ftrace->bucket_range + 1;
|
||||||
|
stop = i * ftrace->bucket_range + 1;
|
||||||
|
|
||||||
|
if (start >= 1000) {
|
||||||
|
double dstart = start / 1000.0,
|
||||||
|
dstop = stop / 1000.0;
|
||||||
|
printf(" %4.2f - %-4.2f", dstart, dstop);
|
||||||
|
unit = use_nsec ? "us" : "ms";
|
||||||
|
goto print_bucket_info;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf(" %4d - %4d", start, stop);
|
||||||
|
print_bucket_info:
|
||||||
bar_len = buckets[i] * bar_total / total;
|
bar_len = buckets[i] * bar_total / total;
|
||||||
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
|
printf(" %s | %10d | %.*s%*s |\n", unit, buckets[i], bar_len, bar,
|
||||||
start, stop, unit, buckets[i], bar_len, bar,
|
|
||||||
bar_total - bar_len, "");
|
bar_total - bar_len, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
|
bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
|
||||||
printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
|
if (!ftrace->bucket_range) {
|
||||||
1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
|
printf(" %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
|
||||||
|
} else {
|
||||||
|
int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range;
|
||||||
|
|
||||||
|
if (upper_outlier >= 1000) {
|
||||||
|
double dstart = upper_outlier / 1000.0;
|
||||||
|
|
||||||
|
printf(" %4.2f - %-4s %s", dstart, "...", use_nsec ? "us" : "ms");
|
||||||
|
} else {
|
||||||
|
printf(" %4d - %4s %s", upper_outlier, "...", use_nsec ? "ns" : "us");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
|
||||||
bar_len, bar, bar_total - bar_len, "");
|
bar_len, bar, bar_total - bar_len, "");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1558,6 +1596,8 @@ int cmd_ftrace(int argc, const char **argv)
|
||||||
#endif
|
#endif
|
||||||
OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
|
OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
|
||||||
"Use nano-second histogram"),
|
"Use nano-second histogram"),
|
||||||
|
OPT_UINTEGER(0, "bucket-range", &ftrace.bucket_range,
|
||||||
|
"Bucket range in ms or ns (-n/--use-nsec), default is log2() mode"),
|
||||||
OPT_PARENT(common_options),
|
OPT_PARENT(common_options),
|
||||||
};
|
};
|
||||||
const struct option profile_options[] = {
|
const struct option profile_options[] = {
|
||||||
|
|
|
@ -36,6 +36,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
skel->rodata->bucket_range = ftrace->bucket_range;
|
||||||
|
|
||||||
/* don't need to set cpu filter for system-wide mode */
|
/* don't need to set cpu filter for system-wide mode */
|
||||||
if (ftrace->target.cpu_list) {
|
if (ftrace->target.cpu_list) {
|
||||||
ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
|
ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
|
||||||
|
|
|
@ -41,6 +41,7 @@ int enabled = 0;
|
||||||
const volatile int has_cpu = 0;
|
const volatile int has_cpu = 0;
|
||||||
const volatile int has_task = 0;
|
const volatile int has_task = 0;
|
||||||
const volatile int use_nsec = 0;
|
const volatile int use_nsec = 0;
|
||||||
|
const volatile unsigned int bucket_range;
|
||||||
|
|
||||||
SEC("kprobe/func")
|
SEC("kprobe/func")
|
||||||
int BPF_PROG(func_begin)
|
int BPF_PROG(func_begin)
|
||||||
|
@ -100,12 +101,25 @@ int BPF_PROG(func_end)
|
||||||
if (delta < 0)
|
if (delta < 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
if (bucket_range != 0) {
|
||||||
|
delta /= cmp_base;
|
||||||
|
// Less than 1 unit (ms or ns), or, in the future,
|
||||||
|
// than the min latency desired.
|
||||||
|
key = 0;
|
||||||
|
if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
|
||||||
|
key = delta / bucket_range + 1;
|
||||||
|
if (key >= NUM_BUCKET)
|
||||||
|
key = NUM_BUCKET - 1;
|
||||||
|
}
|
||||||
|
goto do_lookup;
|
||||||
|
}
|
||||||
// calculate index using delta
|
// calculate index using delta
|
||||||
for (key = 0; key < (NUM_BUCKET - 1); key++) {
|
for (key = 0; key < (NUM_BUCKET - 1); key++) {
|
||||||
if (delta < (cmp_base << key))
|
if (delta < (cmp_base << key))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
do_lookup:
|
||||||
hist = bpf_map_lookup_elem(&latency, &key);
|
hist = bpf_map_lookup_elem(&latency, &key);
|
||||||
if (!hist)
|
if (!hist)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -20,6 +20,7 @@ struct perf_ftrace {
|
||||||
unsigned long percpu_buffer_size;
|
unsigned long percpu_buffer_size;
|
||||||
bool inherit;
|
bool inherit;
|
||||||
bool use_nsec;
|
bool use_nsec;
|
||||||
|
unsigned int bucket_range;
|
||||||
int graph_depth;
|
int graph_depth;
|
||||||
int func_stack_trace;
|
int func_stack_trace;
|
||||||
int func_irq_info;
|
int func_irq_info;
|
||||||
|
|
Loading…
Add table
Reference in a new issue