perf ftrace latency: Introduce --bucket-range to ask for linear bucketing

In addition to showing it exponentially, using log2() to figure out the
histogram index, allow for showing it linearly:

The preexisting mode, the default:

  # perf ftrace latency --use-nsec --use-bpf \
  			-T switch_mm_irqs_off -a sleep 2
  #   DURATION     |      COUNT | GRAPH                                   |
       0 -    1 ns |          0 |                                         |
       1 -    2 ns |          0 |                                         |
       2 -    4 ns |          0 |                                         |
       4 -    8 ns |          0 |                                         |
       8 -   16 ns |          0 |                                         |
      16 -   32 ns |          0 |                                         |
      32 -   64 ns |          0 |                                         |
      64 -  128 ns |        238 | #                                       |
     128 -  256 ns |       1704 | ##########                              |
     256 -  512 ns |        672 | ###                                     |
     512 - 1024 ns |       4458 | ##########################              |
       1 -    2 us |        677 | ####                                    |
       2 -    4 us |          5 |                                         |
       4 -    8 us |          0 |                                         |
       8 -   16 us |          0 |                                         |
      16 -   32 us |          0 |                                         |
      32 -   64 us |          0 |                                         |
      64 -  128 us |          0 |                                         |
     128 -  256 us |          0 |                                         |
     256 -  512 us |          0 |                                         |
     512 - 1024 us |          0 |                                         |
       1 - ...  ms |          0 |                                         |
  #

The new histogram mode:

  # perf ftrace latency --bucket-range=150 --use-nsec --use-bpf \
  			-T switch_mm_irqs_off -a sleep 2
  #   DURATION     |      COUNT | GRAPH                                   |
       0 -    1 ns |          0 |                                         |
       1 -  151 ns |        265 | #                                       |
     151 -  301 ns |       1797 | ###########                             |
     301 -  451 ns |        258 | #                                       |
     451 -  601 ns |        289 | #                                       |
     601 -  751 ns |       2049 | #############                           |
     751 -  901 ns |        967 | ######                                  |
     901 - 1051 ns |        513 | ###                                     |
    1.05 - 1.20 us |        114 |                                         |
    1.20 - 1.35 us |        559 | ###                                     |
    1.35 - 1.50 us |        189 | #                                       |
    1.50 - 1.65 us |        137 |                                         |
    1.65 - 1.80 us |         32 |                                         |
    1.80 - 1.95 us |          2 |                                         |
    1.95 - 2.10 us |          0 |                                         |
    2.10 - 2.25 us |          1 |                                         |
    2.25 - 2.40 us |          1 |                                         |
    2.40 - 2.55 us |          0 |                                         |
    2.55 - 2.70 us |          0 |                                         |
    2.70 - 2.85 us |          0 |                                         |
    2.85 - 3.00 us |          1 |                                         |
    3.00 - ...  us |          4 |                                         |
  #

Co-developed-by: Gabriele Monaco <gmonaco@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Clark Williams <williams@redhat.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20241112181214.1171244-3-acme@kernel.org
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Arnaldo Carvalho de Melo 2024-11-12 15:12:12 -03:00
parent 12115c6037
commit e8536dd47a
5 changed files with 73 additions and 13 deletions

View file

@ -148,6 +148,9 @@ OPTIONS for 'perf ftrace latency'
--use-nsec::
Use nano-second instead of micro-second as a base unit of the histogram.
--bucket-range=::
Bucket range in us or ns (according to -n/--use-nsec), default is log2() mode.
OPTIONS for 'perf ftrace profile'
---------------------------------

View file

@ -777,9 +777,17 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
if (ftrace->use_nsec)
num *= 1000;
i = log2(num);
if (i < 0)
if (!ftrace->bucket_range) {
i = log2(num);
if (i < 0)
i = 0;
} else {
// Less than 1 unit (ms or ns), or, in the future,
// than the min latency desired.
i = 0;
if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ]
i = num / ftrace->bucket_range + 1;
}
if (i >= NUM_BUCKET)
i = NUM_BUCKET - 1;
@ -815,28 +823,58 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
" DURATION ", "COUNT", bar_total, "GRAPH");
bar_len = buckets[0] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
printf(" %4d - %4d %s | %10d | %.*s%*s |\n",
0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
for (i = 1; i < NUM_BUCKET - 1; i++) {
int start = (1 << (i - 1));
int stop = 1 << i;
int start, stop;
const char *unit = use_nsec ? "ns" : "us";
if (start >= 1024) {
start >>= 10;
stop >>= 10;
unit = use_nsec ? "us" : "ms";
if (!ftrace->bucket_range) {
start = (1 << (i - 1));
stop = 1 << i;
if (start >= 1024) {
start >>= 10;
stop >>= 10;
unit = use_nsec ? "us" : "ms";
}
} else {
start = (i - 1) * ftrace->bucket_range + 1;
stop = i * ftrace->bucket_range + 1;
if (start >= 1000) {
double dstart = start / 1000.0,
dstop = stop / 1000.0;
printf(" %4.2f - %-4.2f", dstart, dstop);
unit = use_nsec ? "us" : "ms";
goto print_bucket_info;
}
}
printf(" %4d - %4d", start, stop);
print_bucket_info:
bar_len = buckets[i] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
start, stop, unit, buckets[i], bar_len, bar,
printf(" %s | %10d | %.*s%*s |\n", unit, buckets[i], bar_len, bar,
bar_total - bar_len, "");
}
bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
if (!ftrace->bucket_range) {
printf(" %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
} else {
int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range;
if (upper_outlier >= 1000) {
double dstart = upper_outlier / 1000.0;
printf(" %4.2f - %-4s %s", dstart, "...", use_nsec ? "us" : "ms");
} else {
printf(" %4d - %4s %s", upper_outlier, "...", use_nsec ? "ns" : "us");
}
}
printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
bar_len, bar, bar_total - bar_len, "");
}
@ -1558,6 +1596,8 @@ int cmd_ftrace(int argc, const char **argv)
#endif
OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
"Use nano-second histogram"),
OPT_UINTEGER(0, "bucket-range", &ftrace.bucket_range,
"Bucket range in ms or ns (-n/--use-nsec), default is log2() mode"),
OPT_PARENT(common_options),
};
const struct option profile_options[] = {

View file

@ -36,6 +36,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
return -1;
}
skel->rodata->bucket_range = ftrace->bucket_range;
/* don't need to set cpu filter for system-wide mode */
if (ftrace->target.cpu_list) {
ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);

View file

@ -41,6 +41,7 @@ int enabled = 0;
const volatile int has_cpu = 0;
const volatile int has_task = 0;
const volatile int use_nsec = 0;
const volatile unsigned int bucket_range;
SEC("kprobe/func")
int BPF_PROG(func_begin)
@ -100,12 +101,25 @@ int BPF_PROG(func_end)
if (delta < 0)
return 0;
if (bucket_range != 0) {
delta /= cmp_base;
// Less than 1 unit (ms or ns), or, in the future,
// than the min latency desired.
key = 0;
if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
key = delta / bucket_range + 1;
if (key >= NUM_BUCKET)
key = NUM_BUCKET - 1;
}
goto do_lookup;
}
// calculate index using delta
for (key = 0; key < (NUM_BUCKET - 1); key++) {
if (delta < (cmp_base << key))
break;
}
do_lookup:
hist = bpf_map_lookup_elem(&latency, &key);
if (!hist)
return 0;

View file

@ -20,6 +20,7 @@ struct perf_ftrace {
unsigned long percpu_buffer_size;
bool inherit;
bool use_nsec;
unsigned int bucket_range;
int graph_depth;
int func_stack_trace;
int func_irq_info;