perf intel-tpebs: Filter non-workload samples

If perf is running with a benchmark then we want the retirement
latency samples associated with the benchmark rather than from the
system as a whole.

Use the workload's PID to filter out samples that aren't from the
workload or its children.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Weilin Wang <weilin.wang@intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250430200108.243234-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Ian Rogers 2025-04-30 13:01:08 -07:00 committed by Arnaldo Carvalho de Melo
parent 1c5721ca89
commit bcfab08db7

View file

@ -3,7 +3,7 @@
* intel_tpebs.c: Intel TPEBS support * intel_tpebs.c: Intel TPEBS support
*/ */
#include <api/fs/fs.h>
#include <sys/param.h> #include <sys/param.h>
#include <subcmd/run-command.h> #include <subcmd/run-command.h>
#include <thread.h> #include <thread.h>
@ -121,6 +121,59 @@ static int evsel__tpebs_start_perf_record(struct evsel *evsel)
return ret; return ret;
} }
/*
 * Report whether @child is @parent itself or one of its descendants.
 *
 * Starting at @child, repeatedly replace it with the value of the "PPid:"
 * field read from "<procfs mountpoint>/<pid>/status" until it equals @parent
 * (true) or the walk cannot continue (false).
 *
 * Returns false when either pid is negative, when the walk reaches a pid
 * <= 0 without matching @parent, when the status file cannot be opened, when
 * it contains no "PPid:" line, or when that line cannot be parsed.
 */
static bool is_child_pid(pid_t parent, pid_t child)
{
	if (parent < 0 || child < 0)
		return false;

	/* Each iteration moves one step up the ancestry chain. */
	while (child != parent) {
		char status_path[PATH_MAX];
		char buf[256];
		bool have_ppid = false;
		FILE *status;

		/* Nothing further up to inspect. */
		if (child <= 0)
			return false;

		scnprintf(status_path, sizeof(status_path), "%s/%d/status", procfs__mountpoint(), child);
		status = fopen(status_path, "r");
		if (status == NULL) {
			/* Most likely the process has exited; treat it as unrelated. */
			return false;
		}

		/* Stop at the first "PPid:" line; buf keeps that line for parsing. */
		while (fgets(buf, sizeof(buf), status) != NULL) {
			if (strncmp(buf, "PPid:", 5) == 0) {
				have_ppid = true;
				break;
			}
		}
		fclose(status);

		/* Hit EOF without ever seeing the field. */
		if (!have_ppid)
			return false;

		/* The parent's pid becomes the next pid to examine. */
		if (sscanf(buf + 5, "%d", &child) != 1)
			return false;
	}

	return true;
}
/*
 * Decide whether @sample should be dropped because it did not come from the
 * workload associated with @t's evsel.
 *
 * A sample is kept (false is returned) when the evlist's workload pid is
 * negative - presumably meaning no workload was started; confirm against the
 * evlist code - or when the sample's pid is the workload pid itself.
 * Otherwise the sample is ignored unless the event has attr.inherit set and
 * is_child_pid() reports the sample's pid as a descendant of the workload.
 */
static bool should_ignore_sample(const struct perf_sample *sample, const struct tpebs_retire_lat *t)
{
	const pid_t target = t->evsel->evlist->workload.pid;
	const pid_t origin = sample->pid;

	/* No workload pid to filter on: accept everything. */
	if (target < 0)
		return false;

	/* Sample came directly from the workload process. */
	if (origin == target)
		return false;

	/* Without inherit only the workload itself counts; with it, descendants do too. */
	return !t->evsel->core.attr.inherit || !is_child_pid(target, origin);
}
static int process_sample_event(const struct perf_tool *tool __maybe_unused, static int process_sample_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused, union perf_event *event __maybe_unused,
struct perf_sample *sample, struct perf_sample *sample,
@ -140,6 +193,10 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
mutex_unlock(tpebs_mtx_get()); mutex_unlock(tpebs_mtx_get());
return -EINVAL; return -EINVAL;
} }
if (should_ignore_sample(sample, t)) {
mutex_unlock(tpebs_mtx_get());
return 0;
}
/* /*
* Need to handle per core results? We are assuming average retire * Need to handle per core results? We are assuming average retire
* latency value will be used. Save the number of samples and the sum of * latency value will be used. Save the number of samples and the sum of