mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00
perf intel-tpebs: Filter non-workload samples
If perf is running with a benchmark, then we want the retirement latency samples associated with the benchmark rather than those from the system as a whole. Use the workload's PID to filter out samples that aren't from the workload or its children.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Weilin Wang <weilin.wang@intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250430200108.243234-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
1c5721ca89
commit
bcfab08db7
1 changed file with 58 additions and 1 deletion
|
@ -3,7 +3,7 @@
|
||||||
* intel_tpebs.c: Intel TPEBS support
|
* intel_tpebs.c: Intel TPEBS support
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <api/fs/fs.h>
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
#include <subcmd/run-command.h>
|
#include <subcmd/run-command.h>
|
||||||
#include <thread.h>
|
#include <thread.h>
|
||||||
|
@ -121,6 +121,59 @@ static int evsel__tpebs_start_perf_record(struct evsel *evsel)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool is_child_pid(pid_t parent, pid_t child)
|
||||||
|
{
|
||||||
|
if (parent < 0 || child < 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
char path[PATH_MAX];
|
||||||
|
char line[256];
|
||||||
|
FILE *fp;
|
||||||
|
|
||||||
|
new_child:
|
||||||
|
if (parent == child)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (child <= 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
scnprintf(path, sizeof(path), "%s/%d/status", procfs__mountpoint(), child);
|
||||||
|
fp = fopen(path, "r");
|
||||||
|
if (!fp) {
|
||||||
|
/* Presumably the process went away. Assume not a child. */
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
while (fgets(line, sizeof(line), fp) != NULL) {
|
||||||
|
if (strncmp(line, "PPid:", 5) == 0) {
|
||||||
|
fclose(fp);
|
||||||
|
if (sscanf(line + 5, "%d", &child) != 1) {
|
||||||
|
/* Unexpected error parsing. */
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
goto new_child;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Unexpected EOF. */
|
||||||
|
fclose(fp);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool should_ignore_sample(const struct perf_sample *sample, const struct tpebs_retire_lat *t)
|
||||||
|
{
|
||||||
|
pid_t workload_pid = t->evsel->evlist->workload.pid;
|
||||||
|
pid_t sample_pid = sample->pid;
|
||||||
|
|
||||||
|
if (workload_pid < 0 || workload_pid == sample_pid)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!t->evsel->core.attr.inherit)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return !is_child_pid(workload_pid, sample_pid);
|
||||||
|
}
|
||||||
|
|
||||||
static int process_sample_event(const struct perf_tool *tool __maybe_unused,
|
static int process_sample_event(const struct perf_tool *tool __maybe_unused,
|
||||||
union perf_event *event __maybe_unused,
|
union perf_event *event __maybe_unused,
|
||||||
struct perf_sample *sample,
|
struct perf_sample *sample,
|
||||||
|
@ -140,6 +193,10 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
|
||||||
mutex_unlock(tpebs_mtx_get());
|
mutex_unlock(tpebs_mtx_get());
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
if (should_ignore_sample(sample, t)) {
|
||||||
|
mutex_unlock(tpebs_mtx_get());
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Need to handle per core results? We are assuming average retire
|
* Need to handle per core results? We are assuming average retire
|
||||||
* latency value will be used. Save the number of samples and the sum of
|
* latency value will be used. Save the number of samples and the sum of
|
||||||
|
|
Loading…
Add table
Reference in a new issue