# linux/tools/perf/tests/shell/lib/stat_output.sh
#
# 183 lines
# 3.6 KiB
# Bash
# Raw Permalink Normal View History

#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Succeed (status 0) only when the caller is not uid 0 and the value read
# from /proc/sys/kernel/perf_event_paranoid is greater than $1.
# $1 paranoid threshold (integer compared with -gt)
function ParanoidAndNotRoot()
{
	# uid 0: report false without reading the paranoid level at all.
	if [ "$(id -u)" = 0 ]
	then
		return 1
	fi
	# The status of this comparison is the function's result.
	[ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ]
}
# Run "perf stat" with only the caller's extra options on the "true"
# command, then call commachecker --no-args.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options; expanded unquoted on purpose so
#               that it splits into separate words
# NOTE(review): commachecker is not defined in the visible part of this
# file - presumably provided by the script that sources it.
check_no_args()
{
	printf '%s' "Checking $1 output: no args "
	perf stat $2 true
	commachecker --no-args
	printf '%s\n' "[Success]"
}
# Run "perf stat -a" on the "true" command, then call
# commachecker --system-wide. Prints a skip message and returns early when
# ParanoidAndNotRoot 0 succeeds.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options (unquoted on purpose: word-split)
check_system_wide()
{
	echo -n "Checking $1 output: system wide "
	ParanoidAndNotRoot 0 && {
		echo "[Skip] paranoid and not root"
		return
	}
	perf stat -a $2 true
	commachecker --system-wide
	echo "[Success]"
}
# Run "perf stat -A -a --no-merge" on the "true" command, then call
# commachecker --system-wide-no-aggr. When ParanoidAndNotRoot 0 succeeds
# only the skip message is printed.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options (unquoted on purpose: word-split)
check_system_wide_no_aggr()
{
	echo -n "Checking $1 output: system wide no aggregation "
	if ! ParanoidAndNotRoot 0
	then
		perf stat -A -a --no-merge $2 true
		commachecker --system-wide-no-aggr
		echo "[Success]"
	else
		echo "[Skip] paranoid and not root"
	fi
}
# Run "perf stat -I 1000" on the "true" command, then call
# commachecker --interval.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options (unquoted on purpose: word-split)
check_interval()
{
	printf '%s' "Checking $1 output: interval "
	perf stat -I 1000 $2 true
	commachecker --interval
	printf '%s\n' "[Success]"
}
# Run "perf stat -e cpu-clock" on the "true" command, then call
# commachecker --event.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options (unquoted on purpose: word-split)
check_event()
{
	printf '%s' "Checking $1 output: event "
	perf stat -e cpu-clock $2 true
	commachecker --event
	printf '%s\n' "[Success]"
}
# Run "perf stat --per-core -a" on the "true" command, then call
# commachecker --per-core. Prints a skip message and returns early when
# ParanoidAndNotRoot 0 succeeds.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options (unquoted on purpose: word-split)
check_per_core()
{
	printf '%s' "Checking $1 output: per core "
	if ParanoidAndNotRoot 0; then
		printf '%s\n' "[Skip] paranoid and not root"
		return
	fi
	perf stat --per-core -a $2 true
	commachecker --per-core
	printf '%s\n' "[Success]"
}
# Run "perf stat --per-thread -p $$" (the PID of the current shell) on the
# "true" command, then call commachecker --per-thread. Prints a skip
# message and returns early when ParanoidAndNotRoot 0 succeeds.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options (unquoted on purpose: word-split)
check_per_thread()
{
	echo -n "Checking $1 output: per thread "
	ParanoidAndNotRoot 0 && {
		echo "[Skip] paranoid and not root"
		return
	}
	perf stat --per-thread -p $$ $2 true
	commachecker --per-thread
	echo "[Success]"
}
# Run "perf stat --per-cache -a" on the "true" command, then call
# commachecker --per-cache. When ParanoidAndNotRoot 0 succeeds only the
# skip message is printed.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options (unquoted on purpose: word-split)
check_per_cache_instance()
{
	echo -n "Checking $1 output: per cache instance "
	if ! ParanoidAndNotRoot 0
	then
		perf stat --per-cache -a $2 true
		commachecker --per-cache
		echo "[Success]"
	else
		echo "[Skip] paranoid and not root"
	fi
}
# perf stat: Support per-cluster aggregation
#
# Some platforms have 'cluster' topology and CPUs in the cluster will share
# resources like L3 Cache Tag (for HiSilicon Kunpeng SoC) or L2 cache (for
# Intel Jacobsville). Currently parsing and building cluster topology have
# been supported since [1].
#
# perf stat has already supported aggregation for other topologies like die
# or socket, etc. It'll be useful to aggregate per-cluster to find problems
# like L3T bandwidth contention.
#
# This patch adds support for the "--per-cluster" option for per-cluster
# aggregation. Also update the docs and related test. The output will be
# like:
#
#   [root@localhost tmp]# perf stat -a -e LLC-load --per-cluster -- sleep 5
#
#    Performance counter stats for 'system wide':
#
#   S56-D0-CLS158    4   1,321,521,570   LLC-load
#   S56-D0-CLS594    4     794,211,453   LLC-load
#   S56-D0-CLS1030   4          41,623   LLC-load
#   S56-D0-CLS1466   4          41,646   LLC-load
#   S56-D0-CLS1902   4          16,863   LLC-load
#   S56-D0-CLS2338   4          15,721   LLC-load
#   S56-D0-CLS2774   4          22,671   LLC-load
#   [...]
#
# On a legacy system without cluster or cluster support, the output will
# look like:
#
#   [root@localhost perf]# perf stat -a -e cycles --per-cluster -- sleep 1
#
#    Performance counter stats for 'system wide':
#
#   S56-D0-CLS0     64   18,011,485   cycles
#   S7182-D0-CLS0   64   16,548,835   cycles
#
# Note that this patch doesn't mix the cluster information in the outputs of
# --per-core to avoid breaking any tools/scripts using it.
#
# Note that perf recently supports "--per-cache" aggregation, but it's not
# the same with the cluster although cluster CPUs may share some cache
# resources.
#
# For example on my machine all clusters within a die share the same L3
# cache:
#
#   $ cat /sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_list
#   0-31
#   $ cat /sys/devices/system/cpu/cpu0/topology/cluster_cpus_list
#   0-3
#
# [1] commit c5e22feffdd7 ("topology: Represent clusters of CPUs within a die")
#
# Tested-by: Jie Zhan <zhanjie9@hisilicon.com>
# Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
# Reviewed-by: Ian Rogers <irogers@google.com>
# Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
# Cc: james.clark@arm.com
# Cc: 21cnbao@gmail.com
# Cc: prime.zeng@hisilicon.com
# Cc: Jonathan.Cameron@huawei.com
# Cc: fanghao11@huawei.com
# Cc: linuxarm@huawei.com
# Cc: tim.c.chen@intel.com
# Cc: linux-arm-kernel@lists.infradead.org
# Signed-off-by: Namhyung Kim <namhyung@kernel.org>
# Link: https://lore.kernel.org/r/20240208024026.2691-1-yangyicong@huawei.com
#
# 2024-02-08 10:40:26 +08:00
# Run "perf stat --per-cluster -a" on the "true" command. Prints a skip
# message and returns early when ParanoidAndNotRoot 0 succeeds.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options (unquoted on purpose: word-split)
# NOTE(review): unlike the other check_* helpers in this file, no
# commachecker call follows the perf run here - confirm whether that is
# intentional before adding one.
check_per_cluster()
{
	printf '%s' "Checking $1 output: per cluster "
	if ParanoidAndNotRoot 0; then
		printf '%s\n' "[Skip] paranoid and not root"
		return
	fi
	perf stat --per-cluster -a $2 true
	printf '%s\n' "[Success]"
}
# Run "perf stat --per-die -a" on the "true" command, then call
# commachecker --per-die. Prints a skip message and returns early when
# ParanoidAndNotRoot 0 succeeds.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options (unquoted on purpose: word-split)
check_per_die()
{
	echo -n "Checking $1 output: per die "
	ParanoidAndNotRoot 0 && {
		echo "[Skip] paranoid and not root"
		return
	}
	perf stat --per-die -a $2 true
	commachecker --per-die
	echo "[Success]"
}
# Run "perf stat --per-node -a" on the "true" command, then call
# commachecker --per-node. When ParanoidAndNotRoot 0 succeeds only the
# skip message is printed.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options (unquoted on purpose: word-split)
check_per_node()
{
	echo -n "Checking $1 output: per node "
	if ! ParanoidAndNotRoot 0
	then
		perf stat --per-node -a $2 true
		commachecker --per-node
		echo "[Success]"
	else
		echo "[Skip] paranoid and not root"
	fi
}
# Run "perf stat --per-socket -a" on the "true" command, then call
# commachecker --per-socket. Prints a skip message and returns early when
# ParanoidAndNotRoot 0 succeeds.
# $1 name       label shown in the progress line
# $2 extra_opt  extra perf stat options (unquoted on purpose: word-split)
check_per_socket()
{
	printf '%s' "Checking $1 output: per socket "
	if ParanoidAndNotRoot 0; then
		printf '%s\n' "[Skip] paranoid and not root"
		return
	fi
	perf stat --per-socket -a $2 true
	commachecker --per-socket
	printf '%s\n' "[Success]"
}
# The perf stat options for per-socket, per-core, per-die
# and -A (no_aggr mode) use the info fetched from this
# directory: "/sys/devices/system/cpu/cpu*/topology". For
# example, the socket value is fetched from the
# "physical_package_id" file in the topology directory.
# Reference: cpu__get_topology_int in util/cpumap.c
# If the platform doesn't expose topology information, values
# will be set to -1. For example, in case of the pSeries platform
# of powerpc, the value for "physical_package_id" is restricted
# and set to -1. The check here validates the socket-id read from
# the topology file before proceeding further.
# Glob for the per-CPU topology directories and the file inside each one
# that is read as the socket id.
FILE_LOC="/sys/devices/system/cpu/cpu*/topology/"
FILE_NAME="physical_package_id"

# Print "1" on stdout when the first file matching $FILE_LOC/$FILE_NAME
# contains -1; print "0" in every other case:
#   - ParanoidAndNotRoot 0 succeeds (the file is not looked at),
#   - no file matches,
#   - the file holds any other value, including nothing at all.
# Takes no arguments; the answer is the printed digit, not the exit status.
function check_for_topology()
{
	# Keep the scratch variables out of the sourcing script's namespace.
	local socket_file socket_id
	local -a topology_files

	if ! ParanoidAndNotRoot 0
	then
		# Deliberately unquoted: FILE_LOC carries the cpu* glob, which has
		# to expand here. Letting the shell glob replaces parsing `ls`
		# output and stays silent when nothing matches.
		topology_files=( $FILE_LOC/$FILE_NAME )
		# The "-" default keeps this safe when the array is empty
		# (nullglob) and the caller runs with "set -u".
		socket_file=${topology_files[0]-}
		# An unmatched glob stays a literal pattern, so test existence
		# rather than emptiness.
		if [ ! -e "$socket_file" ]
		then
			echo 0
			return
		fi
		socket_id=$(cat "$socket_file")
		# Quoted so that an empty read compares as a string instead of
		# making "[" fail with "unary operator expected".
		if [ "$socket_id" = "-1" ]
		then
			echo 1
			return
		fi
	fi
	echo 0
}