2024-03-29 14:58:10 -07:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <errno.h>
|
2024-03-29 14:58:11 -07:00
|
|
|
#include <fcntl.h>
|
2024-03-29 14:58:10 -07:00
|
|
|
#include <inttypes.h>
|
|
|
|
#include <libgen.h>
|
|
|
|
#include <regex.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <subcmd/run-command.h>
|
|
|
|
|
|
|
|
#include "annotate.h"
|
2024-07-18 14:13:45 +05:30
|
|
|
#include "annotate-data.h"
|
2024-03-29 14:58:10 -07:00
|
|
|
#include "build-id.h"
|
|
|
|
#include "debug.h"
|
|
|
|
#include "disasm.h"
|
2024-07-31 11:58:56 -03:00
|
|
|
#include "disasm_bpf.h"
|
2024-03-29 14:58:10 -07:00
|
|
|
#include "dso.h"
|
2024-11-08 15:45:48 -08:00
|
|
|
#include "dwarf-regs.h"
|
2024-03-29 14:58:10 -07:00
|
|
|
#include "env.h"
|
|
|
|
#include "evsel.h"
|
|
|
|
#include "map.h"
|
|
|
|
#include "maps.h"
|
2024-03-29 14:58:11 -07:00
|
|
|
#include "namespaces.h"
|
2024-03-29 14:58:10 -07:00
|
|
|
#include "srcline.h"
|
|
|
|
#include "symbol.h"
|
|
|
|
#include "util.h"
|
|
|
|
|
|
|
|
static regex_t file_lineno;
|
|
|
|
|
|
|
|
/* These can be referred from the arch-dependent code */
|
|
|
|
static struct ins_ops call_ops;
|
|
|
|
static struct ins_ops dec_ops;
|
|
|
|
static struct ins_ops jump_ops;
|
|
|
|
static struct ins_ops mov_ops;
|
|
|
|
static struct ins_ops nop_ops;
|
|
|
|
static struct ins_ops lock_ops;
|
|
|
|
static struct ins_ops ret_ops;
|
2024-07-18 14:13:50 +05:30
|
|
|
static struct ins_ops load_store_ops;
|
perf annotate: Add some of the arithmetic instructions to support instruction tracking in powerpc
Data-type profiling has the concept of instruction tracking.
Example sequence in powerpc:
ld r10,264(r3)
mr r31,r3
<<after some sequence>
ld r9,312(r31)
or differently
lwz r10,264(r3)
add r31, r3, RB
lwz r9, 0(r31)
If a sample is hit at "lwz r9, 0(r31)", data type of r31 depends
on previous instruction sequence here. So to track the previous
instructions, patch adds changes to identify some of the arithmetic
instructions which are having opcode as 31.
Since memory instructions also have cases with opcode 31, use the bits
22:30 to filter the arithmetic instructions here.
Also there are instructions with just two operands like "addme", "addze".
This patch adds new instructions ops "arithmetic_ops" to handle this
Reviewed-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Akanksha J N <akanksha@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Disha Goel <disgoel@linux.vnet.ibm.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Link: https://lore.kernel.org/lkml/20240718084358.72242-10-atrajeev@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-07-18 14:13:52 +05:30
|
|
|
static struct ins_ops arithmetic_ops;
|
2024-03-29 14:58:10 -07:00
|
|
|
|
|
|
|
static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
|
|
|
|
struct ins_operands *ops, int max_ins_name);
|
|
|
|
static int call__scnprintf(struct ins *ins, char *bf, size_t size,
|
|
|
|
struct ins_operands *ops, int max_ins_name);
|
|
|
|
|
|
|
|
static void ins__sort(struct arch *arch);
|
|
|
|
static int disasm_line__parse(char *line, const char **namep, char **rawp);
|
perf annotate: Add disasm_line__parse() to parse raw instruction for powerpc
Currently, the perf tool infrastructure uses the disasm_line__parse
function to parse disassembled line.
Example snippet from objdump:
objdump --start-address=<address> --stop-address=<address> -d --no-show-raw-insn -C <vmlinux>
c0000000010224b4: lwz r10,0(r9)
This line "lwz r10,0(r9)" is parsed to extract instruction name,
registers names and offset.
In powerpc, the approach for data type profiling uses raw instruction
instead of result from objdump to identify the instruction category and
extract the source/target registers.
Example: 38 01 81 e8 ld r4,312(r1)
Here "38 01 81 e8" is the raw instruction representation. Add function
"disasm_line__parse_powerpc" to handle parsing of raw instruction.
Also update "struct disasm_line" to save the binary code.
With the change, function captures:
line -> "38 01 81 e8 ld r4,312(r1)"
raw instruction "38 01 81 e8"
Raw instruction is used later to extract the reg/offset fields. Macros
are added to extract opcode and register fields. "struct disasm_line"
is updated to carry union of "bytes" and "raw_insn" of 32 bit to carry raw
code (raw).
Function "disasm_line__parse_powerpc" fills the raw instruction hex value
and can use macros to get opcode. There are no changes in existing code
paths, which parses the disassembled code. The size of raw instruction
depends on architecture.
In case of powerpc, parsing the disasm line needs to handle cases
for reading binary code directly from DSO as well as parsing the objdump
result. Hence adding the logic into separate function instead of
updating "disasm_line__parse". The architecture using the instruction
name and present approach is not altered. Since this approach targets
powerpc, the macro implementation is added for powerpc as of now.
Since the disasm_line__parse is used in other cases (perf annotate) and
not only data type profiling, the powerpc callback includes changes to
work with binary code as well as mnemonic representation.
Also, if the DSO read fails and libcapstone is not supported, the
approach falls back to using objdump. Hence the patch also has
changes to ensure the objdump option works well.
Reviewed-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Akanksha J N <akanksha@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Disha Goel <disgoel@linux.vnet.ibm.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Link: https://lore.kernel.org/lkml/20240718084358.72242-5-atrajeev@linux.vnet.ibm.com
[ Add check for strndup() result ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-07-18 14:13:47 +05:30
|
|
|
static int disasm_line__parse_powerpc(struct disasm_line *dl);
|
perf annotate: LLVM-based disassembler
Support using LLVM as a disassembler method, allowing helperless
annotation in non-distro builds. (It is also much faster than
using libbfd or bfd objdump on binaries with a lot of debug
information.)
This is nearly identical to the output of llvm-objdump; there are
some very rare whitespace differences, some minor changes to demangling
(since we use perf's regular demangling and not LLVM's own) and
the occasional case where llvm-objdump makes a different choice
when multiple symbols share the same address.
It should work across all of LLVM's supported architectures, although
I've only tested 64-bit x86, and finding the right triple from perf's
idea of machine architecture can sometimes be a bit tricky. Ideally, we
should have some way of finding the triplet just from the file itself.
Committer notes:
Address this on 32-bit systems by using PRIu64 from inttypes.h
3 17.58 almalinux:9-i386 : FAIL gcc version 11.4.1 20231218 (Red Hat 11.4.1-3) (GCC)
util/llvm-c-helpers.cpp: In function ‘char* make_symbol_relative_string(dso*, const char*, u64, u64)’:
util/llvm-c-helpers.cpp:150:52: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘u64’ {aka
+‘long long unsigned int’} [-Werror=format=]
150 | snprintf(buf, sizeof(buf), "%s+0x%lx",
| ~~^
| |
| long unsigned int
| %llx
151 | demangled ? demangled : sym_name, addr - base_addr);
| ~~~~~~~~~~~~~~~~
| |
| u64 {aka long long unsigned int}
cc1plus: all warnings being treated as errors
Signed-off-by: Steinar H. Gunderson <sesse@google.com>
Cc: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20240803152008.2818485-3-sesse@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-08-03 17:20:08 +02:00
|
|
|
static char *expand_tabs(char *line, char **storage, size_t *storage_len);
|
2024-03-29 14:58:10 -07:00
|
|
|
|
|
|
|
static __attribute__((constructor)) void symbol__init_regexpr(void)
|
|
|
|
{
|
|
|
|
regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int arch__grow_instructions(struct arch *arch)
|
|
|
|
{
|
|
|
|
struct ins *new_instructions;
|
|
|
|
size_t new_nr_allocated;
|
|
|
|
|
|
|
|
if (arch->nr_instructions_allocated == 0 && arch->instructions)
|
|
|
|
goto grow_from_non_allocated_table;
|
|
|
|
|
|
|
|
new_nr_allocated = arch->nr_instructions_allocated + 128;
|
|
|
|
new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins));
|
|
|
|
if (new_instructions == NULL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
out_update_instructions:
|
|
|
|
arch->instructions = new_instructions;
|
|
|
|
arch->nr_instructions_allocated = new_nr_allocated;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
grow_from_non_allocated_table:
|
|
|
|
new_nr_allocated = arch->nr_instructions + 128;
|
|
|
|
new_instructions = calloc(new_nr_allocated, sizeof(struct ins));
|
|
|
|
if (new_instructions == NULL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
memcpy(new_instructions, arch->instructions, arch->nr_instructions);
|
|
|
|
goto out_update_instructions;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops)
|
|
|
|
{
|
|
|
|
struct ins *ins;
|
|
|
|
|
|
|
|
if (arch->nr_instructions == arch->nr_instructions_allocated &&
|
|
|
|
arch__grow_instructions(arch))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
ins = &arch->instructions[arch->nr_instructions];
|
|
|
|
ins->name = strdup(name);
|
|
|
|
if (!ins->name)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
ins->ops = ops;
|
|
|
|
arch->nr_instructions++;
|
|
|
|
|
|
|
|
ins__sort(arch);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#include "arch/arc/annotate/instructions.c"
|
|
|
|
#include "arch/arm/annotate/instructions.c"
|
|
|
|
#include "arch/arm64/annotate/instructions.c"
|
|
|
|
#include "arch/csky/annotate/instructions.c"
|
|
|
|
#include "arch/loongarch/annotate/instructions.c"
|
|
|
|
#include "arch/mips/annotate/instructions.c"
|
|
|
|
#include "arch/x86/annotate/instructions.c"
|
|
|
|
#include "arch/powerpc/annotate/instructions.c"
|
|
|
|
#include "arch/riscv64/annotate/instructions.c"
|
|
|
|
#include "arch/s390/annotate/instructions.c"
|
|
|
|
#include "arch/sparc/annotate/instructions.c"
|
|
|
|
|
|
|
|
static struct arch architectures[] = {
|
|
|
|
{
|
|
|
|
.name = "arc",
|
|
|
|
.init = arc__annotate_init,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "arm",
|
|
|
|
.init = arm__annotate_init,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "arm64",
|
|
|
|
.init = arm64__annotate_init,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "csky",
|
|
|
|
.init = csky__annotate_init,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "mips",
|
|
|
|
.init = mips__annotate_init,
|
|
|
|
.objdump = {
|
|
|
|
.comment_char = '#',
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "x86",
|
|
|
|
.init = x86__annotate_init,
|
|
|
|
.instructions = x86__instructions,
|
|
|
|
.nr_instructions = ARRAY_SIZE(x86__instructions),
|
|
|
|
.insn_suffix = "bwlq",
|
|
|
|
.objdump = {
|
|
|
|
.comment_char = '#',
|
|
|
|
.register_char = '%',
|
|
|
|
.memory_ref_char = '(',
|
|
|
|
.imm_char = '$',
|
|
|
|
},
|
2024-10-16 17:13:53 -07:00
|
|
|
#ifdef HAVE_LIBDW_SUPPORT
|
2024-07-18 14:13:45 +05:30
|
|
|
.update_insn_state = update_insn_state_x86,
|
|
|
|
#endif
|
2024-03-29 14:58:10 -07:00
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "powerpc",
|
|
|
|
.init = powerpc__annotate_init,
|
2024-10-16 17:13:53 -07:00
|
|
|
#ifdef HAVE_LIBDW_SUPPORT
|
2024-07-18 14:13:54 +05:30
|
|
|
.update_insn_state = update_insn_state_powerpc,
|
|
|
|
#endif
|
2024-03-29 14:58:10 -07:00
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "riscv64",
|
|
|
|
.init = riscv64__annotate_init,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "s390",
|
|
|
|
.init = s390__annotate_init,
|
|
|
|
.objdump = {
|
|
|
|
.comment_char = '#',
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "sparc",
|
|
|
|
.init = sparc__annotate_init,
|
|
|
|
.objdump = {
|
|
|
|
.comment_char = '#',
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "loongarch",
|
|
|
|
.init = loongarch__annotate_init,
|
|
|
|
.objdump = {
|
|
|
|
.comment_char = '#',
|
|
|
|
},
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
static int arch__key_cmp(const void *name, const void *archp)
|
|
|
|
{
|
|
|
|
const struct arch *arch = archp;
|
|
|
|
|
|
|
|
return strcmp(name, arch->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int arch__cmp(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
const struct arch *aa = a;
|
|
|
|
const struct arch *ab = b;
|
|
|
|
|
|
|
|
return strcmp(aa->name, ab->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void arch__sort(void)
|
|
|
|
{
|
|
|
|
const int nmemb = ARRAY_SIZE(architectures);
|
|
|
|
|
|
|
|
qsort(architectures, nmemb, sizeof(struct arch), arch__cmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct arch *arch__find(const char *name)
|
|
|
|
{
|
|
|
|
const int nmemb = ARRAY_SIZE(architectures);
|
|
|
|
static bool sorted;
|
|
|
|
|
|
|
|
if (!sorted) {
|
|
|
|
arch__sort();
|
|
|
|
sorted = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool arch__is(struct arch *arch, const char *name)
|
|
|
|
{
|
|
|
|
return !strcmp(arch->name, name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ins_ops__delete(struct ins_operands *ops)
|
|
|
|
{
|
|
|
|
if (ops == NULL)
|
|
|
|
return;
|
|
|
|
zfree(&ops->source.raw);
|
|
|
|
zfree(&ops->source.name);
|
|
|
|
zfree(&ops->target.raw);
|
|
|
|
zfree(&ops->target.name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
|
|
|
|
struct ins_operands *ops, int max_ins_name)
|
|
|
|
{
|
|
|
|
return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw);
|
|
|
|
}
|
|
|
|
|
|
|
|
int ins__scnprintf(struct ins *ins, char *bf, size_t size,
|
|
|
|
struct ins_operands *ops, int max_ins_name)
|
|
|
|
{
|
|
|
|
if (ins->ops->scnprintf)
|
|
|
|
return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name);
|
|
|
|
|
|
|
|
return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
|
|
|
|
{
|
|
|
|
if (!arch || !arch->ins_is_fused)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return arch->ins_is_fused(arch, ins1, ins2);
|
|
|
|
}
|
|
|
|
|
2024-07-18 14:13:49 +05:30
|
|
|
static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
|
|
|
|
struct disasm_line *dl __maybe_unused)
|
2024-03-29 14:58:10 -07:00
|
|
|
{
|
|
|
|
char *endptr, *tok, *name;
|
|
|
|
struct map *map = ms->map;
|
|
|
|
struct addr_map_symbol target = {
|
|
|
|
.ms = { .map = map, },
|
|
|
|
};
|
|
|
|
|
|
|
|
ops->target.addr = strtoull(ops->raw, &endptr, 16);
|
|
|
|
|
|
|
|
name = strchr(endptr, '<');
|
|
|
|
if (name == NULL)
|
|
|
|
goto indirect_call;
|
|
|
|
|
|
|
|
name++;
|
|
|
|
|
|
|
|
if (arch->objdump.skip_functions_char &&
|
|
|
|
strchr(name, arch->objdump.skip_functions_char))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
tok = strchr(name, '>');
|
|
|
|
if (tok == NULL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
*tok = '\0';
|
|
|
|
ops->target.name = strdup(name);
|
|
|
|
*tok = '>';
|
|
|
|
|
|
|
|
if (ops->target.name == NULL)
|
|
|
|
return -1;
|
|
|
|
find_target:
|
|
|
|
target.addr = map__objdump_2mem(map, ops->target.addr);
|
|
|
|
|
|
|
|
if (maps__find_ams(ms->maps, &target) == 0 &&
|
|
|
|
map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
|
|
|
|
ops->target.sym = target.ms.sym;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
indirect_call:
|
|
|
|
tok = strchr(endptr, '*');
|
|
|
|
if (tok != NULL) {
|
|
|
|
endptr++;
|
|
|
|
|
|
|
|
/* Indirect call can use a non-rip register and offset: callq *0x8(%rbx).
|
|
|
|
* Do not parse such instruction. */
|
|
|
|
if (strstr(endptr, "(%r") == NULL)
|
|
|
|
ops->target.addr = strtoull(endptr, NULL, 16);
|
|
|
|
}
|
|
|
|
goto find_target;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int call__scnprintf(struct ins *ins, char *bf, size_t size,
|
|
|
|
struct ins_operands *ops, int max_ins_name)
|
|
|
|
{
|
|
|
|
if (ops->target.sym)
|
|
|
|
return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
|
|
|
|
|
|
|
|
if (ops->target.addr == 0)
|
|
|
|
return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
|
|
|
|
|
|
|
|
if (ops->target.name)
|
|
|
|
return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name);
|
|
|
|
|
|
|
|
return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ins_ops call_ops = {
|
|
|
|
.parse = call__parse,
|
|
|
|
.scnprintf = call__scnprintf,
|
|
|
|
};
|
|
|
|
|
|
|
|
bool ins__is_call(const struct ins *ins)
|
|
|
|
{
|
|
|
|
return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prevents from matching commas in the comment section, e.g.:
|
|
|
|
* ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast
|
|
|
|
*
|
|
|
|
* and skip comma as part of function arguments, e.g.:
|
|
|
|
* 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc>
|
|
|
|
*/
|
|
|
|
static inline const char *validate_comma(const char *c, struct ins_operands *ops)
|
|
|
|
{
|
|
|
|
if (ops->jump.raw_comment && c > ops->jump.raw_comment)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (ops->jump.raw_func_start && c > ops->jump.raw_func_start)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
2024-07-18 14:13:49 +05:30
|
|
|
static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
|
|
|
|
struct disasm_line *dl __maybe_unused)
|
2024-03-29 14:58:10 -07:00
|
|
|
{
|
|
|
|
struct map *map = ms->map;
|
|
|
|
struct symbol *sym = ms->sym;
|
|
|
|
struct addr_map_symbol target = {
|
|
|
|
.ms = { .map = map, },
|
|
|
|
};
|
|
|
|
const char *c = strchr(ops->raw, ',');
|
|
|
|
u64 start, end;
|
|
|
|
|
|
|
|
ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char);
|
|
|
|
ops->jump.raw_func_start = strchr(ops->raw, '<');
|
|
|
|
|
|
|
|
c = validate_comma(c, ops);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Examples of lines to parse for the _cpp_lex_token@@Base
|
|
|
|
* function:
|
|
|
|
*
|
|
|
|
* 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92>
|
|
|
|
* 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72>
|
|
|
|
*
|
|
|
|
* The first is a jump to an offset inside the same function,
|
|
|
|
* the second is to another function, i.e. that 0xa72 is an
|
|
|
|
* offset in the cpp_named_operator2name@@base function.
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* skip over possible up to 2 operands to get to address, e.g.:
|
|
|
|
* tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
|
|
|
|
*/
|
|
|
|
if (c++ != NULL) {
|
|
|
|
ops->target.addr = strtoull(c, NULL, 16);
|
|
|
|
if (!ops->target.addr) {
|
|
|
|
c = strchr(c, ',');
|
|
|
|
c = validate_comma(c, ops);
|
|
|
|
if (c++ != NULL)
|
|
|
|
ops->target.addr = strtoull(c, NULL, 16);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
ops->target.addr = strtoull(ops->raw, NULL, 16);
|
|
|
|
}
|
|
|
|
|
|
|
|
target.addr = map__objdump_2mem(map, ops->target.addr);
|
|
|
|
start = map__unmap_ip(map, sym->start);
|
|
|
|
end = map__unmap_ip(map, sym->end);
|
|
|
|
|
|
|
|
ops->target.outside = target.addr < start || target.addr > end;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* FIXME: things like this in _cpp_lex_token (gcc's cc1 program):
|
|
|
|
|
|
|
|
cpp_named_operator2name@@Base+0xa72
|
|
|
|
|
|
|
|
* Point to a place that is after the cpp_named_operator2name
|
|
|
|
* boundaries, i.e. in the ELF symbol table for cc1
|
|
|
|
* cpp_named_operator2name is marked as being 32-bytes long, but it in
|
|
|
|
* fact is much larger than that, so we seem to need a symbols__find()
|
|
|
|
* routine that looks for >= current->start and < next_symbol->start,
|
|
|
|
* possibly just for C++ objects?
|
|
|
|
*
|
|
|
|
* For now lets just make some progress by marking jumps to outside the
|
|
|
|
* current function as call like.
|
|
|
|
*
|
|
|
|
* Actual navigation will come next, with further understanding of how
|
|
|
|
* the symbol searching and disassembly should be done.
|
|
|
|
*/
|
|
|
|
if (maps__find_ams(ms->maps, &target) == 0 &&
|
|
|
|
map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
|
|
|
|
ops->target.sym = target.ms.sym;
|
|
|
|
|
|
|
|
if (!ops->target.outside) {
|
|
|
|
ops->target.offset = target.addr - start;
|
|
|
|
ops->target.offset_avail = true;
|
|
|
|
} else {
|
|
|
|
ops->target.offset_avail = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
|
|
|
|
struct ins_operands *ops, int max_ins_name)
|
|
|
|
{
|
|
|
|
const char *c;
|
|
|
|
|
|
|
|
if (!ops->target.addr || ops->target.offset < 0)
|
|
|
|
return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
|
|
|
|
|
|
|
|
if (ops->target.outside && ops->target.sym != NULL)
|
|
|
|
return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
|
|
|
|
|
|
|
|
c = strchr(ops->raw, ',');
|
|
|
|
c = validate_comma(c, ops);
|
|
|
|
|
|
|
|
if (c != NULL) {
|
|
|
|
const char *c2 = strchr(c + 1, ',');
|
|
|
|
|
|
|
|
c2 = validate_comma(c2, ops);
|
|
|
|
/* check for 3-op insn */
|
|
|
|
if (c2 != NULL)
|
|
|
|
c = c2;
|
|
|
|
c++;
|
|
|
|
|
|
|
|
/* mirror arch objdump's space-after-comma style */
|
|
|
|
if (*c == ' ')
|
|
|
|
c++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name,
|
|
|
|
ins->name, c ? c - ops->raw : 0, ops->raw,
|
|
|
|
ops->target.offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Nothing to release for jump operands: ops->jump.raw_comment and
 * ops->jump.raw_func_start point into the raw operand string and must not
 * be freed here.
 */
static void jump__delete(struct ins_operands *ops __maybe_unused)
{
}
|
|
|
|
|
|
|
|
static struct ins_ops jump_ops = {
|
|
|
|
.free = jump__delete,
|
|
|
|
.parse = jump__parse,
|
|
|
|
.scnprintf = jump__scnprintf,
|
|
|
|
};
|
|
|
|
|
|
|
|
bool ins__is_jump(const struct ins *ins)
|
|
|
|
{
|
|
|
|
return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Extract "addr <symbol>" from the comment of a line whose operand @raw is
 * "(%rip)" relative.
 *
 * *addrp receives the hexadecimal number at the start of @comment. When a
 * "<name>" part follows, *namep receives a strdup()ed copy of the name
 * (NULL if the allocation fails).
 *
 * Returns -1 when a number was parsed but no '<' follows it, 0 otherwise.
 */
static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
{
	char *after_addr, *sym_start, *sym_end;

	if (!strstr(raw, "(%rip)"))
		return 0;

	*addrp = strtoull(comment, &after_addr, 16);
	if (after_addr == comment)
		return 0;

	sym_start = strchr(after_addr, '<');
	if (!sym_start)
		return -1;
	sym_start++;

	sym_end = strchr(sym_start, '>');
	if (sym_end) {
		/* Temporarily terminate the string at '>' to copy the name. */
		*sym_end = '\0';
		*namep = strdup(sym_start);
		*sym_end = '>';
	}

	return 0;
}
|
|
|
|
|
2024-07-18 14:13:49 +05:30
|
|
|
static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
|
|
|
|
struct disasm_line *dl __maybe_unused)
|
2024-03-29 14:58:10 -07:00
|
|
|
{
|
|
|
|
ops->locked.ops = zalloc(sizeof(*ops->locked.ops));
|
|
|
|
if (ops->locked.ops == NULL)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
|
|
|
|
goto out_free_ops;
|
|
|
|
|
2024-07-18 14:13:50 +05:30
|
|
|
ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, 0);
|
2024-03-29 14:58:10 -07:00
|
|
|
|
|
|
|
if (ops->locked.ins.ops == NULL)
|
|
|
|
goto out_free_ops;
|
|
|
|
|
|
|
|
if (ops->locked.ins.ops->parse &&
|
2024-07-18 14:13:49 +05:30
|
|
|
ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0)
|
2024-03-29 14:58:10 -07:00
|
|
|
goto out_free_ops;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_free_ops:
|
|
|
|
zfree(&ops->locked.ops);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int lock__scnprintf(struct ins *ins, char *bf, size_t size,
|
|
|
|
struct ins_operands *ops, int max_ins_name)
|
|
|
|
{
|
|
|
|
int printed;
|
|
|
|
|
|
|
|
if (ops->locked.ins.ops == NULL)
|
|
|
|
return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
|
|
|
|
|
|
|
|
printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name);
|
|
|
|
return printed + ins__scnprintf(&ops->locked.ins, bf + printed,
|
|
|
|
size - printed, ops->locked.ops, max_ins_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void lock__delete(struct ins_operands *ops)
|
|
|
|
{
|
|
|
|
struct ins *ins = &ops->locked.ins;
|
|
|
|
|
|
|
|
if (ins->ops && ins->ops->free)
|
|
|
|
ins->ops->free(ops->locked.ops);
|
|
|
|
else
|
|
|
|
ins_ops__delete(ops->locked.ops);
|
|
|
|
|
|
|
|
zfree(&ops->locked.ops);
|
2024-08-12 21:06:12 -07:00
|
|
|
zfree(&ops->locked.ins.name);
|
2024-03-29 14:58:10 -07:00
|
|
|
zfree(&ops->target.raw);
|
|
|
|
zfree(&ops->target.name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ins_ops lock_ops = {
|
|
|
|
.free = lock__delete,
|
|
|
|
.parse = lock__parse,
|
|
|
|
.scnprintf = lock__scnprintf,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check if the operand has more than one registers like x86 SIB addressing:
|
|
|
|
* 0x1234(%rax, %rbx, 8)
|
|
|
|
*
|
|
|
|
* But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check
|
|
|
|
* the input string after 'memory_ref_char' if exists.
|
|
|
|
*/
|
|
|
|
static bool check_multi_regs(struct arch *arch, const char *op)
|
|
|
|
{
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
if (arch->objdump.register_char == 0)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (arch->objdump.memory_ref_char) {
|
|
|
|
op = strchr(op, arch->objdump.memory_ref_char);
|
|
|
|
if (op == NULL)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
while ((op = strchr(op, arch->objdump.register_char)) != NULL) {
|
|
|
|
count++;
|
|
|
|
op++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return count > 1;
|
|
|
|
}
|
|
|
|
|
2024-07-18 14:13:49 +05:30
|
|
|
/*
 * Split a two operand instruction into ops->source.raw and ops->target.raw.
 *
 * The source ends at the first comma or, when a '(' occurs before that
 * comma, right after the first ')' (which must be followed by a comma).
 * The target is what follows, without surrounding white space and without
 * the trailing comment. The raw operand string is modified temporarily
 * while copying and restored afterwards.
 *
 * When a comment is present, it is passed to comment__symbol() for both
 * operands.
 *
 * Returns 0 on success, -1 on a malformed operand or allocation failure.
 */
static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused,
		struct disasm_line *dl __maybe_unused)
{
	char *sep, *dst, *dst_end, *comment, saved;

	sep = strchr(ops->raw, ',');
	if (sep == NULL)
		return -1;

	*sep = '\0';

	/*
	 * x86 SIB addressing has something like 0x8(%rax, %rcx, 1)
	 * then it needs to have the closing parenthesis.
	 */
	if (strchr(ops->raw, '(')) {
		*sep = ',';
		sep = strchr(ops->raw, ')');
		if (sep == NULL || sep[1] != ',')
			return -1;
		*++sep = '\0';
	}

	ops->source.raw = strdup(ops->raw);
	*sep = ',';

	if (ops->source.raw == NULL)
		return -1;

	ops->source.multi_regs = check_multi_regs(arch, ops->source.raw);

	sep++;
	dst = skip_spaces(sep);
	comment = strchr(sep, arch->objdump.comment_char);

	/* Last character of the target: just before the comment or the end. */
	if (comment != NULL)
		dst_end = comment - 1;
	else
		dst_end = strchr(sep, '\0') - 1;

	/* Trim trailing white space. */
	while (dst_end > dst && isspace(dst_end[0]))
		--dst_end;
	dst_end++;

	saved = *dst_end;
	*dst_end = '\0';
	ops->target.raw = strdup(dst);
	*dst_end = saved;

	if (ops->target.raw == NULL) {
		zfree(&ops->source.raw);
		return -1;
	}

	ops->target.multi_regs = check_multi_regs(arch, ops->target.raw);

	if (comment != NULL) {
		comment = skip_spaces(comment);
		comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
		comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
	}

	return 0;
}
|
|
|
|
|
|
|
|
static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
|
|
|
|
struct ins_operands *ops, int max_ins_name)
|
|
|
|
{
|
|
|
|
return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name,
|
|
|
|
ops->source.name ?: ops->source.raw,
|
|
|
|
ops->target.name ?: ops->target.raw);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ins_ops mov_ops = {
|
|
|
|
.parse = mov__parse,
|
|
|
|
.scnprintf = mov__scnprintf,
|
|
|
|
};
|
|
|
|
|
perf annotate: Add some of the arithmetic instructions to support instruction tracking in powerpc
Data-type profiling has the concept of instruction tracking.
Example sequence in powerpc:
ld r10,264(r3)
mr r31,r3
<<after some sequence>
ld r9,312(r31)
or differently
lwz r10,264(r3)
add r31, r3, RB
lwz r9, 0(r31)
If a sample is hit at "lwz r9, 0(r31)", data type of r31 depends
on previous instruction sequence here. So to track the previous
instructions, patch adds changes to identify some of the arithmetic
instructions which are having opcode as 31.
Since memory instructions also have cases with opcode 31, use the bits
22:30 to filter the arithmetic instructions here.
Also there are instructions with just two operands like "addme", "addze".
This patch adds new instructions ops "arithmetic_ops" to handle this
Reviewed-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Akanksha J N <akanksha@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Disha Goel <disgoel@linux.vnet.ibm.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Link: https://lore.kernel.org/lkml/20240718084358.72242-10-atrajeev@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-07-18 14:13:52 +05:30
|
|
|
#define PPC_22_30(R) (((R) >> 1) & 0x1ff)
|
|
|
|
#define MINUS_EXT_XO_FORM 234
|
|
|
|
#define SUB_EXT_XO_FORM 232
|
|
|
|
#define ADD_ZERO_EXT_XO_FORM 202
|
|
|
|
#define SUB_ZERO_EXT_XO_FORM 200
|
|
|
|
|
|
|
|
static int arithmetic__scnprintf(struct ins *ins, char *bf, size_t size,
|
|
|
|
struct ins_operands *ops, int max_ins_name)
|
|
|
|
{
|
|
|
|
return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
|
|
|
|
ops->raw);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Sets the fields: multi_regs and "mem_ref".
|
|
|
|
* "mem_ref" is set for ops->source which is later used to
|
|
|
|
* fill the objdump->memory_ref-char field. This ops is currently
|
|
|
|
* used by powerpc and since binary instruction code is used to
|
|
|
|
* extract opcode, regs and offset, no other parsing is needed here.
|
|
|
|
*
|
|
|
|
* Dont set multi regs for 4 cases since it has only one operand
|
|
|
|
* for source:
|
|
|
|
* - Add to Minus One Extended XO-form ( Ex: addme, addmeo )
|
|
|
|
* - Subtract From Minus One Extended XO-form ( Ex: subfme )
|
|
|
|
* - Add to Zero Extended XO-form ( Ex: addze, addzeo )
|
|
|
|
* - Subtract From Zero Extended XO-form ( Ex: subfze )
|
|
|
|
*/
|
|
|
|
static int arithmetic__parse(struct arch *arch __maybe_unused, struct ins_operands *ops,
|
|
|
|
struct map_symbol *ms __maybe_unused, struct disasm_line *dl)
|
|
|
|
{
|
|
|
|
int opcode = PPC_OP(dl->raw.raw_insn);
|
|
|
|
|
|
|
|
ops->source.mem_ref = false;
|
|
|
|
if (opcode == 31) {
|
|
|
|
if ((opcode != MINUS_EXT_XO_FORM) && (opcode != SUB_EXT_XO_FORM) \
|
|
|
|
&& (opcode != ADD_ZERO_EXT_XO_FORM) && (opcode != SUB_ZERO_EXT_XO_FORM))
|
|
|
|
ops->source.multi_regs = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
ops->target.mem_ref = false;
|
|
|
|
ops->target.multi_regs = false;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ins_ops arithmetic_ops = {
|
|
|
|
.parse = arithmetic__parse,
|
|
|
|
.scnprintf = arithmetic__scnprintf,
|
|
|
|
};
|
|
|
|
|
2024-07-18 14:13:50 +05:30
|
|
|
static int load_store__scnprintf(struct ins *ins, char *bf, size_t size,
|
|
|
|
struct ins_operands *ops, int max_ins_name)
|
|
|
|
{
|
|
|
|
return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
|
|
|
|
ops->raw);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Sets the fields: multi_regs and "mem_ref".
|
|
|
|
* "mem_ref" is set for ops->source which is later used to
|
|
|
|
* fill the objdump->memory_ref-char field. This ops is currently
|
|
|
|
* used by powerpc and since binary instruction code is used to
|
|
|
|
* extract opcode, regs and offset, no other parsing is needed here
|
|
|
|
*/
|
|
|
|
static int load_store__parse(struct arch *arch __maybe_unused, struct ins_operands *ops,
|
|
|
|
struct map_symbol *ms __maybe_unused, struct disasm_line *dl __maybe_unused)
|
|
|
|
{
|
|
|
|
ops->source.mem_ref = true;
|
|
|
|
ops->source.multi_regs = false;
|
2024-07-18 14:13:51 +05:30
|
|
|
/* opcode 31 is of X form */
|
|
|
|
if (PPC_OP(dl->raw.raw_insn) == 31)
|
|
|
|
ops->source.multi_regs = true;
|
2024-07-18 14:13:50 +05:30
|
|
|
|
|
|
|
ops->target.mem_ref = false;
|
|
|
|
ops->target.multi_regs = false;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ins_ops load_store_ops = {
|
|
|
|
.parse = load_store__parse,
|
|
|
|
.scnprintf = load_store__scnprintf,
|
|
|
|
};
|
|
|
|
|
2024-07-18 14:13:49 +05:30
|
|
|
/*
 * Parse the operand of a dec-style instruction.
 *
 * The first whitespace-delimited token of ops->raw is duplicated into
 * ops->target.raw. If the remainder contains the architecture's objdump
 * comment character, the text after it is handed to comment__symbol() to
 * fill ops->target.addr and ops->target.name.
 *
 * Returns 0 on success, -1 if the target string cannot be allocated.
 */
static int dec__parse(struct arch *arch, struct ins_operands *ops,
		      struct map_symbol *ms __maybe_unused,
		      struct disasm_line *dl __maybe_unused)
{
	char *target = ops->raw;
	char *end = target;
	char *comment;

	/* <ctype.h> classifiers need a value representable as unsigned char. */
	while (*end != '\0' && !isspace((unsigned char)*end))
		++end;

	/* Copy just the token; ops->raw itself is left untouched. */
	ops->target.raw = strndup(target, end - target);
	if (ops->target.raw == NULL)
		return -1;

	comment = strchr(end, arch->objdump.comment_char);
	if (comment == NULL)
		return 0;

	comment = skip_spaces(comment);
	comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);

	return 0;
}
|
|
|
|
|
|
|
|
static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
|
|
|
|
struct ins_operands *ops, int max_ins_name)
|
|
|
|
{
|
|
|
|
return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
|
|
|
|
ops->target.name ?: ops->target.raw);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct ins_ops dec_ops = {
|
|
|
|
.parse = dec__parse,
|
|
|
|
.scnprintf = dec__scnprintf,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Format a nop into @bf: always the literal "nop", left-justified in a
 * field of @max_ins_name characters, whatever the instruction's own name
 * and operands are.
 */
static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
			  struct ins_operands *ops __maybe_unused, int max_ins_name)
{
	const char *mnemonic = "nop";

	return scnprintf(bf, size, "%-*s", max_ins_name, mnemonic);
}
|
|
|
|
|
|
|
|
static struct ins_ops nop_ops = {
|
|
|
|
.scnprintf = nop__scnprintf,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct ins_ops ret_ops = {
|
|
|
|
.scnprintf = ins__raw_scnprintf,
|
|
|
|
};
|
|
|
|
|
|
|
|
bool ins__is_nop(const struct ins *ins)
|
|
|
|
{
|
|
|
|
return ins->ops == &nop_ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool ins__is_ret(const struct ins *ins)
|
|
|
|
{
|
|
|
|
return ins->ops == &ret_ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool ins__is_lock(const struct ins *ins)
|
|
|
|
{
|
|
|
|
return ins->ops == &lock_ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ins__key_cmp(const void *name, const void *insp)
|
|
|
|
{
|
|
|
|
const struct ins *ins = insp;
|
|
|
|
|
|
|
|
return strcmp(name, ins->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int ins__cmp(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
const struct ins *ia = a;
|
|
|
|
const struct ins *ib = b;
|
|
|
|
|
|
|
|
return strcmp(ia->name, ib->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ins__sort(struct arch *arch)
|
|
|
|
{
|
|
|
|
const int nmemb = arch->nr_instructions;
|
|
|
|
|
|
|
|
qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
|
|
|
|
}
|
|
|
|
|
2024-07-18 14:13:58 +05:30
|
|
|
static struct ins_ops *__ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
|
2024-03-29 14:58:10 -07:00
|
|
|
{
|
|
|
|
struct ins *ins;
|
|
|
|
const int nmemb = arch->nr_instructions;
|
|
|
|
|
2024-07-18 14:13:50 +05:30
|
|
|
if (arch__is(arch, "powerpc")) {
|
|
|
|
/*
|
|
|
|
* For powerpc, identify the instruction ops
|
|
|
|
* from the opcode using raw_insn.
|
|
|
|
*/
|
|
|
|
struct ins_ops *ops;
|
|
|
|
|
2024-07-18 14:13:58 +05:30
|
|
|
ops = check_ppc_insn(dl);
|
2024-07-18 14:13:50 +05:30
|
|
|
if (ops)
|
|
|
|
return ops;
|
|
|
|
}
|
|
|
|
|
2024-03-29 14:58:10 -07:00
|
|
|
if (!arch->sorted_instructions) {
|
|
|
|
ins__sort(arch);
|
|
|
|
arch->sorted_instructions = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
|
|
|
|
if (ins)
|
|
|
|
return ins->ops;
|
|
|
|
|
|
|
|
if (arch->insn_suffix) {
|
|
|
|
char tmp[32];
|
|
|
|
char suffix;
|
|
|
|
size_t len = strlen(name);
|
|
|
|
|
|
|
|
if (len == 0 || len >= sizeof(tmp))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
suffix = name[len - 1];
|
|
|
|
if (strchr(arch->insn_suffix, suffix) == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
strcpy(tmp, name);
|
|
|
|
tmp[len - 1] = '\0'; /* remove the suffix and check again */
|
|
|
|
|
|
|
|
ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
|
|
|
|
}
|
|
|
|
return ins ? ins->ops : NULL;
|
|
|
|
}
|
|
|
|
|
2024-07-18 14:13:58 +05:30
|
|
|
struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
|
2024-03-29 14:58:10 -07:00
|
|
|
{
|
2024-07-18 14:13:58 +05:30
|
|
|
struct ins_ops *ops = __ins__find(arch, name, dl);
|
2024-03-29 14:58:10 -07:00
|
|
|
|
|
|
|
if (!ops && arch->associate_instruction_ops)
|
|
|
|
ops = arch->associate_instruction_ops(arch, name);
|
|
|
|
|
|
|
|
return ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
|
|
|
|
{
|
2024-07-18 14:13:58 +05:30
|
|
|
dl->ins.ops = ins__find(arch, dl->ins.name, dl);
|
2024-03-29 14:58:10 -07:00
|
|
|
|
|
|
|
if (!dl->ins.ops)
|
|
|
|
return;
|
|
|
|
|
2024-07-18 14:13:49 +05:30
|
|
|
if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0)
|
2024-03-29 14:58:10 -07:00
|
|
|
dl->ins.ops = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Split a disassembly line into instruction name and operand text.
 *
 * Leading whitespace of @line is skipped; the run of non-space characters
 * that follows is duplicated into *@namep (heap-allocated), and *@rawp is
 * set to the rest of the line as returned by strim().
 *
 * Returns 0 on success, -1 if the line is empty or the name cannot be
 * allocated. On allocation failure *@rawp is left pointing just past the
 * name, and @line itself is not modified.
 */
static int disasm_line__parse(char *line, const char **namep, char **rawp)
{
	char *name = skip_spaces(line);
	char *end;

	if (name[0] == '\0')
		return -1;

	/* The first character is known to be non-space; scan from the second. */
	end = name + 1;
	while (*end != '\0' && !isspace((unsigned char)*end))
		++end;

	*rawp = end;

	/*
	 * Copy just the name instead of temporarily planting a terminator in
	 * the line, so a failed allocation cannot leave the line truncated.
	 */
	*namep = strndup(name, end - name);
	if (*namep == NULL)
		return -1;

	*rawp = strim(end);

	return 0;
}
|
|
|
|
|
perf annotate: Add disasm_line__parse() to parse raw instruction for powerpc
Currently, the perf tool infrastructure uses the disasm_line__parse
function to parse disassembled line.
Example snippet from objdump:
objdump --start-address=<address> --stop-address=<address> -d --no-show-raw-insn -C <vmlinux>
c0000000010224b4: lwz r10,0(r9)
This line "lwz r10,0(r9)" is parsed to extract instruction name,
registers names and offset.
In powerpc, the approach for data type profiling uses raw instruction
instead of result from objdump to identify the instruction category and
extract the source/target registers.
Example: 38 01 81 e8 ld r4,312(r1)
Here "38 01 81 e8" is the raw instruction representation. Add function
"disasm_line__parse_powerpc" to handle parsing of raw instruction.
Also update "struct disasm_line" to save the binary code.
With the change, function captures:
line -> "38 01 81 e8 ld r4,312(r1)"
raw instruction "38 01 81 e8"
Raw instruction is used later to extract the reg/offset fields. Macros
are added to extract opcode and register fields. "struct disasm_line"
is updated to carry union of "bytes" and "raw_insn" of 32 bit to carry raw
code (raw).
Function "disasm_line__parse_powerpc" fills the raw instruction hex value
and can use macros to get the opcode. There are no changes in existing code
paths, which parse the disassembled code. The size of the raw instruction
depends on the architecture.
In case of powerpc, parsing the disasm line needs to handle cases
for reading binary code directly from DSO as well as parsing the objdump
result. Hence adding the logic into separate function instead of
updating "disasm_line__parse". The architecture using the instruction
name and present approach is not altered. Since this approach targets
powerpc, the macro implementation is added for powerpc as of now.
Since disasm_line__parse is used in other cases (perf annotate) and
not only data type profiling, the powerpc callback includes changes to
work with binary code as well as the mnemonic representation.
Also, if the DSO read fails and libcapstone is not supported, the
approach falls back to using objdump. Hence the patch also has
changes to ensure the objdump option works well.
Reviewed-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Akanksha J N <akanksha@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Disha Goel <disgoel@linux.vnet.ibm.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Link: https://lore.kernel.org/lkml/20240718084358.72242-5-atrajeev@linux.vnet.ibm.com
[ Add check for strndup() result ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-07-18 14:13:47 +05:30
|
|
|
/*
|
|
|
|
* Parses the result captured from symbol__disassemble_*
|
|
|
|
* Example, line read from DSO file in powerpc:
|
|
|
|
* line: 38 01 81 e8
|
|
|
|
* opcode: fetched from arch specific get_opcode_insn
|
|
|
|
* rawp_insn: e8810138
|
|
|
|
*
|
|
|
|
* rawp_insn is used later to extract the reg/offset fields
|
|
|
|
*/
|
|
|
|
#define PPC_OP(op) (((op) >> 26) & 0x3F)
|
|
|
|
#define RAW_BYTES 11
|
|
|
|
|
|
|
|
static int disasm_line__parse_powerpc(struct disasm_line *dl)
|
|
|
|
{
|
|
|
|
char *line = dl->al.line;
|
|
|
|
const char **namep = &dl->ins.name;
|
|
|
|
char **rawp = &dl->ops.raw;
|
|
|
|
char *tmp_raw_insn, *name_raw_insn = skip_spaces(line);
|
|
|
|
char *name = skip_spaces(name_raw_insn + RAW_BYTES);
|
|
|
|
int objdump = 0;
|
|
|
|
|
|
|
|
if (strlen(line) > RAW_BYTES)
|
|
|
|
objdump = 1;
|
|
|
|
|
|
|
|
if (name_raw_insn[0] == '\0')
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (objdump) {
|
|
|
|
disasm_line__parse(name, namep, rawp);
|
|
|
|
} else
|
|
|
|
*namep = "";
|
|
|
|
|
|
|
|
tmp_raw_insn = strndup(name_raw_insn, 11);
|
|
|
|
if (tmp_raw_insn == NULL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
remove_spaces(tmp_raw_insn);
|
|
|
|
|
|
|
|
sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn);
|
|
|
|
if (objdump)
|
|
|
|
dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-03-29 14:58:10 -07:00
|
|
|
static void annotation_line__init(struct annotation_line *al,
|
|
|
|
struct annotate_args *args,
|
|
|
|
int nr)
|
|
|
|
{
|
|
|
|
al->offset = args->offset;
|
|
|
|
al->line = strdup(args->line);
|
|
|
|
al->line_nr = args->line_nr;
|
|
|
|
al->fileloc = args->fileloc;
|
|
|
|
al->data_nr = nr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void annotation_line__exit(struct annotation_line *al)
|
|
|
|
{
|
|
|
|
zfree_srcline(&al->path);
|
|
|
|
zfree(&al->line);
|
|
|
|
zfree(&al->cycles);
|
perf annotate: Display the branch counter histogram
Display the branch counter histogram in the annotation view.
Press 'B' to display the branch counter's abbreviation list as well.
Samples: 1M of events 'anon group { branch-instructions:ppp, branch-misses }',
4000 Hz, Event count (approx.):
f3 /home/sdp/test/tchain_edit [Percent: local period]
Percent │ IPC Cycle Branch Counter (Average IPC: 1.39, IPC Coverage: 29.4%)
│ 0000000000401755 <f3>:
0.00 0.00 │ endbr64
│ push %rbp
│ mov %rsp,%rbp
│ movl $0x0,-0x4(%rbp)
0.00 0.00 │1.33 3 |A |- | ↓ jmp 25
11.03 11.03 │ 11: mov -0x4(%rbp),%eax
│ and $0x1,%eax
│ test %eax,%eax
17.13 17.13 │2.41 1 |A |- | ↓ je 21
│ addl $0x1,-0x4(%rbp)
21.84 21.84 │2.22 2 |AA |- | ↓ jmp 25
17.13 17.13 │ 21: addl $0x1,-0x4(%rbp)
21.84 21.84 │ 25: cmpl $0x270f,-0x4(%rbp)
11.03 11.03 │0.61 3 |A |- | ↑ jle 11
│ nop
│ pop %rbp
0.00 0.00 │0.24 20 |AA |B | ← ret
Originally-by: Tinghao Zhang <tinghao.zhang@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20240813160208.2493643-8-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-08-13 09:02:06 -07:00
|
|
|
zfree(&al->br_cntr);
|
2024-03-29 14:58:10 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
static size_t disasm_line_size(int nr)
|
|
|
|
{
|
|
|
|
struct annotation_line *al;
|
|
|
|
|
|
|
|
return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allocating the disasm annotation line data with
|
|
|
|
* following structure:
|
|
|
|
*
|
|
|
|
* -------------------------------------------
|
|
|
|
* struct disasm_line | struct annotation_line
|
|
|
|
* -------------------------------------------
|
|
|
|
*
|
|
|
|
* We have 'struct annotation_line' member as last member
|
|
|
|
* of 'struct disasm_line' to have an easy access.
|
|
|
|
*/
|
|
|
|
struct disasm_line *disasm_line__new(struct annotate_args *args)
|
|
|
|
{
|
|
|
|
struct disasm_line *dl = NULL;
|
2024-08-03 14:13:31 -07:00
|
|
|
struct annotation *notes = symbol__annotation(args->ms.sym);
|
|
|
|
int nr = notes->src->nr_events;
|
2024-03-29 14:58:10 -07:00
|
|
|
|
|
|
|
dl = zalloc(disasm_line_size(nr));
|
|
|
|
if (!dl)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
annotation_line__init(&dl->al, args, nr);
|
|
|
|
if (dl->al.line == NULL)
|
|
|
|
goto out_delete;
|
|
|
|
|
|
|
|
if (args->offset != -1) {
|
perf annotate: Add disasm_line__parse() to parse raw instruction for powerpc
Currently, the perf tool infrastructure uses the disasm_line__parse
function to parse disassembled line.
Example snippet from objdump:
objdump --start-address=<address> --stop-address=<address> -d --no-show-raw-insn -C <vmlinux>
c0000000010224b4: lwz r10,0(r9)
This line "lwz r10,0(r9)" is parsed to extract instruction name,
registers names and offset.
In powerpc, the approach for data type profiling uses raw instruction
instead of result from objdump to identify the instruction category and
extract the source/target registers.
Example: 38 01 81 e8 ld r4,312(r1)
Here "38 01 81 e8" is the raw instruction representation. Add function
"disasm_line__parse_powerpc" to handle parsing of raw instruction.
Also update "struct disasm_line" to save the binary code/
With the change, function captures:
line -> "38 01 81 e8 ld r4,312(r1)"
raw instruction "38 01 81 e8"
Raw instruction is used later to extract the reg/offset fields. Macros
are added to extract opcode and register fields. "struct disasm_line"
is updated to carry union of "bytes" and "raw_insn" of 32 bit to carry raw
code (raw).
Function "disasm_line__parse_powerpc fills the raw instruction hex value
and can use macros to get opcode. There is no changes in existing code
paths, which parses the disassembled code. The size of raw instruction
depends on architecture.
In case of powerpc, the parsing the disasm line needs to handle cases
for reading binary code directly from DSO as well as parsing the objdump
result. Hence adding the logic into separate function instead of
updating "disasm_line__parse". The architecture using the instruction
name and present approach is not altered. Since this approach targets
powerpc, the macro implementation is added for powerpc as of now.
Since the disasm_line__parse is used in other cases (perf annotate) and
not only data tye profiling, the powerpc callback includes changes to
work with binary code as well as mnemonic representation.
Also in case if the DSO read fails and libcapstone is not supported, the
approach fallback to use objdump as option. Hence as option, patch has
changes to ensure objdump option also works well.
Reviewed-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Akanksha J N <akanksha@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Disha Goel <disgoel@linux.vnet.ibm.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Link: https://lore.kernel.org/lkml/20240718084358.72242-5-atrajeev@linux.vnet.ibm.com
[ Add check for strndup() result ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-07-18 14:13:47 +05:30
|
|
|
if (arch__is(args->arch, "powerpc")) {
|
|
|
|
if (disasm_line__parse_powerpc(dl) < 0)
|
|
|
|
goto out_free_line;
|
|
|
|
} else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
|
2024-03-29 14:58:10 -07:00
|
|
|
goto out_free_line;
|
|
|
|
|
|
|
|
disasm_line__init_ins(dl, args->arch, &args->ms);
|
|
|
|
}
|
|
|
|
|
|
|
|
return dl;
|
|
|
|
|
|
|
|
out_free_line:
|
|
|
|
zfree(&dl->al.line);
|
|
|
|
out_delete:
|
|
|
|
free(dl);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
void disasm_line__free(struct disasm_line *dl)
|
|
|
|
{
|
|
|
|
if (dl->ins.ops && dl->ins.ops->free)
|
|
|
|
dl->ins.ops->free(&dl->ops);
|
|
|
|
else
|
|
|
|
ins_ops__delete(&dl->ops);
|
|
|
|
zfree(&dl->ins.name);
|
|
|
|
annotation_line__exit(&dl->al);
|
|
|
|
free(dl);
|
|
|
|
}
|
|
|
|
|
|
|
|
int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name)
|
|
|
|
{
|
|
|
|
if (raw || !dl->ins.ops)
|
|
|
|
return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw);
|
|
|
|
|
|
|
|
return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw)
|
|
|
|
* which looks like following
|
|
|
|
*
|
|
|
|
* 0000000000415500 <_init>:
|
|
|
|
* 415500: sub $0x8,%rsp
|
|
|
|
* 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8>
|
|
|
|
* 41550b: test %rax,%rax
|
|
|
|
* 41550e: je 415515 <_init+0x15>
|
|
|
|
* 415510: callq 416e70 <__gmon_start__@plt>
|
|
|
|
* 415515: add $0x8,%rsp
|
|
|
|
* 415519: retq
|
|
|
|
*
|
|
|
|
* it will be parsed and saved into struct disasm_line as
|
|
|
|
* <offset> <name> <ops.raw>
|
|
|
|
*
|
|
|
|
* The offset will be a relative offset from the start of the symbol and -1
|
|
|
|
* means that it's not a disassembly line so should be treated differently.
|
|
|
|
* The ops.raw part will be parsed further according to type of the instruction.
|
|
|
|
*/
|
|
|
|
static int symbol__parse_objdump_line(struct symbol *sym,
|
|
|
|
struct annotate_args *args,
|
|
|
|
char *parsed_line, int *line_nr, char **fileloc)
|
|
|
|
{
|
|
|
|
struct map *map = args->ms.map;
|
|
|
|
struct annotation *notes = symbol__annotation(sym);
|
|
|
|
struct disasm_line *dl;
|
|
|
|
char *tmp;
|
|
|
|
s64 line_ip, offset = -1;
|
|
|
|
regmatch_t match[2];
|
|
|
|
|
|
|
|
/* /filename:linenr ? Save line number and ignore. */
|
|
|
|
if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
|
|
|
|
*line_nr = atoi(parsed_line + match[1].rm_so);
|
|
|
|
free(*fileloc);
|
|
|
|
*fileloc = strdup(parsed_line);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Process hex address followed by ':'. */
|
|
|
|
line_ip = strtoull(parsed_line, &tmp, 16);
|
|
|
|
if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') {
|
|
|
|
u64 start = map__rip_2objdump(map, sym->start),
|
|
|
|
end = map__rip_2objdump(map, sym->end);
|
|
|
|
|
|
|
|
offset = line_ip - start;
|
|
|
|
if ((u64)line_ip < start || (u64)line_ip >= end)
|
|
|
|
offset = -1;
|
|
|
|
else
|
|
|
|
parsed_line = tmp + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
args->offset = offset;
|
|
|
|
args->line = parsed_line;
|
|
|
|
args->line_nr = *line_nr;
|
|
|
|
args->fileloc = *fileloc;
|
|
|
|
args->ms.sym = sym;
|
|
|
|
|
|
|
|
dl = disasm_line__new(args);
|
|
|
|
(*line_nr)++;
|
|
|
|
|
|
|
|
if (dl == NULL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (!disasm_line__has_local_offset(dl)) {
|
|
|
|
dl->ops.target.offset = dl->ops.target.addr -
|
|
|
|
map__rip_2objdump(map, sym->start);
|
|
|
|
dl->ops.target.offset_avail = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* kcore has no symbols, so add the call target symbol */
|
|
|
|
if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) {
|
|
|
|
struct addr_map_symbol target = {
|
|
|
|
.addr = dl->ops.target.addr,
|
|
|
|
.ms = { .map = map, },
|
|
|
|
};
|
|
|
|
|
|
|
|
if (!maps__find_ams(args->ms.maps, &target) &&
|
|
|
|
target.ms.sym->start == target.al_addr)
|
|
|
|
dl->ops.target.sym = target.ms.sym;
|
|
|
|
}
|
|
|
|
|
|
|
|
annotation_line__add(&dl->al, ¬es->src->source);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void delete_last_nop(struct symbol *sym)
|
|
|
|
{
|
|
|
|
struct annotation *notes = symbol__annotation(sym);
|
|
|
|
struct list_head *list = ¬es->src->source;
|
|
|
|
struct disasm_line *dl;
|
|
|
|
|
|
|
|
while (!list_empty(list)) {
|
|
|
|
dl = list_entry(list->prev, struct disasm_line, al.node);
|
|
|
|
|
|
|
|
if (dl->ins.ops) {
|
|
|
|
if (!ins__is_nop(&dl->ins))
|
|
|
|
return;
|
|
|
|
} else {
|
|
|
|
if (!strstr(dl->al.line, " nop ") &&
|
|
|
|
!strstr(dl->al.line, " nopl ") &&
|
|
|
|
!strstr(dl->al.line, " nopw "))
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
list_del_init(&dl->al.node);
|
|
|
|
disasm_line__free(dl);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen)
|
|
|
|
{
|
|
|
|
struct dso *dso = map__dso(ms->map);
|
|
|
|
|
|
|
|
BUG_ON(buflen == 0);
|
|
|
|
|
|
|
|
if (errnum >= 0) {
|
|
|
|
str_error_r(errnum, buf, buflen);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (errnum) {
|
|
|
|
case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: {
|
|
|
|
char bf[SBUILD_ID_SIZE + 15] = " with build id ";
|
|
|
|
char *build_id_msg = NULL;
|
|
|
|
|
2024-05-04 14:38:01 -07:00
|
|
|
if (dso__has_build_id(dso)) {
|
|
|
|
build_id__sprintf(dso__bid(dso), bf + 15);
|
2024-03-29 14:58:10 -07:00
|
|
|
build_id_msg = bf;
|
|
|
|
}
|
|
|
|
scnprintf(buf, buflen,
|
|
|
|
"No vmlinux file%s\nwas found in the path.\n\n"
|
|
|
|
"Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
|
|
|
|
"Please use:\n\n"
|
|
|
|
" perf buildid-cache -vu vmlinux\n\n"
|
|
|
|
"or:\n\n"
|
|
|
|
" --vmlinux vmlinux\n", build_id_msg ?: "");
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF:
|
|
|
|
scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation");
|
|
|
|
break;
|
|
|
|
case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP:
|
|
|
|
scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions.");
|
|
|
|
break;
|
|
|
|
case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING:
|
|
|
|
scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization.");
|
|
|
|
break;
|
|
|
|
case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE:
|
2024-05-04 14:38:01 -07:00
|
|
|
scnprintf(buf, buflen, "Invalid BPF file: %s.", dso__long_name(dso));
|
2024-03-29 14:58:10 -07:00
|
|
|
break;
|
|
|
|
case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF:
|
|
|
|
scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
|
2024-05-04 14:38:01 -07:00
|
|
|
dso__long_name(dso));
|
2024-03-29 14:58:10 -07:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size)
|
|
|
|
{
|
|
|
|
char linkname[PATH_MAX];
|
|
|
|
char *build_id_filename;
|
|
|
|
char *build_id_path = NULL;
|
|
|
|
char *pos;
|
|
|
|
int len;
|
|
|
|
|
2024-05-04 14:38:01 -07:00
|
|
|
if (dso__symtab_type(dso) == DSO_BINARY_TYPE__KALLSYMS &&
|
2024-03-29 14:58:10 -07:00
|
|
|
!dso__is_kcore(dso))
|
|
|
|
return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
|
|
|
|
|
|
|
|
build_id_filename = dso__build_id_filename(dso, NULL, 0, false);
|
|
|
|
if (build_id_filename) {
|
|
|
|
__symbol__join_symfs(filename, filename_size, build_id_filename);
|
|
|
|
free(build_id_filename);
|
|
|
|
} else {
|
2024-05-04 14:38:01 -07:00
|
|
|
if (dso__has_build_id(dso))
|
2024-03-29 14:58:10 -07:00
|
|
|
return ENOMEM;
|
|
|
|
goto fallback;
|
|
|
|
}
|
|
|
|
|
|
|
|
build_id_path = strdup(filename);
|
|
|
|
if (!build_id_path)
|
|
|
|
return ENOMEM;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* old style build-id cache has name of XX/XXXXXXX.. while
|
|
|
|
* new style has XX/XXXXXXX../{elf,kallsyms,vdso}.
|
|
|
|
* extract the build-id part of dirname in the new style only.
|
|
|
|
*/
|
|
|
|
pos = strrchr(build_id_path, '/');
|
|
|
|
if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
|
|
|
|
dirname(build_id_path);
|
|
|
|
|
|
|
|
if (dso__is_kcore(dso))
|
|
|
|
goto fallback;
|
|
|
|
|
|
|
|
len = readlink(build_id_path, linkname, sizeof(linkname) - 1);
|
|
|
|
if (len < 0)
|
|
|
|
goto fallback;
|
|
|
|
|
|
|
|
linkname[len] = '\0';
|
|
|
|
if (strstr(linkname, DSO__NAME_KALLSYMS) ||
|
|
|
|
access(filename, R_OK)) {
|
|
|
|
fallback:
|
|
|
|
/*
|
|
|
|
* If we don't have build-ids or the build-id file isn't in the
|
|
|
|
* cache, or is just a kallsyms file, well, lets hope that this
|
|
|
|
* DSO is the same as when 'perf record' ran.
|
|
|
|
*/
|
2024-05-04 14:38:01 -07:00
|
|
|
if (dso__kernel(dso) && dso__long_name(dso)[0] == '/')
|
|
|
|
snprintf(filename, filename_size, "%s", dso__long_name(dso));
|
2024-03-29 14:58:10 -07:00
|
|
|
else
|
2024-05-04 14:38:01 -07:00
|
|
|
__symbol__join_symfs(filename, filename_size, dso__long_name(dso));
|
2024-03-29 14:58:10 -07:00
|
|
|
|
2024-05-04 14:38:01 -07:00
|
|
|
mutex_lock(dso__lock(dso));
|
|
|
|
if (access(filename, R_OK) && errno == ENOENT && dso__nsinfo(dso)) {
|
2024-03-29 14:58:10 -07:00
|
|
|
char *new_name = dso__filename_with_chroot(dso, filename);
|
|
|
|
if (new_name) {
|
|
|
|
strlcpy(filename, new_name, filename_size);
|
|
|
|
free(new_name);
|
|
|
|
}
|
|
|
|
}
|
2024-05-04 14:38:01 -07:00
|
|
|
mutex_unlock(dso__lock(dso));
|
|
|
|
} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
|
|
|
|
dso__set_binary_type(dso, DSO_BINARY_TYPE__BUILD_ID_CACHE);
|
2024-03-29 14:58:10 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
free(build_id_path);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-03-29 14:58:11 -07:00
|
|
|
#ifdef HAVE_LIBCAPSTONE_SUPPORT
|
|
|
|
#include <capstone/capstone.h>
|
|
|
|
|
2024-07-18 14:13:56 +05:30
|
|
|
int capstone_init(struct machine *machine, csh *cs_handle, bool is64, bool disassembler_style);
|
|
|
|
|
2024-03-29 14:58:11 -07:00
|
|
|
static int open_capstone_handle(struct annotate_args *args, bool is_64bit,
|
|
|
|
csh *handle)
|
|
|
|
{
|
|
|
|
struct annotation_options *opt = args->options;
|
|
|
|
cs_mode mode = is_64bit ? CS_MODE_64 : CS_MODE_32;
|
|
|
|
|
|
|
|
/* TODO: support more architectures */
|
|
|
|
if (!arch__is(args->arch, "x86"))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (cs_open(CS_ARCH_X86, mode, handle) != CS_ERR_OK)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (!opt->disassembler_style ||
|
|
|
|
!strcmp(opt->disassembler_style, "att"))
|
|
|
|
cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
|
|
|
|
|
2024-03-29 14:58:12 -07:00
|
|
|
/*
|
|
|
|
* Resolving address operands to symbols is implemented
|
|
|
|
* on x86 by investigating instruction details.
|
|
|
|
*/
|
|
|
|
cs_option(*handle, CS_OPT_DETAIL, CS_OPT_ON);
|
|
|
|
|
2024-03-29 14:58:11 -07:00
|
|
|
return 0;
|
|
|
|
}
|
perf annotate: LLVM-based disassembler
Support using LLVM as a disassembler method, allowing helperless
annotation in non-distro builds. (It is also much faster than
using libbfd or bfd objdump on binaries with a lot of debug
information.)
This is nearly identical to the output of llvm-objdump; there are
some very rare whitespace differences, some minor changes to demangling
(since we use perf's regular demangling and not LLVM's own) and
the occasional case where llvm-objdump makes a different choice
when multiple symbols share the same address.
It should work across all of LLVM's supported architectures, although
I've only tested 64-bit x86, and finding the right triple from perf's
idea of machine architecture can sometimes be a bit tricky. Ideally, we
should have some way of finding the triplet just from the file itself.
Committer notes:
Address this on 32-bit systems by using PRIu64 from inttypes.h
3 17.58 almalinux:9-i386 : FAIL gcc version 11.4.1 20231218 (Red Hat 11.4.1-3) (GCC)
util/llvm-c-helpers.cpp: In function ‘char* make_symbol_relative_string(dso*, const char*, u64, u64)’:
util/llvm-c-helpers.cpp:150:52: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘u64’ {aka
+‘long long unsigned int’} [-Werror=format=]
150 | snprintf(buf, sizeof(buf), "%s+0x%lx",
| ~~^
| |
| long unsigned int
| %llx
151 | demangled ? demangled : sym_name, addr - base_addr);
| ~~~~~~~~~~~~~~~~
| |
| u64 {aka long long unsigned int}
cc1plus: all warnings being treated as errors
Signed-off-by: Steinar H. Gunderson <sesse@google.com>
Cc: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20240803152008.2818485-3-sesse@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-08-03 17:20:08 +02:00
|
|
|
#endif
|
2024-03-29 14:58:11 -07:00
|
|
|
|
perf annotate: LLVM-based disassembler
Support using LLVM as a disassembler method, allowing helperless
annotation in non-distro builds. (It is also much faster than
using libbfd or bfd objdump on binaries with a lot of debug
information.)
This is nearly identical to the output of llvm-objdump; there are
some very rare whitespace differences, some minor changes to demangling
(since we use perf's regular demangling and not LLVM's own) and
the occasional case where llvm-objdump makes a different choice
when multiple symbols share the same address.
It should work across all of LLVM's supported architectures, although
I've only tested 64-bit x86, and finding the right triple from perf's
idea of machine architecture can sometimes be a bit tricky. Ideally, we
should have some way of finding the triplet just from the file itself.
Committer notes:
Address this on 32-bit systems by using PRIu64 from inttypes.h
3 17.58 almalinux:9-i386 : FAIL gcc version 11.4.1 20231218 (Red Hat 11.4.1-3) (GCC)
util/llvm-c-helpers.cpp: In function ‘char* make_symbol_relative_string(dso*, const char*, u64, u64)’:
util/llvm-c-helpers.cpp:150:52: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘u64’ {aka
+‘long long unsigned int’} [-Werror=format=]
150 | snprintf(buf, sizeof(buf), "%s+0x%lx",
| ~~^
| |
| long unsigned int
| %llx
151 | demangled ? demangled : sym_name, addr - base_addr);
| ~~~~~~~~~~~~~~~~
| |
| u64 {aka long long unsigned int}
cc1plus: all warnings being treated as errors
Signed-off-by: Steinar H. Gunderson <sesse@google.com>
Cc: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20240803152008.2818485-3-sesse@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-08-03 17:20:08 +02:00
|
|
|
#if defined(HAVE_LIBCAPSTONE_SUPPORT) || defined(HAVE_LIBLLVM_SUPPORT)
|
2024-03-29 14:58:11 -07:00
|
|
|
struct find_file_offset_data {
|
|
|
|
u64 ip;
|
|
|
|
u64 offset;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* This will be called for each PHDR in an ELF binary */
|
|
|
|
static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
|
|
|
|
{
|
|
|
|
struct find_file_offset_data *data = arg;
|
|
|
|
|
|
|
|
if (start <= data->ip && data->ip < start + len) {
|
|
|
|
data->offset = pgoff + data->ip - start;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-08-03 17:20:07 +02:00
|
|
|
static u8 *
|
|
|
|
read_symbol(const char *filename, struct map *map, struct symbol *sym,
|
|
|
|
u64 *len, bool *is_64bit)
|
|
|
|
{
|
|
|
|
struct dso *dso = map__dso(map);
|
|
|
|
struct nscookie nsc;
|
|
|
|
u64 start = map__rip_2objdump(map, sym->start);
|
|
|
|
u64 end = map__rip_2objdump(map, sym->end);
|
|
|
|
int fd, count;
|
|
|
|
u8 *buf = NULL;
|
|
|
|
struct find_file_offset_data data = {
|
|
|
|
.ip = start,
|
|
|
|
};
|
|
|
|
|
|
|
|
*is_64bit = false;
|
|
|
|
|
|
|
|
nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
|
|
|
|
fd = open(filename, O_RDONLY);
|
|
|
|
nsinfo__mountns_exit(&nsc);
|
|
|
|
if (fd < 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
|
|
|
|
is_64bit) == 0)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
*len = end - start;
|
|
|
|
buf = malloc(*len);
|
|
|
|
if (buf == NULL)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
count = pread(fd, buf, *len, data.offset);
|
|
|
|
close(fd);
|
|
|
|
fd = -1;
|
|
|
|
|
|
|
|
if ((u64)count != *len)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
return buf;
|
|
|
|
|
|
|
|
err:
|
|
|
|
if (fd >= 0)
|
|
|
|
close(fd);
|
|
|
|
free(buf);
|
|
|
|
return NULL;
|
|
|
|
}
|
perf annotate: LLVM-based disassembler
Support using LLVM as a disassembler method, allowing helperless
annotation in non-distro builds. (It is also much faster than
using libbfd or bfd objdump on binaries with a lot of debug
information.)
This is nearly identical to the output of llvm-objdump; there are
some very rare whitespace differences, some minor changes to demangling
(since we use perf's regular demangling and not LLVM's own) and
the occasional case where llvm-objdump makes a different choice
when multiple symbols share the same address.
It should work across all of LLVM's supported architectures, although
I've only tested 64-bit x86, and finding the right triple from perf's
idea of machine architecture can sometimes be a bit tricky. Ideally, we
should have some way of finding the triplet just from the file itself.
Committer notes:
Address this on 32-bit systems by using PRIu64 from inttypes.h
3 17.58 almalinux:9-i386 : FAIL gcc version 11.4.1 20231218 (Red Hat 11.4.1-3) (GCC)
util/llvm-c-helpers.cpp: In function ‘char* make_symbol_relative_string(dso*, const char*, u64, u64)’:
util/llvm-c-helpers.cpp:150:52: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘u64’ {aka
+‘long long unsigned int’} [-Werror=format=]
150 | snprintf(buf, sizeof(buf), "%s+0x%lx",
| ~~^
| |
| long unsigned int
| %llx
151 | demangled ? demangled : sym_name, addr - base_addr);
| ~~~~~~~~~~~~~~~~
| |
| u64 {aka long long unsigned int}
cc1plus: all warnings being treated as errors
Signed-off-by: Steinar H. Gunderson <sesse@google.com>
Cc: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20240803152008.2818485-3-sesse@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-08-03 17:20:08 +02:00
|
|
|
#endif
|
2024-08-03 17:20:07 +02:00
|
|
|
|
2024-11-11 12:17:33 -03:00
|
|
|
#if !defined(HAVE_LIBCAPSTONE_SUPPORT) || !defined(HAVE_LIBLLVM_SUPPORT)
|
|
|
|
static void symbol__disassembler_missing(const char *disassembler, const char *filename,
|
|
|
|
struct symbol *sym)
|
|
|
|
{
|
|
|
|
pr_debug("The %s disassembler isn't linked in for %s in %s\n",
|
|
|
|
disassembler, sym->name, filename);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
perf annotate: LLVM-based disassembler
Support using LLVM as a disassembler method, allowing helperless
annotation in non-distro builds. (It is also much faster than
using libbfd or bfd objdump on binaries with a lot of debug
information.)
This is nearly identical to the output of llvm-objdump; there are
some very rare whitespace differences, some minor changes to demangling
(since we use perf's regular demangling and not LLVM's own) and
the occasional case where llvm-objdump makes a different choice
when multiple symbols share the same address.
It should work across all of LLVM's supported architectures, although
I've only tested 64-bit x86, and finding the right triple from perf's
idea of machine architecture can sometimes be a bit tricky. Ideally, we
should have some way of finding the triplet just from the file itself.
Committer notes:
Address this on 32-bit systems by using PRIu64 from inttypes.h
3 17.58 almalinux:9-i386 : FAIL gcc version 11.4.1 20231218 (Red Hat 11.4.1-3) (GCC)
util/llvm-c-helpers.cpp: In function ‘char* make_symbol_relative_string(dso*, const char*, u64, u64)’:
util/llvm-c-helpers.cpp:150:52: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘u64’ {aka
+‘long long unsigned int’} [-Werror=format=]
150 | snprintf(buf, sizeof(buf), "%s+0x%lx",
| ~~^
| |
| long unsigned int
| %llx
151 | demangled ? demangled : sym_name, addr - base_addr);
| ~~~~~~~~~~~~~~~~
| |
| u64 {aka long long unsigned int}
cc1plus: all warnings being treated as errors
Signed-off-by: Steinar H. Gunderson <sesse@google.com>
Cc: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20240803152008.2818485-3-sesse@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-08-03 17:20:08 +02:00
|
|
|
#ifdef HAVE_LIBCAPSTONE_SUPPORT
|
2024-03-29 14:58:12 -07:00
|
|
|
/*
 * Append a "# <addr> <symbol[+offset]>" comment for the first RIP-relative
 * memory operand of @insn whose target resolves to a symbol.
 *
 * @insn: decoded instruction; nothing is printed when it carries no detail
 * @buf:  where the comment is written
 * @len:  space available in @buf
 * @args: annotation arguments, supplying the arch and the map of the code
 * @addr: address of @insn (the caller passes the objdump-space start of the
 *        symbol plus the instruction offset)
 *
 * Only x86 is handled.  @buf is not written when the arch is not x86 or when
 * no operand resolves to a symbol.
 */
static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
				  struct annotate_args *args, u64 addr)
{
	int i;

	/* TODO: support more architectures */
	if (!arch__is(args->arch, "x86"))
		return;

	if (insn->detail == NULL)
		return;

	for (i = 0; i < insn->detail->x86.op_count; i++) {
		cs_x86_op *op = &insn->detail->x86.operands[i];
		/*
		 * Start every operand from the instruction's own map and
		 * address: a failed lookup for one operand must not leak a
		 * NULL map or an already translated address into the next.
		 */
		struct map *map = args->ms.map;
		struct symbol *sym;
		u64 orig_addr;
		u64 target;

		if (op->type != X86_OP_MEM)
			continue;

		/* only print RIP-based global symbols for now */
		if (op->mem.base != X86_REG_RIP)
			continue;

		/* get the target address */
		orig_addr = addr + insn->size + op->mem.disp;
		target = map__objdump_2mem(map, orig_addr);

		if (dso__kernel(map__dso(map))) {
			/*
			 * The kernel maps can be split into sections,
			 * let's find the map first and then search the symbol.
			 */
			map = maps__find(map__kmaps(map), target);
			if (map == NULL)
				continue;
		}

		/* convert it to map-relative address for search */
		target = map__map_ip(map, target);

		sym = map__find_symbol(map, target);
		if (sym == NULL)
			continue;

		if (target == sym->start) {
			scnprintf(buf, len, "\t# %"PRIx64" <%s>",
				  orig_addr, sym->name);
		} else {
			scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">",
				  orig_addr, sym->name, target - sym->start);
		}
		break;
	}
}
|
|
|
|
|
2024-07-18 14:13:57 +05:30
|
|
|
static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym,
|
|
|
|
struct annotate_args *args)
|
|
|
|
{
|
|
|
|
struct annotation *notes = symbol__annotation(sym);
|
|
|
|
struct map *map = args->ms.map;
|
|
|
|
struct dso *dso = map__dso(map);
|
|
|
|
struct nscookie nsc;
|
|
|
|
u64 start = map__rip_2objdump(map, sym->start);
|
|
|
|
u64 end = map__rip_2objdump(map, sym->end);
|
|
|
|
u64 len = end - start;
|
|
|
|
u64 offset;
|
|
|
|
int i, fd, count;
|
|
|
|
bool is_64bit = false;
|
|
|
|
bool needs_cs_close = false;
|
|
|
|
u8 *buf = NULL;
|
|
|
|
struct find_file_offset_data data = {
|
|
|
|
.ip = start,
|
|
|
|
};
|
|
|
|
csh handle;
|
|
|
|
char disasm_buf[512];
|
|
|
|
struct disasm_line *dl;
|
|
|
|
u32 *line;
|
|
|
|
bool disassembler_style = false;
|
|
|
|
|
|
|
|
if (args->options->objdump_path)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
|
|
|
|
fd = open(filename, O_RDONLY);
|
|
|
|
nsinfo__mountns_exit(&nsc);
|
|
|
|
if (fd < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
|
|
|
|
&is_64bit) == 0)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
if (!args->options->disassembler_style ||
|
|
|
|
!strcmp(args->options->disassembler_style, "att"))
|
|
|
|
disassembler_style = true;
|
|
|
|
|
|
|
|
if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
needs_cs_close = true;
|
|
|
|
|
|
|
|
buf = malloc(len);
|
|
|
|
if (buf == NULL)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
count = pread(fd, buf, len, data.offset);
|
|
|
|
close(fd);
|
|
|
|
fd = -1;
|
|
|
|
|
|
|
|
if ((u64)count != len)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
line = (u32 *)buf;
|
|
|
|
|
|
|
|
/* add the function address and name */
|
|
|
|
scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
|
|
|
|
start, sym->name);
|
|
|
|
|
|
|
|
args->offset = -1;
|
|
|
|
args->line = disasm_buf;
|
|
|
|
args->line_nr = 0;
|
|
|
|
args->fileloc = NULL;
|
|
|
|
args->ms.sym = sym;
|
|
|
|
|
|
|
|
dl = disasm_line__new(args);
|
|
|
|
if (dl == NULL)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
annotation_line__add(&dl->al, ¬es->src->source);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* TODO: enable disassm for powerpc
|
|
|
|
* count = cs_disasm(handle, buf, len, start, len, &insn);
|
|
|
|
*
|
|
|
|
* For now, only binary code is saved in disassembled line
|
|
|
|
* to be used in "type" and "typeoff" sort keys. Each raw code
|
|
|
|
* is 32 bit instruction. So use "len/4" to get the number of
|
|
|
|
* entries.
|
|
|
|
*/
|
|
|
|
count = len/4;
|
|
|
|
|
|
|
|
for (i = 0, offset = 0; i < count; i++) {
|
|
|
|
args->offset = offset;
|
|
|
|
sprintf(args->line, "%x", line[i]);
|
|
|
|
|
|
|
|
dl = disasm_line__new(args);
|
|
|
|
if (dl == NULL)
|
2024-10-19 23:41:56 +08:00
|
|
|
break;
|
2024-07-18 14:13:57 +05:30
|
|
|
|
|
|
|
annotation_line__add(&dl->al, ¬es->src->source);
|
|
|
|
|
|
|
|
offset += 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* It failed in the middle */
|
|
|
|
if (offset != len) {
|
|
|
|
struct list_head *list = ¬es->src->source;
|
|
|
|
|
|
|
|
/* Discard all lines and fallback to objdump */
|
|
|
|
while (!list_empty(list)) {
|
|
|
|
dl = list_first_entry(list, struct disasm_line, al.node);
|
|
|
|
|
|
|
|
list_del_init(&dl->al.node);
|
|
|
|
disasm_line__free(dl);
|
|
|
|
}
|
|
|
|
count = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (needs_cs_close)
|
|
|
|
cs_close(&handle);
|
|
|
|
free(buf);
|
|
|
|
return count < 0 ? count : 0;
|
|
|
|
|
|
|
|
err:
|
|
|
|
if (fd >= 0)
|
|
|
|
close(fd);
|
|
|
|
count = -1;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2024-03-29 14:58:11 -07:00
|
|
|
static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
|
|
|
|
struct annotate_args *args)
|
|
|
|
{
|
|
|
|
struct annotation *notes = symbol__annotation(sym);
|
|
|
|
struct map *map = args->ms.map;
|
|
|
|
u64 start = map__rip_2objdump(map, sym->start);
|
2024-08-03 17:20:07 +02:00
|
|
|
u64 len;
|
2024-03-29 14:58:11 -07:00
|
|
|
u64 offset;
|
2024-10-16 16:56:21 -07:00
|
|
|
int i, count, free_count;
|
2024-03-29 14:58:11 -07:00
|
|
|
bool is_64bit = false;
|
|
|
|
bool needs_cs_close = false;
|
|
|
|
u8 *buf = NULL;
|
|
|
|
csh handle;
|
2024-10-16 16:56:21 -07:00
|
|
|
cs_insn *insn = NULL;
|
2024-03-29 14:58:11 -07:00
|
|
|
char disasm_buf[512];
|
|
|
|
struct disasm_line *dl;
|
|
|
|
|
|
|
|
if (args->options->objdump_path)
|
|
|
|
return -1;
|
|
|
|
|
2024-08-03 17:20:07 +02:00
|
|
|
buf = read_symbol(filename, map, sym, &len, &is_64bit);
|
2024-03-29 14:58:11 -07:00
|
|
|
if (buf == NULL)
|
2024-08-03 17:20:07 +02:00
|
|
|
return -1;
|
2024-03-29 14:58:11 -07:00
|
|
|
|
|
|
|
/* add the function address and name */
|
|
|
|
scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
|
|
|
|
start, sym->name);
|
|
|
|
|
|
|
|
args->offset = -1;
|
|
|
|
args->line = disasm_buf;
|
|
|
|
args->line_nr = 0;
|
|
|
|
args->fileloc = NULL;
|
|
|
|
args->ms.sym = sym;
|
|
|
|
|
|
|
|
dl = disasm_line__new(args);
|
|
|
|
if (dl == NULL)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
annotation_line__add(&dl->al, ¬es->src->source);
|
|
|
|
|
2024-08-03 17:20:07 +02:00
|
|
|
if (open_capstone_handle(args, is_64bit, &handle) < 0)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
needs_cs_close = true;
|
|
|
|
|
2024-10-16 16:56:21 -07:00
|
|
|
free_count = count = cs_disasm(handle, buf, len, start, len, &insn);
|
2024-03-29 14:58:11 -07:00
|
|
|
for (i = 0, offset = 0; i < count; i++) {
|
2024-03-29 14:58:12 -07:00
|
|
|
int printed;
|
|
|
|
|
|
|
|
printed = scnprintf(disasm_buf, sizeof(disasm_buf),
|
|
|
|
" %-7s %s",
|
|
|
|
insn[i].mnemonic, insn[i].op_str);
|
|
|
|
print_capstone_detail(&insn[i], disasm_buf + printed,
|
|
|
|
sizeof(disasm_buf) - printed, args,
|
|
|
|
start + offset);
|
2024-03-29 14:58:11 -07:00
|
|
|
|
|
|
|
args->offset = offset;
|
|
|
|
args->line = disasm_buf;
|
|
|
|
|
|
|
|
dl = disasm_line__new(args);
|
|
|
|
if (dl == NULL)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
annotation_line__add(&dl->al, ¬es->src->source);
|
|
|
|
|
|
|
|
offset += insn[i].size;
|
|
|
|
}
|
|
|
|
|
2024-04-24 17:51:56 -07:00
|
|
|
/* It failed in the middle: probably due to unknown instructions */
|
|
|
|
if (offset != len) {
|
|
|
|
struct list_head *list = ¬es->src->source;
|
|
|
|
|
|
|
|
/* Discard all lines and fallback to objdump */
|
|
|
|
while (!list_empty(list)) {
|
|
|
|
dl = list_first_entry(list, struct disasm_line, al.node);
|
|
|
|
|
|
|
|
list_del_init(&dl->al.node);
|
|
|
|
disasm_line__free(dl);
|
|
|
|
}
|
|
|
|
count = -1;
|
|
|
|
}
|
|
|
|
|
2024-03-29 14:58:11 -07:00
|
|
|
out:
|
2024-10-16 16:56:21 -07:00
|
|
|
if (needs_cs_close) {
|
2024-03-29 14:58:11 -07:00
|
|
|
cs_close(&handle);
|
2024-10-16 16:56:21 -07:00
|
|
|
if (free_count > 0)
|
|
|
|
cs_free(insn, free_count);
|
|
|
|
}
|
2024-03-29 14:58:11 -07:00
|
|
|
free(buf);
|
|
|
|
return count < 0 ? count : 0;
|
|
|
|
|
|
|
|
err:
|
|
|
|
if (needs_cs_close) {
|
|
|
|
struct disasm_line *tmp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* It probably failed in the middle of the above loop.
|
|
|
|
* Release any resources it might add.
|
|
|
|
*/
|
|
|
|
list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) {
|
|
|
|
list_del(&dl->al.node);
|
2024-10-19 23:41:55 +08:00
|
|
|
disasm_line__free(dl);
|
2024-03-29 14:58:11 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
count = -1;
|
|
|
|
goto out;
|
|
|
|
}
|
2024-11-11 12:17:33 -03:00
|
|
|
#else // HAVE_LIBCAPSTONE_SUPPORT
|
|
|
|
static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
|
|
|
|
struct annotate_args *args __maybe_unused)
|
|
|
|
{
|
|
|
|
symbol__disassembler_missing("capstone", filename, sym);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym,
|
|
|
|
struct annotate_args *args __maybe_unused)
|
|
|
|
{
|
|
|
|
symbol__disassembler_missing("capstone powerpc", filename, sym);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
#endif // HAVE_LIBCAPSTONE_SUPPORT
|
2024-03-29 14:58:11 -07:00
|
|
|
|
perf annotate: Add support to capture and parse raw instruction in powerpc using dso__data_read_offset utility
Add support to capture and parse raw instruction in powerpc.
Currently, the perf tool infrastructure uses two ways to disassemble
and understand the instruction. One is objdump and other option is
via libcapstone.
Currently, the perf tool infrastructure uses "--no-show-raw-insn" option
with "objdump" while disassemble. Example from powerpc with this option
for an instruction address is:
Snippet from:
objdump --start-address=<address> --stop-address=<address> -d --no-show-raw-insn -C <vmlinux>
c0000000010224b4: lwz r10,0(r9)
This line "lwz r10,0(r9)" is parsed to extract instruction name,
registers names and offset. Also to find whether there is a memory
reference in the operands, "memory_ref_char" field of objdump is used.
For x86, "(" is used as memory_ref_char to tackle instructions of the
form "mov (%rax), %rcx".
In case of powerpc, instructions using "(" are not the only memory
instructions. For example, the above instruction can also be of extended form (X
form) "lwzx r10,0,r19". In order to easily identify the instruction category
and extract the source/target registers, this patch adds support for using the raw
instruction for powerpc. The approach used is to read the raw instruction
directly from the DSO file using the "dso__data_read_offset" utility, which
is already implemented in the perf infrastructure in "util/dso.c".
Example:
38 01 81 e8 ld r4,312(r1)
Here "38 01 81 e8" is the raw instruction representation. In powerpc,
this translates to instruction form: "ld RT,DS(RA)" and binary code
as:
| 58 | RT | RA | DS | |
-------------------------------------
0 6 11 16 30 31
Function "symbol__disassemble_dso" is updated to read raw instruction
directly from DSO using dso__data_read_offset utility. In case of
above example, this captures:
line: 38 01 81 e8
The above works well when 'perf report' is invoked with only sort keys
for data type ie type and typeoff.
Because there is no instruction level annotation needed if only data
type information is requested for.
For annotating sample, along with type and typeoff sort key, "sym" sort
key is also needed. And by default invoking just "perf report" uses sort
key "sym" that displays the symbol information.
With approach changes in powerpc which first reads DSO for raw
instruction, "perf annotate" and "perf report" + a key breaks since
it doesn't do the instruction level disassembly.
Snippet of result from 'perf report':
Samples: 1K of event 'mem-loads', 4000 Hz, Event count (approx.): 937238
do_work /usr/bin/pmlogger [Percent: local period]
Percent│ ea230010
│ 3a550010
│ 3a600000
│ 38f60001
│ 39490008
│ 42400438
51.44 │ 81290008
│ 7d485378
Here, raw instruction is displayed in the output instead of human
readable annotated form.
One way to get the appropriate data is to specify "--objdump path", by
which code annotation will be done. But the default behaviour will be
changed. To fix this breakage, check if the "sym" sort key is set. If so,
fall back and use the libcapstone/objdump way of disassembling the sample.
With the changes and "perf report"
Samples: 1K of event 'mem-loads', 4000 Hz, Event count (approx.): 937238
do_work /usr/bin/pmlogger [Percent: local period]
Percent│ ld r17,16(r3)
│ addi r18,r21,16
│ li r19,0
│ 8b0: rldicl r10,r10,63,33
│ addi r10,r10,1
│ mtctr r10
│ ↓ b 8e4
│ 8c0: addi r7,r22,1
│ addi r10,r9,8
│ ↓ bdz d00
51.44 │ lwz r9,8(r9)
│ mr r8,r10
│ cmpw r20,r9
Committer notes:
Just add the extern for 'sort_order' in disasm.c so that we don't end up
breaking the build due to this type collision with capstone and libbpf:
In file included from /usr/include/capstone/capstone.h:325,
from /git/perf-6.10.0/tools/perf/util/print_insn.h:23,
from builtin-script.c:38:
/usr/include/capstone/bpf.h:94:14: error: 'bpf_insn' defined as wrong kind of tag
94 | typedef enum bpf_insn {
I reported this to the bpf mailing list, see one of the links below.
Reviewed-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Akanksha J N <akanksha@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Disha Goel <disgoel@linux.vnet.ibm.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Link: https://lore.kernel.org/lkml/20240718084358.72242-6-atrajeev@linux.vnet.ibm.com
Link: https://lore.kernel.org/bpf/ZqOltPk9VQGgJZAA@x1/T/#u
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-07-18 14:13:48 +05:30
|
|
|
static int symbol__disassemble_raw(char *filename, struct symbol *sym,
|
|
|
|
struct annotate_args *args)
|
|
|
|
{
|
|
|
|
struct annotation *notes = symbol__annotation(sym);
|
|
|
|
struct map *map = args->ms.map;
|
|
|
|
struct dso *dso = map__dso(map);
|
|
|
|
u64 start = map__rip_2objdump(map, sym->start);
|
|
|
|
u64 end = map__rip_2objdump(map, sym->end);
|
|
|
|
u64 len = end - start;
|
|
|
|
u64 offset;
|
|
|
|
int i, count;
|
|
|
|
u8 *buf = NULL;
|
|
|
|
char disasm_buf[512];
|
|
|
|
struct disasm_line *dl;
|
|
|
|
u32 *line;
|
|
|
|
|
|
|
|
/* Return if objdump is specified explicitly */
|
|
|
|
if (args->options->objdump_path)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
pr_debug("Reading raw instruction from : %s using dso__data_read_offset\n", filename);
|
|
|
|
|
|
|
|
buf = malloc(len);
|
|
|
|
if (buf == NULL)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
count = dso__data_read_offset(dso, NULL, sym->start, buf, len);
|
|
|
|
|
|
|
|
line = (u32 *)buf;
|
|
|
|
|
|
|
|
if ((u64)count != len)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
/* add the function address and name */
|
|
|
|
scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
|
|
|
|
start, sym->name);
|
|
|
|
|
|
|
|
args->offset = -1;
|
|
|
|
args->line = disasm_buf;
|
|
|
|
args->line_nr = 0;
|
|
|
|
args->fileloc = NULL;
|
|
|
|
args->ms.sym = sym;
|
|
|
|
|
|
|
|
dl = disasm_line__new(args);
|
|
|
|
if (dl == NULL)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
annotation_line__add(&dl->al, ¬es->src->source);
|
|
|
|
|
|
|
|
/* Each raw instruction is 4 byte */
|
|
|
|
count = len/4;
|
|
|
|
|
|
|
|
for (i = 0, offset = 0; i < count; i++) {
|
|
|
|
args->offset = offset;
|
|
|
|
sprintf(args->line, "%x", line[i]);
|
|
|
|
dl = disasm_line__new(args);
|
|
|
|
if (dl == NULL)
|
2024-10-19 23:41:57 +08:00
|
|
|
break;
|
perf annotate: Add support to capture and parse raw instruction in powerpc using dso__data_read_offset utility
Add support to capture and parse raw instruction in powerpc.
Currently, the perf tool infrastructure uses two ways to disassemble
and understand the instruction. One is objdump and other option is
via libcapstone.
Currently, the perf tool infrastructure uses "--no-show-raw-insn" option
with "objdump" while disassemble. Example from powerpc with this option
for an instruction address is:
Snippet from:
objdump --start-address=<address> --stop-address=<address> -d --no-show-raw-insn -C <vmlinux>
c0000000010224b4: lwz r10,0(r9)
This line "lwz r10,0(r9)" is parsed to extract instruction name,
registers names and offset. Also to find whether there is a memory
reference in the operands, "memory_ref_char" field of objdump is used.
For x86, "(" is used as memory_ref_char to tackle instructions of the
form "mov (%rax), %rcx".
In case of powerpc, not all instructions using "(" are the only memory
instructions. Example, above instruction can also be of extended form (X
form) "lwzx r10,0,r19". Inorder to easy identify the instruction category
and extract the source/target registers, patch adds support to use raw
instruction for powerpc. Approach used is to read the raw instruction
directly from the DSO file using "dso__data_read_offset" utility which
is already implemented in perf infrastructure in "util/dso.c".
Example:
38 01 81 e8 ld r4,312(r1)
Here "38 01 81 e8" is the raw instruction representation. In powerpc,
this translates to instruction form: "ld RT,DS(RA)" and binary code
as:
| 58 | RT | RA | DS | |
-------------------------------------
0 6 11 16 30 31
Function "symbol__disassemble_dso" is updated to read raw instruction
directly from DSO using dso__data_read_offset utility. In case of
above example, this captures:
line: 38 01 81 e8
The above works well when 'perf report' is invoked with only sort keys
for data type ie type and typeoff.
Because there is no instruction level annotation needed if only data
type information is requested for.
For annotating sample, along with type and typeoff sort key, "sym" sort
key is also needed. And by default invoking just "perf report" uses sort
key "sym" that displays the symbol information.
With approach changes in powerpc which first reads DSO for raw
instruction, "perf annotate" and "perf report" + a key breaks since
it doesn't do the instruction level disassembly.
Snippet of result from 'perf report':
Samples: 1K of event 'mem-loads', 4000 Hz, Event count (approx.): 937238
do_work /usr/bin/pmlogger [Percent: local period]
Percent│ ea230010
│ 3a550010
│ 3a600000
│ 38f60001
│ 39490008
│ 42400438
51.44 │ 81290008
│ 7d485378
Here, raw instruction is displayed in the output instead of human
readable annotated form.
One way to get the appropriate data is to specify "--objdump path", by
which code annotation will be done. But the default behaviour will be
changed. To fix this breakage, check if "sym" sort key is set. If so
fallback and use the libcapstone/objdump way of disassmbling the sample.
With the changes and "perf report"
Samples: 1K of event 'mem-loads', 4000 Hz, Event count (approx.): 937238
do_work /usr/bin/pmlogger [Percent: local period]
Percent│ ld r17,16(r3)
│ addi r18,r21,16
│ li r19,0
│ 8b0: rldicl r10,r10,63,33
│ addi r10,r10,1
│ mtctr r10
│ ↓ b 8e4
│ 8c0: addi r7,r22,1
│ addi r10,r9,8
│ ↓ bdz d00
51.44 │ lwz r9,8(r9)
│ mr r8,r10
│ cmpw r20,r9
Committer notes:
Just add the extern for 'sort_order' in disasm.c so that we don't end up
breaking the build due to this type colision with capstone and libbpf:
In file included from /usr/include/capstone/capstone.h:325,
from /git/perf-6.10.0/tools/perf/util/print_insn.h:23,
from builtin-script.c:38:
/usr/include/capstone/bpf.h:94:14: error: 'bpf_insn' defined as wrong kind of tag
94 | typedef enum bpf_insn {
I reported this to the bpf mailing list, see one of the links below.
Reviewed-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Akanksha J N <akanksha@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Disha Goel <disgoel@linux.vnet.ibm.com>
Cc: Hari Bathini <hbathini@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Link: https://lore.kernel.org/lkml/20240718084358.72242-6-atrajeev@linux.vnet.ibm.com
Link: https://lore.kernel.org/bpf/ZqOltPk9VQGgJZAA@x1/T/#u
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-07-18 14:13:48 +05:30
|
|
|
|
|
|
|
annotation_line__add(&dl->al, ¬es->src->source);
|
|
|
|
offset += 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* It failed in the middle */
|
|
|
|
if (offset != len) {
|
|
|
|
struct list_head *list = ¬es->src->source;
|
|
|
|
|
|
|
|
/* Discard all lines and fallback to objdump */
|
|
|
|
while (!list_empty(list)) {
|
|
|
|
dl = list_first_entry(list, struct disasm_line, al.node);
|
|
|
|
|
|
|
|
list_del_init(&dl->al.node);
|
|
|
|
disasm_line__free(dl);
|
|
|
|
}
|
|
|
|
count = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
free(buf);
|
|
|
|
return count < 0 ? count : 0;
|
|
|
|
|
|
|
|
err:
|
|
|
|
count = -1;
|
|
|
|
goto out;
|
|
|
|
}
|
perf annotate: LLVM-based disassembler
Support using LLVM as a disassembler method, allowing helperless
annotation in non-distro builds. (It is also much faster than
using libbfd or bfd objdump on binaries with a lot of debug
information.)
This is nearly identical to the output of llvm-objdump; there are
some very rare whitespace differences, some minor changes to demangling
(since we use perf's regular demangling and not LLVM's own) and
the occasional case where llvm-objdump makes a different choice
when multiple symbols share the same address.
It should work across all of LLVM's supported architectures, although
I've only tested 64-bit x86, and finding the right triple from perf's
idea of machine architecture can sometimes be a bit tricky. Ideally, we
should have some way of finding the triplet just from the file itself.
Committer notes:
Address this on 32-bit systems by using PRIu64 from inttypes.h
3 17.58 almalinux:9-i386 : FAIL gcc version 11.4.1 20231218 (Red Hat 11.4.1-3) (GCC)
util/llvm-c-helpers.cpp: In function ‘char* make_symbol_relative_string(dso*, const char*, u64, u64)’:
util/llvm-c-helpers.cpp:150:52: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘u64’ {aka
+‘long long unsigned int’} [-Werror=format=]
150 | snprintf(buf, sizeof(buf), "%s+0x%lx",
| ~~^
| |
| long unsigned int
| %llx
151 | demangled ? demangled : sym_name, addr - base_addr);
| ~~~~~~~~~~~~~~~~
| |
| u64 {aka long long unsigned int}
cc1plus: all warnings being treated as errors
Signed-off-by: Steinar H. Gunderson <sesse@google.com>
Cc: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20240803152008.2818485-3-sesse@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-08-03 17:20:08 +02:00
|
|
|
|
|
|
|
#ifdef HAVE_LIBLLVM_SUPPORT
|
|
|
|
#include <llvm-c/Disassembler.h>
|
|
|
|
#include <llvm-c/Target.h>
|
|
|
|
#include "util/llvm-c-helpers.h"
|
|
|
|
|
|
|
|
struct symbol_lookup_storage {
|
|
|
|
u64 branch_addr;
|
|
|
|
u64 pcrel_load_addr;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Whenever LLVM wants to resolve an address into a symbol, it calls this
|
|
|
|
* callback. We don't ever actually _return_ anything (in particular, because
|
|
|
|
* it puts quotation marks around what we return), but we use this as a hint
|
|
|
|
* that there is a branch or PC-relative address in the expression that we
|
|
|
|
* should add some textual annotation for after the instruction. The caller
|
|
|
|
* will use this information to add the actual annotation.
|
|
|
|
*/
|
|
|
|
static const char *
|
|
|
|
symbol_lookup_callback(void *disinfo, uint64_t value,
|
|
|
|
uint64_t *ref_type,
|
|
|
|
uint64_t address __maybe_unused,
|
|
|
|
const char **ref __maybe_unused)
|
|
|
|
{
|
|
|
|
struct symbol_lookup_storage *storage = disinfo;
|
|
|
|
|
|
|
|
if (*ref_type == LLVMDisassembler_ReferenceType_In_Branch)
|
|
|
|
storage->branch_addr = value;
|
|
|
|
else if (*ref_type == LLVMDisassembler_ReferenceType_In_PCrel_Load)
|
|
|
|
storage->pcrel_load_addr = value;
|
|
|
|
*ref_type = LLVMDisassembler_ReferenceType_InOut_None;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int symbol__disassemble_llvm(char *filename, struct symbol *sym,
|
|
|
|
struct annotate_args *args)
|
|
|
|
{
|
|
|
|
struct annotation *notes = symbol__annotation(sym);
|
|
|
|
struct map *map = args->ms.map;
|
|
|
|
struct dso *dso = map__dso(map);
|
|
|
|
u64 start = map__rip_2objdump(map, sym->start);
|
|
|
|
u8 *buf;
|
|
|
|
u64 len;
|
|
|
|
u64 pc;
|
|
|
|
bool is_64bit;
|
|
|
|
char triplet[64];
|
|
|
|
char disasm_buf[2048];
|
|
|
|
size_t disasm_len;
|
|
|
|
struct disasm_line *dl;
|
|
|
|
LLVMDisasmContextRef disasm = NULL;
|
|
|
|
struct symbol_lookup_storage storage;
|
|
|
|
char *line_storage = NULL;
|
|
|
|
size_t line_storage_len = 0;
|
|
|
|
int ret = -1;
|
|
|
|
|
|
|
|
if (args->options->objdump_path)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
LLVMInitializeAllTargetInfos();
|
|
|
|
LLVMInitializeAllTargetMCs();
|
|
|
|
LLVMInitializeAllDisassemblers();
|
|
|
|
|
|
|
|
buf = read_symbol(filename, map, sym, &len, &is_64bit);
|
|
|
|
if (buf == NULL)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (arch__is(args->arch, "x86")) {
|
|
|
|
if (is_64bit)
|
|
|
|
scnprintf(triplet, sizeof(triplet), "x86_64-pc-linux");
|
|
|
|
else
|
|
|
|
scnprintf(triplet, sizeof(triplet), "i686-pc-linux");
|
|
|
|
} else {
|
|
|
|
scnprintf(triplet, sizeof(triplet), "%s-linux-gnu",
|
|
|
|
args->arch->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
disasm = LLVMCreateDisasm(triplet, &storage, 0, NULL,
|
|
|
|
symbol_lookup_callback);
|
|
|
|
if (disasm == NULL)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
if (args->options->disassembler_style &&
|
|
|
|
!strcmp(args->options->disassembler_style, "intel"))
|
|
|
|
LLVMSetDisasmOptions(disasm,
|
|
|
|
LLVMDisassembler_Option_AsmPrinterVariant);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This needs to be set after AsmPrinterVariant, due to a bug in LLVM;
|
|
|
|
* setting AsmPrinterVariant makes a new instruction printer, making it
|
|
|
|
* forget about the PrintImmHex flag (which is applied before if both
|
|
|
|
* are given to the same call).
|
|
|
|
*/
|
|
|
|
LLVMSetDisasmOptions(disasm, LLVMDisassembler_Option_PrintImmHex);
|
|
|
|
|
|
|
|
/* add the function address and name */
|
|
|
|
scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
|
|
|
|
start, sym->name);
|
|
|
|
|
|
|
|
args->offset = -1;
|
|
|
|
args->line = disasm_buf;
|
|
|
|
args->line_nr = 0;
|
|
|
|
args->fileloc = NULL;
|
|
|
|
args->ms.sym = sym;
|
|
|
|
|
|
|
|
dl = disasm_line__new(args);
|
|
|
|
if (dl == NULL)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
annotation_line__add(&dl->al, ¬es->src->source);
|
|
|
|
|
|
|
|
pc = start;
|
|
|
|
for (u64 offset = 0; offset < len; ) {
|
|
|
|
unsigned int ins_len;
|
|
|
|
|
|
|
|
storage.branch_addr = 0;
|
|
|
|
storage.pcrel_load_addr = 0;
|
|
|
|
|
|
|
|
ins_len = LLVMDisasmInstruction(disasm, buf + offset,
|
|
|
|
len - offset, pc,
|
|
|
|
disasm_buf, sizeof(disasm_buf));
|
|
|
|
if (ins_len == 0)
|
|
|
|
goto err;
|
|
|
|
disasm_len = strlen(disasm_buf);
|
|
|
|
|
|
|
|
if (storage.branch_addr != 0) {
|
|
|
|
char *name = llvm_name_for_code(dso, filename,
|
|
|
|
storage.branch_addr);
|
|
|
|
if (name != NULL) {
|
|
|
|
disasm_len += scnprintf(disasm_buf + disasm_len,
|
|
|
|
sizeof(disasm_buf) -
|
|
|
|
disasm_len,
|
|
|
|
" <%s>", name);
|
|
|
|
free(name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (storage.pcrel_load_addr != 0) {
|
|
|
|
char *name = llvm_name_for_data(dso, filename,
|
|
|
|
storage.pcrel_load_addr);
|
|
|
|
disasm_len += scnprintf(disasm_buf + disasm_len,
|
|
|
|
sizeof(disasm_buf) - disasm_len,
|
|
|
|
" # %#"PRIx64,
|
|
|
|
storage.pcrel_load_addr);
|
|
|
|
if (name) {
|
|
|
|
disasm_len += scnprintf(disasm_buf + disasm_len,
|
|
|
|
sizeof(disasm_buf) -
|
|
|
|
disasm_len,
|
|
|
|
" <%s>", name);
|
|
|
|
free(name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
args->offset = offset;
|
|
|
|
args->line = expand_tabs(disasm_buf, &line_storage,
|
|
|
|
&line_storage_len);
|
|
|
|
args->line_nr = 0;
|
|
|
|
args->fileloc = NULL;
|
|
|
|
args->ms.sym = sym;
|
|
|
|
|
|
|
|
llvm_addr2line(filename, pc, &args->fileloc,
|
|
|
|
(unsigned int *)&args->line_nr, false, NULL);
|
|
|
|
|
|
|
|
dl = disasm_line__new(args);
|
|
|
|
if (dl == NULL)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
annotation_line__add(&dl->al, ¬es->src->source);
|
|
|
|
|
|
|
|
free(args->fileloc);
|
|
|
|
pc += ins_len;
|
|
|
|
offset += ins_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
err:
|
|
|
|
LLVMDisasmDispose(disasm);
|
|
|
|
free(buf);
|
|
|
|
free(line_storage);
|
|
|
|
return ret;
|
|
|
|
}
|
2024-11-11 12:17:33 -03:00
|
|
|
#else // HAVE_LIBLLVM_SUPPORT
|
|
|
|
static int symbol__disassemble_llvm(char *filename, struct symbol *sym,
|
|
|
|
struct annotate_args *args __maybe_unused)
|
|
|
|
{
|
|
|
|
symbol__disassembler_missing("LLVM", filename, sym);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
#endif // HAVE_LIBLLVM_SUPPORT
|
perf annotate: LLVM-based disassembler
Support using LLVM as a disassembler method, allowing helperless
annotation in non-distro builds. (It is also much faster than
using libbfd or bfd objdump on binaries with a lot of debug
information.)
This is nearly identical to the output of llvm-objdump; there are
some very rare whitespace differences, some minor changes to demangling
(since we use perf's regular demangling and not LLVM's own) and
the occasional case where llvm-objdump makes a different choice
when multiple symbols share the same address.
It should work across all of LLVM's supported architectures, although
I've only tested 64-bit x86, and finding the right triple from perf's
idea of machine architecture can sometimes be a bit tricky. Ideally, we
should have some way of finding the triplet just from the file itself.
Committer notes:
Address this on 32-bit systems by using PRIu64 from inttypes.h
3 17.58 almalinux:9-i386 : FAIL gcc version 11.4.1 20231218 (Red Hat 11.4.1-3) (GCC)
util/llvm-c-helpers.cpp: In function ‘char* make_symbol_relative_string(dso*, const char*, u64, u64)’:
util/llvm-c-helpers.cpp:150:52: error: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘u64’ {aka
+‘long long unsigned int’} [-Werror=format=]
150 | snprintf(buf, sizeof(buf), "%s+0x%lx",
| ~~^
| |
| long unsigned int
| %llx
151 | demangled ? demangled : sym_name, addr - base_addr);
| ~~~~~~~~~~~~~~~~
| |
| u64 {aka long long unsigned int}
cc1plus: all warnings being treated as errors
Signed-off-by: Steinar H. Gunderson <sesse@google.com>
Cc: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20240803152008.2818485-3-sesse@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-08-03 17:20:08 +02:00
|
|
|
|
2024-03-29 14:58:10 -07:00
|
|
|
/*
 * Return a version of line in which every tab has been replaced by spaces up
 * to the next column that is a multiple of 8. A line without tabs is returned
 * as-is and storage is left untouched. Otherwise a new buffer is allocated,
 * the old *storage is freed, and *storage / *storage_len are updated to
 * describe the new buffer. NULL is returned if the allocation fails.
 */
static char *expand_tabs(char *line, char **storage, size_t *storage_len)
{
	const size_t in_len = strlen(line);
	size_t tabs = 0, out = 0, capacity;
	char *expanded;

	for (const char *p = line; *p != '\0'; p++) {
		if (*p == '\t')
			tabs++;
	}

	if (tabs == 0)
		return line;

	/*
	 * Room for every input character and the '\0'; each tab turns into
	 * at most 8 spaces, i.e. up to 7 more bytes than the tab itself.
	 */
	capacity = in_len + 1 + (tabs * 7);

	expanded = malloc(capacity);
	if (expanded == NULL) {
		pr_err("Failure allocating memory for tab expansion\n");
		return NULL;
	}

	for (size_t in = 0; in < in_len; in++) {
		if (line[in] != '\t') {
			expanded[out++] = line[in];
			continue;
		}

		/* A tab always emits one space, then pads to the tab stop. */
		do {
			expanded[out++] = ' ';
		} while (out % 8 != 0);
	}
	expanded[out] = '\0';

	/* Release the old buffer only now that the copy is complete. */
	free(*storage);
	*storage = expanded;
	*storage_len = capacity;
	return expanded;
}
|
|
|
|
|
2024-11-11 12:17:32 -03:00
|
|
|
static int symbol__disassemble_objdump(const char *filename, struct symbol *sym,
|
|
|
|
struct annotate_args *args)
|
2024-03-29 14:58:10 -07:00
|
|
|
{
|
|
|
|
struct annotation_options *opts = &annotate_opts;
|
|
|
|
struct map *map = args->ms.map;
|
|
|
|
struct dso *dso = map__dso(map);
|
|
|
|
char *command;
|
|
|
|
FILE *file;
|
|
|
|
int lineno = 0;
|
|
|
|
char *fileloc = NULL;
|
|
|
|
int nline;
|
|
|
|
char *line;
|
|
|
|
size_t line_len;
|
|
|
|
const char *objdump_argv[] = {
|
|
|
|
"/bin/sh",
|
|
|
|
"-c",
|
|
|
|
NULL, /* Will be the objdump command to run. */
|
|
|
|
"--",
|
|
|
|
NULL, /* Will be the symfs path. */
|
|
|
|
NULL,
|
|
|
|
};
|
|
|
|
struct child_process objdump_process;
|
2024-11-11 12:17:32 -03:00
|
|
|
int err;
|
2024-03-29 14:58:11 -07:00
|
|
|
|
2024-03-29 14:58:10 -07:00
|
|
|
err = asprintf(&command,
|
|
|
|
"%s %s%s --start-address=0x%016" PRIx64
|
|
|
|
" --stop-address=0x%016" PRIx64
|
|
|
|
" %s -d %s %s %s %c%s%c %s%s -C \"$1\"",
|
|
|
|
opts->objdump_path ?: "objdump",
|
|
|
|
opts->disassembler_style ? "-M " : "",
|
|
|
|
opts->disassembler_style ?: "",
|
|
|
|
map__rip_2objdump(map, sym->start),
|
|
|
|
map__rip_2objdump(map, sym->end),
|
|
|
|
opts->show_linenr ? "-l" : "",
|
|
|
|
opts->show_asm_raw ? "" : "--no-show-raw-insn",
|
|
|
|
opts->annotate_src ? "-S" : "",
|
|
|
|
opts->prefix ? "--prefix " : "",
|
|
|
|
opts->prefix ? '"' : ' ',
|
|
|
|
opts->prefix ?: "",
|
|
|
|
opts->prefix ? '"' : ' ',
|
|
|
|
opts->prefix_strip ? "--prefix-strip=" : "",
|
|
|
|
opts->prefix_strip ?: "");
|
|
|
|
|
|
|
|
if (err < 0) {
|
|
|
|
pr_err("Failure allocating memory for the command to run\n");
|
2024-11-11 12:17:32 -03:00
|
|
|
return err;
|
2024-03-29 14:58:10 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
pr_debug("Executing: %s\n", command);
|
|
|
|
|
|
|
|
objdump_argv[2] = command;
|
2024-11-11 12:17:32 -03:00
|
|
|
objdump_argv[4] = filename;
|
2024-03-29 14:58:10 -07:00
|
|
|
|
|
|
|
/* Create a pipe to read from for stdout */
|
|
|
|
memset(&objdump_process, 0, sizeof(objdump_process));
|
|
|
|
objdump_process.argv = objdump_argv;
|
|
|
|
objdump_process.out = -1;
|
|
|
|
objdump_process.err = -1;
|
|
|
|
objdump_process.no_stderr = 1;
|
|
|
|
if (start_command(&objdump_process)) {
|
|
|
|
pr_err("Failure starting to run %s\n", command);
|
|
|
|
err = -1;
|
|
|
|
goto out_free_command;
|
|
|
|
}
|
|
|
|
|
|
|
|
file = fdopen(objdump_process.out, "r");
|
|
|
|
if (!file) {
|
|
|
|
pr_err("Failure creating FILE stream for %s\n", command);
|
|
|
|
/*
|
|
|
|
* If we were using debug info should retry with
|
|
|
|
* original binary.
|
|
|
|
*/
|
|
|
|
err = -1;
|
|
|
|
goto out_close_stdout;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Storage for getline. */
|
|
|
|
line = NULL;
|
|
|
|
line_len = 0;
|
|
|
|
|
|
|
|
nline = 0;
|
|
|
|
while (!feof(file)) {
|
|
|
|
const char *match;
|
|
|
|
char *expanded_line;
|
|
|
|
|
|
|
|
if (getline(&line, &line_len, file) < 0 || !line)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* Skip lines containing "filename:" */
|
2024-11-11 12:17:32 -03:00
|
|
|
match = strstr(line, filename);
|
|
|
|
if (match && match[strlen(filename)] == ':')
|
2024-03-29 14:58:10 -07:00
|
|
|
continue;
|
|
|
|
|
|
|
|
expanded_line = strim(line);
|
|
|
|
expanded_line = expand_tabs(expanded_line, &line, &line_len);
|
|
|
|
if (!expanded_line)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The source code line number (lineno) needs to be kept in
|
|
|
|
* across calls to symbol__parse_objdump_line(), so that it
|
|
|
|
* can associate it with the instructions till the next one.
|
|
|
|
* See disasm_line__new() and struct disasm_line::line_nr.
|
|
|
|
*/
|
|
|
|
if (symbol__parse_objdump_line(sym, args, expanded_line,
|
|
|
|
&lineno, &fileloc) < 0)
|
|
|
|
break;
|
|
|
|
nline++;
|
|
|
|
}
|
|
|
|
free(line);
|
|
|
|
free(fileloc);
|
|
|
|
|
|
|
|
err = finish_command(&objdump_process);
|
|
|
|
if (err)
|
|
|
|
pr_err("Error running %s\n", command);
|
|
|
|
|
|
|
|
if (nline == 0) {
|
|
|
|
err = -1;
|
|
|
|
pr_err("No output from %s\n", command);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* kallsyms does not have symbol sizes so there may a nop at the end.
|
|
|
|
* Remove it.
|
|
|
|
*/
|
|
|
|
if (dso__is_kcore(dso))
|
|
|
|
delete_last_nop(sym);
|
|
|
|
|
|
|
|
fclose(file);
|
|
|
|
|
|
|
|
out_close_stdout:
|
|
|
|
close(objdump_process.out);
|
|
|
|
|
|
|
|
out_free_command:
|
|
|
|
free(command);
|
2024-11-11 12:17:32 -03:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
perf disasm: Allow configuring what disassemblers to use
The perf tools annotation code used for a long time parsing the output
of binutils's objdump (or its reimplementations, like llvm's) to then
parse and augment it with samples, allow navigation, etc.
More recently disassemblers from the capstone and llvm (libraries, not
parsing the output of tools using those libraries to mimic binutils's
objdump output) were introduced.
So when all those methods are available, there is a static preference
for a series of attempts of disassembling a binary, with the 'llvm,
capstone, objdump' sequence being hard coded.
This patch allows users to change that sequence, specifying via a 'perf
config' 'annotate.disassemblers' entry which and in what order
disassemblers should be attempted.
As alluded to in the comments in the source code of this series, this
flexibility is useful for users and developers alike, eliminating the
requirement to rebuild the tool with some specific set of libraries to
see how the output of disassembling would be for one of these methods.
root@x1:~# rm -f ~/.perfconfig
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
symbol__disassemble:
filename=/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux,
sym=update_load_avg, start=0xffffffffb6148fe0, en>
annotating [0x6ff7170]
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux :
[0x7407ca0] update_load_avg
Disassembled with llvm
annotate.disassemblers=llvm,capstone,objdump
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = capstone
root@x1:~#
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
Disassembled with capstone
annotate.disassemblers=capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=objdump,capstone
root@x1:~# perf config annotate.disassemblers
annotate.disassemblers=objdump,capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = objdump,capstone
root@x1:~# perf annotate -v --stdio2 update_load_avg
Executing: objdump --start-address=0xffffffff81148fe0 \
--stop-address=0xffffffff811497aa \
-d --no-show-raw-insn -S -C "$1"
Disassembled with objdump
annotate.disassemblers=objdump,capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent
Disassembly of section .text:
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
#define DO_DETACH 0x8
/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct cfs_rq *cfs_rq,
struct sched_entity *se,
int flags)
{
1.61 push %r15
push %r14
1.00 push %r13
mov %edx,%r13d
1.90 push %r12
push %rbp
mov %rsi,%rbp
push %rbx
mov %rdi,%rbx
sub $0x18,%rsp
}
/* rq->task_clock normalized against any time
this cfs_rq has spent throttled */
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
if (unlikely(cfs_rq->throttle_count))
15.14 mov 0x1a4(%rdi),%eax
root@x1:~#
After adding a way to select the disassembler from the command line a
'perf test' comparing the output of the various disassemblers should be
introduced, to test these codebases.
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Steinar H. Gunderson <sesse@google.com>
Link: https://lore.kernel.org/r/20241111151734.1018476-4-acme@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-11-11 12:17:34 -03:00
|
|
|
static int annotation_options__init_disassemblers(struct annotation_options *options)
|
|
|
|
{
|
|
|
|
char *disassembler;
|
|
|
|
|
|
|
|
if (options->disassemblers_str == NULL) {
|
|
|
|
const char *default_disassemblers_str =
|
|
|
|
#ifdef HAVE_LIBLLVM_SUPPORT
|
|
|
|
"llvm,"
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_LIBCAPSTONE_SUPPORT
|
|
|
|
"capstone,"
|
|
|
|
#endif
|
|
|
|
"objdump";
|
|
|
|
|
|
|
|
options->disassemblers_str = strdup(default_disassemblers_str);
|
|
|
|
if (!options->disassemblers_str)
|
|
|
|
goto out_enomem;
|
|
|
|
}
|
|
|
|
|
|
|
|
disassembler = strdup(options->disassemblers_str);
|
|
|
|
if (disassembler == NULL)
|
|
|
|
goto out_enomem;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
char *comma = strchr(disassembler, ',');
|
|
|
|
|
|
|
|
if (comma != NULL)
|
|
|
|
*comma = '\0';
|
|
|
|
|
|
|
|
options->disassemblers[options->nr_disassemblers++] = strim(disassembler);
|
|
|
|
|
|
|
|
if (comma == NULL)
|
|
|
|
break;
|
|
|
|
|
|
|
|
disassembler = comma + 1;
|
|
|
|
|
|
|
|
if (options->nr_disassemblers >= MAX_DISASSEMBLERS) {
|
|
|
|
pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n",
|
|
|
|
MAX_DISASSEMBLERS, disassembler);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
out_enomem:
|
|
|
|
pr_err("Not enough memory for annotate.disassemblers\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2024-11-11 12:17:32 -03:00
|
|
|
int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
|
|
|
|
{
|
perf disasm: Allow configuring what disassemblers to use
The perf tools annotation code used for a long time parsing the output
of binutils's objdump (or its reimplementations, like llvm's) to then
parse and augment it with samples, allow navigation, etc.
More recently disassemblers from the capstone and llvm (libraries, not
parsing the output of tools using those libraries to mimic binutils's
objdump output) were introduced.
So when all those methods are available, there is a static preference
for a series of attempts of disassembling a binary, with the 'llvm,
capstone, objdump' sequence being hard coded.
This patch allows users to change that sequence, specifying via a 'perf
config' 'annotate.disassemblers' entry which and in what order
disassemblers should be attempted.
As alluded to in the comments in the source code of this series, this
flexibility is useful for users and developers alike, eliminating the
requirement to rebuild the tool with some specific set of libraries to
see how the output of disassembling would be for one of these methods.
root@x1:~# rm -f ~/.perfconfig
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
symbol__disassemble:
filename=/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux,
sym=update_load_avg, start=0xffffffffb6148fe0, en>
annotating [0x6ff7170]
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux :
[0x7407ca0] update_load_avg
Disassembled with llvm
annotate.disassemblers=llvm,capstone,objdump
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = capstone
root@x1:~#
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
Disassembled with capstone
annotate.disassemblers=capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=objdump,capstone
root@x1:~# perf config annotate.disassemblers
annotate.disassemblers=objdump,capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = objdump,capstone
root@x1:~# perf annotate -v --stdio2 update_load_avg
Executing: objdump --start-address=0xffffffff81148fe0 \
--stop-address=0xffffffff811497aa \
-d --no-show-raw-insn -S -C "$1"
Disassembled with objdump
annotate.disassemblers=objdump,capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent
Disassembly of section .text:
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
#define DO_DETACH 0x8
/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct cfs_rq *cfs_rq,
struct sched_entity *se,
int flags)
{
1.61 push %r15
push %r14
1.00 push %r13
mov %edx,%r13d
1.90 push %r12
push %rbp
mov %rsi,%rbp
push %rbx
mov %rdi,%rbx
sub $0x18,%rsp
}
/* rq->task_clock normalized against any time
this cfs_rq has spent throttled */
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
if (unlikely(cfs_rq->throttle_count))
15.14 mov 0x1a4(%rdi),%eax
root@x1:~#
After adding a way to select the disassembler from the command line a
'perf test' comparing the output of the various disassemblers should be
introduced, to test these codebases.
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Steinar H. Gunderson <sesse@google.com>
Link: https://lore.kernel.org/r/20241111151734.1018476-4-acme@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-11-11 12:17:34 -03:00
|
|
|
struct annotation_options *options = args->options;
|
2024-11-11 12:17:32 -03:00
|
|
|
struct map *map = args->ms.map;
|
|
|
|
struct dso *dso = map__dso(map);
|
|
|
|
char symfs_filename[PATH_MAX];
|
|
|
|
bool delete_extract = false;
|
|
|
|
struct kcore_extract kce;
|
perf disasm: Allow configuring what disassemblers to use
The perf tools annotation code used for a long time parsing the output
of binutils's objdump (or its reimplementations, like llvm's) to then
parse and augment it with samples, allow navigation, etc.
More recently disassemblers from the capstone and llvm (libraries, not
parsing the output of tools using those libraries to mimic binutils's
objdump output) were introduced.
So when all those methods are available, there is a static preference
for a series of attempts of disassembling a binary, with the 'llvm,
capstone, objdump' sequence being hard coded.
This patch allows users to change that sequence, specifying via a 'perf
config' 'annotate.disassemblers' entry which and in what order
disassemblers should be attempted.
As alluded to in the comments in the source code of this series, this
flexibility is useful for users and developers alike, eliminating the
requirement to rebuild the tool with some specific set of libraries to
see how the output of disassembling would be for one of these methods.
root@x1:~# rm -f ~/.perfconfig
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
symbol__disassemble:
filename=/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux,
sym=update_load_avg, start=0xffffffffb6148fe0, en>
annotating [0x6ff7170]
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux :
[0x7407ca0] update_load_avg
Disassembled with llvm
annotate.disassemblers=llvm,capstone,objdump
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = capstone
root@x1:~#
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
Disassembled with capstone
annotate.disassemblers=capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=objdump,capstone
root@x1:~# perf config annotate.disassemblers
annotate.disassemblers=objdump,capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = objdump,capstone
root@x1:~# perf annotate -v --stdio2 update_load_avg
Executing: objdump --start-address=0xffffffff81148fe0 \
--stop-address=0xffffffff811497aa \
-d --no-show-raw-insn -S -C "$1"
Disassembled with objdump
annotate.disassemblers=objdump,capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent
Disassembly of section .text:
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
#define DO_DETACH 0x8
/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct cfs_rq *cfs_rq,
struct sched_entity *se,
int flags)
{
1.61 push %r15
push %r14
1.00 push %r13
mov %edx,%r13d
1.90 push %r12
push %rbp
mov %rsi,%rbp
push %rbx
mov %rdi,%rbx
sub $0x18,%rsp
}
/* rq->task_clock normalized against any time
this cfs_rq has spent throttled */
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
if (unlikely(cfs_rq->throttle_count))
15.14 mov 0x1a4(%rdi),%eax
root@x1:~#
After adding a way to select the disassembler from the command line a
'perf test' comparing the output of the various disassemblers should be
introduced, to test these codebases.
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Steinar H. Gunderson <sesse@google.com>
Link: https://lore.kernel.org/r/20241111151734.1018476-4-acme@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-11-11 12:17:34 -03:00
|
|
|
const char *disassembler;
|
2024-11-11 12:17:32 -03:00
|
|
|
bool decomp = false;
|
|
|
|
int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
|
|
|
|
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
|
|
|
|
symfs_filename, sym->name, map__unmap_ip(map, sym->start),
|
|
|
|
map__unmap_ip(map, sym->end));
|
|
|
|
|
|
|
|
pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name);
|
|
|
|
|
|
|
|
if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
|
|
|
|
return symbol__disassemble_bpf(sym, args);
|
|
|
|
} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
|
|
|
|
return symbol__disassemble_bpf_image(sym, args);
|
|
|
|
} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
|
|
|
|
return -1;
|
|
|
|
} else if (dso__is_kcore(dso)) {
|
|
|
|
kce.addr = map__rip_2objdump(map, sym->start);
|
|
|
|
kce.kcore_filename = symfs_filename;
|
|
|
|
kce.len = sym->end - sym->start;
|
|
|
|
kce.offs = sym->start;
|
|
|
|
|
|
|
|
if (!kcore_extract__create(&kce)) {
|
|
|
|
delete_extract = true;
|
|
|
|
strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename));
|
|
|
|
}
|
|
|
|
} else if (dso__needs_decompress(dso)) {
|
|
|
|
char tmp[KMOD_DECOMP_LEN];
|
|
|
|
|
|
|
|
if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
decomp = true;
|
|
|
|
strcpy(symfs_filename, tmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For powerpc data type profiling, use the dso__data_read_offset to
|
|
|
|
* read raw instruction directly and interpret the binary code to
|
|
|
|
* understand instructions and register fields. For sort keys as type
|
|
|
|
* and typeoff, disassemble to mnemonic notation is not required in
|
|
|
|
* case of powerpc.
|
|
|
|
*/
|
|
|
|
if (arch__is(args->arch, "powerpc")) {
|
|
|
|
extern const char *sort_order;
|
|
|
|
|
|
|
|
if (sort_order && !strstr(sort_order, "sym")) {
|
|
|
|
err = symbol__disassemble_raw(symfs_filename, sym, args);
|
|
|
|
if (err == 0)
|
|
|
|
goto out_remove_tmp;
|
2024-11-11 12:17:33 -03:00
|
|
|
|
2024-11-11 12:17:32 -03:00
|
|
|
err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args);
|
|
|
|
if (err == 0)
|
|
|
|
goto out_remove_tmp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
perf disasm: Allow configuring what disassemblers to use
The perf tools annotation code used for a long time parsing the output
of binutils's objdump (or its reimplementations, like llvm's) to then
parse and augment it with samples, allow navigation, etc.
More recently disassemblers from the capstone and llvm (libraries, not
parsing the output of tools using those libraries to mimic binutils's
objdump output) were introduced.
So when all those methods are available, there is a static preference
for a series of attempts of disassembling a binary, with the 'llvm,
capstone, objdump' sequence being hard coded.
This patch allows users to change that sequence, specifying via a 'perf
config' 'annotate.disassemblers' entry which and in what order
disassemblers should be attempted.
As alluded to in the comments in the source code of this series, this
flexibility is useful for users and developers alike, eliminating the
requirement to rebuild the tool with some specific set of libraries to
see how the output of disassembling would be for one of these methods.
root@x1:~# rm -f ~/.perfconfig
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
symbol__disassemble:
filename=/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux,
sym=update_load_avg, start=0xffffffffb6148fe0, en>
annotating [0x6ff7170]
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux :
[0x7407ca0] update_load_avg
Disassembled with llvm
annotate.disassemblers=llvm,capstone,objdump
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = capstone
root@x1:~#
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
Disassembled with capstone
annotate.disassemblers=capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=objdump,capstone
root@x1:~# perf config annotate.disassemblers
annotate.disassemblers=objdump,capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = objdump,capstone
root@x1:~# perf annotate -v --stdio2 update_load_avg
Executing: objdump --start-address=0xffffffff81148fe0 \
--stop-address=0xffffffff811497aa \
-d --no-show-raw-insn -S -C "$1"
Disassembled with objdump
annotate.disassemblers=objdump,capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent
Disassembly of section .text:
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
#define DO_DETACH 0x8
/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct cfs_rq *cfs_rq,
struct sched_entity *se,
int flags)
{
1.61 push %r15
push %r14
1.00 push %r13
mov %edx,%r13d
1.90 push %r12
push %rbp
mov %rsi,%rbp
push %rbx
mov %rdi,%rbx
sub $0x18,%rsp
}
/* rq->task_clock normalized against any time
this cfs_rq has spent throttled */
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
if (unlikely(cfs_rq->throttle_count))
15.14 mov 0x1a4(%rdi),%eax
root@x1:~#
After adding a way to select the disassembler from the command line a
'perf test' comparing the output of the various disassemblers should be
introduced, to test these codebases.
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Steinar H. Gunderson <sesse@google.com>
Link: https://lore.kernel.org/r/20241111151734.1018476-4-acme@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-11-11 12:17:34 -03:00
|
|
|
err = annotation_options__init_disassemblers(options);
|
|
|
|
if (err)
|
2024-11-11 12:17:32 -03:00
|
|
|
goto out_remove_tmp;
|
2024-11-11 12:17:33 -03:00
|
|
|
|
perf disasm: Allow configuring what disassemblers to use
The perf tools annotation code used for a long time parsing the output
of binutils's objdump (or its reimplementations, like llvm's) to then
parse and augment it with samples, allow navigation, etc.
More recently disassemblers from the capstone and llvm (libraries, not
parsing the output of tools using those libraries to mimic binutils's
objdump output) were introduced.
So when all those methods are available, there is a static preference
for a series of attempts of disassembling a binary, with the 'llvm,
capstone, objdump' sequence being hard coded.
This patch allows users to change that sequence, specifying via a 'perf
config' 'annotate.disassemblers' entry which and in what order
disassemblers should be attempted.
As alluded to in the comments in the source code of this series, this
flexibility is useful for users and developers alike, eliminating the
requirement to rebuild the tool with some specific set of libraries to
see how the output of disassembling would be for one of these methods.
root@x1:~# rm -f ~/.perfconfig
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
symbol__disassemble:
filename=/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux,
sym=update_load_avg, start=0xffffffffb6148fe0, en>
annotating [0x6ff7170]
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux :
[0x7407ca0] update_load_avg
Disassembled with llvm
annotate.disassemblers=llvm,capstone,objdump
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = capstone
root@x1:~#
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
Disassembled with capstone
annotate.disassemblers=capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=objdump,capstone
root@x1:~# perf config annotate.disassemblers
annotate.disassemblers=objdump,capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = objdump,capstone
root@x1:~# perf annotate -v --stdio2 update_load_avg
Executing: objdump --start-address=0xffffffff81148fe0 \
--stop-address=0xffffffff811497aa \
-d --no-show-raw-insn -S -C "$1"
Disassembled with objdump
annotate.disassemblers=objdump,capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent
Disassembly of section .text:
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
#define DO_DETACH 0x8
/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct cfs_rq *cfs_rq,
struct sched_entity *se,
int flags)
{
1.61 push %r15
push %r14
1.00 push %r13
mov %edx,%r13d
1.90 push %r12
push %rbp
mov %rsi,%rbp
push %rbx
mov %rdi,%rbx
sub $0x18,%rsp
}
/* rq->task_clock normalized against any time
this cfs_rq has spent throttled */
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
if (unlikely(cfs_rq->throttle_count))
15.14 mov 0x1a4(%rdi),%eax
root@x1:~#
After adding a way to select the disassembler from the command line a
'perf test' comparing the output of the various disassemblers should be
introduced, to test these codebases.
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Steinar H. Gunderson <sesse@google.com>
Link: https://lore.kernel.org/r/20241111151734.1018476-4-acme@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-11-11 12:17:34 -03:00
|
|
|
err = -1;
|
2024-11-11 12:17:33 -03:00
|
|
|
|
perf disasm: Allow configuring what disassemblers to use
The perf tools annotation code used for a long time parsing the output
of binutils's objdump (or its reimplementations, like llvm's) to then
parse and augment it with samples, allow navigation, etc.
More recently disassemblers from the capstone and llvm (libraries, not
parsing the output of tools using those libraries to mimic binutils's
objdump output) were introduced.
So when all those methods are available, there is a static preference
for a series of attempts of disassembling a binary, with the 'llvm,
capstone, objdump' sequence being hard coded.
This patch allows users to change that sequence, specifying via a 'perf
config' 'annotate.disassemblers' entry which and in what order
disassemblers should be attempted.
As alluded to in the comments in the source code of this series, this
flexibility is useful for users and developers alike, eliminating the
requirement to rebuild the tool with some specific set of libraries to
see how the output of disassembling would be for one of these methods.
root@x1:~# rm -f ~/.perfconfig
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
symbol__disassemble:
filename=/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux,
sym=update_load_avg, start=0xffffffffb6148fe0, en>
annotating [0x6ff7170]
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux :
[0x7407ca0] update_load_avg
Disassembled with llvm
annotate.disassemblers=llvm,capstone,objdump
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = capstone
root@x1:~#
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
Disassembled with capstone
annotate.disassemblers=capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=objdump,capstone
root@x1:~# perf config annotate.disassemblers
annotate.disassemblers=objdump,capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = objdump,capstone
root@x1:~# perf annotate -v --stdio2 update_load_avg
Executing: objdump --start-address=0xffffffff81148fe0 \
--stop-address=0xffffffff811497aa \
-d --no-show-raw-insn -S -C "$1"
Disassembled with objdump
annotate.disassemblers=objdump,capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent
Disassembly of section .text:
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
#define DO_DETACH 0x8
/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct cfs_rq *cfs_rq,
struct sched_entity *se,
int flags)
{
1.61 push %r15
push %r14
1.00 push %r13
mov %edx,%r13d
1.90 push %r12
push %rbp
mov %rsi,%rbp
push %rbx
mov %rdi,%rbx
sub $0x18,%rsp
}
/* rq->task_clock normalized against any time
this cfs_rq has spent throttled */
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
if (unlikely(cfs_rq->throttle_count))
15.14 mov 0x1a4(%rdi),%eax
root@x1:~#
After adding a way to select the disassembler from the command line a
'perf test' comparing the output of the various disassemblers should be
introduced, to test these codebases.
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Steinar H. Gunderson <sesse@google.com>
Link: https://lore.kernel.org/r/20241111151734.1018476-4-acme@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-11-11 12:17:34 -03:00
|
|
|
for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) {
|
|
|
|
disassembler = options->disassemblers[i];
|
2024-03-29 14:58:10 -07:00
|
|
|
|
perf disasm: Allow configuring what disassemblers to use
The perf tools annotation code used for a long time parsing the output
of binutils's objdump (or its reimplementations, like llvm's) to then
parse and augment it with samples, allow navigation, etc.
More recently disassemblers from the capstone and llvm (libraries, not
parsing the output of tools using those libraries to mimic binutils's
objdump output) were introduced.
So when all those methods are available, there is a static preference
for a series of attempts of disassembling a binary, with the 'llvm,
capstone, objdump' sequence being hard coded.
This patch allows users to change that sequence, specifying via a 'perf
config' 'annotate.disassemblers' entry which and in what order
disassemblers should be attempted.
As alluded to in the comments in the source code of this series, this
flexibility is useful for users and developers alike, eliminating the
requirement to rebuild the tool with some specific set of libraries to
see how the output of disassembling would be for one of these methods.
root@x1:~# rm -f ~/.perfconfig
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
symbol__disassemble:
filename=/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux,
sym=update_load_avg, start=0xffffffffb6148fe0, en>
annotating [0x6ff7170]
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux :
[0x7407ca0] update_load_avg
Disassembled with llvm
annotate.disassemblers=llvm,capstone,objdump
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = capstone
root@x1:~#
root@x1:~# perf annotate -v --stdio2 update_load_avg
<SNIP>
Disassembled with capstone
annotate.disassemblers=capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent 0xffffffff81148fe0 <update_load_avg>:
1.61 pushq %r15
pushq %r14
1.00 pushq %r13
movl %edx,%r13d
1.90 pushq %r12
pushq %rbp
movq %rsi,%rbp
pushq %rbx
movq %rdi,%rbx
subq $0x18,%rsp
15.14 movl 0x1a4(%rdi),%eax
root@x1:~# perf config annotate.disassemblers=objdump,capstone
root@x1:~# perf config annotate.disassemblers
annotate.disassemblers=objdump,capstone
root@x1:~# cat ~/.perfconfig
# this file is auto-generated.
[annotate]
disassemblers = objdump,capstone
root@x1:~# perf annotate -v --stdio2 update_load_avg
Executing: objdump --start-address=0xffffffff81148fe0 \
--stop-address=0xffffffff811497aa \
-d --no-show-raw-insn -S -C "$1"
Disassembled with objdump
annotate.disassemblers=objdump,capstone
Samples: 66 of event 'cpu_atom/cycles/P', 10000 Hz,
Event count (approx.): 5185444, [percent: local period]
update_load_avg()
/usr/lib/debug/lib/modules/6.11.4-201.fc40.x86_64/vmlinux
Percent
Disassembly of section .text:
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
ffffffff81148fe0 <update_load_avg>:
#define DO_ATTACH 0x4
#define DO_DETACH 0x8
/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct cfs_rq *cfs_rq,
struct sched_entity *se,
int flags)
{
1.61 push %r15
push %r14
1.00 push %r13
mov %edx,%r13d
1.90 push %r12
push %rbp
mov %rsi,%rbp
push %rbx
mov %rdi,%rbx
sub $0x18,%rsp
}
/* rq->task_clock normalized against any time
this cfs_rq has spent throttled */
static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
{
if (unlikely(cfs_rq->throttle_count))
15.14 mov 0x1a4(%rdi),%eax
root@x1:~#
After adding a way to select the disassembler from the command line a
'perf test' comparing the output of the various disassemblers should be
introduced, to test these codebases.
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Steinar H. Gunderson <sesse@google.com>
Link: https://lore.kernel.org/r/20241111151734.1018476-4-acme@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2024-11-11 12:17:34 -03:00
|
|
|
if (!strcmp(disassembler, "llvm"))
|
|
|
|
err = symbol__disassemble_llvm(symfs_filename, sym, args);
|
|
|
|
else if (!strcmp(disassembler, "capstone"))
|
|
|
|
err = symbol__disassemble_capstone(symfs_filename, sym, args);
|
|
|
|
else if (!strcmp(disassembler, "objdump"))
|
|
|
|
err = symbol__disassemble_objdump(symfs_filename, sym, args);
|
|
|
|
else
|
|
|
|
pr_debug("Unknown disassembler %s, skipping...\n", disassembler);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (err == 0) {
|
|
|
|
pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n",
|
|
|
|
disassembler, options->disassemblers_str);
|
|
|
|
}
|
2024-03-29 14:58:10 -07:00
|
|
|
out_remove_tmp:
|
|
|
|
if (decomp)
|
|
|
|
unlink(symfs_filename);
|
|
|
|
|
|
|
|
if (delete_extract)
|
|
|
|
kcore_extract__delete(&kce);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|