/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Signal trampoline for 64-bit processes in a ppc64 kernel for
 * use in the vDSO
 *
 * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
 * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com), IBM Corp.
 */
/*
 * powerpc/64/signal: Balance return predictor stack in signal trampoline
 *
 * Returning from an interrupt or syscall to a signal handler previously
 * began execution directly at the handler's entry point, with LR set to
 * the address of the sigreturn trampoline.  When the signal handler
 * function returned, it ran the trampoline.  It looked like this:
 *
 *	# interrupt at user address xyz
 *	# kernel stuff... signal is raised
 *	rfid
 *	# void handler(int sig)
 *	addis 2,12,.TOC.-.LCF0@ha
 *	addi 2,2,.TOC.-.LCF0@l
 *	mflr 0
 *	std 0,16(1)
 *	stdu 1,-96(1)
 *	# handler stuff
 *	ld 0,16(1)
 *	mtlr 0
 *	blr
 *	# __kernel_sigtramp_rt64
 *	addi r1,r1,__SIGNAL_FRAMESIZE
 *	li r0,__NR_rt_sigreturn
 *	sc
 *	# kernel executes rt_sigreturn
 *	rfid
 *	# back to user address xyz
 *
 * Note the blr with no matching bl.  This can corrupt the return
 * predictor.
 *
 * This is solved by instead resuming execution at the signal
 * trampoline, which then calls the signal handler.  The qtrace-tools
 * link_stack checker confirms the entire user/kernel/vdso cycle is
 * balanced after this change, whereas it was not beforehand.
 *
 * Alan confirms the DWARF unwind info still looks good.  gdb still
 * recognises the signal frame and can step into parent frames if it
 * breaks inside a signal handler.
 *
 * Performance is noisy and the overall change is not very significant
 * on a POWER9 here, but branch misses are consistently much lower on a
 * microbenchmark.  Performance counter stats for './signal', before:
 *
 *	13,085.72 msec task-clock	# 1.000 CPUs utilized
 *	45,024,760,101 cycles		# 3.441 GHz
 *	65,102,895,542 instructions	# 1.45 insn per cycle
 *	11,271,673,787 branches		# 861.372 M/sec
 *	59,468,979 branch-misses	# 0.53% of all branches
 *
 * and after:
 *
 *	12,989.09 msec task-clock	# 1.000 CPUs utilized
 *	44,692,719,559 cycles		# 3.441 GHz
 *	65,109,984,964 instructions	# 1.46 insn per cycle
 *	11,282,136,057 branches		# 868.585 M/sec
 *	39,786,942 branch-misses	# 0.35% of all branches
 *
 * Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
 * Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
 * Link: https://lore.kernel.org/r/20200511101952.1463138-1-npiggin@gmail.com
 */

#include <asm/cache.h>		/* IFETCH_ALIGN_BYTES */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/unistd.h>
#include <asm/vdso.h>
#include <asm/ptrace.h>		/* XXX for __SIGNAL_FRAMESIZE */

	.text
	.balign 8
	.balign IFETCH_ALIGN_BYTES
V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
.Lsigrt_start:
	bctrl	/* call the handler */
	addi	r1, r1, __SIGNAL_FRAMESIZE
	li	r0,__NR_rt_sigreturn
	sc
.Lsigrt_end:
V_FUNCTION_END(__kernel_sigtramp_rt64)

/* The .balign 8 above and the following zeros mimic the old stack
   trampoline layout.  The last magic value is the ucontext pointer,
   chosen in such a way that older libgcc unwind code returns a zero
   for a sigcontext pointer.  */
	.long 0,0,0
	.quad 0,-21*8

/* Register r1 can be found at offset 8 of a pt_regs structure.
   A pointer to the pt_regs is stored in memory at the old sp plus PTREGS.  */
#define cfa_save \
  .byte 0x0f;			/* DW_CFA_def_cfa_expression */	\
  .uleb128 9f - 1f;		/* length */			\
1:								\
  .byte 0x71; .sleb128 PTREGS;	/* DW_OP_breg1 */		\
  .byte 0x06;			/* DW_OP_deref */		\
  .byte 0x23; .uleb128 RSIZE;	/* DW_OP_plus_uconst */		\
  .byte 0x06;			/* DW_OP_deref */		\
9:

/* Register REGNO can be found at offset OFS of a pt_regs structure.
   A pointer to the pt_regs is stored in memory at the old sp plus PTREGS.  */
#define rsave(regno, ofs) \
  .byte 0x10;			/* DW_CFA_expression */		\
  .uleb128 regno;		/* regno */			\
  .uleb128 9f - 1f;		/* length */			\
1:								\
  .byte 0x71; .sleb128 PTREGS;	/* DW_OP_breg1 */		\
  .byte 0x06;			/* DW_OP_deref */		\
  .ifne ofs;							\
    .byte 0x23; .uleb128 ofs;	/* DW_OP_plus_uconst */		\
  .endif;							\
9:

/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
   of the VMX reg struct.  A pointer to the VMX reg struct is at VREGS in
   the pt_regs struct.  This macro is for REGNO == 0, and contains
   'subroutines' that the other macros jump to.  */
#define vsave_msr0(regno) \
  .byte 0x10;			/* DW_CFA_expression */		\
  .uleb128 regno + 77;		/* regno */			\
  .uleb128 9f - 1f;		/* length */			\
1:								\
  .byte 0x30 + regno;		/* DW_OP_lit0 */		\
2:								\
  .byte 0x40;			/* DW_OP_lit16 */		\
  .byte 0x1e;			/* DW_OP_mul */			\
3:								\
  .byte 0x71; .sleb128 PTREGS;	/* DW_OP_breg1 */		\
  .byte 0x06;			/* DW_OP_deref */		\
  .byte 0x12;			/* DW_OP_dup */			\
  .byte 0x23;			/* DW_OP_plus_uconst */		\
  .uleb128 33*RSIZE;		/* msr offset */		\
  .byte 0x06;			/* DW_OP_deref */		\
  .byte 0x0c; .long 1 << 25;	/* DW_OP_const4u */		\
  .byte 0x1a;			/* DW_OP_and */			\
  .byte 0x12;			/* DW_OP_dup, ret 0 if bra taken */ \
  .byte 0x30;			/* DW_OP_lit0 */		\
  .byte 0x29;			/* DW_OP_eq */			\
  .byte 0x28; .short 0x7fff;	/* DW_OP_bra to end */		\
  .byte 0x13;			/* DW_OP_drop, pop the 0 */	\
  .byte 0x23; .uleb128 VREGS;	/* DW_OP_plus_uconst */		\
  .byte 0x06;			/* DW_OP_deref */		\
  .byte 0x22;			/* DW_OP_plus */		\
  .byte 0x2f; .short 0x7fff;	/* DW_OP_skip to end */		\
9:

/* If msr bit 1<<25 is set, then VMX register REGNO is at offset REGNO*16
   of the VMX reg struct.  REGNO is 1 thru 31.  */
#define vsave_msr1(regno) \
  .byte 0x10;			/* DW_CFA_expression */		\
  .uleb128 regno + 77;		/* regno */			\
  .uleb128 9f - 1f;		/* length */			\
1:								\
  .byte 0x30 + regno;		/* DW_OP_lit n */		\
  .byte 0x2f; .short 2b - 9f;	/* DW_OP_skip */		\
9:

/* If msr bit 1<<25 is set, then VMX register REGNO is at offset OFS of
   the VMX save block.  */
#define vsave_msr2(regno, ofs) \
  .byte 0x10;			/* DW_CFA_expression */		\
  .uleb128 regno + 77;		/* regno */			\
  .uleb128 9f - 1f;		/* length */			\
1:								\
  .byte 0x0a; .short ofs;	/* DW_OP_const2u */		\
  .byte 0x2f; .short 3b - 9f;	/* DW_OP_skip */		\
9:

/* VMX register REGNO is at offset OFS of the VMX save area.  */
#define vsave(regno, ofs) \
  .byte 0x10;			/* DW_CFA_expression */		\
  .uleb128 regno + 77;		/* regno */			\
  .uleb128 9f - 1f;		/* length */			\
1:								\
  .byte 0x71; .sleb128 PTREGS;	/* DW_OP_breg1 */		\
  .byte 0x06;			/* DW_OP_deref */		\
  .byte 0x23; .uleb128 VREGS;	/* DW_OP_plus_uconst */		\
  .byte 0x06;			/* DW_OP_deref */		\
  .byte 0x23; .uleb128 ofs;	/* DW_OP_plus_uconst */		\
9:

/* This is where the pt_regs pointer can be found on the stack.  */
#define PTREGS 128+168+56

/* Size of regs.  */
#define RSIZE 8

/* Size of CR reg in DWARF unwind info.  */
#define CRSIZE 4

/*
 * powerpc: Wrong DWARF CFI in the kernel vdso for little-endian / ELFv2
 *
 * I've finally tracked down why my CR signal-unwind test case still
 * fails on little-endian.  The problem turned out to be that the kernel
 * installs a signal trampoline in the vDSO and provides a DWARF CFI
 * record for that trampoline.  This CFI described the save location
 * for CR as:
 *
 *	rsave (70, 38*RSIZE + (RSIZE - CRSIZE))
 *
 * which is correct for big-endian, but points to the wrong word on
 * little-endian.  This is wrong no matter which ABI.
 *
 * In addition, for the ELFv2 ABI we should provide a CFI record not
 * only for register 70 (cr2) but for all CR fields separately.
 * Strictly speaking, that would mean providing two separate vDSO
 * images, one for ELFv1 processes and one for ELFv2 processes (or
 * playing tricks with conditional DWARF expressions).  However, having
 * CFI records for the other CR fields in ELFv1 is not actually wrong;
 * they will simply be ignored.  So the simplest fix is to always
 * provide CFI for all the fields.
 *
 * Signed-off-by: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
 * Signed-off-by: Anton Blanchard <anton@samba.org>
 * Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
 */

/* Offset of CR reg within a full word.  */
#ifdef __LITTLE_ENDIAN__
#define CROFF 0
#else
#define CROFF (RSIZE - CRSIZE)
#endif

/* This is the offset of the VMX reg pointer.  */
#define VREGS 48*RSIZE+33*8

/* Describe where general purpose regs are saved.  */
#define EH_FRAME_GEN \
  cfa_save; \
  rsave ( 0,  0*RSIZE); \
  rsave ( 2,  2*RSIZE); \
  rsave ( 3,  3*RSIZE); \
  rsave ( 4,  4*RSIZE); \
  rsave ( 5,  5*RSIZE); \
  rsave ( 6,  6*RSIZE); \
  rsave ( 7,  7*RSIZE); \
  rsave ( 8,  8*RSIZE); \
  rsave ( 9,  9*RSIZE); \
  rsave (10, 10*RSIZE); \
  rsave (11, 11*RSIZE); \
  rsave (12, 12*RSIZE); \
  rsave (13, 13*RSIZE); \
  rsave (14, 14*RSIZE); \
  rsave (15, 15*RSIZE); \
  rsave (16, 16*RSIZE); \
  rsave (17, 17*RSIZE); \
  rsave (18, 18*RSIZE); \
  rsave (19, 19*RSIZE); \
  rsave (20, 20*RSIZE); \
  rsave (21, 21*RSIZE); \
  rsave (22, 22*RSIZE); \
  rsave (23, 23*RSIZE); \
  rsave (24, 24*RSIZE); \
  rsave (25, 25*RSIZE); \
  rsave (26, 26*RSIZE); \
  rsave (27, 27*RSIZE); \
  rsave (28, 28*RSIZE); \
  rsave (29, 29*RSIZE); \
  rsave (30, 30*RSIZE); \
  rsave (31, 31*RSIZE); \
  rsave (67, 32*RSIZE);		/* ap, used as temp for nip */ \
  rsave (65, 36*RSIZE);		/* lr */ \
  rsave (68, 38*RSIZE + CROFF);	/* cr fields */ \
  rsave (69, 38*RSIZE + CROFF); \
  rsave (70, 38*RSIZE + CROFF); \
  rsave (71, 38*RSIZE + CROFF); \
  rsave (72, 38*RSIZE + CROFF); \
  rsave (73, 38*RSIZE + CROFF); \
  rsave (74, 38*RSIZE + CROFF); \
  rsave (75, 38*RSIZE + CROFF)

/* Describe where the FP regs are saved.  */
#define EH_FRAME_FP \
  rsave (32, 48*RSIZE +  0*8); \
  rsave (33, 48*RSIZE +  1*8); \
  rsave (34, 48*RSIZE +  2*8); \
  rsave (35, 48*RSIZE +  3*8); \
  rsave (36, 48*RSIZE +  4*8); \
  rsave (37, 48*RSIZE +  5*8); \
  rsave (38, 48*RSIZE +  6*8); \
  rsave (39, 48*RSIZE +  7*8); \
  rsave (40, 48*RSIZE +  8*8); \
  rsave (41, 48*RSIZE +  9*8); \
  rsave (42, 48*RSIZE + 10*8); \
  rsave (43, 48*RSIZE + 11*8); \
  rsave (44, 48*RSIZE + 12*8); \
  rsave (45, 48*RSIZE + 13*8); \
  rsave (46, 48*RSIZE + 14*8); \
  rsave (47, 48*RSIZE + 15*8); \
  rsave (48, 48*RSIZE + 16*8); \
  rsave (49, 48*RSIZE + 17*8); \
  rsave (50, 48*RSIZE + 18*8); \
  rsave (51, 48*RSIZE + 19*8); \
  rsave (52, 48*RSIZE + 20*8); \
  rsave (53, 48*RSIZE + 21*8); \
  rsave (54, 48*RSIZE + 22*8); \
  rsave (55, 48*RSIZE + 23*8); \
  rsave (56, 48*RSIZE + 24*8); \
  rsave (57, 48*RSIZE + 25*8); \
  rsave (58, 48*RSIZE + 26*8); \
  rsave (59, 48*RSIZE + 27*8); \
  rsave (60, 48*RSIZE + 28*8); \
  rsave (61, 48*RSIZE + 29*8); \
  rsave (62, 48*RSIZE + 30*8); \
  rsave (63, 48*RSIZE + 31*8)

/* Describe where the VMX regs are saved.  */
#ifdef CONFIG_ALTIVEC
#define EH_FRAME_VMX \
  vsave_msr0 ( 0); \
  vsave_msr1 ( 1); \
  vsave_msr1 ( 2); \
  vsave_msr1 ( 3); \
  vsave_msr1 ( 4); \
  vsave_msr1 ( 5); \
  vsave_msr1 ( 6); \
  vsave_msr1 ( 7); \
  vsave_msr1 ( 8); \
  vsave_msr1 ( 9); \
  vsave_msr1 (10); \
  vsave_msr1 (11); \
  vsave_msr1 (12); \
  vsave_msr1 (13); \
  vsave_msr1 (14); \
  vsave_msr1 (15); \
  vsave_msr1 (16); \
  vsave_msr1 (17); \
  vsave_msr1 (18); \
  vsave_msr1 (19); \
  vsave_msr1 (20); \
  vsave_msr1 (21); \
  vsave_msr1 (22); \
  vsave_msr1 (23); \
  vsave_msr1 (24); \
  vsave_msr1 (25); \
  vsave_msr1 (26); \
  vsave_msr1 (27); \
  vsave_msr1 (28); \
  vsave_msr1 (29); \
  vsave_msr1 (30); \
  vsave_msr1 (31); \
  vsave_msr2 (33, 32*16+12); \
  vsave      (32, 33*16)
#else
#define EH_FRAME_VMX
#endif

	.section .eh_frame,"a",@progbits
.Lcie:
	.long .Lcie_end - .Lcie_start
.Lcie_start:
	.long 0			/* CIE ID */
	.byte 1			/* Version number */
	.string "zRS"		/* NUL-terminated augmentation string */
	.uleb128 4		/* Code alignment factor */
	.sleb128 -8		/* Data alignment factor */
	.byte 67		/* Return address register column, ap */
	.uleb128 1		/* Augmentation value length */
	.byte 0x14		/* DW_EH_PE_pcrel | DW_EH_PE_udata8. */
	.byte 0x0c,1,0		/* DW_CFA_def_cfa: r1 ofs 0 */
	.balign 8
.Lcie_end:

	.long .Lfde0_end - .Lfde0_start
.Lfde0_start:
	.long .Lfde0_start - .Lcie	/* CIE pointer. */
	.quad .Lsigrt_start - .		/* PC start, length */
	.quad .Lsigrt_end - .Lsigrt_start
	.uleb128 0			/* Augmentation */
	EH_FRAME_GEN
	EH_FRAME_FP
	EH_FRAME_VMX
# Do we really need to describe the frame at this point?  i.e. will
# we ever have some call chain that returns somewhere past the addi?
# I don't think so, since gcc doesn't support async signals.
#	.byte 0x41		/* DW_CFA_advance_loc 1*4 */
#undef PTREGS
#define PTREGS 168+56
#	EH_FRAME_GEN
#	EH_FRAME_FP
#	EH_FRAME_VMX
	.balign 8
.Lfde0_end: