powerpc/vdso: Prepare for switching VDSO to generic C implementation.

Prepare for switching the VDSO to the generic C implementation in a
following patch. Here, we:
- Prepare the helpers to call the C VDSO functions
- Prepare the required callbacks for the C VDSO functions
- Prepare the clocksource.h files to define VDSO_ARCH_CLOCKMODES
- Add the C trampolines to the generic C VDSO functions

powerpc is a bit special for the VDSO, as for system calls, in that it
requires setting the CR SO bit, which cannot be done in C. Therefore,
entry/exit needs to be performed in ASM.

Implementing __arch_get_vdso_data() would clobber the link register,
requiring the caller to save it. As the ASM calling function already
has to set a stack frame and save the link register before calling
the C vdso function, retrieving the vdso data pointer there is lighter.

Implement __arch_vdso_capable() and always return true.

Provide vdso_shift_ns(), as the generic x >> s gives the following
bad result:

  18:	35 25 ff e0 	addic.  r9,r5,-32
  1c:	41 80 00 10 	blt     2c <shift+0x14>
  20:	7c 64 4c 30 	srw     r4,r3,r9
  24:	38 60 00 00 	li      r3,0
	...
  2c:	54 69 08 3c 	rlwinm  r9,r3,1,0,30
  30:	21 45 00 1f 	subfic  r10,r5,31
  34:	7c 84 2c 30 	srw     r4,r4,r5
  38:	7d 29 50 30 	slw     r9,r9,r10
  3c:	7c 63 2c 30 	srw     r3,r3,r5
  40:	7d 24 23 78 	or      r4,r9,r4

In our case the shift is always <= 32. In addition, the upper 32 bits
of the result are likely zero. Let GCC know this; it also optimises the
following calculations.

With the patch, we get:

   0:	21 25 00 20 	subfic  r9,r5,32
   4:	7c 69 48 30 	slw     r9,r3,r9
   8:	7c 84 2c 30 	srw     r4,r4,r5
   c:	7d 24 23 78 	or      r4,r9,r4
  10:	7c 63 2c 30 	srw     r3,r3,r5

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201126131006.2431205-6-mpe@ellerman.id.au
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H
#define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H

#ifdef __ASSEMBLY__

#include <asm/ppc_asm.h>

/*
 * The macro sets two stack frames, one for the caller and one for the callee,
 * because there is no requirement for the caller to set a stack frame when
 * calling the VDSO, so it may have omitted to set one, especially on PPC64.
 */

.macro cvdso_call funct
  .cfi_startproc
	PPC_STLU	r1, -PPC_MIN_STKFRM(r1)
	mflr		r0
  .cfi_register lr, r0
	PPC_STLU	r1, -PPC_MIN_STKFRM(r1)
	PPC_STL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
#ifdef __powerpc64__
	PPC_STL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
#endif
	get_datapage	r5
	addi		r5, r5, VDSO_DATA_OFFSET
	bl		DOTSYM(\funct)
	PPC_LL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
#ifdef __powerpc64__
	PPC_LL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
#endif
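	/*
	 * The C VDSO functions return 0 on success or a negative error
	 * code. Mirror the syscall convention: clear CR[SO] and return
	 * directly on success, otherwise set CR[SO] and make r3 positive.
	 */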
	cmpwi		r3, 0
	mtlr		r0
  .cfi_restore lr
	addi		r1, r1, 2 * PPC_MIN_STKFRM
	crclr		so
	beqlr+
	crset		so
	neg		r3, r3
	blr
  .cfi_endproc
.endm
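
/*
 * Same convention as cvdso_call, but for the time() path: the data page
 * pointer goes in r4 (r3 carries the time_t pointer), and the function
 * cannot fail, so CR[SO] is simply cleared before returning.
 */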
.macro cvdso_call_time funct
  .cfi_startproc
	PPC_STLU	r1, -PPC_MIN_STKFRM(r1)
	mflr		r0
  .cfi_register lr, r0
	PPC_STLU	r1, -PPC_MIN_STKFRM(r1)
	PPC_STL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
#ifdef __powerpc64__
	PPC_STL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
#endif
	get_datapage	r4
	addi		r4, r4, VDSO_DATA_OFFSET
	bl		DOTSYM(\funct)
	PPC_LL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
#ifdef __powerpc64__
	PPC_LL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
#endif
	crclr		so
	mtlr		r0
  .cfi_restore lr
	addi		r1, r1, 2 * PPC_MIN_STKFRM
	blr
  .cfi_endproc
.endm
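
/*
 * Illustrative use (a sketch based on how the trampolines are wired up
 * in the VDSO .S files later in this series; names assumed):
 *
 *	V_FUNCTION_BEGIN(__kernel_clock_gettime)
 *		cvdso_call __c_kernel_clock_gettime
 *	V_FUNCTION_END(__kernel_clock_gettime)
 */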

#else

#include <asm/vdso/timebase.h>
#include <asm/barrier.h>
#include <asm/unistd.h>
#include <uapi/linux/time.h>

#define VDSO_HAS_CLOCK_GETRES		1

#define VDSO_HAS_TIME			1

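/*
 * Issue the syscall directly with the sc instruction. On powerpc the
 * kernel reports failure by setting CR[SO], so branch on "no summary
 * overflow" (bns+) and negate r3 on failure to get the usual -errno
 * return convention.
 */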
static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3,
					const unsigned long _r4)
{
	register long r0 asm("r0") = _r0;
	register unsigned long r3 asm("r3") = _r3;
	register unsigned long r4 asm("r4") = _r4;
	register int ret asm ("r3");

	asm volatile(
		"       sc\n"
		"       bns+    1f\n"
		"       neg     %0, %0\n"
		"1:\n"
		: "=r" (ret), "+r" (r4), "+r" (r0)
		: "r" (r3)
		: "memory", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cr0", "ctr");

	return ret;
}
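
/*
 * Fallbacks: taken when the VDSO cannot service the request (e.g. an
 * unsupported clock id); they go through the real system call.
 */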
static __always_inline
int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz)
{
	return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz);
}

static __always_inline
int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
{
	return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts);
}

static __always_inline
int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
{
	return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts);
}

#ifdef CONFIG_VDSO32

#define BUILD_VDSO32		1

static __always_inline
int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
{
	return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts);
}

static __always_inline
int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
{
	return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts);
}
#endif
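
/* The timebase is always usable: ignore clock_mode and read it directly. */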
static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
						 const struct vdso_data *vd)
{
	return get_tb();
}

const struct vdso_data *__arch_get_vdso_data(void);

static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
{
	return true;
}
#define vdso_clocksource_ok vdso_clocksource_ok

/*
 * powerpc specific delta calculation.
 *
 * This variant removes the masking of the subtraction because the
 * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX
 * which would result in a pointless operation. The compiler cannot
 * optimize it away as the mask comes from the vdso data and is not compile
 * time constant.
 */
static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
{
	return (cycles - last) * mult;
}
#define vdso_calc_delta vdso_calc_delta
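
/*
 * Open-coded 64-bit right shift for 32-bit builds. The shift is always
 * <= 32 and the upper 32 bits of the result are usually zero (see the
 * commit message above), so the fast path returns the low word only.
 */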
#ifndef __powerpc64__
static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
{
	u32 hi = ns >> 32;
	u32 lo = ns;

	lo >>= shift;
	lo |= hi << (32 - shift);
	hi >>= shift;

	if (likely(hi == 0))
		return lo;

	return ((u64)hi << 32) | lo;
}
#define vdso_shift_ns vdso_shift_ns
#endif
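
/* Prototypes of the C VDSO entry points called via the ASM trampolines. */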
#ifdef __powerpc64__
int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
			     const struct vdso_data *vd);
int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res,
			    const struct vdso_data *vd);
#else
int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
			     const struct vdso_data *vd);
int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
			       const struct vdso_data *vd);
int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res,
			    const struct vdso_data *vd);
#endif
int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
			    const struct vdso_data *vd);
__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time,
				    const struct vdso_data *vd);
#endif /* __ASSEMBLY__ */

#endif /* _ASM_POWERPC_VDSO_GETTIMEOFDAY_H */