/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Common time prototypes and such for all ppc machines.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) to merge
 * Paul Mackerras' version and mine for PReP and Pmac.
*/

#ifndef __POWERPC_TIME_H
#define __POWERPC_TIME_H

#ifdef __KERNEL__
#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/math64.h>	/* mul_u64_u64_shr() for the 32-bit mulhdu() */

#include <asm/processor.h>
#include <asm/cpu_has_feature.h>
#include <asm/vdso/timebase.h>

/* time.c */
extern u64 decrementer_max;

extern unsigned long tb_ticks_per_jiffy;
extern unsigned long tb_ticks_per_usec;
extern unsigned long tb_ticks_per_sec;
extern struct clock_event_device decrementer_clockevent;

extern void generic_calibrate_decr(void);

/* Some sane defaults: 125 MHz timebase, 1GHz processor */
extern unsigned long ppc_proc_freq;
#define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8)
extern unsigned long ppc_tb_freq;
#define DEFAULT_TB_FREQ 125000000UL
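/* i.e. the default processor frequency is 125 MHz * 8 = 1 GHz. */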

extern bool tb_invalid;
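
/* A 128-bit value held as two 64-bit halves. */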
struct div_result {
	u64 result_high;
	u64 result_low;
};

static inline u64 get_vtb(void)
{
	if (cpu_has_feature(CPU_FTR_ARCH_207S))
		return mfspr(SPRN_VTB);

	return 0;
}
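
/*
 * Illustrative sketch (hypothetical helper, not part of this header):
 * on ISA v2.07 and later CPUs the Virtual Time Base advances only while
 * the processor is actually running, unlike the always-running timebase,
 * so the delta between two reads approximates time spent executing.
 */
static inline u64 vtb_ticks_since(u64 vtb_stamp)
{
	/* Reads as 0 - 0 == 0 on CPUs without VTB. */
	return get_vtb() - vtb_stamp;
}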

/* Accessor functions for the decrementer register.
 * The 4xx doesn't even have a decrementer. I tried to use the
 * generic timer interrupt code, which seems OK, with the 4xx PIT
 * in auto-reload mode. The problem is PIT stops counting when it
 * hits zero. If it would wrap, we could use it just like a decrementer.
*/
static inline u64 get_dec(void)
{
	return mfspr(SPRN_DEC);
}

/*
 * Note: Book E and 4xx processors differ from other PowerPC processors
 * in when the decrementer generates its interrupt: on the 1 to 0
 * transition for Book E/4xx, but on the 0 to -1 transition for others.
*/
static inline void set_dec(u64 val)
{
	if (IS_ENABLED(CONFIG_BOOKE))
		mtspr(SPRN_DEC, val);
	else
		mtspr(SPRN_DEC, val - 1);
}
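
/*
 * Worked example of the adjustment above, assuming a 512 MHz timebase:
 * to take the next decrementer interrupt ~1ms from now, call
 * set_dec(512000).  Book E counts 512000 decrements down to the 1->0
 * interrupt; other CPUs are loaded with 511999 and interrupt one
 * decrement later, on the 0->-1 transition, which is the same 512000
 * ticks.
 */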

static inline unsigned long tb_ticks_since(unsigned long tstamp)
{
	return mftb() - tstamp;
}
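
/*
 * Illustrative sketch (hypothetical helper): timing a code section in
 * timebase ticks.  Feed the result to tb_to_ns(), declared below, to
 * convert to nanoseconds.
 */
static inline unsigned long example_ticks_for(void (*fn)(void))
{
	unsigned long start = mftb();

	fn();
	return tb_ticks_since(start);
}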

#define mulhwu(x,y) \
({unsigned z; asm ("mulhwu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;})

#ifdef CONFIG_PPC64
#define mulhdu(x,y) \
({unsigned long z; asm ("mulhdu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;})
#else
#define mulhdu(x, y) mul_u64_u64_shr(x, y, 64)
#endif
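
/*
 * Illustrative sketch (hypothetical names): either variant of mulhdu()
 * yields the high 64 bits of the full 128-bit product, which is how a
 * timebase delta can be scaled by a 0.64 fixed-point factor without a
 * 128-bit division, in the style of sched_clock() in time.c.
 */
static inline u64 example_scale_ticks(u64 tb_delta, u64 scale, int shift)
{
	/* (tb_delta * scale / 2^64) << shift */
	return mulhdu(tb_delta, scale) << shift;
}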

extern void secondary_cpu_time_init(void);
extern void __init time_init(void);

DECLARE_PER_CPU(u64, decrementers_next_tb);

static inline u64 timer_get_next_tb(void)
{
	return __this_cpu_read(decrementers_next_tb);
}
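
/*
 * Illustrative sketch (hypothetical helper): how the pieces above fit
 * together to re-arm the decrementer for the next scheduled event,
 * clamped to the largest programmable value.
 */
static inline void example_rearm_decrementer(void)
{
	u64 delta = timer_get_next_tb() - mftb();

	if (delta > decrementer_max)
		delta = decrementer_max;
	set_dec(delta);
}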

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void timer_rearm_host_dec(u64 now);
#endif

/* Convert timebase ticks to nanoseconds */
unsigned long long tb_to_ns(unsigned long long tb_ticks);
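
/*
 * Example: with the timebase ticking at 512 MHz, as on recent IBM POWER
 * systems, tb_to_ns(512) is roughly 1000: 512 ticks span one
 * microsecond.
 */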

void timer_broadcast_interrupt(void);

/* SPLPAR and VIRT_CPU_ACCOUNTING_NATIVE */
void pseries_accumulate_stolen_time(void);
u64 pseries_calculate_stolen_time(u64 stop_tb);

#endif /* __KERNEL__ */
#endif /* __POWERPC_TIME_H */