mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00

== Background == CET defines two register states: CET user, which includes user-mode control registers, and CET supervisor, which consists of shadow-stack pointers for privilege levels 0-2. Current kernels disable shadow stacks in kernel mode, making the CET supervisor state unused and eliminating the need for context switching. == Problem == To virtualize CET for guests, KVM must accurately emulate hardware behavior. A key challenge arises because there is no CPUID flag to indicate that shadow stack is supported only in user mode. Therefore, KVM cannot assume guests will not enable shadow stacks in kernel mode and must preserve the CET supervisor state of vCPUs. == Solution == An initial proposal to manually save and restore CET supervisor states using raw RDMSR/WRMSR in KVM was rejected due to performance concerns and its impact on KVM's ABI. Instead, leveraging the kernel's FPU infrastructure for context switching was favored [1]. The main question then became whether to enable the CET supervisor state globally for all processes or restrict it to vCPU processes. This decision involves a trade-off between a 24-byte XSTATE buffer waste for all non-vCPU processes and approximately 100 lines of code complexity in the kernel [2]. The agreed approach is to first try this optimal solution [3], i.e., restricting the CET supervisor state to guest FPUs only and eliminating unnecessary space waste. The guest-only xfeature infrastructure has already been added. Now, introduce CET supervisor xstate support as the first guest-only feature to prepare for the upcoming CET virtualization in KVM. 
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com> Signed-off-by: Chao Gao <chao.gao@intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Reviewed-by: Rick Edgecombe <rick.p.edgecombe@intel.com> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com> Reviewed-by: John Allen <john.allen@amd.com> Link: https://lore.kernel.org/kvm/ZM1jV3UPL0AMpVDI@google.com/ [1] Link: https://lore.kernel.org/kvm/1c2fd06e-2e97-4724-80ab-8695aa4334e7@intel.com/ [2] Link: https://lore.kernel.org/kvm/2597a87b-1248-b8ce-ce60-94074bc67ea4@intel.com/ [3] Link: https://lore.kernel.org/all/20250522151031.426788-7-chao.gao%40intel.com
134 lines
4.5 KiB
C
134 lines
4.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __ASM_X86_XSAVE_H
|
|
#define __ASM_X86_XSAVE_H
|
|
|
|
#include <linux/uaccess.h>
|
|
#include <linux/types.h>
|
|
|
|
#include <asm/processor.h>
|
|
#include <asm/fpu/api.h>
|
|
#include <asm/user.h>
|
|
|
|
/*
 * Every feature bit except the FP/SSE bits (XFEATURE_MASK_FPSSE) and
 * bit 63. Bit 63 of XCR0 is reserved for future expansion.
 */
#define XFEATURE_MASK_EXTEND	(~(XFEATURE_MASK_FPSSE | (1ULL << 63)))

/* Size of the FXSAVE area (presumably in bytes, like the values below). */
#define FXSAVE_SIZE		512

/* The XSAVE header sits directly behind the FXSAVE area. */
#define XSAVE_HDR_SIZE		64
#define XSAVE_HDR_OFFSET	FXSAVE_SIZE

/* The YMM area starts right where the XSAVE header ends. */
#define XSAVE_YMM_SIZE		256
#define XSAVE_YMM_OFFSET	(XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)

#define XSAVE_ALIGNMENT		64

/* All currently supported user features */
#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \
				      XFEATURE_MASK_SSE | \
				      XFEATURE_MASK_YMM | \
				      XFEATURE_MASK_OPMASK | \
				      XFEATURE_MASK_ZMM_Hi256 | \
				      XFEATURE_MASK_Hi16_ZMM | \
				      XFEATURE_MASK_PKRU | \
				      XFEATURE_MASK_BNDREGS | \
				      XFEATURE_MASK_BNDCSR | \
				      XFEATURE_MASK_XTILE | \
				      XFEATURE_MASK_APX)

/*
 * Features which are restored when returning to user space.
 * PKRU is not restored on return to user space because PKRU
 * is switched eagerly in switch_to() and flush_thread()
 */
#define XFEATURE_MASK_USER_RESTORE	\
	(XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU)

/* Features which are dynamically enabled for a process on request */
#define XFEATURE_MASK_USER_DYNAMIC	XFEATURE_MASK_XTILE_DATA

/* Supervisor features which are enabled only in guest FPUs */
#define XFEATURE_MASK_GUEST_SUPERVISOR	XFEATURE_MASK_CET_KERNEL

/*
 * All currently supported supervisor features. This includes the
 * guest-only supervisor features (XFEATURE_MASK_GUEST_SUPERVISOR).
 */
#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \
					    XFEATURE_MASK_CET_USER | \
					    XFEATURE_MASK_GUEST_SUPERVISOR)

/*
 * A supervisor state component may not always contain valuable information,
 * and its size may be huge. Saving/restoring such supervisor state components
 * at each context switch can cause high CPU and space overhead, which should
 * be avoided. Such supervisor state components should only be saved/restored
 * on demand. The on-demand supervisor features are set in this mask.
 *
 * Unlike the existing supported supervisor features, an independent supervisor
 * feature does not allocate a buffer in task->fpu, and the corresponding
 * supervisor state component cannot be saved/restored at each context switch.
 *
 * To support an independent supervisor feature, a developer should follow the
 * dos and don'ts as below:
 * - Do dynamically allocate a buffer for the supervisor state component.
 * - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the
 *   state component to/from the buffer.
 * - Don't set the bit corresponding to the independent supervisor feature in
 *   IA32_XSS at run time, since it has been set at boot time.
 */
#define XFEATURE_MASK_INDEPENDENT	(XFEATURE_MASK_LBR)

/*
 * Unsupported supervisor features. When a supervisor feature in this mask is
 * supported in the future, move it to the supported supervisor feature mask.
 */
#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED	(XFEATURE_MASK_PT)

/*
 * All supervisor states: the supported ones, the independent (on-demand)
 * ones and the unsupported ones.
 */
#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
				      XFEATURE_MASK_INDEPENDENT | \
				      XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)

/*
 * The feature mask required to restore FPU state:
 * - All user states which are not eagerly switched in switch_to()/exec()
 * - The supervisor states
 */
#define XFEATURE_MASK_FPSTATE	(XFEATURE_MASK_USER_RESTORE | \
				 XFEATURE_MASK_SUPERVISOR_SUPPORTED)

/*
 * Features in this mask have space allocated in the signal frame, but may not
 * have that space initialized when the feature is in its init state.
 */
#define XFEATURE_MASK_SIGFRAME_INITOPT	(XFEATURE_MASK_XTILE | \
					 XFEATURE_MASK_USER_DYNAMIC)

extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
|
|
|
|
extern void __init update_regset_xstate_info(unsigned int size,
|
|
u64 xstate_mask);
|
|
|
|
int xfeature_size(int xfeature_nr);
|
|
|
|
void xsaves(struct xregs_state *xsave, u64 mask);
|
|
void xrstors(struct xregs_state *xsave, u64 mask);
|
|
|
|
int xfd_enable_feature(u64 xfd_err);
|
|
|
|
#ifdef CONFIG_X86_64
|
|
DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_64
|
|
DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
|
|
|
|
static __always_inline __pure bool fpu_state_size_dynamic(void)
|
|
{
|
|
return static_branch_unlikely(&__fpu_state_size_dynamic);
|
|
}
|
|
#else
|
|
static __always_inline __pure bool fpu_state_size_dynamic(void)
|
|
{
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
#endif
|