2017-10-13 14:56:42 -07:00
|
|
|
/* Declare dependencies between CPUIDs */
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <asm/cpufeature.h>
|
|
|
|
|
|
|
|
/* One dependency edge: 'feature' is usable only if 'depends' is present. */
struct cpuid_dep {
	unsigned int feature;	/* dependent CPUID feature bit (X86_FEATURE_*) */
	unsigned int depends;	/* feature bit it requires */
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Table of CPUID features that depend on others.
|
|
|
|
*
|
|
|
|
* This only includes dependencies that can be usefully disabled, not
|
|
|
|
* features part of the base set (like FPU).
|
|
|
|
*
|
|
|
|
* Note this all is not __init / __initdata because it can be
|
|
|
|
* called from cpu hotplug. It shouldn't do anything in this case,
|
|
|
|
* but it's difficult to tell that to the init reference checker.
|
|
|
|
*/
|
2018-03-07 17:07:34 +01:00
|
|
|
static const struct cpuid_dep cpuid_deps[] = {
|
2019-07-17 16:46:31 -07:00
|
|
|
{ X86_FEATURE_FXSR, X86_FEATURE_FPU },
|
|
|
|
{ X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
|
|
|
|
{ X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
|
|
|
|
{ X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
|
|
|
|
{ X86_FEATURE_AVX, X86_FEATURE_XSAVE },
|
|
|
|
{ X86_FEATURE_PKU, X86_FEATURE_XSAVE },
|
|
|
|
{ X86_FEATURE_MPX, X86_FEATURE_XSAVE },
|
|
|
|
{ X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
|
2025-04-15 19:16:51 -07:00
|
|
|
{ X86_FEATURE_APX, X86_FEATURE_XSAVE },
|
2019-07-17 16:46:31 -07:00
|
|
|
{ X86_FEATURE_CMOV, X86_FEATURE_FXSR },
|
|
|
|
{ X86_FEATURE_MMX, X86_FEATURE_FXSR },
|
|
|
|
{ X86_FEATURE_MMXEXT, X86_FEATURE_MMX },
|
|
|
|
{ X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
|
|
|
|
{ X86_FEATURE_XSAVE, X86_FEATURE_FXSR },
|
|
|
|
{ X86_FEATURE_XMM, X86_FEATURE_FXSR },
|
|
|
|
{ X86_FEATURE_XMM2, X86_FEATURE_XMM },
|
|
|
|
{ X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
|
|
|
|
{ X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
|
|
|
|
{ X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
|
|
|
|
{ X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
|
|
|
|
{ X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
|
|
|
|
{ X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
|
|
|
|
{ X86_FEATURE_F16C, X86_FEATURE_XMM2, },
|
|
|
|
{ X86_FEATURE_AES, X86_FEATURE_XMM2 },
|
|
|
|
{ X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
|
2024-04-16 23:04:34 -07:00
|
|
|
{ X86_FEATURE_GFNI, X86_FEATURE_XMM2 },
|
2025-02-19 22:01:24 -08:00
|
|
|
{ X86_FEATURE_AVX_VNNI, X86_FEATURE_AVX },
|
2019-07-17 16:46:31 -07:00
|
|
|
{ X86_FEATURE_FMA, X86_FEATURE_AVX },
|
2024-04-16 23:04:34 -07:00
|
|
|
{ X86_FEATURE_VAES, X86_FEATURE_AVX },
|
|
|
|
{ X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX },
|
2019-07-17 16:46:31 -07:00
|
|
|
{ X86_FEATURE_AVX2, X86_FEATURE_AVX, },
|
|
|
|
{ X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
|
|
|
|
{ X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
|
|
|
|
{ X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
|
|
|
|
{ X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
|
|
|
|
{ X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
|
|
|
|
{ X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
|
|
|
|
{ X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
|
|
|
|
{ X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
|
|
|
|
{ X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
|
|
|
|
{ X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
|
|
|
|
{ X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
|
|
|
|
{ X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
|
|
|
|
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
|
|
|
|
{ X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
|
|
|
|
{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
|
2019-07-17 16:46:32 -07:00
|
|
|
{ X86_FEATURE_AVX512_VP2INTERSECT, X86_FEATURE_AVX512VL },
|
2019-07-17 16:46:31 -07:00
|
|
|
{ X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC },
|
|
|
|
{ X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC },
|
|
|
|
{ X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
|
2023-01-13 09:20:30 -06:00
|
|
|
{ X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_TOTAL },
|
|
|
|
{ X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_LOCAL },
|
2019-07-17 16:46:31 -07:00
|
|
|
{ X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
|
2020-12-07 19:34:40 -08:00
|
|
|
{ X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW },
|
2020-09-15 09:30:08 -07:00
|
|
|
{ X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES },
|
2020-08-24 12:11:20 -07:00
|
|
|
{ X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA },
|
2021-03-19 20:22:17 +13:00
|
|
|
{ X86_FEATURE_SGX_LC, X86_FEATURE_SGX },
|
2021-03-19 20:22:18 +13:00
|
|
|
{ X86_FEATURE_SGX1, X86_FEATURE_SGX },
|
|
|
|
{ X86_FEATURE_SGX2, X86_FEATURE_SGX1 },
|
KVM/VMX: Allow exposing EDECCSSA user leaf function to KVM guest
The new Asynchronous Exit (AEX) notification mechanism (AEX-notify)
allows one enclave to receive a notification in the ERESUME after the
enclave exit due to an AEX. EDECCSSA is a new SGX user leaf function
(ENCLU[EDECCSSA]) to facilitate the AEX notification handling. The new
EDECCSSA is enumerated via CPUID(EAX=0x12,ECX=0x0):EAX[11].
Besides Allowing reporting the new AEX-notify attribute to KVM guests,
also allow reporting the new EDECCSSA user leaf function to KVM guests
so the guest can fully utilize the AEX-notify mechanism.
Similar to existing X86_FEATURE_SGX1 and X86_FEATURE_SGX2, introduce a
new scattered X86_FEATURE_SGX_EDECCSSA bit for the new EDECCSSA, and
report it in KVM's supported CPUIDs.
Note, no additional KVM enabling is required to allow the guest to use
EDECCSSA. It's impossible to trap ENCLU (without completely preventing
the guest from using SGX). Advertise EDECCSSA as supported purely so
that userspace doesn't need to special case EDECCSSA, i.e. doesn't need
to manually check host CPUID.
The inability to trap ENCLU also means that KVM can't prevent the guest
from using EDECCSSA, but that virtualization hole is benign as far as
KVM is concerned. EDECCSSA is simply a fancy way to modify internal
enclave state.
More background about how do AEX-notify and EDECCSSA work:
SGX maintains a Current State Save Area Frame (CSSA) for each enclave
thread. When AEX happens, the enclave thread context is saved to the
CSSA and the CSSA is increased by 1. For a normal ERESUME which doesn't
deliver AEX notification, it restores the saved thread context from the
previously saved SSA and decreases the CSSA. If AEX-notify is enabled
for one enclave, the ERESUME acts differently. Instead of restoring the
saved thread context and decreasing the CSSA, it acts like EENTER which
doesn't decrease the CSSA but establishes a clean slate thread context
using the CSSA for the enclave to handle the notification. After some
handling, the enclave must discard the "new-established" SSA and switch
back to the previously saved SSA (upon AEX). Otherwise, the enclave
will run out of SSA space upon further AEXs and eventually fail to run.
To solve this problem, the new EDECCSSA essentially decreases the CSSA.
It can be used by the enclave notification handler to switch back to the
previous saved SSA when needed, i.e. after it handles the notification.
Signed-off-by: Kai Huang <kai.huang@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Sean Christopherson <seanjc@google.com>
Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
Link: https://lore.kernel.org/all/20221101022422.858944-1-kai.huang%40intel.com
2022-11-01 15:24:22 +13:00
|
|
|
{ X86_FEATURE_SGX_EDECCSSA, X86_FEATURE_SGX1 },
|
2021-10-21 15:55:16 -07:00
|
|
|
{ X86_FEATURE_XFD, X86_FEATURE_XSAVES },
|
x86/fpu: Optimize out sigframe xfeatures when in init state
tl;dr: AMX state is ~8k. Signal frames can have space for this
~8k and each signal entry writes out all 8k even if it is zeros.
Skip writing zeros for AMX to speed up signal delivery by about
4% overall when AMX is in its init state.
This is a user-visible change to the sigframe ABI.
== Hardware XSAVE Background ==
XSAVE state components may be tracked by the processor as being
in their initial configuration. Software can detect which
features are in this configuration by looking at the XSTATE_BV
field in an XSAVE buffer or with the XGETBV(1) instruction.
Both the XSAVE and XSAVEOPT instructions enumerate features s
being in the initial configuration via the XSTATE_BV field in the
XSAVE header, However, XSAVEOPT declines to actually write
features in their initial configuration to the buffer. XSAVE
writes the feature unconditionally, regardless of whether it is
in the initial configuration or not.
Basically, XSAVE users never need to inspect XSTATE_BV to
determine if the feature has been written to the buffer.
XSAVEOPT users *do* need to inspect XSTATE_BV. They might also
need to clear out the buffer if they want to make an isolated
change to the state, like modifying one register.
== Software Signal / XSAVE Background ==
Signal frames have historically been written with XSAVE itself.
Each state is written in its entirety, regardless of being in its
initial configuration.
In other words, the signal frame ABI uses the XSAVE behavior, not
the XSAVEOPT behavior.
== Problem ==
This means that any application which has acquired permission to
use AMX via ARCH_REQ_XCOMP_PERM will write 8k of state to the
signal frame. This 8k write will occur even when AMX was in its
initial configuration and software *knows* this because of
XSTATE_BV.
This problem also exists to a lesser degree with AVX-512 and its
2k of state. However, AVX-512 use does not require
ARCH_REQ_XCOMP_PERM and is more likely to have existing users
which would be impacted by any change in behavior.
== Solution ==
Stop writing out AMX xfeatures which are in their initial state
to the signal frame. This effectively makes the signal frame
XSAVE buffer look as if it were written with a combination of
XSAVEOPT and XSAVE behavior. Userspace which handles XSAVEOPT-
style buffers should be able to handle this naturally.
For now, include only the AMX xfeatures: XTILE and XTILEDATA in
this new behavior. These require new ABI to use anyway, which
makes their users very unlikely to be broken. This XSAVEOPT-like
behavior should be expected for all future dynamic xfeatures. It
may also be extended to legacy features like AVX-512 in the
future.
Only attempt this optimization on systems with dynamic features.
Disable dynamic feature support (XFD) if XGETBV1 is unavailable
by adding a CPUID dependency.
This has been measured to reduce the *overall* cycle cost of
signal delivery by about 4%.
Fixes: 2308ee57d93d ("x86/fpu/amx: Enable the AMX feature in 64-bit mode")
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: "Chang S. Bae" <chang.seok.bae@intel.com>
Link: https://lore.kernel.org/r/20211102224750.FA412E26@davehans-spike.ostc.intel.com
2021-11-02 15:47:50 -07:00
|
|
|
{ X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
|
2021-10-21 15:55:27 -07:00
|
|
|
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
|
2024-09-24 10:01:28 -07:00
|
|
|
{ X86_FEATURE_AMX_FP16, X86_FEATURE_AMX_TILE },
|
|
|
|
{ X86_FEATURE_AMX_BF16, X86_FEATURE_AMX_TILE },
|
|
|
|
{ X86_FEATURE_AMX_INT8, X86_FEATURE_AMX_TILE },
|
2023-06-12 17:10:34 -07:00
|
|
|
{ X86_FEATURE_SHSTK, X86_FEATURE_XSAVES },
|
2023-12-05 02:49:55 -08:00
|
|
|
{ X86_FEATURE_FRED, X86_FEATURE_LKGS },
|
2024-09-24 10:01:28 -07:00
|
|
|
{ X86_FEATURE_SPEC_CTRL_SSBD, X86_FEATURE_SPEC_CTRL },
|
2017-10-13 14:56:42 -07:00
|
|
|
{}
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
|
|
|
|
{
|
2017-11-02 13:22:35 +01:00
|
|
|
/*
|
|
|
|
* Note: This could use the non atomic __*_bit() variants, but the
|
|
|
|
* rest of the cpufeature code uses atomics as well, so keep it for
|
|
|
|
* consistency. Cleanup all of it separately.
|
|
|
|
*/
|
|
|
|
if (!c) {
|
|
|
|
clear_cpu_cap(&boot_cpu_data, feature);
|
|
|
|
set_bit(feature, (unsigned long *)cpu_caps_cleared);
|
|
|
|
} else {
|
|
|
|
clear_bit(feature, (unsigned long *)c->x86_capability);
|
|
|
|
}
|
2017-10-13 14:56:42 -07:00
|
|
|
}
|
|
|
|
|
2017-10-18 19:39:35 +02:00
|
|
|
/*
 * Take the capabilities and the BUG bits into account: the feature bitmaps
 * span NCAPINTS + NBUGINTS 32-bit words, so this is the total number of
 * addressable feature/bug bits. Used to bound 'feature' indices below.
 */
#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
|
|
|
|
|
2017-10-13 14:56:42 -07:00
|
|
|
static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
|
|
|
|
{
|
2017-10-18 19:39:35 +02:00
|
|
|
DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
|
2017-10-13 14:56:42 -07:00
|
|
|
const struct cpuid_dep *d;
|
2017-10-18 19:39:35 +02:00
|
|
|
bool changed;
|
|
|
|
|
|
|
|
if (WARN_ON(feature >= MAX_FEATURE_BITS))
|
|
|
|
return;
|
2017-10-13 14:56:42 -07:00
|
|
|
|
2024-03-27 16:43:14 +01:00
|
|
|
if (boot_cpu_has(feature))
|
|
|
|
WARN_ON(alternatives_patched);
|
|
|
|
|
2017-10-13 14:56:42 -07:00
|
|
|
clear_feature(c, feature);
|
|
|
|
|
|
|
|
/* Collect all features to disable, handling dependencies */
|
|
|
|
memset(disable, 0, sizeof(disable));
|
|
|
|
__set_bit(feature, disable);
|
|
|
|
|
|
|
|
/* Loop until we get a stable state. */
|
|
|
|
do {
|
|
|
|
changed = false;
|
|
|
|
for (d = cpuid_deps; d->feature; d++) {
|
|
|
|
if (!test_bit(d->depends, disable))
|
|
|
|
continue;
|
|
|
|
if (__test_and_set_bit(d->feature, disable))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
changed = true;
|
|
|
|
clear_feature(c, d->feature);
|
|
|
|
}
|
|
|
|
} while (changed);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Clear @feature and its dependents on the given CPU's capability mask. */
void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
{
	do_clear_cpu_cap(c, feature);
}
|
|
|
|
|
|
|
|
/*
 * Globally clear @feature and its dependents: NULL selects the boot CPU
 * path in clear_feature(), which also records the bit in cpu_caps_cleared.
 */
void setup_clear_cpu_cap(unsigned int feature)
{
	do_clear_cpu_cap(NULL, feature);
}
|
x86/cpufeatures: Warn about unmet CPU feature dependencies
Currently, the cpuid_deps[] table is only exercised when a particular
feature is explicitly disabled and clear_cpu_cap() is called. However,
some of these listed dependencies might already be missing during boot.
These types of errors shouldn't generally happen in production
environments, but they could sometimes sneak through, especially when
VMs and Kconfigs are in the mix. Also, the kernel might introduce
artificial dependencies between unrelated features, such as making LAM
depend on LASS.
Unexpected failures can occur when the kernel tries to use such
features. Add a simple boot-time scan of the cpuid_deps[] table to
detect the missing dependencies. One option is to disable all of such
features during boot, but that may cause regressions in existing
systems. For now, just warn about the missing dependencies to create
awareness.
As a trade-off between spamming the kernel log and keeping track of all
the features that have been warned about, only warn about the first
missing dependency. Any subsequent unmet dependency will only be logged
after the first one has been resolved.
Features are typically represented through unsigned integers within the
kernel, though some of them have user-friendly names if they are exposed
via /proc/cpuinfo.
Show the friendlier name if available, otherwise display the
X86_FEATURE_* numerals to make it easier to identify the feature.
Suggested-by: Tony Luck <tony.luck@intel.com>
Suggested-by: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Sohil Mehta <sohil.mehta@intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/20250313201608.3304135-1-sohil.mehta@intel.com
2025-03-13 20:16:08 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Return the feature "name" if available, otherwise return
|
|
|
|
* the X86_FEATURE_* numerals to make it easier to identify
|
|
|
|
* the feature.
|
|
|
|
*/
|
|
|
|
static const char *x86_feature_name(unsigned int feature, char *buf)
|
|
|
|
{
|
|
|
|
if (x86_cap_flags[feature])
|
|
|
|
return x86_cap_flags[feature];
|
|
|
|
|
|
|
|
snprintf(buf, 16, "%d*32+%2d", feature / 32, feature % 32);
|
|
|
|
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
void check_cpufeature_deps(struct cpuinfo_x86 *c)
|
|
|
|
{
|
|
|
|
char feature_buf[16], depends_buf[16];
|
|
|
|
const struct cpuid_dep *d;
|
|
|
|
|
|
|
|
for (d = cpuid_deps; d->feature; d++) {
|
|
|
|
if (cpu_has(c, d->feature) && !cpu_has(c, d->depends)) {
|
|
|
|
/*
|
|
|
|
* Only warn about the first unmet dependency on the
|
|
|
|
* first CPU where it is encountered to avoid spamming
|
|
|
|
* the kernel log.
|
|
|
|
*/
|
|
|
|
pr_warn_once("x86 CPU feature dependency check failure: CPU%d has '%s' enabled but '%s' disabled. Kernel might be fine, but no guarantees.\n",
|
|
|
|
smp_processor_id(),
|
|
|
|
x86_feature_name(d->feature, feature_buf),
|
|
|
|
x86_feature_name(d->depends, depends_buf));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|