Merge back cpufreq material for 6.9-rc1.
commit e4d0d7f194
12 changed files with 365 additions and 37 deletions
@@ -374,6 +374,11 @@
 			selects a performance level in this range and appropriate
 			to the current workload.
 
+	amd_prefcore=
+			[X86]
+			disable
+			  Disable amd-pstate preferred core.
+
 	amijoy.map=	[HW,JOY]	Amiga joystick support
 			Map of devices attached to JOY0DAT and JOY1DAT
 			Format: <a>,<b>
@@ -300,8 +300,8 @@ platforms. The AMD P-States mechanism is the more performance and energy
 efficiency frequency management method on AMD processors.
 
 
-AMD Pstate Driver Operation Modes
-=================================
+``amd-pstate`` Driver Operation Modes
+======================================
 
 ``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode,
 non-autonomous (passive) mode and guided autonomous (guided) mode.
@@ -353,6 +353,48 @@ is activated. In this mode, driver requests minimum and maximum performance
 level and the platform autonomously selects a performance level in this range
 and appropriate to the current workload.
 
+``amd-pstate`` Preferred Core
+=================================
+
+The core frequency is subjected to the process variation in semiconductors.
+Not all cores are able to reach the maximum frequency respecting the
+infrastructure limits. Consequently, AMD has redefined the concept of
+maximum frequency of a part. This means that a fraction of cores can reach
+maximum frequency. To find the best process scheduling policy for a given
+scenario, OS needs to know the core ordering informed by the platform through
+highest performance capability register of the CPPC interface.
+
+``amd-pstate`` preferred core enables the scheduler to prefer scheduling on
+cores that can achieve a higher frequency with lower voltage. The preferred
+core rankings can dynamically change based on the workload, platform conditions,
+thermals and ageing.
+
+The priority metric will be initialized by the ``amd-pstate`` driver. The ``amd-pstate``
+driver will also determine whether or not ``amd-pstate`` preferred core is
+supported by the platform.
+
+``amd-pstate`` driver will provide an initial core ordering when the system boots.
+The platform uses the CPPC interfaces to communicate the core ranking to the
+operating system and scheduler to make sure that OS is choosing the cores
+with highest performance firstly for scheduling the process. When ``amd-pstate``
+driver receives a message with the highest performance change, it will
+update the core ranking and set the cpu's priority.
+
+``amd-pstate`` Preferred Core Switch
+=====================================
+Kernel Parameters
+-----------------
+
+``amd-pstate`` preferred core has two states: enable and disable.
+Enable/disable states can be chosen by different kernel parameters.
+``amd-pstate`` preferred core is enabled by default.
+
+``amd_prefcore=disable``
+
+For systems that support ``amd-pstate`` preferred core, the core rankings will
+always be advertised by the platform. But OS can choose to ignore that via the
+kernel parameter ``amd_prefcore=disable``.
+
 User Space Interface in ``sysfs`` - General
 ===========================================
 
@@ -385,6 +427,19 @@ control its functionality at the system level. They are located in the
 	to the operation mode represented by that string - or to be
 	unregistered in the "disable" case.
 
+``prefcore``
+	Preferred core state of the driver: "enabled" or "disabled".
+
+	"enabled"
+	  Enable the ``amd-pstate`` preferred core.
+
+	"disabled"
+	  Disable the ``amd-pstate`` preferred core
+
+
+	This attribute is read-only to check the state of preferred core set
+	by the kernel parameter.
+
 ``cpupower`` tool support for ``amd-pstate``
 ===============================================
 
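The ``prefcore`` attribute above is global to the driver, while the per-policy ranking attributes are added further down in this diff. As a minimal userspace sketch of how these would be read back, assuming the usual amd-pstate/cpufreq sysfs layout and using policy0 purely as an example:

    /* Minimal userspace sketch: read the preferred-core attributes exposed by
     * this series.  The paths follow the usual cpufreq sysfs layout and are an
     * assumption here, not something spelled out in the hunks above. */
    #include <stdio.h>

    static void print_attr(const char *path)
    {
            char buf[64];
            FILE *f = fopen(path, "r");

            if (!f)
                    return;         /* attribute absent: old kernel or prefcore disabled */
            if (fgets(buf, sizeof(buf), f))
                    printf("%s: %s", path, buf);
            fclose(f);
    }

    int main(void)
    {
            /* global driver state set by the amd_prefcore= kernel parameter */
            print_attr("/sys/devices/system/cpu/amd_pstate/prefcore");
            /* per-policy ranking and hardware support, illustrated for CPU 0 */
            print_attr("/sys/devices/system/cpu/cpufreq/policy0/amd_pstate_prefcore_ranking");
            print_attr("/sys/devices/system/cpu/cpufreq/policy0/amd_pstate_hw_prefcore");
            return 0;
    }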
@@ -1054,8 +1054,9 @@ config SCHED_MC
 
 config SCHED_MC_PRIO
 	bool "CPU core priorities scheduler support"
-	depends on SCHED_MC && CPU_SUP_INTEL
-	select X86_INTEL_PSTATE
+	depends on SCHED_MC
+	select X86_INTEL_PSTATE if CPU_SUP_INTEL
+	select X86_AMD_PSTATE if CPU_SUP_AMD && ACPI
 	select CPU_FREQ
 	default y
 	help
@@ -1157,6 +1157,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
 	return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
 }
 
+/**
+ * cppc_get_highest_perf - Get the highest performance register value.
+ * @cpunum: CPU from which to get highest performance.
+ * @highest_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
+{
+	return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf);
+}
+EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
+
 /**
  * cppc_get_epp_perf - Get the epp register value.
  * @cpunum: CPU from which to get epp preference value.
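The new export mirrors cppc_get_nominal_perf() directly above it. A minimal kernel-side sketch of a caller, in the same spirit as the amd_pstate_get_highest_perf() consumer added later in this series (the function name and pr_debug text here are illustrative, not part of the commit):

    /* Minimal consumer sketch for the new helper: query the CPPC "highest
     * performance" value for one CPU and fail gracefully when the CPPC
     * library is not built in (the header hunk below stubs it to -ENOTSUPP). */
    #include <linux/types.h>
    #include <linux/printk.h>
    #include <acpi/cppc_acpi.h>

    static int example_read_highest_perf(int cpu)
    {
            u64 highest_perf;
            int ret;

            ret = cppc_get_highest_perf(cpu, &highest_perf);
            if (ret) {
                    pr_debug("CPU%d: highest perf unavailable (%d)\n", cpu, ret);
                    return ret;
            }

            pr_debug("CPU%d: highest perf %llu\n", cpu, highest_perf);
            return 0;
    }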
@@ -27,6 +27,7 @@
 #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
 #define ACPI_PROCESSOR_NOTIFY_POWER	0x81
 #define ACPI_PROCESSOR_NOTIFY_THROTTLING	0x82
+#define ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED	0x85
 
 MODULE_AUTHOR("Paul Diefenbaugh");
 MODULE_DESCRIPTION("ACPI Processor Driver");

@@ -83,6 +84,11 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data)
 		acpi_bus_generate_netlink_event(device->pnp.device_class,
 						  dev_name(&device->dev), event, 0);
 		break;
+	case ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED:
+		cpufreq_update_limits(pr->id);
+		acpi_bus_generate_netlink_event(device->pnp.device_class,
+						  dev_name(&device->dev), event, 0);
+		break;
 	default:
 		acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event);
 		break;
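The new notify case hands the event off to cpufreq_update_limits(), which is how it eventually reaches the ->update_limits() hook that the amd-pstate hunks below install. A rough paraphrase of the cpufreq-core dispatch, included here only for context; this body is not part of this commit and is stated from memory of the cpufreq core:

    /* Rough paraphrase (not part of this commit): drivers that set
     * .update_limits, such as amd-pstate below, are called directly;
     * everything else falls back to a full policy update. */
    void cpufreq_update_limits(unsigned int cpu)
    {
            if (cpufreq_driver->update_limits)
                    cpufreq_driver->update_limits(cpu);
            else
                    cpufreq_update_policy(cpu);
    }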
@@ -37,6 +37,7 @@
 #include <linux/uaccess.h>
 #include <linux/static_call.h>
 #include <linux/amd-pstate.h>
+#include <linux/topology.h>
 
 #include <acpi/processor.h>
 #include <acpi/cppc_acpi.h>

@@ -49,6 +50,7 @@
 
 #define AMD_PSTATE_TRANSITION_LATENCY	20000
 #define AMD_PSTATE_TRANSITION_DELAY	1000
+#define AMD_PSTATE_PREFCORE_THRESHOLD	166
 
 /*
  * TODO: We need more time to fine tune processors with shared memory solution

@@ -64,6 +66,7 @@ static struct cpufreq_driver amd_pstate_driver;
 static struct cpufreq_driver amd_pstate_epp_driver;
 static int cppc_state = AMD_PSTATE_UNDEFINED;
 static bool cppc_enabled;
+static bool amd_pstate_prefcore = true;
 
 /*
  * AMD Energy Preference Performance (EPP)
@@ -297,13 +300,14 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
 	if (ret)
 		return ret;
 
-	/*
-	 * TODO: Introduce AMD specific power feature.
-	 *
-	 * CPPC entry doesn't indicate the highest performance in some ASICs.
+	/* For platforms that do not support the preferred core feature, the
+	 * highest_perf may be configured with 166 or 255, to avoid the max
+	 * frequency being calculated wrongly. We take the AMD_CPPC_HIGHEST_PERF(cap1)
+	 * value as the default max perf.
 	 */
-	highest_perf = amd_get_highest_perf();
-	if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1))
+	if (cpudata->hw_prefcore)
+		highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
+	else
 		highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
 
 	WRITE_ONCE(cpudata->highest_perf, highest_perf);
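The 166/255 remark in the new comment is easier to see with numbers. A small standalone sketch, assuming the driver scales the maximum frequency roughly as nominal_freq * highest_perf / nominal_perf; the inputs are made up for illustration:

    /* Illustrative only: why clamping highest_perf matters for the
     * max-frequency math.  The scaling and the sample numbers below are
     * assumptions for this sketch, not values taken from the hunks above. */
    #include <stdio.h>

    int main(void)
    {
            unsigned int nominal_freq = 3000;       /* MHz, hypothetical */
            unsigned int nominal_perf = 120;        /* hypothetical CPPC nominal */

            /* A non-prefcore platform may report 255 in the highest-perf field,
             * which would inflate the derived max frequency ... */
            printf("highest_perf=255 -> ~%u MHz\n", nominal_freq * 255 / nominal_perf);
            /* ... whereas AMD_PSTATE_PREFCORE_THRESHOLD (166) keeps it in a
             * realistic boost range. */
            printf("highest_perf=166 -> ~%u MHz\n", nominal_freq * 166 / nominal_perf);
            return 0;
    }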
@@ -311,6 +315,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
 	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
 	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
 	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
+	WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
 	WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
 	return 0;
 }

@@ -324,8 +329,9 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
 	if (ret)
 		return ret;
 
-	highest_perf = amd_get_highest_perf();
-	if (highest_perf > cppc_perf.highest_perf)
+	if (cpudata->hw_prefcore)
+		highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
+	else
 		highest_perf = cppc_perf.highest_perf;
 
 	WRITE_ONCE(cpudata->highest_perf, highest_perf);

@@ -334,6 +340,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
 	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
 		   cppc_perf.lowest_nonlinear_perf);
 	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
+	WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
 	WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
 
 	if (cppc_state == AMD_PSTATE_ACTIVE)
@@ -477,12 +484,19 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy)
 
 static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
 {
-	u32 max_limit_perf, min_limit_perf;
+	u32 max_limit_perf, min_limit_perf, lowest_perf;
 	struct amd_cpudata *cpudata = policy->driver_data;
 
 	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
 	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
 
+	lowest_perf = READ_ONCE(cpudata->lowest_perf);
+	if (min_limit_perf < lowest_perf)
+		min_limit_perf = lowest_perf;
+
+	if (max_limit_perf < min_limit_perf)
+		max_limit_perf = min_limit_perf;
+
 	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
 	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
 	WRITE_ONCE(cpudata->max_limit_freq, policy->max);
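A quick standalone illustration of the new clamp, with made-up perf and frequency values chosen to show both the integer scaling and the lowest_perf floor:

    /* Illustrative only: how the new clamp changes min_limit_perf. */
    #include <stdio.h>

    int main(void)
    {
            unsigned long long highest_perf = 166, max_freq = 4150000;  /* kHz */
            unsigned long long policy_min = 400000;                     /* kHz */
            unsigned long long lowest_perf = 20;
            unsigned long long min_limit_perf;

            /* same scaling as the driver: perf = freq * highest_perf / max_freq */
            min_limit_perf = policy_min * highest_perf / max_freq;      /* -> 16 */
            printf("scaled min_limit_perf = %llu\n", min_limit_perf);

            /* the hunk above now clamps this to lowest_perf */
            if (min_limit_perf < lowest_perf)
                    min_limit_perf = lowest_perf;
            printf("clamped min_limit_perf = %llu\n", min_limit_perf);  /* -> 20 */
            return 0;
    }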
@@ -570,7 +584,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
 	if (target_perf < capacity)
 		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
 
-	min_perf = READ_ONCE(cpudata->highest_perf);
+	min_perf = READ_ONCE(cpudata->lowest_perf);
 	if (_min_perf < capacity)
 		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
 
@@ -706,6 +720,114 @@ static void amd_perf_ctl_reset(unsigned int cpu)
 	wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
 }
 
+/*
+ * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks
+ * due to locking, so queue the work for later.
+ */
+static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
+{
+	sched_set_itmt_support();
+}
+static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
+
+/*
+ * Get the highest performance register value.
+ * @cpu: CPU from which to get highest performance.
+ * @highest_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
+{
+	int ret;
+
+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+		u64 cap1;
+
+		ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
+		if (ret)
+			return ret;
+		WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
+	} else {
+		u64 cppc_highest_perf;
+
+		ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
+		if (ret)
+			return ret;
+		WRITE_ONCE(*highest_perf, cppc_highest_perf);
+	}
+
+	return (ret);
+}
+
+#define CPPC_MAX_PERF	U8_MAX
+
+static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
+{
+	int ret, prio;
+	u32 highest_perf;
+
+	ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
+	if (ret)
+		return;
+
+	cpudata->hw_prefcore = true;
+	/* check if CPPC preferred core feature is enabled */
+	if (highest_perf < CPPC_MAX_PERF)
+		prio = (int)highest_perf;
+	else {
+		pr_debug("AMD CPPC preferred core is unsupported!\n");
+		cpudata->hw_prefcore = false;
+		return;
+	}
+
+	if (!amd_pstate_prefcore)
+		return;
+
+	/*
+	 * The priorities can be set regardless of whether or not
+	 * sched_set_itmt_support(true) has been called and it is valid to
+	 * update them at any time after it has been called.
+	 */
+	sched_set_itmt_core_prio(prio, cpudata->cpu);
+
+	schedule_work(&sched_prefcore_work);
+}
+
+static void amd_pstate_update_limits(unsigned int cpu)
+{
+	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+	struct amd_cpudata *cpudata = policy->driver_data;
+	u32 prev_high = 0, cur_high = 0;
+	int ret;
+	bool highest_perf_changed = false;
+
+	mutex_lock(&amd_pstate_driver_lock);
+	if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
+		goto free_cpufreq_put;
+
+	ret = amd_pstate_get_highest_perf(cpu, &cur_high);
+	if (ret)
+		goto free_cpufreq_put;
+
+	prev_high = READ_ONCE(cpudata->prefcore_ranking);
+	if (prev_high != cur_high) {
+		highest_perf_changed = true;
+		WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
+
+		if (cur_high < CPPC_MAX_PERF)
+			sched_set_itmt_core_prio((int)cur_high, cpu);
+	}
+
+free_cpufreq_put:
+	cpufreq_cpu_put(policy);
+
+	if (!highest_perf_changed)
+		cpufreq_update_policy(cpu);
+
+	mutex_unlock(&amd_pstate_driver_lock);
+}
+
 static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
 {
 	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
@@ -727,6 +849,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	cpudata->cpu = policy->cpu;
 
+	amd_pstate_init_prefcore(cpudata);
+
 	ret = amd_pstate_init_perf(cpudata);
 	if (ret)
 		goto free_cpudata1;

@@ -877,6 +1001,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
 	return sysfs_emit(buf, "%u\n", perf);
 }
 
+static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
+						char *buf)
+{
+	u32 perf;
+	struct amd_cpudata *cpudata = policy->driver_data;
+
+	perf = READ_ONCE(cpudata->prefcore_ranking);
+
+	return sysfs_emit(buf, "%u\n", perf);
+}
+
+static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
+					   char *buf)
+{
+	bool hw_prefcore;
+	struct amd_cpudata *cpudata = policy->driver_data;
+
+	hw_prefcore = READ_ONCE(cpudata->hw_prefcore);
+
+	return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
+}
+
 static ssize_t show_energy_performance_available_preferences(
 				struct cpufreq_policy *policy, char *buf)
 {
|
|||
return ret < 0 ? ret : count;
|
||||
}
|
||||
|
||||
static ssize_t prefcore_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
|
||||
}
|
||||
|
||||
cpufreq_freq_attr_ro(amd_pstate_max_freq);
|
||||
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
|
||||
|
||||
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
|
||||
cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
|
||||
cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
|
||||
cpufreq_freq_attr_rw(energy_performance_preference);
|
||||
cpufreq_freq_attr_ro(energy_performance_available_preferences);
|
||||
static DEVICE_ATTR_RW(status);
|
||||
static DEVICE_ATTR_RO(prefcore);
|
||||
|
||||
static struct freq_attr *amd_pstate_attr[] = {
|
||||
&amd_pstate_max_freq,
|
||||
&amd_pstate_lowest_nonlinear_freq,
|
||||
&amd_pstate_highest_perf,
|
||||
&amd_pstate_prefcore_ranking,
|
||||
&amd_pstate_hw_prefcore,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@ -1093,6 +1250,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
|
|||
&amd_pstate_max_freq,
|
||||
&amd_pstate_lowest_nonlinear_freq,
|
||||
&amd_pstate_highest_perf,
|
||||
&amd_pstate_prefcore_ranking,
|
||||
&amd_pstate_hw_prefcore,
|
||||
&energy_performance_preference,
|
||||
&energy_performance_available_preferences,
|
||||
NULL,
|
||||
|
@ -1100,6 +1259,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
|
|||
|
||||
static struct attribute *pstate_global_attributes[] = {
|
||||
&dev_attr_status.attr,
|
||||
&dev_attr_prefcore.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
|
@@ -1151,6 +1311,8 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
 	cpudata->cpu = policy->cpu;
 	cpudata->epp_policy = 0;
 
+	amd_pstate_init_prefcore(cpudata);
+
 	ret = amd_pstate_init_perf(cpudata);
 	if (ret)
 		goto free_cpudata1;

@@ -1232,6 +1394,12 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
 	max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
 	min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
 
+	if (min_limit_perf < min_perf)
+		min_limit_perf = min_perf;
+
+	if (max_limit_perf < min_limit_perf)
+		max_limit_perf = min_limit_perf;
+
 	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
 	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
 
|
|||
.suspend = amd_pstate_cpu_suspend,
|
||||
.resume = amd_pstate_cpu_resume,
|
||||
.set_boost = amd_pstate_set_boost,
|
||||
.update_limits = amd_pstate_update_limits,
|
||||
.name = "amd-pstate",
|
||||
.attr = amd_pstate_attr,
|
||||
};
|
||||
|
@ -1446,6 +1615,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
|
|||
.online = amd_pstate_epp_cpu_online,
|
||||
.suspend = amd_pstate_epp_suspend,
|
||||
.resume = amd_pstate_epp_resume,
|
||||
.update_limits = amd_pstate_update_limits,
|
||||
.name = "amd-pstate-epp",
|
||||
.attr = amd_pstate_epp_attr,
|
||||
};
|
||||
|
@ -1567,7 +1737,17 @@ static int __init amd_pstate_param(char *str)
|
|||
|
||||
return amd_pstate_set_driver(mode_idx);
|
||||
}
|
||||
|
||||
static int __init amd_prefcore_param(char *str)
|
||||
{
|
||||
if (!strcmp(str, "disable"))
|
||||
amd_pstate_prefcore = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
early_param("amd_pstate", amd_pstate_param);
|
||||
early_param("amd_prefcore", amd_prefcore_param);
|
||||
|
||||
MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
|
||||
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
|
||||
|
|
|
@@ -576,17 +576,26 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy)
 
 	latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
 	if (latency) {
+		unsigned int max_delay_us = 2 * MSEC_PER_SEC;
+
 		/*
-		 * For platforms that can change the frequency very fast (< 10
+		 * If the platform already has high transition_latency, use it
+		 * as-is.
+		 */
+		if (latency > max_delay_us)
+			return latency;
+
+		/*
+		 * For platforms that can change the frequency very fast (< 2
 		 * us), the above formula gives a decent transition delay. But
 		 * for platforms where transition_latency is in milliseconds, it
 		 * ends up giving unrealistic values.
 		 *
-		 * Cap the default transition delay to 10 ms, which seems to be
+		 * Cap the default transition delay to 2 ms, which seems to be
 		 * a reasonable amount of time after which we should reevaluate
 		 * the frequency.
 		 */
-		return min(latency * LATENCY_MULTIPLIER, (unsigned int)10000);
+		return min(latency * LATENCY_MULTIPLIER, max_delay_us);
 	}
 
 	return LATENCY_MULTIPLIER;
|
|||
if (cpufreq_driver->ready)
|
||||
cpufreq_driver->ready(policy);
|
||||
|
||||
if (cpufreq_thermal_control_enabled(cpufreq_driver))
|
||||
/* Register cpufreq cooling only for a new policy */
|
||||
if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver))
|
||||
policy->cdev = of_cpufreq_cooling_register(policy);
|
||||
|
||||
pr_debug("initialization complete\n");
|
||||
|
@ -1655,11 +1665,6 @@ static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy)
|
|||
else
|
||||
policy->last_policy = policy->policy;
|
||||
|
||||
if (cpufreq_thermal_control_enabled(cpufreq_driver)) {
|
||||
cpufreq_cooling_unregister(policy->cdev);
|
||||
policy->cdev = NULL;
|
||||
}
|
||||
|
||||
if (has_target())
|
||||
cpufreq_exit_governor(policy);
|
||||
|
||||
|
@ -1720,6 +1725,15 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
|
|||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unregister cpufreq cooling once all the CPUs of the policy are
|
||||
* removed.
|
||||
*/
|
||||
if (cpufreq_thermal_control_enabled(cpufreq_driver)) {
|
||||
cpufreq_cooling_unregister(policy->cdev);
|
||||
policy->cdev = NULL;
|
||||
}
|
||||
|
||||
/* We did light-weight exit earlier, do full tear down now */
|
||||
if (cpufreq_driver->offline)
|
||||
cpufreq_driver->exit(policy);
|
||||
|
|
|
@@ -22,7 +22,6 @@
 #define DEF_SAMPLING_DOWN_FACTOR		(1)
 #define MAX_SAMPLING_DOWN_FACTOR		(100000)
 #define MICRO_FREQUENCY_UP_THRESHOLD		(95)
-#define MICRO_FREQUENCY_MIN_SAMPLE_RATE		(10000)
 #define MIN_FREQUENCY_UP_THRESHOLD		(1)
 #define MAX_FREQUENCY_UP_THRESHOLD		(100)
 
@@ -25,6 +25,7 @@
 #include <linux/acpi.h>
 #include <linux/vmalloc.h>
 #include <linux/pm_qos.h>
+#include <linux/bitfield.h>
 #include <trace/events/power.h>
 
 #include <asm/cpu.h>

@@ -201,8 +202,6 @@ struct global_params {
  * @prev_aperf:		Last APERF value read from APERF MSR
  * @prev_mperf:		Last MPERF value read from MPERF MSR
  * @prev_tsc:		Last timestamp counter (TSC) value
- * @prev_cummulative_iowait: IO Wait time difference from last and
- *			current sample
  * @sample:		Storage for storing last Sample data
  * @min_perf_ratio:	Minimum capacity in terms of PERF or HWP ratios
  * @max_perf_ratio:	Maximum capacity in terms of PERF or HWP ratios

@@ -241,7 +240,6 @@ struct cpudata {
 	u64	prev_aperf;
 	u64	prev_mperf;
 	u64	prev_tsc;
-	u64	prev_cummulative_iowait;
 	struct sample sample;
 	int32_t	min_perf_ratio;
 	int32_t	max_perf_ratio;
@@ -3407,14 +3405,31 @@ static bool intel_pstate_hwp_is_enabled(void)
 	return !!(value & 0x1);
 }
 
-static const struct x86_cpu_id intel_epp_balance_perf[] = {
+#define POWERSAVE_MASK			GENMASK(7, 0)
+#define BALANCE_POWER_MASK		GENMASK(15, 8)
+#define BALANCE_PERFORMANCE_MASK	GENMASK(23, 16)
+#define PERFORMANCE_MASK		GENMASK(31, 24)
+
+#define HWP_SET_EPP_VALUES(powersave, balance_power, balance_perf, performance) \
+	(FIELD_PREP_CONST(POWERSAVE_MASK, powersave) |\
+	 FIELD_PREP_CONST(BALANCE_POWER_MASK, balance_power) |\
+	 FIELD_PREP_CONST(BALANCE_PERFORMANCE_MASK, balance_perf) |\
+	 FIELD_PREP_CONST(PERFORMANCE_MASK, performance))
+
+#define HWP_SET_DEF_BALANCE_PERF_EPP(balance_perf) \
+	(HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, HWP_EPP_BALANCE_POWERSAVE,\
+	 balance_perf, HWP_EPP_PERFORMANCE))
+
+static const struct x86_cpu_id intel_epp_default[] = {
 	/*
 	 * Set EPP value as 102, this is the max suggested EPP
 	 * which can result in one core turbo frequency for
 	 * AlderLake Mobile CPUs.
 	 */
-	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102),
-	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 32),
+	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)),
+	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
+	X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE,
+							HWP_EPP_BALANCE_POWERSAVE, 115, 16)),
 	{}
 };
 
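The packed driver_data produced by these macros is taken apart again by the FIELD_GET() calls in the next hunk. A standalone sketch using plain shifts and the Meteor Lake 115/16 pair from the table above; the 0xff/0xc0 powersave inputs are illustrative stand-ins, not values taken from this diff:

    #include <stdio.h>
    #include <stdint.h>

    #define POWERSAVE_SHIFT                 0
    #define BALANCE_POWER_SHIFT             8
    #define BALANCE_PERFORMANCE_SHIFT       16
    #define PERFORMANCE_SHIFT               24

    /* same layout as HWP_SET_EPP_VALUES(): one byte per EPP index */
    static uint32_t pack(uint8_t ps, uint8_t bp, uint8_t bperf, uint8_t perf)
    {
            return ((uint32_t)ps << POWERSAVE_SHIFT) |
                   ((uint32_t)bp << BALANCE_POWER_SHIFT) |
                   ((uint32_t)bperf << BALANCE_PERFORMANCE_SHIFT) |
                   ((uint32_t)perf << PERFORMANCE_SHIFT);
    }

    int main(void)
    {
            uint32_t v = pack(0xff, 0xc0, 115, 16);

            /* same extraction the FIELD_GET() calls in the init path perform */
            printf("balance_perf = %u\n", (v >> BALANCE_PERFORMANCE_SHIFT) & 0xff); /* 115 */
            printf("performance  = %u\n", (v >> PERFORMANCE_SHIFT) & 0xff);         /* 16 */
            return 0;
    }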
@@ -3512,11 +3527,24 @@ hwp_cpu_matched:
 	intel_pstate_sysfs_expose_params();
 
 	if (hwp_active) {
-		const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf);
+		const struct x86_cpu_id *id = x86_match_cpu(intel_epp_default);
 		const struct x86_cpu_id *hybrid_id = x86_match_cpu(intel_hybrid_scaling_factor);
 
-		if (id)
-			epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data;
+		if (id) {
+			epp_values[EPP_INDEX_POWERSAVE] =
+				FIELD_GET(POWERSAVE_MASK, id->driver_data);
+			epp_values[EPP_INDEX_BALANCE_POWERSAVE] =
+				FIELD_GET(BALANCE_POWER_MASK, id->driver_data);
+			epp_values[EPP_INDEX_BALANCE_PERFORMANCE] =
+				FIELD_GET(BALANCE_PERFORMANCE_MASK, id->driver_data);
+			epp_values[EPP_INDEX_PERFORMANCE] =
+				FIELD_GET(PERFORMANCE_MASK, id->driver_data);
+			pr_debug("Updated EPPs powersave:%x balanced power:%x balanced perf:%x performance:%x\n",
+				 epp_values[EPP_INDEX_POWERSAVE],
+				 epp_values[EPP_INDEX_BALANCE_POWERSAVE],
+				 epp_values[EPP_INDEX_BALANCE_PERFORMANCE],
+				 epp_values[EPP_INDEX_PERFORMANCE]);
+		}
 
 		if (hybrid_id) {
 			hybrid_scaling_factor = hybrid_id->driver_data;
@@ -139,6 +139,7 @@ struct cppc_cpudata {
 #ifdef CONFIG_ACPI_CPPC_LIB
 extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
 extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
+extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf);
 extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
 extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
 extern int cppc_set_enable(int cpu, bool enable);

@@ -167,6 +168,10 @@ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
 {
 	return -ENOTSUPP;
 }
+static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
+{
+	return -ENOTSUPP;
+}
 static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
 {
 	return -ENOTSUPP;
@@ -39,11 +39,16 @@ struct amd_aperf_mperf {
  * @cppc_req_cached: cached performance request hints
  * @highest_perf: the maximum performance an individual processor may reach,
  *		  assuming ideal conditions
+ *		  For platforms that do not support the preferred core feature, the
+ *		  highest_perf may be configured with 166 or 255, to avoid the max
+ *		  frequency being calculated wrongly. We take the fixed value as the highest_perf.
 * @nominal_perf: the maximum sustained performance level of the processor,
 *		  assuming ideal operating conditions
 * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
 *			   savings are achieved
 * @lowest_perf: the absolute lowest performance level of the processor
+ * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
+ *		  priority.
 * @max_freq: the frequency that mapped to highest_perf
 * @min_freq: the frequency that mapped to lowest_perf
 * @nominal_freq: the frequency that mapped to nominal_perf

@@ -52,6 +57,9 @@ struct amd_aperf_mperf {
 * @prev: Last Aperf/Mperf/tsc count value read from register
 * @freq: current cpu frequency value
 * @boost_supported: check whether the Processor or SBIOS supports boost mode
+ * @hw_prefcore: check whether HW supports the preferred core feature.
+ *		  Only when hw_prefcore and the early prefcore param are true,
+ *		  the AMD P-State driver supports the preferred core feature.
 * @epp_policy: Last saved policy used to set energy-performance preference
 * @epp_cached: Cached CPPC energy-performance preference value
 * @policy: Cpufreq policy value

@@ -70,6 +78,7 @@ struct amd_cpudata {
 	u32	nominal_perf;
 	u32	lowest_nonlinear_perf;
 	u32	lowest_perf;
+	u32	prefcore_ranking;
 	u32	min_limit_perf;
 	u32	max_limit_perf;
 	u32	min_limit_freq;

@@ -85,6 +94,7 @@ struct amd_cpudata {
 
 	u64	freq;
 	bool	boost_supported;
+	bool	hw_prefcore;
 
 	/* EPP feature related attributes*/
 	s16	epp_policy;
@@ -263,6 +263,7 @@ static inline bool cpufreq_supports_freq_invariance(void)
 	return false;
 }
 static inline void disable_cpufreq(void) { }
+static inline void cpufreq_update_limits(unsigned int cpu) { }
 #endif
 
 #ifdef CONFIG_CPU_FREQ_STAT

@@ -568,9 +569,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div,
 
 /*
  * The polling frequency depends on the capability of the processor. Default
- * polling frequency is 1000 times the transition latency of the processor. The
- * ondemand governor will work on any processor with transition latency <= 10ms,
- * using appropriate sampling rate.
+ * polling frequency is 1000 times the transition latency of the processor.
  */
 #define LATENCY_MULTIPLIER		(1000)
 
@@ -1021,6 +1020,18 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
 					   efficiencies);
 }
 
+static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx)
+{
+	unsigned int freq;
+
+	if (idx < 0)
+		return false;
+
+	freq = policy->freq_table[idx].frequency;
+
+	return freq == clamp_val(freq, policy->min, policy->max);
+}
+
 static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 						 unsigned int target_freq,
 						 unsigned int relation)
@@ -1054,7 +1065,8 @@ retry:
 		return 0;
 	}
 
-	if (idx < 0 && efficiencies) {
+	/* Limit frequency index to honor policy->min/max */
+	if (!cpufreq_is_in_limits(policy, idx) && efficiencies) {
 		efficiencies = false;
 		goto retry;
 	}
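A standalone illustration of the clamp test that now gates the retry, with a made-up frequency table and policy limits:

    #include <stdio.h>

    static unsigned int clamp_val(unsigned int v, unsigned int lo, unsigned int hi)
    {
            return v < lo ? lo : (v > hi ? hi : v);
    }

    int main(void)
    {
            unsigned int freq_table[] = { 800000, 1200000, 2400000 };  /* kHz */
            unsigned int policy_min = 1000000, policy_max = 2000000;   /* kHz */

            for (int idx = 0; idx < 3; idx++) {
                    unsigned int freq = freq_table[idx];
                    int in_limits = (freq == clamp_val(freq, policy_min, policy_max));

                    /* only 1200000 kHz survives the check with these limits */
                    printf("idx %d (%u kHz): %s\n", idx, freq,
                           in_limits ? "within limits" : "out of limits -> retry");
            }
            return 0;
    }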