Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Merge branch 'pm-em'
Merge Energy Model changes for 6.9-rc1:

 - Allow the Energy Model to be updated dynamically (Lukasz Luba).

* pm-em: (24 commits)
  PM: EM: Fix nr_states warnings in static checks
  Documentation: EM: Update with runtime modification design
  PM: EM: Add em_dev_compute_costs()
  PM: EM: Remove old table
  PM: EM: Change debugfs configuration to use runtime EM table data
  drivers/thermal/devfreq_cooling: Use new Energy Model interface
  drivers/thermal/cpufreq_cooling: Use new Energy Model interface
  powercap/dtpm_devfreq: Use new Energy Model interface to get table
  powercap/dtpm_cpu: Use new Energy Model interface to get table
  PM: EM: Optimize em_cpu_energy() and remove division
  PM: EM: Support late CPUs booting and capacity adjustment
  PM: EM: Add performance field to struct em_perf_state and optimize
  PM: EM: Add em_perf_state_from_pd() to get performance states table
  PM: EM: Introduce em_dev_update_perf_domain() for EM updates
  PM: EM: Add functions for memory allocations for new EM tables
  PM: EM: Use runtime modified EM for CPUs energy estimation in EAS
  PM: EM: Introduce runtime modifiable table
  PM: EM: Split the allocation and initialization of the EM table
  PM: EM: Check if the get_cost() callback is present in em_compute_costs()
  PM: EM: Introduce em_compute_costs()
  ...
Commit 3bd834640b

7 changed files with 832 additions and 181 deletions
--- a/Documentation/power/energy-model.rst
+++ b/Documentation/power/energy-model.rst
@@ -71,6 +71,31 @@ whose performance is scaled together. Performance domains generally have a
 required to have the same micro-architecture. CPUs in different performance
 domains can have different micro-architectures.
 
+To better reflect power variation due to static power (leakage), the EM
+supports runtime modification of the power values. The mechanism relies on
+RCU to free the modifiable EM perf_state table memory. Its user, the task
+scheduler, also uses RCU to access this memory. The EM framework provides
+an API for allocating/freeing the new memory for the modifiable EM table.
+The old memory is freed automatically using the RCU callback mechanism when
+there are no owners anymore for the given EM runtime table instance. This is
+tracked using the kref mechanism. The device driver which provided the new EM
+at runtime should call the EM API to free it safely when it's no longer
+needed. The EM framework will handle the clean-up when it's possible.
+
+The kernel code which wants to modify the EM values is protected from
+concurrent access by a mutex. Therefore, the device driver code must run in
+sleeping context when it tries to modify the EM.
+
+With the runtime modifiable EM we switch from a 'single and static EM for the
+entire runtime' (system property) design to a 'single EM which can be changed
+during runtime according e.g. to the workload' (system and workload property)
+design.
+
+It is also possible to modify the CPU performance values for each of the EM's
+performance states. Thus, the full power and performance profile (which is an
+exponential curve) can be changed according e.g. to the workload or system
+property.
+
 2. Core APIs
 ------------
 
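The RCU plus kref scheme described above implies a fixed access pattern on the
reader side. Here is a minimal sketch of it; the foo_* wrapper is hypothetical,
only the EM call and the rcu_read_lock()/rcu_read_unlock() pair are real API::

  static unsigned long foo_read_state_power(struct em_perf_domain *pd, int i)
  {
          struct em_perf_state *table;
          unsigned long power;

          rcu_read_lock();
          /* The pointer is valid only inside this RCU read-side section */
          table = em_perf_state_from_pd(pd);
          power = table[i].power;
          rcu_read_unlock();

          return power;
  }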
@@ -175,10 +200,82 @@ CPUfreq governor is in use in case of CPU device. Currently this calculation is
 not provided for other type of devices.
 
 More details about the above APIs can be found in ``<linux/energy_model.h>``
-or in Section 2.4
+or in Section 2.5
 
 
-2.4 Description details of this API
+2.4 Runtime modifications
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Drivers willing to update the EM at runtime should use the following dedicated
+function to allocate a new instance of the modified EM. The API is listed
+below::
+
+  struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd);
+
+This allocates a structure which contains the new EM table together with the
+RCU and kref fields needed by the EM framework. The 'struct em_perf_table'
+contains the array 'struct em_perf_state state[]', which is a list of
+performance states in ascending order. That list must be populated by the
+device driver which wants to update the EM. The list of frequencies can be
+taken from the existing EM (created during boot). The content of each
+'struct em_perf_state' must be populated by the driver as well.
+
+This is the API which performs the EM update, using an RCU pointer swap::
+
+  int em_dev_update_perf_domain(struct device *dev,
+				struct em_perf_table __rcu *new_table);
+
+Drivers must provide a pointer to the allocated and initialized new EM
+'struct em_perf_table'. That new EM will be safely used inside the EM framework
+and will be visible to other sub-systems in the kernel (thermal, powercap).
+The main design goal for this API is to be fast and avoid extra calculations
+or memory allocations at runtime. When pre-computed EMs are available in the
+device driver, then it should be possible to simply reuse them with low
+performance overhead.
+
+In order to free the EM provided earlier by the driver (e.g. when the module
+is unloaded), there is a need to call the API::
+
+  void em_table_free(struct em_perf_table __rcu *table);
+
+It will allow the EM framework to safely remove the memory when there is
+no other sub-system using it, e.g. EAS.
+
+To use the power values in other sub-systems (like thermal, powercap) there is
+a need to call an API which protects the reader and provides consistency of
+the EM table data::
+
+  struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd);
+
+It returns the 'struct em_perf_state' pointer, which is an array of
+performance states in ascending order.
+This function must be called within an RCU read section (after
+rcu_read_lock()). When the EM table is no longer needed, rcu_read_unlock()
+must be called. In this way the EM safely uses the RCU read section and
+protects the users. It also allows the EM framework to manage the memory and
+free it. More details on how to use it can be found in Section 3.2 in the
+example driver.
+
+There is a dedicated API for device drivers to calculate em_perf_state::cost
+values::
+
+  int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
+			   int nr_states);
+
+These 'cost' values from the EM are used in EAS. The new EM table should be
+passed together with the number of entries and the device pointer. When the
+computation of the cost values is done properly, the function returns 0.
+The function also takes care of setting the inefficiency flag for each
+performance state, updating em_perf_state::flags accordingly.
+Such a prepared new EM can then be passed to em_dev_update_perf_domain(),
+which makes it available for use.
+
+More details about the above APIs can be found in ``<linux/energy_model.h>``
+or in Section 3.2 with example code showing a simple implementation of the
+updating mechanism in a device driver.
+
+
+2.5 Description details of this API
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. kernel-doc:: include/linux/energy_model.h
    :internal:
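Taken together, the APIs above imply a five-step writer sequence: allocate,
populate, compute costs, publish, drop the local reference. A condensed sketch
of that order, with error handling omitted and foo_fill_states() standing in
for the driver-specific population code (Section 3.2 below gives the complete
version)::

  em_table = em_table_alloc(pd);                 /* new table, kref = 1 */
  foo_fill_states(dev, em_table->state);         /* driver fills each state */
  em_dev_compute_costs(dev, em_table->state, pd->nr_perf_states);
  em_dev_update_perf_domain(dev, em_table);      /* RCU pointer swap */
  em_table_free(em_table);                       /* drop the local reference */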
@@ -187,8 +284,11 @@ or in Section 2.4
    :export:
 
 
-3. Example driver
------------------
+3. Examples
+-----------
 
+3.1 Example driver with EM registration
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
 The CPUFreq framework supports dedicated callback for registering
 the EM for a given CPU(s) 'policy' object: cpufreq_driver::register_em().
@@ -242,3 +342,78 @@ EM framework::
   39	static struct cpufreq_driver foo_cpufreq_driver = {
   40		.register_em		= foo_cpufreq_register_em,
   41	};
+
+
+3.2 Example driver with EM modification
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This section provides a simple example of a thermal driver modifying the EM.
+The driver implements a foo_thermal_em_update() function. The driver is woken
+up periodically to check the temperature and modify the EM data::
+
+  -> drivers/soc/example/example_em_mod.c
+
+  01	static void foo_get_new_em(struct foo_context *ctx)
+  02	{
+  03		struct em_perf_table __rcu *em_table;
+  04		struct em_perf_state *table, *new_table;
+  05		struct device *dev = ctx->dev;
+  06		struct em_perf_domain *pd;
+  07		unsigned long freq;
+  08		int i, ret;
+  09
+  10		pd = em_pd_get(dev);
+  11		if (!pd)
+  12			return;
+  13
+  14		em_table = em_table_alloc(pd);
+  15		if (!em_table)
+  16			return;
+  17
+  18		new_table = em_table->state;
+  19
+  20		rcu_read_lock();
+  21		table = em_perf_state_from_pd(pd);
+  22		for (i = 0; i < pd->nr_perf_states; i++) {
+  23			freq = table[i].frequency;
+  24			foo_get_power_perf_values(dev, freq, &new_table[i]);
+  25		}
+  26		rcu_read_unlock();
+  27
+  28		/* Calculate 'cost' values for EAS */
+  29		ret = em_dev_compute_costs(dev, new_table, pd->nr_perf_states);
+  30		if (ret) {
+  31			dev_warn(dev, "EM: compute costs failed %d\n", ret);
+  32			em_table_free(em_table);
+  33			return;
+  34		}
+  35
+  36		ret = em_dev_update_perf_domain(dev, em_table);
+  37		if (ret) {
+  38			dev_warn(dev, "EM: update failed %d\n", ret);
+  39			em_table_free(em_table);
+  40			return;
+  41		}
+  42
+  43		/*
+  44		 * Since it's one-time-update drop the usage counter.
+  45		 * The EM framework will later free the table when needed.
+  46		 */
+  47		em_table_free(em_table);
+  48	}
+  49
+  50	/*
+  51	 * Function called periodically to check the temperature and
+  52	 * update the EM if needed
+  53	 */
+  54	static void foo_thermal_em_update(struct foo_context *ctx)
+  55	{
+  56		struct device *dev = ctx->dev;
+  57		int cpu;
+  58
+  59		ctx->temperature = foo_get_temp(dev, ctx);
+  60		if (ctx->temperature < FOO_EM_UPDATE_TEMP_THRESHOLD)
+  61			return;
+  62
+  63		foo_get_new_em(ctx);
+  64	}
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -42,6 +42,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 {
 	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
 	struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
+	struct em_perf_state *table;
 	struct cpumask cpus;
 	unsigned long freq;
 	u64 power;
@@ -50,20 +51,22 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
 	nr_cpus = cpumask_weight(&cpus);
 
+	rcu_read_lock();
+	table = em_perf_state_from_pd(pd);
 	for (i = 0; i < pd->nr_perf_states; i++) {
 
-		power = pd->table[i].power * nr_cpus;
+		power = table[i].power * nr_cpus;
 
 		if (power > power_limit)
 			break;
 	}
 
-	freq = pd->table[i - 1].frequency;
+	freq = table[i - 1].frequency;
+	power_limit = table[i - 1].power * nr_cpus;
+	rcu_read_unlock();
 
 	freq_qos_update_request(&dtpm_cpu->qos_req, freq);
 
-	power_limit = pd->table[i - 1].power * nr_cpus;
-
 	return power_limit;
 }
 
@@ -87,9 +90,11 @@ static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power)
 static u64 get_pd_power_uw(struct dtpm *dtpm)
 {
 	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
+	struct em_perf_state *table;
 	struct em_perf_domain *pd;
 	struct cpumask *pd_mask;
 	unsigned long freq;
+	u64 power = 0;
 	int i;
 
 	pd = em_cpu_get(dtpm_cpu->cpu);
@@ -98,33 +103,43 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
 
 	freq = cpufreq_quick_get(dtpm_cpu->cpu);
 
+	rcu_read_lock();
+	table = em_perf_state_from_pd(pd);
 	for (i = 0; i < pd->nr_perf_states; i++) {
 
-		if (pd->table[i].frequency < freq)
+		if (table[i].frequency < freq)
 			continue;
 
-		return scale_pd_power_uw(pd_mask, pd->table[i].power);
+		power = scale_pd_power_uw(pd_mask, table[i].power);
+		break;
 	}
+	rcu_read_unlock();
 
-	return 0;
+	return power;
 }
 
 static int update_pd_power_uw(struct dtpm *dtpm)
 {
 	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
 	struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
+	struct em_perf_state *table;
 	struct cpumask cpus;
 	int nr_cpus;
 
 	cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus));
 	nr_cpus = cpumask_weight(&cpus);
 
-	dtpm->power_min = em->table[0].power;
+	rcu_read_lock();
+	table = em_perf_state_from_pd(em);
+
+	dtpm->power_min = table[0].power;
 	dtpm->power_min *= nr_cpus;
 
-	dtpm->power_max = em->table[em->nr_perf_states - 1].power;
+	dtpm->power_max = table[em->nr_perf_states - 1].power;
 	dtpm->power_max *= nr_cpus;
 
+	rcu_read_unlock();
+
 	return 0;
 }
 
@@ -143,7 +158,7 @@ static void pd_release(struct dtpm *dtpm)
 
 		cpufreq_cpu_put(policy);
 	}
 
 	kfree(dtpm_cpu);
 }
@@ -180,6 +195,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
 {
 	struct dtpm_cpu *dtpm_cpu;
 	struct cpufreq_policy *policy;
+	struct em_perf_state *table;
 	struct em_perf_domain *pd;
 	char name[CPUFREQ_NAME_LEN];
 	int ret = -ENOMEM;
@@ -216,9 +232,12 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
 	if (ret)
 		goto out_kfree_dtpm_cpu;
 
+	rcu_read_lock();
+	table = em_perf_state_from_pd(pd);
 	ret = freq_qos_add_request(&policy->constraints,
 				   &dtpm_cpu->qos_req, FREQ_QOS_MAX,
-				   pd->table[pd->nr_perf_states - 1].frequency);
+				   table[pd->nr_perf_states - 1].frequency);
+	rcu_read_unlock();
 	if (ret < 0)
 		goto out_dtpm_unregister;
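A pattern worth noting in this file (and repeated in the ones below): loops
that previously did `return` with a value taken from `pd->table` now record
the value and `break`, so that rcu_read_unlock() is reached on every path and
no em_perf_state pointer escapes the read section. The shape, reduced to a
sketch (cond() is a placeholder)::

  rcu_read_lock();
  table = em_perf_state_from_pd(pd);
  for (i = 0; i < pd->nr_perf_states; i++) {
          if (cond(&table[i])) {
                  power = table[i].power; /* copy the value out */
                  break;                  /* never return while locked */
          }
  }
  rcu_read_unlock();                      /* runs on all paths */

  return power;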
--- a/drivers/powercap/dtpm_devfreq.c
+++ b/drivers/powercap/dtpm_devfreq.c
@@ -37,11 +37,16 @@ static int update_pd_power_uw(struct dtpm *dtpm)
 	struct devfreq *devfreq = dtpm_devfreq->devfreq;
 	struct device *dev = devfreq->dev.parent;
 	struct em_perf_domain *pd = em_pd_get(dev);
+	struct em_perf_state *table;
 
-	dtpm->power_min = pd->table[0].power;
+	rcu_read_lock();
+	table = em_perf_state_from_pd(pd);
 
-	dtpm->power_max = pd->table[pd->nr_perf_states - 1].power;
+	dtpm->power_min = table[0].power;
 
+	dtpm->power_max = table[pd->nr_perf_states - 1].power;
+
+	rcu_read_unlock();
 	return 0;
 }
 
@@ -51,20 +56,23 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 	struct devfreq *devfreq = dtpm_devfreq->devfreq;
 	struct device *dev = devfreq->dev.parent;
 	struct em_perf_domain *pd = em_pd_get(dev);
+	struct em_perf_state *table;
 	unsigned long freq;
 	int i;
 
+	rcu_read_lock();
+	table = em_perf_state_from_pd(pd);
 	for (i = 0; i < pd->nr_perf_states; i++) {
-		if (pd->table[i].power > power_limit)
+		if (table[i].power > power_limit)
 			break;
 	}
 
-	freq = pd->table[i - 1].frequency;
+	freq = table[i - 1].frequency;
+	power_limit = table[i - 1].power;
+	rcu_read_unlock();
 
 	dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq);
 
-	power_limit = pd->table[i - 1].power;
-
 	return power_limit;
 }
 
@@ -89,8 +97,9 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
 	struct device *dev = devfreq->dev.parent;
 	struct em_perf_domain *pd = em_pd_get(dev);
 	struct devfreq_dev_status status;
+	struct em_perf_state *table;
 	unsigned long freq;
-	u64 power;
+	u64 power = 0;
 	int i;
 
 	mutex_lock(&devfreq->lock);
@@ -100,19 +109,22 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
 	freq = DIV_ROUND_UP(status.current_frequency, HZ_PER_KHZ);
 	_normalize_load(&status);
 
+	rcu_read_lock();
+	table = em_perf_state_from_pd(pd);
 	for (i = 0; i < pd->nr_perf_states; i++) {
 
-		if (pd->table[i].frequency < freq)
+		if (table[i].frequency < freq)
 			continue;
 
-		power = pd->table[i].power;
+		power = table[i].power;
 		power *= status.busy_time;
 		power >>= 10;
 
-		return power;
+		break;
 	}
+	rcu_read_unlock();
 
-	return 0;
+	return power;
 }
 
 static void pd_release(struct dtpm *dtpm)
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -91,12 +91,16 @@ struct cpufreq_cooling_device {
 static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
 			       unsigned int freq)
 {
+	struct em_perf_state *table;
 	int i;
 
+	rcu_read_lock();
+	table = em_perf_state_from_pd(cpufreq_cdev->em);
 	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
-		if (freq > cpufreq_cdev->em->table[i].frequency)
+		if (freq > table[i].frequency)
 			break;
 	}
+	rcu_read_unlock();
 
 	return cpufreq_cdev->max_level - i - 1;
 }
@@ -104,16 +108,20 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
 static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
 			     u32 freq)
 {
+	struct em_perf_state *table;
 	unsigned long power_mw;
 	int i;
 
+	rcu_read_lock();
+	table = em_perf_state_from_pd(cpufreq_cdev->em);
 	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
-		if (freq > cpufreq_cdev->em->table[i].frequency)
+		if (freq > table[i].frequency)
 			break;
 	}
 
-	power_mw = cpufreq_cdev->em->table[i + 1].power;
+	power_mw = table[i + 1].power;
 	power_mw /= MICROWATT_PER_MILLIWATT;
+	rcu_read_unlock();
 
 	return power_mw;
 }
@@ -121,18 +129,24 @@ static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
 static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
 			     u32 power)
 {
+	struct em_perf_state *table;
 	unsigned long em_power_mw;
+	u32 freq;
 	int i;
 
+	rcu_read_lock();
+	table = em_perf_state_from_pd(cpufreq_cdev->em);
 	for (i = cpufreq_cdev->max_level; i > 0; i--) {
 		/* Convert EM power to milli-Watts to make safe comparison */
-		em_power_mw = cpufreq_cdev->em->table[i].power;
+		em_power_mw = table[i].power;
 		em_power_mw /= MICROWATT_PER_MILLIWATT;
 		if (power >= em_power_mw)
 			break;
 	}
+	freq = table[i].frequency;
+	rcu_read_unlock();
 
-	return cpufreq_cdev->em->table[i].frequency;
+	return freq;
 }
 
 /**
@@ -262,8 +276,9 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
 static int cpufreq_state2power(struct thermal_cooling_device *cdev,
 			       unsigned long state, u32 *power)
 {
-	unsigned int freq, num_cpus, idx;
 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
+	unsigned int freq, num_cpus, idx;
+	struct em_perf_state *table;
 
 	/* Request state should be less than max_level */
 	if (state > cpufreq_cdev->max_level)
@@ -272,7 +287,12 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
 	num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
 
 	idx = cpufreq_cdev->max_level - state;
-	freq = cpufreq_cdev->em->table[idx].frequency;
+
+	rcu_read_lock();
+	table = em_perf_state_from_pd(cpufreq_cdev->em);
+	freq = table[idx].frequency;
+	rcu_read_unlock();
 
 	*power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
 
 	return 0;
@@ -378,8 +398,17 @@ static unsigned int get_state_freq(struct cpufreq_cooling_device *cpufreq_cdev,
 #ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
 	/* Use the Energy Model table if available */
 	if (cpufreq_cdev->em) {
+		struct em_perf_state *table;
+		unsigned int freq;
+
 		idx = cpufreq_cdev->max_level - state;
-		return cpufreq_cdev->em->table[idx].frequency;
+
+		rcu_read_lock();
+		table = em_perf_state_from_pd(cpufreq_cdev->em);
+		freq = table[idx].frequency;
+		rcu_read_unlock();
+
+		return freq;
 	}
 #endif
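The same discipline explains the new locals here: in cpu_power_to_freq() the
frequency is copied into `freq` before rcu_read_unlock(), because once a
concurrent em_dev_update_perf_domain() has swapped the table and the grace
period ends, the old table may be freed and `table[i].frequency` must not be
read outside the locked section. Reduced to the essential lines::

  freq = table[i].frequency;      /* copy while still under RCU */
  rcu_read_unlock();

  return freq;                    /* safe: a plain value, not a table pointer */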
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -87,6 +87,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
 	struct devfreq_cooling_device *dfc = cdev->devdata;
 	struct devfreq *df = dfc->devfreq;
 	struct device *dev = df->dev.parent;
+	struct em_perf_state *table;
 	unsigned long freq;
 	int perf_idx;
 
@@ -100,7 +101,11 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
 
 	if (dfc->em_pd) {
 		perf_idx = dfc->max_state - state;
-		freq = dfc->em_pd->table[perf_idx].frequency * 1000;
+
+		rcu_read_lock();
+		table = em_perf_state_from_pd(dfc->em_pd);
+		freq = table[perf_idx].frequency * 1000;
+		rcu_read_unlock();
 	} else {
 		freq = dfc->freq_table[state];
 	}
@@ -123,14 +128,21 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
  */
 static int get_perf_idx(struct em_perf_domain *em_pd, unsigned long freq)
 {
-	int i;
+	struct em_perf_state *table;
+	int i, idx = -EINVAL;
 
+	rcu_read_lock();
+	table = em_perf_state_from_pd(em_pd);
 	for (i = 0; i < em_pd->nr_perf_states; i++) {
-		if (em_pd->table[i].frequency == freq)
-			return i;
-	}
+		if (table[i].frequency != freq)
+			continue;
 
-	return -EINVAL;
+		idx = i;
+		break;
+	}
+	rcu_read_unlock();
+
+	return idx;
 }
 
 static unsigned long get_voltage(struct devfreq *df, unsigned long freq)
@@ -181,6 +193,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
 	struct devfreq_cooling_device *dfc = cdev->devdata;
 	struct devfreq *df = dfc->devfreq;
 	struct devfreq_dev_status status;
+	struct em_perf_state *table;
 	unsigned long state;
 	unsigned long freq;
 	unsigned long voltage;
@@ -204,7 +217,11 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
 		state = dfc->capped_state;
 
 		/* Convert EM power into milli-Watts first */
-		dfc->res_util = dfc->em_pd->table[state].power;
+		rcu_read_lock();
+		table = em_perf_state_from_pd(dfc->em_pd);
+		dfc->res_util = table[state].power;
+		rcu_read_unlock();
+
 		dfc->res_util /= MICROWATT_PER_MILLIWATT;
 
 		dfc->res_util *= SCALE_ERROR_MITIGATION;
@@ -225,7 +242,11 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev,
 		_normalize_load(&status);
 
 		/* Convert EM power into milli-Watts first */
-		*power = dfc->em_pd->table[perf_idx].power;
+		rcu_read_lock();
+		table = em_perf_state_from_pd(dfc->em_pd);
+		*power = table[perf_idx].power;
+		rcu_read_unlock();
+
 		*power /= MICROWATT_PER_MILLIWATT;
 		/* Scale power for utilization */
 		*power *= status.busy_time;
@@ -245,13 +266,19 @@ static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
 				       unsigned long state, u32 *power)
 {
 	struct devfreq_cooling_device *dfc = cdev->devdata;
+	struct em_perf_state *table;
 	int perf_idx;
 
 	if (state > dfc->max_state)
 		return -EINVAL;
 
 	perf_idx = dfc->max_state - state;
-	*power = dfc->em_pd->table[perf_idx].power;
+
+	rcu_read_lock();
+	table = em_perf_state_from_pd(dfc->em_pd);
+	*power = table[perf_idx].power;
+	rcu_read_unlock();
+
 	*power /= MICROWATT_PER_MILLIWATT;
 
 	return 0;
@@ -264,6 +291,7 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
 	struct devfreq *df = dfc->devfreq;
 	struct devfreq_dev_status status;
 	unsigned long freq, em_power_mw;
+	struct em_perf_state *table;
 	s32 est_power;
 	int i;
 
@@ -288,13 +316,16 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
 	 * Find the first cooling state that is within the power
 	 * budget. The EM power table is sorted ascending.
 	 */
+	rcu_read_lock();
+	table = em_perf_state_from_pd(dfc->em_pd);
 	for (i = dfc->max_state; i > 0; i--) {
 		/* Convert EM power to milli-Watts to make safe comparison */
-		em_power_mw = dfc->em_pd->table[i].power;
+		em_power_mw = table[i].power;
 		em_power_mw /= MICROWATT_PER_MILLIWATT;
 		if (est_power >= em_power_mw)
 			break;
 	}
+	rcu_read_unlock();
 
 	*state = dfc->max_state - i;
 	dfc->capped_state = *state;
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -5,6 +5,7 @@
 #include <linux/device.h>
 #include <linux/jump_label.h>
 #include <linux/kobject.h>
+#include <linux/kref.h>
 #include <linux/rcupdate.h>
 #include <linux/sched/cpufreq.h>
 #include <linux/sched/topology.h>
@@ -12,6 +13,7 @@
 
 /**
  * struct em_perf_state - Performance state of a performance domain
+ * @performance:	CPU performance (capacity) at a given frequency
  * @frequency:	The frequency in KHz, for consistency with CPUFreq
  * @power:	The power consumed at this level (by 1 CPU or by a registered
  *		device). It can be a total power: static and dynamic.
@@ -20,6 +22,7 @@
  * @flags:	see "em_perf_state flags" description below.
  */
 struct em_perf_state {
+	unsigned long performance;
 	unsigned long frequency;
 	unsigned long power;
 	unsigned long cost;
@@ -36,9 +39,21 @@ struct em_perf_state {
  */
 #define EM_PERF_STATE_INEFFICIENT BIT(0)
 
+/**
+ * struct em_perf_table - Performance states table
+ * @rcu:	RCU used for safe access and destruction
+ * @kref:	Reference counter to track the users
+ * @state:	List of performance states, in ascending order
+ */
+struct em_perf_table {
+	struct rcu_head rcu;
+	struct kref kref;
+	struct em_perf_state state[];
+};
+
 /**
  * struct em_perf_domain - Performance domain
- * @table:		List of performance states, in ascending order
+ * @em_table:		Pointer to the runtime modifiable em_perf_table
  * @nr_perf_states:	Number of performance states
  * @flags:		See "em_perf_domain flags"
  * @cpus:	Cpumask covering the CPUs of the domain. It's here
@@ -53,7 +68,7 @@ struct em_perf_state {
  *		field is unused.
  */
 struct em_perf_domain {
-	struct em_perf_state *table;
+	struct em_perf_table __rcu *em_table;
 	int nr_perf_states;
 	unsigned long flags;
 	unsigned long cpus[];
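Since struct em_perf_table ends in a flexible array member, the header (rcu +
kref) and the states form a single allocation sized at runtime. A sketch of
the arithmetic, mirroring what em_table_alloc() does in
kernel/power/energy_model.c below (struct_size() from <linux/overflow.h> would
be the equivalent, overflow-checked way to write it)::

  size_t bytes = sizeof(struct em_perf_table) +
                 pd->nr_perf_states * sizeof(struct em_perf_state);

  table = kzalloc(bytes, GFP_KERNEL);   /* one block: header + state[] */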
@@ -98,27 +113,6 @@ struct em_perf_domain {
 #define EM_MAX_NUM_CPUS 16
 #endif
 
-/*
- * To avoid an overflow on 32bit machines while calculating the energy
- * use a different order in the operation. First divide by the 'cpu_scale'
- * which would reduce big value stored in the 'cost' field, then multiply by
- * the 'sum_util'. This would allow to handle existing platforms, which have
- * e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts.
- * In such scenario, where there are 4 CPUs in the Perf. Domain the 'sum_util'
- * could be 4096, then multiplication: 'cost' * 'sum_util' would overflow.
- * This reordering of operations has some limitations, we lose small
- * precision in the estimation (comparing to 64bit platform w/o reordering).
- *
- * We are safe on 64bit machine.
- */
-#ifdef CONFIG_64BIT
-#define em_estimate_energy(cost, sum_util, scale_cpu) \
-	(((cost) * (sum_util)) / (scale_cpu))
-#else
-#define em_estimate_energy(cost, sum_util, scale_cpu) \
-	(((cost) / (scale_cpu)) * (sum_util))
-#endif
-
 struct em_data_callback {
 	/**
 	 * active_power() - Provide power at the next performance state of
@@ -168,40 +162,48 @@ struct em_data_callback {
 
 struct em_perf_domain *em_cpu_get(int cpu);
 struct em_perf_domain *em_pd_get(struct device *dev);
+int em_dev_update_perf_domain(struct device *dev,
+			      struct em_perf_table __rcu *new_table);
 int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
 				struct em_data_callback *cb, cpumask_t *span,
 				bool microwatts);
 void em_dev_unregister_perf_domain(struct device *dev);
+struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd);
+void em_table_free(struct em_perf_table __rcu *table);
+int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
+			 int nr_states);
 
 /**
  * em_pd_get_efficient_state() - Get an efficient performance state from the EM
- * @pd   : Performance domain for which we want an efficient frequency
- * @freq : Frequency to map with the EM
+ * @table:		List of performance states, in ascending order
+ * @nr_perf_states:	Number of performance states
+ * @max_util:		Max utilization to map with the EM
+ * @pd_flags:		Performance Domain flags
  *
  * It is called from the scheduler code quite frequently and as a consequence
  * doesn't implement any check.
  *
- * Return: An efficient performance state, high enough to meet @freq
+ * Return: An efficient performance state id, high enough to meet @max_util
  * requirement.
  */
-static inline
-struct em_perf_state *em_pd_get_efficient_state(struct em_perf_domain *pd,
-						unsigned long freq)
+static inline int
+em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
+			  unsigned long max_util, unsigned long pd_flags)
 {
 	struct em_perf_state *ps;
 	int i;
 
-	for (i = 0; i < pd->nr_perf_states; i++) {
-		ps = &pd->table[i];
-		if (ps->frequency >= freq) {
-			if (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
+	for (i = 0; i < nr_perf_states; i++) {
+		ps = &table[i];
+		if (ps->performance >= max_util) {
+			if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
 			    ps->flags & EM_PERF_STATE_INEFFICIENT)
 				continue;
-			break;
+			return i;
 		}
 	}
 
-	return ps;
+	return nr_perf_states - 1;
 }
@@ -224,9 +226,13 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 		unsigned long max_util, unsigned long sum_util,
 		unsigned long allowed_cpu_cap)
 {
-	unsigned long freq, ref_freq, scale_cpu;
+	struct em_perf_table *em_table;
 	struct em_perf_state *ps;
-	int cpu;
+	int i;
+
+#ifdef CONFIG_SCHED_DEBUG
+	WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n");
+#endif
 
 	if (!sum_util)
 		return 0;
@@ -234,31 +240,30 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	/*
 	 * In order to predict the performance state, map the utilization of
 	 * the most utilized CPU of the performance domain to a requested
-	 * frequency, like schedutil. Take also into account that the real
-	 * frequency might be set lower (due to thermal capping). Thus, clamp
+	 * performance, like schedutil. Take also into account that the real
+	 * performance might be set lower (due to thermal capping). Thus, clamp
 	 * max utilization to the allowed CPU capacity before calculating
-	 * effective frequency.
+	 * effective performance.
 	 */
-	cpu = cpumask_first(to_cpumask(pd->cpus));
-	scale_cpu = arch_scale_cpu_capacity(cpu);
-	ref_freq = arch_scale_freq_ref(cpu);
-
+	max_util = map_util_perf(max_util);
 	max_util = min(max_util, allowed_cpu_cap);
-	freq = map_util_freq(max_util, ref_freq, scale_cpu);
 
 	/*
 	 * Find the lowest performance state of the Energy Model above the
-	 * requested frequency.
+	 * requested performance.
 	 */
-	ps = em_pd_get_efficient_state(pd, freq);
+	em_table = rcu_dereference(pd->em_table);
+	i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states,
+				      max_util, pd->flags);
+	ps = &em_table->state[i];
 
 	/*
-	 * The capacity of a CPU in the domain at the performance state (ps)
-	 * can be computed as:
+	 * The performance (capacity) of a CPU in the domain at the performance
+	 * state (ps) can be computed as:
 	 *
-	 *             ps->freq * scale_cpu
-	 *   ps->cap = --------------------                          (1)
-	 *                 cpu_max_freq
+	 *                     ps->freq * scale_cpu
+	 *   ps->performance = --------------------                  (1)
	 *                         cpu_max_freq
 	 *
 	 * So, ignoring the costs of idle states (which are not available in
 	 * the EM), the energy consumed by this CPU at that performance state
@@ -266,9 +271,10 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	 *
 	 *             ps->power * cpu_util
 	 *   cpu_nrg = --------------------                          (2)
-	 *                    ps->cap
+	 *                 ps->performance
 	 *
-	 * since 'cpu_util / ps->cap' represents its percentage of busy time.
+	 * since 'cpu_util / ps->performance' represents its percentage of busy
+	 * time.
 	 *
 	 *   NOTE: Although the result of this computation actually is in
 	 *         units of power, it can be manipulated as an energy value
@@ -278,9 +284,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	 * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product
 	 * of two terms:
 	 *
-	 *             ps->power * cpu_max_freq   cpu_util
-	 *   cpu_nrg = ------------------------ * ---------          (3)
-	 *                       ps->freq          scale_cpu
+	 *             ps->power * cpu_max_freq
+	 *   cpu_nrg = ------------------------ * cpu_util           (3)
+	 *                ps->freq * scale_cpu
 	 *
 	 * The first term is static, and is stored in the em_perf_state struct
 	 * as 'ps->cost'.
@@ -290,11 +296,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	 * total energy of the domain (which is the simple sum of the energy of
 	 * all of its CPUs) can be factorized as:
 	 *
-	 *            ps->cost * \Sum cpu_util
-	 *   pd_nrg = ------------------------                       (4)
-	 *                  scale_cpu
+	 *   pd_nrg = ps->cost * \Sum cpu_util                       (4)
 	 */
-	return em_estimate_energy(ps->cost, sum_util, scale_cpu);
+	return ps->cost * sum_util;
 }
 
 /**
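A worked instance of the simplified formula (4), with made-up numbers: take a
performance state with power = 1000000 (uW) and performance = 512. With the
new cost computation in em_compute_costs() (see kernel/power/energy_model.c
below), cost = 1000000 * 10 / 512 = 19531, and for sum_util = 300::

  pd_nrg = ps->cost * sum_util = 19531 * 300 = 5859300

The result is a relative value: it is only ever compared against the same
computation for other candidate CPUs in EAS, so dropping the old division by
'scale_cpu' (and with it the 32-bit em_estimate_energy() reordering) changes
the units but is designed to preserve the comparisons.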
@@ -309,6 +313,23 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
 	return pd->nr_perf_states;
 }
 
+/**
+ * em_perf_state_from_pd() - Get the performance states table of perf.
+ *				domain
+ * @pd	: performance domain for which this must be done
+ *
+ * To use this function the rcu_read_lock() must be held. After the usage
+ * of the performance states table is finished, the rcu_read_unlock() should
+ * be called.
+ *
+ * Return: the pointer to performance states table of the performance domain
+ */
+static inline
+struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd)
+{
+	return rcu_dereference(pd->em_table)->state;
+}
+
 #else
 struct em_data_callback {};
 #define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { }
@@ -343,6 +364,29 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
 {
 	return 0;
 }
+static inline
+struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd)
+{
+	return NULL;
+}
+static inline void em_table_free(struct em_perf_table __rcu *table) {}
+static inline
+int em_dev_update_perf_domain(struct device *dev,
+			      struct em_perf_table __rcu *new_table)
+{
+	return -EINVAL;
+}
+static inline
+struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd)
+{
+	return NULL;
+}
+static inline
+int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
+			 int nr_states)
+{
+	return -EINVAL;
+}
 #endif
 
 #endif
@ -23,6 +23,12 @@
|
||||||
*/
|
*/
|
||||||
static DEFINE_MUTEX(em_pd_mutex);
|
static DEFINE_MUTEX(em_pd_mutex);
|
||||||
|
|
||||||
|
static void em_cpufreq_update_efficiencies(struct device *dev,
|
||||||
|
struct em_perf_state *table);
|
||||||
|
static void em_check_capacity_update(void);
|
||||||
|
static void em_update_workfn(struct work_struct *work);
|
||||||
|
static DECLARE_DELAYED_WORK(em_update_work, em_update_workfn);
|
||||||
|
|
||||||
static bool _is_cpu_device(struct device *dev)
|
static bool _is_cpu_device(struct device *dev)
|
||||||
{
|
{
|
||||||
return (dev->bus == &cpu_subsys);
|
return (dev->bus == &cpu_subsys);
|
||||||
|
@ -31,19 +37,65 @@ static bool _is_cpu_device(struct device *dev)
|
||||||
#ifdef CONFIG_DEBUG_FS
|
#ifdef CONFIG_DEBUG_FS
|
||||||
static struct dentry *rootdir;
|
static struct dentry *rootdir;
|
||||||
|
|
||||||
static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
|
struct em_dbg_info {
|
||||||
|
struct em_perf_domain *pd;
|
||||||
|
int ps_id;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define DEFINE_EM_DBG_SHOW(name, fname) \
|
||||||
|
static int em_debug_##fname##_show(struct seq_file *s, void *unused) \
|
||||||
|
{ \
|
||||||
|
struct em_dbg_info *em_dbg = s->private; \
|
||||||
|
struct em_perf_state *table; \
|
||||||
|
unsigned long val; \
|
||||||
|
\
|
||||||
|
rcu_read_lock(); \
|
||||||
|
table = em_perf_state_from_pd(em_dbg->pd); \
|
||||||
|
val = table[em_dbg->ps_id].name; \
|
||||||
|
rcu_read_unlock(); \
|
||||||
|
\
|
||||||
|
seq_printf(s, "%lu\n", val); \
|
||||||
|
return 0; \
|
||||||
|
} \
|
||||||
|
DEFINE_SHOW_ATTRIBUTE(em_debug_##fname)
|
||||||
|
|
||||||
|
DEFINE_EM_DBG_SHOW(frequency, frequency);
|
||||||
|
DEFINE_EM_DBG_SHOW(power, power);
|
||||||
|
DEFINE_EM_DBG_SHOW(cost, cost);
|
||||||
|
DEFINE_EM_DBG_SHOW(performance, performance);
|
||||||
|
DEFINE_EM_DBG_SHOW(flags, inefficiency);
|
||||||
|
|
||||||
|
static void em_debug_create_ps(struct em_perf_domain *em_pd,
|
||||||
|
struct em_dbg_info *em_dbg, int i,
|
||||||
|
struct dentry *pd)
|
||||||
{
|
{
|
||||||
|
struct em_perf_state *table;
|
||||||
|
unsigned long freq;
|
||||||
struct dentry *d;
|
struct dentry *d;
|
||||||
char name[24];
|
char name[24];
|
||||||
|
|
||||||
snprintf(name, sizeof(name), "ps:%lu", ps->frequency);
|
em_dbg[i].pd = em_pd;
|
||||||
|
em_dbg[i].ps_id = i;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
table = em_perf_state_from_pd(em_pd);
|
||||||
|
freq = table[i].frequency;
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
snprintf(name, sizeof(name), "ps:%lu", freq);
|
||||||
|
|
||||||
/* Create per-ps directory */
|
/* Create per-ps directory */
|
||||||
d = debugfs_create_dir(name, pd);
|
d = debugfs_create_dir(name, pd);
|
||||||
debugfs_create_ulong("frequency", 0444, d, &ps->frequency);
|
debugfs_create_file("frequency", 0444, d, &em_dbg[i],
|
||||||
debugfs_create_ulong("power", 0444, d, &ps->power);
|
&em_debug_frequency_fops);
|
||||||
debugfs_create_ulong("cost", 0444, d, &ps->cost);
|
debugfs_create_file("power", 0444, d, &em_dbg[i],
|
||||||
debugfs_create_ulong("inefficient", 0444, d, &ps->flags);
|
&em_debug_power_fops);
|
||||||
|
debugfs_create_file("cost", 0444, d, &em_dbg[i],
|
||||||
|
&em_debug_cost_fops);
|
||||||
|
debugfs_create_file("performance", 0444, d, &em_dbg[i],
|
||||||
|
&em_debug_performance_fops);
|
||||||
|
debugfs_create_file("inefficient", 0444, d, &em_dbg[i],
|
||||||
|
&em_debug_inefficiency_fops);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int em_debug_cpus_show(struct seq_file *s, void *unused)
|
static int em_debug_cpus_show(struct seq_file *s, void *unused)
|
||||||
|
@ -66,6 +118,7 @@ DEFINE_SHOW_ATTRIBUTE(em_debug_flags);
|
||||||
|
|
||||||
static void em_debug_create_pd(struct device *dev)
|
static void em_debug_create_pd(struct device *dev)
|
||||||
{
|
{
|
||||||
|
struct em_dbg_info *em_dbg;
|
||||||
struct dentry *d;
|
struct dentry *d;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@ -79,9 +132,14 @@ static void em_debug_create_pd(struct device *dev)
|
||||||
debugfs_create_file("flags", 0444, d, dev->em_pd,
|
debugfs_create_file("flags", 0444, d, dev->em_pd,
|
||||||
&em_debug_flags_fops);
|
&em_debug_flags_fops);
|
||||||
|
|
||||||
|
em_dbg = devm_kcalloc(dev, dev->em_pd->nr_perf_states,
|
||||||
|
sizeof(*em_dbg), GFP_KERNEL);
|
||||||
|
if (!em_dbg)
|
||||||
|
return;
|
||||||
|
|
||||||
/* Create a sub-directory for each performance state */
|
/* Create a sub-directory for each performance state */
|
||||||
for (i = 0; i < dev->em_pd->nr_perf_states; i++)
|
for (i = 0; i < dev->em_pd->nr_perf_states; i++)
|
||||||
em_debug_create_ps(&dev->em_pd->table[i], d);
|
em_debug_create_ps(dev->em_pd, em_dbg, i, d);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -103,72 +161,105 @@ static void em_debug_create_pd(struct device *dev) {}
 static void em_debug_remove_pd(struct device *dev) {}
 #endif
 
-static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
-				int nr_states, struct em_data_callback *cb,
-				unsigned long flags)
+static void em_destroy_table_rcu(struct rcu_head *rp)
 {
-	unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
-	struct em_perf_state *table;
-	int i, ret;
-	u64 fmax;
+	struct em_perf_table __rcu *table;
 
-	table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
+	table = container_of(rp, struct em_perf_table, rcu);
+	kfree(table);
+}
+
+static void em_release_table_kref(struct kref *kref)
+{
+	struct em_perf_table __rcu *table;
+
+	/* It was the last owner of this table so we can free */
+	table = container_of(kref, struct em_perf_table, kref);
+
+	call_rcu(&table->rcu, em_destroy_table_rcu);
+}
+
+/**
+ * em_table_free() - Handles safe free of the EM table when needed
+ * @table : EM table which is going to be freed
+ *
+ * No return values.
+ */
+void em_table_free(struct em_perf_table __rcu *table)
+{
+	kref_put(&table->kref, em_release_table_kref);
+}
+
+/**
+ * em_table_alloc() - Allocate a new EM table
+ * @pd : EM performance domain for which this must be done
+ *
+ * Allocate a new EM table and initialize its kref to indicate that it
+ * has a user.
+ * Returns allocated table or NULL.
+ */
+struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd)
+{
+	struct em_perf_table __rcu *table;
+	int table_size;
+
+	table_size = sizeof(struct em_perf_state) * pd->nr_perf_states;
+
+	table = kzalloc(sizeof(*table) + table_size, GFP_KERNEL);
 	if (!table)
-		return -ENOMEM;
+		return NULL;
 
-	/* Build the list of performance states for this performance domain */
-	for (i = 0, freq = 0; i < nr_states; i++, freq++) {
-		/*
-		 * active_power() is a driver callback which ceils 'freq' to
-		 * lowest performance state of 'dev' above 'freq' and updates
-		 * 'power' and 'freq' accordingly.
-		 */
-		ret = cb->active_power(dev, &power, &freq);
-		if (ret) {
-			dev_err(dev, "EM: invalid perf. state: %d\n",
-				ret);
-			goto free_ps_table;
-		}
+	kref_init(&table->kref);
 
-		/*
-		 * We expect the driver callback to increase the frequency for
-		 * higher performance states.
-		 */
-		if (freq <= prev_freq) {
-			dev_err(dev, "EM: non-increasing freq: %lu\n",
-				freq);
-			goto free_ps_table;
-		}
+	return table;
+}
 
-		/*
-		 * The power returned by active_state() is expected to be
-		 * positive and be in range.
-		 */
-		if (!power || power > EM_MAX_POWER) {
-			dev_err(dev, "EM: invalid power: %lu\n",
-				power);
-			goto free_ps_table;
-		}
+static void em_init_performance(struct device *dev, struct em_perf_domain *pd,
+				struct em_perf_state *table, int nr_states)
+{
+	u64 fmax, max_cap;
+	int i, cpu;
 
-		table[i].power = power;
-		table[i].frequency = prev_freq = freq;
-	}
+	/* This is needed only for CPUs; EAS skips other devices */
+	if (!_is_cpu_device(dev))
+		return;
+
+	cpu = cpumask_first(em_span_cpus(pd));
+
+	/*
+	 * Calculate the performance value for each frequency with a
+	 * linear relationship. The final CPU capacity might not be ready at
+	 * boot time, but the EM will be updated a bit later with the
+	 * correct one.
+	 */
+	fmax = (u64) table[nr_states - 1].frequency;
+	max_cap = (u64) arch_scale_cpu_capacity(cpu);
+	for (i = 0; i < nr_states; i++)
+		table[i].performance = div64_u64(max_cap * table[i].frequency,
+						 fmax);
+}
+
+static int em_compute_costs(struct device *dev, struct em_perf_state *table,
+			    struct em_data_callback *cb, int nr_states,
+			    unsigned long flags)
+{
+	unsigned long prev_cost = ULONG_MAX;
+	int i, ret;
 
 	/* Compute the cost of each performance state. */
-	fmax = (u64) table[nr_states - 1].frequency;
 	for (i = nr_states - 1; i >= 0; i--) {
 		unsigned long power_res, cost;
 
-		if (flags & EM_PERF_DOMAIN_ARTIFICIAL) {
+		if ((flags & EM_PERF_DOMAIN_ARTIFICIAL) && cb->get_cost) {
 			ret = cb->get_cost(dev, table[i].frequency, &cost);
 			if (ret || !cost || cost > EM_MAX_POWER) {
 				dev_err(dev, "EM: invalid cost %lu %d\n",
 					cost, ret);
-				goto free_ps_table;
+				return -EINVAL;
 			}
 		} else {
-			power_res = table[i].power;
-			cost = div64_u64(fmax * power_res, table[i].frequency);
+			/* increase resolution of 'cost' precision */
+			power_res = table[i].power * 10;
+			cost = power_res / table[i].performance;
 		}
 
 		table[i].cost = cost;
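The lifetime rules above are the core of the runtime-modifiable design: a
table starts at refcount 1 for its creator, every published user takes a
kref, and the final kref_put() defers the kfree() behind an RCU grace period
so lockless readers never see freed memory. A minimal reader sketch under
those rules, assuming the in-file EM context (the wrapper function is
hypothetical; em_perf_state_from_pd() is the accessor added by this series):

static unsigned long example_max_state_power(struct em_perf_domain *pd)
{
	struct em_perf_state *table;
	unsigned long power;

	rcu_read_lock();
	/* Fetch whichever table is currently published for this domain */
	table = em_perf_state_from_pd(pd);
	power = table[pd->nr_perf_states - 1].power;
	rcu_read_unlock();

	return power;
}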
@@ -182,20 +273,133 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
 		}
 	}
 
-	pd->table = table;
-	pd->nr_perf_states = nr_states;
+	return 0;
+}
+
+/**
+ * em_dev_compute_costs() - Calculate cost values for new runtime EM table
+ * @dev : Device for which the EM table is to be updated
+ * @table : The new EM table that is going to get the costs calculated
+ * @nr_states : Number of performance states
+ *
+ * Calculate the em_perf_state::cost values for new runtime EM table. The
+ * values are used for EAS during task placement. It also calculates and sets
+ * the efficiency flag for each performance state. When the function finishes
+ * successfully the EM table is ready to be updated and used by EAS.
+ *
+ * Return 0 on success or a proper error in case of failure.
+ */
+int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
+			 int nr_states)
+{
+	return em_compute_costs(dev, table, NULL, nr_states, 0);
+}
+
+/**
+ * em_dev_update_perf_domain() - Update runtime EM table for a device
+ * @dev : Device for which the EM is to be updated
+ * @new_table : The new EM table that is going to be used from now
+ *
+ * Update EM runtime modifiable table for the @dev using the provided
+ * @new_table.
+ *
+ * This function uses a mutex to serialize writers, so it must not be called
+ * from a non-sleeping context.
+ *
+ * Return 0 on success or an error code on failure.
+ */
+int em_dev_update_perf_domain(struct device *dev,
+			      struct em_perf_table __rcu *new_table)
+{
+	struct em_perf_table __rcu *old_table;
+	struct em_perf_domain *pd;
+
+	if (!dev)
+		return -EINVAL;
+
+	/* Serialize update/unregister or concurrent updates */
+	mutex_lock(&em_pd_mutex);
+
+	if (!dev->em_pd) {
+		mutex_unlock(&em_pd_mutex);
+		return -EINVAL;
+	}
+	pd = dev->em_pd;
+
+	kref_get(&new_table->kref);
+
+	old_table = pd->em_table;
+	rcu_assign_pointer(pd->em_table, new_table);
+
+	em_cpufreq_update_efficiencies(dev, new_table->state);
+
+	em_table_free(old_table);
+
+	mutex_unlock(&em_pd_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(em_dev_update_perf_domain);
+
+static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
+				struct em_perf_state *table,
+				struct em_data_callback *cb,
+				unsigned long flags)
+{
+	unsigned long power, freq, prev_freq = 0;
+	int nr_states = pd->nr_perf_states;
+	int i, ret;
+
+	/* Build the list of performance states for this performance domain */
+	for (i = 0, freq = 0; i < nr_states; i++, freq++) {
+		/*
+		 * active_power() is a driver callback which ceils 'freq' to
+		 * lowest performance state of 'dev' above 'freq' and updates
+		 * 'power' and 'freq' accordingly.
+		 */
+		ret = cb->active_power(dev, &power, &freq);
+		if (ret) {
+			dev_err(dev, "EM: invalid perf. state: %d\n",
+				ret);
+			return -EINVAL;
+		}
+
+		/*
+		 * We expect the driver callback to increase the frequency for
+		 * higher performance states.
+		 */
+		if (freq <= prev_freq) {
+			dev_err(dev, "EM: non-increasing freq: %lu\n",
+				freq);
+			return -EINVAL;
+		}
+
+		/*
+		 * The power returned by active_state() is expected to be
+		 * positive and be in range.
+		 */
+		if (!power || power > EM_MAX_POWER) {
+			dev_err(dev, "EM: invalid power: %lu\n",
+				power);
+			return -EINVAL;
+		}
+
+		table[i].power = power;
+		table[i].frequency = prev_freq = freq;
+	}
+
+	em_init_performance(dev, pd, table, nr_states);
+
+	ret = em_compute_costs(dev, table, cb, nr_states, flags);
+	if (ret)
+		return -EINVAL;
 
 	return 0;
-
-free_ps_table:
-	kfree(table);
-	return -EINVAL;
 }
 
 static int em_create_pd(struct device *dev, int nr_states,
 			struct em_data_callback *cb, cpumask_t *cpus,
 			unsigned long flags)
 {
+	struct em_perf_table __rcu *em_table;
 	struct em_perf_domain *pd;
 	struct device *cpu_dev;
 	int cpu, ret, num_cpus;
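Taken together, em_table_alloc(), em_dev_compute_costs() and
em_dev_update_perf_domain() give a driver a complete update sequence that
must run in sleeping context. A sketch of such an updater, assuming a
hypothetical driver policy that inflates the power values to account for
leakage (the function and the 10% policy are illustrative, not from this
series):

static int example_update_em(struct device *dev, struct em_perf_domain *pd)
{
	struct em_perf_table __rcu *new_table;
	struct em_perf_state *old_ps, *new_ps;
	int ret, i;

	new_table = em_table_alloc(pd);
	if (!new_table)
		return -ENOMEM;
	new_ps = new_table->state;

	/* Start from the currently published states */
	rcu_read_lock();
	old_ps = em_perf_state_from_pd(pd);
	memcpy(new_ps, old_ps,
	       sizeof(struct em_perf_state) * pd->nr_perf_states);
	rcu_read_unlock();

	/* Hypothetical driver policy: add a 10% static-leakage estimate */
	for (i = 0; i < pd->nr_perf_states; i++)
		new_ps[i].power += new_ps[i].power / 10;

	ret = em_dev_compute_costs(dev, new_ps, pd->nr_perf_states);
	if (!ret)
		ret = em_dev_update_perf_domain(dev, new_table);

	/* Drop the updater's reference; the EM core now holds its own */
	em_table_free(new_table);

	return ret;
}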
@@ -220,11 +424,17 @@ static int em_create_pd(struct device *dev, int nr_states,
 		return -ENOMEM;
 	}
 
-	ret = em_create_perf_table(dev, pd, nr_states, cb, flags);
-	if (ret) {
-		kfree(pd);
-		return ret;
-	}
+	pd->nr_perf_states = nr_states;
+
+	em_table = em_table_alloc(pd);
+	if (!em_table)
+		goto free_pd;
+
+	ret = em_create_perf_table(dev, pd, em_table->state, cb, flags);
+	if (ret)
+		goto free_pd_table;
+
+	rcu_assign_pointer(pd->em_table, em_table);
 
 	if (_is_cpu_device(dev))
 		for_each_cpu(cpu, cpus) {
@@ -235,26 +445,37 @@ static int em_create_pd(struct device *dev, int nr_states,
 	dev->em_pd = pd;
 
 	return 0;
+
+free_pd_table:
+	kfree(em_table);
+free_pd:
+	kfree(pd);
+	return -EINVAL;
 }
 
-static void em_cpufreq_update_efficiencies(struct device *dev)
+static void
+em_cpufreq_update_efficiencies(struct device *dev, struct em_perf_state *table)
 {
 	struct em_perf_domain *pd = dev->em_pd;
-	struct em_perf_state *table;
 	struct cpufreq_policy *policy;
 	int found = 0;
-	int i;
+	int i, cpu;
 
-	if (!_is_cpu_device(dev) || !pd)
+	if (!_is_cpu_device(dev))
 		return;
 
-	policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd)));
-	if (!policy) {
-		dev_warn(dev, "EM: Access to CPUFreq policy failed");
+	/* Try to get a CPU which is active and in this PD */
+	cpu = cpumask_first_and(em_span_cpus(pd), cpu_active_mask);
+	if (cpu >= nr_cpu_ids) {
+		dev_warn(dev, "EM: No online CPU for CPUFreq policy\n");
 		return;
 	}
 
-	table = pd->table;
+	policy = cpufreq_cpu_get(cpu);
+	if (!policy) {
+		dev_warn(dev, "EM: Access to CPUFreq policy failed\n");
+		return;
+	}
 
 	for (i = 0; i < pd->nr_perf_states; i++) {
 		if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT))
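em_cpufreq_update_efficiencies() now receives the table to scan explicitly,
since the published table can be swapped under it. The inefficiency flag
itself is purely a property of the cost column: walking from the highest
state down, a state is inefficient when it is not strictly cheaper than the
best state above it. A standalone sketch of that rule (the in-tree marking
lives in the elided tail of em_compute_costs(); this helper is illustrative
only):

static void example_mark_inefficient(struct em_perf_state *table,
				     int nr_states)
{
	unsigned long prev_cost = ULONG_MAX;
	int i;

	for (i = nr_states - 1; i >= 0; i--) {
		/* Not strictly cheaper than a higher state: skippable */
		if (table[i].cost >= prev_cost)
			table[i].flags |= EM_PERF_STATE_INEFFICIENT;
		else
			prev_cost = table[i].cost;
	}
}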
@@ -397,13 +618,17 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
 
 	dev->em_pd->flags |= flags;
 
-	em_cpufreq_update_efficiencies(dev);
+	em_cpufreq_update_efficiencies(dev, dev->em_pd->em_table->state);
 
 	em_debug_create_pd(dev);
 	dev_info(dev, "EM: created perf domain\n");
 
 unlock:
 	mutex_unlock(&em_pd_mutex);
+
+	if (_is_cpu_device(dev))
+		em_check_capacity_update();
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
@@ -430,9 +655,125 @@ void em_dev_unregister_perf_domain(struct device *dev)
 	mutex_lock(&em_pd_mutex);
 	em_debug_remove_pd(dev);
 
-	kfree(dev->em_pd->table);
+	em_table_free(dev->em_pd->em_table);
 
 	kfree(dev->em_pd);
 	dev->em_pd = NULL;
 	mutex_unlock(&em_pd_mutex);
 }
 EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);
+
+/*
+ * Adjustment of CPU performance values after boot, when all CPUs' capacities
+ * are correctly calculated.
+ */
+static void em_adjust_new_capacity(struct device *dev,
+				   struct em_perf_domain *pd,
+				   u64 max_cap)
+{
+	struct em_perf_table __rcu *em_table;
+	struct em_perf_state *ps, *new_ps;
+	int ret, ps_size;
+
+	em_table = em_table_alloc(pd);
+	if (!em_table) {
+		dev_warn(dev, "EM: allocation failed\n");
+		return;
+	}
+
+	new_ps = em_table->state;
+
+	rcu_read_lock();
+	ps = em_perf_state_from_pd(pd);
+	/* Initialize data based on old table */
+	ps_size = sizeof(struct em_perf_state) * pd->nr_perf_states;
+	memcpy(new_ps, ps, ps_size);
+
+	rcu_read_unlock();
+
+	em_init_performance(dev, pd, new_ps, pd->nr_perf_states);
+	ret = em_compute_costs(dev, new_ps, NULL, pd->nr_perf_states,
+			       pd->flags);
+	if (ret) {
+		dev_warn(dev, "EM: compute costs failed\n");
+		return;
+	}
+
+	ret = em_dev_update_perf_domain(dev, em_table);
+	if (ret)
+		dev_warn(dev, "EM: update failed %d\n", ret);
+
+	/*
+	 * This is a one-time update, so give up the ownership in this
+	 * updater. The EM framework has incremented the usage counter and
+	 * from now on will keep the reference (then free the memory when
+	 * needed).
+	 */
+	em_table_free(em_table);
+}
+
+static void em_check_capacity_update(void)
+{
+	cpumask_var_t cpu_done_mask;
+	struct em_perf_state *table;
+	struct em_perf_domain *pd;
+	unsigned long cpu_capacity;
+	int cpu;
+
+	if (!zalloc_cpumask_var(&cpu_done_mask, GFP_KERNEL)) {
+		pr_warn("no free memory\n");
+		return;
+	}
+
+	/* Check if CPU capacity has changed, then update the EM */
+	for_each_possible_cpu(cpu) {
+		struct cpufreq_policy *policy;
+		unsigned long em_max_perf;
+		struct device *dev;
+
+		if (cpumask_test_cpu(cpu, cpu_done_mask))
+			continue;
+
+		policy = cpufreq_cpu_get(cpu);
+		if (!policy) {
+			pr_debug("Accessing cpu%d policy failed\n", cpu);
+			schedule_delayed_work(&em_update_work,
+					      msecs_to_jiffies(1000));
+			break;
+		}
+		cpufreq_cpu_put(policy);
+
+		pd = em_cpu_get(cpu);
+		if (!pd || em_is_artificial(pd))
+			continue;
+
+		cpumask_or(cpu_done_mask, cpu_done_mask,
+			   em_span_cpus(pd));
+
+		cpu_capacity = arch_scale_cpu_capacity(cpu);
+
+		rcu_read_lock();
+		table = em_perf_state_from_pd(pd);
+		em_max_perf = table[pd->nr_perf_states - 1].performance;
+		rcu_read_unlock();
+
+		/*
+		 * Check if the CPU capacity has been adjusted during boot
+		 * and trigger the update for new performance values.
+		 */
+		if (em_max_perf == cpu_capacity)
+			continue;
+
+		pr_debug("updating cpu%d cpu_cap=%lu old capacity=%lu\n",
+			 cpu, cpu_capacity, em_max_perf);
+
+		dev = get_cpu_device(cpu);
+		em_adjust_new_capacity(dev, pd, cpu_capacity);
+	}
+
+	free_cpumask_var(cpu_done_mask);
+}
+
+static void em_update_workfn(struct work_struct *work)
+{
+	em_check_capacity_update();
+}
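The retry path in em_check_capacity_update() reschedules em_update_work when
a cpufreq policy is not ready yet, so the capacity check runs again once late
CPUs have booted. A sketch of the supporting declarations this presumably
relies on, placed earlier in the file in the real source (the ordering shown
is an assumption; only the names come from the callers above):

static void em_update_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(em_update_work, em_update_workfn);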