mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-05-24 10:39:52 +00:00
sched/eevdf: Fix vruntime adjustment on reweight
vruntime of the (on_rq && !0-lag) entity needs to be adjusted when
it gets re-weighted, and the calculations can be simplified based
on the fact that re-weight won't change the w-average of all the
entities. Please check the proofs in comments.
But adjusting vruntime can also cause position change in RB-tree
hence require re-queue to fix up which might be costly. This might
be avoided by deferring adjustment to the time the entity actually
leaves tree (dequeue/pick), but that will negatively affect task
selection and probably not good enough either.
Fixes: 147f3efaa2
("sched/fair: Implement an EEVDF-like scheduling policy")
Signed-off-by: Abel Wu <wuyun.abel@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20231107090510.71322-2-wuyun.abel@bytedance.com
This commit is contained in:
parent
b85ea95d08
commit
eab03c23c2
1 changed files with 128 additions and 23 deletions
|
@ -3666,41 +3666,140 @@ static inline void
|
||||||
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
|
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
|
static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
|
||||||
unsigned long weight)
|
unsigned long weight)
|
||||||
{
|
{
|
||||||
unsigned long old_weight = se->load.weight;
|
unsigned long old_weight = se->load.weight;
|
||||||
|
u64 avruntime = avg_vruntime(cfs_rq);
|
||||||
|
s64 vlag, vslice;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* VRUNTIME
|
||||||
|
* ========
|
||||||
|
*
|
||||||
|
* COROLLARY #1: The virtual runtime of the entity needs to be
|
||||||
|
* adjusted if re-weight at !0-lag point.
|
||||||
|
*
|
||||||
|
* Proof: For contradiction assume this is not true, so we can
|
||||||
|
* re-weight without changing vruntime at !0-lag point.
|
||||||
|
*
|
||||||
|
* Weight VRuntime Avg-VRuntime
|
||||||
|
* before w v V
|
||||||
|
* after w' v' V'
|
||||||
|
*
|
||||||
|
* Since lag needs to be preserved through re-weight:
|
||||||
|
*
|
||||||
|
* lag = (V - v)*w = (V'- v')*w', where v = v'
|
||||||
|
* ==> V' = (V - v)*w/w' + v (1)
|
||||||
|
*
|
||||||
|
* Let W be the total weight of the entities before reweight,
|
||||||
|
* since V' is the new weighted average of entities:
|
||||||
|
*
|
||||||
|
* V' = (WV + w'v - wv) / (W + w' - w) (2)
|
||||||
|
*
|
||||||
|
* by using (1) & (2) we obtain:
|
||||||
|
*
|
||||||
|
* (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
|
||||||
|
* ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
|
||||||
|
* ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
|
||||||
|
* ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
|
||||||
|
*
|
||||||
|
* Since we are doing at !0-lag point which means V != v, we
|
||||||
|
* can simplify (3):
|
||||||
|
*
|
||||||
|
* ==> W / (W + w' - w) = w / w'
|
||||||
|
* ==> Ww' = Ww + ww' - ww
|
||||||
|
* ==> W * (w' - w) = w * (w' - w)
|
||||||
|
* ==> W = w (re-weight indicates w' != w)
|
||||||
|
*
|
||||||
|
* So the cfs_rq contains only one entity, hence vruntime of
|
||||||
|
* the entity @v should always equal to the cfs_rq's weighted
|
||||||
|
* average vruntime @V, which means we will always re-weight
|
||||||
|
* at 0-lag point, thus breach assumption. Proof completed.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* COROLLARY #2: Re-weight does NOT affect weighted average
|
||||||
|
* vruntime of all the entities.
|
||||||
|
*
|
||||||
|
* Proof: According to corollary #1, Eq. (1) should be:
|
||||||
|
*
|
||||||
|
* (V - v)*w = (V' - v')*w'
|
||||||
|
* ==> v' = V' - (V - v)*w/w' (4)
|
||||||
|
*
|
||||||
|
* According to the weighted average formula, we have:
|
||||||
|
*
|
||||||
|
* V' = (WV - wv + w'v') / (W - w + w')
|
||||||
|
* = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
|
||||||
|
* = (WV - wv + w'V' - Vw + wv) / (W - w + w')
|
||||||
|
* = (WV + w'V' - Vw) / (W - w + w')
|
||||||
|
*
|
||||||
|
* ==> V'*(W - w + w') = WV + w'V' - Vw
|
||||||
|
* ==> V' * (W - w) = (W - w) * V (5)
|
||||||
|
*
|
||||||
|
* If the entity is the only one in the cfs_rq, then reweight
|
||||||
|
* always occurs at 0-lag point, so V won't change. Or else
|
||||||
|
* there are other entities, hence W != w, then Eq. (5) turns
|
||||||
|
* into V' = V. So V won't change in either case, proof done.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* So according to corollary #1 & #2, the effect of re-weight
|
||||||
|
* on vruntime should be:
|
||||||
|
*
|
||||||
|
* v' = V' - (V - v) * w / w' (4)
|
||||||
|
* = V - (V - v) * w / w'
|
||||||
|
* = V - vl * w / w'
|
||||||
|
* = V - vl'
|
||||||
|
*/
|
||||||
|
if (avruntime != se->vruntime) {
|
||||||
|
vlag = (s64)(avruntime - se->vruntime);
|
||||||
|
vlag = div_s64(vlag * old_weight, weight);
|
||||||
|
se->vruntime = avruntime - vlag;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* DEADLINE
|
||||||
|
* ========
|
||||||
|
*
|
||||||
|
* When the weight changes, the virtual time slope changes and
|
||||||
|
* we should adjust the relative virtual deadline accordingly.
|
||||||
|
*
|
||||||
|
* d' = v' + (d - v)*w/w'
|
||||||
|
* = V' - (V - v)*w/w' + (d - v)*w/w'
|
||||||
|
* = V - (V - v)*w/w' + (d - v)*w/w'
|
||||||
|
* = V + (d - V)*w/w'
|
||||||
|
*/
|
||||||
|
vslice = (s64)(se->deadline - avruntime);
|
||||||
|
vslice = div_s64(vslice * old_weight, weight);
|
||||||
|
se->deadline = avruntime + vslice;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
|
||||||
|
unsigned long weight)
|
||||||
|
{
|
||||||
|
bool curr = cfs_rq->curr == se;
|
||||||
|
|
||||||
if (se->on_rq) {
|
if (se->on_rq) {
|
||||||
/* commit outstanding execution time */
|
/* commit outstanding execution time */
|
||||||
if (cfs_rq->curr == se)
|
if (curr)
|
||||||
update_curr(cfs_rq);
|
update_curr(cfs_rq);
|
||||||
else
|
else
|
||||||
avg_vruntime_sub(cfs_rq, se);
|
__dequeue_entity(cfs_rq, se);
|
||||||
update_load_sub(&cfs_rq->load, se->load.weight);
|
update_load_sub(&cfs_rq->load, se->load.weight);
|
||||||
}
|
}
|
||||||
dequeue_load_avg(cfs_rq, se);
|
dequeue_load_avg(cfs_rq, se);
|
||||||
|
|
||||||
update_load_set(&se->load, weight);
|
|
||||||
|
|
||||||
if (!se->on_rq) {
|
if (!se->on_rq) {
|
||||||
/*
|
/*
|
||||||
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
|
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
|
||||||
* we need to scale se->vlag when w_i changes.
|
* we need to scale se->vlag when w_i changes.
|
||||||
*/
|
*/
|
||||||
se->vlag = div_s64(se->vlag * old_weight, weight);
|
se->vlag = div_s64(se->vlag * se->load.weight, weight);
|
||||||
} else {
|
} else {
|
||||||
s64 deadline = se->deadline - se->vruntime;
|
reweight_eevdf(cfs_rq, se, weight);
|
||||||
/*
|
|
||||||
* When the weight changes, the virtual time slope changes and
|
|
||||||
* we should adjust the relative virtual deadline accordingly.
|
|
||||||
*/
|
|
||||||
deadline = div_s64(deadline * old_weight, weight);
|
|
||||||
se->deadline = se->vruntime + deadline;
|
|
||||||
if (se != cfs_rq->curr)
|
|
||||||
min_deadline_cb_propagate(&se->run_node, NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
update_load_set(&se->load, weight);
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
do {
|
do {
|
||||||
u32 divider = get_pelt_divider(&se->avg);
|
u32 divider = get_pelt_divider(&se->avg);
|
||||||
|
@ -3712,8 +3811,17 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
|
||||||
enqueue_load_avg(cfs_rq, se);
|
enqueue_load_avg(cfs_rq, se);
|
||||||
if (se->on_rq) {
|
if (se->on_rq) {
|
||||||
update_load_add(&cfs_rq->load, se->load.weight);
|
update_load_add(&cfs_rq->load, se->load.weight);
|
||||||
if (cfs_rq->curr != se)
|
if (!curr) {
|
||||||
avg_vruntime_add(cfs_rq, se);
|
/*
|
||||||
|
* The entity's vruntime has been adjusted, so let's check
|
||||||
|
* whether the rq-wide min_vruntime needs updated too. Since
|
||||||
|
* the calculations above require stable min_vruntime rather
|
||||||
|
* than up-to-date one, we do the update at the end of the
|
||||||
|
* reweight process.
|
||||||
|
*/
|
||||||
|
__enqueue_entity(cfs_rq, se);
|
||||||
|
update_min_vruntime(cfs_rq);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3857,13 +3965,10 @@ static void update_cfs_group(struct sched_entity *se)
|
||||||
|
|
||||||
#ifndef CONFIG_SMP
|
#ifndef CONFIG_SMP
|
||||||
shares = READ_ONCE(gcfs_rq->tg->shares);
|
shares = READ_ONCE(gcfs_rq->tg->shares);
|
||||||
|
|
||||||
if (likely(se->load.weight == shares))
|
|
||||||
return;
|
|
||||||
#else
|
#else
|
||||||
shares = calc_group_shares(gcfs_rq);
|
shares = calc_group_shares(gcfs_rq);
|
||||||
#endif
|
#endif
|
||||||
|
if (unlikely(se->load.weight != shares))
|
||||||
reweight_entity(cfs_rq_of(se), se, shares);
|
reweight_entity(cfs_rq_of(se), se, shares);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue