sched/fair: Commit to lag based placement

Removes the FAIR_SLEEPERS code in favour of the new LAG based placement. Specifically, the whole FAIR_SLEEPER thing was a very crude approximation to make up for the lack of lag based placement, specifically the 'service owed' part. This is important for things like 'starve' and 'hackbench'. One side effect of FAIR_SLEEPER is that it caused 'small' unfairness, specifically, by always ignoring up-to 'thresh' sleeptime it would have a 50%/50% time distribution for a 50% sleeper vs a 100% runner, while strictly speaking this should (of course) result in a 33%/67% split (as CFS will also do if the sleep period exceeds 'thresh'). Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Link: https://lore.kernel.org/r/20230531124604.000198861@infradead.org
2025-08-05 16:54:27 +00:00 · 2023-05-31 13:58:45 +02:00 · 2023-05-31 13:58:45 +02:00 · 76cae9dbe1
commit 76cae9dbe1
parent 147f3efaa2
2 changed files with 1 additions and 66 deletions
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@ -5068,29 +5068,6 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 #endif
 }

-static inline bool entity_is_long_sleeper(struct sched_entity *se)
-{
-	struct cfs_rq *cfs_rq;
-	u64 sleep_time;
-
-	if (se->exec_start == 0)
-		return false;
-
-	cfs_rq = cfs_rq_of(se);
-
-	sleep_time = rq_clock_task(rq_of(cfs_rq));
-
-	/* Happen while migrating because of clock task divergence */
-	if (sleep_time <= se->exec_start)
-		return false;
-
-	sleep_time -= se->exec_start;
-	if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
-		return true;
-
-	return false;
-}
-
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
@ -5172,43 +5149,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 		if (WARN_ON_ONCE(!load))
 			load = 1;
 		lag = div_s64(lag, load);
-
-		vruntime -= lag;
 	}

-	if (sched_feat(FAIR_SLEEPERS)) {
-
-		/* sleeps up to a single latency don't count. */
-		if (!initial) {
-			unsigned long thresh;
-
-			if (se_is_idle(se))
-				thresh = sysctl_sched_min_granularity;
-			else
-				thresh = sysctl_sched_latency;
-
-			/*
-			 * Halve their sleep time's effect, to allow
-			 * for a gentler effect of sleepers:
-			 */
-			if (sched_feat(GENTLE_FAIR_SLEEPERS))
-				thresh >>= 1;
-
-			vruntime -= thresh;
-		}
-
-		/*
-		 * Pull vruntime of the entity being placed to the base level of
-		 * cfs_rq, to prevent boosting it if placed backwards.  If the entity
-		 * slept for a long time, don't even try to compare its vruntime with
-		 * the base as it may be too far off and the comparison may get
-		 * inversed due to s64 overflow.
-		 */
-		if (!entity_is_long_sleeper(se))
-			vruntime = max_vruntime(se->vruntime, vruntime);
-	}
-
-	se->vruntime = vruntime;
+	se->vruntime = vruntime - lag;

 	/*
 	 * When joining the competition; the exisiting tasks will be,
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@ -1,13 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */

-/*
- * Only give sleepers 50% of their service deficit. This allows
- * them to run sooner, but does not allow tons of sleepers to
- * rip the spread apart.
- */
-SCHED_FEAT(FAIR_SLEEPERS, false)
-SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
-
 /*
 * Using the avg_vruntime, do the right thing and preserve lag across
 * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.