sched_ext: Fixes for v6.16-rc1

Two fixes in the built-in idle selection helpers.
 
 - Fix prev_cpu handling to guarantee that idle selection never returns a CPU
   that's not allowed.
 
 - Skip cross-node search when !NUMA, which could otherwise lead to infinite
   looping due to a bug in the NUMA iterator.
 -----BEGIN PGP SIGNATURE-----
 
 iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCaECWBw4cdGpAa2VybmVs
 Lm9yZwAKCRCxYfJx3gVYGfV9AP4mk63TUsLdqwa6iqE0dZArOAhj8QXETsv1Q0+y
 FGQKEQEAxPNJx3ifhzgXoQi1SOZkqeq44erbcy0x7owtb+QBVgs=
 =G1uN
 -----END PGP SIGNATURE-----

Merge tag 'sched_ext-for-6.16-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:
 "Two fixes in the built-in idle selection helpers:

   - Fix prev_cpu handling to guarantee that idle selection never
     returns a CPU that's not allowed

   - Skip cross-node search when !NUMA, which could otherwise lead to
     infinite looping due to a bug in the NUMA iterator"

* tag 'sched_ext-for-6.16-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: idle: Skip cross-node search with !CONFIG_NUMA
  sched_ext: idle: Properly handle invalid prev_cpu during idle selection
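
For context, the crux of the prev_cpu fix: test @prev_cpu against the allowed
cpumask once, up front, and gate every prev_cpu fast path on the cached result.
Below is a minimal standalone sketch of that pattern, not the kernel
implementation; a plain uint64_t stands in for struct cpumask and
test_and_clear_idle() stands in for scx_idle_test_and_clear_cpu():

/* prev_gate.c - toy model of the prev_cpu gating fix (not kernel code) */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t cpumask_t;             /* bit N set => CPU N is in the mask */

static bool mask_test(int cpu, cpumask_t mask)
{
        return mask & (1ULL << cpu);
}

/* Claim @cpu if it is currently idle, clearing its idle bit. */
static bool test_and_clear_idle(cpumask_t *idle, int cpu)
{
        if (!mask_test(cpu, *idle))
                return false;
        *idle &= ~(1ULL << cpu);
        return true;
}

static int select_cpu(int prev_cpu, cpumask_t allowed, cpumask_t *idle)
{
        /* The fix: test @prev_cpu's membership once... */
        bool is_prev_allowed = mask_test(prev_cpu, allowed);

        /* ...and gate every prev_cpu fast path on the cached result. */
        if (is_prev_allowed && test_and_clear_idle(idle, prev_cpu))
                return prev_cpu;

        /* Fall back to any idle CPU within the allowed set. */
        for (int cpu = 0; cpu < 64; cpu++)
                if (mask_test(cpu, allowed) && test_and_clear_idle(idle, cpu))
                        return cpu;

        return -1;      /* the kernel returns -EBUSY here */
}

int main(void)
{
        cpumask_t idle = 0x0A;          /* CPUs 1 and 3 are idle */
        cpumask_t allowed = 0x06;       /* task may run on CPUs 1 and 2 */

        /* CPU 3 is idle but disallowed: it must never be selected. */
        printf("selected CPU: %d\n", select_cpu(3, allowed, &idle));    /* 1 */
        return 0;
}

Caching the test also keeps the fast paths cheap: the allowed mask is
consulted once per wakeup rather than at each shortcut.
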
commit 16b70698aa
Author: Linus Torvalds
Date:   2025-06-04 12:07:16 -07:00

--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -138,6 +138,7 @@ found:
                 goto retry;
 }
 
+#ifdef CONFIG_NUMA
 /*
  * Tracks nodes that have not yet been visited when searching for an idle
  * CPU across all available nodes.
@@ -186,6 +187,13 @@ static s32 pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, i
 
         return cpu;
 }
+#else
+static inline s32
+pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, int node, u64 flags)
+{
+        return -EBUSY;
+}
+#endif
 
 /*
  * Find an idle CPU in the system, starting from @node.
@@ -447,10 +455,17 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
         const struct cpumask *llc_cpus = NULL, *numa_cpus = NULL;
         const struct cpumask *allowed = cpus_allowed ?: p->cpus_ptr;
         int node = scx_cpu_node_if_enabled(prev_cpu);
+        bool is_prev_allowed;
         s32 cpu;
 
         preempt_disable();
 
+        /*
+         * Check whether @prev_cpu is still within the allowed set. If not,
+         * we can still try selecting a nearby CPU.
+         */
+        is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed);
+
         /*
          * Determine the subset of CPUs usable by @p within @cpus_allowed.
          */
@@ -465,21 +480,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
                         cpu = -EBUSY;
                         goto out_enable;
                 }
-
-                /*
-                 * If @prev_cpu is not in the allowed CPUs, skip topology
-                 * optimizations and try to pick any idle CPU usable by the
-                 * task.
-                 *
-                 * If %SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled, prioritize
-                 * the current node, as it may optimize some waker->wakee
-                 * workloads.
-                 */
-                if (!cpumask_test_cpu(prev_cpu, allowed)) {
-                        node = scx_cpu_node_if_enabled(smp_processor_id());
-                        cpu = scx_pick_idle_cpu(allowed, node, flags);
-                        goto out_enable;
-                }
         }
 
         /*
@@ -525,7 +525,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
          * then avoid a migration.
          */
         cpu = smp_processor_id();
-        if (cpus_share_cache(cpu, prev_cpu) &&
+        if (is_prev_allowed && cpus_share_cache(cpu, prev_cpu) &&
             scx_idle_test_and_clear_cpu(prev_cpu)) {
                 cpu = prev_cpu;
                 goto out_unlock;
@@ -562,7 +562,8 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
                 /*
                  * Keep using @prev_cpu if it's part of a fully idle core.
                  */
-                if (cpumask_test_cpu(prev_cpu, idle_cpumask(node)->smt) &&
+                if (is_prev_allowed &&
+                    cpumask_test_cpu(prev_cpu, idle_cpumask(node)->smt) &&
                     scx_idle_test_and_clear_cpu(prev_cpu)) {
                         cpu = prev_cpu;
                         goto out_unlock;
@@ -611,7 +612,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
         /*
          * Use @prev_cpu if it's idle.
          */
-        if (scx_idle_test_and_clear_cpu(prev_cpu)) {
+        if (is_prev_allowed && scx_idle_test_and_clear_cpu(prev_cpu)) {
                 cpu = prev_cpu;
                 goto out_unlock;
         }
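
The !CONFIG_NUMA hunk applies the usual kernel compile-out idiom: the real
cross-node search exists only under #ifdef CONFIG_NUMA, and otherwise a
static inline stub reporting -EBUSY takes its place, so call sites need no
conditionals of their own and the buggy NUMA iterator is never entered.
A minimal sketch of the same idiom, with a hypothetical config option and
function name, not the kernel code:

/* stub_idiom.c - sketch of the #ifdef/stub pattern used by the fix */
#include <errno.h>
#include <stdio.h>

#ifdef CONFIG_WIDE_SEARCH               /* hypothetical config option */
static int search_other_groups(int group)
{
        /* ...the real cross-group search would live here... */
        (void)group;
        return 0;
}
#else
/* Feature compiled out: a trivial stub keeps every call site unchanged. */
static inline int search_other_groups(int group)
{
        (void)group;
        return -EBUSY;
}
#endif

int main(void)
{
        /* Callers treat -EBUSY as "nothing found", with no #ifdef of their own. */
        if (search_other_groups(0) == -EBUSY)
                printf("no remote group to search, falling back\n");
        return 0;
}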