Skip to content

Commit b06ccba

Browse files
committed
sched_ext: Fix starvation of scx_enable() under fair-class saturation
During scx_enable(), the READY -> ENABLED task switching loop changes the calling thread's sched_class from fair to ext. Since fair has higher priority than ext, saturating fair-class workloads can indefinitely starve the enable thread, hanging the system. This was introduced when the enable path switched from preempt_disable() to scx_bypass() which doesn't protect against fair-class starvation. Note that the original preempt_disable() protection wasn't complete either - in partial switch modes, the calling thread could still be starved after preempt_enable() as it may have been switched to ext class.

Fix it by offloading the enable body to a dedicated system-wide RT (SCHED_FIFO) kthread which cannot be starved by either fair or ext class tasks. scx_enable() lazily creates the kthread on first use and passes the ops pointer through a struct scx_enable_cmd containing the kthread_work, then synchronously waits for completion.

The workfn runs on a different kthread from sch->helper (which runs disable_work), so it can safely flush disable_work on the error path without deadlock.

Fixes: 8c2090c ("sched_ext: Initialize in bypass mode")
Cc: stable@vger.kernel.org # v6.12+
Signed-off-by: Tejun Heo <tj@kernel.org>
1 parent 1336b57 commit b06ccba

File tree

1 file changed

+56
-10
lines changed

1 file changed

+56
-10
lines changed

kernel/sched/ext.c

Lines changed: 56 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4975,20 +4975,30 @@ static int validate_ops(struct scx_sched *sch, const struct sched_ext_ops *ops)
49754975
return 0;
49764976
}
49774977

4978-
static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
4978+
/*
4979+
* scx_enable() is offloaded to a dedicated system-wide RT kthread to avoid
4980+
* starvation. During the READY -> ENABLED task switching loop, the calling
4981+
* thread's sched_class gets switched from fair to ext. As fair has higher
4982+
* priority than ext, the calling thread can be indefinitely starved under
4983+
* fair-class saturation, leading to a system hang.
4984+
*/
4985+
/*
 * Request handed from scx_enable() to scx_enable_workfn().
 *
 * @work: work item queued on the enable helper kthread; the work function
 *        recovers the enclosing command from it with container_of()
 * @ops:  ops pointer supplied by the scx_enable() caller
 * @ret:  result stored by the work function; scx_enable() returns it after
 *        flushing @work
 *
 * scx_enable() keeps the command in a local variable and flushes @work
 * before returning.
 */
struct scx_enable_cmd {
4986+
struct kthread_work work;
4987+
struct sched_ext_ops *ops;
4988+
int ret;
4989+
};
4990+
4991+
static void scx_enable_workfn(struct kthread_work *work)
49794992
{
4993+
struct scx_enable_cmd *cmd =
4994+
container_of(work, struct scx_enable_cmd, work);
4995+
struct sched_ext_ops *ops = cmd->ops;
49804996
struct scx_sched *sch;
49814997
struct scx_task_iter sti;
49824998
struct task_struct *p;
49834999
unsigned long timeout;
49845000
int i, cpu, ret;
49855001

4986-
if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
4987-
cpu_possible_mask)) {
4988-
pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
4989-
return -EINVAL;
4990-
}
4991-
49925002
mutex_lock(&scx_enable_mutex);
49935003

49945004
if (scx_enable_state() != SCX_DISABLED) {
@@ -5205,13 +5215,15 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
52055215

52065216
atomic_long_inc(&scx_enable_seq);
52075217

5208-
return 0;
5218+
cmd->ret = 0;
5219+
return;
52095220

52105221
err_free_ksyncs:
52115222
free_kick_syncs();
52125223
err_unlock:
52135224
mutex_unlock(&scx_enable_mutex);
5214-
return ret;
5225+
cmd->ret = ret;
5226+
return;
52155227

52165228
err_disable_unlock_all:
52175229
scx_cgroup_unlock();
@@ -5230,7 +5242,41 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
52305242
*/
52315243
scx_error(sch, "scx_enable() failed (%d)", ret);
52325244
kthread_flush_work(&sch->disable_work);
5233-
return 0;
5245+
cmd->ret = 0;
5246+
}
5247+
5248+
static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
5249+
{
5250+
static struct kthread_worker *helper;
5251+
static DEFINE_MUTEX(helper_mutex);
5252+
struct scx_enable_cmd cmd;
5253+
5254+
if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
5255+
cpu_possible_mask)) {
5256+
pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
5257+
return -EINVAL;
5258+
}
5259+
5260+
if (!READ_ONCE(helper)) {
5261+
mutex_lock(&helper_mutex);
5262+
if (!helper) {
5263+
helper = kthread_run_worker(0, "scx_enable_helper");
5264+
if (IS_ERR_OR_NULL(helper)) {
5265+
helper = NULL;
5266+
mutex_unlock(&helper_mutex);
5267+
return -ENOMEM;
5268+
}
5269+
sched_set_fifo(helper->task);
5270+
}
5271+
mutex_unlock(&helper_mutex);
5272+
}
5273+
5274+
kthread_init_work(&cmd.work, scx_enable_workfn);
5275+
cmd.ops = ops;
5276+
5277+
kthread_queue_work(READ_ONCE(helper), &cmd.work);
5278+
kthread_flush_work(&cmd.work);
5279+
return cmd.ret;
52345280
}
52355281

52365282

0 commit comments

Comments
 (0)