Skip to content

Commit 6170625

Browse files
committed
Merge tag 'sched-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar: - Fix zero_vruntime tracking when there's a single task running - Fix slice protection logic - Fix the ->vprot logic for reniced tasks - Fix lag clamping in mixed slice workloads - Fix objtool uaccess warning (and bug) in the !CONFIG_RSEQ_SLICE_EXTENSION case caused by unexpected un-inlining, which triggers with older compilers - Fix a comment in the rseq registration rseq_size bound check code - Fix a legacy RSEQ ABI quirk that handled 32-byte area sizes differently — a special size we have now reached naturally and want to avoid. The visible ugliness of the new reserved field will be avoided the next time the RSEQ area is extended. * tag 'sched-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: rseq: slice ext: Ensure rseq feature size differs from original rseq size rseq: Clarify rseq registration rseq_size bound check comment sched/core: Fix wakeup_preempt's next_class tracking rseq: Mark rseq_arm_slice_extension_timer() __always_inline sched/fair: Fix lag clamp sched/eevdf: Update se->vprot in reweight_entity() sched/fair: Only set slice protection at pick time sched/fair: Fix zero_vruntime tracking
2 parents cb36eab + 3b68df9 commit 6170625

File tree

10 files changed

+172
-52
lines changed

10 files changed

+172
-52
lines changed

fs/binfmt_elf.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include <linux/dax.h>
4848
#include <linux/uaccess.h>
4949
#include <uapi/linux/rseq.h>
50+
#include <linux/rseq.h>
5051
#include <asm/param.h>
5152
#include <asm/page.h>
5253

@@ -286,7 +287,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
286287
}
287288
#ifdef CONFIG_RSEQ
288289
NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
289-
NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq));
290+
NEW_AUX_ENT(AT_RSEQ_ALIGN, rseq_alloc_align());
290291
#endif
291292
#undef NEW_AUX_ENT
292293
/* AT_NULL is zero; clear the rest too */

include/linux/rseq.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,18 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
146146
t->rseq = current->rseq;
147147
}
148148

149+
/*
150+
* Value returned by getauxval(AT_RSEQ_ALIGN) and expected by rseq
151+
* registration. This is the active rseq area size rounded up to next
152+
* power of 2, which guarantees that the rseq structure will always be
153+
* aligned on the nearest power of two large enough to contain it, even
154+
* as it grows.
155+
*/
156+
static inline unsigned int rseq_alloc_align(void)
157+
{
158+
return 1U << get_count_order(offsetof(struct rseq, end));
159+
}
160+
149161
#else /* CONFIG_RSEQ */
150162
static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
151163
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }

include/linux/rseq_entry.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,10 +216,10 @@ static __always_inline bool rseq_grant_slice_extension(bool work_pending)
216216
}
217217

218218
#else /* CONFIG_RSEQ_SLICE_EXTENSION */
219-
static inline bool rseq_slice_extension_enabled(void) { return false; }
220-
static inline bool rseq_arm_slice_extension_timer(void) { return false; }
221-
static inline void rseq_slice_clear_grant(struct task_struct *t) { }
222-
static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
219+
static __always_inline bool rseq_slice_extension_enabled(void) { return false; }
220+
static __always_inline bool rseq_arm_slice_extension_timer(void) { return false; }
221+
static __always_inline void rseq_slice_clear_grant(struct task_struct *t) { }
222+
static __always_inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
223223
#endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
224224

225225
bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);

include/linux/sched.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,7 @@ struct sched_entity {
579579
u64 deadline;
580580
u64 min_vruntime;
581581
u64 min_slice;
582+
u64 max_slice;
582583

583584
struct list_head group_node;
584585
unsigned char on_rq;

include/uapi/linux/rseq.h

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,17 @@ struct rseq_slice_ctrl {
8787
};
8888

8989
/*
90-
* struct rseq is aligned on 4 * 8 bytes to ensure it is always
91-
* contained within a single cache-line.
90+
* The original size and alignment of the allocation for struct rseq is
91+
* 32 bytes.
9292
*
93-
* A single struct rseq per thread is allowed.
93+
* The allocation size needs to be greater or equal to
94+
* max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to
95+
* be aligned on max(getauxval(AT_RSEQ_ALIGN), 32).
96+
*
97+
* As an alternative, userspace is allowed to use both the original size
98+
* and alignment of 32 bytes for backward compatibility.
99+
*
100+
* A single active struct rseq registration per thread is allowed.
94101
*/
95102
struct rseq {
96103
/*
@@ -180,10 +187,21 @@ struct rseq {
180187
*/
181188
struct rseq_slice_ctrl slice_ctrl;
182189

190+
/*
191+
* Before rseq became extensible, its original size was 32 bytes even
192+
* though the active rseq area was only 20 bytes.
193+
* Exposing a 32 bytes feature size would make life needlessly painful
194+
* for userspace. Therefore, add a reserved byte after byte 32
195+
* to bump the rseq feature size from 32 to 33.
196+
* The next field to be added to the rseq area will be larger
197+
* than one byte, and will replace this reserved byte.
198+
*/
199+
__u8 __reserved;
200+
183201
/*
184202
* Flexible array member at end of structure, after last feature field.
185203
*/
186204
char end[];
187-
} __attribute__((aligned(4 * sizeof(__u64))));
205+
} __attribute__((aligned(32)));
188206

189207
#endif /* _UAPI_LINUX_RSEQ_H */

kernel/rseq.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
#include <linux/syscalls.h>
8181
#include <linux/uaccess.h>
8282
#include <linux/types.h>
83+
#include <linux/rseq.h>
8384
#include <asm/ptrace.h>
8485

8586
#define CREATE_TRACE_POINTS
@@ -449,13 +450,14 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32
449450
* auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq
450451
* size, the required alignment is the original struct rseq alignment.
451452
*
452-
* In order to be valid, rseq_len is either the original rseq size, or
453-
* large enough to contain all supported fields, as communicated to
453+
* The rseq_len is required to be greater or equal to the original rseq
454+
* size. In order to be valid, rseq_len is either the original rseq size,
455+
* or large enough to contain all supported fields, as communicated to
454456
* user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE.
455457
*/
456458
if (rseq_len < ORIG_RSEQ_SIZE ||
457459
(rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) ||
458-
(rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
460+
(rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) ||
459461
rseq_len < offsetof(struct rseq, end))))
460462
return -EINVAL;
461463
if (!access_ok(rseq, rseq_len))

kernel/sched/core.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6830,6 +6830,7 @@ static void __sched notrace __schedule(int sched_mode)
68306830
/* SCX must consult the BPF scheduler to tell if rq is empty */
68316831
if (!rq->nr_running && !scx_enabled()) {
68326832
next = prev;
6833+
rq->next_class = &idle_sched_class;
68336834
goto picked;
68346835
}
68356836
} else if (!preempt && prev_state) {

kernel/sched/ext.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2460,7 +2460,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
24602460
/* see kick_cpus_irq_workfn() */
24612461
smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1);
24622462

2463-
rq->next_class = &ext_sched_class;
2463+
rq_modified_begin(rq, &ext_sched_class);
24642464

24652465
rq_unpin_lock(rq, rf);
24662466
balance_one(rq, prev);
@@ -2475,7 +2475,7 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
24752475
* If @force_scx is true, always try to pick a SCHED_EXT task,
24762476
* regardless of any higher-priority sched classes activity.
24772477
*/
2478-
if (!force_scx && sched_class_above(rq->next_class, &ext_sched_class))
2478+
if (!force_scx && rq_modified_above(rq, &ext_sched_class))
24792479
return RETRY_TASK;
24802480

24812481
keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP;

0 commit comments

Comments (0)