Skip to content

Commit dfb630f

Browse files
KAGA-KOKOPeter Zijlstra
authored andcommitted
rseq: Implement rseq_grant_slice_extension()
Provide the actual decision function, which decides whether a time slice extension is granted in the exit to user mode path when NEED_RESCHED is evaluated.

The decision is made in two stages. First, an inline quick check avoids going into the actual decision function. It checks whether:

  #1 the functionality is enabled
  #2 the exit is a return from interrupt to user mode
  #3 any TIF bit which causes extra work is set. That includes TIF_RSEQ, which means the task was already scheduled out.

The slow path, which implements the actual user space ABI, is invoked when:

  A) #1 is true, #2 is true and #3 is false

     It checks whether user space requested a slice extension by setting the request bit in the rseq slice_ctrl field. If so, it grants the extension and stores the slice expiry time, so that the actual exit code can double check whether the slice is already exhausted before going back.

  B) #1 - #3 are true _and_ a slice extension was granted in a previous loop iteration

     In this case the grant is revoked.

If the user space access faults or invalid state is detected, the task is terminated with SIGSEGV.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251215155709.195303303@linutronix.de
1 parent 7ee58f9 commit dfb630f

File tree

1 file changed

+108
-0
lines changed

1 file changed

+108
-0
lines changed

include/linux/rseq_entry.h

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_stats);
4242
#ifdef CONFIG_RSEQ
4343
#include <linux/jump_label.h>
4444
#include <linux/rseq.h>
45+
#include <linux/sched/signal.h>
4546
#include <linux/uaccess.h>
4647

4748
#include <linux/tracepoint-defs.h>
@@ -109,10 +110,116 @@ static __always_inline void rseq_slice_clear_grant(struct task_struct *t)
109110
t->rseq.slice.state.granted = false;
110111
}
111112

113+
/*
 * rseq_grant_slice_extension - Decide whether current gets a time slice
 *				extension
 * @work_pending:	When true no grant is given and user space slice
 *			control is cleared instead. Per the quick check
 *			comment in the body this stands for a TIF bit which
 *			requires extra work aside of rescheduling.
 *
 * Bails out early when slice extensions are disabled or when the local
 * copy of the slice state is empty after masking its enabled bit with
 * rseq.event.user_irq.
 *
 * Otherwise the user space slice_ctrl word of current's rseq area is
 * accessed:
 *
 *  - If @work_pending is set or a grant is already recorded, slice_ctrl
 *    is zeroed, the kernel side grant is cleared and false is returned.
 *
 *  - If user space did not set the request bit, false is returned.
 *
 *  - Else the request bit is cleared and the granted bit is set in user
 *    space, the grant and its expiry time are recorded in
 *    current->rseq.slice and the task and preempt need-resched state is
 *    cleared under an interrupt guard.
 *
 * A faulting user space access results in SIGSEGV being forced.
 *
 * Returns true if the extension was granted, false otherwise.
 */
static __always_inline bool rseq_grant_slice_extension(bool work_pending)
114+
{
115+
struct task_struct *curr = current;
116+
struct rseq_slice_ctrl usr_ctrl;
117+
union rseq_slice_state state;
118+
struct rseq __user *rseq;
119+
120+
if (!rseq_slice_extension_enabled())
121+
return false;
122+
123+
/*
 * If not enabled or not a return from interrupt, nothing to do.
 *
 * The check operates on a local copy, so masking the enabled bit does
 * not modify the task's state. It tests the whole state word, so
 * presumably an outstanding grant still reaches the revoke path below
 * (the union layout is not visible here - confirm).
 */
124+
state = curr->rseq.slice.state;
125+
state.enabled &= curr->rseq.event.user_irq;
126+
if (likely(!state.state))
127+
return false;
128+
129+
rseq = curr->rseq.usrptr;
130+
scoped_user_rw_access(rseq, efault) {
131+
132+
/*
133+
* Quick check conditions where a grant is not possible or
134+
* needs to be revoked.
135+
*
136+
* 1) Any TIF bit which needs to do extra work aside of
137+
* rescheduling prevents a grant.
138+
*
139+
* 2) A previous rescheduling request resulted in a slice
140+
* extension grant.
141+
*/
142+
if (unlikely(work_pending || state.granted)) {
143+
/* Clear user control unconditionally. No point in checking */
144+
unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
145+
rseq_slice_clear_grant(curr);
146+
return false;
147+
}
148+
149+
/* Without a request from user space there is nothing to grant */
unsafe_get_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault);
150+
if (likely(!(usr_ctrl.request)))
151+
return false;
152+
153+
/* Grant the slice extension */
154+
usr_ctrl.request = 0;
155+
usr_ctrl.granted = 1;
156+
unsafe_put_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault);
157+
}
158+
159+
rseq_stat_inc(rseq_stats.s_granted);
160+
161+
/* Record the grant on the kernel side as well */
curr->rseq.slice.state.granted = true;
162+
/* Store expiry time for arming the timer on the way out */
163+
curr->rseq.slice.expires = data_race(rseq_slice_ext_nsecs) + ktime_get_mono_fast_ns();
164+
/*
165+
* This is racy against a remote CPU setting TIF_NEED_RESCHED in
166+
* several ways:
167+
*
168+
* 1)
169+
* CPU0 CPU1
170+
* clear_tsk()
171+
* set_tsk()
172+
* clear_preempt()
173+
* Raise scheduler IPI on CPU0
174+
* --> IPI
175+
* fold_need_resched() -> Folds correctly
176+
* 2)
177+
* CPU0 CPU1
178+
* set_tsk()
179+
* clear_tsk()
180+
* clear_preempt()
181+
* Raise scheduler IPI on CPU0
182+
* --> IPI
183+
* fold_need_resched() <- NOOP as TIF_NEED_RESCHED is false
184+
*
185+
* #1 is not any different from a regular remote reschedule as it
186+
* sets the previously not set bit and then raises the IPI which
187+
* folds it into the preempt counter
188+
*
189+
* #2 is obviously incorrect from a scheduler POV, but it's not
190+
* differently incorrect than the code below clearing the
191+
* reschedule request with the safety net of the timer.
192+
*
193+
* The important part is that the clearing is protected against the
194+
* scheduler IPI and also against any other interrupt which might
195+
* end up waking up a task and setting the bits in the middle of
196+
* the operation:
197+
*
198+
* clear_tsk()
199+
* ---> Interrupt
200+
* wakeup_on_this_cpu()
201+
* set_tsk()
202+
* set_preempt()
203+
* clear_preempt()
204+
*
205+
* which would be inconsistent state.
206+
*/
207+
scoped_guard(irq) {
208+
clear_tsk_need_resched(curr);
209+
clear_preempt_need_resched();
210+
}
211+
return true;
212+
213+
/* Reached when one of the user space accesses above faults */
efault:
214+
force_sig(SIGSEGV);
215+
return false;
216+
}
217+
112218
#else /* CONFIG_RSEQ_SLICE_EXTENSION */
113219
/* Stub for !CONFIG_RSEQ_SLICE_EXTENSION: always reports false. */
static inline bool rseq_slice_extension_enabled(void)
{
	return false;
}
114220
/* Stub for !CONFIG_RSEQ_SLICE_EXTENSION: does nothing and returns false. */
static inline bool rseq_arm_slice_extension_timer(void)
{
	return false;
}
115221
/* Stub for !CONFIG_RSEQ_SLICE_EXTENSION: @t is left untouched. */
static inline void rseq_slice_clear_grant(struct task_struct *t)
{
}
222+
/*
 * Stub for !CONFIG_RSEQ_SLICE_EXTENSION: @work_pending is ignored and no
 * extension is ever granted.
 */
static inline bool rseq_grant_slice_extension(bool work_pending)
{
	return false;
}
116223
#endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
117224

118225
bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
@@ -671,6 +778,7 @@ static inline void rseq_syscall_exit_to_user_mode(void) { }
671778
/* Stub for !CONFIG_RSEQ: intentionally empty. */
static inline void rseq_irqentry_exit_to_user_mode(void)
{
}
672779
/* Stub for !CONFIG_RSEQ: intentionally empty. */
static inline void rseq_exit_to_user_mode_legacy(void)
{
}
673780
/* Stub for !CONFIG_RSEQ: @regs is left untouched. */
static inline void rseq_debug_syscall_return(struct pt_regs *regs)
{
}
781+
/*
 * Stub for !CONFIG_RSEQ: @work_pending is ignored and no slice extension
 * is ever granted.
 */
static inline bool rseq_grant_slice_extension(bool work_pending)
{
	return false;
}
674782
#endif /* !CONFIG_RSEQ */
675783

676784
#endif /* _LINUX_RSEQ_ENTRY_H */

0 commit comments

Comments
 (0)