Skip to content

Commit b0c3d51

Browse files
committed
sched/mmcid: Provide precomputed maximal value
Reading mm::mm_users and mm::mm_cid::nr_cpus_allowed every time to compute the maximal CID value is just wasteful as that value is only changing on fork(), exit() and eventually when the affinity changes. So it can be easily precomputed at those points and provided in mm::mm_cid for consumption in the hot path. But there is an issue with using mm::mm_users for accounting because that does not necessarily reflect the number of user space tasks as other kernel code can take temporary references on the MM which skew the picture. Solve that by adding a users counter to struct mm_mm_cid, which is modified by fork() and exit() and used for precomputing under mm_mm_cid::lock. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Link: https://patch.msgid.link/20251119172549.832764634@linutronix.de
1 parent bf07052 commit b0c3d51

File tree

4 files changed

+50
-19
lines changed

4 files changed

+50
-19
lines changed

include/linux/rseq_types.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,20 @@ struct mm_cid_pcpu {
117117
/**
118118
* struct mm_mm_cid - Storage for per MM CID data
119119
* @pcpu: Per CPU storage for CIDs associated to a CPU
120+
* @max_cids: The exclusive maximum CID value for allocation and convergence
120121
* @nr_cpus_allowed: The number of CPUs in the per MM allowed CPUs map. The map
121122
* is growth only.
123+
* @users: The number of tasks sharing this MM. Separate from mm::mm_users
124+
* as that is modified by mmget()/mmput() by other entities which
125+
* do not actually share the MM.
122126
* @lock: Spinlock to protect all fields except @pcpu. It also protects
123127
* the MM cid cpumask and the MM cidmask bitmap.
124128
*/
125129
struct mm_mm_cid {
126130
struct mm_cid_pcpu __percpu *pcpu;
131+
unsigned int max_cids;
127132
unsigned int nr_cpus_allowed;
133+
unsigned int users;
128134
raw_spinlock_t lock;
129135
}____cacheline_aligned_in_smp;
130136
#else /* CONFIG_SCHED_MM_CID */

kernel/fork.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2455,6 +2455,7 @@ __latent_entropy struct task_struct *copy_process(
24552455
exit_task_namespaces(p);
24562456
bad_fork_cleanup_mm:
24572457
if (p->mm) {
2458+
sched_mm_cid_exit(p);
24582459
mm_clear_owner(p->mm, p);
24592460
mmput(p->mm);
24602461
}

kernel/sched/core.c

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4485,7 +4485,6 @@ static void __sched_fork(u64 clone_flags, struct task_struct *p)
44854485
init_numa_balancing(clone_flags, p);
44864486
p->wake_entry.u_flags = CSD_TYPE_TTWU;
44874487
p->migration_pending = NULL;
4488-
init_sched_mm_cid(p);
44894488
}
44904489

44914490
DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -10371,15 +10370,27 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
1037110370

1037210371
#ifdef CONFIG_SCHED_MM_CID
1037310372
/*
10374-
* When a task exits, the MM CID held by the task is no longer required as
10375-
* the task cannot return to user space.
10373+
* Update the CID range properties when the constraints change. Invoked via
10374+
* fork(), exit() and affinity changes
1037610375
*/
10376+
static void mm_update_max_cids(struct mm_struct *mm)
10377+
{
10378+
struct mm_mm_cid *mc = &mm->mm_cid;
10379+
unsigned int max_cids;
10380+
10381+
lockdep_assert_held(&mm->mm_cid.lock);
10382+
10383+
/* Calculate the new maximum constraint */
10384+
max_cids = min(mc->nr_cpus_allowed, mc->users);
10385+
WRITE_ONCE(mc->max_cids, max_cids);
10386+
}
10387+
1037710388
static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpumask *affmsk)
1037810389
{
1037910390
struct cpumask *mm_allowed;
1038010391
unsigned int weight;
1038110392

10382-
if (!mm)
10393+
if (!mm || !READ_ONCE(mm->mm_cid.users))
1038310394
return;
1038410395

1038510396
/*
@@ -10389,22 +10400,45 @@ static inline void mm_update_cpus_allowed(struct mm_struct *mm, const struct cpu
1038910400
guard(raw_spinlock)(&mm->mm_cid.lock);
1039010401
mm_allowed = mm_cpus_allowed(mm);
1039110402
weight = cpumask_weighted_or(mm_allowed, mm_allowed, affmsk);
10403+
if (weight == mm->mm_cid.nr_cpus_allowed)
10404+
return;
1039210405
WRITE_ONCE(mm->mm_cid.nr_cpus_allowed, weight);
10406+
mm_update_max_cids(mm);
10407+
}
10408+
10409+
void sched_mm_cid_fork(struct task_struct *t)
10410+
{
10411+
struct mm_struct *mm = t->mm;
10412+
10413+
WARN_ON_ONCE(!mm || t->mm_cid.cid != MM_CID_UNSET);
10414+
10415+
guard(raw_spinlock)(&mm->mm_cid.lock);
10416+
t->mm_cid.active = 1;
10417+
mm->mm_cid.users++;
10418+
/* Preset last_cid for mm_cid_select() */
10419+
t->mm_cid.last_cid = READ_ONCE(mm->mm_cid.max_cids) - 1;
10420+
mm_update_max_cids(mm);
1039310421
}
1039410422

10423+
/*
10424+
* When a task exits, the MM CID held by the task is no longer required as
10425+
* the task cannot return to user space.
10426+
*/
1039510427
void sched_mm_cid_exit(struct task_struct *t)
1039610428
{
1039710429
struct mm_struct *mm = t->mm;
1039810430

1039910431
if (!mm || !t->mm_cid.active)
1040010432
return;
1040110433

10402-
guard(preempt)();
10434+
guard(raw_spinlock)(&mm->mm_cid.lock);
1040310435
t->mm_cid.active = 0;
10436+
mm->mm_cid.users--;
1040410437
if (t->mm_cid.cid != MM_CID_UNSET) {
1040510438
clear_bit(t->mm_cid.cid, mm_cidmask(mm));
1040610439
t->mm_cid.cid = MM_CID_UNSET;
1040710440
}
10441+
mm_update_max_cids(mm);
1040810442
}
1040910443

1041010444
/* Deactivate MM CID allocation across execve() */
@@ -10416,22 +10450,11 @@ void sched_mm_cid_before_execve(struct task_struct *t)
1041610450
/* Reactivate MM CID after successful execve() */
1041710451
void sched_mm_cid_after_execve(struct task_struct *t)
1041810452
{
10419-
struct mm_struct *mm = t->mm;
10420-
10421-
if (!mm)
10422-
return;
10423-
10453+
sched_mm_cid_fork(t);
1042410454
guard(preempt)();
10425-
t->mm_cid.active = 1;
1042610455
mm_cid_select(t);
1042710456
}
1042810457

10429-
void sched_mm_cid_fork(struct task_struct *t)
10430-
{
10431-
WARN_ON_ONCE(!t->mm || t->mm_cid.cid != MM_CID_UNSET);
10432-
t->mm_cid.active = 1;
10433-
}
10434-
1043510458
void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
1043610459
{
1043710460
struct mm_cid_pcpu __percpu *pcpu = mm->mm_cid.pcpu;
@@ -10440,7 +10463,9 @@ void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
1044010463
for_each_possible_cpu(cpu)
1044110464
per_cpu_ptr(pcpu, cpu)->cid = MM_CID_UNSET;
1044210465

10466+
mm->mm_cid.max_cids = 0;
1044310467
mm->mm_cid.nr_cpus_allowed = p->nr_cpus_allowed;
10468+
mm->mm_cid.users = 0;
1044410469
raw_spin_lock_init(&mm->mm_cid.lock);
1044510470
cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask);
1044610471
bitmap_zero(mm_cidmask(mm), num_possible_cpus());

kernel/sched/sched.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3571,7 +3571,7 @@ static inline bool mm_cid_get(struct task_struct *t)
35713571
struct mm_struct *mm = t->mm;
35723572
unsigned int max_cids;
35733573

3574-
max_cids = min_t(int, READ_ONCE(mm->mm_cid.nr_cpus_allowed), atomic_read(&mm->mm_users));
3574+
max_cids = READ_ONCE(mm->mm_cid.max_cids);
35753575

35763576
/* Try to reuse the last CID of this task */
35773577
if (__mm_cid_get(t, t->mm_cid.last_cid, max_cids))
@@ -3614,7 +3614,6 @@ static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *n
36143614
}
36153615

36163616
#else /* !CONFIG_SCHED_MM_CID: */
3617-
static inline void init_sched_mm_cid(struct task_struct *t) { }
36183617
static inline void mm_cid_select(struct task_struct *t) { }
36193618
static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next) { }
36203619
#endif /* !CONFIG_SCHED_MM_CID */

0 commit comments

Comments
 (0)