Skip to content

Commit cb36eab

Browse files
committed
Merge tag 'perf-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events fixes from Ingo Molnar:

 - Fix lock ordering bug found by lockdep in perf_event_wakeup()

 - Fix uncore counter enumeration on Granite Rapids and Sierra Forest

 - Fix perf_mmap() refcount bug found by Syzkaller

 - Fix __perf_event_overflow() vs perf_remove_from_context() race

* tag 'perf-urgent-2026-03-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf: Fix __perf_event_overflow() vs perf_remove_from_context() race
  perf/core: Fix refcount bug and potential UAF in perf_mmap
  perf/x86/intel/uncore: Add per-scheduler IMC CAS count events
  perf/core: Fix invalid wait context in ctx_sched_in()
2 parents b410220 + c9bc175 commit cb36eab

File tree

2 files changed

+89
-22
lines changed

2 files changed

+89
-22
lines changed

arch/x86/events/intel/uncore_snbep.c

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6497,6 +6497,32 @@ static struct intel_uncore_type gnr_uncore_ubox = {
64976497
.attr_update = uncore_alias_groups,
64986498
};
64996499

6500+
static struct uncore_event_desc gnr_uncore_imc_events[] = {
6501+
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x01,umask=0x00"),
6502+
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0, "event=0x05,umask=0xcf"),
6503+
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.scale, "6.103515625e-5"),
6504+
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch0.unit, "MiB"),
6505+
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1, "event=0x06,umask=0xcf"),
6506+
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.scale, "6.103515625e-5"),
6507+
INTEL_UNCORE_EVENT_DESC(cas_count_read_sch1.unit, "MiB"),
6508+
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0, "event=0x05,umask=0xf0"),
6509+
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.scale, "6.103515625e-5"),
6510+
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch0.unit, "MiB"),
6511+
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1, "event=0x06,umask=0xf0"),
6512+
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.scale, "6.103515625e-5"),
6513+
INTEL_UNCORE_EVENT_DESC(cas_count_write_sch1.unit, "MiB"),
6514+
{ /* end: all zeroes */ },
6515+
};
6516+
6517+
static struct intel_uncore_type gnr_uncore_imc = {
6518+
SPR_UNCORE_MMIO_COMMON_FORMAT(),
6519+
.name = "imc",
6520+
.fixed_ctr_bits = 48,
6521+
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
6522+
.fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
6523+
.event_descs = gnr_uncore_imc_events,
6524+
};
6525+
65006526
static struct intel_uncore_type gnr_uncore_pciex8 = {
65016527
SPR_UNCORE_PCI_COMMON_FORMAT(),
65026528
.name = "pciex8",
@@ -6544,7 +6570,7 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = {
65446570
NULL,
65456571
&spr_uncore_pcu,
65466572
&gnr_uncore_ubox,
6547-
&spr_uncore_imc,
6573+
&gnr_uncore_imc,
65486574
NULL,
65496575
&gnr_uncore_upi,
65506576
NULL,

kernel/events/core.c

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4138,7 +4138,8 @@ static int merge_sched_in(struct perf_event *event, void *data)
41384138
if (*perf_event_fasync(event))
41394139
event->pending_kill = POLL_ERR;
41404140

4141-
perf_event_wakeup(event);
4141+
event->pending_wakeup = 1;
4142+
irq_work_queue(&event->pending_irq);
41424143
} else {
41434144
struct perf_cpu_pmu_context *cpc = this_cpc(event->pmu_ctx->pmu);
41444145

@@ -7464,28 +7465,28 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
74647465
ret = perf_mmap_aux(vma, event, nr_pages);
74657466
if (ret)
74667467
return ret;
7467-
}
74687468

7469-
/*
7470-
* Since pinned accounting is per vm we cannot allow fork() to copy our
7471-
* vma.
7472-
*/
7473-
vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
7474-
vma->vm_ops = &perf_mmap_vmops;
7469+
/*
7470+
* Since pinned accounting is per vm we cannot allow fork() to copy our
7471+
* vma.
7472+
*/
7473+
vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
7474+
vma->vm_ops = &perf_mmap_vmops;
74757475

7476-
mapped = get_mapped(event, event_mapped);
7477-
if (mapped)
7478-
mapped(event, vma->vm_mm);
7476+
mapped = get_mapped(event, event_mapped);
7477+
if (mapped)
7478+
mapped(event, vma->vm_mm);
74797479

7480-
/*
7481-
* Try to map it into the page table. On fail, invoke
7482-
* perf_mmap_close() to undo the above, as the callsite expects
7483-
* full cleanup in this case and therefore does not invoke
7484-
* vmops::close().
7485-
*/
7486-
ret = map_range(event->rb, vma);
7487-
if (ret)
7488-
perf_mmap_close(vma);
7480+
/*
7481+
* Try to map it into the page table. On fail, invoke
7482+
* perf_mmap_close() to undo the above, as the callsite expects
7483+
* full cleanup in this case and therefore does not invoke
7484+
* vmops::close().
7485+
*/
7486+
ret = map_range(event->rb, vma);
7487+
if (ret)
7488+
perf_mmap_close(vma);
7489+
}
74897490

74907491
return ret;
74917492
}
@@ -10776,6 +10777,13 @@ int perf_event_overflow(struct perf_event *event,
1077610777
struct perf_sample_data *data,
1077710778
struct pt_regs *regs)
1077810779
{
10780+
/*
10781+
* Entry point from hardware PMI, interrupts should be disabled here.
10782+
* This serializes us against perf_event_remove_from_context() in
10783+
* things like perf_event_release_kernel().
10784+
*/
10785+
lockdep_assert_irqs_disabled();
10786+
1077910787
return __perf_event_overflow(event, 1, data, regs);
1078010788
}
1078110789

@@ -10852,6 +10860,19 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
1085210860
{
1085310861
struct hw_perf_event *hwc = &event->hw;
1085410862

10863+
/*
10864+
* This is:
10865+
* - software preempt
10866+
* - tracepoint preempt
10867+
* - tp_target_task irq (ctx->lock)
10868+
* - uprobes preempt/irq
10869+
* - kprobes preempt/irq
10870+
* - hw_breakpoint irq
10871+
*
10872+
* Any of these are sufficient to hold off RCU and thus ensure @event
10873+
* exists.
10874+
*/
10875+
lockdep_assert_preemption_disabled();
1085510876
local64_add(nr, &event->count);
1085610877

1085710878
if (!regs)
@@ -10860,6 +10881,16 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
1086010881
if (!is_sampling_event(event))
1086110882
return;
1086210883

10884+
/*
10885+
* Serialize against event_function_call() IPIs like normal overflow
10886+
* event handling. Specifically, must not allow
10887+
* perf_event_release_kernel() -> perf_remove_from_context() to make
10888+
* progress and 'release' the event from under us.
10889+
*/
10890+
guard(irqsave)();
10891+
if (event->state != PERF_EVENT_STATE_ACTIVE)
10892+
return;
10893+
1086310894
if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
1086410895
data->period = nr;
1086510896
return perf_swevent_overflow(event, 1, data, regs);
@@ -11358,6 +11389,11 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
1135811389
struct perf_sample_data data;
1135911390
struct perf_event *event;
1136011391

11392+
/*
11393+
* Per being a tracepoint, this runs with preemption disabled.
11394+
*/
11395+
lockdep_assert_preemption_disabled();
11396+
1136111397
struct perf_raw_record raw = {
1136211398
.frag = {
1136311399
.size = entry_size,
@@ -11690,6 +11726,11 @@ void perf_bp_event(struct perf_event *bp, void *data)
1169011726
struct perf_sample_data sample;
1169111727
struct pt_regs *regs = data;
1169211728

11729+
/*
11730+
* Exception context, will have interrupts disabled.
11731+
*/
11732+
lockdep_assert_irqs_disabled();
11733+
1169311734
perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
1169411735

1169511736
if (!bp->hw.state && !perf_exclude_event(bp, regs))
@@ -12154,7 +12195,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
1215412195

1215512196
if (regs && !perf_exclude_event(event, regs)) {
1215612197
if (!(event->attr.exclude_idle && is_idle_task(current)))
12157-
if (__perf_event_overflow(event, 1, &data, regs))
12198+
if (perf_event_overflow(event, &data, regs))
1215812199
ret = HRTIMER_NORESTART;
1215912200
}
1216012201

0 commit comments

Comments
 (0)