Skip to content

Commit 59566b0

Browse files
committed
x86/ftrace: Have ftrace trampolines turn read-only at the end of system boot up
Booting one of my machines, it triggered the following crash: Kernel/User page tables isolation: enabled ftrace: allocating 36577 entries in 143 pages Starting tracer 'function' BUG: unable to handle page fault for address: ffffffffa000005c #PF: supervisor write access in kernel mode #PF: error_code(0x0003) - permissions violation PGD 2014067 P4D 2014067 PUD 2015063 PMD 7b253067 PTE 7b25206 Oops: 0003 [hardkernel#1] PREEMPT SMP PTI CPU: 0 PID: 0 Comm: swapper Not tainted 5.4.0-test+ hardkernel#24 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be filled by O.E.M., BIOS SDBLI944.86P 05/08/2007 RIP: 0010:text_poke_early+0x4a/0x58 Code: 34 24 48 89 54 24 08 e8 bf 72 0b 00 48 8b 34 24 48 8b 4c 24 08 84 c0 74 0b 48 89 df f3 a4 48 83 c4 10 5b c3 9c 58 fa 48 89 df <f3> a4 50 9d 48 83 c4 10 5b e9 d6 f9 ff ff 0 41 57 49 RSP: 0000:ffffffff82003d38 EFLAGS: 00010046 RAX: 0000000000000046 RBX: ffffffffa000005c RCX: 0000000000000005 RDX: 0000000000000005 RSI: ffffffff825b9a90 RDI: ffffffffa000005c RBP: ffffffffa000005c R08: 0000000000000000 R09: ffffffff8206e6e0 R10: ffff88807b01f4c0 R11: ffffffff8176c106 R12: ffffffff8206e6e0 R13: ffffffff824f2440 R14: 0000000000000000 R15: ffffffff8206eac0 FS: 0000000000000000(0000) GS:ffff88807d400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa000005c CR3: 0000000002012000 CR4: 00000000000006b0 Call Trace: text_poke_bp+0x27/0x64 ? mutex_lock+0x36/0x5d arch_ftrace_update_trampoline+0x287/0x2d5 ? ftrace_replace_code+0x14b/0x160 ? ftrace_update_ftrace_func+0x65/0x6c __register_ftrace_function+0x6d/0x81 ftrace_startup+0x23/0xc1 register_ftrace_function+0x20/0x37 func_set_flag+0x59/0x77 __set_tracer_option.isra.19+0x20/0x3e trace_set_options+0xd6/0x13e apply_trace_boot_options+0x44/0x6d register_tracer+0x19e/0x1ac early_trace_init+0x21b/0x2c9 start_kernel+0x241/0x518 ? 
load_ucode_intel_bsp+0x21/0x52 secondary_startup_64+0xa4/0xb0 I was able to trigger it on other machines, when I added to the kernel command line of both "ftrace=function" and "trace_options=func_stack_trace". The cause is the "ftrace=function" would register the function tracer and create a trampoline, and it will set it as executable and read-only. Then the "trace_options=func_stack_trace" would then update the same trampoline to include the stack tracer version of the function tracer. But since the trampoline already exists, it updates it with text_poke_bp(). The problem is that text_poke_bp() called while system_state == SYSTEM_BOOTING, it will simply do a memcpy() and not the page mapping, as it would think that the text is still read-write. But in this case it is not, and we take a fault and crash. Instead, let's keep the ftrace trampolines read-write during boot up, and then when the kernel executable text is set to read-only, the ftrace trampolines get set to read-only as well. Link: https://lkml.kernel.org/r/20200430202147.4dc6e2de@oasis.local.home Cc: Ingo Molnar <mingo@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: stable@vger.kernel.org Fixes: 768ae44 ("x86/ftrace: Use text_poke()") Acked-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
1 parent 611d0a9 commit 59566b0

File tree

5 files changed

+60
-23
lines changed

5 files changed

+60
-23
lines changed

arch/x86/include/asm/ftrace.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@ struct dyn_arch_ftrace {
5656

5757
#ifndef __ASSEMBLY__
5858

/*
 * set_ftrace_ops_ro() - make all allocated ftrace trampolines read-only.
 *
 * Trampolines created during early boot are left read-write (so that
 * text_poke_early()-style memcpy() updates do not fault); this hook is
 * called from mark_rodata_ro() to seal them once kernel text itself
 * becomes read-only.  Stubbed out when dynamic ftrace is not built in.
 */
#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
extern void set_ftrace_ops_ro(void);
#else
static inline void set_ftrace_ops_ro(void) { }
#endif
5965
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
6066
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
6167
{

arch/x86/kernel/ftrace.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,14 +407,41 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
407407

408408
set_vm_flush_reset_perms(trampoline);
409409

410-
set_memory_ro((unsigned long)trampoline, npages);
410+
if (likely(system_state != SYSTEM_BOOTING))
411+
set_memory_ro((unsigned long)trampoline, npages);
411412
set_memory_x((unsigned long)trampoline, npages);
412413
return (unsigned long)trampoline;
413414
fail:
414415
tramp_free(trampoline);
415416
return 0;
416417
}
417418

419+
void set_ftrace_ops_ro(void)
420+
{
421+
struct ftrace_ops *ops;
422+
unsigned long start_offset;
423+
unsigned long end_offset;
424+
unsigned long npages;
425+
unsigned long size;
426+
427+
do_for_each_ftrace_op(ops, ftrace_ops_list) {
428+
if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
429+
continue;
430+
431+
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
432+
start_offset = (unsigned long)ftrace_regs_caller;
433+
end_offset = (unsigned long)ftrace_regs_caller_end;
434+
} else {
435+
start_offset = (unsigned long)ftrace_caller;
436+
end_offset = (unsigned long)ftrace_epilogue;
437+
}
438+
size = end_offset - start_offset;
439+
size = size + RET_SIZE + sizeof(void *);
440+
npages = DIV_ROUND_UP(size, PAGE_SIZE);
441+
set_memory_ro((unsigned long)ops->trampoline, npages);
442+
} while_for_each_ftrace_op(ops);
443+
}
444+
418445
static unsigned long calc_trampoline_call_offset(bool save_regs)
419446
{
420447
unsigned long start_offset;

arch/x86/mm/init_64.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
#include <asm/init.h>
5555
#include <asm/uv/uv.h>
5656
#include <asm/setup.h>
57+
#include <asm/ftrace.h>
5758

5859
#include "mm_internal.h"
5960

@@ -1291,6 +1292,8 @@ void mark_rodata_ro(void)
12911292
all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
12921293
set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
12931294

1295+
set_ftrace_ops_ro();
1296+
12941297
#ifdef CONFIG_CPA_DEBUG
12951298
printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end);
12961299
set_memory_rw(start, (end-start) >> PAGE_SHIFT);

include/linux/ftrace.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,29 @@ struct ftrace_ops {
210210
#endif
211211
};
212212

213+
extern struct ftrace_ops __rcu *ftrace_ops_list;
214+
extern struct ftrace_ops ftrace_list_end;
215+
216+
/*
217+
* Traverse the ftrace_global_list, invoking all entries. The reason that we
218+
* can use rcu_dereference_raw_check() is that elements removed from this list
219+
* are simply leaked, so there is no need to interact with a grace-period
220+
* mechanism. The rcu_dereference_raw_check() calls are needed to handle
221+
* concurrent insertions into the ftrace_global_list.
222+
*
223+
* Silly Alpha and silly pointer-speculation compiler optimizations!
224+
*/
225+
#define do_for_each_ftrace_op(op, list) \
226+
op = rcu_dereference_raw_check(list); \
227+
do
228+
229+
/*
230+
* Optimized for just a single item in the list (as that is the normal case).
231+
*/
232+
#define while_for_each_ftrace_op(op) \
233+
while (likely(op = rcu_dereference_raw_check((op)->next)) && \
234+
unlikely((op) != &ftrace_list_end))
235+
213236
/*
214237
* Type of the current tracing.
215238
*/

kernel/trace/ftrace_internal.h

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,6 @@
44

55
#ifdef CONFIG_FUNCTION_TRACER
66

7-
/*
8-
* Traverse the ftrace_global_list, invoking all entries. The reason that we
9-
* can use rcu_dereference_raw_check() is that elements removed from this list
10-
* are simply leaked, so there is no need to interact with a grace-period
11-
* mechanism. The rcu_dereference_raw_check() calls are needed to handle
12-
* concurrent insertions into the ftrace_global_list.
13-
*
14-
* Silly Alpha and silly pointer-speculation compiler optimizations!
15-
*/
16-
#define do_for_each_ftrace_op(op, list) \
17-
op = rcu_dereference_raw_check(list); \
18-
do
19-
20-
/*
21-
* Optimized for just a single item in the list (as that is the normal case).
22-
*/
23-
#define while_for_each_ftrace_op(op) \
24-
while (likely(op = rcu_dereference_raw_check((op)->next)) && \
25-
unlikely((op) != &ftrace_list_end))
26-
27-
extern struct ftrace_ops __rcu *ftrace_ops_list;
28-
extern struct ftrace_ops ftrace_list_end;
297
extern struct mutex ftrace_lock;
308
extern struct ftrace_ops global_ops;
319

0 commit comments

Comments
 (0)