Skip to content

Commit 7a3975c

Browse files
fix(runtime): replace TLS with destructors in allocator hot path
The PianoAllocator accessed STACK (RefCell<Vec<StackEntry>>) from track_alloc/track_dealloc, which has a destructor. On Rust < 1.93.1 this triggers "fatal runtime error: the global allocator may not use TLS with destructors, aborting". Rust 1.93.1 relaxed this check (rust-lang/rust#144465), masking the bug in our test suite. Replace STACK access with a destructor-free Cell<AllocSnapshot> where AllocSnapshot is Copy (no destructor). enter() saves and zeroes the Cell; Guard::drop() reads and restores, correctly handling nesting. Benchmark shows ~0ns overhead per allocation vs ~25ns before. Also fix build_instrumented to remove RUSTUP_TOOLCHAIN so target projects' rust-toolchain.toml is respected (previously, nested cargo inherited the parent toolchain, silently masking version-specific bugs). Update integration test to pin Rust 1.91.0 with rayon, representative of real programs like chainsaw that triggered the crash. Closes #27
1 parent 36bb8ce commit 7a3975c

File tree

4 files changed

+289
-73
lines changed

4 files changed

+289
-73
lines changed

piano-runtime/src/alloc.rs

Lines changed: 75 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,41 @@
11
use std::alloc::{GlobalAlloc, Layout};
2-
use std::time::Instant;
3-
4-
use crate::collector::STACK;
2+
use std::cell::Cell;
3+
4+
/// Allocation counters accumulated in the allocator hot path.
5+
/// All fields are plain integers → `Copy` → `Cell<AllocSnapshot>` has no
6+
/// destructor → safe for use in global allocator TLS on all Rust versions.
7+
#[derive(Clone, Copy, Default)]
8+
pub(crate) struct AllocSnapshot {
9+
pub(crate) alloc_count: u32,
10+
pub(crate) alloc_bytes: u64,
11+
pub(crate) free_count: u32,
12+
pub(crate) free_bytes: u64,
13+
}
514

6-
// Re-entrancy guard: when tracking code itself allocates, we skip tracking.
715
thread_local! {
8-
static IN_ALLOC_TRACKING: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
16+
/// Destructor-free counters that the allocator hot path increments.
17+
/// `enter()` saves and zeroes this; `Guard::drop()` reads and restores.
18+
pub(crate) static ALLOC_COUNTERS: Cell<AllocSnapshot> = const { Cell::new(AllocSnapshot::new()) };
19+
}
20+
21+
impl AllocSnapshot {
22+
pub(crate) const fn new() -> Self {
23+
Self {
24+
alloc_count: 0,
25+
alloc_bytes: 0,
26+
free_count: 0,
27+
free_bytes: 0,
28+
}
29+
}
930
}
1031

1132
/// A global allocator wrapper that tracks allocation counts and bytes
1233
/// per instrumented function scope, with zero timing distortion.
1334
///
14-
/// Wraps any inner `GlobalAlloc`. Measures its own bookkeeping overhead
15-
/// and subtracts it from the current function's timing via `overhead_ns`.
35+
/// Wraps any inner `GlobalAlloc`. Uses a destructor-free `Cell<AllocSnapshot>`
36+
/// for thread-local bookkeeping, which is safe on all Rust versions
37+
/// (including < 1.93.1 where TLS with destructors is forbidden for
38+
/// global allocators).
1639
pub struct PianoAllocator<A: GlobalAlloc> {
1740
inner: A,
1841
}
@@ -52,56 +75,26 @@ unsafe impl<A: GlobalAlloc> GlobalAlloc for PianoAllocator<A> {
5275
}
5376
}
5477

78+
#[inline(always)]
5579
fn track_alloc(bytes: u64) {
56-
let ok = IN_ALLOC_TRACKING.try_with(|flag| {
57-
if flag.get() {
58-
return false;
59-
}
60-
flag.set(true);
61-
true
80+
// Cell::get/set are plain memory reads/writes — no allocation, no
81+
// re-entrancy risk, no destructor. Safe from the global allocator.
82+
let _ = ALLOC_COUNTERS.try_with(|cell| {
83+
let mut snap = cell.get();
84+
snap.alloc_count += 1;
85+
snap.alloc_bytes += bytes;
86+
cell.set(snap);
6287
});
63-
if ok != Ok(true) {
64-
return;
65-
}
66-
67-
let t0 = Instant::now();
68-
let _ = STACK.try_with(|stack| {
69-
if let Ok(mut s) = stack.try_borrow_mut()
70-
&& let Some(top) = s.last_mut()
71-
{
72-
top.alloc_count += 1;
73-
top.alloc_bytes += bytes;
74-
top.overhead_ns += t0.elapsed().as_nanos();
75-
}
76-
});
77-
78-
let _ = IN_ALLOC_TRACKING.try_with(|flag| flag.set(false));
7988
}
8089

90+
#[inline(always)]
8191
fn track_dealloc(bytes: u64) {
82-
let ok = IN_ALLOC_TRACKING.try_with(|flag| {
83-
if flag.get() {
84-
return false;
85-
}
86-
flag.set(true);
87-
true
92+
let _ = ALLOC_COUNTERS.try_with(|cell| {
93+
let mut snap = cell.get();
94+
snap.free_count += 1;
95+
snap.free_bytes += bytes;
96+
cell.set(snap);
8897
});
89-
if ok != Ok(true) {
90-
return;
91-
}
92-
93-
let t0 = Instant::now();
94-
let _ = STACK.try_with(|stack| {
95-
if let Ok(mut s) = stack.try_borrow_mut()
96-
&& let Some(top) = s.last_mut()
97-
{
98-
top.free_count += 1;
99-
top.free_bytes += bytes;
100-
top.overhead_ns += t0.elapsed().as_nanos();
101-
}
102-
});
103-
104-
let _ = IN_ALLOC_TRACKING.try_with(|flag| flag.set(false));
10598
}
10699

107100
#[cfg(test)]
@@ -126,6 +119,37 @@ mod tests {
126119
assert_eq!(rec.free_bytes, 256);
127120
}
128121

122+
#[test]
123+
fn alloc_tracking_nested_scopes() {
124+
reset();
125+
{
126+
let _outer = enter("outer_alloc");
127+
track_alloc(100);
128+
{
129+
let _inner = enter("inner_alloc");
130+
track_alloc(200);
131+
track_dealloc(50);
132+
}
133+
track_alloc(300);
134+
track_dealloc(75);
135+
}
136+
let invocations = collect_invocations();
137+
let outer = invocations.iter().find(|r| r.name == "outer_alloc").unwrap();
138+
let inner = invocations.iter().find(|r| r.name == "inner_alloc").unwrap();
139+
140+
// Inner scope should only see its own allocations
141+
assert_eq!(inner.alloc_count, 1, "inner alloc_count");
142+
assert_eq!(inner.alloc_bytes, 200, "inner alloc_bytes");
143+
assert_eq!(inner.free_count, 1, "inner free_count");
144+
assert_eq!(inner.free_bytes, 50, "inner free_bytes");
145+
146+
// Outer scope should see its own allocations (before + after inner)
147+
assert_eq!(outer.alloc_count, 2, "outer alloc_count");
148+
assert_eq!(outer.alloc_bytes, 400, "outer alloc_bytes");
149+
assert_eq!(outer.free_count, 1, "outer free_count");
150+
assert_eq!(outer.free_bytes, 75, "outer free_bytes");
151+
}
152+
129153
#[test]
130154
fn alloc_tracking_does_not_distort_timing() {
131155
reset();

piano-runtime/src/collector.rs

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,8 @@ pub(crate) struct StackEntry {
7676
pub(crate) name: &'static str,
7777
pub(crate) start: Instant,
7878
pub(crate) children_ms: f64,
79-
pub(crate) overhead_ns: u128,
80-
pub(crate) alloc_count: u32,
81-
pub(crate) alloc_bytes: u64,
82-
pub(crate) free_count: u32,
83-
pub(crate) free_bytes: u64,
79+
/// Saved ALLOC_COUNTERS from before this scope, restored on Guard::drop.
80+
pub(crate) saved_alloc: crate::alloc::AllocSnapshot,
8481
pub(crate) depth: u16,
8582
}
8683

@@ -124,18 +121,31 @@ pub struct Guard {
124121

125122
impl Drop for Guard {
126123
fn drop(&mut self) {
124+
// Read this scope's alloc counters and restore the parent's saved state.
125+
let scope_alloc = crate::alloc::ALLOC_COUNTERS
126+
.try_with(|cell| {
127+
let snap = cell.get();
128+
// Will be overwritten below after we pop the stack entry.
129+
snap
130+
})
131+
.unwrap_or_default();
132+
127133
STACK.with(|stack| {
128134
let entry = stack.borrow_mut().pop();
129135
let Some(entry) = entry else {
130136
eprintln!("piano-runtime: guard dropped without matching stack entry (bug)");
131137
return;
132138
};
139+
140+
// Restore parent's saved alloc counters.
141+
let _ = crate::alloc::ALLOC_COUNTERS.try_with(|cell| {
142+
cell.set(entry.saved_alloc);
143+
});
144+
133145
let elapsed_ns = entry.start.elapsed().as_nanos() as u64;
134146
let elapsed_ms = elapsed_ns as f64 / 1_000_000.0;
135147
let children_ns = (entry.children_ms * 1_000_000.0) as u64;
136-
let self_ns = elapsed_ns
137-
.saturating_sub(children_ns)
138-
.saturating_sub(entry.overhead_ns as u64);
148+
let self_ns = elapsed_ns.saturating_sub(children_ns);
139149
let start_ns = entry.start.duration_since(*EPOCH).as_nanos() as u64;
140150
let children_ms = entry.children_ms;
141151

@@ -160,10 +170,10 @@ impl Drop for Guard {
160170
start_ns,
161171
elapsed_ns,
162172
self_ns,
163-
alloc_count: entry.alloc_count,
164-
alloc_bytes: entry.alloc_bytes,
165-
free_count: entry.free_count,
166-
free_bytes: entry.free_bytes,
173+
alloc_count: scope_alloc.alloc_count,
174+
alloc_bytes: scope_alloc.alloc_bytes,
175+
free_count: scope_alloc.free_count,
176+
free_bytes: scope_alloc.free_bytes,
167177
depth: entry.depth,
168178
};
169179

@@ -193,17 +203,23 @@ impl Drop for Guard {
193203
pub fn enter(name: &'static str) -> Guard {
194204
// Touch EPOCH so relative timestamps are anchored to process start.
195205
let _ = *EPOCH;
206+
207+
// Save current alloc counters and zero them for this scope.
208+
let saved_alloc = crate::alloc::ALLOC_COUNTERS
209+
.try_with(|cell| {
210+
let snap = cell.get();
211+
cell.set(crate::alloc::AllocSnapshot::new());
212+
snap
213+
})
214+
.unwrap_or_default();
215+
196216
STACK.with(|stack| {
197217
let depth = stack.borrow().len() as u16;
198218
stack.borrow_mut().push(StackEntry {
199219
name,
200220
start: Instant::now(),
201221
children_ms: 0.0,
202-
overhead_ns: 0,
203-
alloc_count: 0,
204-
alloc_bytes: 0,
205-
free_count: 0,
206-
free_bytes: 0,
222+
saved_alloc,
207223
depth,
208224
});
209225
});
@@ -633,17 +649,22 @@ pub fn fork() -> Option<SpanContext> {
633649
/// thread correctly attributes children time. Returns an `AdoptGuard` that
634650
/// propagates elapsed time back to the parent on drop.
635651
pub fn adopt(ctx: &SpanContext) -> AdoptGuard {
652+
// Save current alloc counters and zero them, same as enter().
653+
let saved_alloc = crate::alloc::ALLOC_COUNTERS
654+
.try_with(|cell| {
655+
let snap = cell.get();
656+
cell.set(crate::alloc::AllocSnapshot::new());
657+
snap
658+
})
659+
.unwrap_or_default();
660+
636661
STACK.with(|stack| {
637662
let depth = stack.borrow().len() as u16;
638663
stack.borrow_mut().push(StackEntry {
639664
name: ctx.parent_name,
640665
start: Instant::now(),
641666
children_ms: 0.0,
642-
overhead_ns: 0,
643-
alloc_count: 0,
644-
alloc_bytes: 0,
645-
free_count: 0,
646-
free_bytes: 0,
667+
saved_alloc,
647668
depth,
648669
});
649670
});

src/build.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,14 @@ fn extract_rendered_errors(json_output: &str) -> Vec<String> {
111111
/// Build the instrumented binary using `cargo build --message-format=json`.
112112
/// Returns the path to the compiled executable.
113113
pub fn build_instrumented(staging_dir: &Path, target_dir: &Path) -> Result<PathBuf, Error> {
114+
// Remove RUSTUP_TOOLCHAIN so the target project's rust-toolchain.toml
115+
// is respected. Without this, nested cargo invocations inherit the
116+
// parent's toolchain, ignoring the project's pinned version.
114117
let output = Command::new("cargo")
115118
.arg("build")
116119
.arg("--message-format=json")
117120
.env("CARGO_TARGET_DIR", target_dir)
121+
.env_remove("RUSTUP_TOOLCHAIN")
118122
.current_dir(staging_dir)
119123
.output()?;
120124

0 commit comments

Comments (0)