Skip to content

Commit 8efab0d

Browse files
fix(runtime): restore function names for migrated async guards
Add a global interned string table mapping u16 IDs to function names, with a thread-local cache on the hot path. Restructure Guard's packed field from [cookie:48][depth:16] to [cookie:32][name_id:16][depth:16], keeping Guard at exactly 16 bytes. On migrated drop, look up the real function name from the intern table instead of using "<migrated>". Closes #116
1 parent 0c6736d commit 8efab0d

File tree

2 files changed

+245
-41
lines changed

2 files changed

+245
-41
lines changed

piano-runtime/src/collector.rs

Lines changed: 237 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -200,19 +200,105 @@ thread_local! {
200200
static THREAD_COOKIE: u64 = NEXT_THREAD_COOKIE.fetch_add(1, Ordering::Relaxed);
201201
}
202202

203-
/// Pack a thread cookie (48 bits) and stack depth (16 bits) into a single u64.
203+
// -- Interned function name table ------------------------------------------
//
// Maps u16 IDs to `&'static str` function names so the Guard can carry a
// compact name reference without growing beyond 16 bytes.
//
// Layout: append-only Vec behind a Mutex. Reads during drop_cold take the
// lock briefly; writes happen once per unique name in enter_cold.
// A thread-local cache avoids the global lock on the hot path.

/// Global interned name table: index -> &'static str.
/// Lazily initialized on first use; entries are only ever appended, so an
/// ID handed out once stays valid for the life of the process.
static NAME_TABLE: SyncOnceCell<Mutex<Vec<&'static str>>> = SyncOnceCell::new();

/// Return the global name table, initializing it on first call.
fn name_table() -> &'static Mutex<Vec<&'static str>> {
    NAME_TABLE.get_or_init(|| Mutex::new(Vec::new()))
}
218+
219+
// Thread-local cache mapping name pointer -> interned ID.
220+
// Uses pointer identity (`&'static str` addresses are stable).
221+
thread_local! {
222+
static NAME_CACHE: RefCell<HashMap<usize, u16>> = RefCell::new(HashMap::new());
223+
}
224+
225+
/// Intern a function name, returning its u16 ID.
226+
/// Fast path: thread-local cache hit (no global lock).
227+
/// Slow path: global table lookup/insert under lock, then cache.
228+
#[inline(always)]
229+
fn intern_name(name: &'static str) -> u16 {
230+
let ptr = name.as_ptr() as usize;
231+
let cached = NAME_CACHE.with(|cache| cache.borrow().get(&ptr).copied());
232+
if let Some(id) = cached {
233+
return id;
234+
}
235+
intern_name_slow(name, ptr)
236+
}
237+
238+
#[inline(never)]
239+
fn intern_name_slow(name: &'static str, ptr: usize) -> u16 {
240+
let mut table = name_table().lock().unwrap_or_else(|e| e.into_inner());
241+
// Check if already in global table (another thread may have added it).
242+
let id = if let Some(pos) = table.iter().position(|&n| n.as_ptr() as usize == ptr) {
243+
pos as u16
244+
} else {
245+
let len = table.len();
246+
debug_assert!(
247+
len <= u16::MAX as usize,
248+
"interned name table overflow: more than 65535 unique function names"
249+
);
250+
if len > u16::MAX as usize {
251+
// Table full (65536 entries, indices 0..=u16::MAX). Saturate
252+
// instead of wrapping. lookup_name handles out-of-bounds by
253+
// returning "<unknown>", so this degrades gracefully.
254+
return u16::MAX;
255+
}
256+
let id = len as u16;
257+
table.push(name);
258+
id
259+
};
260+
drop(table);
261+
NAME_CACHE.with(|cache| {
262+
cache.borrow_mut().insert(ptr, id);
263+
});
264+
id
265+
}
266+
267+
/// Look up a function name by its interned ID.
268+
/// Returns `"<unknown>"` if the ID is out of bounds (should never happen).
269+
fn lookup_name(id: u16) -> &'static str {
270+
let table = name_table().lock().unwrap_or_else(|e| e.into_inner());
271+
table.get(id as usize).copied().unwrap_or("<unknown>")
272+
}
273+
274+
/// Pack a thread cookie (32 bits), name ID (16 bits), and stack depth (16 bits)
/// into a single u64. Guard stays 16 bytes.
///
/// Layout: `[cookie:32][name_id:16][depth:16]`
///
/// Note: only the low 32 bits of the cookie are stored, limiting unique thread
/// identification to ~4 billion threads per process. This is a deliberate
/// tradeoff (from a previous 48-bit cookie) to make room for the 16-bit
/// name_id. If THREAD_COOKIE exceeds 2^32, `unpack_cookie` returns only the
/// low 32 bits, which could cause false migration detection in `drop_cold`.
#[inline(always)]
fn pack_cookie_name_depth(cookie: u64, name_id: u16, depth: u16) -> u64 {
    let hi = cookie << 32;
    let mid = u64::from(name_id) << 16;
    let lo = u64::from(depth);
    hi | mid | lo
}
208288

209-
/// Recover the thread cookie from the high 32 bits of a packed value.
#[inline(always)]
fn unpack_cookie(packed: u64) -> u64 {
    packed >> 32
}

/// Recover the name ID from bits 16..=31 of a packed value.
#[inline(always)]
fn unpack_name_id(packed: u64) -> u16 {
    ((packed >> 16) & 0xFFFF) as u16
}

/// Recover the stack depth from the low 16 bits of a packed value.
#[inline(always)]
fn unpack_depth(packed: u64) -> u16 {
    (packed & 0xFFFF) as u16
}
@@ -230,7 +316,10 @@ fn unpack_depth(packed: u64) -> u16 {
230316
#[must_use = "dropping the guard immediately records ~0ms; bind it with `let _guard = ...`"]
pub struct Guard {
    // TSC timestamp captured at enter(); elapsed time is computed against
    // this when the guard is dropped.
    start_tsc: u64,
    /// Bit layout: `[cookie:32][name_id:16][depth:16]`
    /// - cookie: identifies the thread that called enter()
    /// - name_id: index into the global interned name table
    /// - depth: stack depth at the time of enter()
    packed: u64,
}
236325

@@ -300,6 +389,7 @@ fn drop_cold(guard: &Guard, end_tsc: u64, #[cfg(feature = "cpu-time")] cpu_end_n
300389
let migrated = drop_cookie != enter_cookie;
301390

302391
if migrated {
392+
let name = lookup_name(unpack_name_id(guard.packed));
303393
let elapsed_ns = crate::tsc::elapsed_ns(guard.start_tsc, end_tsc);
304394
let elapsed_ms = elapsed_ns as f64 / 1_000_000.0;
305395
let start_ns = crate::tsc::ticks_to_epoch_ns(guard.start_tsc, crate::tsc::epoch_tsc());
@@ -328,7 +418,7 @@ fn drop_cold(guard: &Guard, end_tsc: u64, #[cfg(feature = "cpu-time")] cpu_end_n
328418
.lock()
329419
.unwrap_or_else(|e| e.into_inner())
330420
.push(RawRecord {
331-
name: "<migrated>",
421+
name,
332422
elapsed_ms,
333423
children_ms: phantom_children_ms,
334424
#[cfg(feature = "cpu-time")]
@@ -337,7 +427,7 @@ fn drop_cold(guard: &Guard, end_tsc: u64, #[cfg(feature = "cpu-time")] cpu_end_n
337427
});
338428

339429
let invocation = InvocationRecord {
340-
name: "<migrated>",
430+
name,
341431
start_ns,
342432
elapsed_ns,
343433
self_ns,
@@ -483,13 +573,14 @@ impl Drop for Guard {
483573
}
484574
}
485575

486-
/// Bookkeeping half of enter(): epoch, alloc save, stack push.
487-
/// Returns a packed u64 (thread cookie << 16 | depth).
576+
/// Bookkeeping half of enter(): epoch, alloc save, stack push, name interning.
577+
/// Returns a packed u64: `[cookie:32][name_id:16][depth:16]`.
488578
#[inline(never)]
489579
fn enter_cold(name: &'static str) -> u64 {
490580
let _ = epoch();
491581

492582
let cookie = THREAD_COOKIE.with(|c| *c);
583+
let name_id = intern_name(name);
493584

494585
let saved_alloc = crate::alloc::ALLOC_COUNTERS
495586
.try_with(|cell| {
@@ -518,7 +609,7 @@ fn enter_cold(name: &'static str) -> u64 {
518609
depth
519610
});
520611

521-
pack_cookie_depth(cookie, depth)
612+
pack_cookie_name_depth(cookie, name_id, depth)
522613
}
523614

524615
/// Start timing a function. Returns a Guard that records the measurement on drop.
@@ -2000,10 +2091,11 @@ mod tests {
20002091
});
20012092

20022093
let records = collect_all();
2003-
let rec = records.iter().find(|r| r.name == "<migrated>");
2094+
let rec = records.iter().find(|r| r.name == "migrating_fn");
20042095
assert!(
20052096
rec.is_some(),
2006-
"migrated guard should still produce a record with <migrated> name"
2097+
"migrated guard should preserve function name 'migrating_fn'. Got: {:?}",
2098+
records.iter().map(|r| &r.name).collect::<Vec<_>>()
20072099
);
20082100
assert!(
20092101
rec.unwrap().total_ms > 0.5,
@@ -2225,7 +2317,10 @@ mod tests {
22252317
});
22262318

22272319
let records = collect_all();
2228-
let rec = records.iter().find(|r| r.name == "<migrated>").unwrap();
2320+
let rec = records
2321+
.iter()
2322+
.find(|r| r.name == "cpu_migrated")
2323+
.expect("migrated guard should preserve name 'cpu_migrated'");
22292324
assert!(rec.total_ms > 0.0, "wall time captured");
22302325
assert!(
22312326
rec.cpu_self_ms == 0.0,
@@ -2310,8 +2405,8 @@ mod tests {
23102405

23112406
let parent_inv = invocations
23122407
.iter()
2313-
.find(|r| r.name == "<migrated>")
2314-
.expect("migrated parent should produce an invocation");
2408+
.find(|r| r.name == "mig_parent")
2409+
.expect("migrated parent should preserve name 'mig_parent'");
23152410
let child_inv = invocations
23162411
.iter()
23172412
.find(|r| r.name == "mig_child")
@@ -2348,8 +2443,8 @@ mod tests {
23482443
let records = collect_all();
23492444
let parent_rec = records
23502445
.iter()
2351-
.find(|r| r.name == "<migrated>")
2352-
.expect("migrated parent in collect_all");
2446+
.find(|r| r.name == "rec_parent")
2447+
.expect("migrated parent should preserve name 'rec_parent'");
23532448

23542449
assert!(
23552450
parent_rec.self_ms < parent_rec.total_ms,
@@ -2374,8 +2469,8 @@ mod tests {
23742469
let invocations = collect_invocations();
23752470
let migrated = invocations
23762471
.iter()
2377-
.find(|r| r.name == "<migrated>")
2378-
.expect("should have a migrated record");
2472+
.find(|r| r.name == "other_thread")
2473+
.expect("migrated guard should preserve name 'other_thread'");
23792474

23802475
assert_eq!(
23812476
migrated.self_ns, migrated.elapsed_ns,
@@ -2421,8 +2516,8 @@ mod tests {
24212516

24222517
let migrated = c_invocations
24232518
.iter()
2424-
.find(|r| r.name == "<migrated>")
2425-
.expect("should have migrated record");
2519+
.find(|r| r.name == "bc_parent")
2520+
.expect("migrated guard should preserve name 'bc_parent'");
24262521

24272522
// C's phantom captures c_child's time. B's phantom is leaked
24282523
// (documented limitation: no cross-thread phantom forwarding yet).
@@ -2462,4 +2557,123 @@ mod tests {
24622557
});
24632558
});
24642559
}
2560+
2561+
#[test]
fn migrated_guard_preserves_function_name() {
    // Migrated guards should report the actual function name,
    // not a generic "<migrated>" placeholder.
    reset();
    let guard = enter("real_fn_name");
    burn_cpu(10_000);

    // Move the guard to another thread and drop it there, forcing the
    // cross-thread ("migrated") path in drop_cold.
    std::thread::scope(|s| {
        s.spawn(move || {
            burn_cpu(10_000);
            drop(guard);
        });
    });

    let records = collect_all();
    let rec = records.iter().find(|r| r.name == "real_fn_name");
    assert!(
        rec.is_some(),
        "migrated guard should preserve function name 'real_fn_name'. Got: {:?}",
        records.iter().map(|r| &r.name).collect::<Vec<_>>()
    );
    assert!(
        rec.unwrap().total_ms > 0.0,
        "should have recorded wall time"
    );
}
2588+
2589+
#[test]
fn migrated_guards_distinguish_multiple_functions() {
    // When multiple functions migrate, each should retain its own name
    // instead of collapsing into a single "<migrated>" bucket.
    reset();
    let guard_a = enter("fn_alpha");
    burn_cpu(5_000);

    // guard_b is created on a second thread so that dropping it below (on
    // yet another thread) also takes the migrated path.
    let guard_b = std::thread::scope(|s| s.spawn(|| enter("fn_beta")).join().unwrap());
    burn_cpu(5_000);

    // Drop both guards on different threads than where they were created.
    std::thread::scope(|s| {
        s.spawn(move || {
            drop(guard_a);
        });
    });
    std::thread::scope(|s| {
        s.spawn(move || {
            drop(guard_b);
        });
    });

    let records = collect_all();
    // NOTE(review): `.as_str()` implies `name` is a `String` here, but the
    // sibling tests compare `r.name` directly against string literals and
    // drop_cold stores `&'static str` names — confirm RawRecord's `name`
    // field type (plain `&str` has no inherent `as_str`).
    let names: Vec<&str> = records.iter().map(|r| r.name.as_str()).collect();
    assert!(
        names.contains(&"fn_alpha"),
        "should have fn_alpha in records. Got: {names:?}"
    );
    assert!(
        names.contains(&"fn_beta"),
        "should have fn_beta in records. Got: {names:?}"
    );
    assert!(
        !names.contains(&"<migrated>"),
        "should NOT have <migrated> placeholder. Got: {names:?}"
    );
}
2627+
2628+
#[test]
fn migrated_invocation_has_real_name() {
    // Verify InvocationRecord also carries the real function name.
    reset();
    let guard = enter("inv_migrated_fn");
    burn_cpu(10_000);

    // Drop on the spawned thread, then collect invocations from that same
    // thread (the migrated drop records into the dropping thread's storage).
    let invocations = std::thread::scope(|s| {
        s.spawn(move || {
            burn_cpu(10_000);
            drop(guard);
            collect_invocations()
        })
        .join()
        .unwrap()
    });

    let inv = invocations.iter().find(|r| r.name == "inv_migrated_fn");
    assert!(
        inv.is_some(),
        "migrated invocation should have name 'inv_migrated_fn'. Got: {:?}",
        invocations.iter().map(|r| r.name).collect::<Vec<_>>()
    );
    assert!(
        inv.unwrap().elapsed_ns > 0,
        "should have recorded elapsed time"
    );
}
2656+
2657+
#[test]
fn pack_unpack_round_trip() {
    // Representative, maximal, and zero inputs must all survive a
    // pack/unpack round trip, exercising every bit of the
    // [cookie:32][name_id:16][depth:16] layout.
    let cases: [(u64, u16, u16); 3] = [
        (42, 1234, 567),
        (u32::MAX as u64, u16::MAX, u16::MAX),
        (0, 0, 0),
    ];
    for (cookie, name_id, depth) in cases {
        let packed = pack_cookie_name_depth(cookie, name_id, depth);
        assert_eq!(unpack_cookie(packed), cookie);
        assert_eq!(unpack_name_id(packed), name_id);
        assert_eq!(unpack_depth(packed), depth);
    }
}
24652679
}

0 commit comments

Comments
 (0)