@@ -200,19 +200,105 @@ thread_local! {
200200 static THREAD_COOKIE : u64 = NEXT_THREAD_COOKIE . fetch_add( 1 , Ordering :: Relaxed ) ;
201201}
202202
203- /// Pack a thread cookie (48 bits) and stack depth (16 bits) into a single u64.
203+ // -- Interned function name table ------------------------------------------
204+ //
205+ // Maps u16 IDs to `&'static str` function names so the Guard can carry a
206+ // compact name reference without growing beyond 16 bytes.
207+ //
208+ // Layout: append-only Vec behind a Mutex. Reads during drop_cold take the
209+ // lock briefly; writes happen once per unique name in enter_cold.
210+ // A thread-local cache avoids the global lock on the hot path.
211+
212+ /// Global interned name table: index -> &'static str.
213+ static NAME_TABLE : SyncOnceCell < Mutex < Vec < & ' static str > > > = SyncOnceCell :: new ( ) ;
214+
215+ fn name_table ( ) -> & ' static Mutex < Vec < & ' static str > > {
216+ NAME_TABLE . get_or_init ( || Mutex :: new ( Vec :: new ( ) ) )
217+ }
218+
219+ // Thread-local cache mapping name pointer -> interned ID.
220+ // Uses pointer identity (`&'static str` addresses are stable).
221+ thread_local ! {
222+ static NAME_CACHE : RefCell <HashMap <usize , u16 >> = RefCell :: new( HashMap :: new( ) ) ;
223+ }
224+
225+ /// Intern a function name, returning its u16 ID.
226+ /// Fast path: thread-local cache hit (no global lock).
227+ /// Slow path: global table lookup/insert under lock, then cache.
228+ #[ inline( always) ]
229+ fn intern_name ( name : & ' static str ) -> u16 {
230+ let ptr = name. as_ptr ( ) as usize ;
231+ let cached = NAME_CACHE . with ( |cache| cache. borrow ( ) . get ( & ptr) . copied ( ) ) ;
232+ if let Some ( id) = cached {
233+ return id;
234+ }
235+ intern_name_slow ( name, ptr)
236+ }
237+
238+ #[ inline( never) ]
239+ fn intern_name_slow ( name : & ' static str , ptr : usize ) -> u16 {
240+ let mut table = name_table ( ) . lock ( ) . unwrap_or_else ( |e| e. into_inner ( ) ) ;
241+ // Check if already in global table (another thread may have added it).
242+ let id = if let Some ( pos) = table. iter ( ) . position ( |& n| n. as_ptr ( ) as usize == ptr) {
243+ pos as u16
244+ } else {
245+ let len = table. len ( ) ;
246+ debug_assert ! (
247+ len <= u16 :: MAX as usize ,
248+ "interned name table overflow: more than 65535 unique function names"
249+ ) ;
250+ if len > u16:: MAX as usize {
251+ // Table full (65536 entries, indices 0..=u16::MAX). Saturate
252+ // instead of wrapping. lookup_name handles out-of-bounds by
253+ // returning "<unknown>", so this degrades gracefully.
254+ return u16:: MAX ;
255+ }
256+ let id = len as u16 ;
257+ table. push ( name) ;
258+ id
259+ } ;
260+ drop ( table) ;
261+ NAME_CACHE . with ( |cache| {
262+ cache. borrow_mut ( ) . insert ( ptr, id) ;
263+ } ) ;
264+ id
265+ }
266+
267+ /// Look up a function name by its interned ID.
268+ /// Returns `"<unknown>"` if the ID is out of bounds (should never happen).
269+ fn lookup_name ( id : u16 ) -> & ' static str {
270+ let table = name_table ( ) . lock ( ) . unwrap_or_else ( |e| e. into_inner ( ) ) ;
271+ table. get ( id as usize ) . copied ( ) . unwrap_or ( "<unknown>" )
272+ }
273+
274+ /// Pack a thread cookie (32 bits), name ID (16 bits), and stack depth (16 bits)
275+ /// into a single u64. Guard stays 16 bytes.
276+ ///
277+ /// Layout: `[cookie:32][name_id:16][depth:16]`
278+ ///
279+ /// Note: only the low 32 bits of the cookie are stored, limiting unique thread
280+ /// identification to ~4 billion threads per process. This is a deliberate
281+ /// tradeoff (from a previous 48-bit cookie) to make room for the 16-bit
282+ /// name_id. If THREAD_COOKIE exceeds 2^32, `unpack_cookie` returns only the
283+ /// low 32 bits, which could cause false migration detection in `drop_cold`.
204284#[ inline( always) ]
205- fn pack_cookie_depth ( cookie : u64 , depth : u16 ) -> u64 {
206- ( cookie << 16 ) | depth as u64
285+ fn pack_cookie_name_depth ( cookie : u64 , name_id : u16 , depth : u16 ) -> u64 {
286+ ( cookie << 32 ) | ( ( name_id as u64 ) << 16 ) | ( depth as u64 )
207287}
208288
209- /// Unpack the thread cookie from a packed u64.
289+ /// Unpack the thread cookie (high 32 bits) from a packed u64.
210290#[ inline( always) ]
211291fn unpack_cookie ( packed : u64 ) -> u64 {
212- packed >> 16
292+ packed >> 32
293+ }
294+
295+ /// Unpack the name ID (bits 16..31) from a packed u64.
296+ #[ inline( always) ]
297+ fn unpack_name_id ( packed : u64 ) -> u16 {
298+ ( packed >> 16 ) as u16
213299}
214300
215- /// Unpack the stack depth from a packed u64.
301+ /// Unpack the stack depth (low 16 bits) from a packed u64.
216302#[ inline( always) ]
217303fn unpack_depth ( packed : u64 ) -> u16 {
218304 packed as u16
@@ -230,7 +316,10 @@ fn unpack_depth(packed: u64) -> u16 {
230316#[ must_use = "dropping the guard immediately records ~0ms; bind it with `let _guard = ...`" ]
231317pub struct Guard {
232318 start_tsc : u64 ,
233- /// High 48 bits: thread cookie, low 16 bits: stack depth.
319+ /// Bit layout: `[cookie:32][name_id:16][depth:16]`
320+ /// - cookie: identifies the thread that called enter()
321+ /// - name_id: index into the global interned name table
322+ /// - depth: stack depth at the time of enter()
234323 packed : u64 ,
235324}
236325
@@ -300,6 +389,7 @@ fn drop_cold(guard: &Guard, end_tsc: u64, #[cfg(feature = "cpu-time")] cpu_end_n
300389 let migrated = drop_cookie != enter_cookie;
301390
302391 if migrated {
392+ let name = lookup_name ( unpack_name_id ( guard. packed ) ) ;
303393 let elapsed_ns = crate :: tsc:: elapsed_ns ( guard. start_tsc , end_tsc) ;
304394 let elapsed_ms = elapsed_ns as f64 / 1_000_000.0 ;
305395 let start_ns = crate :: tsc:: ticks_to_epoch_ns ( guard. start_tsc , crate :: tsc:: epoch_tsc ( ) ) ;
@@ -328,7 +418,7 @@ fn drop_cold(guard: &Guard, end_tsc: u64, #[cfg(feature = "cpu-time")] cpu_end_n
328418 . lock ( )
329419 . unwrap_or_else ( |e| e. into_inner ( ) )
330420 . push ( RawRecord {
331- name : "<migrated>" ,
421+ name,
332422 elapsed_ms,
333423 children_ms : phantom_children_ms,
334424 #[ cfg( feature = "cpu-time" ) ]
@@ -337,7 +427,7 @@ fn drop_cold(guard: &Guard, end_tsc: u64, #[cfg(feature = "cpu-time")] cpu_end_n
337427 } ) ;
338428
339429 let invocation = InvocationRecord {
340- name : "<migrated>" ,
430+ name,
341431 start_ns,
342432 elapsed_ns,
343433 self_ns,
@@ -483,13 +573,14 @@ impl Drop for Guard {
483573 }
484574}
485575
486- /// Bookkeeping half of enter(): epoch, alloc save, stack push.
487- /// Returns a packed u64 (thread cookie << 16 | depth) .
576+ /// Bookkeeping half of enter(): epoch, alloc save, stack push, name interning .
577+ /// Returns a packed u64: `[ cookie:32][name_id:16][ depth:16]` .
488578#[ inline( never) ]
489579fn enter_cold ( name : & ' static str ) -> u64 {
490580 let _ = epoch ( ) ;
491581
492582 let cookie = THREAD_COOKIE . with ( |c| * c) ;
583+ let name_id = intern_name ( name) ;
493584
494585 let saved_alloc = crate :: alloc:: ALLOC_COUNTERS
495586 . try_with ( |cell| {
@@ -518,7 +609,7 @@ fn enter_cold(name: &'static str) -> u64 {
518609 depth
519610 } ) ;
520611
521- pack_cookie_depth ( cookie, depth)
612+ pack_cookie_name_depth ( cookie, name_id , depth)
522613}
523614
524615/// Start timing a function. Returns a Guard that records the measurement on drop.
@@ -2000,10 +2091,11 @@ mod tests {
20002091 } ) ;
20012092
20022093 let records = collect_all ( ) ;
2003- let rec = records. iter ( ) . find ( |r| r. name == "<migrated> " ) ;
2094+ let rec = records. iter ( ) . find ( |r| r. name == "migrating_fn " ) ;
20042095 assert ! (
20052096 rec. is_some( ) ,
2006- "migrated guard should still produce a record with <migrated> name"
2097+ "migrated guard should preserve function name 'migrating_fn'. Got: {:?}" ,
2098+ records. iter( ) . map( |r| & r. name) . collect:: <Vec <_>>( )
20072099 ) ;
20082100 assert ! (
20092101 rec. unwrap( ) . total_ms > 0.5 ,
@@ -2225,7 +2317,10 @@ mod tests {
22252317 } ) ;
22262318
22272319 let records = collect_all ( ) ;
2228- let rec = records. iter ( ) . find ( |r| r. name == "<migrated>" ) . unwrap ( ) ;
2320+ let rec = records
2321+ . iter ( )
2322+ . find ( |r| r. name == "cpu_migrated" )
2323+ . expect ( "migrated guard should preserve name 'cpu_migrated'" ) ;
22292324 assert ! ( rec. total_ms > 0.0 , "wall time captured" ) ;
22302325 assert ! (
22312326 rec. cpu_self_ms == 0.0 ,
@@ -2310,8 +2405,8 @@ mod tests {
23102405
23112406 let parent_inv = invocations
23122407 . iter ( )
2313- . find ( |r| r. name == "<migrated> " )
2314- . expect ( "migrated parent should produce an invocation " ) ;
2408+ . find ( |r| r. name == "mig_parent " )
2409+ . expect ( "migrated parent should preserve name 'mig_parent' " ) ;
23152410 let child_inv = invocations
23162411 . iter ( )
23172412 . find ( |r| r. name == "mig_child" )
@@ -2348,8 +2443,8 @@ mod tests {
23482443 let records = collect_all ( ) ;
23492444 let parent_rec = records
23502445 . iter ( )
2351- . find ( |r| r. name == "<migrated> " )
2352- . expect ( "migrated parent in collect_all " ) ;
2446+ . find ( |r| r. name == "rec_parent " )
2447+ . expect ( "migrated parent should preserve name 'rec_parent' " ) ;
23532448
23542449 assert ! (
23552450 parent_rec. self_ms < parent_rec. total_ms,
@@ -2374,8 +2469,8 @@ mod tests {
23742469 let invocations = collect_invocations ( ) ;
23752470 let migrated = invocations
23762471 . iter ( )
2377- . find ( |r| r. name == "<migrated> " )
2378- . expect ( "should have a migrated record " ) ;
2472+ . find ( |r| r. name == "other_thread " )
2473+ . expect ( "migrated guard should preserve name 'other_thread' " ) ;
23792474
23802475 assert_eq ! (
23812476 migrated. self_ns, migrated. elapsed_ns,
@@ -2421,8 +2516,8 @@ mod tests {
24212516
24222517 let migrated = c_invocations
24232518 . iter ( )
2424- . find ( |r| r. name == "<migrated> " )
2425- . expect ( "should have migrated record " ) ;
2519+ . find ( |r| r. name == "bc_parent " )
2520+ . expect ( "migrated guard should preserve name 'bc_parent' " ) ;
24262521
24272522 // C's phantom captures c_child's time. B's phantom is leaked
24282523 // (documented limitation: no cross-thread phantom forwarding yet).
@@ -2462,4 +2557,123 @@ mod tests {
24622557 } ) ;
24632558 } ) ;
24642559 }
2560+
2561+ #[ test]
2562+ fn migrated_guard_preserves_function_name ( ) {
2563+ // Migrated guards should report the actual function name,
2564+ // not a generic "<migrated>" placeholder.
2565+ reset ( ) ;
2566+ let guard = enter ( "real_fn_name" ) ;
2567+ burn_cpu ( 10_000 ) ;
2568+
2569+ std:: thread:: scope ( |s| {
2570+ s. spawn ( move || {
2571+ burn_cpu ( 10_000 ) ;
2572+ drop ( guard) ;
2573+ } ) ;
2574+ } ) ;
2575+
2576+ let records = collect_all ( ) ;
2577+ let rec = records. iter ( ) . find ( |r| r. name == "real_fn_name" ) ;
2578+ assert ! (
2579+ rec. is_some( ) ,
2580+ "migrated guard should preserve function name 'real_fn_name'. Got: {:?}" ,
2581+ records. iter( ) . map( |r| & r. name) . collect:: <Vec <_>>( )
2582+ ) ;
2583+ assert ! (
2584+ rec. unwrap( ) . total_ms > 0.0 ,
2585+ "should have recorded wall time"
2586+ ) ;
2587+ }
2588+
2589+ #[ test]
2590+ fn migrated_guards_distinguish_multiple_functions ( ) {
2591+ // When multiple functions migrate, each should retain its own name
2592+ // instead of collapsing into a single "<migrated>" bucket.
2593+ reset ( ) ;
2594+ let guard_a = enter ( "fn_alpha" ) ;
2595+ burn_cpu ( 5_000 ) ;
2596+
2597+ let guard_b = std:: thread:: scope ( |s| s. spawn ( || enter ( "fn_beta" ) ) . join ( ) . unwrap ( ) ) ;
2598+ burn_cpu ( 5_000 ) ;
2599+
2600+ // Drop both guards on different threads than where they were created.
2601+ std:: thread:: scope ( |s| {
2602+ s. spawn ( move || {
2603+ drop ( guard_a) ;
2604+ } ) ;
2605+ } ) ;
2606+ std:: thread:: scope ( |s| {
2607+ s. spawn ( move || {
2608+ drop ( guard_b) ;
2609+ } ) ;
2610+ } ) ;
2611+
2612+ let records = collect_all ( ) ;
2613+ let names: Vec < & str > = records. iter ( ) . map ( |r| r. name . as_str ( ) ) . collect ( ) ;
2614+ assert ! (
2615+ names. contains( & "fn_alpha" ) ,
2616+ "should have fn_alpha in records. Got: {names:?}"
2617+ ) ;
2618+ assert ! (
2619+ names. contains( & "fn_beta" ) ,
2620+ "should have fn_beta in records. Got: {names:?}"
2621+ ) ;
2622+ assert ! (
2623+ !names. contains( & "<migrated>" ) ,
2624+ "should NOT have <migrated> placeholder. Got: {names:?}"
2625+ ) ;
2626+ }
2627+
2628+ #[ test]
2629+ fn migrated_invocation_has_real_name ( ) {
2630+ // Verify InvocationRecord also carries the real function name.
2631+ reset ( ) ;
2632+ let guard = enter ( "inv_migrated_fn" ) ;
2633+ burn_cpu ( 10_000 ) ;
2634+
2635+ let invocations = std:: thread:: scope ( |s| {
2636+ s. spawn ( move || {
2637+ burn_cpu ( 10_000 ) ;
2638+ drop ( guard) ;
2639+ collect_invocations ( )
2640+ } )
2641+ . join ( )
2642+ . unwrap ( )
2643+ } ) ;
2644+
2645+ let inv = invocations. iter ( ) . find ( |r| r. name == "inv_migrated_fn" ) ;
2646+ assert ! (
2647+ inv. is_some( ) ,
2648+ "migrated invocation should have name 'inv_migrated_fn'. Got: {:?}" ,
2649+ invocations. iter( ) . map( |r| r. name) . collect:: <Vec <_>>( )
2650+ ) ;
2651+ assert ! (
2652+ inv. unwrap( ) . elapsed_ns > 0 ,
2653+ "should have recorded elapsed time"
2654+ ) ;
2655+ }
2656+
2657+ #[ test]
2658+ fn pack_unpack_round_trip ( ) {
2659+ let cookie = 42u64 ;
2660+ let name_id = 1234u16 ;
2661+ let depth = 567u16 ;
2662+ let packed = pack_cookie_name_depth ( cookie, name_id, depth) ;
2663+ assert_eq ! ( unpack_cookie( packed) , cookie) ;
2664+ assert_eq ! ( unpack_name_id( packed) , name_id) ;
2665+ assert_eq ! ( unpack_depth( packed) , depth) ;
2666+
2667+ // Max values: verifies the full bit range.
2668+ let packed_max = pack_cookie_name_depth ( u32:: MAX as u64 , u16:: MAX , u16:: MAX ) ;
2669+ assert_eq ! ( unpack_cookie( packed_max) , u32 :: MAX as u64 ) ;
2670+ assert_eq ! ( unpack_name_id( packed_max) , u16 :: MAX ) ;
2671+ assert_eq ! ( unpack_depth( packed_max) , u16 :: MAX ) ;
2672+
2673+ // Zero values: verifies zero-packing.
2674+ let packed_zero = pack_cookie_name_depth ( 0 , 0 , 0 ) ;
2675+ assert_eq ! ( unpack_cookie( packed_zero) , 0 ) ;
2676+ assert_eq ! ( unpack_name_id( packed_zero) , 0 ) ;
2677+ assert_eq ! ( unpack_depth( packed_zero) , 0 ) ;
2678+ }
24652679}
0 commit comments