Skip to content

Commit 11ce5d8

Browse files
lkollarmiss-islington
authored andcommitted
gh-152434: Fix async-aware Gecko collection (GH-152442)
(cherry picked from commit 87ac0bc) Co-authored-by: László Kiss Kollár <kiss.kollar.laszlo@gmail.com>
1 parent 61bb57f commit 11ce5d8

3 files changed

Lines changed: 108 additions & 29 deletions

File tree

Lib/profiling/sampling/gecko_collector.py

Lines changed: 53 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,25 @@ def collect(self, stack_frames, timestamps_us=None):
250250
self.interval = (times[-1] - self.last_sample_time) / self.sample_count
251251
self.last_sample_time = times[-1]
252252

253+
# Process async tasks
254+
if stack_frames and hasattr(stack_frames[0], "awaited_by"):
255+
for frames, thread_id, _ in self._iter_async_frames(stack_frames):
256+
frames = filter_internal_frames(frames)
257+
if not frames:
258+
continue
259+
260+
if thread_id not in self.threads:
261+
self.threads[thread_id] = self._create_thread(
262+
thread_id, False
263+
)
264+
265+
self._record_stack_sample(
266+
self.threads[thread_id], frames, thread_id, times, first_time
267+
)
268+
269+
self.sample_count += len(times)
270+
return
271+
253272
# Process threads
254273
for interpreter_info in stack_frames:
255274
for thread_info in interpreter_info.threads:
@@ -333,37 +352,43 @@ def collect(self, stack_frames, timestamps_us=None):
333352
if not frames:
334353
continue
335354

336-
# Process stack once to get stack_index
337-
stack_index = self._process_stack(thread_data, frames)
338-
339-
# Add samples with timestamps
340-
thread_spill = thread_data["_spill"]
341-
for t in times:
342-
thread_spill.append_sample(stack_index, t)
343-
344-
# Handle opcodes
345-
if self.opcodes_enabled and frames:
346-
leaf_frame = frames[0]
347-
filename, location, funcname, opcode = leaf_frame
348-
if isinstance(location, tuple):
349-
lineno, _, col_offset, _ = location
350-
else:
351-
lineno = location
352-
col_offset = -1
353-
354-
current_state = (opcode, lineno, col_offset, funcname, filename)
355-
356-
if tid not in self.opcode_state:
357-
self.opcode_state[tid] = (*current_state, first_time)
358-
elif self.opcode_state[tid][:5] != current_state:
359-
prev_opcode, prev_lineno, prev_col, prev_funcname, prev_filename, prev_start = self.opcode_state[tid]
360-
self._add_opcode_interval_marker(
361-
tid, prev_opcode, prev_lineno, prev_col, prev_funcname, prev_start, first_time
362-
)
363-
self.opcode_state[tid] = (*current_state, first_time)
355+
self._record_stack_sample(
356+
thread_data, frames, tid, times, first_time
357+
)
364358

365359
self.sample_count += len(times)
366360

361+
def _record_stack_sample(self, thread_data, frames, tid, times, first_time):
362+
stack_index = self._process_stack(thread_data, frames)
363+
364+
thread_spill = thread_data["_spill"]
365+
for t in times:
366+
thread_spill.append_sample(stack_index, t)
367+
368+
if self.opcodes_enabled and frames:
369+
leaf_frame = frames[0]
370+
filename, location, funcname, opcode = leaf_frame
371+
if isinstance(location, tuple):
372+
lineno, _, col_offset, _ = location
373+
else:
374+
lineno = location
375+
col_offset = -1
376+
377+
current_state = (opcode, lineno, col_offset, funcname, filename)
378+
379+
if tid not in self.opcode_state:
380+
self.opcode_state[tid] = (*current_state, first_time)
381+
elif self.opcode_state[tid][:5] != current_state:
382+
(
383+
prev_opcode, prev_lineno, prev_col, prev_funcname,
384+
prev_filename, prev_start
385+
) = self.opcode_state[tid]
386+
self._add_opcode_interval_marker(
387+
tid, prev_opcode, prev_lineno, prev_col, prev_funcname,
388+
prev_start, first_time
389+
)
390+
self.opcode_state[tid] = (*current_state, first_time)
391+
367392
def _create_thread(self, tid, is_main_thread):
368393
"""Create a new thread structure with processed profile format."""
369394
if self.spill_dir is None:

Lib/test/test_profiling/test_sampling_profiler/test_collectors.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,16 @@
4040

4141
from test.support import captured_stdout, captured_stderr
4242

43-
from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo, LocationInfo, make_diff_collector_with_mock_baseline
43+
from .mocks import (
44+
MockAwaitedInfo,
45+
MockCoroInfo,
46+
MockFrameInfo,
47+
MockInterpreterInfo,
48+
MockTaskInfo,
49+
MockThreadInfo,
50+
LocationInfo,
51+
make_diff_collector_with_mock_baseline,
52+
)
4453
from .helpers import close_and_unlink, jsonl_tables
4554

4655

@@ -657,6 +666,48 @@ def test_gecko_collector_basic(self):
657666
self.assertGreater(stack_table["length"], 0)
658667
self.assertGreater(len(stack_table["frame"]), 0)
659668

669+
def test_gecko_collector_async_aware(self):
670+
collector = GeckoCollector(1000)
671+
672+
parent = MockTaskInfo(
673+
task_id=1,
674+
task_name="Parent",
675+
coroutine_stack=[
676+
MockCoroInfo(
677+
task_name="Parent",
678+
call_stack=[MockFrameInfo("parent.py", 10, "parent_fn")],
679+
)
680+
],
681+
)
682+
child = MockTaskInfo(
683+
task_id=2,
684+
task_name="Child",
685+
coroutine_stack=[
686+
MockCoroInfo(
687+
task_name="Child",
688+
call_stack=[MockFrameInfo("child.py", 20, "child_fn")],
689+
)
690+
],
691+
awaited_by=[MockCoroInfo(task_name=1, call_stack=[])],
692+
)
693+
694+
collector.collect(
695+
[MockAwaitedInfo(thread_id=100, awaited_by=[parent, child])],
696+
timestamps_us=[1000, 2000],
697+
)
698+
profile_data = export_gecko_profile(self, collector)
699+
700+
self.assertEqual(len(profile_data["threads"]), 1)
701+
thread_data = profile_data["threads"][0]
702+
self.assertEqual(thread_data["samples"]["length"], 2)
703+
704+
string_array = profile_data["shared"]["stringArray"]
705+
self.assertIn("parent_fn", string_array)
706+
self.assertIn("child_fn", string_array)
707+
self.assertIn("Parent", string_array)
708+
self.assertIn("Child", string_array)
709+
self.assertEqual(thread_data["markers"]["length"], 0)
710+
660711
@unittest.skipIf(is_emscripten, "threads not available")
661712
def test_gecko_collector_export(self):
662713
"""Test Gecko profile export functionality."""
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fixed ``profiling.sampling --gecko`` with ``--async-aware`` by flattening
2+
async task stacks before generating Gecko samples. ``--binary`` now rejects
3+
``--async-aware`` until the binary format supports async task data.

0 commit comments

Comments
 (0)