diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py
index c70e1eefe276cc..7dc095c6c279bd 100644
--- a/Lib/profiling/sampling/collector.py
+++ b/Lib/profiling/sampling/collector.py
@@ -6,6 +6,7 @@
     THREAD_STATUS_GIL_REQUESTED,
     THREAD_STATUS_UNKNOWN,
     THREAD_STATUS_HAS_EXCEPTION,
+    _INTERNAL_FRAME_SUFFIXES,
 )
 
 try:
@@ -42,6 +43,27 @@ def extract_lineno(location):
         return 0
     return location[0]
 
+def _is_internal_frame(frame):
+    """Return True if the frame's filename has an internal suffix."""
+    if isinstance(frame, tuple):
+        filename = frame[0] if frame else ""
+    else:
+        filename = getattr(frame, "filename", "")
+
+    if not filename:
+        return False
+
+    return filename.endswith(_INTERNAL_FRAME_SUFFIXES)
+
+
+def filter_internal_frames(frames):
+    """Drop internal profiler frames; falsy input is returned as-is."""
+    if not frames:
+        return frames
+
+    return [f for f in frames if not _is_internal_frame(f)]
+
+
 class Collector(ABC):
     @abstractmethod
     def collect(self, stack_frames, timestamps_us=None):
@@ -63,6 +85,11 @@ def collect_failed_sample(self):
     def export(self, filename):
         """Export collected data to a file."""
 
+    @staticmethod
+    def _filter_internal_frames(frames):
+        """Delegate to the module-level filter_internal_frames()."""
+        return filter_internal_frames(frames)
+
     def _iter_all_frames(self, stack_frames, skip_idle=False):
         for interpreter_info in stack_frames:
             for thread_info in interpreter_info.threads:
@@ -76,7 +103,10 @@ def _iter_all_frames(self, stack_frames, skip_idle=False):
                         continue
                 frames = thread_info.frame_info
                 if frames:
-                    yield frames, thread_info.thread_id
+                    # Drop internal profiler frames wherever they sit in the stack
+                    frames = self._filter_internal_frames(frames)
+                    if frames:
+                        yield frames, thread_info.thread_id
 
     def _iter_async_frames(self, awaited_info_list):
         # Phase 1: Index tasks and build parent relationships with pre-computed selection
diff --git a/Lib/profiling/sampling/constants.py b/Lib/profiling/sampling/constants.py
index 366cbb38365c9f..58a57700fbdd4a 100644
--- a/Lib/profiling/sampling/constants.py
+++ b/Lib/profiling/sampling/constants.py
@@ -23,6 +23,12 @@
 # Format: (lineno, end_lineno, col_offset, end_col_offset)
 DEFAULT_LOCATION = (0, 0, -1, -1)
 
+# Internal frame path suffixes to filter from profiling output
+# These are internal profiler modules that should not appear in user-facing output
+_INTERNAL_FRAME_SUFFIXES = (
+    "_sync_coordinator.py",
+)
+
 # Thread status flags
 try:
     from _remote_debugging import (
diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py
index c1c9cfcf3b93a9..28ef9b69bf7968 100644
--- a/Lib/profiling/sampling/gecko_collector.py
+++ b/Lib/profiling/sampling/gecko_collector.py
@@ -6,7 +6,7 @@
 import threading
 import time
 
-from .collector import Collector
+from .collector import Collector, filter_internal_frames
 from .opcode_utils import get_opcode_info, format_opcode
 try:
     from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED, THREAD_STATUS_HAS_EXCEPTION
@@ -172,7 +172,7 @@ def collect(self, stack_frames, timestamps_us=None):
         # Process threads
         for interpreter_info in stack_frames:
             for thread_info in interpreter_info.threads:
-                frames = thread_info.frame_info
+                frames = filter_internal_frames(thread_info.frame_info)
                 tid = thread_info.thread_id
 
                 # Initialize thread if needed
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 30615a7d31d86c..c68ad071f74dfd 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1825,3 +1825,131 @@ def test_gecko_collector_frame_format(self):
         thread = profile["threads"][0]
         # Should have recorded 3 functions
         self.assertEqual(thread["funcTable"]["length"], 3)
+
+
+class TestInternalFrameFiltering(unittest.TestCase):
+    """Tests for filtering internal profiler frames from output."""
+
+    def test_filter_internal_frames(self):
+        """Test that _sync_coordinator frames are filtered from anywhere in stack."""
+        from profiling.sampling.collector import filter_internal_frames
+
+        # Stack with _sync_coordinator in the middle (realistic scenario)
+        frames = [
+            MockFrameInfo("user_script.py", 10, "user_func"),
+            MockFrameInfo("/path/to/_sync_coordinator.py", 100, "main"),
+            MockFrameInfo("", 87, "_run_code"),
+        ]
+
+        filtered = filter_internal_frames(frames)
+        self.assertEqual(len(filtered), 2)
+        self.assertEqual(filtered[0].filename, "user_script.py")
+        self.assertEqual(filtered[1].filename, "")
+
+    def test_pstats_collector_filters_internal_frames(self):
+        """Test that PstatsCollector filters out internal frames."""
+        collector = PstatsCollector(sample_interval_usec=1000)
+
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("user_script.py", 10, "user_func"),
+                            MockFrameInfo("/path/to/_sync_coordinator.py", 100, "main"),
+                            MockFrameInfo("", 87, "_run_code"),
+                        ],
+                        status=THREAD_STATUS_HAS_GIL,
+                    )
+                ],
+            )
+        ]
+        collector.collect(frames)
+
+        self.assertEqual(len(collector.result), 2)
+        self.assertIn(("user_script.py", 10, "user_func"), collector.result)
+        self.assertIn(("", 87, "_run_code"), collector.result)
+
+    def test_gecko_collector_filters_internal_frames(self):
+        """Test that GeckoCollector filters out internal frames."""
+        collector = GeckoCollector(sample_interval_usec=1000)
+
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("app.py", 50, "run"),
+                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
+                        ],
+                        status=THREAD_STATUS_HAS_GIL,
+                    )
+                ],
+            )
+        ]
+        collector.collect(frames)
+
+        profile = collector._build_profile()
+        string_array = profile["shared"]["stringArray"]
+
+        # Should not contain _sync_coordinator functions
+        for s in string_array:
+            self.assertNotIn("_sync_coordinator", s)
+
+    def test_flamegraph_collector_filters_internal_frames(self):
+        """Test that FlamegraphCollector filters out internal frames."""
+        collector = FlamegraphCollector(sample_interval_usec=1000)
+
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("app.py", 50, "run"),
+                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
+                            MockFrameInfo("", 87, "_run_code"),
+                        ],
+                        status=THREAD_STATUS_HAS_GIL,
+                    )
+                ],
+            )
+        ]
+        collector.collect(frames)
+
+        data = collector._convert_to_flamegraph_format()
+        strings = data.get("strings", [])
+
+        for s in strings:
+            self.assertNotIn("_sync_coordinator", s)
+
+    def test_collapsed_stack_collector_filters_internal_frames(self):
+        """Test that CollapsedStackCollector filters out internal frames."""
+        collector = CollapsedStackCollector(sample_interval_usec=1000)
+
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("app.py", 50, "run"),
+                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
+                        ],
+                        status=THREAD_STATUS_HAS_GIL,
+                    )
+                ],
+            )
+        ]
+        collector.collect(frames)
+
+        # Check that no stack contains _sync_coordinator
+        for (call_tree, _), _ in collector.stack_counter.items():
+            for filename, _, _ in call_tree:
+                self.assertNotIn("_sync_coordinator", filename)