Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion Lib/profiling/sampling/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
THREAD_STATUS_GIL_REQUESTED,
THREAD_STATUS_UNKNOWN,
THREAD_STATUS_HAS_EXCEPTION,
_INTERNAL_FRAME_SUFFIXES,
)

try:
Expand Down Expand Up @@ -42,6 +43,25 @@ def extract_lineno(location):
return 0
return location[0]

def _is_internal_frame(frame):
    """Return True if *frame* comes from an internal profiler module.

    The filename is taken from the first element when *frame* is a tuple,
    otherwise from its ``filename`` attribute.  A frame with a missing or
    empty filename is never treated as internal.  A frame is internal when
    its filename ends with one of ``_INTERNAL_FRAME_SUFFIXES``.
    """
    if not isinstance(frame, tuple):
        path = getattr(frame, "filename", "")
    elif frame:
        path = frame[0]
    else:
        # Empty tuple: there is no filename to inspect.
        path = ""

    return path.endswith(_INTERNAL_FRAME_SUFFIXES) if path else False


def filter_internal_frames(frames):
    """Return *frames* without the profiler's own internal frames.

    A falsy *frames* value (``None``, an empty sequence) is handed back
    unchanged.  Otherwise a new list is built that keeps, in order, every
    frame for which ``_is_internal_frame`` is false.
    """
    if frames:
        kept = []
        for frame in frames:
            if _is_internal_frame(frame):
                continue
            kept.append(frame)
        return kept

    # Nothing to filter: return the caller's object as-is.
    return frames


class Collector(ABC):
@abstractmethod
def collect(self, stack_frames, timestamps_us=None):
Expand All @@ -63,6 +83,10 @@ def collect_failed_sample(self):
def export(self, filename):
"""Export collected data to a file."""

@staticmethod
def _filter_internal_frames(frames):
    """Delegate to the module-level ``filter_internal_frames`` helper."""
    visible_frames = filter_internal_frames(frames)
    return visible_frames

def _iter_all_frames(self, stack_frames, skip_idle=False):
for interpreter_info in stack_frames:
for thread_info in interpreter_info.threads:
Expand All @@ -76,7 +100,10 @@ def _iter_all_frames(self, stack_frames, skip_idle=False):
continue
frames = thread_info.frame_info
if frames:
yield frames, thread_info.thread_id
# Filter out internal profiler frames, wherever they appear in the stack
frames = self._filter_internal_frames(frames)
if frames:
yield frames, thread_info.thread_id

def _iter_async_frames(self, awaited_info_list):
# Phase 1: Index tasks and build parent relationships with pre-computed selection
Expand Down
6 changes: 6 additions & 0 deletions Lib/profiling/sampling/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
# Format: (lineno, end_lineno, col_offset, end_col_offset)
DEFAULT_LOCATION = (0, 0, -1, -1)

# Filename suffixes that identify the profiler's own internal frames.
# A frame whose filename ends with one of these suffixes belongs to an
# internal profiler module and should not appear in user-facing output.
# This is a tuple; the trailing comma is required while it has one entry.
_INTERNAL_FRAME_SUFFIXES = (
"_sync_coordinator.py",
)

# Thread status flags
try:
from _remote_debugging import (
Expand Down
4 changes: 2 additions & 2 deletions Lib/profiling/sampling/gecko_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import threading
import time

from .collector import Collector
from .collector import Collector, filter_internal_frames
from .opcode_utils import get_opcode_info, format_opcode
try:
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED, THREAD_STATUS_HAS_EXCEPTION
Expand Down Expand Up @@ -172,7 +172,7 @@ def collect(self, stack_frames, timestamps_us=None):
# Process threads
for interpreter_info in stack_frames:
for thread_info in interpreter_info.threads:
frames = thread_info.frame_info
frames = filter_internal_frames(thread_info.frame_info)
tid = thread_info.thread_id

# Initialize thread if needed
Expand Down
128 changes: 128 additions & 0 deletions Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1825,3 +1825,131 @@ def test_gecko_collector_frame_format(self):
thread = profile["threads"][0]
# Should have recorded 3 functions
self.assertEqual(thread["funcTable"]["length"], 3)


class TestInternalFrameFiltering(unittest.TestCase):
    """Check that internal profiler frames never reach collector output."""

    @staticmethod
    def _single_thread_sample(frame_list):
        """Wrap *frame_list* in one GIL-holding thread of one interpreter."""
        thread = MockThreadInfo(1, frame_list, status=THREAD_STATUS_HAS_GIL)
        return [MockInterpreterInfo(0, [thread])]

    def test_filter_internal_frames(self):
        """_sync_coordinator frames are dropped wherever they sit in the stack."""
        from profiling.sampling.collector import filter_internal_frames

        # The internal frame sits between user code and runpy, as it would
        # in a realistic stack.
        stack = [
            MockFrameInfo("user_script.py", 10, "user_func"),
            MockFrameInfo("/path/to/_sync_coordinator.py", 100, "main"),
            MockFrameInfo("<frozen runpy>", 87, "_run_code"),
        ]

        remaining = filter_internal_frames(stack)

        self.assertEqual(len(remaining), 2)
        first, second = remaining
        self.assertEqual(first.filename, "user_script.py")
        self.assertEqual(second.filename, "<frozen runpy>")

    def test_pstats_collector_filters_internal_frames(self):
        """PstatsCollector records only the non-internal frames."""
        collector = PstatsCollector(sample_interval_usec=1000)
        sample = self._single_thread_sample(
            [
                MockFrameInfo("user_script.py", 10, "user_func"),
                MockFrameInfo("/path/to/_sync_coordinator.py", 100, "main"),
                MockFrameInfo("<frozen runpy>", 87, "_run_code"),
            ]
        )

        collector.collect(sample)

        self.assertEqual(len(collector.result), 2)
        expected_keys = (
            ("user_script.py", 10, "user_func"),
            ("<frozen runpy>", 87, "_run_code"),
        )
        for key in expected_keys:
            self.assertIn(key, collector.result)

    def test_gecko_collector_filters_internal_frames(self):
        """GeckoCollector output contains no _sync_coordinator strings."""
        collector = GeckoCollector(sample_interval_usec=1000)
        sample = self._single_thread_sample(
            [
                MockFrameInfo("app.py", 50, "run"),
                MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
            ]
        )

        collector.collect(sample)
        profile = collector._build_profile()

        # No entry of the shared string table may mention the internal module.
        for entry in profile["shared"]["stringArray"]:
            self.assertNotIn("_sync_coordinator", entry)

    def test_flamegraph_collector_filters_internal_frames(self):
        """FlamegraphCollector output contains no _sync_coordinator strings."""
        collector = FlamegraphCollector(sample_interval_usec=1000)
        sample = self._single_thread_sample(
            [
                MockFrameInfo("app.py", 50, "run"),
                MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
                MockFrameInfo("<frozen runpy>", 87, "_run_code"),
            ]
        )

        collector.collect(sample)
        flamegraph = collector._convert_to_flamegraph_format()

        for entry in flamegraph.get("strings", []):
            self.assertNotIn("_sync_coordinator", entry)

    def test_collapsed_stack_collector_filters_internal_frames(self):
        """CollapsedStackCollector stacks contain no _sync_coordinator frames."""
        collector = CollapsedStackCollector(sample_interval_usec=1000)
        sample = self._single_thread_sample(
            [
                MockFrameInfo("app.py", 50, "run"),
                MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
            ]
        )

        collector.collect(sample)

        # Inspect the filename of every frame in every recorded stack.
        for stack_key, _count in collector.stack_counter.items():
            call_tree, _thread_id = stack_key
            for filename, _lineno, _funcname in call_tree:
                self.assertNotIn("_sync_coordinator", filename)
Loading