diff --git a/py/noxfile.py b/py/noxfile.py index cc2e9a1c..61ea0aec 100644 --- a/py/noxfile.py +++ b/py/noxfile.py @@ -156,7 +156,7 @@ def test_claude_agent_sdk(session, version): # while still exercising the real Claude Agent SDK control protocol. _install_test_deps(session) _install(session, "claude_agent_sdk", version) - _run_tests(session, f"{WRAPPER_DIR}/claude_agent_sdk/test_wrapper.py") + _run_tests(session, f"{INTEGRATION_DIR}/claude_agent_sdk/test_claude_agent_sdk.py") _run_core_tests(session) diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py index f91feb6e..25dd436a 100644 --- a/py/src/braintrust/auto.py +++ b/py/src/braintrust/auto.py @@ -7,7 +7,7 @@ import logging from contextlib import contextmanager -from braintrust.integrations import ADKIntegration, AgnoIntegration, AnthropicIntegration +from braintrust.integrations import ADKIntegration, AgnoIntegration, AnthropicIntegration, ClaudeAgentSDKIntegration __all__ = ["auto_instrument"] @@ -117,7 +117,7 @@ def auto_instrument( if agno: results["agno"] = _instrument_integration(AgnoIntegration) if claude_agent_sdk: - results["claude_agent_sdk"] = _instrument_claude_agent_sdk() + results["claude_agent_sdk"] = _instrument_integration(ClaudeAgentSDKIntegration) if dspy: results["dspy"] = _instrument_dspy() if adk: @@ -164,14 +164,6 @@ def _instrument_google_genai() -> bool: return False -def _instrument_claude_agent_sdk() -> bool: - with _try_patch(): - from braintrust.wrappers.claude_agent_sdk import setup_claude_agent_sdk - - return setup_claude_agent_sdk() - return False - - def _instrument_dspy() -> bool: with _try_patch(): from braintrust.wrappers.dspy import patch_dspy diff --git a/py/src/braintrust/integrations/__init__.py b/py/src/braintrust/integrations/__init__.py index e87b2dd6..35324c1c 100644 --- a/py/src/braintrust/integrations/__init__.py +++ b/py/src/braintrust/integrations/__init__.py @@ -1,6 +1,7 @@ from .adk import ADKIntegration from .agno import AgnoIntegration from .anthropic import AnthropicIntegration +from .claude_agent_sdk import ClaudeAgentSDKIntegration -__all__ = ["ADKIntegration", "AgnoIntegration", "AnthropicIntegration"] +__all__ = ["ADKIntegration", "AgnoIntegration", "AnthropicIntegration", "ClaudeAgentSDKIntegration"] diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_claude_agent_sdk.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_claude_agent_sdk.py index d9213cdb..f1496f7e 100644 --- a/py/src/braintrust/integrations/auto_test_scripts/test_auto_claude_agent_sdk.py +++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_claude_agent_sdk.py @@ -1,7 +1,7 @@ """Test auto_instrument for Claude Agent SDK (no uninstrument available).""" from braintrust.auto import auto_instrument -from braintrust.wrappers.claude_agent_sdk._test_transport import make_cassette_transport +from braintrust.integrations.claude_agent_sdk._test_transport import make_cassette_transport from braintrust.wrappers.test_utils import autoinstrument_test_context diff --git a/py/src/braintrust/integrations/base.py b/py/src/braintrust/integrations/base.py index 3d021f91..690e6c22 100644 --- a/py/src/braintrust/integrations/base.py +++ b/py/src/braintrust/integrations/base.py @@ -3,6 +3,7 @@ import importlib import inspect import re +import sys from abc import ABC, abstractmethod from collections.abc import Iterable from typing import Any, ClassVar @@ -170,6 +171,84 @@ def wrap_target(cls, target: Any) -> Any: return target +class ClassReplacementPatcher(BasePatcher): + """Base patcher for replacing an exported class with a tracing wrapper class. + + Use this when instrumentation cannot be expressed as wrapping one stable + function or method in place. Typical cases are integrations that need to: + + - replace constructor behavior before the SDK stores callbacks or handlers + - preserve per-instance state across multiple methods + - keep ``from provider import Client`` aliases working after setup by + propagating the replacement to modules that already imported the class + + Prefer ``FunctionWrapperPatcher`` when a stable attribute can be instrumented + in place with ``wrap_function_wrapper(...)`` and class identity does not need + to change. + """ + + target_attr: ClassVar[str] + propagate_imported_aliases: ClassVar[bool] = True + # Factory that takes the original exported class and returns the replacement class. + replacement_factory: ClassVar[Any] + + @classmethod + def resolve_target(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> Any | None: + """Return the exported class object that this patcher replaces.""" + root = target if target is not None else module + if root is None: + return None + return getattr(root, cls.target_attr, None) + + @classmethod + def applies(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool: + """Return whether the target class exists and the version gate passes.""" + return super().applies(module, version, target=target) and ( + cls.resolve_target(module, version, target=target) is not None + ) + + @classmethod + def patch_marker_attr(cls) -> str: + """Return the sentinel attribute used to mark the replacement class as patched.""" + suffix = re.sub(r"\W+", "_", cls.name).strip("_") + return f"__braintrust_patched_{suffix}__" + + @classmethod + def mark_patched(cls, obj: Any) -> None: + """Mark a replacement class so future patch attempts are idempotent.""" + setattr(obj, cls.patch_marker_attr(), True) + + @classmethod + def is_patched(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool: + """Return whether this patcher's replacement class is already installed.""" + resolved_target = cls.resolve_target(module, version, target=target) + return bool(resolved_target is not None and getattr(resolved_target, cls.patch_marker_attr(), False)) + + @classmethod + def patch(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool: + """Replace the exported class and optionally propagate the new binding.""" + root = target if target is not None else module + if root is None or not cls.applies(module, version, target=target): + return False + + original_class = cls.resolve_target(module, version, target=target) + if original_class is None: + return False + + replacement_class = cls.replacement_factory(original_class) + cls.mark_patched(replacement_class) + setattr(root, cls.target_attr, replacement_class) + + if cls.propagate_imported_aliases and target is None: + for mod in list(sys.modules.values()): + if mod is None or not hasattr(mod, cls.target_attr): + continue + if getattr(mod, cls.target_attr, None) is original_class: + setattr(mod, cls.target_attr, replacement_class) + + return True + + class CompositeFunctionWrapperPatcher(BasePatcher): """Patcher that applies multiple ``FunctionWrapperPatcher`` sub-patchers as one unit. diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py b/py/src/braintrust/integrations/claude_agent_sdk/__init__.py similarity index 51% rename from py/src/braintrust/wrappers/claude_agent_sdk/__init__.py rename to py/src/braintrust/integrations/claude_agent_sdk/__init__.py index 1d44358c..b043e958 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py +++ b/py/src/braintrust/integrations/claude_agent_sdk/__init__.py @@ -1,9 +1,8 @@ -""" -Braintrust integration for Claude Agent SDK with automatic tracing. +"""Braintrust integration for Claude Agent SDK with automatic tracing. Usage (imports can be before or after setup): from claude_agent_sdk import ClaudeSDKClient, ClaudeAgentOptions - from braintrust.wrappers.claude_agent_sdk import setup_claude_agent_sdk + from braintrust.integrations.claude_agent_sdk import setup_claude_agent_sdk setup_claude_agent_sdk(project="my-project") @@ -19,12 +18,12 @@ from braintrust.logger import NOOP_SPAN, current_span, init_logger -from ._wrapper import _create_client_wrapper_class, _create_tool_wrapper_class +from .integration import ClaudeAgentSDKIntegration logger = logging.getLogger(__name__) -__all__ = ["setup_claude_agent_sdk"] +__all__ = ["ClaudeAgentSDKIntegration", "setup_claude_agent_sdk"] def setup_claude_agent_sdk( @@ -46,7 +45,7 @@ def setup_claude_agent_sdk( Example: ```python import claude_agent_sdk - from braintrust.wrappers.claude_agent_sdk import setup_claude_agent_sdk + from braintrust.integrations.claude_agent_sdk import setup_claude_agent_sdk setup_claude_agent_sdk(project="my-project") @@ -62,33 +61,4 @@ def setup_claude_agent_sdk( if span == NOOP_SPAN: init_logger(project=project, api_key=api_key, project_id=project_id) - try: - import sys - - import claude_agent_sdk - - original_client = claude_agent_sdk.ClaudeSDKClient if hasattr(claude_agent_sdk, "ClaudeSDKClient") else None - original_tool_class = claude_agent_sdk.SdkMcpTool if hasattr(claude_agent_sdk, "SdkMcpTool") else None - - if original_client: - wrapped_client = _create_client_wrapper_class(original_client) - claude_agent_sdk.ClaudeSDKClient = wrapped_client - - for module in list(sys.modules.values()): - if module and hasattr(module, "ClaudeSDKClient"): - if getattr(module, "ClaudeSDKClient", None) is original_client: - setattr(module, "ClaudeSDKClient", wrapped_client) - - if original_tool_class: - wrapped_tool_class = _create_tool_wrapper_class(original_tool_class) - claude_agent_sdk.SdkMcpTool = wrapped_tool_class - - for module in list(sys.modules.values()): - if module and hasattr(module, "SdkMcpTool"): - if getattr(module, "SdkMcpTool", None) is original_tool_class: - setattr(module, "SdkMcpTool", wrapped_tool_class) - - return True - except ImportError: - # Not installed - this is expected when using auto_instrument() - return False + return ClaudeAgentSDKIntegration.setup() diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_constants.py b/py/src/braintrust/integrations/claude_agent_sdk/_constants.py similarity index 100% rename from py/src/braintrust/wrappers/claude_agent_sdk/_constants.py rename to py/src/braintrust/integrations/claude_agent_sdk/_constants.py diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py b/py/src/braintrust/integrations/claude_agent_sdk/_test_transport.py similarity index 99% rename from py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py rename to py/src/braintrust/integrations/claude_agent_sdk/_test_transport.py index 3a516568..217b2e62 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py +++ b/py/src/braintrust/integrations/claude_agent_sdk/_test_transport.py @@ -48,7 +48,7 @@ def get_record_mode() -> str: def _require_sdk() -> None: if _CLAUDE_AGENT_SDK_IMPORT_ERROR is not None: raise ImportError( - "claude_agent_sdk is required to use braintrust.wrappers.claude_agent_sdk._test_transport" + "claude_agent_sdk is required to use braintrust.integrations.claude_agent_sdk._test_transport" ) from _CLAUDE_AGENT_SDK_IMPORT_ERROR diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk.json b/py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk.json similarity index 100% rename from py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk.json rename to py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk.json diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span.json b/py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span.json similarity index 100% rename from py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span.json rename to py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span.json diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations.json b/py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations.json similarity index 100% rename from py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations.json rename to py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations.json diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_concurrent_subagents_produce_parallel_llm_spans_with_correct_parenting.json b/py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_concurrent_subagents_produce_parallel_llm_spans_with_correct_parenting.json similarity index 100% rename from py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_concurrent_subagents_produce_parallel_llm_spans_with_correct_parenting.json rename to py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_concurrent_subagents_produce_parallel_llm_spans_with_correct_parenting.json diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_concurrent_subagents_produce_parallel_llm_spans_with_correct_parenting_sdk_0_1_10.json b/py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_concurrent_subagents_produce_parallel_llm_spans_with_correct_parenting_sdk_0_1_10.json similarity index 100% rename from py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_concurrent_subagents_produce_parallel_llm_spans_with_correct_parenting_sdk_0_1_10.json rename to py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_concurrent_subagents_produce_parallel_llm_spans_with_correct_parenting_sdk_0_1_10.json diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_five_parallel_bundled_subagents_preserve_task_parenting.json b/py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_five_parallel_bundled_subagents_preserve_task_parenting.json similarity index 100% rename from py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_five_parallel_bundled_subagents_preserve_task_parenting.json rename to py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_five_parallel_bundled_subagents_preserve_task_parenting.json diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_interleaved_subagent_tool_output_preserved.json b/py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_interleaved_subagent_tool_output_preserved.json similarity index 100% rename from py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_interleaved_subagent_tool_output_preserved.json rename to py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_interleaved_subagent_tool_output_preserved.json diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_interleaved_subagent_tool_output_preserved_sdk_0_1_10.json b/py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_interleaved_subagent_tool_output_preserved_sdk_0_1_10.json similarity index 100% rename from py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_interleaved_subagent_tool_output_preserved_sdk_0_1_10.json rename to py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_interleaved_subagent_tool_output_preserved_sdk_0_1_10.json diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_multiple_bundled_subagents_keep_outer_orchestration_separate.json b/py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_multiple_bundled_subagents_keep_outer_orchestration_separate.json similarity index 100% rename from py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_multiple_bundled_subagents_keep_outer_orchestration_separate.json rename to py/src/braintrust/integrations/claude_agent_sdk/cassettes/test_multiple_bundled_subagents_keep_outer_orchestration_separate.json diff --git a/py/src/braintrust/integrations/claude_agent_sdk/integration.py b/py/src/braintrust/integrations/claude_agent_sdk/integration.py new file mode 100644 index 00000000..3f1a3b00 --- /dev/null +++ b/py/src/braintrust/integrations/claude_agent_sdk/integration.py @@ -0,0 +1,14 @@ +"""Claude Agent SDK integration — orchestration class and setup entry-point.""" + +from braintrust.integrations.base import BaseIntegration + +from .patchers import ClaudeSDKClientPatcher, SdkMcpToolPatcher + + +class ClaudeAgentSDKIntegration(BaseIntegration): + """Braintrust instrumentation for the Claude Agent SDK.""" + + name = "claude_agent_sdk" + import_names = ("claude_agent_sdk",) + min_version = "0.1.10" + patchers = (ClaudeSDKClientPatcher, SdkMcpToolPatcher) diff --git a/py/src/braintrust/integrations/claude_agent_sdk/patchers.py b/py/src/braintrust/integrations/claude_agent_sdk/patchers.py new file mode 100644 index 00000000..5f288d53 --- /dev/null +++ b/py/src/braintrust/integrations/claude_agent_sdk/patchers.py @@ -0,0 +1,30 @@ +"""Claude Agent SDK patchers — class-replacement patchers for ClaudeSDKClient and SdkMcpTool.""" + +from braintrust.integrations.base import ClassReplacementPatcher + +from .tracing import _create_client_wrapper_class, _create_tool_wrapper_class + + +class ClaudeSDKClientPatcher(ClassReplacementPatcher): + """Replace ``claude_agent_sdk.ClaudeSDKClient`` with a tracing wrapper class. + + This integration needs class replacement because the wrapper keeps + per-instance state across ``query()`` and ``receive_response()`` and must + update modules that imported ``ClaudeSDKClient`` before setup. + """ + + name = "claude_agent_sdk.client" + target_attr = "ClaudeSDKClient" + replacement_factory = staticmethod(_create_client_wrapper_class) + + +class SdkMcpToolPatcher(ClassReplacementPatcher): + """Replace ``claude_agent_sdk.SdkMcpTool`` with a tracing wrapper class. + + This integration needs class replacement because the tool wrapper must + intercept construction and replace the handler before the SDK stores it. + """ + + name = "claude_agent_sdk.tool" + target_attr = "SdkMcpTool" + replacement_factory = staticmethod(_create_tool_wrapper_class) diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py similarity index 99% rename from py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py rename to py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py index 44cb8426..658da648 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +++ b/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py @@ -24,12 +24,9 @@ print("Claude Agent SDK not installed, skipping integration tests") from braintrust import logger -from braintrust.logger import start_span -from braintrust.span_types import SpanTypeAttribute -from braintrust.test_helpers import init_test_logger -from braintrust.wrappers.claude_agent_sdk import setup_claude_agent_sdk -from braintrust.wrappers.claude_agent_sdk._test_transport import make_cassette_transport -from braintrust.wrappers.claude_agent_sdk._wrapper import ( +from braintrust.integrations.claude_agent_sdk import setup_claude_agent_sdk +from braintrust.integrations.claude_agent_sdk._test_transport import make_cassette_transport +from braintrust.integrations.claude_agent_sdk.tracing import ( ToolSpanTracker, _build_llm_input, _create_client_wrapper_class, @@ -41,12 +38,15 @@ _serialize_tool_result_output, _thread_local, ) +from braintrust.logger import start_span +from braintrust.span_types import SpanTypeAttribute +from braintrust.test_helpers import init_test_logger from braintrust.wrappers.test_utils import verify_autoinstrument_script PROJECT_NAME = "test-claude-agent-sdk" TEST_MODEL = "claude-haiku-4-5-20251001" -REPO_ROOT = Path(__file__).resolve().parents[5] +REPO_ROOT = Path(__file__).resolve().parents[5] # py/src/braintrust/integrations/claude_agent_sdk -> repo root @pytest.fixture diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py b/py/src/braintrust/integrations/claude_agent_sdk/tracing.py similarity index 99% rename from py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py rename to py/src/braintrust/integrations/claude_agent_sdk/tracing.py index c78a10ac..8ba695d6 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py +++ b/py/src/braintrust/integrations/claude_agent_sdk/tracing.py @@ -8,9 +8,7 @@ from typing import Any from braintrust.integrations.anthropic._utils import Wrapper, extract_anthropic_usage, finalize_anthropic_tokens -from braintrust.logger import start_span -from braintrust.span_types import SpanTypeAttribute -from braintrust.wrappers.claude_agent_sdk._constants import ( +from braintrust.integrations.claude_agent_sdk._constants import ( ANTHROPIC_MESSAGES_CREATE_SPAN_NAME, CLAUDE_AGENT_TASK_SPAN_NAME, DEFAULT_TOOL_NAME, @@ -24,6 +22,8 @@ MessageClassName, SerializedContentType, ) +from braintrust.logger import start_span +from braintrust.span_types import SpanTypeAttribute _thread_local = threading.local() diff --git a/py/src/braintrust/wrappers/adk/__init__.py b/py/src/braintrust/wrappers/adk.py similarity index 100% rename from py/src/braintrust/wrappers/adk/__init__.py rename to py/src/braintrust/wrappers/adk.py diff --git a/py/src/braintrust/wrappers/agno/__init__.py b/py/src/braintrust/wrappers/agno.py similarity index 100% rename from py/src/braintrust/wrappers/agno/__init__.py rename to py/src/braintrust/wrappers/agno.py diff --git a/py/src/braintrust/wrappers/claude_agent_sdk.py b/py/src/braintrust/wrappers/claude_agent_sdk.py new file mode 100644 index 00000000..47ae67c3 --- /dev/null +++ b/py/src/braintrust/wrappers/claude_agent_sdk.py @@ -0,0 +1,4 @@ +from braintrust.integrations.claude_agent_sdk import setup_claude_agent_sdk # noqa: F401 + + +__all__ = ["setup_claude_agent_sdk"] diff --git a/py/src/braintrust/wrappers/test_utils.py b/py/src/braintrust/wrappers/test_utils.py index f91a3502..5158404a 100644 --- a/py/src/braintrust/wrappers/test_utils.py +++ b/py/src/braintrust/wrappers/test_utils.py @@ -42,7 +42,9 @@ def verify_autoinstrument_script(script_name: str, timeout: int = 30) -> subproc # Pass cassettes dir to subprocess since it may use installed package env = os.environ.copy() env["BRAINTRUST_CASSETTES_DIR"] = str(_SOURCE_DIR / "cassettes") - env["BRAINTRUST_CLAUDE_AGENT_SDK_CASSETTES_DIR"] = str(_SOURCE_DIR / "claude_agent_sdk" / "cassettes") + env["BRAINTRUST_CLAUDE_AGENT_SDK_CASSETTES_DIR"] = str( + _SOURCE_DIR.parent / "integrations" / "claude_agent_sdk" / "cassettes" + ) result = subprocess.run( [sys.executable, str(script_path)], capture_output=True,