From c65635aceb338181b745df5ce39d1e497a2cc5bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Tue, 24 Mar 2026 22:25:41 +0000 Subject: [PATCH 1/7] ref(langchain): start to migrate to integrations API --- integrations/langchain-py/README.md | 28 +- integrations/langchain-py/pyproject.toml | 7 +- .../src/braintrust_langchain/__init__.py | 30 +- .../src/braintrust_langchain/callbacks.py | 655 +------- .../src/braintrust_langchain/context.py | 34 +- py/Makefile | 2 +- py/examples/langchain/auto.py | 24 + py/examples/langchain/manual_patching.py | 27 + py/noxfile.py | 18 + py/src/braintrust/auto.py | 7 +- py/src/braintrust/integrations/__init__.py | 3 +- .../auto_test_scripts/test_auto_langchain.py | 21 + .../integrations/langchain/__init__.py | 47 + .../cassettes/test_async_langchain_invoke | 276 ++++ .../cassettes/test_chain_with_memory | 332 ++++ .../langchain/cassettes/test_global_handler | 225 +++ .../test_langchain_anthropic_integration | 300 ++++ .../cassettes/test_langgraph_state_management | 327 ++++ .../langchain/cassettes/test_llm_calls | 333 ++++ .../cassettes/test_parallel_execution | 234 +++ .../cassettes/test_prompt_caching_tokens | 324 ++++ .../langchain/cassettes/test_streaming_ttft | 298 ++++ .../langchain/cassettes/test_tool_usage | 350 +++++ .../integrations/langchain/conftest.py | 58 + .../integrations/langchain/integration.py | 34 + .../integrations/langchain/test_langchain.py | 1380 +++++++++++++++++ .../integrations/langchain/tracing.py | 701 +++++++++ py/src/braintrust/wrappers/langchain.py | 150 -- .../braintrust/wrappers/langchain/__init__.py | 21 + 29 files changed, 5420 insertions(+), 826 deletions(-) create mode 100644 py/examples/langchain/auto.py create mode 100644 py/examples/langchain/manual_patching.py create mode 100644 py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py create mode 100644 py/src/braintrust/integrations/langchain/__init__.py create mode 100644 
py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_global_handler create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_llm_calls create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_tool_usage create mode 100644 py/src/braintrust/integrations/langchain/conftest.py create mode 100644 py/src/braintrust/integrations/langchain/integration.py create mode 100644 py/src/braintrust/integrations/langchain/test_langchain.py create mode 100644 py/src/braintrust/integrations/langchain/tracing.py delete mode 100644 py/src/braintrust/wrappers/langchain.py create mode 100644 py/src/braintrust/wrappers/langchain/__init__.py diff --git a/integrations/langchain-py/README.md b/integrations/langchain-py/README.md index ec00daef..19717270 100644 --- a/integrations/langchain-py/README.md +++ b/integrations/langchain-py/README.md @@ -1,9 +1,35 @@ -# braintrust-langchain +# braintrust-langchain (DEPRECATED) [![PyPI version](https://img.shields.io/pypi/v/braintrust-langchain.svg)](https://pypi.org/project/braintrust-langchain/) SDK for integrating [Braintrust](https://braintrust.dev) with [LangChain](https://langchain.com/). This package provides a callback handler to automatically log LangChain executions to Braintrust. 
+> **This package is deprecated.** The LangChain integration is now included in the main [`braintrust`](https://pypi.org/project/braintrust/) package. + +## Migration + +1. Remove `braintrust-langchain` from your dependencies +2. Install or upgrade `braintrust`: + ```bash + pip install --upgrade braintrust + ``` +3. Update your imports: + ```python + # Before + from braintrust_langchain import BraintrustCallbackHandler, set_global_handler + + # After (option 1: auto-instrument langchain library) + import braintrust + braintrust.auto_instrument() + + # After (option 2: explicit) + from braintrust.wrappers.langchain import BraintrustCallbackHandler, set_global_handler + ``` + +The API is identical - no code changes needed beyond the import path. + +--- + ## Installation ```bash diff --git a/integrations/langchain-py/pyproject.toml b/integrations/langchain-py/pyproject.toml index 9bbf9d7a..9d620604 100644 --- a/integrations/langchain-py/pyproject.toml +++ b/integrations/langchain-py/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "braintrust-langchain" version = "0.2.1" -description = "Integration for LangChain and Braintrust Tracing" +description = "DEPRECATED: LangChain integration is now included in the main braintrust package. Install braintrust instead." 
readme = "README.md" requires-python = ">=3.10" dependencies = [ @@ -10,9 +10,9 @@ dependencies = [ ] license = "MIT" authors = [{ name = "Braintrust", email = "info@braintrust.dev" }] -keywords = ["braintrust", "langchain", "llm", "tracing", "ai", "agents"] +keywords = ["braintrust", "langchain", "llm", "tracing", "ai", "agents", "deprecated"] classifiers = [ - "Development Status :: 4 - Beta", + "Development Status :: 7 - Inactive", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -73,4 +73,3 @@ known-third-party = ["braintrust", "langchain"] [tool.pytest.ini_options] testpaths = ["src/tests"] python_files = ["test_*.py"] -addopts = "-v" diff --git a/integrations/langchain-py/src/braintrust_langchain/__init__.py b/integrations/langchain-py/src/braintrust_langchain/__init__.py index 2feeb7bc..9f9fb04a 100644 --- a/integrations/langchain-py/src/braintrust_langchain/__init__.py +++ b/integrations/langchain-py/src/braintrust_langchain/__init__.py @@ -1,4 +1,28 @@ -from .callbacks import BraintrustCallbackHandler -from .context import set_global_handler +""" +DEPRECATED: braintrust-langchain is now part of the main braintrust package. -__all__ = ["BraintrustCallbackHandler", "set_global_handler"] +Install `braintrust` and use `braintrust.integrations.langchain` or +`braintrust.auto_instrument()` instead. This package now re-exports from +`braintrust.integrations.langchain` for backward compatibility. +""" + +import warnings + +warnings.warn( + "braintrust-langchain is deprecated. The LangChain integration is now included in the " + "main 'braintrust' package. Use 'from braintrust.integrations.langchain import " + "BraintrustCallbackHandler' or 'braintrust.auto_instrument()' instead. 
" + "This package will be removed in a future release.", + DeprecationWarning, + stacklevel=2, +) + +# Re-export public API from the new location for backward compatibility +from braintrust.integrations.langchain import ( # noqa: E402, F401 + BraintrustCallbackHandler, + BraintrustTracer, + clear_global_handler, + set_global_handler, +) + +__all__ = ["BraintrustCallbackHandler", "BraintrustTracer", "set_global_handler", "clear_global_handler"] diff --git a/integrations/langchain-py/src/braintrust_langchain/callbacks.py b/integrations/langchain-py/src/braintrust_langchain/callbacks.py index 016a1268..1adfae9d 100644 --- a/integrations/langchain-py/src/braintrust_langchain/callbacks.py +++ b/integrations/langchain-py/src/braintrust_langchain/callbacks.py @@ -1,648 +1,15 @@ -import json -import logging -import re -import time -from collections.abc import Mapping, Sequence -from re import Pattern -from typing import ( - Any, - TypedDict, - Union, -) -from uuid import UUID - -import braintrust -from braintrust import NOOP_SPAN, Logger, Span, SpanAttributes, SpanTypeAttribute, current_span, init_logger -from braintrust.version import VERSION as sdk_version -from langchain_core.agents import AgentAction, AgentFinish -from langchain_core.callbacks.base import BaseCallbackHandler -from langchain_core.documents import Document -from langchain_core.messages import BaseMessage -from langchain_core.outputs.llm_result import LLMResult -from tenacity import RetryCallState -from typing_extensions import NotRequired - -from braintrust_langchain.version import version - -_logger = logging.getLogger("braintrust_langchain") - - -class LogEvent(TypedDict): - input: NotRequired[Any] - output: NotRequired[Any] - expected: NotRequired[Any] - error: NotRequired[str] - tags: NotRequired[Sequence[str] | None] - scores: NotRequired[Mapping[str, int | float]] - metadata: NotRequired[Mapping[str, Any]] - metrics: NotRequired[Mapping[str, int | float]] - id: NotRequired[str] - dataset_record_id: 
NotRequired[str] - - -class BraintrustCallbackHandler(BaseCallbackHandler): - root_run_id: UUID | None = None - - def __init__( - self, - logger: Logger | Span | None = None, - debug: bool = False, - exclude_metadata_props: Pattern[str] | None = None, - ): - self.logger = logger - self.spans: dict[UUID, Span] = {} - self.debug = debug # DEPRECATED - self.exclude_metadata_props = exclude_metadata_props or re.compile( - r"^(l[sc]_|langgraph_|__pregel_|checkpoint_ns)" - ) - self.skipped_runs: set[UUID] = set() - # Set run_inline=True to avoid thread executor in async contexts - # This ensures memory logger context is preserved - self.run_inline = True - - self._start_times: dict[UUID, float] = {} - self._first_token_times: dict[UUID, float] = {} - self._ttft_ms: dict[UUID, float] = {} - - def _start_span( - self, - parent_run_id: UUID | None, - run_id: UUID, - name: str | None = None, - type: SpanTypeAttribute | None = SpanTypeAttribute.TASK, - span_attributes: SpanAttributes | Mapping[str, Any] | None = None, - start_time: float | None = None, - set_current: bool | None = None, - parent: str | None = None, - event: LogEvent | None = None, - ) -> Any: - if run_id in self.spans: - # XXX: See graph test case of an example where this _may_ be intended. 
- _logger.warning(f"Span already exists for run_id {run_id} (this is likely a bug)") - return - - if not parent_run_id: - self.root_run_id = run_id - - current_parent = current_span() - parent_span = None - if parent_run_id and parent_run_id in self.spans: - parent_span = self.spans[parent_run_id] - elif current_parent != NOOP_SPAN: - parent_span = current_parent - elif self.logger is not None: - parent_span = self.logger - else: - parent_span = braintrust - - if event is None: - event = {} - - tags = event.get("tags") or [] - event = { - **event, - "tags": None, - "metadata": { - **({"tags": tags}), - **(event.get("metadata") or {}), - "run_id": run_id, - "parent_run_id": parent_run_id, - "braintrust": { - "integration_name": "langchain-py", - "integration_version": version, - "sdk_version": sdk_version, - "language": "python", - }, - }, - } - - span = parent_span.start_span( - name=name, - type=type, - span_attributes=span_attributes, - start_time=start_time, - set_current=set_current, - parent=parent, - **event, - ) - - if self.logger != NOOP_SPAN and span == NOOP_SPAN: - _logger.warning( - "Braintrust logging not configured. Pass a `logger`, call `init_logger`, or run an experiment to configure Braintrust logging. Setting up a default." 
- ) - span = init_logger().start_span( - name=name, - type=type, - span_attributes=span_attributes, - start_time=start_time, - set_current=set_current, - parent=parent, - **event, - ) - - span.set_current() - - self.spans[run_id] = span - return span - - def _end_span( - self, - run_id: UUID, - parent_run_id: UUID | None = None, - input: Any | None = None, - output: Any | None = None, - expected: Any | None = None, - error: str | None = None, - tags: Sequence[str] | None = None, - scores: Mapping[str, int | float] | None = None, - metadata: Mapping[str, Any] | None = None, - metrics: Mapping[str, int | float] | None = None, - dataset_record_id: str | None = None, - ) -> Any: - if run_id not in self.spans: - return - - if run_id in self.skipped_runs: - self.skipped_runs.discard(run_id) - return - - span = self.spans.pop(run_id) - - if self.root_run_id == run_id: - self.root_run_id = None - - span.log( - input=input, - output=output, - expected=expected, - error=error, - tags=None, - scores=scores, - metadata={ - **({"tags": tags} if tags else {}), - **(metadata or {}), - }, - metrics=metrics, - dataset_record_id=dataset_record_id, - ) - - # In async workflows, callbacks may execute in different async contexts. - # The span's context variable token may have been created in a different - # context, causing ValueError when trying to reset it. We catch and ignore - # this specific error since the span hierarchy is maintained via self.spans. 
- try: - span.unset_current() - except ValueError as e: - if "was created in a different Context" in str(e): - pass - else: - raise - - span.end() - - def on_llm_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, # TODO: response= - ) -> Any: - self._end_span(run_id, error=str(error), metadata={**kwargs}) - - self._start_times.pop(run_id, None) - self._first_token_times.pop(run_id, None) - self._ttft_ms.pop(run_id, None) - - def on_chain_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, # TODO: some metadata - ) -> Any: - self._end_span(run_id, error=str(error), metadata={**kwargs}) - - def on_tool_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> Any: - self._end_span(run_id, error=str(error), metadata={**kwargs}) - - def on_retriever_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> Any: - self._end_span(run_id, error=str(error), metadata={**kwargs}) - - # Agent Methods - def on_agent_action( - self, - action: AgentAction, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> Any: - self._start_span( - parent_run_id, - run_id, - type=SpanTypeAttribute.LLM, - name=action.tool, - event={"input": action, "metadata": {**kwargs}}, - ) - - def on_agent_finish( - self, - finish: AgentFinish, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> Any: - self._end_span(run_id, output=finish, metadata={**kwargs}) - - def on_chain_start( - self, - serialized: dict[str, Any], - inputs: dict[str, Any], - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - name: str | None = None, - metadata: dict[str, Any] | None = None, - **kwargs: Any, - ) -> Any: - tags = tags or [] - - # avoids extra logs that seem not as useful 
esp. with langgraph - if "langsmith:hidden" in tags: - self.skipped_runs.add(run_id) - return - - metadata = metadata or {} - resolved_name = ( - name - or metadata.get("langgraph_node") - or serialized.get("name") - or last_item(serialized.get("id") or []) - or "Chain" - ) - - self._start_span( - parent_run_id, - run_id, - name=resolved_name, - event={ - "input": inputs, - "tags": tags, - "metadata": { - "serialized": serialized, - "name": name, - "metadata": metadata, - **kwargs, - }, - }, - ) - - def on_chain_end( - self, - outputs: dict[str, Any], - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - **kwargs: Any, - ) -> Any: - self._end_span(run_id, output=outputs, tags=tags, metadata={**kwargs}) +""" +DEPRECATED: Import from braintrust.wrappers.langchain instead. +""" - def on_llm_start( - self, - serialized: dict[str, Any], - prompts: list[str], - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - metadata: dict[str, Any] | None = None, - name: str | None = None, - **kwargs: Any, - ) -> Any: - self._start_times[run_id] = time.perf_counter() - self._first_token_times.pop(run_id, None) - self._ttft_ms.pop(run_id, None) +import warnings - name = name or serialized.get("name") or last_item(serialized.get("id") or []) or "LLM" - self._start_span( - parent_run_id, - run_id, - name=name, - type=SpanTypeAttribute.LLM, - event={ - "input": prompts, - "tags": tags, - "metadata": { - "serialized": serialized, - "name": name, - "metadata": metadata, - **kwargs, - }, - }, - ) - - def on_chat_model_start( - self, - serialized: dict[str, Any], - messages: list[list["BaseMessage"]], - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - metadata: dict[str, Any] | None = None, - name: str | None = None, - invocation_params: dict[str, Any] | None = None, - **kwargs: Any, - ) -> Any: - self._start_times[run_id] = time.perf_counter() - 
self._first_token_times.pop(run_id, None) - self._ttft_ms.pop(run_id, None) - - invocation_params = invocation_params or {} - self._start_span( - parent_run_id, - run_id, - name=name or serialized.get("name") or last_item(serialized.get("id") or []) or "Chat Model", - type=SpanTypeAttribute.LLM, - event={ - "input": messages, - "tags": tags, - "metadata": ( - { - "serialized": serialized, - "invocation_params": invocation_params, - "metadata": metadata or {}, - "name": name, - **kwargs, - } - ), - }, - ) - - def on_llm_end( - self, - response: LLMResult, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - **kwargs: Any, - ) -> Any: - if run_id not in self.spans: - return - - metrics = _get_metrics_from_response(response) - - ttft = self._ttft_ms.pop(run_id, None) - if ttft is not None: - metrics["time_to_first_token"] = ttft - - model_name = _get_model_name_from_response(response) - - self._start_times.pop(run_id, None) - self._first_token_times.pop(run_id, None) - - self._end_span( - run_id, - output=response, - metrics=metrics, - tags=tags, - metadata={ - "model": model_name, - **kwargs, - }, - ) - - def on_tool_start( - self, - serialized: dict[str, Any], - input_str: str, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - metadata: dict[str, Any] | None = None, - inputs: dict[str, Any] | None = None, - name: str | None = None, - **kwargs: Any, - ) -> Any: - self._start_span( - parent_run_id, - run_id, - name=name or serialized.get("name") or last_item(serialized.get("id") or []) or "Tool", - type=SpanTypeAttribute.TOOL, - event={ - "input": inputs or safe_parse_serialized_json(input_str), - "tags": tags, - "metadata": { - "metadata": metadata, - "serialized": serialized, - "input_str": input_str, - "input": safe_parse_serialized_json(input_str), - "inputs": inputs, - "name": name, - **kwargs, - }, - }, - ) - - def on_tool_end( - self, - output: Any, - *, - run_id: UUID, - 
parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> Any: - self._end_span(run_id, output=output, metadata={**kwargs}) - - def on_retriever_start( - self, - serialized: dict[str, Any], - query: str, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - metadata: dict[str, Any] | None = None, - name: str | None = None, - **kwargs: Any, - ) -> Any: - self._start_span( - parent_run_id, - run_id, - name=name or serialized.get("name") or last_item(serialized.get("id") or []) or "Retriever", - type=SpanTypeAttribute.FUNCTION, - event={ - "input": query, - "tags": tags, - "metadata": { - "serialized": serialized, - "metadata": metadata, - "name": name, - **kwargs, - }, - }, - ) - - def on_retriever_end( - self, - documents: Sequence[Document], - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> Any: - self._end_span(run_id, output=documents, metadata={**kwargs}) - - def on_llm_new_token( - self, - token: str, - *, - chunk: Union["GenerationChunk", "ChatGenerationChunk"] | None = None, # type: ignore - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> Any: - if run_id not in self._first_token_times: - now = time.perf_counter() - self._first_token_times[run_id] = now - start = self._start_times.get(run_id) - if start is not None: - self._ttft_ms[run_id] = now - start - - def on_text( - self, - text: str, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> Any: - pass - - def on_retry( - self, - retry_state: RetryCallState, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> Any: - pass - - def on_custom_event( - self, - name: str, - data: Any, - *, - run_id: UUID, - tags: list[str] | None = None, - metadata: dict[str, Any] | None = None, - **kwargs: Any, - ) -> Any: - pass - - -def clean_object(obj: dict[str, Any]) -> dict[str, Any]: - return { - k: v - for k, v in obj.items() - if v is not None and not 
(isinstance(v, list) and not v) and not (isinstance(v, dict) and not v) - } - - -def safe_parse_serialized_json(input_str: str) -> Any: - try: - return json.loads(input_str) - except: - return input_str - - -def last_item(items: list[Any]) -> Any: - return items[-1] if items else None - - -def _walk_generations(response: LLMResult): - for generations in response.generations or []: - yield from generations or [] - - -def _get_model_name_from_response(response: LLMResult) -> str | None: - model_name = None - for generation in _walk_generations(response): - message = getattr(generation, "message", None) - if not message: - continue - - response_metadata = getattr(message, "response_metadata", None) - if response_metadata and isinstance(response_metadata, dict): - model_name = response_metadata.get("model_name") - - if model_name: - break - - if not model_name: - llm_output: dict[str, Any] = response.llm_output or {} - model_name = llm_output.get("model_name") or llm_output.get("model") or "" - - return model_name - - -def _get_metrics_from_response(response: LLMResult): - metrics = {} - - for generation in _walk_generations(response): - message = getattr(generation, "message", None) - if not message: - continue - - usage_metadata = getattr(message, "usage_metadata", None) - - if usage_metadata and isinstance(usage_metadata, dict): - metrics.update( - clean_object( - { - "total_tokens": usage_metadata.get("total_tokens"), - "prompt_tokens": usage_metadata.get("input_tokens"), - "completion_tokens": usage_metadata.get("output_tokens"), - } - ) - ) - - # Extract cache tokens from nested input_token_details (LangChain format) - # Maps to Braintrust's standard cache token metric names - input_token_details = usage_metadata.get("input_token_details") - if input_token_details and isinstance(input_token_details, dict): - cache_read = input_token_details.get("cache_read") - cache_creation = input_token_details.get("cache_creation") - - if cache_read is not None: - 
metrics["prompt_cached_tokens"] = cache_read - if cache_creation is not None: - metrics["prompt_cache_creation_tokens"] = cache_creation +warnings.warn( + "braintrust_langchain.callbacks is deprecated. Import from 'braintrust.wrappers.langchain' instead.", + DeprecationWarning, + stacklevel=2, +) - if not metrics or not any(metrics.values()): - llm_output: dict[str, Any] = response.llm_output or {} - metrics = llm_output.get("token_usage") or llm_output.get("estimatedTokens") or {} +from braintrust.integrations.langchain import BraintrustCallbackHandler # noqa: F401 - return clean_object(metrics) +__all__ = ["BraintrustCallbackHandler"] diff --git a/integrations/langchain-py/src/braintrust_langchain/context.py b/integrations/langchain-py/src/braintrust_langchain/context.py index 5c6bb4e8..c11385d1 100644 --- a/integrations/langchain-py/src/braintrust_langchain/context.py +++ b/integrations/langchain-py/src/braintrust_langchain/context.py @@ -1,26 +1,18 @@ -from contextvars import ContextVar +""" +DEPRECATED: Import from braintrust.wrappers.langchain instead. +""" -from langchain_core.tracers.context import register_configure_hook +import warnings -from braintrust_langchain.callbacks import BraintrustCallbackHandler - -__all__ = ["set_global_handler", "clear_global_handler"] - - -braintrust_callback_handler_var: ContextVar[BraintrustCallbackHandler | None] = ContextVar( - "braintrust_callback_handler", default=None +warnings.warn( + "braintrust_langchain.context is deprecated. 
Import from 'braintrust.wrappers.langchain' instead.", + DeprecationWarning, + stacklevel=2, ) - -def set_global_handler(handler: BraintrustCallbackHandler): - braintrust_callback_handler_var.set(handler) - - -def clear_global_handler(): - braintrust_callback_handler_var.set(None) - - -register_configure_hook( - context_var=braintrust_callback_handler_var, - inheritable=True, +from braintrust.integrations.langchain import ( # noqa: F401 + clear_global_handler, + set_global_handler, ) + +__all__ = ["set_global_handler", "clear_global_handler"] diff --git a/py/Makefile b/py/Makefile index 4696d84d..f2e29b58 100644 --- a/py/Makefile +++ b/py/Makefile @@ -60,7 +60,7 @@ install-dev: install-build-deps $(UV) pip install -r requirements-dev.txt install-optional: install-dev - $(UV) pip install anthropic openai pydantic_ai litellm agno google-genai google-adk dspy langsmith + $(UV) pip install anthropic openai pydantic_ai litellm agno google-genai google-adk dspy langsmith langchain-core langchain-openai langchain-anthropic langgraph $(UV) pip install -e .[temporal,otel] .DEFAULT_GOAL := help diff --git a/py/examples/langchain/auto.py b/py/examples/langchain/auto.py new file mode 100644 index 00000000..b6776dd7 --- /dev/null +++ b/py/examples/langchain/auto.py @@ -0,0 +1,24 @@ +"""Auto-instrument LangChain with Braintrust tracing. 
+ +Usage: + export BRAINTRUST_API_KEY="your-api-key" + export OPENAI_API_KEY="your-openai-api-key" + python auto.py +""" + +import braintrust + + +# Auto-instrument all supported libraries including LangChain +braintrust.auto_instrument() + +from langchain_openai import ChatOpenAI + + +def main(): + llm = ChatOpenAI(model="gpt-4o-mini") + response = llm.invoke("What is the capital of France?") + print(response.content) + + +main() diff --git a/py/examples/langchain/manual_patching.py b/py/examples/langchain/manual_patching.py new file mode 100644 index 00000000..142bcc5a --- /dev/null +++ b/py/examples/langchain/manual_patching.py @@ -0,0 +1,27 @@ +"""Manually patch LangChain with Braintrust tracing. + +Usage: + export BRAINTRUST_API_KEY="your-api-key" + export OPENAI_API_KEY="your-openai-api-key" + python manual_patching.py +""" + +from braintrust import init_logger +from braintrust.wrappers.langchain import set_global_handler, BraintrustCallbackHandler + + +# Setup LangChain tracing with a specific project +logger = init_logger(project="my-langchain-project") +handler = BraintrustCallbackHandler(logger=logger) +set_global_handler(handler) + +from langchain_openai import ChatOpenAI + + +def main(): + llm = ChatOpenAI(model="gpt-4o-mini") + response = llm.invoke("What is the capital of France?") + print(response.content) + + +main() diff --git a/py/noxfile.py b/py/noxfile.py index bff911db..2789bfee 100644 --- a/py/noxfile.py +++ b/py/noxfile.py @@ -78,6 +78,7 @@ def _pinned_python_version(): "google.genai", "google.adk", "temporalio", + "langchain_core", ) # Test matrix @@ -101,6 +102,7 @@ def _pinned_python_version(): GENAI_VERSIONS = (LATEST,) DSPY_VERSIONS = (LATEST,) GOOGLE_ADK_VERSIONS = (LATEST, "1.14.1") +LANGCHAIN_VERSIONS = (LATEST,) # temporalio 1.19.0+ requires Python >= 3.10; skip Python 3.9 entirely TEMPORAL_VERSIONS = (LATEST, "1.20.0", "1.19.0") PYTEST_VERSIONS = (LATEST, "8.4.2") @@ -192,6 +194,19 @@ def test_google_genai(session, version): 
_run_core_tests(session) +@nox.session() +@nox.parametrize("version", LANGCHAIN_VERSIONS, ids=LANGCHAIN_VERSIONS) +def test_langchain(session, version): + """Test LangChain integration.""" + _install_test_deps(session) + _install(session, "langchain-core", version) + _install(session, "langchain-openai", version) + _install(session, "langchain-anthropic", version) + session.install("langgraph", silent=SILENT_INSTALLS) + _run_tests(session, f"{INTEGRATION_DIR}/langchain/test_langchain.py") + _run_core_tests(session) + + @nox.session() @nox.parametrize("version", GOOGLE_ADK_VERSIONS, ids=GOOGLE_ADK_VERSIONS) def test_google_adk(session, version): @@ -329,6 +344,9 @@ def pylint(session): session.install("opentelemetry.instrumentation.openai") # langsmith is needed for the wrapper module but not in VENDOR_PACKAGES session.install("langsmith") + # langchain deps are needed for the langchain wrapper (langchain-core is in VENDOR_PACKAGES; + # the rest are installed explicitly here for pylint coverage) + session.install("langchain-core", "langchain-openai", "langchain-anthropic", "langgraph") result = session.run("git", "ls-files", "**/*.py", silent=True, log=False) files = [path for path in result.strip().splitlines() if path not in GENERATED_LINT_EXCLUDES] diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py index 6c15b653..fb40d088 100644 --- a/py/src/braintrust/auto.py +++ b/py/src/braintrust/auto.py @@ -9,7 +9,7 @@ import logging from contextlib import contextmanager -from braintrust.integrations import AnthropicIntegration, IntegrationPatchConfig +from braintrust.integrations import AnthropicIntegration, IntegrationPatchConfig, LangChainIntegration __all__ = ["auto_instrument"] @@ -40,6 +40,7 @@ def auto_instrument( claude_agent_sdk: bool = True, dspy: bool = True, adk: bool = True, + langchain: bool = True, ) -> dict[str, bool]: """ Auto-instrument supported AI/ML libraries for Braintrust tracing. 
@@ -61,6 +62,7 @@ def auto_instrument( claude_agent_sdk: Enable Claude Agent SDK instrumentation (default: True) dspy: Enable DSPy instrumentation (default: True) adk: Enable Google ADK instrumentation (default: True) + langchain: Enable LangChain instrumentation (default: True) Returns: Dict mapping integration name to whether it was successfully instrumented. @@ -117,6 +119,7 @@ def auto_instrument( claude_agent_sdk_enabled = _normalize_bool_option("claude_agent_sdk", claude_agent_sdk) dspy_enabled = _normalize_bool_option("dspy", dspy) adk_enabled = _normalize_bool_option("adk", adk) + langchain_enabled = _normalize_bool_option("langchain", langchain) if openai_enabled: results["openai"] = _instrument_openai() @@ -136,6 +139,8 @@ def auto_instrument( results["dspy"] = _instrument_dspy() if adk_enabled: results["adk"] = _instrument_adk() + if langchain_enabled: + results["langchain"] = _instrument_integration(LangChainIntegration) return results diff --git a/py/src/braintrust/integrations/__init__.py b/py/src/braintrust/integrations/__init__.py index 1dddbd91..74840ad7 100644 --- a/py/src/braintrust/integrations/__init__.py +++ b/py/src/braintrust/integrations/__init__.py @@ -1,5 +1,6 @@ from .anthropic import AnthropicIntegration from .base import IntegrationPatchConfig +from .langchain import LangChainIntegration -__all__ = ["AnthropicIntegration", "IntegrationPatchConfig"] +__all__ = ["AnthropicIntegration", "IntegrationPatchConfig", "LangChainIntegration"] diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py new file mode 100644 index 00000000..a3719ef8 --- /dev/null +++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py @@ -0,0 +1,21 @@ +"""Test auto_instrument for LangChain.""" + +from braintrust.auto import auto_instrument +from braintrust.integrations.langchain import BraintrustCallbackHandler + +# 1. 
Instrument +results = auto_instrument() +assert results.get("langchain") == True, "auto_instrument should return True for langchain" + +# 2. Idempotent +results2 = auto_instrument() +assert results2.get("langchain") == True, "auto_instrument should still return True on second call" + +# 3. Verify that a global handler was registered with LangChain +from langchain_core.callbacks import CallbackManager + +manager = CallbackManager.configure() +handler = next((h for h in manager.handlers if isinstance(h, BraintrustCallbackHandler)), None) +assert handler is not None, "BraintrustCallbackHandler should be registered globally after auto_instrument()" + +print("SUCCESS") diff --git a/py/src/braintrust/integrations/langchain/__init__.py b/py/src/braintrust/integrations/langchain/__init__.py new file mode 100644 index 00000000..9758ca95 --- /dev/null +++ b/py/src/braintrust/integrations/langchain/__init__.py @@ -0,0 +1,47 @@ +"""Braintrust integration for LangChain.""" + +from .integration import LangChainIntegration + + +def setup_langchain() -> bool: + """ + Auto-instrument LangChain for Braintrust tracing. + + Registers a global BraintrustCallbackHandler with LangChain's callback system + so that all chains, LLMs, tools, and retrievers are automatically traced. + + This is called automatically by braintrust.auto_instrument(). It is safe to + call multiple times – subsequent calls are no-ops. + + Returns: + True if setup succeeded, False if langchain_core is not installed. 
+ """ + return LangChainIntegration.setup() + + +# Lazily imported to avoid circular imports at module load time +# (tracing.py imports from braintrust, which must be fully initialized first) +_LAZY_ATTRS = frozenset(["BraintrustCallbackHandler", "BraintrustTracer", "set_global_handler", "clear_global_handler"]) + + +def __getattr__(name: str): + if name in _LAZY_ATTRS: + from .tracing import BraintrustCallbackHandler, BraintrustTracer, clear_global_handler, set_global_handler + + g = globals() + g["BraintrustCallbackHandler"] = BraintrustCallbackHandler + g["BraintrustTracer"] = BraintrustTracer + g["set_global_handler"] = set_global_handler + g["clear_global_handler"] = clear_global_handler + return g[name] + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +__all__ = [ + "LangChainIntegration", + "BraintrustCallbackHandler", + "BraintrustTracer", + "set_global_handler", + "clear_global_handler", + "setup_langchain", +] diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke b/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke new file mode 100644 index 00000000..3ecc362e --- /dev/null +++ b/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke @@ -0,0 +1,276 @@ +interactions: +- request: + body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "What is + 1 + 2?"}], "model": "claude-sonnet-4-20250514"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '110' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - AsyncAnthropic/Python 0.68.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.68.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + 
x-stainless-runtime-version: + - 3.11.13 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//dJBfSwMxEMS/yjGvpnB3bUUCvhR88aEg/gERCTFZ2uDd5kw2Ui333eWK + Rar4tLC/mWGYPYKHRp83pm4ublf5+uWT0/DYP9ysr+7XOx/uoCAfA00qytluCAopdtPD5hyyWBYo + 9NFTBw3X2eJpliMzyWwxa+t2WS+bBRRcZCEW6Kf9MVJoN5kPR6Opzqq2uqzmGJ8VssTBJLI5MjSI + vZGSGN8g01shdgTNpesUyqGa3iPwUMRIfCXO0M25grNuS8YlshIim1NBfeSJrP+PHb1TPg1b6inZ + ziz7v/of2mx/01EhFjlpN1fIlN6DIyOBEjSmPb1NHuP4BQAA//8DABaJlhKdAQAA + headers: + CF-RAY: + - 983cc1f7fda07e2d-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 23 Sep 2025 20:23:04 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 02af79b5-9b1a-4100-a05f-9235eb38bda4 + cf-cache-status: + - DYNAMIC + request-id: + - req_011CTRxS1WS9ia9upALgfUZK + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + via: + - 1.1 google + x-envoy-upstream-service-time: + - '1030' + status: + code: 200 + message: OK +- request: + body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '110' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - AsyncAnthropic/Python 0.68.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.68.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: 
!!binary | + H4sIAAAAAAAAAwAAAP//dJDdSgMxEEZfZfluTWG37YoGvLagN0VBRCSEZGhDdydrMimWsu8uWyxS + xauBOWd++I7oo6cOGq6zxdMsR2aS2XI2r+dt3TZLKAQPjT5vTN08397v1qG5WT+Ep/Z19bJfySMf + oCCHgSaLcrYbgkKK3dSwOYcslgUKLrIQC/Tb8ewLfU7kVDSa6qqaV3fVAuO7QpY4mEQ2R4YGsTdS + EuMbZPooxI6guXSdQjnd1UcEHooYiTviDN1cKzjrtmRcIishsrkU6jNPZP1/7Dw77adhSz0l25m2 + /+v/0Gb7m44KscjFdwuFTGkfHBkJlKAxheVt8hjHLwAAAP//AwBHCKHFnQEAAA== + headers: + CF-RAY: + - 99b0eabe4896b976-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:22:38 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + anthropic-ratelimit-input-tokens-limit: + - '3000000' + anthropic-ratelimit-input-tokens-remaining: + - '3000000' + anthropic-ratelimit-input-tokens-reset: + - '2025-11-08T00:22:38Z' + anthropic-ratelimit-output-tokens-limit: + - '600000' + anthropic-ratelimit-output-tokens-remaining: + - '600000' + anthropic-ratelimit-output-tokens-reset: + - '2025-11-08T00:22:38Z' + anthropic-ratelimit-tokens-limit: + - '3600000' + anthropic-ratelimit-tokens-remaining: + - '3600000' + anthropic-ratelimit-tokens-reset: + - '2025-11-08T00:22:38Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CUuU6hWk8Jg8Bh2c4Vyty + retry-after: + - '23' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '1801' + status: + code: 200 + message: OK +- request: + body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '110' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - AsyncAnthropic/Python 0.68.0 + x-stainless-arch: + - arm64 + 
x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.68.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA3SQTUvDQBCG/0p4r24gaRvRBQ9CDyJ4rBeRZbs7tNFkNu7OBkvJf5cUi1TxNDDP + Mx+8R/TBUwcN19nsqUyBmaRclYtq0VRNvYJC66HRp52p6qd1/7DZ3o8yjIe355v1o9tsm1soyGGg + 2aKU7I6gEEM3N2xKbRLLAgUXWIgF+uV49oU+Z3IqGnVxVSyKu2KJ6VUhSRhMJJsCQ4PYG8mR8Q0S + fWRiR9Ccu04hn+7qI1oeshgJ78QJur5WcNbtybhIVtrA5lKozjyS9f+x8+y8n4Y99RRtZ5r+r/9D + 6/1vOimELBffLRUSxbF1ZKSlCI05LG+jxzR9AQAA//8DAEp7u9udAQAA + headers: + CF-RAY: + - 99b0ebedd90d6897-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:23:27 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + anthropic-ratelimit-input-tokens-limit: + - '3000000' + anthropic-ratelimit-input-tokens-remaining: + - '3000000' + anthropic-ratelimit-input-tokens-reset: + - '2025-11-08T00:23:26Z' + anthropic-ratelimit-output-tokens-limit: + - '600000' + anthropic-ratelimit-output-tokens-remaining: + - '600000' + anthropic-ratelimit-output-tokens-reset: + - '2025-11-08T00:23:26Z' + anthropic-ratelimit-tokens-limit: + - '3600000' + anthropic-ratelimit-tokens-remaining: + - '3600000' + anthropic-ratelimit-tokens-reset: + - '2025-11-08T00:23:26Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CUuUAHB8QqxGoW7TZyUaz + retry-after: + - '34' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '1851' + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory 
b/py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory new file mode 100644 index 00000000..88cc8848 --- /dev/null +++ b/py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory @@ -0,0 +1,332 @@ +interactions: +- request: + body: '{"messages": [{"content": "Assistant: Hello! How can I assist you today? + User: What''s your name?", "role": "user"}], "model": "gpt-4o-mini", "stream": + false}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '149' + content-type: + - application/json + host: + - localhost:8000 + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.13 + method: POST + uri: http://localhost:8000/v1/proxy/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-CJ3pSTx8NVvtJFY51xvv7gmxKCqAO\",\n \"object\": + \"chat.completion\",\n \"created\": 1758658986,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Assistant: I don't have a personal + name, but you can call me Assistant. 
How can I help you today?\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 24,\n \"completion_tokens\": 23,\n \"total_tokens\": 47,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_560af6e559\"\n}\n" + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id + Connection: + - keep-alive + Date: + - Tue, 23 Sep 2025 20:23:06 GMT + Keep-Alive: + - timeout=5 + Transfer-Encoding: + - chunked + Vary: + - Origin + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + cf-ray: + - 983cc206fc1f67ef-SJC + content-type: + - application/json + openai-organization: + - braintrust-data + openai-processing-ms: + - '755' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - _cfuvid=TIArUY3FKYo9t2vz5lADo0yFHggpjc9nkMoRBVQYfbA-1758658986949-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-bt-cached: + - MISS + x-bt-internal-trace-id: + - 899e875d60ba290b68341d027600a8fd + x-content-type-options: + - nosniff + x-envoy-upstream-service-time: + - '775' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999980' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_3b64f4bb78c14e2ea80001681e34611d + status: + 
code: 200 + message: OK +- request: + body: '{"messages":[{"content":"Assistant: Hello! How can I assist you today? + User: What''s your name?","role":"user"}],"model":"gpt-4o-mini","stream":false}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '149' + content-type: + - application/json + cookie: + - __cf_bm=W_Ukgb.mz8e1GW7CfhzN.QQaN09_xQq1uTHm3a.dJdU-1762561359-1.0.1.1-6IrkySxpZaL.1C65iH0iOLFfere0JxHCiasT6bak.RihYFMyJgIz2OuYJqcUey8c5vicjtorNby_Z_GJX.ZMIHa6PyzVrhqgfZZmtnnn.sA; + _cfuvid=jwWMA4k30hLPwBwTSCIdIeS5.m1TkcdYLYTt4YSTZhI-1762561359243-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4ySTW/bMAyG7/4VhC67xEW+s+YyFAO2ZNhp22HYUBiMRNvaZFGT5KRBkf8+2E5i + d22BXXTgw5fiS/IxARBaiTUIWWKUlTPp+x9fxg/u4yFuv20O9ezD98/7YvPn01fFxeFOjBoF736R + jBfVjeTKGYqabYelJ4zUVJ2sltPFcjJb3LagYkWmkRUupnNOK211Oh1P5+l4lU7entUla0lBrOFn + AgDw2L5Nn1bRg1jDeHSJVBQCFiTW1yQA4dk0EYEh6BDRRjHqoWQbybat3134Grag2L6JUOKeAMGR + D2zRgMWKRrCrIxy5BokWJBoDFcFVfAMbPrRoCyUZ12ZGVnh8N/zXU14HbLzb2pgBQGs5YjO71vH9 + mZyuHg0XzvMu/CMVubY6lJknDGwbPyGyEy09JQD37SzrJ+MRznPlYhb5N7XfTeddOdFvcABnZxg5 + ounj89XohWqZoojahMEuhERZkuqV/eKwVpoHIBl4ft7MS7U739oW/1O+B1KSi6Qy50lp+dRwn+ap + ue/X0q4zbhsWgfxeS8qiJt/sQVGOtemuToRjiFRlubYFeed1d3q5yxbLMeZLWixuRXJK/gIAAP// + AwCouO6tiAMAAA== + headers: + CF-RAY: + - 99b0eacf8822aaac-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:22:39 GMT + Server: + - 
cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '628' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '639' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999980' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_1009d84201314e5aa9ccdcbafeeac4af + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"content":"Assistant: Hello! How can I assist you today? + User: What''s your name?","role":"user"}],"model":"gpt-4o-mini","stream":false}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '149' + content-type: + - application/json + cookie: + - __cf_bm=.AxQfRhAvElThVl_Qz9zUVdqz_GtBGXwRQ0TVPIg5pc-1762561407-1.0.1.1-klsoMaFKHjzxOrHy2Zfd8Sc76RDHsMXURLAaIzORncnm47NI1MY0BqqBGOEsVXlZb.RdqeqpxzGFhl8DlRDjy.SqRfa2B4zEYdKZqQ2kVB0; + _cfuvid=0ohSoYMS21h1NkHWl4FeeVCp5aK2KHeEjclSm1NY7yY-1762561407934-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + 
H4sIAAAAAAAAAwAAAP//jJLBbtswDIbvfgpO53hIgiRNcxmKAcN6GNbuMmBDYTASHauRRU2imwRF + gb3GXm9PUthp4nTLgF104Mef4k/yMQNQ1qgFKF2h6Dq4/P23L6P18vJm8+Pz1e16m8pPH25u7fbe + fJ1vlmrQKnh5T1oOqrea6+BILPs91pFQqK06upiNp7PRZDjvQM2GXCtbBcknnNfW23w8HE/y4UU+ + mr+oK7aaklrA9wwA4LF72z69oa1awHBwiNSUEq5ILY5JACqyayMKU7JJ0Isa9FCzF/Jd61cHvoBr + MOx///wlUOEDAUIKpG1pNXisaQDLRmDHDWj0oNE5qAmO8jfwkTcduoaKXOgyhQ3u3p3+HKlsErbu + fePcCUDvWbCdXuf57oU8HV06XoXIy/SHVJXW21QVkTCxbx0l4aA6+pQB3HXTbF4NSIXIdZBCeE3d + d+PJvpzqd3gGCgu6Pj6ZD85UKwwJWpdOtqE06opMr+xXh42xfAKyE89/N3Ou9t639av/Kd8DrSkI + mSJEMla/NtynRWov/F9pxxl3DatE8cFqKsRSbPdgqMTG7e9OpV0SqovS+hXFEO3++MpQTGdDLGc0 + nV6q7Cl7BgAA//8DAEMiDgGKAwAA + headers: + CF-RAY: + - 99b0ebffc94fed3b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:23:28 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '680' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '708' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999980' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_e273cb6eb8624df78282659b4a19fffe + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_global_handler b/py/src/braintrust/integrations/langchain/cassettes/test_global_handler new file mode 100644 index 00000000..ba9f4fa9 --- /dev/null +++ 
b/py/src/braintrust/integrations/langchain/cassettes/test_global_handler @@ -0,0 +1,225 @@ +interactions: +- request: + body: '{"messages": [{"content": "What is 1 + 2?", "role": "user"}], "model": + "gpt-4o-mini", "frequency_penalty": 0.0, "n": 1, "presence_penalty": 0.0, "stream": + false, "temperature": 1.0, "top_p": 1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '177' + content-type: + - application/json + host: + - localhost:8000 + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.13 + method: POST + uri: http://localhost:8000/v1/proxy/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-CJ44VUVp2sk1koSWXX64CaLEy1mWy\",\n \"object\": + \"chat.completion\",\n \"created\": 1758659919,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"1 + 2 equals 3.\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 15,\n \"completion_tokens\": 8,\n \"total_tokens\": 23,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_560af6e559\"\n}\n" + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - 
x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id + Connection: + - keep-alive + Date: + - Tue, 23 Sep 2025 20:38:40 GMT + Keep-Alive: + - timeout=5 + Transfer-Encoding: + - chunked + Vary: + - Origin + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + cf-ray: + - 983cd8d01c33943a-SJC + content-type: + - application/json + openai-organization: + - braintrust-data + openai-processing-ms: + - '930' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - _cfuvid=XPwF0fhMV9JwjYuWwUMNbzPKxvSJ.HOkXEftYzjXRew-1758659920459-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-bt-cached: + - MISS + x-bt-internal-trace-id: + - 93acad0503781eb98ab6ea3412173537 + x-content-type-options: + - nosniff + x-envoy-upstream-service-time: + - '1026' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999992' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_181413148bbe4814a905514521d6dc34 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"content":"What is 1 + 2?","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"top_p":1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '177' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + 
x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jJJBb9swDIXv/hUCr4uL2IlTJ9et2y2HHQZ0Q2EoMm1rk0VNoocNRf77 + IDuN3a0FevGBHx/1Hs3HRAjQNRwEqE6y6p1J33/9XH7adscPRXGXnTb7+49fNse7/b2iYylhFRV0 + +o6Kn1Q3inpnkDXZCSuPkjFOzW53ebHLyiwfQU81mihrHadbSnttdZqv8226vk2z8qLuSCsMcBDf + EiGEeBy/0aet8TccxHr1VOkxBNkiHK5NQoAnEysgQ9CBpWVYzVCRZbSj9Uy8E7nAn4M0QWxull0e + myHI6NQOxiyAtJZYxqSjv4cLOV8dGWqdp1P4RwqNtjp0lUcZyMbXA5ODkZ4TIR7G5MOzMOA89Y4r + ph84PpcV0ziY9z3D8sKYWJq5nG9WLwyramSpTVgsDpRUHdazct6yHGpNC5AsIv/v5aXZU2xt27eM + n4FS6BjrynmstXqed27zGI/xtbbrikfDEND/0gor1ujjb6ixkYOZTgTCn8DYV422LXrn9XQnjauK + 3Vo2OyyKPSTn5C8AAAD//wMAcIbFgjUDAAA= + headers: + CF-RAY: + - 99b0f5db9f1cbffc-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:30:12 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=vfpKl6dvzcujjwigai_kp7UkNhR2ltT1SwFsT05VrS8-1762561812-1.0.1.1-UAyuy134RWxRUzjbClH59IJarw95du8Dl347lkXcDkbXBBx7vCmRuxRccJQB2f1T6oobZSgBj7O8hdaLY4hef6ypZ2uHUshy880EnptiWEY; + path=/; expires=Sat, 08-Nov-25 01:00:12 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=N6FAUGU_qhcPvlVWdt0kvrpbt1SzTvQ0v29fL2QCNbA-1762561812358-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '319' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '489' + x-openai-proxy-wasm: + - v0.1 + 
x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999992' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_3e940a310adf4d9a88c8da6b70645bb7 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration b/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration new file mode 100644 index 00000000..6c396d02 --- /dev/null +++ b/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration @@ -0,0 +1,300 @@ +interactions: +- request: + body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "What is + 1 + 2?"}], "model": "claude-sonnet-4-20250514"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '110' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - Anthropic/Python 0.68.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.68.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.13 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//dJBRSwMxEIT/yjGvpnDX9hQDvggegn9AEAkxWdvg3eZMNsVa7r/LFYtU + 8Wlhv5lhmAOCh8aQN6Zu7nZX3IXr+8/X9cNt213vu5fucYSC7EeaVZSz3RAUUuznh805ZLEsUBii + px4arrfF0yJHZpLFerGsl23dNmsouMhCLNBPh1Ok0MdsPh6NprqoltVNtcL0rJAljiaRzZGhQeyN + lMT4BpneC7EjaC59r1CO1fQBgcciRuIbcYZuLhWcdVsyLpGVENmcC+oTT2T9f+zknfNp3NJAyfam + Hf7qf2iz/U0nhVjkrN1KIVPaBUdGAiVozHt6mzym6QsAAP//AwD8n6CUnQEAAA== + headers: + CF-RAY: + - 
983cc09c5f361679-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 23 Sep 2025 20:22:09 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 02af79b5-9b1a-4100-a05f-9235eb38bda4 + anthropic-ratelimit-input-tokens-limit: + - '30000' + anthropic-ratelimit-input-tokens-remaining: + - '30000' + anthropic-ratelimit-input-tokens-reset: + - '2025-09-23T20:22:09Z' + anthropic-ratelimit-output-tokens-limit: + - '8000' + anthropic-ratelimit-output-tokens-remaining: + - '8000' + anthropic-ratelimit-output-tokens-reset: + - '2025-09-23T20:22:09Z' + anthropic-ratelimit-requests-limit: + - '50' + anthropic-ratelimit-requests-remaining: + - '49' + anthropic-ratelimit-requests-reset: + - '2025-09-23T20:22:09Z' + anthropic-ratelimit-tokens-limit: + - '38000' + anthropic-ratelimit-tokens-remaining: + - '38000' + anthropic-ratelimit-tokens-reset: + - '2025-09-23T20:22:09Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CTRxMui53W9h6eXYGxUJb + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + via: + - 1.1 google + x-envoy-upstream-service-time: + - '1110' + status: + code: 200 + message: OK +- request: + body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '110' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - Anthropic/Python 0.68.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.68.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + x-stainless-timeout: + - NOT_GIVEN + 
method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//dJDLasMwEEV/xdxtFfAjCa2g29JVoetShJCmsRt75Eij9BH878WhpqSl + q4E5Zx7cE4bgqYeG6232tEqBmWS1XtVlvSk31RoKnYfGkHamrB5z87bPfPfwGa+Ph9f7401wL1so + yMdIs0Up2R1BIYZ+btiUuiSWBQousBAL9NNp8YXeZ3IuGlVxVdTFbdFgelZIEkYTyabA0CD2RnJk + fINEh0zsCJpz3yvk8119QsdjFiNhT5ygq62Cs64l4yJZ6QKbS6FceCTr/2PL7LyfxpYGirY3m+Gv + /0Or9jedFEKWi+8ahUTx2Dky0lGExhyWt9Fjmr4AAAD//wMARZkZqp0BAAA= + headers: + CF-RAY: + - 99b0eab2783f1758-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:22:36 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + anthropic-ratelimit-input-tokens-limit: + - '3000000' + anthropic-ratelimit-input-tokens-remaining: + - '3000000' + anthropic-ratelimit-input-tokens-reset: + - '2025-11-08T00:22:36Z' + anthropic-ratelimit-output-tokens-limit: + - '600000' + anthropic-ratelimit-output-tokens-remaining: + - '600000' + anthropic-ratelimit-output-tokens-reset: + - '2025-11-08T00:22:36Z' + anthropic-ratelimit-tokens-limit: + - '3600000' + anthropic-ratelimit-tokens-remaining: + - '3600000' + anthropic-ratelimit-tokens-reset: + - '2025-11-08T00:22:36Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CUuU6ZRKcH4CRrH5o4j6b + retry-after: + - '24' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '1694' + status: + code: 200 + message: OK +- request: + body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '110' + content-type: + - application/json + host: + - api.anthropic.com + 
user-agent: + - Anthropic/Python 0.68.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.68.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//dJBNS8QwEIb/SnmvptDudhUCHvQkC4oieBEJIRl3y7aTmkz8Kv3v0sVF + VvE0MM8zH7wj+uCpg4brbPZUpsBMUjblolqsqlXdQKH10OjTxlT1fXPxcHXzubwOZ2vKb7fPu7vL + 9Q4K8jHQbFFKdkNQiKGbGzalNollgYILLMQC/TgefKH3meyLRl2cFIvivFhielJIEgYTyabA0CD2 + RnJkfINEL5nYETTnrlPI+7t6RMtDFiNhR5yg61MFZ92WjItkpQ1sjoXqwCNZ/x87zM77adhST9F2 + ZtX/9X9ovf1NJ4WQ5ei7pUKi+No6MtJShMYclrfRY5q+AAAA//8DAAqaanadAQAA + headers: + CF-RAY: + - 99b0ebe2db3d67ca-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:23:24 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + anthropic-ratelimit-input-tokens-limit: + - '3000000' + anthropic-ratelimit-input-tokens-remaining: + - '3000000' + anthropic-ratelimit-input-tokens-reset: + - '2025-11-08T00:23:24Z' + anthropic-ratelimit-output-tokens-limit: + - '600000' + anthropic-ratelimit-output-tokens-remaining: + - '600000' + anthropic-ratelimit-output-tokens-reset: + - '2025-11-08T00:23:24Z' + anthropic-ratelimit-tokens-limit: + - '3600000' + anthropic-ratelimit-tokens-remaining: + - '3600000' + anthropic-ratelimit-tokens-reset: + - '2025-11-08T00:23:24Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CUuUA9cTfN1Yz5PMKHD5d + retry-after: + - '37' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '1556' + status: + code: 200 + message: OK +version: 1 diff --git 
a/py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management b/py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management new file mode 100644 index 00000000..20ffc04b --- /dev/null +++ b/py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management @@ -0,0 +1,327 @@ +interactions: +- request: + body: '{"messages": [{"content": "Say hello", "role": "user"}], "model": "gpt-4o-mini", + "frequency_penalty": 0.0, "n": 1, "presence_penalty": 0.0, "stream": false, + "temperature": 1.0, "top_p": 1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '172' + content-type: + - application/json + host: + - localhost:8000 + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.13 + method: POST + uri: http://localhost:8000/v1/proxy/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-CJ3xSBjbTuwYXAmP3RRw0GoHz5Ooy\",\n \"object\": + \"chat.completion\",\n \"created\": 1758659482,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Hello! 
How can I assist you today?\",\n + \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": + null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 9,\n \"completion_tokens\": 9,\n \"total_tokens\": 18,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_51db84afab\"\n}\n" + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id + Connection: + - keep-alive + Date: + - Tue, 23 Sep 2025 20:31:22 GMT + Keep-Alive: + - timeout=5 + Transfer-Encoding: + - chunked + Vary: + - Origin + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + cf-ray: + - 983cce247c46cf2f-SJC + content-type: + - application/json + openai-organization: + - braintrust-data + openai-processing-ms: + - '381' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - _cfuvid=Y9Om0gYdHB3h9aUHhUUY9eEia6Y3wmSARFX9Xq907Ho-1758659482810-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-bt-cached: + - MISS + x-bt-internal-trace-id: + - ebcf889942216eb0b613f43f2cdb11b1 + x-content-type-options: + - nosniff + x-envoy-upstream-service-time: + - '397' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_75709538073646e4bd7355c91bc2ce52 + status: + 
code: 200 + message: OK +- request: + body: '{"messages":[{"content":"Say hello","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"top_p":1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '172' + content-type: + - application/json + cookie: + - __cf_bm=W_Ukgb.mz8e1GW7CfhzN.QQaN09_xQq1uTHm3a.dJdU-1762561359-1.0.1.1-6IrkySxpZaL.1C65iH0iOLFfere0JxHCiasT6bak.RihYFMyJgIz2OuYJqcUey8c5vicjtorNby_Z_GJX.ZMIHa6PyzVrhqgfZZmtnnn.sA; + _cfuvid=jwWMA4k30hLPwBwTSCIdIeS5.m1TkcdYLYTt4YSTZhI-1762561359243-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSwW7UMBC95ysGnzcoWbppu5eqqoSKgAu9oKIq8tqTrMHxGHuydKn235GTdpPS + InHxYd685/dm5iEDEEaLNQi1law6b/Or2y/F70p//Pz+UprNzf2uuO2uvrY/d9XN8pNYJAZtvqPi + J9ZbRZ23yIbcCKuAkjGplqfVclWV76pyADrSaBOt9ZyfUN4ZZ/JlsTzJi9O8PHtkb8kojGIN3zIA + gIfhTT6dxnuxhmLxVOkwRtmiWB+bAEQgmypCxmgiS8diMYGKHKMbrF+jtfQGrukXKOngA4wE2FMP + TFruL+bEgE0fZTLvemtngHSOWKbwg+W7R+RwNGmp9YE28S+qaIwzcVsHlJFcMhSZvBjQQwZwNwyj + f5ZP+ECd55rpBw7fnY9qYtrAS4yJpZ3K5dniFa1aI0tj42yUQkm1RT0xp7nLXhuaAdks8Usvr2mP + qY1r/0d+ApRCz6hrH1Ab9Tzv1BYwnee/2o4THgyLiGFnFNZsMKQtaGxkb8ejEXEfGbu6Ma7F4IMZ + L6fx9aoqZFPhanUuskP2BwAA//8DABw5ElFHAwAA + headers: + CF-RAY: + - 99b0eadaea14aaac-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:22:41 GMT + Server: + - cloudflare + 
Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '328' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '342' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_68644fc1eb1a4533b2f98192dc918822 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"content":"Say hello","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"top_p":1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '172' + content-type: + - application/json + cookie: + - __cf_bm=.AxQfRhAvElThVl_Qz9zUVdqz_GtBGXwRQ0TVPIg5pc-1762561407-1.0.1.1-klsoMaFKHjzxOrHy2Zfd8Sc76RDHsMXURLAaIzORncnm47NI1MY0BqqBGOEsVXlZb.RdqeqpxzGFhl8DlRDjy.SqRfa2B4zEYdKZqQ2kVB0; + _cfuvid=0ohSoYMS21h1NkHWl4FeeVCp5aK2KHeEjclSm1NY7yY-1762561407934-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + 
H4sIAAAAAAAAAwAAAP//jFLBbtswDL37Kzid48EOkrTJZYcd1g1osRVFDy0KQ5FoW5ssChK9LSjy + 74PsNnbXDthFBz6+p/dIPmYAwmixA6FayarzNv94d13am9ubQJf79nB5W9/Z609XV1+V/BK+iUVi + 0P47Kn5mvVfUeYtsyI2wCigZk2p5tlmuN+Wq2A5ARxptojWe8xXlnXEmXxbLVV6c5eX5E7slozCK + HdxnAACPw5t8Oo2/xQ6KxXOlwxhlg2J3agIQgWyqCBmjiSwdi8UEKnKMbrB+gdbSO7igX6Ckg88w + EuBAPTBpefgwJwas+yiTeddbOwOkc8QyhR8sPzwhx5NJS40PtI9/UUVtnIltFVBGcslQZPJiQI8Z + wMMwjP5FPuEDdZ4rph84fLcd1cS0gdcYE0s7lcvzxRtalUaWxsbZKIWSqkU9Mae5y14bmgHZLPFr + L29pj6mNa/5HfgKUQs+oKx9QG/Uy79QWMJ3nv9pOEx4Mi4jhp1FYscGQtqCxlr0dj0bEQ2Tsqtq4 + BoMPZryc2lfrTSHrDa7XW5Edsz8AAAD//wMAVD8AOUcDAAA= + headers: + CF-RAY: + - 99b0ec0acb26ed3b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:23:30 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '589' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '607' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_67359745154e404899e3fd81a37cf26a + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_llm_calls b/py/src/braintrust/integrations/langchain/cassettes/test_llm_calls new file mode 100644 index 00000000..cea55348 --- /dev/null +++ b/py/src/braintrust/integrations/langchain/cassettes/test_llm_calls @@ -0,0 +1,333 @@ +interactions: +- request: + 
body: '{"messages": [{"content": "What is 1 + 2?", "role": "user"}], "model": + "gpt-4o-mini", "frequency_penalty": 0.0, "n": 1, "presence_penalty": 0.0, "stream": + false, "temperature": 1.0, "top_p": 1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '177' + content-type: + - application/json + host: + - localhost:8000 + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.13 + method: POST + uri: http://localhost:8000/v1/proxy/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-CJ3pRI2shpJIGYKUU8RFWUyB6W5O1\",\n \"object\": + \"chat.completion\",\n \"created\": 1758658985,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"1 + 2 equals 3.\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 15,\n \"completion_tokens\": 8,\n \"total_tokens\": 23,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_560af6e559\"\n}\n" + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id + Connection: + - keep-alive + Date: + - Tue, 23 Sep 2025 20:23:06 GMT + Keep-Alive: + - timeout=5 
+ Transfer-Encoding: + - chunked + Vary: + - Origin + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + cf-ray: + - 983cc2032f2967ef-SJC + content-type: + - application/json + openai-organization: + - braintrust-data + openai-processing-ms: + - '441' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - _cfuvid=uhF3qDlYXbYwV7mlgYhl_d7MyPH3FwQHxL6cek.ONAQ-1758658986041-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-bt-cached: + - MISS + x-bt-internal-trace-id: + - f4e0a5413e529acf383233e54ad00e99 + x-content-type-options: + - nosniff + x-envoy-upstream-service-time: + - '454' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_023ebefb1f6b4dec8910b8cb4d7421f5 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"content":"What is 1 + 2?","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"top_p":1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '177' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + 
string: !!binary | + H4sIAAAAAAAAAwAAAP//jJI/b9wwDMV3fwqBa8+B7Yudy61dunRJtxaBoZNon1JZVCQ6/RPcdy9k + X85OmgJdPPBHPr1H8zkTAoyGvQB1lKwGb/OPX++KH7eh6XcPvz8Psm++6O1d8+kRnw5+B5s0QYcH + VPwydaVo8BbZkJuxCigZk2p501R1U27r3QQG0mjTWO85v6Z8MM7kVVFd58VNXp7F1ZGMwgh78S0T + Qojn6Zt8Oo0/YS+KzUtlwBhlj7C/NAkBgWyqgIzRRJaOYbNARY7RTdZL8UFUAh9HaaPYXq27AnZj + lMmpG61dAekcsUxJJ3/3Z3K6OLLU+0CH+GYUOuNMPLYBZSSXXo9MHiZ6yoS4n5KPr8KADzR4bpm+ + 4/RcWc9ysOx7gbszY2Jpl3K13bwj1mpkaWxcLQ6UVEfUy+SyZTlqQyuQrSL/7eU97Tm2cf3/yC9A + KfSMuvUBtVGv8y5tAdMx/qvtsuLJMEQMT0ZhywZD+g0aOzna+UQg/oqMQ9sZ12Pwwcx30vm2bgrZ + NVjXt5Cdsj8AAAD//wMAbYrr4zUDAAA= + headers: + CF-RAY: + - 99b0eacc1d35aaac-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:22:39 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=W_Ukgb.mz8e1GW7CfhzN.QQaN09_xQq1uTHm3a.dJdU-1762561359-1.0.1.1-6IrkySxpZaL.1C65iH0iOLFfere0JxHCiasT6bak.RihYFMyJgIz2OuYJqcUey8c5vicjtorNby_Z_GJX.ZMIHa6PyzVrhqgfZZmtnnn.sA; + path=/; expires=Sat, 08-Nov-25 00:52:39 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=jwWMA4k30hLPwBwTSCIdIeS5.m1TkcdYLYTt4YSTZhI-1762561359243-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '300' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '430' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + 
x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_24854ba725b942179830d357f1af2add + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"content":"What is 1 + 2?","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"top_p":1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '177' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jJJBb9swDIXv/hUEr4sL242TLtdhu+wy7BRsKAxFoh2lsqRKdLGtyH8f + ZKexu3XALj7w46Peo/mcAaBWuAOUR8Gy9yb/8O1reeq+rJ/osdvvf32sDqftA3/+NFC33+MqKdzh + RJJfVDfS9d4Qa2cnLAMJpjS13G6qelOui+0IeqfIJFnnOV+7vNdW51VRrfNim5d3F/XRaUkRd/A9 + AwB4Hr/Jp1X0A3dQrF4qPcUoOsLdtQkAgzOpgiJGHVlYxtUMpbNMdrRewjuogB4HYSLc3iy7ArVD + FMmpHYxZAGGtY5GSjv7uL+R8dWRc54M7xD+k2Gqr47EJJKKz6fXIzuNIzxnA/Zh8eBUGfXC954bd + A43PlfU0Dud9z/DuwtixMHO5ul29MaxRxEKbuFgcSiGPpGblvGUxKO0WIFtE/tvLW7On2Np2/zN+ + BlKSZ1KND6S0fJ13bguUjvFfbdcVj4YxUnjSkhrWFNJvUNSKwUwngvFnZOqbVtuOgg96upPWN/Wm + EO2G6vo9ZufsNwAAAP//AwDHwDA2NQMAAA== + headers: + CF-RAY: + - 99b0ebfc4e5ced3b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:23:27 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=.AxQfRhAvElThVl_Qz9zUVdqz_GtBGXwRQ0TVPIg5pc-1762561407-1.0.1.1-klsoMaFKHjzxOrHy2Zfd8Sc76RDHsMXURLAaIzORncnm47NI1MY0BqqBGOEsVXlZb.RdqeqpxzGFhl8DlRDjy.SqRfa2B4zEYdKZqQ2kVB0; + 
path=/; expires=Sat, 08-Nov-25 00:53:27 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=0ohSoYMS21h1NkHWl4FeeVCp5aK2KHeEjclSm1NY7yY-1762561407934-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '269' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '435' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_617bc8e11f2a43a98a0658e7e91298fd + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution b/py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution new file mode 100644 index 00000000..aec3440e --- /dev/null +++ b/py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution @@ -0,0 +1,234 @@ +interactions: +- request: + body: '{"messages": [{"content": "Tell me a joke about bear", "role": "user"}], + "model": "gpt-4o-mini", "frequency_penalty": 0.0, "n": 1, "presence_penalty": + 0.0, "stream": false, "temperature": 1.0, "top_p": 1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '188' + content-type: + - application/json + host: + - localhost:8000 + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 
'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.13 + method: POST + uri: http://localhost:8000/v1/proxy/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-CJ3vA6tl1z95spYoDxT9RtqqzDF8n\",\n \"object\": + \"chat.completion\",\n \"created\": 1758659340,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Why don\u2019t bears ever get lost?\\n\\nBecause + they always take the bear necessities! \U0001F43B\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 13,\n \"completion_tokens\": 19,\n \"total_tokens\": 32,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_51db84afab\"\n}\n" + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id + Connection: + - keep-alive + Date: + - Tue, 23 Sep 2025 20:29:00 GMT + Keep-Alive: + - timeout=5 + Transfer-Encoding: + - chunked + Vary: + - Origin + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + cf-ray: + - 983ccaa98d189e59-SJC + content-type: + - application/json + openai-organization: + - braintrust-data + openai-processing-ms: + - '742' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - 
_cfuvid=h4eOl14etTzzF9eOjCE9SDq4Y79ZdPOJeIYnqb.tN3E-1758659340929-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-bt-cached: + - MISS + x-bt-internal-trace-id: + - ba7859db365b14edae0dc1d75360d5cb + x-content-type-options: + - nosniff + x-envoy-upstream-service-time: + - '912' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999990' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_31748d3aea8d488c9f1b1b7764b3a5d7 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"content": "write a 2-line poem about bear", "role": "user"}], + "model": "gpt-4o-mini", "frequency_penalty": 0.0, "n": 1, "presence_penalty": + 0.0, "stream": false, "temperature": 1.0, "top_p": 1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '193' + content-type: + - application/json + host: + - localhost:8000 + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.13 + method: POST + uri: http://localhost:8000/v1/proxy/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-CJ3vAwrz88GjVnlchECG5UbilcrZG\",\n \"object\": + \"chat.completion\",\n \"created\": 1758659340,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"In forest shadows, a bear roams free, + \ \\nMajestic 
guardian of the ancient tree.\",\n \"refusal\": null,\n + \ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\": + \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 15,\n \"completion_tokens\": + 19,\n \"total_tokens\": 34,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_560af6e559\"\n}\n" + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id + Connection: + - keep-alive + Date: + - Tue, 23 Sep 2025 20:29:01 GMT + Keep-Alive: + - timeout=5 + Transfer-Encoding: + - chunked + Vary: + - Origin + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + cf-ray: + - 983ccaa99f09cecd-SJC + content-type: + - application/json + openai-organization: + - braintrust-data + openai-processing-ms: + - '909' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - _cfuvid=I8TMI8qNGmqspYd_94RtBiCEVRDIffMScd.j_yw35Es-1758659341697-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-bt-cached: + - MISS + x-bt-internal-trace-id: + - 7d350d2a8b4d267107b257e3a1989c5a + x-content-type-options: + - nosniff + x-envoy-upstream-service-time: + - '1375' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999990' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_891af1935bbf49c39105d7299babb315 + status: 
+ code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens b/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens new file mode 100644 index 00000000..441128e9 --- /dev/null +++ b/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens @@ -0,0 +1,324 @@ +interactions: +- request: + body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is the first + type of testing mentioned in section 1.2?"}],"model":"claude-sonnet-4-5-20250929","system":[{"type":"text","text":"\n# + Comprehensive Guide to Software Testing Methods!\n\n## Chapter 1: Introduction + to Testing\n\nSoftware testing is a critical component of the software development + lifecycle. It ensures that applications\nfunction correctly, meet requirements, + and provide a positive user experience. This guide covers various\ntesting methodologies, + best practices, and tools used in modern software development.\n\n### 1.1 The + Importance of Testing\n\nTesting helps identify defects early in the development + process, reducing the cost of fixing issues later.\nStudies have shown that + the cost of fixing a bug increases exponentially as it progresses through the\ndevelopment + lifecycle. 
A bug found during requirements gathering might cost $1 to fix, while + the same bug\nfound in production could cost $100 or more.\n\n### 1.2 Types + of Testing\n\nThere are many types of testing, including:\n- Unit Testing: Testing + individual components or functions in isolation\n- Integration Testing: Testing + how components work together\n- End-to-End Testing: Testing the entire application + flow\n- Performance Testing: Testing application speed and scalability\n- Security + Testing: Testing for vulnerabilities and security issues\n- Usability Testing: + Testing user experience and interface design\n\n## Chapter 2: Unit Testing Best + Practices\n\nUnit testing focuses on testing the smallest testable parts of + an application. Here are some best practices:\n\n### 2.1 Write Tests First (TDD)\n\nTest-Driven + Development (TDD) is a methodology where tests are written before the actual + code. The process\nfollows a simple cycle: Red (write a failing test), Green + (write code to pass the test), Refactor (improve\nthe code while keeping tests + passing).\n\n### 2.2 Keep Tests Independent\n\nEach test should be independent + of others. Tests should not rely on the state created by previous tests.\nThis + ensures that tests can be run in any order and that failures are isolated and + easy to debug.\n\n### 2.3 Use Meaningful Names\n\nTest names should clearly + describe what is being tested and what the expected outcome is. A good test + name\nmight be \"test_user_registration_with_valid_email_succeeds\" rather than + just \"test_registration\".\n\n### 2.4 Test Edge Cases\n\nDon''t just test the + happy path. 
Consider edge cases like:\n- Empty inputs\n- Null or undefined values\n- + Very large inputs\n- Invalid formats\n- Boundary conditions\n\n## Chapter 3: + Integration Testing\n\nIntegration testing verifies that different modules or + services work together correctly.\n\n### 3.1 Database Integration\n\nWhen testing + database interactions, consider using:\n- Test databases separate from production\n- + Database transactions that roll back after each test\n- Mock data that represents + realistic scenarios\n\n### 3.2 API Integration\n\nAPI integration tests should + verify:\n- Correct HTTP status codes\n- Response format and schema\n- Error + handling\n- Authentication and authorization\n\n## Chapter 4: Performance Testing\n\nPerformance + testing ensures your application can handle expected load and scale appropriately.\n\n### + 4.1 Load Testing\n\nLoad testing simulates multiple users accessing the application + simultaneously. Key metrics include:\n- Response time under load\n- Throughput + (requests per second)\n- Error rates\n- Resource utilization (CPU, memory, network)\n\n### + 4.2 Stress Testing\n\nStress testing pushes the application beyond normal operational + capacity to find breaking points and\nunderstand how the system fails gracefully.\n\n## + Chapter 5: Continuous Integration and Testing\n\nModern development practices + integrate testing into the CI/CD pipeline.\n\n### 5.1 Automated Test Runs\n\nTests + should run automatically on every code change. This includes:\n- Running unit + tests on every commit\n- Running integration tests on pull requests\n- Running + end-to-end tests before deployment\n\n### 5.2 Test Coverage\n\nTest coverage + metrics help identify untested code. While 100% coverage isn''t always practical + or necessary,\nmaintaining good coverage helps ensure code quality. 
Focus on + critical paths and business logic.\n\n## Chapter 6: Testing Tools and Frameworks\n\nMany + tools exist to support testing efforts:\n\n### 6.1 Python Testing\n- pytest: + Feature-rich testing framework\n- unittest: Built-in Python testing module\n- + mock: Library for mocking objects\n\n### 6.2 JavaScript Testing\n- Jest: Popular + testing framework\n- Mocha: Flexible testing framework\n- Cypress: End-to-end + testing tool\n\n### 6.3 Other Tools\n- Selenium: Browser automation\n- JMeter: + Performance testing\n- Postman: API testing\n\n## Conclusion\n\nEffective testing + is essential for delivering high-quality software. By following best practices + and using\nappropriate tools, teams can catch bugs early, improve code quality, + and deliver better products to users.\n\nRemember: Testing is not just about + finding bugs, it''s about building confidence in your code.\n","cache_control":{"type":"ephemeral"}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '5160' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - Anthropic/Python 0.76.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.76.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.19 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAA/22RzU7rMBCFX8WaZZWiJGqvaHYXsQIhsaBsKIqMPW0sEjt4xgVU9d0ZFyp+V4nn + fHOOZ7yDIVjsoQHT62RxSsF75OlsOp/WZT0vF/UCCnBWiIE2bVndrsO/s+1ptYzXl+fz04tN0nZ5 + JQy/jpgpJNIblEIMfS5oIkesPUvJBM8of83d7sgzvmTl8GngvzEhWuc3ioMiNOyCV9VJrVZwIzyp + sFY3SCxEsQLFHaq1i8Qqu2WR30U1SIz0olWO1GSy9I6PjZNJoZ47Z7osWSQT3YNwmnLIR7vz1m2d + 
TbpXJgyjGHmW8KjWyR8uRYJIf+h1Pp2sAPb3BRCHsY2oZYsyDHrbcooePgTCp4TeyNQ+9X0B6bCo + ZgfOj4lbDo/oCZq6lEVp02FrxCrbt9+BqpwtjogQ9of8qz1H4NjhgFH37Xz40+4TqLqfhvsCQuKv + pZmEEMatM9iywyjT5je2OlrY798Az+eCZFYCAAA= + headers: + CF-RAY: + - 9c1a60c71c9c67cb-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 21 Jan 2026 22:51:47 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + anthropic-ratelimit-input-tokens-limit: + - '3000000' + anthropic-ratelimit-input-tokens-remaining: + - '3000000' + anthropic-ratelimit-input-tokens-reset: + - '2026-01-21T22:51:46Z' + anthropic-ratelimit-output-tokens-limit: + - '600000' + anthropic-ratelimit-output-tokens-remaining: + - '600000' + anthropic-ratelimit-output-tokens-reset: + - '2026-01-21T22:51:47Z' + anthropic-ratelimit-tokens-limit: + - '3600000' + anthropic-ratelimit-tokens-remaining: + - '3600000' + anthropic-ratelimit-tokens-reset: + - '2026-01-21T22:51:46Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CXMLqXaFZ4xWZExkXJyyb + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '2088' + status: + code: 200 + message: OK +- request: + body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is the first + type of testing mentioned in section 1.2?"},{"role":"assistant","content":"According + to section 1.2 \"Types of Testing,\" the first type of testing mentioned is + **Unit Testing**, which is described as \"Testing individual components or functions + in isolation.\""},{"role":"user","content":"What testing framework is mentioned + for Python?"}],"model":"claude-sonnet-4-5-20250929","system":[{"type":"text","text":"\n# + Comprehensive Guide to Software Testing Methods!\n\n## Chapter 1: Introduction + to Testing\n\nSoftware testing is a critical component of the software development + 
lifecycle. It ensures that applications\nfunction correctly, meet requirements, + and provide a positive user experience. This guide covers various\ntesting methodologies, + best practices, and tools used in modern software development.\n\n### 1.1 The + Importance of Testing\n\nTesting helps identify defects early in the development + process, reducing the cost of fixing issues later.\nStudies have shown that + the cost of fixing a bug increases exponentially as it progresses through the\ndevelopment + lifecycle. A bug found during requirements gathering might cost $1 to fix, while + the same bug\nfound in production could cost $100 or more.\n\n### 1.2 Types + of Testing\n\nThere are many types of testing, including:\n- Unit Testing: Testing + individual components or functions in isolation\n- Integration Testing: Testing + how components work together\n- End-to-End Testing: Testing the entire application + flow\n- Performance Testing: Testing application speed and scalability\n- Security + Testing: Testing for vulnerabilities and security issues\n- Usability Testing: + Testing user experience and interface design\n\n## Chapter 2: Unit Testing Best + Practices\n\nUnit testing focuses on testing the smallest testable parts of + an application. Here are some best practices:\n\n### 2.1 Write Tests First (TDD)\n\nTest-Driven + Development (TDD) is a methodology where tests are written before the actual + code. The process\nfollows a simple cycle: Red (write a failing test), Green + (write code to pass the test), Refactor (improve\nthe code while keeping tests + passing).\n\n### 2.2 Keep Tests Independent\n\nEach test should be independent + of others. Tests should not rely on the state created by previous tests.\nThis + ensures that tests can be run in any order and that failures are isolated and + easy to debug.\n\n### 2.3 Use Meaningful Names\n\nTest names should clearly + describe what is being tested and what the expected outcome is. 
A good test + name\nmight be \"test_user_registration_with_valid_email_succeeds\" rather than + just \"test_registration\".\n\n### 2.4 Test Edge Cases\n\nDon''t just test the + happy path. Consider edge cases like:\n- Empty inputs\n- Null or undefined values\n- + Very large inputs\n- Invalid formats\n- Boundary conditions\n\n## Chapter 3: + Integration Testing\n\nIntegration testing verifies that different modules or + services work together correctly.\n\n### 3.1 Database Integration\n\nWhen testing + database interactions, consider using:\n- Test databases separate from production\n- + Database transactions that roll back after each test\n- Mock data that represents + realistic scenarios\n\n### 3.2 API Integration\n\nAPI integration tests should + verify:\n- Correct HTTP status codes\n- Response format and schema\n- Error + handling\n- Authentication and authorization\n\n## Chapter 4: Performance Testing\n\nPerformance + testing ensures your application can handle expected load and scale appropriately.\n\n### + 4.1 Load Testing\n\nLoad testing simulates multiple users accessing the application + simultaneously. Key metrics include:\n- Response time under load\n- Throughput + (requests per second)\n- Error rates\n- Resource utilization (CPU, memory, network)\n\n### + 4.2 Stress Testing\n\nStress testing pushes the application beyond normal operational + capacity to find breaking points and\nunderstand how the system fails gracefully.\n\n## + Chapter 5: Continuous Integration and Testing\n\nModern development practices + integrate testing into the CI/CD pipeline.\n\n### 5.1 Automated Test Runs\n\nTests + should run automatically on every code change. This includes:\n- Running unit + tests on every commit\n- Running integration tests on pull requests\n- Running + end-to-end tests before deployment\n\n### 5.2 Test Coverage\n\nTest coverage + metrics help identify untested code. 
While 100% coverage isn''t always practical + or necessary,\nmaintaining good coverage helps ensure code quality. Focus on + critical paths and business logic.\n\n## Chapter 6: Testing Tools and Frameworks\n\nMany + tools exist to support testing efforts:\n\n### 6.1 Python Testing\n- pytest: + Feature-rich testing framework\n- unittest: Built-in Python testing module\n- + mock: Library for mocking objects\n\n### 6.2 JavaScript Testing\n- Jest: Popular + testing framework\n- Mocha: Flexible testing framework\n- Cypress: End-to-end + testing tool\n\n### 6.3 Other Tools\n- Selenium: Browser automation\n- JMeter: + Performance testing\n- Postman: API testing\n\n## Conclusion\n\nEffective testing + is essential for delivering high-quality software. By following best practices + and using\nappropriate tools, teams can catch bugs early, improve code quality, + and deliver better products to users.\n\nRemember: Testing is not just about + finding bugs, it''s about building confidence in your code.\n","cache_control":{"type":"ephemeral"}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '5456' + content-type: + - application/json + host: + - api.anthropic.com + user-agent: + - Anthropic/Python 0.76.0 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 0.76.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.10.19 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAA/2VSzU7DMAx+lSjHqUXtYEzrDYQQB5A4IA2JoiokZg1rnZI4wDTt3XHK/zgl+X4c + +0u2sncGOllJ3aloIA8OESg/ymf5tJjOisV0ITNpDSv6sGqKsqSr5fX87OVC6RbncblY3y7PkDW0 + GSCpIAS1Aga86xKgQrCBFBJD2iEB76q77Zee4C0x41LJE62dNxZXgpwIoMk6FMcHpajl9YZaPtxA + 
IOazWgpqPYCgD0A8etXDq/PrIJQH0fM9bAYjHp0XH+aqxhrLAzGZDJtkm0xELs5BUfSQe6vb/8Vq + nCZ9REvfjtNoO8otflb9NnGUsYMaD5Ojd3o9qi/tg1d+M7aRwKR0D088W5C7+0wGckPjQXHyHACg + abidFOhIBHiOgJqTwth1mYxjuNVWWhwiNeTWgEFW85LD5ReBRnOpNHjzV1B88UybPa4sjhb79nQF + DC304FXXzPr/5X7Yst1nd5l0kX5Dx+wI4F+shoYseB41fQqjvJG73TumCh7LhwIAAA== + headers: + CF-RAY: + - 9c1a60d4ab5e67cb-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 21 Jan 2026 22:51:49 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + anthropic-ratelimit-input-tokens-limit: + - '3000000' + anthropic-ratelimit-input-tokens-remaining: + - '3000000' + anthropic-ratelimit-input-tokens-reset: + - '2026-01-21T22:51:48Z' + anthropic-ratelimit-output-tokens-limit: + - '600000' + anthropic-ratelimit-output-tokens-remaining: + - '600000' + anthropic-ratelimit-output-tokens-reset: + - '2026-01-21T22:51:49Z' + anthropic-ratelimit-tokens-limit: + - '3600000' + anthropic-ratelimit-tokens-remaining: + - '3600000' + anthropic-ratelimit-tokens-reset: + - '2026-01-21T22:51:48Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CXMLqgrrchykwCdY7YRKM + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '2016' + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft b/py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft new file mode 100644 index 00000000..1ee7a837 --- /dev/null +++ b/py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft @@ -0,0 +1,298 @@ +interactions: +- request: + body: '{"messages":[{"content":"Count from 1 to 5.","role":"user"}],"model":"gpt-4o-mini","max_completion_tokens":50,"stream":true}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + 
connection: + - keep-alive + content-length: + - '124' + content-type: + - application/json + cookie: + - __cf_bm=W_Ukgb.mz8e1GW7CfhzN.QQaN09_xQq1uTHm3a.dJdU-1762561359-1.0.1.1-6IrkySxpZaL.1C65iH0iOLFfere0JxHCiasT6bak.RihYFMyJgIz2OuYJqcUey8c5vicjtorNby_Z_GJX.ZMIHa6PyzVrhqgfZZmtnnn.sA; + _cfuvid=jwWMA4k30hLPwBwTSCIdIeS5.m1TkcdYLYTt4YSTZhI-1762561359243-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://localhost:8000/v1/proxy/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"obfuscation":"uoycSw"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"obfuscation":"7R9sCOG"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"jNZOnCU"} + + + data: 
{"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"obfuscation":"NTkR0fq"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"obfuscation":"KhfgFBA"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"u5zk4uv"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"obfuscation":"yQyBcA4"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"obfuscation":"HhGcZch"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"GNLE7Ci"} + + + data: 
{"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"obfuscation":"d0EKjlZ"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}],"obfuscation":"YytmIuX"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"Umbehc1"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"obfuscation":"3xi8C7o"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"5"},"logprobs":null,"finish_reason":null}],"obfuscation":"N0uOsTp"} + + + data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"obfuscation":"RilMN7a"} + + + data: 
{"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"obfuscation":"oF"} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 99b0eaddeca8aaac-SJC + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Sat, 08 Nov 2025 00:22:42 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '275' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '519' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999992' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_05aebff8dd644228befd59a7372d3c93 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"content":"Count from 1 to 5.","role":"user"}],"model":"gpt-4o-mini","max_completion_tokens":50,"stream":true}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '124' + content-type: + - application/json + cookie: + - __cf_bm=.AxQfRhAvElThVl_Qz9zUVdqz_GtBGXwRQ0TVPIg5pc-1762561407-1.0.1.1-klsoMaFKHjzxOrHy2Zfd8Sc76RDHsMXURLAaIzORncnm47NI1MY0BqqBGOEsVXlZb.RdqeqpxzGFhl8DlRDjy.SqRfa2B4zEYdKZqQ2kVB0; + _cfuvid=0ohSoYMS21h1NkHWl4FeeVCp5aK2KHeEjclSm1NY7yY-1762561407934-0.0.1.1-604800000 + host: + - api.openai.com + 
user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: http://localhost:8000/v1/proxy/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"obfuscation":"ov7JiI"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"obfuscation":"eXpmCqg"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"C8QZXu8"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"obfuscation":"xdqGFpo"} + + + data: 
{"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"obfuscation":"O3SLgWG"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"0aoEi42"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"obfuscation":"2oO8rJa"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"obfuscation":"jOHTEGa"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"qGeoxr1"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"obfuscation":"uvMar7j"} + + + data: 
{"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}],"obfuscation":"4dFvFfq"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"GdoZztm"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"obfuscation":"NHxpCPR"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"5"},"logprobs":null,"finish_reason":null}],"obfuscation":"mfV8KdT"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"obfuscation":"EkPlssM"} + + + data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"obfuscation":"fj"} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 99b0ec0f7961ed3b-SJC + Connection: + - keep-alive + Content-Type: 
+ - text/event-stream; charset=utf-8 + Date: + - Sat, 08 Nov 2025 00:23:30 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '149' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '171' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999992' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_8afec9e4717b433e9c6900220b2dbd93 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_tool_usage b/py/src/braintrust/integrations/langchain/cassettes/test_tool_usage new file mode 100644 index 00000000..e21d44cc --- /dev/null +++ b/py/src/braintrust/integrations/langchain/cassettes/test_tool_usage @@ -0,0 +1,350 @@ +interactions: +- request: + body: '{"messages": [{"content": "What is 3 * 12", "role": "user"}], "model": + "gpt-4o-mini", "frequency_penalty": 0.0, "n": 1, "presence_penalty": 0.0, "stream": + false, "temperature": 1.0, "tools": [{"type": "function", "function": {"name": + "calculator", "description": "Can perform mathematical operations.", "parameters": + {"properties": {"input": {"properties": {"operation": {"description": "The type + of operation to execute.", "enum": ["add", "subtract", "multiply", "divide"], + "type": "string"}, "number1": {"description": "The first number to operate on.", + "type": "number"}, "number2": {"description": "The second number to operate + on.", "type": "number"}}, "required": 
["operation", "number1", "number2"], "type": + "object"}}, "required": ["input"], "type": "object"}}}], "top_p": 1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '725' + content-type: + - application/json + host: + - localhost:8000 + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.13 + method: POST + uri: http://localhost:8000/v1/proxy/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-CJ3pT0xTT4C4WwCqA5bvyrihLFrbd\",\n \"object\": + \"chat.completion\",\n \"created\": 1758658987,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n + \ \"id\": \"call_faZyqlGfMGsX50e2EuExUqK0\",\n \"type\": + \"function\",\n \"function\": {\n \"name\": \"calculator\",\n + \ \"arguments\": \"{\\\"input\\\":{\\\"operation\\\":\\\"multiply\\\",\\\"number1\\\":3,\\\"number2\\\":12}}\"\n + \ }\n }\n ],\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 97,\n \"completion_tokens\": + 26,\n \"total_tokens\": 123,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_51db84afab\"\n}\n" + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Expose-Headers: + - 
x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id + Connection: + - keep-alive + Date: + - Tue, 23 Sep 2025 20:23:07 GMT + Keep-Alive: + - timeout=5 + Transfer-Encoding: + - chunked + Vary: + - Origin + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + cf-ray: + - 983cc20cabc267ef-SJC + content-type: + - application/json + openai-organization: + - braintrust-data + openai-processing-ms: + - '648' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - _cfuvid=inx7Y1lMFCkI1jONo8plrYH7k2d1EAvkr2WlMIyrK.s-1758658987739-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-bt-cached: + - MISS + x-bt-internal-trace-id: + - 475d214543543ac965368ac2a190850f + x-content-type-options: + - nosniff + x-envoy-upstream-service-time: + - '663' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999992' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f6bcef66199c4bcaa6ad5864f7d1d9fb + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"content":"What is 3 * 12","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"tools":[{"type":"function","function":{"name":"calculator","description":"Can + perform mathematical operations.","parameters":{"properties":{"input":{"properties":{"operation":{"description":"The + type of operation to execute.","enum":["add","subtract","multiply","divide"],"type":"string"},"number1":{"description":"The + first number to operate on.","type":"number"},"number2":{"description":"The + second number to operate 
on.","type":"number"}},"required":["operation","number1","number2"],"type":"object"}},"required":["input"],"type":"object"}}}],"top_p":1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '725' + content-type: + - application/json + cookie: + - __cf_bm=W_Ukgb.mz8e1GW7CfhzN.QQaN09_xQq1uTHm3a.dJdU-1762561359-1.0.1.1-6IrkySxpZaL.1C65iH0iOLFfere0JxHCiasT6bak.RihYFMyJgIz2OuYJqcUey8c5vicjtorNby_Z_GJX.ZMIHa6PyzVrhqgfZZmtnnn.sA; + _cfuvid=jwWMA4k30hLPwBwTSCIdIeS5.m1TkcdYLYTt4YSTZhI-1762561359243-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xT0W6bMBR95yus+xymQAppedum7SFKNXXSqmqjQo65EG/G9myzLY3y7xMmBZKm + UnlAcI/PucfH1/uAEOAlZATYljrWaBF+/P51vnsyt/dSfzCrMmFyeXu3fr96untwX2DWMdTmJzL3 + zHrHVKMFOq5kDzOD1GGnGi3TOEmjRTr3QKNKFB2t1i68UmHDJQ/jeXwVzpdhdH1kbxVnaCEjPwJC + CNn7d+dTlvgPMuK1fKVBa2mNkA2LCAGjRFcBai23jkoHsxFkSjqUnXXZCjEBnFKiYFSIsXH/7Cff + Y1hUiALZUupvWK/u/z4k5e9PuNafV+vrab9eeqe9oaqVbAhpgg/17KwZISBpg8eGrBXUKXPGJgSo + qdsGpeucwz4HLnXrcsj2OSiNhnbaOWQ5NK1wXItdDrMcZNts0EQ5ZIvhL84hi+LDAU5aHIJL34+T + 8AxWraXiZapUSuW8AR/r4xE5DCcoVK2N2tgzKlRccrstDFLrg5meT/BsxFuA9mQEQBvVaFc49Qt9 + 05tlLwrjlI5gnB5BpxwVYz2KF7MLckWJjnI/IsNUMsq2WI7UcTppW3I1AYLJ1l+6uaTdb5/L+i3y + I8AYaodloQ2WnJ3ueFxmsLvEry0bQvaGwaL5wxkWjqPpjqPEiraiH3WwO+uwKSouazTacH+/oNJF + ks5plWKS3EBwCP4DAAD//wMAguKIhm0EAAA= + headers: + CF-RAY: + - 99b0ead42b8caaac-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 
08 Nov 2025 00:22:40 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '557' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '702' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_edb893697ec245fbb710a31d27a3ed78 + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"content":"What is 3 * 12","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"tools":[{"type":"function","function":{"name":"calculator","description":"Can + perform mathematical operations.","parameters":{"properties":{"input":{"properties":{"operation":{"description":"The + type of operation to execute.","enum":["add","subtract","multiply","divide"],"type":"string"},"number1":{"description":"The + first number to operate on.","type":"number"},"number2":{"description":"The + second number to operate on.","type":"number"}},"required":["operation","number1","number2"],"type":"object"}},"required":["input"],"type":"object"}}}],"top_p":1.0}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate, zstd + connection: + - keep-alive + content-length: + - '725' + content-type: + - application/json + cookie: + - 
__cf_bm=.AxQfRhAvElThVl_Qz9zUVdqz_GtBGXwRQ0TVPIg5pc-1762561407-1.0.1.1-klsoMaFKHjzxOrHy2Zfd8Sc76RDHsMXURLAaIzORncnm47NI1MY0BqqBGOEsVXlZb.RdqeqpxzGFhl8DlRDjy.SqRfa2B4zEYdKZqQ2kVB0; + _cfuvid=0ohSoYMS21h1NkHWl4FeeVCp5aK2KHeEjclSm1NY7yY-1762561407934-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.108.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.108.2 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.10 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xTTY/aMBC951dYc4YqCV+7udEPqSdaVeqh26wiY0/AXce2/MHCIv57lQSSwFKp + OUT2PM+b5+eZY0QICA4ZAbalnlVGjj89/UjkfIlfVm+/4o9P4dW/2a/S0dfl5PAAozpDr/8g85es + D0xXRqIXWrUws0g91qzJYp7O5sk0fmyASnOUddrG+PFUjyuhxDiN0+k4XoyTMznbasHQQUZ+R4QQ + cmz+tU7FcQ8ZiUeXSIXO0Q1C1h0iBKyWdQSoc8J5qjyMepBp5VHV0lWQcgB4rWXBqJR94fY7Dta9 + WVTK4udkP9upwHY7/nm1XH2Pefi2f4n5oF5LfTCNoDIo1pk0wLt4dlOMEFC0wnNBFiT12t5kEwLU + bkKFytfK4ZiDUCb4HLJjDtqgpTV3DlkOVZBeGHnIYZSDCtUabZJDNul2aQ5Zkp5OcFXiFN1bPw/M + s1gGR+V7V6lS2jcCGlufz8ipe0GpN8bqtbtJhVIo4baFReoaY4bvE12ENBIgXLUAGKsr4wuvX7Ap + +rhoSaHv0h5M52fQa09lH0/SyegOXcHRU9G0SNeVjLIt8j61704auNADIBpc/b2ae9zt9YXa/A99 + DzCGxiMvjEUu2PWN+2MW6yH+17HO5EYwOLQ7wbDwAm39HBxLGmQ7WuAOzmNVlEJt0BormvmC0hSz + hK8fprSka4hO0V8AAAD//wMAMU2sv20EAAA= + headers: + CF-RAY: + - 99b0ec04f9abed3b-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sat, 08 Nov 2025 00:23:29 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + 
openai-organization: + - braintrust-data + openai-processing-ms: + - '614' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '756' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_b741763f424444f38ded6343a488e723 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/langchain/conftest.py b/py/src/braintrust/integrations/langchain/conftest.py new file mode 100644 index 00000000..078502b2 --- /dev/null +++ b/py/src/braintrust/integrations/langchain/conftest.py @@ -0,0 +1,58 @@ +import os +from pathlib import Path + +import pytest +from braintrust.logger import ( + TEST_API_KEY, + Logger, + _internal_reset_global_state, + _internal_with_memory_background_logger, + _MemoryBackgroundLogger, +) +from braintrust.test_helpers import init_test_logger + +from braintrust.wrappers.langchain import clear_global_handler + + +@pytest.fixture(autouse=True) +def setup_braintrust_langchain(): + os.environ["BRAINTRUST_SYNC_FLUSH"] = "1" + os.environ["BRAINTRUST_API_URL"] = "http://localhost:8000" + os.environ["BRAINTRUST_APP_URL"] = "http://localhost:3000" + os.environ["BRAINTRUST_API_KEY"] = TEST_API_KEY + os.environ["ANTHROPIC_API_KEY"] = "your_anthropic_api_key_here" + os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here" + os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1/proxy" + + _internal_reset_global_state() + clear_global_handler() + yield + + +@pytest.fixture(scope="module") +def vcr_config(): + record_mode = "none" if (os.environ.get("CI") or os.environ.get("GITHUB_ACTIONS")) else "once" + + return { + "filter_headers": [ + "authorization", + "x-goog-api-key", + "x-api-key", + "api-key", + 
"openai-api-key", + ], + "record_mode": record_mode, + "match_on": ["uri", "method", "body"], + "cassette_library_dir": str(Path(__file__).parent / "cassettes"), + "path_transformer": lambda path: path.replace(".yaml", ""), + } + + +@pytest.fixture +def logger_memory_logger(): + logger = init_test_logger("langchain-py") + with _internal_with_memory_background_logger() as bgl: + yield (logger, bgl) + + +LoggerMemoryLogger = tuple[Logger, _MemoryBackgroundLogger] diff --git a/py/src/braintrust/integrations/langchain/integration.py b/py/src/braintrust/integrations/langchain/integration.py new file mode 100644 index 00000000..e22cdc0f --- /dev/null +++ b/py/src/braintrust/integrations/langchain/integration.py @@ -0,0 +1,34 @@ +"""LangChain integration orchestration.""" + +from typing import Any + +from braintrust.integrations.base import BasePatcher, BaseIntegration + + +class LangChainCallbackPatcher(BasePatcher): + """Patcher that registers a global BraintrustCallbackHandler with LangChain.""" + + name = "langchain_callback" + _patched: bool = False + + @classmethod + def is_patched(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool: + return cls._patched + + @classmethod + def patch(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool: + from .tracing import BraintrustCallbackHandler, _ensure_hook_registered, set_global_handler + + _ensure_hook_registered() + handler = BraintrustCallbackHandler() + set_global_handler(handler) + cls._patched = True + return True + + +class LangChainIntegration(BaseIntegration): + """Braintrust instrumentation for LangChain.""" + + name = "langchain" + import_names = ("langchain_core",) + patchers = (LangChainCallbackPatcher,) diff --git a/py/src/braintrust/integrations/langchain/test_langchain.py b/py/src/braintrust/integrations/langchain/test_langchain.py new file mode 100644 index 00000000..827cf777 --- /dev/null +++ 
b/py/src/braintrust/integrations/langchain/test_langchain.py @@ -0,0 +1,1380 @@ +# pyright: reportTypedDictNotRequiredAccess=none +import uuid +from typing import Any, Dict, List, Sequence, Union, cast +from unittest.mock import ANY + +import pytest +from braintrust.logger import flush +from langchain_anthropic import ChatAnthropic +from langchain_core.callbacks import BaseCallbackHandler, CallbackManager +from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.prompts.prompt import PromptTemplate +from langchain_core.runnables import RunnableMap, RunnableSerializable +from langchain_core.tools import tool +from langchain_openai import ChatOpenAI +from pydantic import BaseModel, Field + +from braintrust.integrations.langchain import BraintrustCallbackHandler, set_global_handler +from braintrust.wrappers.test_utils import verify_autoinstrument_script + +from .conftest import LoggerMemoryLogger + +# --------------------------------------------------------------------------- +# Helpers (inlined from the integration package) +# --------------------------------------------------------------------------- + + +def assert_matches_object(actual: Any, expected: Any, ignore_order: bool = False) -> None: + """Assert that actual contains all key-value pairs from expected.""" + if isinstance(expected, (list, tuple)): + assert isinstance(actual, (list, tuple)), f"Expected sequence but got {type(actual)}" + assert len(actual) >= len(expected), ( + f"Expected sequence of length >= {len(expected)} but got length {len(actual)}" + ) + if not ignore_order: + for i, expected_item in enumerate(expected): + assert_matches_object(actual[i], expected_item) + else: + for expected_item in expected: + matched = False + for actual_item in actual: + try: + assert_matches_object(actual_item, expected_item) + matched = True + except Exception: + pass + assert matched, ( + f"Expected {expected_item} in 
unordered sequence but couldn't find match in {actual}" + ) + elif isinstance(expected, dict): + assert isinstance(actual, dict), f"Expected dict but got {type(actual)}" + for k, v in expected.items(): + assert k in actual, f"Missing key {k}" + if v is ANY: + continue + if isinstance(v, (dict, list, tuple)): + assert_matches_object(actual[k], v) + else: + assert actual[k] == v, f"Key {k}: expected {v} but got {actual[k]}" + else: + assert actual == expected, f"Expected {expected} but got {actual}" + + +def find_spans_by_attributes(spans: List[Any], **attributes: Any) -> List[Any]: + """Find all spans matching the given span_attributes.""" + matching = [] + for span in spans: + if "span_attributes" not in span: + continue + if all(span["span_attributes"].get(k) == v for k, v in attributes.items()): + matching.append(span) + return matching + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +@pytest.mark.vcr +def test_llm_calls(logger_memory_logger: LoggerMemoryLogger): + logger, memory_logger = logger_memory_logger + assert not memory_logger.pop() + + handler = BraintrustCallbackHandler(logger=logger) + prompt = ChatPromptTemplate.from_template("What is 1 + {number}?") + model = ChatOpenAI( + model="gpt-4o-mini", + temperature=1, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + n=1, + ) + chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + chain.invoke({"number": "2"}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}) + + spans = memory_logger.pop() + assert len(spans) == 3 + + root_span_id = spans[0]["span_id"] + + assert_matches_object( + spans, + [ + { + "span_attributes": { + "name": "RunnableSequence", + "type": "task", + }, + "input": {"number": "2"}, + "output": { + "content": ANY, + "additional_kwargs": ANY, + "response_metadata": ANY, + "type": "ai", + "name": ANY, + "id": ANY, + 
"example": ANY, + "tool_calls": ANY, + "invalid_tool_calls": ANY, + "usage_metadata": ANY, + }, + "metadata": {"tags": []}, + "span_id": root_span_id, + "root_span_id": root_span_id, + }, + { + "span_attributes": {"name": "ChatPromptTemplate"}, + "input": {"number": "2"}, + "output": { + "messages": [ + { + "content": ANY, + "additional_kwargs": {}, + "response_metadata": {}, + "type": "human", + "name": None, + "id": None, + } + ] + }, + "metadata": {"tags": ["seq:step:1"]}, + "root_span_id": root_span_id, + "span_parents": [root_span_id], + }, + { + "span_attributes": {"name": "ChatOpenAI", "type": "llm"}, + "input": [ + [ + { + "content": ANY, + "additional_kwargs": {}, + "response_metadata": {}, + "type": "human", + "name": None, + "id": None, + "example": ANY, + } + ] + ], + "output": { + "generations": [ + [ + { + "text": ANY, + "generation_info": ANY, + "type": "ChatGeneration", + "message": { + "content": ANY, + "additional_kwargs": ANY, + "response_metadata": ANY, + "type": "ai", + "name": None, + "id": ANY, + }, + } + ] + ], + "llm_output": { + "token_usage": { + "completion_tokens": ANY, + "prompt_tokens": ANY, + "total_tokens": ANY, + }, + "model_name": "gpt-4o-mini-2024-07-18", + }, + "run": None, + "type": "LLMResult", + }, + "metrics": { + "start": ANY, + "total_tokens": ANY, + "prompt_tokens": ANY, + "completion_tokens": ANY, + "end": ANY, + }, + "metadata": { + "tags": ["seq:step:2"], + "model": "gpt-4o-mini-2024-07-18", + }, + "root_span_id": root_span_id, + "span_parents": [root_span_id], + }, + ], + ) + + +@pytest.mark.vcr +def test_global_handler(logger_memory_logger: LoggerMemoryLogger): + logger, memory_logger = logger_memory_logger + assert not memory_logger.pop() + + handler = BraintrustCallbackHandler(logger=logger, debug=True) + set_global_handler(handler) + + # Make sure the handler is registered in the LangChain library + manager = CallbackManager.configure() + assert next((h for h in manager.handlers if isinstance(h, 
BraintrustCallbackHandler)), None) == handler + + prompt = ChatPromptTemplate.from_template("What is 1 + {number}?") + model = ChatOpenAI( + model="gpt-4o-mini", + temperature=1, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + n=1, + ) + chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + + message = chain.invoke({"number": "2"}) + + spans = memory_logger.pop() + assert len(spans) > 0 + + root_span_id = spans[0]["span_id"] + + assert_matches_object( + spans, + [ + { + "span_attributes": { + "name": "RunnableSequence", + "type": "task", + }, + "input": {"number": "2"}, + "output": { + "content": ANY, + "additional_kwargs": ANY, + "response_metadata": ANY, + "type": "ai", + "name": ANY, + "id": ANY, + "example": ANY, + "tool_calls": ANY, + "invalid_tool_calls": ANY, + "usage_metadata": ANY, + }, + "metadata": {"tags": []}, + "span_id": root_span_id, + "root_span_id": root_span_id, + }, + { + "span_attributes": {"name": "ChatPromptTemplate"}, + "input": {"number": "2"}, + "output": { + "messages": [ + { + "content": ANY, + "additional_kwargs": {}, + "response_metadata": {}, + "type": "human", + "name": None, + "id": None, + } + ] + }, + "metadata": {"tags": ["seq:step:1"]}, + "root_span_id": root_span_id, + "span_parents": [root_span_id], + }, + { + "span_attributes": {"name": "ChatOpenAI", "type": "llm"}, + "input": [ + [ + { + "content": ANY, + "additional_kwargs": {}, + "response_metadata": {}, + "type": "human", + "name": None, + "id": None, + "example": ANY, + } + ] + ], + "output": { + "generations": [ + [ + { + "text": ANY, + "generation_info": ANY, + "type": "ChatGeneration", + "message": { + "content": ANY, + "additional_kwargs": ANY, + "response_metadata": ANY, + "type": "ai", + "name": None, + "id": ANY, + }, + } + ] + ], + "llm_output": { + "token_usage": { + "completion_tokens": ANY, + "prompt_tokens": ANY, + "total_tokens": ANY, + }, + "model_name": "gpt-4o-mini-2024-07-18", + }, + "run": None, + "type": "LLMResult", 
+ }, + "metrics": { + "start": ANY, + "total_tokens": ANY, + "prompt_tokens": ANY, + "completion_tokens": ANY, + "end": ANY, + }, + "metadata": { + "tags": ["seq:step:2"], + "model": "gpt-4o-mini-2024-07-18", + }, + "root_span_id": root_span_id, + "span_parents": [root_span_id], + }, + ], + ) + + assert message.content == "1 + 2 equals 3." + + +@pytest.mark.vcr +def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger): + logger, memory_logger = logger_memory_logger + assert not memory_logger.pop() + + handler = BraintrustCallbackHandler(logger=logger) + prompt = ChatPromptTemplate.from_template("{history} User: {input}") + model = ChatOpenAI(model="gpt-4o-mini") + chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + + memory = {"history": "Assistant: Hello! How can I assist you today?"} + chain.invoke( + {"input": "What's your name?", **memory}, + config={"callbacks": [cast(BaseCallbackHandler, handler)], "tags": ["test"]}, + ) + + spans = memory_logger.pop() + assert len(spans) == 3 + + root_span_id = spans[0]["span_id"] + + assert_matches_object( + spans, + [ + { + "span_attributes": { + "name": "RunnableSequence", + "type": "task", + }, + "input": {"input": "What's your name?", "history": "Assistant: Hello! How can I assist you today?"}, + "output": { + "content": ANY, + "additional_kwargs": ANY, + "response_metadata": ANY, + "type": "ai", + }, + "metadata": {"tags": ["test"]}, + "span_id": root_span_id, + "root_span_id": root_span_id, + }, + { + "span_attributes": {"name": "ChatPromptTemplate"}, + "input": {"input": "What's your name?", "history": "Assistant: Hello! 
How can I assist you today?"}, + "output": { + "messages": [ + { + "content": ANY, + "additional_kwargs": {}, + "response_metadata": {}, + "type": "human", + "name": None, + "id": None, + } + ] + }, + "metadata": {"tags": ["seq:step:1", "test"]}, + "root_span_id": root_span_id, + "span_parents": [root_span_id], + }, + { + "span_attributes": {"name": "ChatOpenAI", "type": "llm"}, + "input": [ + [ + { + "content": ANY, + "additional_kwargs": {}, + "response_metadata": {}, + "type": "human", + "name": None, + "id": None, + "example": ANY, + } + ] + ], + "output": { + "generations": [ + [ + { + "text": ANY, + "generation_info": ANY, + "type": "ChatGeneration", + "message": { + "content": ANY, + "additional_kwargs": ANY, + "response_metadata": ANY, + "type": "ai", + "name": None, + "id": ANY, + }, + } + ] + ], + "llm_output": { + "token_usage": { + "completion_tokens": ANY, + "prompt_tokens": ANY, + "total_tokens": ANY, + }, + "model_name": "gpt-4o-mini-2024-07-18", + }, + "run": None, + "type": "LLMResult", + }, + "metrics": { + "start": ANY, + "total_tokens": ANY, + "prompt_tokens": ANY, + "completion_tokens": ANY, + "end": ANY, + }, + "metadata": { + "tags": ["seq:step:2", "test"], + "model": "gpt-4o-mini-2024-07-18", + }, + "root_span_id": root_span_id, + "span_parents": [root_span_id], + }, + ], + ) + + +@pytest.mark.vcr +def test_tool_usage(logger_memory_logger: LoggerMemoryLogger): + logger, memory_logger = logger_memory_logger + assert not memory_logger.pop() + + handler = BraintrustCallbackHandler(logger=logger) + + class CalculatorInput(BaseModel): + operation: str = Field( + description="The type of operation to execute.", + json_schema_extra={"enum": ["add", "subtract", "multiply", "divide"]}, + ) + number1: float = Field(description="The first number to operate on.") + number2: float = Field(description="The second number to operate on.") + + @tool + def calculator(input: CalculatorInput) -> str: + """Can perform mathematical operations.""" + if 
input.operation == "add": + return str(input.number1 + input.number2) + elif input.operation == "subtract": + return str(input.number1 - input.number2) + elif input.operation == "multiply": + return str(input.number1 * input.number2) + elif input.operation == "divide": + return str(input.number1 / input.number2) + else: + raise ValueError("Invalid operation.") + + model = ChatOpenAI( + model="gpt-4o-mini", + temperature=1, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + n=1, + ) + model_with_tools = model.bind_tools([calculator]) + model_with_tools.invoke("What is 3 * 12", config={"callbacks": [cast(BaseCallbackHandler, handler)]}) + + spans = memory_logger.pop() + root_span_id = spans[0]["span_id"] + + assert_matches_object( + spans, + [ + { + "span_id": root_span_id, + "root_span_id": root_span_id, + "span_attributes": { + "name": "ChatOpenAI", + "type": "llm", + }, + "input": [ + [ + { + "content": ANY, + "additional_kwargs": {}, + "response_metadata": {}, + "type": "human", + "name": None, + "id": None, + "example": ANY, + } + ] + ], + "metadata": { + "tags": [], + "model": "gpt-4o-mini-2024-07-18", + "invocation_params": { + "tools": [ + { + "type": "function", + "function": { + "name": "calculator", + "description": "Can perform mathematical operations.", + "parameters": ANY, + }, + } + ], + }, + }, + "output": { + "generations": [ + [ + { + "generation_info": ANY, + "type": "ChatGeneration", + "message": { + "content": ANY, + "type": "ai", + "additional_kwargs": { + "tool_calls": ANY, + }, + "response_metadata": ANY, + "name": None, + "id": ANY, + }, + } + ] + ], + "llm_output": { + "token_usage": { + "completion_tokens": ANY, + "prompt_tokens": ANY, + "total_tokens": ANY, + }, + "model_name": "gpt-4o-mini-2024-07-18", + }, + "run": None, + "type": "LLMResult", + }, + "metrics": { + "start": ANY, + "total_tokens": ANY, + "prompt_tokens": ANY, + "completion_tokens": ANY, + "end": ANY, + }, + } + ], + ) + + +@pytest.mark.vcr 
+@pytest.mark.skip(reason="Not yet working with VCR.") +def test_parallel_execution(logger_memory_logger: LoggerMemoryLogger): + logger, memory_logger = logger_memory_logger + assert not memory_logger.pop() + + handler = BraintrustCallbackHandler(logger=logger) + + model = ChatOpenAI( + model="gpt-4o-mini", + temperature=1, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + n=1, + ) + + joke_chain = PromptTemplate.from_template("Tell me a joke about {topic}").pipe(model) + poem_chain = PromptTemplate.from_template("write a 2-line poem about {topic}").pipe(model) + + map_chain = RunnableMap( + { + "joke": joke_chain, + "poem": poem_chain, + } + ) + + map_chain.invoke({"topic": "bear"}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}) + + spans = memory_logger.pop() + + llm_spans = find_spans_by_attributes(spans, name="ChatOpenAI") + assert len(llm_spans) == 2 + + for span in llm_spans: + assert_matches_object( + span, + { + "span_attributes": {"name": "ChatOpenAI", "type": "llm"}, + "metadata": { + "tags": ["seq:step:2"], + "model": "gpt-4o-mini-2024-07-18", + }, + "input": [ + [ + { + "content": ANY, + "additional_kwargs": {}, + "response_metadata": {}, + "type": "human", + } + ] + ], + "output": { + "generations": [ + [ + { + "text": ANY, + "generation_info": ANY, + "type": "ChatGeneration", + "message": { + "content": ANY, + "type": "ai", + }, + } + ] + ], + "llm_output": { + "token_usage": { + "completion_tokens": ANY, + "prompt_tokens": ANY, + "total_tokens": ANY, + }, + "model_name": "gpt-4o-mini-2024-07-18", + }, + "type": "LLMResult", + }, + "metrics": { + "start": ANY, + "total_tokens": ANY, + "prompt_tokens": ANY, + "completion_tokens": ANY, + "end": ANY, + }, + }, + ) + + +@pytest.mark.vcr +def test_langgraph_state_management(logger_memory_logger: LoggerMemoryLogger): + logger, memory_logger = logger_memory_logger + assert not memory_logger.pop() + + try: + from langgraph.graph import END, START, StateGraph + except ImportError: + 
pytest.skip("langgraph not installed") + + handler = BraintrustCallbackHandler(logger=logger) + model = ChatOpenAI( + model="gpt-4o-mini", + temperature=1, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + n=1, + ) + + def say_hello(state: Dict[str, str]): + response = model.invoke("Say hello") + return cast(Union[str, List[str], Dict[str, str]], response.content) + + def say_bye(state: Dict[str, str]): + print("From the 'sayBye' node: Bye world!") + return "Bye" + + workflow = ( + StateGraph(state_schema=Dict[str, str]) + .add_node("sayHello", say_hello) + .add_node("sayBye", say_bye) + .add_edge(START, "sayHello") + .add_edge("sayHello", "sayBye") + .add_edge("sayBye", END) + ) + + graph = workflow.compile() + graph.invoke({}, config={"callbacks": [handler]}) + + spans = memory_logger.pop() + + langgraph_spans = find_spans_by_attributes(spans, name="LangGraph") + say_hello_spans = find_spans_by_attributes(spans, name="sayHello") + say_bye_spans = find_spans_by_attributes(spans, name="sayBye") + llm_spans = find_spans_by_attributes(spans, name="ChatOpenAI") + + assert len(langgraph_spans) == 1 + assert len(say_hello_spans) == 1 + assert len(say_bye_spans) == 1 + assert len(llm_spans) == 1 + + assert_matches_object( + langgraph_spans[0], + { + "span_attributes": { + "name": "LangGraph", + "type": "task", + }, + "input": {}, + "metadata": { + "tags": [], + }, + "output": "Bye", + }, + ) + + assert_matches_object( + say_hello_spans[0], + { + "span_attributes": { + "name": "sayHello", + }, + "input": {}, + "metadata": { + "tags": ["graph:step:1"], + }, + "output": ANY, + }, + ) + + assert_matches_object( + llm_spans[0], + { + "span_attributes": { + "name": "ChatOpenAI", + "type": "llm", + }, + "input": [ + [ + { + "content": ANY, + "additional_kwargs": {}, + "response_metadata": {}, + "type": "human", + "name": None, + "id": None, + "example": ANY, + } + ] + ], + "metadata": { + "model": "gpt-4o-mini-2024-07-18", + "tags": [], + }, + "output": { + 
"generations": [ + [ + { + "text": ANY, + "generation_info": ANY, + "type": "ChatGeneration", + "message": { + "content": ANY, + "additional_kwargs": ANY, + "response_metadata": ANY, + "type": "ai", + "name": None, + "id": ANY, + }, + } + ] + ], + "llm_output": { + "token_usage": { + "completion_tokens": ANY, + "prompt_tokens": ANY, + "total_tokens": ANY, + }, + "model_name": "gpt-4o-mini-2024-07-18", + }, + "run": None, + "type": "LLMResult", + }, + "metrics": { + "start": ANY, + "total_tokens": ANY, + "prompt_tokens": ANY, + "completion_tokens": ANY, + "end": ANY, + }, + }, + ) + + assert_matches_object( + say_bye_spans[0], + { + "span_attributes": { + "name": "sayBye", + }, + "input": ANY, + "metadata": { + "tags": ["graph:step:2"], + }, + "output": "Bye", + }, + ) + + +@pytest.mark.vcr +def test_chain_null_values(logger_memory_logger: LoggerMemoryLogger): + logger, memory_logger = logger_memory_logger + assert not memory_logger.pop() + + handler = BraintrustCallbackHandler(logger=logger) + + run_id = uuid.UUID("f81d4fae-7dec-11d0-a765-00a0c91e6bf6") + + handler.on_chain_start( + {"id": ["TestChain"], "lc": 1, "type": "not_implemented"}, + {"input1": "value1", "input2": None, "input3": None}, + run_id=run_id, + parent_run_id=None, + tags=["test"], + ) + + handler.on_chain_end( + {"output1": "value1", "output2": None, "output3": None}, + run_id=run_id, + parent_run_id=None, + tags=["test"], + ) + + flush() + + spans = memory_logger.pop() + root_span_id = spans[0]["span_id"] + + assert_matches_object( + spans, + [ + { + "root_span_id": root_span_id, + "span_attributes": { + "name": "TestChain", + "type": "task", + }, + "input": { + "input1": "value1", + "input2": None, + "input3": None, + }, + "metadata": { + "tags": ["test"], + }, + "output": { + "output1": "value1", + "output2": None, + "output3": None, + }, + }, + ], + ) + + +def test_consecutive_eval_calls(logger_memory_logger: LoggerMemoryLogger): + from braintrust import Eval + + logger, memory_logger = 
logger_memory_logger + assert not memory_logger.pop() + + def task_fn(input, hooks): + handler = BraintrustCallbackHandler(logger=logger) + + run_id = uuid.uuid4() + + handler.on_chain_start( + {"id": ["RunnableSequence"], "lc": 1, "type": "not_implemented"}, + {"number": str(input)}, + run_id=run_id, + parent_run_id=None, + ) + + output = f"Result for {input}" + + handler.on_chain_end( + {"content": output}, + run_id=run_id, + parent_run_id=None, + ) + + return output + + with logger.start_span(name="test-consecutive-eval", span_attributes={"type": "eval"}) as parent_span: + Eval( + "test-consecutive-eval", + data=[{"input": 1, "expected": "Result for 1"}, {"input": 2, "expected": "Result for 2"}], + task=task_fn, + scores=[], + parent=parent_span.id, + ) + + flush() + + spans = memory_logger.pop() + + assert len(spans) == 5, f"Expected 5 spans, got {len(spans)}" + + root_eval_span = [s for s in spans if s.get("span_attributes", {}).get("name") == "test-consecutive-eval"][0] + root_eval_span_id = root_eval_span["span_id"] + + eval_record_spans = [ + s + for s in spans + if s.get("span_attributes", {}).get("name") == "eval" and root_eval_span_id in (s.get("span_parents") or []) + ] + assert len(eval_record_spans) == 2, f"Expected 2 eval record spans, got {len(eval_record_spans)}" + + eval_record_spans_sorted = sorted(eval_record_spans, key=lambda s: s.get("input", 0)) + eval_record_1 = eval_record_spans_sorted[0] + eval_record_2 = eval_record_spans_sorted[1] + + task_spans = [s for s in spans if s.get("span_attributes", {}).get("name") == "task"] + assert len(task_spans) == 2, f"Expected 2 task spans, got {len(task_spans)}" + + task_spans_sorted = sorted(task_spans, key=lambda s: s.get("input", 0)) + task_1_span = task_spans_sorted[0] + task_2_span = task_spans_sorted[1] + + assert_matches_object( + [root_eval_span], + [ + { + "span_id": root_eval_span_id, + "root_span_id": root_eval_span_id, + "span_attributes": { + "name": "test-consecutive-eval", + "type": 
"eval", + }, + } + ], + ) + + assert_matches_object( + [eval_record_1], + [ + { + "root_span_id": root_eval_span_id, + "span_parents": [root_eval_span_id], + "span_attributes": {"name": "eval"}, + "input": 1, + "output": "Result for 1", + } + ], + ) + + assert_matches_object( + [eval_record_2], + [ + { + "root_span_id": root_eval_span_id, + "span_parents": [root_eval_span_id], + "span_attributes": {"name": "eval"}, + "input": 2, + "output": "Result for 2", + } + ], + ) + + assert_matches_object( + [task_1_span], + [ + { + "root_span_id": root_eval_span_id, + "span_parents": [eval_record_1["span_id"]], + "span_attributes": {"name": "task"}, + "input": 1, + "output": "Result for 1", + } + ], + ) + + assert_matches_object( + [task_2_span], + [ + { + "root_span_id": root_eval_span_id, + "span_parents": [eval_record_2["span_id"]], + "span_attributes": {"name": "task"}, + "input": 2, + "output": "Result for 2", + } + ], + ) + + +@pytest.mark.vcr +def test_streaming_ttft(logger_memory_logger: LoggerMemoryLogger): + logger, memory_logger = logger_memory_logger + assert not memory_logger.pop() + + handler = BraintrustCallbackHandler(logger=logger) + prompt = ChatPromptTemplate.from_template("Count from 1 to 5.") + model = ChatOpenAI( + model="gpt-4o-mini", + max_completion_tokens=50, + streaming=True, + ) + chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + + chunks: List[str] = [] + for chunk in chain.stream({}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}): + if chunk.content: + chunks.append(str(chunk.content)) + + assert len(chunks) > 0, "Expected to receive streaming chunks" + + spans = memory_logger.pop() + assert len(spans) == 3 + + llm_spans = find_spans_by_attributes(spans, name="ChatOpenAI", type="llm") + assert len(llm_spans) == 1 + llm_span = llm_spans[0] + + assert_matches_object( + [llm_span], + [ + { + "id": ANY, + "input": [ + [ + { + "additional_kwargs": {}, + "content": "Count from 1 to 5.", + "example": False, + 
"id": None, + "name": None, + "response_metadata": {}, + "type": "human", + } + ] + ], + "metadata": { + "braintrust": { + "integration_name": "langchain-py", + } + }, + "metrics": { + "time_to_first_token": ANY, + }, + "output": { + "generations": [ + [ + { + "generation_info": { + "finish_reason": "stop", + "model_name": ANY, + }, + "message": { + "content": "1, 2, 3, 4, 5.", + "type": "AIMessageChunk", + }, + "text": "1, 2, 3, 4, 5.", + "type": "ChatGenerationChunk", + } + ] + ], + "type": "LLMResult", + }, + "project_id": "langchain-py", + "span_attributes": {"name": "ChatOpenAI", "type": "llm"}, + } + ], + ) + + +@pytest.mark.vcr +def test_prompt_caching_tokens(logger_memory_logger: LoggerMemoryLogger): + logger, memory_logger = logger_memory_logger + assert not memory_logger.pop() + + handler = BraintrustCallbackHandler(logger=logger) + + model = ChatAnthropic(model="claude-sonnet-4-5-20250929") + + # XXX: if you need to change the cassette or test, you'll want to change the text below to invalidate the stored cache. + + # Anthropic prompt caching requires a minimum of 1024 tokens for Claude Sonnet models. + # This static text (~1500 tokens) ensures we meet that threshold consistently. + # See: https://platform.claude.com/docs/en/build-with-claude/prompt-caching + long_text_for_caching = """ +# Comprehensive Guide to Software Testing Methods! + +## Chapter 1: Introduction to Testing + +Software testing is a critical component of the software development lifecycle. It ensures that applications +function correctly, meet requirements, and provide a positive user experience. This guide covers various +testing methodologies, best practices, and tools used in modern software development. + +### 1.1 The Importance of Testing + +Testing helps identify defects early in the development process, reducing the cost of fixing issues later. +Studies have shown that the cost of fixing a bug increases exponentially as it progresses through the +development lifecycle. 
A bug found during requirements gathering might cost $1 to fix, while the same bug +found in production could cost $100 or more. + +### 1.2 Types of Testing + +There are many types of testing, including: +- Unit Testing: Testing individual components or functions in isolation +- Integration Testing: Testing how components work together +- End-to-End Testing: Testing the entire application flow +- Performance Testing: Testing application speed and scalability +- Security Testing: Testing for vulnerabilities and security issues +- Usability Testing: Testing user experience and interface design + +## Chapter 2: Unit Testing Best Practices + +Unit testing focuses on testing the smallest testable parts of an application. Here are some best practices: + +### 2.1 Write Tests First (TDD) + +Test-Driven Development (TDD) is a methodology where tests are written before the actual code. The process +follows a simple cycle: Red (write a failing test), Green (write code to pass the test), Refactor (improve +the code while keeping tests passing). + +### 2.2 Keep Tests Independent + +Each test should be independent of others. Tests should not rely on the state created by previous tests. +This ensures that tests can be run in any order and that failures are isolated and easy to debug. + +### 2.3 Use Meaningful Names + +Test names should clearly describe what is being tested and what the expected outcome is. A good test name +might be "test_user_registration_with_valid_email_succeeds" rather than just "test_registration". + +### 2.4 Test Edge Cases + +Don't just test the happy path. Consider edge cases like: +- Empty inputs +- Null or undefined values +- Very large inputs +- Invalid formats +- Boundary conditions + +## Chapter 3: Integration Testing + +Integration testing verifies that different modules or services work together correctly. 
+ +### 3.1 Database Integration + +When testing database interactions, consider using: +- Test databases separate from production +- Database transactions that roll back after each test +- Mock data that represents realistic scenarios + +### 3.2 API Integration + +API integration tests should verify: +- Correct HTTP status codes +- Response format and schema +- Error handling +- Authentication and authorization + +## Chapter 4: Performance Testing + +Performance testing ensures your application can handle expected load and scale appropriately. + +### 4.1 Load Testing + +Load testing simulates multiple users accessing the application simultaneously. Key metrics include: +- Response time under load +- Throughput (requests per second) +- Error rates +- Resource utilization (CPU, memory, network) + +### 4.2 Stress Testing + +Stress testing pushes the application beyond normal operational capacity to find breaking points and +understand how the system fails gracefully. + +## Chapter 5: Continuous Integration and Testing + +Modern development practices integrate testing into the CI/CD pipeline. + +### 5.1 Automated Test Runs + +Tests should run automatically on every code change. This includes: +- Running unit tests on every commit +- Running integration tests on pull requests +- Running end-to-end tests before deployment + +### 5.2 Test Coverage + +Test coverage metrics help identify untested code. While 100% coverage isn't always practical or necessary, +maintaining good coverage helps ensure code quality. Focus on critical paths and business logic. 
+ +## Chapter 6: Testing Tools and Frameworks + +Many tools exist to support testing efforts: + +### 6.1 Python Testing +- pytest: Feature-rich testing framework +- unittest: Built-in Python testing module +- mock: Library for mocking objects + +### 6.2 JavaScript Testing +- Jest: Popular testing framework +- Mocha: Flexible testing framework +- Cypress: End-to-end testing tool + +### 6.3 Other Tools +- Selenium: Browser automation +- JMeter: Performance testing +- Postman: API testing + +## Conclusion + +Effective testing is essential for delivering high-quality software. By following best practices and using +appropriate tools, teams can catch bugs early, improve code quality, and deliver better products to users. + +Remember: Testing is not just about finding bugs, it's about building confidence in your code. +""" + + messages: list[BaseMessage] = [ + SystemMessage( + content=[ + { + "type": "text", + "text": long_text_for_caching, + "cache_control": {"type": "ephemeral"}, + } + ] + ), + HumanMessage(content="What is the first type of testing mentioned in section 1.2?"), + ] + + res = model.invoke(messages, config={"callbacks": [cast(BaseCallbackHandler, handler)]}) + + spans = memory_logger.pop() + assert len(spans) > 0 + + llm_spans = find_spans_by_attributes(spans, name="ChatAnthropic", type="llm") + assert len(llm_spans) == 1 + first_span = llm_spans[0] + + assert "metrics" in first_span + first_metrics = first_span["metrics"] + assert "prompt_tokens" in first_metrics + assert first_metrics["prompt_tokens"] > 0 + + assert "prompt_cache_creation_tokens" in first_metrics + assert first_metrics["prompt_cache_creation_tokens"] > 0 + assert first_metrics["prompt_cached_tokens"] == 0 + + res = model.invoke( + messages + [res, HumanMessage(content="What testing framework is mentioned for Python?")], + config={"callbacks": [cast(BaseCallbackHandler, handler)]}, + ) + + spans = memory_logger.pop() + assert len(spans) > 0 + + llm_spans = 
@pytest.mark.vcr
def test_langchain_anthropic_integration(logger_memory_logger: LoggerMemoryLogger):
    """Invoke a prompt | ChatAnthropic chain through the global handler and check the LLM span's model and token metrics."""
    logger, memory_logger = logger_memory_logger
    assert not memory_logger.pop()

    MODEL = "claude-sonnet-4-20250514"

    # No per-call callbacks are passed: the handler is picked up via the global registration.
    set_global_handler(BraintrustCallbackHandler(logger=logger))

    chain = ChatPromptTemplate.from_template("What is 1 + {number}?") | ChatAnthropic(model_name=MODEL)
    result = chain.invoke({"number": "2"})

    flush()

    answer = result.content
    assert isinstance(answer, str)
    assert "3" in answer.lower()

    spans = memory_logger.pop()
    assert len(spans) > 0

    llm_spans = []
    for candidate in spans:
        if candidate["span_attributes"].get("type") == "llm":
            llm_spans.append(candidate)
    assert len(llm_spans) > 0, "Should have at least one LLM call"

    first_llm_span = llm_spans[0]
    assert first_llm_span["metadata"]["model"] == MODEL

    # Token counts are pinned to the recorded cassette; start/end are wall-clock values.
    expected_metrics = {
        "completion_tokens": 13,
        "end": ANY,
        "prompt_tokens": 16,
        "start": ANY,
        "total_tokens": 29,
    }
    assert_matches_object(first_llm_span["metrics"], expected_metrics)
is 1 + {number}?") + model = ChatAnthropic(model_name=MODEL) + + chain = prompt | model + + result = await chain.ainvoke({"number": "2"}) + + flush() + + assert isinstance(result.content, str) + assert "3" in result.content.lower() + + spans = memory_logger.pop() + assert len(spans) > 0 diff --git a/py/src/braintrust/integrations/langchain/tracing.py b/py/src/braintrust/integrations/langchain/tracing.py new file mode 100644 index 00000000..08458314 --- /dev/null +++ b/py/src/braintrust/integrations/langchain/tracing.py @@ -0,0 +1,701 @@ +""" +Braintrust tracing implementation for LangChain. + +Contains BraintrustCallbackHandler and supporting utilities for tracing LangChain +chains, LLMs, tools, and retrievers. +""" + +import json +import logging +import re +import time +from collections.abc import Mapping, Sequence +from contextvars import ContextVar +from re import Pattern +from typing import ( + Any, + TypedDict, + Union, +) +from uuid import UUID + +import braintrust +from braintrust import NOOP_SPAN, Logger, Span, SpanAttributes, SpanTypeAttribute, current_span, init_logger +from braintrust.version import VERSION as sdk_version +from typing_extensions import NotRequired + +_logger = logging.getLogger(__name__) + +# integration_name stays "langchain-py" for backward compatibility with existing traces +_INTEGRATION_NAME = "langchain-py" + +# Global handler context variable – registered with LangChain's configure hook system +# so that all LangChain invocations in the process are automatically traced. 
class BraintrustCallbackHandler:
    """LangChain callback handler that traces chains, LLMs, tools, and retrievers in Braintrust.

    The handler is duck-typed: it implements the ``on_*`` callback methods without
    subclassing a LangChain base class. Each LangChain run is identified by its
    ``run_id``; a Braintrust span is opened in the ``*_start`` callbacks, stored in
    ``self.spans`` under that id, and closed in the matching ``*_end`` / ``*_error``
    callback.
    """

    # run_id of the most recent run started without a parent; reset to None when that run ends.
    root_run_id: UUID | None = None

    def __init__(
        self,
        logger: Logger | Span | None = None,
        debug: bool = False,
        exclude_metadata_props: Pattern[str] | None = None,
    ):
        """Create a handler.

        Args:
            logger: Braintrust logger or span used as the parent for spans that have
                no LangChain parent and no current Braintrust span.
            debug: Deprecated; stored but not used by this class.
            exclude_metadata_props: Pattern of metadata keys to exclude. Stored on the
                instance; no method of this class reads it.
        """
        self.logger = logger
        # Open spans keyed by LangChain run id.
        self.spans: dict[UUID, Span] = {}
        self.debug = debug  # DEPRECATED
        self.exclude_metadata_props = exclude_metadata_props or re.compile(
            r"^(l[sc]_|langgraph_|__pregel_|checkpoint_ns)"
        )
        # Run ids that on_chain_start decided not to trace ("langsmith:hidden").
        self.skipped_runs: set[UUID] = set()
        # Set run_inline=True to avoid thread executor in async contexts
        # This ensures memory logger context is preserved
        self.run_inline = True

        # Per-run timing state for the time-to-first-token metric. All values come
        # from time.perf_counter(), so despite its name `_ttft_ms` holds fractional
        # seconds, not milliseconds.
        self._start_times: dict[UUID, float] = {}
        self._first_token_times: dict[UUID, float] = {}
        self._ttft_ms: dict[UUID, float] = {}

    def _clear_timing(self, run_id: UUID) -> "float | None":
        """Drop all timing state for ``run_id`` and return its recorded time-to-first-token, if any."""
        self._start_times.pop(run_id, None)
        self._first_token_times.pop(run_id, None)
        return self._ttft_ms.pop(run_id, None)

    def _start_span(
        self,
        parent_run_id: UUID | None,
        run_id: UUID,
        name: str | None = None,
        type: SpanTypeAttribute | None = SpanTypeAttribute.TASK,
        span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
        start_time: float | None = None,
        set_current: bool | None = None,
        parent: str | None = None,
        event: LogEvent | None = None,
    ) -> Any:
        """Open a span for ``run_id``, register it in ``self.spans`` and make it current.

        The parent is chosen in this order: the span of ``parent_run_id`` if this
        handler opened it, the current Braintrust span, ``self.logger``, and finally
        the ``braintrust`` module itself.

        Returns:
            The new span, or ``None`` when a span for ``run_id`` already exists.
        """
        if run_id in self.spans:
            # XXX: See graph test case of an example where this _may_ be intended.
            _logger.warning("Span already exists for run_id %s (this is likely a bug)", run_id)
            return None

        if not parent_run_id:
            self.root_run_id = run_id

        current_parent = current_span()
        parent_span = None
        if parent_run_id and parent_run_id in self.spans:
            parent_span = self.spans[parent_run_id]
        elif current_parent != NOOP_SPAN:
            parent_span = current_parent
        elif self.logger is not None:
            parent_span = self.logger
        else:
            parent_span = braintrust

        if event is None:
            event = {}

        # Tags are moved into metadata; the span-level "tags" field is always sent as None.
        tags = event.get("tags") or []
        event = {
            **event,
            "tags": None,
            "metadata": {
                "tags": tags,
                **(event.get("metadata") or {}),
                "run_id": run_id,
                "parent_run_id": parent_run_id,
                "braintrust": {
                    "integration_name": _INTEGRATION_NAME,
                    "integration_version": sdk_version,
                    "sdk_version": sdk_version,
                    "language": "python",
                },
            },
        }

        span = parent_span.start_span(
            name=name,
            type=type,
            span_attributes=span_attributes,
            start_time=start_time,
            set_current=set_current,
            parent=parent,
            **event,
        )

        if self.logger != NOOP_SPAN and span == NOOP_SPAN:
            _logger.warning(
                "Braintrust logging not configured. Pass a `logger`, call `init_logger`, or run an experiment to configure Braintrust logging. Setting up a default."
            )
            span = init_logger().start_span(
                name=name,
                type=type,
                span_attributes=span_attributes,
                start_time=start_time,
                set_current=set_current,
                parent=parent,
                **event,
            )

        span.set_current()

        self.spans[run_id] = span
        return span

    def _end_span(
        self,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        input: Any | None = None,
        output: Any | None = None,
        expected: Any | None = None,
        error: str | None = None,
        tags: Sequence[str] | None = None,
        scores: Mapping[str, int | float] | None = None,
        metadata: Mapping[str, Any] | None = None,
        metrics: Mapping[str, int | float] | None = None,
        dataset_record_id: str | None = None,
    ) -> Any:
        """Log the final fields on the span for ``run_id`` and close it.

        Does nothing when the run was skipped or has no open span. ``tags`` are
        written into metadata rather than the span-level tags field.
        """
        # Skipped runs never get a span, so this check has to come before the
        # self.spans lookup. Checking the other way round left every skipped
        # run id in skipped_runs for the lifetime of the handler.
        if run_id in self.skipped_runs:
            self.skipped_runs.discard(run_id)
            return None

        span = self.spans.pop(run_id, None)
        if span is None:
            return None

        if self.root_run_id == run_id:
            self.root_run_id = None

        try:
            span.log(
                input=input,
                output=output,
                expected=expected,
                error=error,
                tags=None,
                scores=scores,
                metadata={
                    **({"tags": tags} if tags else {}),
                    **(metadata or {}),
                },
                metrics=metrics,
                dataset_record_id=dataset_record_id,
            )
        finally:
            # Always release the span, even if logging failed, so a bad payload
            # cannot leave it current and un-ended.
            #
            # In async workflows, callbacks may execute in different async contexts.
            # The span's context variable token may have been created in a different
            # context, causing ValueError when trying to reset it. We ignore that
            # specific error since the span hierarchy is maintained via self.spans.
            try:
                span.unset_current()
            except ValueError as e:
                if "was created in a different Context" not in str(e):
                    raise

            span.end()

    def on_llm_error(
        self,
        error: BaseException,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,  # TODO: response=
    ) -> Any:
        """Close the LLM span with the error text and discard its timing state."""
        self._end_span(run_id, error=str(error), metadata={**kwargs})
        self._clear_timing(run_id)

    def on_chain_error(
        self,
        error: BaseException,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,  # TODO: some metadata
    ) -> Any:
        """Close the chain span with the error text."""
        self._end_span(run_id, error=str(error), metadata={**kwargs})

    def on_tool_error(
        self,
        error: BaseException,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> Any:
        """Close the tool span with the error text."""
        self._end_span(run_id, error=str(error), metadata={**kwargs})

    def on_retriever_error(
        self,
        error: BaseException,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> Any:
        """Close the retriever span with the error text."""
        self._end_span(run_id, error=str(error), metadata={**kwargs})

    # Agent Methods
    def on_agent_action(
        self,
        action: Any,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> Any:
        """Open a span named after ``action.tool`` with the action as input."""
        self._start_span(
            parent_run_id,
            run_id,
            type=SpanTypeAttribute.LLM,
            name=action.tool,
            event={"input": action, "metadata": {**kwargs}},
        )

    def on_agent_finish(
        self,
        finish: Any,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> Any:
        """Close the agent span with ``finish`` as output."""
        self._end_span(run_id, output=finish, metadata={**kwargs})

    def on_chain_start(
        self,
        serialized: dict[str, Any],
        inputs: dict[str, Any],
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        tags: list[str] | None = None,
        name: str | None = None,
        metadata: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Any:
        """Open a task span for a chain run, unless it is tagged ``langsmith:hidden``."""
        tags = tags or []

        # avoids extra logs that seem not as useful esp. with langgraph
        if "langsmith:hidden" in tags:
            self.skipped_runs.add(run_id)
            return None

        metadata = metadata or {}
        # First non-empty of: explicit name, langgraph node, serialized name, last id segment.
        resolved_name = (
            name
            or metadata.get("langgraph_node")
            or serialized.get("name")
            or _last_item(serialized.get("id") or [])
            or "Chain"
        )

        self._start_span(
            parent_run_id,
            run_id,
            name=resolved_name,
            event={
                "input": inputs,
                "tags": tags,
                "metadata": {
                    "serialized": serialized,
                    "name": name,
                    "metadata": metadata,
                    **kwargs,
                },
            },
        )

    def on_chain_end(
        self,
        outputs: dict[str, Any],
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        tags: list[str] | None = None,
        **kwargs: Any,
    ) -> Any:
        """Close the chain span with ``outputs`` as output."""
        self._end_span(run_id, output=outputs, tags=tags, metadata={**kwargs})

    def on_llm_start(
        self,
        serialized: dict[str, Any],
        prompts: list[str],
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        tags: list[str] | None = None,
        metadata: dict[str, Any] | None = None,
        name: str | None = None,
        **kwargs: Any,
    ) -> Any:
        """Open an LLM span for a completion-style call and start its first-token timer."""
        self._clear_timing(run_id)
        self._start_times[run_id] = time.perf_counter()

        name = name or serialized.get("name") or _last_item(serialized.get("id") or []) or "LLM"
        self._start_span(
            parent_run_id,
            run_id,
            name=name,
            type=SpanTypeAttribute.LLM,
            event={
                "input": prompts,
                "tags": tags,
                "metadata": {
                    "serialized": serialized,
                    "name": name,
                    "metadata": metadata,
                    **kwargs,
                },
            },
        )

    def on_chat_model_start(
        self,
        serialized: dict[str, Any],
        messages: list[list[Any]],
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        tags: list[str] | None = None,
        metadata: dict[str, Any] | None = None,
        name: str | None = None,
        invocation_params: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Any:
        """Open an LLM span for a chat-model call and start its first-token timer."""
        self._clear_timing(run_id)
        self._start_times[run_id] = time.perf_counter()

        invocation_params = invocation_params or {}
        self._start_span(
            parent_run_id,
            run_id,
            name=name or serialized.get("name") or _last_item(serialized.get("id") or []) or "Chat Model",
            type=SpanTypeAttribute.LLM,
            event={
                "input": messages,
                "tags": tags,
                "metadata": {
                    "serialized": serialized,
                    "invocation_params": invocation_params,
                    "metadata": metadata or {},
                    "name": name,
                    **kwargs,
                },
            },
        )

    def on_llm_end(
        self,
        response: Any,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        tags: list[str] | None = None,
        **kwargs: Any,
    ) -> Any:
        """Close the LLM span with the response, token metrics, model name and first-token latency."""
        # Clear timing state before the early return so a run without a span
        # (e.g. a duplicate run_id) does not leave entries behind.
        ttft = self._clear_timing(run_id)

        if run_id not in self.spans:
            return None

        metrics = _get_metrics_from_response(response)
        if ttft is not None:
            metrics["time_to_first_token"] = ttft

        self._end_span(
            run_id,
            output=response,
            metrics=metrics,
            tags=tags,
            metadata={
                "model": _get_model_name_from_response(response),
                **kwargs,
            },
        )

    def on_tool_start(
        self,
        serialized: dict[str, Any],
        input_str: str,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        tags: list[str] | None = None,
        metadata: dict[str, Any] | None = None,
        inputs: dict[str, Any] | None = None,
        name: str | None = None,
        **kwargs: Any,
    ) -> Any:
        """Open a tool span; the input is ``inputs`` when given, else ``input_str`` parsed as JSON if possible."""
        self._start_span(
            parent_run_id,
            run_id,
            name=name or serialized.get("name") or _last_item(serialized.get("id") or []) or "Tool",
            type=SpanTypeAttribute.TOOL,
            event={
                "input": inputs or _safe_parse_json(input_str),
                "tags": tags,
                "metadata": {
                    "metadata": metadata,
                    "serialized": serialized,
                    "input_str": input_str,
                    "input": _safe_parse_json(input_str),
                    "inputs": inputs,
                    "name": name,
                    **kwargs,
                },
            },
        )

    def on_tool_end(
        self,
        output: Any,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> Any:
        """Close the tool span with ``output`` as output."""
        self._end_span(run_id, output=output, metadata={**kwargs})

    def on_retriever_start(
        self,
        serialized: dict[str, Any],
        query: str,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        tags: list[str] | None = None,
        metadata: dict[str, Any] | None = None,
        name: str | None = None,
        **kwargs: Any,
    ) -> Any:
        """Open a function-typed span for a retriever call with ``query`` as input."""
        self._start_span(
            parent_run_id,
            run_id,
            name=name or serialized.get("name") or _last_item(serialized.get("id") or []) or "Retriever",
            type=SpanTypeAttribute.FUNCTION,
            event={
                "input": query,
                "tags": tags,
                "metadata": {
                    "serialized": serialized,
                    "metadata": metadata,
                    "name": name,
                    **kwargs,
                },
            },
        )

    def on_retriever_end(
        self,
        documents: Sequence[Any],
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> Any:
        """Close the retriever span with the returned documents as output."""
        self._end_span(run_id, output=documents, metadata={**kwargs})

    def on_llm_new_token(
        self,
        token: str,
        *,
        chunk: Union[Any, None] = None,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> Any:
        """Record the elapsed time to the first streamed token of ``run_id``; later tokens are ignored."""
        if run_id in self._first_token_times:
            return None

        now = time.perf_counter()
        self._first_token_times[run_id] = now
        start = self._start_times.get(run_id)
        if start is not None:
            # perf_counter difference, i.e. fractional seconds.
            self._ttft_ms[run_id] = now - start

    def on_text(
        self,
        text: str,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> Any:
        """Ignored: text events are not traced."""

    def on_retry(
        self,
        retry_state: Any,
        *,
        run_id: UUID,
        parent_run_id: UUID | None = None,
        **kwargs: Any,
    ) -> Any:
        """Ignored: retry events are not traced."""

    def on_custom_event(
        self,
        name: str,
        data: Any,
        *,
        run_id: UUID,
        tags: list[str] | None = None,
        metadata: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Any:
        """Ignored: custom events are not traced."""
def _safe_parse_json(input_str: str) -> Any:
    """Return ``input_str`` decoded as JSON, or ``input_str`` unchanged if decoding fails."""
    try:
        return json.loads(input_str)
    except Exception:
        # Deliberate best-effort: tool input is frequently plain text, and a
        # tracing helper must never break the traced call.
        return input_str


def _last_item(items: list[Any]) -> Any:
    """Return the last element of ``items``, or ``None`` when it is empty."""
    return items[-1] if items else None


def _walk_generations(response: Any):
    """Yield every generation in ``response.generations``, flattened across batches.

    A response with no ``generations`` attribute, or with ``None`` / empty
    batches, yields nothing.
    """
    for generations in getattr(response, "generations", None) or []:
        yield from generations or []


def _llm_output_of(response: Any) -> Mapping[str, Any]:
    """Return ``response.llm_output`` when it is a mapping, else an empty dict."""
    llm_output = getattr(response, "llm_output", None)
    return llm_output if isinstance(llm_output, Mapping) else {}


def _get_model_name_from_response(response: Any) -> "str | None":
    """Return the model name reported by ``response``.

    The first truthy ``message.response_metadata["model_name"]`` among the
    generations wins; otherwise ``llm_output["model_name"]`` or
    ``llm_output["model"]`` is used. Returns ``""`` when none is available.
    """
    for generation in _walk_generations(response):
        message = getattr(generation, "message", None)
        if not message:
            continue

        response_metadata = getattr(message, "response_metadata", None)
        if isinstance(response_metadata, dict):
            model_name = response_metadata.get("model_name")
            if model_name:
                return model_name

    llm_output = _llm_output_of(response)
    return llm_output.get("model_name") or llm_output.get("model") or ""


def _clean_object(obj: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``obj`` without entries whose value is ``None``, ``[]`` or ``{}``."""
    return {
        k: v
        for k, v in obj.items()
        if v is not None and not (isinstance(v, list) and not v) and not (isinstance(v, dict) and not v)
    }


def _numeric_items(obj: Any) -> dict[str, Any]:
    """Return the int/float (non-bool) entries of ``obj``; ``{}`` if ``obj`` is not a mapping."""
    if not isinstance(obj, Mapping):
        return {}
    return {k: v for k, v in obj.items() if isinstance(v, (int, float)) and not isinstance(v, bool)}


def _get_metrics_from_response(response: Any) -> dict[str, Any]:
    """Extract token-usage metrics from an LLM response.

    ``message.usage_metadata`` of each generation is preferred and mapped to
    ``total_tokens`` / ``prompt_tokens`` / ``completion_tokens``, plus
    ``prompt_cached_tokens`` / ``prompt_cache_creation_tokens`` from
    ``input_token_details``. When that yields nothing (or only zeros), the
    function falls back to ``llm_output["token_usage"]`` or
    ``llm_output["estimatedTokens"]``.
    """
    metrics: dict[str, Any] = {}

    for generation in _walk_generations(response):
        message = getattr(generation, "message", None)
        if not message:
            continue

        usage_metadata = getattr(message, "usage_metadata", None)
        if not (usage_metadata and isinstance(usage_metadata, dict)):
            continue

        metrics.update(
            _clean_object(
                {
                    "total_tokens": usage_metadata.get("total_tokens"),
                    "prompt_tokens": usage_metadata.get("input_tokens"),
                    "completion_tokens": usage_metadata.get("output_tokens"),
                }
            )
        )

        # Extract cache tokens from nested input_token_details (LangChain format)
        # Maps to Braintrust's standard cache token metric names
        input_token_details = usage_metadata.get("input_token_details")
        if input_token_details and isinstance(input_token_details, dict):
            cache_read = input_token_details.get("cache_read")
            cache_creation = input_token_details.get("cache_creation")

            if cache_read is not None:
                metrics["prompt_cached_tokens"] = cache_read
            if cache_creation is not None:
                metrics["prompt_cache_creation_tokens"] = cache_creation

    if not metrics or not any(metrics.values()):
        llm_output = _llm_output_of(response)
        fallback = llm_output.get("token_usage") or llm_output.get("estimatedTokens") or {}
        # Metrics are numeric by contract (Mapping[str, int | float]). Provider
        # token_usage payloads can carry nested detail dicts, and the fallback
        # value may not be a mapping at all, so keep only the numeric entries.
        metrics = _numeric_items(fallback)

    return _clean_object(metrics)
likely a bug)" - - current_parent = langchain_parent.get() - if parent_run_id in self.spans: - parent_span = self.spans[parent_run_id] - elif current_parent is not None: - parent_span = current_parent - elif self.logger is not None: - parent_span = self.logger - else: - parent_span = braintrust - - span = parent_span.start_span(name=name, **kwargs) - langchain_parent.set(span) - self.spans[run_id] = span - return span - - def _end_span(self, run_id, **kwargs: Any) -> Any: - assert run_id in self.spans, f"No span exists for run_id {run_id} (this is likely a bug)" - span = self.spans.pop(run_id) - span.log(**kwargs) - - if langchain_parent.get() == span: - langchain_parent.set(None) - - span.end() - - def on_chain_start( - self, - serialized: dict[str, Any], - inputs: dict[str, Any], - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - **kwargs: Any, - ) -> Any: - self._start_span(parent_run_id, run_id, "Chain", input=inputs, metadata={"tags": tags}) - - def on_chain_end( - self, outputs: dict[str, Any], *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any - ) -> Any: - self._end_span(run_id, output=outputs) - - def on_llm_start( - self, - serialized: dict[str, Any], - prompts: list[str], - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - **kwargs: Any, - ) -> Any: - self._start_span( - parent_run_id, - run_id, - "LLM", - input=prompts, - metadata={"tags": tags, **kwargs["invocation_params"]}, - ) - - def on_chat_model_start( - self, - serialized: dict[str, Any], - messages: list[list[BaseMessage]], - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - **kwargs: Any, - ) -> Any: - self._start_span( - parent_run_id, - run_id, - "Chat Model", - input=[[m.dict() for m in batch] for batch in messages], - metadata={"tags": tags, **kwargs["invocation_params"]}, - ) - - def on_llm_end( - self, response: LLMResult, *, run_id: UUID, 
parent_run_id: UUID | None = None, **kwargs: Any - ) -> Any: - metrics = {} - token_usage = response.llm_output.get("token_usage", {}) - if "total_tokens" in token_usage: - metrics["tokens"] = token_usage["total_tokens"] - if "prompt_tokens" in token_usage: - metrics["prompt_tokens"] = token_usage["prompt_tokens"] - if "completion_tokens" in token_usage: - metrics["completion_tokens"] = token_usage["completion_tokens"] - - self._end_span(run_id, output=[[m.dict() for m in batch] for batch in response.generations], metrics=metrics) - - def on_tool_start( - self, - serialized: dict[str, Any], - input_str: str, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - **kwargs: Any, - ) -> Any: - _logger.warning("Starting tool, but it will not be traced in braintrust (unsupported)") - - def on_tool_end(self, output: str, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any) -> Any: - pass - - def on_retriever_start(self, query: str, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any) -> Any: - _logger.warning("Starting retriever, but it will not be traced in braintrust (unsupported)") - - def on_retriever_end( - self, response: list[Document], *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any - ) -> Any: - pass diff --git a/py/src/braintrust/wrappers/langchain/__init__.py b/py/src/braintrust/wrappers/langchain/__init__.py new file mode 100644 index 00000000..0296a51a --- /dev/null +++ b/py/src/braintrust/wrappers/langchain/__init__.py @@ -0,0 +1,21 @@ +""" +Braintrust LangChain wrapper — re-exports from braintrust.integrations.langchain. 
+""" + +from braintrust.integrations.langchain import ( + BraintrustCallbackHandler, + BraintrustTracer, + LangChainIntegration, + clear_global_handler, + set_global_handler, + setup_langchain, +) + +__all__ = [ + "BraintrustCallbackHandler", + "BraintrustTracer", + "LangChainIntegration", + "set_global_handler", + "clear_global_handler", + "setup_langchain", +] From 813405d17c5b0ced466448c80ab8dc3fb3d2dfdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Wed, 25 Mar 2026 01:26:25 +0000 Subject: [PATCH 2/7] maybe done --- integrations/langchain-py/README.md | 2 +- py/examples/langchain/manual_patching.py | 2 +- py/noxfile.py | 12 ++++++++++-- py/src/braintrust/auto.py | 2 -- .../auto_test_scripts/test_auto_langchain.py | 2 ++ py/src/braintrust/integrations/langchain/__init__.py | 4 +++- py/src/braintrust/integrations/langchain/conftest.py | 1 - .../braintrust/integrations/langchain/integration.py | 2 +- .../integrations/langchain/test_langchain.py | 12 +++++------- py/src/braintrust/integrations/langchain/tracing.py | 3 ++- py/src/braintrust/wrappers/langchain/__init__.py | 1 + 11 files changed, 26 insertions(+), 17 deletions(-) diff --git a/integrations/langchain-py/README.md b/integrations/langchain-py/README.md index 19717270..b2e1b2ab 100644 --- a/integrations/langchain-py/README.md +++ b/integrations/langchain-py/README.md @@ -23,7 +23,7 @@ SDK for integrating [Braintrust](https://braintrust.dev) with [LangChain](https: braintrust.auto_instrument() # After (option 2: explicit) - from braintrust.wrappers.langchain import BraintrustCallbackHandler, set_global_handler + from braintrust.integrations.langchain import BraintrustCallbackHandler, set_global_handler ``` The API is identical - no code changes needed beyond the import path. 
diff --git a/py/examples/langchain/manual_patching.py b/py/examples/langchain/manual_patching.py index 142bcc5a..4912e707 100644 --- a/py/examples/langchain/manual_patching.py +++ b/py/examples/langchain/manual_patching.py @@ -7,7 +7,7 @@ """ from braintrust import init_logger -from braintrust.wrappers.langchain import set_global_handler, BraintrustCallbackHandler +from braintrust.integrations.langchain import BraintrustCallbackHandler, set_global_handler # Setup LangChain tracing with a specific project diff --git a/py/noxfile.py b/py/noxfile.py index 2789bfee..1c8c7389 100644 --- a/py/noxfile.py +++ b/py/noxfile.py @@ -43,6 +43,7 @@ def _pinned_python_version(): INTEGRATION_DIR = "braintrust/integrations" INTEGRATION_AUTO_TEST_DIR = "braintrust/integrations/auto_test_scripts" ANTHROPIC_INTEGRATION_DIR = "braintrust/integrations/anthropic" +LANGCHAIN_INTEGRATION_DIR = "braintrust/integrations/langchain" CONTRIB_DIR = "braintrust/contrib" DEVSERVER_DIR = "braintrust/devserver" @@ -102,7 +103,7 @@ def _pinned_python_version(): GENAI_VERSIONS = (LATEST,) DSPY_VERSIONS = (LATEST,) GOOGLE_ADK_VERSIONS = (LATEST, "1.14.1") -LANGCHAIN_VERSIONS = (LATEST,) +LANGCHAIN_VERSIONS = (LATEST, "0.3.83") # temporalio 1.19.0+ requires Python >= 3.10; skip Python 3.9 entirely TEMPORAL_VERSIONS = (LATEST, "1.20.0", "1.19.0") PYTEST_VERSIONS = (LATEST, "8.4.2") @@ -425,7 +426,14 @@ def _run_core_tests(session): _run_tests( session, SRC_DIR, - ignore_paths=[WRAPPER_DIR, INTEGRATION_AUTO_TEST_DIR, ANTHROPIC_INTEGRATION_DIR, CONTRIB_DIR, DEVSERVER_DIR], + ignore_paths=[ + WRAPPER_DIR, + INTEGRATION_AUTO_TEST_DIR, + ANTHROPIC_INTEGRATION_DIR, + LANGCHAIN_INTEGRATION_DIR, + CONTRIB_DIR, + DEVSERVER_DIR, + ], ) diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py index fb40d088..ad824726 100644 --- a/py/src/braintrust/auto.py +++ b/py/src/braintrust/auto.py @@ -4,8 +4,6 @@ Provides one-line instrumentation for supported libraries. 
""" -from __future__ import annotations - import logging from contextlib import contextmanager diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py index a3719ef8..9cc7c771 100644 --- a/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py +++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py @@ -3,6 +3,7 @@ from braintrust.auto import auto_instrument from braintrust.integrations.langchain import BraintrustCallbackHandler + # 1. Instrument results = auto_instrument() assert results.get("langchain") == True, "auto_instrument should return True for langchain" @@ -14,6 +15,7 @@ # 3. Verify that a global handler was registered with LangChain from langchain_core.callbacks import CallbackManager + manager = CallbackManager.configure() handler = next((h for h in manager.handlers if isinstance(h, BraintrustCallbackHandler)), None) assert handler is not None, "BraintrustCallbackHandler should be registered globally after auto_instrument()" diff --git a/py/src/braintrust/integrations/langchain/__init__.py b/py/src/braintrust/integrations/langchain/__init__.py index 9758ca95..e1a19352 100644 --- a/py/src/braintrust/integrations/langchain/__init__.py +++ b/py/src/braintrust/integrations/langchain/__init__.py @@ -21,7 +21,9 @@ def setup_langchain() -> bool: # Lazily imported to avoid circular imports at module load time # (tracing.py imports from braintrust, which must be fully initialized first) -_LAZY_ATTRS = frozenset(["BraintrustCallbackHandler", "BraintrustTracer", "set_global_handler", "clear_global_handler"]) +_LAZY_ATTRS = frozenset( + ["BraintrustCallbackHandler", "BraintrustTracer", "set_global_handler", "clear_global_handler"] +) def __getattr__(name: str): diff --git a/py/src/braintrust/integrations/langchain/conftest.py b/py/src/braintrust/integrations/langchain/conftest.py index 078502b2..a8f56176 100644 --- 
a/py/src/braintrust/integrations/langchain/conftest.py +++ b/py/src/braintrust/integrations/langchain/conftest.py @@ -10,7 +10,6 @@ _MemoryBackgroundLogger, ) from braintrust.test_helpers import init_test_logger - from braintrust.wrappers.langchain import clear_global_handler diff --git a/py/src/braintrust/integrations/langchain/integration.py b/py/src/braintrust/integrations/langchain/integration.py index e22cdc0f..0f29b0da 100644 --- a/py/src/braintrust/integrations/langchain/integration.py +++ b/py/src/braintrust/integrations/langchain/integration.py @@ -2,7 +2,7 @@ from typing import Any -from braintrust.integrations.base import BasePatcher, BaseIntegration +from braintrust.integrations.base import BaseIntegration, BasePatcher class LangChainCallbackPatcher(BasePatcher): diff --git a/py/src/braintrust/integrations/langchain/test_langchain.py b/py/src/braintrust/integrations/langchain/test_langchain.py index 827cf777..889fe505 100644 --- a/py/src/braintrust/integrations/langchain/test_langchain.py +++ b/py/src/braintrust/integrations/langchain/test_langchain.py @@ -1,10 +1,12 @@ # pyright: reportTypedDictNotRequiredAccess=none import uuid -from typing import Any, Dict, List, Sequence, Union, cast +from typing import Any, Dict, List, Union, cast from unittest.mock import ANY import pytest +from braintrust.integrations.langchain import BraintrustCallbackHandler, set_global_handler from braintrust.logger import flush +from braintrust.wrappers.test_utils import verify_autoinstrument_script from langchain_anthropic import ChatAnthropic from langchain_core.callbacks import BaseCallbackHandler, CallbackManager from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage @@ -15,11 +17,9 @@ from langchain_openai import ChatOpenAI from pydantic import BaseModel, Field -from braintrust.integrations.langchain import BraintrustCallbackHandler, set_global_handler -from braintrust.wrappers.test_utils import verify_autoinstrument_script - from .conftest import 
LoggerMemoryLogger + # --------------------------------------------------------------------------- # Helpers (inlined from the integration package) # --------------------------------------------------------------------------- @@ -44,9 +44,7 @@ def assert_matches_object(actual: Any, expected: Any, ignore_order: bool = False matched = True except Exception: pass - assert matched, ( - f"Expected {expected_item} in unordered sequence but couldn't find match in {actual}" - ) + assert matched, f"Expected {expected_item} in unordered sequence but couldn't find match in {actual}" elif isinstance(expected, dict): assert isinstance(actual, dict), f"Expected dict but got {type(actual)}" for k, v in expected.items(): diff --git a/py/src/braintrust/integrations/langchain/tracing.py b/py/src/braintrust/integrations/langchain/tracing.py index 08458314..85044bf1 100644 --- a/py/src/braintrust/integrations/langchain/tracing.py +++ b/py/src/braintrust/integrations/langchain/tracing.py @@ -24,6 +24,7 @@ from braintrust.version import VERSION as sdk_version from typing_extensions import NotRequired + _logger = logging.getLogger(__name__) # integration_name stays "langchain-py" for backward compatibility with existing traces @@ -611,7 +612,7 @@ class BraintrustTracer(BraintrustCallbackHandler): def __init__(self, *args: Any, **kwargs: Any): _logger.warning( "BraintrustTracer is deprecated. Use BraintrustCallbackHandler from " - "braintrust.wrappers.langchain instead." + "braintrust.integrations.langchain instead." 
) super().__init__(*args, **kwargs) diff --git a/py/src/braintrust/wrappers/langchain/__init__.py b/py/src/braintrust/wrappers/langchain/__init__.py index 0296a51a..72f30ca4 100644 --- a/py/src/braintrust/wrappers/langchain/__init__.py +++ b/py/src/braintrust/wrappers/langchain/__init__.py @@ -11,6 +11,7 @@ setup_langchain, ) + __all__ = [ "BraintrustCallbackHandler", "BraintrustTracer", From 1968511e74935b0d97f2f0557614182f267abea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Wed, 25 Mar 2026 17:40:59 +0000 Subject: [PATCH 3/7] rename the cassettes --- ...st_async_langchain_invoke => test_async_langchain_invoke.yaml} | 0 .../{test_chain_with_memory => test_chain_with_memory.yaml} | 0 .../cassettes/{test_global_handler => test_global_handler.yaml} | 0 ...opic_integration => test_langchain_anthropic_integration.yaml} | 0 ...raph_state_management => test_langgraph_state_management.yaml} | 0 .../langchain/cassettes/{test_llm_calls => test_llm_calls.yaml} | 0 .../{test_parallel_execution => test_parallel_execution.yaml} | 0 ...test_prompt_caching_tokens => test_prompt_caching_tokens.yaml} | 0 .../cassettes/{test_streaming_ttft => test_streaming_ttft.yaml} | 0 .../langchain/cassettes/{test_tool_usage => test_tool_usage.yaml} | 0 10 files changed, 0 insertions(+), 0 deletions(-) rename py/src/braintrust/integrations/langchain/cassettes/{test_async_langchain_invoke => test_async_langchain_invoke.yaml} (100%) rename py/src/braintrust/integrations/langchain/cassettes/{test_chain_with_memory => test_chain_with_memory.yaml} (100%) rename py/src/braintrust/integrations/langchain/cassettes/{test_global_handler => test_global_handler.yaml} (100%) rename py/src/braintrust/integrations/langchain/cassettes/{test_langchain_anthropic_integration => test_langchain_anthropic_integration.yaml} (100%) rename py/src/braintrust/integrations/langchain/cassettes/{test_langgraph_state_management => test_langgraph_state_management.yaml} (100%) rename 
py/src/braintrust/integrations/langchain/cassettes/{test_llm_calls => test_llm_calls.yaml} (100%) rename py/src/braintrust/integrations/langchain/cassettes/{test_parallel_execution => test_parallel_execution.yaml} (100%) rename py/src/braintrust/integrations/langchain/cassettes/{test_prompt_caching_tokens => test_prompt_caching_tokens.yaml} (100%) rename py/src/braintrust/integrations/langchain/cassettes/{test_streaming_ttft => test_streaming_ttft.yaml} (100%) rename py/src/braintrust/integrations/langchain/cassettes/{test_tool_usage => test_tool_usage.yaml} (100%) diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke b/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml similarity index 100% rename from py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke rename to py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory b/py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory.yaml similarity index 100% rename from py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory rename to py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory.yaml diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_global_handler b/py/src/braintrust/integrations/langchain/cassettes/test_global_handler.yaml similarity index 100% rename from py/src/braintrust/integrations/langchain/cassettes/test_global_handler rename to py/src/braintrust/integrations/langchain/cassettes/test_global_handler.yaml diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration b/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml similarity index 100% rename from py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration rename 
to py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management b/py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management.yaml similarity index 100% rename from py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management rename to py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management.yaml diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_llm_calls b/py/src/braintrust/integrations/langchain/cassettes/test_llm_calls.yaml similarity index 100% rename from py/src/braintrust/integrations/langchain/cassettes/test_llm_calls rename to py/src/braintrust/integrations/langchain/cassettes/test_llm_calls.yaml diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution b/py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution.yaml similarity index 100% rename from py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution rename to py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution.yaml diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens b/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml similarity index 100% rename from py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens rename to py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft b/py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft.yaml similarity index 100% rename from py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft rename to py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft.yaml diff --git 
a/py/src/braintrust/integrations/langchain/cassettes/test_tool_usage b/py/src/braintrust/integrations/langchain/cassettes/test_tool_usage.yaml similarity index 100% rename from py/src/braintrust/integrations/langchain/cassettes/test_tool_usage rename to py/src/braintrust/integrations/langchain/cassettes/test_tool_usage.yaml From edd29ebed7ca4d150460d0cac8b333c4ead107aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Wed, 25 Mar 2026 17:42:48 +0000 Subject: [PATCH 4/7] and update the tests to use the renamed cassettes --- py/src/braintrust/integrations/langchain/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/py/src/braintrust/integrations/langchain/conftest.py b/py/src/braintrust/integrations/langchain/conftest.py index a8f56176..a10ab496 100644 --- a/py/src/braintrust/integrations/langchain/conftest.py +++ b/py/src/braintrust/integrations/langchain/conftest.py @@ -43,7 +43,6 @@ def vcr_config(): "record_mode": record_mode, "match_on": ["uri", "method", "body"], "cassette_library_dir": str(Path(__file__).parent / "cassettes"), - "path_transformer": lambda path: path.replace(".yaml", ""), } From ab5c901876241c2a8e012914f5bc295ae8010591 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Wed, 25 Mar 2026 17:48:43 +0000 Subject: [PATCH 5/7] remove unecessary python patterns used by python < 3.10 --- .../integrations/langchain/test_langchain.py | 22 +++++++++---------- .../integrations/langchain/tracing.py | 6 ++--- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/py/src/braintrust/integrations/langchain/test_langchain.py b/py/src/braintrust/integrations/langchain/test_langchain.py index 889fe505..5698fdb7 100644 --- a/py/src/braintrust/integrations/langchain/test_langchain.py +++ b/py/src/braintrust/integrations/langchain/test_langchain.py @@ -1,6 +1,6 @@ # pyright: reportTypedDictNotRequiredAccess=none import uuid -from typing import Any, Dict, List, Union, cast +from typing import Any, cast from 
unittest.mock import ANY import pytest @@ -59,7 +59,7 @@ def assert_matches_object(actual: Any, expected: Any, ignore_order: bool = False assert actual == expected, f"Expected {expected} but got {actual}" -def find_spans_by_attributes(spans: List[Any], **attributes: Any) -> List[Any]: +def find_spans_by_attributes(spans: list[Any], **attributes: Any) -> list[Any]: """Find all spans matching the given span_attributes.""" matching = [] for span in spans: @@ -90,7 +90,7 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger): presence_penalty=0, n=1, ) - chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model) chain.invoke({"number": "2"}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}) spans = memory_logger.pop() @@ -225,7 +225,7 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger): presence_penalty=0, n=1, ) - chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model) message = chain.invoke({"number": "2"}) @@ -350,7 +350,7 @@ def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger): handler = BraintrustCallbackHandler(logger=logger) prompt = ChatPromptTemplate.from_template("{history} User: {input}") model = ChatOpenAI(model="gpt-4o-mini") - chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model) memory = {"history": "Assistant: Hello! 
How can I assist you today?"} chain.invoke( @@ -695,16 +695,16 @@ def test_langgraph_state_management(logger_memory_logger: LoggerMemoryLogger): n=1, ) - def say_hello(state: Dict[str, str]): + def say_hello(state: dict[str, str]): response = model.invoke("Say hello") - return cast(Union[str, List[str], Dict[str, str]], response.content) + return cast(str | list[str] | dict[str, str], response.content) - def say_bye(state: Dict[str, str]): + def say_bye(state: dict[str, str]): print("From the 'sayBye' node: Bye world!") return "Bye" workflow = ( - StateGraph(state_schema=Dict[str, str]) + StateGraph(state_schema=dict[str, str]) .add_node("sayHello", say_hello) .add_node("sayBye", say_bye) .add_edge(START, "sayHello") @@ -1033,9 +1033,9 @@ def test_streaming_ttft(logger_memory_logger: LoggerMemoryLogger): max_completion_tokens=50, streaming=True, ) - chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model) + chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model) - chunks: List[str] = [] + chunks: list[str] = [] for chunk in chain.stream({}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}): if chunk.content: chunks.append(str(chunk.content)) diff --git a/py/src/braintrust/integrations/langchain/tracing.py b/py/src/braintrust/integrations/langchain/tracing.py index 85044bf1..1a142e6b 100644 --- a/py/src/braintrust/integrations/langchain/tracing.py +++ b/py/src/braintrust/integrations/langchain/tracing.py @@ -11,11 +11,9 @@ import time from collections.abc import Mapping, Sequence from contextvars import ContextVar -from re import Pattern from typing import ( Any, TypedDict, - Union, ) from uuid import UUID @@ -90,7 +88,7 @@ def __init__( self, logger: Logger | Span | None = None, debug: bool = False, - exclude_metadata_props: Pattern[str] | None = None, + exclude_metadata_props: re.Pattern[str] | None = None, ): self.logger = logger self.spans: dict[UUID, Span] = {} @@ -561,7 +559,7 @@ def on_llm_new_token( self, 
token: str, *, - chunk: Union[Any, None] = None, + chunk: Any | None = None, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any, From 45710117a8c1282227d85d230baca10385b75aa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Wed, 25 Mar 2026 18:47:52 +0000 Subject: [PATCH 6/7] rerecord cassettes --- .../test_async_langchain_invoke.yaml | 201 +++++++++++++++++- .../test_langchain_anthropic_integration.yaml | 196 ++++++++++++++++- .../cassettes/test_prompt_caching_tokens.yaml | 4 +- .../integrations/langchain/conftest.py | 4 +- .../integrations/langchain/test_langchain.py | 49 +---- .../integrations/langchain/tracing.py | 26 ++- 6 files changed, 418 insertions(+), 62 deletions(-) diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml b/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml index 3ecc362e..2ffcb4e8 100644 --- a/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml +++ b/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml @@ -1,6 +1,6 @@ interactions: - request: - body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "What is + body: '{"max_tokens": 64000, "messages": [{"role": "user", "content": "What is 1 + 2?"}], "model": "claude-sonnet-4-20250514"}' headers: accept: @@ -80,7 +80,7 @@ interactions: code: 200 message: OK - request: - body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' headers: accept: - application/json @@ -177,7 +177,7 @@ interactions: code: 200 message: OK - request: - body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 
2?"}],"model":"claude-sonnet-4-20250514"}' headers: accept: - application/json @@ -273,4 +273,199 @@ interactions: status: code: 200 message: OK +- request: + body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '111' + Content-Type: + - application/json + Host: + - api.anthropic.com + User-Agent: + - langchain-anthropic/1.4.0 + X-Stainless-Arch: + - x64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - Linux + X-Stainless-Package-Version: + - 0.86.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.14.3 + anthropic-version: + - '2023-06-01' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA6pWKqksSFWyUkotKsovUtKB0lbVMPHE0pKM1LySzOTEksz8vHiYstzU4uLEdJCC + zLyyxJzMFIUK3cSCTN3s1EqlWh2lotTC0tTikvjMFCUrECfewNDQOSrQvSzPx6K8zDjCNCOyMDI1 + qiRKqRYAAAD//wMAzB6OOoIAAAA= + headers: + CF-RAY: + - 9e1ffdb81a020555-CMH + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 18:31:11 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + cf-cache-status: + - DYNAMIC + request-id: + - req_011CZQGvnL8wv3X5hYqYeZtZ + server-timing: + - x-originResponse;dur=13 + set-cookie: + - _cfuvid=gyUUSAZvGwkONX58EfvF0btj1CdyxvDfcMYIpZzdC_s-1774463471.3759444-1.0.1.1-Y5yl2w0p0ULztM1yD9IfmXjoPl60ueMNw9C.Pdj8r9E; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-envoy-upstream-service-time: + - '11' + 
x-should-retry: + - 'false' + status: + code: 401 + message: Unauthorized +- request: + body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '111' + Content-Type: + - application/json + Host: + - api.anthropic.com + User-Agent: + - langchain-anthropic/1.4.0 + X-Stainless-Arch: + - x64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - Linux + X-Stainless-Package-Version: + - 0.86.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.14.3 + anthropic-version: + - '2023-06-01' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA3SQTUsDMRCG/8ryXk1htx+2BDx4EEF68KxIiMnYLs1OtslsaSn73yXFIiqeBuZ5 + 5vOMLnoK0HDBDp4mOTKTTOaTaT1d1ItmDoXWQ6PLG1M3y+XDav1y79ar07579k+7R79KRyjIqadi + Uc52Q1BIMZSEzbnNYlmg4CILsUC/nq++0LGQS9BoqptqWt1VM4xvCllibxLZHBkaxN7IkBhfINN+ + IHYEzUMICsNlrj6j5X4QI3FHnKGbWwVn3ZaMS2SljWx+CvWVJ7L+P3atLf2p31JHyQaz6P7637TZ + /qajQhzkx3YzhUzp0Doy0lKCRnmWt8mXv/MHpXKj2VCEBkcx9mDbYN8DYRw/AQAA//8DAJRQPdG9 + AQAA + headers: + CF-RAY: + - 9e1ffe8f48166bf7-CMH + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 18:31:46 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + anthropic-ratelimit-input-tokens-limit: + - '3000000' + anthropic-ratelimit-input-tokens-remaining: + - '3000000' + anthropic-ratelimit-input-tokens-reset: + - '2026-03-25T18:31:46Z' + anthropic-ratelimit-output-tokens-limit: + - '600000' + 
anthropic-ratelimit-output-tokens-remaining: + - '600000' + anthropic-ratelimit-output-tokens-reset: + - '2026-03-25T18:31:46Z' + anthropic-ratelimit-requests-limit: + - '20000' + anthropic-ratelimit-requests-remaining: + - '19999' + anthropic-ratelimit-requests-reset: + - '2026-03-25T18:31:45Z' + anthropic-ratelimit-tokens-limit: + - '3600000' + anthropic-ratelimit-tokens-remaining: + - '3600000' + anthropic-ratelimit-tokens-reset: + - '2026-03-25T18:31:46Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CZQGyKYAWfDXkbs6TYmFj + server-timing: + - x-originResponse;dur=1115 + set-cookie: + - _cfuvid=GkeOmu_nJ3IH0wFr_Ysu_15x_mE_eZFWx7SyJP8s9wY-1774463505.8047376-1.0.1.1-yT6hue7f9O0MUMAxxAf.Cl6UVJAog65JmUFJd1pdxEw; + HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-envoy-upstream-service-time: + - '1114' + status: + code: 200 + message: OK version: 1 diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml b/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml index 6c396d02..516f74f8 100644 --- a/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml +++ b/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml @@ -1,6 +1,6 @@ interactions: - request: - body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "What is + body: '{"max_tokens": 64000, "messages": [{"role": "user", "content": "What is 1 + 2?"}], "model": "claude-sonnet-4-20250514"}' headers: accept: @@ -104,7 +104,7 @@ interactions: code: 200 message: OK - request: - body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 
2?"}],"model":"claude-sonnet-4-20250514"}' headers: accept: - application/json @@ -201,7 +201,7 @@ interactions: code: 200 message: OK - request: - body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' headers: accept: - application/json @@ -297,4 +297,194 @@ interactions: status: code: 200 message: OK +- request: + body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '111' + Content-Type: + - application/json + Cookie: + - _cfuvid=173XVm8c6LwLnZGSZjw3nktrd.OcW9mUj4Ct6Nzco6M-1774463470.6692584-1.0.1.1-Z8FXX.hH2DDJmupKJNi.NwPEp.ON4Sm.8PzX9hNNFu4 + Host: + - api.anthropic.com + User-Agent: + - langchain-anthropic/1.4.0 + X-Stainless-Arch: + - x64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - Linux + X-Stainless-Package-Version: + - 0.86.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.14.3 + anthropic-version: + - '2023-06-01' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAAA6pWKqksSFWyUkotKsovUtKB0lbVMPHE0pKM1LySzOTEksz8vHiYstzU4uLEdJCC + zLyyxJzMFIUK3cSCTN3s1EqlWh2lotTC0tTikvjMFCUrECfewNDQOSrQvSw738/c0CKlqsIlONXX + 1CNTqRYAAAD//wMAPzaTsIIAAAA= + headers: + CF-RAY: + - 9e1ffdb5cf75e6b0-CMH + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 18:31:11 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + 
cf-cache-status: + - DYNAMIC + request-id: + - req_011CZQGvkoN718dzxDSeM5Hi + server-timing: + - x-originResponse;dur=32 + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + vary: + - Accept-Encoding + x-envoy-upstream-service-time: + - '28' + x-should-retry: + - 'false' + status: + code: 401 + message: Unauthorized +- request: + body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '111' + Content-Type: + - application/json + Cookie: + - _cfuvid=PQKr9HJ20fMN78IoIWNmzDAMdKfcxl7b4a28wiRRrOA-1774463501.3608425-1.0.1.1-vWPsSIFWMsbw3tO25QWBZA3vmGBN5garPTOk7LK_8Y4 + Host: + - api.anthropic.com + User-Agent: + - langchain-anthropic/1.4.0 + X-Stainless-Arch: + - x64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - Linux + X-Stainless-Package-Version: + - 0.86.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.14.3 + anthropic-version: + - '2023-06-01' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAA/3WQT0vEMBDFv0qZq11ou1vRgjePHoS9KRKyzdgG00lNJovr0u/uZHGRVTxleL+X + N3+OMHmDDjronU4GV9ETIa82q6Zq2qqtN1CCNcKnOKiqbh83djek7efN/cN869fzE46HrXj4MGN2 + YYx6QBGCd1nQMdrImlik3hOjVN3z8exn/Mjk9HRQF1dFU9wVa1heSojsZxVQy0jCkIziFAi+QcT3 + hNRLCCXnSkinvt0RLM2JFfs3pAhdfS19dT+i6iWKrSd1aajOXLD5j53/5nycR5wwaKfa6a//h9bj + b7qU4BNfTLeWbTDsbY+KLQbZMx/L6GDy3ekVQ95RDegFkWel99o6vZPbLssXLJhatL0BAAA= + headers: + CF-RAY: + - 9e1ffe873bf7d04f-CMH + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Security-Policy: + - default-src 'none'; frame-ancestors 'none' + Content-Type: + - application/json + Date: + - Wed, 25 Mar 2026 18:31:45 GMT + Server: + - cloudflare + 
Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + anthropic-ratelimit-input-tokens-limit: + - '3000000' + anthropic-ratelimit-input-tokens-remaining: + - '3000000' + anthropic-ratelimit-input-tokens-reset: + - '2026-03-25T18:31:45Z' + anthropic-ratelimit-output-tokens-limit: + - '600000' + anthropic-ratelimit-output-tokens-remaining: + - '600000' + anthropic-ratelimit-output-tokens-reset: + - '2026-03-25T18:31:45Z' + anthropic-ratelimit-requests-limit: + - '20000' + anthropic-ratelimit-requests-remaining: + - '19999' + anthropic-ratelimit-requests-reset: + - '2026-03-25T18:31:44Z' + anthropic-ratelimit-tokens-limit: + - '3600000' + anthropic-ratelimit-tokens-remaining: + - '3600000' + anthropic-ratelimit-tokens-reset: + - '2026-03-25T18:31:45Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CZQGyE516m7mENVa2xVhJ + server-timing: + - x-originResponse;dur=1219 + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '1218' + status: + code: 200 + message: OK version: 1 diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml b/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml index 441128e9..dd57682c 100644 --- a/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml +++ b/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml @@ -1,6 +1,6 @@ interactions: - request: - body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is the first + body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is the first type of testing mentioned in section 1.2?"}],"model":"claude-sonnet-4-5-20250929","system":[{"type":"text","text":"\n# Comprehensive Guide to Software Testing Methods!\n\n## Chapter 1: Introduction to Testing\n\nSoftware testing is a critical component of the software 
development @@ -159,7 +159,7 @@ interactions: code: 200 message: OK - request: - body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is the first + body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is the first type of testing mentioned in section 1.2?"},{"role":"assistant","content":"According to section 1.2 \"Types of Testing,\" the first type of testing mentioned is **Unit Testing**, which is described as \"Testing individual components or functions diff --git a/py/src/braintrust/integrations/langchain/conftest.py b/py/src/braintrust/integrations/langchain/conftest.py index a10ab496..e369fffd 100644 --- a/py/src/braintrust/integrations/langchain/conftest.py +++ b/py/src/braintrust/integrations/langchain/conftest.py @@ -19,8 +19,8 @@ def setup_braintrust_langchain(): os.environ["BRAINTRUST_API_URL"] = "http://localhost:8000" os.environ["BRAINTRUST_APP_URL"] = "http://localhost:3000" os.environ["BRAINTRUST_API_KEY"] = TEST_API_KEY - os.environ["ANTHROPIC_API_KEY"] = "your_anthropic_api_key_here" - os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here" + os.environ.setdefault("ANTHROPIC_API_KEY", "your_anthropic_api_key_here") + os.environ.setdefault("OPENAI_API_KEY", "your_openai_api_key_here") os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1/proxy" _internal_reset_global_state() diff --git a/py/src/braintrust/integrations/langchain/test_langchain.py b/py/src/braintrust/integrations/langchain/test_langchain.py index 5698fdb7..9bb711d5 100644 --- a/py/src/braintrust/integrations/langchain/test_langchain.py +++ b/py/src/braintrust/integrations/langchain/test_langchain.py @@ -112,9 +112,6 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger): "additional_kwargs": ANY, "response_metadata": ANY, "type": "ai", - "name": ANY, - "id": ANY, - "example": ANY, "tool_calls": ANY, "invalid_tool_calls": ANY, "usage_metadata": ANY, @@ -133,8 +130,6 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger): 
"additional_kwargs": {}, "response_metadata": {}, "type": "human", - "name": None, - "id": None, } ] }, @@ -151,9 +146,6 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger): "additional_kwargs": {}, "response_metadata": {}, "type": "human", - "name": None, - "id": None, - "example": ANY, } ] ], @@ -169,8 +161,6 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger): "additional_kwargs": ANY, "response_metadata": ANY, "type": "ai", - "name": None, - "id": ANY, }, } ] @@ -183,7 +173,6 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger): }, "model_name": "gpt-4o-mini-2024-07-18", }, - "run": None, "type": "LLMResult", }, "metrics": { @@ -248,9 +237,6 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger): "additional_kwargs": ANY, "response_metadata": ANY, "type": "ai", - "name": ANY, - "id": ANY, - "example": ANY, "tool_calls": ANY, "invalid_tool_calls": ANY, "usage_metadata": ANY, @@ -269,8 +255,6 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger): "additional_kwargs": {}, "response_metadata": {}, "type": "human", - "name": None, - "id": None, } ] }, @@ -287,9 +271,6 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger): "additional_kwargs": {}, "response_metadata": {}, "type": "human", - "name": None, - "id": None, - "example": ANY, } ] ], @@ -305,8 +286,6 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger): "additional_kwargs": ANY, "response_metadata": ANY, "type": "ai", - "name": None, - "id": ANY, }, } ] @@ -319,7 +298,6 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger): }, "model_name": "gpt-4o-mini-2024-07-18", }, - "run": None, "type": "LLMResult", }, "metrics": { @@ -392,8 +370,6 @@ def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger): "additional_kwargs": {}, "response_metadata": {}, "type": "human", - "name": None, - "id": None, } ] }, @@ -410,9 +386,6 @@ def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger): 
"additional_kwargs": {}, "response_metadata": {}, "type": "human", - "name": None, - "id": None, - "example": ANY, } ] ], @@ -428,8 +401,6 @@ def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger): "additional_kwargs": ANY, "response_metadata": ANY, "type": "ai", - "name": None, - "id": ANY, }, } ] @@ -442,7 +413,6 @@ def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger): }, "model_name": "gpt-4o-mini-2024-07-18", }, - "run": None, "type": "LLMResult", }, "metrics": { @@ -523,9 +493,6 @@ def calculator(input: CalculatorInput) -> str: "additional_kwargs": {}, "response_metadata": {}, "type": "human", - "name": None, - "id": None, - "example": ANY, } ] ], @@ -554,12 +521,8 @@ def calculator(input: CalculatorInput) -> str: "message": { "content": ANY, "type": "ai", - "additional_kwargs": { - "tool_calls": ANY, - }, + "additional_kwargs": ANY, "response_metadata": ANY, - "name": None, - "id": ANY, }, } ] @@ -572,7 +535,6 @@ def calculator(input: CalculatorInput) -> str: }, "model_name": "gpt-4o-mini-2024-07-18", }, - "run": None, "type": "LLMResult", }, "metrics": { @@ -770,9 +732,6 @@ def say_bye(state: dict[str, str]): "additional_kwargs": {}, "response_metadata": {}, "type": "human", - "name": None, - "id": None, - "example": ANY, } ] ], @@ -792,8 +751,6 @@ def say_bye(state: dict[str, str]): "additional_kwargs": ANY, "response_metadata": ANY, "type": "ai", - "name": None, - "id": ANY, }, } ] @@ -806,7 +763,6 @@ def say_bye(state: dict[str, str]): }, "model_name": "gpt-4o-mini-2024-07-18", }, - "run": None, "type": "LLMResult", }, "metrics": { @@ -1059,9 +1015,6 @@ def test_streaming_ttft(logger_memory_logger: LoggerMemoryLogger): { "additional_kwargs": {}, "content": "Count from 1 to 5.", - "example": False, - "id": None, - "name": None, "response_metadata": {}, "type": "human", } diff --git a/py/src/braintrust/integrations/langchain/tracing.py b/py/src/braintrust/integrations/langchain/tracing.py index 1a142e6b..efe2fd61 100644 --- 
a/py/src/braintrust/integrations/langchain/tracing.py +++ b/py/src/braintrust/integrations/langchain/tracing.py @@ -84,6 +84,17 @@ class BraintrustCallbackHandler: root_run_id: UUID | None = None + # Duck-typing attributes required by LangChain's callback manager. + # These mirror BaseCallbackHandler without requiring inheritance. + raise_error: bool = False + ignore_llm: bool = False + ignore_retry: bool = False + ignore_chain: bool = False + ignore_agent: bool = False + ignore_retriever: bool = False + ignore_chat_model: bool = False + ignore_custom_event: bool = False + def __init__( self, logger: Logger | Span | None = None, @@ -97,8 +108,8 @@ def __init__( r"^(l[sc]_|langgraph_|__pregel_|checkpoint_ns)" ) self.skipped_runs: set[UUID] = set() - # Set run_inline=True to avoid thread executor in async contexts - # This ensures memory logger context is preserved + # run_inline=True avoids thread executor in async contexts, + # ensuring the ContextVar state is preserved across callbacks. self.run_inline = True self._start_times: dict[UUID, float] = {} @@ -686,11 +697,18 @@ def _get_metrics_from_response(response: Any) -> dict[str, Any]: input_token_details = usage_metadata.get("input_token_details") if input_token_details and isinstance(input_token_details, dict): cache_read = input_token_details.get("cache_read") - cache_creation = input_token_details.get("cache_creation") + # langchain-anthropic >=1.4 sets cache_creation=0 when ephemeral + # breakdown keys are present, so sum those up as the true total. 
+ cache_creation = input_token_details.get("cache_creation") or 0 + cache_creation += sum( + v + for k, v in input_token_details.items() + if k.startswith("ephemeral_") and k.endswith("_input_tokens") and v + ) if cache_read is not None: metrics["prompt_cached_tokens"] = cache_read - if cache_creation is not None: + if cache_creation: metrics["prompt_cache_creation_tokens"] = cache_creation if not metrics or not any(metrics.values()): From 2d50659bd7e912726d6763f25f1ac44b052a05a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Halber?= Date: Wed, 25 Mar 2026 19:16:34 +0000 Subject: [PATCH 7/7] chore: maintain compatibility for test with pre v1 langchain --- py/noxfile.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/py/noxfile.py b/py/noxfile.py index 1c8c7389..d2042088 100644 --- a/py/noxfile.py +++ b/py/noxfile.py @@ -104,6 +104,7 @@ def _pinned_python_version(): DSPY_VERSIONS = (LATEST,) GOOGLE_ADK_VERSIONS = (LATEST, "1.14.1") LANGCHAIN_VERSIONS = (LATEST, "0.3.83") +LANGCHAIN_VERSION_IDS = (LATEST, "langchain-core-0.3.83") # temporalio 1.19.0+ requires Python >= 3.10; skip Python 3.9 entirely TEMPORAL_VERSIONS = (LATEST, "1.20.0", "1.19.0") PYTEST_VERSIONS = (LATEST, "8.4.2") @@ -196,14 +197,14 @@ def test_google_genai(session, version): @nox.session() -@nox.parametrize("version", LANGCHAIN_VERSIONS, ids=LANGCHAIN_VERSIONS) +@nox.parametrize("version", LANGCHAIN_VERSIONS, ids=LANGCHAIN_VERSION_IDS) def test_langchain(session, version): """Test LangChain integration.""" _install_test_deps(session) _install(session, "langchain-core", version) - _install(session, "langchain-openai", version) - _install(session, "langchain-anthropic", version) - session.install("langgraph", silent=SILENT_INSTALLS) + _install(session, "langchain-openai") + _install(session, "langchain-anthropic") + _install(session, "langgraph") _run_tests(session, f"{INTEGRATION_DIR}/langchain/test_langchain.py") _run_core_tests(session)