From c65635aceb338181b745df5ce39d1e497a2cc5bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Tue, 24 Mar 2026 22:25:41 +0000
Subject: [PATCH 1/7] ref(langchain): start to migrate to integrations API

---
 integrations/langchain-py/README.md           |   28 +-
 integrations/langchain-py/pyproject.toml      |    7 +-
 .../src/braintrust_langchain/__init__.py      |   30 +-
 .../src/braintrust_langchain/callbacks.py     |  655 +-------
 .../src/braintrust_langchain/context.py       |   34 +-
 py/Makefile                                   |    2 +-
 py/examples/langchain/auto.py                 |   24 +
 py/examples/langchain/manual_patching.py      |   27 +
 py/noxfile.py                                 |   18 +
 py/src/braintrust/auto.py                     |    7 +-
 py/src/braintrust/integrations/__init__.py    |    3 +-
 .../auto_test_scripts/test_auto_langchain.py  |   21 +
 .../integrations/langchain/__init__.py        |   47 +
 .../cassettes/test_async_langchain_invoke     |  276 ++++
 .../cassettes/test_chain_with_memory          |  332 ++++
 .../langchain/cassettes/test_global_handler   |  225 +++
 .../test_langchain_anthropic_integration      |  300 ++++
 .../cassettes/test_langgraph_state_management |  327 ++++
 .../langchain/cassettes/test_llm_calls        |  333 ++++
 .../cassettes/test_parallel_execution         |  234 +++
 .../cassettes/test_prompt_caching_tokens      |  324 ++++
 .../langchain/cassettes/test_streaming_ttft   |  298 ++++
 .../langchain/cassettes/test_tool_usage       |  350 +++++
 .../integrations/langchain/conftest.py        |   58 +
 .../integrations/langchain/integration.py     |   34 +
 .../integrations/langchain/test_langchain.py  | 1380 +++++++++++++++++
 .../integrations/langchain/tracing.py         |  701 +++++++++
 py/src/braintrust/wrappers/langchain.py       |  150 --
 .../braintrust/wrappers/langchain/__init__.py |   21 +
 29 files changed, 5420 insertions(+), 826 deletions(-)
 create mode 100644 py/examples/langchain/auto.py
 create mode 100644 py/examples/langchain/manual_patching.py
 create mode 100644 py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py
 create mode 100644 py/src/braintrust/integrations/langchain/__init__.py
 create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke
 create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory
 create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_global_handler
 create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration
 create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management
 create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_llm_calls
 create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution
 create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens
 create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft
 create mode 100644 py/src/braintrust/integrations/langchain/cassettes/test_tool_usage
 create mode 100644 py/src/braintrust/integrations/langchain/conftest.py
 create mode 100644 py/src/braintrust/integrations/langchain/integration.py
 create mode 100644 py/src/braintrust/integrations/langchain/test_langchain.py
 create mode 100644 py/src/braintrust/integrations/langchain/tracing.py
 delete mode 100644 py/src/braintrust/wrappers/langchain.py
 create mode 100644 py/src/braintrust/wrappers/langchain/__init__.py

diff --git a/integrations/langchain-py/README.md b/integrations/langchain-py/README.md
index ec00daef..19717270 100644
--- a/integrations/langchain-py/README.md
+++ b/integrations/langchain-py/README.md
@@ -1,9 +1,35 @@
-# braintrust-langchain
+# braintrust-langchain (DEPRECATED)
 
 [![PyPI version](https://img.shields.io/pypi/v/braintrust-langchain.svg)](https://pypi.org/project/braintrust-langchain/)
 
 SDK for integrating [Braintrust](https://braintrust.dev) with [LangChain](https://langchain.com/). This package provides a callback handler to automatically log LangChain executions to Braintrust.
 
+> **This package is deprecated.** The LangChain integration is now included in the main [`braintrust`](https://pypi.org/project/braintrust/) package.
+
+## Migration
+
+1. Remove `braintrust-langchain` from your dependencies
+2. Install or upgrade `braintrust`:
+   ```bash
+   pip install --upgrade braintrust
+   ```
+3. Update your imports:
+   ```python
+   # Before
+   from braintrust_langchain import BraintrustCallbackHandler, set_global_handler
+
+   # After (option 1: auto-instrument langchain library)
+   import braintrust
+   braintrust.auto_instrument()
+
+   # After (option 2: explicit)
+   from braintrust.wrappers.langchain import BraintrustCallbackHandler, set_global_handler
+   ```
+
+With option 2 the API is identical - no code changes are needed beyond the import path.
+
+---
+
 ## Installation
 
 ```bash
diff --git a/integrations/langchain-py/pyproject.toml b/integrations/langchain-py/pyproject.toml
index 9bbf9d7a..9d620604 100644
--- a/integrations/langchain-py/pyproject.toml
+++ b/integrations/langchain-py/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "braintrust-langchain"
 version = "0.2.1"
-description = "Integration for LangChain and Braintrust Tracing"
+description = "DEPRECATED: LangChain integration is now included in the main braintrust package. Install braintrust instead."
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
@@ -10,9 +10,9 @@ dependencies = [
 ]
 license = "MIT"
 authors = [{ name = "Braintrust", email = "info@braintrust.dev" }]
-keywords = ["braintrust", "langchain", "llm", "tracing", "ai", "agents"]
+keywords = ["braintrust", "langchain", "llm", "tracing", "ai", "agents", "deprecated"]
 classifiers = [
-    "Development Status :: 4 - Beta",
+    "Development Status :: 7 - Inactive",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
@@ -73,4 +73,3 @@ known-third-party = ["braintrust", "langchain"]
 [tool.pytest.ini_options]
 testpaths = ["src/tests"]
 python_files = ["test_*.py"]
-addopts = "-v"
diff --git a/integrations/langchain-py/src/braintrust_langchain/__init__.py b/integrations/langchain-py/src/braintrust_langchain/__init__.py
index 2feeb7bc..9f9fb04a 100644
--- a/integrations/langchain-py/src/braintrust_langchain/__init__.py
+++ b/integrations/langchain-py/src/braintrust_langchain/__init__.py
@@ -1,4 +1,28 @@
-from .callbacks import BraintrustCallbackHandler
-from .context import set_global_handler
+"""
+DEPRECATED: braintrust-langchain is now part of the main braintrust package.
 
-__all__ = ["BraintrustCallbackHandler", "set_global_handler"]
+Install `braintrust` and use `braintrust.integrations.langchain` or
+`braintrust.auto_instrument()` instead. This package now re-exports from
+`braintrust.integrations.langchain` for backward compatibility.
+"""
+
+import warnings
+
+warnings.warn(
+    "braintrust-langchain is deprecated. The LangChain integration is now included in the "
+    "main 'braintrust' package. Use 'from braintrust.integrations.langchain import "
+    "BraintrustCallbackHandler' or 'braintrust.auto_instrument()' instead. "
+    "This package will be removed in a future release.",
+    DeprecationWarning,
+    stacklevel=2,
+)
+
+# Re-export public API from the new location for backward compatibility
+from braintrust.integrations.langchain import (  # noqa: E402, F401
+    BraintrustCallbackHandler,
+    BraintrustTracer,
+    clear_global_handler,
+    set_global_handler,
+)
+
+__all__ = ["BraintrustCallbackHandler", "BraintrustTracer", "set_global_handler", "clear_global_handler"]
diff --git a/integrations/langchain-py/src/braintrust_langchain/callbacks.py b/integrations/langchain-py/src/braintrust_langchain/callbacks.py
index 016a1268..1adfae9d 100644
--- a/integrations/langchain-py/src/braintrust_langchain/callbacks.py
+++ b/integrations/langchain-py/src/braintrust_langchain/callbacks.py
@@ -1,648 +1,15 @@
-import json
-import logging
-import re
-import time
-from collections.abc import Mapping, Sequence
-from re import Pattern
-from typing import (
-    Any,
-    TypedDict,
-    Union,
-)
-from uuid import UUID
-
-import braintrust
-from braintrust import NOOP_SPAN, Logger, Span, SpanAttributes, SpanTypeAttribute, current_span, init_logger
-from braintrust.version import VERSION as sdk_version
-from langchain_core.agents import AgentAction, AgentFinish
-from langchain_core.callbacks.base import BaseCallbackHandler
-from langchain_core.documents import Document
-from langchain_core.messages import BaseMessage
-from langchain_core.outputs.llm_result import LLMResult
-from tenacity import RetryCallState
-from typing_extensions import NotRequired
-
-from braintrust_langchain.version import version
-
-_logger = logging.getLogger("braintrust_langchain")
-
-
-class LogEvent(TypedDict):
-    input: NotRequired[Any]
-    output: NotRequired[Any]
-    expected: NotRequired[Any]
-    error: NotRequired[str]
-    tags: NotRequired[Sequence[str] | None]
-    scores: NotRequired[Mapping[str, int | float]]
-    metadata: NotRequired[Mapping[str, Any]]
-    metrics: NotRequired[Mapping[str, int | float]]
-    id: NotRequired[str]
-    dataset_record_id: NotRequired[str]
-
-
-class BraintrustCallbackHandler(BaseCallbackHandler):
-    root_run_id: UUID | None = None
-
-    def __init__(
-        self,
-        logger: Logger | Span | None = None,
-        debug: bool = False,
-        exclude_metadata_props: Pattern[str] | None = None,
-    ):
-        self.logger = logger
-        self.spans: dict[UUID, Span] = {}
-        self.debug = debug  # DEPRECATED
-        self.exclude_metadata_props = exclude_metadata_props or re.compile(
-            r"^(l[sc]_|langgraph_|__pregel_|checkpoint_ns)"
-        )
-        self.skipped_runs: set[UUID] = set()
-        # Set run_inline=True to avoid thread executor in async contexts
-        # This ensures memory logger context is preserved
-        self.run_inline = True
-
-        self._start_times: dict[UUID, float] = {}
-        self._first_token_times: dict[UUID, float] = {}
-        self._ttft_ms: dict[UUID, float] = {}
-
-    def _start_span(
-        self,
-        parent_run_id: UUID | None,
-        run_id: UUID,
-        name: str | None = None,
-        type: SpanTypeAttribute | None = SpanTypeAttribute.TASK,
-        span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
-        start_time: float | None = None,
-        set_current: bool | None = None,
-        parent: str | None = None,
-        event: LogEvent | None = None,
-    ) -> Any:
-        if run_id in self.spans:
-            # XXX: See graph test case of an example where this _may_ be intended.
-            _logger.warning(f"Span already exists for run_id {run_id} (this is likely a bug)")
-            return
-
-        if not parent_run_id:
-            self.root_run_id = run_id
-
-        current_parent = current_span()
-        parent_span = None
-        if parent_run_id and parent_run_id in self.spans:
-            parent_span = self.spans[parent_run_id]
-        elif current_parent != NOOP_SPAN:
-            parent_span = current_parent
-        elif self.logger is not None:
-            parent_span = self.logger
-        else:
-            parent_span = braintrust
-
-        if event is None:
-            event = {}
-
-        tags = event.get("tags") or []
-        event = {
-            **event,
-            "tags": None,
-            "metadata": {
-                **({"tags": tags}),
-                **(event.get("metadata") or {}),
-                "run_id": run_id,
-                "parent_run_id": parent_run_id,
-                "braintrust": {
-                    "integration_name": "langchain-py",
-                    "integration_version": version,
-                    "sdk_version": sdk_version,
-                    "language": "python",
-                },
-            },
-        }
-
-        span = parent_span.start_span(
-            name=name,
-            type=type,
-            span_attributes=span_attributes,
-            start_time=start_time,
-            set_current=set_current,
-            parent=parent,
-            **event,
-        )
-
-        if self.logger != NOOP_SPAN and span == NOOP_SPAN:
-            _logger.warning(
-                "Braintrust logging not configured. Pass a `logger`, call `init_logger`, or run an experiment to configure Braintrust logging. Setting up a default."
-            )
-            span = init_logger().start_span(
-                name=name,
-                type=type,
-                span_attributes=span_attributes,
-                start_time=start_time,
-                set_current=set_current,
-                parent=parent,
-                **event,
-            )
-
-        span.set_current()
-
-        self.spans[run_id] = span
-        return span
-
-    def _end_span(
-        self,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        input: Any | None = None,
-        output: Any | None = None,
-        expected: Any | None = None,
-        error: str | None = None,
-        tags: Sequence[str] | None = None,
-        scores: Mapping[str, int | float] | None = None,
-        metadata: Mapping[str, Any] | None = None,
-        metrics: Mapping[str, int | float] | None = None,
-        dataset_record_id: str | None = None,
-    ) -> Any:
-        if run_id not in self.spans:
-            return
-
-        if run_id in self.skipped_runs:
-            self.skipped_runs.discard(run_id)
-            return
-
-        span = self.spans.pop(run_id)
-
-        if self.root_run_id == run_id:
-            self.root_run_id = None
-
-        span.log(
-            input=input,
-            output=output,
-            expected=expected,
-            error=error,
-            tags=None,
-            scores=scores,
-            metadata={
-                **({"tags": tags} if tags else {}),
-                **(metadata or {}),
-            },
-            metrics=metrics,
-            dataset_record_id=dataset_record_id,
-        )
-
-        # In async workflows, callbacks may execute in different async contexts.
-        # The span's context variable token may have been created in a different
-        # context, causing ValueError when trying to reset it. We catch and ignore
-        # this specific error since the span hierarchy is maintained via self.spans.
-        try:
-            span.unset_current()
-        except ValueError as e:
-            if "was created in a different Context" in str(e):
-                pass
-            else:
-                raise
-
-        span.end()
-
-    def on_llm_error(
-        self,
-        error: BaseException,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        **kwargs: Any,  # TODO: response=
-    ) -> Any:
-        self._end_span(run_id, error=str(error), metadata={**kwargs})
-
-        self._start_times.pop(run_id, None)
-        self._first_token_times.pop(run_id, None)
-        self._ttft_ms.pop(run_id, None)
-
-    def on_chain_error(
-        self,
-        error: BaseException,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        **kwargs: Any,  # TODO: some metadata
-    ) -> Any:
-        self._end_span(run_id, error=str(error), metadata={**kwargs})
-
-    def on_tool_error(
-        self,
-        error: BaseException,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._end_span(run_id, error=str(error), metadata={**kwargs})
-
-    def on_retriever_error(
-        self,
-        error: BaseException,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._end_span(run_id, error=str(error), metadata={**kwargs})
-
-    # Agent Methods
-    def on_agent_action(
-        self,
-        action: AgentAction,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._start_span(
-            parent_run_id,
-            run_id,
-            type=SpanTypeAttribute.LLM,
-            name=action.tool,
-            event={"input": action, "metadata": {**kwargs}},
-        )
-
-    def on_agent_finish(
-        self,
-        finish: AgentFinish,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._end_span(run_id, output=finish, metadata={**kwargs})
-
-    def on_chain_start(
-        self,
-        serialized: dict[str, Any],
-        inputs: dict[str, Any],
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        tags: list[str] | None = None,
-        name: str | None = None,
-        metadata: dict[str, Any] | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        tags = tags or []
-
-        # avoids extra logs that seem not as useful esp. with langgraph
-        if "langsmith:hidden" in tags:
-            self.skipped_runs.add(run_id)
-            return
-
-        metadata = metadata or {}
-        resolved_name = (
-            name
-            or metadata.get("langgraph_node")
-            or serialized.get("name")
-            or last_item(serialized.get("id") or [])
-            or "Chain"
-        )
-
-        self._start_span(
-            parent_run_id,
-            run_id,
-            name=resolved_name,
-            event={
-                "input": inputs,
-                "tags": tags,
-                "metadata": {
-                    "serialized": serialized,
-                    "name": name,
-                    "metadata": metadata,
-                    **kwargs,
-                },
-            },
-        )
-
-    def on_chain_end(
-        self,
-        outputs: dict[str, Any],
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        tags: list[str] | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._end_span(run_id, output=outputs, tags=tags, metadata={**kwargs})
+"""
+DEPRECATED: Import from braintrust.wrappers.langchain instead.
+"""
 
-    def on_llm_start(
-        self,
-        serialized: dict[str, Any],
-        prompts: list[str],
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        tags: list[str] | None = None,
-        metadata: dict[str, Any] | None = None,
-        name: str | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._start_times[run_id] = time.perf_counter()
-        self._first_token_times.pop(run_id, None)
-        self._ttft_ms.pop(run_id, None)
+import warnings
 
-        name = name or serialized.get("name") or last_item(serialized.get("id") or []) or "LLM"
-        self._start_span(
-            parent_run_id,
-            run_id,
-            name=name,
-            type=SpanTypeAttribute.LLM,
-            event={
-                "input": prompts,
-                "tags": tags,
-                "metadata": {
-                    "serialized": serialized,
-                    "name": name,
-                    "metadata": metadata,
-                    **kwargs,
-                },
-            },
-        )
-
-    def on_chat_model_start(
-        self,
-        serialized: dict[str, Any],
-        messages: list[list["BaseMessage"]],
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        tags: list[str] | None = None,
-        metadata: dict[str, Any] | None = None,
-        name: str | None = None,
-        invocation_params: dict[str, Any] | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._start_times[run_id] = time.perf_counter()
-        self._first_token_times.pop(run_id, None)
-        self._ttft_ms.pop(run_id, None)
-
-        invocation_params = invocation_params or {}
-        self._start_span(
-            parent_run_id,
-            run_id,
-            name=name or serialized.get("name") or last_item(serialized.get("id") or []) or "Chat Model",
-            type=SpanTypeAttribute.LLM,
-            event={
-                "input": messages,
-                "tags": tags,
-                "metadata": (
-                    {
-                        "serialized": serialized,
-                        "invocation_params": invocation_params,
-                        "metadata": metadata or {},
-                        "name": name,
-                        **kwargs,
-                    }
-                ),
-            },
-        )
-
-    def on_llm_end(
-        self,
-        response: LLMResult,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        tags: list[str] | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        if run_id not in self.spans:
-            return
-
-        metrics = _get_metrics_from_response(response)
-
-        ttft = self._ttft_ms.pop(run_id, None)
-        if ttft is not None:
-            metrics["time_to_first_token"] = ttft
-
-        model_name = _get_model_name_from_response(response)
-
-        self._start_times.pop(run_id, None)
-        self._first_token_times.pop(run_id, None)
-
-        self._end_span(
-            run_id,
-            output=response,
-            metrics=metrics,
-            tags=tags,
-            metadata={
-                "model": model_name,
-                **kwargs,
-            },
-        )
-
-    def on_tool_start(
-        self,
-        serialized: dict[str, Any],
-        input_str: str,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        tags: list[str] | None = None,
-        metadata: dict[str, Any] | None = None,
-        inputs: dict[str, Any] | None = None,
-        name: str | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._start_span(
-            parent_run_id,
-            run_id,
-            name=name or serialized.get("name") or last_item(serialized.get("id") or []) or "Tool",
-            type=SpanTypeAttribute.TOOL,
-            event={
-                "input": inputs or safe_parse_serialized_json(input_str),
-                "tags": tags,
-                "metadata": {
-                    "metadata": metadata,
-                    "serialized": serialized,
-                    "input_str": input_str,
-                    "input": safe_parse_serialized_json(input_str),
-                    "inputs": inputs,
-                    "name": name,
-                    **kwargs,
-                },
-            },
-        )
-
-    def on_tool_end(
-        self,
-        output: Any,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._end_span(run_id, output=output, metadata={**kwargs})
-
-    def on_retriever_start(
-        self,
-        serialized: dict[str, Any],
-        query: str,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        tags: list[str] | None = None,
-        metadata: dict[str, Any] | None = None,
-        name: str | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._start_span(
-            parent_run_id,
-            run_id,
-            name=name or serialized.get("name") or last_item(serialized.get("id") or []) or "Retriever",
-            type=SpanTypeAttribute.FUNCTION,
-            event={
-                "input": query,
-                "tags": tags,
-                "metadata": {
-                    "serialized": serialized,
-                    "metadata": metadata,
-                    "name": name,
-                    **kwargs,
-                },
-            },
-        )
-
-    def on_retriever_end(
-        self,
-        documents: Sequence[Document],
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._end_span(run_id, output=documents, metadata={**kwargs})
-
-    def on_llm_new_token(
-        self,
-        token: str,
-        *,
-        chunk: Union["GenerationChunk", "ChatGenerationChunk"] | None = None,  # type: ignore
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        if run_id not in self._first_token_times:
-            now = time.perf_counter()
-            self._first_token_times[run_id] = now
-            start = self._start_times.get(run_id)
-            if start is not None:
-                self._ttft_ms[run_id] = now - start
-
-    def on_text(
-        self,
-        text: str,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        pass
-
-    def on_retry(
-        self,
-        retry_state: RetryCallState,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        pass
-
-    def on_custom_event(
-        self,
-        name: str,
-        data: Any,
-        *,
-        run_id: UUID,
-        tags: list[str] | None = None,
-        metadata: dict[str, Any] | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        pass
-
-
-def clean_object(obj: dict[str, Any]) -> dict[str, Any]:
-    return {
-        k: v
-        for k, v in obj.items()
-        if v is not None and not (isinstance(v, list) and not v) and not (isinstance(v, dict) and not v)
-    }
-
-
-def safe_parse_serialized_json(input_str: str) -> Any:
-    try:
-        return json.loads(input_str)
-    except:
-        return input_str
-
-
-def last_item(items: list[Any]) -> Any:
-    return items[-1] if items else None
-
-
-def _walk_generations(response: LLMResult):
-    for generations in response.generations or []:
-        yield from generations or []
-
-
-def _get_model_name_from_response(response: LLMResult) -> str | None:
-    model_name = None
-    for generation in _walk_generations(response):
-        message = getattr(generation, "message", None)
-        if not message:
-            continue
-
-        response_metadata = getattr(message, "response_metadata", None)
-        if response_metadata and isinstance(response_metadata, dict):
-            model_name = response_metadata.get("model_name")
-
-        if model_name:
-            break
-
-    if not model_name:
-        llm_output: dict[str, Any] = response.llm_output or {}
-        model_name = llm_output.get("model_name") or llm_output.get("model") or ""
-
-    return model_name
-
-
-def _get_metrics_from_response(response: LLMResult):
-    metrics = {}
-
-    for generation in _walk_generations(response):
-        message = getattr(generation, "message", None)
-        if not message:
-            continue
-
-        usage_metadata = getattr(message, "usage_metadata", None)
-
-        if usage_metadata and isinstance(usage_metadata, dict):
-            metrics.update(
-                clean_object(
-                    {
-                        "total_tokens": usage_metadata.get("total_tokens"),
-                        "prompt_tokens": usage_metadata.get("input_tokens"),
-                        "completion_tokens": usage_metadata.get("output_tokens"),
-                    }
-                )
-            )
-
-            # Extract cache tokens from nested input_token_details (LangChain format)
-            # Maps to Braintrust's standard cache token metric names
-            input_token_details = usage_metadata.get("input_token_details")
-            if input_token_details and isinstance(input_token_details, dict):
-                cache_read = input_token_details.get("cache_read")
-                cache_creation = input_token_details.get("cache_creation")
-
-                if cache_read is not None:
-                    metrics["prompt_cached_tokens"] = cache_read
-                if cache_creation is not None:
-                    metrics["prompt_cache_creation_tokens"] = cache_creation
+warnings.warn(
+    "braintrust_langchain.callbacks is deprecated. Import from 'braintrust.wrappers.langchain' instead.",
+    DeprecationWarning,
+    stacklevel=2,
+)
 
-    if not metrics or not any(metrics.values()):
-        llm_output: dict[str, Any] = response.llm_output or {}
-        metrics = llm_output.get("token_usage") or llm_output.get("estimatedTokens") or {}
+from braintrust.integrations.langchain import BraintrustCallbackHandler  # noqa: F401
 
-    return clean_object(metrics)
+__all__ = ["BraintrustCallbackHandler"]
diff --git a/integrations/langchain-py/src/braintrust_langchain/context.py b/integrations/langchain-py/src/braintrust_langchain/context.py
index 5c6bb4e8..c11385d1 100644
--- a/integrations/langchain-py/src/braintrust_langchain/context.py
+++ b/integrations/langchain-py/src/braintrust_langchain/context.py
@@ -1,26 +1,18 @@
-from contextvars import ContextVar
+"""
+DEPRECATED: Import from braintrust.wrappers.langchain instead.
+"""
 
-from langchain_core.tracers.context import register_configure_hook
+import warnings
 
-from braintrust_langchain.callbacks import BraintrustCallbackHandler
-
-__all__ = ["set_global_handler", "clear_global_handler"]
-
-
-braintrust_callback_handler_var: ContextVar[BraintrustCallbackHandler | None] = ContextVar(
-    "braintrust_callback_handler", default=None
+warnings.warn(
+    "braintrust_langchain.context is deprecated. Import from 'braintrust.wrappers.langchain' instead.",
+    DeprecationWarning,
+    stacklevel=2,
 )
 
-
-def set_global_handler(handler: BraintrustCallbackHandler):
-    braintrust_callback_handler_var.set(handler)
-
-
-def clear_global_handler():
-    braintrust_callback_handler_var.set(None)
-
-
-register_configure_hook(
-    context_var=braintrust_callback_handler_var,
-    inheritable=True,
+from braintrust.integrations.langchain import (  # noqa: F401
+    clear_global_handler,
+    set_global_handler,
 )
+
+__all__ = ["set_global_handler", "clear_global_handler"]
diff --git a/py/Makefile b/py/Makefile
index 4696d84d..f2e29b58 100644
--- a/py/Makefile
+++ b/py/Makefile
@@ -60,7 +60,7 @@ install-dev: install-build-deps
 	$(UV) pip install -r requirements-dev.txt
 
 install-optional: install-dev
-	$(UV) pip install anthropic openai pydantic_ai litellm agno google-genai google-adk dspy langsmith
+	$(UV) pip install anthropic openai pydantic_ai litellm agno google-genai google-adk dspy langsmith langchain-core langchain-openai langchain-anthropic langgraph
 	$(UV) pip install -e .[temporal,otel]
 
 .DEFAULT_GOAL := help
diff --git a/py/examples/langchain/auto.py b/py/examples/langchain/auto.py
new file mode 100644
index 00000000..b6776dd7
--- /dev/null
+++ b/py/examples/langchain/auto.py
@@ -0,0 +1,24 @@
+"""Auto-instrument LangChain with Braintrust tracing.
+
+Usage:
+    export BRAINTRUST_API_KEY="your-api-key"
+    export OPENAI_API_KEY="your-openai-api-key"
+    python auto.py
+"""
+
+import braintrust
+
+
+# Auto-instrument all supported libraries, including LangChain
+braintrust.auto_instrument()
+
+from langchain_openai import ChatOpenAI
+
+
+def main():
+    llm = ChatOpenAI(model="gpt-4o-mini")
+    response = llm.invoke("What is the capital of France?")
+    print(response.content)
+
+
+main()
diff --git a/py/examples/langchain/manual_patching.py b/py/examples/langchain/manual_patching.py
new file mode 100644
index 00000000..142bcc5a
--- /dev/null
+++ b/py/examples/langchain/manual_patching.py
@@ -0,0 +1,27 @@
+"""Manually patch LangChain with Braintrust tracing.
+
+Usage:
+    export BRAINTRUST_API_KEY="your-api-key"
+    export OPENAI_API_KEY="your-openai-api-key"
+    python manual_patching.py
+"""
+
+from braintrust import init_logger
+from braintrust.wrappers.langchain import set_global_handler, BraintrustCallbackHandler
+
+
+# Setup LangChain tracing with a specific project
+logger = init_logger(project="my-langchain-project")
+handler = BraintrustCallbackHandler(logger=logger)
+set_global_handler(handler)
+
+from langchain_openai import ChatOpenAI
+
+
+def main():
+    """Send a single prompt to ChatOpenAI and print the text of the reply."""
+    reply = ChatOpenAI(model="gpt-4o-mini").invoke("What is the capital of France?")
+    print(reply.content)
+
+
+main()
diff --git a/py/noxfile.py b/py/noxfile.py
index bff911db..2789bfee 100644
--- a/py/noxfile.py
+++ b/py/noxfile.py
@@ -78,6 +78,7 @@ def _pinned_python_version():
     "google.genai",
     "google.adk",
     "temporalio",
+    "langchain_core",
 )
 
 # Test matrix
@@ -101,6 +102,7 @@ def _pinned_python_version():
 GENAI_VERSIONS = (LATEST,)
 DSPY_VERSIONS = (LATEST,)
 GOOGLE_ADK_VERSIONS = (LATEST, "1.14.1")
+LANGCHAIN_VERSIONS = (LATEST,)
 # temporalio 1.19.0+ requires Python >= 3.10; skip Python 3.9 entirely
 TEMPORAL_VERSIONS = (LATEST, "1.20.0", "1.19.0")
 PYTEST_VERSIONS = (LATEST, "8.4.2")
@@ -192,6 +194,19 @@ def test_google_genai(session, version):
     _run_core_tests(session)
 
 
+@nox.session()
+@nox.parametrize("version", LANGCHAIN_VERSIONS, ids=LANGCHAIN_VERSIONS)
+def test_langchain(session, version):
+    """Test LangChain integration (core/openai/anthropic packages get `version`; langgraph is unpinned)."""
+    _install_test_deps(session)
+    _install(session, "langchain-core", version)
+    _install(session, "langchain-openai", version)
+    _install(session, "langchain-anthropic", version)
+    session.install("langgraph", silent=SILENT_INSTALLS)
+    _run_tests(session, f"{INTEGRATION_DIR}/langchain/test_langchain.py")
+    _run_core_tests(session)
+
+
 @nox.session()
 @nox.parametrize("version", GOOGLE_ADK_VERSIONS, ids=GOOGLE_ADK_VERSIONS)
 def test_google_adk(session, version):
@@ -329,6 +344,9 @@ def pylint(session):
     session.install("opentelemetry.instrumentation.openai")
     # langsmith is needed for the wrapper module but not in VENDOR_PACKAGES
     session.install("langsmith")
+    # langchain deps are needed for the langchain wrapper; langchain-core is also listed in
+    # VENDOR_PACKAGES, but all four packages are installed explicitly here for pylint coverage
+    session.install("langchain-core", "langchain-openai", "langchain-anthropic", "langgraph")
 
     result = session.run("git", "ls-files", "**/*.py", silent=True, log=False)
     files = [path for path in result.strip().splitlines() if path not in GENERATED_LINT_EXCLUDES]
diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py
index 6c15b653..fb40d088 100644
--- a/py/src/braintrust/auto.py
+++ b/py/src/braintrust/auto.py
@@ -9,7 +9,7 @@
 import logging
 from contextlib import contextmanager
 
-from braintrust.integrations import AnthropicIntegration, IntegrationPatchConfig
+from braintrust.integrations import AnthropicIntegration, IntegrationPatchConfig, LangChainIntegration
 
 
 __all__ = ["auto_instrument"]
@@ -40,6 +40,7 @@ def auto_instrument(
     claude_agent_sdk: bool = True,
     dspy: bool = True,
     adk: bool = True,
+    langchain: bool = True,
 ) -> dict[str, bool]:
     """
     Auto-instrument supported AI/ML libraries for Braintrust tracing.
@@ -61,6 +62,7 @@ def auto_instrument(
         claude_agent_sdk: Enable Claude Agent SDK instrumentation (default: True)
         dspy: Enable DSPy instrumentation (default: True)
         adk: Enable Google ADK instrumentation (default: True)
+        langchain: Enable LangChain instrumentation (default: True)
 
     Returns:
         Dict mapping integration name to whether it was successfully instrumented.
@@ -117,6 +119,7 @@ def auto_instrument(
     claude_agent_sdk_enabled = _normalize_bool_option("claude_agent_sdk", claude_agent_sdk)
     dspy_enabled = _normalize_bool_option("dspy", dspy)
     adk_enabled = _normalize_bool_option("adk", adk)
+    langchain_enabled = _normalize_bool_option("langchain", langchain)
 
     if openai_enabled:
         results["openai"] = _instrument_openai()
@@ -136,6 +139,8 @@ def auto_instrument(
         results["dspy"] = _instrument_dspy()
     if adk_enabled:
         results["adk"] = _instrument_adk()
+    if langchain_enabled:
+        results["langchain"] = _instrument_integration(LangChainIntegration)
 
     return results
 
diff --git a/py/src/braintrust/integrations/__init__.py b/py/src/braintrust/integrations/__init__.py
index 1dddbd91..74840ad7 100644
--- a/py/src/braintrust/integrations/__init__.py
+++ b/py/src/braintrust/integrations/__init__.py
@@ -1,5 +1,6 @@
 from .anthropic import AnthropicIntegration
 from .base import IntegrationPatchConfig
+from .langchain import LangChainIntegration
 
 
-__all__ = ["AnthropicIntegration", "IntegrationPatchConfig"]
+__all__ = ["AnthropicIntegration", "IntegrationPatchConfig", "LangChainIntegration"]
diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py
new file mode 100644
index 00000000..a3719ef8
--- /dev/null
+++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py
@@ -0,0 +1,21 @@
+"""Test auto_instrument for LangChain."""
+
+from braintrust.auto import auto_instrument
+from braintrust.integrations.langchain import BraintrustCallbackHandler
+
+# 1. Instrument: the result dict must report langchain as instrumented
+results = auto_instrument()
+assert results.get("langchain") is True, "auto_instrument should return True for langchain"
+
+# 2. Idempotent: a second call must still report success
+results2 = auto_instrument()
+assert results2.get("langchain") is True, "auto_instrument should still return True on second call"
+
+# 3. Verify that a global handler was registered with LangChain
+from langchain_core.callbacks import CallbackManager
+
+manager = CallbackManager.configure()
+handler = next((h for h in manager.handlers if isinstance(h, BraintrustCallbackHandler)), None)
+assert handler is not None, "BraintrustCallbackHandler should be registered globally after auto_instrument()"
+
+print("SUCCESS")
diff --git a/py/src/braintrust/integrations/langchain/__init__.py b/py/src/braintrust/integrations/langchain/__init__.py
new file mode 100644
index 00000000..9758ca95
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/__init__.py
@@ -0,0 +1,47 @@
+"""Braintrust integration for LangChain."""
+
+from .integration import LangChainIntegration
+
+
+def setup_langchain() -> bool:
+    """
+    Auto-instrument LangChain for Braintrust tracing.
+
+    Delegates to LangChainIntegration.setup(), which registers a global
+    BraintrustCallbackHandler so chains, LLMs, tools, and retrievers are traced.
+
+    braintrust.auto_instrument() enables the same LangChainIntegration on its own,
+    so an explicit call is optional. Repeated calls are safe (subsequent ones are no-ops).
+
+    Returns:
+        True if setup succeeded, False if langchain_core is not installed.
+    """
+    return LangChainIntegration.setup()
+
+
+# Lazily imported to avoid circular imports at module load time
+# (tracing.py imports from braintrust, which must be fully initialized first)
+_LAZY_ATTRS = frozenset(["BraintrustCallbackHandler", "BraintrustTracer", "set_global_handler", "clear_global_handler"])
+
+
+def __getattr__(name: str):
+    """Resolve the lazily exported tracing names on first attribute access (PEP 562)."""
+    if name not in _LAZY_ATTRS:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+    from . import tracing
+
+    # Cache every lazy export at once so later lookups never reach this hook.
+    exports = {lazy: getattr(tracing, lazy) for lazy in _LAZY_ATTRS}
+    globals().update(exports)
+    return exports[name]
+
+
+__all__ = [
+    "LangChainIntegration",
+    "BraintrustCallbackHandler",
+    "BraintrustTracer",
+    "set_global_handler",
+    "clear_global_handler",
+    "setup_langchain",
+]
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke b/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke
new file mode 100644
index 00000000..3ecc362e
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke
@@ -0,0 +1,276 @@
+interactions:
+- request:
+    body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "What is
+      1 + 2?"}], "model": "claude-sonnet-4-20250514"}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '110'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      user-agent:
+      - AsyncAnthropic/Python 0.68.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - async:asyncio
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 0.68.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.13
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//dJBfSwMxEMS/yjGvpnB3bUUCvhR88aEg/gERCTFZ2uDd5kw2Ui333eWK
+        Rar4tLC/mWGYPYKHRp83pm4ublf5+uWT0/DYP9ysr+7XOx/uoCAfA00qytluCAopdtPD5hyyWBYo
+        9NFTBw3X2eJpliMzyWwxa+t2WS+bBRRcZCEW6Kf9MVJoN5kPR6Opzqq2uqzmGJ8VssTBJLI5MjSI
+        vZGSGN8g01shdgTNpesUyqGa3iPwUMRIfCXO0M25grNuS8YlshIim1NBfeSJrP+PHb1TPg1b6inZ
+        ziz7v/of2mx/01EhFjlpN1fIlN6DIyOBEjSmPb1NHuP4BQAA//8DABaJlhKdAQAA
+    headers:
+      CF-RAY:
+      - 983cc1f7fda07e2d-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 23 Sep 2025 20:23:04 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - 02af79b5-9b1a-4100-a05f-9235eb38bda4
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CTRxS1WS9ia9upALgfUZK
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      via:
+      - 1.1 google
+      x-envoy-upstream-service-time:
+      - '1030'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '110'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      user-agent:
+      - AsyncAnthropic/Python 0.68.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - async:asyncio
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 0.68.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//dJDdSgMxEEZfZfluTWG37YoGvLagN0VBRCSEZGhDdydrMimWsu8uWyxS
+        xauBOWd++I7oo6cOGq6zxdMsR2aS2XI2r+dt3TZLKAQPjT5vTN08397v1qG5WT+Ep/Z19bJfySMf
+        oCCHgSaLcrYbgkKK3dSwOYcslgUKLrIQC/Tb8ewLfU7kVDSa6qqaV3fVAuO7QpY4mEQ2R4YGsTdS
+        EuMbZPooxI6guXSdQjnd1UcEHooYiTviDN1cKzjrtmRcIishsrkU6jNPZP1/7Dw77adhSz0l25m2
+        /+v/0Gb7m44KscjFdwuFTGkfHBkJlKAxheVt8hjHLwAAAP//AwBHCKHFnQEAAA==
+    headers:
+      CF-RAY:
+      - 99b0eabe4896b976-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:22:38 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      anthropic-ratelimit-input-tokens-limit:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-remaining:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-reset:
+      - '2025-11-08T00:22:38Z'
+      anthropic-ratelimit-output-tokens-limit:
+      - '600000'
+      anthropic-ratelimit-output-tokens-remaining:
+      - '600000'
+      anthropic-ratelimit-output-tokens-reset:
+      - '2025-11-08T00:22:38Z'
+      anthropic-ratelimit-tokens-limit:
+      - '3600000'
+      anthropic-ratelimit-tokens-remaining:
+      - '3600000'
+      anthropic-ratelimit-tokens-reset:
+      - '2025-11-08T00:22:38Z'
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CUuU6hWk8Jg8Bh2c4Vyty
+      retry-after:
+      - '23'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-envoy-upstream-service-time:
+      - '1801'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '110'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      user-agent:
+      - AsyncAnthropic/Python 0.68.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - async:asyncio
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 0.68.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA3SQTUvDQBCG/0p4r24gaRvRBQ9CDyJ4rBeRZbs7tNFkNu7OBkvJf5cUi1TxNDDP
+        Mx+8R/TBUwcN19nsqUyBmaRclYtq0VRNvYJC66HRp52p6qd1/7DZ3o8yjIe355v1o9tsm1soyGGg
+        2aKU7I6gEEM3N2xKbRLLAgUXWIgF+uV49oU+Z3IqGnVxVSyKu2KJ6VUhSRhMJJsCQ4PYG8mR8Q0S
+        fWRiR9Ccu04hn+7qI1oeshgJ78QJur5WcNbtybhIVtrA5lKozjyS9f+x8+y8n4Y99RRtZ5r+r/9D
+        6/1vOimELBffLRUSxbF1ZKSlCI05LG+jxzR9AQAA//8DAEp7u9udAQAA
+    headers:
+      CF-RAY:
+      - 99b0ebedd90d6897-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:23:27 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      anthropic-ratelimit-input-tokens-limit:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-remaining:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-reset:
+      - '2025-11-08T00:23:26Z'
+      anthropic-ratelimit-output-tokens-limit:
+      - '600000'
+      anthropic-ratelimit-output-tokens-remaining:
+      - '600000'
+      anthropic-ratelimit-output-tokens-reset:
+      - '2025-11-08T00:23:26Z'
+      anthropic-ratelimit-tokens-limit:
+      - '3600000'
+      anthropic-ratelimit-tokens-remaining:
+      - '3600000'
+      anthropic-ratelimit-tokens-reset:
+      - '2025-11-08T00:23:26Z'
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CUuUAHB8QqxGoW7TZyUaz
+      retry-after:
+      - '34'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-envoy-upstream-service-time:
+      - '1851'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory b/py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory
new file mode 100644
index 00000000..88cc8848
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory
@@ -0,0 +1,332 @@
+interactions:
+- request:
+    body: '{"messages": [{"content": "Assistant: Hello! How can I assist you today?
+      User: What''s your name?", "role": "user"}], "model": "gpt-4o-mini", "stream":
+      false}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '149'
+      content-type:
+      - application/json
+      host:
+      - localhost:8000
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.13
+    method: POST
+    uri: http://localhost:8000/v1/proxy/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-CJ3pSTx8NVvtJFY51xvv7gmxKCqAO\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1758658986,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"Assistant: I don't have a personal
+        name, but you can call me Assistant. How can I help you today?\",\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        24,\n    \"completion_tokens\": 23,\n    \"total_tokens\": 47,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_560af6e559\"\n}\n"
+    headers:
+      Access-Control-Allow-Credentials:
+      - 'true'
+      Access-Control-Expose-Headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id
+      Connection:
+      - keep-alive
+      Date:
+      - Tue, 23 Sep 2025 20:23:06 GMT
+      Keep-Alive:
+      - timeout=5
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      cf-ray:
+      - 983cc206fc1f67ef-SJC
+      content-type:
+      - application/json
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '755'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - _cfuvid=TIArUY3FKYo9t2vz5lADo0yFHggpjc9nkMoRBVQYfbA-1758658986949-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-bt-cached:
+      - MISS
+      x-bt-internal-trace-id:
+      - 899e875d60ba290b68341d027600a8fd
+      x-content-type-options:
+      - nosniff
+      x-envoy-upstream-service-time:
+      - '775'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999980'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_3b64f4bb78c14e2ea80001681e34611d
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"content":"Assistant: Hello! How can I assist you today?
+      User: What''s your name?","role":"user"}],"model":"gpt-4o-mini","stream":false}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '149'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=W_Ukgb.mz8e1GW7CfhzN.QQaN09_xQq1uTHm3a.dJdU-1762561359-1.0.1.1-6IrkySxpZaL.1C65iH0iOLFfere0JxHCiasT6bak.RihYFMyJgIz2OuYJqcUey8c5vicjtorNby_Z_GJX.ZMIHa6PyzVrhqgfZZmtnnn.sA;
+        _cfuvid=jwWMA4k30hLPwBwTSCIdIeS5.m1TkcdYLYTt4YSTZhI-1762561359243-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA4ySTW/bMAyG7/4VhC67xEW+s+YyFAO2ZNhp22HYUBiMRNvaZFGT5KRBkf8+2E5i
+        d22BXXTgw5fiS/IxARBaiTUIWWKUlTPp+x9fxg/u4yFuv20O9ezD98/7YvPn01fFxeFOjBoF736R
+        jBfVjeTKGYqabYelJ4zUVJ2sltPFcjJb3LagYkWmkRUupnNOK211Oh1P5+l4lU7entUla0lBrOFn
+        AgDw2L5Nn1bRg1jDeHSJVBQCFiTW1yQA4dk0EYEh6BDRRjHqoWQbybat3134Grag2L6JUOKeAMGR
+        D2zRgMWKRrCrIxy5BokWJBoDFcFVfAMbPrRoCyUZ12ZGVnh8N/zXU14HbLzb2pgBQGs5YjO71vH9
+        mZyuHg0XzvMu/CMVubY6lJknDGwbPyGyEy09JQD37SzrJ+MRznPlYhb5N7XfTeddOdFvcABnZxg5
+        ounj89XohWqZoojahMEuhERZkuqV/eKwVpoHIBl4ft7MS7U739oW/1O+B1KSi6Qy50lp+dRwn+ap
+        ue/X0q4zbhsWgfxeS8qiJt/sQVGOtemuToRjiFRlubYFeed1d3q5yxbLMeZLWixuRXJK/gIAAP//
+        AwCouO6tiAMAAA==
+    headers:
+      CF-RAY:
+      - 99b0eacf8822aaac-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:22:39 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '628'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '639'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999980'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_1009d84201314e5aa9ccdcbafeeac4af
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"content":"Assistant: Hello! How can I assist you today?
+      User: What''s your name?","role":"user"}],"model":"gpt-4o-mini","stream":false}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '149'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=.AxQfRhAvElThVl_Qz9zUVdqz_GtBGXwRQ0TVPIg5pc-1762561407-1.0.1.1-klsoMaFKHjzxOrHy2Zfd8Sc76RDHsMXURLAaIzORncnm47NI1MY0BqqBGOEsVXlZb.RdqeqpxzGFhl8DlRDjy.SqRfa2B4zEYdKZqQ2kVB0;
+        _cfuvid=0ohSoYMS21h1NkHWl4FeeVCp5aK2KHeEjclSm1NY7yY-1762561407934-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//jJLBbtswDIbvfgpO53hIgiRNcxmKAcN6GNbuMmBDYTASHauRRU2imwRF
+        gb3GXm9PUthp4nTLgF104Mef4k/yMQNQ1qgFKF2h6Dq4/P23L6P18vJm8+Pz1e16m8pPH25u7fbe
+        fJ1vlmrQKnh5T1oOqrea6+BILPs91pFQqK06upiNp7PRZDjvQM2GXCtbBcknnNfW23w8HE/y4UU+
+        mr+oK7aaklrA9wwA4LF72z69oa1awHBwiNSUEq5ILY5JACqyayMKU7JJ0Isa9FCzF/Jd61cHvoBr
+        MOx///wlUOEDAUIKpG1pNXisaQDLRmDHDWj0oNE5qAmO8jfwkTcduoaKXOgyhQ3u3p3+HKlsErbu
+        fePcCUDvWbCdXuf57oU8HV06XoXIy/SHVJXW21QVkTCxbx0l4aA6+pQB3HXTbF4NSIXIdZBCeE3d
+        d+PJvpzqd3gGCgu6Pj6ZD85UKwwJWpdOtqE06opMr+xXh42xfAKyE89/N3Ou9t639av/Kd8DrSkI
+        mSJEMla/NtynRWov/F9pxxl3DatE8cFqKsRSbPdgqMTG7e9OpV0SqovS+hXFEO3++MpQTGdDLGc0
+        nV6q7Cl7BgAA//8DAEMiDgGKAwAA
+    headers:
+      CF-RAY:
+      - 99b0ebffc94fed3b-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:23:28 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '680'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '708'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999980'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_e273cb6eb8624df78282659b4a19fffe
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_global_handler b/py/src/braintrust/integrations/langchain/cassettes/test_global_handler
new file mode 100644
index 00000000..ba9f4fa9
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_global_handler
@@ -0,0 +1,225 @@
+interactions:
+- request:
+    body: '{"messages": [{"content": "What is 1 + 2?", "role": "user"}], "model":
+      "gpt-4o-mini", "frequency_penalty": 0.0, "n": 1, "presence_penalty": 0.0, "stream":
+      false, "temperature": 1.0, "top_p": 1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '177'
+      content-type:
+      - application/json
+      host:
+      - localhost:8000
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.13
+    method: POST
+    uri: http://localhost:8000/v1/proxy/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-CJ44VUVp2sk1koSWXX64CaLEy1mWy\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1758659919,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"1 + 2 equals 3.\",\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        15,\n    \"completion_tokens\": 8,\n    \"total_tokens\": 23,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_560af6e559\"\n}\n"
+    headers:
+      Access-Control-Allow-Credentials:
+      - 'true'
+      Access-Control-Expose-Headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id
+      Connection:
+      - keep-alive
+      Date:
+      - Tue, 23 Sep 2025 20:38:40 GMT
+      Keep-Alive:
+      - timeout=5
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      cf-ray:
+      - 983cd8d01c33943a-SJC
+      content-type:
+      - application/json
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '930'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - _cfuvid=XPwF0fhMV9JwjYuWwUMNbzPKxvSJ.HOkXEftYzjXRew-1758659920459-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-bt-cached:
+      - MISS
+      x-bt-internal-trace-id:
+      - 93acad0503781eb98ab6ea3412173537
+      x-content-type-options:
+      - nosniff
+      x-envoy-upstream-service-time:
+      - '1026'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999992'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_181413148bbe4814a905514521d6dc34
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"content":"What is 1 + 2?","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"top_p":1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '177'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//jJJBb9swDIXv/hUCr4uL2IlTJ9et2y2HHQZ0Q2EoMm1rk0VNoocNRf77
+        IDuN3a0FevGBHx/1Hs3HRAjQNRwEqE6y6p1J33/9XH7adscPRXGXnTb7+49fNse7/b2iYylhFRV0
+        +o6Kn1Q3inpnkDXZCSuPkjFOzW53ebHLyiwfQU81mihrHadbSnttdZqv8226vk2z8qLuSCsMcBDf
+        EiGEeBy/0aet8TccxHr1VOkxBNkiHK5NQoAnEysgQ9CBpWVYzVCRZbSj9Uy8E7nAn4M0QWxull0e
+        myHI6NQOxiyAtJZYxqSjv4cLOV8dGWqdp1P4RwqNtjp0lUcZyMbXA5ODkZ4TIR7G5MOzMOA89Y4r
+        ph84PpcV0ziY9z3D8sKYWJq5nG9WLwyramSpTVgsDpRUHdazct6yHGpNC5AsIv/v5aXZU2xt27eM
+        n4FS6BjrynmstXqed27zGI/xtbbrikfDEND/0gor1ujjb6ixkYOZTgTCn8DYV422LXrn9XQnjauK
+        3Vo2OyyKPSTn5C8AAAD//wMAcIbFgjUDAAA=
+    headers:
+      CF-RAY:
+      - 99b0f5db9f1cbffc-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:30:12 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=vfpKl6dvzcujjwigai_kp7UkNhR2ltT1SwFsT05VrS8-1762561812-1.0.1.1-UAyuy134RWxRUzjbClH59IJarw95du8Dl347lkXcDkbXBBx7vCmRuxRccJQB2f1T6oobZSgBj7O8hdaLY4hef6ypZ2uHUshy880EnptiWEY;
+        path=/; expires=Sat, 08-Nov-25 01:00:12 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=N6FAUGU_qhcPvlVWdt0kvrpbt1SzTvQ0v29fL2QCNbA-1762561812358-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '319'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '489'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999992'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_3e940a310adf4d9a88c8da6b70645bb7
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration b/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration
new file mode 100644
index 00000000..6c396d02
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration
@@ -0,0 +1,300 @@
+interactions:
+- request:
+    body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "What is
+      1 + 2?"}], "model": "claude-sonnet-4-20250514"}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '110'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      user-agent:
+      - Anthropic/Python 0.68.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 0.68.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.13
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//dJBRSwMxEIT/yjGvpnDX9hQDvggegn9AEAkxWdvg3eZMNsVa7r/LFYtU
+        8Wlhv5lhmAOCh8aQN6Zu7nZX3IXr+8/X9cNt213vu5fucYSC7EeaVZSz3RAUUuznh805ZLEsUBii
+        px4arrfF0yJHZpLFerGsl23dNmsouMhCLNBPh1Ok0MdsPh6NprqoltVNtcL0rJAljiaRzZGhQeyN
+        lMT4BpneC7EjaC59r1CO1fQBgcciRuIbcYZuLhWcdVsyLpGVENmcC+oTT2T9f+zknfNp3NJAyfam
+        Hf7qf2iz/U0nhVjkrN1KIVPaBUdGAiVozHt6mzym6QsAAP//AwD8n6CUnQEAAA==
+    headers:
+      CF-RAY:
+      - 983cc09c5f361679-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 23 Sep 2025 20:22:09 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - 02af79b5-9b1a-4100-a05f-9235eb38bda4
+      anthropic-ratelimit-input-tokens-limit:
+      - '30000'
+      anthropic-ratelimit-input-tokens-remaining:
+      - '30000'
+      anthropic-ratelimit-input-tokens-reset:
+      - '2025-09-23T20:22:09Z'
+      anthropic-ratelimit-output-tokens-limit:
+      - '8000'
+      anthropic-ratelimit-output-tokens-remaining:
+      - '8000'
+      anthropic-ratelimit-output-tokens-reset:
+      - '2025-09-23T20:22:09Z'
+      anthropic-ratelimit-requests-limit:
+      - '50'
+      anthropic-ratelimit-requests-remaining:
+      - '49'
+      anthropic-ratelimit-requests-reset:
+      - '2025-09-23T20:22:09Z'
+      anthropic-ratelimit-tokens-limit:
+      - '38000'
+      anthropic-ratelimit-tokens-remaining:
+      - '38000'
+      anthropic-ratelimit-tokens-reset:
+      - '2025-09-23T20:22:09Z'
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CTRxMui53W9h6eXYGxUJb
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      via:
+      - 1.1 google
+      x-envoy-upstream-service-time:
+      - '1110'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '110'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      user-agent:
+      - Anthropic/Python 0.68.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 0.68.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//dJDLasMwEEV/xdxtFfAjCa2g29JVoetShJCmsRt75Eij9BH878WhpqSl
+        q4E5Zx7cE4bgqYeG6232tEqBmWS1XtVlvSk31RoKnYfGkHamrB5z87bPfPfwGa+Ph9f7401wL1so
+        yMdIs0Up2R1BIYZ+btiUuiSWBQousBAL9NNp8YXeZ3IuGlVxVdTFbdFgelZIEkYTyabA0CD2RnJk
+        fINEh0zsCJpz3yvk8119QsdjFiNhT5ygq62Cs64l4yJZ6QKbS6FceCTr/2PL7LyfxpYGirY3m+Gv
+        /0Or9jedFEKWi+8ahUTx2Dky0lGExhyWt9Fjmr4AAAD//wMARZkZqp0BAAA=
+    headers:
+      CF-RAY:
+      - 99b0eab2783f1758-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:22:36 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      anthropic-ratelimit-input-tokens-limit:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-remaining:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-reset:
+      - '2025-11-08T00:22:36Z'
+      anthropic-ratelimit-output-tokens-limit:
+      - '600000'
+      anthropic-ratelimit-output-tokens-remaining:
+      - '600000'
+      anthropic-ratelimit-output-tokens-reset:
+      - '2025-11-08T00:22:36Z'
+      anthropic-ratelimit-tokens-limit:
+      - '3600000'
+      anthropic-ratelimit-tokens-remaining:
+      - '3600000'
+      anthropic-ratelimit-tokens-reset:
+      - '2025-11-08T00:22:36Z'
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CUuU6ZRKcH4CRrH5o4j6b
+      retry-after:
+      - '24'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-envoy-upstream-service-time:
+      - '1694'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '110'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      user-agent:
+      - Anthropic/Python 0.68.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 0.68.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//dJBNS8QwEIb/SnmvptDudhUCHvQkC4oieBEJIRl3y7aTmkz8Kv3v0sVF
+        VvE0MM8zH7wj+uCpg4brbPZUpsBMUjblolqsqlXdQKH10OjTxlT1fXPxcHXzubwOZ2vKb7fPu7vL
+        9Q4K8jHQbFFKdkNQiKGbGzalNollgYILLMQC/TgefKH3meyLRl2cFIvivFhielJIEgYTyabA0CD2
+        RnJkfINEL5nYETTnrlPI+7t6RMtDFiNhR5yg61MFZ92WjItkpQ1sjoXqwCNZ/x87zM77adhST9F2
+        ZtX/9X9ovf1NJ4WQ5ei7pUKi+No6MtJShMYclrfRY5q+AAAA//8DAAqaanadAQAA
+    headers:
+      CF-RAY:
+      - 99b0ebe2db3d67ca-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:23:24 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      anthropic-ratelimit-input-tokens-limit:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-remaining:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-reset:
+      - '2025-11-08T00:23:24Z'
+      anthropic-ratelimit-output-tokens-limit:
+      - '600000'
+      anthropic-ratelimit-output-tokens-remaining:
+      - '600000'
+      anthropic-ratelimit-output-tokens-reset:
+      - '2025-11-08T00:23:24Z'
+      anthropic-ratelimit-tokens-limit:
+      - '3600000'
+      anthropic-ratelimit-tokens-remaining:
+      - '3600000'
+      anthropic-ratelimit-tokens-reset:
+      - '2025-11-08T00:23:24Z'
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CUuUA9cTfN1Yz5PMKHD5d
+      retry-after:
+      - '37'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-envoy-upstream-service-time:
+      - '1556'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management b/py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management
new file mode 100644
index 00000000..20ffc04b
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management
@@ -0,0 +1,327 @@
+interactions:
+- request:
+    body: '{"messages": [{"content": "Say hello", "role": "user"}], "model": "gpt-4o-mini",
+      "frequency_penalty": 0.0, "n": 1, "presence_penalty": 0.0, "stream": false,
+      "temperature": 1.0, "top_p": 1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '172'
+      content-type:
+      - application/json
+      host:
+      - localhost:8000
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.13
+    method: POST
+    uri: http://localhost:8000/v1/proxy/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-CJ3xSBjbTuwYXAmP3RRw0GoHz5Ooy\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1758659482,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"Hello! How can I assist you today?\",\n
+        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
+        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        9,\n    \"completion_tokens\": 9,\n    \"total_tokens\": 18,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_51db84afab\"\n}\n"
+    headers:
+      Access-Control-Allow-Credentials:
+      - 'true'
+      Access-Control-Expose-Headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id
+      Connection:
+      - keep-alive
+      Date:
+      - Tue, 23 Sep 2025 20:31:22 GMT
+      Keep-Alive:
+      - timeout=5
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      cf-ray:
+      - 983cce247c46cf2f-SJC
+      content-type:
+      - application/json
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '381'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - _cfuvid=Y9Om0gYdHB3h9aUHhUUY9eEia6Y3wmSARFX9Xq907Ho-1758659482810-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-bt-cached:
+      - MISS
+      x-bt-internal-trace-id:
+      - ebcf889942216eb0b613f43f2cdb11b1
+      x-content-type-options:
+      - nosniff
+      x-envoy-upstream-service-time:
+      - '397'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999995'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_75709538073646e4bd7355c91bc2ce52
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"content":"Say hello","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"top_p":1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '172'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=W_Ukgb.mz8e1GW7CfhzN.QQaN09_xQq1uTHm3a.dJdU-1762561359-1.0.1.1-6IrkySxpZaL.1C65iH0iOLFfere0JxHCiasT6bak.RihYFMyJgIz2OuYJqcUey8c5vicjtorNby_Z_GJX.ZMIHa6PyzVrhqgfZZmtnnn.sA;
+        _cfuvid=jwWMA4k30hLPwBwTSCIdIeS5.m1TkcdYLYTt4YSTZhI-1762561359243-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA4xSwW7UMBC95ysGnzcoWbppu5eqqoSKgAu9oKIq8tqTrMHxGHuydKn235GTdpPS
+        InHxYd685/dm5iEDEEaLNQi1law6b/Or2y/F70p//Pz+UprNzf2uuO2uvrY/d9XN8pNYJAZtvqPi
+        J9ZbRZ23yIbcCKuAkjGplqfVclWV76pyADrSaBOt9ZyfUN4ZZ/JlsTzJi9O8PHtkb8kojGIN3zIA
+        gIfhTT6dxnuxhmLxVOkwRtmiWB+bAEQgmypCxmgiS8diMYGKHKMbrF+jtfQGrukXKOngA4wE2FMP
+        TFruL+bEgE0fZTLvemtngHSOWKbwg+W7R+RwNGmp9YE28S+qaIwzcVsHlJFcMhSZvBjQQwZwNwyj
+        f5ZP+ECd55rpBw7fnY9qYtrAS4yJpZ3K5dniFa1aI0tj42yUQkm1RT0xp7nLXhuaAdks8Usvr2mP
+        qY1r/0d+ApRCz6hrH1Ab9Tzv1BYwnee/2o4THgyLiGFnFNZsMKQtaGxkb8ejEXEfGbu6Ma7F4IMZ
+        L6fx9aoqZFPhanUuskP2BwAA//8DABw5ElFHAwAA
+    headers:
+      CF-RAY:
+      - 99b0eadaea14aaac-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:22:41 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '328'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '342'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999995'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_68644fc1eb1a4533b2f98192dc918822
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"content":"Say hello","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"top_p":1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '172'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=.AxQfRhAvElThVl_Qz9zUVdqz_GtBGXwRQ0TVPIg5pc-1762561407-1.0.1.1-klsoMaFKHjzxOrHy2Zfd8Sc76RDHsMXURLAaIzORncnm47NI1MY0BqqBGOEsVXlZb.RdqeqpxzGFhl8DlRDjy.SqRfa2B4zEYdKZqQ2kVB0;
+        _cfuvid=0ohSoYMS21h1NkHWl4FeeVCp5aK2KHeEjclSm1NY7yY-1762561407934-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//jFLBbtswDL37Kzid48EOkrTJZYcd1g1osRVFDy0KQ5FoW5ssChK9LSjy
+        74PsNnbXDthFBz6+p/dIPmYAwmixA6FayarzNv94d13am9ubQJf79nB5W9/Z609XV1+V/BK+iUVi
+        0P47Kn5mvVfUeYtsyI2wCigZk2p5tlmuN+Wq2A5ARxptojWe8xXlnXEmXxbLVV6c5eX5E7slozCK
+        HdxnAACPw5t8Oo2/xQ6KxXOlwxhlg2J3agIQgWyqCBmjiSwdi8UEKnKMbrB+gdbSO7igX6Ckg88w
+        EuBAPTBpefgwJwas+yiTeddbOwOkc8QyhR8sPzwhx5NJS40PtI9/UUVtnIltFVBGcslQZPJiQI8Z
+        wMMwjP5FPuEDdZ4rph84fLcd1cS0gdcYE0s7lcvzxRtalUaWxsbZKIWSqkU9Mae5y14bmgHZLPFr
+        L29pj6mNa/5HfgKUQs+oKx9QG/Uy79QWMJ3nv9pOEx4Mi4jhp1FYscGQtqCxlr0dj0bEQ2Tsqtq4
+        BoMPZryc2lfrTSHrDa7XW5Edsz8AAAD//wMAVD8AOUcDAAA=
+    headers:
+      CF-RAY:
+      - 99b0ec0acb26ed3b-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:23:30 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '589'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '607'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999995'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_67359745154e404899e3fd81a37cf26a
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_llm_calls b/py/src/braintrust/integrations/langchain/cassettes/test_llm_calls
new file mode 100644
index 00000000..cea55348
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_llm_calls
@@ -0,0 +1,333 @@
+interactions:
+- request:
+    body: '{"messages": [{"content": "What is 1 + 2?", "role": "user"}], "model":
+      "gpt-4o-mini", "frequency_penalty": 0.0, "n": 1, "presence_penalty": 0.0, "stream":
+      false, "temperature": 1.0, "top_p": 1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '177'
+      content-type:
+      - application/json
+      host:
+      - localhost:8000
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.13
+    method: POST
+    uri: http://localhost:8000/v1/proxy/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-CJ3pRI2shpJIGYKUU8RFWUyB6W5O1\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1758658985,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"1 + 2 equals 3.\",\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        15,\n    \"completion_tokens\": 8,\n    \"total_tokens\": 23,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_560af6e559\"\n}\n"
+    headers:
+      Access-Control-Allow-Credentials:
+      - 'true'
+      Access-Control-Expose-Headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id
+      Connection:
+      - keep-alive
+      Date:
+      - Tue, 23 Sep 2025 20:23:06 GMT
+      Keep-Alive:
+      - timeout=5
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      cf-ray:
+      - 983cc2032f2967ef-SJC
+      content-type:
+      - application/json
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '441'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - _cfuvid=uhF3qDlYXbYwV7mlgYhl_d7MyPH3FwQHxL6cek.ONAQ-1758658986041-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-bt-cached:
+      - MISS
+      x-bt-internal-trace-id:
+      - f4e0a5413e529acf383233e54ad00e99
+      x-content-type-options:
+      - nosniff
+      x-envoy-upstream-service-time:
+      - '454'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999995'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_023ebefb1f6b4dec8910b8cb4d7421f5
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"content":"What is 1 + 2?","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"top_p":1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '177'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//jJI/b9wwDMV3fwqBa8+B7Yudy61dunRJtxaBoZNon1JZVCQ6/RPcdy9k
+        X85OmgJdPPBHPr1H8zkTAoyGvQB1lKwGb/OPX++KH7eh6XcPvz8Psm++6O1d8+kRnw5+B5s0QYcH
+        VPwydaVo8BbZkJuxCigZk2p501R1U27r3QQG0mjTWO85v6Z8MM7kVVFd58VNXp7F1ZGMwgh78S0T
+        Qojn6Zt8Oo0/YS+KzUtlwBhlj7C/NAkBgWyqgIzRRJaOYbNARY7RTdZL8UFUAh9HaaPYXq27AnZj
+        lMmpG61dAekcsUxJJ3/3Z3K6OLLU+0CH+GYUOuNMPLYBZSSXXo9MHiZ6yoS4n5KPr8KADzR4bpm+
+        4/RcWc9ysOx7gbszY2Jpl3K13bwj1mpkaWxcLQ6UVEfUy+SyZTlqQyuQrSL/7eU97Tm2cf3/yC9A
+        KfSMuvUBtVGv8y5tAdMx/qvtsuLJMEQMT0ZhywZD+g0aOzna+UQg/oqMQ9sZ12Pwwcx30vm2bgrZ
+        NVjXt5Cdsj8AAAD//wMAbYrr4zUDAAA=
+    headers:
+      CF-RAY:
+      - 99b0eacc1d35aaac-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:22:39 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=W_Ukgb.mz8e1GW7CfhzN.QQaN09_xQq1uTHm3a.dJdU-1762561359-1.0.1.1-6IrkySxpZaL.1C65iH0iOLFfere0JxHCiasT6bak.RihYFMyJgIz2OuYJqcUey8c5vicjtorNby_Z_GJX.ZMIHa6PyzVrhqgfZZmtnnn.sA;
+        path=/; expires=Sat, 08-Nov-25 00:52:39 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=jwWMA4k30hLPwBwTSCIdIeS5.m1TkcdYLYTt4YSTZhI-1762561359243-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '300'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '430'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999995'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_24854ba725b942179830d357f1af2add
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"content":"What is 1 + 2?","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"top_p":1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '177'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//jJJBb9swDIXv/hUEr4sL242TLtdhu+wy7BRsKAxFoh2lsqRKdLGtyH8f
+        ZKexu3XALj7w46Peo/mcAaBWuAOUR8Gy9yb/8O1reeq+rJ/osdvvf32sDqftA3/+NFC33+MqKdzh
+        RJJfVDfS9d4Qa2cnLAMJpjS13G6qelOui+0IeqfIJFnnOV+7vNdW51VRrfNim5d3F/XRaUkRd/A9
+        AwB4Hr/Jp1X0A3dQrF4qPcUoOsLdtQkAgzOpgiJGHVlYxtUMpbNMdrRewjuogB4HYSLc3iy7ArVD
+        FMmpHYxZAGGtY5GSjv7uL+R8dWRc54M7xD+k2Gqr47EJJKKz6fXIzuNIzxnA/Zh8eBUGfXC954bd
+        A43PlfU0Dud9z/DuwtixMHO5ul29MaxRxEKbuFgcSiGPpGblvGUxKO0WIFtE/tvLW7On2Np2/zN+
+        BlKSZ1KND6S0fJ13bguUjvFfbdcVj4YxUnjSkhrWFNJvUNSKwUwngvFnZOqbVtuOgg96upPWN/Wm
+        EO2G6vo9ZufsNwAAAP//AwDHwDA2NQMAAA==
+    headers:
+      CF-RAY:
+      - 99b0ebfc4e5ced3b-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:23:27 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=.AxQfRhAvElThVl_Qz9zUVdqz_GtBGXwRQ0TVPIg5pc-1762561407-1.0.1.1-klsoMaFKHjzxOrHy2Zfd8Sc76RDHsMXURLAaIzORncnm47NI1MY0BqqBGOEsVXlZb.RdqeqpxzGFhl8DlRDjy.SqRfa2B4zEYdKZqQ2kVB0;
+        path=/; expires=Sat, 08-Nov-25 00:53:27 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=0ohSoYMS21h1NkHWl4FeeVCp5aK2KHeEjclSm1NY7yY-1762561407934-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '269'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '435'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999995'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_617bc8e11f2a43a98a0658e7e91298fd
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution b/py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution
new file mode 100644
index 00000000..aec3440e
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution
@@ -0,0 +1,234 @@
+interactions:
+- request:
+    body: '{"messages": [{"content": "Tell me a joke about bear", "role": "user"}],
+      "model": "gpt-4o-mini", "frequency_penalty": 0.0, "n": 1, "presence_penalty":
+      0.0, "stream": false, "temperature": 1.0, "top_p": 1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '188'
+      content-type:
+      - application/json
+      host:
+      - localhost:8000
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.13
+    method: POST
+    uri: http://localhost:8000/v1/proxy/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-CJ3vA6tl1z95spYoDxT9RtqqzDF8n\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1758659340,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"Why don\u2019t bears ever get lost?\\n\\nBecause
+        they always take the bear necessities! \U0001F43B\",\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        13,\n    \"completion_tokens\": 19,\n    \"total_tokens\": 32,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_51db84afab\"\n}\n"
+    headers:
+      Access-Control-Allow-Credentials:
+      - 'true'
+      Access-Control-Expose-Headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id
+      Connection:
+      - keep-alive
+      Date:
+      - Tue, 23 Sep 2025 20:29:00 GMT
+      Keep-Alive:
+      - timeout=5
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      cf-ray:
+      - 983ccaa98d189e59-SJC
+      content-type:
+      - application/json
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '742'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - _cfuvid=h4eOl14etTzzF9eOjCE9SDq4Y79ZdPOJeIYnqb.tN3E-1758659340929-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-bt-cached:
+      - MISS
+      x-bt-internal-trace-id:
+      - ba7859db365b14edae0dc1d75360d5cb
+      x-content-type-options:
+      - nosniff
+      x-envoy-upstream-service-time:
+      - '912'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999990'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_31748d3aea8d488c9f1b1b7764b3a5d7
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages": [{"content": "write a 2-line poem about bear", "role": "user"}],
+      "model": "gpt-4o-mini", "frequency_penalty": 0.0, "n": 1, "presence_penalty":
+      0.0, "stream": false, "temperature": 1.0, "top_p": 1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '193'
+      content-type:
+      - application/json
+      host:
+      - localhost:8000
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.13
+    method: POST
+    uri: http://localhost:8000/v1/proxy/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-CJ3vAwrz88GjVnlchECG5UbilcrZG\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1758659340,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"In forest shadows, a bear roams free,
+        \ \\nMajestic guardian of the ancient tree.\",\n        \"refusal\": null,\n
+        \       \"annotations\": []\n      },\n      \"logprobs\": null,\n      \"finish_reason\":
+        \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 15,\n    \"completion_tokens\":
+        19,\n    \"total_tokens\": 34,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_560af6e559\"\n}\n"
+    headers:
+      Access-Control-Allow-Credentials:
+      - 'true'
+      Access-Control-Expose-Headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id
+      Connection:
+      - keep-alive
+      Date:
+      - Tue, 23 Sep 2025 20:29:01 GMT
+      Keep-Alive:
+      - timeout=5
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      cf-ray:
+      - 983ccaa99f09cecd-SJC
+      content-type:
+      - application/json
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '909'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - _cfuvid=I8TMI8qNGmqspYd_94RtBiCEVRDIffMScd.j_yw35Es-1758659341697-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-bt-cached:
+      - MISS
+      x-bt-internal-trace-id:
+      - 7d350d2a8b4d267107b257e3a1989c5a
+      x-content-type-options:
+      - nosniff
+      x-envoy-upstream-service-time:
+      - '1375'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999990'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_891af1935bbf49c39105d7299babb315
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens b/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens
new file mode 100644
index 00000000..441128e9
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens
@@ -0,0 +1,324 @@
+interactions:
+- request:
+    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is the first
+      type of testing mentioned in section 1.2?"}],"model":"claude-sonnet-4-5-20250929","system":[{"type":"text","text":"\n#
+      Comprehensive Guide to Software Testing Methods!\n\n## Chapter 1: Introduction
+      to Testing\n\nSoftware testing is a critical component of the software development
+      lifecycle. It ensures that applications\nfunction correctly, meet requirements,
+      and provide a positive user experience. This guide covers various\ntesting methodologies,
+      best practices, and tools used in modern software development.\n\n### 1.1 The
+      Importance of Testing\n\nTesting helps identify defects early in the development
+      process, reducing the cost of fixing issues later.\nStudies have shown that
+      the cost of fixing a bug increases exponentially as it progresses through the\ndevelopment
+      lifecycle. A bug found during requirements gathering might cost $1 to fix, while
+      the same bug\nfound in production could cost $100 or more.\n\n### 1.2 Types
+      of Testing\n\nThere are many types of testing, including:\n- Unit Testing: Testing
+      individual components or functions in isolation\n- Integration Testing: Testing
+      how components work together\n- End-to-End Testing: Testing the entire application
+      flow\n- Performance Testing: Testing application speed and scalability\n- Security
+      Testing: Testing for vulnerabilities and security issues\n- Usability Testing:
+      Testing user experience and interface design\n\n## Chapter 2: Unit Testing Best
+      Practices\n\nUnit testing focuses on testing the smallest testable parts of
+      an application. Here are some best practices:\n\n### 2.1 Write Tests First (TDD)\n\nTest-Driven
+      Development (TDD) is a methodology where tests are written before the actual
+      code. The process\nfollows a simple cycle: Red (write a failing test), Green
+      (write code to pass the test), Refactor (improve\nthe code while keeping tests
+      passing).\n\n### 2.2 Keep Tests Independent\n\nEach test should be independent
+      of others. Tests should not rely on the state created by previous tests.\nThis
+      ensures that tests can be run in any order and that failures are isolated and
+      easy to debug.\n\n### 2.3 Use Meaningful Names\n\nTest names should clearly
+      describe what is being tested and what the expected outcome is. A good test
+      name\nmight be \"test_user_registration_with_valid_email_succeeds\" rather than
+      just \"test_registration\".\n\n### 2.4 Test Edge Cases\n\nDon''t just test the
+      happy path. Consider edge cases like:\n- Empty inputs\n- Null or undefined values\n-
+      Very large inputs\n- Invalid formats\n- Boundary conditions\n\n## Chapter 3:
+      Integration Testing\n\nIntegration testing verifies that different modules or
+      services work together correctly.\n\n### 3.1 Database Integration\n\nWhen testing
+      database interactions, consider using:\n- Test databases separate from production\n-
+      Database transactions that roll back after each test\n- Mock data that represents
+      realistic scenarios\n\n### 3.2 API Integration\n\nAPI integration tests should
+      verify:\n- Correct HTTP status codes\n- Response format and schema\n- Error
+      handling\n- Authentication and authorization\n\n## Chapter 4: Performance Testing\n\nPerformance
+      testing ensures your application can handle expected load and scale appropriately.\n\n###
+      4.1 Load Testing\n\nLoad testing simulates multiple users accessing the application
+      simultaneously. Key metrics include:\n- Response time under load\n- Throughput
+      (requests per second)\n- Error rates\n- Resource utilization (CPU, memory, network)\n\n###
+      4.2 Stress Testing\n\nStress testing pushes the application beyond normal operational
+      capacity to find breaking points and\nunderstand how the system fails gracefully.\n\n##
+      Chapter 5: Continuous Integration and Testing\n\nModern development practices
+      integrate testing into the CI/CD pipeline.\n\n### 5.1 Automated Test Runs\n\nTests
+      should run automatically on every code change. This includes:\n- Running unit
+      tests on every commit\n- Running integration tests on pull requests\n- Running
+      end-to-end tests before deployment\n\n### 5.2 Test Coverage\n\nTest coverage
+      metrics help identify untested code. While 100% coverage isn''t always practical
+      or necessary,\nmaintaining good coverage helps ensure code quality. Focus on
+      critical paths and business logic.\n\n## Chapter 6: Testing Tools and Frameworks\n\nMany
+      tools exist to support testing efforts:\n\n### 6.1 Python Testing\n- pytest:
+      Feature-rich testing framework\n- unittest: Built-in Python testing module\n-
+      mock: Library for mocking objects\n\n### 6.2 JavaScript Testing\n- Jest: Popular
+      testing framework\n- Mocha: Flexible testing framework\n- Cypress: End-to-end
+      testing tool\n\n### 6.3 Other Tools\n- Selenium: Browser automation\n- JMeter:
+      Performance testing\n- Postman: API testing\n\n## Conclusion\n\nEffective testing
+      is essential for delivering high-quality software. By following best practices
+      and using\nappropriate tools, teams can catch bugs early, improve code quality,
+      and deliver better products to users.\n\nRemember: Testing is not just about
+      finding bugs, it''s about building confidence in your code.\n","cache_control":{"type":"ephemeral"}}]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '5160'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      user-agent:
+      - Anthropic/Python 0.76.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 0.76.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.10.19
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/22RzU7rMBCFX8WaZZWiJGqvaHYXsQIhsaBsKIqMPW0sEjt4xgVU9d0ZFyp+V4nn
+        fHOOZ7yDIVjsoQHT62RxSsF75OlsOp/WZT0vF/UCCnBWiIE2bVndrsO/s+1ptYzXl+fz04tN0nZ5
+        JQy/jpgpJNIblEIMfS5oIkesPUvJBM8of83d7sgzvmTl8GngvzEhWuc3ioMiNOyCV9VJrVZwIzyp
+        sFY3SCxEsQLFHaq1i8Qqu2WR30U1SIz0olWO1GSy9I6PjZNJoZ47Z7osWSQT3YNwmnLIR7vz1m2d
+        TbpXJgyjGHmW8KjWyR8uRYJIf+h1Pp2sAPb3BRCHsY2oZYsyDHrbcooePgTCp4TeyNQ+9X0B6bCo
+        ZgfOj4lbDo/oCZq6lEVp02FrxCrbt9+BqpwtjogQ9of8qz1H4NjhgFH37Xz40+4TqLqfhvsCQuKv
+        pZmEEMatM9iywyjT5je2OlrY798Az+eCZFYCAAA=
+    headers:
+      CF-RAY:
+      - 9c1a60c71c9c67cb-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 21 Jan 2026 22:51:47 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      anthropic-ratelimit-input-tokens-limit:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-remaining:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-reset:
+      - '2026-01-21T22:51:46Z'
+      anthropic-ratelimit-output-tokens-limit:
+      - '600000'
+      anthropic-ratelimit-output-tokens-remaining:
+      - '600000'
+      anthropic-ratelimit-output-tokens-reset:
+      - '2026-01-21T22:51:47Z'
+      anthropic-ratelimit-tokens-limit:
+      - '3600000'
+      anthropic-ratelimit-tokens-remaining:
+      - '3600000'
+      anthropic-ratelimit-tokens-reset:
+      - '2026-01-21T22:51:46Z'
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CXMLqXaFZ4xWZExkXJyyb
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-envoy-upstream-service-time:
+      - '2088'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is the first
+      type of testing mentioned in section 1.2?"},{"role":"assistant","content":"According
+      to section 1.2 \"Types of Testing,\" the first type of testing mentioned is
+      **Unit Testing**, which is described as \"Testing individual components or functions
+      in isolation.\""},{"role":"user","content":"What testing framework is mentioned
+      for Python?"}],"model":"claude-sonnet-4-5-20250929","system":[{"type":"text","text":"\n#
+      Comprehensive Guide to Software Testing Methods!\n\n## Chapter 1: Introduction
+      to Testing\n\nSoftware testing is a critical component of the software development
+      lifecycle. It ensures that applications\nfunction correctly, meet requirements,
+      and provide a positive user experience. This guide covers various\ntesting methodologies,
+      best practices, and tools used in modern software development.\n\n### 1.1 The
+      Importance of Testing\n\nTesting helps identify defects early in the development
+      process, reducing the cost of fixing issues later.\nStudies have shown that
+      the cost of fixing a bug increases exponentially as it progresses through the\ndevelopment
+      lifecycle. A bug found during requirements gathering might cost $1 to fix, while
+      the same bug\nfound in production could cost $100 or more.\n\n### 1.2 Types
+      of Testing\n\nThere are many types of testing, including:\n- Unit Testing: Testing
+      individual components or functions in isolation\n- Integration Testing: Testing
+      how components work together\n- End-to-End Testing: Testing the entire application
+      flow\n- Performance Testing: Testing application speed and scalability\n- Security
+      Testing: Testing for vulnerabilities and security issues\n- Usability Testing:
+      Testing user experience and interface design\n\n## Chapter 2: Unit Testing Best
+      Practices\n\nUnit testing focuses on testing the smallest testable parts of
+      an application. Here are some best practices:\n\n### 2.1 Write Tests First (TDD)\n\nTest-Driven
+      Development (TDD) is a methodology where tests are written before the actual
+      code. The process\nfollows a simple cycle: Red (write a failing test), Green
+      (write code to pass the test), Refactor (improve\nthe code while keeping tests
+      passing).\n\n### 2.2 Keep Tests Independent\n\nEach test should be independent
+      of others. Tests should not rely on the state created by previous tests.\nThis
+      ensures that tests can be run in any order and that failures are isolated and
+      easy to debug.\n\n### 2.3 Use Meaningful Names\n\nTest names should clearly
+      describe what is being tested and what the expected outcome is. A good test
+      name\nmight be \"test_user_registration_with_valid_email_succeeds\" rather than
+      just \"test_registration\".\n\n### 2.4 Test Edge Cases\n\nDon''t just test the
+      happy path. Consider edge cases like:\n- Empty inputs\n- Null or undefined values\n-
+      Very large inputs\n- Invalid formats\n- Boundary conditions\n\n## Chapter 3:
+      Integration Testing\n\nIntegration testing verifies that different modules or
+      services work together correctly.\n\n### 3.1 Database Integration\n\nWhen testing
+      database interactions, consider using:\n- Test databases separate from production\n-
+      Database transactions that roll back after each test\n- Mock data that represents
+      realistic scenarios\n\n### 3.2 API Integration\n\nAPI integration tests should
+      verify:\n- Correct HTTP status codes\n- Response format and schema\n- Error
+      handling\n- Authentication and authorization\n\n## Chapter 4: Performance Testing\n\nPerformance
+      testing ensures your application can handle expected load and scale appropriately.\n\n###
+      4.1 Load Testing\n\nLoad testing simulates multiple users accessing the application
+      simultaneously. Key metrics include:\n- Response time under load\n- Throughput
+      (requests per second)\n- Error rates\n- Resource utilization (CPU, memory, network)\n\n###
+      4.2 Stress Testing\n\nStress testing pushes the application beyond normal operational
+      capacity to find breaking points and\nunderstand how the system fails gracefully.\n\n##
+      Chapter 5: Continuous Integration and Testing\n\nModern development practices
+      integrate testing into the CI/CD pipeline.\n\n### 5.1 Automated Test Runs\n\nTests
+      should run automatically on every code change. This includes:\n- Running unit
+      tests on every commit\n- Running integration tests on pull requests\n- Running
+      end-to-end tests before deployment\n\n### 5.2 Test Coverage\n\nTest coverage
+      metrics help identify untested code. While 100% coverage isn''t always practical
+      or necessary,\nmaintaining good coverage helps ensure code quality. Focus on
+      critical paths and business logic.\n\n## Chapter 6: Testing Tools and Frameworks\n\nMany
+      tools exist to support testing efforts:\n\n### 6.1 Python Testing\n- pytest:
+      Feature-rich testing framework\n- unittest: Built-in Python testing module\n-
+      mock: Library for mocking objects\n\n### 6.2 JavaScript Testing\n- Jest: Popular
+      testing framework\n- Mocha: Flexible testing framework\n- Cypress: End-to-end
+      testing tool\n\n### 6.3 Other Tools\n- Selenium: Browser automation\n- JMeter:
+      Performance testing\n- Postman: API testing\n\n## Conclusion\n\nEffective testing
+      is essential for delivering high-quality software. By following best practices
+      and using\nappropriate tools, teams can catch bugs early, improve code quality,
+      and deliver better products to users.\n\nRemember: Testing is not just about
+      finding bugs, it''s about building confidence in your code.\n","cache_control":{"type":"ephemeral"}}]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      anthropic-version:
+      - '2023-06-01'
+      connection:
+      - keep-alive
+      content-length:
+      - '5456'
+      content-type:
+      - application/json
+      host:
+      - api.anthropic.com
+      user-agent:
+      - Anthropic/Python 0.76.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 0.76.0
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.10.19
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/2VSzU7DMAx+lSjHqUXtYEzrDYQQB5A4IA2JoiokZg1rnZI4wDTt3XHK/zgl+X4c
+        +0u2sncGOllJ3aloIA8OESg/ymf5tJjOisV0ITNpDSv6sGqKsqSr5fX87OVC6RbncblY3y7PkDW0
+        GSCpIAS1Aga86xKgQrCBFBJD2iEB76q77Zee4C0x41LJE62dNxZXgpwIoMk6FMcHpajl9YZaPtxA
+        IOazWgpqPYCgD0A8etXDq/PrIJQH0fM9bAYjHp0XH+aqxhrLAzGZDJtkm0xELs5BUfSQe6vb/8Vq
+        nCZ9REvfjtNoO8otflb9NnGUsYMaD5Ojd3o9qi/tg1d+M7aRwKR0D088W5C7+0wGckPjQXHyHACg
+        abidFOhIBHiOgJqTwth1mYxjuNVWWhwiNeTWgEFW85LD5ReBRnOpNHjzV1B88UybPa4sjhb79nQF
+        DC304FXXzPr/5X7Yst1nd5l0kX5Dx+wI4F+shoYseB41fQqjvJG73TumCh7LhwIAAA==
+    headers:
+      CF-RAY:
+      - 9c1a60d4ab5e67cb-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 21 Jan 2026 22:51:49 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      anthropic-ratelimit-input-tokens-limit:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-remaining:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-reset:
+      - '2026-01-21T22:51:48Z'
+      anthropic-ratelimit-output-tokens-limit:
+      - '600000'
+      anthropic-ratelimit-output-tokens-remaining:
+      - '600000'
+      anthropic-ratelimit-output-tokens-reset:
+      - '2026-01-21T22:51:49Z'
+      anthropic-ratelimit-tokens-limit:
+      - '3600000'
+      anthropic-ratelimit-tokens-remaining:
+      - '3600000'
+      anthropic-ratelimit-tokens-reset:
+      - '2026-01-21T22:51:48Z'
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CXMLqgrrchykwCdY7YRKM
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-envoy-upstream-service-time:
+      - '2016'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft b/py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft
new file mode 100644
index 00000000..1ee7a837
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft
@@ -0,0 +1,298 @@
+interactions:
+- request:
+    body: '{"messages":[{"content":"Count from 1 to 5.","role":"user"}],"model":"gpt-4o-mini","max_completion_tokens":50,"stream":true}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '124'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=W_Ukgb.mz8e1GW7CfhzN.QQaN09_xQq1uTHm3a.dJdU-1762561359-1.0.1.1-6IrkySxpZaL.1C65iH0iOLFfere0JxHCiasT6bak.RihYFMyJgIz2OuYJqcUey8c5vicjtorNby_Z_GJX.ZMIHa6PyzVrhqgfZZmtnnn.sA;
+        _cfuvid=jwWMA4k30hLPwBwTSCIdIeS5.m1TkcdYLYTt4YSTZhI-1762561359243-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: http://localhost:8000/v1/proxy/chat/completions
+  response:
+    body:
+      string: 'data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"obfuscation":"uoycSw"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"obfuscation":"7R9sCOG"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"jNZOnCU"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"obfuscation":"NTkR0fq"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"obfuscation":"KhfgFBA"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"u5zk4uv"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"obfuscation":"yQyBcA4"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"obfuscation":"HhGcZch"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"GNLE7Ci"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"obfuscation":"d0EKjlZ"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}],"obfuscation":"YytmIuX"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"Umbehc1"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"obfuscation":"3xi8C7o"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"5"},"logprobs":null,"finish_reason":null}],"obfuscation":"N0uOsTp"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"obfuscation":"RilMN7a"}
+
+
+        data: {"id":"chatcmpl-CZR0zJXGi0lsnYkPoiga2R6HChxps","object":"chat.completion.chunk","created":1762561361,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"obfuscation":"oF"}
+
+
+        data: [DONE]
+
+
+        '
+    headers:
+      CF-RAY:
+      - 99b0eaddeca8aaac-SJC
+      Connection:
+      - keep-alive
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Date:
+      - Sat, 08 Nov 2025 00:22:42 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '275'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '519'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999992'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_05aebff8dd644228befd59a7372d3c93
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"content":"Count from 1 to 5.","role":"user"}],"model":"gpt-4o-mini","max_completion_tokens":50,"stream":true}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '124'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=.AxQfRhAvElThVl_Qz9zUVdqz_GtBGXwRQ0TVPIg5pc-1762561407-1.0.1.1-klsoMaFKHjzxOrHy2Zfd8Sc76RDHsMXURLAaIzORncnm47NI1MY0BqqBGOEsVXlZb.RdqeqpxzGFhl8DlRDjy.SqRfa2B4zEYdKZqQ2kVB0;
+        _cfuvid=0ohSoYMS21h1NkHWl4FeeVCp5aK2KHeEjclSm1NY7yY-1762561407934-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: http://localhost:8000/v1/proxy/chat/completions
+  response:
+    body:
+      string: 'data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"obfuscation":"ov7JiI"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"obfuscation":"eXpmCqg"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"C8QZXu8"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"obfuscation":"xdqGFpo"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"obfuscation":"O3SLgWG"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"0aoEi42"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"obfuscation":"2oO8rJa"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"obfuscation":"jOHTEGa"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"qGeoxr1"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"obfuscation":"uvMar7j"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}],"obfuscation":"4dFvFfq"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"obfuscation":"GdoZztm"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"obfuscation":"NHxpCPR"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"5"},"logprobs":null,"finish_reason":null}],"obfuscation":"mfV8KdT"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"obfuscation":"EkPlssM"}
+
+
+        data: {"id":"chatcmpl-CZR1mouRDQnH9qWlT2zp6Fs0nW1Uq","object":"chat.completion.chunk","created":1762561410,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_560af6e559","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"obfuscation":"fj"}
+
+
+        data: [DONE]
+
+
+        '
+    headers:
+      CF-RAY:
+      - 99b0ec0f7961ed3b-SJC
+      Connection:
+      - keep-alive
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Date:
+      - Sat, 08 Nov 2025 00:23:30 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '149'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '171'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999992'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_8afec9e4717b433e9c6900220b2dbd93
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_tool_usage b/py/src/braintrust/integrations/langchain/cassettes/test_tool_usage
new file mode 100644
index 00000000..e21d44cc
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_tool_usage
@@ -0,0 +1,350 @@
+interactions:
+- request:
+    body: '{"messages": [{"content": "What is 3 * 12", "role": "user"}], "model":
+      "gpt-4o-mini", "frequency_penalty": 0.0, "n": 1, "presence_penalty": 0.0, "stream":
+      false, "temperature": 1.0, "tools": [{"type": "function", "function": {"name":
+      "calculator", "description": "Can perform mathematical operations.", "parameters":
+      {"properties": {"input": {"properties": {"operation": {"description": "The type
+      of operation to execute.", "enum": ["add", "subtract", "multiply", "divide"],
+      "type": "string"}, "number1": {"description": "The first number to operate on.",
+      "type": "number"}, "number2": {"description": "The second number to operate
+      on.", "type": "number"}}, "required": ["operation", "number1", "number2"], "type":
+      "object"}}, "required": ["input"], "type": "object"}}}], "top_p": 1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '725'
+      content-type:
+      - application/json
+      host:
+      - localhost:8000
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.13
+    method: POST
+    uri: http://localhost:8000/v1/proxy/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-CJ3pT0xTT4C4WwCqA5bvyrihLFrbd\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1758658987,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
+        \           \"id\": \"call_faZyqlGfMGsX50e2EuExUqK0\",\n            \"type\":
+        \"function\",\n            \"function\": {\n              \"name\": \"calculator\",\n
+        \             \"arguments\": \"{\\\"input\\\":{\\\"operation\\\":\\\"multiply\\\",\\\"number1\\\":3,\\\"number2\\\":12}}\"\n
+        \           }\n          }\n        ],\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"tool_calls\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 97,\n    \"completion_tokens\":
+        26,\n    \"total_tokens\": 123,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_51db84afab\"\n}\n"
+    headers:
+      Access-Control-Allow-Credentials:
+      - 'true'
+      Access-Control-Expose-Headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-span-id,x-bt-span-export,x-bt-query-plan,x-bt-internal-trace-id
+      Connection:
+      - keep-alive
+      Date:
+      - Tue, 23 Sep 2025 20:23:07 GMT
+      Keep-Alive:
+      - timeout=5
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      cf-ray:
+      - 983cc20cabc267ef-SJC
+      content-type:
+      - application/json
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '648'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - _cfuvid=inx7Y1lMFCkI1jONo8plrYH7k2d1EAvkr2WlMIyrK.s-1758658987739-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-bt-cached:
+      - MISS
+      x-bt-internal-trace-id:
+      - 475d214543543ac965368ac2a190850f
+      x-content-type-options:
+      - nosniff
+      x-envoy-upstream-service-time:
+      - '663'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999992'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_f6bcef66199c4bcaa6ad5864f7d1d9fb
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"content":"What is 3 * 12","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"tools":[{"type":"function","function":{"name":"calculator","description":"Can
+      perform mathematical operations.","parameters":{"properties":{"input":{"properties":{"operation":{"description":"The
+      type of operation to execute.","enum":["add","subtract","multiply","divide"],"type":"string"},"number1":{"description":"The
+      first number to operate on.","type":"number"},"number2":{"description":"The
+      second number to operate on.","type":"number"}},"required":["operation","number1","number2"],"type":"object"}},"required":["input"],"type":"object"}}}],"top_p":1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '725'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=W_Ukgb.mz8e1GW7CfhzN.QQaN09_xQq1uTHm3a.dJdU-1762561359-1.0.1.1-6IrkySxpZaL.1C65iH0iOLFfere0JxHCiasT6bak.RihYFMyJgIz2OuYJqcUey8c5vicjtorNby_Z_GJX.ZMIHa6PyzVrhqgfZZmtnnn.sA;
+        _cfuvid=jwWMA4k30hLPwBwTSCIdIeS5.m1TkcdYLYTt4YSTZhI-1762561359243-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA4xT0W6bMBR95yus+xymQAppedum7SFKNXXSqmqjQo65EG/G9myzLY3y7xMmBZKm
+        UnlAcI/PucfH1/uAEOAlZATYljrWaBF+/P51vnsyt/dSfzCrMmFyeXu3fr96untwX2DWMdTmJzL3
+        zHrHVKMFOq5kDzOD1GGnGi3TOEmjRTr3QKNKFB2t1i68UmHDJQ/jeXwVzpdhdH1kbxVnaCEjPwJC
+        CNn7d+dTlvgPMuK1fKVBa2mNkA2LCAGjRFcBai23jkoHsxFkSjqUnXXZCjEBnFKiYFSIsXH/7Cff
+        Y1hUiALZUupvWK/u/z4k5e9PuNafV+vrab9eeqe9oaqVbAhpgg/17KwZISBpg8eGrBXUKXPGJgSo
+        qdsGpeucwz4HLnXrcsj2OSiNhnbaOWQ5NK1wXItdDrMcZNts0EQ5ZIvhL84hi+LDAU5aHIJL34+T
+        8AxWraXiZapUSuW8AR/r4xE5DCcoVK2N2tgzKlRccrstDFLrg5meT/BsxFuA9mQEQBvVaFc49Qt9
+        05tlLwrjlI5gnB5BpxwVYz2KF7MLckWJjnI/IsNUMsq2WI7UcTppW3I1AYLJ1l+6uaTdb5/L+i3y
+        I8AYaodloQ2WnJ3ueFxmsLvEry0bQvaGwaL5wxkWjqPpjqPEiraiH3WwO+uwKSouazTacH+/oNJF
+        ks5plWKS3EBwCP4DAAD//wMAguKIhm0EAAA=
+    headers:
+      CF-RAY:
+      - 99b0ead42b8caaac-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:22:40 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '557'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '702'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999995'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_edb893697ec245fbb710a31d27a3ed78
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"content":"What is 3 * 12","role":"user"}],"model":"gpt-4o-mini","frequency_penalty":0.0,"n":1,"presence_penalty":0.0,"stream":false,"temperature":1.0,"tools":[{"type":"function","function":{"name":"calculator","description":"Can
+      perform mathematical operations.","parameters":{"properties":{"input":{"properties":{"operation":{"description":"The
+      type of operation to execute.","enum":["add","subtract","multiply","divide"],"type":"string"},"number1":{"description":"The
+      first number to operate on.","type":"number"},"number2":{"description":"The
+      second number to operate on.","type":"number"}},"required":["operation","number1","number2"],"type":"object"}},"required":["input"],"type":"object"}}}],"top_p":1.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate, zstd
+      connection:
+      - keep-alive
+      content-length:
+      - '725'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=.AxQfRhAvElThVl_Qz9zUVdqz_GtBGXwRQ0TVPIg5pc-1762561407-1.0.1.1-klsoMaFKHjzxOrHy2Zfd8Sc76RDHsMXURLAaIzORncnm47NI1MY0BqqBGOEsVXlZb.RdqeqpxzGFhl8DlRDjy.SqRfa2B4zEYdKZqQ2kVB0;
+        _cfuvid=0ohSoYMS21h1NkHWl4FeeVCp5aK2KHeEjclSm1NY7yY-1762561407934-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.108.2
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.108.2
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.11.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA4xTTY/aMBC951dYc4YqCV+7udEPqSdaVeqh26wiY0/AXce2/MHCIv57lQSSwFKp
+        OUT2PM+b5+eZY0QICA4ZAbalnlVGjj89/UjkfIlfVm+/4o9P4dW/2a/S0dfl5PAAozpDr/8g85es
+        D0xXRqIXWrUws0g91qzJYp7O5sk0fmyASnOUddrG+PFUjyuhxDiN0+k4XoyTMznbasHQQUZ+R4QQ
+        cmz+tU7FcQ8ZiUeXSIXO0Q1C1h0iBKyWdQSoc8J5qjyMepBp5VHV0lWQcgB4rWXBqJR94fY7Dta9
+        WVTK4udkP9upwHY7/nm1XH2Pefi2f4n5oF5LfTCNoDIo1pk0wLt4dlOMEFC0wnNBFiT12t5kEwLU
+        bkKFytfK4ZiDUCb4HLJjDtqgpTV3DlkOVZBeGHnIYZSDCtUabZJDNul2aQ5Zkp5OcFXiFN1bPw/M
+        s1gGR+V7V6lS2jcCGlufz8ipe0GpN8bqtbtJhVIo4baFReoaY4bvE12ENBIgXLUAGKsr4wuvX7Ap
+        +rhoSaHv0h5M52fQa09lH0/SyegOXcHRU9G0SNeVjLIt8j61704auNADIBpc/b2ae9zt9YXa/A99
+        DzCGxiMvjEUu2PWN+2MW6yH+17HO5EYwOLQ7wbDwAm39HBxLGmQ7WuAOzmNVlEJt0BormvmC0hSz
+        hK8fprSka4hO0V8AAAD//wMAMU2sv20EAAA=
+    headers:
+      CF-RAY:
+      - 99b0ec04f9abed3b-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Sat, 08 Nov 2025 00:23:29 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '614'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '756'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999995'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_b741763f424444f38ded6343a488e723
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/langchain/conftest.py b/py/src/braintrust/integrations/langchain/conftest.py
new file mode 100644
index 00000000..078502b2
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/conftest.py
@@ -0,0 +1,58 @@
+import os
+from pathlib import Path
+
+import pytest
+from braintrust.logger import (
+    TEST_API_KEY,
+    Logger,
+    _internal_reset_global_state,
+    _internal_with_memory_background_logger,
+    _MemoryBackgroundLogger,
+)
+from braintrust.test_helpers import init_test_logger
+
+from braintrust.wrappers.langchain import clear_global_handler
+
+
+@pytest.fixture(autouse=True)  # autouse: applied to tests without being requested by name
+def setup_braintrust_langchain():  # points the SDK/clients at localhost and resets global state
+    os.environ["BRAINTRUST_SYNC_FLUSH"] = "1"  # NOTE: these os.environ writes are never undone here
+    os.environ["BRAINTRUST_API_URL"] = "http://localhost:8000"
+    os.environ["BRAINTRUST_APP_URL"] = "http://localhost:3000"
+    os.environ["BRAINTRUST_API_KEY"] = TEST_API_KEY
+    os.environ["ANTHROPIC_API_KEY"] = "your_anthropic_api_key_here"  # placeholder value, not a real key
+    os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here"  # placeholder value, not a real key
+    os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1/proxy"  # same host:port as BRAINTRUST_API_URL
+
+    _internal_reset_global_state()  # runs before each test, after the env vars are in place
+    clear_global_handler()  # drop any LangChain handler installed by a previous test
+    yield  # nothing follows the yield, so this fixture performs no teardown
+
+
+@pytest.fixture(scope="module")
+def vcr_config():  # builds the VCR settings dict used for cassette recording/replay
+    record_mode = "none" if (os.environ.get("CI") or os.environ.get("GITHUB_ACTIONS")) else "once"
+
+    return {
+        "filter_headers": [  # credential-bearing request headers kept out of cassettes
+            "authorization",
+            "x-goog-api-key",
+            "x-api-key",
+            "api-key",
+            "openai-api-key",
+        ],
+        "record_mode": record_mode,  # "none" when CI or GITHUB_ACTIONS is set, otherwise "once"
+        "match_on": ["uri", "method", "body"],  # request attributes compared against recorded ones
+        "cassette_library_dir": str(Path(__file__).parent / "cassettes"),  # cassettes/ beside this file
+        "path_transformer": lambda path: path.replace(".yaml", ""),  # cassette files have no .yaml suffix
+    }
+
+
+@pytest.fixture
+def logger_memory_logger():  # yields a (logger, in-memory background logger) pair to the test
+    logger = init_test_logger("langchain-py")  # presumably the project name — confirm in test_helpers
+    with _internal_with_memory_background_logger() as bgl:  # context stays open while the test runs
+        yield (logger, bgl)  # same shape as the LoggerMemoryLogger alias defined in this module
+
+
+LoggerMemoryLogger = tuple[Logger, _MemoryBackgroundLogger]
diff --git a/py/src/braintrust/integrations/langchain/integration.py b/py/src/braintrust/integrations/langchain/integration.py
new file mode 100644
index 00000000..e22cdc0f
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/integration.py
@@ -0,0 +1,34 @@
+"""LangChain integration orchestration."""
+
+from typing import Any
+
+from braintrust.integrations.base import BasePatcher, BaseIntegration
+
+
+class LangChainCallbackPatcher(BasePatcher):
+    """Patcher that registers a global BraintrustCallbackHandler with LangChain."""
+
+    name = "langchain_callback"
+    _patched: bool = False  # class-level flag; patch() sets it to True and nothing in this class resets it
+
+    @classmethod
+    def is_patched(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool:
+        return cls._patched  # module, version and target are accepted but not consulted
+
+    @classmethod
+    def patch(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool:
+        from .tracing import BraintrustCallbackHandler, _ensure_hook_registered, set_global_handler
+
+        _ensure_hook_registered()  # helper from .tracing, imported at call time on the line above
+        handler = BraintrustCallbackHandler()  # constructed with its default arguments
+        set_global_handler(handler)
+        cls._patched = True  # NOTE(review): confirm whether clear_global_handler() should reset this flag
+        return True  # always reports success; module, version and target are unused
+
+
+class LangChainIntegration(BaseIntegration):
+    """Braintrust instrumentation for LangChain."""
+
+    name = "langchain"
+    import_names = ("langchain_core",)  # presumably the module(s) that trigger this integration — confirm in BaseIntegration
+    patchers = (LangChainCallbackPatcher,)  # the only patcher listed for this integration
diff --git a/py/src/braintrust/integrations/langchain/test_langchain.py b/py/src/braintrust/integrations/langchain/test_langchain.py
new file mode 100644
index 00000000..827cf777
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/test_langchain.py
@@ -0,0 +1,1380 @@
+# pyright: reportTypedDictNotRequiredAccess=none
+import uuid
+from typing import Any, Dict, List, Sequence, Union, cast
+from unittest.mock import ANY
+
+import pytest
+from braintrust.logger import flush
+from langchain_anthropic import ChatAnthropic
+from langchain_core.callbacks import BaseCallbackHandler, CallbackManager
+from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.prompts.prompt import PromptTemplate
+from langchain_core.runnables import RunnableMap, RunnableSerializable
+from langchain_core.tools import tool
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel, Field
+
+from braintrust.integrations.langchain import BraintrustCallbackHandler, set_global_handler
+from braintrust.wrappers.test_utils import verify_autoinstrument_script
+
+from .conftest import LoggerMemoryLogger
+
+# ---------------------------------------------------------------------------
+# Helpers (inlined from the integration package)
+# ---------------------------------------------------------------------------
+
+
+def assert_matches_object(actual: Any, expected: Any, ignore_order: bool = False) -> None:
+    """Assert ``actual`` contains everything in ``expected``; raise ``AssertionError`` otherwise.
+    Dicts match as subsets, sequences as prefixes, and ``ANY`` matches any present value;
+    ``ignore_order`` lets each top-level expected item match any actual item instead."""
+    if isinstance(expected, (list, tuple)):
+        assert isinstance(actual, (list, tuple)), f"Expected sequence but got {type(actual)}"
+        assert len(actual) >= len(expected), (
+            f"Expected sequence of length >= {len(expected)} but got length {len(actual)}"
+        )
+        if not ignore_order:
+            for i, expected_item in enumerate(expected):
+                assert_matches_object(actual[i], expected_item)
+            return
+        for expected_item in expected:
+            for actual_item in actual:
+                try:
+                    assert_matches_object(actual_item, expected_item)
+                except AssertionError:  # only a mismatch means "try the next candidate"
+                    continue
+                break  # first match is enough; stop scanning
+            else:
+                raise AssertionError(
+                    f"Expected {expected_item} in unordered sequence but couldn't find match in {actual}"
+                )
+    elif isinstance(expected, dict):
+        assert isinstance(actual, dict), f"Expected dict but got {type(actual)}"
+        for k, v in expected.items():
+            assert k in actual, f"Missing key {k}"
+            if isinstance(v, (dict, list, tuple)):
+                assert_matches_object(actual[k], v)
+            elif v is not ANY:  # ANY only requires the key to be present
+                assert actual[k] == v, f"Key {k}: expected {v} but got {actual[k]}"
+    else:
+        assert actual == expected, f"Expected {expected} but got {actual}"
+
+
+def find_spans_by_attributes(spans: List[Any], **attributes: Any) -> List[Any]:
+    """Return, in input order, the spans whose ``span_attributes`` hold every given key/value.
+
+    Spans lacking ``span_attributes`` are skipped; a missing attribute compares as ``None``.
+    """
+    wanted = attributes.items()
+    return [
+        s for s in spans if "span_attributes" in s and all(s["span_attributes"].get(k) == v for k, v in wanted)
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.vcr
+def test_llm_calls(logger_memory_logger: LoggerMemoryLogger):  # prompt -> ChatOpenAI chain, handler passed per call
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()  # nothing should be logged before the chain runs
+
+    handler = BraintrustCallbackHandler(logger=logger)
+    prompt = ChatPromptTemplate.from_template("What is 1 + {number}?")
+    model = ChatOpenAI(
+        model="gpt-4o-mini",
+        temperature=1,
+        top_p=1,
+        frequency_penalty=0,
+        presence_penalty=0,
+        n=1,
+    )
+    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+    chain.invoke({"number": "2"}, config={"callbacks": [cast(BaseCallbackHandler, handler)]})
+
+    spans = memory_logger.pop()
+    assert len(spans) == 3  # chain root + prompt step + LLM step
+
+    root_span_id = spans[0]["span_id"]  # spans[0] is asserted below to be the RunnableSequence root
+
+    assert_matches_object(  # subset match: keys not listed here are ignored
+        spans,
+        [
+            {  # 1) root chain span
+                "span_attributes": {
+                    "name": "RunnableSequence",
+                    "type": "task",
+                },
+                "input": {"number": "2"},
+                "output": {
+                    "content": ANY,
+                    "additional_kwargs": ANY,
+                    "response_metadata": ANY,
+                    "type": "ai",
+                    "name": ANY,
+                    "id": ANY,
+                    "example": ANY,
+                    "tool_calls": ANY,
+                    "invalid_tool_calls": ANY,
+                    "usage_metadata": ANY,
+                },
+                "metadata": {"tags": []},
+                "span_id": root_span_id,
+                "root_span_id": root_span_id,
+            },
+            {  # 2) prompt-formatting step, child of the root
+                "span_attributes": {"name": "ChatPromptTemplate"},
+                "input": {"number": "2"},
+                "output": {
+                    "messages": [
+                        {
+                            "content": ANY,
+                            "additional_kwargs": {},
+                            "response_metadata": {},
+                            "type": "human",
+                            "name": None,
+                            "id": None,
+                        }
+                    ]
+                },
+                "metadata": {"tags": ["seq:step:1"]},
+                "root_span_id": root_span_id,
+                "span_parents": [root_span_id],
+            },
+            {  # 3) LLM call, child of the root
+                "span_attributes": {"name": "ChatOpenAI", "type": "llm"},
+                "input": [
+                    [
+                        {
+                            "content": ANY,
+                            "additional_kwargs": {},
+                            "response_metadata": {},
+                            "type": "human",
+                            "name": None,
+                            "id": None,
+                            "example": ANY,
+                        }
+                    ]
+                ],
+                "output": {
+                    "generations": [
+                        [
+                            {
+                                "text": ANY,
+                                "generation_info": ANY,
+                                "type": "ChatGeneration",
+                                "message": {
+                                    "content": ANY,
+                                    "additional_kwargs": ANY,
+                                    "response_metadata": ANY,
+                                    "type": "ai",
+                                    "name": None,
+                                    "id": ANY,
+                                },
+                            }
+                        ]
+                    ],
+                    "llm_output": {
+                        "token_usage": {
+                            "completion_tokens": ANY,
+                            "prompt_tokens": ANY,
+                            "total_tokens": ANY,
+                        },
+                        "model_name": "gpt-4o-mini-2024-07-18",
+                    },
+                    "run": None,
+                    "type": "LLMResult",
+                },
+                "metrics": {  # timing and token metrics must be present; values are not pinned
+                    "start": ANY,
+                    "total_tokens": ANY,
+                    "prompt_tokens": ANY,
+                    "completion_tokens": ANY,
+                    "end": ANY,
+                },
+                "metadata": {
+                    "tags": ["seq:step:2"],
+                    "model": "gpt-4o-mini-2024-07-18",
+                },
+                "root_span_id": root_span_id,
+                "span_parents": [root_span_id],
+            },
+        ],
+    )
+
+
+@pytest.mark.vcr
+def test_global_handler(logger_memory_logger: LoggerMemoryLogger):  # tracing via set_global_handler, no per-call callbacks
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()  # nothing should be logged before the chain runs
+
+    handler = BraintrustCallbackHandler(logger=logger, debug=True)
+    set_global_handler(handler)  # NOTE(review): never reset in this test — check it cannot leak into later tests
+
+    # Make sure the handler is registered in the LangChain library
+    manager = CallbackManager.configure()
+    assert next((h for h in manager.handlers if isinstance(h, BraintrustCallbackHandler)), None) == handler
+
+    prompt = ChatPromptTemplate.from_template("What is 1 + {number}?")
+    model = ChatOpenAI(
+        model="gpt-4o-mini",
+        temperature=1,
+        top_p=1,
+        frequency_penalty=0,
+        presence_penalty=0,
+        n=1,
+    )
+    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+
+    message = chain.invoke({"number": "2"})  # no callbacks passed: spans must come from the global handler
+
+    spans = memory_logger.pop()
+    assert len(spans) > 0  # looser than the == 3 check used by test_llm_calls
+
+    root_span_id = spans[0]["span_id"]  # spans[0] is asserted below to be the RunnableSequence root
+
+    assert_matches_object(  # subset match: keys not listed here are ignored
+        spans,
+        [
+            {  # 1) root chain span
+                "span_attributes": {
+                    "name": "RunnableSequence",
+                    "type": "task",
+                },
+                "input": {"number": "2"},
+                "output": {
+                    "content": ANY,
+                    "additional_kwargs": ANY,
+                    "response_metadata": ANY,
+                    "type": "ai",
+                    "name": ANY,
+                    "id": ANY,
+                    "example": ANY,
+                    "tool_calls": ANY,
+                    "invalid_tool_calls": ANY,
+                    "usage_metadata": ANY,
+                },
+                "metadata": {"tags": []},
+                "span_id": root_span_id,
+                "root_span_id": root_span_id,
+            },
+            {  # 2) prompt-formatting step, child of the root
+                "span_attributes": {"name": "ChatPromptTemplate"},
+                "input": {"number": "2"},
+                "output": {
+                    "messages": [
+                        {
+                            "content": ANY,
+                            "additional_kwargs": {},
+                            "response_metadata": {},
+                            "type": "human",
+                            "name": None,
+                            "id": None,
+                        }
+                    ]
+                },
+                "metadata": {"tags": ["seq:step:1"]},
+                "root_span_id": root_span_id,
+                "span_parents": [root_span_id],
+            },
+            {  # 3) LLM call, child of the root
+                "span_attributes": {"name": "ChatOpenAI", "type": "llm"},
+                "input": [
+                    [
+                        {
+                            "content": ANY,
+                            "additional_kwargs": {},
+                            "response_metadata": {},
+                            "type": "human",
+                            "name": None,
+                            "id": None,
+                            "example": ANY,
+                        }
+                    ]
+                ],
+                "output": {
+                    "generations": [
+                        [
+                            {
+                                "text": ANY,
+                                "generation_info": ANY,
+                                "type": "ChatGeneration",
+                                "message": {
+                                    "content": ANY,
+                                    "additional_kwargs": ANY,
+                                    "response_metadata": ANY,
+                                    "type": "ai",
+                                    "name": None,
+                                    "id": ANY,
+                                },
+                            }
+                        ]
+                    ],
+                    "llm_output": {
+                        "token_usage": {
+                            "completion_tokens": ANY,
+                            "prompt_tokens": ANY,
+                            "total_tokens": ANY,
+                        },
+                        "model_name": "gpt-4o-mini-2024-07-18",
+                    },
+                    "run": None,
+                    "type": "LLMResult",
+                },
+                "metrics": {  # timing and token metrics must be present; values are not pinned
+                    "start": ANY,
+                    "total_tokens": ANY,
+                    "prompt_tokens": ANY,
+                    "completion_tokens": ANY,
+                    "end": ANY,
+                },
+                "metadata": {
+                    "tags": ["seq:step:2"],
+                    "model": "gpt-4o-mini-2024-07-18",
+                },
+                "root_span_id": root_span_id,
+                "span_parents": [root_span_id],
+            },
+        ],
+    )
+
+    assert message.content == "1 + 2 equals 3."  # exact reply text — presumably pinned by the recorded cassette
+
+
+@pytest.mark.vcr
+def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger):  # history + config tags flow into the spans
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()  # nothing should be logged before the chain runs
+
+    handler = BraintrustCallbackHandler(logger=logger)
+    prompt = ChatPromptTemplate.from_template("{history} User: {input}")
+    model = ChatOpenAI(model="gpt-4o-mini")
+    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+
+    memory = {"history": "Assistant: Hello! How can I assist you today?"}  # plain dict standing in for chat memory
+    chain.invoke(
+        {"input": "What's your name?", **memory},
+        config={"callbacks": [cast(BaseCallbackHandler, handler)], "tags": ["test"]},
+    )
+
+    spans = memory_logger.pop()
+    assert len(spans) == 3  # chain root + prompt step + LLM step
+
+    root_span_id = spans[0]["span_id"]  # spans[0] is asserted below to be the RunnableSequence root
+
+    assert_matches_object(  # subset match: keys not listed here are ignored
+        spans,
+        [
+            {  # 1) root chain span
+                "span_attributes": {
+                    "name": "RunnableSequence",
+                    "type": "task",
+                },
+                "input": {"input": "What's your name?", "history": "Assistant: Hello! How can I assist you today?"},
+                "output": {
+                    "content": ANY,
+                    "additional_kwargs": ANY,
+                    "response_metadata": ANY,
+                    "type": "ai",
+                },
+                "metadata": {"tags": ["test"]},  # the tag passed through config
+                "span_id": root_span_id,
+                "root_span_id": root_span_id,
+            },
+            {  # 2) prompt-formatting step, child of the root
+                "span_attributes": {"name": "ChatPromptTemplate"},
+                "input": {"input": "What's your name?", "history": "Assistant: Hello! How can I assist you today?"},
+                "output": {
+                    "messages": [
+                        {
+                            "content": ANY,
+                            "additional_kwargs": {},
+                            "response_metadata": {},
+                            "type": "human",
+                            "name": None,
+                            "id": None,
+                        }
+                    ]
+                },
+                "metadata": {"tags": ["seq:step:1", "test"]},  # step tag first, then the config tag
+                "root_span_id": root_span_id,
+                "span_parents": [root_span_id],
+            },
+            {  # 3) LLM call, child of the root
+                "span_attributes": {"name": "ChatOpenAI", "type": "llm"},
+                "input": [
+                    [
+                        {
+                            "content": ANY,
+                            "additional_kwargs": {},
+                            "response_metadata": {},
+                            "type": "human",
+                            "name": None,
+                            "id": None,
+                            "example": ANY,
+                        }
+                    ]
+                ],
+                "output": {
+                    "generations": [
+                        [
+                            {
+                                "text": ANY,
+                                "generation_info": ANY,
+                                "type": "ChatGeneration",
+                                "message": {
+                                    "content": ANY,
+                                    "additional_kwargs": ANY,
+                                    "response_metadata": ANY,
+                                    "type": "ai",
+                                    "name": None,
+                                    "id": ANY,
+                                },
+                            }
+                        ]
+                    ],
+                    "llm_output": {
+                        "token_usage": {
+                            "completion_tokens": ANY,
+                            "prompt_tokens": ANY,
+                            "total_tokens": ANY,
+                        },
+                        "model_name": "gpt-4o-mini-2024-07-18",
+                    },
+                    "run": None,
+                    "type": "LLMResult",
+                },
+                "metrics": {  # timing and token metrics must be present; values are not pinned
+                    "start": ANY,
+                    "total_tokens": ANY,
+                    "prompt_tokens": ANY,
+                    "completion_tokens": ANY,
+                    "end": ANY,
+                },
+                "metadata": {
+                    "tags": ["seq:step:2", "test"],
+                    "model": "gpt-4o-mini-2024-07-18",
+                },
+                "root_span_id": root_span_id,
+                "span_parents": [root_span_id],
+            },
+        ],
+    )
+
+
+@pytest.mark.vcr
+def test_tool_usage(logger_memory_logger: LoggerMemoryLogger):  # bound tool definitions must appear in the LLM span
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()  # nothing should be logged before the model runs
+
+    handler = BraintrustCallbackHandler(logger=logger)
+
+    class CalculatorInput(BaseModel):  # argument schema for the calculator tool
+        operation: str = Field(
+            description="The type of operation to execute.",
+            json_schema_extra={"enum": ["add", "subtract", "multiply", "divide"]},
+        )
+        number1: float = Field(description="The first number to operate on.")
+        number2: float = Field(description="The second number to operate on.")
+
+    @tool
+    def calculator(input: CalculatorInput) -> str:
+        """Can perform mathematical operations."""
+        if input.operation == "add":
+            return str(input.number1 + input.number2)
+        elif input.operation == "subtract":
+            return str(input.number1 - input.number2)
+        elif input.operation == "multiply":
+            return str(input.number1 * input.number2)
+        elif input.operation == "divide":
+            return str(input.number1 / input.number2)
+        else:
+            raise ValueError("Invalid operation.")
+
+    model = ChatOpenAI(
+        model="gpt-4o-mini",
+        temperature=1,
+        top_p=1,
+        frequency_penalty=0,
+        presence_penalty=0,
+        n=1,
+    )
+    model_with_tools = model.bind_tools([calculator])
+    model_with_tools.invoke("What is 3 * 12", config={"callbacks": [cast(BaseCallbackHandler, handler)]})
+
+    spans = memory_logger.pop()
+    root_span_id = spans[0]["span_id"]  # NOTE(review): no length check first; an empty list raises IndexError here
+
+    assert_matches_object(  # subset match: keys not listed here are ignored
+        spans,
+        [
+            {  # the model is invoked directly (no chain), so the LLM span is expected to be the root
+                "span_id": root_span_id,
+                "root_span_id": root_span_id,
+                "span_attributes": {
+                    "name": "ChatOpenAI",
+                    "type": "llm",
+                },
+                "input": [
+                    [
+                        {
+                            "content": ANY,
+                            "additional_kwargs": {},
+                            "response_metadata": {},
+                            "type": "human",
+                            "name": None,
+                            "id": None,
+                            "example": ANY,
+                        }
+                    ]
+                ],
+                "metadata": {
+                    "tags": [],
+                    "model": "gpt-4o-mini-2024-07-18",
+                    "invocation_params": {
+                        "tools": [  # the bound calculator tool, described in function-tool form
+                            {
+                                "type": "function",
+                                "function": {
+                                    "name": "calculator",
+                                    "description": "Can perform mathematical operations.",
+                                    "parameters": ANY,
+                                },
+                            }
+                        ],
+                    },
+                },
+                "output": {
+                    "generations": [
+                        [
+                            {
+                                "generation_info": ANY,
+                                "type": "ChatGeneration",
+                                "message": {
+                                    "content": ANY,
+                                    "type": "ai",
+                                    "additional_kwargs": {
+                                        "tool_calls": ANY,  # the reply must carry tool calls; their content is not pinned
+                                    },
+                                    "response_metadata": ANY,
+                                    "name": None,
+                                    "id": ANY,
+                                },
+                            }
+                        ]
+                    ],
+                    "llm_output": {
+                        "token_usage": {
+                            "completion_tokens": ANY,
+                            "prompt_tokens": ANY,
+                            "total_tokens": ANY,
+                        },
+                        "model_name": "gpt-4o-mini-2024-07-18",
+                    },
+                    "run": None,
+                    "type": "LLMResult",
+                },
+                "metrics": {  # timing and token metrics must be present; values are not pinned
+                    "start": ANY,
+                    "total_tokens": ANY,
+                    "prompt_tokens": ANY,
+                    "completion_tokens": ANY,
+                    "end": ANY,
+                },
+            }
+        ],
+    )
+
+
+@pytest.mark.vcr
+@pytest.mark.skip(reason="Not yet working with VCR.")
+def test_parallel_execution(logger_memory_logger: LoggerMemoryLogger):  # two chains fanned out through a RunnableMap
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()  # nothing should be logged before the map runs
+
+    handler = BraintrustCallbackHandler(logger=logger)
+
+    model = ChatOpenAI(
+        model="gpt-4o-mini",
+        temperature=1,
+        top_p=1,
+        frequency_penalty=0,
+        presence_penalty=0,
+        n=1,
+    )
+
+    joke_chain = PromptTemplate.from_template("Tell me a joke about {topic}").pipe(model)
+    poem_chain = PromptTemplate.from_template("write a 2-line poem about {topic}").pipe(model)
+
+    map_chain = RunnableMap(
+        {
+            "joke": joke_chain,
+            "poem": poem_chain,
+        }
+    )
+
+    map_chain.invoke({"topic": "bear"}, config={"callbacks": [cast(BaseCallbackHandler, handler)]})
+
+    spans = memory_logger.pop()
+
+    llm_spans = find_spans_by_attributes(spans, name="ChatOpenAI")
+    assert len(llm_spans) == 2  # one LLM call per branch (joke, poem)
+
+    for span in llm_spans:  # both branches are checked against the same expected shape
+        assert_matches_object(
+            span,
+            {
+                "span_attributes": {"name": "ChatOpenAI", "type": "llm"},
+                "metadata": {
+                    "tags": ["seq:step:2"],
+                    "model": "gpt-4o-mini-2024-07-18",
+                },
+                "input": [
+                    [
+                        {
+                            "content": ANY,
+                            "additional_kwargs": {},
+                            "response_metadata": {},
+                            "type": "human",
+                        }
+                    ]
+                ],
+                "output": {
+                    "generations": [
+                        [
+                            {
+                                "text": ANY,
+                                "generation_info": ANY,
+                                "type": "ChatGeneration",
+                                "message": {
+                                    "content": ANY,
+                                    "type": "ai",
+                                },
+                            }
+                        ]
+                    ],
+                    "llm_output": {
+                        "token_usage": {
+                            "completion_tokens": ANY,
+                            "prompt_tokens": ANY,
+                            "total_tokens": ANY,
+                        },
+                        "model_name": "gpt-4o-mini-2024-07-18",
+                    },
+                    "type": "LLMResult",
+                },
+                "metrics": {  # timing and token metrics must be present; values are not pinned
+                    "start": ANY,
+                    "total_tokens": ANY,
+                    "prompt_tokens": ANY,
+                    "completion_tokens": ANY,
+                    "end": ANY,
+                },
+            },
+        )
+
+
+@pytest.mark.vcr
+def test_langgraph_state_management(logger_memory_logger: LoggerMemoryLogger):  # two-node LangGraph traced end to end
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()  # nothing should be logged before the graph runs
+
+    try:
+        from langgraph.graph import END, START, StateGraph
+    except ImportError:
+        pytest.skip("langgraph not installed")  # langgraph is treated as an optional dependency
+
+    handler = BraintrustCallbackHandler(logger=logger)
+    model = ChatOpenAI(
+        model="gpt-4o-mini",
+        temperature=1,
+        top_p=1,
+        frequency_penalty=0,
+        presence_penalty=0,
+        n=1,
+    )
+
+    def say_hello(state: Dict[str, str]):
+        response = model.invoke("Say hello")  # no callbacks passed here, yet one ChatOpenAI span is expected below
+        return cast(Union[str, List[str], Dict[str, str]], response.content)
+
+    def say_bye(state: Dict[str, str]):
+        print("From the 'sayBye' node: Bye world!")
+        return "Bye"
+
+    workflow = (  # linear graph: START -> sayHello -> sayBye -> END
+        StateGraph(state_schema=Dict[str, str])
+        .add_node("sayHello", say_hello)
+        .add_node("sayBye", say_bye)
+        .add_edge(START, "sayHello")
+        .add_edge("sayHello", "sayBye")
+        .add_edge("sayBye", END)
+    )
+
+    graph = workflow.compile()
+    graph.invoke({}, config={"callbacks": [handler]})
+
+    spans = memory_logger.pop()
+
+    # Spans are looked up by name instead of by position in the logged list.
+    langgraph_spans = find_spans_by_attributes(spans, name="LangGraph")
+    say_hello_spans = find_spans_by_attributes(spans, name="sayHello")
+    say_bye_spans = find_spans_by_attributes(spans, name="sayBye")
+    llm_spans = find_spans_by_attributes(spans, name="ChatOpenAI")
+
+    assert len(langgraph_spans) == 1
+    assert len(say_hello_spans) == 1
+    assert len(say_bye_spans) == 1
+    assert len(llm_spans) == 1
+
+    assert_matches_object(  # graph-level span
+        langgraph_spans[0],
+        {
+            "span_attributes": {
+                "name": "LangGraph",
+                "type": "task",
+            },
+            "input": {},
+            "metadata": {
+                "tags": [],
+            },
+            "output": "Bye",  # same value that say_bye returns
+        },
+    )
+
+    assert_matches_object(  # first node
+        say_hello_spans[0],
+        {
+            "span_attributes": {
+                "name": "sayHello",
+            },
+            "input": {},
+            "metadata": {
+                "tags": ["graph:step:1"],
+            },
+            "output": ANY,
+        },
+    )
+
+    assert_matches_object(  # LLM call made inside say_hello
+        llm_spans[0],
+        {
+            "span_attributes": {
+                "name": "ChatOpenAI",
+                "type": "llm",
+            },
+            "input": [
+                [
+                    {
+                        "content": ANY,
+                        "additional_kwargs": {},
+                        "response_metadata": {},
+                        "type": "human",
+                        "name": None,
+                        "id": None,
+                        "example": ANY,
+                    }
+                ]
+            ],
+            "metadata": {
+                "model": "gpt-4o-mini-2024-07-18",
+                "tags": [],
+            },
+            "output": {
+                "generations": [
+                    [
+                        {
+                            "text": ANY,
+                            "generation_info": ANY,
+                            "type": "ChatGeneration",
+                            "message": {
+                                "content": ANY,
+                                "additional_kwargs": ANY,
+                                "response_metadata": ANY,
+                                "type": "ai",
+                                "name": None,
+                                "id": ANY,
+                            },
+                        }
+                    ]
+                ],
+                "llm_output": {
+                    "token_usage": {
+                        "completion_tokens": ANY,
+                        "prompt_tokens": ANY,
+                        "total_tokens": ANY,
+                    },
+                    "model_name": "gpt-4o-mini-2024-07-18",
+                },
+                "run": None,
+                "type": "LLMResult",
+            },
+            "metrics": {  # timing and token metrics must be present; values are not pinned
+                "start": ANY,
+                "total_tokens": ANY,
+                "prompt_tokens": ANY,
+                "completion_tokens": ANY,
+                "end": ANY,
+            },
+        },
+    )
+
+    assert_matches_object(  # second node
+        say_bye_spans[0],
+        {
+            "span_attributes": {
+                "name": "sayBye",
+            },
+            "input": ANY,
+            "metadata": {
+                "tags": ["graph:step:2"],
+            },
+            "output": "Bye",
+        },
+    )
+
+
+@pytest.mark.vcr  # NOTE(review): no model call is made in this test; the vcr marker looks unnecessary — confirm
+def test_chain_null_values(logger_memory_logger: LoggerMemoryLogger):  # None values must survive in input and output
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()  # nothing should be logged before the callbacks fire
+
+    handler = BraintrustCallbackHandler(logger=logger)
+
+    run_id = uuid.UUID("f81d4fae-7dec-11d0-a765-00a0c91e6bf6")  # fixed run id shared by the start and end callbacks
+
+    handler.on_chain_start(  # callbacks are driven by hand; no chain or model is executed
+        {"id": ["TestChain"], "lc": 1, "type": "not_implemented"},
+        {"input1": "value1", "input2": None, "input3": None},
+        run_id=run_id,
+        parent_run_id=None,
+        tags=["test"],
+    )
+
+    handler.on_chain_end(
+        {"output1": "value1", "output2": None, "output3": None},
+        run_id=run_id,
+        parent_run_id=None,
+        tags=["test"],
+    )
+
+    flush()
+
+    spans = memory_logger.pop()
+    root_span_id = spans[0]["span_id"]  # the single chain run is expected to be its own root
+
+    assert_matches_object(  # subset match: keys not listed here are ignored
+        spans,
+        [
+            {
+                "root_span_id": root_span_id,
+                "span_attributes": {
+                    "name": "TestChain",  # matches the last element of the serialized "id" passed above
+                    "type": "task",
+                },
+                "input": {
+                    "input1": "value1",
+                    "input2": None,
+                    "input3": None,
+                },
+                "metadata": {
+                    "tags": ["test"],
+                },
+                "output": {
+                    "output1": "value1",
+                    "output2": None,
+                    "output3": None,
+                },
+            },
+        ],
+    )
+
+
+def test_consecutive_eval_calls(logger_memory_logger: LoggerMemoryLogger):
+    """Each case of an ``Eval`` run under one parent span gets its own correctly parented eval and task span."""
+    from braintrust import Eval
+
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()
+
+    def task_fn(input, hooks):
+        handler = BraintrustCallbackHandler(logger=logger)  # a fresh handler for every eval case
+        run_id = uuid.uuid4()
+
+        handler.on_chain_start(
+            {"id": ["RunnableSequence"], "lc": 1, "type": "not_implemented"},
+            {"number": str(input)},
+            run_id=run_id,
+            parent_run_id=None,
+        )
+
+        output = f"Result for {input}"
+
+        handler.on_chain_end(
+            {"content": output},
+            run_id=run_id,
+            parent_run_id=None,
+        )
+
+        return output
+
+    with logger.start_span(name="test-consecutive-eval", span_attributes={"type": "eval"}) as parent_span:
+        Eval(
+            "test-consecutive-eval",
+            data=[{"input": 1, "expected": "Result for 1"}, {"input": 2, "expected": "Result for 2"}],
+            task=task_fn,
+            scores=[],
+            parent=parent_span.id,  # attach the eval to the explicit parent span opened above
+        )
+
+    flush()
+
+    spans = memory_logger.pop()
+
+    # NOTE(review): presumably 1 parent + 2 eval records + 2 task spans — confirm how the chain spans are counted.
+    assert len(spans) == 5, f"Expected 5 spans, got {len(spans)}"
+
+    root_eval_span = [s for s in spans if s.get("span_attributes", {}).get("name") == "test-consecutive-eval"][0]
+    root_eval_span_id = root_eval_span["span_id"]
+
+    eval_record_spans = [
+        s
+        for s in spans
+        if s.get("span_attributes", {}).get("name") == "eval" and root_eval_span_id in (s.get("span_parents") or [])
+    ]
+    assert len(eval_record_spans) == 2, f"Expected 2 eval record spans, got {len(eval_record_spans)}"
+
+    eval_record_spans_sorted = sorted(eval_record_spans, key=lambda s: s.get("input", 0))  # order by input
+    eval_record_1 = eval_record_spans_sorted[0]
+    eval_record_2 = eval_record_spans_sorted[1]
+
+    task_spans = [s for s in spans if s.get("span_attributes", {}).get("name") == "task"]
+    assert len(task_spans) == 2, f"Expected 2 task spans, got {len(task_spans)}"
+
+    task_spans_sorted = sorted(task_spans, key=lambda s: s.get("input", 0))  # order by input
+    task_1_span = task_spans_sorted[0]
+    task_2_span = task_spans_sorted[1]
+
+    assert_matches_object(
+        [root_eval_span],
+        [
+            {
+                "span_id": root_eval_span_id,
+                "root_span_id": root_eval_span_id,
+                "span_attributes": {
+                    "name": "test-consecutive-eval",
+                    "type": "eval",
+                },
+            }
+        ],
+    )
+
+    assert_matches_object(
+        [eval_record_1],
+        [
+            {
+                "root_span_id": root_eval_span_id,
+                "span_parents": [root_eval_span_id],
+                "span_attributes": {"name": "eval"},
+                "input": 1,
+                "output": "Result for 1",
+            }
+        ],
+    )
+
+    assert_matches_object(
+        [eval_record_2],
+        [
+            {
+                "root_span_id": root_eval_span_id,
+                "span_parents": [root_eval_span_id],
+                "span_attributes": {"name": "eval"},
+                "input": 2,
+                "output": "Result for 2",
+            }
+        ],
+    )
+
+    assert_matches_object(
+        [task_1_span],
+        [
+            {
+                "root_span_id": root_eval_span_id,
+                "span_parents": [eval_record_1["span_id"]],  # each task hangs off its own eval record
+                "span_attributes": {"name": "task"},
+                "input": 1,
+                "output": "Result for 1",
+            }
+        ],
+    )
+
+    assert_matches_object(
+        [task_2_span],
+        [
+            {
+                "root_span_id": root_eval_span_id,
+                "span_parents": [eval_record_2["span_id"]],
+                "span_attributes": {"name": "task"},
+                "input": 2,
+                "output": "Result for 2",
+            }
+        ],
+    )
+
+
+@pytest.mark.vcr
+def test_streaming_ttft(logger_memory_logger: LoggerMemoryLogger):
+    """Streaming a chat model records a ``time_to_first_token`` metric on the LLM span."""
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()
+
+    handler = BraintrustCallbackHandler(logger=logger)
+    prompt = ChatPromptTemplate.from_template("Count from 1 to 5.")
+    model = ChatOpenAI(
+        model="gpt-4o-mini",
+        max_completion_tokens=50,
+        streaming=True,
+    )
+    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+
+    chunks: List[str] = []
+    for chunk in chain.stream({}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}):
+        if chunk.content:
+            chunks.append(str(chunk.content))
+    assert len(chunks) > 0, "Expected to receive streaming chunks"
+
+    spans = memory_logger.pop()  # NOTE(review): no flush() here, unlike sibling tests; confirm pop() flushes
+    assert len(spans) == 3  # presumably sequence + prompt + model spans — TODO confirm
+
+    llm_spans = find_spans_by_attributes(spans, name="ChatOpenAI", type="llm")
+    assert len(llm_spans) == 1
+    llm_span = llm_spans[0]
+
+    assert_matches_object(
+        [llm_span],
+        [
+            {
+                "id": ANY,
+                "input": [
+                    [
+                        {
+                            "additional_kwargs": {},
+                            "content": "Count from 1 to 5.",
+                            "example": False,
+                            "id": None,
+                            "name": None,
+                            "response_metadata": {},
+                            "type": "human",
+                        }
+                    ]
+                ],
+                "metadata": {
+                    "braintrust": {
+                        "integration_name": "langchain-py",
+                    }
+                },
+                "metrics": {
+                    "time_to_first_token": ANY,  # only the presence of the metric is checked, not its value
+                },
+                "output": {
+                    "generations": [
+                        [
+                            {
+                                "generation_info": {
+                                    "finish_reason": "stop",
+                                    "model_name": ANY,
+                                },
+                                "message": {
+                                    "content": "1, 2, 3, 4, 5.",
+                                    "type": "AIMessageChunk",
+                                },
+                                "text": "1, 2, 3, 4, 5.",
+                                "type": "ChatGenerationChunk",
+                            }
+                        ]
+                    ],
+                    "type": "LLMResult",
+                },
+                "project_id": "langchain-py",
+                "span_attributes": {"name": "ChatOpenAI", "type": "llm"},
+            }
+        ],
+    )
+
+
+@pytest.mark.vcr
+def test_prompt_caching_tokens(logger_memory_logger: LoggerMemoryLogger):
+    """Prompt-cache metrics: the first call reports cache creation, the second reports cached-token reads."""
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()
+
+    handler = BraintrustCallbackHandler(logger=logger)
+
+    model = ChatAnthropic(model="claude-sonnet-4-5-20250929")
+
+    # XXX: if you need to change the cassette or test, you'll want to change the text below to invalidate the stored cache.
+
+    # Anthropic prompt caching requires a minimum of 1024 tokens for Claude Sonnet models.
+    # This static text (~1500 tokens) ensures we meet that threshold consistently.
+    # See: https://platform.claude.com/docs/en/build-with-claude/prompt-caching
+    long_text_for_caching = """
+# Comprehensive Guide to Software Testing Methods!
+
+## Chapter 1: Introduction to Testing
+
+Software testing is a critical component of the software development lifecycle. It ensures that applications
+function correctly, meet requirements, and provide a positive user experience. This guide covers various
+testing methodologies, best practices, and tools used in modern software development.
+
+### 1.1 The Importance of Testing
+
+Testing helps identify defects early in the development process, reducing the cost of fixing issues later.
+Studies have shown that the cost of fixing a bug increases exponentially as it progresses through the
+development lifecycle. A bug found during requirements gathering might cost $1 to fix, while the same bug
+found in production could cost $100 or more.
+
+### 1.2 Types of Testing
+
+There are many types of testing, including:
+- Unit Testing: Testing individual components or functions in isolation
+- Integration Testing: Testing how components work together
+- End-to-End Testing: Testing the entire application flow
+- Performance Testing: Testing application speed and scalability
+- Security Testing: Testing for vulnerabilities and security issues
+- Usability Testing: Testing user experience and interface design
+
+## Chapter 2: Unit Testing Best Practices
+
+Unit testing focuses on testing the smallest testable parts of an application. Here are some best practices:
+
+### 2.1 Write Tests First (TDD)
+
+Test-Driven Development (TDD) is a methodology where tests are written before the actual code. The process
+follows a simple cycle: Red (write a failing test), Green (write code to pass the test), Refactor (improve
+the code while keeping tests passing).
+
+### 2.2 Keep Tests Independent
+
+Each test should be independent of others. Tests should not rely on the state created by previous tests.
+This ensures that tests can be run in any order and that failures are isolated and easy to debug.
+
+### 2.3 Use Meaningful Names
+
+Test names should clearly describe what is being tested and what the expected outcome is. A good test name
+might be "test_user_registration_with_valid_email_succeeds" rather than just "test_registration".
+
+### 2.4 Test Edge Cases
+
+Don't just test the happy path. Consider edge cases like:
+- Empty inputs
+- Null or undefined values
+- Very large inputs
+- Invalid formats
+- Boundary conditions
+
+## Chapter 3: Integration Testing
+
+Integration testing verifies that different modules or services work together correctly.
+
+### 3.1 Database Integration
+
+When testing database interactions, consider using:
+- Test databases separate from production
+- Database transactions that roll back after each test
+- Mock data that represents realistic scenarios
+
+### 3.2 API Integration
+
+API integration tests should verify:
+- Correct HTTP status codes
+- Response format and schema
+- Error handling
+- Authentication and authorization
+
+## Chapter 4: Performance Testing
+
+Performance testing ensures your application can handle expected load and scale appropriately.
+
+### 4.1 Load Testing
+
+Load testing simulates multiple users accessing the application simultaneously. Key metrics include:
+- Response time under load
+- Throughput (requests per second)
+- Error rates
+- Resource utilization (CPU, memory, network)
+
+### 4.2 Stress Testing
+
+Stress testing pushes the application beyond normal operational capacity to find breaking points and
+understand how the system fails gracefully.
+
+## Chapter 5: Continuous Integration and Testing
+
+Modern development practices integrate testing into the CI/CD pipeline.
+
+### 5.1 Automated Test Runs
+
+Tests should run automatically on every code change. This includes:
+- Running unit tests on every commit
+- Running integration tests on pull requests
+- Running end-to-end tests before deployment
+
+### 5.2 Test Coverage
+
+Test coverage metrics help identify untested code. While 100% coverage isn't always practical or necessary,
+maintaining good coverage helps ensure code quality. Focus on critical paths and business logic.
+
+## Chapter 6: Testing Tools and Frameworks
+
+Many tools exist to support testing efforts:
+
+### 6.1 Python Testing
+- pytest: Feature-rich testing framework
+- unittest: Built-in Python testing module
+- mock: Library for mocking objects
+
+### 6.2 JavaScript Testing
+- Jest: Popular testing framework
+- Mocha: Flexible testing framework
+- Cypress: End-to-end testing tool
+
+### 6.3 Other Tools
+- Selenium: Browser automation
+- JMeter: Performance testing
+- Postman: API testing
+
+## Conclusion
+
+Effective testing is essential for delivering high-quality software. By following best practices and using
+appropriate tools, teams can catch bugs early, improve code quality, and deliver better products to users.
+
+Remember: Testing is not just about finding bugs, it's about building confidence in your code.
+"""
+
+    messages: list[BaseMessage] = [
+        SystemMessage(
+            content=[
+                {
+                    "type": "text",
+                    "text": long_text_for_caching,
+                    "cache_control": {"type": "ephemeral"},  # marks the long system text as cacheable
+                }
+            ]
+        ),
+        HumanMessage(content="What is the first type of testing mentioned in section 1.2?"),
+    ]
+
+    res = model.invoke(messages, config={"callbacks": [cast(BaseCallbackHandler, handler)]})
+
+    spans = memory_logger.pop()
+    assert len(spans) > 0
+
+    llm_spans = find_spans_by_attributes(spans, name="ChatAnthropic", type="llm")
+    assert len(llm_spans) == 1
+    first_span = llm_spans[0]
+
+    assert "metrics" in first_span
+    first_metrics = first_span["metrics"]
+    assert "prompt_tokens" in first_metrics
+    assert first_metrics["prompt_tokens"] > 0
+
+    assert "prompt_cache_creation_tokens" in first_metrics
+    assert first_metrics["prompt_cache_creation_tokens"] > 0  # the first call is expected to write the cache
+    assert first_metrics["prompt_cached_tokens"] == 0  # ...and to read nothing from it
+
+    res = model.invoke(  # follow-up call reuses the same cacheable system message
+        messages + [res, HumanMessage(content="What testing framework is mentioned for Python?")],
+        config={"callbacks": [cast(BaseCallbackHandler, handler)]},
+    )
+
+    spans = memory_logger.pop()
+    assert len(spans) > 0
+
+    llm_spans = find_spans_by_attributes(spans, name="ChatAnthropic", type="llm")
+    assert len(llm_spans) == 1
+    second_span = llm_spans[0]
+
+    assert "metrics" in second_span
+    second_metrics = second_span["metrics"]
+
+    assert "prompt_cached_tokens" in second_metrics
+    assert second_metrics["prompt_cached_tokens"] > 0  # the second call is expected to hit the cache
+
+    assert "prompt_tokens" in second_metrics
+    assert second_metrics["prompt_tokens"] > 0
+
+
+@pytest.mark.vcr
+def test_langchain_anthropic_integration(logger_memory_logger: LoggerMemoryLogger):
+    """A globally registered handler traces a ChatAnthropic chain, including model name and token metrics.
+
+    NOTE(review): the global handler is not cleared here; confirm a fixture resets it between tests.
+    """
+    MODEL = "claude-sonnet-4-20250514"
+
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()
+
+    set_global_handler(BraintrustCallbackHandler(logger=logger))
+
+    # No ``callbacks`` config is passed: tracing relies on the global handler alone.
+    chain = ChatPromptTemplate.from_template("What is 1 + {number}?") | ChatAnthropic(model_name=MODEL)
+    result = chain.invoke({"number": "2"})
+
+    flush()
+
+    answer = result.content
+    assert isinstance(answer, str)
+    assert "3" in answer.lower()
+
+    spans = memory_logger.pop()
+    assert len(spans) > 0
+
+    llm_spans = list(filter(lambda span: span["span_attributes"].get("type") == "llm", spans))
+    assert len(llm_spans) > 0, "Should have at least one LLM call"
+
+    first_llm_span = llm_spans[0]
+    assert first_llm_span["metadata"]["model"] == MODEL
+
+    expected_metrics = {
+        "completion_tokens": 13,
+        "end": ANY,
+        "prompt_tokens": 16,
+        "start": ANY,
+        "total_tokens": 29,
+    }
+
+    assert_matches_object(first_llm_span["metrics"], expected_metrics)
+
+
+def test_auto_instrument_langchain():
+    """Run ``test_auto_langchain.py``, which checks that auto_instrument registers a global LangChain handler."""
+    verify_autoinstrument_script("test_auto_langchain.py")
+
+
+@pytest.mark.vcr
+@pytest.mark.asyncio
+async def test_async_langchain_invoke(logger_memory_logger: LoggerMemoryLogger):
+    """The globally registered handler also logs spans when the chain is awaited via ``ainvoke``."""
+    MODEL = "claude-sonnet-4-20250514"
+
+    logger, memory_logger = logger_memory_logger
+    assert not memory_logger.pop()
+
+    set_global_handler(BraintrustCallbackHandler(logger=logger))
+
+    # No ``callbacks`` config is passed: tracing relies on the global handler alone.
+    prompt_template = ChatPromptTemplate.from_template("What is 1 + {number}?")
+    chain = prompt_template | ChatAnthropic(model_name=MODEL)
+    result = await chain.ainvoke({"number": "2"})
+
+    flush()
+
+    answer = result.content
+    assert isinstance(answer, str)
+    assert "3" in answer.lower()
+
+    # Only the presence of logged spans is checked for the async path.
+    spans = memory_logger.pop()
+    assert len(spans) > 0
diff --git a/py/src/braintrust/integrations/langchain/tracing.py b/py/src/braintrust/integrations/langchain/tracing.py
new file mode 100644
index 00000000..08458314
--- /dev/null
+++ b/py/src/braintrust/integrations/langchain/tracing.py
@@ -0,0 +1,701 @@
+"""
+Braintrust tracing implementation for LangChain.
+
+Contains BraintrustCallbackHandler and supporting utilities for tracing LangChain
+chains, LLMs, tools, and retrievers.
+"""
+
+import json
+import logging
+import re
+import time
+from collections.abc import Mapping, Sequence
+from contextvars import ContextVar
+from re import Pattern
+from typing import (
+    Any,
+    TypedDict,
+    Union,
+)
+from uuid import UUID
+
+import braintrust
+from braintrust import NOOP_SPAN, Logger, Span, SpanAttributes, SpanTypeAttribute, current_span, init_logger
+from braintrust.version import VERSION as sdk_version
+from typing_extensions import NotRequired
+
+_logger = logging.getLogger(__name__)
+
+# integration_name stays "langchain-py" for backward compatibility with existing traces
+_INTEGRATION_NAME = "langchain-py"
+
+# Global handler context variable – registered with LangChain's configure hook system
+# so that all LangChain invocations in the process are automatically traced.
+_braintrust_callback_handler_var: ContextVar["BraintrustCallbackHandler | None"] = ContextVar(
+    "braintrust_callback_handler", default=None
+)
+
+_hook_registered = False  # set to True by _ensure_hook_registered() once the configure hook is registered
+
+
+def _ensure_hook_registered() -> None:
+    """Register the handler context var with LangChain's configure hooks.
+
+    Only the first call does any work; later calls see ``_hook_registered`` set and return.
+    """
+    global _hook_registered
+    if not _hook_registered:
+        # Deferred import: ``langchain_core`` is only loaded on first registration.
+        from langchain_core.tracers.context import register_configure_hook
+
+        register_configure_hook(context_var=_braintrust_callback_handler_var, inheritable=True)
+        _hook_registered = True
+
+
+def set_global_handler(handler: "BraintrustCallbackHandler") -> None:
+    """Install ``handler`` as the global Braintrust handler for LangChain.
+
+    Makes sure the LangChain configure hook is registered, then stores ``handler``
+    in the module's context variable, replacing whatever was set there before.
+    """
+    _ensure_hook_registered()
+    _braintrust_callback_handler_var.set(handler)
+
+
+def clear_global_handler() -> None:
+    """Reset the global handler context variable to ``None``; the LangChain configure hook stays registered."""
+    _braintrust_callback_handler_var.set(None)
+
+
+class LogEvent(TypedDict):  # optional event fields; ``_start_span`` forwards them to ``start_span`` as keywords
+    input: NotRequired[Any]
+    output: NotRequired[Any]
+    expected: NotRequired[Any]
+    error: NotRequired[str]
+    tags: NotRequired[Sequence[str] | None]  # ``_start_span`` copies these into metadata and logs ``tags=None``
+    scores: NotRequired[Mapping[str, int | float]]
+    metadata: NotRequired[Mapping[str, Any]]
+    metrics: NotRequired[Mapping[str, int | float]]
+    id: NotRequired[str]
+    dataset_record_id: NotRequired[str]
+
+
+class BraintrustCallbackHandler:
+    """LangChain callback handler that traces chains, LLMs, tools, and retrievers in Braintrust."""
+
+    root_run_id: UUID | None = None
+
+    def __init__(
+        self,
+        logger: Logger | Span | None = None,
+        debug: bool = False,
+        exclude_metadata_props: Pattern[str] | None = None,
+    ):
+        """Set up per-run span, skip and time-to-first-token bookkeeping around an optional ``logger``."""
+        if not exclude_metadata_props:
+            exclude_metadata_props = re.compile(r"^(l[sc]_|langgraph_|__pregel_|checkpoint_ns)")
+        self.exclude_metadata_props = exclude_metadata_props
+        self.logger = logger
+        self.debug = debug  # DEPRECATED
+        # Set run_inline=True to avoid thread executor in async contexts
+        # This ensures memory logger context is preserved
+        self.run_inline = True
+        self.spans: dict[UUID, Span] = {}
+        self.skipped_runs: set[UUID] = set()
+        self._start_times: dict[UUID, float] = {}
+        self._first_token_times: dict[UUID, float] = {}
+        self._ttft_ms: dict[UUID, float] = {}
+
+    def _start_span(
+        self,
+        parent_run_id: UUID | None,
+        run_id: UUID,
+        name: str | None = None,
+        type: SpanTypeAttribute | None = SpanTypeAttribute.TASK,
+        span_attributes: SpanAttributes | Mapping[str, Any] | None = None,
+        start_time: float | None = None,
+        set_current: bool | None = None,
+        parent: str | None = None,
+        event: LogEvent | None = None,
+    ) -> Any:
+        """Open (and register under ``run_id``) a span for a LangChain run; returns ``None`` if one already exists."""
+        if run_id in self.spans:
+            # XXX: See graph test case of an example where this _may_ be intended.
+            _logger.warning(f"Span already exists for run_id {run_id} (this is likely a bug)")
+            return
+
+        if not parent_run_id:  # a run without a parent is recorded as the root run
+            self.root_run_id = run_id
+        # Parent precedence: tracked parent run, ambient current span, configured logger, then the braintrust module.
+        current_parent = current_span()
+        parent_span = None
+        if parent_run_id and parent_run_id in self.spans:
+            parent_span = self.spans[parent_run_id]
+        elif current_parent != NOOP_SPAN:
+            parent_span = current_parent
+        elif self.logger is not None:
+            parent_span = self.logger
+        else:
+            parent_span = braintrust
+
+        if event is None:
+            event = {}
+
+        tags = event.get("tags") or []
+        event = {
+            **event,
+            "tags": None,  # tags are carried in metadata (below) instead of the span's tags field
+            "metadata": {
+                **({"tags": tags}),
+                **(event.get("metadata") or {}),
+                "run_id": run_id,
+                "parent_run_id": parent_run_id,
+                "braintrust": {
+                    "integration_name": _INTEGRATION_NAME,
+                    "integration_version": sdk_version,
+                    "sdk_version": sdk_version,
+                    "language": "python",
+                },
+            },
+        }
+
+        span = parent_span.start_span(
+            name=name,
+            type=type,
+            span_attributes=span_attributes,
+            start_time=start_time,
+            set_current=set_current,
+            parent=parent,
+            **event,
+        )
+
+        if self.logger != NOOP_SPAN and span == NOOP_SPAN:  # nothing configured: retry on a default logger
+            _logger.warning(
+                "Braintrust logging not configured. Pass a `logger`, call `init_logger`, or run an experiment to configure Braintrust logging. Setting up a default."
+            )
+            span = init_logger().start_span(
+                name=name,
+                type=type,
+                span_attributes=span_attributes,
+                start_time=start_time,
+                set_current=set_current,
+                parent=parent,
+                **event,
+            )
+
+        span.set_current()  # called unconditionally, whatever the ``set_current`` argument is
+        self.spans[run_id] = span
+        return span
+
+    def _end_span(
+        self,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        input: Any | None = None,
+        output: Any | None = None,
+        expected: Any | None = None,
+        error: str | None = None,
+        tags: Sequence[str] | None = None,
+        scores: Mapping[str, int | float] | None = None,
+        metadata: Mapping[str, Any] | None = None,
+        metrics: Mapping[str, int | float] | None = None,
+        dataset_record_id: str | None = None,
+    ) -> Any:
+        """Log the final event on ``run_id``'s span and end it; skipped and unknown runs are ignored."""
+        # Skipped runs never get a span, so this check must come before the
+        # ``self.spans`` lookup; otherwise ``skipped_runs`` is never drained.
+        if run_id in self.skipped_runs:
+            self.skipped_runs.discard(run_id)
+            return
+
+        if run_id not in self.spans:
+            return
+
+        span = self.spans.pop(run_id)
+        if self.root_run_id == run_id:
+            self.root_run_id = None
+
+        span.log(
+            input=input,
+            output=output,
+            expected=expected,
+            error=error,
+            tags=None,
+            scores=scores,
+            metadata={
+                **({"tags": tags} if tags else {}),
+                **(metadata or {}),
+            },
+            metrics=metrics,
+            dataset_record_id=dataset_record_id,
+        )
+
+        # In async workflows, callbacks may execute in different async contexts.
+        # The span's context variable token may have been created in a different
+        # context, causing ValueError when trying to reset it. We catch and ignore
+        # this specific error since the span hierarchy is maintained via self.spans.
+        try:
+            span.unset_current()
+        except ValueError as e:
+            if "was created in a different Context" not in str(e):
+                raise
+
+        span.end()
+
+    def on_llm_error(
+        self,
+        error: BaseException,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,  # TODO: response=
+    ) -> Any:
+        """Close the LLM span with the stringified error and drop the run's timing state."""
+        self._end_span(run_id, error=str(error), metadata=dict(kwargs))
+        # The run is over, so its time-to-first-token bookkeeping is no longer needed.
+        for per_run_times in (self._start_times, self._first_token_times, self._ttft_ms):
+            per_run_times.pop(run_id, None)
+
+    def on_chain_error(
+        self, error: BaseException, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
+    ) -> Any:
+        """Close the span of a failed chain run.
+
+        The stringified exception is logged as the error; extra callback kwargs become metadata.
+        """
+        # TODO: some metadata
+        self._end_span(run_id, error=str(error), metadata=dict(kwargs))
+
+    def on_tool_error(
+        self, error: BaseException, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
+    ) -> Any:
+        """Close the span of a failed tool run.
+
+        The stringified exception is logged as the span error, and any extra
+        callback keyword arguments are logged as span metadata.
+        """
+        self._end_span(run_id, error=str(error), metadata=dict(kwargs))
+
+    def on_retriever_error(
+        self, error: BaseException, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
+    ) -> Any:
+        """Close the span of a failed retriever run.
+
+        The stringified exception is logged as the span error, and any extra
+        callback keyword arguments are logged as span metadata.
+        """
+        self._end_span(run_id, error=str(error), metadata=dict(kwargs))
+
+    # Agent Methods
+    def on_agent_action(
+        self, action: Any, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
+    ) -> Any:
+        """Open a span named after ``action.tool`` with the action itself as input.
+
+        NOTE(review): the span is typed as LLM rather than TOOL; confirm this is intended.
+        """
+        agent_event: LogEvent = {"input": action, "metadata": dict(kwargs)}
+        self._start_span(
+            parent_run_id,
+            run_id,
+            name=action.tool,
+            type=SpanTypeAttribute.LLM,
+            event=agent_event,
+        )
+
+    def on_agent_finish(
+        self, finish: Any, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
+    ) -> Any:
+        """Close the agent span for ``run_id``.
+
+        ``finish`` is logged as the span output, and any extra callback keyword
+        arguments are logged as span metadata.
+        """
+        self._end_span(run_id, output=finish, metadata=dict(kwargs))
+
+    def on_chain_start(
+        self,
+        serialized: dict[str, Any],
+        inputs: dict[str, Any],
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
+        name: str | None = None,
+        metadata: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Open a task span for a chain run, unless the run is tagged ``langsmith:hidden``."""
+        run_tags = tags or []
+
+        # avoids extra logs that seem not as useful esp. with langgraph
+        if "langsmith:hidden" in run_tags:
+            self.skipped_runs.add(run_id)
+            return
+
+        run_metadata = metadata or {}
+
+        # First truthy wins: name, langgraph node, serialized name, ``_last_item`` of the serialized id, "Chain".
+        span_name = name or run_metadata.get("langgraph_node") or serialized.get("name")
+        if not span_name:
+            span_name = _last_item(serialized.get("id") or []) or "Chain"
+
+        chain_event: LogEvent = {
+            "input": inputs,
+            "tags": run_tags,
+            "metadata": {
+                "serialized": serialized,
+                "name": name,
+                "metadata": run_metadata,
+                **kwargs,
+            },
+        }
+        self._start_span(
+            parent_run_id,
+            run_id,
+            name=span_name,
+            event=chain_event,
+        )
+
+    def on_chain_end(
+        self,
+        outputs: dict[str, Any],
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        self._end_span(run_id, output=outputs, tags=tags, metadata={**kwargs})  # logs outputs, then ends the span
+
+    def on_llm_start(
+        self,
+        serialized: dict[str, Any],
+        prompts: list[str],
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
+        metadata: dict[str, Any] | None = None,
+        name: str | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Start timing the run and open an LLM span with ``prompts`` as input."""
+        self._start_times[run_id] = time.perf_counter()
+        # Drop any first-token data left over from an earlier use of this run_id.
+        for stale_times in (self._first_token_times, self._ttft_ms):
+            stale_times.pop(run_id, None)
+
+        span_name = name or serialized.get("name") or _last_item(serialized.get("id") or []) or "LLM"
+
+        llm_event: LogEvent = {
+            "input": prompts,
+            "tags": tags,
+            "metadata": {
+                "serialized": serialized,
+                "name": span_name,  # the resolved name, not the raw ``name`` argument
+                "metadata": metadata,
+                **kwargs,
+            },
+        }
+        self._start_span(
+            parent_run_id, run_id, name=span_name, type=SpanTypeAttribute.LLM, event=llm_event
+        )
+
+    def on_chat_model_start(
+        self,
+        serialized: dict[str, Any],
+        messages: list[list[Any]],
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
+        metadata: dict[str, Any] | None = None,
+        name: str | None = None,
+        invocation_params: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Start timing the run and open an LLM span with the chat ``messages`` as input."""
+        self._start_times[run_id] = time.perf_counter()
+        # Drop any first-token data left over from an earlier use of this run_id.
+        for stale_times in (self._first_token_times, self._ttft_ms):
+            stale_times.pop(run_id, None)
+
+        span_name = name or serialized.get("name") or _last_item(serialized.get("id") or []) or "Chat Model"
+
+        chat_event: LogEvent = {
+            "input": messages,
+            "tags": tags,
+            "metadata": {
+                "serialized": serialized,
+                "invocation_params": invocation_params or {},
+                "metadata": metadata or {},
+                # The raw ``name`` argument is logged here, not the resolved span name.
+                "name": name,
+                **kwargs,
+            },
+        }
+
+        self._start_span(
+            parent_run_id, run_id, name=span_name, type=SpanTypeAttribute.LLM, event=chat_event
+        )
+
+    def on_llm_end(
+        self,
+        response: Any,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Close the LLM span with the response, its metrics and model name; untracked runs are ignored."""
+        # Release the per-run timing state first so it is dropped even when no span is tracked for this run.
+        ttft = self._ttft_ms.pop(run_id, None)
+        self._start_times.pop(run_id, None)
+        self._first_token_times.pop(run_id, None)
+        if run_id not in self.spans:
+            return
+
+        metrics = _get_metrics_from_response(response)
+        if ttft is not None:
+            metrics["time_to_first_token"] = ttft
+
+        model_name = _get_model_name_from_response(response)
+
+        self._end_span(
+            run_id,
+            output=response,
+            metrics=metrics,
+            tags=tags,
+            metadata={
+                "model": model_name,
+                **kwargs,
+            },
+        )
+
+    def on_tool_start(
+        self,
+        serialized: dict[str, Any],
+        input_str: str,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
+        metadata: dict[str, Any] | None = None,
+        inputs: dict[str, Any] | None = None,
+        name: str | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Open a tool span; its input is ``inputs`` when truthy, else ``_safe_parse_json(input_str)``."""
+        span_name = name or serialized.get("name") or _last_item(serialized.get("id") or []) or "Tool"
+        tool_event: LogEvent = {
+            "input": inputs or _safe_parse_json(input_str),
+            "tags": tags,
+            "metadata": {
+                "metadata": metadata,
+                "serialized": serialized,
+                "input_str": input_str,
+                "input": _safe_parse_json(input_str),
+                "inputs": inputs,
+                "name": name,
+                **kwargs,
+            },
+        }
+
+        self._start_span(
+            parent_run_id, run_id, name=span_name, type=SpanTypeAttribute.TOOL, event=tool_event
+        )
+
+    def on_tool_end(
+        self, output: Any, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
+    ) -> Any:
+        """Close the tool span for ``run_id``.
+
+        ``output`` is logged as the span output, and any extra callback keyword
+        arguments are logged as span metadata.
+        """
+        self._end_span(run_id, output=output, metadata=dict(kwargs))
+
+    def on_retriever_start(
+        self,
+        serialized: dict[str, Any],
+        query: str,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
+        metadata: dict[str, Any] | None = None,
+        name: str | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        self._start_span(
+            parent_run_id,
+            run_id,
+            name=name or serialized.get("name") or _last_item(serialized.get("id") or []) or "Retriever",
+            type=SpanTypeAttribute.FUNCTION,
+            event={
+                "input": query,
+                "tags": tags,
+                "metadata": {
+                    "serialized": serialized,
+                    "metadata": metadata,
+                    "name": name,
+                    **kwargs,
+                },
+            },
+        )
+
+    def on_retriever_end(
+        self,
+        documents: Sequence[Any],
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        self._end_span(run_id, output=documents, metadata={**kwargs})
+
+    def on_llm_new_token(
+        self,
+        token: str,
+        *,
+        chunk: Union[Any, None] = None,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        if run_id not in self._first_token_times:
+            now = time.perf_counter()
+            self._first_token_times[run_id] = now
+            start = self._start_times.get(run_id)
+            if start is not None:
+                self._ttft_ms[run_id] = now - start
+
+    def on_text(
+        self,
+        text: str,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        pass
+
+    def on_retry(
+        self,
+        retry_state: Any,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        pass
+
+    def on_custom_event(
+        self,
+        name: str,
+        data: Any,
+        *,
+        run_id: UUID,
+        tags: list[str] | None = None,
+        metadata: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> Any:
+        pass
+
+
+class BraintrustTracer(BraintrustCallbackHandler):
+    """Deprecated. Use BraintrustCallbackHandler instead."""
+
+    def __init__(self, *args: Any, **kwargs: Any):
+        _logger.warning(
+            "BraintrustTracer is deprecated. Use BraintrustCallbackHandler from "
+            "braintrust.wrappers.langchain instead."
+        )
+        super().__init__(*args, **kwargs)
+
+
+def _safe_parse_json(input_str: str) -> Any:
+    try:
+        return json.loads(input_str)
+    except Exception:
+        return input_str
+
+
+def _last_item(items: list[Any]) -> Any:
+    return items[-1] if items else None
+
+
+def _walk_generations(response: Any):
+    for generations in response.generations or []:
+        yield from generations or []
+
+
+def _get_model_name_from_response(response: Any) -> "str | None":
+    model_name = None
+    for generation in _walk_generations(response):
+        message = getattr(generation, "message", None)
+        if not message:
+            continue
+
+        response_metadata = getattr(message, "response_metadata", None)
+        if response_metadata and isinstance(response_metadata, dict):
+            model_name = response_metadata.get("model_name")
+
+        if model_name:
+            break
+
+    if not model_name:
+        llm_output: dict[str, Any] = (response.llm_output or {}) if hasattr(response, "llm_output") else {}
+        model_name = llm_output.get("model_name") or llm_output.get("model") or ""
+
+    return model_name
+
+
+def _clean_object(obj: dict[str, Any]) -> dict[str, Any]:
+    return {
+        k: v
+        for k, v in obj.items()
+        if v is not None and not (isinstance(v, list) and not v) and not (isinstance(v, dict) and not v)
+    }
+
+
+def _get_metrics_from_response(response: Any) -> dict[str, Any]:
+    metrics: dict[str, Any] = {}
+
+    for generation in _walk_generations(response):
+        message = getattr(generation, "message", None)
+        if not message:
+            continue
+
+        usage_metadata = getattr(message, "usage_metadata", None)
+
+        if usage_metadata and isinstance(usage_metadata, dict):
+            metrics.update(
+                _clean_object(
+                    {
+                        "total_tokens": usage_metadata.get("total_tokens"),
+                        "prompt_tokens": usage_metadata.get("input_tokens"),
+                        "completion_tokens": usage_metadata.get("output_tokens"),
+                    }
+                )
+            )
+
+            # Extract cache tokens from nested input_token_details (LangChain format)
+            # Maps to Braintrust's standard cache token metric names
+            input_token_details = usage_metadata.get("input_token_details")
+            if input_token_details and isinstance(input_token_details, dict):
+                cache_read = input_token_details.get("cache_read")
+                cache_creation = input_token_details.get("cache_creation")
+
+                if cache_read is not None:
+                    metrics["prompt_cached_tokens"] = cache_read
+                if cache_creation is not None:
+                    metrics["prompt_cache_creation_tokens"] = cache_creation
+
+    if not metrics or not any(metrics.values()):
+        llm_output: dict[str, Any] = (response.llm_output or {}) if hasattr(response, "llm_output") else {}
+        metrics = llm_output.get("token_usage") or llm_output.get("estimatedTokens") or {}
+
+    return _clean_object(metrics)
diff --git a/py/src/braintrust/wrappers/langchain.py b/py/src/braintrust/wrappers/langchain.py
deleted file mode 100644
index 6beeb578..00000000
--- a/py/src/braintrust/wrappers/langchain.py
+++ /dev/null
@@ -1,150 +0,0 @@
-import contextvars
-import logging
-from typing import Any
-from uuid import UUID
-
-import braintrust
-
-
-_logger = logging.getLogger("braintrust.wrappers.langchain")
-
-try:
-    from langchain.callbacks.base import BaseCallbackHandler
-    from langchain.schema import Document
-    from langchain.schema.agent import AgentAction
-    from langchain.schema.messages import BaseMessage
-    from langchain.schema.output import LLMResult
-except ImportError:
-    _logger.warning("Failed to import langchain, using stubs")
-    BaseCallbackHandler = object
-    Document = object
-    AgentAction = object
-    BaseMessage = object
-    LLMResult = object
-
-langchain_parent = contextvars.ContextVar("langchain_current_span", default=None)
-
-
-class BraintrustTracer(BaseCallbackHandler):
-    def __init__(self, logger=None):
-        _logger.warning("BraintrustTracer is deprecated, use `pip install braintrust-langchain` instead")
-        self.logger = logger
-        self.spans = {}
-
-    def _start_span(self, parent_run_id, run_id, name: str | None, **kwargs: Any) -> Any:
-        assert run_id not in self.spans, f"Span already exists for run_id {run_id} (this is likely a bug)"
-
-        current_parent = langchain_parent.get()
-        if parent_run_id in self.spans:
-            parent_span = self.spans[parent_run_id]
-        elif current_parent is not None:
-            parent_span = current_parent
-        elif self.logger is not None:
-            parent_span = self.logger
-        else:
-            parent_span = braintrust
-
-        span = parent_span.start_span(name=name, **kwargs)
-        langchain_parent.set(span)
-        self.spans[run_id] = span
-        return span
-
-    def _end_span(self, run_id, **kwargs: Any) -> Any:
-        assert run_id in self.spans, f"No span exists for run_id {run_id} (this is likely a bug)"
-        span = self.spans.pop(run_id)
-        span.log(**kwargs)
-
-        if langchain_parent.get() == span:
-            langchain_parent.set(None)
-
-        span.end()
-
-    def on_chain_start(
-        self,
-        serialized: dict[str, Any],
-        inputs: dict[str, Any],
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        tags: list[str] | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._start_span(parent_run_id, run_id, "Chain", input=inputs, metadata={"tags": tags})
-
-    def on_chain_end(
-        self, outputs: dict[str, Any], *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
-    ) -> Any:
-        self._end_span(run_id, output=outputs)
-
-    def on_llm_start(
-        self,
-        serialized: dict[str, Any],
-        prompts: list[str],
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        tags: list[str] | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._start_span(
-            parent_run_id,
-            run_id,
-            "LLM",
-            input=prompts,
-            metadata={"tags": tags, **kwargs["invocation_params"]},
-        )
-
-    def on_chat_model_start(
-        self,
-        serialized: dict[str, Any],
-        messages: list[list[BaseMessage]],
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        tags: list[str] | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        self._start_span(
-            parent_run_id,
-            run_id,
-            "Chat Model",
-            input=[[m.dict() for m in batch] for batch in messages],
-            metadata={"tags": tags, **kwargs["invocation_params"]},
-        )
-
-    def on_llm_end(
-        self, response: LLMResult, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
-    ) -> Any:
-        metrics = {}
-        token_usage = response.llm_output.get("token_usage", {})
-        if "total_tokens" in token_usage:
-            metrics["tokens"] = token_usage["total_tokens"]
-        if "prompt_tokens" in token_usage:
-            metrics["prompt_tokens"] = token_usage["prompt_tokens"]
-        if "completion_tokens" in token_usage:
-            metrics["completion_tokens"] = token_usage["completion_tokens"]
-
-        self._end_span(run_id, output=[[m.dict() for m in batch] for batch in response.generations], metrics=metrics)
-
-    def on_tool_start(
-        self,
-        serialized: dict[str, Any],
-        input_str: str,
-        *,
-        run_id: UUID,
-        parent_run_id: UUID | None = None,
-        tags: list[str] | None = None,
-        **kwargs: Any,
-    ) -> Any:
-        _logger.warning("Starting tool, but it will not be traced in braintrust (unsupported)")
-
-    def on_tool_end(self, output: str, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any) -> Any:
-        pass
-
-    def on_retriever_start(self, query: str, *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any) -> Any:
-        _logger.warning("Starting retriever, but it will not be traced in braintrust (unsupported)")
-
-    def on_retriever_end(
-        self, response: list[Document], *, run_id: UUID, parent_run_id: UUID | None = None, **kwargs: Any
-    ) -> Any:
-        pass
diff --git a/py/src/braintrust/wrappers/langchain/__init__.py b/py/src/braintrust/wrappers/langchain/__init__.py
new file mode 100644
index 00000000..0296a51a
--- /dev/null
+++ b/py/src/braintrust/wrappers/langchain/__init__.py
@@ -0,0 +1,21 @@
+"""
+Braintrust LangChain wrapper — re-exports from braintrust.integrations.langchain.
+"""
+
+from braintrust.integrations.langchain import (
+    BraintrustCallbackHandler,
+    BraintrustTracer,
+    LangChainIntegration,
+    clear_global_handler,
+    set_global_handler,
+    setup_langchain,
+)
+
+__all__ = [
+    "BraintrustCallbackHandler",
+    "BraintrustTracer",
+    "LangChainIntegration",
+    "set_global_handler",
+    "clear_global_handler",
+    "setup_langchain",
+]

From 813405d17c5b0ced466448c80ab8dc3fb3d2dfdf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Wed, 25 Mar 2026 01:26:25 +0000
Subject: [PATCH 2/7] maybe done

---
 integrations/langchain-py/README.md                  |  2 +-
 py/examples/langchain/manual_patching.py             |  2 +-
 py/noxfile.py                                        | 12 ++++++++++--
 py/src/braintrust/auto.py                            |  2 --
 .../auto_test_scripts/test_auto_langchain.py         |  2 ++
 py/src/braintrust/integrations/langchain/__init__.py |  4 +++-
 py/src/braintrust/integrations/langchain/conftest.py |  1 -
 .../braintrust/integrations/langchain/integration.py |  2 +-
 .../integrations/langchain/test_langchain.py         | 12 +++++-------
 py/src/braintrust/integrations/langchain/tracing.py  |  3 ++-
 py/src/braintrust/wrappers/langchain/__init__.py     |  1 +
 11 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/integrations/langchain-py/README.md b/integrations/langchain-py/README.md
index 19717270..b2e1b2ab 100644
--- a/integrations/langchain-py/README.md
+++ b/integrations/langchain-py/README.md
@@ -23,7 +23,7 @@ SDK for integrating [Braintrust](https://braintrust.dev) with [LangChain](https:
    braintrust.auto_instrument()
 
    # After (option 2: explicit)
-   from braintrust.wrappers.langchain import BraintrustCallbackHandler, set_global_handler
+   from braintrust.integrations.langchain import BraintrustCallbackHandler, set_global_handler
    ```
 
 The API is identical - no code changes needed beyond the import path.
diff --git a/py/examples/langchain/manual_patching.py b/py/examples/langchain/manual_patching.py
index 142bcc5a..4912e707 100644
--- a/py/examples/langchain/manual_patching.py
+++ b/py/examples/langchain/manual_patching.py
@@ -7,7 +7,7 @@
 """
 
 from braintrust import init_logger
-from braintrust.wrappers.langchain import set_global_handler, BraintrustCallbackHandler
+from braintrust.integrations.langchain import BraintrustCallbackHandler, set_global_handler
 
 
 # Setup LangChain tracing with a specific project
diff --git a/py/noxfile.py b/py/noxfile.py
index 2789bfee..1c8c7389 100644
--- a/py/noxfile.py
+++ b/py/noxfile.py
@@ -43,6 +43,7 @@ def _pinned_python_version():
 INTEGRATION_DIR = "braintrust/integrations"
 INTEGRATION_AUTO_TEST_DIR = "braintrust/integrations/auto_test_scripts"
 ANTHROPIC_INTEGRATION_DIR = "braintrust/integrations/anthropic"
+LANGCHAIN_INTEGRATION_DIR = "braintrust/integrations/langchain"
 CONTRIB_DIR = "braintrust/contrib"
 DEVSERVER_DIR = "braintrust/devserver"
 
@@ -102,7 +103,7 @@ def _pinned_python_version():
 GENAI_VERSIONS = (LATEST,)
 DSPY_VERSIONS = (LATEST,)
 GOOGLE_ADK_VERSIONS = (LATEST, "1.14.1")
-LANGCHAIN_VERSIONS = (LATEST,)
+LANGCHAIN_VERSIONS = (LATEST, "0.3.83")
 # temporalio 1.19.0+ requires Python >= 3.10; skip Python 3.9 entirely
 TEMPORAL_VERSIONS = (LATEST, "1.20.0", "1.19.0")
 PYTEST_VERSIONS = (LATEST, "8.4.2")
@@ -425,7 +426,14 @@ def _run_core_tests(session):
     _run_tests(
         session,
         SRC_DIR,
-        ignore_paths=[WRAPPER_DIR, INTEGRATION_AUTO_TEST_DIR, ANTHROPIC_INTEGRATION_DIR, CONTRIB_DIR, DEVSERVER_DIR],
+        ignore_paths=[
+            WRAPPER_DIR,
+            INTEGRATION_AUTO_TEST_DIR,
+            ANTHROPIC_INTEGRATION_DIR,
+            LANGCHAIN_INTEGRATION_DIR,
+            CONTRIB_DIR,
+            DEVSERVER_DIR,
+        ],
     )
 
 
diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py
index fb40d088..ad824726 100644
--- a/py/src/braintrust/auto.py
+++ b/py/src/braintrust/auto.py
@@ -4,8 +4,6 @@
 Provides one-line instrumentation for supported libraries.
 """
 
-from __future__ import annotations
-
 import logging
 from contextlib import contextmanager
 
diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py
index a3719ef8..9cc7c771 100644
--- a/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py
+++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_langchain.py
@@ -3,6 +3,7 @@
 from braintrust.auto import auto_instrument
 from braintrust.integrations.langchain import BraintrustCallbackHandler
 
+
 # 1. Instrument
 results = auto_instrument()
 assert results.get("langchain") == True, "auto_instrument should return True for langchain"
@@ -14,6 +15,7 @@
 # 3. Verify that a global handler was registered with LangChain
 from langchain_core.callbacks import CallbackManager
 
+
 manager = CallbackManager.configure()
 handler = next((h for h in manager.handlers if isinstance(h, BraintrustCallbackHandler)), None)
 assert handler is not None, "BraintrustCallbackHandler should be registered globally after auto_instrument()"
diff --git a/py/src/braintrust/integrations/langchain/__init__.py b/py/src/braintrust/integrations/langchain/__init__.py
index 9758ca95..e1a19352 100644
--- a/py/src/braintrust/integrations/langchain/__init__.py
+++ b/py/src/braintrust/integrations/langchain/__init__.py
@@ -21,7 +21,9 @@ def setup_langchain() -> bool:
 
 # Lazily imported to avoid circular imports at module load time
 # (tracing.py imports from braintrust, which must be fully initialized first)
-_LAZY_ATTRS = frozenset(["BraintrustCallbackHandler", "BraintrustTracer", "set_global_handler", "clear_global_handler"])
+_LAZY_ATTRS = frozenset(
+    ["BraintrustCallbackHandler", "BraintrustTracer", "set_global_handler", "clear_global_handler"]
+)
 
 
 def __getattr__(name: str):
diff --git a/py/src/braintrust/integrations/langchain/conftest.py b/py/src/braintrust/integrations/langchain/conftest.py
index 078502b2..a8f56176 100644
--- a/py/src/braintrust/integrations/langchain/conftest.py
+++ b/py/src/braintrust/integrations/langchain/conftest.py
@@ -10,7 +10,6 @@
     _MemoryBackgroundLogger,
 )
 from braintrust.test_helpers import init_test_logger
-
 from braintrust.wrappers.langchain import clear_global_handler
 
 
diff --git a/py/src/braintrust/integrations/langchain/integration.py b/py/src/braintrust/integrations/langchain/integration.py
index e22cdc0f..0f29b0da 100644
--- a/py/src/braintrust/integrations/langchain/integration.py
+++ b/py/src/braintrust/integrations/langchain/integration.py
@@ -2,7 +2,7 @@
 
 from typing import Any
 
-from braintrust.integrations.base import BasePatcher, BaseIntegration
+from braintrust.integrations.base import BaseIntegration, BasePatcher
 
 
 class LangChainCallbackPatcher(BasePatcher):
diff --git a/py/src/braintrust/integrations/langchain/test_langchain.py b/py/src/braintrust/integrations/langchain/test_langchain.py
index 827cf777..889fe505 100644
--- a/py/src/braintrust/integrations/langchain/test_langchain.py
+++ b/py/src/braintrust/integrations/langchain/test_langchain.py
@@ -1,10 +1,12 @@
 # pyright: reportTypedDictNotRequiredAccess=none
 import uuid
-from typing import Any, Dict, List, Sequence, Union, cast
+from typing import Any, Dict, List, Union, cast
 from unittest.mock import ANY
 
 import pytest
+from braintrust.integrations.langchain import BraintrustCallbackHandler, set_global_handler
 from braintrust.logger import flush
+from braintrust.wrappers.test_utils import verify_autoinstrument_script
 from langchain_anthropic import ChatAnthropic
 from langchain_core.callbacks import BaseCallbackHandler, CallbackManager
 from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
@@ -15,11 +17,9 @@
 from langchain_openai import ChatOpenAI
 from pydantic import BaseModel, Field
 
-from braintrust.integrations.langchain import BraintrustCallbackHandler, set_global_handler
-from braintrust.wrappers.test_utils import verify_autoinstrument_script
-
 from .conftest import LoggerMemoryLogger
 
+
 # ---------------------------------------------------------------------------
 # Helpers (inlined from the integration package)
 # ---------------------------------------------------------------------------
@@ -44,9 +44,7 @@ def assert_matches_object(actual: Any, expected: Any, ignore_order: bool = False
                         matched = True
                     except Exception:
                         pass
-                assert matched, (
-                    f"Expected {expected_item} in unordered sequence but couldn't find match in {actual}"
-                )
+                assert matched, f"Expected {expected_item} in unordered sequence but couldn't find match in {actual}"
     elif isinstance(expected, dict):
         assert isinstance(actual, dict), f"Expected dict but got {type(actual)}"
         for k, v in expected.items():
diff --git a/py/src/braintrust/integrations/langchain/tracing.py b/py/src/braintrust/integrations/langchain/tracing.py
index 08458314..85044bf1 100644
--- a/py/src/braintrust/integrations/langchain/tracing.py
+++ b/py/src/braintrust/integrations/langchain/tracing.py
@@ -24,6 +24,7 @@
 from braintrust.version import VERSION as sdk_version
 from typing_extensions import NotRequired
 
+
 _logger = logging.getLogger(__name__)
 
 # integration_name stays "langchain-py" for backward compatibility with existing traces
@@ -611,7 +612,7 @@ class BraintrustTracer(BraintrustCallbackHandler):
     def __init__(self, *args: Any, **kwargs: Any):
         _logger.warning(
             "BraintrustTracer is deprecated. Use BraintrustCallbackHandler from "
-            "braintrust.wrappers.langchain instead."
+            "braintrust.integrations.langchain instead."
         )
         super().__init__(*args, **kwargs)
 
diff --git a/py/src/braintrust/wrappers/langchain/__init__.py b/py/src/braintrust/wrappers/langchain/__init__.py
index 0296a51a..72f30ca4 100644
--- a/py/src/braintrust/wrappers/langchain/__init__.py
+++ b/py/src/braintrust/wrappers/langchain/__init__.py
@@ -11,6 +11,7 @@
     setup_langchain,
 )
 
+
 __all__ = [
     "BraintrustCallbackHandler",
     "BraintrustTracer",

From 1968511e74935b0d97f2f0557614182f267abea2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Wed, 25 Mar 2026 17:40:59 +0000
Subject: [PATCH 3/7] rename the cassettes

---
 ...st_async_langchain_invoke => test_async_langchain_invoke.yaml} | 0
 .../{test_chain_with_memory => test_chain_with_memory.yaml}       | 0
 .../cassettes/{test_global_handler => test_global_handler.yaml}   | 0
 ...opic_integration => test_langchain_anthropic_integration.yaml} | 0
 ...raph_state_management => test_langgraph_state_management.yaml} | 0
 .../langchain/cassettes/{test_llm_calls => test_llm_calls.yaml}   | 0
 .../{test_parallel_execution => test_parallel_execution.yaml}     | 0
 ...test_prompt_caching_tokens => test_prompt_caching_tokens.yaml} | 0
 .../cassettes/{test_streaming_ttft => test_streaming_ttft.yaml}   | 0
 .../langchain/cassettes/{test_tool_usage => test_tool_usage.yaml} | 0
 10 files changed, 0 insertions(+), 0 deletions(-)
 rename py/src/braintrust/integrations/langchain/cassettes/{test_async_langchain_invoke => test_async_langchain_invoke.yaml} (100%)
 rename py/src/braintrust/integrations/langchain/cassettes/{test_chain_with_memory => test_chain_with_memory.yaml} (100%)
 rename py/src/braintrust/integrations/langchain/cassettes/{test_global_handler => test_global_handler.yaml} (100%)
 rename py/src/braintrust/integrations/langchain/cassettes/{test_langchain_anthropic_integration => test_langchain_anthropic_integration.yaml} (100%)
 rename py/src/braintrust/integrations/langchain/cassettes/{test_langgraph_state_management => test_langgraph_state_management.yaml} (100%)
 rename py/src/braintrust/integrations/langchain/cassettes/{test_llm_calls => test_llm_calls.yaml} (100%)
 rename py/src/braintrust/integrations/langchain/cassettes/{test_parallel_execution => test_parallel_execution.yaml} (100%)
 rename py/src/braintrust/integrations/langchain/cassettes/{test_prompt_caching_tokens => test_prompt_caching_tokens.yaml} (100%)
 rename py/src/braintrust/integrations/langchain/cassettes/{test_streaming_ttft => test_streaming_ttft.yaml} (100%)
 rename py/src/braintrust/integrations/langchain/cassettes/{test_tool_usage => test_tool_usage.yaml} (100%)

diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke b/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml
similarity index 100%
rename from py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke
rename to py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory b/py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory.yaml
similarity index 100%
rename from py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory
rename to py/src/braintrust/integrations/langchain/cassettes/test_chain_with_memory.yaml
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_global_handler b/py/src/braintrust/integrations/langchain/cassettes/test_global_handler.yaml
similarity index 100%
rename from py/src/braintrust/integrations/langchain/cassettes/test_global_handler
rename to py/src/braintrust/integrations/langchain/cassettes/test_global_handler.yaml
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration b/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml
similarity index 100%
rename from py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration
rename to py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management b/py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management.yaml
similarity index 100%
rename from py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management
rename to py/src/braintrust/integrations/langchain/cassettes/test_langgraph_state_management.yaml
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_llm_calls b/py/src/braintrust/integrations/langchain/cassettes/test_llm_calls.yaml
similarity index 100%
rename from py/src/braintrust/integrations/langchain/cassettes/test_llm_calls
rename to py/src/braintrust/integrations/langchain/cassettes/test_llm_calls.yaml
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution b/py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution.yaml
similarity index 100%
rename from py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution
rename to py/src/braintrust/integrations/langchain/cassettes/test_parallel_execution.yaml
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens b/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml
similarity index 100%
rename from py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens
rename to py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft b/py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft.yaml
similarity index 100%
rename from py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft
rename to py/src/braintrust/integrations/langchain/cassettes/test_streaming_ttft.yaml
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_tool_usage b/py/src/braintrust/integrations/langchain/cassettes/test_tool_usage.yaml
similarity index 100%
rename from py/src/braintrust/integrations/langchain/cassettes/test_tool_usage
rename to py/src/braintrust/integrations/langchain/cassettes/test_tool_usage.yaml

From edd29ebed7ca4d150460d0cac8b333c4ead107aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Wed, 25 Mar 2026 17:42:48 +0000
Subject: [PATCH 4/7] and update the tests to use the renamed cassettes

---
 py/src/braintrust/integrations/langchain/conftest.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/py/src/braintrust/integrations/langchain/conftest.py b/py/src/braintrust/integrations/langchain/conftest.py
index a8f56176..a10ab496 100644
--- a/py/src/braintrust/integrations/langchain/conftest.py
+++ b/py/src/braintrust/integrations/langchain/conftest.py
@@ -43,7 +43,6 @@ def vcr_config():
         "record_mode": record_mode,
         "match_on": ["uri", "method", "body"],
         "cassette_library_dir": str(Path(__file__).parent / "cassettes"),
-        "path_transformer": lambda path: path.replace(".yaml", ""),
     }
 
 

From ab5c901876241c2a8e012914f5bc295ae8010591 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Wed, 25 Mar 2026 17:48:43 +0000
Subject: [PATCH 5/7] remove unnecessary python patterns used by python < 3.10

---
 .../integrations/langchain/test_langchain.py  | 22 +++++++++----------
 .../integrations/langchain/tracing.py         |  6 ++---
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/py/src/braintrust/integrations/langchain/test_langchain.py b/py/src/braintrust/integrations/langchain/test_langchain.py
index 889fe505..5698fdb7 100644
--- a/py/src/braintrust/integrations/langchain/test_langchain.py
+++ b/py/src/braintrust/integrations/langchain/test_langchain.py
@@ -1,6 +1,6 @@
 # pyright: reportTypedDictNotRequiredAccess=none
 import uuid
-from typing import Any, Dict, List, Union, cast
+from typing import Any, cast
 from unittest.mock import ANY
 
 import pytest
@@ -59,7 +59,7 @@ def assert_matches_object(actual: Any, expected: Any, ignore_order: bool = False
         assert actual == expected, f"Expected {expected} but got {actual}"
 
 
-def find_spans_by_attributes(spans: List[Any], **attributes: Any) -> List[Any]:
+def find_spans_by_attributes(spans: list[Any], **attributes: Any) -> list[Any]:
     """Find all spans matching the given span_attributes."""
     matching = []
     for span in spans:
@@ -90,7 +90,7 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger):
         presence_penalty=0,
         n=1,
     )
-    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+    chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model)
     chain.invoke({"number": "2"}, config={"callbacks": [cast(BaseCallbackHandler, handler)]})
 
     spans = memory_logger.pop()
@@ -225,7 +225,7 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger):
         presence_penalty=0,
         n=1,
     )
-    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+    chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model)
 
     message = chain.invoke({"number": "2"})
 
@@ -350,7 +350,7 @@ def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger):
     handler = BraintrustCallbackHandler(logger=logger)
     prompt = ChatPromptTemplate.from_template("{history} User: {input}")
     model = ChatOpenAI(model="gpt-4o-mini")
-    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+    chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model)
 
     memory = {"history": "Assistant: Hello! How can I assist you today?"}
     chain.invoke(
@@ -695,16 +695,16 @@ def test_langgraph_state_management(logger_memory_logger: LoggerMemoryLogger):
         n=1,
     )
 
-    def say_hello(state: Dict[str, str]):
+    def say_hello(state: dict[str, str]):
         response = model.invoke("Say hello")
-        return cast(Union[str, List[str], Dict[str, str]], response.content)
+        return cast(str | list[str] | dict[str, str], response.content)
 
-    def say_bye(state: Dict[str, str]):
+    def say_bye(state: dict[str, str]):
         print("From the 'sayBye' node: Bye world!")
         return "Bye"
 
     workflow = (
-        StateGraph(state_schema=Dict[str, str])
+        StateGraph(state_schema=dict[str, str])
         .add_node("sayHello", say_hello)
         .add_node("sayBye", say_bye)
         .add_edge(START, "sayHello")
@@ -1033,9 +1033,9 @@ def test_streaming_ttft(logger_memory_logger: LoggerMemoryLogger):
         max_completion_tokens=50,
         streaming=True,
     )
-    chain: RunnableSerializable[Dict[str, str], BaseMessage] = prompt.pipe(model)
+    chain: RunnableSerializable[dict[str, str], BaseMessage] = prompt.pipe(model)
 
-    chunks: List[str] = []
+    chunks: list[str] = []
     for chunk in chain.stream({}, config={"callbacks": [cast(BaseCallbackHandler, handler)]}):
         if chunk.content:
             chunks.append(str(chunk.content))
diff --git a/py/src/braintrust/integrations/langchain/tracing.py b/py/src/braintrust/integrations/langchain/tracing.py
index 85044bf1..1a142e6b 100644
--- a/py/src/braintrust/integrations/langchain/tracing.py
+++ b/py/src/braintrust/integrations/langchain/tracing.py
@@ -11,11 +11,9 @@
 import time
 from collections.abc import Mapping, Sequence
 from contextvars import ContextVar
-from re import Pattern
 from typing import (
     Any,
     TypedDict,
-    Union,
 )
 from uuid import UUID
 
@@ -90,7 +88,7 @@ def __init__(
         self,
         logger: Logger | Span | None = None,
         debug: bool = False,
-        exclude_metadata_props: Pattern[str] | None = None,
+        exclude_metadata_props: re.Pattern[str] | None = None,
     ):
         self.logger = logger
         self.spans: dict[UUID, Span] = {}
@@ -561,7 +559,7 @@ def on_llm_new_token(
         self,
         token: str,
         *,
-        chunk: Union[Any, None] = None,
+        chunk: Any | None = None,
         run_id: UUID,
         parent_run_id: UUID | None = None,
         **kwargs: Any,

From 45710117a8c1282227d85d230baca10385b75aa8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Wed, 25 Mar 2026 18:47:52 +0000
Subject: [PATCH 6/7] rerecord cassettes

---
 .../test_async_langchain_invoke.yaml          | 201 +++++++++++++++++-
 .../test_langchain_anthropic_integration.yaml | 196 ++++++++++++++++-
 .../cassettes/test_prompt_caching_tokens.yaml |   4 +-
 .../integrations/langchain/conftest.py        |   4 +-
 .../integrations/langchain/test_langchain.py  |  49 +----
 .../integrations/langchain/tracing.py         |  26 ++-
 6 files changed, 418 insertions(+), 62 deletions(-)

diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml b/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml
index 3ecc362e..2ffcb4e8 100644
--- a/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_async_langchain_invoke.yaml
@@ -1,6 +1,6 @@
 interactions:
 - request:
-    body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "What is
+    body: '{"max_tokens": 64000, "messages": [{"role": "user", "content": "What is
       1 + 2?"}], "model": "claude-sonnet-4-20250514"}'
     headers:
       accept:
@@ -80,7 +80,7 @@ interactions:
       code: 200
       message: OK
 - request:
-    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
     headers:
       accept:
       - application/json
@@ -177,7 +177,7 @@ interactions:
       code: 200
       message: OK
 - request:
-    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
     headers:
       accept:
       - application/json
@@ -273,4 +273,199 @@ interactions:
     status:
       code: 200
       message: OK
+- request:
+    body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '111'
+      Content-Type:
+      - application/json
+      Host:
+      - api.anthropic.com
+      User-Agent:
+      - langchain-anthropic/1.4.0
+      X-Stainless-Arch:
+      - x64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - Linux
+      X-Stainless-Package-Version:
+      - 0.86.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.14.3
+      anthropic-version:
+      - '2023-06-01'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA6pWKqksSFWyUkotKsovUtKB0lbVMPHE0pKM1LySzOTEksz8vHiYstzU4uLEdJCC
+        zLyyxJzMFIUK3cSCTN3s1EqlWh2lotTC0tTikvjMFCUrECfewNDQOSrQvSzPx6K8zDjCNCOyMDI1
+        qiRKqRYAAAD//wMAzB6OOoIAAAA=
+    headers:
+      CF-RAY:
+      - 9e1ffdb81a020555-CMH
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Security-Policy:
+      - default-src 'none'; frame-ancestors 'none'
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 18:31:11 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CZQGvnL8wv3X5hYqYeZtZ
+      server-timing:
+      - x-originResponse;dur=13
+      set-cookie:
+      - _cfuvid=gyUUSAZvGwkONX58EfvF0btj1CdyxvDfcMYIpZzdC_s-1774463471.3759444-1.0.1.1-Y5yl2w0p0ULztM1yD9IfmXjoPl60ueMNw9C.Pdj8r9E;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-envoy-upstream-service-time:
+      - '11'
+      x-should-retry:
+      - 'false'
+    status:
+      code: 401
+      message: Unauthorized
+- request:
+    body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '111'
+      Content-Type:
+      - application/json
+      Host:
+      - api.anthropic.com
+      User-Agent:
+      - langchain-anthropic/1.4.0
+      X-Stainless-Arch:
+      - x64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - Linux
+      X-Stainless-Package-Version:
+      - 0.86.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.14.3
+      anthropic-version:
+      - '2023-06-01'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA3SQTUsDMRCG/8ryXk1htx+2BDx4EEF68KxIiMnYLs1OtslsaSn73yXFIiqeBuZ5
+        5vOMLnoK0HDBDp4mOTKTTOaTaT1d1ItmDoXWQ6PLG1M3y+XDav1y79ar07579k+7R79KRyjIqadi
+        Uc52Q1BIMZSEzbnNYlmg4CILsUC/nq++0LGQS9BoqptqWt1VM4xvCllibxLZHBkaxN7IkBhfINN+
+        IHYEzUMICsNlrj6j5X4QI3FHnKGbWwVn3ZaMS2SljWx+CvWVJ7L+P3atLf2p31JHyQaz6P7637TZ
+        /qajQhzkx3YzhUzp0Doy0lKCRnmWt8mXv/MHpXKj2VCEBkcx9mDbYN8DYRw/AQAA//8DAJRQPdG9
+        AQAA
+    headers:
+      CF-RAY:
+      - 9e1ffe8f48166bf7-CMH
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Security-Policy:
+      - default-src 'none'; frame-ancestors 'none'
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 18:31:46 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      anthropic-ratelimit-input-tokens-limit:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-remaining:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-reset:
+      - '2026-03-25T18:31:46Z'
+      anthropic-ratelimit-output-tokens-limit:
+      - '600000'
+      anthropic-ratelimit-output-tokens-remaining:
+      - '600000'
+      anthropic-ratelimit-output-tokens-reset:
+      - '2026-03-25T18:31:46Z'
+      anthropic-ratelimit-requests-limit:
+      - '20000'
+      anthropic-ratelimit-requests-remaining:
+      - '19999'
+      anthropic-ratelimit-requests-reset:
+      - '2026-03-25T18:31:45Z'
+      anthropic-ratelimit-tokens-limit:
+      - '3600000'
+      anthropic-ratelimit-tokens-remaining:
+      - '3600000'
+      anthropic-ratelimit-tokens-reset:
+      - '2026-03-25T18:31:46Z'
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CZQGyKYAWfDXkbs6TYmFj
+      server-timing:
+      - x-originResponse;dur=1115
+      set-cookie:
+      - _cfuvid=GkeOmu_nJ3IH0wFr_Ysu_15x_mE_eZFWx7SyJP8s9wY-1774463505.8047376-1.0.1.1-yT6hue7f9O0MUMAxxAf.Cl6UVJAog65JmUFJd1pdxEw;
+        HttpOnly; SameSite=None; Secure; Path=/; Domain=api.anthropic.com
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-envoy-upstream-service-time:
+      - '1114'
+    status:
+      code: 200
+      message: OK
 version: 1
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml b/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml
index 6c396d02..516f74f8 100644
--- a/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_langchain_anthropic_integration.yaml
@@ -1,6 +1,6 @@
 interactions:
 - request:
-    body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "What is
+    body: '{"max_tokens": 64000, "messages": [{"role": "user", "content": "What is
       1 + 2?"}], "model": "claude-sonnet-4-20250514"}'
     headers:
       accept:
@@ -104,7 +104,7 @@ interactions:
       code: 200
       message: OK
 - request:
-    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
     headers:
       accept:
       - application/json
@@ -201,7 +201,7 @@ interactions:
       code: 200
       message: OK
 - request:
-    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
     headers:
       accept:
       - application/json
@@ -297,4 +297,194 @@ interactions:
     status:
       code: 200
       message: OK
+- request:
+    body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '111'
+      Content-Type:
+      - application/json
+      Cookie:
+      - _cfuvid=173XVm8c6LwLnZGSZjw3nktrd.OcW9mUj4Ct6Nzco6M-1774463470.6692584-1.0.1.1-Z8FXX.hH2DDJmupKJNi.NwPEp.ON4Sm.8PzX9hNNFu4
+      Host:
+      - api.anthropic.com
+      User-Agent:
+      - langchain-anthropic/1.4.0
+      X-Stainless-Arch:
+      - x64
+      X-Stainless-Async:
+      - 'false'
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - Linux
+      X-Stainless-Package-Version:
+      - 0.86.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.14.3
+      anthropic-version:
+      - '2023-06-01'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA6pWKqksSFWyUkotKsovUtKB0lbVMPHE0pKM1LySzOTEksz8vHiYstzU4uLEdJCC
+        zLyyxJzMFIUK3cSCTN3s1EqlWh2lotTC0tTikvjMFCUrECfewNDQOSrQvSw738/c0CKlqsIlONXX
+        1CNTqRYAAAD//wMAPzaTsIIAAAA=
+    headers:
+      CF-RAY:
+      - 9e1ffdb5cf75e6b0-CMH
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Security-Policy:
+      - default-src 'none'; frame-ancestors 'none'
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 18:31:11 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CZQGvkoN718dzxDSeM5Hi
+      server-timing:
+      - x-originResponse;dur=32
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-envoy-upstream-service-time:
+      - '28'
+      x-should-retry:
+      - 'false'
+    status:
+      code: 401
+      message: Unauthorized
+- request:
+    body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is 1 + 2?"}],"model":"claude-sonnet-4-20250514"}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '111'
+      Content-Type:
+      - application/json
+      Cookie:
+      - _cfuvid=PQKr9HJ20fMN78IoIWNmzDAMdKfcxl7b4a28wiRRrOA-1774463501.3608425-1.0.1.1-vWPsSIFWMsbw3tO25QWBZA3vmGBN5garPTOk7LK_8Y4
+      Host:
+      - api.anthropic.com
+      User-Agent:
+      - langchain-anthropic/1.4.0
+      X-Stainless-Arch:
+      - x64
+      X-Stainless-Async:
+      - 'false'
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - Linux
+      X-Stainless-Package-Version:
+      - 0.86.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.14.3
+      anthropic-version:
+      - '2023-06-01'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-timeout:
+      - NOT_GIVEN
+    method: POST
+    uri: https://api.anthropic.com/v1/messages
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/3WQT0vEMBDFv0qZq11ou1vRgjePHoS9KRKyzdgG00lNJovr0u/uZHGRVTxleL+X
+        N3+OMHmDDjronU4GV9ETIa82q6Zq2qqtN1CCNcKnOKiqbh83djek7efN/cN869fzE46HrXj4MGN2
+        YYx6QBGCd1nQMdrImlik3hOjVN3z8exn/Mjk9HRQF1dFU9wVa1heSojsZxVQy0jCkIziFAi+QcT3
+        hNRLCCXnSkinvt0RLM2JFfs3pAhdfS19dT+i6iWKrSd1aajOXLD5j53/5nycR5wwaKfa6a//h9bj
+        b7qU4BNfTLeWbTDsbY+KLQbZMx/L6GDy3ekVQ95RDegFkWel99o6vZPbLssXLJhatL0BAAA=
+    headers:
+      CF-RAY:
+      - 9e1ffe873bf7d04f-CMH
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Security-Policy:
+      - default-src 'none'; frame-ancestors 'none'
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 18:31:45 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Robots-Tag:
+      - none
+      anthropic-organization-id:
+      - 27796668-7351-40ac-acc4-024aee8995a5
+      anthropic-ratelimit-input-tokens-limit:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-remaining:
+      - '3000000'
+      anthropic-ratelimit-input-tokens-reset:
+      - '2026-03-25T18:31:45Z'
+      anthropic-ratelimit-output-tokens-limit:
+      - '600000'
+      anthropic-ratelimit-output-tokens-remaining:
+      - '600000'
+      anthropic-ratelimit-output-tokens-reset:
+      - '2026-03-25T18:31:45Z'
+      anthropic-ratelimit-requests-limit:
+      - '20000'
+      anthropic-ratelimit-requests-remaining:
+      - '19999'
+      anthropic-ratelimit-requests-reset:
+      - '2026-03-25T18:31:44Z'
+      anthropic-ratelimit-tokens-limit:
+      - '3600000'
+      anthropic-ratelimit-tokens-remaining:
+      - '3600000'
+      anthropic-ratelimit-tokens-reset:
+      - '2026-03-25T18:31:45Z'
+      cf-cache-status:
+      - DYNAMIC
+      request-id:
+      - req_011CZQGyE516m7mENVa2xVhJ
+      server-timing:
+      - x-originResponse;dur=1219
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-envoy-upstream-service-time:
+      - '1218'
+    status:
+      code: 200
+      message: OK
 version: 1
diff --git a/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml b/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml
index 441128e9..dd57682c 100644
--- a/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml
+++ b/py/src/braintrust/integrations/langchain/cassettes/test_prompt_caching_tokens.yaml
@@ -1,6 +1,6 @@
 interactions:
 - request:
-    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is the first
+    body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is the first
       type of testing mentioned in section 1.2?"}],"model":"claude-sonnet-4-5-20250929","system":[{"type":"text","text":"\n#
       Comprehensive Guide to Software Testing Methods!\n\n## Chapter 1: Introduction
       to Testing\n\nSoftware testing is a critical component of the software development
@@ -159,7 +159,7 @@ interactions:
       code: 200
       message: OK
 - request:
-    body: '{"max_tokens":1024,"messages":[{"role":"user","content":"What is the first
+    body: '{"max_tokens":64000,"messages":[{"role":"user","content":"What is the first
       type of testing mentioned in section 1.2?"},{"role":"assistant","content":"According
       to section 1.2 \"Types of Testing,\" the first type of testing mentioned is
       **Unit Testing**, which is described as \"Testing individual components or functions
diff --git a/py/src/braintrust/integrations/langchain/conftest.py b/py/src/braintrust/integrations/langchain/conftest.py
index a10ab496..e369fffd 100644
--- a/py/src/braintrust/integrations/langchain/conftest.py
+++ b/py/src/braintrust/integrations/langchain/conftest.py
@@ -19,8 +19,8 @@ def setup_braintrust_langchain():
     os.environ["BRAINTRUST_API_URL"] = "http://localhost:8000"
     os.environ["BRAINTRUST_APP_URL"] = "http://localhost:3000"
     os.environ["BRAINTRUST_API_KEY"] = TEST_API_KEY
-    os.environ["ANTHROPIC_API_KEY"] = "your_anthropic_api_key_here"
-    os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here"
+    os.environ.setdefault("ANTHROPIC_API_KEY", "your_anthropic_api_key_here")
+    os.environ.setdefault("OPENAI_API_KEY", "your_openai_api_key_here")
     os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1/proxy"
 
     _internal_reset_global_state()
diff --git a/py/src/braintrust/integrations/langchain/test_langchain.py b/py/src/braintrust/integrations/langchain/test_langchain.py
index 5698fdb7..9bb711d5 100644
--- a/py/src/braintrust/integrations/langchain/test_langchain.py
+++ b/py/src/braintrust/integrations/langchain/test_langchain.py
@@ -112,9 +112,6 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger):
                     "additional_kwargs": ANY,
                     "response_metadata": ANY,
                     "type": "ai",
-                    "name": ANY,
-                    "id": ANY,
-                    "example": ANY,
                     "tool_calls": ANY,
                     "invalid_tool_calls": ANY,
                     "usage_metadata": ANY,
@@ -133,8 +130,6 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger):
                             "additional_kwargs": {},
                             "response_metadata": {},
                             "type": "human",
-                            "name": None,
-                            "id": None,
                         }
                     ]
                 },
@@ -151,9 +146,6 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger):
                             "additional_kwargs": {},
                             "response_metadata": {},
                             "type": "human",
-                            "name": None,
-                            "id": None,
-                            "example": ANY,
                         }
                     ]
                 ],
@@ -169,8 +161,6 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger):
                                     "additional_kwargs": ANY,
                                     "response_metadata": ANY,
                                     "type": "ai",
-                                    "name": None,
-                                    "id": ANY,
                                 },
                             }
                         ]
@@ -183,7 +173,6 @@ def test_llm_calls(logger_memory_logger: LoggerMemoryLogger):
                         },
                         "model_name": "gpt-4o-mini-2024-07-18",
                     },
-                    "run": None,
                     "type": "LLMResult",
                 },
                 "metrics": {
@@ -248,9 +237,6 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger):
                     "additional_kwargs": ANY,
                     "response_metadata": ANY,
                     "type": "ai",
-                    "name": ANY,
-                    "id": ANY,
-                    "example": ANY,
                     "tool_calls": ANY,
                     "invalid_tool_calls": ANY,
                     "usage_metadata": ANY,
@@ -269,8 +255,6 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger):
                             "additional_kwargs": {},
                             "response_metadata": {},
                             "type": "human",
-                            "name": None,
-                            "id": None,
                         }
                     ]
                 },
@@ -287,9 +271,6 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger):
                             "additional_kwargs": {},
                             "response_metadata": {},
                             "type": "human",
-                            "name": None,
-                            "id": None,
-                            "example": ANY,
                         }
                     ]
                 ],
@@ -305,8 +286,6 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger):
                                     "additional_kwargs": ANY,
                                     "response_metadata": ANY,
                                     "type": "ai",
-                                    "name": None,
-                                    "id": ANY,
                                 },
                             }
                         ]
@@ -319,7 +298,6 @@ def test_global_handler(logger_memory_logger: LoggerMemoryLogger):
                         },
                         "model_name": "gpt-4o-mini-2024-07-18",
                     },
-                    "run": None,
                     "type": "LLMResult",
                 },
                 "metrics": {
@@ -392,8 +370,6 @@ def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger):
                             "additional_kwargs": {},
                             "response_metadata": {},
                             "type": "human",
-                            "name": None,
-                            "id": None,
                         }
                     ]
                 },
@@ -410,9 +386,6 @@ def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger):
                             "additional_kwargs": {},
                             "response_metadata": {},
                             "type": "human",
-                            "name": None,
-                            "id": None,
-                            "example": ANY,
                         }
                     ]
                 ],
@@ -428,8 +401,6 @@ def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger):
                                     "additional_kwargs": ANY,
                                     "response_metadata": ANY,
                                     "type": "ai",
-                                    "name": None,
-                                    "id": ANY,
                                 },
                             }
                         ]
@@ -442,7 +413,6 @@ def test_chain_with_memory(logger_memory_logger: LoggerMemoryLogger):
                         },
                         "model_name": "gpt-4o-mini-2024-07-18",
                     },
-                    "run": None,
                     "type": "LLMResult",
                 },
                 "metrics": {
@@ -523,9 +493,6 @@ def calculator(input: CalculatorInput) -> str:
                             "additional_kwargs": {},
                             "response_metadata": {},
                             "type": "human",
-                            "name": None,
-                            "id": None,
-                            "example": ANY,
                         }
                     ]
                 ],
@@ -554,12 +521,8 @@ def calculator(input: CalculatorInput) -> str:
                                 "message": {
                                     "content": ANY,
                                     "type": "ai",
-                                    "additional_kwargs": {
-                                        "tool_calls": ANY,
-                                    },
+                                    "additional_kwargs": ANY,
                                     "response_metadata": ANY,
-                                    "name": None,
-                                    "id": ANY,
                                 },
                             }
                         ]
@@ -572,7 +535,6 @@ def calculator(input: CalculatorInput) -> str:
                         },
                         "model_name": "gpt-4o-mini-2024-07-18",
                     },
-                    "run": None,
                     "type": "LLMResult",
                 },
                 "metrics": {
@@ -770,9 +732,6 @@ def say_bye(state: dict[str, str]):
                         "additional_kwargs": {},
                         "response_metadata": {},
                         "type": "human",
-                        "name": None,
-                        "id": None,
-                        "example": ANY,
                     }
                 ]
             ],
@@ -792,8 +751,6 @@ def say_bye(state: dict[str, str]):
                                 "additional_kwargs": ANY,
                                 "response_metadata": ANY,
                                 "type": "ai",
-                                "name": None,
-                                "id": ANY,
                             },
                         }
                     ]
@@ -806,7 +763,6 @@ def say_bye(state: dict[str, str]):
                     },
                     "model_name": "gpt-4o-mini-2024-07-18",
                 },
-                "run": None,
                 "type": "LLMResult",
             },
             "metrics": {
@@ -1059,9 +1015,6 @@ def test_streaming_ttft(logger_memory_logger: LoggerMemoryLogger):
                         {
                             "additional_kwargs": {},
                             "content": "Count from 1 to 5.",
-                            "example": False,
-                            "id": None,
-                            "name": None,
                             "response_metadata": {},
                             "type": "human",
                         }
diff --git a/py/src/braintrust/integrations/langchain/tracing.py b/py/src/braintrust/integrations/langchain/tracing.py
index 1a142e6b..efe2fd61 100644
--- a/py/src/braintrust/integrations/langchain/tracing.py
+++ b/py/src/braintrust/integrations/langchain/tracing.py
@@ -84,6 +84,17 @@ class BraintrustCallbackHandler:
 
     root_run_id: UUID | None = None
 
+    # Duck-typing attributes required by LangChain's callback manager.
+    # These mirror BaseCallbackHandler without requiring inheritance.
+    raise_error: bool = False
+    ignore_llm: bool = False
+    ignore_retry: bool = False
+    ignore_chain: bool = False
+    ignore_agent: bool = False
+    ignore_retriever: bool = False
+    ignore_chat_model: bool = False
+    ignore_custom_event: bool = False
+
     def __init__(
         self,
         logger: Logger | Span | None = None,
@@ -97,8 +108,8 @@ def __init__(
             r"^(l[sc]_|langgraph_|__pregel_|checkpoint_ns)"
         )
         self.skipped_runs: set[UUID] = set()
-        # Set run_inline=True to avoid thread executor in async contexts
-        # This ensures memory logger context is preserved
+        # run_inline=True avoids the thread executor in async contexts,
+        # ensuring that ContextVar state is preserved across callbacks.
         self.run_inline = True
 
         self._start_times: dict[UUID, float] = {}
@@ -686,11 +697,18 @@ def _get_metrics_from_response(response: Any) -> dict[str, Any]:
             input_token_details = usage_metadata.get("input_token_details")
             if input_token_details and isinstance(input_token_details, dict):
                 cache_read = input_token_details.get("cache_read")
-                cache_creation = input_token_details.get("cache_creation")
+                # langchain-anthropic >=1.4 reports cache_creation=0 when the ephemeral
+                # breakdown keys are present, so add their values to get the true total.
+                cache_creation = input_token_details.get("cache_creation") or 0
+                cache_creation += sum(
+                    v
+                    for k, v in input_token_details.items()
+                    if k.startswith("ephemeral_") and k.endswith("_input_tokens") and v
+                )
 
                 if cache_read is not None:
                     metrics["prompt_cached_tokens"] = cache_read
-                if cache_creation is not None:
+                if cache_creation:
                     metrics["prompt_cache_creation_tokens"] = cache_creation
 
     if not metrics or not any(metrics.values()):

From 2d50659bd7e912726d6763f25f1ac44b052a05a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Halber?= <cedric@braintrustdata.com>
Date: Wed, 25 Mar 2026 19:16:34 +0000
Subject: [PATCH 7/7] chore: maintain compatibility for test with pre v1
 langchain

---
 py/noxfile.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/py/noxfile.py b/py/noxfile.py
index 1c8c7389..d2042088 100644
--- a/py/noxfile.py
+++ b/py/noxfile.py
@@ -104,6 +104,7 @@ def _pinned_python_version():
 DSPY_VERSIONS = (LATEST,)
 GOOGLE_ADK_VERSIONS = (LATEST, "1.14.1")
 LANGCHAIN_VERSIONS = (LATEST, "0.3.83")
+LANGCHAIN_VERSION_IDS = (LATEST, "langchain-core-0.3.83")
 # temporalio 1.19.0+ requires Python >= 3.10; skip Python 3.9 entirely
 TEMPORAL_VERSIONS = (LATEST, "1.20.0", "1.19.0")
 PYTEST_VERSIONS = (LATEST, "8.4.2")
@@ -196,14 +197,14 @@ def test_google_genai(session, version):
 
 
 @nox.session()
-@nox.parametrize("version", LANGCHAIN_VERSIONS, ids=LANGCHAIN_VERSIONS)
+@nox.parametrize("version", LANGCHAIN_VERSIONS, ids=LANGCHAIN_VERSION_IDS)
 def test_langchain(session, version):
     """Test LangChain integration."""
     _install_test_deps(session)
     _install(session, "langchain-core", version)
-    _install(session, "langchain-openai", version)
-    _install(session, "langchain-anthropic", version)
-    session.install("langgraph", silent=SILENT_INSTALLS)
+    _install(session, "langchain-openai")
+    _install(session, "langchain-anthropic")
+    _install(session, "langgraph")
     _run_tests(session, f"{INTEGRATION_DIR}/langchain/test_langchain.py")
     _run_core_tests(session)