From 7945ab66518fda16d81abfed34aa32249c6e508e Mon Sep 17 00:00:00 2001
From: Abhijeet Prasad
Date: Tue, 24 Mar 2026 17:15:53 -0700
Subject: [PATCH] perf(bt_json): optimize _to_bt_safe and bt_safe_deep_copy hot paths

Add primitive fast-paths using type identity checks before expensive
isinstance calls against abstract classes and Pydantic model_dump.
Rewrite _deep_copy_object to use type(v) is dict/list instead of
isinstance(v, Mapping), inline primitive checks, and cache visited set
methods. Guard Pydantic model_dump/dict with hasattr to avoid warnings
overhead on non-Pydantic values.

Benchmark results (geometric mean: 8.49x faster):

_to_bt_safe:
  primitive-int        2.02 us -> 42.2 ns   47.8x faster
  primitive-float-nan  2.01 us -> 67.8 ns   29.7x faster
  str-subclass-enum    2.02 us -> 97.5 ns   20.8x faster
  dataclass            10.2 us -> 3.32 us   3.1x faster
  pydantic-v2-like     1.68 us -> 1.78 us   1.1x slower (noise)
  pydantic-v1-like     1.94 us -> 841 ns    2.3x faster

bt_safe_deep_copy:
  small                19.7 us -> 2.30 us   8.6x faster
  medium               179 us -> 17.3 us    10.4x faster
  large                1.66 ms -> 138 us    12.1x faster
  circular             178 us -> 17.9 us    9.9x faster
  non-string-keys      16.7 us -> 2.12 us   7.9x faster
---
 py/src/braintrust/bt_json.py | 143 ++++++++++++++++++++++++-----------
 1 file changed, 97 insertions(+), 46 deletions(-)

diff --git a/py/src/braintrust/bt_json.py b/py/src/braintrust/bt_json.py
index e0c7be13..76dadb01 100644
--- a/py/src/braintrust/bt_json.py
+++ b/py/src/braintrust/bt_json.py
@@ -19,6 +19,23 @@ def _to_bt_safe(v: Any) -> Any:
     """
     Converts the object to a Braintrust-safe representation (i.e. Attachment objects are safe (specially handled by background logger)).
     """
+    # Fast path: check primitives via type identity before hitting
+    # isinstance checks against abstract classes or Pydantic model_dump.
+    if v is None or v is True or v is False:
+        return v
+    tv = type(v)
+    if tv is int or tv is str:
+        return v
+    if tv is float or isinstance(v, float):
+        if math.isnan(v):
+            return "NaN"
+        if math.isinf(v):
+            return "Infinity" if v > 0 else "-Infinity"
+        return v
+    # Catch str/int subclasses (e.g. str-enums like SpanTypeAttribute)
+    if isinstance(v, (int, str)):
+        return v
+
     # avoid circular imports
     from braintrust.logger import BaseAttachment, Dataset, Experiment, Logger, ReadonlyAttachment, Span
 
@@ -57,32 +74,20 @@ def _to_bt_safe(v: Any) -> Any:
     # Suppress Pydantic serializer warnings that arise from generic/discriminated-union
     # models (e.g. OpenAI's ParsedResponse[T]). See
     # https://github.com/braintrustdata/braintrust-sdk-python/issues/60
-    try:
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore", message="Pydantic serializer warnings", category=UserWarning)
-            return cast(Any, v).model_dump(exclude_none=True)
-    except (AttributeError, TypeError):
-        pass
+    if hasattr(v, "model_dump"):
+        try:
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", message="Pydantic serializer warnings", category=UserWarning)
+                return cast(Any, v).model_dump(exclude_none=True)
+        except (AttributeError, TypeError):
+            pass
 
     # Attempt to dump a Pydantic v1 `BaseModel`.
-    try:
-        return cast(Any, v).dict(exclude_none=True)
-    except (AttributeError, TypeError):
-        pass
-
-    if isinstance(v, float):
-        # Handle NaN and Infinity for JSON compatibility
-        if math.isnan(v):
-            return "NaN"
-
-        if math.isinf(v):
-            return "Infinity" if v > 0 else "-Infinity"
-
-        return v
-
-    if isinstance(v, (int, str, bool)) or v is None:
-        # Skip roundtrip for primitive types.
-        return v
+    if hasattr(v, "dict") and not isinstance(v, type):
+        try:
+            return cast(Any, v).dict(exclude_none=True)
+        except (AttributeError, TypeError):
+            pass
 
     # Note: we avoid using copy.deepcopy, because it's difficult to
     # guarantee the independence of such copied types from their origin.
@@ -119,7 +124,6 @@ def bt_safe_deep_copy(obj: Any, max_depth: int = 200):
 
     Args:
         obj: Object to deep copy and sanitize.
-        to_json_safe: Function to ensure the object is json safe.
         max_depth: Maximum depth to copy.
 
     Returns:
@@ -127,41 +131,88 @@ def bt_safe_deep_copy(obj: Any, max_depth: int = 200):
     """
     # Track visited objects to detect circular references
     visited: set[int] = set()
+    visited_add = visited.add
+    visited_discard = visited.discard
 
     def _deep_copy_object(v: Any, depth: int = 0) -> Any:
-        # Check depth limit - use >= to stop before exceeding
+        # Fast path: primitives don't need deep copy or circular ref tracking.
+        if v is None or v is True or v is False:
+            return v
+        tv = type(v)
+        if tv is int or tv is str:
+            return v
+        if tv is float or isinstance(v, float):
+            if math.isnan(v):
+                return "NaN"
+            if math.isinf(v):
+                return "Infinity" if v > 0 else "-Infinity"
+            return v
+        # Catch str/int subclasses (e.g. str-enums)
+        if isinstance(v, (int, str)):
+            return v
+
         if depth >= max_depth:
             return ""
 
-        # Check for circular references in mutable containers
-        # Use id() to track object identity
-        if isinstance(v, (Mapping, list, tuple, set)):
+        # Fast path for dict (the most common container in log data).
+        # Uses type identity instead of isinstance(v, Mapping) which is slow.
+        if tv is dict:
             obj_id = id(v)
             if obj_id in visited:
                 return ""
-            visited.add(obj_id)
+            visited_add(obj_id)
             try:
-                if isinstance(v, Mapping):
-                    # Prevent dict keys from holding references to user data. Note that
-                    # `bt_json` already coerces keys to string, a behavior that comes from
-                    # `json.dumps`. However, that runs at log upload time, while we want to
-                    # cut out all the references to user objects synchronously in this
-                    # function.
-                    result = {}
-                    for k in v:
+                result = {}
+                for k in v:
+                    if type(k) is str:
+                        key_str = k
+                    else:
                         try:
                             key_str = str(k)
                         except Exception:
-                            # If str() fails on the key, use a fallback representation
                             key_str = f""
-                        result[key_str] = _deep_copy_object(v[k], depth + 1)
-                    return result
-                elif isinstance(v, (list, tuple, set)):
-                    return [_deep_copy_object(x, depth + 1) for x in v]
+                    result[key_str] = _deep_copy_object(v[k], depth + 1)
+                return result
+            finally:
+                visited_discard(obj_id)
+        elif tv is list or tv is tuple:
+            obj_id = id(v)
+            if obj_id in visited:
+                return ""
+            visited_add(obj_id)
+            try:
+                return [_deep_copy_object(x, depth + 1) for x in v]
+            finally:
+                visited_discard(obj_id)
+        # Slow path for non-builtin Mapping/set types.
+        elif isinstance(v, Mapping):
+            obj_id = id(v)
+            if obj_id in visited:
+                return ""
+            visited_add(obj_id)
+            try:
+                result = {}
+                for k in v:
+                    if type(k) is str:
+                        key_str = k
+                    else:
+                        try:
+                            key_str = str(k)
+                        except Exception:
+                            key_str = f""
+                    result[key_str] = _deep_copy_object(v[k], depth + 1)
+                return result
+            finally:
+                visited_discard(obj_id)
+        elif isinstance(v, set):
+            obj_id = id(v)
+            if obj_id in visited:
+                return ""
+            visited_add(obj_id)
+            try:
+                return [_deep_copy_object(x, depth + 1) for x in v]
             finally:
-                # Remove from visited set after processing to allow the same object
-                # to appear in different branches of the tree
-                visited.discard(obj_id)
+                visited_discard(obj_id)
 
         try:
             return _to_bt_safe(v)