Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 126 additions & 11 deletions sentry_sdk/integrations/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,104 @@ def _collect_ai_data(
return model, input_tokens, output_tokens, content_blocks


def _transform_content_block(content_block: "dict[str, Any]") -> "dict[str, Any]":
"""
Transform an Anthropic content block to a Sentry-compatible format.

Handles binary data (images, documents) by converting them to the standardized format:
- base64 encoded data -> type: "blob"
- URL references -> type: "uri"
- file_id references -> type: "file"
"""
block_type = content_block.get("type")

# Handle image blocks
if block_type == "image":
source = content_block.get("source", {})
source_type = source.get("type")
media_type = source.get("media_type", "")

if source_type == "base64":
return {
"type": "blob",
"modality": "image",
"mime_type": media_type,
"content": source.get("data", ""),
}
elif source_type == "url":
return {
"type": "uri",
"modality": "image",
"mime_type": media_type,
"uri": source.get("url", ""),
}
elif source_type == "file":
return {
"type": "file",
"modality": "image",
"mime_type": media_type,
"file_id": source.get("file_id", ""),
}

# Handle document blocks (PDFs, etc.)
elif block_type == "document":
source = content_block.get("source", {})
source_type = source.get("type")
media_type = source.get("media_type", "")

if source_type == "base64":
return {
"type": "blob",
"modality": "document",
"mime_type": media_type,
"content": source.get("data", ""),
}
elif source_type == "url":
return {
"type": "uri",
"modality": "document",
"mime_type": media_type,
"uri": source.get("url", ""),
}
elif source_type == "file":
return {
"type": "file",
"modality": "document",
"mime_type": media_type,
"file_id": source.get("file_id", ""),
}
elif source_type == "text":
# Plain text documents - keep as is but mark the type
return {
"type": "text",
"text": source.get("data", ""),
}

# For text blocks and other types, return as-is
return content_block


def _transform_message_content(
content: "Any",
) -> "Any":
"""
Transform message content, handling both string content and list of content blocks.
"""
if isinstance(content, str):
return content

if isinstance(content, (list, tuple)):
transformed = []
for block in content:
if isinstance(block, dict):
transformed.append(_transform_content_block(block))
else:
transformed.append(block)
return transformed

return content


def _set_input_data(
span: "Span", kwargs: "dict[str, Any]", integration: "AnthropicIntegration"
) -> None:
Expand Down Expand Up @@ -164,19 +262,36 @@ def _set_input_data(
and "content" in message
and isinstance(message["content"], (list, tuple))
):
transformed_content = []
for item in message["content"]:
if item.get("type") == "tool_result":
normalized_messages.append(
{
"role": GEN_AI_ALLOWED_MESSAGE_ROLES.TOOL,
"content": { # type: ignore[dict-item]
"tool_use_id": item.get("tool_use_id"),
"output": item.get("content"),
},
}
)
# Skip tool_result items - they can contain images/documents
# with nested structures that are difficult to redact properly
if isinstance(item, dict) and item.get("type") == "tool_result":
continue
Comment on lines +269 to +270
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The new logic skips tool_result content blocks. If a user message contains only tool_result blocks, the entire message will be silently dropped, losing observability data.
Severity: HIGH | Confidence: High

🔍 Detailed Analysis

The new logic at this location explicitly skips any content block with type equal to tool_result. In Anthropic's tool-calling workflow, it is a common pattern for a user message to contain only tool_result blocks. When this occurs, the loop will skip all content blocks, leaving the transformed_content list empty. Consequently, the subsequent if transformed_content: check will fail, and the entire message will be silently dropped from the Sentry event. This results in a loss of observability for tool-calling interactions, making it impossible to trace the full conversation flow in Sentry for users of this feature.

💡 Suggested Fix

Instead of skipping tool_result blocks entirely, process them and include them in normalized_messages. One option is to handle them like the other content types so that they are never dropped, while still addressing the redaction concerns that motivated the original change. This might involve selectively redacting sensitive fields within the tool_result instead of removing it entirely.

🤖 Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: sentry_sdk/integrations/anthropic.py#L269-L270

Potential issue: The new logic at this location explicitly skips any content block with
`type` equal to `tool_result`. In Anthropic's tool-calling workflow, it is a common
pattern for a user message to contain only `tool_result` blocks. When this occurs, the
loop will skip all content blocks, leaving the `transformed_content` list empty.
Consequently, the subsequent `if transformed_content:` check will fail, and the entire
message will be silently dropped from the Sentry event. This results in a loss of
observability for tool-calling interactions, making it impossible to trace the full
conversation flow in Sentry for users of this feature.

Did we get this right? 👍 / 👎 to inform future reviews.
Reference ID: 8340295


# Transform content blocks (images, documents, etc.)
transformed_content.append(
_transform_content_block(item)
if isinstance(item, dict)
else item
)

# If there are non-tool-result items, add them as a message
if transformed_content:
normalized_messages.append(
{
"role": message.get("role"),
"content": transformed_content,
}
)
else:
normalized_messages.append(message)
# Transform content for non-list messages or assistant messages
transformed_message = message.copy()
if "content" in transformed_message:
transformed_message["content"] = _transform_message_content(
transformed_message["content"]
)
normalized_messages.append(transformed_message)

role_normalized_messages = normalize_message_roles(normalized_messages)
scope = sentry_sdk.get_current_scope()
Expand Down
Loading
Loading