From fac5c127e890eecb3b1bbef92ac58f667433507b Mon Sep 17 00:00:00 2001 From: Djordje Lukic Date: Tue, 10 Feb 2026 14:09:19 +0100 Subject: [PATCH] provider/anthropic: enforce tool sequencing, remove self-repair/validation Stop inserting synthetic tool_result blocks and remove preflight sequencing validators. Instead, enforce Anthropic tool_use/tool_result adjacency and ID matching during message conversion (and preserve tool-call blocks during history trimming) so invalid histories fail fast with clear errors. Signed-off-by: Djordje Lukic --- pkg/model/provider/anthropic/beta_client.go | 116 ---------- .../provider/anthropic/beta_converter.go | 73 ++++-- .../provider/anthropic/beta_converter_test.go | 61 +++-- pkg/model/provider/anthropic/client.go | 217 ++++-------------- pkg/model/provider/anthropic/client_test.go | 83 +++---- pkg/session/session.go | 108 ++++++--- 6 files changed, 245 insertions(+), 413 deletions(-) diff --git a/pkg/model/provider/anthropic/beta_client.go b/pkg/model/provider/anthropic/beta_client.go index b99124341..2fd60d2ef 100644 --- a/pkg/model/provider/anthropic/beta_client.go +++ b/pkg/model/provider/anthropic/beta_client.go @@ -43,14 +43,6 @@ func (c *Client) createBetaStream( slog.Error("Failed to convert messages for Anthropic Beta request", "error", err) return nil, err } - if err := validateAnthropicSequencingBeta(converted); err != nil { - slog.Warn("Invalid message sequencing for Anthropic Beta API detected, attempting self-repair", "error", err) - converted = repairAnthropicSequencingBeta(converted) - if err2 := validateAnthropicSequencingBeta(converted); err2 != nil { - slog.Error("Failed to self-repair Anthropic Beta sequencing", "error", err2) - return nil, err - } - } sys := extractBetaSystemBlocks(messages) @@ -146,114 +138,6 @@ func (c *Client) createBetaStream( return ad, nil } -// validateAnthropicSequencingBeta performs the same validation as standard API but for Beta payloads -func validateAnthropicSequencingBeta(msgs []anthropic.BetaMessageParam) error { - for i := range msgs { - m, ok := marshalToMapBeta(msgs[i]) - if !ok || m["role"] != "assistant" { - continue - } - - toolUseIDs := collectToolUseIDs(contentArrayBeta(m)) - if len(toolUseIDs) == 0 { - continue - } - - if i+1 >= len(msgs) { - slog.Warn("Anthropic (beta) sequencing invalid: assistant tool_use present but no next user tool_result message", "assistant_index", i) - return errors.New("assistant tool_use present but no subsequent user message with tool_result blocks (beta)") - } - - next, ok := marshalToMapBeta(msgs[i+1]) - if !ok || next["role"] != "user" { - slog.Warn("Anthropic (beta) sequencing invalid: next message after assistant tool_use is not user", "assistant_index", i, "next_role", next["role"]) - return errors.New("assistant tool_use must be followed by a user message containing corresponding tool_result blocks (beta)") - } - - toolResultIDs := collectToolResultIDs(contentArrayBeta(next)) - missing := differenceIDs(toolUseIDs, toolResultIDs) - if len(missing) > 0 { - slog.Warn("Anthropic (beta) sequencing invalid: missing tool_result for tool_use id in next user message", "assistant_index", i, "tool_use_id", missing[0], "missing_count", len(missing)) - return fmt.Errorf("missing tool_result for tool_use id %s in the next user message (beta)", missing[0]) - } - } - return nil -} - -// repairAnthropicSequencingBeta inserts a synthetic user message with tool_result blocks -// for any assistant tool_use blocks that don't have corresponding tool_result blocks -// in the immediate next user message. -func repairAnthropicSequencingBeta(msgs []anthropic.BetaMessageParam) []anthropic.BetaMessageParam { - if len(msgs) == 0 { - return msgs - } - repaired := make([]anthropic.BetaMessageParam, 0, len(msgs)+2) - for i := range msgs { - m, ok := marshalToMapBeta(msgs[i]) - if !ok || m["role"] != "assistant" { - repaired = append(repaired, msgs[i]) - continue - } - - toolUseIDs := collectToolUseIDs(contentArrayBeta(m)) - if len(toolUseIDs) == 0 { - repaired = append(repaired, msgs[i]) - continue - } - - // Check if the next message is a user message with tool_results - needsSyntheticMessage := true - if i+1 < len(msgs) { - if next, ok := marshalToMapBeta(msgs[i+1]); ok && next["role"] == "user" { - toolResultIDs := collectToolResultIDs(contentArrayBeta(next)) - // Remove tool_use IDs that have corresponding tool_results - for id := range toolResultIDs { - delete(toolUseIDs, id) - } - // If all tool_use IDs have results, no synthetic message needed - if len(toolUseIDs) == 0 { - needsSyntheticMessage = false - } - } - } - - // Append the assistant message first - repaired = append(repaired, msgs[i]) - - // If there are missing tool_results, insert a synthetic user message immediately after - if needsSyntheticMessage && len(toolUseIDs) > 0 { - slog.Debug("Inserting synthetic user message for missing tool_results", - "assistant_index", i, - "missing_count", len(toolUseIDs)) - - blocks := make([]anthropic.BetaContentBlockParamUnion, 0, len(toolUseIDs)) - for id := range toolUseIDs { - slog.Debug("Creating synthetic tool_result", "tool_use_id", id) - blocks = append(blocks, anthropic.BetaContentBlockParamUnion{ - OfToolResult: &anthropic.BetaToolResultBlockParam{ - ToolUseID: id, - Content: []anthropic.BetaToolResultBlockParamContentUnion{ - {OfText: &anthropic.BetaTextBlockParam{Text: "(tool execution failed)"}}, - }, - }, - }) - } - repaired = append(repaired, anthropic.BetaMessageParam{ - Role: anthropic.BetaMessageParamRoleUser, - Content: blocks, - }) - } - } - return repaired -} - -// marshalToMapBeta is an alias for marshalToMap - shared with standard API. -// Kept as separate function for clarity in Beta-specific code paths. -var marshalToMapBeta = marshalToMap - -// contentArrayBeta is an alias for contentArray - shared with standard API. -var contentArrayBeta = contentArray - // countAnthropicTokensBeta calls Anthropic's Count Tokens API for the provided Beta API payload // and returns the number of input tokens. func countAnthropicTokensBeta( diff --git a/pkg/model/provider/anthropic/beta_converter.go b/pkg/model/provider/anthropic/beta_converter.go index e8601017a..ecc05199d 100644 --- a/pkg/model/provider/anthropic/beta_converter.go +++ b/pkg/model/provider/anthropic/beta_converter.go @@ -22,6 +22,7 @@ import ( // blocks from the same assistant message MUST be grouped into a single user message. func (c *Client) convertBetaMessages(ctx context.Context, messages []chat.Message) ([]anthropic.BetaMessageParam, error) { var betaMessages []anthropic.BetaMessageParam + var pendingToolUseIDs map[string]struct{} for i := 0; i < len(messages); i++ { msg := &messages[i] @@ -75,11 +76,15 @@ func (c *Client) convertBetaMessages(ctx context.Context, messages []chat.Messag // Add tool calls if len(msg.ToolCalls) > 0 { + pendingToolUseIDs = make(map[string]struct{}, len(msg.ToolCalls)) for _, toolCall := range msg.ToolCalls { var inpts map[string]any if err := json.Unmarshal([]byte(toolCall.Function.Arguments), &inpts); err != nil { inpts = map[string]any{} } + if toolCall.ID != "" { + pendingToolUseIDs[toolCall.ID] = struct{}{} + } contentBlocks = append(contentBlocks, anthropic.BetaContentBlockParamUnion{ OfToolUse: &anthropic.BetaToolUseBlockParam{ ID: toolCall.ID, @@ -88,6 +93,8 @@ func (c *Client) convertBetaMessages(ctx context.Context, messages []chat.Messag }, }) } + } else { + pendingToolUseIDs = nil } if len(contentBlocks) > 0 { @@ -102,38 +109,54 @@ func (c *Client) convertBetaMessages(ctx context.Context, messages []chat.Messag // Collect consecutive tool messages and merge them into a single user message // This is required by Anthropic API: all tool_result blocks for tool_use blocks // from the same assistant message must be in the same user message - toolResultBlocks := []anthropic.BetaContentBlockParamUnion{ - { - OfToolResult: &anthropic.BetaToolResultBlockParam{ - ToolUseID: msg.ToolCallID, - Content: []anthropic.BetaToolResultBlockParamContentUnion{ - {OfText: &anthropic.BetaTextBlockParam{Text: strings.TrimSpace(msg.Content)}}, - }, - }, - }, + if pendingToolUseIDs == nil { + // Orphan tool results (no preceding assistant tool_use in this window): drop them. + j := i + for j < len(messages) && messages[j].Role == chat.MessageRoleTool { + j++ + } + i = j - 1 + continue } - // Look ahead for consecutive tool messages and merge them - j := i + 1 + toolResultBlocks := make([]anthropic.BetaContentBlockParamUnion, 0) + hadToolMessages := false + j := i for j < len(messages) && messages[j].Role == chat.MessageRoleTool { - toolResultBlocks = append(toolResultBlocks, anthropic.BetaContentBlockParamUnion{ - OfToolResult: &anthropic.BetaToolResultBlockParam{ - ToolUseID: messages[j].ToolCallID, - Content: []anthropic.BetaToolResultBlockParamContentUnion{ - {OfText: &anthropic.BetaTextBlockParam{Text: strings.TrimSpace(messages[j].Content)}}, - }, - }, - }) + hadToolMessages = true + id := messages[j].ToolCallID + if id != "" { + if _, ok := pendingToolUseIDs[id]; ok { + toolResultBlocks = append(toolResultBlocks, anthropic.BetaContentBlockParamUnion{ + OfToolResult: &anthropic.BetaToolResultBlockParam{ + ToolUseID: id, + Content: []anthropic.BetaToolResultBlockParamContentUnion{ + {OfText: &anthropic.BetaTextBlockParam{Text: strings.TrimSpace(messages[j].Content)}}, + }, + }, + }) + delete(pendingToolUseIDs, id) + } + } j++ } - // Add the merged user message with all tool results - betaMessages = append(betaMessages, anthropic.BetaMessageParam{ - Role: anthropic.BetaMessageParamRoleUser, - Content: toolResultBlocks, - }) + if hadToolMessages && len(toolResultBlocks) == 0 { + return nil, fmt.Errorf("tool_result messages present but none match pending tool_use ids (beta converter)") + } + if len(pendingToolUseIDs) > 0 { + for id := range pendingToolUseIDs { + return nil, fmt.Errorf("missing tool_result for tool_use id %s (beta converter)", id) + } + } - // Skip the messages we've already processed + if len(toolResultBlocks) > 0 { + betaMessages = append(betaMessages, anthropic.BetaMessageParam{ + Role: anthropic.BetaMessageParamRoleUser, + Content: toolResultBlocks, + }) + } + pendingToolUseIDs = nil i = j - 1 continue } diff --git a/pkg/model/provider/anthropic/beta_converter_test.go b/pkg/model/provider/anthropic/beta_converter_test.go index c5aaadd4d..f5ed7816a 100644 --- a/pkg/model/provider/anthropic/beta_converter_test.go +++ b/pkg/model/provider/anthropic/beta_converter_test.go @@ -1,6 +1,7 @@ package anthropic import ( + "encoding/json" "testing" "github.com/stretchr/testify/assert" @@ -10,6 +11,39 @@ import ( "github.com/docker/cagent/pkg/tools" ) +func marshalToMapBeta(t *testing.T, v any) map[string]any { + t.Helper() + b, err := json.Marshal(v) + require.NoError(t, err) + var m map[string]any + require.NoError(t, json.Unmarshal(b, &m)) + return m +} + +func contentArrayBeta(m map[string]any) []any { + if a, ok := m["content"].([]any); ok { + return a + } + return nil +} + +func collectToolResultIDsBeta(content []any) map[string]struct{} { + ids := make(map[string]struct{}) + for _, c := range content { + cb, ok := c.(map[string]any) + if !ok { + continue + } + if cb["type"] != "tool_result" { + continue + } + if id, _ := cb["tool_use_id"].(string); id != "" { + ids[id] = struct{}{} + } + } + return ids +} + func TestConvertBetaMessages_MergesConsecutiveToolMessages(t *testing.T) { // Simulates the roast battle scenario where: // - Assistant message has 2 tool_use blocks (transfer_task calls) @@ -65,27 +99,22 @@ func TestConvertBetaMessages_MergesConsecutiveToolMessages(t *testing.T) { require.Len(t, betaMessages, 4, "Should have 4 messages after conversion") - msg0Map, _ := marshalToMapBeta(betaMessages[0]) - msg1Map, _ := marshalToMapBeta(betaMessages[1]) - msg2Map, _ := marshalToMapBeta(betaMessages[2]) - msg3Map, _ := marshalToMapBeta(betaMessages[3]) + msg0Map := marshalToMapBeta(t, betaMessages[0]) + msg1Map := marshalToMapBeta(t, betaMessages[1]) + msg2Map := marshalToMapBeta(t, betaMessages[2]) + msg3Map := marshalToMapBeta(t, betaMessages[3]) assert.Equal(t, "user", msg0Map["role"]) assert.Equal(t, "assistant", msg1Map["role"]) assert.Equal(t, "user", msg2Map["role"]) assert.Equal(t, "assistant", msg3Map["role"]) - userMsg2Map, ok := marshalToMapBeta(betaMessages[2]) - require.True(t, ok) + userMsg2Map := marshalToMapBeta(t, betaMessages[2]) content := contentArrayBeta(userMsg2Map) require.Len(t, content, 2, "User message should have 2 tool_result blocks") - toolResultIDs := collectToolResultIDs(content) + toolResultIDs := collectToolResultIDsBeta(content) assert.Contains(t, toolResultIDs, "tool_call_1") assert.Contains(t, toolResultIDs, "tool_call_2") - - // Most importantly: validate that the sequence is valid for Anthropic API - err = validateAnthropicSequencingBeta(betaMessages) - require.NoError(t, err, "Converted messages should pass Anthropic sequencing validation") } func TestConvertBetaMessages_SingleToolMessage(t *testing.T) { @@ -123,10 +152,6 @@ func TestConvertBetaMessages_SingleToolMessage(t *testing.T) { betaMessages, err := testClient().convertBetaMessages(t.Context(), messages) require.NoError(t, err) require.Len(t, betaMessages, 4) - - // Validate sequence - err = validateAnthropicSequencingBeta(betaMessages) - require.NoError(t, err) } func TestConvertBetaMessages_NonConsecutiveToolMessages(t *testing.T) { @@ -181,10 +206,6 @@ func TestConvertBetaMessages_NonConsecutiveToolMessages(t *testing.T) { }, } - betaMessages, err := testClient().convertBetaMessages(t.Context(), messages) + _, err := testClient().convertBetaMessages(t.Context(), messages) require.NoError(t, err) - - // Validate the entire sequence - err = validateAnthropicSequencingBeta(betaMessages) - require.NoError(t, err, "Messages with non-consecutive tool calls should still validate") } diff --git a/pkg/model/provider/anthropic/client.go b/pkg/model/provider/anthropic/client.go index b4b1f5e5a..3f0a770eb 100644 --- a/pkg/model/provider/anthropic/client.go +++ b/pkg/model/provider/anthropic/client.go @@ -271,15 +271,8 @@ func (c *Client) CreateChatCompletionStream( slog.Error("Failed to convert messages for Anthropic request", "error", err) return nil, err } - // Preflight validation to ensure tool_use/tool_result sequencing is valid - if err := validateAnthropicSequencing(converted); err != nil { - slog.Warn("Invalid message sequencing for Anthropic detected, attempting self-repair", "error", err) - converted = repairAnthropicSequencing(converted) - if err2 := validateAnthropicSequencing(converted); err2 != nil { - slog.Error("Failed to self-repair Anthropic sequencing", "error", err2) - return nil, err - } - } + // convertMessages enforces Anthropic tool sequencing strictly; if it's invalid, + // convertMessages returns an error. sys := extractSystemBlocks(messages) params := anthropic.MessageNewParams{ @@ -368,9 +361,10 @@ func (c *Client) CreateChatCompletionStream( func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ([]anthropic.MessageParam, error) { var anthropicMessages []anthropic.MessageParam - // Track whether the last appended assistant message included tool_use blocks - // so we can ensure the immediate next message is the grouped tool_result user message. - pendingAssistantToolUse := false + // Track the set of tool_use IDs from the last appended assistant message. + // Anthropic requires the immediate next message to be a user message containing + // tool_result blocks for those IDs (and only those IDs). + var pendingToolUseIDs map[string]struct{} for i := 0; i < len(messages); i++ { msg := &messages[i] @@ -379,6 +373,9 @@ func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ( continue } if msg.Role == chat.MessageRoleUser { + if pendingToolUseIDs != nil { + return nil, errors.New("assistant tool_use must be immediately followed by tool results") + } // Handle MultiContent for user messages (including images and files) if len(msg.MultiContent) > 0 { contentBlocks, err := c.convertUserMultiContent(ctx, msg.MultiContent) @@ -396,6 +393,9 @@ func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ( continue } if msg.Role == chat.MessageRoleAssistant { + if pendingToolUseIDs != nil { + return nil, errors.New("assistant tool_use must be immediately followed by tool results") + } contentBlocks := make([]anthropic.ContentBlockParamUnion, 0) // Include thinking blocks when present to preserve extended thinking context @@ -406,6 +406,7 @@ func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ( } if len(msg.ToolCalls) > 0 { + pendingToolUseIDs = make(map[string]struct{}, len(msg.ToolCalls)) blockLen := len(msg.ToolCalls) msgContent := strings.TrimSpace(msg.Content) offset := 0 @@ -426,6 +427,9 @@ func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ( if err := json.Unmarshal([]byte(toolCall.Function.Arguments), &inpts); err != nil { inpts = map[string]any{} } + if toolCall.ID != "" { + pendingToolUseIDs[toolCall.ID] = struct{}{} + } toolUseBlocks[len(contentBlocks)+j+offset] = anthropic.ContentBlockParamUnion{ OfToolUse: &anthropic.ToolUseBlockParam{ ID: toolCall.ID, @@ -435,8 +439,6 @@ func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ( } } anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(toolUseBlocks...)) - // Mark that we expect the very next message to be the grouped tool_result blocks. - pendingAssistantToolUse = true } else { if txt := strings.TrimSpace(msg.Content); txt != "" { contentBlocks = append(contentBlocks, anthropic.NewTextBlock(txt)) @@ -444,38 +446,58 @@ func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) ( if len(contentBlocks) > 0 { anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(contentBlocks...)) } - // No tool_use in this assistant message - pendingAssistantToolUse = false + pendingToolUseIDs = nil } continue } if msg.Role == chat.MessageRoleTool { + if pendingToolUseIDs == nil { + return nil, fmt.Errorf("unexpected tool result without preceding tool_use (tool_use_id=%q)", messages[i].ToolCallID) + } // Group consecutive tool results into a single user message. // // This is to satisfy Anthropic's requirement that tool_use blocks are immediately followed // by a single user message containing all corresponding tool_result blocks. var blocks []anthropic.ContentBlockParamUnion + hadToolMessages := false j := i for j < len(messages) && messages[j].Role == chat.MessageRoleTool { - tr := anthropic.NewToolResultBlock(messages[j].ToolCallID, strings.TrimSpace(messages[j].Content), false) + hadToolMessages = true + id := messages[j].ToolCallID + if strings.TrimSpace(id) == "" { + return nil, errors.New("tool result is missing tool_use_id") + } + if _, ok := pendingToolUseIDs[id]; !ok { + return nil, fmt.Errorf("unexpected tool_result tool_use_id=%q", id) + } + tr := anthropic.NewToolResultBlock(id, strings.TrimSpace(messages[j].Content), false) blocks = append(blocks, tr) + delete(pendingToolUseIDs, id) j++ } - if len(blocks) > 0 { - // Only include tool_result blocks if they immediately follow an assistant - // message that contained tool_use. Otherwise, drop them to avoid invalid - // sequencing errors. - if pendingAssistantToolUse { - anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(blocks...)) + if hadToolMessages && len(blocks) == 0 { + return nil, errors.New("tool_result messages present but none match pending tool_use ids") + } + if len(pendingToolUseIDs) > 0 { + missing := make([]string, 0, len(pendingToolUseIDs)) + for id := range pendingToolUseIDs { + missing = append(missing, id) } - // Whether we used them or not, we've now handled the expected tool_result slot. - pendingAssistantToolUse = false + return nil, fmt.Errorf("missing tool_result for tool_use id %s (and %d more)", missing[0], len(missing)-1) } + if len(blocks) > 0 { + anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(blocks...)) + } + pendingToolUseIDs = nil i = j - 1 continue } } + if pendingToolUseIDs != nil { + return nil, errors.New("assistant tool_use present but no subsequent tool results") + } + // Add ephemeral cache to last 2 messages' last content block applyMessageCacheControl(anthropicMessages) @@ -668,150 +690,7 @@ func (c *Client) FileManager() *FileManager { return c.fileManager } -// validateAnthropicSequencing verifies that for every assistant message that includes -// one or more tool_use blocks, the immediately following message is a user message -// that includes tool_result blocks for all those tool_use IDs (grouped into that single message). -func validateAnthropicSequencing(msgs []anthropic.MessageParam) error { - // Marshal-based inspection to avoid depending on SDK internals of union types - for i := range msgs { - m, ok := marshalToMap(msgs[i]) - if !ok || m["role"] != "assistant" { - continue - } - - toolUseIDs := collectToolUseIDs(contentArray(m)) - if len(toolUseIDs) == 0 { - continue - } - - if i+1 >= len(msgs) { - slog.Warn("Anthropic sequencing invalid: assistant tool_use present but no next user tool_result message", "assistant_index", i) - return errors.New("assistant tool_use present but no subsequent user message with tool_result blocks") - } - - next, ok := marshalToMap(msgs[i+1]) - if !ok || next["role"] != "user" { - slog.Warn("Anthropic sequencing invalid: next message after assistant tool_use is not user", "assistant_index", i, "next_role", next["role"]) - return errors.New("assistant tool_use must be followed by a user message containing corresponding tool_result blocks") - } - - toolResultIDs := collectToolResultIDs(contentArray(next)) - missing := differenceIDs(toolUseIDs, toolResultIDs) - if len(missing) > 0 { - slog.Warn("Anthropic sequencing invalid: missing tool_result for tool_use id in next user message", "assistant_index", i, "tool_use_id", missing[0], "missing_count", len(missing)) - return fmt.Errorf("missing tool_result for tool_use id %s in the next user message", missing[0]) - } - } - return nil -} - -// repairAnthropicSequencing inserts a synthetic user message containing tool_result blocks -// immediately after any assistant message that has tool_use blocks missing a corresponding -// tool_result in the next user message. This is a best-effort local repair to keep the -// conversation valid for Anthropic while preserving original messages, to keep the agent loop running. -func repairAnthropicSequencing(msgs []anthropic.MessageParam) []anthropic.MessageParam { - if len(msgs) == 0 { - return msgs - } - repaired := make([]anthropic.MessageParam, 0, len(msgs)+2) - for i := range msgs { - repaired = append(repaired, msgs[i]) - - m, ok := marshalToMap(msgs[i]) - if !ok || m["role"] != "assistant" { - continue - } - - toolUseIDs := collectToolUseIDs(contentArray(m)) - if len(toolUseIDs) == 0 { - continue - } - - // Remove any IDs that already have results in the next user message - if i+1 < len(msgs) { - if next, ok := marshalToMap(msgs[i+1]); ok && next["role"] == "user" { - toolResultIDs := collectToolResultIDs(contentArray(next)) - for id := range toolResultIDs { - delete(toolUseIDs, id) - } - } - } - - if len(toolUseIDs) > 0 { - blocks := make([]anthropic.ContentBlockParamUnion, 0, len(toolUseIDs)) - for id := range toolUseIDs { - blocks = append(blocks, anthropic.NewToolResultBlock(id, "(tool execution failed)", false)) - } - repaired = append(repaired, anthropic.NewUserMessage(blocks...)) - } - } - return repaired -} - -// marshalToMap is a helper that converts any value to a map[string]any via JSON marshaling. -// This is used to inspect SDK union types without depending on their internal structure. -// It's shared by both standard and Beta API validation/repair code. -func marshalToMap(v any) (map[string]any, bool) { - b, err := json.Marshal(v) - if err != nil { - return nil, false - } - var m map[string]any - if json.Unmarshal(b, &m) != nil { - return nil, false - } - return m, true -} - -// contentArray extracts the content array from a marshaled message map. -// Used by both standard and Beta API validation/repair code. -func contentArray(m map[string]any) []any { - if a, ok := m["content"].([]any); ok { - return a - } - return nil -} - -func collectToolUseIDs(content []any) map[string]struct{} { - ids := make(map[string]struct{}) - for _, c := range content { - if cb, ok := c.(map[string]any); ok { - if t, _ := cb["type"].(string); t == "tool_use" { - if id, _ := cb["id"].(string); id != "" { - ids[id] = struct{}{} - } - } - } - } - return ids -} - -func collectToolResultIDs(content []any) map[string]struct{} { - ids := make(map[string]struct{}) - for _, c := range content { - if cb, ok := c.(map[string]any); ok { - if t, _ := cb["type"].(string); t == "tool_result" { - if id, _ := cb["tool_use_id"].(string); id != "" { - ids[id] = struct{}{} - } - } - } - } - return ids -} - -func differenceIDs(a, b map[string]struct{}) []string { - if len(a) == 0 { - return nil - } - var missing []string - for id := range a { - if _, ok := b[id]; !ok { - missing = append(missing, id) - } - } - return missing -} +// Tool sequencing validation helpers removed; conversion enforces sequencing strictly. // anthropicContextLimit returns a reasonable default context window for Anthropic models. // We default to 200k tokens, which is what 3.5-4.5 models support; adjust as needed over time. diff --git a/pkg/model/provider/anthropic/client_test.go b/pkg/model/provider/anthropic/client_test.go index 8e2f56b78..b2bacfc58 100644 --- a/pkg/model/provider/anthropic/client_test.go +++ b/pkg/model/provider/anthropic/client_test.go @@ -91,11 +91,15 @@ func TestConvertMessages_AssistantToolCalls_NoText_IncludesToolUse(t *testing.T) ToolCalls: []tools.ToolCall{ {ID: "tool-1", Function: tools.FunctionCall{Name: "do_thing", Arguments: "{\"x\":1}"}}, }, + }, { + Role: chat.MessageRoleTool, + ToolCallID: "tool-1", + Content: "result", }} out, err := testClient().convertMessages(t.Context(), msgs) require.NoError(t, err) - require.Len(t, out, 1) + require.Len(t, out, 2) b, err := json.Marshal(out[0]) require.NoError(t, err) @@ -173,7 +177,7 @@ func TestSystemMessages_InterspersedExtractedAndExcluded(t *testing.T) { } } -func TestSequencingRepair_Standard(t *testing.T) { +func TestConvertMessages_MissingToolResults_Fails(t *testing.T) { msgs := []chat.Message{ {Role: chat.MessageRoleUser, Content: "start"}, { @@ -182,44 +186,15 @@ func TestSequencingRepair_Standard(t *testing.T) { {ID: "tool-1", Function: tools.FunctionCall{Name: "do_thing", Arguments: "{}"}}, }, }, - // Intentionally missing the user/tool_result message here + // Intentionally missing the tool result message here {Role: chat.MessageRoleUser, Content: "continue"}, } - converted, err := testClient().convertMessages(t.Context(), msgs) - require.NoError(t, err) - err = validateAnthropicSequencing(converted) + _, err := testClient().convertMessages(t.Context(), msgs) require.Error(t, err) - - repaired := repairAnthropicSequencing(converted) - err = validateAnthropicSequencing(repaired) - require.NoError(t, err) } -func TestSequencingRepair_Beta(t *testing.T) { - msgs := []chat.Message{ - {Role: chat.MessageRoleUser, Content: "start"}, - { - Role: chat.MessageRoleAssistant, - ToolCalls: []tools.ToolCall{ - {ID: "tool-1", Function: tools.FunctionCall{Name: "do_thing", Arguments: "{}"}}, - }, - }, - // Intentionally missing the user/tool_result message here - {Role: chat.MessageRoleUser, Content: "continue"}, - } - - converted, err := testClient().convertBetaMessages(t.Context(), msgs) - require.NoError(t, err) - err = validateAnthropicSequencingBeta(converted) - require.Error(t, err) - - repaired := repairAnthropicSequencingBeta(converted) - err = validateAnthropicSequencingBeta(repaired) - require.NoError(t, err) -} - -func TestConvertMessages_DropOrphanToolResults_NoPrecedingToolUse(t *testing.T) { +func TestConvertMessages_OrphanToolResults_NoPrecedingToolUse_Fails(t *testing.T) { msgs := []chat.Message{ {Role: chat.MessageRoleUser, Content: "start"}, // Orphan tool result (no assistant tool_use immediately before) @@ -227,24 +202,8 @@ func TestConvertMessages_DropOrphanToolResults_NoPrecedingToolUse(t *testing.T) {Role: chat.MessageRoleUser, Content: "continue"}, } - converted, err := testClient().convertMessages(t.Context(), msgs) - require.NoError(t, err) - // Expect only the two user text messages to appear - require.Len(t, converted, 2) - - // Ensure none of the converted messages contain tool_result blocks - for i := range converted { - b, err := json.Marshal(converted[i]) - require.NoError(t, err) - var m map[string]any - require.NoError(t, json.Unmarshal(b, &m)) - content, _ := m["content"].([]any) - for _, c := range content { - if cb, ok := c.(map[string]any); ok { - assert.NotEqual(t, "tool_result", cb["type"], "unexpected orphan tool_result included in payload") - } - } - } + _, err := testClient().convertMessages(t.Context(), msgs) + require.Error(t, err) } func TestConvertMessages_GroupToolResults_AfterAssistantToolUse(t *testing.T) { @@ -267,9 +226,6 @@ func TestConvertMessages_GroupToolResults_AfterAssistantToolUse(t *testing.T) { // Expect: user(start), assistant(tool_use), user(grouped tool_result), user(ok) require.Len(t, converted, 4) - // Validate sequencing is acceptable to Anthropic - require.NoError(t, validateAnthropicSequencing(converted)) - b, err := json.Marshal(converted[2]) require.NoError(t, err) var m map[string]any @@ -293,6 +249,23 @@ func TestConvertMessages_GroupToolResults_AfterAssistantToolUse(t *testing.T) { assert.Contains(t, ids, "tool-2") } +func TestConvertMessages_UnexpectedToolResultID_Fails(t *testing.T) { + msgs := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "start"}, + { + Role: chat.MessageRoleAssistant, + ToolCalls: []tools.ToolCall{ + {ID: "tool-1", Function: tools.FunctionCall{Name: "t1", Arguments: "{}"}}, + }, + }, + {Role: chat.MessageRoleTool, ToolCallID: "tool-1", Content: "ok"}, + {Role: chat.MessageRoleTool, ToolCallID: "tool-2", Content: "orphan"}, + } + + _, err := testClient().convertMessages(t.Context(), msgs) + require.Error(t, err) +} + // TestCountAnthropicTokens_Success tests successful token counting for standard API func TestCountAnthropicTokens_Success(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/session/session.go b/pkg/session/session.go index 5ccd32424..84c4fc7c0 100644 --- a/pkg/session/session.go +++ b/pkg/session/session.go @@ -665,14 +665,19 @@ func (s *Session) GetMessages(a *agent.Agent) []chat.Message { return messages } -// trimMessages ensures we don't exceed the maximum number of messages while maintaining -// consistency between assistant messages and their tool call results. +// trimMessages ensures we don't exceed the maximum number of *conversation* messages +// while preserving valid tool sequencing. +// +// Many providers require that tool result messages are adjacent to (and correspond to) +// the immediately preceding assistant tool call message. When trimming history, we may +// otherwise keep a tool result without its assistant tool call, which later causes +// provider request validation errors. +// // System messages are always preserved and not counted against the limit. func trimMessages(messages []chat.Message, maxItems int) []chat.Message { // Separate system messages from conversation messages var systemMessages []chat.Message var conversationMessages []chat.Message - for i := range messages { if messages[i].Role == chat.MessageRoleSystem { systemMessages = append(systemMessages, messages[i]) @@ -681,45 +686,92 @@ func trimMessages(messages []chat.Message, maxItems int) []chat.Message { } } - // If conversation messages fit within limit, return all messages - if len(conversationMessages) <= maxItems { + if maxItems <= 0 || len(conversationMessages) <= maxItems { return messages } - // Keep track of tool call IDs that need to be removed - toolCallsToRemove := make(map[string]bool) - - // Calculate how many conversation messages we need to remove - toRemove := len(conversationMessages) - maxItems + // Keep most recent messages first, but treat tool blocks as an atomic unit: + // [assistant(tool_calls)] + [one or more tool result messages]. + keptReversed := make([]chat.Message, 0, maxItems) + for i := len(conversationMessages) - 1; i >= 0 && len(keptReversed) < maxItems; { + msg := conversationMessages[i] + if msg.Role != chat.MessageRoleTool { + keptReversed = append(keptReversed, msg) + i-- + continue + } - // Start from the beginning (oldest messages) - for i := range toRemove { - // If this is an assistant message with tool calls, mark them for removal - if conversationMessages[i].Role == chat.MessageRoleAssistant { - for _, toolCall := range conversationMessages[i].ToolCalls { - toolCallsToRemove[toolCall.ID] = true - } + // Collect consecutive tool result messages ending at i. + end := i + start := i + for start >= 0 && conversationMessages[start].Role == chat.MessageRoleTool { + start-- } - } - // Combine system messages with trimmed conversation messages - result := make([]chat.Message, 0, len(systemMessages)+maxItems) + // Tool messages must be immediately preceded by an assistant message with matching tool calls. + assistantIdx := start + if assistantIdx < 0 || conversationMessages[assistantIdx].Role != chat.MessageRoleAssistant { + // Orphan tool results: drop them. + i = start + continue + } - // Add all system messages first - result = append(result, systemMessages...) + assistantMsg := conversationMessages[assistantIdx] + assistantToolIDs := make(map[string]struct{}, len(assistantMsg.ToolCalls)) + for _, tc := range assistantMsg.ToolCalls { + if tc.ID != "" { + assistantToolIDs[tc.ID] = struct{}{} + } + } + if len(assistantToolIDs) == 0 { + // Assistant didn't actually record tool calls; treat these as orphans. + i = start + continue + } - // Add the most recent conversation messages - for i := toRemove; i < len(conversationMessages); i++ { - msg := conversationMessages[i] + valid := true + for k := assistantIdx + 1; k <= end; k++ { + id := conversationMessages[k].ToolCallID + if id == "" { + valid = false + break + } + if _, ok := assistantToolIDs[id]; !ok { + valid = false + break + } + } + if !valid { + // Tool results don't correspond to the immediately preceding assistant tool calls. + // Drop them; keeping them would create invalid sequencing for some providers. + i = start + continue + } - // Skip tool messages that correspond to removed assistant messages - if msg.Role == chat.MessageRoleTool && toolCallsToRemove[msg.ToolCallID] { + blockLen := (end - assistantIdx) + 1 + if len(keptReversed)+blockLen > maxItems { + // Not enough room to include this tool interaction coherently; drop the whole block. + i = assistantIdx - 1 continue } - result = append(result, msg) + // Append tool results (newest to oldest), then assistant (so after reversing + // the final order is assistant -> tool results). + for k := end; k > assistantIdx; k-- { + keptReversed = append(keptReversed, conversationMessages[k]) + } + keptReversed = append(keptReversed, assistantMsg) + i = assistantIdx - 1 + } + + // Reverse back into chronological order + for i, j := 0, len(keptReversed)-1; i < j; i, j = i+1, j-1 { + keptReversed[i], keptReversed[j] = keptReversed[j], keptReversed[i] } + result := make([]chat.Message, 0, len(systemMessages)+len(keptReversed)) + result = append(result, systemMessages...) + result = append(result, keptReversed...) return result }