Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions pkg/runtime/fallback.go
Original file line number Diff line number Diff line change
Expand Up @@ -369,9 +369,10 @@ func getEffectiveCooldown(a *agent.Agent) time.Duration {
}

// getEffectiveRetries returns the number of retries to use for the agent.
// If no retries are explicitly configured (retries == 0) and fallback models
// are configured, returns DefaultFallbackRetries to provide sensible retry
// behavior out of the box.
// If no retries are explicitly configured (retries == 0), returns
// DefaultFallbackRetries to provide sensible retry behavior out of the box.
// Retries apply to retryable errors (5xx, timeouts) on the same model,
// regardless of whether fallback models are configured.
//
// Note: Users who explicitly want 0 retries can set retries: -1 in their config
// (though this is an edge case - most users want some retries for resilience).
Expand All @@ -381,8 +382,7 @@ func getEffectiveRetries(a *agent.Agent) int {
if retries < 0 {
return 0
}
// 0 means "use default" when fallback models are configured
if retries == 0 && len(a.FallbackModels()) > 0 {
if retries == 0 {
return DefaultFallbackRetries
}
return retries
Expand Down
52 changes: 50 additions & 2 deletions pkg/runtime/fallback_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,11 @@ func TestIsRetryableModelError(t *testing.T) {
err: errors.New("something weird happened"),
expected: false,
},
{
name: "anthropic streaming internal server error",
err: fmt.Errorf("error receiving from stream: %w", errors.New(`received error while streaming: {"type":"error","error":{"details":null,"type":"api_error","message":"Internal server error"},"request_id":"req_test"}`)),
expected: true,
},
}

for _, tt := range tests {
Expand Down Expand Up @@ -719,12 +724,12 @@ func TestGetEffectiveRetries(t *testing.T) {
mockModel := &mockProvider{id: "test/model", stream: newStreamBuilder().AddContent("ok").AddStopWithUsage(1, 1).Build()}
mockFallback := &mockProvider{id: "test/fallback", stream: newStreamBuilder().AddContent("ok").AddStopWithUsage(1, 1).Build()}

// Agent with no retries configured and no fallback models should return 0
// Agent with no retries configured and no fallback models should still get default retries
agentNoFallback := agent.New("no-fallback", "test",
agent.WithModel(mockModel),
)
retries := getEffectiveRetries(agentNoFallback)
assert.Equal(t, 0, retries, "no fallback models = no retries (nothing to retry to)")
assert.Equal(t, DefaultFallbackRetries, retries, "no fallback models should still get default retries for transient errors")

// Agent with no retries configured but with fallback models should use default
agentWithFallback := agent.New("with-fallback", "test",
Expand Down Expand Up @@ -877,6 +882,49 @@ func TestFallbackModelsClonedWithThinkingEnabled(t *testing.T) {
})
}

func TestPrimaryRetriesWithoutFallbackModels(t *testing.T) {
synctest.Test(t, func(t *testing.T) {
// Primary fails twice with retryable error (mimics Anthropic streaming internal
// server error), then succeeds. No fallback models are configured.
successStream := newStreamBuilder().
AddContent("Success after transient failures").
AddStopWithUsage(10, 5).
Build()
primary := &countingProvider{
id: "primary/counting",
failCount: 2,
err: errors.New(`error receiving from stream: received error while streaming: {"type":"error","error":{"details":null,"type":"api_error","message":"Internal server error"}}`),
stream: successStream,
}

root := agent.New("root", "test",
agent.WithModel(primary),
// No fallback models
)

tm := team.New(team.WithAgents(root))
rt, err := NewLocalRuntime(tm, WithSessionCompaction(false), WithModelStore(mockModelStore{}))
require.NoError(t, err)

sess := session.New(session.WithUserMessage("test"))
sess.Title = "No Fallback Retry Test"

events := rt.RunStream(t.Context(), sess)

var gotContent bool
for ev := range events {
if choice, ok := ev.(*AgentChoiceEvent); ok {
if choice.Content == "Success after transient failures" {
gotContent = true
}
}
}

assert.True(t, gotContent, "should recover from transient errors even without fallback models")
assert.Equal(t, 3, primary.callCount, "primary should be called 3 times (2 failures + 1 success)")
})
}

// Verify interface compliance
var (
_ provider.Provider = (*mockProvider)(nil)
Expand Down