diff --git a/pkg/tools/builtin/sandbox.go b/pkg/sandbox/docker.go similarity index 59% rename from pkg/tools/builtin/sandbox.go rename to pkg/sandbox/docker.go index e5e0044c0..5ea86a590 100644 --- a/pkg/tools/builtin/sandbox.go +++ b/pkg/sandbox/docker.go @@ -1,4 +1,4 @@ -package builtin +package sandbox import ( "bytes" @@ -28,8 +28,8 @@ const ( sandboxLabelPID = "com.docker.cagent.sandbox.pid" ) -// sandboxRunner handles command execution in a Docker container sandbox. -type sandboxRunner struct { +// DockerRunner handles command execution in a Docker container sandbox. +type DockerRunner struct { config *latest.SandboxConfig workingDir string env []string @@ -37,11 +37,15 @@ type sandboxRunner struct { mu sync.Mutex } -func newSandboxRunner(config *latest.SandboxConfig, workingDir string, env []string) *sandboxRunner { - // Clean up any orphaned containers from previous cagent runs +// Verify interface compliance. +var _ Runner = (*DockerRunner)(nil) + +// NewDockerRunner creates a new Docker sandbox runner. +// It cleans up any orphaned containers from previous cagent runs. +func NewDockerRunner(config *latest.SandboxConfig, workingDir string, env []string) *DockerRunner { cleanupOrphanedSandboxContainers() - return &sandboxRunner{ + return &DockerRunner{ config: config, workingDir: workingDir, env: env, @@ -96,15 +100,15 @@ func isProcessRunning(pid int) bool { return err == nil } -// runCommand executes a command inside the sandbox container. -func (s *sandboxRunner) runCommand(timeoutCtx, ctx context.Context, command, cwd string, timeout time.Duration) *tools.ToolCallResult { - containerID, err := s.ensureContainer(ctx) +// RunCommand executes a command inside the Docker sandbox container. +func (d *DockerRunner) RunCommand(timeoutCtx, ctx context.Context, command, cwd string, timeout time.Duration) *tools.ToolCallResult { + containerID, err := d.ensureContainer(ctx) if err != nil { return tools.ResultError(fmt.Sprintf("Failed to start sandbox container: %s", err)) } args := []string{"exec", "-w", cwd} - args = append(args, s.buildEnvVars()...) + args = append(args, d.buildEnvVars()...) args = append(args, containerID, "/bin/sh", "-c", command) cmd := exec.CommandContext(timeoutCtx, "docker", args...) @@ -114,47 +118,53 @@ func (s *sandboxRunner) runCommand(timeoutCtx, ctx context.Context, command, cwd err = cmd.Run() - output := formatCommandOutput(timeoutCtx, ctx, err, outBuf.String(), timeout) - return tools.ResultSuccess(limitOutput(output)) + output := FormatCommandOutput(timeoutCtx, ctx, err, outBuf.String(), timeout) + return tools.ResultSuccess(LimitOutput(output)) +} + +// Start is a no-op for Docker runner; containers are lazily started. +func (d *DockerRunner) Start(context.Context) error { + return nil } -// stop stops and removes the sandbox container. -func (s *sandboxRunner) stop() { - s.mu.Lock() - defer s.mu.Unlock() +// Stop stops and removes the sandbox container. +func (d *DockerRunner) Stop(context.Context) error { + d.mu.Lock() + defer d.mu.Unlock() - if s.containerID == "" { - return + if d.containerID == "" { + return nil } - stopCmd := exec.Command("docker", "stop", "-t", "1", s.containerID) + stopCmd := exec.Command("docker", "stop", "-t", "1", d.containerID) _ = stopCmd.Run() - s.containerID = "" + d.containerID = "" + return nil } // ensureContainer ensures the sandbox container is running, starting it if necessary. -func (s *sandboxRunner) ensureContainer(ctx context.Context) (string, error) { - s.mu.Lock() - defer s.mu.Unlock() +func (d *DockerRunner) ensureContainer(ctx context.Context) (string, error) { + d.mu.Lock() + defer d.mu.Unlock() - if s.containerID != "" && s.isContainerRunning(ctx) { - return s.containerID, nil + if d.containerID != "" && d.isContainerRunning(ctx) { + return d.containerID, nil } - s.containerID = "" + d.containerID = "" - return s.startContainer(ctx) + return d.startContainer(ctx) } -func (s *sandboxRunner) isContainerRunning(ctx context.Context) bool { - cmd := exec.CommandContext(ctx, "docker", "container", "inspect", "-f", "{{.State.Running}}", s.containerID) +func (d *DockerRunner) isContainerRunning(ctx context.Context) bool { + cmd := exec.CommandContext(ctx, "docker", "container", "inspect", "-f", "{{.State.Running}}", d.containerID) output, err := cmd.Output() return err == nil && strings.TrimSpace(string(output)) == "true" } -func (s *sandboxRunner) startContainer(ctx context.Context) (string, error) { - containerName := s.generateContainerName() - image := cmp.Or(s.config.Image, "alpine:latest") +func (d *DockerRunner) startContainer(ctx context.Context) (string, error) { + containerName := d.generateContainerName() + image := cmp.Or(d.config.Image, "alpine:latest") args := []string{ "run", "-d", @@ -163,9 +173,9 @@ func (s *sandboxRunner) startContainer(ctx context.Context) (string, error) { "--label", sandboxLabelKey + "=true", "--label", fmt.Sprintf("%s=%d", sandboxLabelPID, os.Getpid()), } - args = append(args, s.buildVolumeMounts()...) - args = append(args, s.buildEnvVars()...) - args = append(args, "-w", s.workingDir, image, "tail", "-f", "/dev/null") + args = append(args, d.buildVolumeMounts()...) + args = append(args, d.buildEnvVars()...) + args = append(args, "-w", d.workingDir, image, "tail", "-f", "/dev/null") cmd := exec.CommandContext(ctx, "docker", args...) var stderr bytes.Buffer @@ -176,25 +186,25 @@ func (s *sandboxRunner) startContainer(ctx context.Context) (string, error) { return "", fmt.Errorf("failed to start sandbox container: %w\nstderr: %s", err, stderr.String()) } - s.containerID = strings.TrimSpace(string(output)) - return s.containerID, nil + d.containerID = strings.TrimSpace(string(output)) + return d.containerID, nil } -func (s *sandboxRunner) generateContainerName() string { +func (d *DockerRunner) generateContainerName() string { randomBytes := make([]byte, 4) _, _ = rand.Read(randomBytes) return fmt.Sprintf("cagent-sandbox-%s", hex.EncodeToString(randomBytes)) } -func (s *sandboxRunner) buildVolumeMounts() []string { +func (d *DockerRunner) buildVolumeMounts() []string { var args []string - for _, pathSpec := range s.config.Paths { - hostPath, mode := parseSandboxPath(pathSpec) + for _, pathSpec := range d.config.Paths { + hostPath, mode := ParseSandboxPath(pathSpec) // Resolve to absolute path if !filepath.IsAbs(hostPath) { - if s.workingDir != "" { - hostPath = filepath.Join(s.workingDir, hostPath) + if d.workingDir != "" { + hostPath = filepath.Join(d.workingDir, hostPath) } else { // If workingDir is empty, resolve relative to current directory var err error @@ -215,16 +225,13 @@ func (s *sandboxRunner) buildVolumeMounts() []string { } // buildEnvVars creates Docker -e flags for environment variables. -// This forwards the host environment to the sandbox container. // Only variables with valid POSIX names are forwarded. -func (s *sandboxRunner) buildEnvVars() []string { +func (d *DockerRunner) buildEnvVars() []string { var args []string - for _, envVar := range s.env { - // Each env var is in KEY=VALUE format - // Only forward variables with valid names to avoid Docker issues + for _, envVar := range d.env { if idx := strings.Index(envVar, "="); idx > 0 { key := envVar[:idx] - if isValidEnvVarName(key) { + if IsValidEnvVarName(key) { args = append(args, "-e", envVar) } } @@ -232,9 +239,9 @@ func (s *sandboxRunner) buildEnvVars() []string { return args } -// isValidEnvVarName checks if an environment variable name is valid for POSIX. +// IsValidEnvVarName checks if an environment variable name is valid for POSIX. // Valid names start with a letter or underscore and contain only alphanumerics and underscores. -func isValidEnvVarName(name string) bool { +func IsValidEnvVarName(name string) bool { if name == "" { return false } @@ -247,8 +254,8 @@ func isValidEnvVarName(name string) bool { return true } -// parseSandboxPath parses a path specification like "./path" or "/path:ro" into path and mode. -func parseSandboxPath(pathSpec string) (path, mode string) { +// ParseSandboxPath parses a path specification like "./path" or "/path:ro" into path and mode. +func ParseSandboxPath(pathSpec string) (path, mode string) { mode = "rw" // Default to read-write switch { @@ -264,3 +271,30 @@ func parseSandboxPath(pathSpec string) (path, mode string) { return path, mode } + +// FormatCommandOutput formats command output handling timeout, cancellation, and errors. +func FormatCommandOutput(timeoutCtx, ctx context.Context, err error, rawOutput string, timeout time.Duration) string { + var output string + if timeoutCtx.Err() != nil { + if ctx.Err() != nil { + output = "Command cancelled" + } else { + output = fmt.Sprintf("Command timed out after %v\nOutput: %s", timeout, rawOutput) + } + } else { + output = rawOutput + if err != nil { + output = fmt.Sprintf("Error executing command: %s\nOutput: %s", err, output) + } + } + return cmp.Or(strings.TrimSpace(output), "") +} + +// LimitOutput truncates output to a maximum size. +func LimitOutput(output string) string { + const maxOutputSize = 30000 + if len(output) > maxOutputSize { + return output[:maxOutputSize] + "\n\n[Output truncated: exceeded 30,000 character limit]" + } + return output +} diff --git a/pkg/sandbox/docker_test.go b/pkg/sandbox/docker_test.go new file mode 100644 index 000000000..8c18da8a9 --- /dev/null +++ b/pkg/sandbox/docker_test.go @@ -0,0 +1,77 @@ +package sandbox + +import ( + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseSandboxPath(t *testing.T) { + t.Parallel() + + tests := []struct { + input string + wantPath string + wantMode string + }{ + {input: ".", wantPath: ".", wantMode: "rw"}, + {input: "/tmp", wantPath: "/tmp", wantMode: "rw"}, + {input: "./src", wantPath: "./src", wantMode: "rw"}, + {input: "/tmp:ro", wantPath: "/tmp", wantMode: "ro"}, + {input: "./config:ro", wantPath: "./config", wantMode: "ro"}, + {input: "/data:rw", wantPath: "/data", wantMode: "rw"}, + {input: "./secrets:ro", wantPath: "./secrets", wantMode: "ro"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + t.Parallel() + path, mode := ParseSandboxPath(tt.input) + assert.Equal(t, tt.wantPath, path) + assert.Equal(t, tt.wantMode, mode) + }) + } +} + +func TestIsValidEnvVarName(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + valid bool + }{ + {"HOME", true}, + {"USER", true}, + {"PATH", true}, + {"_private", true}, + {"MY_VAR_123", true}, + {"a", true}, + {"A", true}, + {"_", true}, + {"", false}, + {"123", false}, + {"1VAR", false}, + {"VAR-NAME", false}, + {"VAR.NAME", false}, + {"VAR NAME", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + result := IsValidEnvVarName(tt.name) + assert.Equal(t, tt.valid, result, "IsValidEnvVarName(%q)", tt.name) + }) + } +} + +func TestIsProcessRunning(t *testing.T) { + t.Parallel() + + // Current process should be running + assert.True(t, isProcessRunning(os.Getpid()), "Current process should be running") + + // Non-existent PID should not be running (using a very high PID unlikely to exist) + assert.False(t, isProcessRunning(999999999), "Very high PID should not be running") +} diff --git a/pkg/sandbox/runner.go b/pkg/sandbox/runner.go new file mode 100644 index 000000000..3d04fddbb --- /dev/null +++ b/pkg/sandbox/runner.go @@ -0,0 +1,24 @@ +package sandbox + +import ( + "context" + "time" + + "github.com/docker/cagent/pkg/tools" +) + +// Runner is a pluggable interface for sandbox execution backends. +// Implementations handle command execution in isolated environments +// (Docker containers, Kubernetes pods, etc.). +type Runner interface { + // RunCommand executes a command synchronously and returns the result. + // The timeoutCtx carries the command timeout; ctx is the parent context. + // cwd is the working directory inside the sandbox. + // timeout is the original duration for formatting timeout messages. + RunCommand(timeoutCtx, ctx context.Context, command, cwd string, timeout time.Duration) *tools.ToolCallResult + + // Start initializes the runner (e.g., discover pod, start container). + Start(ctx context.Context) error + // Stop cleans up the runner (e.g., stop container). + Stop(ctx context.Context) error +} diff --git a/pkg/teamloader/registry.go b/pkg/teamloader/registry.go index 498747610..c56636584 100644 --- a/pkg/teamloader/registry.go +++ b/pkg/teamloader/registry.go @@ -14,6 +14,7 @@ import ( "github.com/docker/cagent/pkg/js" "github.com/docker/cagent/pkg/memory/database/sqlite" "github.com/docker/cagent/pkg/path" + "github.com/docker/cagent/pkg/sandbox" "github.com/docker/cagent/pkg/tools" "github.com/docker/cagent/pkg/tools/a2a" "github.com/docker/cagent/pkg/tools/builtin" @@ -145,28 +146,32 @@ func createShellTool(ctx context.Context, toolset latest.Toolset, _ string, runC } env = append(env, os.Environ()...) - // Expand sandbox paths with JS interpolation (e.g., ${env.HOME}:ro) + // Create Docker sandbox runner from toolset config sandboxConfig := expandSandboxPaths(ctx, toolset.Sandbox, runConfig.EnvProvider()) + var runner sandbox.Runner + if sandboxConfig != nil { + runner = sandbox.NewDockerRunner(sandboxConfig, runConfig.WorkingDir, env) + } - return builtin.NewShellTool(env, runConfig, sandboxConfig), nil + return builtin.NewShellTool(env, runConfig, runner), nil } // expandSandboxPaths expands environment variable references in sandbox paths. // Supports JS template literal syntax like ${env.HOME} or ${env.HOME || '/default'}. -func expandSandboxPaths(ctx context.Context, sandbox *latest.SandboxConfig, envProvider environment.Provider) *latest.SandboxConfig { - if sandbox == nil { +func expandSandboxPaths(ctx context.Context, sandboxCfg *latest.SandboxConfig, envProvider environment.Provider) *latest.SandboxConfig { + if sandboxCfg == nil { return nil } expander := js.NewJsExpander(envProvider) - expandedPaths := make([]string, len(sandbox.Paths)) - for i, p := range sandbox.Paths { + expandedPaths := make([]string, len(sandboxCfg.Paths)) + for i, p := range sandboxCfg.Paths { expandedPaths[i] = expander.Expand(ctx, p) } return &latest.SandboxConfig{ - Image: sandbox.Image, + Image: sandboxCfg.Image, Paths: expandedPaths, } } diff --git a/pkg/tools/builtin/shell.go b/pkg/tools/builtin/shell.go index 7a43db111..7ad76c992 100644 --- a/pkg/tools/builtin/shell.go +++ b/pkg/tools/builtin/shell.go @@ -18,7 +18,7 @@ import ( "github.com/docker/cagent/pkg/concurrent" "github.com/docker/cagent/pkg/config" - "github.com/docker/cagent/pkg/config/latest" + "github.com/docker/cagent/pkg/sandbox" "github.com/docker/cagent/pkg/tools" ) @@ -50,7 +50,7 @@ type shellHandler struct { workingDir string jobs *concurrent.Map[string, *backgroundJob] jobCounter atomic.Int64 - sandbox *sandboxRunner + sandbox sandbox.Runner } // Job status constants @@ -155,7 +155,7 @@ func (h *shellHandler) RunShell(ctx context.Context, params RunShellArgs) (*tool // Delegate to sandbox runner if configured if h.sandbox != nil { - return h.sandbox.runCommand(timeoutCtx, ctx, params.Cmd, cwd, timeout), nil + return h.sandbox.RunCommand(timeoutCtx, ctx, params.Cmd, cwd, timeout), nil } slog.Debug("Executing native shell command", "command", params.Cmd, "cwd", cwd) @@ -346,9 +346,10 @@ func (h *shellHandler) StopBackgroundJob(_ context.Context, params StopBackgroun return tools.ResultSuccess(fmt.Sprintf("Job %s stopped successfully", params.JobID)), nil } -// NewShellTool creates a new shell tool with optional sandbox configuration. -func NewShellTool(env []string, runConfig *config.RuntimeConfig, sandboxConfig *latest.SandboxConfig) *ShellTool { - shell, argsPrefix := detectShell(sandboxConfig != nil) +// NewShellTool creates a new shell tool with an optional sandbox runner. +// When runner is non-nil, shell commands are delegated to it. +func NewShellTool(env []string, runConfig *config.RuntimeConfig, runner sandbox.Runner) *ShellTool { + shell, argsPrefix := detectShell(runner != nil) handler := &shellHandler{ shell: shell, @@ -357,10 +358,7 @@ func NewShellTool(env []string, runConfig *config.RuntimeConfig, sandboxConfig * timeout: 30 * time.Second, jobs: concurrent.NewMap[string, *backgroundJob](), workingDir: runConfig.WorkingDir, - } - - if sandboxConfig != nil { - handler.sandbox = newSandboxRunner(sandboxConfig, runConfig.WorkingDir, env) + sandbox: runner, } return &ShellTool{handler: handler} @@ -493,7 +491,7 @@ func (t *ShellTool) Start(context.Context) error { return nil } -func (t *ShellTool) Stop(context.Context) error { +func (t *ShellTool) Stop(ctx context.Context) error { // Terminate all running background jobs t.handler.jobs.Range(func(_ string, job *backgroundJob) bool { if job.status.CompareAndSwap(statusRunning, statusStopped) { @@ -502,9 +500,9 @@ func (t *ShellTool) Stop(context.Context) error { return true }) - // Stop sandbox container if running + // Stop sandbox runner if configured if t.handler.sandbox != nil { - t.handler.sandbox.stop() + _ = t.handler.sandbox.Stop(ctx) } return nil diff --git a/pkg/tools/builtin/shell_test.go b/pkg/tools/builtin/shell_test.go index 104e6b443..21cfa0375 100644 --- a/pkg/tools/builtin/shell_test.go +++ b/pkg/tools/builtin/shell_test.go @@ -1,17 +1,31 @@ package builtin import ( + "context" "os" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/docker/cagent/pkg/config" - "github.com/docker/cagent/pkg/config/latest" + "github.com/docker/cagent/pkg/sandbox" "github.com/docker/cagent/pkg/tools" ) +// noopRunner is a minimal sandbox.Runner for testing sandbox-mode behavior +// without needing Docker. +type noopRunner struct{} + +var _ sandbox.Runner = (*noopRunner)(nil) + +func (*noopRunner) RunCommand(_, _ context.Context, _, _ string, _ time.Duration) *tools.ToolCallResult { + return tools.ResultSuccess("") +} +func (*noopRunner) Start(context.Context) error { return nil } +func (*noopRunner) Stop(context.Context) error { return nil } + func TestNewShellTool(t *testing.T) { t.Setenv("SHELL", "/bin/bash") tool := NewShellTool(nil, &config.RuntimeConfig{Config: config.Config{WorkingDir: t.TempDir()}}, nil) @@ -124,47 +138,11 @@ func TestShellTool_ListBackgroundJobs(t *testing.T) { assert.Contains(t, listResult.Output, "ID: job_") } -func TestParseSandboxPath(t *testing.T) { - t.Parallel() - - tests := []struct { - input string - wantPath string - wantMode string - }{ - {input: ".", wantPath: ".", wantMode: "rw"}, - {input: "/tmp", wantPath: "/tmp", wantMode: "rw"}, - {input: "./src", wantPath: "./src", wantMode: "rw"}, - {input: "/tmp:ro", wantPath: "/tmp", wantMode: "ro"}, - {input: "./config:ro", wantPath: "./config", wantMode: "ro"}, - {input: "/data:rw", wantPath: "/data", wantMode: "rw"}, - {input: "./secrets:ro", wantPath: "./secrets", wantMode: "ro"}, - } - - for _, tt := range tests { - t.Run(tt.input, func(t *testing.T) { - t.Parallel() - path, mode := parseSandboxPath(tt.input) - assert.Equal(t, tt.wantPath, path) - assert.Equal(t, tt.wantMode, mode) - }) - } -} - func TestShellTool_SandboxInstructions(t *testing.T) { t.Parallel() - workingDir := "/workspace/project" - sandboxConfig := &latest.SandboxConfig{ - Image: "alpine:latest", - Paths: []string{ - ".", - "/tmp", - "/home/user:ro", - }, - } - - tool := NewShellTool(nil, &config.RuntimeConfig{Config: config.Config{WorkingDir: workingDir}}, sandboxConfig) + runner := &noopRunner{} + tool := NewShellTool(nil, &config.RuntimeConfig{Config: config.Config{WorkingDir: "/workspace/project"}}, runner) instructions := tool.Instructions() @@ -192,48 +170,6 @@ func TestShellTool_NativeInstructions(t *testing.T) { assert.NotContains(t, instructions, "## Mounted Paths") } -func TestIsValidEnvVarName(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - valid bool - }{ - {"HOME", true}, - {"USER", true}, - {"PATH", true}, - {"_private", true}, - {"MY_VAR_123", true}, - {"a", true}, - {"A", true}, - {"_", true}, - {"", false}, - {"123", false}, - {"1VAR", false}, - {"VAR-NAME", false}, - {"VAR.NAME", false}, - {"VAR NAME", false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - result := isValidEnvVarName(tt.name) - assert.Equal(t, tt.valid, result, "isValidEnvVarName(%q)", tt.name) - }) - } -} - -func TestIsProcessRunning(t *testing.T) { - t.Parallel() - - // Current process should be running - assert.True(t, isProcessRunning(os.Getpid()), "Current process should be running") - - // Non-existent PID should not be running (using a very high PID unlikely to exist) - assert.False(t, isProcessRunning(999999999), "Very high PID should not be running") -} - func TestResolveWorkDir(t *testing.T) { t.Parallel()