From d579b5bf759fe6d3718c13b1917dcfdb80bdaa53 Mon Sep 17 00:00:00 2001 From: James Magahern Date: Sat, 2 May 2026 19:52:09 -0700 Subject: [PATCH] adds shell tool --- docker-compose.example.yml | 2 + docs/api/rest.md | 7 +- docs/api/streaming-chat.md | 3 +- server/README.md | 2 + server/src/env.ts | 8 +- server/src/llm/chat-tools.ts | 179 +++++++++++++++++++++++++++++++---- 6 files changed, 178 insertions(+), 23 deletions(-) diff --git a/docker-compose.example.yml b/docker-compose.example.yml index 4e2e62f..108b6d7 100644 --- a/docker-compose.example.yml +++ b/docker-compose.example.yml @@ -24,6 +24,8 @@ services: CHAT_CODEX_SSH_KEY_PATH: ${CHAT_CODEX_SSH_KEY_PATH:-} CHAT_CODEX_SSH_PRIVATE_KEY_B64: ${CHAT_CODEX_SSH_PRIVATE_KEY_B64:-} CHAT_CODEX_EXEC_TIMEOUT_MS: ${CHAT_CODEX_EXEC_TIMEOUT_MS:-600000} + CHAT_SHELL_TOOL_ENABLED: ${CHAT_SHELL_TOOL_ENABLED:-false} + CHAT_SHELL_EXEC_TIMEOUT_MS: ${CHAT_SHELL_EXEC_TIMEOUT_MS:-120000} volumes: - sybil_data:/data # Example key mount for codex_exec: diff --git a/docs/api/rest.md b/docs/api/rest.md index 3ba3060..c83c6ab 100644 --- a/docs/api/rest.md +++ b/docs/api/rest.md @@ -170,12 +170,14 @@ Behavior notes: - For `openai` and `xai`, backend enables tool use during chat completion with an internal system instruction. - For `openai` and `xai`, image attachments are sent as chat-completions content parts alongside text. - For `anthropic`, image attachments are sent as Messages API `image` blocks using base64 source data; text attachments are added as `text` blocks. -- Available tool calls for chat: `web_search` and `fetch_url`. When `CHAT_CODEX_TOOL_ENABLED=true`, `codex_exec` is also available. +- Available tool calls for chat: `web_search` and `fetch_url`. When `CHAT_CODEX_TOOL_ENABLED=true`, `codex_exec` is also available. When `CHAT_SHELL_TOOL_ENABLED=true`, `shell_exec` is also available. - `web_search` returns ranked results with per-result summaries/snippets. 
Its backend engine is selected by `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`. - `fetch_url` fetches a URL and returns plaintext page content (HTML converted to text server-side). - `codex_exec` delegates coding, shell, repository inspection, and other complex software tasks to a persistent remote Codex CLI workspace over SSH. The server runs `codex exec <prompt>` on the configured devbox inside `CHAT_CODEX_REMOTE_WORKDIR`. -- `codex_exec` configuration: +- `shell_exec` runs arbitrary non-interactive shell commands on the same configured devbox, starting in `CHAT_CODEX_REMOTE_WORKDIR`. It uses `bash -lc` when bash exists, otherwise `sh -lc`, and does not run inside the Sybil server container. +- Devbox tool configuration: - `CHAT_CODEX_TOOL_ENABLED=true` + - `CHAT_SHELL_TOOL_ENABLED=true` - `CHAT_CODEX_REMOTE_HOST=` (required when enabled) - `CHAT_CODEX_REMOTE_USER=` (optional; omitted if `CHAT_CODEX_REMOTE_HOST` already contains `user@host`) - `CHAT_CODEX_REMOTE_PORT=22` (optional) @@ -183,6 +185,7 @@ Behavior notes: - `CHAT_CODEX_SSH_KEY_PATH=/run/secrets/codex_ssh_key` (recommended private-key delivery via read-only volume mount) - `CHAT_CODEX_SSH_PRIVATE_KEY_B64=` (optional fallback when a volume mount is not practical) - `CHAT_CODEX_EXEC_TIMEOUT_MS=600000` (optional) + - `CHAT_SHELL_EXEC_TIMEOUT_MS=120000` (optional) - When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output. - `anthropic` currently runs without server-managed tool calls. 
diff --git a/docs/api/streaming-chat.md b/docs/api/streaming-chat.md index 3628a09..ac902c3 100644 --- a/docs/api/streaming-chat.md +++ b/docs/api/streaming-chat.md @@ -127,12 +127,13 @@ Event order: ## Provider Streaming Behavior -- `openai`/`xai`: backend may execute internal tool calls (`web_search`, `fetch_url`, and optional `codex_exec`) before producing final text. +- `openai`/`xai`: backend may execute internal tool calls (`web_search`, `fetch_url`, optional `codex_exec`, and optional `shell_exec`) before producing final text. - `openai`: image attachments are sent as chat-completions content parts; text attachments are inlined as text parts. - `xai`: same attachment behavior as OpenAI. - `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`. Image attachments are sent as base64 `image` blocks and text attachments are appended as `text` blocks. - `web_search` uses `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`. This only affects chat-mode tool calls, not search-mode endpoints. - `codex_exec` is available only when `CHAT_CODEX_TOOL_ENABLED=true`. It SSHes to `CHAT_CODEX_REMOTE_HOST`, creates/uses `CHAT_CODEX_REMOTE_WORKDIR`, and runs `codex exec <prompt>` there. Prefer `CHAT_CODEX_SSH_KEY_PATH` with a read-only mounted private key; `CHAT_CODEX_SSH_PRIVATE_KEY_B64` is also supported. +- `shell_exec` is available only when `CHAT_SHELL_TOOL_ENABLED=true`. It uses the same devbox SSH configuration, starts in `CHAT_CODEX_REMOTE_WORKDIR`, and runs non-interactive shell commands there, not inside the Sybil server container. Tool-enabled streaming notes (`openai`/`xai`): - Stream still emits standard `meta`, `delta`, `done|error` events. 
diff --git a/server/README.md b/server/README.md index 0dd4c3e..68ab1c3 100644 --- a/server/README.md +++ b/server/README.md @@ -54,6 +54,8 @@ If `ADMIN_TOKEN` is not set, the server runs in open mode (dev). - `CHAT_CODEX_SSH_KEY_PATH` (recommended: path to a read-only mounted private key) - `CHAT_CODEX_SSH_PRIVATE_KEY_B64` (optional fallback private key delivery) - `CHAT_CODEX_EXEC_TIMEOUT_MS` (`600000` by default) +- `CHAT_SHELL_TOOL_ENABLED` (`false` by default; enables the `shell_exec` chat tool for OpenAI/xAI on the same devbox) +- `CHAT_SHELL_EXEC_TIMEOUT_MS` (`120000` by default) ## API - `GET /health` diff --git a/server/src/env.ts b/server/src/env.ts index e37ed54..dc28a5b 100644 --- a/server/src/env.ts +++ b/server/src/env.ts @@ -75,6 +75,10 @@ const EnvSchema = z.object({ CHAT_CODEX_SSH_KEY_PATH: OptionalTrimmedStringSchema, CHAT_CODEX_SSH_PRIVATE_KEY_B64: OptionalTrimmedStringSchema, CHAT_CODEX_EXEC_TIMEOUT_MS: defaultedPositiveInt(600_000), + + // Optional arbitrary shell tool that runs only on the configured devbox. 
+ CHAT_SHELL_TOOL_ENABLED: BooleanFlagSchema, + CHAT_SHELL_EXEC_TIMEOUT_MS: defaultedPositiveInt(120_000), }).superRefine((value, ctx) => { if (value.CHAT_WEB_SEARCH_ENGINE === "searxng" && !value.SEARXNG_BASE_URL) { ctx.addIssue({ @@ -84,11 +88,11 @@ const EnvSchema = z.object({ }); } - if (value.CHAT_CODEX_TOOL_ENABLED && !value.CHAT_CODEX_REMOTE_HOST) { + if ((value.CHAT_CODEX_TOOL_ENABLED || value.CHAT_SHELL_TOOL_ENABLED) && !value.CHAT_CODEX_REMOTE_HOST) { ctx.addIssue({ code: "custom", path: ["CHAT_CODEX_REMOTE_HOST"], - message: "CHAT_CODEX_REMOTE_HOST is required when CHAT_CODEX_TOOL_ENABLED=true", + message: "CHAT_CODEX_REMOTE_HOST is required when CHAT_CODEX_TOOL_ENABLED=true or CHAT_SHELL_TOOL_ENABLED=true", }); } }); diff --git a/server/src/llm/chat-tools.ts b/server/src/llm/chat-tools.ts index 9c9902b..fb4d32a 100644 --- a/server/src/llm/chat-tools.ts +++ b/server/src/llm/chat-tools.ts @@ -21,7 +21,10 @@ const FETCH_TIMEOUT_MS = 12_000; const MAX_CODEX_PROMPT_CHARACTERS = 60_000; const DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS = 24_000; const MAX_CODEX_MAX_OUTPUT_CHARACTERS = 80_000; -const CODEX_EXEC_MAX_BUFFER_BYTES = 1_000_000; +const MAX_SHELL_COMMAND_CHARACTERS = 20_000; +const DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS = 24_000; +const MAX_SHELL_MAX_OUTPUT_CHARACTERS = 80_000; +const REMOTE_EXEC_MAX_BUFFER_BYTES = 1_000_000; const execFileAsync = promisify(execFile); @@ -53,6 +56,15 @@ const CodexExecArgsSchema = z type CodexExecArgs = z.infer; +const ShellExecArgsSchema = z + .object({ + command: z.string().trim().min(1).max(MAX_SHELL_COMMAND_CHARACTERS), + maxCharacters: z.coerce.number().int().min(1_000).max(MAX_SHELL_MAX_OUTPUT_CHARACTERS).optional(), + }) + .strict(); + +type ShellExecArgs = z.infer; + const CODEX_EXEC_TOOL = { type: "function", function: { @@ -80,6 +92,33 @@ const CODEX_EXEC_TOOL = { }, }; +const SHELL_EXEC_TOOL = { + type: "function", + function: { + name: "shell_exec", + description: + "Run an arbitrary non-interactive shell command 
on the configured remote devbox, starting in the persistent scratch workspace. Use for quick Python scripts, calculations, file inspection, package/tool checks, tests, and command-line work that needs a real shell. This does not run inside the Sybil server container.", + parameters: { + type: "object", + properties: { + command: { + type: "string", + description: + "Shell command to run on the devbox. The command is executed with bash -lc when bash exists, otherwise sh -lc, starting in the persistent scratch workspace.", + }, + maxCharacters: { + type: "integer", + minimum: 1_000, + maximum: MAX_SHELL_MAX_OUTPUT_CHARACTERS, + description: "Maximum stdout/stderr characters returned to the model (default 24000).", + }, + }, + required: ["command"], + additionalProperties: false, + }, + }, +}; + const BASE_CHAT_TOOLS: any[] = [ { type: "function", @@ -142,7 +181,11 @@ const BASE_CHAT_TOOLS: any[] = [ }, ]; -const CHAT_TOOLS: any[] = env.CHAT_CODEX_TOOL_ENABLED ? [...BASE_CHAT_TOOLS, CODEX_EXEC_TOOL] : BASE_CHAT_TOOLS; +const CHAT_TOOLS: any[] = [ + ...BASE_CHAT_TOOLS, + ...(env.CHAT_CODEX_TOOL_ENABLED ? [CODEX_EXEC_TOOL] : []), + ...(env.CHAT_SHELL_TOOL_ENABLED ? [SHELL_EXEC_TOOL] : []), +]; export const CHAT_TOOL_SYSTEM_PROMPT = "You can use tools to gather up-to-date web information when needed. " + @@ -151,6 +194,9 @@ export const CHAT_TOOL_SYSTEM_PROMPT = (env.CHAT_CODEX_TOOL_ENABLED ? "Use codex_exec when a request needs substantial coding work, repository inspection, shell commands, tests, debugging, or another complex task suited to a persistent Codex workspace. Provide codex_exec a complete prompt with the goal, constraints, and expected report-back format. " : "") + + (env.CHAT_SHELL_TOOL_ENABLED + ? "Use shell_exec for direct command-line work on the remote devbox, including quick Python programs, calculations, file inspection, running tests, and small scripts. 
" + : "") + "Do not fabricate tool outputs; reason only from provided tool results."; type ToolRunOutcome = { @@ -252,6 +298,16 @@ function buildToolSummary(name: string, args: Record, status: " return prompt ? `Codex task '${toSingleLine(prompt, 120)}' failed.${errSuffix}` : `Codex task failed.${errSuffix}`; } + if (name === "shell_exec") { + const command = typeof args.command === "string" ? args.command.trim() : ""; + if (status === "completed") { + return command ? `Ran devbox shell command: '${toSingleLine(command, 120)}'.` : "Ran devbox shell command."; + } + return command + ? `Devbox shell command '${toSingleLine(command, 120)}' failed.${errSuffix}` + : `Devbox shell command failed.${errSuffix}`; + } + if (status === "completed") { return `Ran tool '${name}'.`; } @@ -466,7 +522,7 @@ function shellQuote(value: string) { return `'${value.replace(/'/g, `'\\''`)}'`; } -function buildCodexSshTarget() { +function buildDevboxSshTarget() { const host = env.CHAT_CODEX_REMOTE_HOST; if (!host) { throw new Error("CHAT_CODEX_REMOTE_HOST not set"); @@ -483,7 +539,16 @@ function buildRemoteCodexCommand(prompt: string) { return `mkdir -p ${shellQuote(workdir)} && cd ${shellQuote(workdir)} && ${codexCommand}`; } -async function withCodexSshKeyPath(fn: (keyPath?: string) => Promise) { +function buildRemoteShellCommand(command: string) { + const workdir = env.CHAT_CODEX_REMOTE_WORKDIR.trim(); + const quotedCommand = shellQuote(command); + return ( + `mkdir -p ${shellQuote(workdir)} && cd ${shellQuote(workdir)} && ` + + `if command -v bash >/dev/null 2>&1; then bash -lc ${quotedCommand}; else sh -lc ${quotedCommand}; fi` + ); +} + +async function withDevboxSshKeyPath(fn: (keyPath?: string) => Promise) { if (env.CHAT_CODEX_SSH_KEY_PATH) { return fn(env.CHAT_CODEX_SSH_KEY_PATH); } @@ -502,7 +567,7 @@ async function withCodexSshKeyPath(fn: (keyPath?: string) => Promise) { } } -function clipCodexOutput(value: string, maxCharacters: number) { +function clipRemoteOutput(value: 
string, maxCharacters: number) { if (value.length <= maxCharacters) { return { text: value, truncated: false }; } @@ -525,7 +590,7 @@ async function runCodexExecTool(input: unknown): Promise { const args: CodexExecArgs = CodexExecArgsSchema.parse(input); const maxCharacters = args.maxCharacters ?? DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS; - const sshTarget = buildCodexSshTarget(); + const sshTarget = buildDevboxSshTarget(); const remoteCommand = buildRemoteCodexCommand(args.prompt); const run = async (keyPath?: string) => { @@ -549,10 +614,10 @@ async function runCodexExecTool(input: unknown): Promise { try { const result = await execFileAsync("ssh", sshArgs, { timeout: env.CHAT_CODEX_EXEC_TIMEOUT_MS, - maxBuffer: CODEX_EXEC_MAX_BUFFER_BYTES, + maxBuffer: REMOTE_EXEC_MAX_BUFFER_BYTES, }); - const stdout = clipCodexOutput(bufferOrStringToString(result.stdout), maxCharacters); - const stderr = clipCodexOutput(bufferOrStringToString(result.stderr), Math.min(maxCharacters, 12_000)); + const stdout = clipRemoteOutput(bufferOrStringToString(result.stdout), maxCharacters); + const stderr = clipRemoteOutput(bufferOrStringToString(result.stderr), Math.min(maxCharacters, 12_000)); return { ok: true, host: env.CHAT_CODEX_REMOTE_HOST, @@ -563,8 +628,8 @@ async function runCodexExecTool(input: unknown): Promise { stderrTruncated: stderr.truncated, }; } catch (err: any) { - const stdout = clipCodexOutput(bufferOrStringToString(err?.stdout), maxCharacters); - const stderr = clipCodexOutput(bufferOrStringToString(err?.stderr), Math.min(maxCharacters, 12_000)); + const stdout = clipRemoteOutput(bufferOrStringToString(err?.stdout), maxCharacters); + const stderr = clipRemoteOutput(bufferOrStringToString(err?.stderr), Math.min(maxCharacters, 12_000)); return { ok: false, error: err?.killed @@ -582,13 +647,83 @@ async function runCodexExecTool(input: unknown): Promise { } }; - return withCodexSshKeyPath(run); + return withDevboxSshKeyPath(run); +} + +async function runShellExecTool(input: 
unknown): Promise { + if (!env.CHAT_SHELL_TOOL_ENABLED) { + return { ok: false, error: "shell_exec is disabled." }; + } + + const args: ShellExecArgs = ShellExecArgsSchema.parse(input); + const maxCharacters = args.maxCharacters ?? DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS; + const sshTarget = buildDevboxSshTarget(); + const remoteCommand = buildRemoteShellCommand(args.command); + + const run = async (keyPath?: string) => { + const sshArgs = [ + "-o", + "BatchMode=yes", + "-o", + "StrictHostKeyChecking=accept-new", + "-o", + "UserKnownHostsFile=/tmp/sybil-codex-known-hosts", + "-p", + String(env.CHAT_CODEX_REMOTE_PORT), + ]; + + if (keyPath) { + sshArgs.push("-i", keyPath); + } + + sshArgs.push(sshTarget, remoteCommand); + + try { + const result = await execFileAsync("ssh", sshArgs, { + timeout: env.CHAT_SHELL_EXEC_TIMEOUT_MS, + maxBuffer: REMOTE_EXEC_MAX_BUFFER_BYTES, + }); + const stdout = clipRemoteOutput(bufferOrStringToString(result.stdout), maxCharacters); + const stderr = clipRemoteOutput(bufferOrStringToString(result.stderr), Math.min(maxCharacters, 12_000)); + return { + ok: true, + host: env.CHAT_CODEX_REMOTE_HOST, + workdir: env.CHAT_CODEX_REMOTE_WORKDIR, + command: args.command, + stdout: stdout.text, + stderr: stderr.text, + stdoutTruncated: stdout.truncated, + stderrTruncated: stderr.truncated, + }; + } catch (err: any) { + const stdout = clipRemoteOutput(bufferOrStringToString(err?.stdout), maxCharacters); + const stderr = clipRemoteOutput(bufferOrStringToString(err?.stderr), Math.min(maxCharacters, 12_000)); + return { + ok: false, + error: err?.killed + ? `Remote shell command timed out after ${env.CHAT_SHELL_EXEC_TIMEOUT_MS}ms.` + : err?.message ?? String(err), + exitCode: typeof err?.code === "number" ? err.code : null, + signal: typeof err?.signal === "string" ? 
err.signal : null, + host: env.CHAT_CODEX_REMOTE_HOST, + workdir: env.CHAT_CODEX_REMOTE_WORKDIR, + command: args.command, + stdout: stdout.text, + stderr: stderr.text, + stdoutTruncated: stdout.truncated, + stderrTruncated: stderr.truncated, + }; + } + }; + + return withDevboxSshKeyPath(run); } async function executeTool(name: string, args: unknown): Promise { if (name === "web_search") return runWebSearchTool(args); if (name === "fetch_url") return runFetchUrlTool(args); if (name === "codex_exec") return runCodexExecTool(args); + if (name === "shell_exec") return runShellExecTool(args); return { ok: false, error: `Unknown tool: ${name}` }; } @@ -604,13 +739,21 @@ function parseToolArgs(raw: unknown) { } function buildEventArgs(name: string, args: Record) { - if (name !== "codex_exec" || typeof args.prompt !== "string") { - return args; + if (name === "codex_exec" && typeof args.prompt === "string") { + return { + ...args, + prompt: clipText(args.prompt, 1_000), + }; } - return { - ...args, - prompt: clipText(args.prompt, 1_000), - }; + + if (name === "shell_exec" && typeof args.command === "string") { + return { + ...args, + command: clipText(args.command, 1_000), + }; + } + + return args; } function mergeUsage(acc: Required, usage: any) {