From fd9ee455fbf46f79848dd65666e00bb4446d6af5 Mon Sep 17 00:00:00 2001 From: James Magahern Date: Sat, 2 May 2026 19:38:15 -0700 Subject: [PATCH] experimental devbox support --- Dockerfile | 4 + docker-compose.example.yml | 11 ++ docs/api/rest.md | 12 ++- docs/api/streaming-chat.md | 5 +- server/README.md | 8 ++ server/src/env.ts | 47 ++++++++ server/src/llm/chat-tools.ts | 201 ++++++++++++++++++++++++++++++++++- 7 files changed, 282 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6ceb6b6..aba1a35 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,6 +24,10 @@ COPY server/package.json server/package-lock.json ./ COPY server/scripts ./scripts COPY server/prisma ./prisma +RUN apt-get update \ + && apt-get install -y --no-install-recommends openssh-client \ + && rm -rf /var/lib/apt/lists/* + RUN npm ci --omit=dev --no-audit --no-fund COPY --from=server-build /app/server/dist ./dist diff --git a/docker-compose.example.yml b/docker-compose.example.yml index b3fa4d8..4e2e62f 100644 --- a/docker-compose.example.yml +++ b/docker-compose.example.yml @@ -15,8 +15,19 @@ services: EXA_API_KEY: ${EXA_API_KEY:-} CHAT_WEB_SEARCH_ENGINE: ${CHAT_WEB_SEARCH_ENGINE:-exa} SEARXNG_BASE_URL: ${SEARXNG_BASE_URL:-} + CHAT_CODEX_TOOL_ENABLED: ${CHAT_CODEX_TOOL_ENABLED:-false} + CHAT_CODEX_REMOTE_HOST: ${CHAT_CODEX_REMOTE_HOST:-} + CHAT_CODEX_REMOTE_USER: ${CHAT_CODEX_REMOTE_USER:-} + CHAT_CODEX_REMOTE_PORT: ${CHAT_CODEX_REMOTE_PORT:-22} + CHAT_CODEX_REMOTE_WORKDIR: ${CHAT_CODEX_REMOTE_WORKDIR:-/workspace/sybil-codex} + # Prefer mounting a private key read-only and pointing CHAT_CODEX_SSH_KEY_PATH at it. 
+ CHAT_CODEX_SSH_KEY_PATH: ${CHAT_CODEX_SSH_KEY_PATH:-} + CHAT_CODEX_SSH_PRIVATE_KEY_B64: ${CHAT_CODEX_SSH_PRIVATE_KEY_B64:-} + CHAT_CODEX_EXEC_TIMEOUT_MS: ${CHAT_CODEX_EXEC_TIMEOUT_MS:-600000} volumes: - sybil_data:/data + # Example key mount for codex_exec: + # - ./secrets/devbox_id_ed25519:/run/secrets/codex_ssh_key:ro expose: - "8787" restart: unless-stopped diff --git a/docs/api/rest.md b/docs/api/rest.md index 75ceda3..3ba3060 100644 --- a/docs/api/rest.md +++ b/docs/api/rest.md @@ -170,9 +170,19 @@ Behavior notes: - For `openai` and `xai`, backend enables tool use during chat completion with an internal system instruction. - For `openai` and `xai`, image attachments are sent as chat-completions content parts alongside text. - For `anthropic`, image attachments are sent as Messages API `image` blocks using base64 source data; text attachments are added as `text` blocks. -- Available tool calls for chat: `web_search` and `fetch_url`. +- Available tool calls for chat: `web_search` and `fetch_url`. When `CHAT_CODEX_TOOL_ENABLED=true`, `codex_exec` is also available. - `web_search` returns ranked results with per-result summaries/snippets. Its backend engine is selected by `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`. - `fetch_url` fetches a URL and returns plaintext page content (HTML converted to text server-side). +- `codex_exec` delegates coding, shell, repository inspection, and other complex software tasks to a persistent remote Codex CLI workspace over SSH. The server runs `codex exec` with the shell-quoted prompt as its argument on the configured devbox inside `CHAT_CODEX_REMOTE_WORKDIR`. 
+- `codex_exec` configuration: + - `CHAT_CODEX_TOOL_ENABLED=true` + - `CHAT_CODEX_REMOTE_HOST` (required when enabled; SSH host/IP or `user@host`) + - `CHAT_CODEX_REMOTE_USER` (optional SSH user; ignored if `CHAT_CODEX_REMOTE_HOST` already contains `user@host`) + - `CHAT_CODEX_REMOTE_PORT=22` (optional) + - `CHAT_CODEX_REMOTE_WORKDIR=/workspace/sybil-codex` (optional; created on the remote host if missing) + - `CHAT_CODEX_SSH_KEY_PATH=/run/secrets/codex_ssh_key` (recommended private-key delivery via read-only volume mount) + - `CHAT_CODEX_SSH_PRIVATE_KEY_B64` (optional base64-encoded private key; fallback when a volume mount is not practical) + - `CHAT_CODEX_EXEC_TIMEOUT_MS=600000` (optional) - When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output. - `anthropic` currently runs without server-managed tool calls. diff --git a/docs/api/streaming-chat.md b/docs/api/streaming-chat.md index 1b10490..3628a09 100644 --- a/docs/api/streaming-chat.md +++ b/docs/api/streaming-chat.md @@ -127,11 +127,12 @@ Event order: ## Provider Streaming Behavior -- `openai`: backend may execute internal tool calls (`web_search`, `fetch_url`) before producing final text. +- `openai`/`xai`: backend may execute internal tool calls (`web_search`, `fetch_url`, and optional `codex_exec`) before producing final text. - `openai`: image attachments are sent as chat-completions content parts; text attachments are inlined as text parts. -- `xai`: same tool-enabled behavior as OpenAI. +- `xai`: same attachment behavior as OpenAI. - `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`. Image attachments are sent as base64 `image` blocks and text attachments are appended as `text` blocks. - `web_search` uses `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`. 
This only affects chat-mode tool calls, not search-mode endpoints. +- `codex_exec` is available only when `CHAT_CODEX_TOOL_ENABLED=true`. It SSHes to `CHAT_CODEX_REMOTE_HOST`, creates/uses `CHAT_CODEX_REMOTE_WORKDIR`, and runs `codex exec` with the shell-quoted prompt as its argument there. Prefer `CHAT_CODEX_SSH_KEY_PATH` with a read-only mounted private key; `CHAT_CODEX_SSH_PRIVATE_KEY_B64` is also supported. Tool-enabled streaming notes (`openai`/`xai`): - Stream still emits standard `meta`, `delta`, `done|error` events. diff --git a/server/README.md b/server/README.md index 6422958..0dd4c3e 100644 --- a/server/README.md +++ b/server/README.md @@ -46,6 +46,14 @@ If `ADMIN_TOKEN` is not set, the server runs in open mode (dev). - `EXA_API_KEY` - `CHAT_WEB_SEARCH_ENGINE` (`exa` by default, or `searxng` for chat tool calls only) - `SEARXNG_BASE_URL` (required when `CHAT_WEB_SEARCH_ENGINE=searxng`; instance must allow `format=json`) +- `CHAT_CODEX_TOOL_ENABLED` (`false` by default; enables the `codex_exec` chat tool for OpenAI/xAI) +- `CHAT_CODEX_REMOTE_HOST` (required when Codex tool is enabled; SSH host/IP or `user@host`) +- `CHAT_CODEX_REMOTE_USER` (optional SSH user when host does not include one) +- `CHAT_CODEX_REMOTE_PORT` (`22` by default) +- `CHAT_CODEX_REMOTE_WORKDIR` (`/workspace/sybil-codex` by default; created and reused on the devbox) +- `CHAT_CODEX_SSH_KEY_PATH` (recommended: path to a read-only mounted private key) +- `CHAT_CODEX_SSH_PRIVATE_KEY_B64` (optional fallback private key delivery) +- `CHAT_CODEX_EXEC_TIMEOUT_MS` (`600000` by default) ## API - `GET /health` diff --git a/server/src/env.ts b/server/src/env.ts index 3fe7d8f..e37ed54 100644 --- a/server/src/env.ts +++ b/server/src/env.ts @@ -20,6 +20,34 @@ const ChatWebSearchEngineSchema = z.preprocess( z.enum(["exa", "searxng"]).default("exa") ); +const BooleanFlagSchema = z.preprocess((value) => { + if (typeof value !== "string") return value; + const normalized = value.trim().toLowerCase(); + if (!normalized) return undefined; + if 
(["1", "true", "yes", "on"].includes(normalized)) return true; + if (["0", "false", "no", "off"].includes(normalized)) return false; + return value; +}, z.boolean().default(false)); + +const OptionalTrimmedStringSchema = z.preprocess( + (value) => (typeof value === "string" && value.trim() === "" ? undefined : value), + z.string().trim().min(1).optional() +); + +function defaultedPositiveInt(defaultValue: number) { + return z.preprocess( + (value) => (typeof value === "string" && value.trim() === "" ? undefined : value), + z.coerce.number().int().positive().default(defaultValue) + ); +} + +function defaultedTrimmedString(defaultValue: string) { + return z.preprocess( + (value) => (typeof value === "string" && value.trim() === "" ? undefined : value), + z.string().trim().min(1).default(defaultValue) + ); +} + const EnvSchema = z.object({ PORT: z.coerce.number().int().positive().default(8787), HOST: z.string().default("0.0.0.0"), @@ -36,6 +64,17 @@ const EnvSchema = z.object({ // Chat-mode web_search tool configuration. Search mode remains Exa-only for now. CHAT_WEB_SEARCH_ENGINE: ChatWebSearchEngineSchema, SEARXNG_BASE_URL: OptionalUrlSchema, + + // Optional chat-mode Codex tool. When enabled, the server SSHes into a remote + // devbox and runs `codex exec` in a persistent scratch directory there. 
+ CHAT_CODEX_TOOL_ENABLED: BooleanFlagSchema, + CHAT_CODEX_REMOTE_HOST: OptionalTrimmedStringSchema, + CHAT_CODEX_REMOTE_USER: OptionalTrimmedStringSchema, + CHAT_CODEX_REMOTE_PORT: defaultedPositiveInt(22), + CHAT_CODEX_REMOTE_WORKDIR: defaultedTrimmedString("/workspace/sybil-codex"), + CHAT_CODEX_SSH_KEY_PATH: OptionalTrimmedStringSchema, + CHAT_CODEX_SSH_PRIVATE_KEY_B64: OptionalTrimmedStringSchema, + CHAT_CODEX_EXEC_TIMEOUT_MS: defaultedPositiveInt(600_000), }).superRefine((value, ctx) => { if (value.CHAT_WEB_SEARCH_ENGINE === "searxng" && !value.SEARXNG_BASE_URL) { ctx.addIssue({ @@ -44,6 +83,14 @@ const EnvSchema = z.object({ message: "SEARXNG_BASE_URL is required when CHAT_WEB_SEARCH_ENGINE=searxng", }); } + + if (value.CHAT_CODEX_TOOL_ENABLED && !value.CHAT_CODEX_REMOTE_HOST) { + ctx.addIssue({ + code: "custom", + path: ["CHAT_CODEX_REMOTE_HOST"], + message: "CHAT_CODEX_REMOTE_HOST is required when CHAT_CODEX_TOOL_ENABLED=true", + }); + } }); export type Env = z.infer; diff --git a/server/src/llm/chat-tools.ts b/server/src/llm/chat-tools.ts index 8a365b7..9c9902b 100644 --- a/server/src/llm/chat-tools.ts +++ b/server/src/llm/chat-tools.ts @@ -1,3 +1,8 @@ +import { execFile } from "node:child_process"; +import { mkdtemp, rm, writeFile } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { promisify } from "node:util"; import { convert as htmlToText } from "html-to-text"; import type OpenAI from "openai"; import { z } from "zod"; @@ -13,6 +18,12 @@ const MAX_WEB_RESULTS = 10; const DEFAULT_FETCH_MAX_CHARACTERS = 12_000; const MAX_FETCH_MAX_CHARACTERS = 50_000; const FETCH_TIMEOUT_MS = 12_000; +const MAX_CODEX_PROMPT_CHARACTERS = 60_000; +const DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS = 24_000; +const MAX_CODEX_MAX_OUTPUT_CHARACTERS = 80_000; +const CODEX_EXEC_MAX_BUFFER_BYTES = 1_000_000; + +const execFileAsync = promisify(execFile); const WebSearchArgsSchema = z .object({ @@ -33,7 +44,43 @@ const FetchUrlArgsSchema = z }) 
.strict(); -const CHAT_TOOLS: any[] = [ +const CodexExecArgsSchema = z + .object({ + prompt: z.string().trim().min(1).max(MAX_CODEX_PROMPT_CHARACTERS), + maxCharacters: z.coerce.number().int().min(1_000).max(MAX_CODEX_MAX_OUTPUT_CHARACTERS).optional(), + }) + .strict(); + +type CodexExecArgs = z.infer; + +const CODEX_EXEC_TOOL = { + type: "function", + function: { + name: "codex_exec", + description: + "Delegate a coding, terminal, or multi-step software task to a persistent remote Codex CLI workspace. Use for complex code changes, repository inspection, running programs/tests, debugging build failures, or other tasks that need a real shell. Return the remote Codex summary and relevant stdout/stderr.", + parameters: { + type: "object", + properties: { + prompt: { + type: "string", + description: + "A complete, self-contained instruction for the remote Codex instance. Include the goal, relevant context, constraints, and what result to report back.", + }, + maxCharacters: { + type: "integer", + minimum: 1_000, + maximum: MAX_CODEX_MAX_OUTPUT_CHARACTERS, + description: "Maximum stdout/stderr characters returned to the model (default 24000).", + }, + }, + required: ["prompt"], + additionalProperties: false, + }, + }, +}; + +const BASE_CHAT_TOOLS: any[] = [ { type: "function", function: { @@ -95,10 +142,15 @@ const CHAT_TOOLS: any[] = [ }, ]; +const CHAT_TOOLS: any[] = env.CHAT_CODEX_TOOL_ENABLED ? [...BASE_CHAT_TOOLS, CODEX_EXEC_TOOL] : BASE_CHAT_TOOLS; + export const CHAT_TOOL_SYSTEM_PROMPT = "You can use tools to gather up-to-date web information when needed. " + "Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page. " + "Prefer tools when the user asks for current events, verification, sources, or details you do not already have. " + + (env.CHAT_CODEX_TOOL_ENABLED + ? 
"Use codex_exec when a request needs substantial coding work, repository inspection, shell commands, tests, debugging, or another complex task suited to a persistent Codex workspace. Provide codex_exec a complete prompt with the goal, constraints, and expected report-back format. " + : "") + "Do not fabricate tool outputs; reason only from provided tool results."; type ToolRunOutcome = { @@ -192,6 +244,14 @@ function buildToolSummary(name: string, args: Record, status: " return url ? `Fetching URL ${toSingleLine(url, 140)} failed.${errSuffix}` : `Fetching URL failed.${errSuffix}`; } + if (name === "codex_exec") { + const prompt = typeof args.prompt === "string" ? args.prompt.trim() : ""; + if (status === "completed") { + return prompt ? `Ran Codex task: '${toSingleLine(prompt, 120)}'.` : "Ran Codex task."; + } + return prompt ? `Codex task '${toSingleLine(prompt, 120)}' failed.${errSuffix}` : `Codex task failed.${errSuffix}`; + } + if (status === "completed") { return `Ran tool '${name}'.`; } @@ -402,9 +462,133 @@ async function runFetchUrlTool(input: unknown): Promise { }; } +function shellQuote(value: string) { + return `'${value.replace(/'/g, `'\\''`)}'`; +} + +function buildCodexSshTarget() { + const host = env.CHAT_CODEX_REMOTE_HOST; + if (!host) { + throw new Error("CHAT_CODEX_REMOTE_HOST not set"); + } + if (!env.CHAT_CODEX_REMOTE_USER || host.includes("@")) { + return host; + } + return `${env.CHAT_CODEX_REMOTE_USER}@${host}`; +} + +function buildRemoteCodexCommand(prompt: string) { + const workdir = env.CHAT_CODEX_REMOTE_WORKDIR.trim(); + const codexCommand = `codex exec ${shellQuote(prompt)}`; + return `mkdir -p ${shellQuote(workdir)} && cd ${shellQuote(workdir)} && ${codexCommand}`; +} + +async function withCodexSshKeyPath(fn: (keyPath?: string) => Promise) { + if (env.CHAT_CODEX_SSH_KEY_PATH) { + return fn(env.CHAT_CODEX_SSH_KEY_PATH); + } + + if (!env.CHAT_CODEX_SSH_PRIVATE_KEY_B64) { + return fn(undefined); + } + + const tmpDir = await 
mkdtemp(path.join(os.tmpdir(), "sybil-codex-ssh-")); + const keyPath = path.join(tmpDir, "id"); + try { + await writeFile(keyPath, Buffer.from(env.CHAT_CODEX_SSH_PRIVATE_KEY_B64, "base64"), { mode: 0o600 }); + return await fn(keyPath); + } finally { + await rm(tmpDir, { recursive: true, force: true }); + } +} + +function clipCodexOutput(value: string, maxCharacters: number) { + if (value.length <= maxCharacters) { + return { text: value, truncated: false }; + } + return { + text: `${value.slice(0, maxCharacters)}\n\n[truncated ${value.length - maxCharacters} characters]`, + truncated: true, + }; +} + +function bufferOrStringToString(value: unknown) { + if (typeof value === "string") return value; + if (Buffer.isBuffer(value)) return value.toString("utf8"); + return ""; +} + +async function runCodexExecTool(input: unknown): Promise { + if (!env.CHAT_CODEX_TOOL_ENABLED) { + return { ok: false, error: "codex_exec is disabled." }; + } + + const args: CodexExecArgs = CodexExecArgsSchema.parse(input); + const maxCharacters = args.maxCharacters ?? 
DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS; + const sshTarget = buildCodexSshTarget(); + const remoteCommand = buildRemoteCodexCommand(args.prompt); + + const run = async (keyPath?: string) => { + const sshArgs = [ + "-o", + "BatchMode=yes", + "-o", + "StrictHostKeyChecking=accept-new", + "-o", + "UserKnownHostsFile=/tmp/sybil-codex-known-hosts", + "-p", + String(env.CHAT_CODEX_REMOTE_PORT), + ]; + + if (keyPath) { + sshArgs.push("-i", keyPath); + } + + sshArgs.push(sshTarget, remoteCommand); + + try { + const result = await execFileAsync("ssh", sshArgs, { + timeout: env.CHAT_CODEX_EXEC_TIMEOUT_MS, + maxBuffer: CODEX_EXEC_MAX_BUFFER_BYTES, + }); + const stdout = clipCodexOutput(bufferOrStringToString(result.stdout), maxCharacters); + const stderr = clipCodexOutput(bufferOrStringToString(result.stderr), Math.min(maxCharacters, 12_000)); + return { + ok: true, + host: env.CHAT_CODEX_REMOTE_HOST, + workdir: env.CHAT_CODEX_REMOTE_WORKDIR, + stdout: stdout.text, + stderr: stderr.text, + stdoutTruncated: stdout.truncated, + stderrTruncated: stderr.truncated, + }; + } catch (err: any) { + const stdout = clipCodexOutput(bufferOrStringToString(err?.stdout), maxCharacters); + const stderr = clipCodexOutput(bufferOrStringToString(err?.stderr), Math.min(maxCharacters, 12_000)); + return { + ok: false, + error: err?.killed + ? `Remote Codex command timed out after ${env.CHAT_CODEX_EXEC_TIMEOUT_MS}ms.` + : err?.message ?? String(err), + exitCode: typeof err?.code === "number" ? err.code : null, + signal: typeof err?.signal === "string" ? 
err.signal : null, + host: env.CHAT_CODEX_REMOTE_HOST, + workdir: env.CHAT_CODEX_REMOTE_WORKDIR, + stdout: stdout.text, + stderr: stderr.text, + stdoutTruncated: stdout.truncated, + stderrTruncated: stderr.truncated, + }; + } + }; + + return withCodexSshKeyPath(run); +} + async function executeTool(name: string, args: unknown): Promise { if (name === "web_search") return runWebSearchTool(args); if (name === "fetch_url") return runFetchUrlTool(args); + if (name === "codex_exec") return runCodexExecTool(args); return { ok: false, error: `Unknown tool: ${name}` }; } @@ -419,6 +603,16 @@ function parseToolArgs(raw: unknown) { } } +function buildEventArgs(name: string, args: Record) { + if (name !== "codex_exec" || typeof args.prompt !== "string") { + return args; + } + return { + ...args, + prompt: clipText(args.prompt, 1_000), + }; +} + function mergeUsage(acc: Required, usage: any) { if (!usage) return false; acc.inputTokens += usage.prompt_tokens ?? 0; @@ -468,12 +662,13 @@ async function executeToolCallAndBuildEvent( : undefined; const completedAtMs = Date.now(); + const eventArgs = buildEventArgs(call.name, parsedArgs); const event: ToolExecutionEvent = { toolCallId: call.id, name: call.name, status, - summary: buildToolSummary(call.name, parsedArgs, status, error), - args: parsedArgs, + summary: buildToolSummary(call.name, eventArgs, status, error), + args: eventArgs, startedAt, completedAt: new Date(completedAtMs).toISOString(), durationMs: completedAtMs - startedAtMs,