adds shell tool
This commit is contained in:
@@ -24,6 +24,8 @@ services:
|
||||
CHAT_CODEX_SSH_KEY_PATH: ${CHAT_CODEX_SSH_KEY_PATH:-}
|
||||
CHAT_CODEX_SSH_PRIVATE_KEY_B64: ${CHAT_CODEX_SSH_PRIVATE_KEY_B64:-}
|
||||
CHAT_CODEX_EXEC_TIMEOUT_MS: ${CHAT_CODEX_EXEC_TIMEOUT_MS:-600000}
|
||||
CHAT_SHELL_TOOL_ENABLED: ${CHAT_SHELL_TOOL_ENABLED:-false}
|
||||
CHAT_SHELL_EXEC_TIMEOUT_MS: ${CHAT_SHELL_EXEC_TIMEOUT_MS:-120000}
|
||||
volumes:
|
||||
- sybil_data:/data
|
||||
# Example key mount for codex_exec:
|
||||
|
||||
@@ -170,12 +170,14 @@ Behavior notes:
|
||||
- For `openai` and `xai`, backend enables tool use during chat completion with an internal system instruction.
|
||||
- For `openai` and `xai`, image attachments are sent as chat-completions content parts alongside text.
|
||||
- For `anthropic`, image attachments are sent as Messages API `image` blocks using base64 source data; text attachments are added as `text` blocks.
|
||||
- Available tool calls for chat: `web_search` and `fetch_url`. When `CHAT_CODEX_TOOL_ENABLED=true`, `codex_exec` is also available.
|
||||
- Available tool calls for chat: `web_search` and `fetch_url`. When `CHAT_CODEX_TOOL_ENABLED=true`, `codex_exec` is also available. When `CHAT_SHELL_TOOL_ENABLED=true`, `shell_exec` is also available.
|
||||
- `web_search` returns ranked results with per-result summaries/snippets. Its backend engine is selected by `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`.
|
||||
- `fetch_url` fetches a URL and returns plaintext page content (HTML converted to text server-side).
|
||||
- `codex_exec` delegates coding, shell, repository inspection, and other complex software tasks to a persistent remote Codex CLI workspace over SSH. The server runs `codex exec <prompt>` on the configured devbox inside `CHAT_CODEX_REMOTE_WORKDIR`.
|
||||
- `codex_exec` configuration:
|
||||
- `shell_exec` runs arbitrary non-interactive shell commands on the same configured devbox, starting in `CHAT_CODEX_REMOTE_WORKDIR`. It uses `bash -lc` when bash exists, otherwise `sh -lc`, and does not run inside the Sybil server container.
|
||||
- Devbox tool configuration:
|
||||
- `CHAT_CODEX_TOOL_ENABLED=true`
|
||||
- `CHAT_SHELL_TOOL_ENABLED=true`
|
||||
- `CHAT_CODEX_REMOTE_HOST=<host-or-ip>` (required when enabled)
|
||||
- `CHAT_CODEX_REMOTE_USER=<ssh-user>` (optional; omitted if `CHAT_CODEX_REMOTE_HOST` already contains `user@host`)
|
||||
- `CHAT_CODEX_REMOTE_PORT=22` (optional)
|
||||
@@ -183,6 +185,7 @@ Behavior notes:
|
||||
- `CHAT_CODEX_SSH_KEY_PATH=/run/secrets/codex_ssh_key` (recommended private-key delivery via read-only volume mount)
|
||||
- `CHAT_CODEX_SSH_PRIVATE_KEY_B64=<base64-private-key>` (optional fallback when a volume mount is not practical)
|
||||
- `CHAT_CODEX_EXEC_TIMEOUT_MS=600000` (optional)
|
||||
- `CHAT_SHELL_EXEC_TIMEOUT_MS=120000` (optional; timeout in milliseconds applied to each `shell_exec` command)
|
||||
- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output.
|
||||
- `anthropic` currently runs without server-managed tool calls.
|
||||
|
||||
|
||||
@@ -127,12 +127,13 @@ Event order:
|
||||
|
||||
## Provider Streaming Behavior
|
||||
|
||||
- `openai`/`xai`: backend may execute internal tool calls (`web_search`, `fetch_url`, and optional `codex_exec`) before producing final text.
|
||||
- `openai`/`xai`: backend may execute internal tool calls (`web_search`, `fetch_url`, optional `codex_exec`, and optional `shell_exec`) before producing final text.
|
||||
- `openai`: image attachments are sent as chat-completions content parts; text attachments are inlined as text parts.
|
||||
- `xai`: same attachment behavior as OpenAI.
|
||||
- `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`. Image attachments are sent as base64 `image` blocks and text attachments are appended as `text` blocks.
|
||||
- `web_search` uses `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`. This only affects chat-mode tool calls, not search-mode endpoints.
|
||||
- `codex_exec` is available only when `CHAT_CODEX_TOOL_ENABLED=true`. It SSHes to `CHAT_CODEX_REMOTE_HOST`, creates/uses `CHAT_CODEX_REMOTE_WORKDIR`, and runs `codex exec <prompt>` there. Prefer `CHAT_CODEX_SSH_KEY_PATH` with a read-only mounted private key; `CHAT_CODEX_SSH_PRIVATE_KEY_B64` is also supported.
|
||||
- `shell_exec` is available only when `CHAT_SHELL_TOOL_ENABLED=true`. It uses the same devbox SSH configuration, starts in `CHAT_CODEX_REMOTE_WORKDIR`, and runs non-interactive shell commands there, not inside the Sybil server container.
|
||||
|
||||
Tool-enabled streaming notes (`openai`/`xai`):
|
||||
- Stream still emits standard `meta`, `delta`, `done|error` events.
|
||||
|
||||
@@ -54,6 +54,8 @@ If `ADMIN_TOKEN` is not set, the server runs in open mode (dev).
|
||||
- `CHAT_CODEX_SSH_KEY_PATH` (recommended: path to a read-only mounted private key)
|
||||
- `CHAT_CODEX_SSH_PRIVATE_KEY_B64` (optional fallback private key delivery)
|
||||
- `CHAT_CODEX_EXEC_TIMEOUT_MS` (`600000` by default)
|
||||
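- `CHAT_SHELL_TOOL_ENABLED` (`false` by default; enables the `shell_exec` chat tool for OpenAI/xAI on the same devbox used by `codex_exec`, i.e. the host configured through the `CHAT_CODEX_REMOTE_*` and `CHAT_CODEX_SSH_*` variables; `CHAT_CODEX_REMOTE_HOST` is required when this is `true`)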
|
||||
- `CHAT_SHELL_EXEC_TIMEOUT_MS` (`120000` by default)
|
||||
|
||||
## API
|
||||
- `GET /health`
|
||||
|
||||
@@ -75,6 +75,10 @@ const EnvSchema = z.object({
|
||||
CHAT_CODEX_SSH_KEY_PATH: OptionalTrimmedStringSchema,
|
||||
CHAT_CODEX_SSH_PRIVATE_KEY_B64: OptionalTrimmedStringSchema,
|
||||
CHAT_CODEX_EXEC_TIMEOUT_MS: defaultedPositiveInt(600_000),
|
||||
|
||||
// Optional arbitrary shell tool that runs only on the configured devbox.
|
||||
CHAT_SHELL_TOOL_ENABLED: BooleanFlagSchema,
|
||||
CHAT_SHELL_EXEC_TIMEOUT_MS: defaultedPositiveInt(120_000),
|
||||
}).superRefine((value, ctx) => {
|
||||
if (value.CHAT_WEB_SEARCH_ENGINE === "searxng" && !value.SEARXNG_BASE_URL) {
|
||||
ctx.addIssue({
|
||||
@@ -84,11 +88,11 @@ const EnvSchema = z.object({
|
||||
});
|
||||
}
|
||||
|
||||
if (value.CHAT_CODEX_TOOL_ENABLED && !value.CHAT_CODEX_REMOTE_HOST) {
|
||||
if ((value.CHAT_CODEX_TOOL_ENABLED || value.CHAT_SHELL_TOOL_ENABLED) && !value.CHAT_CODEX_REMOTE_HOST) {
|
||||
ctx.addIssue({
|
||||
code: "custom",
|
||||
path: ["CHAT_CODEX_REMOTE_HOST"],
|
||||
message: "CHAT_CODEX_REMOTE_HOST is required when CHAT_CODEX_TOOL_ENABLED=true",
|
||||
message: "CHAT_CODEX_REMOTE_HOST is required when CHAT_CODEX_TOOL_ENABLED=true or CHAT_SHELL_TOOL_ENABLED=true",
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
@@ -21,7 +21,10 @@ const FETCH_TIMEOUT_MS = 12_000;
|
||||
const MAX_CODEX_PROMPT_CHARACTERS = 60_000;
|
||||
const DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS = 24_000;
|
||||
const MAX_CODEX_MAX_OUTPUT_CHARACTERS = 80_000;
|
||||
const CODEX_EXEC_MAX_BUFFER_BYTES = 1_000_000;
|
||||
const MAX_SHELL_COMMAND_CHARACTERS = 20_000;
|
||||
const DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS = 24_000;
|
||||
const MAX_SHELL_MAX_OUTPUT_CHARACTERS = 80_000;
|
||||
const REMOTE_EXEC_MAX_BUFFER_BYTES = 1_000_000;
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
@@ -53,6 +56,15 @@ const CodexExecArgsSchema = z
|
||||
|
||||
type CodexExecArgs = z.infer<typeof CodexExecArgsSchema>;
|
||||
|
||||
/**
 * Argument schema for the `shell_exec` tool call.
 *
 * - `command`: trimmed, must be non-empty and at most
 *   MAX_SHELL_COMMAND_CHARACTERS characters long.
 * - `maxCharacters`: optional; coerced to a number, must be an integer between
 *   1000 and MAX_SHELL_MAX_OUTPUT_CHARACTERS.
 *
 * Keys other than these two are rejected (strict object).
 */
const ShellExecArgsSchema = z.strictObject({
  command: z.string().trim().min(1).max(MAX_SHELL_COMMAND_CHARACTERS),
  maxCharacters: z.coerce.number().int().min(1000).max(MAX_SHELL_MAX_OUTPUT_CHARACTERS).optional(),
});

/** Parsed `shell_exec` arguments as produced by ShellExecArgsSchema. */
type ShellExecArgs = z.infer<typeof ShellExecArgsSchema>;
|
||||
|
||||
const CODEX_EXEC_TOOL = {
|
||||
type: "function",
|
||||
function: {
|
||||
@@ -80,6 +92,33 @@ const CODEX_EXEC_TOOL = {
|
||||
},
|
||||
};
|
||||
|
||||
/**
 * Function-tool definition for `shell_exec`, advertised to the model when the
 * shell tool is enabled. The JSON-schema `parameters` mirror the bounds that
 * ShellExecArgsSchema enforces at execution time.
 */
const SHELL_EXEC_TOOL = {
  type: "function",
  function: {
    name: "shell_exec",
    description:
      "Run an arbitrary non-interactive shell command on the configured remote devbox, starting in the persistent scratch workspace. Use for quick Python scripts, calculations, file inspection, package/tool checks, tests, and command-line work that needs a real shell. This does not run inside the Sybil server container.",
    parameters: {
      type: "object",
      properties: {
        // The command text to run on the devbox.
        command: {
          type: "string",
          description:
            "Shell command to run on the devbox. The command is executed with bash -lc when bash exists, otherwise sh -lc, starting in the persistent scratch workspace.",
        },
        // Optional cap on how much output is returned to the model.
        maxCharacters: {
          type: "integer",
          minimum: 1000,
          maximum: MAX_SHELL_MAX_OUTPUT_CHARACTERS,
          description: "Maximum stdout/stderr characters returned to the model (default 24000).",
        },
      },
      required: ["command"],
      additionalProperties: false,
    },
  },
};
|
||||
|
||||
const BASE_CHAT_TOOLS: any[] = [
|
||||
{
|
||||
type: "function",
|
||||
@@ -142,7 +181,11 @@ const BASE_CHAT_TOOLS: any[] = [
|
||||
},
|
||||
];
|
||||
|
||||
// Tool definitions offered to the model: the base web tools always, plus
// `codex_exec` and `shell_exec` only when their respective feature flags are on.
// (The earlier single-expression form that knew only about `codex_exec` is
// dropped so the constant is declared exactly once.)
const CHAT_TOOLS: any[] = [
  ...BASE_CHAT_TOOLS,
  ...(env.CHAT_CODEX_TOOL_ENABLED ? [CODEX_EXEC_TOOL] : []),
  ...(env.CHAT_SHELL_TOOL_ENABLED ? [SHELL_EXEC_TOOL] : []),
];
|
||||
|
||||
export const CHAT_TOOL_SYSTEM_PROMPT =
|
||||
"You can use tools to gather up-to-date web information when needed. " +
|
||||
@@ -151,6 +194,9 @@ export const CHAT_TOOL_SYSTEM_PROMPT =
|
||||
(env.CHAT_CODEX_TOOL_ENABLED
|
||||
? "Use codex_exec when a request needs substantial coding work, repository inspection, shell commands, tests, debugging, or another complex task suited to a persistent Codex workspace. Provide codex_exec a complete prompt with the goal, constraints, and expected report-back format. "
|
||||
: "") +
|
||||
(env.CHAT_SHELL_TOOL_ENABLED
|
||||
? "Use shell_exec for direct command-line work on the remote devbox, including quick Python programs, calculations, file inspection, running tests, and small scripts. "
|
||||
: "") +
|
||||
"Do not fabricate tool outputs; reason only from provided tool results.";
|
||||
|
||||
type ToolRunOutcome = {
|
||||
@@ -252,6 +298,16 @@ function buildToolSummary(name: string, args: Record<string, unknown>, status: "
|
||||
return prompt ? `Codex task '${toSingleLine(prompt, 120)}' failed.${errSuffix}` : `Codex task failed.${errSuffix}`;
|
||||
}
|
||||
|
||||
if (name === "shell_exec") {
|
||||
const command = typeof args.command === "string" ? args.command.trim() : "";
|
||||
if (status === "completed") {
|
||||
return command ? `Ran devbox shell command: '${toSingleLine(command, 120)}'.` : "Ran devbox shell command.";
|
||||
}
|
||||
return command
|
||||
? `Devbox shell command '${toSingleLine(command, 120)}' failed.${errSuffix}`
|
||||
: `Devbox shell command failed.${errSuffix}`;
|
||||
}
|
||||
|
||||
if (status === "completed") {
|
||||
return `Ran tool '${name}'.`;
|
||||
}
|
||||
@@ -466,7 +522,7 @@ function shellQuote(value: string) {
|
||||
return `'${value.replace(/'/g, `'\\''`)}'`;
|
||||
}
|
||||
|
||||
function buildCodexSshTarget() {
|
||||
function buildDevboxSshTarget() {
|
||||
const host = env.CHAT_CODEX_REMOTE_HOST;
|
||||
if (!host) {
|
||||
throw new Error("CHAT_CODEX_REMOTE_HOST not set");
|
||||
@@ -483,7 +539,16 @@ function buildRemoteCodexCommand(prompt: string) {
|
||||
return `mkdir -p ${shellQuote(workdir)} && cd ${shellQuote(workdir)} && ${codexCommand}`;
|
||||
}
|
||||
|
||||
async function withCodexSshKeyPath<T>(fn: (keyPath?: string) => Promise<T>) {
|
||||
/**
 * Builds the command string handed to ssh for a `shell_exec` call.
 *
 * The resulting line creates the configured remote workdir if needed, changes
 * into it, and then runs `command` through `bash -lc` when `bash` is found on
 * the remote side, falling back to `sh -lc` otherwise. Both the workdir and the
 * command are passed through `shellQuote` before being embedded.
 *
 * @param command Command text to execute on the devbox.
 * @returns The full remote command line.
 */
function buildRemoteShellCommand(command: string) {
  const quotedWorkdir = shellQuote(env.CHAT_CODEX_REMOTE_WORKDIR.trim());
  const quotedCommand = shellQuote(command);

  const enterWorkdir = `mkdir -p ${quotedWorkdir} && cd ${quotedWorkdir}`;
  const runInShell = `if command -v bash >/dev/null 2>&1; then bash -lc ${quotedCommand}; else sh -lc ${quotedCommand}; fi`;

  return `${enterWorkdir} && ${runInShell}`;
}
|
||||
|
||||
async function withDevboxSshKeyPath<T>(fn: (keyPath?: string) => Promise<T>) {
|
||||
if (env.CHAT_CODEX_SSH_KEY_PATH) {
|
||||
return fn(env.CHAT_CODEX_SSH_KEY_PATH);
|
||||
}
|
||||
@@ -502,7 +567,7 @@ async function withCodexSshKeyPath<T>(fn: (keyPath?: string) => Promise<T>) {
|
||||
}
|
||||
}
|
||||
|
||||
function clipCodexOutput(value: string, maxCharacters: number) {
|
||||
function clipRemoteOutput(value: string, maxCharacters: number) {
|
||||
if (value.length <= maxCharacters) {
|
||||
return { text: value, truncated: false };
|
||||
}
|
||||
@@ -525,7 +590,7 @@ async function runCodexExecTool(input: unknown): Promise<ToolRunOutcome> {
|
||||
|
||||
const args: CodexExecArgs = CodexExecArgsSchema.parse(input);
|
||||
const maxCharacters = args.maxCharacters ?? DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS;
|
||||
const sshTarget = buildCodexSshTarget();
|
||||
const sshTarget = buildDevboxSshTarget();
|
||||
const remoteCommand = buildRemoteCodexCommand(args.prompt);
|
||||
|
||||
const run = async (keyPath?: string) => {
|
||||
@@ -549,10 +614,10 @@ async function runCodexExecTool(input: unknown): Promise<ToolRunOutcome> {
|
||||
try {
|
||||
const result = await execFileAsync("ssh", sshArgs, {
|
||||
timeout: env.CHAT_CODEX_EXEC_TIMEOUT_MS,
|
||||
maxBuffer: CODEX_EXEC_MAX_BUFFER_BYTES,
|
||||
maxBuffer: REMOTE_EXEC_MAX_BUFFER_BYTES,
|
||||
});
|
||||
const stdout = clipCodexOutput(bufferOrStringToString(result.stdout), maxCharacters);
|
||||
const stderr = clipCodexOutput(bufferOrStringToString(result.stderr), Math.min(maxCharacters, 12_000));
|
||||
const stdout = clipRemoteOutput(bufferOrStringToString(result.stdout), maxCharacters);
|
||||
const stderr = clipRemoteOutput(bufferOrStringToString(result.stderr), Math.min(maxCharacters, 12_000));
|
||||
return {
|
||||
ok: true,
|
||||
host: env.CHAT_CODEX_REMOTE_HOST,
|
||||
@@ -563,8 +628,8 @@ async function runCodexExecTool(input: unknown): Promise<ToolRunOutcome> {
|
||||
stderrTruncated: stderr.truncated,
|
||||
};
|
||||
} catch (err: any) {
|
||||
const stdout = clipCodexOutput(bufferOrStringToString(err?.stdout), maxCharacters);
|
||||
const stderr = clipCodexOutput(bufferOrStringToString(err?.stderr), Math.min(maxCharacters, 12_000));
|
||||
const stdout = clipRemoteOutput(bufferOrStringToString(err?.stdout), maxCharacters);
|
||||
const stderr = clipRemoteOutput(bufferOrStringToString(err?.stderr), Math.min(maxCharacters, 12_000));
|
||||
return {
|
||||
ok: false,
|
||||
error: err?.killed
|
||||
@@ -582,13 +647,83 @@ async function runCodexExecTool(input: unknown): Promise<ToolRunOutcome> {
|
||||
}
|
||||
};
|
||||
|
||||
return withCodexSshKeyPath(run);
|
||||
return withDevboxSshKeyPath(run);
|
||||
}
|
||||
|
||||
/**
 * Executes the `shell_exec` tool by running a command on the configured devbox
 * over ssh.
 *
 * Returns a disabled-tool outcome when CHAT_SHELL_TOOL_ENABLED is off. Otherwise
 * the input is validated with ShellExecArgsSchema (validation errors propagate
 * to the caller), the remote command line is built, and ssh is invoked with
 * CHAT_SHELL_EXEC_TIMEOUT_MS as the timeout. stdout is clipped to
 * `maxCharacters` and stderr to at most 12000 characters.
 *
 * On failure the outcome carries `ok: false` together with the exit code,
 * signal and whatever output was captured. The `error` text is clipped as well:
 * the message produced by `execFile` embeds the whole ssh command line and the
 * complete stderr, which would otherwise bypass the output limits above.
 *
 * @param input Raw tool arguments as supplied by the model.
 * @returns Outcome object describing the command result.
 */
async function runShellExecTool(input: unknown): Promise<ToolRunOutcome> {
  if (!env.CHAT_SHELL_TOOL_ENABLED) {
    return { ok: false, error: "shell_exec is disabled." };
  }

  const args: ShellExecArgs = ShellExecArgsSchema.parse(input);
  const maxCharacters = args.maxCharacters ?? DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS;
  const stderrLimit = Math.min(maxCharacters, 12_000);
  // Upper bound for the free-form error text returned to the model.
  const errorLimit = Math.min(maxCharacters, 2_000);
  const sshTarget = buildDevboxSshTarget();
  const remoteCommand = buildRemoteShellCommand(args.command);

  const run = async (keyPath?: string) => {
    const sshArgs = [
      "-o",
      "BatchMode=yes",
      "-o",
      "StrictHostKeyChecking=accept-new",
      "-o",
      "UserKnownHostsFile=/tmp/sybil-codex-known-hosts",
      "-p",
      String(env.CHAT_CODEX_REMOTE_PORT),
    ];

    if (keyPath) {
      sshArgs.push("-i", keyPath);
    }

    sshArgs.push(sshTarget, remoteCommand);

    try {
      const result = await execFileAsync("ssh", sshArgs, {
        timeout: env.CHAT_SHELL_EXEC_TIMEOUT_MS,
        maxBuffer: REMOTE_EXEC_MAX_BUFFER_BYTES,
      });
      const stdout = clipRemoteOutput(bufferOrStringToString(result.stdout), maxCharacters);
      const stderr = clipRemoteOutput(bufferOrStringToString(result.stderr), stderrLimit);
      return {
        ok: true,
        host: env.CHAT_CODEX_REMOTE_HOST,
        workdir: env.CHAT_CODEX_REMOTE_WORKDIR,
        command: args.command,
        stdout: stdout.text,
        stderr: stderr.text,
        stdoutTruncated: stdout.truncated,
        stderrTruncated: stderr.truncated,
      };
    } catch (err: any) {
      const stdout = clipRemoteOutput(bufferOrStringToString(err?.stdout), maxCharacters);
      const stderr = clipRemoteOutput(bufferOrStringToString(err?.stderr), stderrLimit);
      // `killed` is set when the child was terminated by the timeout above.
      const rawError: string = err?.killed
        ? `Remote shell command timed out after ${env.CHAT_SHELL_EXEC_TIMEOUT_MS}ms.`
        : String(err?.message ?? err);
      return {
        ok: false,
        // Clip: the underlying message repeats the full command line and stderr.
        error: clipRemoteOutput(rawError, errorLimit).text,
        exitCode: typeof err?.code === "number" ? err.code : null,
        signal: typeof err?.signal === "string" ? err.signal : null,
        host: env.CHAT_CODEX_REMOTE_HOST,
        workdir: env.CHAT_CODEX_REMOTE_WORKDIR,
        command: args.command,
        stdout: stdout.text,
        stderr: stderr.text,
        stdoutTruncated: stdout.truncated,
        stderrTruncated: stderr.truncated,
      };
    }
  };

  return withDevboxSshKeyPath(run);
}
|
||||
|
||||
/**
 * Dispatches a tool call to the runner registered for `name`.
 *
 * @param name Tool name requested by the model.
 * @param args Raw, unvalidated tool arguments; each runner validates its own.
 * @returns The runner's outcome, or an `ok: false` outcome for an unknown name.
 */
async function executeTool(name: string, args: unknown): Promise<ToolRunOutcome> {
  switch (name) {
    case "web_search":
      return runWebSearchTool(args);
    case "fetch_url":
      return runFetchUrlTool(args);
    case "codex_exec":
      return runCodexExecTool(args);
    case "shell_exec":
      return runShellExecTool(args);
    default:
      return { ok: false, error: `Unknown tool: ${name}` };
  }
}
|
||||
|
||||
@@ -604,15 +739,23 @@ function parseToolArgs(raw: unknown) {
|
||||
}
|
||||
|
||||
/**
 * Produces the argument object attached to a tool-call event.
 *
 * The potentially long free-text argument of each devbox tool is clipped to
 * 1000 characters: `prompt` for `codex_exec` and `command` for `shell_exec`.
 * Any other tool, or a call whose text argument is not a string, gets `args`
 * back unchanged.
 *
 * The former early-return guard (`name !== "codex_exec" || ...`) is removed on
 * purpose: it returned before the `shell_exec` branch could ever run.
 *
 * @param name Tool name.
 * @param args Parsed tool arguments.
 * @returns `args`, or a shallow copy with the long text field clipped.
 */
function buildEventArgs(name: string, args: Record<string, unknown>) {
  if (name === "codex_exec" && typeof args.prompt === "string") {
    return {
      ...args,
      prompt: clipText(args.prompt, 1_000),
    };
  }

  if (name === "shell_exec" && typeof args.command === "string") {
    return {
      ...args,
      command: clipText(args.command, 1_000),
    };
  }

  return args;
}
|
||||
|
||||
function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
|
||||
if (!usage) return false;
|
||||
acc.inputTokens += usage.prompt_tokens ?? 0;
|
||||
|
||||
Reference in New Issue
Block a user