experimental devbox support
This commit is contained in:
@@ -1,3 +1,8 @@
|
||||
import { execFile } from "node:child_process";
|
||||
import { mkdtemp, rm, writeFile } from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { promisify } from "node:util";
|
||||
import { convert as htmlToText } from "html-to-text";
|
||||
import type OpenAI from "openai";
|
||||
import { z } from "zod";
|
||||
@@ -13,6 +18,12 @@ const MAX_WEB_RESULTS = 10;
|
||||
const DEFAULT_FETCH_MAX_CHARACTERS = 12_000;
|
||||
const MAX_FETCH_MAX_CHARACTERS = 50_000;
|
||||
const FETCH_TIMEOUT_MS = 12_000;
|
||||
// Upper bound on the `prompt` string length accepted by CodexExecArgsSchema.
const MAX_CODEX_PROMPT_CHARACTERS = 60_000;
|
||||
// Output character cap runCodexExecTool applies when the caller omits `maxCharacters`.
const DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS = 24_000;
|
||||
// Largest `maxCharacters` value a caller may request (enforced by the schema and advertised in the tool definition).
const MAX_CODEX_MAX_OUTPUT_CHARACTERS = 80_000;
|
||||
// Passed as `maxBuffer` to the ssh child process spawned by runCodexExecTool.
const CODEX_EXEC_MAX_BUFFER_BYTES = 1_000_000;
|
||||
|
||||
// Promise-returning form of child_process.execFile, built with util.promisify.
const execFileAsync = promisify(execFile);
|
||||
|
||||
const WebSearchArgsSchema = z
|
||||
.object({
|
||||
@@ -33,7 +44,43 @@ const FetchUrlArgsSchema = z
|
||||
})
|
||||
.strict();
|
||||
|
||||
const CHAT_TOOLS: any[] = [
|
||||
/**
 * Runtime validator for the arguments of the `codex_exec` tool.
 *
 * - `prompt`: trimmed, non-empty, at most MAX_CODEX_PROMPT_CHARACTERS long.
 * - `maxCharacters`: optional integer (coerced from the raw value) between
 *   1,000 and MAX_CODEX_MAX_OUTPUT_CHARACTERS.
 *
 * The object is strict, so unknown keys are rejected.
 */
const CodexExecArgsSchema = z
  .object({
    prompt: z
      .string()
      .trim()
      .min(1)
      .max(MAX_CODEX_PROMPT_CHARACTERS),
    maxCharacters: z.coerce
      .number()
      .int()
      .min(1_000)
      .max(MAX_CODEX_MAX_OUTPUT_CHARACTERS)
      .optional(),
  })
  .strict();

/** Parsed `codex_exec` arguments, derived from the schema so the two cannot diverge. */
type CodexExecArgs = z.infer<typeof CodexExecArgsSchema>;
|
||||
|
||||
/**
 * Function-tool definition advertised to the model for `codex_exec`.
 *
 * The JSON-schema limits mirror CodexExecArgsSchema, and the default quoted in
 * the `maxCharacters` description is rendered from
 * DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS so the text cannot drift from the value
 * actually applied when the argument is omitted.
 */
const CODEX_EXEC_TOOL = {
  type: "function",
  function: {
    name: "codex_exec",
    description:
      "Delegate a coding, terminal, or multi-step software task to a persistent remote Codex CLI workspace. Use for complex code changes, repository inspection, running programs/tests, debugging build failures, or other tasks that need a real shell. Return the remote Codex summary and relevant stdout/stderr.",
    parameters: {
      type: "object",
      properties: {
        prompt: {
          type: "string",
          description:
            "A complete, self-contained instruction for the remote Codex instance. Include the goal, relevant context, constraints, and what result to report back.",
        },
        maxCharacters: {
          type: "integer",
          minimum: 1_000,
          maximum: MAX_CODEX_MAX_OUTPUT_CHARACTERS,
          // Interpolated rather than hard-coded; currently renders as "(default 24000)".
          description: `Maximum stdout/stderr characters returned to the model (default ${DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS}).`,
        },
      },
      required: ["prompt"],
      additionalProperties: false,
    },
  },
};
|
||||
|
||||
const BASE_CHAT_TOOLS: any[] = [
|
||||
{
|
||||
type: "function",
|
||||
function: {
|
||||
@@ -95,10 +142,15 @@ const CHAT_TOOLS: any[] = [
|
||||
},
|
||||
];
|
||||
|
||||
// Tool list sent with chat requests: the base tools, plus codex_exec only when the feature flag is on.
const CHAT_TOOLS: any[] = !env.CHAT_CODEX_TOOL_ENABLED
  ? BASE_CHAT_TOOLS
  : [...BASE_CHAT_TOOLS, CODEX_EXEC_TOOL];
|
||||
|
||||
/**
 * System-prompt text describing the available tools to the model.
 * The codex_exec guidance is included only when CHAT_CODEX_TOOL_ENABLED is set;
 * otherwise that segment contributes an empty string.
 */
export const CHAT_TOOL_SYSTEM_PROMPT = [
  "You can use tools to gather up-to-date web information when needed. ",
  "Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page. ",
  "Prefer tools when the user asks for current events, verification, sources, or details you do not already have. ",
  env.CHAT_CODEX_TOOL_ENABLED
    ? "Use codex_exec when a request needs substantial coding work, repository inspection, shell commands, tests, debugging, or another complex task suited to a persistent Codex workspace. Provide codex_exec a complete prompt with the goal, constraints, and expected report-back format. "
    : "",
  "Do not fabricate tool outputs; reason only from provided tool results.",
].join("");
|
||||
|
||||
type ToolRunOutcome = {
|
||||
@@ -192,6 +244,14 @@ function buildToolSummary(name: string, args: Record<string, unknown>, status: "
|
||||
return url ? `Fetching URL ${toSingleLine(url, 140)} failed.${errSuffix}` : `Fetching URL failed.${errSuffix}`;
|
||||
}
|
||||
|
||||
if (name === "codex_exec") {
|
||||
const prompt = typeof args.prompt === "string" ? args.prompt.trim() : "";
|
||||
if (status === "completed") {
|
||||
return prompt ? `Ran Codex task: '${toSingleLine(prompt, 120)}'.` : "Ran Codex task.";
|
||||
}
|
||||
return prompt ? `Codex task '${toSingleLine(prompt, 120)}' failed.${errSuffix}` : `Codex task failed.${errSuffix}`;
|
||||
}
|
||||
|
||||
if (status === "completed") {
|
||||
return `Ran tool '${name}'.`;
|
||||
}
|
||||
@@ -402,9 +462,133 @@ async function runFetchUrlTool(input: unknown): Promise<ToolRunOutcome> {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Wraps `value` in single quotes for use as one POSIX shell word.
 * Each embedded single quote is emitted as `'\''` (close quote, escaped quote, reopen quote).
 */
function shellQuote(value: string) {
  const escapedQuote = "'\\''";
  const body = value.split("'").join(escapedQuote);
  return "'" + body + "'";
}
|
||||
|
||||
/**
 * Resolves the ssh destination for the remote Codex host.
 *
 * Returns `user@host` when CHAT_CODEX_REMOTE_USER is set and the host does not
 * already contain an `@`; otherwise returns the host unchanged.
 *
 * @throws Error when CHAT_CODEX_REMOTE_HOST is not set.
 */
function buildCodexSshTarget() {
  const remoteHost = env.CHAT_CODEX_REMOTE_HOST;
  if (!remoteHost) {
    throw new Error("CHAT_CODEX_REMOTE_HOST not set");
  }

  const remoteUser = env.CHAT_CODEX_REMOTE_USER;
  const shouldPrefixUser = Boolean(remoteUser) && !remoteHost.includes("@");
  return shouldPrefixUser ? `${remoteUser}@${remoteHost}` : remoteHost;
}
|
||||
|
||||
/**
 * Builds the shell command line executed on the remote host: create the
 * configured working directory if needed, change into it, then run
 * `codex exec` with the prompt as a single quoted argument.
 *
 * NOTE(review): a prompt that begins with `-` is presumably parsed by the
 * remote CLI as an option rather than as the prompt — confirm whether a `--`
 * separator is supported and should be inserted.
 */
function buildRemoteCodexCommand(prompt: string) {
  const quotedWorkdir = shellQuote(env.CHAT_CODEX_REMOTE_WORKDIR.trim());
  const steps = [
    `mkdir -p ${quotedWorkdir}`,
    `cd ${quotedWorkdir}`,
    `codex exec ${shellQuote(prompt)}`,
  ];
  return steps.join(" && ");
}
|
||||
|
||||
/**
 * Runs `fn` with the path of the ssh identity file to use, if any.
 *
 * Resolution order:
 *  1. CHAT_CODEX_SSH_KEY_PATH, passed through as-is.
 *  2. CHAT_CODEX_SSH_PRIVATE_KEY_B64, decoded into a file named `id` (mode 0o600)
 *     inside a fresh temporary directory that is removed once `fn` settles.
 *  3. Neither set: `fn` is called with `undefined`.
 *
 * @returns whatever `fn` resolves to.
 */
async function withCodexSshKeyPath<T>(fn: (keyPath?: string) => Promise<T>) {
  const configuredKeyPath = env.CHAT_CODEX_SSH_KEY_PATH;
  if (configuredKeyPath) {
    return fn(configuredKeyPath);
  }

  const encodedKey = env.CHAT_CODEX_SSH_PRIVATE_KEY_B64;
  if (!encodedKey) {
    return fn(undefined);
  }

  const scratchDir = await mkdtemp(path.join(os.tmpdir(), "sybil-codex-ssh-"));
  try {
    const keyFile = path.join(scratchDir, "id");
    await writeFile(keyFile, Buffer.from(encodedKey, "base64"), { mode: 0o600 });
    // Await here so the cleanup below runs only after `fn` has finished with the key.
    return await fn(keyFile);
  } finally {
    await rm(scratchDir, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Limits `value` to at most `maxCharacters` UTF-16 code units.
 *
 * When clipping is needed, a `[truncated N characters]` marker is appended,
 * where N is the number of code units dropped. The cut never lands between the
 * two halves of a surrogate pair, so the result cannot end in a lone surrogate
 * (which would turn into U+FFFD or invalid text once serialized).
 *
 * @param value          Text to clip.
 * @param maxCharacters  Maximum code units to keep; negative or NaN values are
 *                       treated as 0 and fractional values are floored.
 * @returns the (possibly clipped) text and whether clipping happened.
 */
function clipCodexOutput(value: string, maxCharacters: number) {
  if (value.length <= maxCharacters) {
    return { text: value, truncated: false };
  }

  // Normalise odd limits so slice() and the reported count stay consistent.
  let cut = Number.isNaN(maxCharacters) ? 0 : Math.max(0, Math.floor(maxCharacters));

  if (cut > 0 && cut < value.length) {
    const before = value.charCodeAt(cut - 1);
    const after = value.charCodeAt(cut);
    const splitsSurrogatePair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsSurrogatePair) {
      // Back off one unit so the whole code point is dropped instead of half of it.
      cut -= 1;
    }
  }

  return {
    text: `${value.slice(0, cut)}\n\n[truncated ${value.length - cut} characters]`,
    truncated: true,
  };
}
|
||||
|
||||
/**
 * Normalises captured process output to a string: strings pass through,
 * Buffers are decoded as UTF-8, and anything else becomes the empty string.
 */
function bufferOrStringToString(value: unknown) {
  if (Buffer.isBuffer(value)) {
    return value.toString("utf8");
  }
  return typeof value === "string" ? value : "";
}
|
||||
|
||||
/**
 * Executes the `codex_exec` tool: validates the arguments, then runs
 * `codex exec <prompt>` on the configured remote host over ssh and returns the
 * clipped stdout/stderr.
 *
 * @param input  Raw tool arguments; validated with CodexExecArgsSchema.
 * @returns `ok: true` with the captured output on a zero exit, otherwise
 *          `ok: false` with a short `error`, the exit code/signal when known,
 *          and whatever output was captured before the failure.
 * @throws whatever CodexExecArgsSchema.parse throws for invalid arguments, and
 *         Error from buildCodexSshTarget when no remote host is configured.
 */
async function runCodexExecTool(input: unknown): Promise<ToolRunOutcome> {
  if (!env.CHAT_CODEX_TOOL_ENABLED) {
    return { ok: false, error: "codex_exec is disabled." };
  }

  const args: CodexExecArgs = CodexExecArgsSchema.parse(input);
  const maxCharacters = args.maxCharacters ?? DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS;
  const sshTarget = buildCodexSshTarget();
  const remoteCommand = buildRemoteCodexCommand(args.prompt);

  // Fields of a rejected execFile call that this function inspects.
  type ExecFailure = {
    killed?: unknown;
    code?: unknown;
    signal?: unknown;
    message?: unknown;
    stdout?: unknown;
    stderr?: unknown;
  };

  // Shared result shaping for both outcomes; stderr gets the tighter of the two caps.
  const captureOutput = (rawStdout: unknown, rawStderr: unknown) => {
    const stdout = clipCodexOutput(bufferOrStringToString(rawStdout), maxCharacters);
    const stderr = clipCodexOutput(bufferOrStringToString(rawStderr), Math.min(maxCharacters, 12_000));
    return {
      host: env.CHAT_CODEX_REMOTE_HOST,
      workdir: env.CHAT_CODEX_REMOTE_WORKDIR,
      stdout: stdout.text,
      stderr: stderr.text,
      stdoutTruncated: stdout.truncated,
      stderrTruncated: stderr.truncated,
    };
  };

  // Builds a short, bounded error string. For a failed command Node's own
  // message is "Command failed: <entire command line>\n<entire stderr>", which
  // would echo the full prompt and bypass the stderr clipping above, so it is
  // only used (clipped) when nothing more specific is known.
  const describeFailure = (failure: ExecFailure, original: unknown): string => {
    if (failure.killed) {
      return `Remote Codex command timed out after ${env.CHAT_CODEX_EXEC_TIMEOUT_MS}ms.`;
    }
    if (typeof failure.code === "number") {
      return `Remote Codex command exited with code ${failure.code}.`;
    }
    if (typeof failure.signal === "string") {
      return `Remote Codex command was terminated by signal ${failure.signal}.`;
    }
    const message = typeof failure.message === "string" ? failure.message : String(original);
    return clipCodexOutput(message, 2_000).text;
  };

  const run = async (keyPath?: string) => {
    // NOTE(review): the known-hosts file lives at a fixed path under /tmp while
    // host keys are accepted on first contact — confirm this location is not
    // writable by other users on the deployment host.
    const sshArgs = [
      "-o",
      "BatchMode=yes",
      "-o",
      "StrictHostKeyChecking=accept-new",
      "-o",
      "UserKnownHostsFile=/tmp/sybil-codex-known-hosts",
      "-p",
      String(env.CHAT_CODEX_REMOTE_PORT),
      ...(keyPath ? ["-i", keyPath] : []),
      sshTarget,
      remoteCommand,
    ];

    try {
      const result = await execFileAsync("ssh", sshArgs, {
        timeout: env.CHAT_CODEX_EXEC_TIMEOUT_MS,
        maxBuffer: CODEX_EXEC_MAX_BUFFER_BYTES,
      });
      return {
        ok: true,
        ...captureOutput(result.stdout, result.stderr),
      };
    } catch (err: unknown) {
      const failure: ExecFailure = typeof err === "object" && err !== null ? (err as ExecFailure) : {};
      return {
        ok: false,
        error: describeFailure(failure, err),
        exitCode: typeof failure.code === "number" ? failure.code : null,
        signal: typeof failure.signal === "string" ? failure.signal : null,
        ...captureOutput(failure.stdout, failure.stderr),
      };
    }
  };

  return withCodexSshKeyPath(run);
}
|
||||
|
||||
/**
 * Dispatches a tool call to its implementation by tool name.
 * Names without a handler resolve to a failed outcome instead of throwing.
 */
async function executeTool(name: string, args: unknown): Promise<ToolRunOutcome> {
  switch (name) {
    case "web_search":
      return runWebSearchTool(args);
    case "fetch_url":
      return runFetchUrlTool(args);
    case "codex_exec":
      return runCodexExecTool(args);
    default:
      return { ok: false, error: `Unknown tool: ${name}` };
  }
}
|
||||
|
||||
@@ -419,6 +603,16 @@ function parseToolArgs(raw: unknown) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Produces the argument object recorded on a tool execution event.
 * For `codex_exec` calls with a string prompt, the prompt is passed through
 * `clipText` with a limit of 1,000; every other call gets its arguments back untouched.
 */
function buildEventArgs(name: string, args: Record<string, unknown>) {
  const rawPrompt = args.prompt;
  if (name === "codex_exec" && typeof rawPrompt === "string") {
    return { ...args, prompt: clipText(rawPrompt, 1_000) };
  }
  return args;
}
|
||||
|
||||
function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
|
||||
if (!usage) return false;
|
||||
acc.inputTokens += usage.prompt_tokens ?? 0;
|
||||
@@ -468,12 +662,13 @@ async function executeToolCallAndBuildEvent(
|
||||
: undefined;
|
||||
|
||||
const completedAtMs = Date.now();
|
||||
const eventArgs = buildEventArgs(call.name, parsedArgs);
|
||||
const event: ToolExecutionEvent = {
|
||||
toolCallId: call.id,
|
||||
name: call.name,
|
||||
status,
|
||||
summary: buildToolSummary(call.name, parsedArgs, status, error),
|
||||
args: parsedArgs,
|
||||
summary: buildToolSummary(call.name, eventArgs, status, error),
|
||||
args: eventArgs,
|
||||
startedAt,
|
||||
completedAt: new Date(completedAtMs).toISOString(),
|
||||
durationMs: completedAtMs - startedAtMs,
|
||||
|
||||
Reference in New Issue
Block a user