Files
Sybil-2/server/src/llm/chat-tools.ts

1313 lines
45 KiB
TypeScript
Raw Normal View History

2026-05-02 19:38:15 -07:00
import { execFile } from "node:child_process";
import { mkdtemp, rm, writeFile } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { promisify } from "node:util";
import { convert as htmlToText } from "html-to-text";
import type OpenAI from "openai";
import { z } from "zod";
2026-05-02 18:14:41 -07:00
import { env } from "../env.js";
import { exaClient } from "../search/exa.js";
2026-05-02 18:14:41 -07:00
import { searchSearxng } from "../search/searxng.js";
2026-05-02 21:44:32 -07:00
import { buildOpenAIConversationMessage, buildOpenAIResponsesInputMessage } from "./message-content.js";
import type { ChatMessage } from "./types.js";
2026-05-02 21:19:52 -07:00
/** Maximum number of model round-trips per chat turn before the tool loop gives up (from env). */
const MAX_TOOL_ROUNDS = env.CHAT_MAX_TOOL_ROUNDS;

// web_search limits.
const DEFAULT_WEB_RESULTS = 5;
const MAX_WEB_RESULTS = 10;

// fetch_url limits.
const DEFAULT_FETCH_MAX_CHARACTERS = 12_000;
const MAX_FETCH_MAX_CHARACTERS = 50_000;
const FETCH_TIMEOUT_MS = 12_000;

// codex_exec limits.
const MAX_CODEX_PROMPT_CHARACTERS = 60_000;
const DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS = 24_000;
const MAX_CODEX_MAX_OUTPUT_CHARACTERS = 80_000;

// shell_exec limits.
const MAX_SHELL_COMMAND_CHARACTERS = 20_000;
const DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS = 24_000;
const MAX_SHELL_MAX_OUTPUT_CHARACTERS = 80_000;

/** `maxBuffer` passed to execFile for remote ssh commands (applies to stdout and stderr). */
const REMOTE_EXEC_MAX_BUFFER_BYTES = 1_000_000;

/** How many times per turn a reply that announces a tool run without calling one is retried. */
const MAX_DANGLING_TOOL_INTENT_RETRIES = 1;

/** Promise-returning wrapper around child_process.execFile. */
const execFileAsync = promisify(execFile);
/**
 * Runtime validator for `web_search` tool arguments.
 * `.strict()` makes parsing fail on keys that are not declared here.
 */
const WebSearchArgsSchema = z
.object({
// Trimmed; must be non-empty after trimming.
query: z.string().trim().min(1),
// Coerced to a number before validation; integer between 1 and MAX_WEB_RESULTS.
numResults: z.coerce.number().int().min(1).max(MAX_WEB_RESULTS).optional(),
type: z.enum(["auto", "fast", "instant"]).optional(),
// Domain filters: at most 25 non-empty entries each.
includeDomains: z.array(z.string().trim().min(1)).max(25).optional(),
excludeDomains: z.array(z.string().trim().min(1)).max(25).optional(),
})
.strict();
2026-05-02 18:14:41 -07:00
/** Parsed `web_search` arguments, inferred from WebSearchArgsSchema. */
type WebSearchArgs = z.infer<typeof WebSearchArgsSchema>;
/**
 * Runtime validator for `fetch_url` tool arguments.
 * `.strict()` makes parsing fail on keys that are not declared here.
 */
const FetchUrlArgsSchema = z
.object({
// Trimmed, then validated as a URL; the scheme is checked separately by assertSafeFetchUrl.
url: z.string().trim().url(),
// Coerced to a number before validation; integer between 500 and MAX_FETCH_MAX_CHARACTERS.
maxCharacters: z.coerce.number().int().min(500).max(MAX_FETCH_MAX_CHARACTERS).optional(),
})
.strict();
2026-05-02 19:38:15 -07:00
/**
 * Runtime validator for `codex_exec` tool arguments.
 * `.strict()` makes parsing fail on keys that are not declared here.
 */
const CodexExecArgsSchema = z
.object({
// Trimmed; 1..MAX_CODEX_PROMPT_CHARACTERS characters.
prompt: z.string().trim().min(1).max(MAX_CODEX_PROMPT_CHARACTERS),
// Coerced to a number before validation; integer between 1000 and MAX_CODEX_MAX_OUTPUT_CHARACTERS.
maxCharacters: z.coerce.number().int().min(1_000).max(MAX_CODEX_MAX_OUTPUT_CHARACTERS).optional(),
})
.strict();
/** Parsed `codex_exec` arguments, inferred from CodexExecArgsSchema. */
type CodexExecArgs = z.infer<typeof CodexExecArgsSchema>;
2026-05-02 19:52:09 -07:00
/**
 * Runtime validator for `shell_exec` tool arguments.
 * `.strict()` makes parsing fail on keys that are not declared here.
 */
const ShellExecArgsSchema = z
.object({
// Trimmed; 1..MAX_SHELL_COMMAND_CHARACTERS characters.
command: z.string().trim().min(1).max(MAX_SHELL_COMMAND_CHARACTERS),
// Coerced to a number before validation; integer between 1000 and MAX_SHELL_MAX_OUTPUT_CHARACTERS.
maxCharacters: z.coerce.number().int().min(1_000).max(MAX_SHELL_MAX_OUTPUT_CHARACTERS).optional(),
})
.strict();
/** Parsed `shell_exec` arguments, inferred from ShellExecArgsSchema. */
type ShellExecArgs = z.infer<typeof ShellExecArgsSchema>;
2026-05-02 19:38:15 -07:00
/**
 * Chat Completions function-tool definition for `codex_exec`.
 * Added to CHAT_TOOLS only when env.CHAT_CODEX_TOOL_ENABLED is set.
 * The JSON schema mirrors CodexExecArgsSchema, which performs the actual validation.
 */
const CODEX_EXEC_TOOL = {
  type: "function",
  function: {
    name: "codex_exec",
    description:
      "Delegate a coding, terminal, or multi-step software task to a persistent remote Codex CLI workspace. Use for complex code changes, repository inspection, running programs/tests, debugging build failures, or other tasks that need a real shell. The task runs non-interactively; the remote Codex instance must make reasonable assumptions, complete the task, and return a final summary with relevant stdout/stderr.",
    parameters: {
      type: "object",
      properties: {
        prompt: {
          type: "string",
          description:
            "A complete, self-contained instruction for the remote Codex instance. Include the goal, relevant context, constraints, and what result to report back.",
        },
        maxCharacters: {
          type: "integer",
          minimum: 1_000,
          maximum: MAX_CODEX_MAX_OUTPUT_CHARACTERS,
          // Derived from the constant so the advertised default cannot drift from the real one.
          description: `Maximum stdout/stderr characters returned to the model (default ${DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS}).`,
        },
      },
      required: ["prompt"],
      additionalProperties: false,
    },
  },
};
2026-05-02 19:52:09 -07:00
/**
 * Chat Completions function-tool definition for `shell_exec`.
 * Added to CHAT_TOOLS only when env.CHAT_SHELL_TOOL_ENABLED is set.
 * The JSON schema mirrors ShellExecArgsSchema, which performs the actual validation.
 */
const SHELL_EXEC_TOOL = {
  type: "function",
  function: {
    name: "shell_exec",
    description:
      "Run an arbitrary non-interactive shell command on the configured remote devbox, starting in the persistent scratch workspace. Use for quick Python scripts, calculations, file inspection, package/tool checks, tests, and command-line work that needs a real shell. This does not run inside the Sybil server container.",
    parameters: {
      type: "object",
      properties: {
        command: {
          type: "string",
          description:
            "Shell command to run on the devbox. The command is executed with bash -lc when bash exists, otherwise sh -lc, starting in the persistent scratch workspace.",
        },
        maxCharacters: {
          type: "integer",
          minimum: 1_000,
          maximum: MAX_SHELL_MAX_OUTPUT_CHARACTERS,
          // Derived from the constant so the advertised default cannot drift from the real one.
          description: `Maximum stdout/stderr characters returned to the model (default ${DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS}).`,
        },
      },
      required: ["command"],
      additionalProperties: false,
    },
  },
};
2026-05-02 19:38:15 -07:00
/**
 * Function-tool definitions that are always offered to the model: `web_search` and `fetch_url`.
 * The JSON schemas mirror WebSearchArgsSchema and FetchUrlArgsSchema, which perform the actual validation.
 */
const BASE_CHAT_TOOLS: any[] = [
  {
    type: "function",
    function: {
      name: "web_search",
      description:
        "Search the public web for recent or factual information. Returns ranked results with per-result summaries and snippets.",
      parameters: {
        type: "object",
        properties: {
          query: { type: "string", description: "Search query." },
          numResults: {
            type: "integer",
            minimum: 1,
            maximum: MAX_WEB_RESULTS,
            // Derived from the constant so the advertised default cannot drift from the real one.
            description: `Number of results to return (default ${DEFAULT_WEB_RESULTS}).`,
          },
          type: {
            type: "string",
            enum: ["auto", "fast", "instant"],
            description: "Search mode.",
          },
          includeDomains: {
            type: "array",
            items: { type: "string" },
            description: "Only include these domains.",
          },
          excludeDomains: {
            type: "array",
            items: { type: "string" },
            description: "Exclude these domains.",
          },
        },
        required: ["query"],
        additionalProperties: false,
      },
    },
  },
  {
    type: "function",
    function: {
      name: "fetch_url",
      description:
        "Fetch a webpage by URL and return readable plaintext content extracted from the page for deeper inspection.",
      parameters: {
        type: "object",
        properties: {
          url: { type: "string", description: "Absolute URL to fetch, including http/https." },
          maxCharacters: {
            type: "integer",
            minimum: 500,
            maximum: MAX_FETCH_MAX_CHARACTERS,
            // Derived from the constant so the advertised default cannot drift from the real one.
            description: `Maximum response text characters returned (default ${DEFAULT_FETCH_MAX_CHARACTERS}).`,
          },
        },
        required: ["url"],
        additionalProperties: false,
      },
    },
  },
];
2026-05-02 19:52:09 -07:00
/**
 * Tools offered through the Chat Completions API: the base web tools, followed by
 * `codex_exec` and `shell_exec` when their env flags are enabled (in that order).
 */
const CHAT_TOOLS: any[] = BASE_CHAT_TOOLS.concat(
  env.CHAT_CODEX_TOOL_ENABLED ? [CODEX_EXEC_TOOL] : [],
  env.CHAT_SHELL_TOOL_ENABLED ? [SHELL_EXEC_TOOL] : [],
);
2026-05-02 19:38:15 -07:00
2026-05-02 21:44:32 -07:00
/**
 * CHAT_TOOLS reshaped for the Responses API: function tools are flattened so that
 * name/description/parameters sit at the top level, with `strict: false`.
 * Entries whose type is not "function" are passed through unchanged.
 */
const RESPONSES_CHAT_TOOLS: any[] = CHAT_TOOLS.map((tool) => {
  if (tool?.type === "function") {
    const { name, description, parameters } = tool.function;
    return { type: "function", name, description, parameters, strict: false };
  }
  return tool;
});
/**
 * System prompt prepended to every tool-aware conversation.
 * The codex_exec and shell_exec guidance is included only when the matching env flag is enabled.
 * Each fragment ends with a space so the pieces join into one paragraph.
 */
export const CHAT_TOOL_SYSTEM_PROMPT = [
  "You can use tools to gather up-to-date web information when needed. ",
  "Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page. ",
  "Prefer tools when the user asks for current events, verification, sources, or details you do not already have. ",
  "When you decide tool use is needed, call the tool immediately in the same response; do not say you are running a tool unless you actually call it. ",
  env.CHAT_CODEX_TOOL_ENABLED
    ? "Use codex_exec when a request needs substantial coding work, repository inspection, shell commands, tests, debugging, or another complex task suited to a persistent Codex workspace. Provide codex_exec a complete prompt with the goal, constraints, assumptions, and expected report-back format. Never ask codex_exec to wait for user input or run interactive commands. "
    : "",
  env.CHAT_SHELL_TOOL_ENABLED
    ? "Use shell_exec for direct non-interactive command-line work on the remote devbox, including quick Python programs, calculations, file inspection, running tests, and small scripts. "
    : "",
  "Do not fabricate tool outputs; reason only from provided tool results.",
].join("");
/**
 * Result object returned by every tool runner and serialized back to the model.
 * `ok` decides whether the call is reported as "completed" or "failed"; all other
 * keys are tool-specific.
 */
type ToolRunOutcome = {
ok: boolean;
[key: string]: unknown;
};
/** Token counts for a chat turn; the tool loops sum them across all model round-trips. */
type ToolAwareUsage = {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
};
/** Final result of a tool-aware completion. */
type ToolAwareCompletionResult = {
// Final assistant text (or the tool-call-limit notice).
text: string;
// Present only when at least one API response reported usage.
usage?: ToolAwareUsage;
// Raw API responses plus bookkeeping, for diagnostics.
raw: unknown;
// One entry per executed tool call, in execution order.
toolEvents: ToolExecutionEvent[];
};
2026-03-02 16:39:05 -08:00
/** Events of a streaming tool-aware completion: text deltas, tool-call reports, and the final result. */
export type ToolAwareStreamingEvent =
| { type: "delta"; text: string }
| { type: "tool_call"; event: ToolExecutionEvent }
| { type: "done"; result: ToolAwareCompletionResult };
/** Inputs shared by the tool-aware completion runners. */
type ToolAwareCompletionParams = {
client: OpenAI;
model: string;
messages: ChatMessage[];
temperature?: number;
maxTokens?: number;
// Called (and awaited) after each tool call finishes, before the loop continues.
onToolEvent?: (event: ToolExecutionEvent) => void | Promise<void>;
// Extra fields merged into each tool-call log line.
logContext?: {
provider: string;
model: string;
chatId?: string;
};
};
/** Record of one executed tool call, used for logging, persistence and UI display. */
export type ToolExecutionEvent = {
toolCallId: string;
name: string;
status: "completed" | "failed";
// Single-sentence human-readable description built by buildToolSummary.
summary: string;
// Parsed arguments; long codex_exec prompts / shell_exec commands are clipped (see buildEventArgs).
args: Record<string, unknown>;
// ISO-8601 timestamps.
startedAt: string;
completedAt: string;
durationMs: number;
// Set only when status is "failed".
error?: string;
// JSON-serialized tool result clipped to a short preview.
resultPreview?: string;
};
/**
 * Normalizes extracted page text: drops carriage returns, removes spaces/tabs that
 * trail a line, collapses runs of three or more newlines to a single blank line,
 * and trims both ends.
 */
function compactWhitespace(input: string) {
  const withoutCarriageReturns = input.replace(/\r/g, "");
  const withoutTrailingBlanks = withoutCarriageReturns.replace(/[ \t]+\n/g, "\n");
  const withSingleBlankLines = withoutTrailingBlanks.replace(/\n{3,}/g, "\n\n");
  return withSingleBlankLines.trim();
}
/**
 * Returns `input` unchanged when it has at most `maxCharacters` UTF-16 code units;
 * otherwise returns the first `maxCharacters` code units followed by "...".
 */
function clipText(input: string, maxCharacters: number) {
  if (input.length <= maxCharacters) {
    return input;
  }
  const head = input.slice(0, maxCharacters);
  return `${head}...`;
}
/**
 * Returns a shallow copy of `value` when it is a non-null, non-array object;
 * returns a fresh empty object for everything else.
 */
function toRecord(value: unknown): Record<string, unknown> {
  const isObjectLike = typeof value === "object" && value !== null && !Array.isArray(value);
  if (!isObjectLike) {
    return {};
  }
  return { ...(value as Record<string, unknown>) };
}
/**
 * Flattens `value` onto one line (newlines and whitespace runs become single spaces,
 * ends trimmed) and clips the result to `maxLength` characters via clipText.
 */
function toSingleLine(value: string, maxLength = 220) {
  const withoutLineBreaks = value.replace(/\r?\n+/g, " ");
  const flattened = withoutLineBreaks.replace(/\s+/g, " ").trim();
  return clipText(flattened, maxLength);
}
/**
 * Builds the one-sentence, human-readable summary stored on a tool event.
 *
 * @param name   Tool name as requested by the model.
 * @param args   Parsed tool arguments; only the tool's main string argument is quoted.
 * @param status Outcome of the call.
 * @param error  Error text; appended (single-line, clipped to 140 chars) only when failed.
 * @returns A sentence such as "Performed web search for '…'." or "Tool 'x' failed. Error: …".
 */
function buildToolSummary(name: string, args: Record<string, unknown>, status: "completed" | "failed", error?: string) {
  const errSuffix = status === "failed" && error ? ` Error: ${toSingleLine(error, 140)}` : "";
  const succeeded = status === "completed";

  // Returns the trimmed, single-line, clipped value of a string argument, or "" when absent/blank.
  const argText = (key: string, maxLength: number) => {
    const raw = args[key];
    const trimmed = typeof raw === "string" ? raw.trim() : "";
    return trimmed ? toSingleLine(trimmed, maxLength) : "";
  };

  switch (name) {
    case "web_search": {
      const query = argText("query", 100);
      if (succeeded) {
        return query ? `Performed web search for '${query}'.` : "Performed web search.";
      }
      return query ? `Web search for '${query}' failed.${errSuffix}` : `Web search failed.${errSuffix}`;
    }
    case "fetch_url": {
      const url = argText("url", 140);
      if (succeeded) {
        return url ? `Fetched URL ${url}.` : "Fetched URL.";
      }
      return url ? `Fetching URL ${url} failed.${errSuffix}` : `Fetching URL failed.${errSuffix}`;
    }
    case "codex_exec": {
      const prompt = argText("prompt", 120);
      if (succeeded) {
        return prompt ? `Ran Codex task: '${prompt}'.` : "Ran Codex task.";
      }
      return prompt ? `Codex task '${prompt}' failed.${errSuffix}` : `Codex task failed.${errSuffix}`;
    }
    case "shell_exec": {
      const command = argText("command", 120);
      if (succeeded) {
        return command ? `Ran devbox shell command: '${command}'.` : "Ran devbox shell command.";
      }
      return command
        ? `Devbox shell command '${command}' failed.${errSuffix}`
        : `Devbox shell command failed.${errSuffix}`;
    }
    default:
      return succeeded ? `Ran tool '${name}'.` : `Tool '${name}' failed.${errSuffix}`;
  }
}
/**
 * Writes one `[tool_call] {json}` line per tool event: console.error for failed
 * calls, console.info otherwise. Event fields override same-named context fields.
 */
function logToolEvent(event: ToolExecutionEvent, context?: ToolAwareCompletionParams["logContext"]) {
  const serialized = JSON.stringify({ kind: "tool_call", ...context, ...event });
  const line = `[tool_call] ${serialized}`;
  if (event.status !== "failed") {
    console.info(line);
    return;
  }
  console.error(line);
}
/**
 * Serializes a tool result to JSON and clips it to 400 characters for display.
 * Returns undefined when serialization yields nothing.
 */
function buildResultPreview(toolResult: ToolRunOutcome) {
  const json = JSON.stringify(toolResult);
  if (!json) {
    return undefined;
  }
  return clipText(json, 400);
}
/**
 * Converts a tool event into the data for a `role: "tool"` chat message record.
 * The summary becomes the message content; the full event is kept under `metadata`,
 * with a missing `error` / `resultPreview` stored as null.
 */
export function buildToolLogMessageData(chatId: string, event: ToolExecutionEvent) {
  const { toolCallId, name, status, summary, args, startedAt, completedAt, durationMs } = event;
  const metadata = {
    kind: "tool_call",
    toolCallId,
    toolName: name,
    status,
    summary,
    args,
    startedAt,
    completedAt,
    durationMs,
    error: event.error ?? null,
    resultPreview: event.resultPreview ?? null,
  };
  return {
    chatId,
    role: "tool" as const,
    content: summary,
    name,
    metadata,
  };
}
/**
 * Extracts the text of the first `<title>` element and decodes a small set of
 * HTML entities (`&nbsp; &amp; &lt; &gt; &quot; &#39;`, case-insensitive).
 *
 * Entities are decoded in a single pass, so already-escaped text such as
 * `&amp;lt;` becomes `&lt;` rather than being decoded twice into `<`.
 *
 * @returns The whitespace-compacted title, or null when no non-empty title exists.
 */
function extractHtmlTitle(html: string) {
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!match?.[1]) return null;

  const entityText: Record<string, string> = {
    nbsp: " ",
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    "#39": "'",
  };
  const decoded = match[1].replace(
    /&(nbsp|amp|lt|gt|quot|#39);/gi,
    (entity: string, name: string) => entityText[name.toLowerCase()] ?? entity
  );
  return compactWhitespace(decoded);
}
/**
 * Builds the Chat Completions message list: the tool system prompt first, followed by
 * each incoming message converted with buildOpenAIConversationMessage.
 */
function normalizeIncomingMessages(messages: ChatMessage[]) {
  const normalized = messages.map((message) => buildOpenAIConversationMessage(message));
  return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
}
2026-05-02 21:44:32 -07:00
/**
 * Builds the Responses API input list: the tool system prompt first, followed by
 * each incoming message converted with buildOpenAIResponsesInputMessage.
 */
function normalizeIncomingResponsesInput(messages: ChatMessage[]) {
  const converted = messages.map((entry) => buildOpenAIResponsesInputMessage(entry));
  const systemMessage = { role: "system", content: CHAT_TOOL_SYSTEM_PROMPT };
  return [systemMessage, ...converted];
}
2026-05-02 18:14:41 -07:00
/**
 * Runs a web search through Exa and reshapes the response into the common
 * `web_search` result format (rank, title, url, dates, summary, text, highlights).
 *
 * Fields that are missing or not strings are reported as null; summary, text and
 * highlights are clipped so a single result cannot dominate the model context.
 */
async function runExaWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
  const exa = exaClient();
  const searchOptions = {
    type: args.type ?? "auto",
    numResults: args.numResults ?? DEFAULT_WEB_RESULTS,
    includeDomains: args.includeDomains,
    excludeDomains: args.excludeDomains,
    moderation: true,
    userLocation: "US",
    contents: {
      summary: { query: args.query },
      highlights: {
        query: args.query,
        maxCharacters: 320,
        numSentences: 2,
        highlightsPerUrl: 2,
      },
      text: { maxCharacters: 1_000 },
    },
  };
  // The options object is cast because it is not checked against the SDK's option types here.
  const response = await exa.search(args.query, searchOptions as any);
  const results = Array.isArray(response?.results) ? response.results : [];

  const stringOrNull = (value: unknown) => (typeof value === "string" ? value : null);

  return {
    ok: true,
    searchEngine: "exa",
    query: args.query,
    requestId: response?.requestId ?? null,
    results: results.map((result: any, index: number) => ({
      rank: index + 1,
      title: stringOrNull(result?.title),
      url: stringOrNull(result?.url),
      publishedDate: stringOrNull(result?.publishedDate),
      author: stringOrNull(result?.author),
      summary: typeof result?.summary === "string" ? clipText(result.summary, 1_400) : null,
      text: typeof result?.text === "string" ? clipText(result.text, 700) : null,
      // Keep at most three string highlights, each clipped to 280 characters.
      highlights: Array.isArray(result?.highlights)
        ? result.highlights
            .filter((h: unknown) => typeof h === "string")
            .slice(0, 3)
            .map((h: string) => clipText(h, 280))
        : [],
    })),
  };
}
2026-05-02 18:14:41 -07:00
/**
 * Runs a web search through SearXNG and reshapes the response into the common
 * `web_search` result format. `author` is always null; the only highlight is the
 * result summary clipped to 280 characters, when there is one.
 */
async function runSearxngWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
  const { query, includeDomains, excludeDomains } = args;
  const numResults = args.numResults ?? DEFAULT_WEB_RESULTS;
  const response = await searchSearxng(query, { numResults, includeDomains, excludeDomains });

  const ranked = response.results.map((hit, position) => {
    const highlights = hit.summary ? [clipText(hit.summary, 280)] : [];
    return {
      rank: position + 1,
      title: hit.title,
      url: hit.url,
      publishedDate: hit.publishedDate,
      author: null,
      summary: hit.summary,
      text: hit.text,
      highlights,
      engines: hit.engines,
    };
  });

  return {
    ok: true,
    searchEngine: "searxng",
    query,
    requestId: response.requestId,
    results: ranked,
  };
}
/**
 * Validates raw `web_search` arguments and dispatches to the configured engine:
 * SearXNG when env.CHAT_WEB_SEARCH_ENGINE is "searxng", Exa otherwise.
 * Throws the schema's validation error on invalid input.
 */
async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
  const args = WebSearchArgsSchema.parse(input);
  const useSearxng = env.CHAT_WEB_SEARCH_ENGINE === "searxng";
  return useSearxng ? runSearxngWebSearchTool(args) : runExaWebSearchTool(args);
}
/**
 * Parses `urlRaw` and ensures it uses the http or https scheme.
 *
 * @returns The parsed URL.
 * @throws TypeError from the URL constructor when the string is not a valid URL.
 * @throws Error when the scheme is anything other than http/https.
 */
function assertSafeFetchUrl(urlRaw: string) {
  const parsed = new URL(urlRaw);
  // NOTE(review): only the scheme is checked; loopback and private-network hosts are
  // not rejected here — confirm whether that is intended for model-supplied URLs.
  const isHttp = parsed.protocol === "http:" || parsed.protocol === "https:";
  if (!isHttp) {
    throw new Error("Only http:// and https:// URLs are supported.");
  }
  return parsed;
}
/**
 * Fetches a URL and returns its readable text for the model.
 *
 * HTML (detected from the content type or the body itself) is converted to plain
 * text with images, scripts, styles and link targets removed; other bodies are
 * returned as-is. The text is whitespace-compacted and cut to `maxCharacters`,
 * with a trailing marker stating how many characters were dropped.
 *
 * The FETCH_TIMEOUT_MS deadline covers both the request and the download of the
 * body, so a server that sends headers and then stalls cannot hang the tool call.
 *
 * @throws The schema's validation error for invalid arguments, Error for
 *   non-http(s) URLs and non-2xx responses, and the fetch abort error on timeout.
 */
async function runFetchUrlTool(input: unknown): Promise<ToolRunOutcome> {
  const args = FetchUrlArgsSchema.parse(input);
  const parsed = assertSafeFetchUrl(args.url);
  const maxCharacters = args.maxCharacters ?? DEFAULT_FETCH_MAX_CHARACTERS;

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
  let response: Response;
  let body: string;
  try {
    response = await fetch(parsed.toString(), {
      redirect: "follow",
      signal: controller.signal,
      headers: {
        "User-Agent": "SybilBot/1.0 (+https://sybil.local)",
        Accept: "text/html, text/plain, application/json;q=0.9, */*;q=0.5",
      },
    });
    if (!response.ok) {
      throw new Error(`Fetch failed with status ${response.status}.`);
    }
    // Read the body while the abort timer is still armed.
    body = await response.text();
  } finally {
    clearTimeout(timeout);
  }

  const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
  // Fall back to sniffing the body because some servers mislabel HTML.
  const isHtml = contentType.includes("text/html") || /<!doctype html|<html[\s>]/i.test(body);
  let extracted = body;
  if (isHtml) {
    extracted = htmlToText(body, {
      wordwrap: false,
      preserveNewlines: true,
      selectors: [
        { selector: "img", format: "skip" },
        { selector: "script", format: "skip" },
        { selector: "style", format: "skip" },
        { selector: "noscript", format: "skip" },
        { selector: "a", options: { ignoreHref: true } },
      ],
    });
  }

  const normalized = compactWhitespace(extracted);
  const truncated = normalized.length > maxCharacters;
  const text = truncated
    ? `${normalized.slice(0, maxCharacters)}\n\n[truncated ${normalized.length - maxCharacters} characters]`
    : normalized;

  return {
    ok: true,
    // response.url reflects the final URL after redirects when available.
    url: response.url || parsed.toString(),
    status: response.status,
    contentType: contentType || null,
    title: isHtml ? extractHtmlTitle(body) : null,
    truncated,
    text,
  };
}
2026-05-02 19:38:15 -07:00
/**
 * Wraps `value` in single quotes for a POSIX shell. Each embedded single quote is
 * written as `'\''` (close quote, escaped quote, reopen quote).
 */
function shellQuote(value: string) {
  const escaped = value.split("'").join("'\\''");
  return "'" + escaped + "'";
}
2026-05-02 19:52:09 -07:00
/**
 * Builds the ssh destination for the remote devbox from env configuration.
 *
 * @returns `user@host` when CHAT_CODEX_REMOTE_USER is set and the host does not
 *   already contain an "@"; otherwise the host string as configured.
 * @throws Error when CHAT_CODEX_REMOTE_HOST is not set.
 */
function buildDevboxSshTarget() {
  const host = env.CHAT_CODEX_REMOTE_HOST;
  if (!host) {
    throw new Error("CHAT_CODEX_REMOTE_HOST not set");
  }
  const user = env.CHAT_CODEX_REMOTE_USER;
  if (!user || host.includes("@")) {
    return host;
  }
  return `${user}@${host}`;
}
/**
 * Builds the shell command line executed on the devbox for a `codex_exec` call.
 *
 * The command creates and enters the configured work directory, then runs
 * `codex exec` with the user's task wrapped in batch-mode rules. The prompt is
 * single-quoted for the remote shell, and stdin is redirected from /dev/null.
 */
function buildRemoteCodexCommand(prompt: string) {
  const workdir = env.CHAT_CODEX_REMOTE_WORKDIR.trim();
  const wrappedPrompt = [
    "You are running in a non-interactive batch environment.",
    "",
    "Rules:",
    "- Do not ask questions or wait for user input.",
    "- Do not use interactive commands, editors, pagers, or prompts.",
    "- If details are ambiguous, make a reasonable assumption and continue.",
    "- Complete the task in one run, including any requested file edits, commands, and verification.",
    "- End with a concise final report that includes changed files, commands run, and outcomes.",
    "",
    "Task:",
    prompt,
  ].join("\n");
  const quotedWorkdir = shellQuote(workdir);
  const codexCommand = `codex exec --skip-git-repo-check ${shellQuote(wrappedPrompt)} < /dev/null`;
  return `mkdir -p ${quotedWorkdir} && cd ${quotedWorkdir} && ${codexCommand}`;
}
2026-05-02 19:52:09 -07:00
/**
 * Builds the shell command line executed on the devbox for a `shell_exec` call.
 *
 * The command creates and enters the configured work directory, then runs the
 * single-quoted user command with `bash -lc` when bash is available and `sh -lc`
 * otherwise.
 */
function buildRemoteShellCommand(command: string) {
  const quotedWorkdir = shellQuote(env.CHAT_CODEX_REMOTE_WORKDIR.trim());
  const quotedCommand = shellQuote(command);
  const enterWorkdir = `mkdir -p ${quotedWorkdir} && cd ${quotedWorkdir}`;
  const runInLoginShell = `if command -v bash >/dev/null 2>&1; then bash -lc ${quotedCommand}; else sh -lc ${quotedCommand}; fi`;
  return `${enterWorkdir} && ${runInLoginShell}`;
}
/**
 * Runs `fn` with the path of the ssh private key to use for the devbox, if any.
 *
 * Resolution order:
 * 1. CHAT_CODEX_SSH_KEY_PATH — passed through as-is.
 * 2. CHAT_CODEX_SSH_PRIVATE_KEY_B64 — decoded into a fresh temporary directory
 *    (file mode 0600) that is removed once `fn` settles, whether it succeeds or throws.
 * 3. Neither set — `fn` receives undefined.
 *
 * @returns Whatever `fn` resolves to.
 */
async function withDevboxSshKeyPath<T>(fn: (keyPath?: string) => Promise<T>) {
  const configuredKeyPath = env.CHAT_CODEX_SSH_KEY_PATH;
  if (configuredKeyPath) {
    return fn(configuredKeyPath);
  }

  const encodedKey = env.CHAT_CODEX_SSH_PRIVATE_KEY_B64;
  if (!encodedKey) {
    return fn(undefined);
  }

  const tmpDir = await mkdtemp(path.join(os.tmpdir(), "sybil-codex-ssh-"));
  const keyPath = path.join(tmpDir, "id");
  try {
    await writeFile(keyPath, Buffer.from(encodedKey, "base64"), { mode: 0o600 });
    return await fn(keyPath);
  } finally {
    // Never leave decoded key material on disk.
    await rm(tmpDir, { recursive: true, force: true });
  }
}
2026-05-02 19:52:09 -07:00
/**
 * Limits remote command output to `maxCharacters`.
 *
 * @returns The text (with a trailing "[truncated N characters]" marker when it was
 *   cut) and a flag telling whether truncation happened.
 */
function clipRemoteOutput(value: string, maxCharacters: number) {
  if (value.length <= maxCharacters) {
    return { text: value, truncated: false };
  }
  const droppedCharacters = value.length - maxCharacters;
  return {
    text: `${value.slice(0, maxCharacters)}\n\n[truncated ${droppedCharacters} characters]`,
    truncated: true,
  };
}
/**
 * Converts execFile output to a string: strings pass through, Buffers are decoded
 * as UTF-8, and anything else (including undefined) becomes "".
 */
function bufferOrStringToString(value: unknown) {
  if (Buffer.isBuffer(value)) {
    return value.toString("utf8");
  }
  return typeof value === "string" ? value : "";
}
/** Inputs for one non-interactive ssh invocation on the devbox. */
type DevboxSshRequest = {
  /** ssh destination, as returned by buildDevboxSshTarget. */
  sshTarget: string;
  /** Fully quoted command line for the remote shell. */
  remoteCommand: string;
  /** Passed to execFile as `timeout`. */
  timeoutMs: number;
  /** stdout character limit; stderr is limited to min(maxCharacters, 12000). */
  maxCharacters: number;
  /** Word used in error text, e.g. "Codex" or "shell". */
  label: string;
  /** Extra fields echoed into the outcome right after `workdir`. */
  echo?: Record<string, unknown>;
};

/** Fields read from an execFile rejection; all are optional because the value is untyped. */
type DevboxSshFailure = {
  killed?: unknown;
  code?: unknown;
  signal?: unknown;
  stdout?: unknown;
  stderr?: unknown;
  message?: unknown;
};

/**
 * Runs one command on the devbox over ssh and converts the result into a tool outcome.
 * Never throws for command failures: non-zero exits, timeouts and spawn errors are
 * reported as `{ ok: false, error, exitCode, signal, ... }` together with whatever
 * output was captured.
 */
async function runDevboxSshCommand(request: DevboxSshRequest): Promise<ToolRunOutcome> {
  const { sshTarget, remoteCommand, timeoutMs, maxCharacters, label } = request;
  const stderrLimit = Math.min(maxCharacters, 12_000);
  const location = {
    host: env.CHAT_CODEX_REMOTE_HOST,
    workdir: env.CHAT_CODEX_REMOTE_WORKDIR,
    ...request.echo,
  };
  const captureOutput = (stdoutRaw: unknown, stderrRaw: unknown) => {
    const stdout = clipRemoteOutput(bufferOrStringToString(stdoutRaw), maxCharacters);
    const stderr = clipRemoteOutput(bufferOrStringToString(stderrRaw), stderrLimit);
    return {
      stdout: stdout.text,
      stderr: stderr.text,
      stdoutTruncated: stdout.truncated,
      stderrTruncated: stderr.truncated,
    };
  };

  return withDevboxSshKeyPath(async (keyPath?: string): Promise<ToolRunOutcome> => {
    const sshArgs = [
      // -n: ssh reads stdin from /dev/null; BatchMode: never prompt for passwords.
      "-n",
      "-o",
      "BatchMode=yes",
      "-o",
      "StrictHostKeyChecking=accept-new",
      "-o",
      "UserKnownHostsFile=/tmp/sybil-codex-known-hosts",
      "-p",
      String(env.CHAT_CODEX_REMOTE_PORT),
    ];
    if (keyPath) {
      sshArgs.push("-i", keyPath);
    }
    sshArgs.push(sshTarget, remoteCommand);

    try {
      const result = await execFileAsync("ssh", sshArgs, {
        timeout: timeoutMs,
        maxBuffer: REMOTE_EXEC_MAX_BUFFER_BYTES,
      });
      return { ok: true, ...location, ...captureOutput(result.stdout, result.stderr) };
    } catch (err: unknown) {
      const failure = (err ?? {}) as DevboxSshFailure;
      const exitCode = typeof failure.code === "number" ? failure.code : null;

      let error: string;
      if (failure.killed) {
        error = `Remote ${label} command timed out after ${timeoutMs}ms.`;
      } else if (exitCode !== null) {
        // execFile's own message repeats the entire ssh command line (including the
        // full prompt/command) plus stderr; report the exit code instead, since
        // stdout/stderr are returned separately below.
        error = `Remote ${label} command exited with code ${exitCode}.`;
      } else {
        error = clipText(typeof failure.message === "string" ? failure.message : String(err), 2_000);
      }

      return {
        ok: false,
        error,
        exitCode,
        signal: typeof failure.signal === "string" ? failure.signal : null,
        ...location,
        ...captureOutput(failure.stdout, failure.stderr),
      };
    }
  });
}

/**
 * `codex_exec` tool: runs a Codex CLI task on the remote devbox.
 *
 * @param input Raw tool arguments; validated with CodexExecArgsSchema.
 * @returns `{ ok: false, error }` when the tool is disabled, otherwise the outcome
 *   of the remote command (host, workdir, clipped stdout/stderr, truncation flags).
 * @throws The schema's validation error for invalid arguments, or Error when the
 *   devbox host is not configured.
 */
async function runCodexExecTool(input: unknown): Promise<ToolRunOutcome> {
  if (!env.CHAT_CODEX_TOOL_ENABLED) {
    return { ok: false, error: "codex_exec is disabled." };
  }
  const args: CodexExecArgs = CodexExecArgsSchema.parse(input);
  const sshTarget = buildDevboxSshTarget();
  return runDevboxSshCommand({
    sshTarget,
    remoteCommand: buildRemoteCodexCommand(args.prompt),
    timeoutMs: env.CHAT_CODEX_EXEC_TIMEOUT_MS,
    maxCharacters: args.maxCharacters ?? DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS,
    label: "Codex",
  });
}

/**
 * `shell_exec` tool: runs a shell command on the remote devbox.
 *
 * @param input Raw tool arguments; validated with ShellExecArgsSchema.
 * @returns `{ ok: false, error }` when the tool is disabled, otherwise the outcome
 *   of the remote command (host, workdir, command, clipped stdout/stderr, truncation flags).
 * @throws The schema's validation error for invalid arguments, or Error when the
 *   devbox host is not configured.
 */
async function runShellExecTool(input: unknown): Promise<ToolRunOutcome> {
  if (!env.CHAT_SHELL_TOOL_ENABLED) {
    return { ok: false, error: "shell_exec is disabled." };
  }
  const args: ShellExecArgs = ShellExecArgsSchema.parse(input);
  const sshTarget = buildDevboxSshTarget();
  return runDevboxSshCommand({
    sshTarget,
    remoteCommand: buildRemoteShellCommand(args.command),
    timeoutMs: env.CHAT_SHELL_EXEC_TIMEOUT_MS,
    maxCharacters: args.maxCharacters ?? DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS,
    label: "shell",
    echo: { command: args.command },
  });
}
/**
 * Dispatches a tool call to its runner by name.
 *
 * @returns The runner's outcome, or `{ ok: false, error }` for an unknown tool name.
 *   Runner exceptions (e.g. argument validation errors) propagate to the caller.
 */
async function executeTool(name: string, args: unknown): Promise<ToolRunOutcome> {
  switch (name) {
    case "web_search":
      return runWebSearchTool(args);
    case "fetch_url":
      return runFetchUrlTool(args);
    case "codex_exec":
      return runCodexExecTool(args);
    case "shell_exec":
      return runShellExecTool(args);
    default:
      return { ok: false, error: `Unknown tool: ${name}` };
  }
}
/**
 * Parses the JSON argument string supplied by the model.
 *
 * @returns The parsed JSON value, or an empty object when `raw` is not a string
 *   or is blank.
 * @throws Error prefixed with "Invalid JSON arguments:" when the string is not valid JSON.
 */
function parseToolArgs(raw: unknown) {
  const text = typeof raw === "string" ? raw.trim() : "";
  if (text === "") {
    return {};
  }
  try {
    return JSON.parse(text);
  } catch (err: any) {
    const reason = err?.message ?? String(err);
    throw new Error(`Invalid JSON arguments: ${reason}`);
  }
}
2026-05-02 19:38:15 -07:00
/**
 * Returns the arguments to record on a tool event. For `codex_exec` the `prompt`
 * and for `shell_exec` the `command` are clipped to 1000 characters (in a copy) so
 * logged and persisted events stay small; all other arguments are returned untouched.
 */
function buildEventArgs(name: string, args: Record<string, unknown>) {
  const clippedField = name === "codex_exec" ? "prompt" : name === "shell_exec" ? "command" : null;
  if (clippedField === null) {
    return args;
  }
  const value = args[clippedField];
  if (typeof value !== "string") {
    return args;
  }
  return { ...args, [clippedField]: clipText(value, 1_000) };
}
2026-05-02 21:19:52 -07:00
/**
 * Heuristic: does a tool-less assistant reply merely announce that it is about to
 * run something ("Let me run the tests", "Calling shell_exec now", "One moment")?
 *
 * The text is lower-cased and the characters `` ` * _ > # - `` are replaced by
 * spaces before matching, so tool names appear as "shell exec" / "codex exec" in
 * the normalized text — the patterns match that form. Replies longer than 800
 * normalized characters, and replies that hand control back to the user
 * ("let me know", "if you want / would like"), are never flagged.
 */
function looksLikeDanglingToolIntent(text: string) {
  const normalized = text
    .toLowerCase()
    .replace(/[`*_>#-]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
  if (!normalized) return false;
  if (normalized.length > 800) return false;
  if (/\blet me know\b/.test(normalized) || /\bif you (want|would like)\b/.test(normalized)) return false;
  return (
    // Underscores were turned into spaces above, hence "shell exec" rather than "shell_exec".
    /\b(calling|running|executing|trying|checking|testing)\b.{0,80}\b(now|it|tool|command|shell exec|codex exec)\b/.test(normalized) ||
    // Accept both the ASCII and the typographic apostrophe in "I'll".
    /\b(let me|i['\u2019]?ll|i will)\b.{0,120}\b(run|execute|call|try|check|test)\b/.test(normalized) ||
    /\b(stand by|hang on|one moment)\b/.test(normalized)
  );
}
/**
 * Appends two messages to `conversation` (mutating it): the assistant's tool-less
 * reply, then a system correction telling the model to either call the tool now or
 * answer directly.
 */
function appendDanglingToolIntentCorrection(conversation: any[], text: string) {
  const assistantEcho = { role: "assistant", content: text };
  const correction = {
    role: "system",
    content:
      "Internal correction: the previous assistant message claimed it would run a tool, but no tool call was made. If the task needs an available tool, call it now. Otherwise provide the final answer directly without saying you will run a tool.",
  };
  conversation.push(assistantEcho, correction);
}
/**
 * Adds a Chat Completions usage object (prompt/completion/total tokens) into `acc`.
 * Missing counters count as 0.
 *
 * @returns true when `usage` was present and merged, false otherwise.
 */
function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
  if (usage) {
    acc.inputTokens = acc.inputTokens + (usage.prompt_tokens ?? 0);
    acc.outputTokens = acc.outputTokens + (usage.completion_tokens ?? 0);
    acc.totalTokens = acc.totalTokens + (usage.total_tokens ?? 0);
    return true;
  }
  return false;
}
2026-05-02 21:44:32 -07:00
/**
 * Adds a Responses API usage object (input/output/total tokens) into `acc`.
 * Missing counters count as 0.
 *
 * @returns true when `usage` was present and merged, false otherwise.
 */
function mergeResponsesUsage(acc: Required<ToolAwareUsage>, usage: any) {
  if (usage) {
    acc.inputTokens = acc.inputTokens + (usage.input_tokens ?? 0);
    acc.outputTokens = acc.outputTokens + (usage.output_tokens ?? 0);
    acc.totalTokens = acc.totalTokens + (usage.total_tokens ?? 0);
    return true;
  }
  return false;
}
/** Returns the response's `output` array, or an empty array when it is missing or not an array. */
function getResponseOutputItems(response: any) {
  const output = response?.output;
  if (Array.isArray(output)) {
    return output;
  }
  return [];
}
/**
 * Extracts the assistant text from a Responses API result.
 *
 * A non-empty `output_text` is returned directly. Otherwise the `message` output
 * items are walked and their `output_text` and `refusal` parts concatenated. An
 * empty `output_text` is deliberately NOT returned early: it carries no refusal
 * text, so returning it would hide a refusal and bypass `fallback`.
 *
 * @param fallback Returned when no text or refusal content is found.
 */
function extractResponsesText(response: any, fallback = "") {
  const aggregated = response?.output_text;
  if (typeof aggregated === "string" && aggregated.length > 0) return aggregated;

  const parts: string[] = [];
  for (const item of getResponseOutputItems(response)) {
    if (item?.type !== "message" || !Array.isArray(item.content)) continue;
    for (const content of item.content) {
      if (content?.type === "output_text" && typeof content.text === "string") {
        parts.push(content.text);
      } else if (content?.type === "refusal" && typeof content.refusal === "string") {
        parts.push(content.refusal);
      }
    }
  }
  return parts.join("") || fallback;
}
/**
 * Describes why a Responses API result did not complete.
 *
 * @returns null unless the status is "failed" or "incomplete"; otherwise the API
 *   error message if present, else "Response incomplete: <reason>" when a reason is
 *   given, else "Response <status>.".
 */
function getResponseFailureMessage(response: any) {
  const status = response?.status;
  if (status !== "failed" && status !== "incomplete") {
    return null;
  }

  const apiMessage = response?.error?.message;
  if (typeof apiMessage === "string") {
    return apiMessage;
  }

  const reason = response?.incomplete_details?.reason;
  if (typeof reason === "string" && reason) {
    return `Response incomplete: ${reason}`;
  }
  return `Response ${response.status}.`;
}
/**
 * Picks the `function_call` items out of a Responses API output list and converts
 * them to NormalizedToolCall. The id prefers `call_id`, then `id`, then a synthetic
 * `tool_call_<round>_<index>` (index counted among function calls only); a missing
 * name becomes "unknown_tool" and missing arguments become "{}".
 */
function normalizeResponsesToolCalls(outputItems: any[], round: number): NormalizedToolCall[] {
  const functionCalls = outputItems.filter((item) => item?.type === "function_call");
  return functionCalls.map((entry: any, position: number) => {
    const syntheticId = `tool_call_${round}_${position}`;
    return {
      id: entry.call_id ?? entry.id ?? syntheticId,
      name: entry.name ?? "unknown_tool",
      arguments: entry.arguments ?? "{}",
    };
  });
}
2026-03-02 16:39:05 -08:00
/**
 * API-independent view of one tool call requested by the model, produced by
 * normalizeModelToolCalls (Chat Completions) and normalizeResponsesToolCalls (Responses).
 */
type NormalizedToolCall = {
// Provider call id, or a synthetic `tool_call_<round>_<index>` when none was given.
id: string;
// Tool name; "unknown_tool" when the provider omitted it.
name: string;
// Raw JSON argument string as sent by the model ("{}" when omitted).
arguments: string;
};
/**
 * Converts Chat Completions `tool_calls` entries to NormalizedToolCall. A missing id
 * becomes `tool_call_<round>_<index>`, a missing function name "unknown_tool", and
 * missing arguments "{}".
 */
function normalizeModelToolCalls(toolCalls: any[], round: number): NormalizedToolCall[] {
  return toolCalls.map((rawCall: any, position: number) => {
    const fn = rawCall?.function;
    return {
      id: rawCall?.id ?? `tool_call_${round}_${position}`,
      name: fn?.name ?? "unknown_tool",
      arguments: fn?.arguments ?? "{}",
    };
  });
}
/**
 * Executes one normalized tool call and produces the event describing it.
 *
 * Argument parsing errors and exceptions thrown by the tool are converted into a
 * failed outcome (`{ ok: false, error }`) rather than propagated, so the model
 * always receives a result for every call. The event is logged and, when provided,
 * passed to `params.onToolEvent` (awaited) before returning.
 *
 * @returns The event (for logging, persistence and UI) and the raw tool result (for the model).
 */
async function executeToolCallAndBuildEvent(
  call: NormalizedToolCall,
  params: ToolAwareCompletionParams
): Promise<{ event: ToolExecutionEvent; toolResult: ToolRunOutcome }> {
  const startedAtMs = Date.now();
  const startedAt = new Date(startedAtMs).toISOString();

  let toolResult: ToolRunOutcome;
  // Stays empty when the argument string cannot be parsed.
  let parsedArgs: Record<string, unknown> = {};
  try {
    parsedArgs = toRecord(parseToolArgs(call.arguments));
    toolResult = await executeTool(call.name, parsedArgs);
  } catch (err: any) {
    toolResult = {
      ok: false,
      error: err?.message ?? String(err),
    };
  }

  const status: "completed" | "failed" = toolResult.ok ? "completed" : "failed";
  let error: string | undefined;
  if (status === "failed") {
    error = typeof toolResult.error === "string" ? toolResult.error : "Tool execution failed.";
  }

  const completedAtMs = Date.now();
  // Long prompts/commands are clipped for the event; the tool itself received the full arguments.
  const eventArgs = buildEventArgs(call.name, parsedArgs);
  const event: ToolExecutionEvent = {
    toolCallId: call.id,
    name: call.name,
    status,
    summary: buildToolSummary(call.name, eventArgs, status, error),
    args: eventArgs,
    startedAt,
    completedAt: new Date(completedAtMs).toISOString(),
    durationMs: completedAtMs - startedAtMs,
    error,
    resultPreview: buildResultPreview(toolResult),
  };

  logToolEvent(event, params.logContext);
  if (params.onToolEvent) {
    await params.onToolEvent(event);
  }
  return { event, toolResult };
}
/**
 * Runs a chat turn through the OpenAI Responses API with tool support.
 *
 * Each round sends the accumulated input. When the model requests function calls,
 * they are executed one after another and their JSON results appended as
 * `function_call_output` items before the next round. A reply without tool calls
 * ends the turn — unless it only announces a tool run, in which case a correction
 * is appended and the model is asked again (at most
 * MAX_DANGLING_TOOL_INTENT_RETRIES times). After MAX_TOOL_ROUNDS rounds a fixed
 * "tool-call limit" message is returned.
 *
 * @returns Final text, summed token usage (when reported), raw responses and tool events.
 * @throws Error when a response has status "failed" or "incomplete"; API client
 *   errors and `onToolEvent` errors propagate.
 */
export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
  const input: any[] = normalizeIncomingResponsesInput(params.messages);
  const rawResponses: unknown[] = [];
  const toolEvents: ToolExecutionEvent[] = [];
  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let sawUsage = false;
  let totalToolCalls = 0;
  let danglingToolIntentRetries = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
    const response = await params.client.responses.create({
      model: params.model,
      input,
      temperature: params.temperature,
      max_output_tokens: params.maxTokens,
      tools: RESPONSES_CHAT_TOOLS,
      tool_choice: "auto",
      parallel_tool_calls: true,
      // Tool loops pass response output items back as input; reasoning items need persistence.
      store: true,
    } as any);
    rawResponses.push(response);
    sawUsage = mergeResponsesUsage(usageAcc, response?.usage) || sawUsage;

    const failureMessage = getResponseFailureMessage(response);
    if (failureMessage) {
      throw new Error(failureMessage);
    }

    const outputItems = getResponseOutputItems(response);
    const normalizedToolCalls = normalizeResponsesToolCalls(outputItems, round);

    if (!normalizedToolCalls.length) {
      const text = extractResponsesText(response);
      // The model said it would run a tool but did not: nudge it and retry (this uses up a round).
      if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
        danglingToolIntentRetries += 1;
        appendDanglingToolIntentCorrection(input, text);
        continue;
      }
      return {
        text,
        usage: sawUsage ? usageAcc : undefined,
        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, api: "responses" },
        toolEvents,
      };
    }

    totalToolCalls += normalizedToolCalls.length;
    // Echo all output items back so each function_call_output has its matching call in the input.
    input.push(...outputItems);
    for (const call of normalizedToolCalls) {
      const { event, toolResult } = await executeToolCallAndBuildEvent(call, params);
      toolEvents.push(event);
      input.push({
        type: "function_call_output",
        call_id: call.id,
        output: JSON.stringify(toolResult),
      });
    }
  }

  return {
    text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
    usage: sawUsage ? usageAcc : undefined,
    raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "responses" },
    toolEvents,
  };
}
/**
 * Drives a non-streaming tool-calling loop over the Chat Completions API.
 *
 * Each round sends the running conversation to `params.client.chat.completions.create`.
 * If the first choice carries tool calls, an assistant message echoing those calls is
 * appended, every call is executed in order, and each JSON-serialized result is appended
 * as a `tool` message. Otherwise the message content is returned as the final answer,
 * unless it looks like an unfulfilled tool intent and the retry budget
 * (`MAX_DANGLING_TOOL_INTENT_RETRIES`) still allows one more corrective round.
 *
 * @param params - Client, model, sampling settings and conversation messages.
 * @returns The final text, the accumulated usage (`undefined` when no round reported
 *   usage), the raw completions and the tool events. A completion without a first-choice
 *   message yields empty text with `raw.missingMessage` set. If `MAX_TOOL_ROUNDS` rounds
 *   elapse without a final answer, a fixed "tool-call limit" message is returned.
 */
export async function runToolAwareChatCompletions(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
  const messages: any[] = normalizeIncomingMessages(params.messages);
  const rawResponses: unknown[] = [];
  const toolEvents: ToolExecutionEvent[] = [];
  const usageTotals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let usageReported = false;
  let toolCallCount = 0;
  let intentRetries = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
    const request = {
      model: params.model,
      messages,
      temperature: params.temperature,
      max_tokens: params.maxTokens,
      tools: CHAT_TOOLS,
      tool_choice: "auto",
    };
    const completion = await params.client.chat.completions.create(request as any);
    rawResponses.push(completion);
    if (mergeUsage(usageTotals, completion?.usage)) {
      usageReported = true;
    }

    const reply = completion?.choices?.[0]?.message;
    if (!reply) {
      return {
        text: "",
        usage: usageReported ? usageTotals : undefined,
        raw: { responses: rawResponses, toolCallsUsed: toolCallCount, missingMessage: true },
        toolEvents,
      };
    }

    const requestedCalls = Array.isArray(reply.tool_calls) ? reply.tool_calls : [];
    if (requestedCalls.length === 0) {
      const text = typeof reply.content === "string" ? reply.content : "";
      const retryBudgetLeft = intentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES;
      if (retryBudgetLeft && looksLikeDanglingToolIntent(text)) {
        // The model announced a tool call without making one; nudge it and spend a round.
        intentRetries += 1;
        appendDanglingToolIntentCorrection(messages, text);
        continue;
      }
      return {
        text,
        usage: usageReported ? usageTotals : undefined,
        raw: { responses: rawResponses, toolCallsUsed: toolCallCount },
        toolEvents,
      };
    }

    const calls = normalizeModelToolCalls(requestedCalls, round);
    toolCallCount += calls.length;

    // Echo the assistant turn with normalized ids so the tool messages below can refer to them.
    const hasTextContent = typeof reply.content === "string" && reply.content.length > 0;
    const assistantTurn: any = {
      role: "assistant",
      tool_calls: calls.map((call) => ({
        id: call.id,
        type: "function",
        function: { name: call.name, arguments: call.arguments },
      })),
      ...(hasTextContent ? { content: reply.content } : {}),
    };
    messages.push(assistantTurn);

    for (const call of calls) {
      const executed = await executeToolCallAndBuildEvent(call, params);
      toolEvents.push(executed.event);
      messages.push({
        role: "tool",
        tool_call_id: call.id,
        content: JSON.stringify(executed.toolResult),
      });
    }
  }

  // Round budget exhausted without a final answer.
  return {
    text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
    usage: usageReported ? usageTotals : undefined,
    raw: { responses: rawResponses, toolCallsUsed: toolCallCount, toolCallLimitReached: true },
    toolEvents,
  };
}
2026-03-02 16:39:05 -08:00
/**
 * Streaming variant of the Responses API tool loop.
 *
 * Each round opens a streamed `responses.create` call and consumes it to the end,
 * collecting text deltas, finished output items and the terminal response. Tool calls
 * found in the round are executed in order; a `tool_call` event is yielded after each
 * one and its JSON-serialized result is appended as a `function_call_output` item. A
 * round without tool calls ends the generator: the round's text is yielded as a single
 * `delta` (when non-empty) followed by `done`. The exception is text that looks like an
 * unfulfilled tool intent while the retry budget (`MAX_DANGLING_TOOL_INTENT_RETRIES`)
 * still allows a corrective round.
 *
 * @param params - Client, model, sampling settings and conversation messages.
 * @returns An async generator of `tool_call`, `delta` and `done` events. If
 *   `MAX_TOOL_ROUNDS` rounds elapse without a final answer, `done` carries a fixed
 *   "tool-call limit" message.
 * @throws Error on a stream `error` event, or when `getResponseFailureMessage` yields a
 *   message for the terminal response.
 */
export async function* runToolAwareOpenAIChatStream(
  params: ToolAwareCompletionParams
): AsyncGenerator<ToolAwareStreamingEvent> {
  const responsesInput: any[] = normalizeIncomingResponsesInput(params.messages);
  const rawResponses: unknown[] = [];
  const toolEvents: ToolExecutionEvent[] = [];
  const usageTotals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let usageReported = false;
  let toolCallCount = 0;
  let intentRetries = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
    const request = {
      model: params.model,
      input: responsesInput,
      temperature: params.temperature,
      max_output_tokens: params.maxTokens,
      tools: RESPONSES_CHAT_TOOLS,
      tool_choice: "auto",
      parallel_tool_calls: true,
      // Output items are replayed as input on later rounds; reasoning items need persistence.
      store: true,
      stream: true,
    };
    const stream = await params.client.responses.create(request as any);

    let streamedText = "";
    let finalResponse: any | null = null;
    const itemsByIndex: any[] = [];

    for await (const event of stream as any as AsyncIterable<any>) {
      rawResponses.push(event);
      switch (event?.type) {
        case "response.output_text.delta":
          if (typeof event.delta === "string") {
            streamedText += event.delta;
          }
          break;
        case "response.output_item.done":
          if (event.item) {
            // Slot by output_index when provided; otherwise append.
            itemsByIndex[event.output_index ?? itemsByIndex.length] = event.item;
          }
          break;
        case "response.completed":
        case "response.failed":
        case "response.incomplete":
          // Every terminal event carries the response snapshot and its usage.
          finalResponse = event.response;
          if (mergeResponsesUsage(usageTotals, event.response?.usage)) {
            usageReported = true;
          }
          break;
        case "error":
          throw new Error(event.message ?? "OpenAI Responses stream failed.");
        default:
          break;
      }
    }

    const failure = getResponseFailureMessage(finalResponse);
    if (failure) {
      throw new Error(failure);
    }

    // Prefer the terminal response's items; fall back to the ones gathered from the stream.
    const reportedItems = getResponseOutputItems(finalResponse);
    const roundItems = reportedItems.length ? reportedItems : itemsByIndex.filter(Boolean);
    const calls = normalizeResponsesToolCalls(roundItems, round);

    if (calls.length > 0) {
      toolCallCount += calls.length;
      responsesInput.push(...roundItems);
      for (const call of calls) {
        const executed = await executeToolCallAndBuildEvent(call, params);
        toolEvents.push(executed.event);
        yield { type: "tool_call", event: executed.event };
        responsesInput.push({
          type: "function_call_output",
          call_id: call.id,
          output: JSON.stringify(executed.toolResult),
        });
      }
      continue;
    }

    const text = extractResponsesText(finalResponse, streamedText);
    const retryBudgetLeft = intentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES;
    if (retryBudgetLeft && looksLikeDanglingToolIntent(text)) {
      // The model announced a tool call without making one; nudge it and spend a round.
      intentRetries += 1;
      appendDanglingToolIntentCorrection(responsesInput, text);
      continue;
    }

    if (text) {
      yield { type: "delta", text };
    }
    yield {
      type: "done",
      result: {
        text,
        usage: usageReported ? usageTotals : undefined,
        raw: { streamed: true, responses: rawResponses, toolCallsUsed: toolCallCount, api: "responses" },
        toolEvents,
      },
    };
    return;
  }

  // Round budget exhausted without a final answer.
  yield {
    type: "done",
    result: {
      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
      usage: usageReported ? usageTotals : undefined,
      raw: { streamed: true, responses: rawResponses, toolCallsUsed: toolCallCount, toolCallLimitReached: true, api: "responses" },
      toolEvents,
    },
  };
}
/**
 * Streaming variant of the Chat Completions tool loop.
 *
 * Each round opens a streamed `chat.completions.create` call (with usage reporting
 * enabled) and consumes it to the end, concatenating content deltas and assembling
 * tool-call fragments keyed by their `index`. Assembled tool calls are executed in
 * order; a `tool_call` event is yielded after each one and its JSON-serialized result is
 * appended as a `tool` message. A round without tool calls ends the generator: the
 * round's text is yielded as a single `delta` (when non-empty) followed by `done`. The
 * exception is text that looks like an unfulfilled tool intent while the retry budget
 * (`MAX_DANGLING_TOOL_INTENT_RETRIES`) still allows a corrective round.
 *
 * @param params - Client, model, sampling settings and conversation messages.
 * @returns An async generator of `tool_call`, `delta` and `done` events. If
 *   `MAX_TOOL_ROUNDS` rounds elapse without a final answer, `done` carries a fixed
 *   "tool-call limit" message.
 */
export async function* runToolAwareChatCompletionsStream(
  params: ToolAwareCompletionParams
): AsyncGenerator<ToolAwareStreamingEvent> {
  const messages: any[] = normalizeIncomingMessages(params.messages);
  const rawResponses: unknown[] = [];
  const toolEvents: ToolExecutionEvent[] = [];
  const usageTotals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let usageReported = false;
  let toolCallCount = 0;
  let intentRetries = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
    const request = {
      model: params.model,
      messages,
      temperature: params.temperature,
      max_tokens: params.maxTokens,
      tools: CHAT_TOOLS,
      tool_choice: "auto",
      stream: true,
      stream_options: { include_usage: true },
    };
    const stream = await params.client.chat.completions.create(request as any);

    let streamedText = "";
    const partialCalls = new Map<number, { id?: string; name?: string; arguments: string }>();

    for await (const chunk of stream as any as AsyncIterable<any>) {
      rawResponses.push(chunk);
      if (mergeUsage(usageTotals, chunk?.usage)) {
        usageReported = true;
      }

      const delta = chunk?.choices?.[0]?.delta;
      const piece = delta?.content;
      if (typeof piece === "string" && piece.length > 0) {
        streamedText += piece;
      }

      if (!Array.isArray(delta?.tool_calls)) {
        continue;
      }
      for (const fragment of delta.tool_calls) {
        // Fragments of one call share an index; a missing index is treated as 0.
        const slot = typeof fragment?.index === "number" ? fragment.index : 0;
        let pending = partialCalls.get(slot);
        if (!pending) {
          pending = { arguments: "" };
          partialCalls.set(slot, pending);
        }

        const fragmentId = fragment?.id;
        if (typeof fragmentId === "string" && fragmentId.length > 0) {
          pending.id = fragmentId;
        }
        const fragmentName = fragment?.function?.name;
        if (typeof fragmentName === "string" && fragmentName.length > 0) {
          pending.name = fragmentName;
        }
        const fragmentArgs = fragment?.function?.arguments;
        if (typeof fragmentArgs === "string" && fragmentArgs.length > 0) {
          // Argument JSON arrives in pieces; concatenate in arrival order.
          pending.arguments += fragmentArgs;
        }
      }
    }

    // Order calls by stream index and fill in fallbacks for anything the stream omitted.
    const orderedCalls = Array.from(partialCalls.entries()).sort((left, right) => left[0] - right[0]);
    const calls: NormalizedToolCall[] = orderedCalls.map((entry, position) => {
      const assembled = entry[1];
      return {
        id: assembled.id ?? `tool_call_${round}_${position}`,
        name: assembled.name ?? "unknown_tool",
        arguments: assembled.arguments || "{}",
      };
    });

    if (calls.length === 0) {
      const retryBudgetLeft = intentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES;
      if (retryBudgetLeft && looksLikeDanglingToolIntent(streamedText)) {
        // The model announced a tool call without making one; nudge it and spend a round.
        intentRetries += 1;
        appendDanglingToolIntentCorrection(messages, streamedText);
        continue;
      }

      if (streamedText) {
        yield { type: "delta", text: streamedText };
      }
      yield {
        type: "done",
        result: {
          text: streamedText,
          usage: usageReported ? usageTotals : undefined,
          raw: { streamed: true, responses: rawResponses, toolCallsUsed: toolCallCount },
          toolEvents,
        },
      };
      return;
    }

    toolCallCount += calls.length;

    // Echo the assistant turn so the tool messages below can refer to its call ids.
    const assistantTurn: any = {
      role: "assistant",
      tool_calls: calls.map((call) => ({
        id: call.id,
        type: "function",
        function: { name: call.name, arguments: call.arguments },
      })),
      ...(streamedText ? { content: streamedText } : {}),
    };
    messages.push(assistantTurn);

    for (const call of calls) {
      const executed = await executeToolCallAndBuildEvent(call, params);
      toolEvents.push(executed.event);
      yield { type: "tool_call", event: executed.event };
      messages.push({
        role: "tool",
        tool_call_id: call.id,
        content: JSON.stringify(executed.toolResult),
      });
    }
  }

  // Round budget exhausted without a final answer.
  yield {
    type: "done",
    result: {
      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
      usage: usageReported ? usageTotals : undefined,
      raw: { streamed: true, responses: rawResponses, toolCallsUsed: toolCallCount, toolCallLimitReached: true },
      toolEvents,
    },
  };
}