import { execFile } from "node:child_process"; import { mkdtemp, rm, writeFile } from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { promisify } from "node:util"; import { convert as htmlToText } from "html-to-text"; import type OpenAI from "openai"; import { z } from "zod"; import { env } from "../env.js"; import { exaClient } from "../search/exa.js"; import { searchSearxng } from "../search/searxng.js"; import { buildOpenAIConversationMessage, buildOpenAIResponsesInputMessage } from "./message-content.js"; import type { ChatMessage } from "./types.js"; const MAX_TOOL_ROUNDS = env.CHAT_MAX_TOOL_ROUNDS; const DEFAULT_WEB_RESULTS = 5; const MAX_WEB_RESULTS = 10; const DEFAULT_FETCH_MAX_CHARACTERS = 12_000; const MAX_FETCH_MAX_CHARACTERS = 50_000; const FETCH_TIMEOUT_MS = 12_000; const MAX_CODEX_PROMPT_CHARACTERS = 60_000; const DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS = 24_000; const MAX_CODEX_MAX_OUTPUT_CHARACTERS = 80_000; const MAX_SHELL_COMMAND_CHARACTERS = 20_000; const DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS = 24_000; const MAX_SHELL_MAX_OUTPUT_CHARACTERS = 80_000; const REMOTE_EXEC_MAX_BUFFER_BYTES = 1_000_000; const MAX_DANGLING_TOOL_INTENT_RETRIES = 1; const execFileAsync = promisify(execFile); const WebSearchArgsSchema = z .object({ query: z.string().trim().min(1), numResults: z.coerce.number().int().min(1).max(MAX_WEB_RESULTS).optional(), type: z.enum(["auto", "fast", "instant"]).optional(), includeDomains: z.array(z.string().trim().min(1)).max(25).optional(), excludeDomains: z.array(z.string().trim().min(1)).max(25).optional(), }) .strict(); type WebSearchArgs = z.infer; const FetchUrlArgsSchema = z .object({ url: z.string().trim().url(), maxCharacters: z.coerce.number().int().min(500).max(MAX_FETCH_MAX_CHARACTERS).optional(), }) .strict(); const CodexExecArgsSchema = z .object({ prompt: z.string().trim().min(1).max(MAX_CODEX_PROMPT_CHARACTERS), maxCharacters: 
z.coerce.number().int().min(1_000).max(MAX_CODEX_MAX_OUTPUT_CHARACTERS).optional(), }) .strict(); type CodexExecArgs = z.infer; const ShellExecArgsSchema = z .object({ command: z.string().trim().min(1).max(MAX_SHELL_COMMAND_CHARACTERS), maxCharacters: z.coerce.number().int().min(1_000).max(MAX_SHELL_MAX_OUTPUT_CHARACTERS).optional(), }) .strict(); type ShellExecArgs = z.infer; const CODEX_EXEC_TOOL = { type: "function", function: { name: "codex_exec", description: "Delegate a coding, terminal, or multi-step software task to a persistent remote Codex CLI workspace. Use for complex code changes, repository inspection, running programs/tests, debugging build failures, or other tasks that need a real shell. The task runs non-interactively; the remote Codex instance must make reasonable assumptions, complete the task, and return a final summary with relevant stdout/stderr.", parameters: { type: "object", properties: { prompt: { type: "string", description: "A complete, self-contained instruction for the remote Codex instance. Include the goal, relevant context, constraints, and what result to report back.", }, maxCharacters: { type: "integer", minimum: 1_000, maximum: MAX_CODEX_MAX_OUTPUT_CHARACTERS, description: "Maximum stdout/stderr characters returned to the model (default 24000).", }, }, required: ["prompt"], additionalProperties: false, }, }, }; const SHELL_EXEC_TOOL = { type: "function", function: { name: "shell_exec", description: "Run an arbitrary non-interactive shell command on the configured remote devbox, starting in the persistent scratch workspace. Use for quick Python scripts, calculations, file inspection, package/tool checks, tests, and command-line work that needs a real shell. This does not run inside the Sybil server container.", parameters: { type: "object", properties: { command: { type: "string", description: "Shell command to run on the devbox. 
The command is executed with bash -lc when bash exists, otherwise sh -lc, starting in the persistent scratch workspace.", }, maxCharacters: { type: "integer", minimum: 1_000, maximum: MAX_SHELL_MAX_OUTPUT_CHARACTERS, description: "Maximum stdout/stderr characters returned to the model (default 24000).", }, }, required: ["command"], additionalProperties: false, }, }, }; const BASE_CHAT_TOOLS: any[] = [ { type: "function", function: { name: "web_search", description: "Search the public web for recent or factual information. Returns ranked results with per-result summaries and snippets.", parameters: { type: "object", properties: { query: { type: "string", description: "Search query." }, numResults: { type: "integer", minimum: 1, maximum: MAX_WEB_RESULTS, description: "Number of results to return (default 5).", }, type: { type: "string", enum: ["auto", "fast", "instant"], description: "Search mode.", }, includeDomains: { type: "array", items: { type: "string" }, description: "Only include these domains.", }, excludeDomains: { type: "array", items: { type: "string" }, description: "Exclude these domains.", }, }, required: ["query"], additionalProperties: false, }, }, }, { type: "function", function: { name: "fetch_url", description: "Fetch a webpage by URL and return readable plaintext content extracted from the page for deeper inspection.", parameters: { type: "object", properties: { url: { type: "string", description: "Absolute URL to fetch, including http/https." }, maxCharacters: { type: "integer", minimum: 500, maximum: MAX_FETCH_MAX_CHARACTERS, description: "Maximum response text characters returned (default 12000).", }, }, required: ["url"], additionalProperties: false, }, }, }, ]; const CHAT_TOOLS: any[] = [ ...BASE_CHAT_TOOLS, ...(env.CHAT_CODEX_TOOL_ENABLED ? [CODEX_EXEC_TOOL] : []), ...(env.CHAT_SHELL_TOOL_ENABLED ? 
[SHELL_EXEC_TOOL] : []), ]; const RESPONSES_CHAT_TOOLS: any[] = CHAT_TOOLS.map((tool) => { if (tool?.type !== "function") return tool; return { type: "function", name: tool.function.name, description: tool.function.description, parameters: tool.function.parameters, strict: false, }; }); export const CHAT_TOOL_SYSTEM_PROMPT = "You can use tools to gather up-to-date web information when needed. " + "Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page. " + "Prefer tools when the user asks for current events, verification, sources, or details you do not already have. " + "When you decide tool use is needed, call the tool immediately in the same response; do not say you are running a tool unless you actually call it. " + (env.CHAT_CODEX_TOOL_ENABLED ? "Use codex_exec when a request needs substantial coding work, repository inspection, shell commands, tests, debugging, or another complex task suited to a persistent Codex workspace. Provide codex_exec a complete prompt with the goal, constraints, assumptions, and expected report-back format. Never ask codex_exec to wait for user input or run interactive commands. " : "") + (env.CHAT_SHELL_TOOL_ENABLED ? "Use shell_exec for direct non-interactive command-line work on the remote devbox, including quick Python programs, calculations, file inspection, running tests, and small scripts. 
" : "") + "Do not fabricate tool outputs; reason only from provided tool results."; type ToolRunOutcome = { ok: boolean; [key: string]: unknown; }; type ToolAwareUsage = { inputTokens?: number; outputTokens?: number; totalTokens?: number; }; type ToolAwareCompletionResult = { text: string; usage?: ToolAwareUsage; raw: unknown; toolEvents: ToolExecutionEvent[]; }; export type ToolAwareStreamingEvent = | { type: "delta"; text: string } | { type: "tool_call"; event: ToolExecutionEvent } | { type: "done"; result: ToolAwareCompletionResult }; type ToolAwareCompletionParams = { client: OpenAI; model: string; messages: ChatMessage[]; temperature?: number; maxTokens?: number; onToolEvent?: (event: ToolExecutionEvent) => void | Promise; logContext?: { provider: string; model: string; chatId?: string; }; }; export type ToolExecutionEvent = { toolCallId: string; name: string; status: "completed" | "failed"; summary: string; args: Record; startedAt: string; completedAt: string; durationMs: number; error?: string; resultPreview?: string; }; function compactWhitespace(input: string) { return input.replace(/\r/g, "").replace(/[ \t]+\n/g, "\n").replace(/\n{3,}/g, "\n\n").trim(); } function clipText(input: string, maxCharacters: number) { return input.length <= maxCharacters ? input : `${input.slice(0, maxCharacters)}...`; } function toRecord(value: unknown): Record { if (!value || typeof value !== "object" || Array.isArray(value)) return {}; return { ...(value as Record) }; } function toSingleLine(value: string, maxLength = 220) { return clipText( value .replace(/\r?\n+/g, " ") .replace(/\s+/g, " ") .trim(), maxLength ); } function buildToolSummary(name: string, args: Record, status: "completed" | "failed", error?: string) { const errSuffix = status === "failed" && error ? ` Error: ${toSingleLine(error, 140)}` : ""; if (name === "web_search") { const query = typeof args.query === "string" ? args.query.trim() : ""; if (status === "completed") { return query ? 
`Performed web search for '${toSingleLine(query, 100)}'.` : "Performed web search."; } return query ? `Web search for '${toSingleLine(query, 100)}' failed.${errSuffix}` : `Web search failed.${errSuffix}`; } if (name === "fetch_url") { const url = typeof args.url === "string" ? args.url.trim() : ""; if (status === "completed") { return url ? `Fetched URL ${toSingleLine(url, 140)}.` : "Fetched URL."; } return url ? `Fetching URL ${toSingleLine(url, 140)} failed.${errSuffix}` : `Fetching URL failed.${errSuffix}`; } if (name === "codex_exec") { const prompt = typeof args.prompt === "string" ? args.prompt.trim() : ""; if (status === "completed") { return prompt ? `Ran Codex task: '${toSingleLine(prompt, 120)}'.` : "Ran Codex task."; } return prompt ? `Codex task '${toSingleLine(prompt, 120)}' failed.${errSuffix}` : `Codex task failed.${errSuffix}`; } if (name === "shell_exec") { const command = typeof args.command === "string" ? args.command.trim() : ""; if (status === "completed") { return command ? `Ran devbox shell command: '${toSingleLine(command, 120)}'.` : "Ran devbox shell command."; } return command ? `Devbox shell command '${toSingleLine(command, 120)}' failed.${errSuffix}` : `Devbox shell command failed.${errSuffix}`; } if (status === "completed") { return `Ran tool '${name}'.`; } return `Tool '${name}' failed.${errSuffix}`; } function logToolEvent(event: ToolExecutionEvent, context?: ToolAwareCompletionParams["logContext"]) { const payload = { kind: "tool_call", ...context, ...event, }; const line = `[tool_call] ${JSON.stringify(payload)}`; if (event.status === "failed") console.error(line); else console.info(line); } function buildResultPreview(toolResult: ToolRunOutcome) { const serialized = JSON.stringify(toolResult); return serialized ? 
clipText(serialized, 400) : undefined; } export function buildToolLogMessageData(chatId: string, event: ToolExecutionEvent) { return { chatId, role: "tool" as const, content: event.summary, name: event.name, metadata: { kind: "tool_call", toolCallId: event.toolCallId, toolName: event.name, status: event.status, summary: event.summary, args: event.args, startedAt: event.startedAt, completedAt: event.completedAt, durationMs: event.durationMs, error: event.error ?? null, resultPreview: event.resultPreview ?? null, }, }; } function extractHtmlTitle(html: string) { const match = html.match(/]*>([\s\S]*?)<\/title>/i); if (!match?.[1]) return null; return compactWhitespace( match[1] .replace(/ /gi, " ") .replace(/&/gi, "&") .replace(/</gi, "<") .replace(/>/gi, ">") .replace(/"/gi, '"') .replace(/'/gi, "'") ); } function normalizeIncomingMessages(messages: ChatMessage[]) { const normalized = messages.map((message) => buildOpenAIConversationMessage(message)); return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized]; } function normalizeIncomingResponsesInput(messages: ChatMessage[]) { const normalized = messages.map((message) => buildOpenAIResponsesInputMessage(message)); return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized]; } async function runExaWebSearchTool(args: WebSearchArgs): Promise { const exa = exaClient(); const response = await exa.search(args.query, { type: args.type ?? "auto", numResults: args.numResults ?? DEFAULT_WEB_RESULTS, includeDomains: args.includeDomains, excludeDomains: args.excludeDomains, moderation: true, userLocation: "US", contents: { summary: { query: args.query }, highlights: { query: args.query, maxCharacters: 320, numSentences: 2, highlightsPerUrl: 2, }, text: { maxCharacters: 1_000 }, }, } as any); const results = Array.isArray(response?.results) ? response.results : []; return { ok: true, searchEngine: "exa", query: args.query, requestId: response?.requestId ?? 
null, results: results.map((result: any, index: number) => ({ rank: index + 1, title: typeof result?.title === "string" ? result.title : null, url: typeof result?.url === "string" ? result.url : null, publishedDate: typeof result?.publishedDate === "string" ? result.publishedDate : null, author: typeof result?.author === "string" ? result.author : null, summary: typeof result?.summary === "string" ? clipText(result.summary, 1_400) : null, text: typeof result?.text === "string" ? clipText(result.text, 700) : null, highlights: Array.isArray(result?.highlights) ? result.highlights.filter((h: unknown) => typeof h === "string").slice(0, 3).map((h: string) => clipText(h, 280)) : [], })), }; } async function runSearxngWebSearchTool(args: WebSearchArgs): Promise { const response = await searchSearxng(args.query, { numResults: args.numResults ?? DEFAULT_WEB_RESULTS, includeDomains: args.includeDomains, excludeDomains: args.excludeDomains, }); return { ok: true, searchEngine: "searxng", query: args.query, requestId: response.requestId, results: response.results.map((result, index) => ({ rank: index + 1, title: result.title, url: result.url, publishedDate: result.publishedDate, author: null, summary: result.summary, text: result.text, highlights: result.summary ? [clipText(result.summary, 280)] : [], engines: result.engines, })), }; } async function runWebSearchTool(input: unknown): Promise { const args = WebSearchArgsSchema.parse(input); if (env.CHAT_WEB_SEARCH_ENGINE === "searxng") { return runSearxngWebSearchTool(args); } return runExaWebSearchTool(args); } function assertSafeFetchUrl(urlRaw: string) { const parsed = new URL(urlRaw); if (parsed.protocol !== "http:" && parsed.protocol !== "https:") { throw new Error("Only http:// and https:// URLs are supported."); } return parsed; } async function runFetchUrlTool(input: unknown): Promise { const args = FetchUrlArgsSchema.parse(input); const parsed = assertSafeFetchUrl(args.url); const maxCharacters = args.maxCharacters ?? 
DEFAULT_FETCH_MAX_CHARACTERS; const controller = new AbortController(); const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); let response: Response; try { response = await fetch(parsed.toString(), { redirect: "follow", signal: controller.signal, headers: { "User-Agent": "SybilBot/1.0 (+https://sybil.local)", Accept: "text/html, text/plain, application/json;q=0.9, */*;q=0.5", }, }); } finally { clearTimeout(timeout); } if (!response.ok) { throw new Error(`Fetch failed with status ${response.status}.`); } const contentType = (response.headers.get("content-type") ?? "").toLowerCase(); const body = await response.text(); const isHtml = contentType.includes("text/html") || /]/i.test(body); let extracted = body; if (isHtml) { extracted = htmlToText(body, { wordwrap: false, preserveNewlines: true, selectors: [ { selector: "img", format: "skip" }, { selector: "script", format: "skip" }, { selector: "style", format: "skip" }, { selector: "noscript", format: "skip" }, { selector: "a", options: { ignoreHref: true } }, ], }); } const normalized = compactWhitespace(extracted); const truncated = normalized.length > maxCharacters; const text = truncated ? `${normalized.slice(0, maxCharacters)}\n\n[truncated ${normalized.length - maxCharacters} characters]` : normalized; return { ok: true, url: response.url || parsed.toString(), status: response.status, contentType: contentType || null, title: isHtml ? 
extractHtmlTitle(body) : null, truncated, text, }; } function shellQuote(value: string) { return `'${value.replace(/'/g, `'\\''`)}'`; } function buildDevboxSshTarget() { const host = env.CHAT_CODEX_REMOTE_HOST; if (!host) { throw new Error("CHAT_CODEX_REMOTE_HOST not set"); } if (!env.CHAT_CODEX_REMOTE_USER || host.includes("@")) { return host; } return `${env.CHAT_CODEX_REMOTE_USER}@${host}`; } function buildRemoteCodexCommand(prompt: string) { const workdir = env.CHAT_CODEX_REMOTE_WORKDIR.trim(); const wrappedPrompt = [ "You are running in a non-interactive batch environment.", "", "Rules:", "- Do not ask questions or wait for user input.", "- Do not use interactive commands, editors, pagers, or prompts.", "- If details are ambiguous, make a reasonable assumption and continue.", "- Complete the task in one run, including any requested file edits, commands, and verification.", "- End with a concise final report that includes changed files, commands run, and outcomes.", "", "Task:", prompt, ].join("\n"); const codexCommand = `codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check ${shellQuote(wrappedPrompt)} < /dev/null`; return `mkdir -p ${shellQuote(workdir)} && cd ${shellQuote(workdir)} && ${codexCommand}`; } function buildRemoteShellCommand(command: string) { const workdir = env.CHAT_CODEX_REMOTE_WORKDIR.trim(); const quotedCommand = shellQuote(command); return ( `mkdir -p ${shellQuote(workdir)} && cd ${shellQuote(workdir)} && ` + `if command -v bash >/dev/null 2>&1; then bash -lc ${quotedCommand}; else sh -lc ${quotedCommand}; fi` ); } async function withDevboxSshKeyPath(fn: (keyPath?: string) => Promise) { if (env.CHAT_CODEX_SSH_KEY_PATH) { return fn(env.CHAT_CODEX_SSH_KEY_PATH); } if (!env.CHAT_CODEX_SSH_PRIVATE_KEY_B64) { return fn(undefined); } const tmpDir = await mkdtemp(path.join(os.tmpdir(), "sybil-codex-ssh-")); const keyPath = path.join(tmpDir, "id"); try { await writeFile(keyPath, Buffer.from(env.CHAT_CODEX_SSH_PRIVATE_KEY_B64, 
"base64"), { mode: 0o600 }); return await fn(keyPath); } finally { await rm(tmpDir, { recursive: true, force: true }); } } function clipRemoteOutput(value: string, maxCharacters: number) { if (value.length <= maxCharacters) { return { text: value, truncated: false }; } return { text: `${value.slice(0, maxCharacters)}\n\n[truncated ${value.length - maxCharacters} characters]`, truncated: true, }; } function bufferOrStringToString(value: unknown) { if (typeof value === "string") return value; if (Buffer.isBuffer(value)) return value.toString("utf8"); return ""; } async function runCodexExecTool(input: unknown): Promise { if (!env.CHAT_CODEX_TOOL_ENABLED) { return { ok: false, error: "codex_exec is disabled." }; } const args: CodexExecArgs = CodexExecArgsSchema.parse(input); const maxCharacters = args.maxCharacters ?? DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS; const sshTarget = buildDevboxSshTarget(); const remoteCommand = buildRemoteCodexCommand(args.prompt); const run = async (keyPath?: string) => { const sshArgs = [ "-n", "-o", "BatchMode=yes", "-o", "StrictHostKeyChecking=accept-new", "-o", "UserKnownHostsFile=/tmp/sybil-codex-known-hosts", "-p", String(env.CHAT_CODEX_REMOTE_PORT), ]; if (keyPath) { sshArgs.push("-i", keyPath); } sshArgs.push(sshTarget, remoteCommand); try { const result = await execFileAsync("ssh", sshArgs, { timeout: env.CHAT_CODEX_EXEC_TIMEOUT_MS, maxBuffer: REMOTE_EXEC_MAX_BUFFER_BYTES, }); const stdout = clipRemoteOutput(bufferOrStringToString(result.stdout), maxCharacters); const stderr = clipRemoteOutput(bufferOrStringToString(result.stderr), Math.min(maxCharacters, 12_000)); return { ok: true, host: env.CHAT_CODEX_REMOTE_HOST, workdir: env.CHAT_CODEX_REMOTE_WORKDIR, stdout: stdout.text, stderr: stderr.text, stdoutTruncated: stdout.truncated, stderrTruncated: stderr.truncated, }; } catch (err: any) { const stdout = clipRemoteOutput(bufferOrStringToString(err?.stdout), maxCharacters); const stderr = 
clipRemoteOutput(bufferOrStringToString(err?.stderr), Math.min(maxCharacters, 12_000)); return { ok: false, error: err?.killed ? `Remote Codex command timed out after ${env.CHAT_CODEX_EXEC_TIMEOUT_MS}ms.` : err?.message ?? String(err), exitCode: typeof err?.code === "number" ? err.code : null, signal: typeof err?.signal === "string" ? err.signal : null, host: env.CHAT_CODEX_REMOTE_HOST, workdir: env.CHAT_CODEX_REMOTE_WORKDIR, stdout: stdout.text, stderr: stderr.text, stdoutTruncated: stdout.truncated, stderrTruncated: stderr.truncated, }; } }; return withDevboxSshKeyPath(run); } async function runShellExecTool(input: unknown): Promise { if (!env.CHAT_SHELL_TOOL_ENABLED) { return { ok: false, error: "shell_exec is disabled." }; } const args: ShellExecArgs = ShellExecArgsSchema.parse(input); const maxCharacters = args.maxCharacters ?? DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS; const sshTarget = buildDevboxSshTarget(); const remoteCommand = buildRemoteShellCommand(args.command); const run = async (keyPath?: string) => { const sshArgs = [ "-n", "-o", "BatchMode=yes", "-o", "StrictHostKeyChecking=accept-new", "-o", "UserKnownHostsFile=/tmp/sybil-codex-known-hosts", "-p", String(env.CHAT_CODEX_REMOTE_PORT), ]; if (keyPath) { sshArgs.push("-i", keyPath); } sshArgs.push(sshTarget, remoteCommand); try { const result = await execFileAsync("ssh", sshArgs, { timeout: env.CHAT_SHELL_EXEC_TIMEOUT_MS, maxBuffer: REMOTE_EXEC_MAX_BUFFER_BYTES, }); const stdout = clipRemoteOutput(bufferOrStringToString(result.stdout), maxCharacters); const stderr = clipRemoteOutput(bufferOrStringToString(result.stderr), Math.min(maxCharacters, 12_000)); return { ok: true, host: env.CHAT_CODEX_REMOTE_HOST, workdir: env.CHAT_CODEX_REMOTE_WORKDIR, command: args.command, stdout: stdout.text, stderr: stderr.text, stdoutTruncated: stdout.truncated, stderrTruncated: stderr.truncated, }; } catch (err: any) { const stdout = clipRemoteOutput(bufferOrStringToString(err?.stdout), maxCharacters); const stderr = 
clipRemoteOutput(bufferOrStringToString(err?.stderr), Math.min(maxCharacters, 12_000)); return { ok: false, error: err?.killed ? `Remote shell command timed out after ${env.CHAT_SHELL_EXEC_TIMEOUT_MS}ms.` : err?.message ?? String(err), exitCode: typeof err?.code === "number" ? err.code : null, signal: typeof err?.signal === "string" ? err.signal : null, host: env.CHAT_CODEX_REMOTE_HOST, workdir: env.CHAT_CODEX_REMOTE_WORKDIR, command: args.command, stdout: stdout.text, stderr: stderr.text, stdoutTruncated: stdout.truncated, stderrTruncated: stderr.truncated, }; } }; return withDevboxSshKeyPath(run); } async function executeTool(name: string, args: unknown): Promise { if (name === "web_search") return runWebSearchTool(args); if (name === "fetch_url") return runFetchUrlTool(args); if (name === "codex_exec") return runCodexExecTool(args); if (name === "shell_exec") return runShellExecTool(args); return { ok: false, error: `Unknown tool: ${name}` }; } function parseToolArgs(raw: unknown) { if (typeof raw !== "string") return {}; const trimmed = raw.trim(); if (!trimmed) return {}; try { return JSON.parse(trimmed); } catch (err: any) { throw new Error(`Invalid JSON arguments: ${err?.message ?? 
String(err)}`); } } function buildEventArgs(name: string, args: Record) { if (name === "codex_exec" && typeof args.prompt === "string") { return { ...args, prompt: clipText(args.prompt, 1_000), }; } if (name === "shell_exec" && typeof args.command === "string") { return { ...args, command: clipText(args.command, 1_000), }; } return args; } function looksLikeDanglingToolIntent(text: string) { const normalized = text .toLowerCase() .replace(/[`*_>#-]/g, " ") .replace(/\s+/g, " ") .trim(); if (!normalized) return false; if (normalized.length > 800) return false; if (/\blet me know\b/.test(normalized) || /\bif you (want|would like)\b/.test(normalized)) return false; return ( /\b(calling|running|executing|trying|checking|testing)\b.{0,80}\b(now|it|tool|command|shell_exec|codex_exec)\b/.test(normalized) || /\b(let me|i'?ll|i will)\b.{0,120}\b(run|execute|call|try|check|test)\b/.test(normalized) || /\b(stand by|hang on|one moment)\b/.test(normalized) ); } function appendDanglingToolIntentCorrection(conversation: any[], text: string) { conversation.push({ role: "assistant", content: text }); conversation.push({ role: "system", content: "Internal correction: the previous assistant message claimed it would run a tool, but no tool call was made. If the task needs an available tool, call it now. Otherwise provide the final answer directly without saying you will run a tool.", }); } function mergeUsage(acc: Required, usage: any) { if (!usage) return false; acc.inputTokens += usage.prompt_tokens ?? 0; acc.outputTokens += usage.completion_tokens ?? 0; acc.totalTokens += usage.total_tokens ?? 0; return true; } function mergeResponsesUsage(acc: Required, usage: any) { if (!usage) return false; acc.inputTokens += usage.input_tokens ?? 0; acc.outputTokens += usage.output_tokens ?? 0; acc.totalTokens += usage.total_tokens ?? 0; return true; } function getResponseOutputItems(response: any) { return Array.isArray(response?.output) ? 
response.output : []; } function extractResponsesText(response: any, fallback = "") { if (typeof response?.output_text === "string") return response.output_text; const parts: string[] = []; for (const item of getResponseOutputItems(response)) { if (item?.type !== "message" || !Array.isArray(item.content)) continue; for (const content of item.content) { if (content?.type === "output_text" && typeof content.text === "string") { parts.push(content.text); } else if (content?.type === "refusal" && typeof content.refusal === "string") { parts.push(content.refusal); } } } return parts.join("") || fallback; } function getResponseFailureMessage(response: any) { if (response?.status !== "failed" && response?.status !== "incomplete") return null; const errorMessage = typeof response?.error?.message === "string" ? response.error.message : null; const incompleteReason = typeof response?.incomplete_details?.reason === "string" ? response.incomplete_details.reason : null; return errorMessage ?? (incompleteReason ? `Response incomplete: ${incompleteReason}` : `Response ${response.status}.`); } function normalizeResponsesToolCalls(outputItems: any[], round: number): NormalizedToolCall[] { return outputItems .filter((item) => item?.type === "function_call") .map((call: any, index: number) => ({ id: call.call_id ?? call.id ?? `tool_call_${round}_${index}`, name: call.name ?? "unknown_tool", arguments: call.arguments ?? "{}", })); } type NormalizedToolCall = { id: string; name: string; arguments: string; }; function normalizeModelToolCalls(toolCalls: any[], round: number): NormalizedToolCall[] { return toolCalls.map((call: any, index: number) => ({ id: call?.id ?? `tool_call_${round}_${index}`, name: call?.function?.name ?? "unknown_tool", arguments: call?.function?.arguments ?? 
"{}", })); } async function executeToolCallAndBuildEvent( call: NormalizedToolCall, params: ToolAwareCompletionParams ): Promise<{ event: ToolExecutionEvent; toolResult: ToolRunOutcome }> { const startedAtMs = Date.now(); const startedAt = new Date(startedAtMs).toISOString(); let toolResult: ToolRunOutcome; let parsedArgs: Record = {}; try { parsedArgs = toRecord(parseToolArgs(call.arguments)); toolResult = await executeTool(call.name, parsedArgs); } catch (err: any) { toolResult = { ok: false, error: err?.message ?? String(err), }; } const status: "completed" | "failed" = toolResult.ok ? "completed" : "failed"; const error = status === "failed" ? typeof toolResult.error === "string" ? toolResult.error : "Tool execution failed." : undefined; const completedAtMs = Date.now(); const eventArgs = buildEventArgs(call.name, parsedArgs); const event: ToolExecutionEvent = { toolCallId: call.id, name: call.name, status, summary: buildToolSummary(call.name, eventArgs, status, error), args: eventArgs, startedAt, completedAt: new Date(completedAtMs).toISOString(), durationMs: completedAtMs - startedAtMs, error, resultPreview: buildResultPreview(toolResult), }; logToolEvent(event, params.logContext); if (params.onToolEvent) { await params.onToolEvent(event); } return { event, toolResult }; } export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams): Promise { const input: any[] = normalizeIncomingResponsesInput(params.messages); const rawResponses: unknown[] = []; const toolEvents: ToolExecutionEvent[] = []; const usageAcc: Required = { inputTokens: 0, outputTokens: 0, totalTokens: 0 }; let sawUsage = false; let totalToolCalls = 0; let danglingToolIntentRetries = 0; for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) { const response = await params.client.responses.create({ model: params.model, input, temperature: params.temperature, max_output_tokens: params.maxTokens, tools: RESPONSES_CHAT_TOOLS, tool_choice: "auto", parallel_tool_calls: true, // 
Tool loops pass response output items back as input; reasoning items need persistence. store: true, } as any); rawResponses.push(response); sawUsage = mergeResponsesUsage(usageAcc, response?.usage) || sawUsage; const failureMessage = getResponseFailureMessage(response); if (failureMessage) { throw new Error(failureMessage); } const outputItems = getResponseOutputItems(response); const normalizedToolCalls = normalizeResponsesToolCalls(outputItems, round); if (!normalizedToolCalls.length) { const text = extractResponsesText(response); if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) { danglingToolIntentRetries += 1; appendDanglingToolIntentCorrection(input, text); continue; } return { text, usage: sawUsage ? usageAcc : undefined, raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, api: "responses" }, toolEvents, }; } totalToolCalls += normalizedToolCalls.length; input.push(...outputItems); for (const call of normalizedToolCalls) { const { event, toolResult } = await executeToolCallAndBuildEvent(call, params); toolEvents.push(event); input.push({ type: "function_call_output", call_id: call.id, output: JSON.stringify(toolResult), }); } } return { text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.", usage: sawUsage ? 
usageAcc : undefined, raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "responses" }, toolEvents, }; } export async function runToolAwareChatCompletions(params: ToolAwareCompletionParams): Promise { const conversation: any[] = normalizeIncomingMessages(params.messages); const rawResponses: unknown[] = []; const toolEvents: ToolExecutionEvent[] = []; const usageAcc: Required = { inputTokens: 0, outputTokens: 0, totalTokens: 0 }; let sawUsage = false; let totalToolCalls = 0; let danglingToolIntentRetries = 0; for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) { const completion = await params.client.chat.completions.create({ model: params.model, messages: conversation, temperature: params.temperature, max_tokens: params.maxTokens, tools: CHAT_TOOLS, tool_choice: "auto", } as any); rawResponses.push(completion); sawUsage = mergeUsage(usageAcc, completion?.usage) || sawUsage; const message = completion?.choices?.[0]?.message; if (!message) { return { text: "", usage: sawUsage ? usageAcc : undefined, raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, missingMessage: true }, toolEvents, }; } const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; if (!toolCalls.length) { const text = typeof message.content === "string" ? message.content : ""; if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) { danglingToolIntentRetries += 1; appendDanglingToolIntentCorrection(conversation, text); continue; } return { text, usage: sawUsage ? 
usageAcc : undefined, raw: { responses: rawResponses, toolCallsUsed: totalToolCalls }, toolEvents, }; }
// Tool calls were requested: record the assistant turn that asked for them,
// then append one "tool" message per call.
const normalizedToolCalls = normalizeModelToolCalls(toolCalls, round); totalToolCalls += normalizedToolCalls.length; const assistantToolCallMessage: any = { role: "assistant", tool_calls: normalizedToolCalls.map((call) => ({ id: call.id, type: "function", function: { name: call.name, arguments: call.arguments, }, })), }; if (typeof message.content === "string" && message.content.length) { assistantToolCallMessage.content = message.content; } conversation.push(assistantToolCallMessage);
// Calls run one after another, in order; each result is JSON-serialized and
// linked to its call through tool_call_id.
for (const call of normalizedToolCalls) { const { event, toolResult } = await executeToolCallAndBuildEvent(call, params); toolEvents.push(event); conversation.push({ role: "tool", tool_call_id: call.id, content: JSON.stringify(toolResult), }); } }
// Reached only when the loop ran out of rounds without producing a final answer.
return { text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.", usage: sawUsage ? usageAcc : undefined, raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true }, toolEvents, }; }
/**
 * Streaming tool loop over the Responses API.
 *
 * For at most `MAX_TOOL_ROUNDS` rounds, opens a streamed
 * `params.client.responses.create` request (with `RESPONSES_CHAT_TOOLS`,
 * `tool_choice: "auto"`, `parallel_tool_calls: true`, `store: true`) and
 * drains its events. Every event is appended to the raw log. Text deltas are
 * buffered per round rather than forwarded as they arrive.
 *
 * Yields, in order of occurrence:
 * - `{ type: "tool_call", event }` after each executed tool call;
 * - `{ type: "delta", text }` once, with the complete text of the final round
 *   (skipped when that text is empty);
 * - `{ type: "done", result }` as the last item, where `result` carries
 *   `text`, `usage`, `raw` and `toolEvents`.
 *
 * @param params Supplies `client`, `model`, `messages`, `temperature` and
 *   `maxTokens`; the whole object is also forwarded to
 *   `executeToolCallAndBuildEvent` for each tool call.
 * @throws Error when the stream delivers an `"error"` event, or when
 *   `getResponseFailureMessage` returns a message for the round's response.
 */
export async function* runToolAwareOpenAIChatStream( params: ToolAwareCompletionParams ): AsyncGenerator { const input: any[] = normalizeIncomingResponsesInput(params.messages); const rawResponses: unknown[] = []; const toolEvents: ToolExecutionEvent[] = []; const usageAcc: Required = { inputTokens: 0, outputTokens: 0, totalTokens: 0 }; let sawUsage = false; let totalToolCalls = 0; let danglingToolIntentRetries = 0; for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) { const stream = await params.client.responses.create({ model: params.model, input, temperature: params.temperature, max_output_tokens: params.maxTokens, tools: RESPONSES_CHAT_TOOLS, tool_choice: "auto", parallel_tool_calls: true, // Tool loops pass response output items back as input; reasoning items need persistence. 
store: true, stream: true, } as any); let roundText = ""; let completedResponse: any | null = null; const completedOutputItems: any[] = [];
// Drain this round's events:
// - "response.output_text.delta": buffer the text;
// - "response.output_item.done": keep the item at its output_index (appended
//   at the end when the event carries no index);
// - "response.completed" / "response.failed" / "response.incomplete": keep the
//   response object and merge its usage;
// - "error": abort with the event's message.
for await (const event of stream as any as AsyncIterable) { rawResponses.push(event); if (event?.type === "response.output_text.delta" && typeof event.delta === "string") { roundText += event.delta; } else if (event?.type === "response.output_item.done" && event.item) { completedOutputItems[event.output_index ?? completedOutputItems.length] = event.item; } else if (event?.type === "response.completed") { completedResponse = event.response; sawUsage = mergeResponsesUsage(usageAcc, event.response?.usage) || sawUsage; } else if (event?.type === "response.failed" || event?.type === "response.incomplete") { completedResponse = event.response; sawUsage = mergeResponsesUsage(usageAcc, event.response?.usage) || sawUsage; } else if (event?.type === "error") { throw new Error(event.message ?? "OpenAI Responses stream failed."); } }
// completedResponse is still null here if no terminal event was seen.
// NOTE(review): assumes the helpers below accept null — confirm.
const failureMessage = getResponseFailureMessage(completedResponse); if (failureMessage) { throw new Error(failureMessage); }
// Prefer the output items reported on the final response; fall back to the
// items collected from "response.output_item.done" events, where
// filter(Boolean) drops slots that were never filled.
const outputItems = getResponseOutputItems(completedResponse); const responseOutputItems = outputItems.length ? outputItems : completedOutputItems.filter(Boolean); const normalizedToolCalls = normalizeResponsesToolCalls(responseOutputItems, round);
// No tool calls: either retry once with a correction (when
// looksLikeDanglingToolIntent flags the text and the retry budget is not
// spent) or emit the whole text as a single "delta" followed by "done".
if (!normalizedToolCalls.length) { const text = extractResponsesText(completedResponse, roundText); if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) { danglingToolIntentRetries += 1; appendDanglingToolIntentCorrection(input, text); continue; } if (text) { yield { type: "delta", text }; } yield { type: "done", result: { text, usage: sawUsage ? 
usageAcc : undefined, raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, api: "responses" }, toolEvents, }, }; return; }
// Tool calls were requested: feed the model's own output items back as input,
// then add one function_call_output per call, executed one after another.
totalToolCalls += normalizedToolCalls.length; input.push(...responseOutputItems); for (const call of normalizedToolCalls) { const { event, toolResult } = await executeToolCallAndBuildEvent(call, params); toolEvents.push(event); yield { type: "tool_call", event }; input.push({ type: "function_call_output", call_id: call.id, output: JSON.stringify(toolResult), }); } }
// Reached only when the loop ran out of rounds without producing a final answer.
yield { type: "done", result: { text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.", usage: sawUsage ? usageAcc : undefined, raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "responses" }, toolEvents, }, }; }
/**
 * Streaming tool loop over the Chat Completions API.
 *
 * For at most `MAX_TOOL_ROUNDS` rounds, opens a streamed
 * `params.client.chat.completions.create` request (with `CHAT_TOOLS`,
 * `tool_choice: "auto"` and `stream_options.include_usage`) and drains its
 * chunks. Every chunk is appended to the raw log. Text and tool-call
 * fragments are read from the first choice only and buffered per round; text
 * is not forwarded as it arrives.
 *
 * Yields, in order of occurrence:
 * - `{ type: "tool_call", event }` after each executed tool call;
 * - `{ type: "delta", text }` once, with the complete text of the final round
 *   (skipped when that text is empty);
 * - `{ type: "done", result }` as the last item, where `result` carries
 *   `text`, `usage`, `raw` and `toolEvents`.
 *
 * No errors are caught here; failures of the client call, the stream, or
 * `executeToolCallAndBuildEvent` propagate to the consumer.
 *
 * @param params Supplies `client`, `model`, `messages`, `temperature` and
 *   `maxTokens`; the whole object is also forwarded to
 *   `executeToolCallAndBuildEvent` for each tool call.
 */
export async function* runToolAwareChatCompletionsStream( params: ToolAwareCompletionParams ): AsyncGenerator { const conversation: any[] = normalizeIncomingMessages(params.messages); const rawResponses: unknown[] = []; const toolEvents: ToolExecutionEvent[] = []; const usageAcc: Required = { inputTokens: 0, outputTokens: 0, totalTokens: 0 }; let sawUsage = false; let totalToolCalls = 0; let danglingToolIntentRetries = 0; for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) { const stream = await params.client.chat.completions.create({ model: params.model, messages: conversation, temperature: params.temperature, max_tokens: params.maxTokens, tools: CHAT_TOOLS, tool_choice: "auto", stream: true, stream_options: { include_usage: true }, } as any); let roundText = ""; const roundToolCalls = new Map();
// Drain this round's chunks. Usage is merged from every chunk that carries it.
for await (const chunk of stream as any as AsyncIterable) { rawResponses.push(chunk); sawUsage = mergeUsage(usageAcc, chunk?.usage) || sawUsage; const choice = chunk?.choices?.[0]; const deltaText = choice?.delta?.content ?? 
""; if (typeof deltaText === "string" && deltaText.length) { roundText += deltaText; } const deltaToolCalls = Array.isArray(choice?.delta?.tool_calls) ? choice.delta.tool_calls : [];
// Tool calls arrive as fragments. Merge them per `index` (0 when a fragment
// has no numeric index): id and name are overwritten when present, argument
// chunks are concatenated.
// NOTE(review): fragments of different calls that all lack an index would be
// merged into slot 0 — confirm the provider always sends `index`.
for (const toolCall of deltaToolCalls) { const idx = typeof toolCall?.index === "number" ? toolCall.index : 0; const entry = roundToolCalls.get(idx) ?? { arguments: "" }; if (typeof toolCall?.id === "string" && toolCall.id.length) { entry.id = toolCall.id; } if (typeof toolCall?.function?.name === "string" && toolCall.function.name.length) { entry.name = toolCall.function.name; } if (typeof toolCall?.function?.arguments === "string" && toolCall.function.arguments.length) { entry.arguments += toolCall.function.arguments; } roundToolCalls.set(idx, entry); } }
// Order the merged calls by index and fill in whatever never arrived: a
// synthetic id built from the round and position, the name "unknown_tool",
// and "{}" for empty arguments.
const normalizedToolCalls: NormalizedToolCall[] = [...roundToolCalls.entries()] .sort((a, b) => a[0] - b[0]) .map(([_, call], index) => ({ id: call.id ?? `tool_call_${round}_${index}`, name: call.name ?? "unknown_tool", arguments: call.arguments || "{}", }));
// No tool calls: either retry once with a correction (when
// looksLikeDanglingToolIntent flags the text and the retry budget is not
// spent) or emit the whole text as a single "delta" followed by "done".
if (!normalizedToolCalls.length) { if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(roundText)) { danglingToolIntentRetries += 1; appendDanglingToolIntentCorrection(conversation, roundText); continue; } if (roundText) { yield { type: "delta", text: roundText }; } yield { type: "done", result: { text: roundText, usage: sawUsage ? 
usageAcc : undefined, raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls }, toolEvents, }, }; return; }
// Tool calls were requested: record the assistant turn that asked for them
// (with any text that came alongside), then append one "tool" message per call.
totalToolCalls += normalizedToolCalls.length; const assistantToolCallMessage: any = { role: "assistant", tool_calls: normalizedToolCalls.map((call) => ({ id: call.id, type: "function", function: { name: call.name, arguments: call.arguments, }, })), }; if (roundText) { assistantToolCallMessage.content = roundText; } conversation.push(assistantToolCallMessage);
// Calls run one after another, in order; the event is yielded before the
// JSON-serialized result is appended under the call's tool_call_id.
for (const call of normalizedToolCalls) { const { event, toolResult } = await executeToolCallAndBuildEvent(call, params); toolEvents.push(event); yield { type: "tool_call", event }; conversation.push({ role: "tool", tool_call_id: call.id, content: JSON.stringify(toolResult), }); } }
// Reached only when the loop ran out of rounds without producing a final answer.
yield { type: "done", result: { text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.", usage: sawUsage ? usageAcc : undefined, raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true }, toolEvents, }, }; }