1357 lines
46 KiB
TypeScript
1357 lines
46 KiB
TypeScript
import { execFile } from "node:child_process";
|
|
import { mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { promisify } from "node:util";
|
|
import { convert as htmlToText } from "html-to-text";
|
|
import type OpenAI from "openai";
|
|
import { z } from "zod";
|
|
import { env } from "../env.js";
|
|
import { exaClient } from "../search/exa.js";
|
|
import { searchSearxng } from "../search/searxng.js";
|
|
import { buildOpenAIConversationMessage, buildOpenAIResponsesInputMessage } from "./message-content.js";
|
|
import type { ChatMessage } from "./types.js";
|
|
|
|
// Upper bound on request/tool rounds in the chat loop; read from the environment.
const MAX_TOOL_ROUNDS = env.CHAT_MAX_TOOL_ROUNDS;

// web_search: result count used when the caller gives none, and the schema's upper limit.
const DEFAULT_WEB_RESULTS = 5;
const MAX_WEB_RESULTS = 10;

// fetch_url: default / maximum characters of page text returned, and the request timeout.
const DEFAULT_FETCH_MAX_CHARACTERS = 12_000;
const MAX_FETCH_MAX_CHARACTERS = 50_000;
const FETCH_TIMEOUT_MS = 12_000;

// codex_exec: prompt length cap plus default / maximum stdout+stderr characters returned.
const MAX_CODEX_PROMPT_CHARACTERS = 60_000;
const DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS = 24_000;
const MAX_CODEX_MAX_OUTPUT_CHARACTERS = 80_000;

// shell_exec: command length cap plus default / maximum stdout+stderr characters returned.
const MAX_SHELL_COMMAND_CHARACTERS = 20_000;
const DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS = 24_000;
const MAX_SHELL_MAX_OUTPUT_CHARACTERS = 80_000;

// `maxBuffer` handed to execFile for the ssh child process (bytes of stdout/stderr).
const REMOTE_EXEC_MAX_BUFFER_BYTES = 1_000_000;

// NOTE(review): presumably caps the "claimed a tool call but made none" retries; the usage is outside this view.
const MAX_DANGLING_TOOL_INTENT_RETRIES = 1;

// Promise-returning flavour of child_process.execFile.
const execFileAsync = promisify(execFile);
|
|
|
|
/**
 * Runtime validation for `web_search` tool arguments.
 * Unknown keys are rejected (`strict`); `numResults` is coerced to a number.
 */
const WebSearchArgsSchema = z
  .object({
    query: z.string().trim().min(1),
    numResults: z.coerce.number().int().min(1).max(MAX_WEB_RESULTS).optional(),
    type: z.enum(["auto", "fast", "instant"]).optional(),
    includeDomains: z.array(z.string().trim().min(1)).max(25).optional(),
    excludeDomains: z.array(z.string().trim().min(1)).max(25).optional(),
  })
  .strict();

/** Parsed `web_search` arguments as produced by {@link WebSearchArgsSchema}. */
type WebSearchArgs = z.infer<typeof WebSearchArgsSchema>;
|
|
|
|
/**
 * Runtime validation for `fetch_url` tool arguments.
 * `url` must parse as a URL; `maxCharacters` is coerced to an integer in [500, MAX_FETCH_MAX_CHARACTERS].
 */
const FetchUrlArgsSchema = z
  .object({
    url: z.string().trim().url(),
    maxCharacters: z.coerce.number().int().min(500).max(MAX_FETCH_MAX_CHARACTERS).optional(),
  })
  .strict();
|
|
|
|
/**
 * Runtime validation for `codex_exec` tool arguments.
 * The prompt is trimmed and capped at MAX_CODEX_PROMPT_CHARACTERS; unknown keys are rejected.
 */
const CodexExecArgsSchema = z
  .object({
    prompt: z.string().trim().min(1).max(MAX_CODEX_PROMPT_CHARACTERS),
    maxCharacters: z.coerce.number().int().min(1_000).max(MAX_CODEX_MAX_OUTPUT_CHARACTERS).optional(),
  })
  .strict();

/** Parsed `codex_exec` arguments as produced by {@link CodexExecArgsSchema}. */
type CodexExecArgs = z.infer<typeof CodexExecArgsSchema>;
|
|
|
|
/**
 * Runtime validation for `shell_exec` tool arguments.
 * The command is trimmed and capped at MAX_SHELL_COMMAND_CHARACTERS; unknown keys are rejected.
 */
const ShellExecArgsSchema = z
  .object({
    command: z.string().trim().min(1).max(MAX_SHELL_COMMAND_CHARACTERS),
    maxCharacters: z.coerce.number().int().min(1_000).max(MAX_SHELL_MAX_OUTPUT_CHARACTERS).optional(),
  })
  .strict();

/** Parsed `shell_exec` arguments as produced by {@link ShellExecArgsSchema}. */
type ShellExecArgs = z.infer<typeof ShellExecArgsSchema>;
|
|
|
|
/**
 * Chat Completions-style function-tool definition for `codex_exec`.
 * The JSON schema here mirrors the limits enforced by CodexExecArgsSchema.
 */
const CODEX_EXEC_TOOL = {
  type: "function",
  function: {
    name: "codex_exec",
    description:
      "Delegate a coding, terminal, or multi-step software task to a persistent remote Codex CLI workspace. Use for complex code changes, repository inspection, running programs/tests, debugging build failures, or other tasks that need a real shell. The task runs non-interactively; the remote Codex instance must make reasonable assumptions, complete the task, and return a final summary with relevant stdout/stderr.",
    parameters: {
      type: "object",
      properties: {
        prompt: {
          type: "string",
          description:
            "A complete, self-contained instruction for the remote Codex instance. Include the goal, relevant context, constraints, and what result to report back.",
        },
        maxCharacters: {
          type: "integer",
          minimum: 1_000,
          maximum: MAX_CODEX_MAX_OUTPUT_CHARACTERS,
          description: "Maximum stdout/stderr characters returned to the model (default 24000).",
        },
      },
      required: ["prompt"],
      additionalProperties: false,
    },
  },
};
|
|
|
|
/**
 * Chat Completions-style function-tool definition for `shell_exec`.
 * The JSON schema here mirrors the limits enforced by ShellExecArgsSchema.
 */
const SHELL_EXEC_TOOL = {
  type: "function",
  function: {
    name: "shell_exec",
    description:
      "Run an arbitrary non-interactive shell command on the configured remote devbox, starting in the persistent scratch workspace. Use for quick Python scripts, calculations, file inspection, package/tool checks, tests, and command-line work that needs a real shell. This does not run inside the Sybil server container.",
    parameters: {
      type: "object",
      properties: {
        command: {
          type: "string",
          description:
            "Shell command to run on the devbox. The command is executed with bash -lc when bash exists, otherwise sh -lc, starting in the persistent scratch workspace.",
        },
        maxCharacters: {
          type: "integer",
          minimum: 1_000,
          maximum: MAX_SHELL_MAX_OUTPUT_CHARACTERS,
          description: "Maximum stdout/stderr characters returned to the model (default 24000).",
        },
      },
      required: ["command"],
      additionalProperties: false,
    },
  },
};
|
|
|
|
/**
 * Tool definitions that are always offered to the model: `web_search` and `fetch_url`.
 * The JSON schemas mirror WebSearchArgsSchema and FetchUrlArgsSchema.
 */
const BASE_CHAT_TOOLS: any[] = [
  // web_search
  {
    type: "function",
    function: {
      name: "web_search",
      description:
        "Search the public web for recent or factual information. Returns ranked results with per-result summaries and snippets.",
      parameters: {
        type: "object",
        properties: {
          query: { type: "string", description: "Search query." },
          numResults: {
            type: "integer",
            minimum: 1,
            maximum: MAX_WEB_RESULTS,
            description: "Number of results to return (default 5).",
          },
          type: {
            type: "string",
            enum: ["auto", "fast", "instant"],
            description: "Search mode.",
          },
          includeDomains: {
            type: "array",
            items: { type: "string" },
            description: "Only include these domains.",
          },
          excludeDomains: {
            type: "array",
            items: { type: "string" },
            description: "Exclude these domains.",
          },
        },
        required: ["query"],
        additionalProperties: false,
      },
    },
  },
  // fetch_url
  {
    type: "function",
    function: {
      name: "fetch_url",
      description:
        "Fetch a webpage by URL and return readable plaintext content extracted from the page for deeper inspection.",
      parameters: {
        type: "object",
        properties: {
          url: { type: "string", description: "Absolute URL to fetch, including http/https." },
          maxCharacters: {
            type: "integer",
            minimum: 500,
            maximum: MAX_FETCH_MAX_CHARACTERS,
            description: "Maximum response text characters returned (default 12000).",
          },
        },
        required: ["url"],
        additionalProperties: false,
      },
    },
  },
];
|
|
|
|
/**
 * Full tool list: the base web tools, then `codex_exec` and `shell_exec`
 * when their respective env flags are enabled (in that order).
 */
const CHAT_TOOLS: any[] = BASE_CHAT_TOOLS.concat(
  env.CHAT_CODEX_TOOL_ENABLED ? [CODEX_EXEC_TOOL] : [],
  env.CHAT_SHELL_TOOL_ENABLED ? [SHELL_EXEC_TOOL] : []
);
|
|
|
|
/**
 * CHAT_TOOLS reshaped for the Responses API: the nested `function` object is
 * flattened to top-level `name` / `description` / `parameters`, with `strict: false`.
 * Entries whose `type` is not "function" are passed through untouched.
 */
const RESPONSES_CHAT_TOOLS: any[] = CHAT_TOOLS.map((tool) => {
  if (tool?.type !== "function") return tool;
  const { name, description, parameters } = tool.function;
  return { type: "function", name, description, parameters, strict: false };
});
|
|
|
|
/**
 * System prompt that explains the available tools to the model.
 * The codex_exec / shell_exec paragraphs are only included when the matching env flag is on.
 */
export const CHAT_TOOL_SYSTEM_PROMPT = [
  "You can use tools to gather up-to-date web information when needed. ",
  "Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page. ",
  "Prefer tools when the user asks for current events, verification, sources, or details you do not already have. ",
  "When you decide tool use is needed, call the tool immediately in the same response; do not say you are running a tool unless you actually call it. ",
  env.CHAT_CODEX_TOOL_ENABLED
    ? "Use codex_exec when a request needs substantial coding work, repository inspection, shell commands, tests, debugging, or another complex task suited to a persistent Codex workspace. Provide codex_exec a complete prompt with the goal, constraints, assumptions, and expected report-back format. Never ask codex_exec to wait for user input or run interactive commands. "
    : "",
  env.CHAT_SHELL_TOOL_ENABLED
    ? "Use shell_exec for direct non-interactive command-line work on the remote devbox, including quick Python programs, calculations, file inspection, running tests, and small scripts. "
    : "",
  "Do not fabricate tool outputs; reason only from provided tool results.",
].join("");
|
|
|
|
/** Result object returned by every tool runner; `ok` signals success, other keys are tool-specific. */
type ToolRunOutcome = {
  ok: boolean;
  [key: string]: unknown;
};

/** Token usage accumulated across rounds. */
type ToolAwareUsage = {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
};

/** Final outcome of a tool-aware completion. */
type ToolAwareCompletionResult = {
  text: string;
  usage?: ToolAwareUsage;
  raw: unknown;
  toolEvents: ToolExecutionEvent[];
};

/** Events emitted by the streaming variant: text deltas, tool-call records, and the final result. */
export type ToolAwareStreamingEvent =
  | { type: "delta"; text: string }
  | { type: "tool_call"; event: ToolExecutionEvent }
  | { type: "done"; result: ToolAwareCompletionResult };

/** Inputs for a tool-aware completion run. */
type ToolAwareCompletionParams = {
  client: OpenAI;
  model: string;
  messages: ChatMessage[];
  temperature?: number;
  maxTokens?: number;
  /** Invoked (and awaited) after each tool call has been executed and logged. */
  onToolEvent?: (event: ToolExecutionEvent) => void | Promise<void>;
  /** Extra fields merged into the `[tool_call]` log line. */
  logContext?: {
    provider: string;
    model: string;
    chatId?: string;
  };
};

/** Record describing one executed tool call. */
export type ToolExecutionEvent = {
  toolCallId: string;
  name: string;
  status: "completed" | "failed";
  summary: string;
  args: Record<string, unknown>;
  /** ISO-8601 timestamps. */
  startedAt: string;
  completedAt: string;
  durationMs: number;
  error?: string;
  resultPreview?: string;
};
|
|
|
|
/**
 * Tidies extracted text: drops carriage returns, strips spaces/tabs that
 * trail a line, collapses runs of 3+ newlines to a single blank line, and trims.
 */
function compactWhitespace(input: string) {
  const withoutCarriageReturns = input.replace(/\r/g, "");
  const withoutTrailingBlanks = withoutCarriageReturns.replace(/[ \t]+\n/g, "\n");
  const withSingleBlankLines = withoutTrailingBlanks.replace(/\n{3,}/g, "\n\n");
  return withSingleBlankLines.trim();
}
|
|
|
|
/**
 * Returns `input` unchanged when it fits in `maxCharacters`; otherwise the first
 * `maxCharacters` characters followed by "..." (so the result can exceed the limit by 3).
 */
function clipText(input: string, maxCharacters: number) {
  if (input.length > maxCharacters) {
    return input.slice(0, maxCharacters) + "...";
  }
  return input;
}
|
|
|
|
/**
 * Shallow-copies a plain object into a fresh record.
 * Anything that is not a non-null, non-array object yields `{}`.
 */
function toRecord(value: unknown): Record<string, unknown> {
  const isObjectLike = typeof value === "object" && value !== null && !Array.isArray(value);
  if (!isObjectLike) {
    return {};
  }
  // Spread (not Object.assign) so an own "__proto__" key is copied as data.
  return { ...(value as Record<string, unknown>) };
}
|
|
|
|
/**
 * Flattens text to one line (newlines and whitespace runs become single spaces,
 * ends trimmed) and clips it to `maxLength` via clipText.
 */
function toSingleLine(value: string, maxLength = 220) {
  const withoutLineBreaks = value.replace(/\r?\n+/g, " ");
  const flattened = withoutLineBreaks.replace(/\s+/g, " ").trim();
  return clipText(flattened, maxLength);
}
|
|
|
|
/**
 * Builds the one-sentence, human-readable summary for a tool call.
 *
 * @param name   Tool name; the four known tools get tailored wording, anything else a generic sentence.
 * @param args   Tool arguments; the tool's main string argument (query/url/prompt/command) is quoted in the summary when present.
 * @param status Outcome of the call.
 * @param error  Error text, appended (single-line, clipped to 140 chars) only for failed calls.
 */
function buildToolSummary(name: string, args: Record<string, unknown>, status: "completed" | "failed", error?: string) {
  const succeeded = status === "completed";
  const errSuffix = status === "failed" && error ? ` Error: ${toSingleLine(error, 140)}` : "";

  // Reads a string argument, trimmed and squeezed to one line; "" when absent or blank.
  const readArg = (key: string, maxLength: number) => {
    const raw = args[key];
    const trimmed = typeof raw === "string" ? raw.trim() : "";
    return trimmed ? toSingleLine(trimmed, maxLength) : "";
  };

  switch (name) {
    case "web_search": {
      const query = readArg("query", 100);
      if (succeeded) {
        return query ? `Performed web search for '${query}'.` : "Performed web search.";
      }
      return query ? `Web search for '${query}' failed.${errSuffix}` : `Web search failed.${errSuffix}`;
    }
    case "fetch_url": {
      const url = readArg("url", 140);
      if (succeeded) {
        return url ? `Fetched URL ${url}.` : "Fetched URL.";
      }
      return url ? `Fetching URL ${url} failed.${errSuffix}` : `Fetching URL failed.${errSuffix}`;
    }
    case "codex_exec": {
      const prompt = readArg("prompt", 120);
      if (succeeded) {
        return prompt ? `Ran Codex task: '${prompt}'.` : "Ran Codex task.";
      }
      return prompt ? `Codex task '${prompt}' failed.${errSuffix}` : `Codex task failed.${errSuffix}`;
    }
    case "shell_exec": {
      const command = readArg("command", 120);
      if (succeeded) {
        return command ? `Ran devbox shell command: '${command}'.` : "Ran devbox shell command.";
      }
      return command
        ? `Devbox shell command '${command}' failed.${errSuffix}`
        : `Devbox shell command failed.${errSuffix}`;
    }
    default:
      return succeeded ? `Ran tool '${name}'.` : `Tool '${name}' failed.${errSuffix}`;
  }
}
|
|
|
|
/**
 * Writes one `[tool_call] {json}` line per tool event: console.error for
 * failed calls, console.info otherwise. `context` fields come first in the
 * payload and are overridden by same-named event fields.
 */
function logToolEvent(event: ToolExecutionEvent, context?: ToolAwareCompletionParams["logContext"]) {
  const serialized = JSON.stringify({ kind: "tool_call", ...context, ...event });
  const line = `[tool_call] ${serialized}`;
  if (event.status !== "failed") {
    console.info(line);
    return;
  }
  console.error(line);
}
|
|
|
|
/**
 * JSON-serializes a tool result and clips it to 400 characters for display/logging.
 * Returns undefined when serialization yields nothing.
 */
function buildResultPreview(toolResult: ToolRunOutcome) {
  const json = JSON.stringify(toolResult);
  if (!json) {
    return undefined;
  }
  return clipText(json, 400);
}
|
|
|
|
/**
 * Converts a tool execution event into the shape of a persisted `role: "tool"` chat message.
 * The summary becomes the message content; the full event is mirrored into `metadata`,
 * with absent `error` / `resultPreview` stored as null.
 */
export function buildToolLogMessageData(chatId: string, event: ToolExecutionEvent) {
  const { toolCallId, name, status, summary, args, startedAt, completedAt, durationMs } = event;
  const metadata = {
    kind: "tool_call",
    toolCallId,
    toolName: name,
    status,
    summary,
    args,
    startedAt,
    completedAt,
    durationMs,
    error: event.error ?? null,
    resultPreview: event.resultPreview ?? null,
  };
  return {
    chatId,
    role: "tool" as const,
    content: summary,
    name,
    metadata,
  };
}
|
|
|
|
/**
 * Extracts the text of the first `<title>` element from an HTML document.
 *
 * The handful of entities that commonly appear in titles (nbsp, amp, lt, gt,
 * quot, apos and the numeric apostrophe) are decoded in a single pass, so an
 * escaped entity such as "&" + "amp;lt;" decodes once and is not decoded twice.
 *
 * @param html Raw HTML source.
 * @returns The decoded, whitespace-compacted title, or null when there is no
 *          `<title>` element or it is empty.
 */
function extractHtmlTitle(html: string) {
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!match?.[1]) return null;

  const namedEntities: Record<string, string> = {
    nbsp: " ",
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
    apos: "'",
  };

  // One regex + callback instead of chained replaces: each entity is decoded exactly once.
  const decoded = match[1].replace(/&(nbsp|amp|lt|gt|quot|apos|#0*39);/gi, (entity: string, code: string) => {
    const key = code.toLowerCase();
    // The only numeric reference accepted by the pattern is the apostrophe (#39 / #039).
    return key.startsWith("#") ? "'" : namedEntities[key] ?? entity;
  });

  return compactWhitespace(decoded);
}
|
|
|
|
/**
 * Converts chat messages with buildOpenAIConversationMessage and prepends
 * the tool system prompt as the first message.
 */
function normalizeIncomingMessages(messages: ChatMessage[]) {
  const converted = messages.map((message) => buildOpenAIConversationMessage(message));
  const systemMessage = { role: "system", content: CHAT_TOOL_SYSTEM_PROMPT };
  return [systemMessage, ...converted];
}
|
|
|
|
/**
 * Converts chat messages with buildOpenAIResponsesInputMessage and prepends
 * the tool system prompt as the first input item.
 */
function normalizeIncomingResponsesInput(messages: ChatMessage[]) {
  const converted = messages.map((message) => buildOpenAIResponsesInputMessage(message));
  const systemMessage = { role: "system", content: CHAT_TOOL_SYSTEM_PROMPT };
  return [systemMessage, ...converted];
}
|
|
|
|
/**
 * Runs a web search through the Exa client and maps the response into the
 * tool-result shape (ranked results with clipped summary/text/highlights).
 *
 * Every result field is type-checked and replaced with null (or [] for
 * highlights) when the response does not carry the expected type.
 */
async function runExaWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
  const { query } = args;

  const stringOrNull = (value: unknown) => (typeof value === "string" ? value : null);
  const clippedOrNull = (value: unknown, limit: number) => (typeof value === "string" ? clipText(value, limit) : null);

  const searchOptions = {
    type: args.type ?? "auto",
    numResults: args.numResults ?? DEFAULT_WEB_RESULTS,
    includeDomains: args.includeDomains,
    excludeDomains: args.excludeDomains,
    moderation: true,
    userLocation: "US",
    contents: {
      summary: { query },
      highlights: {
        query,
        maxCharacters: 320,
        numSentences: 2,
        highlightsPerUrl: 2,
      },
      text: { maxCharacters: 1_000 },
    },
  };

  const exa = exaClient();
  const response = await exa.search(query, searchOptions as any);
  const rawResults = Array.isArray(response?.results) ? response.results : [];

  const results = rawResults.map((result: any, index: number) => {
    const rawHighlights: unknown[] = Array.isArray(result?.highlights) ? result.highlights : [];
    const highlights = rawHighlights
      .filter((h: unknown) => typeof h === "string")
      .slice(0, 3)
      .map((h) => clipText(h as string, 280));

    return {
      rank: index + 1,
      title: stringOrNull(result?.title),
      url: stringOrNull(result?.url),
      publishedDate: stringOrNull(result?.publishedDate),
      author: stringOrNull(result?.author),
      summary: clippedOrNull(result?.summary, 1_400),
      text: clippedOrNull(result?.text, 700),
      highlights,
    };
  });

  return {
    ok: true,
    searchEngine: "exa",
    query,
    requestId: response?.requestId ?? null,
    results,
  };
}
|
|
|
|
/**
 * Runs a web search through SearXNG and maps the response into the same
 * tool-result shape as the Exa runner. `author` is always null, and the
 * (clipped) summary doubles as the single highlight when present.
 */
async function runSearxngWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
  const { query, includeDomains, excludeDomains } = args;
  const response = await searchSearxng(query, {
    numResults: args.numResults ?? DEFAULT_WEB_RESULTS,
    includeDomains,
    excludeDomains,
  });

  const results = response.results.map((result, index) => {
    const { title, url, publishedDate, summary, text, engines } = result;
    return {
      rank: index + 1,
      title,
      url,
      publishedDate,
      author: null,
      summary,
      text,
      highlights: summary ? [clipText(summary, 280)] : [],
      engines,
    };
  });

  return {
    ok: true,
    searchEngine: "searxng",
    query,
    requestId: response.requestId,
    results,
  };
}
|
|
|
|
/**
 * Entry point for the `web_search` tool: validates the raw arguments
 * (throws on schema violations) and dispatches to SearXNG when
 * CHAT_WEB_SEARCH_ENGINE is "searxng", otherwise to Exa.
 */
async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
  const args = WebSearchArgsSchema.parse(input);
  const useSearxng = env.CHAT_WEB_SEARCH_ENGINE === "searxng";
  return useSearxng ? runSearxngWebSearchTool(args) : runExaWebSearchTool(args);
}
|
|
|
|
/**
 * Parses a URL string and rejects anything that is not http or https.
 *
 * NOTE(review): only the scheme is checked; loopback/private/link-local hosts
 * are not filtered here. Confirm whether that is intended for this deployment.
 *
 * @throws TypeError when the string is not a valid URL (from the URL constructor).
 * @throws Error when the protocol is neither http: nor https:.
 */
function assertSafeFetchUrl(urlRaw: string) {
  const parsed = new URL(urlRaw);
  const allowedProtocols = ["http:", "https:"];
  if (!allowedProtocols.includes(parsed.protocol)) {
    throw new Error("Only http:// and https:// URLs are supported.");
  }
  return parsed;
}
|
|
|
|
/**
 * Entry point for the `fetch_url` tool: downloads a page and returns readable text.
 *
 * HTML (detected by content-type or by sniffing the body) is converted to
 * plain text with html-to-text; any other body is returned as-is. The text is
 * whitespace-compacted and truncated to `maxCharacters`.
 *
 * The FETCH_TIMEOUT_MS deadline covers the whole exchange, including reading
 * the response body, so a server that sends headers and then stalls cannot
 * hang the tool call.
 *
 * @param input Raw tool arguments; validated against FetchUrlArgsSchema.
 * @returns `{ ok: true, url, status, contentType, title, truncated, text }`.
 * @throws Error on schema violations, unsupported protocols, non-2xx status,
 *         network failures, or when the deadline elapses.
 */
async function runFetchUrlTool(input: unknown): Promise<ToolRunOutcome> {
  const args = FetchUrlArgsSchema.parse(input);
  const parsed = assertSafeFetchUrl(args.url);
  const maxCharacters = args.maxCharacters ?? DEFAULT_FETCH_MAX_CHARACTERS;

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  let response: Response;
  let body: string;
  try {
    response = await fetch(parsed.toString(), {
      redirect: "follow",
      signal: controller.signal,
      headers: {
        "User-Agent": "SybilBot/1.0 (+https://sybil.local)",
        Accept: "text/html, text/plain, application/json;q=0.9, */*;q=0.5",
      },
    });

    if (!response.ok) {
      // Release the connection instead of leaving an unread body behind.
      await response.body?.cancel().catch(() => undefined);
      throw new Error(`Fetch failed with status ${response.status}.`);
    }

    // Read the body while the abort timer is still armed.
    body = await response.text();
  } catch (err) {
    if (controller.signal.aborted) {
      throw new Error(`Fetch timed out after ${FETCH_TIMEOUT_MS}ms.`);
    }
    throw err;
  } finally {
    clearTimeout(timeout);
  }

  const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
  const isHtml = contentType.includes("text/html") || /<!doctype html|<html[\s>]/i.test(body);

  let extracted = body;
  if (isHtml) {
    extracted = htmlToText(body, {
      wordwrap: false,
      preserveNewlines: true,
      selectors: [
        { selector: "img", format: "skip" },
        { selector: "script", format: "skip" },
        { selector: "style", format: "skip" },
        { selector: "noscript", format: "skip" },
        { selector: "a", options: { ignoreHref: true } },
      ],
    });
  }

  const normalized = compactWhitespace(extracted);
  const truncated = normalized.length > maxCharacters;
  const text = truncated
    ? `${normalized.slice(0, maxCharacters)}\n\n[truncated ${normalized.length - maxCharacters} characters]`
    : normalized;

  return {
    ok: true,
    url: response.url || parsed.toString(),
    status: response.status,
    contentType: contentType || null,
    title: isHtml ? extractHtmlTitle(body) : null,
    truncated,
    text,
  };
}
|
|
|
|
/**
 * Wraps a value in single quotes for a POSIX shell, rewriting each embedded
 * single quote as the four-character sequence '\'' (close, escaped quote, reopen).
 */
function shellQuote(value: string) {
  const escaped = value.split("'").join("'\\''");
  return "'" + escaped + "'";
}
|
|
|
|
/**
 * Builds the ssh destination for the remote devbox.
 * Returns the configured host as-is when no user is configured or the host
 * already contains "@"; otherwise `user@host`.
 *
 * @throws Error when CHAT_CODEX_REMOTE_HOST is not set.
 */
function buildDevboxSshTarget() {
  const host = env.CHAT_CODEX_REMOTE_HOST;
  if (!host) {
    throw new Error("CHAT_CODEX_REMOTE_HOST not set");
  }
  const user = env.CHAT_CODEX_REMOTE_USER;
  const needsUserPrefix = Boolean(user) && !host.includes("@");
  return needsUserPrefix ? `${user}@${host}` : host;
}
|
|
|
|
/**
 * Builds the remote shell command line for a codex_exec run: create and enter
 * the configured workdir, then run `codex exec` with the task prompt wrapped
 * in non-interactive batch rules. stdin is redirected from /dev/null.
 */
function buildRemoteCodexCommand(prompt: string) {
  const quotedWorkdir = shellQuote(env.CHAT_CODEX_REMOTE_WORKDIR.trim());

  const batchPreamble = [
    "You are running in a non-interactive batch environment.",
    "",
    "Rules:",
    "- Do not ask questions or wait for user input.",
    "- Do not use interactive commands, editors, pagers, or prompts.",
    "- If details are ambiguous, make a reasonable assumption and continue.",
    "- Complete the task in one run, including any requested file edits, commands, and verification.",
    "- End with a concise final report that includes changed files, commands run, and outcomes.",
    "",
    "Task:",
  ];
  const wrappedPrompt = [...batchPreamble, prompt].join("\n");

  const codexCommand = [
    "codex exec",
    "--dangerously-bypass-approvals-and-sandbox",
    "--skip-git-repo-check",
    shellQuote(wrappedPrompt),
    "< /dev/null",
  ].join(" ");

  return [`mkdir -p ${quotedWorkdir}`, `cd ${quotedWorkdir}`, codexCommand].join(" && ");
}
|
|
|
|
/**
 * Builds the remote shell command line for a shell_exec run: create and enter
 * the configured workdir, then run the command with `bash -lc` when bash is
 * available, falling back to `sh -lc`.
 */
function buildRemoteShellCommand(command: string) {
  const quotedWorkdir = shellQuote(env.CHAT_CODEX_REMOTE_WORKDIR.trim());
  const quotedCommand = shellQuote(command);
  const runner = `if command -v bash >/dev/null 2>&1; then bash -lc ${quotedCommand}; else sh -lc ${quotedCommand}; fi`;
  return [`mkdir -p ${quotedWorkdir}`, `cd ${quotedWorkdir}`, runner].join(" && ");
}
|
|
|
|
/**
 * Runs `fn` with the path of the ssh private key to use, if any.
 *
 * Resolution order:
 *  1. CHAT_CODEX_SSH_KEY_PATH, passed through as-is;
 *  2. CHAT_CODEX_SSH_PRIVATE_KEY_B64, decoded into a file (mode 0600) inside a
 *     fresh temp directory that is removed once `fn` settles;
 *  3. neither set: `fn` is called with undefined.
 */
async function withDevboxSshKeyPath<T>(fn: (keyPath?: string) => Promise<T>) {
  const configuredPath = env.CHAT_CODEX_SSH_KEY_PATH;
  if (configuredPath) {
    return fn(configuredPath);
  }

  const encodedKey = env.CHAT_CODEX_SSH_PRIVATE_KEY_B64;
  if (!encodedKey) {
    return fn(undefined);
  }

  const scratchDir = await mkdtemp(path.join(os.tmpdir(), "sybil-codex-ssh-"));
  const keyFile = path.join(scratchDir, "id");
  try {
    const keyBytes = Buffer.from(encodedKey, "base64");
    await writeFile(keyFile, keyBytes, { mode: 0o600 });
    return await fn(keyFile);
  } finally {
    // Always remove the decoded key, whether `fn` resolved or threw.
    await rm(scratchDir, { recursive: true, force: true });
  }
}
|
|
|
|
/**
 * Limits remote stdout/stderr to `maxCharacters`. When clipping happens, a
 * "[truncated N characters]" marker is appended and `truncated` is true.
 */
function clipRemoteOutput(value: string, maxCharacters: number) {
  const overflow = value.length - maxCharacters;
  if (overflow <= 0) {
    return { text: value, truncated: false };
  }
  const kept = value.slice(0, maxCharacters);
  return {
    text: `${kept}\n\n[truncated ${overflow} characters]`,
    truncated: true,
  };
}
|
|
|
|
/**
 * Normalizes child-process output to a string: Buffers are decoded as UTF-8,
 * strings pass through, anything else becomes "".
 */
function bufferOrStringToString(value: unknown) {
  if (Buffer.isBuffer(value)) {
    return value.toString("utf8");
  }
  return typeof value === "string" ? value : "";
}
|
|
|
|
/**
 * Entry point for the `codex_exec` tool: runs a Codex CLI task on the remote
 * devbox over ssh and returns its (clipped) stdout/stderr.
 *
 * Returns `{ ok: false, ... }` rather than throwing when the tool is disabled
 * or the ssh command fails/times out; stdout/stderr captured before the
 * failure are still included. Schema violations and a missing host throw.
 */
async function runCodexExecTool(input: unknown): Promise<ToolRunOutcome> {
  if (!env.CHAT_CODEX_TOOL_ENABLED) {
    return { ok: false, error: "codex_exec is disabled." };
  }

  const args: CodexExecArgs = CodexExecArgsSchema.parse(input);
  const maxCharacters = args.maxCharacters ?? DEFAULT_CODEX_MAX_OUTPUT_CHARACTERS;
  const sshTarget = buildDevboxSshTarget();
  const remoteCommand = buildRemoteCodexCommand(args.prompt);

  // Fields shared by the success and failure results; stderr gets a tighter cap.
  const describeOutput = (rawStdout: unknown, rawStderr: unknown) => {
    const stdout = clipRemoteOutput(bufferOrStringToString(rawStdout), maxCharacters);
    const stderr = clipRemoteOutput(bufferOrStringToString(rawStderr), Math.min(maxCharacters, 12_000));
    return {
      host: env.CHAT_CODEX_REMOTE_HOST,
      workdir: env.CHAT_CODEX_REMOTE_WORKDIR,
      stdout: stdout.text,
      stderr: stderr.text,
      stdoutTruncated: stdout.truncated,
      stderrTruncated: stderr.truncated,
    };
  };

  return withDevboxSshKeyPath(async (keyPath?: string) => {
    const sshArgs = [
      "-n",
      "-o",
      "BatchMode=yes",
      "-o",
      "StrictHostKeyChecking=accept-new",
      "-o",
      "UserKnownHostsFile=/tmp/sybil-codex-known-hosts",
      "-p",
      String(env.CHAT_CODEX_REMOTE_PORT),
      ...(keyPath ? ["-i", keyPath] : []),
      sshTarget,
      remoteCommand,
    ];

    try {
      const result = await execFileAsync("ssh", sshArgs, {
        timeout: env.CHAT_CODEX_EXEC_TIMEOUT_MS,
        maxBuffer: REMOTE_EXEC_MAX_BUFFER_BYTES,
      });
      return { ok: true, ...describeOutput(result.stdout, result.stderr) };
    } catch (err: any) {
      // `killed` is reported as a timeout; everything else surfaces the error's own message.
      const errorText = err?.killed
        ? `Remote Codex command timed out after ${env.CHAT_CODEX_EXEC_TIMEOUT_MS}ms.`
        : err?.message ?? String(err);
      return {
        ok: false,
        error: errorText,
        exitCode: typeof err?.code === "number" ? err.code : null,
        signal: typeof err?.signal === "string" ? err.signal : null,
        ...describeOutput(err?.stdout, err?.stderr),
      };
    }
  });
}
|
|
|
|
/**
 * Entry point for the `shell_exec` tool: runs a shell command on the remote
 * devbox over ssh and returns its (clipped) stdout/stderr.
 *
 * Returns `{ ok: false, ... }` rather than throwing when the tool is disabled
 * or the ssh command fails/times out; stdout/stderr captured before the
 * failure are still included. Schema violations and a missing host throw.
 */
async function runShellExecTool(input: unknown): Promise<ToolRunOutcome> {
  if (!env.CHAT_SHELL_TOOL_ENABLED) {
    return { ok: false, error: "shell_exec is disabled." };
  }

  const args: ShellExecArgs = ShellExecArgsSchema.parse(input);
  const maxCharacters = args.maxCharacters ?? DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS;
  const sshTarget = buildDevboxSshTarget();
  const remoteCommand = buildRemoteShellCommand(args.command);

  // Fields shared by the success and failure results; stderr gets a tighter cap.
  const describeOutput = (rawStdout: unknown, rawStderr: unknown) => {
    const stdout = clipRemoteOutput(bufferOrStringToString(rawStdout), maxCharacters);
    const stderr = clipRemoteOutput(bufferOrStringToString(rawStderr), Math.min(maxCharacters, 12_000));
    return {
      host: env.CHAT_CODEX_REMOTE_HOST,
      workdir: env.CHAT_CODEX_REMOTE_WORKDIR,
      command: args.command,
      stdout: stdout.text,
      stderr: stderr.text,
      stdoutTruncated: stdout.truncated,
      stderrTruncated: stderr.truncated,
    };
  };

  return withDevboxSshKeyPath(async (keyPath?: string) => {
    const sshArgs = [
      "-n",
      "-o",
      "BatchMode=yes",
      "-o",
      "StrictHostKeyChecking=accept-new",
      "-o",
      "UserKnownHostsFile=/tmp/sybil-codex-known-hosts",
      "-p",
      String(env.CHAT_CODEX_REMOTE_PORT),
      ...(keyPath ? ["-i", keyPath] : []),
      sshTarget,
      remoteCommand,
    ];

    try {
      const result = await execFileAsync("ssh", sshArgs, {
        timeout: env.CHAT_SHELL_EXEC_TIMEOUT_MS,
        maxBuffer: REMOTE_EXEC_MAX_BUFFER_BYTES,
      });
      return { ok: true, ...describeOutput(result.stdout, result.stderr) };
    } catch (err: any) {
      // `killed` is reported as a timeout; everything else surfaces the error's own message.
      const errorText = err?.killed
        ? `Remote shell command timed out after ${env.CHAT_SHELL_EXEC_TIMEOUT_MS}ms.`
        : err?.message ?? String(err);
      return {
        ok: false,
        error: errorText,
        exitCode: typeof err?.code === "number" ? err.code : null,
        signal: typeof err?.signal === "string" ? err.signal : null,
        ...describeOutput(err?.stdout, err?.stderr),
      };
    }
  });
}
|
|
|
|
/**
 * Dispatches a tool call by name to its runner.
 * Unknown names produce a failed outcome instead of throwing.
 */
async function executeTool(name: string, args: unknown): Promise<ToolRunOutcome> {
  switch (name) {
    case "web_search":
      return runWebSearchTool(args);
    case "fetch_url":
      return runFetchUrlTool(args);
    case "codex_exec":
      return runCodexExecTool(args);
    case "shell_exec":
      return runShellExecTool(args);
    default:
      return { ok: false, error: `Unknown tool: ${name}` };
  }
}
|
|
|
|
/**
 * Parses the JSON argument string of a tool call.
 * Non-string or blank input yields `{}`.
 *
 * @throws Error prefixed with "Invalid JSON arguments: " when the text is not valid JSON.
 */
function parseToolArgs(raw: unknown) {
  const text = typeof raw === "string" ? raw.trim() : "";
  if (text.length === 0) {
    return {};
  }
  try {
    return JSON.parse(text);
  } catch (err: any) {
    throw new Error(`Invalid JSON arguments: ${err?.message ?? String(err)}`);
  }
}
|
|
|
|
/**
 * Prepares tool arguments for inclusion in a tool event: the potentially huge
 * `prompt` (codex_exec) or `command` (shell_exec) is clipped to 1000 characters.
 * Other tools, or a non-string field, get the original object back unchanged.
 */
function buildEventArgs(name: string, args: Record<string, unknown>) {
  const clippedField = name === "codex_exec" ? "prompt" : name === "shell_exec" ? "command" : null;
  if (clippedField === null) {
    return args;
  }
  const value = args[clippedField];
  if (typeof value !== "string") {
    return args;
  }
  return { ...args, [clippedField]: clipText(value, 1_000) };
}
|
|
|
|
/**
 * Heuristic: does this assistant text announce that it is about to run a tool
 * ("Let me run that", "Running it now", "One moment") without being a real answer?
 *
 * The text is lower-cased, typographic apostrophes are folded to ASCII, and
 * markdown punctuation (including `_` and `-`) is replaced by spaces before
 * matching. Because underscores become spaces, tool names are matched in their
 * normalized form ("shell exec", "codex exec").
 *
 * @returns false for empty text, text longer than 800 normalized characters,
 *          or text that hands control back to the user ("let me know",
 *          "if you want / would like"); otherwise whether any intent pattern matches.
 */
function looksLikeDanglingToolIntent(text: string) {
  const normalized = text
    .toLowerCase()
    // "I’ll" is as common as "I'll" in model output; fold curly quotes so the patterns below apply.
    .replace(/[\u2018\u2019]/g, "'")
    .replace(/[`*_>#-]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
  if (!normalized) return false;
  if (normalized.length > 800) return false;
  if (/\blet me know\b/.test(normalized) || /\bif you (want|would like)\b/.test(normalized)) return false;
  return (
    /\b(calling|running|executing|trying|checking|testing)\b.{0,80}\b(now|it|tool|command|shell exec|codex exec)\b/.test(normalized) ||
    /\b(let me|i'?ll|i will)\b.{0,120}\b(run|execute|call|try|check|test)\b/.test(normalized) ||
    /\b(stand by|hang on|one moment)\b/.test(normalized)
  );
}
|
|
|
|
/**
 * Appends (in place) the assistant's dangling "I'll run a tool" message followed
 * by a system message instructing the model to either call the tool now or answer directly.
 */
function appendDanglingToolIntentCorrection(conversation: any[], text: string) {
  const assistantTurn = { role: "assistant", content: text };
  const correction = {
    role: "system",
    content:
      "Internal correction: the previous assistant message claimed it would run a tool, but no tool call was made. If the task needs an available tool, call it now. Otherwise provide the final answer directly without saying you will run a tool.",
  };
  conversation.push(assistantTurn, correction);
}
|
|
|
|
/**
 * Adds Chat Completions usage counters (`prompt_tokens`, `completion_tokens`,
 * `total_tokens`) into `acc` in place; missing counters count as 0.
 *
 * @returns true when a usage object was present, false otherwise.
 */
function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
  if (usage) {
    acc.inputTokens = acc.inputTokens + (usage.prompt_tokens ?? 0);
    acc.outputTokens = acc.outputTokens + (usage.completion_tokens ?? 0);
    acc.totalTokens = acc.totalTokens + (usage.total_tokens ?? 0);
    return true;
  }
  return false;
}
|
|
|
|
/**
 * Adds Responses API usage counters (`input_tokens`, `output_tokens`,
 * `total_tokens`) into `acc` in place; missing counters count as 0.
 *
 * @returns true when a usage object was present, false otherwise.
 */
function mergeResponsesUsage(acc: Required<ToolAwareUsage>, usage: any) {
  if (usage) {
    acc.inputTokens = acc.inputTokens + (usage.input_tokens ?? 0);
    acc.outputTokens = acc.outputTokens + (usage.output_tokens ?? 0);
    acc.totalTokens = acc.totalTokens + (usage.total_tokens ?? 0);
    return true;
  }
  return false;
}
|
|
|
|
/** Returns the response's `output` array, or an empty array when it is missing or not an array. */
function getResponseOutputItems(response: any) {
  const output = response?.output;
  if (Array.isArray(output)) {
    return output;
  }
  return [];
}
|
|
|
|
/**
 * Extracts the assistant-visible text from a Responses API response.
 *
 * Resolution order:
 *  1. a non-empty `output_text` convenience string;
 *  2. the concatenation of `output_text` and `refusal` parts found in
 *     `message` output items;
 *  3. `fallback`.
 *
 * An empty `output_text` no longer short-circuits the function: a response
 * that carries only a refusal (or no text at all) falls through to steps 2
 * and 3 instead of returning "".
 *
 * @param response Responses API response object (any shape tolerated).
 * @param fallback Text to return when the response contains no text.
 */
function extractResponsesText(response: any, fallback = "") {
  const directText = response?.output_text;
  if (typeof directText === "string" && directText.length > 0) return directText;

  // Same guard as getResponseOutputItems: tolerate a missing or malformed `output`.
  const outputItems: any[] = Array.isArray(response?.output) ? response.output : [];

  const parts: string[] = [];
  for (const item of outputItems) {
    if (item?.type !== "message" || !Array.isArray(item.content)) continue;
    for (const content of item.content) {
      if (content?.type === "output_text" && typeof content.text === "string") {
        parts.push(content.text);
      } else if (content?.type === "refusal" && typeof content.refusal === "string") {
        parts.push(content.refusal);
      }
    }
  }
  return parts.join("") || fallback;
}
|
|
|
|
/**
 * Returns the tail of `finalText` that has not been streamed yet.
 * When nothing was streamed the whole final text is returned; when the final
 * text does not start with the streamed text, "" is returned.
 */
function getUnstreamedText(finalText: string, streamedText: string) {
  if (!finalText) {
    return "";
  }
  if (!streamedText) {
    return finalText;
  }
  if (!finalText.startsWith(streamedText)) {
    return "";
  }
  return finalText.slice(streamedText.length);
}
|
|
|
|
/**
 * Produces an error message for a response whose status is "failed" or
 * "incomplete"; returns null for any other status.
 *
 * Preference order: the response's own `error.message`, then the incomplete
 * reason, then a generic "Response <status>." sentence.
 */
function getResponseFailureMessage(response: any) {
  const status = response?.status;
  if (status !== "failed" && status !== "incomplete") {
    return null;
  }

  const errorMessage = response?.error?.message;
  if (typeof errorMessage === "string") {
    return errorMessage;
  }

  const reason = response?.incomplete_details?.reason;
  if (typeof reason === "string" && reason) {
    return `Response incomplete: ${reason}`;
  }
  return `Response ${response.status}.`;
}
|
|
|
|
/**
 * Picks the `function_call` items out of a Responses API output list and maps
 * them to NormalizedToolCall. Missing ids fall back to `call_id`, then `id`,
 * then a synthetic `tool_call_<round>_<index>` (index among function calls only).
 */
function normalizeResponsesToolCalls(outputItems: any[], round: number): NormalizedToolCall[] {
  const calls: NormalizedToolCall[] = [];
  for (const item of outputItems) {
    if (item?.type !== "function_call") continue;
    const position = calls.length;
    calls.push({
      id: item.call_id ?? item.id ?? `tool_call_${round}_${position}`,
      name: item.name ?? "unknown_tool",
      arguments: item.arguments ?? "{}",
    });
  }
  return calls;
}
|
|
|
|
/** Provider-independent view of a tool call requested by the model. */
type NormalizedToolCall = {
  /** Call identifier (provider-supplied or synthesized). */
  id: string;
  /** Tool name, or "unknown_tool" when the provider omitted it. */
  name: string;
  /** Raw JSON argument string. */
  arguments: string;
};
|
|
|
|
/**
 * Maps Chat Completions `tool_calls` entries to NormalizedToolCall, filling in
 * a synthetic id, "unknown_tool", or "{}" for whatever the entry is missing.
 */
function normalizeModelToolCalls(toolCalls: any[], round: number): NormalizedToolCall[] {
  return toolCalls.map((call: any, index: number) => {
    const fn = call?.function;
    return {
      id: call?.id ?? `tool_call_${round}_${index}`,
      name: fn?.name ?? "unknown_tool",
      arguments: fn?.arguments ?? "{}",
    };
  });
}
|
|
|
|
/**
 * Executes one normalized tool call and produces its execution event.
 *
 * Argument parsing and tool execution errors are caught and converted into a
 * failed outcome, so this function only rejects if `params.onToolEvent` does.
 * The event is logged and then handed to `params.onToolEvent` (awaited) before returning.
 *
 * @returns The event (with clipped args, summary and result preview) and the raw tool result.
 */
async function executeToolCallAndBuildEvent(
  call: NormalizedToolCall,
  params: ToolAwareCompletionParams
): Promise<{ event: ToolExecutionEvent; toolResult: ToolRunOutcome }> {
  const startedAtMs = Date.now();

  let parsedArgs: Record<string, unknown> = {};
  let toolResult: ToolRunOutcome;
  try {
    parsedArgs = toRecord(parseToolArgs(call.arguments));
    toolResult = await executeTool(call.name, parsedArgs);
  } catch (err: any) {
    toolResult = { ok: false, error: err?.message ?? String(err) };
  }

  const status: "completed" | "failed" = toolResult.ok ? "completed" : "failed";
  let error: string | undefined;
  if (status === "failed") {
    error = typeof toolResult.error === "string" ? toolResult.error : "Tool execution failed.";
  }

  const completedAtMs = Date.now();
  const eventArgs = buildEventArgs(call.name, parsedArgs);
  const summary = buildToolSummary(call.name, eventArgs, status, error);

  const event: ToolExecutionEvent = {
    toolCallId: call.id,
    name: call.name,
    status,
    summary,
    args: eventArgs,
    startedAt: new Date(startedAtMs).toISOString(),
    completedAt: new Date(completedAtMs).toISOString(),
    durationMs: completedAtMs - startedAtMs,
    error,
    resultPreview: buildResultPreview(toolResult),
  };

  logToolEvent(event, params.logContext);
  if (params.onToolEvent) {
    await params.onToolEvent(event);
  }

  return { event, toolResult };
}
|
|
|
|
/**
 * Runs a non-streaming tool loop against the Responses API.
 *
 * Each round sends the accumulated input. If the response contains
 * `function_call` items, they are executed one after another, their outputs
 * are appended as `function_call_output` items, and another round starts.
 * A response without tool calls ends the loop, unless its text looks like a
 * dangling tool intent, in which case a correction is appended and the model
 * is asked again (at most `MAX_DANGLING_TOOL_INTENT_RETRIES` times).
 *
 * @param params - Client, model, sampling settings, messages and tool-event hooks.
 * @returns Final text, accumulated usage (only if any was reported), raw
 *   responses and tool events. After `MAX_TOOL_ROUNDS` rounds a fixed
 *   "tool-call limit" text is returned instead.
 * @throws Error when `getResponseFailureMessage` yields a message for a response.
 */
export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
  const pendingInput: any[] = normalizeIncomingResponsesInput(params.messages);
  const responseLog: unknown[] = [];
  const executedEvents: ToolExecutionEvent[] = [];
  const usageTotals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let usageReported = false;
  let toolCallCount = 0;
  let correctionsIssued = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
    const response = await params.client.responses.create({
      model: params.model,
      input: pendingInput,
      temperature: params.temperature,
      max_output_tokens: params.maxTokens,
      tools: RESPONSES_CHAT_TOOLS,
      tool_choice: "auto",
      parallel_tool_calls: true,
      // Tool loops pass response output items back as input; reasoning items need persistence.
      store: true,
    } as any);
    responseLog.push(response);
    if (mergeResponsesUsage(usageTotals, response?.usage)) {
      usageReported = true;
    }

    const failure = getResponseFailureMessage(response);
    if (failure) {
      throw new Error(failure);
    }

    const outputItems = getResponseOutputItems(response);
    const requestedCalls = normalizeResponsesToolCalls(outputItems, round);

    if (requestedCalls.length) {
      toolCallCount += requestedCalls.length;
      // Echo the model's own output items back before adding the tool outputs.
      for (const item of outputItems) {
        pendingInput.push(item);
      }

      for (const call of requestedCalls) {
        const executed = await executeToolCallAndBuildEvent(call, params);
        executedEvents.push(executed.event);
        pendingInput.push({
          type: "function_call_output",
          call_id: call.id,
          output: JSON.stringify(executed.toolResult),
        });
      }
      continue;
    }

    const text = extractResponsesText(response);
    const retryAllowed = correctionsIssued < MAX_DANGLING_TOOL_INTENT_RETRIES;
    if (retryAllowed && looksLikeDanglingToolIntent(text)) {
      correctionsIssued += 1;
      appendDanglingToolIntentCorrection(pendingInput, text);
      continue;
    }

    return {
      text,
      usage: usageReported ? usageTotals : undefined,
      raw: { responses: responseLog, toolCallsUsed: toolCallCount, api: "responses" },
      toolEvents: executedEvents,
    };
  }

  // Every round was consumed without the model producing a final answer.
  return {
    text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
    usage: usageReported ? usageTotals : undefined,
    raw: { responses: responseLog, toolCallsUsed: toolCallCount, toolCallLimitReached: true, api: "responses" },
    toolEvents: executedEvents,
  };
}
|
|
|
|
/**
 * Runs a non-streaming tool loop against the Chat Completions API.
 *
 * Each round sends the conversation. If the assistant message carries
 * `tool_calls`, an assistant tool-call message is appended, every call is
 * executed one after another, and its result is appended as a `tool` message.
 * A message without tool calls ends the loop, unless its text looks like a
 * dangling tool intent, in which case a correction is appended and the model
 * is asked again (at most `MAX_DANGLING_TOOL_INTENT_RETRIES` times).
 *
 * @param params - Client, model, sampling settings, messages and tool-event hooks.
 * @returns Final text, accumulated usage (only if any was reported), raw
 *   completions and tool events. Returns empty text with `missingMessage: true`
 *   when a completion has no first-choice message, and a fixed "tool-call
 *   limit" text after `MAX_TOOL_ROUNDS` rounds.
 */
export async function runToolAwareChatCompletions(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
  const conversation: any[] = normalizeIncomingMessages(params.messages);
  const completionLog: unknown[] = [];
  const executedEvents: ToolExecutionEvent[] = [];
  const usageTotals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let usageReported = false;
  let toolCallCount = 0;
  let correctionsIssued = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
    const completion = await params.client.chat.completions.create({
      model: params.model,
      messages: conversation,
      temperature: params.temperature,
      max_tokens: params.maxTokens,
      tools: CHAT_TOOLS,
      tool_choice: "auto",
    } as any);
    completionLog.push(completion);
    if (mergeUsage(usageTotals, completion?.usage)) {
      usageReported = true;
    }

    const message = completion?.choices?.[0]?.message;
    if (!message) {
      return {
        text: "",
        usage: usageReported ? usageTotals : undefined,
        raw: { responses: completionLog, toolCallsUsed: toolCallCount, missingMessage: true },
        toolEvents: executedEvents,
      };
    }

    const rawToolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
    if (rawToolCalls.length) {
      const requestedCalls = normalizeModelToolCalls(rawToolCalls, round);
      toolCallCount += requestedCalls.length;

      // Re-state the assistant turn; `content` is attached only when it is non-empty text.
      const hasTextContent = typeof message.content === "string" && message.content.length;
      const assistantToolCallMessage: any = {
        role: "assistant",
        tool_calls: requestedCalls.map((call) => ({
          id: call.id,
          type: "function",
          function: { name: call.name, arguments: call.arguments },
        })),
        ...(hasTextContent ? { content: message.content } : {}),
      };
      conversation.push(assistantToolCallMessage);

      for (const call of requestedCalls) {
        const executed = await executeToolCallAndBuildEvent(call, params);
        executedEvents.push(executed.event);
        conversation.push({
          role: "tool",
          tool_call_id: call.id,
          content: JSON.stringify(executed.toolResult),
        });
      }
      continue;
    }

    const text = typeof message.content === "string" ? message.content : "";
    const retryAllowed = correctionsIssued < MAX_DANGLING_TOOL_INTENT_RETRIES;
    if (retryAllowed && looksLikeDanglingToolIntent(text)) {
      correctionsIssued += 1;
      appendDanglingToolIntentCorrection(conversation, text);
      continue;
    }

    return {
      text,
      usage: usageReported ? usageTotals : undefined,
      raw: { responses: completionLog, toolCallsUsed: toolCallCount },
      toolEvents: executedEvents,
    };
  }

  // Every round was consumed without the model producing a final answer.
  return {
    text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
    usage: usageReported ? usageTotals : undefined,
    raw: { responses: completionLog, toolCallsUsed: toolCallCount, toolCallLimitReached: true },
    toolEvents: executedEvents,
  };
}
|
|
|
|
/**
 * Streaming variant of the Responses API tool loop.
 *
 * Yields `delta` events for text, `tool_call` events after each executed tool
 * and exactly one `done` event carrying the final result. Text deltas are
 * forwarded only after a `message` output item was added and while no
 * `function_call` item has been seen in the current round.
 *
 * A dangling-tool-intent retry is attempted only when nothing was streamed in
 * the round.
 *
 * @param params - Client, model, sampling settings, messages and tool-event hooks.
 * @throws Error on a stream `error` event, or when `getResponseFailureMessage`
 *   yields a message for the round's terminal response.
 */
export async function* runToolAwareOpenAIChatStream(
  params: ToolAwareCompletionParams
): AsyncGenerator<ToolAwareStreamingEvent> {
  const pendingInput: any[] = normalizeIncomingResponsesInput(params.messages);
  const eventLog: unknown[] = [];
  const executedEvents: ToolExecutionEvent[] = [];
  const usageTotals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let usageReported = false;
  let toolCallCount = 0;
  let correctionsIssued = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
    const stream = await params.client.responses.create({
      model: params.model,
      input: pendingInput,
      temperature: params.temperature,
      max_output_tokens: params.maxTokens,
      tools: RESPONSES_CHAT_TOOLS,
      tool_choice: "auto",
      parallel_tool_calls: true,
      // Tool loops pass response output items back as input; reasoning items need persistence.
      store: true,
      stream: true,
    } as any);

    let roundText = "";
    let emittedText = "";
    let sawFunctionCall = false;
    let mayEmitText = false;
    let terminalResponse: any | null = null;
    const finishedItems: any[] = [];

    for await (const event of stream as any as AsyncIterable<any>) {
      eventLog.push(event);

      switch (event?.type) {
        case "response.output_text.delta": {
          if (typeof event.delta !== "string") break;
          roundText += event.delta;
          if (mayEmitText && !sawFunctionCall && event.delta.length) {
            emittedText += event.delta;
            yield { type: "delta", text: event.delta };
          }
          break;
        }
        case "response.output_item.added": {
          if (!event.item) break;
          if (event.item.type === "function_call") {
            sawFunctionCall = true;
            mayEmitText = false;
          } else if (event.item.type === "message" && !sawFunctionCall) {
            mayEmitText = true;
          }
          break;
        }
        case "response.output_item.done": {
          if (!event.item) break;
          // Items are stored by output index; appended when the index is absent.
          finishedItems[event.output_index ?? finishedItems.length] = event.item;
          if (event.item.type === "function_call") {
            sawFunctionCall = true;
            mayEmitText = false;
          }
          break;
        }
        case "response.completed":
        case "response.failed":
        case "response.incomplete": {
          terminalResponse = event.response;
          if (mergeResponsesUsage(usageTotals, event.response?.usage)) {
            usageReported = true;
          }
          break;
        }
        case "error":
          throw new Error(event.message ?? "OpenAI Responses stream failed.");
        default:
          break;
      }
    }

    const failure = getResponseFailureMessage(terminalResponse);
    if (failure) {
      throw new Error(failure);
    }

    // Prefer the terminal response's output; fall back to items collected from the stream.
    const reportedItems = getResponseOutputItems(terminalResponse);
    const roundItems = reportedItems.length ? reportedItems : finishedItems.filter(Boolean);
    const requestedCalls = normalizeResponsesToolCalls(roundItems, round);

    if (requestedCalls.length) {
      toolCallCount += requestedCalls.length;
      for (const item of roundItems) {
        pendingInput.push(item);
      }

      for (const call of requestedCalls) {
        const executed = await executeToolCallAndBuildEvent(call, params);
        executedEvents.push(executed.event);
        yield { type: "tool_call", event: executed.event };
        pendingInput.push({
          type: "function_call_output",
          call_id: call.id,
          output: JSON.stringify(executed.toolResult),
        });
      }
      continue;
    }

    const text = extractResponsesText(terminalResponse, roundText);
    // Already-emitted text cannot be taken back, so retry only when nothing was streamed.
    if (
      !emittedText &&
      correctionsIssued < MAX_DANGLING_TOOL_INTENT_RETRIES &&
      looksLikeDanglingToolIntent(text)
    ) {
      correctionsIssued += 1;
      appendDanglingToolIntentCorrection(pendingInput, text);
      continue;
    }

    const remainder = getUnstreamedText(text, emittedText);
    if (remainder) {
      yield { type: "delta", text: remainder };
    }
    yield {
      type: "done",
      result: {
        text,
        usage: usageReported ? usageTotals : undefined,
        raw: { streamed: true, responses: eventLog, toolCallsUsed: toolCallCount, api: "responses" },
        toolEvents: executedEvents,
      },
    };
    return;
  }

  // Every round was consumed without the model producing a final answer.
  yield {
    type: "done",
    result: {
      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
      usage: usageReported ? usageTotals : undefined,
      raw: { streamed: true, responses: eventLog, toolCallsUsed: toolCallCount, toolCallLimitReached: true, api: "responses" },
      toolEvents: executedEvents,
    },
  };
}
|
|
|
|
/**
 * Streaming variant of the Chat Completions tool loop.
 *
 * Yields `delta` events for text, `tool_call` events after each executed tool
 * and exactly one `done` event carrying the final result. Text is forwarded
 * until the first tool-call delta of the round arrives. Tool-call fragments
 * are accumulated per `index` and normalized once the stream ends.
 *
 * A dangling-tool-intent retry is attempted only when nothing was streamed in
 * the round.
 *
 * @param params - Client, model, sampling settings, messages and tool-event hooks.
 */
export async function* runToolAwareChatCompletionsStream(
  params: ToolAwareCompletionParams
): AsyncGenerator<ToolAwareStreamingEvent> {
  const conversation: any[] = normalizeIncomingMessages(params.messages);
  const chunkLog: unknown[] = [];
  const executedEvents: ToolExecutionEvent[] = [];
  const usageTotals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let usageReported = false;
  let toolCallCount = 0;
  let correctionsIssued = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
    const stream = await params.client.chat.completions.create({
      model: params.model,
      messages: conversation,
      temperature: params.temperature,
      max_tokens: params.maxTokens,
      tools: CHAT_TOOLS,
      tool_choice: "auto",
      stream: true,
      stream_options: { include_usage: true },
    } as any);

    let roundText = "";
    let emittedText = "";
    let sawToolCallDelta = false;
    const partialCalls = new Map<number, { id?: string; name?: string; arguments: string }>();

    for await (const chunk of stream as any as AsyncIterable<any>) {
      chunkLog.push(chunk);
      if (mergeUsage(usageTotals, chunk?.usage)) {
        usageReported = true;
      }

      const delta = chunk?.choices?.[0]?.delta;

      // Text of a chunk is handled before that chunk's tool-call fragments.
      const textPiece = delta?.content ?? "";
      if (typeof textPiece === "string" && textPiece.length) {
        roundText += textPiece;
        if (!sawToolCallDelta) {
          emittedText += textPiece;
          yield { type: "delta", text: textPiece };
        }
      }

      const callFragments = Array.isArray(delta?.tool_calls) ? delta.tool_calls : [];
      if (callFragments.length) {
        sawToolCallDelta = true;
      }
      for (const fragment of callFragments) {
        // Fragments without a numeric index are merged into slot 0.
        const slot = typeof fragment?.index === "number" ? fragment.index : 0;
        let pending = partialCalls.get(slot);
        if (!pending) {
          pending = { arguments: "" };
          partialCalls.set(slot, pending);
        }
        if (typeof fragment?.id === "string" && fragment.id.length) {
          pending.id = fragment.id;
        }
        if (typeof fragment?.function?.name === "string" && fragment.function.name.length) {
          pending.name = fragment.function.name;
        }
        if (typeof fragment?.function?.arguments === "string" && fragment.function.arguments.length) {
          pending.arguments += fragment.function.arguments;
        }
      }
    }

    const requestedCalls: NormalizedToolCall[] = Array.from(partialCalls.entries())
      .sort(([leftSlot], [rightSlot]) => leftSlot - rightSlot)
      .map(([, call], position) => ({
        id: call.id ?? `tool_call_${round}_${position}`,
        name: call.name ?? "unknown_tool",
        arguments: call.arguments || "{}",
      }));

    if (requestedCalls.length) {
      toolCallCount += requestedCalls.length;

      // Re-state the assistant turn; `content` is attached only when text was produced.
      const assistantToolCallMessage: any = {
        role: "assistant",
        tool_calls: requestedCalls.map((call) => ({
          id: call.id,
          type: "function",
          function: { name: call.name, arguments: call.arguments },
        })),
        ...(roundText ? { content: roundText } : {}),
      };
      conversation.push(assistantToolCallMessage);

      for (const call of requestedCalls) {
        const executed = await executeToolCallAndBuildEvent(call, params);
        executedEvents.push(executed.event);
        yield { type: "tool_call", event: executed.event };
        conversation.push({
          role: "tool",
          tool_call_id: call.id,
          content: JSON.stringify(executed.toolResult),
        });
      }
      continue;
    }

    // Already-emitted text cannot be taken back, so retry only when nothing was streamed.
    if (
      !emittedText &&
      correctionsIssued < MAX_DANGLING_TOOL_INTENT_RETRIES &&
      looksLikeDanglingToolIntent(roundText)
    ) {
      correctionsIssued += 1;
      appendDanglingToolIntentCorrection(conversation, roundText);
      continue;
    }

    const remainder = getUnstreamedText(roundText, emittedText);
    if (remainder) {
      yield { type: "delta", text: remainder };
    }
    yield {
      type: "done",
      result: {
        text: roundText,
        usage: usageReported ? usageTotals : undefined,
        raw: { streamed: true, responses: chunkLog, toolCallsUsed: toolCallCount },
        toolEvents: executedEvents,
      },
    };
    return;
  }

  // Every round was consumed without the model producing a final answer.
  yield {
    type: "done",
    result: {
      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
      usage: usageReported ? usageTotals : undefined,
      raw: { streamed: true, responses: chunkLog, toolCallsUsed: toolCallCount, toolCallLimitReached: true },
      toolEvents: executedEvents,
    },
  };
}
|