oai responses api, tool call retries

2026-05-02 21:44:32 -07:00
parent 8d6c069a33
commit 015253c0af
11 changed files with 369 additions and 40 deletions

View File

@@ -15,7 +15,7 @@ services:
       EXA_API_KEY: ${EXA_API_KEY:-}
       CHAT_WEB_SEARCH_ENGINE: ${CHAT_WEB_SEARCH_ENGINE:-exa}
       SEARXNG_BASE_URL: ${SEARXNG_BASE_URL:-}
-      CHAT_MAX_TOOL_ROUNDS: ${CHAT_MAX_TOOL_ROUNDS:-8}
+      CHAT_MAX_TOOL_ROUNDS: ${CHAT_MAX_TOOL_ROUNDS:-100}
       CHAT_CODEX_TOOL_ENABLED: ${CHAT_CODEX_TOOL_ENABLED:-false}
       CHAT_CODEX_REMOTE_HOST: ${CHAT_CODEX_REMOTE_HOST:-}
       CHAT_CODEX_REMOTE_USER: ${CHAT_CODEX_REMOTE_USER:-}

View File

@@ -37,7 +37,7 @@ Chat upload limits:
   }
 }
 ```
-- OpenAI model lists are filtered to models that are expected to work with the backend's current Chat Completions implementation.
+- OpenAI model lists are filtered to models that are expected to work with the backend's Responses API implementation.
 
 ## Chats
@@ -168,8 +168,11 @@ Behavior notes:
 - Attachments are optional and currently apply to `user` messages. Persisted chat history stores them under `message.metadata.attachments`.
 - Images are forwarded inline to providers as multimodal image parts. Use PNG or JPEG for cross-provider compatibility.
 - Text files are forwarded as explicit text blocks rather than provider-managed file references. Large text attachments should already be truncated client-side before submission.
-- For `openai` and `xai`, backend enables tool use during chat completion with an internal system instruction.
-- For `openai` and `xai`, image attachments are sent as chat-completions content parts alongside text.
+- For `openai`, backend calls OpenAI's Responses API and enables internal tool use with an internal system instruction.
+- For `xai`, backend calls xAI's OpenAI-compatible Chat Completions API and enables internal tool use with the same internal system instruction.
+- For `openai`, image attachments are sent as Responses `input_image` items and text attachments are sent as `input_text` items.
+- For `xai`, image attachments are sent as Chat Completions content parts alongside text.
+- For `openai`, Responses calls that can enter the server-managed tool loop use `store: true` so reasoning and function-call items can be passed between tool rounds.
 - For `anthropic`, image attachments are sent as Messages API `image` blocks using base64 source data; text attachments are added as `text` blocks.
 - Available tool calls for chat: `web_search` and `fetch_url`. When `CHAT_CODEX_TOOL_ENABLED=true`, `codex_exec` is also available. When `CHAT_SHELL_TOOL_ENABLED=true`, `shell_exec` is also available.
 - `web_search` returns ranked results with per-result summaries/snippets. Its backend engine is selected by `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`.
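The `openai` attachment bullets above map onto a Responses API request like this minimal sketch (the model name, data URL, and prompt text are illustrative, not from this commit):

```ts
import OpenAI from "openai";

const client = new OpenAI();

// Hypothetical request mirroring the documented behavior: images become
// `input_image` items and text attachments become `input_text` items.
const response = await client.responses.create({
  model: "gpt-4.1-mini", // illustrative
  input: [
    {
      role: "user",
      content: [
        { type: "input_image", image_url: "data:image/png;base64,iVBORw0...", detail: "auto" },
        { type: "input_text", text: "notes.txt:\nrelease checklist" },
        { type: "input_text", text: "Summarize the screenshot and the notes." },
      ],
    },
  ],
  store: true, // per the tool-loop bullet above
});
console.log(response.output_text);
```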
@@ -177,7 +180,7 @@ Behavior notes:
 - `codex_exec` delegates coding, shell, repository inspection, and other complex software tasks to a persistent remote Codex CLI workspace over SSH. The server runs `codex exec --skip-git-repo-check <non-interactive wrapped prompt>` on the configured devbox inside `CHAT_CODEX_REMOTE_WORKDIR`, with SSH stdin closed.
 - `shell_exec` runs arbitrary non-interactive shell commands on the same configured devbox, starting in `CHAT_CODEX_REMOTE_WORKDIR`. It uses `bash -lc` when bash exists, otherwise `sh -lc`, closes SSH stdin, and does not run inside the Sybil server container.
 - Devbox tool configuration:
-  - `CHAT_MAX_TOOL_ROUNDS=8` (optional; maximum model/tool result cycles before the backend returns a limit message)
+  - `CHAT_MAX_TOOL_ROUNDS=100` (optional; maximum model/tool result cycles before the backend returns a limit message)
   - `CHAT_CODEX_TOOL_ENABLED=true`
   - `CHAT_SHELL_TOOL_ENABLED=true`
   - `CHAT_CODEX_REMOTE_HOST=<host-or-ip>` (required when enabled)

View File

@@ -127,19 +127,22 @@ Event order:
 ## Provider Streaming Behavior
-- `openai`/`xai`: backend may execute internal tool calls (`web_search`, `fetch_url`, optional `codex_exec`, and optional `shell_exec`) before producing final text.
-- `openai`: image attachments are sent as chat-completions content parts; text attachments are inlined as text parts.
-- `xai`: same attachment behavior as OpenAI.
+- `openai`: backend uses OpenAI's Responses API and may execute internal function tool calls (`web_search`, `fetch_url`, optional `codex_exec`, and optional `shell_exec`) before producing final text.
+- `xai`: backend uses xAI's OpenAI-compatible Chat Completions API and may execute the same internal tool calls before producing final text.
+- `openai`: image attachments are sent as Responses `input_image` items; text attachments are sent as `input_text` items.
+- `xai`: image attachments are sent as Chat Completions content parts; text attachments are inlined as text parts.
+- `openai`: Responses calls that can enter the server-managed tool loop use `store: true` so reasoning and function-call items can be passed between tool rounds.
 - `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`. Image attachments are sent as base64 `image` blocks and text attachments are appended as `text` blocks.
 - `web_search` uses `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`. This only affects chat-mode tool calls, not search-mode endpoints.
 - `codex_exec` is available only when `CHAT_CODEX_TOOL_ENABLED=true`. It SSHes to `CHAT_CODEX_REMOTE_HOST`, creates/uses `CHAT_CODEX_REMOTE_WORKDIR`, and runs `codex exec --skip-git-repo-check <non-interactive wrapped prompt>` there with SSH stdin closed. Prefer `CHAT_CODEX_SSH_KEY_PATH` with a read-only mounted private key; `CHAT_CODEX_SSH_PRIVATE_KEY_B64` is also supported.
 - `shell_exec` is available only when `CHAT_SHELL_TOOL_ENABLED=true`. It uses the same devbox SSH configuration, starts in `CHAT_CODEX_REMOTE_WORKDIR`, and runs non-interactive shell commands there with SSH stdin closed, not inside the Sybil server container.
-- `CHAT_MAX_TOOL_ROUNDS` controls how many model/tool result cycles may occur before the backend returns a tool-call limit message; default is 8.
+- `CHAT_MAX_TOOL_ROUNDS` controls how many model/tool result cycles may occur before the backend returns a tool-call limit message; default is 100.
 
 Tool-enabled streaming notes (`openai`/`xai`):
 - Stream still emits standard `meta`, `delta`, `done|error` events.
 - Stream may emit `tool_call` events while tool calls are executed.
-- `delta` events stream incrementally as text is generated.
+- `delta` events carry assistant text. The backend may buffer model-native text briefly while determining whether a provider round contains tool calls.
+- OpenAI Responses stream events are normalized by the backend into this SSE contract; clients do not consume OpenAI's raw Responses stream event names.
 
 ## Persistence + Consistency Model
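A rough sketch of the normalization described in the hunk above, assuming only the backend's own SSE event names (`delta`, `tool_call`, `done|error`); the real implementation lives in `chat-tools.ts` and also buffers text per round, as noted:

```ts
// Maps a raw OpenAI Responses stream event onto the backend's SSE contract.
// Returns null for events that are handled internally and never reach clients.
type SseEvent = { type: "delta"; text: string } | { type: "error"; message: string } | null;

function normalizeResponsesEvent(event: any): SseEvent {
  switch (event?.type) {
    case "response.output_text.delta":
      return { type: "delta", text: event.delta };
    case "error":
      return { type: "error", message: event.message ?? "stream failed" };
    default:
      // response.output_item.done, response.completed, response.failed, ...
      return null;
  }
}
```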

View File

@@ -1,7 +1,7 @@
# Sybil Server # Sybil Server
Backend API for: Backend API for:
- LLM multiplexer (OpenAI / Anthropic / xAI (Grok)) - LLM multiplexer (OpenAI Responses / Anthropic / xAI Chat Completions-compatible Grok)
- Personal chat database (chats/messages + LLM call log) - Personal chat database (chats/messages + LLM call log)
## Stack ## Stack
@@ -46,7 +46,7 @@ If `ADMIN_TOKEN` is not set, the server runs in open mode (dev).
 - `EXA_API_KEY`
 - `CHAT_WEB_SEARCH_ENGINE` (`exa` by default, or `searxng` for chat tool calls only)
 - `SEARXNG_BASE_URL` (required when `CHAT_WEB_SEARCH_ENGINE=searxng`; instance must allow `format=json`)
-- `CHAT_MAX_TOOL_ROUNDS` (`8` by default; maximum model/tool result cycles per chat completion)
+- `CHAT_MAX_TOOL_ROUNDS` (`100` by default; maximum model/tool result cycles per chat completion)
 - `CHAT_CODEX_TOOL_ENABLED` (`false` by default; enables the `codex_exec` chat tool for OpenAI/xAI)
 - `CHAT_CODEX_REMOTE_HOST` (required when Codex tool is enabled; SSH host/IP or `user@host`)
 - `CHAT_CODEX_REMOTE_USER` (optional SSH user when host does not include one)

View File

@@ -64,7 +64,7 @@ const EnvSchema = z.object({
   // Chat-mode web_search tool configuration. Search mode remains Exa-only for now.
   CHAT_WEB_SEARCH_ENGINE: ChatWebSearchEngineSchema,
   SEARXNG_BASE_URL: OptionalUrlSchema,
-  CHAT_MAX_TOOL_ROUNDS: defaultedPositiveInt(8),
+  CHAT_MAX_TOOL_ROUNDS: defaultedPositiveInt(100),
   // Optional chat-mode Codex tool. When enabled, the server SSHes into a remote
   // devbox and runs `codex exec` in a persistent scratch directory there.
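`defaultedPositiveInt` is defined elsewhere in this file; a plausible zod-based sketch consistent with how it is used here (the exact implementation is an assumption):

```ts
import { z } from "zod";

// Assumed helper: coerce an env string to an integer, require it to be
// positive, and fall back to the given default when the variable is unset.
function defaultedPositiveInt(defaultValue: number) {
  return z.coerce.number().int().positive().default(defaultValue);
}

defaultedPositiveInt(100).parse(undefined); // 100 (default applies)
defaultedPositiveInt(100).parse("8");       // 8   (string coerced to int)
```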

View File

@@ -9,7 +9,7 @@ import { z } from "zod";
 import { env } from "../env.js";
 import { exaClient } from "../search/exa.js";
 import { searchSearxng } from "../search/searxng.js";
-import { buildOpenAIConversationMessage } from "./message-content.js";
+import { buildOpenAIConversationMessage, buildOpenAIResponsesInputMessage } from "./message-content.js";
 import type { ChatMessage } from "./types.js";
 
 const MAX_TOOL_ROUNDS = env.CHAT_MAX_TOOL_ROUNDS;
@@ -188,6 +188,17 @@ const CHAT_TOOLS: any[] = [
   ...(env.CHAT_SHELL_TOOL_ENABLED ? [SHELL_EXEC_TOOL] : []),
 ];
 
+const RESPONSES_CHAT_TOOLS: any[] = CHAT_TOOLS.map((tool) => {
+  if (tool?.type !== "function") return tool;
+  return {
+    type: "function",
+    name: tool.function.name,
+    description: tool.function.description,
+    parameters: tool.function.parameters,
+    strict: false,
+  };
+});
+
 export const CHAT_TOOL_SYSTEM_PROMPT =
   "You can use tools to gather up-to-date web information when needed. " +
   "Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page. " +
@@ -374,6 +385,12 @@ function normalizeIncomingMessages(messages: ChatMessage[]) {
   return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
 }
 
+function normalizeIncomingResponsesInput(messages: ChatMessage[]) {
+  const normalized = messages.map((message) => buildOpenAIResponsesInputMessage(message));
+  return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
+}
+
 async function runExaWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
   const exa = exaClient();
   const response = await exa.search(args.query, {
@@ -806,6 +823,52 @@ function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
   return true;
 }
 
+function mergeResponsesUsage(acc: Required<ToolAwareUsage>, usage: any) {
+  if (!usage) return false;
+  acc.inputTokens += usage.input_tokens ?? 0;
+  acc.outputTokens += usage.output_tokens ?? 0;
+  acc.totalTokens += usage.total_tokens ?? 0;
+  return true;
+}
+
+function getResponseOutputItems(response: any) {
+  return Array.isArray(response?.output) ? response.output : [];
+}
+
+function extractResponsesText(response: any, fallback = "") {
+  if (typeof response?.output_text === "string") return response.output_text;
+  const parts: string[] = [];
+  for (const item of getResponseOutputItems(response)) {
+    if (item?.type !== "message" || !Array.isArray(item.content)) continue;
+    for (const content of item.content) {
+      if (content?.type === "output_text" && typeof content.text === "string") {
+        parts.push(content.text);
+      } else if (content?.type === "refusal" && typeof content.refusal === "string") {
+        parts.push(content.refusal);
+      }
+    }
+  }
+  return parts.join("") || fallback;
+}
+
+function getResponseFailureMessage(response: any) {
+  if (response?.status !== "failed" && response?.status !== "incomplete") return null;
+  const errorMessage = typeof response?.error?.message === "string" ? response.error.message : null;
+  const incompleteReason = typeof response?.incomplete_details?.reason === "string" ? response.incomplete_details.reason : null;
+  return errorMessage ?? (incompleteReason ? `Response incomplete: ${incompleteReason}` : `Response ${response.status}.`);
+}
+
+function normalizeResponsesToolCalls(outputItems: any[], round: number): NormalizedToolCall[] {
+  return outputItems
+    .filter((item) => item?.type === "function_call")
+    .map((call: any, index: number) => ({
+      id: call.call_id ?? call.id ?? `tool_call_${round}_${index}`,
+      name: call.name ?? "unknown_tool",
+      arguments: call.arguments ?? "{}",
+    }));
+}
+
 type NormalizedToolCall = {
   id: string;
   name: string;
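A worked example for `extractResponsesText`, using an assumed response shape without the `output_text` convenience field:

```ts
const sample = {
  status: "completed",
  output: [
    { type: "reasoning", id: "rs_1", summary: [] },
    {
      type: "message",
      content: [
        { type: "output_text", text: "Hello" },
        { type: "output_text", text: " world" },
      ],
    },
  ],
};

extractResponsesText(sample); // "Hello world" — walks message items, concatenating output_text/refusal parts
```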
@@ -869,6 +932,75 @@ async function executeToolCallAndBuildEvent(
 }
 
 export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
+  const input: any[] = normalizeIncomingResponsesInput(params.messages);
+  const rawResponses: unknown[] = [];
+  const toolEvents: ToolExecutionEvent[] = [];
+  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+  let sawUsage = false;
+  let totalToolCalls = 0;
+  let danglingToolIntentRetries = 0;
+
+  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
+    const response = await params.client.responses.create({
+      model: params.model,
+      input,
+      temperature: params.temperature,
+      max_output_tokens: params.maxTokens,
+      tools: RESPONSES_CHAT_TOOLS,
+      tool_choice: "auto",
+      parallel_tool_calls: true,
+      // Tool loops pass response output items back as input; reasoning items need persistence.
+      store: true,
+    } as any);
+    rawResponses.push(response);
+    sawUsage = mergeResponsesUsage(usageAcc, response?.usage) || sawUsage;
+
+    const failureMessage = getResponseFailureMessage(response);
+    if (failureMessage) {
+      throw new Error(failureMessage);
+    }
+
+    const outputItems = getResponseOutputItems(response);
+    const normalizedToolCalls = normalizeResponsesToolCalls(outputItems, round);
+    if (!normalizedToolCalls.length) {
+      const text = extractResponsesText(response);
+      if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
+        danglingToolIntentRetries += 1;
+        appendDanglingToolIntentCorrection(input, text);
+        continue;
+      }
+      return {
+        text,
+        usage: sawUsage ? usageAcc : undefined,
+        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, api: "responses" },
+        toolEvents,
+      };
+    }
+
+    totalToolCalls += normalizedToolCalls.length;
+    input.push(...outputItems);
+    for (const call of normalizedToolCalls) {
+      const { event, toolResult } = await executeToolCallAndBuildEvent(call, params);
+      toolEvents.push(event);
+      input.push({
+        type: "function_call_output",
+        call_id: call.id,
+        output: JSON.stringify(toolResult),
+      });
+    }
+  }
+
+  return {
+    text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
+    usage: sawUsage ? usageAcc : undefined,
+    raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "responses" },
+    toolEvents,
+  };
+}
+
+export async function runToolAwareChatCompletions(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
   const conversation: any[] = normalizeIncomingMessages(params.messages);
   const rawResponses: unknown[] = [];
   const toolEvents: ToolExecutionEvent[] = [];
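To make the loop concrete, an assumed snapshot of `input` after one tool round (IDs, arguments, and the reasoning item are invented; echoing reasoning items back is why the call sets `store: true`):

```ts
const inputAfterRoundOne = [
  { role: "system", content: CHAT_TOOL_SYSTEM_PROMPT },
  { role: "user", content: "What shipped in the latest Node.js release?" },
  // Round-1 output items pushed back verbatim:
  { type: "reasoning", id: "rs_1", summary: [] },
  { type: "function_call", call_id: "call_1", name: "web_search", arguments: '{"query":"latest Node.js release"}' },
  // Appended by the loop after executing the tool:
  { type: "function_call_output", call_id: "call_1", output: '{"results":["..."]}' },
];
```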
@@ -956,6 +1088,109 @@ export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams):
 
 export async function* runToolAwareOpenAIChatStream(
   params: ToolAwareCompletionParams
+): AsyncGenerator<ToolAwareStreamingEvent> {
+  const input: any[] = normalizeIncomingResponsesInput(params.messages);
+  const rawResponses: unknown[] = [];
+  const toolEvents: ToolExecutionEvent[] = [];
+  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+  let sawUsage = false;
+  let totalToolCalls = 0;
+  let danglingToolIntentRetries = 0;
+
+  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
+    const stream = await params.client.responses.create({
+      model: params.model,
+      input,
+      temperature: params.temperature,
+      max_output_tokens: params.maxTokens,
+      tools: RESPONSES_CHAT_TOOLS,
+      tool_choice: "auto",
+      parallel_tool_calls: true,
+      // Tool loops pass response output items back as input; reasoning items need persistence.
+      store: true,
+      stream: true,
+    } as any);
+
+    let roundText = "";
+    let completedResponse: any | null = null;
+    const completedOutputItems: any[] = [];
+    for await (const event of stream as any as AsyncIterable<any>) {
+      rawResponses.push(event);
+      if (event?.type === "response.output_text.delta" && typeof event.delta === "string") {
+        roundText += event.delta;
+      } else if (event?.type === "response.output_item.done" && event.item) {
+        completedOutputItems[event.output_index ?? completedOutputItems.length] = event.item;
+      } else if (event?.type === "response.completed") {
+        completedResponse = event.response;
+        sawUsage = mergeResponsesUsage(usageAcc, event.response?.usage) || sawUsage;
+      } else if (event?.type === "response.failed" || event?.type === "response.incomplete") {
+        completedResponse = event.response;
+        sawUsage = mergeResponsesUsage(usageAcc, event.response?.usage) || sawUsage;
+      } else if (event?.type === "error") {
+        throw new Error(event.message ?? "OpenAI Responses stream failed.");
+      }
+    }
+
+    const failureMessage = getResponseFailureMessage(completedResponse);
+    if (failureMessage) {
+      throw new Error(failureMessage);
+    }
+
+    const outputItems = getResponseOutputItems(completedResponse);
+    const responseOutputItems = outputItems.length ? outputItems : completedOutputItems.filter(Boolean);
+    const normalizedToolCalls = normalizeResponsesToolCalls(responseOutputItems, round);
+    if (!normalizedToolCalls.length) {
+      const text = extractResponsesText(completedResponse, roundText);
+      if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
+        danglingToolIntentRetries += 1;
+        appendDanglingToolIntentCorrection(input, text);
+        continue;
+      }
+      if (text) {
+        yield { type: "delta", text };
+      }
+      yield {
+        type: "done",
+        result: {
+          text,
+          usage: sawUsage ? usageAcc : undefined,
+          raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, api: "responses" },
+          toolEvents,
+        },
+      };
+      return;
+    }
+
+    totalToolCalls += normalizedToolCalls.length;
+    input.push(...responseOutputItems);
+    for (const call of normalizedToolCalls) {
+      const { event, toolResult } = await executeToolCallAndBuildEvent(call, params);
+      toolEvents.push(event);
+      yield { type: "tool_call", event };
+      input.push({
+        type: "function_call_output",
+        call_id: call.id,
+        output: JSON.stringify(toolResult),
+      });
+    }
+  }
+
+  yield {
+    type: "done",
+    result: {
+      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
+      usage: sawUsage ? usageAcc : undefined,
+      raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "responses" },
+      toolEvents,
+    },
+  };
+}
+
+export async function* runToolAwareChatCompletionsStream(
+  params: ToolAwareCompletionParams
 ): AsyncGenerator<ToolAwareStreamingEvent> {
   const conversation: any[] = normalizeIncomingMessages(params.messages);
   const rawResponses: unknown[] = [];
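A hypothetical consumer of the streaming generator above; `params` is assumed to be a valid `ToolAwareCompletionParams`:

```ts
for await (const ev of runToolAwareOpenAIChatStream(params)) {
  if (ev.type === "delta") {
    process.stdout.write(ev.text); // assistant text, possibly buffered per round
  } else if (ev.type === "tool_call") {
    console.log("\n[tool_call]", ev.event); // ToolExecutionEvent, defined elsewhere in this file
  } else if (ev.type === "done") {
    console.log("\n[usage]", ev.result.usage);
  }
}
```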

View File

@@ -67,6 +67,43 @@ function toOpenAIContent(message: ChatMessage) {
   return parts;
 }
 
+function toOpenAIResponsesContent(message: ChatMessage) {
+  const imageAttachments = getImageAttachments(message);
+  const textAttachments = getTextAttachments(message);
+  if (!imageAttachments.length && !textAttachments.length) {
+    return message.content;
+  }
+
+  const parts: Array<Record<string, unknown>> = [];
+  for (const attachment of imageAttachments) {
+    parts.push({
+      type: "input_image",
+      image_url: attachment.dataUrl,
+      detail: "auto",
+    });
+  }
+
+  const imageSummary = buildImageSummaryText(imageAttachments);
+  if (imageSummary) {
+    parts.push({ type: "input_text", text: imageSummary });
+  }
+
+  for (const attachment of textAttachments) {
+    parts.push({ type: "input_text", text: buildTextAttachmentPrompt(attachment) });
+  }
+
+  if (message.content.trim()) {
+    parts.push({ type: "input_text", text: message.content });
+  }
+
+  if (parts.length === 1 && parts[0]?.type === "input_text" && typeof parts[0].text === "string") {
+    return parts[0].text;
+  }
+
+  return parts;
+}
+
 function parseImageDataUrl(attachment: ChatImageAttachment) {
   const match = attachment.dataUrl.match(/^data:(image\/(?:png|jpeg));base64,([a-z0-9+/=\s]+)$/i);
   if (!match) {
@@ -146,6 +183,21 @@ export function buildOpenAIConversationMessage(message: ChatMessage) {
   return out;
 }
 
+export function buildOpenAIResponsesInputMessage(message: ChatMessage) {
+  if (message.role === "tool") {
+    const name = message.name?.trim() || "tool";
+    return {
+      role: "user",
+      content: `Tool output (${name}):\n${message.content}`,
+    };
+  }
+  return {
+    role: message.role,
+    content: toOpenAIResponsesContent(message),
+  };
+}
+
 const ANTHROPIC_NO_SERVER_TOOLS_PROMPT =
   "This Anthropic backend path does not have server-managed tool calls. Do not claim to run shell commands, Codex tasks, web searches, or fetch URLs. If the user asks for tool execution, explain that they should switch to OpenAI or xAI in this app for tool-enabled chat.";

View File

@@ -23,13 +23,12 @@ function uniqSorted(models: string[]) {
   return [...new Set(models.map((value) => value.trim()).filter(Boolean))].sort((a, b) => a.localeCompare(b));
 }
 
-function isLikelyOpenAIChatCompletionsModel(model: string) {
+function isLikelyOpenAIResponsesModel(model: string) {
   const id = model.toLowerCase();
   if (id.includes("embedding") || id.includes("moderation")) return false;
   if (id.includes("audio") || id.includes("realtime") || id.includes("transcribe") || id.includes("tts")) return false;
   if (id.includes("image") || id.includes("dall-e") || id.includes("sora")) return false;
   if (id.includes("search") || id.includes("computer-use")) return false;
-  if (/^gpt-[\d.]+-pro(?:-|$)/.test(id)) return false;
   return /^(gpt-|o\d|chatgpt-)/.test(id);
 }
@@ -52,7 +51,7 @@ async function withTimeout<T>(promise: Promise<T>, timeoutMs: number, label: str
 async function fetchProviderModels(provider: Provider) {
   if (provider === "openai") {
     const page = await openaiClient().models.list();
-    return uniqSorted(page.data.map((model) => model.id).filter(isLikelyOpenAIChatCompletionsModel));
+    return uniqSorted(page.data.map((model) => model.id).filter(isLikelyOpenAIResponsesModel));
   }
   if (provider === "anthropic") {
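Spot checks for the renamed filter (model IDs are examples):

```ts
isLikelyOpenAIResponsesModel("gpt-4.1-mini");            // true
isLikelyOpenAIResponsesModel("o3");                      // true — matches /^o\d/
isLikelyOpenAIResponsesModel("gpt-5-pro");               // now true — the pro-model exclusion was removed in this hunk
isLikelyOpenAIResponsesModel("text-embedding-3-small");  // false — "embedding"
isLikelyOpenAIResponsesModel("gpt-4o-realtime-preview"); // false — "realtime"
```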

View File

@@ -1,7 +1,7 @@
import { performance } from "node:perf_hooks"; import { performance } from "node:perf_hooks";
import { prisma } from "../db.js"; import { prisma } from "../db.js";
import { anthropicClient, openaiClient, xaiClient } from "./providers.js"; import { anthropicClient, openaiClient, xaiClient } from "./providers.js";
import { buildToolLogMessageData, runToolAwareOpenAIChat } from "./chat-tools.js"; import { buildToolLogMessageData, runToolAwareChatCompletions, runToolAwareOpenAIChat } from "./chat-tools.js";
import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js"; import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js";
import type { MultiplexRequest, MultiplexResponse, Provider } from "./types.js"; import type { MultiplexRequest, MultiplexResponse, Provider } from "./types.js";
@@ -48,8 +48,8 @@ export async function runMultiplex(req: MultiplexRequest): Promise<MultiplexResp
   let raw: unknown;
   let toolMessages: ReturnType<typeof buildToolLogMessageData>[] = [];
 
-  if (req.provider === "openai" || req.provider === "xai") {
-    const client = req.provider === "openai" ? openaiClient() : xaiClient();
+  if (req.provider === "openai") {
+    const client = openaiClient();
     const r = await runToolAwareOpenAIChat({
       client,
       model: req.model,
@@ -66,6 +66,24 @@ export async function runMultiplex(req: MultiplexRequest): Promise<MultiplexResp
     outText = r.text;
     usage = r.usage;
     toolMessages = r.toolEvents.map((event) => buildToolLogMessageData(call.chatId, event));
+  } else if (req.provider === "xai") {
+    const client = xaiClient();
+    const r = await runToolAwareChatCompletions({
+      client,
+      model: req.model,
+      messages: req.messages,
+      temperature: req.temperature,
+      maxTokens: req.maxTokens,
+      logContext: {
+        provider: req.provider,
+        model: req.model,
+        chatId,
+      },
+    });
+    raw = r.raw;
+    outText = r.text;
+    usage = r.usage;
+    toolMessages = r.toolEvents.map((event) => buildToolLogMessageData(call.chatId, event));
   } else if (req.provider === "anthropic") {
     const client = anthropicClient();

View File

@@ -1,7 +1,12 @@
import { performance } from "node:perf_hooks"; import { performance } from "node:perf_hooks";
import { prisma } from "../db.js"; import { prisma } from "../db.js";
import { anthropicClient, openaiClient, xaiClient } from "./providers.js"; import { anthropicClient, openaiClient, xaiClient } from "./providers.js";
import { buildToolLogMessageData, runToolAwareOpenAIChatStream, type ToolExecutionEvent } from "./chat-tools.js"; import {
buildToolLogMessageData,
runToolAwareChatCompletionsStream,
runToolAwareOpenAIChatStream,
type ToolExecutionEvent,
} from "./chat-tools.js";
import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js"; import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js";
import type { MultiplexRequest, Provider } from "./types.js"; import type { MultiplexRequest, Provider } from "./types.js";
@@ -58,18 +63,33 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
   try {
     if (req.provider === "openai" || req.provider === "xai") {
       const client = req.provider === "openai" ? openaiClient() : xaiClient();
-      for await (const ev of runToolAwareOpenAIChatStream({
-        client,
-        model: req.model,
-        messages: req.messages,
-        temperature: req.temperature,
-        maxTokens: req.maxTokens,
-        logContext: {
-          provider: req.provider,
-          model: req.model,
-          chatId,
-        },
-      })) {
+      const streamEvents =
+        req.provider === "openai"
+          ? runToolAwareOpenAIChatStream({
+              client,
+              model: req.model,
+              messages: req.messages,
+              temperature: req.temperature,
+              maxTokens: req.maxTokens,
+              logContext: {
+                provider: req.provider,
+                model: req.model,
+                chatId,
+              },
+            })
+          : runToolAwareChatCompletionsStream({
+              client,
+              model: req.model,
+              messages: req.messages,
+              temperature: req.temperature,
+              maxTokens: req.maxTokens,
+              logContext: {
+                provider: req.provider,
+                model: req.model,
+                chatId,
+              },
+            });
+      for await (const ev of streamEvents) {
         if (ev.type === "delta") {
           text += ev.text;
           yield { type: "delta", text: ev.text };
View File

@@ -203,16 +203,15 @@ async function generateChatTitle(content: string) {
   const systemPrompt =
     "You create short chat titles. Return exactly one line, maximum 4 words, no quotes, no trailing punctuation.";
   const userPrompt = `User request:\n${content}\n\nTitle:`;
-  const response = await openaiClient().chat.completions.create({
+  const response = await openaiClient().responses.create({
     model: "gpt-4.1-mini",
     temperature: 0,
-    max_completion_tokens: 20,
-    messages: [
-      { role: "system", content: systemPrompt },
-      { role: "user", content: userPrompt },
-    ],
+    max_output_tokens: 20,
+    instructions: systemPrompt,
+    input: userPrompt,
+    store: false,
   });
-  return response.choices?.[0]?.message?.content ?? "";
+  return response.output_text ?? "";
 }
 
 function normalizeUrlForMatch(input: string | null | undefined) {