oai responses api, tool call retries
This commit is contained in:
@@ -9,7 +9,7 @@ import { z } from "zod";
|
||||
import { env } from "../env.js";
|
||||
import { exaClient } from "../search/exa.js";
|
||||
import { searchSearxng } from "../search/searxng.js";
|
||||
import { buildOpenAIConversationMessage } from "./message-content.js";
|
||||
import { buildOpenAIConversationMessage, buildOpenAIResponsesInputMessage } from "./message-content.js";
|
||||
import type { ChatMessage } from "./types.js";
|
||||
|
||||
const MAX_TOOL_ROUNDS = env.CHAT_MAX_TOOL_ROUNDS;
|
||||
@@ -188,6 +188,17 @@ const CHAT_TOOLS: any[] = [
|
||||
...(env.CHAT_SHELL_TOOL_ENABLED ? [SHELL_EXEC_TOOL] : []),
|
||||
];
|
||||
|
||||
const RESPONSES_CHAT_TOOLS: any[] = CHAT_TOOLS.map((tool) => {
|
||||
if (tool?.type !== "function") return tool;
|
||||
return {
|
||||
type: "function",
|
||||
name: tool.function.name,
|
||||
description: tool.function.description,
|
||||
parameters: tool.function.parameters,
|
||||
strict: false,
|
||||
};
|
||||
});
|
||||
|
||||
export const CHAT_TOOL_SYSTEM_PROMPT =
|
||||
"You can use tools to gather up-to-date web information when needed. " +
|
||||
"Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page. " +
|
||||
@@ -374,6 +385,12 @@ function normalizeIncomingMessages(messages: ChatMessage[]) {
|
||||
return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
|
||||
}
|
||||
|
||||
function normalizeIncomingResponsesInput(messages: ChatMessage[]) {
|
||||
const normalized = messages.map((message) => buildOpenAIResponsesInputMessage(message));
|
||||
|
||||
return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
|
||||
}
|
||||
|
||||
async function runExaWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
|
||||
const exa = exaClient();
|
||||
const response = await exa.search(args.query, {
|
||||
@@ -806,6 +823,52 @@ function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
|
||||
return true;
|
||||
}
|
||||
|
||||
function mergeResponsesUsage(acc: Required<ToolAwareUsage>, usage: any) {
|
||||
if (!usage) return false;
|
||||
acc.inputTokens += usage.input_tokens ?? 0;
|
||||
acc.outputTokens += usage.output_tokens ?? 0;
|
||||
acc.totalTokens += usage.total_tokens ?? 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
function getResponseOutputItems(response: any) {
|
||||
return Array.isArray(response?.output) ? response.output : [];
|
||||
}
|
||||
|
||||
function extractResponsesText(response: any, fallback = "") {
|
||||
if (typeof response?.output_text === "string") return response.output_text;
|
||||
|
||||
const parts: string[] = [];
|
||||
for (const item of getResponseOutputItems(response)) {
|
||||
if (item?.type !== "message" || !Array.isArray(item.content)) continue;
|
||||
for (const content of item.content) {
|
||||
if (content?.type === "output_text" && typeof content.text === "string") {
|
||||
parts.push(content.text);
|
||||
} else if (content?.type === "refusal" && typeof content.refusal === "string") {
|
||||
parts.push(content.refusal);
|
||||
}
|
||||
}
|
||||
}
|
||||
return parts.join("") || fallback;
|
||||
}
|
||||
|
||||
function getResponseFailureMessage(response: any) {
|
||||
if (response?.status !== "failed" && response?.status !== "incomplete") return null;
|
||||
const errorMessage = typeof response?.error?.message === "string" ? response.error.message : null;
|
||||
const incompleteReason = typeof response?.incomplete_details?.reason === "string" ? response.incomplete_details.reason : null;
|
||||
return errorMessage ?? (incompleteReason ? `Response incomplete: ${incompleteReason}` : `Response ${response.status}.`);
|
||||
}
|
||||
|
||||
function normalizeResponsesToolCalls(outputItems: any[], round: number): NormalizedToolCall[] {
|
||||
return outputItems
|
||||
.filter((item) => item?.type === "function_call")
|
||||
.map((call: any, index: number) => ({
|
||||
id: call.call_id ?? call.id ?? `tool_call_${round}_${index}`,
|
||||
name: call.name ?? "unknown_tool",
|
||||
arguments: call.arguments ?? "{}",
|
||||
}));
|
||||
}
|
||||
|
||||
type NormalizedToolCall = {
|
||||
id: string;
|
||||
name: string;
|
||||
@@ -869,6 +932,75 @@ async function executeToolCallAndBuildEvent(
|
||||
}
|
||||
|
||||
export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
|
||||
const input: any[] = normalizeIncomingResponsesInput(params.messages);
|
||||
const rawResponses: unknown[] = [];
|
||||
const toolEvents: ToolExecutionEvent[] = [];
|
||||
const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
|
||||
let sawUsage = false;
|
||||
let totalToolCalls = 0;
|
||||
let danglingToolIntentRetries = 0;
|
||||
|
||||
for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
|
||||
const response = await params.client.responses.create({
|
||||
model: params.model,
|
||||
input,
|
||||
temperature: params.temperature,
|
||||
max_output_tokens: params.maxTokens,
|
||||
tools: RESPONSES_CHAT_TOOLS,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: true,
|
||||
// Tool loops pass response output items back as input; reasoning items need persistence.
|
||||
store: true,
|
||||
} as any);
|
||||
rawResponses.push(response);
|
||||
sawUsage = mergeResponsesUsage(usageAcc, response?.usage) || sawUsage;
|
||||
|
||||
const failureMessage = getResponseFailureMessage(response);
|
||||
if (failureMessage) {
|
||||
throw new Error(failureMessage);
|
||||
}
|
||||
|
||||
const outputItems = getResponseOutputItems(response);
|
||||
const normalizedToolCalls = normalizeResponsesToolCalls(outputItems, round);
|
||||
if (!normalizedToolCalls.length) {
|
||||
const text = extractResponsesText(response);
|
||||
if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
|
||||
danglingToolIntentRetries += 1;
|
||||
appendDanglingToolIntentCorrection(input, text);
|
||||
continue;
|
||||
}
|
||||
return {
|
||||
text,
|
||||
usage: sawUsage ? usageAcc : undefined,
|
||||
raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, api: "responses" },
|
||||
toolEvents,
|
||||
};
|
||||
}
|
||||
|
||||
totalToolCalls += normalizedToolCalls.length;
|
||||
input.push(...outputItems);
|
||||
|
||||
for (const call of normalizedToolCalls) {
|
||||
const { event, toolResult } = await executeToolCallAndBuildEvent(call, params);
|
||||
toolEvents.push(event);
|
||||
|
||||
input.push({
|
||||
type: "function_call_output",
|
||||
call_id: call.id,
|
||||
output: JSON.stringify(toolResult),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
|
||||
usage: sawUsage ? usageAcc : undefined,
|
||||
raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "responses" },
|
||||
toolEvents,
|
||||
};
|
||||
}
|
||||
|
||||
export async function runToolAwareChatCompletions(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
|
||||
const conversation: any[] = normalizeIncomingMessages(params.messages);
|
||||
const rawResponses: unknown[] = [];
|
||||
const toolEvents: ToolExecutionEvent[] = [];
|
||||
@@ -956,6 +1088,109 @@ export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams):
|
||||
|
||||
/**
 * Streaming variant of the Responses API tool-aware chat loop.
 *
 * Yields `tool_call` events as tools are executed and a final `done` event
 * with the aggregate result. Each round opens a streaming `responses.create`
 * call, collects events until the stream ends, then either executes the
 * requested tool calls (feeding results back as input for the next round) or
 * finishes with the extracted text.
 *
 * NOTE(review): text deltas are buffered into `roundText` and, when the round
 * turns out to be final, emitted as a single `delta` — presumably so a
 * dangling-tool-intent retry can silently discard the text. Confirm consumers
 * don't expect token-by-token deltas.
 *
 * @throws Error on stream `error` events and on failed/incomplete responses.
 */
export async function* runToolAwareOpenAIChatStream(
  params: ToolAwareCompletionParams
): AsyncGenerator<ToolAwareStreamingEvent> {
  const input: any[] = normalizeIncomingResponsesInput(params.messages);
  // Raw *stream events* (not whole responses) are collected here for callers.
  const rawResponses: unknown[] = [];
  const toolEvents: ToolExecutionEvent[] = [];
  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let sawUsage = false;
  let totalToolCalls = 0;
  let danglingToolIntentRetries = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
    const stream = await params.client.responses.create({
      model: params.model,
      input,
      temperature: params.temperature,
      max_output_tokens: params.maxTokens,
      tools: RESPONSES_CHAT_TOOLS,
      tool_choice: "auto",
      parallel_tool_calls: true,
      // Tool loops pass response output items back as input; reasoning items need persistence.
      store: true,
      stream: true,
    } as any);

    let roundText = "";
    let completedResponse: any | null = null;
    // Output items keyed by output_index; may be sparse, filtered below.
    const completedOutputItems: any[] = [];

    for await (const event of stream as any as AsyncIterable<any>) {
      rawResponses.push(event);

      if (event?.type === "response.output_text.delta" && typeof event.delta === "string") {
        // Buffer text; it is only surfaced if this round produces no tool calls.
        roundText += event.delta;
      } else if (event?.type === "response.output_item.done" && event.item) {
        completedOutputItems[event.output_index ?? completedOutputItems.length] = event.item;
      } else if (event?.type === "response.completed") {
        completedResponse = event.response;
        sawUsage = mergeResponsesUsage(usageAcc, event.response?.usage) || sawUsage;
      } else if (event?.type === "response.failed" || event?.type === "response.incomplete") {
        // Keep the terminal response so the failure check below can report it.
        completedResponse = event.response;
        sawUsage = mergeResponsesUsage(usageAcc, event.response?.usage) || sawUsage;
      } else if (event?.type === "error") {
        throw new Error(event.message ?? "OpenAI Responses stream failed.");
      }
    }

    const failureMessage = getResponseFailureMessage(completedResponse);
    if (failureMessage) {
      throw new Error(failureMessage);
    }

    // Prefer the terminal response's output; fall back to the per-item events
    // collected during streaming (dropping sparse-array holes).
    const outputItems = getResponseOutputItems(completedResponse);
    const responseOutputItems = outputItems.length ? outputItems : completedOutputItems.filter(Boolean);
    const normalizedToolCalls = normalizeResponsesToolCalls(responseOutputItems, round);
    if (!normalizedToolCalls.length) {
      const text = extractResponsesText(completedResponse, roundText);
      // Model announced a tool call in prose without making one: nudge and retry.
      if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
        danglingToolIntentRetries += 1;
        appendDanglingToolIntentCorrection(input, text);
        continue;
      }
      if (text) {
        yield { type: "delta", text };
      }
      yield {
        type: "done",
        result: {
          text,
          usage: sawUsage ? usageAcc : undefined,
          raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, api: "responses" },
          toolEvents,
        },
      };
      return;
    }

    totalToolCalls += normalizedToolCalls.length;
    // Echo the model's output items before appending tool results, as the
    // Responses tool loop requires.
    input.push(...responseOutputItems);

    for (const call of normalizedToolCalls) {
      const { event, toolResult } = await executeToolCallAndBuildEvent(call, params);
      toolEvents.push(event);
      yield { type: "tool_call", event };
      input.push({
        type: "function_call_output",
        call_id: call.id,
        output: JSON.stringify(toolResult),
      });
    }
  }

  // Round budget exhausted without a final text answer.
  yield {
    type: "done",
    result: {
      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
      usage: sawUsage ? usageAcc : undefined,
      raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "responses" },
      toolEvents,
    },
  };
}
|
||||
|
||||
export async function* runToolAwareChatCompletionsStream(
|
||||
params: ToolAwareCompletionParams
|
||||
): AsyncGenerator<ToolAwareStreamingEvent> {
|
||||
const conversation: any[] = normalizeIncomingMessages(params.messages);
|
||||
const rawResponses: unknown[] = [];
|
||||
|
||||
Reference in New Issue
Block a user