big backend refactor

2026-06-13 12:02:22 -07:00
parent 7436544a69
commit 297b053a91
15 changed files with 1768 additions and 1068 deletions
--- a/server/src/llm/chat-tools.ts
+++ b/server/src/llm/chat-tools.ts
@@ -4,20 +4,14 @@ import os from "node:os";
 import path from "node:path";
 import { promisify } from "node:util";
 import { convert as htmlToText } from "html-to-text";
-import type OpenAI from "openai";
 import { z } from "zod";
 import { buildBrowserLikeNavigationHeaders } from "../browser-fetch-headers.js";
 import { env } from "../env.js";
 import { exaClient } from "../search/exa.js";
 import { searchSearxng } from "../search/searxng.js";
-import {
-  buildOpenAIConversationMessage,
-  buildOpenAIResponsesInputMessage,
-  buildSystemPromptAugmentationMessage,
-} from "./message-content.js";
 import type { ChatMessage } from "./types.js";

-const MAX_TOOL_ROUNDS = env.CHAT_MAX_TOOL_ROUNDS;
+export const MAX_TOOL_ROUNDS = env.CHAT_MAX_TOOL_ROUNDS;
 const DEFAULT_WEB_RESULTS = 5;
 const MAX_WEB_RESULTS = 10;
 const DEFAULT_FETCH_MAX_CHARACTERS = 12_000;
@@ -30,7 +24,7 @@ const MAX_SHELL_COMMAND_CHARACTERS = 20_000;
 const DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS = 24_000;
 const MAX_SHELL_MAX_OUTPUT_CHARACTERS = 80_000;
 const REMOTE_EXEC_MAX_BUFFER_BYTES = 1_000_000;
-const MAX_DANGLING_TOOL_INTENT_RETRIES = 1;
+export const MAX_DANGLING_TOOL_INTENT_RETRIES = 1;

 const execFileAsync = promisify(execFile);

@@ -220,7 +214,7 @@ function getEnabledToolSet(params: Pick<ToolAwareCompletionParams, "enabledTools
  return new Set(normalizeEnabledChatTools(params.enabledTools));
 }

-function getEnabledChatTools(params: Pick<ToolAwareCompletionParams, "enabledTools">) {
+export function getEnabledChatTools(params: Pick<ToolAwareCompletionParams, "enabledTools">) {
  const enabled = getEnabledToolSet(params);
  return CHAT_TOOLS.filter((tool) => {
    const name = getToolName(tool);
@@ -228,19 +222,6 @@ function getEnabledChatTools(params: Pick<ToolAwareCompletionParams, "enabledToo
  });
 }

-function toResponsesChatTools(tools: any[]) {
-  return tools.map((tool) => {
-  if (tool?.type !== "function") return tool;
-  return {
-    type: "function",
-    name: tool.function.name,
-    description: tool.function.description,
-    parameters: tool.function.parameters,
-    strict: false,
-  };
-  });
-}
-
 export const CHAT_TOOL_SYSTEM_PROMPT =
  "You can use tools to gather up-to-date web information when needed. " +
  "Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page. " +
@@ -254,18 +235,18 @@ export const CHAT_TOOL_SYSTEM_PROMPT =
    : "") +
  "Do not fabricate tool outputs; reason only from provided tool results.";

-type ToolRunOutcome = {
+export type ToolRunOutcome = {
  ok: boolean;
  [key: string]: unknown;
 };

-type ToolAwareUsage = {
+export type ToolAwareUsage = {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
 };

-type ToolAwareCompletionResult = {
+export type ToolAwareCompletionResult = {
  text: string;
  usage?: ToolAwareUsage;
  raw: unknown;
@@ -277,8 +258,8 @@ export type ToolAwareStreamingEvent =
  | { type: "tool_call"; event: ToolExecutionEvent }
  | { type: "done"; result: ToolAwareCompletionResult };

-type ToolAwareCompletionParams = {
-  client: OpenAI;
+export type ToolAwareCompletionParams = {
+  client: any;
  model: string;
  messages: ChatMessage[];
  enabledTools?: string[];
@@ -440,7 +421,7 @@ function extractHtmlTitle(html: string) {
  );
 }

-function buildChatToolSystemPrompt(params: Pick<ToolAwareCompletionParams, "enabledTools">) {
+export function buildChatToolSystemPrompt(params: Pick<ToolAwareCompletionParams, "enabledTools">) {
  const enabled = getEnabledToolSet(params);
  return (
    "You can use tools to gather up-to-date web information when needed. " +
@@ -458,22 +439,6 @@ function buildChatToolSystemPrompt(params: Pick<ToolAwareCompletionParams, "enab
  );
 }

-function normalizeIncomingMessages(messages: ChatMessage[], userLocation?: string, params: Pick<ToolAwareCompletionParams, "enabledTools"> = {}) {
-  const normalized = messages.map((message) => buildOpenAIConversationMessage(message));
-
-  return [{ role: "system", content: buildChatToolSystemPrompt(params) }, buildSystemPromptAugmentationMessage(userLocation), ...normalized];
-}
-
-function normalizePlainIncomingMessages(messages: ChatMessage[], userLocation?: string) {
-  return [buildSystemPromptAugmentationMessage(userLocation), ...messages.map((message) => buildOpenAIConversationMessage(message))];
-}
-
-function normalizeIncomingResponsesInput(messages: ChatMessage[], userLocation?: string, params: Pick<ToolAwareCompletionParams, "enabledTools"> = {}) {
-  const normalized = messages.map((message) => buildOpenAIResponsesInputMessage(message));
-
-  return [{ role: "system", content: buildChatToolSystemPrompt(params) }, buildSystemPromptAugmentationMessage(userLocation), ...normalized];
-}
-
 async function runExaWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
  const exa = exaClient();
  const response = await exa.search(args.query, {
@@ -842,7 +807,7 @@ async function executeTool(name: string, args: unknown): Promise<ToolRunOutcome>
  return { ok: false, error: `Unknown tool: ${name}` };
 }

-function parseToolArgs(raw: unknown) {
+export function parseToolArgs(raw: unknown) {
  if (typeof raw !== "string") return {};
  const trimmed = raw.trim();
  if (!trimmed) return {};
@@ -871,7 +836,7 @@ function buildEventArgs(name: string, args: Record<string, unknown>) {
  return args;
 }

-function looksLikeDanglingToolIntent(text: string) {
+export function looksLikeDanglingToolIntent(text: string) {
  const normalized = text
    .toLowerCase()
    .replace(/[`*_>#-]/g, " ")
@@ -887,7 +852,7 @@ function looksLikeDanglingToolIntent(text: string) {
  );
 }

-function appendDanglingToolIntentCorrection(conversation: any[], text: string) {
+export function appendDanglingToolIntentCorrection(conversation: any[], text: string) {
  conversation.push({ role: "assistant", content: text });
  conversation.push({
    role: "system",
@@ -896,7 +861,7 @@ function appendDanglingToolIntentCorrection(conversation: any[], text: string) {
  });
 }

-function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
+export function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
  if (!usage) return false;
  acc.inputTokens += usage.prompt_tokens ?? 0;
  acc.outputTokens += usage.completion_tokens ?? 0;
@@ -904,79 +869,19 @@ function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
  return true;
 }

-function mergeResponsesUsage(acc: Required<ToolAwareUsage>, usage: any) {
-  if (!usage) return false;
-  acc.inputTokens += usage.input_tokens ?? 0;
-  acc.outputTokens += usage.output_tokens ?? 0;
-  acc.totalTokens += usage.total_tokens ?? 0;
-  return true;
-}
-
-function getResponseOutputItems(response: any) {
-  return Array.isArray(response?.output) ? response.output : [];
-}
-
-function extractResponsesText(response: any, fallback = "") {
-  if (typeof response?.output_text === "string") return response.output_text;
-
-  const parts: string[] = [];
-  for (const item of getResponseOutputItems(response)) {
-    if (item?.type !== "message" || !Array.isArray(item.content)) continue;
-    for (const content of item.content) {
-      if (content?.type === "output_text" && typeof content.text === "string") {
-        parts.push(content.text);
-      } else if (content?.type === "refusal" && typeof content.refusal === "string") {
-        parts.push(content.refusal);
-      }
-    }
-  }
-  return parts.join("") || fallback;
-}
-
-function extractChatCompletionContent(message: any) {
-  if (typeof message?.content === "string") return message.content;
-  if (!Array.isArray(message?.content)) return "";
-
-  return message.content
-    .map((part: any) => {
-      if (typeof part === "string") return part;
-      if (typeof part?.text === "string") return part.text;
-      if (typeof part?.content === "string") return part.content;
-      return "";
-    })
-    .join("");
-}
-
-function getUnstreamedText(finalText: string, streamedText: string) {
+export function getUnstreamedText(finalText: string, streamedText: string) {
  if (!finalText) return "";
  if (!streamedText) return finalText;
  return finalText.startsWith(streamedText) ? finalText.slice(streamedText.length) : "";
 }

-function getResponseFailureMessage(response: any) {
-  if (response?.status !== "failed" && response?.status !== "incomplete") return null;
-  const errorMessage = typeof response?.error?.message === "string" ? response.error.message : null;
-  const incompleteReason = typeof response?.incomplete_details?.reason === "string" ? response.incomplete_details.reason : null;
-  return errorMessage ?? (incompleteReason ? `Response incomplete: ${incompleteReason}` : `Response ${response.status}.`);
-}
-
-function normalizeResponsesToolCalls(outputItems: any[], round: number): NormalizedToolCall[] {
-  return outputItems
-    .filter((item) => item?.type === "function_call")
-    .map((call: any, index: number) => ({
-      id: call.call_id ?? call.id ?? `tool_call_${round}_${index}`,
-      name: call.name ?? "unknown_tool",
-      arguments: call.arguments ?? "{}",
-    }));
-}
-
-type NormalizedToolCall = {
+export type NormalizedToolCall = {
  id: string;
  name: string;
  arguments: string;
 };

-function normalizeModelToolCalls(toolCalls: any[], round: number): NormalizedToolCall[] {
+export function normalizeModelToolCalls(toolCalls: any[], round: number): NormalizedToolCall[] {
  return toolCalls.map((call: any, index: number) => ({
    id: call?.id ?? `tool_call_${round}_${index}`,
    name: call?.function?.name ?? "unknown_tool",
@@ -984,7 +889,7 @@ function normalizeModelToolCalls(toolCalls: any[], round: number): NormalizedToo
  }));
 }

-type PreparedToolCallExecution = {
+export type PreparedToolCallExecution = {
  startedAtMs: number;
  startedAt: string;
  parsedArgs: Record<string, unknown>;
@@ -992,7 +897,7 @@ type PreparedToolCallExecution = {
  parseError?: unknown;
 };

-function prepareToolCallExecution(call: NormalizedToolCall): { event: ToolExecutionEvent; execution: PreparedToolCallExecution } {
+export function prepareToolCallExecution(call: NormalizedToolCall): { event: ToolExecutionEvent; execution: PreparedToolCallExecution } {
  const startedAtMs = Date.now();
  const startedAt = new Date(startedAtMs).toISOString();
  let parsedArgs: Record<string, unknown> = {};
@@ -1024,7 +929,7 @@ function prepareToolCallExecution(call: NormalizedToolCall): { event: ToolExecut
  };
 }

-async function executeToolCallAndBuildEvent(
+export async function executeToolCallAndBuildEvent(
  call: NormalizedToolCall,
  execution: PreparedToolCallExecution,
  params: ToolAwareCompletionParams
@@ -1068,488 +973,3 @@ async function executeToolCallAndBuildEvent(

  return { event, toolResult };
 }
-
-export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
-  const enabledTools = getEnabledChatTools(params);
-  const input: any[] = normalizeIncomingResponsesInput(params.messages, params.userLocation, params);
-  const rawResponses: unknown[] = [];
-  const toolEvents: ToolExecutionEvent[] = [];
-  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
-  let sawUsage = false;
-  let totalToolCalls = 0;
-  let danglingToolIntentRetries = 0;
-
-  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
-    const response = await params.client.responses.create({
-      model: params.model,
-      input,
-      temperature: params.temperature,
-      max_output_tokens: params.maxTokens,
-      tools: toResponsesChatTools(enabledTools),
-      tool_choice: "auto",
-      parallel_tool_calls: true,
-      // Tool loops pass response output items back as input; reasoning items need persistence.
-      store: true,
-    } as any);
-    rawResponses.push(response);
-    sawUsage = mergeResponsesUsage(usageAcc, response?.usage) || sawUsage;
-
-    const failureMessage = getResponseFailureMessage(response);
-    if (failureMessage) {
-      throw new Error(failureMessage);
-    }
-
-    const outputItems = getResponseOutputItems(response);
-    const normalizedToolCalls = normalizeResponsesToolCalls(outputItems, round);
-    if (!normalizedToolCalls.length) {
-      const text = extractResponsesText(response);
-      if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
-        danglingToolIntentRetries += 1;
-        appendDanglingToolIntentCorrection(input, text);
-        continue;
-      }
-      return {
-        text,
-        usage: sawUsage ? usageAcc : undefined,
-        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, api: "responses" },
-        toolEvents,
-      };
-    }
-
-    totalToolCalls += normalizedToolCalls.length;
-    input.push(...outputItems);
-
-    for (const call of normalizedToolCalls) {
-      const { execution } = prepareToolCallExecution(call);
-      const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
-      toolEvents.push(event);
-
-      input.push({
-        type: "function_call_output",
-        call_id: call.id,
-        output: JSON.stringify(toolResult),
-      });
-    }
-  }
-
-  return {
-    text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
-    usage: sawUsage ? usageAcc : undefined,
-    raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "responses" },
-    toolEvents,
-  };
-}
-
-export async function runToolAwareChatCompletions(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
-  const enabledTools = getEnabledChatTools(params);
-  const conversation: any[] = normalizeIncomingMessages(params.messages, params.userLocation, params);
-  const rawResponses: unknown[] = [];
-  const toolEvents: ToolExecutionEvent[] = [];
-  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
-  let sawUsage = false;
-  let totalToolCalls = 0;
-  let danglingToolIntentRetries = 0;
-
-  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
-    const completion = await params.client.chat.completions.create({
-      model: params.model,
-      messages: conversation,
-      temperature: params.temperature,
-      max_tokens: params.maxTokens,
-      tools: enabledTools,
-      tool_choice: "auto",
-    } as any);
-    rawResponses.push(completion);
-    sawUsage = mergeUsage(usageAcc, completion?.usage) || sawUsage;
-
-    const message = completion?.choices?.[0]?.message;
-    if (!message) {
-      return {
-        text: "",
-        usage: sawUsage ? usageAcc : undefined,
-        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, missingMessage: true },
-        toolEvents,
-      };
-    }
-
-    const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
-    if (!toolCalls.length) {
-      const text = typeof message.content === "string" ? message.content : "";
-      if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
-        danglingToolIntentRetries += 1;
-        appendDanglingToolIntentCorrection(conversation, text);
-        continue;
-      }
-      return {
-        text,
-        usage: sawUsage ? usageAcc : undefined,
-        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls },
-        toolEvents,
-      };
-    }
-
-    const normalizedToolCalls = normalizeModelToolCalls(toolCalls, round);
-    totalToolCalls += normalizedToolCalls.length;
-
-    const assistantToolCallMessage: any = {
-      role: "assistant",
-      tool_calls: normalizedToolCalls.map((call) => ({
-        id: call.id,
-        type: "function",
-        function: {
-          name: call.name,
-          arguments: call.arguments,
-        },
-      })),
-    };
-    if (typeof message.content === "string" && message.content.length) {
-      assistantToolCallMessage.content = message.content;
-    }
-    conversation.push(assistantToolCallMessage);
-
-    for (const call of normalizedToolCalls) {
-      const { execution } = prepareToolCallExecution(call);
-      const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
-      toolEvents.push(event);
-
-      conversation.push({
-        role: "tool",
-        tool_call_id: call.id,
-        content: JSON.stringify(toolResult),
-      });
-    }
-  }
-
-  return {
-    text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
-    usage: sawUsage ? usageAcc : undefined,
-    raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true },
-    toolEvents,
-  };
-}
-
-export async function runPlainChatCompletions(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
-  const completion = await params.client.chat.completions.create({
-    model: params.model,
-    messages: normalizePlainIncomingMessages(params.messages, params.userLocation),
-    temperature: params.temperature,
-    max_tokens: params.maxTokens,
-  } as any);
-
-  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
-  const sawUsage = mergeUsage(usageAcc, completion?.usage);
-  const message = completion?.choices?.[0]?.message;
-
-  return {
-    text: extractChatCompletionContent(message),
-    usage: sawUsage ? usageAcc : undefined,
-    raw: { response: completion, api: "chat.completions" },
-    toolEvents: [],
-  };
-}
-
-export async function* runToolAwareOpenAIChatStream(
-  params: ToolAwareCompletionParams
-): AsyncGenerator<ToolAwareStreamingEvent> {
-  const enabledTools = getEnabledChatTools(params);
-  const input: any[] = normalizeIncomingResponsesInput(params.messages, params.userLocation, params);
-  const rawResponses: unknown[] = [];
-  const toolEvents: ToolExecutionEvent[] = [];
-  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
-  let sawUsage = false;
-  let totalToolCalls = 0;
-  let danglingToolIntentRetries = 0;
-
-  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
-    const stream = await params.client.responses.create({
-      model: params.model,
-      input,
-      temperature: params.temperature,
-      max_output_tokens: params.maxTokens,
-      tools: toResponsesChatTools(enabledTools),
-      tool_choice: "auto",
-      parallel_tool_calls: true,
-      // Tool loops pass response output items back as input; reasoning items need persistence.
-      store: true,
-      stream: true,
-    } as any);
-
-    let roundText = "";
-    let streamedRoundText = "";
-    let roundHasToolCalls = false;
-    let canStreamRoundText = false;
-    let completedResponse: any | null = null;
-    const completedOutputItems: any[] = [];
-
-    for await (const event of stream as any as AsyncIterable<any>) {
-      rawResponses.push(event);
-
-      if (event?.type === "response.output_text.delta" && typeof event.delta === "string") {
-        roundText += event.delta;
-        if (canStreamRoundText && !roundHasToolCalls && event.delta.length) {
-          streamedRoundText += event.delta;
-          yield { type: "delta", text: event.delta };
-        }
-      } else if (event?.type === "response.output_item.added" && event.item) {
-        if (event.item.type === "function_call") {
-          roundHasToolCalls = true;
-          canStreamRoundText = false;
-        } else if (event.item.type === "message" && !roundHasToolCalls) {
-          canStreamRoundText = true;
-        }
-      } else if (event?.type === "response.output_item.done" && event.item) {
-        completedOutputItems[event.output_index ?? completedOutputItems.length] = event.item;
-        if (event.item.type === "function_call") {
-          roundHasToolCalls = true;
-          canStreamRoundText = false;
-        }
-      } else if (event?.type === "response.completed") {
-        completedResponse = event.response;
-        sawUsage = mergeResponsesUsage(usageAcc, event.response?.usage) || sawUsage;
-      } else if (event?.type === "response.failed" || event?.type === "response.incomplete") {
-        completedResponse = event.response;
-        sawUsage = mergeResponsesUsage(usageAcc, event.response?.usage) || sawUsage;
-      } else if (event?.type === "error") {
-        throw new Error(event.message ?? "OpenAI Responses stream failed.");
-      }
-    }
-
-    const failureMessage = getResponseFailureMessage(completedResponse);
-    if (failureMessage) {
-      throw new Error(failureMessage);
-    }
-
-    const outputItems = getResponseOutputItems(completedResponse);
-    const responseOutputItems = outputItems.length ? outputItems : completedOutputItems.filter(Boolean);
-    const normalizedToolCalls = normalizeResponsesToolCalls(responseOutputItems, round);
-    if (!normalizedToolCalls.length) {
-      const text = extractResponsesText(completedResponse, roundText);
-      if (
-        !streamedRoundText &&
-        danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES &&
-        looksLikeDanglingToolIntent(text)
-      ) {
-        danglingToolIntentRetries += 1;
-        appendDanglingToolIntentCorrection(input, text);
-        continue;
-      }
-      const unstreamedText = getUnstreamedText(text, streamedRoundText);
-      if (unstreamedText) {
-        yield { type: "delta", text: unstreamedText };
-      }
-      yield {
-        type: "done",
-        result: {
-          text,
-          usage: sawUsage ? usageAcc : undefined,
-          raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, api: "responses" },
-          toolEvents,
-        },
-      };
-      return;
-    }
-
-    totalToolCalls += normalizedToolCalls.length;
-    input.push(...responseOutputItems);
-
-    for (const call of normalizedToolCalls) {
-      const { event: initiatedEvent, execution } = prepareToolCallExecution(call);
-      yield { type: "tool_call", event: initiatedEvent };
-      const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
-      toolEvents.push(event);
-      yield { type: "tool_call", event };
-      input.push({
-        type: "function_call_output",
-        call_id: call.id,
-        output: JSON.stringify(toolResult),
-      });
-    }
-  }
-
-  yield {
-    type: "done",
-    result: {
-      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
-      usage: sawUsage ? usageAcc : undefined,
-      raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "responses" },
-      toolEvents,
-    },
-  };
-}
-
-export async function* runToolAwareChatCompletionsStream(
-  params: ToolAwareCompletionParams
-): AsyncGenerator<ToolAwareStreamingEvent> {
-  const enabledTools = getEnabledChatTools(params);
-  const conversation: any[] = normalizeIncomingMessages(params.messages, params.userLocation, params);
-  const rawResponses: unknown[] = [];
-  const toolEvents: ToolExecutionEvent[] = [];
-  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
-  let sawUsage = false;
-  let totalToolCalls = 0;
-  let danglingToolIntentRetries = 0;
-
-  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
-    const stream = await params.client.chat.completions.create({
-      model: params.model,
-      messages: conversation,
-      temperature: params.temperature,
-      max_tokens: params.maxTokens,
-      tools: enabledTools,
-      tool_choice: "auto",
-      stream: true,
-      stream_options: { include_usage: true },
-    } as any);
-
-    let roundText = "";
-    let streamedRoundText = "";
-    let roundHasToolCalls = false;
-    const roundToolCalls = new Map<number, { id?: string; name?: string; arguments: string }>();
-
-    for await (const chunk of stream as any as AsyncIterable<any>) {
-      rawResponses.push(chunk);
-      sawUsage = mergeUsage(usageAcc, chunk?.usage) || sawUsage;
-
-      const choice = chunk?.choices?.[0];
-      const deltaText = choice?.delta?.content ?? "";
-      if (typeof deltaText === "string" && deltaText.length) {
-        roundText += deltaText;
-        if (!roundHasToolCalls) {
-          streamedRoundText += deltaText;
-          yield { type: "delta", text: deltaText };
-        }
-      }
-
-      const deltaToolCalls = Array.isArray(choice?.delta?.tool_calls) ? choice.delta.tool_calls : [];
-      if (deltaToolCalls.length) {
-        roundHasToolCalls = true;
-      }
-      for (const toolCall of deltaToolCalls) {
-        const idx = typeof toolCall?.index === "number" ? toolCall.index : 0;
-        const entry = roundToolCalls.get(idx) ?? { arguments: "" };
-        if (typeof toolCall?.id === "string" && toolCall.id.length) {
-          entry.id = toolCall.id;
-        }
-        if (typeof toolCall?.function?.name === "string" && toolCall.function.name.length) {
-          entry.name = toolCall.function.name;
-        }
-        if (typeof toolCall?.function?.arguments === "string" && toolCall.function.arguments.length) {
-          entry.arguments += toolCall.function.arguments;
-        }
-        roundToolCalls.set(idx, entry);
-      }
-    }
-
-    const normalizedToolCalls: NormalizedToolCall[] = [...roundToolCalls.entries()]
-      .sort((a, b) => a[0] - b[0])
-      .map(([_, call], index) => ({
-        id: call.id ?? `tool_call_${round}_${index}`,
-        name: call.name ?? "unknown_tool",
-        arguments: call.arguments || "{}",
-      }));
-
-    if (!normalizedToolCalls.length) {
-      if (
-        !streamedRoundText &&
-        danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES &&
-        looksLikeDanglingToolIntent(roundText)
-      ) {
-        danglingToolIntentRetries += 1;
-        appendDanglingToolIntentCorrection(conversation, roundText);
-        continue;
-      }
-      const unstreamedText = getUnstreamedText(roundText, streamedRoundText);
-      if (unstreamedText) {
-        yield { type: "delta", text: unstreamedText };
-      }
-      yield {
-        type: "done",
-        result: {
-          text: roundText,
-          usage: sawUsage ? usageAcc : undefined,
-          raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls },
-          toolEvents,
-        },
-      };
-      return;
-    }
-
-    totalToolCalls += normalizedToolCalls.length;
-    const assistantToolCallMessage: any = {
-      role: "assistant",
-      tool_calls: normalizedToolCalls.map((call) => ({
-        id: call.id,
-        type: "function",
-        function: {
-          name: call.name,
-          arguments: call.arguments,
-        },
-      })),
-    };
-    if (roundText) {
-      assistantToolCallMessage.content = roundText;
-    }
-    conversation.push(assistantToolCallMessage);
-
-    for (const call of normalizedToolCalls) {
-      const { event: initiatedEvent, execution } = prepareToolCallExecution(call);
-      yield { type: "tool_call", event: initiatedEvent };
-      const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
-      toolEvents.push(event);
-      yield { type: "tool_call", event };
-      conversation.push({
-        role: "tool",
-        tool_call_id: call.id,
-        content: JSON.stringify(toolResult),
-      });
-    }
-  }
-
-  yield {
-    type: "done",
-    result: {
-      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
-      usage: sawUsage ? usageAcc : undefined,
-      raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true },
-      toolEvents,
-    },
-  };
-}
-
-export async function* runPlainChatCompletionsStream(
-  params: ToolAwareCompletionParams
-): AsyncGenerator<ToolAwareStreamingEvent> {
-  const rawResponses: unknown[] = [];
-  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
-  let sawUsage = false;
-  let text = "";
-
-  const stream = await params.client.chat.completions.create({
-    model: params.model,
-    messages: normalizePlainIncomingMessages(params.messages, params.userLocation),
-    temperature: params.temperature,
-    max_tokens: params.maxTokens,
-    stream: true,
-  } as any);
-
-  for await (const chunk of stream as any as AsyncIterable<any>) {
-    rawResponses.push(chunk);
-    sawUsage = mergeUsage(usageAcc, chunk?.usage) || sawUsage;
-
-    const deltaText = chunk?.choices?.[0]?.delta?.content ?? "";
-    if (typeof deltaText === "string" && deltaText.length) {
-      text += deltaText;
-      yield { type: "delta", text: deltaText };
-    }
-  }
-
-  yield {
-    type: "done",
-    result: {
-      text,
-      usage: sawUsage ? usageAcc : undefined,
-      raw: { streamed: true, responses: rawResponses, api: "chat.completions" },
-      toolEvents: [],
-    },
-  };
-}