big backend refactor

2026-06-13 12:02:22 -07:00
parent 7436544a69
commit 297b053a91
15 changed files with 1768 additions and 1068 deletions
--- a/server/src/llm/protocols/chat-completions-api.ts
+++ b/server/src/llm/protocols/chat-completions-api.ts
@@ -0,0 +1,386 @@
+import {
+  appendDanglingToolIntentCorrection,
+  buildChatToolSystemPrompt,
+  executeToolCallAndBuildEvent,
+  getEnabledChatTools,
+  getUnstreamedText,
+  looksLikeDanglingToolIntent,
+  MAX_DANGLING_TOOL_INTENT_RETRIES,
+  MAX_TOOL_ROUNDS,
+  mergeUsage,
+  normalizeModelToolCalls,
+  prepareToolCallExecution,
+  type NormalizedToolCall,
+  type ToolAwareCompletionParams,
+  type ToolAwareCompletionResult,
+  type ToolAwareStreamingEvent,
+  type ToolExecutionEvent,
+} from "../chat-tools.js";
+import {
+  buildImageSummaryText,
+  buildSystemPromptAugmentationMessage,
+  buildTextAttachmentPrompt,
+  getImageAttachments,
+  getTextAttachments,
+} from "../message-content.js";
+import type { ChatMessage } from "../types.js";
+
+/**
+ * Builds the `content` value of a Chat Completions message from a chat message.
+ *
+ * A message without image or text attachments is passed through as its plain
+ * `content` string. Otherwise the result lists, in order: one `image_url` part per
+ * image attachment, an optional image summary text part, one text part per text
+ * attachment, and finally the message's own content when it is not blank. A result
+ * consisting of exactly one text part is collapsed back to a bare string.
+ */
+function toContentParts(message: ChatMessage) {
+  const images = getImageAttachments(message);
+  const files = getTextAttachments(message);
+  if (images.length === 0 && files.length === 0) {
+    return message.content;
+  }
+
+  const parts: Array<Record<string, unknown>> = images.map((image) => ({
+    type: "image_url",
+    image_url: {
+      url: image.dataUrl,
+      detail: "auto",
+    },
+  }));
+
+  const summary = buildImageSummaryText(images);
+  if (summary) parts.push({ type: "text", text: summary });
+
+  parts.push(...files.map((file) => ({ type: "text", text: buildTextAttachmentPrompt(file) })));
+
+  if (message.content.trim()) parts.push({ type: "text", text: message.content });
+
+  // A lone text part is sent as a plain string rather than a one-element array.
+  const [only] = parts;
+  if (parts.length === 1 && only?.type === "text" && typeof only.text === "string") {
+    return only.text;
+  }
+
+  return parts;
+}
+
+/**
+ * Converts one chat message into a Chat Completions conversation entry.
+ *
+ * Messages with role "tool" are rewritten as user messages whose text is prefixed
+ * with `Tool output (<name>):`, where a missing or blank name falls back to "tool".
+ * Every other message keeps its role, gets its content from `toContentParts`, and
+ * carries `name` only when it is set and the role is "assistant" or "user".
+ */
+function buildConversationMessage(message: ChatMessage) {
+  if (message.role === "tool") {
+    const toolName = message.name?.trim() || "tool";
+    return {
+      role: "user",
+      content: `Tool output (${toolName}):\n${message.content}`,
+    };
+  }
+
+  const converted: Record<string, unknown> = {
+    role: message.role,
+    content: toContentParts(message),
+  };
+
+  const roleAllowsName = message.role === "assistant" || message.role === "user";
+  if (message.name && roleAllowsName) {
+    converted.name = message.name;
+  }
+
+  return converted;
+}
+
+/**
+ * Builds the conversation for a tool-enabled request: a system message holding the
+ * chat-tool system prompt, then the system-prompt augmentation message built from
+ * `userLocation`, then every input message converted by `buildConversationMessage`.
+ */
+function normalizeMessages(messages: ChatMessage[], userLocation?: string, params: Pick<ToolAwareCompletionParams, "enabledTools"> = {}) {
+  const history = messages.map((entry) => buildConversationMessage(entry));
+  const toolSystemMessage = { role: "system", content: buildChatToolSystemPrompt(params) };
+  const augmentation = buildSystemPromptAugmentationMessage(userLocation);
+  return [toolSystemMessage, augmentation, ...history];
+}
+
+/**
+ * Builds the conversation for a request without tools: the system-prompt
+ * augmentation message built from `userLocation`, followed by every input message
+ * converted by `buildConversationMessage`.
+ */
+function normalizePlainMessages(messages: ChatMessage[], userLocation?: string) {
+  const augmentation = buildSystemPromptAugmentationMessage(userLocation);
+  const history = messages.map((entry) => buildConversationMessage(entry));
+  return [augmentation, ...history];
+}
+
+/**
+ * Flattens a response message's `content` into a single string.
+ *
+ * String content is returned unchanged. Array content is concatenated part by part,
+ * taking a string part as-is, otherwise its string `text`, otherwise its string
+ * `content`; parts matching none of these contribute nothing. Any other content
+ * shape (including a missing message) yields an empty string.
+ */
+function extractContent(message: any) {
+  const content = message?.content;
+  if (typeof content === "string") return content;
+  if (!Array.isArray(content)) return "";
+
+  let combined = "";
+  for (const part of content) {
+    if (typeof part === "string") {
+      combined += part;
+    } else if (typeof part?.text === "string") {
+      combined += part.text;
+    } else if (typeof part?.content === "string") {
+      combined += part.content;
+    }
+  }
+  return combined;
+}
+
+/**
+ * Runs a non-streaming completion against the Chat Completions API, executing any
+ * tool calls the model requests until it produces a final answer.
+ *
+ * When no tools are enabled a single request is sent and its text returned. With
+ * tools enabled the function loops for at most `MAX_TOOL_ROUNDS` rounds: each round
+ * sends the running conversation, and either returns the model's text (no tool
+ * calls) or appends the assistant tool-call message plus one `tool` message per
+ * executed call and tries again.
+ *
+ * @param params - Client, model, messages and sampling settings for the request.
+ * @returns The final text, token usage summed over all rounds (`undefined` when no
+ *   response reported usage), the raw responses, and the executed tool events. When
+ *   the round limit is hit, a fixed "tool-call limit" message is returned as text.
+ */
+export async function completeWithChatCompletionsApi(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
+  const enabledTools = getEnabledChatTools(params);
+  if (!enabledTools.length) {
+    const completion = await params.client.chat.completions.create({
+      model: params.model,
+      messages: normalizePlainMessages(params.messages, params.userLocation),
+      temperature: params.temperature,
+      max_tokens: params.maxTokens,
+    } as any);
+
+    const usageAcc: Required<NonNullable<ToolAwareCompletionResult["usage"]>> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+    const sawUsage = mergeUsage(usageAcc, completion?.usage);
+    const message = completion?.choices?.[0]?.message;
+
+    return {
+      text: extractContent(message),
+      usage: sawUsage ? usageAcc : undefined,
+      raw: { response: completion, api: "chat.completions" },
+      toolEvents: [],
+    };
+  }
+
+  const conversation: any[] = normalizeMessages(params.messages, params.userLocation, params);
+  const rawResponses: unknown[] = [];
+  const toolEvents: ToolExecutionEvent[] = [];
+  const usageAcc: Required<NonNullable<ToolAwareCompletionResult["usage"]>> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+  let sawUsage = false;
+  let totalToolCalls = 0;
+  let danglingToolIntentRetries = 0;
+
+  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
+    const completion = await params.client.chat.completions.create({
+      model: params.model,
+      messages: conversation,
+      temperature: params.temperature,
+      max_tokens: params.maxTokens,
+      tools: enabledTools,
+      tool_choice: "auto",
+    } as any);
+    rawResponses.push(completion);
+    sawUsage = mergeUsage(usageAcc, completion?.usage) || sawUsage;
+
+    const message = completion?.choices?.[0]?.message;
+    if (!message) {
+      // No choice/message came back: give up with empty text and flag it in `raw`.
+      return {
+        text: "",
+        usage: sawUsage ? usageAcc : undefined,
+        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, missingMessage: true },
+        toolEvents,
+      };
+    }
+
+    const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
+    if (!toolCalls.length) {
+      // Use the same extraction as the tool-less path so array-shaped content is
+      // flattened instead of being dropped as an empty string.
+      const text = extractContent(message);
+      if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
+        // The reply was flagged as a dangling tool intent: append a correction to the
+        // conversation and spend another round instead of returning this text.
+        danglingToolIntentRetries += 1;
+        appendDanglingToolIntentCorrection(conversation, text);
+        continue;
+      }
+      return {
+        text,
+        usage: sawUsage ? usageAcc : undefined,
+        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls },
+        toolEvents,
+      };
+    }
+
+    const normalizedToolCalls = normalizeModelToolCalls(toolCalls, round);
+    totalToolCalls += normalizedToolCalls.length;
+
+    // Echo the assistant's tool calls back into the conversation so the following
+    // `tool` messages can refer to them by `tool_call_id`.
+    const assistantToolCallMessage: any = {
+      role: "assistant",
+      tool_calls: normalizedToolCalls.map((call) => ({
+        id: call.id,
+        type: "function",
+        function: {
+          name: call.name,
+          arguments: call.arguments,
+        },
+      })),
+    };
+    if (typeof message.content === "string" && message.content.length) {
+      assistantToolCallMessage.content = message.content;
+    }
+    conversation.push(assistantToolCallMessage);
+
+    // Tool calls are executed one at a time, in the order the model listed them.
+    for (const call of normalizedToolCalls) {
+      const { execution } = prepareToolCallExecution(call);
+      const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
+      toolEvents.push(event);
+
+      conversation.push({
+        role: "tool",
+        tool_call_id: call.id,
+        content: JSON.stringify(toolResult),
+      });
+    }
+  }
+
+  // Every round ended in tool calls (or a correction retry): stop with a fixed message.
+  return {
+    text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
+    usage: sawUsage ? usageAcc : undefined,
+    raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true },
+    toolEvents,
+  };
+}
+
+/**
+ * Streams a completion from the Chat Completions API, executing any tool calls the
+ * model requests between streamed rounds.
+ *
+ * Yields `delta` events for text, `tool_call` events before and after each tool
+ * execution, and exactly one final `done` event carrying the result.
+ *
+ * When no tools are enabled a single streamed request is made. With tools enabled
+ * the generator loops for at most `MAX_TOOL_ROUNDS` rounds; in each round text is
+ * forwarded as it arrives until the first tool-call delta is seen, and tool-call
+ * fragments are accumulated per `index` until the stream ends.
+ *
+ * @param params - Client, model, messages and sampling settings for the request.
+ */
+export async function* streamWithChatCompletionsApi(params: ToolAwareCompletionParams): AsyncGenerator<ToolAwareStreamingEvent> {
+  const enabledTools = getEnabledChatTools(params);
+  if (!enabledTools.length) {
+    const rawResponses: unknown[] = [];
+    const usageAcc: Required<NonNullable<ToolAwareCompletionResult["usage"]>> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+    let sawUsage = false;
+    let text = "";
+
+    const stream = await params.client.chat.completions.create({
+      model: params.model,
+      messages: normalizePlainMessages(params.messages, params.userLocation),
+      temperature: params.temperature,
+      max_tokens: params.maxTokens,
+      stream: true,
+      // Request usage on the stream, as the tool-enabled request below does; the loop
+      // reads `chunk.usage`, which is otherwise not expected to be populated.
+      stream_options: { include_usage: true },
+    } as any);
+
+    for await (const chunk of stream as any as AsyncIterable<any>) {
+      rawResponses.push(chunk);
+      sawUsage = mergeUsage(usageAcc, chunk?.usage) || sawUsage;
+
+      const deltaText = chunk?.choices?.[0]?.delta?.content ?? "";
+      if (typeof deltaText === "string" && deltaText.length) {
+        text += deltaText;
+        yield { type: "delta", text: deltaText };
+      }
+    }
+
+    yield {
+      type: "done",
+      result: {
+        text,
+        usage: sawUsage ? usageAcc : undefined,
+        raw: { streamed: true, responses: rawResponses, api: "chat.completions" },
+        toolEvents: [],
+      },
+    };
+    return;
+  }
+
+  const conversation: any[] = normalizeMessages(params.messages, params.userLocation, params);
+  const rawResponses: unknown[] = [];
+  const toolEvents: ToolExecutionEvent[] = [];
+  const usageAcc: Required<NonNullable<ToolAwareCompletionResult["usage"]>> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+  let sawUsage = false;
+  let totalToolCalls = 0;
+  let danglingToolIntentRetries = 0;
+
+  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
+    const stream = await params.client.chat.completions.create({
+      model: params.model,
+      messages: conversation,
+      temperature: params.temperature,
+      max_tokens: params.maxTokens,
+      tools: enabledTools,
+      tool_choice: "auto",
+      stream: true,
+      stream_options: { include_usage: true },
+    } as any);
+
+    // roundText: all text received this round; streamedRoundText: the part of it
+    // that has already been yielded to the consumer.
+    let roundText = "";
+    let streamedRoundText = "";
+    let roundHasToolCalls = false;
+    const roundToolCalls = new Map<number, { id?: string; name?: string; arguments: string }>();
+
+    for await (const chunk of stream as any as AsyncIterable<any>) {
+      rawResponses.push(chunk);
+      sawUsage = mergeUsage(usageAcc, chunk?.usage) || sawUsage;
+
+      const choice = chunk?.choices?.[0];
+      const deltaText = choice?.delta?.content ?? "";
+      if (typeof deltaText === "string" && deltaText.length) {
+        roundText += deltaText;
+        // Text is only forwarded until the first tool-call delta of the round.
+        if (!roundHasToolCalls) {
+          streamedRoundText += deltaText;
+          yield { type: "delta", text: deltaText };
+        }
+      }
+
+      const deltaToolCalls = Array.isArray(choice?.delta?.tool_calls) ? choice.delta.tool_calls : [];
+      if (deltaToolCalls.length) {
+        roundHasToolCalls = true;
+      }
+      // Tool calls arrive as fragments: merge id/name and append argument text into
+      // the entry for the fragment's `index` (0 when the index is missing).
+      for (const toolCall of deltaToolCalls) {
+        const idx = typeof toolCall?.index === "number" ? toolCall.index : 0;
+        const entry = roundToolCalls.get(idx) ?? { arguments: "" };
+        if (typeof toolCall?.id === "string" && toolCall.id.length) {
+          entry.id = toolCall.id;
+        }
+        if (typeof toolCall?.function?.name === "string" && toolCall.function.name.length) {
+          entry.name = toolCall.function.name;
+        }
+        if (typeof toolCall?.function?.arguments === "string" && toolCall.function.arguments.length) {
+          entry.arguments += toolCall.function.arguments;
+        }
+        roundToolCalls.set(idx, entry);
+      }
+    }
+
+    // Order the accumulated calls by index and fill in defaults for missing fields.
+    const normalizedToolCalls: NormalizedToolCall[] = [...roundToolCalls.entries()]
+      .sort((a, b) => a[0] - b[0])
+      .map(([_, call], index) => ({
+        id: call.id ?? `tool_call_${round}_${index}`,
+        name: call.name ?? "unknown_tool",
+        arguments: call.arguments || "{}",
+      }));
+
+    if (!normalizedToolCalls.length) {
+      // NOTE(review): with no tool calls in the round every text delta was already
+      // yielded above, so `!streamedRoundText` appears to hold only when the round
+      // produced no text at all — confirm whether this retry is meant to be reachable.
+      if (!streamedRoundText && danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(roundText)) {
+        danglingToolIntentRetries += 1;
+        appendDanglingToolIntentCorrection(conversation, roundText);
+        continue;
+      }
+      const unstreamedText = getUnstreamedText(roundText, streamedRoundText);
+      if (unstreamedText) {
+        yield { type: "delta", text: unstreamedText };
+      }
+      yield {
+        type: "done",
+        result: {
+          text: roundText,
+          usage: sawUsage ? usageAcc : undefined,
+          raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls },
+          toolEvents,
+        },
+      };
+      return;
+    }
+
+    totalToolCalls += normalizedToolCalls.length;
+    // Echo the assistant's tool calls back into the conversation so the following
+    // `tool` messages can refer to them by `tool_call_id`.
+    const assistantToolCallMessage: any = {
+      role: "assistant",
+      tool_calls: normalizedToolCalls.map((call) => ({
+        id: call.id,
+        type: "function",
+        function: {
+          name: call.name,
+          arguments: call.arguments,
+        },
+      })),
+    };
+    if (roundText) {
+      assistantToolCallMessage.content = roundText;
+    }
+    conversation.push(assistantToolCallMessage);
+
+    // Each call yields one event before it runs and one with the outcome afterwards;
+    // only the outcome event is recorded in `toolEvents`.
+    for (const call of normalizedToolCalls) {
+      const { event: initiatedEvent, execution } = prepareToolCallExecution(call);
+      yield { type: "tool_call", event: initiatedEvent };
+      const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
+      toolEvents.push(event);
+      yield { type: "tool_call", event };
+      conversation.push({
+        role: "tool",
+        tool_call_id: call.id,
+        content: JSON.stringify(toolResult),
+      });
+    }
+  }
+
+  // Every round ended in tool calls (or a correction retry): stop with a fixed message.
+  yield {
+    type: "done",
+    result: {
+      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
+      usage: sawUsage ? usageAcc : undefined,
+      raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true },
+      toolEvents,
+    },
+  };
+}
--- a/server/src/llm/protocols/messages-api.ts
+++ b/server/src/llm/protocols/messages-api.ts
@@ -0,0 +1,470 @@
+import {
+  buildChatToolSystemPrompt,
+  executeToolCallAndBuildEvent,
+  getEnabledChatTools,
+  looksLikeDanglingToolIntent,
+  MAX_DANGLING_TOOL_INTENT_RETRIES,
+  MAX_TOOL_ROUNDS,
+  parseToolArgs,
+  prepareToolCallExecution,
+  type NormalizedToolCall,
+  type ToolAwareCompletionParams,
+  type ToolAwareCompletionResult,
+  type ToolAwareStreamingEvent,
+  type ToolAwareUsage,
+  type ToolExecutionEvent,
+  type ToolRunOutcome,
+} from "../chat-tools.js";
+import {
+  buildImageSummaryText,
+  buildTextAttachmentPrompt,
+  buildTopLevelSystemPrompt,
+  getImageAttachments,
+  getTextAttachments,
+  parseImageDataUrl,
+} from "../message-content.js";
+import type { ChatMessage } from "../types.js";
+
+// Text of the user-role follow-up that `appendCorrection` pushes after an assistant
+// reply which announced a tool run without actually issuing a tool call.
+const INTERNAL_CORRECTION =
+  "Internal correction: the previous assistant message claimed it would run a tool, but no tool call was made. If the task needs an available tool, call it now. Otherwise provide the final answer directly without saying you will run a tool.";
+
+/**
+ * Converts function-style tool definitions (`{ type: "function", function: {...} }`)
+ * into the `{ name, description, input_schema }` shape passed as `tools` to the
+ * Messages API. Entries whose `type` is not "function" are dropped.
+ */
+function toTools(tools: any[]) {
+  const converted: Array<Record<string, unknown>> = [];
+  for (const tool of tools) {
+    if (tool?.type !== "function") continue;
+    converted.push({
+      name: tool.function.name,
+      description: tool.function.description,
+      input_schema: tool.function.parameters,
+    });
+  }
+  return converted;
+}
+
+/**
+ * Builds the `content` value of a Messages API message from a chat message.
+ *
+ * A message without image or text attachments is passed through as its plain
+ * `content` string. Otherwise the result lists, in order: one base64 `image` block
+ * per image attachment, an optional image summary text block, one text block per
+ * text attachment, and finally the message's own content when it is not blank. A
+ * result consisting of exactly one text block is collapsed back to a bare string.
+ */
+function toContentBlocks(message: ChatMessage) {
+  const images = getImageAttachments(message);
+  const files = getTextAttachments(message);
+  if (images.length === 0 && files.length === 0) {
+    return message.content;
+  }
+
+  const blocks: Array<Record<string, unknown>> = images.map((image) => {
+    const parsed = parseImageDataUrl(image);
+    return {
+      type: "image",
+      source: {
+        type: "base64",
+        media_type: parsed.mediaType,
+        data: parsed.data,
+      },
+    };
+  });
+
+  const summary = buildImageSummaryText(images);
+  if (summary) blocks.push({ type: "text", text: summary });
+
+  blocks.push(...files.map((file) => ({ type: "text", text: buildTextAttachmentPrompt(file) })));
+
+  if (message.content.trim()) blocks.push({ type: "text", text: message.content });
+
+  // A lone text block is sent as a plain string rather than a one-element array.
+  const [only] = blocks;
+  if (blocks.length === 1 && only?.type === "text" && typeof only.text === "string") {
+    return only.text;
+  }
+
+  return blocks;
+}
+
+/**
+ * Converts one chat message into a Messages API conversation entry.
+ *
+ * System messages are rejected with an error because this protocol takes the system
+ * prompt separately. Tool messages become user messages prefixed with
+ * `Tool output (<name>):` (a missing or blank name falls back to "tool"). Assistant
+ * messages stay "assistant"; every remaining role is sent as "user".
+ *
+ * @throws Error when the message role is "system".
+ */
+function buildConversationMessage(message: ChatMessage) {
+  switch (message.role) {
+    case "system":
+      throw new Error("System messages must be handled separately for top-level-system protocols.");
+    case "tool": {
+      const toolName = message.name?.trim() || "tool";
+      return {
+        role: "user",
+        content: `Tool output (${toolName}):\n${message.content}`,
+      };
+    }
+    default:
+      return {
+        role: message.role === "assistant" ? "assistant" : "user",
+        content: toContentBlocks(message),
+      };
+  }
+}
+
+/**
+ * Converts `params.messages` into Messages API conversation entries, leaving out
+ * system messages (those are not part of the `messages` array for this protocol).
+ */
+function buildBaseMessages(params: ToolAwareCompletionParams) {
+  const converted: Array<ReturnType<typeof buildConversationMessage>> = [];
+  for (const message of params.messages) {
+    if (message.role === "system") continue;
+    converted.push(buildConversationMessage(message));
+  }
+  return converted;
+}
+
+/**
+ * Serializes a tool call's input to the JSON string used as its `arguments`.
+ *
+ * Strings are returned unchanged. `null` and `undefined` serialize as `"{}"`. Any
+ * value that cannot be serialized — because `JSON.stringify` throws (for example on
+ * circular references) or yields `undefined` (for example for functions and
+ * symbols) — also produces `"{}"`, so the result is always a string.
+ *
+ * @param input - Raw tool input of unknown shape.
+ * @returns The input as a JSON string, or `"{}"` when it cannot be serialized.
+ */
+function stringifyToolInput(input: unknown): string {
+  if (typeof input === "string") return input;
+  try {
+    // JSON.stringify returns undefined at runtime for functions and symbols even
+    // though its declared return type is string; fall back instead of leaking it.
+    return JSON.stringify(input ?? {}) ?? "{}";
+  } catch {
+    return "{}";
+  }
+}
+
+/**
+ * Extracts the `tool_use` blocks from a response's content array as normalized tool
+ * calls. A missing id becomes `tool_call_<round>_<position among tool calls>`, a
+ * missing name becomes "unknown_tool", and the block's input is serialized with
+ * `stringifyToolInput`.
+ */
+function normalizeToolCalls(content: any[], round: number): NormalizedToolCall[] {
+  const calls: NormalizedToolCall[] = [];
+  for (const block of content) {
+    if (block?.type !== "tool_use") continue;
+    // calls.length is this block's position among the tool_use blocks seen so far.
+    calls.push({
+      id: block?.id ?? `tool_call_${round}_${calls.length}`,
+      name: block?.name ?? "unknown_tool",
+      arguments: stringifyToolInput(block?.input),
+    });
+  }
+  return calls;
+}
+
+/**
+ * Concatenates the text of every `text` block in `response.content` and trims the
+ * result. Returns an empty string when `content` is not an array; blocks of other
+ * types, or text blocks whose `text` is not a string, contribute nothing.
+ */
+function extractText(response: any) {
+  const blocks = response?.content;
+  if (!Array.isArray(blocks)) return "";
+
+  let combined = "";
+  for (const block of blocks) {
+    if (block?.type === "text" && typeof block.text === "string") {
+      combined += block.text;
+    }
+  }
+  return combined.trim();
+}
+
+/**
+ * Builds the `tool_result` content block that reports a tool's outcome back to the
+ * model: it references the originating call by id, carries the JSON-serialized
+ * outcome as content, and sets `is_error` when the outcome's `ok` flag is falsy.
+ */
+function buildToolResultBlock(call: NormalizedToolCall, toolResult: ToolRunOutcome) {
+  const serializedOutcome = JSON.stringify(toolResult);
+  const failed = !toolResult.ok;
+  return {
+    type: "tool_result",
+    tool_use_id: call.id,
+    content: serializedOutcome,
+    is_error: failed,
+  };
+}
+
+/**
+ * Appends two entries to `conversation` (mutating it): the assistant's reply `text`,
+ * followed by a user message containing `INTERNAL_CORRECTION`.
+ */
+function appendCorrection(conversation: any[], text: string) {
+  const assistantTurn = { role: "assistant", content: text };
+  const correctionTurn = { role: "user", content: INTERNAL_CORRECTION };
+  conversation.push(assistantTurn, correctionTurn);
+}
+
+/**
+ * Adds a response's `usage` (`input_tokens` / `output_tokens`, each defaulting to 0
+ * when null or undefined) onto the accumulator, with `totalTokens` growing by their
+ * sum.
+ *
+ * @returns `true` when `usage` was present and merged, `false` when it was falsy
+ *   and the accumulator was left untouched.
+ */
+function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
+  if (usage) {
+    const consumed = usage.input_tokens ?? 0;
+    const produced = usage.output_tokens ?? 0;
+    acc.inputTokens += consumed;
+    acc.outputTokens += produced;
+    acc.totalTokens += consumed + produced;
+    return true;
+  }
+  return false;
+}
+
+/**
+ * Runs a non-streaming completion through `params.client.messages.create`,
+ * executing any tool calls the model requests until it produces a final answer.
+ *
+ * When no tools are enabled a single request is sent and its text returned. With
+ * tools enabled the function loops for at most `MAX_TOOL_ROUNDS` rounds: each round
+ * sends the running conversation, and either returns the response text (no
+ * `tool_use` blocks) or appends the assistant content plus one user message holding
+ * all `tool_result` blocks and tries again.
+ *
+ * @param params - Client, model, messages and sampling settings for the request.
+ * @returns The final text, token usage summed over all rounds (`undefined` when no
+ *   response reported usage), the raw responses, and the executed tool events. When
+ *   the round limit is hit, a fixed "tool-call limit" message is returned as text.
+ */
+export async function completeWithMessagesApi(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
+  const enabledTools = getEnabledChatTools(params);
+  if (!enabledTools.length) {
+    const response = await params.client.messages.create({
+      model: params.model,
+      system: buildTopLevelSystemPrompt(params.messages, params.userLocation),
+      // max_tokens is always sent; 1024 is used when the caller gave no limit.
+      max_tokens: params.maxTokens ?? 1024,
+      temperature: params.temperature,
+      messages: buildBaseMessages(params),
+    } as any);
+
+    const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+    const sawUsage = mergeUsage(usageAcc, response?.usage);
+
+    return {
+      text: extractText(response),
+      usage: sawUsage ? usageAcc : undefined,
+      raw: { response, api: "messages" },
+      toolEvents: [],
+    };
+  }
+
+  const conversation: any[] = buildBaseMessages(params);
+  const rawResponses: unknown[] = [];
+  const toolEvents: ToolExecutionEvent[] = [];
+  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+  let sawUsage = false;
+  let totalToolCalls = 0;
+  let danglingToolIntentRetries = 0;
+
+  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
+    const response = await params.client.messages.create({
+      model: params.model,
+      // The chat-tool system prompt is passed as a third argument here, unlike the
+      // tool-less request above.
+      system: buildTopLevelSystemPrompt(params.messages, params.userLocation, buildChatToolSystemPrompt(params)),
+      max_tokens: params.maxTokens ?? 1024,
+      temperature: params.temperature,
+      messages: conversation,
+      tools: toTools(enabledTools),
+      tool_choice: { type: "auto" },
+    } as any);
+    rawResponses.push(response);
+    sawUsage = mergeUsage(usageAcc, response?.usage) || sawUsage;
+
+    const content = Array.isArray(response?.content) ? response.content : [];
+    const normalizedToolCalls = normalizeToolCalls(content, round);
+    if (!normalizedToolCalls.length) {
+      const text = extractText(response);
+      if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
+        // The reply was flagged as a dangling tool intent: append the reply plus a
+        // correction message and spend another round instead of returning this text.
+        danglingToolIntentRetries += 1;
+        appendCorrection(conversation, text);
+        continue;
+      }
+      return {
+        text,
+        usage: sawUsage ? usageAcc : undefined,
+        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, api: "messages" },
+        toolEvents,
+      };
+    }
+
+    totalToolCalls += normalizedToolCalls.length;
+    // The response content is echoed back unchanged as the assistant turn.
+    conversation.push({
+      role: "assistant",
+      content,
+    });
+
+    // Tool calls run one at a time, in order; their results are collected so they
+    // can be sent back together in a single user message.
+    const toolResultBlocks: any[] = [];
+    for (const call of normalizedToolCalls) {
+      const { execution } = prepareToolCallExecution(call);
+      const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
+      toolEvents.push(event);
+      toolResultBlocks.push(buildToolResultBlock(call, toolResult));
+    }
+
+    conversation.push({
+      role: "user",
+      content: toolResultBlocks,
+    });
+  }
+
+  // Every round ended in tool calls (or a correction retry): stop with a fixed message.
+  return {
+    text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
+    usage: sawUsage ? usageAcc : undefined,
+    raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "messages" },
+    toolEvents,
+  };
+}
+
+/**
+ * Streams a completion through `params.client.messages.create` with `stream: true`,
+ * executing any tool calls the model requests between rounds.
+ *
+ * Yields `delta` events for text, `tool_call` events before and after each tool
+ * execution, and exactly one final `done` event carrying the result.
+ *
+ * When no tools are enabled, text deltas are forwarded as they arrive. With tools
+ * enabled the generator loops for at most `MAX_TOOL_ROUNDS` rounds and buffers each
+ * round's text: it is yielded as one `delta` only after the round has ended without
+ * tool calls and without triggering a dangling-intent retry.
+ *
+ * @param params - Client, model, messages and sampling settings for the request.
+ */
+export async function* streamWithMessagesApi(params: ToolAwareCompletionParams): AsyncGenerator<ToolAwareStreamingEvent> {
+  const enabledTools = getEnabledChatTools(params);
+  if (!enabledTools.length) {
+    const rawResponses: unknown[] = [];
+    const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+    let sawUsage = false;
+    let roundInputTokens = 0;
+    let roundOutputTokens = 0;
+    let text = "";
+
+    const stream = await params.client.messages.create({
+      model: params.model,
+      system: buildTopLevelSystemPrompt(params.messages, params.userLocation),
+      // max_tokens is always sent; 1024 is used when the caller gave no limit.
+      max_tokens: params.maxTokens ?? 1024,
+      temperature: params.temperature,
+      messages: buildBaseMessages(params),
+      stream: true,
+    } as any);
+
+    for await (const ev of stream as any as AsyncIterable<any>) {
+      rawResponses.push(ev);
+      // Input tokens are taken from message_start; message_delta may then overwrite
+      // the input count and supplies the output count.
+      if (ev?.type === "message_start" && ev?.message?.usage) {
+        roundInputTokens = ev.message.usage.input_tokens ?? roundInputTokens;
+        sawUsage = true;
+      }
+      if (ev?.type === "content_block_delta" && ev?.delta?.type === "text_delta") {
+        const delta = ev.delta.text ?? "";
+        if (delta) {
+          text += delta;
+          yield { type: "delta", text: delta };
+        }
+      }
+      if (ev?.type === "message_delta" && ev.usage) {
+        roundInputTokens = ev.usage.input_tokens ?? roundInputTokens;
+        roundOutputTokens = ev.usage.output_tokens ?? roundOutputTokens;
+        sawUsage = true;
+      }
+    }
+
+    if (sawUsage) {
+      usageAcc.inputTokens += roundInputTokens;
+      usageAcc.outputTokens += roundOutputTokens;
+      usageAcc.totalTokens += roundInputTokens + roundOutputTokens;
+    }
+
+    yield {
+      type: "done",
+      result: {
+        text,
+        usage: sawUsage ? usageAcc : undefined,
+        raw: { streamed: true, responses: rawResponses, toolCallsUsed: 0, api: "messages" },
+        toolEvents: [],
+      },
+    };
+    return;
+  }
+
+  const conversation: any[] = buildBaseMessages(params);
+  const rawResponses: unknown[] = [];
+  const toolEvents: ToolExecutionEvent[] = [];
+  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+  let sawUsage = false;
+  let totalToolCalls = 0;
+  let danglingToolIntentRetries = 0;
+
+  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
+    const stream = await params.client.messages.create({
+      model: params.model,
+      system: buildTopLevelSystemPrompt(params.messages, params.userLocation, buildChatToolSystemPrompt(params)),
+      max_tokens: params.maxTokens ?? 1024,
+      temperature: params.temperature,
+      messages: conversation,
+      tools: toTools(enabledTools),
+      tool_choice: { type: "auto" },
+      stream: true,
+    } as any);
+
+    // contentByIndex rebuilds the assistant's content blocks keyed by stream index;
+    // toolArgumentByIndex collects the raw JSON argument text of each tool_use block.
+    const contentByIndex = new Map<number, any>();
+    const toolArgumentByIndex = new Map<number, string>();
+    let roundText = "";
+    // NOTE(review): roundHasToolCalls is assigned below but never read in this function.
+    let roundHasToolCalls = false;
+    let roundInputTokens = 0;
+    let roundOutputTokens = 0;
+    let sawRoundUsage = false;
+
+    for await (const ev of stream as any as AsyncIterable<any>) {
+      rawResponses.push(ev);
+
+      if (ev?.type === "message_start" && ev?.message?.usage) {
+        roundInputTokens = ev.message.usage.input_tokens ?? roundInputTokens;
+        sawRoundUsage = true;
+      }
+
+      // A new content block opens: record a tool_use or text skeleton, or keep any
+      // other typed block exactly as received.
+      if (ev?.type === "content_block_start" && typeof ev.index === "number") {
+        const block = ev.content_block ?? {};
+        if (block.type === "tool_use") {
+          roundHasToolCalls = true;
+          contentByIndex.set(ev.index, {
+            type: "tool_use",
+            id: block.id,
+            name: block.name,
+            input: block.input ?? {},
+          });
+          toolArgumentByIndex.set(ev.index, "");
+        } else if (block.type === "text") {
+          contentByIndex.set(ev.index, {
+            type: "text",
+            text: typeof block.text === "string" ? block.text : "",
+          });
+        } else if (block.type) {
+          contentByIndex.set(ev.index, block);
+        }
+      }
+
+      if (ev?.type === "content_block_delta" && typeof ev.index === "number") {
+        if (ev.delta?.type === "text_delta") {
+          const delta = typeof ev.delta.text === "string" ? ev.delta.text : "";
+          if (delta) {
+            // Text is appended to the block and to roundText, but not yielded here.
+            const block = contentByIndex.get(ev.index) ?? { type: "text", text: "" };
+            if (block.type === "text") {
+              block.text = `${typeof block.text === "string" ? block.text : ""}${delta}`;
+              contentByIndex.set(ev.index, block);
+            }
+            roundText += delta;
+          }
+        } else if (ev.delta?.type === "input_json_delta") {
+          roundHasToolCalls = true;
+          const partialJson = typeof ev.delta.partial_json === "string" ? ev.delta.partial_json : "";
+          toolArgumentByIndex.set(ev.index, `${toolArgumentByIndex.get(ev.index) ?? ""}${partialJson}`);
+        }
+      }
+
+      // When a tool_use block closes, parse its accumulated argument text into
+      // `input`; if parsing throws, `input` becomes an empty object.
+      if (ev?.type === "content_block_stop" && typeof ev.index === "number") {
+        const block = contentByIndex.get(ev.index);
+        if (block?.type === "tool_use") {
+          const rawArguments = toolArgumentByIndex.get(ev.index) || stringifyToolInput(block.input);
+          try {
+            block.input = parseToolArgs(rawArguments);
+          } catch {
+            block.input = {};
+          }
+          contentByIndex.set(ev.index, block);
+        }
+      }
+
+      if (ev?.type === "message_delta" && ev.usage) {
+        roundInputTokens = ev.usage.input_tokens ?? roundInputTokens;
+        roundOutputTokens = ev.usage.output_tokens ?? roundOutputTokens;
+        sawRoundUsage = true;
+      }
+    }
+
+    // Usage is folded into the running total once per round, after the stream ends.
+    if (sawRoundUsage) {
+      usageAcc.inputTokens += roundInputTokens;
+      usageAcc.outputTokens += roundOutputTokens;
+      usageAcc.totalTokens += roundInputTokens + roundOutputTokens;
+      sawUsage = true;
+    }
+
+    // Rebuild the assistant content in stream-index order and derive the tool calls
+    // from its tool_use blocks.
+    const indexedContent = [...contentByIndex.entries()].sort((a, b) => a[0] - b[0]);
+    const assistantContent = indexedContent.map(([, block]) => block);
+    const normalizedToolCalls: NormalizedToolCall[] = indexedContent
+      .filter(([, block]) => block?.type === "tool_use")
+      .map(([index, block], callIndex) => ({
+        id: block.id ?? `tool_call_${round}_${callIndex}`,
+        name: block.name ?? "unknown_tool",
+        arguments: toolArgumentByIndex.get(index) || stringifyToolInput(block.input),
+      }));
+
+    if (!normalizedToolCalls.length) {
+      if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(roundText)) {
+        // Nothing from this round has been yielded yet, so the reply can be replaced
+        // by a corrected retry without the consumer ever seeing it.
+        danglingToolIntentRetries += 1;
+        appendCorrection(conversation, roundText);
+        continue;
+      }
+      // Final answer: emit the buffered round text as a single delta.
+      if (roundText) {
+        yield { type: "delta", text: roundText };
+      }
+      yield {
+        type: "done",
+        result: {
+          text: roundText,
+          usage: sawUsage ? usageAcc : undefined,
+          raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, api: "messages" },
+          toolEvents,
+        },
+      };
+      return;
+    }
+
+    totalToolCalls += normalizedToolCalls.length;
+    conversation.push({
+      role: "assistant",
+      content: assistantContent,
+    });
+
+    // Each call yields one event before it runs and one with the outcome afterwards;
+    // only the outcome event is recorded in `toolEvents`.
+    const toolResultBlocks: any[] = [];
+    for (const call of normalizedToolCalls) {
+      const { event: initiatedEvent, execution } = prepareToolCallExecution(call);
+      yield { type: "tool_call", event: initiatedEvent };
+      const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
+      toolEvents.push(event);
+      yield { type: "tool_call", event };
+      toolResultBlocks.push(buildToolResultBlock(call, toolResult));
+    }
+
+    conversation.push({
+      role: "user",
+      content: toolResultBlocks,
+    });
+  }
+
+  // Every round ended in tool calls (or a correction retry): stop with a fixed message.
+  yield {
+    type: "done",
+    result: {
+      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
+      usage: sawUsage ? usageAcc : undefined,
+      raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "messages" },
+      toolEvents,
+    },
+  };
+}
--- a/server/src/llm/protocols/responses-api.ts
+++ b/server/src/llm/protocols/responses-api.ts
@@ -0,0 +1,332 @@
+import {
+  appendDanglingToolIntentCorrection,
+  buildChatToolSystemPrompt,
+  executeToolCallAndBuildEvent,
+  getEnabledChatTools,
+  getUnstreamedText,
+  looksLikeDanglingToolIntent,
+  MAX_DANGLING_TOOL_INTENT_RETRIES,
+  MAX_TOOL_ROUNDS,
+  prepareToolCallExecution,
+  type NormalizedToolCall,
+  type ToolAwareCompletionParams,
+  type ToolAwareCompletionResult,
+  type ToolAwareStreamingEvent,
+  type ToolAwareUsage,
+  type ToolExecutionEvent,
+} from "../chat-tools.js";
+import {
+  buildImageSummaryText,
+  buildSystemPromptAugmentationMessage,
+  buildTextAttachmentPrompt,
+  getImageAttachments,
+  getTextAttachments,
+} from "../message-content.js";
+import type { ChatMessage } from "../types.js";
+
+/**
+ * Maps tool definitions to the shape placed in the Responses `tools` field. Entries typed
+ * "function" get `name`, `description` and `parameters` lifted out of their nested `function`
+ * object, plus `strict: false`; every other entry is returned unchanged.
+ */
+function toResponsesTools(tools: any[]) {
+  return tools.map((entry) => {
+    if (entry?.type !== "function") return entry;
+    const { name, description, parameters } = entry.function;
+    return { type: "function", name, description, parameters, strict: false };
+  });
+}
+
+/**
+ * Builds the Responses content for a chat message. With no attachments the raw `content`
+ * string is returned. Otherwise parts are emitted in order: one `input_image` per image, the
+ * image summary when non-empty, one `input_text` per text attachment, then the message text
+ * when it is not blank. A result holding a single text part collapses to that string.
+ */
+function toContentParts(message: ChatMessage) {
+  const images = getImageAttachments(message);
+  const documents = getTextAttachments(message);
+  if (!(images.length || documents.length)) {
+    return message.content;
+  }
+
+  const asText = (text: unknown): Record<string, unknown> => ({ type: "input_text", text });
+  const parts: Array<Record<string, unknown>> = images.map((image) => ({
+    type: "input_image",
+    image_url: image.dataUrl,
+    detail: "auto",
+  }));
+
+  const summary = buildImageSummaryText(images);
+  if (summary) {
+    parts.push(asText(summary));
+  }
+  parts.push(...documents.map((attachment) => asText(buildTextAttachmentPrompt(attachment))));
+  if (message.content.trim()) {
+    parts.push(asText(message.content));
+  }
+
+  const [only] = parts;
+  if (parts.length === 1 && only?.type === "input_text" && typeof only.text === "string") {
+    return only.text;
+  }
+  return parts;
+}
+
+/**
+ * Converts one chat message into a Responses input message. Messages with role "tool" are
+ * re-sent as user text prefixed with the tool name ("tool" when the name is missing or blank);
+ * every other role is kept and its content is built by `toContentParts`.
+ */
+function buildInputMessage(message: ChatMessage) {
+  if (message.role !== "tool") {
+    return { role: message.role, content: toContentParts(message) };
+  }
+
+  const trimmedName = message.name?.trim();
+  const label = trimmedName ? trimmedName : "tool";
+  return { role: "user", content: `Tool output (${label}):\n${message.content}` };
+}
+
+/** Builds the Responses `input` list: a system message with the chat tool prompt, the augmentation message for `userLocation`, then each chat message converted by `buildInputMessage`. */
+function normalizeInput(messages: ChatMessage[], userLocation?: string, params: Pick<ToolAwareCompletionParams, "enabledTools"> = {}) {
+  return [{ role: "system", content: buildChatToolSystemPrompt(params) }, buildSystemPromptAugmentationMessage(userLocation), ...messages.map((entry) => buildInputMessage(entry))];
+}
+
+/** Adds `input_tokens`/`output_tokens`/`total_tokens` from `usage` into `acc` (null or missing counts add 0); returns false when `usage` is falsy, true otherwise. */
+function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
+  if (!usage) return false;
+  const fields = [["inputTokens", "input_tokens"], ["outputTokens", "output_tokens"], ["totalTokens", "total_tokens"]] as const;
+  for (const [target, source] of fields) acc[target] += usage[source] ?? 0;
+  return true;
+}
+
+function getOutputItems(response: any) { // `response.output` when it is an array, otherwise a new empty array
+  return !Array.isArray(response?.output) ? [] : response.output;
+}
+
+/**
+ * Returns the assistant text of a response: a non-empty `output_text`, else the joined `output_text` and
+ * `refusal` parts of its message items, else `fallback`. An empty `output_text` is skipped so refusals and the fallback still surface.
+ */
+function extractText(response: any, fallback = "") {
+  if (typeof response?.output_text === "string" && response.output_text) return response.output_text;
+  const parts: string[] = [];
+  for (const item of getOutputItems(response)) {
+    if (item?.type !== "message" || !Array.isArray(item.content)) continue;
+    for (const content of item.content) {
+      if (content?.type === "output_text" && typeof content.text === "string") parts.push(content.text);
+      else if (content?.type === "refusal" && typeof content.refusal === "string") parts.push(content.refusal);
+    }
+  }
+  return parts.join("") || fallback;
+}
+
+function getFailureMessage(response: any) { // error text for a "failed"/"incomplete" response, null for any other status
+  if (!["failed", "incomplete"].includes(response?.status)) return null;
+  const { error, incomplete_details: details, status } = response;
+  if (typeof error?.message === "string") return error.message; // an explicit error message wins over the incomplete reason
+  return typeof details?.reason === "string" && details.reason ? `Response incomplete: ${details.reason}` : `Response ${status}.`;
+}
+
+/** Keeps the `function_call` items and maps them to normalized calls; a call without ids gets `tool_call_<round>_<n>`, where n is its position among the function calls. */
+function normalizeToolCalls(outputItems: any[], round: number): NormalizedToolCall[] {
+  const calls: NormalizedToolCall[] = [];
+  for (const item of outputItems) {
+    if (item?.type !== "function_call") continue;
+    calls.push({ id: item.call_id ?? item.id ?? `tool_call_${round}_${calls.length}`, name: item.name ?? "unknown_tool", arguments: item.arguments ?? "{}" });
+  }
+  return calls;
+}
+
+/**
+ * Runs a non-streaming, tool-aware completion against the Responses API.
+ * Each round sends the accumulated `input`. When the model returns function calls they are
+ * executed in order and their JSON-serialized results are appended as `function_call_output`
+ * items before the next round. A round without function calls ends the loop with its text,
+ * unless that text looks like a dangling tool intent and retries remain. Once MAX_TOOL_ROUNDS
+ * rounds are used up, a fixed tool-call-limit message is returned instead.
+ * @param params Client, model, messages, sampling limits and tool settings for the completion.
+ * @returns Final text, accumulated usage (undefined when no response reported any), raw responses and tool events.
+ * @throws Error when `getFailureMessage` reports a failed or incomplete response.
+ */
+export async function completeWithResponsesApi(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
+  const enabledTools = getEnabledChatTools(params);
+  const input: any[] = normalizeInput(params.messages, params.userLocation, params);
+  const rawResponses: unknown[] = [];
+  const toolEvents: ToolExecutionEvent[] = [];
+  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+  let sawUsage = false;
+  let totalToolCalls = 0;
+  let danglingToolIntentRetries = 0;
+
+  // Shared result builder; `extraRaw` is spread ahead of `api` so the raw payload keeps its key order.
+  const finish = (text: string, extraRaw: Record<string, unknown> = {}): ToolAwareCompletionResult => ({
+    text,
+    usage: sawUsage ? usageAcc : undefined,
+    raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, ...extraRaw, api: "responses" },
+    toolEvents,
+  });
+
+  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
+    const request = {
+      model: params.model,
+      input,
+      temperature: params.temperature,
+      max_output_tokens: params.maxTokens,
+      tools: toResponsesTools(enabledTools),
+      tool_choice: "auto",
+      parallel_tool_calls: true,
+      store: true,
+    };
+    const response = await params.client.responses.create(request as any);
+    rawResponses.push(response);
+    if (mergeUsage(usageAcc, response?.usage)) sawUsage = true;
+    const failure = getFailureMessage(response);
+    if (failure) throw new Error(failure);
+
+    const items = getOutputItems(response);
+    const calls = normalizeToolCalls(items, round);
+    if (calls.length) {
+      totalToolCalls += calls.length;
+      input.push(...items);
+      for (const call of calls) {
+        const { execution } = prepareToolCallExecution(call);
+        const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
+        toolEvents.push(event);
+        input.push({ type: "function_call_output", call_id: call.id, output: JSON.stringify(toolResult) });
+      }
+      continue;
+    }
+
+    const text = extractText(response);
+    const retry = danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text);
+    if (!retry) return finish(text);
+    danglingToolIntentRetries += 1;
+    appendDanglingToolIntentCorrection(input, text);
+  }
+
+  return finish("I reached the tool-call limit while gathering information. Please narrow the request and try again.", { toolCallLimitReached: true });
+}
+
+/**
+ * Streaming variant of the tool-aware Responses API completion.
+ * Yields `delta` events for assistant text, `tool_call` events before and after each tool execution,
+ * and a final `done` event with the aggregated result. Text deltas are forwarded live only after a
+ * message item has started and while no function call has been seen in the round; whatever
+ * `getUnstreamedText` reports as not yet forwarded is emitted as one delta before `done`.
+ * @param params Client, model, messages, sampling limits and tool settings for the completion.
+ * @throws Error on a stream `error` event, or when `getFailureMessage` reports a failed or incomplete response.
+ */
+export async function* streamWithResponsesApi(params: ToolAwareCompletionParams): AsyncGenerator<ToolAwareStreamingEvent> {
+  const enabledTools = getEnabledChatTools(params);
+  const input: any[] = normalizeInput(params.messages, params.userLocation, params);
+  const rawResponses: unknown[] = [];
+  const toolEvents: ToolExecutionEvent[] = [];
+  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
+  let sawUsage = false;
+  let totalToolCalls = 0;
+  let danglingToolIntentRetries = 0;
+
+  // Builds the terminal event; `extraRaw` is spread ahead of `api` so the raw payload keeps its key order.
+  const doneEvent = (text: string, extraRaw: Record<string, unknown> = {}): ToolAwareStreamingEvent => ({
+    type: "done",
+    result: {
+      text,
+      usage: sawUsage ? usageAcc : undefined,
+      raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, ...extraRaw, api: "responses" },
+      toolEvents,
+    },
+  });
+
+  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
+    const request = {
+      model: params.model,
+      input,
+      temperature: params.temperature,
+      max_output_tokens: params.maxTokens,
+      tools: toResponsesTools(enabledTools),
+      tool_choice: "auto",
+      parallel_tool_calls: true,
+      store: true,
+      stream: true,
+    };
+    const stream = await params.client.responses.create(request as any);
+    let roundText = "";
+    let streamedRoundText = "";
+    let roundHasToolCalls = false;
+    let canStreamRoundText = false;
+    let completedResponse: any | null = null;
+    const completedOutputItems: any[] = [];
+
+    for await (const event of stream as any as AsyncIterable<any>) {
+      rawResponses.push(event); // every raw stream event is kept, not only the final response
+      switch (event?.type) {
+        case "response.output_text.delta":
+          if (typeof event.delta !== "string") break;
+          roundText += event.delta;
+          if (canStreamRoundText && !roundHasToolCalls && event.delta.length) {
+            streamedRoundText += event.delta;
+            yield { type: "delta", text: event.delta };
+          }
+          break;
+        case "response.output_item.added":
+          if (event.item?.type === "function_call") {
+            roundHasToolCalls = true;
+            canStreamRoundText = false;
+          } else if (event.item?.type === "message" && !roundHasToolCalls) {
+            canStreamRoundText = true;
+          }
+          break;
+        case "response.output_item.done":
+          if (!event.item) break;
+          completedOutputItems[event.output_index ?? completedOutputItems.length] = event.item;
+          if (event.item.type === "function_call") {
+            roundHasToolCalls = true;
+            canStreamRoundText = false;
+          }
+          break;
+        case "response.completed":
+        case "response.failed":
+        case "response.incomplete":
+          completedResponse = event.response;
+          if (mergeUsage(usageAcc, event.response?.usage)) sawUsage = true;
+          break;
+        case "error":
+          throw new Error(event.message ?? "Responses stream failed.");
+      }
+    }
+
+    const failure = getFailureMessage(completedResponse);
+    if (failure) throw new Error(failure);
+    const reported = getOutputItems(completedResponse);
+    const items = reported.length ? reported : completedOutputItems.filter(Boolean); // fall back to items collected from the stream
+    const calls = normalizeToolCalls(items, round);
+    if (calls.length) {
+      totalToolCalls += calls.length;
+      input.push(...items);
+      for (const call of calls) {
+        const { event: initiatedEvent, execution } = prepareToolCallExecution(call);
+        yield { type: "tool_call", event: initiatedEvent };
+        const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
+        toolEvents.push(event);
+        yield { type: "tool_call", event };
+        input.push({ type: "function_call_output", call_id: call.id, output: JSON.stringify(toolResult) });
+      }
+      continue;
+    }
+
+    const text = extractText(completedResponse, roundText);
+    const retry = !streamedRoundText && danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text);
+    if (!retry) {
+      const unstreamedText = getUnstreamedText(text, streamedRoundText);
+      if (unstreamedText) yield { type: "delta", text: unstreamedText };
+      yield doneEvent(text);
+      return;
+    }
+    danglingToolIntentRetries += 1;
+    appendDanglingToolIntentCorrection(input, text);
+  }
+
+  yield doneEvent("I reached the tool-call limit while gathering information. Please narrow the request and try again.", { toolCallLimitReached: true });
+}