big backend refactor

This commit is contained in:
2026-06-13 12:02:22 -07:00
parent 7436544a69
commit 297b053a91
15 changed files with 1768 additions and 1068 deletions

View File

@@ -0,0 +1,386 @@
import {
appendDanglingToolIntentCorrection,
buildChatToolSystemPrompt,
executeToolCallAndBuildEvent,
getEnabledChatTools,
getUnstreamedText,
looksLikeDanglingToolIntent,
MAX_DANGLING_TOOL_INTENT_RETRIES,
MAX_TOOL_ROUNDS,
mergeUsage,
normalizeModelToolCalls,
prepareToolCallExecution,
type NormalizedToolCall,
type ToolAwareCompletionParams,
type ToolAwareCompletionResult,
type ToolAwareStreamingEvent,
type ToolExecutionEvent,
} from "../chat-tools.js";
import {
buildImageSummaryText,
buildSystemPromptAugmentationMessage,
buildTextAttachmentPrompt,
getImageAttachments,
getTextAttachments,
} from "../message-content.js";
import type { ChatMessage } from "../types.js";
/**
 * Converts a chat message into Chat Completions `content`.
 *
 * A message with no image or text attachments is passed through as its raw
 * `content`. Otherwise the result is a list of parts in this order: one
 * `image_url` part per image, the image summary (if any), one text part per
 * text attachment, and finally the message body when it is not blank. A list
 * that ends up holding a single text part is collapsed back to a plain string.
 */
function toContentParts(message: ChatMessage) {
  const images = getImageAttachments(message);
  const documents = getTextAttachments(message);
  if (!images.length && !documents.length) {
    return message.content;
  }

  const parts: Array<Record<string, unknown>> = images.map((image) => ({
    type: "image_url",
    image_url: { url: image.dataUrl, detail: "auto" },
  }));

  const summary = buildImageSummaryText(images);
  if (summary) {
    parts.push({ type: "text", text: summary });
  }

  parts.push(...documents.map((doc) => ({ type: "text", text: buildTextAttachmentPrompt(doc) })));

  if (message.content.trim()) {
    parts.push({ type: "text", text: message.content });
  }

  // A lone text part is equivalent to plain string content.
  const [only] = parts;
  if (parts.length === 1 && only?.type === "text" && typeof only.text === "string") {
    return only.text;
  }
  return parts;
}
/**
 * Maps a stored chat message onto a Chat Completions request message.
 *
 * Stored `tool` messages are replayed as `user` messages whose text is
 * prefixed with the tool name (defaulting to "tool"). All other roles keep
 * their role and get their content from `toContentParts`; `name` is copied
 * only for assistant and user messages.
 */
function buildConversationMessage(message: ChatMessage) {
  const { role } = message;

  if (role === "tool") {
    const toolName = message.name?.trim() || "tool";
    return {
      role: "user",
      content: `Tool output (${toolName}):\n${message.content}`,
    };
  }

  const supportsName = role === "assistant" || role === "user";
  const converted: Record<string, unknown> = {
    role,
    content: toContentParts(message),
  };
  if (supportsName && message.name) {
    converted.name = message.name;
  }
  return converted;
}
/**
 * Builds the request message list for tool-enabled calls: the chat-tool system
 * prompt first, then the system-prompt augmentation message for
 * `userLocation`, then the converted conversation history.
 */
function normalizeMessages(messages: ChatMessage[], userLocation?: string, params: Pick<ToolAwareCompletionParams, "enabledTools"> = {}) {
  const history = messages.map((entry) => buildConversationMessage(entry));
  const toolSystemMessage = { role: "system", content: buildChatToolSystemPrompt(params) };
  const augmentation = buildSystemPromptAugmentationMessage(userLocation);
  return [toolSystemMessage, augmentation, ...history];
}
/**
 * Builds the request message list for calls without tools: the system-prompt
 * augmentation message for `userLocation` followed by the converted history.
 */
function normalizePlainMessages(messages: ChatMessage[], userLocation?: string) {
  const augmentation = buildSystemPromptAugmentationMessage(userLocation);
  const history = messages.map((entry) => buildConversationMessage(entry));
  return [augmentation, ...history];
}
/**
 * Flattens a response message's `content` into a single string.
 *
 * String content is returned unchanged. Array content is concatenated from
 * each part: the part itself when it is a string, else its string `text`,
 * else its string `content`; anything else contributes nothing. Any other
 * content shape yields the empty string.
 */
function extractContent(message: any) {
  const content = message?.content;
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }

  let text = "";
  for (const part of content) {
    if (typeof part === "string") {
      text += part;
    } else if (typeof part?.text === "string") {
      text += part.text;
    } else if (typeof part?.content === "string") {
      text += part.content;
    }
  }
  return text;
}
/**
 * Runs a non-streaming completion through `client.chat.completions.create`.
 *
 * With no enabled chat tools a single request is sent and its text returned.
 * Otherwise the function loops for at most `MAX_TOOL_ROUNDS` rounds: each round
 * sends the running conversation with the tool definitions, executes every
 * tool call the model returned, appends the assistant tool-call message and the
 * `tool` results to the conversation, and asks again. The loop ends when the
 * model answers without tool calls, when the response has no message, or when
 * the round limit is hit (a fixed "tool-call limit" text is returned then).
 *
 * @param params Client, model, messages, sampling options and tool settings.
 * @returns The final text, accumulated token usage (only when the API reported
 *   any), the raw responses, and the tool execution events.
 */
export async function completeWithChatCompletionsApi(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
  const enabledTools = getEnabledChatTools(params);

  if (!enabledTools.length) {
    const completion = await params.client.chat.completions.create({
      model: params.model,
      messages: normalizePlainMessages(params.messages, params.userLocation),
      temperature: params.temperature,
      max_tokens: params.maxTokens,
    } as any);
    const usageAcc: Required<NonNullable<ToolAwareCompletionResult["usage"]>> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
    const sawUsage = mergeUsage(usageAcc, completion?.usage);
    const message = completion?.choices?.[0]?.message;
    return {
      text: extractContent(message),
      usage: sawUsage ? usageAcc : undefined,
      raw: { response: completion, api: "chat.completions" },
      toolEvents: [],
    };
  }

  const conversation: any[] = normalizeMessages(params.messages, params.userLocation, params);
  const rawResponses: unknown[] = [];
  const toolEvents: ToolExecutionEvent[] = [];
  const usageAcc: Required<NonNullable<ToolAwareCompletionResult["usage"]>> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let sawUsage = false;
  let totalToolCalls = 0;
  let danglingToolIntentRetries = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
    const completion = await params.client.chat.completions.create({
      model: params.model,
      messages: conversation,
      temperature: params.temperature,
      max_tokens: params.maxTokens,
      tools: enabledTools,
      tool_choice: "auto",
    } as any);
    rawResponses.push(completion);
    sawUsage = mergeUsage(usageAcc, completion?.usage) || sawUsage;

    const message = completion?.choices?.[0]?.message;
    if (!message) {
      return {
        text: "",
        usage: sawUsage ? usageAcc : undefined,
        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, missingMessage: true },
        toolEvents,
      };
    }

    const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
    if (!toolCalls.length) {
      // Use the same extraction as the no-tools path so content delivered as an
      // array of parts is not silently reduced to an empty answer.
      const text = extractContent(message);
      // The model said it would use a tool but made no call: append a
      // correction and retry, a bounded number of times.
      if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
        danglingToolIntentRetries += 1;
        appendDanglingToolIntentCorrection(conversation, text);
        continue;
      }
      return {
        text,
        usage: sawUsage ? usageAcc : undefined,
        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls },
        toolEvents,
      };
    }

    const normalizedToolCalls = normalizeModelToolCalls(toolCalls, round);
    totalToolCalls += normalizedToolCalls.length;

    // Echo the assistant's tool-call turn back using the normalized ids so the
    // `tool` result messages below reference matching `tool_call_id`s.
    const assistantToolCallMessage: any = {
      role: "assistant",
      tool_calls: normalizedToolCalls.map((call) => ({
        id: call.id,
        type: "function",
        function: {
          name: call.name,
          arguments: call.arguments,
        },
      })),
    };
    if (typeof message.content === "string" && message.content.length) {
      assistantToolCallMessage.content = message.content;
    }
    conversation.push(assistantToolCallMessage);

    // Tool calls are executed one after another, in the order returned.
    for (const call of normalizedToolCalls) {
      const { execution } = prepareToolCallExecution(call);
      const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
      toolEvents.push(event);
      conversation.push({
        role: "tool",
        tool_call_id: call.id,
        content: JSON.stringify(toolResult),
      });
    }
  }

  return {
    text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
    usage: sawUsage ? usageAcc : undefined,
    raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true },
    toolEvents,
  };
}
/**
 * Streaming counterpart of the Chat Completions adapter.
 *
 * Yields `delta` events for text as it arrives, `tool_call` events before and
 * after each tool execution, and exactly one final `done` event carrying the
 * full result.
 *
 * With no enabled chat tools a single streamed request is made. Otherwise the
 * generator loops for at most `MAX_TOOL_ROUNDS` rounds, reassembling streamed
 * tool-call fragments by their `index`, executing the calls, and feeding the
 * results back. Text is forwarded to the consumer only until the first
 * tool-call fragment of a round is seen.
 *
 * @param params Client, model, messages, sampling options and tool settings.
 */
export async function* streamWithChatCompletionsApi(params: ToolAwareCompletionParams): AsyncGenerator<ToolAwareStreamingEvent> {
  const enabledTools = getEnabledChatTools(params);

  if (!enabledTools.length) {
    const rawResponses: unknown[] = [];
    const usageAcc: Required<NonNullable<ToolAwareCompletionResult["usage"]>> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
    let sawUsage = false;
    let text = "";
    const stream = await params.client.chat.completions.create({
      model: params.model,
      messages: normalizePlainMessages(params.messages, params.userLocation),
      temperature: params.temperature,
      max_tokens: params.maxTokens,
      stream: true,
      // Ask for the trailing usage chunk, as the tool-enabled path below does;
      // without it streamed responses carry no usage and `usage` stays undefined.
      stream_options: { include_usage: true },
    } as any);
    for await (const chunk of stream as any as AsyncIterable<any>) {
      rawResponses.push(chunk);
      sawUsage = mergeUsage(usageAcc, chunk?.usage) || sawUsage;
      const deltaText = chunk?.choices?.[0]?.delta?.content ?? "";
      if (typeof deltaText === "string" && deltaText.length) {
        text += deltaText;
        yield { type: "delta", text: deltaText };
      }
    }
    yield {
      type: "done",
      result: {
        text,
        usage: sawUsage ? usageAcc : undefined,
        raw: { streamed: true, responses: rawResponses, api: "chat.completions" },
        toolEvents: [],
      },
    };
    return;
  }

  const conversation: any[] = normalizeMessages(params.messages, params.userLocation, params);
  const rawResponses: unknown[] = [];
  const toolEvents: ToolExecutionEvent[] = [];
  const usageAcc: Required<NonNullable<ToolAwareCompletionResult["usage"]>> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let sawUsage = false;
  let totalToolCalls = 0;
  let danglingToolIntentRetries = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
    const stream = await params.client.chat.completions.create({
      model: params.model,
      messages: conversation,
      temperature: params.temperature,
      max_tokens: params.maxTokens,
      tools: enabledTools,
      tool_choice: "auto",
      stream: true,
      stream_options: { include_usage: true },
    } as any);

    // roundText: everything the model wrote this round.
    // streamedRoundText: the prefix of it already forwarded as `delta` events.
    let roundText = "";
    let streamedRoundText = "";
    let roundHasToolCalls = false;
    const roundToolCalls = new Map<number, { id?: string; name?: string; arguments: string }>();

    for await (const chunk of stream as any as AsyncIterable<any>) {
      rawResponses.push(chunk);
      sawUsage = mergeUsage(usageAcc, chunk?.usage) || sawUsage;
      const choice = chunk?.choices?.[0];

      const deltaText = choice?.delta?.content ?? "";
      if (typeof deltaText === "string" && deltaText.length) {
        roundText += deltaText;
        // Stop forwarding text once this round has started emitting tool calls.
        if (!roundHasToolCalls) {
          streamedRoundText += deltaText;
          yield { type: "delta", text: deltaText };
        }
      }

      const deltaToolCalls = Array.isArray(choice?.delta?.tool_calls) ? choice.delta.tool_calls : [];
      if (deltaToolCalls.length) {
        roundHasToolCalls = true;
      }
      // Tool calls arrive as fragments keyed by `index`; id and name overwrite,
      // argument fragments are concatenated.
      for (const toolCall of deltaToolCalls) {
        const idx = typeof toolCall?.index === "number" ? toolCall.index : 0;
        const entry = roundToolCalls.get(idx) ?? { arguments: "" };
        if (typeof toolCall?.id === "string" && toolCall.id.length) {
          entry.id = toolCall.id;
        }
        if (typeof toolCall?.function?.name === "string" && toolCall.function.name.length) {
          entry.name = toolCall.function.name;
        }
        if (typeof toolCall?.function?.arguments === "string" && toolCall.function.arguments.length) {
          entry.arguments += toolCall.function.arguments;
        }
        roundToolCalls.set(idx, entry);
      }
    }

    const normalizedToolCalls: NormalizedToolCall[] = [...roundToolCalls.entries()]
      .sort((a, b) => a[0] - b[0])
      .map(([, call], index) => ({
        id: call.id ?? `tool_call_${round}_${index}`,
        name: call.name ?? "unknown_tool",
        arguments: call.arguments || "{}",
      }));

    if (!normalizedToolCalls.length) {
      // Retry a dangling tool intent only if none of this round's text has
      // already been shown to the consumer.
      if (!streamedRoundText && danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(roundText)) {
        danglingToolIntentRetries += 1;
        appendDanglingToolIntentCorrection(conversation, roundText);
        continue;
      }
      const unstreamedText = getUnstreamedText(roundText, streamedRoundText);
      if (unstreamedText) {
        yield { type: "delta", text: unstreamedText };
      }
      yield {
        type: "done",
        result: {
          text: roundText,
          usage: sawUsage ? usageAcc : undefined,
          raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls },
          toolEvents,
        },
      };
      return;
    }

    totalToolCalls += normalizedToolCalls.length;
    const assistantToolCallMessage: any = {
      role: "assistant",
      tool_calls: normalizedToolCalls.map((call) => ({
        id: call.id,
        type: "function",
        function: {
          name: call.name,
          arguments: call.arguments,
        },
      })),
    };
    if (roundText) {
      assistantToolCallMessage.content = roundText;
    }
    conversation.push(assistantToolCallMessage);

    // Each call yields one event when it is initiated and one when it finishes.
    for (const call of normalizedToolCalls) {
      const { event: initiatedEvent, execution } = prepareToolCallExecution(call);
      yield { type: "tool_call", event: initiatedEvent };
      const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
      toolEvents.push(event);
      yield { type: "tool_call", event };
      conversation.push({
        role: "tool",
        tool_call_id: call.id,
        content: JSON.stringify(toolResult),
      });
    }
  }

  yield {
    type: "done",
    result: {
      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
      usage: sawUsage ? usageAcc : undefined,
      raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true },
      toolEvents,
    },
  };
}

View File

@@ -0,0 +1,470 @@
import {
buildChatToolSystemPrompt,
executeToolCallAndBuildEvent,
getEnabledChatTools,
looksLikeDanglingToolIntent,
MAX_DANGLING_TOOL_INTENT_RETRIES,
MAX_TOOL_ROUNDS,
parseToolArgs,
prepareToolCallExecution,
type NormalizedToolCall,
type ToolAwareCompletionParams,
type ToolAwareCompletionResult,
type ToolAwareStreamingEvent,
type ToolAwareUsage,
type ToolExecutionEvent,
type ToolRunOutcome,
} from "../chat-tools.js";
import {
buildImageSummaryText,
buildTextAttachmentPrompt,
buildTopLevelSystemPrompt,
getImageAttachments,
getTextAttachments,
parseImageDataUrl,
} from "../message-content.js";
import type { ChatMessage } from "../types.js";
/**
 * Text of the user-role message that `appendCorrection` adds after an
 * assistant turn that announced a tool run without actually calling a tool.
 */
const INTERNAL_CORRECTION =
"Internal correction: the previous assistant message claimed it would run a tool, but no tool call was made. If the task needs an available tool, call it now. Otherwise provide the final answer directly without saying you will run a tool.";
/**
 * Converts Chat-Completions-style tool definitions into the Messages API
 * shape (`name`, `description`, `input_schema`). Entries whose `type` is not
 * "function" are dropped.
 */
function toTools(tools: any[]) {
  const converted: Array<Record<string, unknown>> = [];
  for (const tool of tools) {
    if (tool?.type !== "function") {
      continue;
    }
    const { name, description, parameters } = tool.function;
    converted.push({ name, description, input_schema: parameters });
  }
  return converted;
}
/**
 * Converts a chat message into Messages API `content`.
 *
 * A message with no image or text attachments is passed through as its raw
 * `content`. Otherwise the result is a list of blocks in this order: one
 * base64 `image` block per image (decoded via `parseImageDataUrl`), the image
 * summary (if any), one text block per text attachment, and finally the
 * message body when it is not blank. A list holding a single text block is
 * collapsed back to a plain string.
 */
function toContentBlocks(message: ChatMessage) {
  const images = getImageAttachments(message);
  const documents = getTextAttachments(message);
  if (!images.length && !documents.length) {
    return message.content;
  }

  const blocks: Array<Record<string, unknown>> = images.map((image) => {
    const parsed = parseImageDataUrl(image);
    return {
      type: "image",
      source: { type: "base64", media_type: parsed.mediaType, data: parsed.data },
    };
  });

  const summary = buildImageSummaryText(images);
  if (summary) {
    blocks.push({ type: "text", text: summary });
  }

  blocks.push(...documents.map((doc) => ({ type: "text", text: buildTextAttachmentPrompt(doc) })));

  if (message.content.trim()) {
    blocks.push({ type: "text", text: message.content });
  }

  // A lone text block is equivalent to plain string content.
  const [only] = blocks;
  if (blocks.length === 1 && only?.type === "text" && typeof only.text === "string") {
    return only.text;
  }
  return blocks;
}
/**
 * Maps a stored chat message onto a Messages API conversation entry.
 *
 * Stored `tool` messages are replayed as `user` text prefixed with the tool
 * name (defaulting to "tool"). Assistant messages stay `assistant`; every
 * other remaining role is sent as `user`.
 *
 * @throws Error when given a `system` message; those must be supplied through
 *   the top-level system prompt instead.
 */
function buildConversationMessage(message: ChatMessage) {
  switch (message.role) {
    case "system":
      throw new Error("System messages must be handled separately for top-level-system protocols.");
    case "tool": {
      const toolName = message.name?.trim() || "tool";
      return {
        role: "user",
        content: `Tool output (${toolName}):\n${message.content}`,
      };
    }
    default:
      return {
        role: message.role === "assistant" ? "assistant" : "user",
        content: toContentBlocks(message),
      };
  }
}
/**
 * Converts `params.messages` into Messages API conversation entries, leaving
 * out `system` messages (those are not valid conversation entries here).
 */
function buildBaseMessages(params: ToolAwareCompletionParams) {
  const converted: Array<ReturnType<typeof buildConversationMessage>> = [];
  for (const message of params.messages) {
    if (message.role === "system") {
      continue;
    }
    converted.push(buildConversationMessage(message));
  }
  return converted;
}
/**
 * Serializes a tool-call input to a JSON string.
 *
 * Strings are returned untouched, `null`/`undefined` serialize as an empty
 * object, and any value that makes `JSON.stringify` throw falls back to "{}".
 */
function stringifyToolInput(input: unknown) {
  if (typeof input === "string") {
    return input;
  }

  const payload = input === null || input === undefined ? {} : input;
  let serialized: string;
  try {
    serialized = JSON.stringify(payload);
  } catch {
    serialized = "{}";
  }
  return serialized;
}
/**
 * Extracts the `tool_use` blocks of a Messages API response as normalized tool
 * calls. A missing id is replaced by `tool_call_<round>_<n>` (n counts
 * tool-use blocks only), a missing name by "unknown_tool", and the input is
 * serialized with `stringifyToolInput`.
 */
function normalizeToolCalls(content: any[], round: number): NormalizedToolCall[] {
  const calls: NormalizedToolCall[] = [];
  for (const item of content) {
    if (item?.type !== "tool_use") {
      continue;
    }
    const position = calls.length;
    calls.push({
      id: item?.id ?? `tool_call_${round}_${position}`,
      name: item?.name ?? "unknown_tool",
      arguments: stringifyToolInput(item?.input),
    });
  }
  return calls;
}
/**
 * Joins the text of every `text` block in a Messages API response and trims
 * the result. Returns the empty string when `content` is not an array.
 */
function extractText(response: any) {
  const blocks = response?.content;
  if (!Array.isArray(blocks)) {
    return "";
  }

  let combined = "";
  for (const block of blocks) {
    if (block?.type === "text" && typeof block.text === "string") {
      combined += block.text;
    }
  }
  return combined.trim();
}
/**
 * Builds the `tool_result` block that answers one tool call: the outcome is
 * sent as JSON text, and the block is flagged as an error when `toolResult.ok`
 * is falsy.
 */
function buildToolResultBlock(call: NormalizedToolCall, toolResult: ToolRunOutcome) {
  const { id } = call;
  const serialized = JSON.stringify(toolResult);
  const failed = !toolResult.ok;
  return { type: "tool_result", tool_use_id: id, content: serialized, is_error: failed };
}
/**
 * Appends, in place, the assistant's dangling reply followed by the
 * `INTERNAL_CORRECTION` user message to `conversation`.
 */
function appendCorrection(conversation: any[], text: string) {
  conversation.push(
    { role: "assistant", content: text },
    { role: "user", content: INTERNAL_CORRECTION },
  );
}
/**
 * Adds a Messages API `usage` object into the running totals, in place.
 * The total is computed as input + output tokens.
 *
 * @returns `true` when a usage object was present, `false` otherwise.
 */
function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
  if (usage) {
    const promptTokens = usage.input_tokens ?? 0;
    const completionTokens = usage.output_tokens ?? 0;
    acc.inputTokens = acc.inputTokens + promptTokens;
    acc.outputTokens = acc.outputTokens + completionTokens;
    acc.totalTokens = acc.totalTokens + (promptTokens + completionTokens);
    return true;
  }
  return false;
}
/**
 * Runs a non-streaming completion through `client.messages.create`.
 *
 * With no enabled chat tools a single request is sent. Otherwise the function
 * loops for at most `MAX_TOOL_ROUNDS` rounds, executing the `tool_use` blocks of
 * each response and sending the results back as one `user` message of
 * `tool_result` blocks, until the model answers without tool calls or the round
 * limit is reached (a fixed "tool-call limit" text is returned then).
 *
 * @param params Client, model, messages, sampling options and tool settings.
 * @returns The final text, accumulated token usage (only when the API reported
 *   any), the raw responses, and the tool execution events.
 */
export async function completeWithMessagesApi(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
const enabledTools = getEnabledChatTools(params);
// No tools enabled: one plain request, with no tool system prompt added.
if (!enabledTools.length) {
const response = await params.client.messages.create({
model: params.model,
system: buildTopLevelSystemPrompt(params.messages, params.userLocation),
// `max_tokens` is always sent; 1024 is used when the caller gave no limit.
max_tokens: params.maxTokens ?? 1024,
temperature: params.temperature,
messages: buildBaseMessages(params),
} as any);
const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
const sawUsage = mergeUsage(usageAcc, response?.usage);
return {
text: extractText(response),
usage: sawUsage ? usageAcc : undefined,
raw: { response, api: "messages" },
toolEvents: [],
};
}
// Running conversation; assistant turns and tool results are appended per round.
const conversation: any[] = buildBaseMessages(params);
const rawResponses: unknown[] = [];
const toolEvents: ToolExecutionEvent[] = [];
const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
let sawUsage = false;
let totalToolCalls = 0;
let danglingToolIntentRetries = 0;
for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
const response = await params.client.messages.create({
model: params.model,
// The chat-tool system prompt is passed as the third argument of the top-level system prompt builder.
system: buildTopLevelSystemPrompt(params.messages, params.userLocation, buildChatToolSystemPrompt(params)),
max_tokens: params.maxTokens ?? 1024,
temperature: params.temperature,
messages: conversation,
tools: toTools(enabledTools),
tool_choice: { type: "auto" },
} as any);
rawResponses.push(response);
sawUsage = mergeUsage(usageAcc, response?.usage) || sawUsage;
const content = Array.isArray(response?.content) ? response.content : [];
const normalizedToolCalls = normalizeToolCalls(content, round);
if (!normalizedToolCalls.length) {
const text = extractText(response);
// The model said it would use a tool but made no call: append a correction and retry, a bounded number of times.
if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
danglingToolIntentRetries += 1;
appendCorrection(conversation, text);
continue;
}
return {
text,
usage: sawUsage ? usageAcc : undefined,
raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, api: "messages" },
toolEvents,
};
}
totalToolCalls += normalizedToolCalls.length;
// Replay the assistant's content blocks unchanged so the tool_result blocks below can refer to its tool_use ids.
conversation.push({
role: "assistant",
content,
});
const toolResultBlocks: any[] = [];
// Tool calls are executed one after another, in the order returned.
for (const call of normalizedToolCalls) {
const { execution } = prepareToolCallExecution(call);
const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
toolEvents.push(event);
toolResultBlocks.push(buildToolResultBlock(call, toolResult));
}
// All results of the round go back in a single user message.
conversation.push({
role: "user",
content: toolResultBlocks,
});
}
// Round limit exhausted without a final answer.
return {
text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
usage: sawUsage ? usageAcc : undefined,
raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "messages" },
toolEvents,
};
}
/**
 * Streaming counterpart of the Messages API adapter.
 *
 * Yields `delta` events for text, `tool_call` events before and after each
 * tool execution, and exactly one final `done` event carrying the full result.
 *
 * With no enabled chat tools, text deltas are forwarded as they arrive. With
 * tools enabled, each round's stream is consumed completely and its content
 * blocks are rebuilt by index before anything is yielded; the round's text is
 * then emitted as a single `delta` only if the round contained no tool calls.
 * The loop runs for at most `MAX_TOOL_ROUNDS` rounds.
 *
 * @param params Client, model, messages, sampling options and tool settings.
 */
export async function* streamWithMessagesApi(params: ToolAwareCompletionParams): AsyncGenerator<ToolAwareStreamingEvent> {
const enabledTools = getEnabledChatTools(params);
// No tools enabled: a single streamed request whose text is forwarded live.
if (!enabledTools.length) {
const rawResponses: unknown[] = [];
const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
let sawUsage = false;
let roundInputTokens = 0;
let roundOutputTokens = 0;
let text = "";
const stream = await params.client.messages.create({
model: params.model,
system: buildTopLevelSystemPrompt(params.messages, params.userLocation),
// `max_tokens` is always sent; 1024 is used when the caller gave no limit.
max_tokens: params.maxTokens ?? 1024,
temperature: params.temperature,
messages: buildBaseMessages(params),
stream: true,
} as any);
for await (const ev of stream as any as AsyncIterable<any>) {
rawResponses.push(ev);
// Input token count is read from the message_start event.
if (ev?.type === "message_start" && ev?.message?.usage) {
roundInputTokens = ev.message.usage.input_tokens ?? roundInputTokens;
sawUsage = true;
}
if (ev?.type === "content_block_delta" && ev?.delta?.type === "text_delta") {
const delta = ev.delta.text ?? "";
if (delta) {
text += delta;
yield { type: "delta", text: delta };
}
}
// message_delta usage overwrites (rather than adds to) the values seen so far.
if (ev?.type === "message_delta" && ev.usage) {
roundInputTokens = ev.usage.input_tokens ?? roundInputTokens;
roundOutputTokens = ev.usage.output_tokens ?? roundOutputTokens;
sawUsage = true;
}
}
if (sawUsage) {
usageAcc.inputTokens += roundInputTokens;
usageAcc.outputTokens += roundOutputTokens;
usageAcc.totalTokens += roundInputTokens + roundOutputTokens;
}
yield {
type: "done",
result: {
text,
usage: sawUsage ? usageAcc : undefined,
raw: { streamed: true, responses: rawResponses, toolCallsUsed: 0, api: "messages" },
toolEvents: [],
},
};
return;
}
// Running conversation; assistant turns and tool results are appended per round.
const conversation: any[] = buildBaseMessages(params);
const rawResponses: unknown[] = [];
const toolEvents: ToolExecutionEvent[] = [];
const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
let sawUsage = false;
let totalToolCalls = 0;
let danglingToolIntentRetries = 0;
for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
const stream = await params.client.messages.create({
model: params.model,
system: buildTopLevelSystemPrompt(params.messages, params.userLocation, buildChatToolSystemPrompt(params)),
max_tokens: params.maxTokens ?? 1024,
temperature: params.temperature,
messages: conversation,
tools: toTools(enabledTools),
tool_choice: { type: "auto" },
stream: true,
} as any);
// contentByIndex: content blocks of this round, rebuilt by stream index.
// toolArgumentByIndex: raw JSON argument text accumulated per tool_use block.
const contentByIndex = new Map<number, any>();
const toolArgumentByIndex = new Map<number, string>();
let roundText = "";
// NOTE(review): roundHasToolCalls is assigned below but never read in this function.
let roundHasToolCalls = false;
let roundInputTokens = 0;
let roundOutputTokens = 0;
let sawRoundUsage = false;
for await (const ev of stream as any as AsyncIterable<any>) {
rawResponses.push(ev);
if (ev?.type === "message_start" && ev?.message?.usage) {
roundInputTokens = ev.message.usage.input_tokens ?? roundInputTokens;
sawRoundUsage = true;
}
// A new content block begins: register it under its index.
if (ev?.type === "content_block_start" && typeof ev.index === "number") {
const block = ev.content_block ?? {};
if (block.type === "tool_use") {
roundHasToolCalls = true;
contentByIndex.set(ev.index, {
type: "tool_use",
id: block.id,
name: block.name,
input: block.input ?? {},
});
toolArgumentByIndex.set(ev.index, "");
} else if (block.type === "text") {
contentByIndex.set(ev.index, {
type: "text",
text: typeof block.text === "string" ? block.text : "",
});
} else if (block.type) {
// Any other typed block is stored as received.
contentByIndex.set(ev.index, block);
}
}
if (ev?.type === "content_block_delta" && typeof ev.index === "number") {
if (ev.delta?.type === "text_delta") {
const delta = typeof ev.delta.text === "string" ? ev.delta.text : "";
if (delta) {
// A text delta for an unregistered index creates a fresh text block.
const block = contentByIndex.get(ev.index) ?? { type: "text", text: "" };
if (block.type === "text") {
block.text = `${typeof block.text === "string" ? block.text : ""}${delta}`;
contentByIndex.set(ev.index, block);
}
// Text is only buffered here; nothing is yielded while the stream is being read.
roundText += delta;
}
} else if (ev.delta?.type === "input_json_delta") {
roundHasToolCalls = true;
const partialJson = typeof ev.delta.partial_json === "string" ? ev.delta.partial_json : "";
toolArgumentByIndex.set(ev.index, `${toolArgumentByIndex.get(ev.index) ?? ""}${partialJson}`);
}
}
// A tool_use block is complete: parse its accumulated arguments into `input`.
if (ev?.type === "content_block_stop" && typeof ev.index === "number") {
const block = contentByIndex.get(ev.index);
if (block?.type === "tool_use") {
// Falls back to the input given at block start when no argument text was streamed.
const rawArguments = toolArgumentByIndex.get(ev.index) || stringifyToolInput(block.input);
try {
block.input = parseToolArgs(rawArguments);
} catch {
// Arguments that fail to parse are replaced by an empty object in the replayed block.
block.input = {};
}
contentByIndex.set(ev.index, block);
}
}
if (ev?.type === "message_delta" && ev.usage) {
roundInputTokens = ev.usage.input_tokens ?? roundInputTokens;
roundOutputTokens = ev.usage.output_tokens ?? roundOutputTokens;
sawRoundUsage = true;
}
}
// Fold this round's usage into the totals once the stream has ended.
if (sawRoundUsage) {
usageAcc.inputTokens += roundInputTokens;
usageAcc.outputTokens += roundOutputTokens;
usageAcc.totalTokens += roundInputTokens + roundOutputTokens;
sawUsage = true;
}
const indexedContent = [...contentByIndex.entries()].sort((a, b) => a[0] - b[0]);
const assistantContent = indexedContent.map(([, block]) => block);
// The normalized call keeps the raw streamed argument text when there is any.
const normalizedToolCalls: NormalizedToolCall[] = indexedContent
.filter(([, block]) => block?.type === "tool_use")
.map(([index, block], callIndex) => ({
id: block.id ?? `tool_call_${round}_${callIndex}`,
name: block.name ?? "unknown_tool",
arguments: toolArgumentByIndex.get(index) || stringifyToolInput(block.input),
}));
if (!normalizedToolCalls.length) {
// The model said it would use a tool but made no call: append a correction and retry, a bounded number of times.
if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(roundText)) {
danglingToolIntentRetries += 1;
appendCorrection(conversation, roundText);
continue;
}
// The final answer is emitted as one delta holding the whole round text.
if (roundText) {
yield { type: "delta", text: roundText };
}
yield {
type: "done",
result: {
text: roundText,
usage: sawUsage ? usageAcc : undefined,
raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, api: "messages" },
toolEvents,
},
};
return;
}
totalToolCalls += normalizedToolCalls.length;
// Replay the rebuilt assistant blocks so the tool_result blocks below can refer to their tool_use ids.
conversation.push({
role: "assistant",
content: assistantContent,
});
const toolResultBlocks: any[] = [];
// Each call yields one event when it is initiated and one when it finishes.
for (const call of normalizedToolCalls) {
const { event: initiatedEvent, execution } = prepareToolCallExecution(call);
yield { type: "tool_call", event: initiatedEvent };
const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
toolEvents.push(event);
yield { type: "tool_call", event };
toolResultBlocks.push(buildToolResultBlock(call, toolResult));
}
// All results of the round go back in a single user message.
conversation.push({
role: "user",
content: toolResultBlocks,
});
}
// Round limit exhausted without a final answer.
yield {
type: "done",
result: {
text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
usage: sawUsage ? usageAcc : undefined,
raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "messages" },
toolEvents,
},
};
}

View File

@@ -0,0 +1,332 @@
import {
appendDanglingToolIntentCorrection,
buildChatToolSystemPrompt,
executeToolCallAndBuildEvent,
getEnabledChatTools,
getUnstreamedText,
looksLikeDanglingToolIntent,
MAX_DANGLING_TOOL_INTENT_RETRIES,
MAX_TOOL_ROUNDS,
prepareToolCallExecution,
type NormalizedToolCall,
type ToolAwareCompletionParams,
type ToolAwareCompletionResult,
type ToolAwareStreamingEvent,
type ToolAwareUsage,
type ToolExecutionEvent,
} from "../chat-tools.js";
import {
buildImageSummaryText,
buildSystemPromptAugmentationMessage,
buildTextAttachmentPrompt,
getImageAttachments,
getTextAttachments,
} from "../message-content.js";
import type { ChatMessage } from "../types.js";
/**
 * Converts Chat-Completions-style tool definitions into the flat Responses API
 * shape (name, description and parameters at the top level, `strict: false`).
 * Entries whose `type` is not "function" are passed through unchanged.
 */
function toResponsesTools(tools: any[]) {
  const flatten = (tool: any) => {
    const { name, description, parameters } = tool.function;
    return { type: "function", name, description, parameters, strict: false };
  };
  return tools.map((tool) => (tool?.type === "function" ? flatten(tool) : tool));
}
/**
 * Converts a chat message into Responses API input `content`.
 *
 * A message with no image or text attachments is passed through as its raw
 * `content`. Otherwise the result is a list of parts in this order: one
 * `input_image` part per image, the image summary (if any), one `input_text`
 * part per text attachment, and finally the message body when it is not
 * blank. A list holding a single `input_text` part is collapsed back to a
 * plain string.
 */
function toContentParts(message: ChatMessage) {
  const images = getImageAttachments(message);
  const documents = getTextAttachments(message);
  if (!images.length && !documents.length) {
    return message.content;
  }

  const parts: Array<Record<string, unknown>> = images.map((image) => ({
    type: "input_image",
    image_url: image.dataUrl,
    detail: "auto",
  }));

  const summary = buildImageSummaryText(images);
  if (summary) {
    parts.push({ type: "input_text", text: summary });
  }

  parts.push(...documents.map((doc) => ({ type: "input_text", text: buildTextAttachmentPrompt(doc) })));

  if (message.content.trim()) {
    parts.push({ type: "input_text", text: message.content });
  }

  // A lone text part is equivalent to plain string content.
  const [only] = parts;
  if (parts.length === 1 && only?.type === "input_text" && typeof only.text === "string") {
    return only.text;
  }
  return parts;
}
/**
 * Maps a stored chat message onto a Responses API input message.
 *
 * Stored `tool` messages are replayed as `user` text prefixed with the tool
 * name (defaulting to "tool"); every other message keeps its role and gets its
 * content from `toContentParts`.
 */
function buildInputMessage(message: ChatMessage) {
  if (message.role !== "tool") {
    return {
      role: message.role,
      content: toContentParts(message),
    };
  }

  const toolName = message.name?.trim() || "tool";
  return {
    role: "user",
    content: `Tool output (${toolName}):\n${message.content}`,
  };
}
/**
 * Builds the Responses API `input` list: the chat-tool system prompt first,
 * then the system-prompt augmentation message for `userLocation`, then the
 * converted conversation history.
 */
function normalizeInput(messages: ChatMessage[], userLocation?: string, params: Pick<ToolAwareCompletionParams, "enabledTools"> = {}) {
  const history = messages.map((entry) => buildInputMessage(entry));
  const toolSystemMessage = { role: "system", content: buildChatToolSystemPrompt(params) };
  const augmentation = buildSystemPromptAugmentationMessage(userLocation);
  return [toolSystemMessage, augmentation, ...history];
}
/**
 * Adds a Responses API `usage` object into the running totals, in place.
 *
 * @param acc Accumulator that is mutated.
 * @param usage Usage object of one response; may be missing.
 * @returns `true` when a usage object was present, `false` otherwise.
 */
function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
  if (!usage) return false;
  const inputTokens = usage.input_tokens ?? 0;
  const outputTokens = usage.output_tokens ?? 0;
  acc.inputTokens += inputTokens;
  acc.outputTokens += outputTokens;
  // When the backend omits `total_tokens`, derive it from the parts so the
  // total never lags behind the input/output counts.
  acc.totalTokens += usage.total_tokens ?? (inputTokens + outputTokens);
  return true;
}
/** Returns the response's `output` array, or an empty array when it is missing or not an array. */
function getOutputItems(response: any) {
  const output = response?.output;
  if (Array.isArray(output)) {
    return output;
  }
  return [];
}
/**
 * Extracts the assistant text of a Responses API response.
 *
 * A non-empty `output_text` is returned directly. Otherwise the `output_text`
 * and `refusal` parts of every `message` output item are concatenated; when
 * that is empty too, `fallback` is returned.
 *
 * @param response Response object; may be missing or partial.
 * @param fallback Text to return when the response holds no text at all.
 */
function extractText(response: any, fallback = "") {
  const outputText = response?.output_text;
  // An empty `output_text` must not short-circuit: a refusal carries its text
  // in `refusal` parts, and callers may have supplied a fallback.
  if (typeof outputText === "string" && outputText.length) return outputText;

  const parts: string[] = [];
  for (const item of getOutputItems(response)) {
    if (item?.type !== "message" || !Array.isArray(item.content)) continue;
    for (const content of item.content) {
      if (content?.type === "output_text" && typeof content.text === "string") {
        parts.push(content.text);
      } else if (content?.type === "refusal" && typeof content.refusal === "string") {
        parts.push(content.refusal);
      }
    }
  }
  return parts.join("") || fallback;
}
/**
 * Describes why a response did not complete.
 *
 * Returns `null` unless the status is "failed" or "incomplete". Otherwise
 * prefers the string `error.message`, then the incomplete reason, and finally
 * a generic message naming the status.
 */
function getFailureMessage(response: any) {
  const status = response?.status;
  const isTerminalFailure = status === "failed" || status === "incomplete";
  if (!isTerminalFailure) {
    return null;
  }

  const providerMessage = response?.error?.message;
  if (typeof providerMessage === "string") {
    return providerMessage;
  }

  const reason = response?.incomplete_details?.reason;
  if (typeof reason === "string" && reason) {
    return `Response incomplete: ${reason}`;
  }
  return `Response ${status}.`;
}
/**
 * Extracts the `function_call` output items as normalized tool calls.
 * The id is taken from `call_id`, then `id`, then `tool_call_<round>_<n>`
 * (n counts function calls only); a missing name becomes "unknown_tool" and
 * missing arguments become "{}".
 */
function normalizeToolCalls(outputItems: any[], round: number): NormalizedToolCall[] {
  const calls: NormalizedToolCall[] = [];
  for (const item of outputItems) {
    if (item?.type !== "function_call") {
      continue;
    }
    const position = calls.length;
    calls.push({
      id: item.call_id ?? item.id ?? `tool_call_${round}_${position}`,
      name: item.name ?? "unknown_tool",
      arguments: item.arguments ?? "{}",
    });
  }
  return calls;
}
/**
 * Runs a non-streaming completion through `client.responses.create`.
 *
 * Loops for at most `MAX_TOOL_ROUNDS` rounds: each round sends the running
 * `input` list with the tool definitions, executes every `function_call`
 * output item, appends the output items and the matching
 * `function_call_output` entries to `input`, and asks again. The loop ends when
 * the model answers without tool calls or when the round limit is reached (a
 * fixed "tool-call limit" text is returned then).
 *
 * @param params Client, model, messages, sampling options and tool settings.
 * @returns The final text, accumulated token usage (only when the API reported
 *   any), the raw responses, and the tool execution events.
 * @throws Error when a response has status "failed" or "incomplete" and
 *   `getFailureMessage` yields a non-empty message.
 */
export async function completeWithResponsesApi(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
const enabledTools = getEnabledChatTools(params);
// Unlike the other adapters there is no separate no-tools branch; the same loop handles both cases.
const input: any[] = normalizeInput(params.messages, params.userLocation, params);
const rawResponses: unknown[] = [];
const toolEvents: ToolExecutionEvent[] = [];
const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
let sawUsage = false;
let totalToolCalls = 0;
let danglingToolIntentRetries = 0;
for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
const response = await params.client.responses.create({
model: params.model,
input,
temperature: params.temperature,
max_output_tokens: params.maxTokens,
tools: toResponsesTools(enabledTools),
tool_choice: "auto",
parallel_tool_calls: true,
store: true,
} as any);
rawResponses.push(response);
sawUsage = mergeUsage(usageAcc, response?.usage) || sawUsage;
// Failed or incomplete responses abort the whole call.
const failureMessage = getFailureMessage(response);
if (failureMessage) {
throw new Error(failureMessage);
}
const outputItems = getOutputItems(response);
const normalizedToolCalls = normalizeToolCalls(outputItems, round);
if (!normalizedToolCalls.length) {
const text = extractText(response);
// The model said it would use a tool but made no call: append a correction and retry, a bounded number of times.
if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
danglingToolIntentRetries += 1;
appendDanglingToolIntentCorrection(input, text);
continue;
}
return {
text,
usage: sawUsage ? usageAcc : undefined,
raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, api: "responses" },
toolEvents,
};
}
totalToolCalls += normalizedToolCalls.length;
// Every output item of the round (not only the function calls) is fed back as input.
input.push(...outputItems);
// Tool calls are executed one after another, in the order returned.
for (const call of normalizedToolCalls) {
const { execution } = prepareToolCallExecution(call);
const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
toolEvents.push(event);
input.push({
type: "function_call_output",
call_id: call.id,
output: JSON.stringify(toolResult),
});
}
}
// Round limit exhausted without a final answer.
return {
text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
usage: sawUsage ? usageAcc : undefined,
raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "responses" },
toolEvents,
};
}
/**
 * Streaming, tool-aware completion driven through `client.responses.create`
 * with `stream: true`.
 *
 * Runs up to `MAX_TOOL_ROUNDS` request rounds. Within a round, text deltas are
 * forwarded to the consumer as `{ type: "delta" }` events only while a
 * `message` output item is open and no `function_call` item has been seen in
 * that round. When a round yields tool calls, each call is announced, executed
 * and its result appended to `input` as a `function_call_output` item before
 * the next round starts. When a round yields no tool calls, the final text is
 * emitted and the generator finishes with a `{ type: "done" }` event.
 *
 * @param params Completion parameters; this function reads `client`, `model`,
 *   `messages`, `userLocation`, `temperature` and `maxTokens`, and forwards the
 *   whole object to the tool helpers.
 * @returns An async generator of `delta`, `tool_call` and a terminal `done` event.
 * @throws Error when the stream emits an `error` event or when
 *   `getFailureMessage` reports a failure for the round's terminal response.
 */
export async function* streamWithResponsesApi(params: ToolAwareCompletionParams): AsyncGenerator<ToolAwareStreamingEvent> {
  const enabledTools = getEnabledChatTools(params);
  const input: any[] = normalizeInput(params.messages, params.userLocation, params);
  const rawResponses: unknown[] = [];
  const toolEvents: ToolExecutionEvent[] = [];
  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let sawUsage = false;
  let totalToolCalls = 0;
  let danglingToolIntentRetries = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
    const eventStream = await params.client.responses.create({
      model: params.model,
      input,
      temperature: params.temperature,
      max_output_tokens: params.maxTokens,
      tools: toResponsesTools(enabledTools),
      tool_choice: "auto",
      parallel_tool_calls: true,
      store: true,
      stream: true,
    } as any);

    // Per-round state, reset for every request.
    let fullRoundText = ""; // every text delta received this round
    let emittedText = ""; // the subset already forwarded to the consumer
    let sawFunctionCall = false;
    let textStreamingOpen = false;
    let terminalResponse: any = null;
    const finishedItems: any[] = [];

    for await (const event of eventStream as any as AsyncIterable<any>) {
      rawResponses.push(event);

      switch (event?.type) {
        case "response.output_text.delta": {
          if (typeof event.delta !== "string") {
            break;
          }
          fullRoundText += event.delta;
          // Forward text only while a message item is open and no tool call
          // has shown up in this round.
          if (textStreamingOpen && !sawFunctionCall && event.delta.length > 0) {
            emittedText += event.delta;
            yield { type: "delta", text: event.delta };
          }
          break;
        }

        case "response.output_item.added": {
          if (!event.item) {
            break;
          }
          if (event.item.type === "function_call") {
            sawFunctionCall = true;
            textStreamingOpen = false;
          } else if (event.item.type === "message" && !sawFunctionCall) {
            textStreamingOpen = true;
          }
          break;
        }

        case "response.output_item.done": {
          if (!event.item) {
            break;
          }
          // Slot by output_index when provided; holes are filtered out later.
          finishedItems[event.output_index ?? finishedItems.length] = event.item;
          if (event.item.type === "function_call") {
            sawFunctionCall = true;
            textStreamingOpen = false;
          }
          break;
        }

        // All three terminal events carry the response object and its usage.
        case "response.completed":
        case "response.failed":
        case "response.incomplete": {
          terminalResponse = event.response;
          sawUsage = mergeUsage(usageAcc, event.response?.usage) || sawUsage;
          break;
        }

        case "error": {
          throw new Error(event.message ?? "Responses stream failed.");
        }
      }
    }

    const failureMessage = getFailureMessage(terminalResponse);
    if (failureMessage) {
      throw new Error(failureMessage);
    }

    // Prefer the output list from the terminal response; fall back to the
    // items collected from `response.output_item.done` events.
    const reportedItems = getOutputItems(terminalResponse);
    const roundItems = reportedItems.length ? reportedItems : finishedItems.filter(Boolean);
    const toolCalls = normalizeToolCalls(roundItems, round);

    if (toolCalls.length) {
      totalToolCalls += toolCalls.length;
      // Echo the model's output items back so the next request sees the calls
      // that the outputs below answer.
      input.push(...roundItems);

      for (const call of toolCalls) {
        const { event: initiatedEvent, execution } = prepareToolCallExecution(call);
        yield { type: "tool_call", event: initiatedEvent };

        const { event: finishedEvent, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
        toolEvents.push(finishedEvent);
        yield { type: "tool_call", event: finishedEvent };

        input.push({
          type: "function_call_output",
          call_id: call.id,
          output: JSON.stringify(toolResult),
        });
      }
      continue;
    }

    const text = extractText(terminalResponse, fullRoundText);

    // A retry is only attempted when nothing from this round has reached the
    // consumer yet.
    const shouldRetryDanglingIntent =
      !emittedText &&
      danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES &&
      looksLikeDanglingToolIntent(text);
    if (shouldRetryDanglingIntent) {
      danglingToolIntentRetries += 1;
      appendDanglingToolIntentCorrection(input, text);
      continue;
    }

    // Emit whatever part of the final text was not already forwarded.
    const remainder = getUnstreamedText(text, emittedText);
    if (remainder) {
      yield { type: "delta", text: remainder };
    }

    yield {
      type: "done",
      result: {
        text,
        usage: sawUsage ? usageAcc : undefined,
        raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, api: "responses" },
        toolEvents,
      },
    };
    return;
  }

  // Every round ended in tool calls: finish with the fixed fallback message.
  yield {
    type: "done",
    result: {
      text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
      usage: sawUsage ? usageAcc : undefined,
      raw: { streamed: true, responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true, api: "responses" },
      toolEvents,
    },
  };
}