big backend refactor
This commit is contained in:
@@ -4,20 +4,14 @@ import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { promisify } from "node:util";
|
||||
import { convert as htmlToText } from "html-to-text";
|
||||
import type OpenAI from "openai";
|
||||
import { z } from "zod";
|
||||
import { buildBrowserLikeNavigationHeaders } from "../browser-fetch-headers.js";
|
||||
import { env } from "../env.js";
|
||||
import { exaClient } from "../search/exa.js";
|
||||
import { searchSearxng } from "../search/searxng.js";
|
||||
import {
|
||||
buildOpenAIConversationMessage,
|
||||
buildOpenAIResponsesInputMessage,
|
||||
buildSystemPromptAugmentationMessage,
|
||||
} from "./message-content.js";
|
||||
import type { ChatMessage } from "./types.js";
|
||||
|
||||
const MAX_TOOL_ROUNDS = env.CHAT_MAX_TOOL_ROUNDS;
|
||||
export const MAX_TOOL_ROUNDS = env.CHAT_MAX_TOOL_ROUNDS;
|
||||
const DEFAULT_WEB_RESULTS = 5;
|
||||
const MAX_WEB_RESULTS = 10;
|
||||
const DEFAULT_FETCH_MAX_CHARACTERS = 12_000;
|
||||
@@ -30,7 +24,7 @@ const MAX_SHELL_COMMAND_CHARACTERS = 20_000;
|
||||
const DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS = 24_000;
|
||||
const MAX_SHELL_MAX_OUTPUT_CHARACTERS = 80_000;
|
||||
const REMOTE_EXEC_MAX_BUFFER_BYTES = 1_000_000;
|
||||
const MAX_DANGLING_TOOL_INTENT_RETRIES = 1;
|
||||
export const MAX_DANGLING_TOOL_INTENT_RETRIES = 1;
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
@@ -220,7 +214,7 @@ function getEnabledToolSet(params: Pick<ToolAwareCompletionParams, "enabledTools
|
||||
return new Set(normalizeEnabledChatTools(params.enabledTools));
|
||||
}
|
||||
|
||||
function getEnabledChatTools(params: Pick<ToolAwareCompletionParams, "enabledTools">) {
|
||||
export function getEnabledChatTools(params: Pick<ToolAwareCompletionParams, "enabledTools">) {
|
||||
const enabled = getEnabledToolSet(params);
|
||||
return CHAT_TOOLS.filter((tool) => {
|
||||
const name = getToolName(tool);
|
||||
@@ -228,19 +222,6 @@ function getEnabledChatTools(params: Pick<ToolAwareCompletionParams, "enabledToo
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Converts tool definitions into the flat shape passed as `tools` to `responses.create`.
 *
 * Entries whose `type` is `"function"` have their nested `function` fields
 * (`name`, `description`, `parameters`) lifted to the top level and get
 * `strict: false`; every other entry is passed through untouched.
 *
 * @param tools - Tool definitions, typically the enabled chat tools.
 * @returns A new array with one converted (or passed-through) entry per input.
 */
function toResponsesChatTools(tools: any[]) {
  const flattenFunctionTool = (entry: any) => ({
    type: "function",
    name: entry.function.name,
    description: entry.function.description,
    parameters: entry.function.parameters,
    strict: false,
  });

  return tools.map((entry) => (entry?.type === "function" ? flattenFunctionTool(entry) : entry));
}
|
||||
|
||||
export const CHAT_TOOL_SYSTEM_PROMPT =
|
||||
"You can use tools to gather up-to-date web information when needed. " +
|
||||
"Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page. " +
|
||||
@@ -254,18 +235,18 @@ export const CHAT_TOOL_SYSTEM_PROMPT =
|
||||
: "") +
|
||||
"Do not fabricate tool outputs; reason only from provided tool results.";
|
||||
|
||||
type ToolRunOutcome = {
|
||||
/** Result of running a single tool: an `ok` flag plus arbitrary tool-specific fields. */
export type ToolRunOutcome = {
|
||||
// Success flag; e.g. an unknown tool name yields `{ ok: false, error }`.
ok: boolean;
|
||||
// Any further tool-specific payload keys.
[key: string]: unknown;
|
||||
};
|
||||
|
||||
type ToolAwareUsage = {
|
||||
/** Token usage totals accumulated across the model calls of one chat run. */
export type ToolAwareUsage = {
|
||||
// Sum of the provider-reported input (prompt) tokens.
inputTokens?: number;
|
||||
// Sum of the provider-reported output (completion) tokens.
outputTokens?: number;
|
||||
// Sum of the provider-reported total tokens.
totalTokens?: number;
|
||||
};
|
||||
|
||||
type ToolAwareCompletionResult = {
|
||||
export type ToolAwareCompletionResult = {
|
||||
text: string;
|
||||
usage?: ToolAwareUsage;
|
||||
raw: unknown;
|
||||
@@ -277,8 +258,8 @@ export type ToolAwareStreamingEvent =
|
||||
| { type: "tool_call"; event: ToolExecutionEvent }
|
||||
| { type: "done"; result: ToolAwareCompletionResult };
|
||||
|
||||
type ToolAwareCompletionParams = {
|
||||
client: OpenAI;
|
||||
export type ToolAwareCompletionParams = {
|
||||
client: any;
|
||||
model: string;
|
||||
messages: ChatMessage[];
|
||||
enabledTools?: string[];
|
||||
@@ -440,7 +421,7 @@ function extractHtmlTitle(html: string) {
|
||||
);
|
||||
}
|
||||
|
||||
function buildChatToolSystemPrompt(params: Pick<ToolAwareCompletionParams, "enabledTools">) {
|
||||
export function buildChatToolSystemPrompt(params: Pick<ToolAwareCompletionParams, "enabledTools">) {
|
||||
const enabled = getEnabledToolSet(params);
|
||||
return (
|
||||
"You can use tools to gather up-to-date web information when needed. " +
|
||||
@@ -458,22 +439,6 @@ function buildChatToolSystemPrompt(params: Pick<ToolAwareCompletionParams, "enab
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Builds the initial Chat Completions message list for a tool-aware run.
 *
 * The result starts with a system message carrying the tool prompt for the
 * enabled tools, followed by the message produced by
 * `buildSystemPromptAugmentationMessage(userLocation)`, followed by the
 * converted chat history.
 *
 * @param messages - Chat history to convert.
 * @param userLocation - Optional location forwarded to the augmentation message builder.
 * @param params - Carries `enabledTools`, used to build the tool system prompt.
 * @returns The ordered message array to send to the model.
 */
function normalizeIncomingMessages(messages: ChatMessage[], userLocation?: string, params: Pick<ToolAwareCompletionParams, "enabledTools"> = {}) {
  const history = messages.map((entry) => buildOpenAIConversationMessage(entry));
  const toolPrompt = { role: "system", content: buildChatToolSystemPrompt(params) };
  const augmentation = buildSystemPromptAugmentationMessage(userLocation);

  return [toolPrompt, augmentation, ...history];
}
|
||||
|
||||
/**
 * Builds the Chat Completions message list for a run without tools.
 *
 * The message from `buildSystemPromptAugmentationMessage(userLocation)` comes
 * first, followed by the converted chat history; no tool system prompt is added.
 *
 * @param messages - Chat history to convert.
 * @param userLocation - Optional location forwarded to the augmentation message builder.
 * @returns The ordered message array to send to the model.
 */
function normalizePlainIncomingMessages(messages: ChatMessage[], userLocation?: string) {
  const augmentation = buildSystemPromptAugmentationMessage(userLocation);
  const history = messages.map((entry) => buildOpenAIConversationMessage(entry));

  return [augmentation, ...history];
}
|
||||
|
||||
/**
 * Builds the initial `input` array for a tool-aware Responses API run.
 *
 * Same layout as the Chat Completions variant (tool system prompt, then the
 * augmentation message, then history), but history entries are converted with
 * `buildOpenAIResponsesInputMessage`.
 *
 * @param messages - Chat history to convert.
 * @param userLocation - Optional location forwarded to the augmentation message builder.
 * @param params - Carries `enabledTools`, used to build the tool system prompt.
 * @returns The ordered input array to send to the model.
 */
function normalizeIncomingResponsesInput(messages: ChatMessage[], userLocation?: string, params: Pick<ToolAwareCompletionParams, "enabledTools"> = {}) {
  const history = messages.map((entry) => buildOpenAIResponsesInputMessage(entry));
  const toolPrompt = { role: "system", content: buildChatToolSystemPrompt(params) };
  const augmentation = buildSystemPromptAugmentationMessage(userLocation);

  return [toolPrompt, augmentation, ...history];
}
|
||||
|
||||
async function runExaWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
|
||||
const exa = exaClient();
|
||||
const response = await exa.search(args.query, {
|
||||
@@ -842,7 +807,7 @@ async function executeTool(name: string, args: unknown): Promise<ToolRunOutcome>
|
||||
return { ok: false, error: `Unknown tool: ${name}` };
|
||||
}
|
||||
|
||||
function parseToolArgs(raw: unknown) {
|
||||
export function parseToolArgs(raw: unknown) {
|
||||
if (typeof raw !== "string") return {};
|
||||
const trimmed = raw.trim();
|
||||
if (!trimmed) return {};
|
||||
@@ -871,7 +836,7 @@ function buildEventArgs(name: string, args: Record<string, unknown>) {
|
||||
return args;
|
||||
}
|
||||
|
||||
function looksLikeDanglingToolIntent(text: string) {
|
||||
export function looksLikeDanglingToolIntent(text: string) {
|
||||
const normalized = text
|
||||
.toLowerCase()
|
||||
.replace(/[`*_>#-]/g, " ")
|
||||
@@ -887,7 +852,7 @@ function looksLikeDanglingToolIntent(text: string) {
|
||||
);
|
||||
}
|
||||
|
||||
function appendDanglingToolIntentCorrection(conversation: any[], text: string) {
|
||||
export function appendDanglingToolIntentCorrection(conversation: any[], text: string) {
|
||||
conversation.push({ role: "assistant", content: text });
|
||||
conversation.push({
|
||||
role: "system",
|
||||
@@ -896,7 +861,7 @@ function appendDanglingToolIntentCorrection(conversation: any[], text: string) {
|
||||
});
|
||||
}
|
||||
|
||||
function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
|
||||
export function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
|
||||
if (!usage) return false;
|
||||
acc.inputTokens += usage.prompt_tokens ?? 0;
|
||||
acc.outputTokens += usage.completion_tokens ?? 0;
|
||||
@@ -904,79 +869,19 @@ function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Adds a Responses API usage payload (`input_tokens`, `output_tokens`,
 * `total_tokens`) onto the running totals in `acc`, mutating `acc` in place.
 *
 * @param acc - Running totals to update.
 * @param usage - Usage object from a response; missing or nullish counters count as 0.
 * @returns `true` when a usage object was present and merged, `false` otherwise.
 */
function mergeResponsesUsage(acc: Required<ToolAwareUsage>, usage: any) {
  if (usage) {
    acc.inputTokens = acc.inputTokens + (usage.input_tokens ?? 0);
    acc.outputTokens = acc.outputTokens + (usage.output_tokens ?? 0);
    acc.totalTokens = acc.totalTokens + (usage.total_tokens ?? 0);
    return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Returns the `output` array of a Responses API response.
 *
 * @param response - A response object, possibly null/undefined.
 * @returns `response.output` when it is an array, otherwise a new empty array.
 */
function getResponseOutputItems(response: any) {
  const items = response?.output;
  if (!Array.isArray(items)) return [];
  return items;
}
|
||||
|
||||
/**
 * Extracts the assistant-visible text from a Responses API response.
 *
 * Resolution order:
 * 1. a non-empty string `output_text` on the response;
 * 2. the concatenation of `output_text` and `refusal` parts found in `message`
 *    items of `response.output`;
 * 3. `fallback`.
 *
 * An empty-string `output_text` is deliberately NOT treated as final: it falls
 * through so refusal text in `output` and the caller-provided fallback are not
 * silently discarded.
 *
 * @param response - A response object, possibly null/undefined.
 * @param fallback - Text to return when nothing could be extracted (default `""`).
 * @returns The extracted text, or `fallback` when the response carries none.
 */
function extractResponsesText(response: any, fallback = "") {
  const directText = response?.output_text;
  if (typeof directText === "string" && directText.length > 0) return directText;

  // Tolerate responses without a well-formed `output` array.
  const outputItems: any[] = Array.isArray(response?.output) ? response.output : [];

  const parts: string[] = [];
  for (const item of outputItems) {
    if (item?.type !== "message" || !Array.isArray(item.content)) continue;
    for (const content of item.content) {
      if (content?.type === "output_text" && typeof content.text === "string") {
        parts.push(content.text);
      } else if (content?.type === "refusal" && typeof content.refusal === "string") {
        parts.push(content.refusal);
      }
    }
  }

  return parts.join("") || fallback;
}
|
||||
|
||||
/**
 * Flattens a Chat Completions message's `content` into a single string.
 *
 * String content is returned as-is. Array content is concatenated part by
 * part, taking a string part itself, else its string `text`, else its string
 * `content`; parts matching none of these contribute nothing. Any other
 * content shape yields `""`.
 *
 * @param message - A message object, possibly null/undefined.
 * @returns The flattened text.
 */
function extractChatCompletionContent(message: any) {
  const body = message?.content;
  if (typeof body === "string") return body;
  if (!Array.isArray(body)) return "";

  let combined = "";
  for (const piece of body) {
    if (typeof piece === "string") {
      combined += piece;
    } else if (typeof piece?.text === "string") {
      combined += piece.text;
    } else if (typeof piece?.content === "string") {
      combined += piece.content;
    }
  }
  return combined;
}
|
||||
|
||||
function getUnstreamedText(finalText: string, streamedText: string) {
|
||||
/**
 * Computes the part of `finalText` that has not yet been emitted as stream deltas.
 *
 * @param finalText - The complete text of the round.
 * @param streamedText - The text already sent to the consumer.
 * @returns The suffix of `finalText` after `streamedText` when `finalText`
 *   starts with it; all of `finalText` when nothing was streamed; `""` when
 *   `finalText` is empty or does not start with `streamedText`.
 */
export function getUnstreamedText(finalText: string, streamedText: string) {
  if (finalText && streamedText) {
    // Only a strict continuation can be appended; on divergence emit nothing.
    const isContinuation = finalText.startsWith(streamedText);
    return isContinuation ? finalText.substring(streamedText.length) : "";
  }
  // Either there is no final text at all, or nothing has been streamed yet.
  return finalText ? finalText : "";
}
|
||||
|
||||
/**
 * Derives an error message for a Responses API response that did not complete.
 *
 * Only responses with status `"failed"` or `"incomplete"` are failures. The
 * message is, in order of preference: the response's non-empty
 * `error.message`, `Response incomplete: <reason>` built from a non-empty
 * `incomplete_details.reason`, or a generic `Response <status>.`.
 *
 * Empty strings are treated as absent so a failure always yields a non-empty
 * message; callers use a truthiness check, and an empty message would
 * otherwise make a failed response look successful.
 *
 * @param response - A response object, possibly null/undefined.
 * @returns A non-empty failure message, or `null` when the response is not a failure.
 */
function getResponseFailureMessage(response: any) {
  const status = response?.status;
  if (status !== "failed" && status !== "incomplete") return null;

  const errorMessage = response?.error?.message;
  if (typeof errorMessage === "string" && errorMessage.length > 0) return errorMessage;

  const incompleteReason = response?.incomplete_details?.reason;
  if (typeof incompleteReason === "string" && incompleteReason.length > 0) {
    return `Response incomplete: ${incompleteReason}`;
  }

  return `Response ${status}.`;
}
|
||||
|
||||
/**
 * Picks the `function_call` items out of a Responses API output list and
 * converts them to the provider-neutral tool-call shape.
 *
 * @param outputItems - Output items of one response.
 * @param round - Zero-based tool round, used only when synthesizing a missing id.
 * @returns One entry per `function_call` item, in order. The id prefers
 *   `call_id`, then `id`, then `tool_call_<round>_<index>` where index counts
 *   function calls only; a missing name becomes `"unknown_tool"` and missing
 *   arguments become `"{}"`.
 */
function normalizeResponsesToolCalls(outputItems: any[], round: number): NormalizedToolCall[] {
  const calls: NormalizedToolCall[] = [];

  for (const item of outputItems) {
    if (item?.type !== "function_call") continue;

    // Position among function calls only, matching the id fallback numbering.
    const position = calls.length;
    calls.push({
      id: item.call_id ?? item.id ?? `tool_call_${round}_${position}`,
      name: item.name ?? "unknown_tool",
      arguments: item.arguments ?? "{}",
    });
  }

  return calls;
}
|
||||
|
||||
type NormalizedToolCall = {
|
||||
/** Provider-neutral description of one tool call requested by the model. */
export type NormalizedToolCall = {
|
||||
// Provider call id, or a synthesized `tool_call_<round>_<index>` when the provider gave none.
id: string;
|
||||
// Tool name; the normalizers fall back to "unknown_tool" when the provider gave none.
name: string;
|
||||
// Raw argument string as produced by the model (not yet parsed).
arguments: string;
|
||||
};
|
||||
|
||||
function normalizeModelToolCalls(toolCalls: any[], round: number): NormalizedToolCall[] {
|
||||
export function normalizeModelToolCalls(toolCalls: any[], round: number): NormalizedToolCall[] {
|
||||
return toolCalls.map((call: any, index: number) => ({
|
||||
id: call?.id ?? `tool_call_${round}_${index}`,
|
||||
name: call?.function?.name ?? "unknown_tool",
|
||||
@@ -984,7 +889,7 @@ function normalizeModelToolCalls(toolCalls: any[], round: number): NormalizedToo
|
||||
}));
|
||||
}
|
||||
|
||||
type PreparedToolCallExecution = {
|
||||
export type PreparedToolCallExecution = {
|
||||
startedAtMs: number;
|
||||
startedAt: string;
|
||||
parsedArgs: Record<string, unknown>;
|
||||
@@ -992,7 +897,7 @@ type PreparedToolCallExecution = {
|
||||
parseError?: unknown;
|
||||
};
|
||||
|
||||
function prepareToolCallExecution(call: NormalizedToolCall): { event: ToolExecutionEvent; execution: PreparedToolCallExecution } {
|
||||
export function prepareToolCallExecution(call: NormalizedToolCall): { event: ToolExecutionEvent; execution: PreparedToolCallExecution } {
|
||||
const startedAtMs = Date.now();
|
||||
const startedAt = new Date(startedAtMs).toISOString();
|
||||
let parsedArgs: Record<string, unknown> = {};
|
||||
@@ -1024,7 +929,7 @@ function prepareToolCallExecution(call: NormalizedToolCall): { event: ToolExecut
|
||||
};
|
||||
}
|
||||
|
||||
async function executeToolCallAndBuildEvent(
|
||||
export async function executeToolCallAndBuildEvent(
|
||||
call: NormalizedToolCall,
|
||||
execution: PreparedToolCallExecution,
|
||||
params: ToolAwareCompletionParams
|
||||
@@ -1068,488 +973,3 @@ async function executeToolCallAndBuildEvent(
|
||||
|
||||
return { event, toolResult };
|
||||
}
|
||||
|
||||
/**
 * Runs a non-streaming tool loop against the Responses API (`client.responses.create`).
 *
 * Each round sends the accumulated input. If the response contains
 * `function_call` items, they are executed one after another and their outputs
 * appended as `function_call_output` items before the next round. The loop
 * ends when a response has no tool calls, or after `MAX_TOOL_ROUNDS` rounds,
 * in which case a fixed "tool-call limit" message is returned.
 *
 * When a tool-free answer is flagged by `looksLikeDanglingToolIntent`, a
 * correction is appended and the round is retried, at most
 * `MAX_DANGLING_TOOL_INTENT_RETRIES` times.
 *
 * @param params - Client, model, messages and sampling options for the run.
 * @returns Final text, accumulated usage (only if any response reported usage),
 *   the raw responses, and the events of all executed tools.
 * @throws Error when a response has status `failed` or `incomplete`.
 */
export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
  const availableTools = getEnabledChatTools(params);
  const transcript: any[] = normalizeIncomingResponsesInput(params.messages, params.userLocation, params);
  const responseLog: unknown[] = [];
  const executedTools: ToolExecutionEvent[] = [];
  const usageTotals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let usageReported = false;
  let toolCallCount = 0;
  let correctionsIssued = 0;

  // Builds the result from the loop state as it is at the moment of the call.
  const finish = (text: string, extraRaw: Record<string, unknown> = {}): ToolAwareCompletionResult => ({
    text,
    usage: usageReported ? usageTotals : undefined,
    raw: { responses: responseLog, toolCallsUsed: toolCallCount, ...extraRaw, api: "responses" },
    toolEvents: executedTools,
  });

  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
    const request: any = {
      model: params.model,
      input: transcript,
      temperature: params.temperature,
      max_output_tokens: params.maxTokens,
      tools: toResponsesChatTools(availableTools),
      tool_choice: "auto",
      parallel_tool_calls: true,
      // Tool loops pass response output items back as input; reasoning items need persistence.
      store: true,
    };
    const response = await params.client.responses.create(request);
    responseLog.push(response);
    if (mergeResponsesUsage(usageTotals, response?.usage)) {
      usageReported = true;
    }

    const failure = getResponseFailureMessage(response);
    if (failure) {
      throw new Error(failure);
    }

    const items = getResponseOutputItems(response);
    const calls = normalizeResponsesToolCalls(items, round);

    if (calls.length === 0) {
      const answer = extractResponsesText(response);
      const mayRetry = correctionsIssued < MAX_DANGLING_TOOL_INTENT_RETRIES;
      if (mayRetry && looksLikeDanglingToolIntent(answer)) {
        correctionsIssued += 1;
        appendDanglingToolIntentCorrection(transcript, answer);
        continue;
      }
      return finish(answer);
    }

    toolCallCount += calls.length;
    // Echo the model's own output items back so the next round sees its calls.
    transcript.push(...items);

    for (const call of calls) {
      const prepared = prepareToolCallExecution(call);
      const outcome = await executeToolCallAndBuildEvent(call, prepared.execution, params);
      executedTools.push(outcome.event);

      transcript.push({
        type: "function_call_output",
        call_id: call.id,
        output: JSON.stringify(outcome.toolResult),
      });
    }
  }

  return finish("I reached the tool-call limit while gathering information. Please narrow the request and try again.", {
    toolCallLimitReached: true,
  });
}
|
||||
|
||||
/**
 * Runs a non-streaming tool loop against the Chat Completions API
 * (`client.chat.completions.create`).
 *
 * Each round sends the conversation. If the assistant message carries
 * `tool_calls`, the calls are echoed back as an assistant message, executed
 * one after another, and answered with `tool` messages before the next round.
 * The loop ends when a message has no tool calls, when the completion has no
 * message at all (empty text, `missingMessage: true` in `raw`), or after
 * `MAX_TOOL_ROUNDS` rounds (fixed "tool-call limit" text).
 *
 * Assistant text is read through `extractChatCompletionContent`, the same
 * helper the plain (tool-free) path uses, so array-form `content` is not
 * silently dropped here.
 *
 * @param params - Client, model, messages and sampling options for the run.
 * @returns Final text, accumulated usage (only if any completion reported
 *   usage), the raw completions, and the events of all executed tools.
 */
export async function runToolAwareChatCompletions(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
  const enabledTools = getEnabledChatTools(params);
  const conversation: any[] = normalizeIncomingMessages(params.messages, params.userLocation, params);
  const rawResponses: unknown[] = [];
  const toolEvents: ToolExecutionEvent[] = [];
  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let sawUsage = false;
  let totalToolCalls = 0;
  let danglingToolIntentRetries = 0;

  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
    const completion = await params.client.chat.completions.create({
      model: params.model,
      messages: conversation,
      temperature: params.temperature,
      max_tokens: params.maxTokens,
      tools: enabledTools,
      tool_choice: "auto",
    } as any);
    rawResponses.push(completion);
    sawUsage = mergeUsage(usageAcc, completion?.usage) || sawUsage;

    const message = completion?.choices?.[0]?.message;
    if (!message) {
      return {
        text: "",
        usage: sawUsage ? usageAcc : undefined,
        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, missingMessage: true },
        toolEvents,
      };
    }

    // Handles both string and array-form content; yields "" for anything else.
    const text = extractChatCompletionContent(message);

    const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
    if (!toolCalls.length) {
      if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
        danglingToolIntentRetries += 1;
        appendDanglingToolIntentCorrection(conversation, text);
        continue;
      }
      return {
        text,
        usage: sawUsage ? usageAcc : undefined,
        raw: { responses: rawResponses, toolCallsUsed: totalToolCalls },
        toolEvents,
      };
    }

    const normalizedToolCalls = normalizeModelToolCalls(toolCalls, round);
    totalToolCalls += normalizedToolCalls.length;

    // Echo the tool calls with the normalized ids so the `tool` replies below match them.
    const assistantToolCallMessage: any = {
      role: "assistant",
      tool_calls: normalizedToolCalls.map((call) => ({
        id: call.id,
        type: "function",
        function: {
          name: call.name,
          arguments: call.arguments,
        },
      })),
    };
    // Only attach content when the assistant actually said something alongside the calls.
    if (text.length) {
      assistantToolCallMessage.content = text;
    }
    conversation.push(assistantToolCallMessage);

    for (const call of normalizedToolCalls) {
      const { execution } = prepareToolCallExecution(call);
      const { event, toolResult } = await executeToolCallAndBuildEvent(call, execution, params);
      toolEvents.push(event);

      conversation.push({
        role: "tool",
        tool_call_id: call.id,
        content: JSON.stringify(toolResult),
      });
    }
  }

  return {
    text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.",
    usage: sawUsage ? usageAcc : undefined,
    raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true },
    toolEvents,
  };
}
|
||||
|
||||
/**
 * Performs a single non-streaming Chat Completions call without any tools.
 *
 * @param params - Client, model, messages and sampling options for the call.
 * @returns The first choice's flattened text, the reported usage (if any), the
 *   raw completion, and an empty tool-event list.
 */
export async function runPlainChatCompletions(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
  const request: any = {
    model: params.model,
    messages: normalizePlainIncomingMessages(params.messages, params.userLocation),
    temperature: params.temperature,
    max_tokens: params.maxTokens,
  };
  const completion = await params.client.chat.completions.create(request);

  const usageTotals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  const usageReported = mergeUsage(usageTotals, completion?.usage);
  const firstMessage = completion?.choices?.[0]?.message;
  const text = extractChatCompletionContent(firstMessage);

  const result: ToolAwareCompletionResult = {
    text,
    usage: usageReported ? usageTotals : undefined,
    raw: { response: completion, api: "chat.completions" },
    toolEvents: [],
  };
  return result;
}
|
||||
|
||||
/**
 * Streaming variant of the Responses API tool loop.
 *
 * Yields `delta` events for assistant text, `tool_call` events before and
 * after each tool execution, and exactly one final `done` event carrying the
 * result.
 *
 * Text deltas are forwarded live only after a `message` output item has been
 * added and while no `function_call` item has been seen in the round. Text
 * that was collected but not forwarded is emitted in one piece before `done`.
 * A dangling-tool-intent retry is attempted only if nothing was forwarded in
 * that round.
 *
 * @param params - Client, model, messages and sampling options for the run.
 * @throws Error on a stream `error` event or a `failed`/`incomplete` response.
 */
export async function* runToolAwareOpenAIChatStream(
  params: ToolAwareCompletionParams
): AsyncGenerator<ToolAwareStreamingEvent> {
  const availableTools = getEnabledChatTools(params);
  const transcript: any[] = normalizeIncomingResponsesInput(params.messages, params.userLocation, params);
  const eventLog: unknown[] = [];
  const executedTools: ToolExecutionEvent[] = [];
  const usageTotals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let usageReported = false;
  let toolCallCount = 0;
  let correctionsIssued = 0;

  // Builds the terminal event from the loop state as it is at the moment of the call.
  const finish = (text: string, extraRaw: Record<string, unknown> = {}): ToolAwareStreamingEvent => ({
    type: "done",
    result: {
      text,
      usage: usageReported ? usageTotals : undefined,
      raw: { streamed: true, responses: eventLog, toolCallsUsed: toolCallCount, ...extraRaw, api: "responses" },
      toolEvents: executedTools,
    },
  });

  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
    const request: any = {
      model: params.model,
      input: transcript,
      temperature: params.temperature,
      max_output_tokens: params.maxTokens,
      tools: toResponsesChatTools(availableTools),
      tool_choice: "auto",
      parallel_tool_calls: true,
      // Tool loops pass response output items back as input; reasoning items need persistence.
      store: true,
      stream: true,
    };
    const stream = await params.client.responses.create(request);

    let fullText = "";
    let emittedText = "";
    let sawToolCalls = false;
    let textMayStream = false;
    let finalResponse: any | null = null;
    const finishedItems: any[] = [];

    for await (const event of stream as any as AsyncIterable<any>) {
      eventLog.push(event);

      switch (event?.type) {
        case "response.output_text.delta": {
          if (typeof event.delta !== "string") break;
          fullText += event.delta;
          if (textMayStream && !sawToolCalls && event.delta.length) {
            emittedText += event.delta;
            yield { type: "delta", text: event.delta };
          }
          break;
        }
        case "response.output_item.added": {
          if (!event.item) break;
          if (event.item.type === "function_call") {
            sawToolCalls = true;
            textMayStream = false;
          } else if (event.item.type === "message" && !sawToolCalls) {
            textMayStream = true;
          }
          break;
        }
        case "response.output_item.done": {
          if (!event.item) break;
          // Keep items at their reported position; append when no index is given.
          finishedItems[event.output_index ?? finishedItems.length] = event.item;
          if (event.item.type === "function_call") {
            sawToolCalls = true;
            textMayStream = false;
          }
          break;
        }
        case "response.completed":
        case "response.failed":
        case "response.incomplete": {
          finalResponse = event.response;
          if (mergeResponsesUsage(usageTotals, event.response?.usage)) {
            usageReported = true;
          }
          break;
        }
        case "error":
          throw new Error(event.message ?? "OpenAI Responses stream failed.");
        default:
          break;
      }
    }

    const failure = getResponseFailureMessage(finalResponse);
    if (failure) {
      throw new Error(failure);
    }

    // Prefer the items of the terminal response; fall back to those collected from item events.
    const reportedItems = getResponseOutputItems(finalResponse);
    const roundItems = reportedItems.length ? reportedItems : finishedItems.filter(Boolean);
    const calls = normalizeResponsesToolCalls(roundItems, round);

    if (calls.length === 0) {
      const answer = extractResponsesText(finalResponse, fullText);
      const mayRetry = !emittedText && correctionsIssued < MAX_DANGLING_TOOL_INTENT_RETRIES;
      if (mayRetry && looksLikeDanglingToolIntent(answer)) {
        correctionsIssued += 1;
        appendDanglingToolIntentCorrection(transcript, answer);
        continue;
      }

      const remainder = getUnstreamedText(answer, emittedText);
      if (remainder) {
        yield { type: "delta", text: remainder };
      }
      yield finish(answer);
      return;
    }

    toolCallCount += calls.length;
    transcript.push(...roundItems);

    for (const call of calls) {
      const prepared = prepareToolCallExecution(call);
      yield { type: "tool_call", event: prepared.event };

      const outcome = await executeToolCallAndBuildEvent(call, prepared.execution, params);
      executedTools.push(outcome.event);
      yield { type: "tool_call", event: outcome.event };

      transcript.push({
        type: "function_call_output",
        call_id: call.id,
        output: JSON.stringify(outcome.toolResult),
      });
    }
  }

  yield finish("I reached the tool-call limit while gathering information. Please narrow the request and try again.", {
    toolCallLimitReached: true,
  });
}
|
||||
|
||||
/**
 * Streaming variant of the Chat Completions tool loop.
 *
 * Yields `delta` events for assistant text, `tool_call` events before and
 * after each tool execution, and exactly one final `done` event carrying the
 * result.
 *
 * Text deltas are forwarded live until the first tool-call fragment of the
 * round arrives. Tool-call fragments are accumulated per `index` and turned
 * into complete calls once the stream ends. A dangling-tool-intent retry is
 * attempted only if nothing was forwarded in that round.
 *
 * @param params - Client, model, messages and sampling options for the run.
 */
export async function* runToolAwareChatCompletionsStream(
  params: ToolAwareCompletionParams
): AsyncGenerator<ToolAwareStreamingEvent> {
  const availableTools = getEnabledChatTools(params);
  const transcript: any[] = normalizeIncomingMessages(params.messages, params.userLocation, params);
  const chunkLog: unknown[] = [];
  const executedTools: ToolExecutionEvent[] = [];
  const usageTotals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let usageReported = false;
  let toolCallCount = 0;
  let correctionsIssued = 0;

  // Builds the terminal event from the loop state as it is at the moment of the call.
  const finish = (text: string, extraRaw: Record<string, unknown> = {}): ToolAwareStreamingEvent => ({
    type: "done",
    result: {
      text,
      usage: usageReported ? usageTotals : undefined,
      raw: { streamed: true, responses: chunkLog, toolCallsUsed: toolCallCount, ...extraRaw },
      toolEvents: executedTools,
    },
  });

  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
    const request: any = {
      model: params.model,
      messages: transcript,
      temperature: params.temperature,
      max_tokens: params.maxTokens,
      tools: availableTools,
      tool_choice: "auto",
      stream: true,
      stream_options: { include_usage: true },
    };
    const stream = await params.client.chat.completions.create(request);

    let fullText = "";
    let emittedText = "";
    let sawToolCalls = false;
    const partialCalls = new Map<number, { id?: string; name?: string; arguments: string }>();

    for await (const chunk of stream as any as AsyncIterable<any>) {
      chunkLog.push(chunk);
      if (mergeUsage(usageTotals, chunk?.usage)) {
        usageReported = true;
      }

      const delta = chunk?.choices?.[0]?.delta;

      const piece = delta?.content ?? "";
      if (typeof piece === "string" && piece.length) {
        fullText += piece;
        if (!sawToolCalls) {
          emittedText += piece;
          yield { type: "delta", text: piece };
        }
      }

      const fragments: any[] = Array.isArray(delta?.tool_calls) ? delta.tool_calls : [];
      if (fragments.length) {
        sawToolCalls = true;
      }
      for (const fragment of fragments) {
        // Fragments of one call share an index; a missing index is treated as 0.
        const slot = typeof fragment?.index === "number" ? fragment.index : 0;
        const pending = partialCalls.get(slot) ?? { arguments: "" };
        if (typeof fragment?.id === "string" && fragment.id.length) {
          pending.id = fragment.id;
        }
        if (typeof fragment?.function?.name === "string" && fragment.function.name.length) {
          pending.name = fragment.function.name;
        }
        if (typeof fragment?.function?.arguments === "string" && fragment.function.arguments.length) {
          pending.arguments += fragment.function.arguments;
        }
        partialCalls.set(slot, pending);
      }
    }

    const calls: NormalizedToolCall[] = Array.from(partialCalls.entries())
      .sort(([left], [right]) => left - right)
      .map(([, pending], position) => ({
        id: pending.id ?? `tool_call_${round}_${position}`,
        name: pending.name ?? "unknown_tool",
        arguments: pending.arguments || "{}",
      }));

    if (calls.length === 0) {
      const mayRetry = !emittedText && correctionsIssued < MAX_DANGLING_TOOL_INTENT_RETRIES;
      if (mayRetry && looksLikeDanglingToolIntent(fullText)) {
        correctionsIssued += 1;
        appendDanglingToolIntentCorrection(transcript, fullText);
        continue;
      }

      const remainder = getUnstreamedText(fullText, emittedText);
      if (remainder) {
        yield { type: "delta", text: remainder };
      }
      yield finish(fullText);
      return;
    }

    toolCallCount += calls.length;
    const assistantTurn: any = {
      role: "assistant",
      tool_calls: calls.map((call) => ({
        id: call.id,
        type: "function",
        function: {
          name: call.name,
          arguments: call.arguments,
        },
      })),
    };
    if (fullText) {
      assistantTurn.content = fullText;
    }
    transcript.push(assistantTurn);

    for (const call of calls) {
      const prepared = prepareToolCallExecution(call);
      yield { type: "tool_call", event: prepared.event };

      const outcome = await executeToolCallAndBuildEvent(call, prepared.execution, params);
      executedTools.push(outcome.event);
      yield { type: "tool_call", event: outcome.event };

      transcript.push({
        role: "tool",
        tool_call_id: call.id,
        content: JSON.stringify(outcome.toolResult),
      });
    }
  }

  yield finish("I reached the tool-call limit while gathering information. Please narrow the request and try again.", {
    toolCallLimitReached: true,
  });
}
|
||||
|
||||
/**
 * Streams a single Chat Completions call without any tools.
 *
 * Yields one `delta` event per non-empty text chunk, then a final `done` event
 * with the concatenated text, the reported usage (if any), the raw chunks, and
 * an empty tool-event list.
 *
 * The request sets `stream_options.include_usage`, as the tool-aware streaming
 * path does; without it, streamed chunks carry no usage and the result's
 * `usage` would always be `undefined`.
 *
 * @param params - Client, model, messages and sampling options for the call.
 */
export async function* runPlainChatCompletionsStream(
  params: ToolAwareCompletionParams
): AsyncGenerator<ToolAwareStreamingEvent> {
  const rawResponses: unknown[] = [];
  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let sawUsage = false;
  let text = "";

  const stream = await params.client.chat.completions.create({
    model: params.model,
    messages: normalizePlainIncomingMessages(params.messages, params.userLocation),
    temperature: params.temperature,
    max_tokens: params.maxTokens,
    stream: true,
    // Ask for the trailing usage chunk so token accounting works when streaming.
    stream_options: { include_usage: true },
  } as any);

  for await (const chunk of stream as any as AsyncIterable<any>) {
    rawResponses.push(chunk);
    sawUsage = mergeUsage(usageAcc, chunk?.usage) || sawUsage;

    // The usage chunk has no choices, so the optional chain yields "" there.
    const deltaText = chunk?.choices?.[0]?.delta?.content ?? "";
    if (typeof deltaText === "string" && deltaText.length) {
      text += deltaText;
      yield { type: "delta", text: deltaText };
    }
  }

  yield {
    type: "done",
    result: {
      text,
      usage: sawUsage ? usageAcc : undefined,
      raw: { streamed: true, responses: rawResponses, api: "chat.completions" },
      toolEvents: [],
    },
  };
}
|
||||
|
||||
Reference in New Issue
Block a user