[feature] adds web_search and fetch_url tool calls
This commit is contained in:
517
server/src/llm/chat-tools.ts
Normal file
517
server/src/llm/chat-tools.ts
Normal file
@@ -0,0 +1,517 @@
|
||||
import { convert as htmlToText } from "html-to-text";
|
||||
import type OpenAI from "openai";
|
||||
import { z } from "zod";
|
||||
import { exaClient } from "../search/exa.js";
|
||||
import type { ChatMessage } from "./types.js";
|
||||
|
||||
/** Maximum number of chat-completion rounds attempted before tool use is abandoned. */
const MAX_TOOL_ROUNDS = 4;
|
||||
/** Result count sent to the search backend when the caller omits `numResults`. */
const DEFAULT_WEB_RESULTS = 5;
|
||||
/** Largest `numResults` accepted by validation and advertised in the tool schema. */
const MAX_WEB_RESULTS = 10;
|
||||
/** Character budget for fetched page text when `maxCharacters` is omitted. */
const DEFAULT_FETCH_MAX_CHARACTERS = 12_000;
|
||||
/** Largest `maxCharacters` accepted by validation and advertised in the tool schema. */
const MAX_FETCH_MAX_CHARACTERS = 50_000;
|
||||
/** Delay in milliseconds before the fetch AbortController is triggered. */
const FETCH_TIMEOUT_MS = 12_000;
|
||||
|
||||
/**
 * Runtime validation for the arguments of the `web_search` tool.
 * `.strict()` makes unknown keys a validation error instead of silently dropping them.
 */
const WebSearchArgsSchema = z
|
||||
.object({
|
||||
// Trimmed first, then required to be non-empty.
query: z.string().trim().min(1),
|
||||
// Coerced to a number so numeric strings are accepted; must be an integer in 1..MAX_WEB_RESULTS.
numResults: z.coerce.number().int().min(1).max(MAX_WEB_RESULTS).optional(),
|
||||
// Search mode forwarded to the search client; defaults are applied by the caller, not here.
type: z.enum(["auto", "fast", "instant"]).optional(),
|
||||
// At most 25 non-empty domain strings.
includeDomains: z.array(z.string().trim().min(1)).max(25).optional(),
|
||||
// At most 25 non-empty domain strings.
excludeDomains: z.array(z.string().trim().min(1)).max(25).optional(),
|
||||
})
|
||||
.strict();
|
||||
|
||||
/**
 * Runtime validation for the arguments of the `fetch_url` tool.
 * `.strict()` makes unknown keys a validation error.
 */
const FetchUrlArgsSchema = z
|
||||
.object({
|
||||
// Trimmed, then validated as a URL; the scheme is checked separately by the fetch tool.
url: z.string().trim().url(),
|
||||
// Coerced to a number; must be an integer in 500..MAX_FETCH_MAX_CHARACTERS.
maxCharacters: z.coerce.number().int().min(500).max(MAX_FETCH_MAX_CHARACTERS).optional(),
|
||||
})
|
||||
.strict();
|
||||
|
||||
/**
 * Function-tool definitions sent with every chat-completion request.
 *
 * The JSON schemas here are what the model sees; the zod schemas in this file
 * are what actually validates the arguments at run time. The two must agree,
 * otherwise the model produces calls that are rejected during validation:
 *  - `maxItems: 25` mirrors the `.max(25)` on the domain arrays.
 *  - The "default N" hints are derived from the constants so they cannot drift.
 */
const CHAT_TOOLS: any[] = [
  {
    type: "function",
    function: {
      name: "web_search",
      description:
        "Search the public web for recent or factual information. Returns ranked results with per-result summaries and snippets.",
      parameters: {
        type: "object",
        properties: {
          query: { type: "string", description: "Search query." },
          numResults: {
            type: "integer",
            minimum: 1,
            maximum: MAX_WEB_RESULTS,
            description: `Number of results to return (default ${DEFAULT_WEB_RESULTS}).`,
          },
          type: {
            type: "string",
            enum: ["auto", "fast", "instant"],
            description: "Search mode.",
          },
          includeDomains: {
            type: "array",
            items: { type: "string" },
            maxItems: 25,
            description: "Only include these domains.",
          },
          excludeDomains: {
            type: "array",
            items: { type: "string" },
            maxItems: 25,
            description: "Exclude these domains.",
          },
        },
        required: ["query"],
        additionalProperties: false,
      },
    },
  },
  {
    type: "function",
    function: {
      name: "fetch_url",
      description:
        "Fetch a webpage by URL and return readable plaintext content extracted from the page for deeper inspection.",
      parameters: {
        type: "object",
        properties: {
          url: { type: "string", description: "Absolute URL to fetch, including http/https." },
          maxCharacters: {
            type: "integer",
            minimum: 500,
            maximum: MAX_FETCH_MAX_CHARACTERS,
            description: `Maximum response text characters returned (default ${DEFAULT_FETCH_MAX_CHARACTERS}).`,
          },
        },
        required: ["url"],
        additionalProperties: false,
      },
    },
  },
];
|
||||
|
||||
/**
 * System prompt that tells the model when and how to use the web tools.
 * Written as one sentence per entry and joined with single spaces.
 */
export const CHAT_TOOL_SYSTEM_PROMPT = [
  "You can use tools to gather up-to-date web information when needed.",
  "Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page.",
  "Prefer tools when the user asks for current events, verification, sources, or details you do not already have.",
  "Do not fabricate tool outputs; reason only from provided tool results.",
].join(" ");
|
||||
|
||||
/**
 * Result of running one tool. The whole object is JSON-serialized and sent
 * back to the model as the tool message content.
 */
type ToolRunOutcome = {
|
||||
/** `true` maps to event status "completed", `false` to "failed". */
ok: boolean;
|
||||
/** Tool-specific payload fields (for failures, a string `error` is read from here). */
[key: string]: unknown;
|
||||
};
|
||||
|
||||
/** Token usage summed over every completion request made during one tool-aware chat run. */
type ToolAwareUsage = {
|
||||
/** Sum of the responses' `prompt_tokens`. */
inputTokens?: number;
|
||||
/** Sum of the responses' `completion_tokens`. */
outputTokens?: number;
|
||||
/** Sum of the responses' `total_tokens`. */
totalTokens?: number;
|
||||
};
|
||||
|
||||
/** Value returned by `runToolAwareOpenAIChat`. */
type ToolAwareCompletionResult = {
|
||||
/** Final assistant text; may be an empty string when the response carried no text content. */
text: string;
|
||||
/** Accumulated usage, or `undefined` when no response reported usage. */
usage?: ToolAwareUsage;
|
||||
/** Diagnostic payload; in this file an object holding the raw completion responses and tool-call count. */
raw: unknown;
|
||||
/** One entry per executed tool call, in execution order. */
toolEvents: ToolExecutionEvent[];
|
||||
};
|
||||
|
||||
/** Inputs accepted by `runToolAwareOpenAIChat`. */
type ToolAwareCompletionParams = {
|
||||
/** Client whose `chat.completions.create` is called for every round. */
client: OpenAI;
|
||||
/** Model identifier forwarded unchanged to the completion request. */
model: string;
|
||||
/** Conversation so far; normalized and prefixed with the tool system prompt before sending. */
messages: ChatMessage[];
|
||||
/** Forwarded as `temperature`. */
temperature?: number;
|
||||
/** Forwarded as `max_tokens`. */
maxTokens?: number;
|
||||
/** Invoked (and awaited) after each tool call finishes, whether it succeeded or failed. */
onToolEvent?: (event: ToolExecutionEvent) => void | Promise<void>;
|
||||
/** Extra fields merged into each tool-call log line. */
logContext?: {
|
||||
provider: string;
|
||||
model: string;
|
||||
chatId?: string;
|
||||
};
|
||||
};
|
||||
|
||||
/** Record of a single tool invocation, used for logging, callbacks and persistence. */
export type ToolExecutionEvent = {
|
||||
/** Id of the tool call as supplied by the model, or a generated fallback id. */
toolCallId: string;
|
||||
/** Tool name requested by the model. */
name: string;
|
||||
status: "completed" | "failed";
|
||||
/** Short human-readable description of what happened. */
summary: string;
|
||||
/** Parsed tool arguments (empty object when parsing failed). */
args: Record<string, unknown>;
|
||||
/** ISO-8601 timestamp taken before argument parsing and execution. */
startedAt: string;
|
||||
/** ISO-8601 timestamp taken after execution finished. */
completedAt: string;
|
||||
/** Milliseconds between the two timestamps above. */
durationMs: number;
|
||||
/** Error message; only set when `status` is "failed". */
error?: string;
|
||||
/** Clipped JSON serialization of the tool result. */
resultPreview?: string;
|
||||
};
|
||||
|
||||
/**
 * Tidies extracted text: drops carriage returns, removes spaces/tabs that
 * trail a line, squeezes runs of three or more newlines down to two, and
 * trims both ends.
 */
function compactWhitespace(input: string) {
  const withoutCarriageReturns = input.replace(/\r/g, "");
  const withoutTrailingBlanks = withoutCarriageReturns.replace(/[ \t]+\n/g, "\n");
  const withCollapsedGaps = withoutTrailingBlanks.replace(/\n{3,}/g, "\n\n");
  return withCollapsedGaps.trim();
}
|
||||
|
||||
/**
 * Shortens `input` to at most `maxCharacters` UTF-16 code units and appends
 * "..." when anything was removed. Strings that already fit are returned as-is.
 *
 * Two fixes over a plain `slice`:
 *  - The cut never lands between the halves of a surrogate pair, which would
 *    leave a lone surrogate (rendered as U+FFFD and invalid in some encoders).
 *  - A non-positive or NaN limit clips to the empty string instead of slicing
 *    from the end of the input.
 *
 * @param input Text to clip.
 * @param maxCharacters Maximum number of code units kept before the ellipsis.
 * @returns The original string, or the clipped prefix followed by "...".
 */
function clipText(input: string, maxCharacters: number) {
  if (input.length <= maxCharacters) return input;

  let end = maxCharacters > 0 ? Math.floor(maxCharacters) : 0;
  if (end > 0) {
    const before = input.charCodeAt(end - 1);
    const after = input.charCodeAt(end);
    const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    // Back up one unit so the whole code point is dropped rather than halved.
    if (splitsPair) end -= 1;
  }
  return `${input.slice(0, end)}...`;
}
|
||||
|
||||
/**
 * Returns a shallow copy of `value` when it is a non-null, non-array object;
 * anything else yields a fresh empty object.
 */
function toRecord(value: unknown): Record<string, unknown> {
  const isObjectLike = typeof value === "object" && value !== null && !Array.isArray(value);
  if (isObjectLike) {
    // Spread (not Object.assign) so an own "__proto__" key stays a plain data property.
    return { ...(value as Record<string, unknown>) };
  }
  return {};
}
|
||||
|
||||
/**
 * Flattens text onto one line (line breaks and whitespace runs become single
 * spaces, ends trimmed) and clips it to `maxLength` via `clipText`.
 */
function toSingleLine(value: string, maxLength = 220) {
  const withoutLineBreaks = value.replace(/\r?\n+/g, " ");
  const flattened = withoutLineBreaks.replace(/\s+/g, " ").trim();
  return clipText(flattened, maxLength);
}
|
||||
|
||||
/**
 * Produces the one-sentence, human-readable summary stored on a tool event.
 *
 * @param name Tool name requested by the model.
 * @param args Parsed tool arguments; only `query` / `url` strings are read.
 * @param status Outcome of the call.
 * @param error Error message appended (single-lined, clipped) to failure summaries.
 */
function buildToolSummary(name: string, args: Record<string, unknown>, status: "completed" | "failed", error?: string) {
  const succeeded = status === "completed";
  const errSuffix = status === "failed" && error ? ` Error: ${toSingleLine(error, 140)}` : "";
  const readTrimmedString = (key: string) => {
    const raw = args[key];
    return typeof raw === "string" ? raw.trim() : "";
  };

  switch (name) {
    case "web_search": {
      const query = readTrimmedString("query");
      if (succeeded) {
        return query ? `Performed web search for '${toSingleLine(query, 100)}'.` : "Performed web search.";
      }
      return query ? `Web search for '${toSingleLine(query, 100)}' failed.${errSuffix}` : `Web search failed.${errSuffix}`;
    }
    case "fetch_url": {
      const url = readTrimmedString("url");
      if (succeeded) {
        return url ? `Fetched URL ${toSingleLine(url, 140)}.` : "Fetched URL.";
      }
      return url ? `Fetching URL ${toSingleLine(url, 140)} failed.${errSuffix}` : `Fetching URL failed.${errSuffix}`;
    }
    default:
      // Unknown tools still get a generic sentence.
      return succeeded ? `Ran tool '${name}'.` : `Tool '${name}' failed.${errSuffix}`;
  }
}
|
||||
|
||||
/**
 * Writes one structured `[tool_call] {json}` line per tool event:
 * failures go to `console.error`, everything else to `console.info`.
 * Event fields override same-named context fields.
 */
function logToolEvent(event: ToolExecutionEvent, context?: ToolAwareCompletionParams["logContext"]) {
  const serialized = JSON.stringify({ kind: "tool_call", ...context, ...event });
  const line = `[tool_call] ${serialized}`;
  if (event.status !== "failed") {
    console.info(line);
    return;
  }
  console.error(line);
}
|
||||
|
||||
/**
 * Returns the tool result as JSON clipped to 400 characters, or `undefined`
 * when serialization yields nothing.
 */
function buildResultPreview(toolResult: ToolRunOutcome) {
  const json = JSON.stringify(toolResult);
  if (!json) return undefined;
  return clipText(json, 400);
}
|
||||
|
||||
/**
 * Shapes a tool event into a `role: "tool"` message record for the given chat.
 * The summary is the message content; the full event goes into `metadata`,
 * with absent `error` / `resultPreview` normalized to `null`.
 */
export function buildToolLogMessageData(chatId: string, event: ToolExecutionEvent) {
  const { toolCallId, name, status, summary, args, startedAt, completedAt, durationMs } = event;

  const metadata = {
    kind: "tool_call",
    toolCallId,
    toolName: name,
    status,
    summary,
    args,
    startedAt,
    completedAt,
    durationMs,
    error: event.error ?? null,
    resultPreview: event.resultPreview ?? null,
  };

  return {
    chatId,
    role: "tool" as const,
    content: summary,
    name,
    metadata,
  };
}
|
||||
|
||||
/**
 * Extracts the text of the first `<title>` element from raw HTML.
 *
 * The common named/numeric entities are decoded so titles read naturally.
 * The previous replacements matched the already-decoded characters (for
 * example `&` -> `&`) and therefore never decoded anything. `&amp;` is
 * handled last so that an escaped entity such as `&amp;lt;` becomes the
 * literal text `&lt;` rather than being decoded twice into `<`.
 *
 * @param html Raw HTML document text.
 * @returns The whitespace-normalized title, or `null` when there is no
 *          `<title>` element or it is empty.
 */
function extractHtmlTitle(html: string) {
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!match?.[1]) return null;

  const decoded = match[1]
    .replace(/&nbsp;/gi, " ")
    .replace(/&lt;/gi, "<")
    .replace(/&gt;/gi, ">")
    .replace(/&quot;/gi, '"')
    .replace(/&#0*39;|&#x0*27;|&apos;/gi, "'")
    .replace(/&amp;/gi, "&");

  return compactWhitespace(decoded);
}
|
||||
|
||||
/**
 * Converts stored chat messages into the message list sent to the model and
 * prepends the tool system prompt.
 *
 * - Stored `tool` messages are replayed as `user` messages prefixed with
 *   "Tool output (<name>):" (they carry no tool_call_id to attach to).
 * - `assistant` / `user` / `system` messages pass through; `name` is kept
 *   only for assistant and user messages.
 * - Any other role is downgraded to a plain `user` message.
 */
function normalizeIncomingMessages(messages: ChatMessage[]) {
  const normalized = messages.map((message) => {
    switch (message.role) {
      case "tool": {
        const label = message.name?.trim() || "tool";
        return {
          role: "user",
          content: `Tool output (${label}):\n${message.content}`,
        };
      }
      case "assistant":
      case "system":
      case "user": {
        const out: any = { role: message.role, content: message.content };
        const roleAllowsName = message.role !== "system";
        if (message.name && roleAllowsName) {
          out.name = message.name;
        }
        return out;
      }
      default:
        return { role: "user", content: message.content };
    }
  });

  return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
}
|
||||
|
||||
/**
 * Executes the `web_search` tool: validates the arguments, queries the Exa
 * client, and returns a compact, size-bounded view of the results.
 *
 * @param input Untrusted arguments produced by the model.
 * @returns `{ ok: true, query, requestId, results }` where each result has
 *          rank, title, url, publishedDate, author, summary, text, highlights.
 * @throws When validation fails or the search client rejects.
 */
async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
  const args = WebSearchArgsSchema.parse(input);
  const exa = exaClient();

  const searchOptions = {
    type: args.type ?? "auto",
    numResults: args.numResults ?? DEFAULT_WEB_RESULTS,
    includeDomains: args.includeDomains,
    excludeDomains: args.excludeDomains,
    moderation: true,
    userLocation: "US",
    contents: {
      summary: { query: args.query },
      highlights: {
        query: args.query,
        maxCharacters: 320,
        numSentences: 2,
        highlightsPerUrl: 2,
      },
      text: { maxCharacters: 1_000 },
    },
  };
  const response = await exa.search(args.query, searchOptions as any);

  // Non-string fields become null; long strings are clipped to the given budget.
  const stringOrNull = (value: unknown) => (typeof value === "string" ? value : null);
  const clippedOrNull = (value: unknown, limit: number) => (typeof value === "string" ? clipText(value, limit) : null);

  const rawResults = Array.isArray(response?.results) ? response.results : [];
  const results = rawResults.map((result: any, index: number) => {
    const highlights = Array.isArray(result?.highlights)
      ? result.highlights
          .filter((h: unknown) => typeof h === "string")
          .slice(0, 3)
          .map((h: string) => clipText(h, 280))
      : [];

    return {
      rank: index + 1,
      title: stringOrNull(result?.title),
      url: stringOrNull(result?.url),
      publishedDate: stringOrNull(result?.publishedDate),
      author: stringOrNull(result?.author),
      summary: clippedOrNull(result?.summary, 1_400),
      text: clippedOrNull(result?.text, 700),
      highlights,
    };
  });

  return {
    ok: true,
    query: args.query,
    requestId: response?.requestId ?? null,
    results,
  };
}
|
||||
|
||||
/**
 * Parses a model-supplied URL and rejects targets the server must not fetch.
 *
 * The URL comes from an LLM tool call (and so, indirectly, from arbitrary web
 * content), which makes server-side fetching an SSRF vector. On top of the
 * scheme check this rejects literal hosts that point back into the local
 * machine or private network: `localhost`, loopback, RFC 1918 / CGNAT /
 * link-local IPv4 (including the 169.254.169.254 metadata address),
 * multicast/reserved IPv4, and loopback / unique-local / link-local /
 * IPv4-mapped IPv6.
 *
 * NOTE(review): only literal hostnames are inspected. A public DNS name that
 * resolves to a private address is not detected here; that needs a
 * resolution-time check in the HTTP layer.
 *
 * @param urlRaw Absolute URL string.
 * @returns The parsed `URL`.
 * @throws Error when the URL is malformed, is not http(s), or targets a
 *         blocked host.
 */
function assertSafeFetchUrl(urlRaw: string) {
  const parsed = new URL(urlRaw);
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new Error("Only http:// and https:// URLs are supported.");
  }

  // The URL parser lower-cases hosts and canonicalizes numeric IPv4 forms;
  // a trailing dot is a valid FQDN spelling and must not bypass the checks.
  const host = parsed.hostname.toLowerCase().replace(/\.$/, "");
  let blocked = host === "localhost" || host.endsWith(".localhost");

  const ipv4 = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
  if (ipv4) {
    const a = Number(ipv4[1]);
    const b = Number(ipv4[2]);
    blocked =
      blocked ||
      a === 0 || // "this network"
      a === 10 || // RFC 1918
      a === 127 || // loopback
      (a === 100 && b >= 64 && b <= 127) || // CGNAT
      (a === 169 && b === 254) || // link-local / cloud metadata
      (a === 172 && b >= 16 && b <= 31) || // RFC 1918
      (a === 192 && b === 168) || // RFC 1918
      a >= 224; // multicast and reserved
  }

  if (host.startsWith("[") && host.endsWith("]")) {
    const v6 = host.slice(1, -1);
    blocked =
      blocked ||
      v6 === "::" || // unspecified
      v6 === "::1" || // loopback
      v6.startsWith("::ffff:") || // IPv4-mapped
      /^f[cd][0-9a-f]{2}:/.test(v6) || // unique local fc00::/7
      /^fe[89ab][0-9a-f]:/.test(v6); // link-local fe80::/10
  }

  if (blocked) {
    throw new Error("Refusing to fetch loopback, private, or link-local addresses.");
  }
  return parsed;
}
|
||||
|
||||
/**
 * Executes the `fetch_url` tool: downloads a page and returns readable text.
 *
 * Changes from the previous version:
 *  - The timeout now covers the body download as well. Previously the timer
 *    was cleared as soon as headers arrived, so a slow or endless body could
 *    hang the tool indefinitely.
 *  - Redirects are followed manually (at most 10 hops) and every hop is
 *    re-validated with `assertSafeFetchUrl`, so a redirect cannot steer the
 *    request to a URL that the initial check would have rejected.
 *  - A timeout surfaces as an explicit "timed out" error.
 *
 * NOTE(review): the response body is still read in full before truncation;
 * consider a byte cap if very large responses are a concern.
 *
 * @param input Untrusted arguments produced by the model.
 * @returns `{ ok: true, url, status, contentType, title, truncated, text }`.
 * @throws When validation fails, the URL is rejected, the request times out,
 *         or the final response status is not 2xx.
 */
async function runFetchUrlTool(input: unknown): Promise<ToolRunOutcome> {
  const args = FetchUrlArgsSchema.parse(input);
  let target = assertSafeFetchUrl(args.url);
  const maxCharacters = args.maxCharacters ?? DEFAULT_FETCH_MAX_CHARACTERS;
  const maxRedirects = 10;

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);

  const request = (url: URL) =>
    fetch(url.toString(), {
      redirect: "manual",
      signal: controller.signal,
      headers: {
        "User-Agent": "SybilBot/1.0 (+https://sybil.local)",
        Accept: "text/html, text/plain, application/json;q=0.9, */*;q=0.5",
      },
    });

  try {
    let response = await request(target);
    for (let hop = 0; hop < maxRedirects; hop += 1) {
      const isRedirect = response.status >= 300 && response.status < 400;
      const location = isRedirect ? response.headers.get("location") : null;
      if (!location) break;
      // Release the connection held by the redirect response before moving on.
      await response.body?.cancel();
      target = assertSafeFetchUrl(new URL(location, target).toString());
      response = await request(target);
    }

    // Also reached when the redirect budget is exhausted (status is still 3xx).
    if (!response.ok) {
      throw new Error(`Fetch failed with status ${response.status}.`);
    }

    const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
    const body = await response.text();
    // Some servers mislabel HTML, so sniff the body as a fallback.
    const isHtml = contentType.includes("text/html") || /<!doctype html|<html[\s>]/i.test(body);

    let extracted = body;
    if (isHtml) {
      extracted = htmlToText(body, {
        wordwrap: false,
        preserveNewlines: true,
        selectors: [
          { selector: "img", format: "skip" },
          { selector: "script", format: "skip" },
          { selector: "style", format: "skip" },
          { selector: "noscript", format: "skip" },
          { selector: "a", options: { ignoreHref: true } },
        ],
      });
    }

    const normalized = compactWhitespace(extracted);
    const truncated = normalized.length > maxCharacters;
    const text = truncated
      ? `${normalized.slice(0, maxCharacters)}\n\n[truncated ${normalized.length - maxCharacters} characters]`
      : normalized;

    return {
      ok: true,
      url: response.url || target.toString(),
      status: response.status,
      contentType: contentType || null,
      title: isHtml ? extractHtmlTitle(body) : null,
      truncated,
      text,
    };
  } catch (err: unknown) {
    if (controller.signal.aborted) {
      throw new Error(`Fetch timed out after ${FETCH_TIMEOUT_MS} ms.`);
    }
    throw err;
  } finally {
    clearTimeout(timeout);
  }
}
|
||||
|
||||
/**
 * Routes a tool call to its implementation by name. Unknown names do not
 * throw; they resolve to a failed outcome so the model is told about it.
 */
async function executeTool(name: string, args: unknown): Promise<ToolRunOutcome> {
  switch (name) {
    case "web_search":
      return runWebSearchTool(args);
    case "fetch_url":
      return runFetchUrlTool(args);
    default:
      return { ok: false, error: `Unknown tool: ${name}` };
  }
}
|
||||
|
||||
/**
 * Parses the JSON argument string of a tool call.
 * Non-string or blank input yields `{}`; malformed JSON throws an Error whose
 * message starts with "Invalid JSON arguments: ".
 */
function parseToolArgs(raw: unknown) {
  const text = typeof raw === "string" ? raw.trim() : "";
  if (text.length === 0) return {};

  try {
    return JSON.parse(text);
  } catch (err: any) {
    const reason = err?.message ?? String(err);
    throw new Error(`Invalid JSON arguments: ${reason}`);
  }
}
|
||||
|
||||
/**
 * Adds one response's token counts into the running totals (mutating `acc`).
 * Missing or null counts are treated as 0.
 *
 * @returns `true` when a usage object was present, otherwise `false`.
 */
function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
  const hasUsage = Boolean(usage);
  if (hasUsage) {
    acc.inputTokens = acc.inputTokens + (usage.prompt_tokens ?? 0);
    acc.outputTokens = acc.outputTokens + (usage.completion_tokens ?? 0);
    acc.totalTokens = acc.totalTokens + (usage.total_tokens ?? 0);
  }
  return hasUsage;
}
|
||||
|
||||
/**
 * Runs a chat completion that may call `web_search` / `fetch_url`.
 *
 * Each round sends the conversation to the model. If the reply contains tool
 * calls, they are executed sequentially, their JSON results are appended as
 * `tool` messages, and the next round starts. The first reply without tool
 * calls is returned as the final answer.
 *
 * Change from the previous version: when all MAX_TOOL_ROUNDS rounds end in
 * tool calls, the results of the last round used to be thrown away and a
 * canned "limit reached" message was returned. Now one final request is made
 * with `tool_choice: "none"` so the model answers from what was gathered. The
 * canned message remains the fallback if that request fails or yields no
 * text, and `raw.toolCallLimitReached` is still set.
 *
 * @param params Client, model, messages and optional tuning/callbacks.
 * @returns Final text, summed token usage (if any response reported usage),
 *          raw responses, and one event per executed tool call.
 * @throws Errors from the completion requests of the tool rounds and from
 *         `onToolEvent`. Tool failures do not throw; they are reported to
 *         the model as `{ ok: false, error }`.
 */
export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
  const conversation: any[] = normalizeIncomingMessages(params.messages);
  const rawResponses: unknown[] = [];
  const toolEvents: ToolExecutionEvent[] = [];
  const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let sawUsage = false;
  let totalToolCalls = 0;

  // Sends the current conversation and records the raw response and usage.
  const requestMessage = async (toolChoice: "auto" | "none") => {
    const completion = await params.client.chat.completions.create({
      model: params.model,
      messages: conversation,
      temperature: params.temperature,
      max_tokens: params.maxTokens,
      tools: CHAT_TOOLS,
      tool_choice: toolChoice,
    } as any);
    rawResponses.push(completion);
    sawUsage = mergeUsage(usageAcc, completion?.usage) || sawUsage;
    return completion?.choices?.[0]?.message;
  };

  // Builds the result object from the state accumulated so far.
  const finish = (text: string, extraRaw: Record<string, unknown> = {}): ToolAwareCompletionResult => ({
    text,
    usage: sawUsage ? usageAcc : undefined,
    raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, ...extraRaw },
    toolEvents,
  });

  for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
    const message = await requestMessage("auto");
    if (!message) {
      return finish("", { missingMessage: true });
    }

    const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
    if (!toolCalls.length) {
      return finish(typeof message.content === "string" ? message.content : "");
    }

    totalToolCalls += toolCalls.length;

    // Normalize once so the echoed assistant message and the tool replies
    // are guaranteed to use the same ids and names.
    const calls = toolCalls.map((call: any, index: number) => ({
      id: call?.id ?? `tool_call_${round}_${index}`,
      name: call?.function?.name ?? "unknown_tool",
      rawArguments: call?.function?.arguments,
    }));

    const assistantToolCallMessage: any = {
      role: "assistant",
      tool_calls: calls.map((call: any) => ({
        id: call.id,
        type: "function",
        function: {
          name: call.name,
          arguments: call.rawArguments ?? "{}",
        },
      })),
    };
    if (typeof message.content === "string" && message.content.length) {
      assistantToolCallMessage.content = message.content;
    }
    conversation.push(assistantToolCallMessage);

    for (const call of calls) {
      const startedAtMs = Date.now();
      const startedAt = new Date(startedAtMs).toISOString();
      let toolResult: ToolRunOutcome;
      let parsedArgs: Record<string, unknown> = {};
      try {
        parsedArgs = toRecord(parseToolArgs(call.rawArguments));
        toolResult = await executeTool(call.name, parsedArgs);
      } catch (err: any) {
        // Argument and execution errors are reported to the model, not thrown.
        toolResult = {
          ok: false,
          error: err?.message ?? String(err),
        };
      }

      const status: "completed" | "failed" = toolResult.ok ? "completed" : "failed";
      const error =
        status === "failed"
          ? typeof toolResult.error === "string"
            ? toolResult.error
            : "Tool execution failed."
          : undefined;
      const completedAtMs = Date.now();
      const event: ToolExecutionEvent = {
        toolCallId: call.id,
        name: call.name,
        status,
        summary: buildToolSummary(call.name, parsedArgs, status, error),
        args: parsedArgs,
        startedAt,
        completedAt: new Date(completedAtMs).toISOString(),
        durationMs: completedAtMs - startedAtMs,
        error,
        resultPreview: buildResultPreview(toolResult),
      };
      toolEvents.push(event);
      logToolEvent(event, params.logContext);
      if (params.onToolEvent) {
        await params.onToolEvent(event);
      }

      conversation.push({
        role: "tool",
        tool_call_id: call.id,
        content: JSON.stringify(toolResult),
      });
    }
  }

  // Round budget exhausted while the model was still calling tools. Ask once
  // more with tools disabled so the gathered results are actually used.
  const limitMessage = "I reached the tool-call limit while gathering information. Please narrow the request and try again.";
  let finalText = "";
  try {
    const message = await requestMessage("none");
    if (typeof message?.content === "string" && message.content.trim().length > 0) {
      finalText = message.content;
    }
  } catch (err: unknown) {
    // Best effort only: fall back to the canned message below.
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[tool_call] final answer after tool-call limit failed: ${reason}`);
  }

  return finish(finalText || limitMessage, { toolCallLimitReached: true });
}
|
||||
Reference in New Issue
Block a user