From d5b06ce22a5df9f533776b4576cabc93e3bf81e6 Mon Sep 17 00:00:00 2001 From: James Magahern Date: Mon, 2 Mar 2026 16:13:34 -0800 Subject: [PATCH] [feature] adds web_search and fetch_url tool calls --- docs/api/rest.md | 9 +- docs/api/streaming-chat.md | 31 +- server/package-lock.json | 168 ++++++ server/package.json | 1 + server/src/llm/chat-tools.ts | 517 ++++++++++++++++++ server/src/llm/multiplexer.ts | 53 +- server/src/llm/streaming.ts | 64 ++- server/src/routes.ts | 1 + server/src/types/html-to-text.d.ts | 3 + web/src/App.tsx | 86 ++- .../components/chat/chat-messages-panel.tsx | 50 ++ web/src/lib/api.ts | 16 + 12 files changed, 951 insertions(+), 48 deletions(-) create mode 100644 server/src/llm/chat-tools.ts create mode 100644 server/src/types/html-to-text.d.ts diff --git a/docs/api/rest.md b/docs/api/rest.md index 7eb6876..06ae183 100644 --- a/docs/api/rest.md +++ b/docs/api/rest.md @@ -112,6 +112,12 @@ Behavior notes: - For `chatId` calls, server stores only *new* non-assistant messages from provided history to avoid duplicates. - Server persists final assistant output and call metadata (`LlmCall`) in DB. - Server updates chat-level model metadata on each call: `lastUsedProvider`/`lastUsedModel`; first successful/failed call also initializes `initiatedProvider`/`initiatedModel` if unset. +- For `openai` and `xai`, backend enables tool use during chat completion with an internal system instruction. +- Available tool calls for chat: `web_search` and `fetch_url`. +- `web_search` uses Exa and returns ranked results with per-result summaries/snippets. +- `fetch_url` fetches a URL and returns plaintext page content (HTML converted to text server-side). +- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output. +- `anthropic` currently runs without server-managed tool calls. ## Searches @@ -171,7 +177,8 @@ Search run notes: "createdAt": "...", "role": "system|user|assistant|tool", "content": "...", - "name": null + "name": null, + "metadata": null } ``` diff --git a/docs/api/streaming-chat.md b/docs/api/streaming-chat.md index 3410fea..c6335d7 100644 --- a/docs/api/streaming-chat.md +++ b/docs/api/streaming-chat.md @@ -37,8 +37,9 @@ Notes: Event order: 1. Exactly one `meta` -2. Zero or more `delta` -3. Exactly one terminal event: `done` or `error` +2. Zero or more `tool_call` +3. Zero or more `delta` +4. Exactly one terminal event: `done` or `error` ### `meta` @@ -60,6 +61,23 @@ Event order: `text` may contain partial words, punctuation, or whitespace. +### `tool_call` + +```json +{ + "toolCallId": "call_123", + "name": "web_search", + "status": "completed", + "summary": "Performed web search for 'latest CPI release'.", + "args": { "query": "latest CPI release" }, + "startedAt": "2026-03-02T10:00:00.000Z", + "completedAt": "2026-03-02T10:00:00.820Z", + "durationMs": 820, + "error": null, + "resultPreview": "{\"ok\":true,...}" +} +``` + ### `done` ```json @@ -84,10 +102,15 @@ Event order: ## Provider Streaming Behavior -- `openai`: streamed via OpenAI chat completion chunks; emits `delta` from `choices[0].delta.content`. -- `xai`: uses OpenAI-compatible API, same chunk extraction as OpenAI. +- `openai`: backend may execute internal tool calls (`web_search`, `fetch_url`) before producing final text. +- `xai`: same tool-enabled behavior as OpenAI. - `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`. +Tool-enabled streaming notes (`openai`/`xai`): +- Stream still emits standard `meta`, `delta`, `done|error` events. +- Stream may emit `tool_call` events before final assistant text. +- `delta` may arrive as one consolidated chunk after tool execution, rather than many token-level chunks. + ## Persistence + Consistency Model Backend database remains source of truth. diff --git a/server/package-lock.json b/server/package-lock.json index a3ef01f..0ed9cd4 100644 --- a/server/package-lock.json +++ b/server/package-lock.json @@ -18,6 +18,7 @@ "dotenv": "^17.2.3", "exa-js": "^2.4.0", "fastify": "^5.7.2", + "html-to-text": "^9.0.5", "openai": "^6.16.0", "pino-pretty": "^13.1.3", "prisma": "^6.6.0", @@ -852,6 +853,19 @@ "@prisma/debug": "6.6.0" } }, + "node_modules/@selderee/plugin-htmlparser2": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@selderee/plugin-htmlparser2/-/plugin-htmlparser2-0.11.0.tgz", + "integrity": "sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ==", + "license": "MIT", + "dependencies": { + "domhandler": "^5.0.3", + "selderee": "^0.11.0" + }, + "funding": { + "url": "https://ko-fi.com/killymxi" + } + }, "node_modules/@types/node": { "version": "25.0.10", "resolved": "https://registry.npmjs.org/@types/node/-/node-25.0.10.tgz", @@ -996,6 +1010,15 @@ } } }, + "node_modules/deepmerge": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", + "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/depd": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", @@ -1014,6 +1037,61 @@ "node": ">=6" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dotenv": { "version": "17.2.3", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.2.3.tgz", @@ -1035,6 +1113,18 @@ "once": "^1.4.0" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/esbuild": { "version": "0.27.2", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.2.tgz", @@ -1353,6 +1443,41 @@ "integrity": "sha512-7xgomUX6ADmcYzFik0HzAxh/73YlKR9bmFzf51CZwR+b6YtzU2m0u49hQCqV6SvlqIqsaxovfwdvbnsw3b/zpg==", "license": "MIT" }, + "node_modules/html-to-text": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/html-to-text/-/html-to-text-9.0.5.tgz", + "integrity": "sha512-qY60FjREgVZL03vJU6IfMV4GDjGBIoOyvuFdpBDIX9yTlDw0TjxVBQp+P8NvpdIXNJvfWBTNul7fsAQJq2FNpg==", + "license": "MIT", + "dependencies": { + "@selderee/plugin-htmlparser2": "^0.11.0", + "deepmerge": "^4.3.1", + "dom-serializer": "^2.0.0", + "htmlparser2": "^8.0.2", + "selderee": "^0.11.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/htmlparser2": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz", + "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "entities": "^4.4.0" + } + }, "node_modules/http-errors": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", @@ -1452,6 +1577,15 @@ "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", "license": "MIT" }, + "node_modules/leac": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/leac/-/leac-0.6.0.tgz", + "integrity": "sha512-y+SqErxb8h7nE/fiEX07jsbuhrpO9lL8eca7/Y1nuWV2moNlXhyd59iDGcRf6moVyDMbmTNzL40SUyrFU/yDpg==", + "license": "MIT", + "funding": { + "url": "https://ko-fi.com/killymxi" + } + }, "node_modules/light-my-request": { "version": "6.6.0", "resolved": "https://registry.npmjs.org/light-my-request/-/light-my-request-6.6.0.tgz", @@ -1648,6 +1782,19 @@ "integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==", "license": "MIT" }, + "node_modules/parseley": { + "version": "0.12.1", + "resolved": "https://registry.npmjs.org/parseley/-/parseley-0.12.1.tgz", + "integrity": "sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw==", + "license": "MIT", + "dependencies": { + "leac": "^0.6.0", + "peberminta": "^0.9.0" + }, + "funding": { + "url": "https://ko-fi.com/killymxi" + } + }, "node_modules/path-scurry": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.1.tgz", @@ -1664,6 +1811,15 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/peberminta": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/peberminta/-/peberminta-0.9.0.tgz", + "integrity": "sha512-XIxfHpEuSJbITd1H3EeQwpcZbTLHc+VVr8ANI9t5sit565tsI4/xK3KWTUFE2e6QiangUkh3B0jihzmGnNrRsQ==", + "license": "MIT", + "funding": { + "url": "https://ko-fi.com/killymxi" + } + }, "node_modules/pino": { "version": "10.3.0", "resolved": "https://registry.npmjs.org/pino/-/pino-10.3.0.tgz", @@ -1882,6 +2038,18 @@ ], "license": "BSD-3-Clause" }, + "node_modules/selderee": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/selderee/-/selderee-0.11.0.tgz", + "integrity": "sha512-5TF+l7p4+OsnP8BCCvSyZiSPc4x4//p5uPwK8TCnVPJYRmU2aYKMpOXvw8zM5a5JvuuCGN1jmsMwuU2W02ukfA==", + "license": "MIT", + "dependencies": { + "parseley": "^0.12.0" + }, + "funding": { + "url": "https://ko-fi.com/killymxi" + } + }, "node_modules/semver": { "version": "7.7.3", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", diff --git a/server/package.json b/server/package.json index c2cd11f..89c08c7 100644 --- a/server/package.json +++ b/server/package.json @@ -26,6 +26,7 @@ "dotenv": "^17.2.3", "exa-js": "^2.4.0", "fastify": "^5.7.2", + "html-to-text": "^9.0.5", "openai": "^6.16.0", "pino-pretty": "^13.1.3", "prisma": "^6.6.0", diff --git a/server/src/llm/chat-tools.ts b/server/src/llm/chat-tools.ts new file mode 100644 index 0000000..e898ab5 --- /dev/null +++ b/server/src/llm/chat-tools.ts @@ -0,0 +1,517 @@ +import { convert as htmlToText } from "html-to-text"; +import type OpenAI from "openai"; +import { z } from "zod"; +import { exaClient } from "../search/exa.js"; +import type { ChatMessage } from "./types.js"; + +const MAX_TOOL_ROUNDS = 4; +const DEFAULT_WEB_RESULTS = 5; +const MAX_WEB_RESULTS = 10; +const DEFAULT_FETCH_MAX_CHARACTERS = 12_000; +const MAX_FETCH_MAX_CHARACTERS = 50_000; +const FETCH_TIMEOUT_MS = 12_000; + +const WebSearchArgsSchema = z + .object({ + query: z.string().trim().min(1), + numResults: z.coerce.number().int().min(1).max(MAX_WEB_RESULTS).optional(), + type: z.enum(["auto", "fast", "instant"]).optional(), + includeDomains: z.array(z.string().trim().min(1)).max(25).optional(), + excludeDomains: z.array(z.string().trim().min(1)).max(25).optional(), + }) + .strict(); + +const FetchUrlArgsSchema = z + .object({ + url: z.string().trim().url(), + maxCharacters: z.coerce.number().int().min(500).max(MAX_FETCH_MAX_CHARACTERS).optional(), + }) + .strict(); + +const CHAT_TOOLS: any[] = [ + { + type: "function", + function: { + name: "web_search", + description: + "Search the public web for recent or factual information. Returns ranked results with per-result summaries and snippets.", + parameters: { + type: "object", + properties: { + query: { type: "string", description: "Search query." }, + numResults: { + type: "integer", + minimum: 1, + maximum: MAX_WEB_RESULTS, + description: "Number of results to return (default 5).", + }, + type: { + type: "string", + enum: ["auto", "fast", "instant"], + description: "Search mode.", + }, + includeDomains: { + type: "array", + items: { type: "string" }, + description: "Only include these domains.", + }, + excludeDomains: { + type: "array", + items: { type: "string" }, + description: "Exclude these domains.", + }, + }, + required: ["query"], + additionalProperties: false, + }, + }, + }, + { + type: "function", + function: { + name: "fetch_url", + description: + "Fetch a webpage by URL and return readable plaintext content extracted from the page for deeper inspection.", + parameters: { + type: "object", + properties: { + url: { type: "string", description: "Absolute URL to fetch, including http/https." }, + maxCharacters: { + type: "integer", + minimum: 500, + maximum: MAX_FETCH_MAX_CHARACTERS, + description: "Maximum response text characters returned (default 12000).", + }, + }, + required: ["url"], + additionalProperties: false, + }, + }, + }, +]; + +export const CHAT_TOOL_SYSTEM_PROMPT = + "You can use tools to gather up-to-date web information when needed. " + + "Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page. " + + "Prefer tools when the user asks for current events, verification, sources, or details you do not already have. " + + "Do not fabricate tool outputs; reason only from provided tool results."; + +type ToolRunOutcome = { + ok: boolean; + [key: string]: unknown; +}; + +type ToolAwareUsage = { + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; +}; + +type ToolAwareCompletionResult = { + text: string; + usage?: ToolAwareUsage; + raw: unknown; + toolEvents: ToolExecutionEvent[]; +}; + +type ToolAwareCompletionParams = { + client: OpenAI; + model: string; + messages: ChatMessage[]; + temperature?: number; + maxTokens?: number; + onToolEvent?: (event: ToolExecutionEvent) => void | Promise; + logContext?: { + provider: string; + model: string; + chatId?: string; + }; +}; + +export type ToolExecutionEvent = { + toolCallId: string; + name: string; + status: "completed" | "failed"; + summary: string; + args: Record; + startedAt: string; + completedAt: string; + durationMs: number; + error?: string; + resultPreview?: string; +}; + +function compactWhitespace(input: string) { + return input.replace(/\r/g, "").replace(/[ \t]+\n/g, "\n").replace(/\n{3,}/g, "\n\n").trim(); +} + +function clipText(input: string, maxCharacters: number) { + return input.length <= maxCharacters ? input : `${input.slice(0, maxCharacters)}...`; +} + +function toRecord(value: unknown): Record { + if (!value || typeof value !== "object" || Array.isArray(value)) return {}; + return { ...(value as Record) }; +} + +function toSingleLine(value: string, maxLength = 220) { + return clipText( + value + .replace(/\r?\n+/g, " ") + .replace(/\s+/g, " ") + .trim(), + maxLength + ); +} + +function buildToolSummary(name: string, args: Record, status: "completed" | "failed", error?: string) { + const errSuffix = status === "failed" && error ? ` Error: ${toSingleLine(error, 140)}` : ""; + if (name === "web_search") { + const query = typeof args.query === "string" ? args.query.trim() : ""; + if (status === "completed") { + return query ? `Performed web search for '${toSingleLine(query, 100)}'.` : "Performed web search."; + } + return query ? `Web search for '${toSingleLine(query, 100)}' failed.${errSuffix}` : `Web search failed.${errSuffix}`; + } + + if (name === "fetch_url") { + const url = typeof args.url === "string" ? args.url.trim() : ""; + if (status === "completed") { + return url ? `Fetched URL ${toSingleLine(url, 140)}.` : "Fetched URL."; + } + return url ? `Fetching URL ${toSingleLine(url, 140)} failed.${errSuffix}` : `Fetching URL failed.${errSuffix}`; + } + + if (status === "completed") { + return `Ran tool '${name}'.`; + } + return `Tool '${name}' failed.${errSuffix}`; +} + +function logToolEvent(event: ToolExecutionEvent, context?: ToolAwareCompletionParams["logContext"]) { + const payload = { + kind: "tool_call", + ...context, + ...event, + }; + const line = `[tool_call] ${JSON.stringify(payload)}`; + if (event.status === "failed") console.error(line); + else console.info(line); +} + +function buildResultPreview(toolResult: ToolRunOutcome) { + const serialized = JSON.stringify(toolResult); + return serialized ? clipText(serialized, 400) : undefined; +} + +export function buildToolLogMessageData(chatId: string, event: ToolExecutionEvent) { + return { + chatId, + role: "tool" as const, + content: event.summary, + name: event.name, + metadata: { + kind: "tool_call", + toolCallId: event.toolCallId, + toolName: event.name, + status: event.status, + summary: event.summary, + args: event.args, + startedAt: event.startedAt, + completedAt: event.completedAt, + durationMs: event.durationMs, + error: event.error ?? null, + resultPreview: event.resultPreview ?? null, + }, + }; +} + +function extractHtmlTitle(html: string) { + const match = html.match(/]*>([\s\S]*?)<\/title>/i); + if (!match?.[1]) return null; + return compactWhitespace( + match[1] + .replace(/ /gi, " ") + .replace(/&/gi, "&") + .replace(/</gi, "<") + .replace(/>/gi, ">") + .replace(/"/gi, '"') + .replace(/'/gi, "'") + ); +} + +function normalizeIncomingMessages(messages: ChatMessage[]) { + const normalized = messages.map((m) => { + if (m.role === "tool") { + const name = m.name?.trim() || "tool"; + return { + role: "user", + content: `Tool output (${name}):\n${m.content}`, + }; + } + if (m.role === "assistant" || m.role === "system" || m.role === "user") { + const out: any = { role: m.role, content: m.content }; + if (m.name && (m.role === "assistant" || m.role === "user")) { + out.name = m.name; + } + return out; + } + return { role: "user", content: m.content }; + }); + + return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized]; +} + +async function runWebSearchTool(input: unknown): Promise { + const args = WebSearchArgsSchema.parse(input); + const exa = exaClient(); + const response = await exa.search(args.query, { + type: args.type ?? "auto", + numResults: args.numResults ?? DEFAULT_WEB_RESULTS, + includeDomains: args.includeDomains, + excludeDomains: args.excludeDomains, + moderation: true, + userLocation: "US", + contents: { + summary: { query: args.query }, + highlights: { + query: args.query, + maxCharacters: 320, + numSentences: 2, + highlightsPerUrl: 2, + }, + text: { maxCharacters: 1_000 }, + }, + } as any); + + const results = Array.isArray(response?.results) ? response.results : []; + return { + ok: true, + query: args.query, + requestId: response?.requestId ?? null, + results: results.map((result: any, index: number) => ({ + rank: index + 1, + title: typeof result?.title === "string" ? result.title : null, + url: typeof result?.url === "string" ? result.url : null, + publishedDate: typeof result?.publishedDate === "string" ? result.publishedDate : null, + author: typeof result?.author === "string" ? result.author : null, + summary: typeof result?.summary === "string" ? clipText(result.summary, 1_400) : null, + text: typeof result?.text === "string" ? clipText(result.text, 700) : null, + highlights: Array.isArray(result?.highlights) + ? result.highlights.filter((h: unknown) => typeof h === "string").slice(0, 3).map((h: string) => clipText(h, 280)) + : [], + })), + }; +} + +function assertSafeFetchUrl(urlRaw: string) { + const parsed = new URL(urlRaw); + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") { + throw new Error("Only http:// and https:// URLs are supported."); + } + return parsed; +} + +async function runFetchUrlTool(input: unknown): Promise { + const args = FetchUrlArgsSchema.parse(input); + const parsed = assertSafeFetchUrl(args.url); + const maxCharacters = args.maxCharacters ?? DEFAULT_FETCH_MAX_CHARACTERS; + + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); + + let response: Response; + try { + response = await fetch(parsed.toString(), { + redirect: "follow", + signal: controller.signal, + headers: { + "User-Agent": "SybilBot/1.0 (+https://sybil.local)", + Accept: "text/html, text/plain, application/json;q=0.9, */*;q=0.5", + }, + }); + } finally { + clearTimeout(timeout); + } + + if (!response.ok) { + throw new Error(`Fetch failed with status ${response.status}.`); + } + + const contentType = (response.headers.get("content-type") ?? "").toLowerCase(); + const body = await response.text(); + const isHtml = contentType.includes("text/html") || /]/i.test(body); + + let extracted = body; + if (isHtml) { + extracted = htmlToText(body, { + wordwrap: false, + preserveNewlines: true, + selectors: [ + { selector: "img", format: "skip" }, + { selector: "script", format: "skip" }, + { selector: "style", format: "skip" }, + { selector: "noscript", format: "skip" }, + { selector: "a", options: { ignoreHref: true } }, + ], + }); + } + + const normalized = compactWhitespace(extracted); + const truncated = normalized.length > maxCharacters; + const text = truncated + ? `${normalized.slice(0, maxCharacters)}\n\n[truncated ${normalized.length - maxCharacters} characters]` + : normalized; + + return { + ok: true, + url: response.url || parsed.toString(), + status: response.status, + contentType: contentType || null, + title: isHtml ? extractHtmlTitle(body) : null, + truncated, + text, + }; +} + +async function executeTool(name: string, args: unknown): Promise { + if (name === "web_search") return runWebSearchTool(args); + if (name === "fetch_url") return runFetchUrlTool(args); + return { ok: false, error: `Unknown tool: ${name}` }; +} + +function parseToolArgs(raw: unknown) { + if (typeof raw !== "string") return {}; + const trimmed = raw.trim(); + if (!trimmed) return {}; + try { + return JSON.parse(trimmed); + } catch (err: any) { + throw new Error(`Invalid JSON arguments: ${err?.message ?? String(err)}`); + } +} + +function mergeUsage(acc: Required, usage: any) { + if (!usage) return false; + acc.inputTokens += usage.prompt_tokens ?? 0; + acc.outputTokens += usage.completion_tokens ?? 0; + acc.totalTokens += usage.total_tokens ?? 0; + return true; +} + +export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams): Promise { + const conversation: any[] = normalizeIncomingMessages(params.messages); + const rawResponses: unknown[] = []; + const toolEvents: ToolExecutionEvent[] = []; + const usageAcc: Required = { inputTokens: 0, outputTokens: 0, totalTokens: 0 }; + let sawUsage = false; + let totalToolCalls = 0; + + for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) { + const completion = await params.client.chat.completions.create({ + model: params.model, + messages: conversation, + temperature: params.temperature, + max_tokens: params.maxTokens, + tools: CHAT_TOOLS, + tool_choice: "auto", + } as any); + rawResponses.push(completion); + sawUsage = mergeUsage(usageAcc, completion?.usage) || sawUsage; + + const message = completion?.choices?.[0]?.message; + if (!message) { + return { + text: "", + usage: sawUsage ? usageAcc : undefined, + raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, missingMessage: true }, + toolEvents, + }; + } + + const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + if (!toolCalls.length) { + return { + text: typeof message.content === "string" ? message.content : "", + usage: sawUsage ? usageAcc : undefined, + raw: { responses: rawResponses, toolCallsUsed: totalToolCalls }, + toolEvents, + }; + } + + totalToolCalls += toolCalls.length; + + const assistantToolCallMessage: any = { + role: "assistant", + tool_calls: toolCalls.map((call: any, index: number) => ({ + id: call?.id ?? `tool_call_${round}_${index}`, + type: "function", + function: { + name: call?.function?.name ?? "unknown_tool", + arguments: call?.function?.arguments ?? "{}", + }, + })), + }; + if (typeof message.content === "string" && message.content.length) { + assistantToolCallMessage.content = message.content; + } + conversation.push(assistantToolCallMessage); + + for (let index = 0; index < toolCalls.length; index += 1) { + const call: any = toolCalls[index]; + const toolName = call?.function?.name ?? "unknown_tool"; + const toolCallId = call?.id ?? `tool_call_${round}_${index}`; + const startedAtMs = Date.now(); + const startedAt = new Date(startedAtMs).toISOString(); + let toolResult: ToolRunOutcome; + let parsedArgs: Record = {}; + try { + parsedArgs = toRecord(parseToolArgs(call?.function?.arguments)); + toolResult = await executeTool(toolName, parsedArgs); + } catch (err: any) { + toolResult = { + ok: false, + error: err?.message ?? String(err), + }; + } + const status: "completed" | "failed" = toolResult.ok ? "completed" : "failed"; + const error = + status === "failed" + ? typeof toolResult.error === "string" + ? toolResult.error + : "Tool execution failed." + : undefined; + const completedAtMs = Date.now(); + const event: ToolExecutionEvent = { + toolCallId, + name: toolName, + status, + summary: buildToolSummary(toolName, parsedArgs, status, error), + args: parsedArgs, + startedAt, + completedAt: new Date(completedAtMs).toISOString(), + durationMs: completedAtMs - startedAtMs, + error, + resultPreview: buildResultPreview(toolResult), + }; + toolEvents.push(event); + logToolEvent(event, params.logContext); + if (params.onToolEvent) { + await params.onToolEvent(event); + } + + conversation.push({ + role: "tool", + tool_call_id: toolCallId, + content: JSON.stringify(toolResult), + }); + } + } + + return { + text: "I reached the tool-call limit while gathering information. Please narrow the request and try again.", + usage: sawUsage ? usageAcc : undefined, + raw: { responses: rawResponses, toolCallsUsed: totalToolCalls, toolCallLimitReached: true }, + toolEvents, + }; +} diff --git a/server/src/llm/multiplexer.ts b/server/src/llm/multiplexer.ts index 39be339..324df0e 100644 --- a/server/src/llm/multiplexer.ts +++ b/server/src/llm/multiplexer.ts @@ -1,6 +1,7 @@ import { performance } from "node:perf_hooks"; import { prisma } from "../db.js"; import { anthropicClient, openaiClient, xaiClient } from "./providers.js"; +import { buildToolLogMessageData, runToolAwareOpenAIChat } from "./chat-tools.js"; import type { MultiplexRequest, MultiplexResponse, Provider } from "./types.js"; function asProviderEnum(p: Provider) { @@ -44,25 +45,26 @@ export async function runMultiplex(req: MultiplexRequest): Promise[] = []; if (req.provider === "openai" || req.provider === "xai") { const client = req.provider === "openai" ? openaiClient() : xaiClient(); - const r = await client.chat.completions.create({ + const r = await runToolAwareOpenAIChat({ + client, model: req.model, - // OpenAI SDK has very specific message union types; our normalized schema is compatible. - messages: req.messages.map((m) => ({ role: m.role, content: m.content, name: m.name })) as any, + messages: req.messages, temperature: req.temperature, - max_tokens: req.maxTokens, + maxTokens: req.maxTokens, + logContext: { + provider: req.provider, + model: req.model, + chatId, + }, }); - raw = r; - outText = r.choices?.[0]?.message?.content ?? ""; - usage = r.usage - ? { - inputTokens: r.usage.prompt_tokens, - outputTokens: r.usage.completion_tokens, - totalTokens: r.usage.total_tokens, - } - : undefined; + raw = r.raw; + outText = r.text; + usage = r.usage; + toolMessages = r.toolEvents.map((event) => buildToolLogMessageData(call.chatId, event)); } else if (req.provider === "anthropic") { const client = anthropicClient(); @@ -100,16 +102,27 @@ export async function runMultiplex(req: MultiplexRequest): Promise { + if (toolMessages.length) { + await tx.message.createMany({ + data: toolMessages.map((message) => ({ + chatId: message.chatId, + role: message.role as any, + content: message.content, + name: message.name, + metadata: message.metadata as any, + })), + }); + } + await tx.message.create({ data: { chatId: call.chatId, role: "assistant" as any, content: outText, }, - }), - prisma.llmCall.update({ + }); + await tx.llmCall.update({ where: { id: call.id }, data: { response: raw as any, @@ -118,8 +131,8 @@ export async function runMultiplex(req: MultiplexRequest): Promise[] = []; try { if (req.provider === "openai" || req.provider === "xai") { const client = req.provider === "openai" ? openaiClient() : xaiClient(); - - const stream = await client.chat.completions.create({ + const toolEvents: ToolExecutionEvent[] = []; + const r = await runToolAwareOpenAIChat({ + client, model: req.model, - messages: req.messages.map((m) => ({ role: m.role, content: m.content, name: m.name })) as any, + messages: req.messages, temperature: req.temperature, - max_tokens: req.maxTokens, - stream: true, + maxTokens: req.maxTokens, + onToolEvent: (event) => { + toolEvents.push(event); + }, + logContext: { + provider: req.provider, + model: req.model, + chatId, + }, }); - - for await (const chunk of stream as any as AsyncIterable) { - const delta = chunk.choices?.[0]?.delta?.content ?? ""; - if (delta) { - text += delta; - yield { type: "delta", text: delta }; - } + raw = r.raw; + text = r.text; + usage = r.usage; + toolMessages = toolEvents.map((event) => buildToolLogMessageData(chatId, event)); + for (const event of toolEvents) { + yield { type: "tool_call", event }; + } + if (text) { + yield { type: "delta", text }; } - - // no guaranteed usage in stream mode across providers; leave empty for now } else if (req.provider === "anthropic") { const client = anthropicClient(); @@ -110,17 +120,29 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator } // some streams end with message_stop } + raw = { streamed: true, provider: "anthropic" }; } else { throw new Error(`unknown provider: ${req.provider}`); } const latencyMs = Math.round(performance.now() - t0); - await prisma.$transaction([ - prisma.message.create({ + await prisma.$transaction(async (tx) => { + if (toolMessages.length) { + await tx.message.createMany({ + data: toolMessages.map((message) => ({ + chatId: message.chatId, + role: message.role as any, + content: message.content, + name: message.name, + metadata: message.metadata as any, + })), + }); + } + await tx.message.create({ data: { chatId, role: "assistant" as any, content: text }, - }), - prisma.llmCall.update({ + }); + await tx.llmCall.update({ where: { id: call.id }, data: { response: raw as any, @@ -129,8 +151,8 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator outputTokens: usage?.outputTokens, totalTokens: usage?.totalTokens, }, - }), - ]); + }); + }); yield { type: "done", text, usage }; } catch (e: any) { diff --git a/server/src/routes.ts b/server/src/routes.ts index 828c760..23ac615 100644 --- a/server/src/routes.ts +++ b/server/src/routes.ts @@ -748,6 +748,7 @@ export async function registerRoutes(app: FastifyInstance) { for await (const ev of runMultiplexStream(body)) { if (ev.type === "meta") send("meta", ev); + else if (ev.type === "tool_call") send("tool_call", ev.event); else if (ev.type === "delta") send("delta", ev); else if (ev.type === "done") send("done", ev); else if (ev.type === "error") send("error", ev); diff --git a/server/src/types/html-to-text.d.ts b/server/src/types/html-to-text.d.ts new file mode 100644 index 0000000..1951c63 --- /dev/null +++ b/server/src/types/html-to-text.d.ts @@ -0,0 +1,3 @@ +declare module "html-to-text" { + export function convert(html: string, options?: unknown): string; +} diff --git a/web/src/App.tsx b/web/src/App.tsx index 83418e6..5c288fc 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -27,6 +27,7 @@ import { type Message, type SearchDetail, type SearchSummary, + type ToolCallEvent, } from "@/lib/api"; import { useSessionAuth } from "@/hooks/use-session-auth"; import { cn } from "@/lib/utils"; @@ -139,6 +140,54 @@ function getChatModelSelection(chat: Pick; + startedAt?: string; + completedAt?: string; + durationMs?: number; + error?: string | null; + resultPreview?: string | null; +}; + +function asToolLogMetadata(value: unknown): ToolLogMetadata | null { + if (!value || typeof value !== "object" || Array.isArray(value)) return null; + const record = value as Record; + if (record.kind !== "tool_call") return null; + return record as ToolLogMetadata; +} + +function isToolCallLogMessage(message: Message) { + return asToolLogMetadata(message.metadata) !== null; +} + +function buildOptimisticToolMessage(event: ToolCallEvent): Message { + return { + id: `temp-tool-${event.toolCallId}`, + createdAt: event.completedAt ?? new Date().toISOString(), + role: "tool", + content: event.summary, + name: event.name, + metadata: { + kind: "tool_call", + toolCallId: event.toolCallId, + toolName: event.name, + status: event.status, + summary: event.summary, + args: event.args, + startedAt: event.startedAt, + completedAt: event.completedAt, + durationMs: event.durationMs, + error: event.error ?? null, + resultPreview: event.resultPreview ?? null, + } satisfies ToolLogMetadata, + }; +} + type ModelComboboxProps = { options: string[]; value: string; @@ -707,6 +756,7 @@ export default function App() { role: "user", content, name: null, + metadata: null, }; const optimisticAssistantMessage: Message = { @@ -715,6 +765,7 @@ export default function App() { role: "assistant", content: "", name: null, + metadata: null, }; setPendingChatState({ @@ -758,11 +809,13 @@ export default function App() { } const requestMessages: CompletionRequestMessage[] = [ - ...baseChat.messages.map((message) => ({ + ...baseChat.messages + .filter((message) => !isToolCallLogMessage(message)) + .map((message) => ({ role: message.role, content: message.content, ...(message.name ? { name: message.name } : {}), - })), + })), { role: "user", content, @@ -813,6 +866,35 @@ export default function App() { if (payload.chatId !== chatId) return; setPendingChatState((current) => (current ? { ...current, chatId: payload.chatId } : current)); }, + onToolCall: (payload) => { + setPendingChatState((current) => { + if (!current) return current; + if ( + current.messages.some( + (message) => + asToolLogMetadata(message.metadata)?.toolCallId === payload.toolCallId || message.id === `temp-tool-${payload.toolCallId}` + ) + ) { + return current; + } + + const toolMessage = buildOptimisticToolMessage(payload); + const assistantIndex = current.messages.findIndex( + (message, index, all) => index === all.length - 1 && message.id.startsWith("temp-assistant-") + ); + if (assistantIndex < 0) { + return { ...current, messages: current.messages.concat(toolMessage) }; + } + return { + ...current, + messages: [ + ...current.messages.slice(0, assistantIndex), + toolMessage, + ...current.messages.slice(assistantIndex), + ], + }; + }); + }, onDelta: (payload) => { if (!payload.text) return; setPendingChatState((current) => { diff --git a/web/src/components/chat/chat-messages-panel.tsx b/web/src/components/chat/chat-messages-panel.tsx index 4434a8d..a145dda 100644 --- a/web/src/components/chat/chat-messages-panel.tsx +++ b/web/src/components/chat/chat-messages-panel.tsx @@ -1,6 +1,7 @@ import { cn } from "@/lib/utils"; import type { Message } from "@/lib/api"; import { MarkdownContent } from "@/components/markdown/markdown-content"; +import { Globe2, Link2, Wrench } from "lucide-preact"; type Props = { messages: Message[]; @@ -8,6 +9,33 @@ type Props = { isSending: boolean; }; +type ToolLogMetadata = { + kind: "tool_call"; + toolName?: string; + status?: "completed" | "failed"; + summary?: string; +}; + +function asToolLogMetadata(value: unknown): ToolLogMetadata | null { + if (!value || typeof value !== "object" || Array.isArray(value)) return null; + const record = value as Record; + if (record.kind !== "tool_call") return null; + return record as ToolLogMetadata; +} + +function getToolSummary(message: Message, metadata: ToolLogMetadata) { + if (typeof metadata.summary === "string" && metadata.summary.trim()) return metadata.summary.trim(); + const toolName = metadata.toolName?.trim() || message.name?.trim() || "unknown_tool"; + return `Ran tool '${toolName}'.`; +} + +function getToolIconName(toolName: string | null | undefined) { + const lowered = toolName?.toLowerCase() ?? ""; + if (lowered.includes("search")) return "search"; + if (lowered.includes("url") || lowered.includes("fetch") || lowered.includes("http")) return "fetch"; + return "generic"; +} + export function ChatMessagesPanel({ messages, isLoading, isSending }: Props) { const hasPendingAssistant = messages.some((message) => message.id.startsWith("temp-assistant-") && message.content.trim().length === 0); @@ -16,6 +44,28 @@ export function ChatMessagesPanel({ messages, isLoading, isSending }: Props) { {isLoading && messages.length === 0 ?

Loading messages...

: null}
{messages.map((message) => { + const toolLogMetadata = asToolLogMetadata(message.metadata); + if (message.role === "tool" && toolLogMetadata) { + const iconKind = getToolIconName(toolLogMetadata.toolName ?? message.name); + const Icon = iconKind === "search" ? Globe2 : iconKind === "fetch" ? Link2 : Wrench; + const isFailed = toolLogMetadata.status === "failed"; + return ( +
+
+ + {getToolSummary(message, toolLogMetadata)} +
+
+ ); + } + const isUser = message.role === "user"; const isPendingAssistant = message.id.startsWith("temp-assistant-") && isSending && message.content.trim().length === 0; return ( diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index 38e67ec..c07cea9 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -23,6 +23,20 @@ export type Message = { role: "system" | "user" | "assistant" | "tool"; content: string; name: string | null; + metadata: unknown | null; +}; + +export type ToolCallEvent = { + toolCallId: string; + name: string; + status: "completed" | "failed"; + summary: string; + args: Record; + startedAt: string; + completedAt: string; + durationMs: number; + error?: string; + resultPreview?: string; }; export type ChatDetail = { @@ -113,6 +127,7 @@ type CompletionResponse = { type CompletionStreamHandlers = { onMeta?: (payload: { chatId: string; callId: string; provider: Provider; model: string }) => void; + onToolCall?: (payload: ToolCallEvent) => void; onDelta?: (payload: { text: string }) => void; onDone?: (payload: { text: string; usage?: { inputTokens?: number; outputTokens?: number; totalTokens?: number } }) => void; onError?: (payload: { message: string }) => void; @@ -415,6 +430,7 @@ export async function runCompletionStream( } if (eventName === "meta") handlers.onMeta?.(payload); + else if (eventName === "tool_call") handlers.onToolCall?.(payload); else if (eventName === "delta") handlers.onDelta?.(payload); else if (eventName === "done") handlers.onDone?.(payload); else if (eventName === "error") handlers.onError?.(payload);