2 Commits

Author SHA1 Message Date
d7967eaa75 Adds searxng support for tool calling 2026-05-02 18:15:14 -07:00
2125c5dfa4 web: tweak scrolling behavior 2026-05-02 18:15:14 -07:00
8 changed files with 271 additions and 14 deletions

View File

@@ -13,6 +13,8 @@ services:
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
XAI_API_KEY: ${XAI_API_KEY:-}
EXA_API_KEY: ${EXA_API_KEY:-}
CHAT_WEB_SEARCH_ENGINE: ${CHAT_WEB_SEARCH_ENGINE:-exa}
SEARXNG_BASE_URL: ${SEARXNG_BASE_URL:-}
volumes:
- sybil_data:/data
expose:

View File

@@ -114,7 +114,7 @@ Behavior notes:
- Server updates chat-level model metadata on each call: `lastUsedProvider`/`lastUsedModel`; first successful/failed call also initializes `initiatedProvider`/`initiatedModel` if unset.
- For `openai` and `xai`, backend enables tool use during chat completion with an internal system instruction.
- Available tool calls for chat: `web_search` and `fetch_url`.
- `web_search` uses Exa and returns ranked results with per-result summaries/snippets.
- `web_search` returns ranked results with per-result summaries/snippets. Its backend engine is selected by `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`.
- `fetch_url` fetches a URL and returns plaintext page content (HTML converted to text server-side).
- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output.
- `anthropic` currently runs without server-managed tool calls.
@@ -161,6 +161,7 @@ Behavior notes:
Search run notes:
- Backend executes Exa search and Exa answer.
- Search mode is independent from chat `web_search` tool configuration and remains Exa-only.
- Persists answer text/citations + ranked results.
- If both search and answer fail, endpoint returns an error.

View File

@@ -105,6 +105,7 @@ Event order:
- `openai`: backend may execute internal tool calls (`web_search`, `fetch_url`) before producing final text.
- `xai`: same tool-enabled behavior as OpenAI.
- `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`.
- `web_search` uses `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`. This only affects chat-mode tool calls, not search-mode endpoints.
Tool-enabled streaming notes (`openai`/`xai`):
- Stream still emits standard `meta`, `delta`, `done|error` events.

View File

@@ -44,6 +44,8 @@ If `ADMIN_TOKEN` is not set, the server runs in open mode (dev).
- `ANTHROPIC_API_KEY`
- `XAI_API_KEY`
- `EXA_API_KEY`
- `CHAT_WEB_SEARCH_ENGINE` (`exa` by default, or `searxng` for chat tool calls only)
- `SEARXNG_BASE_URL` (required when `CHAT_WEB_SEARCH_ENGINE=searxng`; instance must allow `format=json`)
## API
- `GET /health`

View File

@@ -1,5 +1,24 @@
import path from "node:path";
import { fileURLToPath } from "node:url";
import { config as loadDotenv } from "dotenv";
import { z } from "zod";
import "dotenv/config";
loadDotenv({ quiet: true });
loadDotenv({ path: path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../.env"), quiet: true });
const OptionalUrlSchema = z.preprocess(
(value) => (typeof value === "string" && value.trim() === "" ? undefined : value),
z.string().trim().url().optional()
);
const ChatWebSearchEngineSchema = z.preprocess(
(value) => {
if (typeof value !== "string") return value;
const trimmed = value.trim();
return trimmed ? trimmed.toLowerCase() : undefined;
},
z.enum(["exa", "searxng"]).default("exa")
);
const EnvSchema = z.object({
PORT: z.coerce.number().int().positive().default(8787),
@@ -13,6 +32,18 @@ const EnvSchema = z.object({
ANTHROPIC_API_KEY: z.string().optional(),
XAI_API_KEY: z.string().optional(),
EXA_API_KEY: z.string().optional(),
// Chat-mode web_search tool configuration. Search mode remains Exa-only for now.
CHAT_WEB_SEARCH_ENGINE: ChatWebSearchEngineSchema,
SEARXNG_BASE_URL: OptionalUrlSchema,
}).superRefine((value, ctx) => {
if (value.CHAT_WEB_SEARCH_ENGINE === "searxng" && !value.SEARXNG_BASE_URL) {
ctx.addIssue({
code: "custom",
path: ["SEARXNG_BASE_URL"],
message: "SEARXNG_BASE_URL is required when CHAT_WEB_SEARCH_ENGINE=searxng",
});
}
});
export type Env = z.infer<typeof EnvSchema>;

View File

@@ -1,7 +1,9 @@
import { convert as htmlToText } from "html-to-text";
import type OpenAI from "openai";
import { z } from "zod";
import { env } from "../env.js";
import { exaClient } from "../search/exa.js";
import { searchSearxng } from "../search/searxng.js";
import type { ChatMessage } from "./types.js";
const MAX_TOOL_ROUNDS = 4;
@@ -21,6 +23,8 @@ const WebSearchArgsSchema = z
})
.strict();
type WebSearchArgs = z.infer<typeof WebSearchArgsSchema>;
const FetchUrlArgsSchema = z
.object({
url: z.string().trim().url(),
@@ -267,8 +271,7 @@ function normalizeIncomingMessages(messages: ChatMessage[]) {
return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
}
async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
const args = WebSearchArgsSchema.parse(input);
async function runExaWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
const exa = exaClient();
const response = await exa.search(args.query, {
type: args.type ?? "auto",
@@ -292,6 +295,7 @@ async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
const results = Array.isArray(response?.results) ? response.results : [];
return {
ok: true,
searchEngine: "exa",
query: args.query,
requestId: response?.requestId ?? null,
results: results.map((result: any, index: number) => ({
@@ -309,6 +313,40 @@ async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
};
}
async function runSearxngWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
const response = await searchSearxng(args.query, {
numResults: args.numResults ?? DEFAULT_WEB_RESULTS,
includeDomains: args.includeDomains,
excludeDomains: args.excludeDomains,
});
return {
ok: true,
searchEngine: "searxng",
query: args.query,
requestId: response.requestId,
results: response.results.map((result, index) => ({
rank: index + 1,
title: result.title,
url: result.url,
publishedDate: result.publishedDate,
author: null,
summary: result.summary,
text: result.text,
highlights: result.summary ? [clipText(result.summary, 280)] : [],
engines: result.engines,
})),
};
}
async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
const args = WebSearchArgsSchema.parse(input);
if (env.CHAT_WEB_SEARCH_ENGINE === "searxng") {
return runSearxngWebSearchTool(args);
}
return runExaWebSearchTool(args);
}
function assertSafeFetchUrl(urlRaw: string) {
const parsed = new URL(urlRaw);
if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {

View File

@@ -0,0 +1,160 @@
import { env } from "../env.js";
const SEARXNG_TIMEOUT_MS = 12_000;
const DEFAULT_SEARXNG_CATEGORIES = "general";
export type SearxngSearchOptions = {
numResults: number;
includeDomains?: string[];
excludeDomains?: string[];
};
export type SearxngSearchResult = {
title: string | null;
url: string | null;
publishedDate: string | null;
summary: string | null;
text: string | null;
engines: string[];
};
export type SearxngSearchResponse = {
query: string;
requestId: null;
results: SearxngSearchResult[];
};
function clipText(input: string, maxCharacters: number) {
return input.length <= maxCharacters ? input : `${input.slice(0, maxCharacters)}...`;
}
function compactWhitespace(input: string) {
return input.replace(/\r/g, "").replace(/[ \t]+\n/g, "\n").replace(/\n{3,}/g, "\n\n").replace(/\s+/g, " ").trim();
}
function requireSearxngBaseUrl() {
if (!env.SEARXNG_BASE_URL) {
throw new Error("SEARXNG_BASE_URL not set");
}
return env.SEARXNG_BASE_URL.endsWith("/") ? env.SEARXNG_BASE_URL : `${env.SEARXNG_BASE_URL}/`;
}
function normalizeDomain(input: string) {
const trimmed = input.trim().toLowerCase();
if (!trimmed) return null;
try {
const parsed = new URL(trimmed.includes("://") ? trimmed : `https://${trimmed}`);
return parsed.hostname.replace(/^www\./, "");
} catch {
return trimmed.split(/[/?#]/, 1)[0]?.replace(/^www\./, "") || null;
}
}
function normalizeDomains(input: string[] | undefined) {
return Array.from(new Set((input ?? []).map(normalizeDomain).filter((domain): domain is string => Boolean(domain))));
}
function hostnameMatchesDomain(urlRaw: string | null, domain: string) {
if (!urlRaw) return false;
try {
const hostname = new URL(urlRaw).hostname.toLowerCase().replace(/^www\./, "");
return hostname === domain || hostname.endsWith(`.${domain}`);
} catch {
return false;
}
}
function filterResultsByDomains(results: SearxngSearchResult[], options: SearxngSearchOptions) {
const includeDomains = normalizeDomains(options.includeDomains);
const excludeDomains = normalizeDomains(options.excludeDomains);
return results.filter((result) => {
if (includeDomains.length && !includeDomains.some((domain) => hostnameMatchesDomain(result.url, domain))) return false;
if (excludeDomains.some((domain) => hostnameMatchesDomain(result.url, domain))) return false;
return true;
});
}
function buildSearxngQuery(query: string, options: SearxngSearchOptions) {
const includeDomains = normalizeDomains(options.includeDomains);
const excludeDomains = normalizeDomains(options.excludeDomains);
const includeClause =
includeDomains.length === 0
? ""
: includeDomains.length === 1
? `site:${includeDomains[0]}`
: `(${includeDomains.map((domain) => `site:${domain}`).join(" OR ")})`;
const excludeClause = excludeDomains.map((domain) => `-site:${domain}`).join(" ");
return [query, includeClause, excludeClause].filter(Boolean).join(" ");
}
function buildSearchUrl(query: string, options: SearxngSearchOptions) {
const url = new URL("search", requireSearxngBaseUrl());
url.searchParams.set("q", buildSearxngQuery(query, options));
url.searchParams.set("categories", DEFAULT_SEARXNG_CATEGORIES);
url.searchParams.set("language", "auto");
url.searchParams.set("safesearch", "1");
url.searchParams.set("format", "json");
return url;
}
async function fetchSearxng(url: URL, accept: string) {
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), SEARXNG_TIMEOUT_MS);
try {
return await fetch(url, {
redirect: "follow",
signal: controller.signal,
headers: {
"User-Agent": "SybilBot/1.0 (+https://sybil.local)",
Accept: accept,
},
});
} finally {
clearTimeout(timeout);
}
}
function stringOrNull(value: unknown) {
if (typeof value !== "string") return null;
const normalized = compactWhitespace(value);
return normalized || null;
}
function stringArray(value: unknown) {
if (!Array.isArray(value)) return [];
return value.filter((item): item is string => typeof item === "string").map(compactWhitespace).filter(Boolean);
}
function mapJsonResult(result: any): SearxngSearchResult {
const summary = stringOrNull(result?.content) ?? stringOrNull(result?.snippet);
const text = summary ? clipText(summary, 700) : null;
return {
title: stringOrNull(result?.title),
url: stringOrNull(result?.url),
publishedDate: stringOrNull(result?.publishedDate) ?? stringOrNull(result?.published_date),
summary: summary ? clipText(summary, 1_400) : null,
text,
engines: stringArray(result?.engines ?? (typeof result?.engine === "string" ? [result.engine] : [])),
};
}
export async function searchSearxng(query: string, options: SearxngSearchOptions): Promise<SearxngSearchResponse> {
const url = buildSearchUrl(query, options);
const response = await fetchSearxng(url, "application/json");
if (!response.ok) {
await response.arrayBuffer();
throw new Error(`SearXNG JSON search failed with status ${response.status}. Verify search.formats includes json.`);
}
const contentType = response.headers.get("content-type")?.toLowerCase() ?? "";
if (!contentType.includes("application/json")) {
await response.arrayBuffer();
throw new Error(`SearXNG JSON search returned ${contentType || "unknown content type"}.`);
}
const data: any = await response.json();
const results = Array.isArray(data?.results) ? data.results.map(mapJsonResult) : [];
return { query, requestId: null, results: filterResultsByDomains(results, options).slice(0, options.numResults) };
}

View File

@@ -452,6 +452,7 @@ export default function App() {
const searchRunCounterRef = useRef(0);
const shouldAutoScrollRef = useRef(true);
const wasSendingRef = useRef(false);
const pendingReplyScrollRef = useRef(false);
const [contextMenu, setContextMenu] = useState<ContextMenuState | null>(null);
const [isMobileSidebarOpen, setIsMobileSidebarOpen] = useState(false);
const [sidebarQuery, setSidebarQuery] = useState("");
@@ -643,6 +644,12 @@ export default function App() {
}, [providerModelPreferences]);
const selectedKey = selectedItem ? `${selectedItem.kind}:${selectedItem.id}` : null;
const isChatReplyStreamingInView =
isSending &&
draftKind !== "search" &&
selectedItem?.kind !== "search" &&
!!pendingChatState &&
(!pendingChatState.chatId || (selectedItem?.kind === "chat" && selectedItem.id === pendingChatState.chatId));
useEffect(() => {
shouldAutoScrollRef.current = true;
@@ -675,11 +682,27 @@ export default function App() {
if (draftKind === "search" || selectedItem?.kind === "search") return;
const wasSending = wasSendingRef.current;
wasSendingRef.current = isSending;
if (wasSending && !isSending) return;
if (isSending) return;
if (wasSending) {
shouldAutoScrollRef.current = false;
return;
}
if (!shouldAutoScrollRef.current) return;
transcriptEndRef.current?.scrollIntoView({ behavior: isSending ? "smooth" : "auto", block: "end" });
transcriptEndRef.current?.scrollIntoView({ behavior: "auto", block: "end" });
}, [draftKind, selectedChat?.messages.length, isSending, selectedItem?.kind, selectedKey]);
useEffect(() => {
if (!isChatReplyStreamingInView || !pendingReplyScrollRef.current) return;
pendingReplyScrollRef.current = false;
shouldAutoScrollRef.current = true;
window.requestAnimationFrame(() => {
const container = transcriptContainerRef.current;
if (!container) return;
container.scrollTo({ top: container.scrollHeight, behavior: "smooth" });
});
}, [isChatReplyStreamingInView, pendingChatState?.chatId]);
useEffect(() => {
if (isSending) return;
const hasWorkspaceSelection = Boolean(selectedItem) || draftKind !== null;
@@ -697,13 +720,7 @@ export default function App() {
const messages = selectedChat?.messages ?? [];
const isSearchMode = draftKind ? draftKind === "search" : selectedItem?.kind === "search";
const isSearchRunning = isSending && isSearchMode;
const isSendingActiveChat =
isSending &&
!isSearchMode &&
!!pendingChatState &&
!!pendingChatState.chatId &&
selectedItem?.kind === "chat" &&
selectedItem.id === pendingChatState.chatId;
const isSendingActiveChat = isChatReplyStreamingInView;
const displayMessages = useMemo(() => {
if (!pendingChatState) return messages.filter(isDisplayableMessage);
if (pendingChatState.chatId) {
@@ -837,6 +854,8 @@ export default function App() {
}, [contextMenu]);
const handleSendChat = async (content: string) => {
pendingReplyScrollRef.current = true;
const optimisticUserMessage: Message = {
id: `temp-user-${Date.now()}`,
createdAt: new Date().toISOString(),
@@ -1424,7 +1443,7 @@ export default function App() {
<div
ref={transcriptContainerRef}
className="flex-1 overflow-y-auto px-4 pt-8 md:px-10 lg:px-14 pb-36 md:pb-44"
className="flex-1 overflow-y-auto px-4 pt-8 md:px-10 lg:px-14 pb-36 md:pb-44 [overflow-anchor:none]"
onScroll={() => {
const container = transcriptContainerRef.current;
if (!container) return;
@@ -1443,6 +1462,9 @@ export default function App() {
onStartChat={selectedSearch ? handleStartChatFromSearch : undefined}
/>
)}
{isChatReplyStreamingInView ? (
<div className="mx-auto mt-6 h-[52vh] min-h-72 max-h-[36rem] max-w-4xl" aria-hidden="true" />
) : null}
<div ref={transcriptEndRef} />
</div>