Adds SearXNG support for tool calling
This commit is contained in:
@@ -13,6 +13,8 @@ services:
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||
XAI_API_KEY: ${XAI_API_KEY:-}
|
||||
EXA_API_KEY: ${EXA_API_KEY:-}
|
||||
CHAT_WEB_SEARCH_ENGINE: ${CHAT_WEB_SEARCH_ENGINE:-exa}
|
||||
SEARXNG_BASE_URL: ${SEARXNG_BASE_URL:-}
|
||||
volumes:
|
||||
- sybil_data:/data
|
||||
expose:
|
||||
|
||||
@@ -114,7 +114,7 @@ Behavior notes:
|
||||
- Server updates chat-level model metadata on each call: `lastUsedProvider`/`lastUsedModel`; first successful/failed call also initializes `initiatedProvider`/`initiatedModel` if unset.
|
||||
- For `openai` and `xai`, backend enables tool use during chat completion with an internal system instruction.
|
||||
- Available tool calls for chat: `web_search` and `fetch_url`.
|
||||
- `web_search` uses Exa and returns ranked results with per-result summaries/snippets.
|
||||
- `web_search` returns ranked results with per-result summaries/snippets. Its backend engine is selected by `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`.
|
||||
- `fetch_url` fetches a URL and returns plaintext page content (HTML converted to text server-side).
|
||||
- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output.
|
||||
- `anthropic` currently runs without server-managed tool calls.
|
||||
@@ -161,6 +161,7 @@ Behavior notes:
|
||||
|
||||
Search run notes:
|
||||
- Backend executes Exa search and Exa answer.
|
||||
- Search mode is independent from chat `web_search` tool configuration and remains Exa-only.
|
||||
- Persists answer text/citations + ranked results.
|
||||
- If both search and answer fail, endpoint returns an error.
|
||||
|
||||
|
||||
@@ -105,6 +105,7 @@ Event order:
|
||||
- `openai`: backend may execute internal tool calls (`web_search`, `fetch_url`) before producing final text.
|
||||
- `xai`: same tool-enabled behavior as OpenAI.
|
||||
- `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`.
|
||||
- `web_search` uses `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`. This only affects chat-mode tool calls, not search-mode endpoints.
|
||||
|
||||
Tool-enabled streaming notes (`openai`/`xai`):
|
||||
- Stream still emits standard `meta`, `delta`, `done|error` events.
|
||||
|
||||
@@ -44,6 +44,8 @@ If `ADMIN_TOKEN` is not set, the server runs in open mode (dev).
|
||||
- `ANTHROPIC_API_KEY`
|
||||
- `XAI_API_KEY`
|
||||
- `EXA_API_KEY`
|
||||
- `CHAT_WEB_SEARCH_ENGINE` (`exa` by default, or `searxng` for chat tool calls only)
|
||||
- `SEARXNG_BASE_URL` (required when `CHAT_WEB_SEARCH_ENGINE=searxng`; instance must allow `format=json`)
|
||||
|
||||
## API
|
||||
- `GET /health`
|
||||
|
||||
@@ -1,5 +1,24 @@
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { config as loadDotenv } from "dotenv";
|
||||
import { z } from "zod";
|
||||
import "dotenv/config";
|
||||
|
||||
// Load environment files in two passes: the first call relies on dotenv's
// default lookup, the second targets the `.env` located two directories above
// this module's own directory.
loadDotenv({ quiet: true });
const fallbackEnvPath = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../.env");
loadDotenv({ path: fallbackEnvPath, quiet: true });
|
||||
|
||||
/**
 * Optional URL setting. Blank or whitespace-only strings are mapped to
 * `undefined` before validation so that an empty variable counts as "not set"
 * instead of failing the URL check.
 */
const OptionalUrlSchema = z.preprocess((value) => {
  const isBlankString = typeof value === "string" && value.trim() === "";
  return isBlankString ? undefined : value;
}, z.string().trim().url().optional());
|
||||
|
||||
/**
 * Engine selector for the chat `web_search` tool. String input is trimmed and
 * lower-cased; a blank string becomes `undefined` so the `"exa"` default
 * applies. Non-string input is handed to the enum validator untouched.
 */
const ChatWebSearchEngineSchema = z.preprocess((value) => {
  if (typeof value === "string") {
    const normalized = value.trim().toLowerCase();
    return normalized === "" ? undefined : normalized;
  }
  return value;
}, z.enum(["exa", "searxng"]).default("exa"));
|
||||
|
||||
const EnvSchema = z.object({
|
||||
PORT: z.coerce.number().int().positive().default(8787),
|
||||
@@ -13,6 +32,18 @@ const EnvSchema = z.object({
|
||||
ANTHROPIC_API_KEY: z.string().optional(),
|
||||
XAI_API_KEY: z.string().optional(),
|
||||
EXA_API_KEY: z.string().optional(),
|
||||
|
||||
// Chat-mode web_search tool configuration. Search mode remains Exa-only for now.
|
||||
CHAT_WEB_SEARCH_ENGINE: ChatWebSearchEngineSchema,
|
||||
SEARXNG_BASE_URL: OptionalUrlSchema,
|
||||
}).superRefine((value, ctx) => {
|
||||
if (value.CHAT_WEB_SEARCH_ENGINE === "searxng" && !value.SEARXNG_BASE_URL) {
|
||||
ctx.addIssue({
|
||||
code: "custom",
|
||||
path: ["SEARXNG_BASE_URL"],
|
||||
message: "SEARXNG_BASE_URL is required when CHAT_WEB_SEARCH_ENGINE=searxng",
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
/** Environment shape inferred from `EnvSchema`. */
export type Env = z.infer<typeof EnvSchema>;
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import { convert as htmlToText } from "html-to-text";
|
||||
import type OpenAI from "openai";
|
||||
import { z } from "zod";
|
||||
import { env } from "../env.js";
|
||||
import { exaClient } from "../search/exa.js";
|
||||
import { searchSearxng } from "../search/searxng.js";
|
||||
import type { ChatMessage } from "./types.js";
|
||||
|
||||
const MAX_TOOL_ROUNDS = 4;
|
||||
@@ -21,6 +23,8 @@ const WebSearchArgsSchema = z
|
||||
})
|
||||
.strict();
|
||||
|
||||
/** Argument type inferred from `WebSearchArgsSchema`; taken by the per-engine `web_search` runners. */
type WebSearchArgs = z.infer<typeof WebSearchArgsSchema>;
|
||||
|
||||
const FetchUrlArgsSchema = z
|
||||
.object({
|
||||
url: z.string().trim().url(),
|
||||
@@ -267,8 +271,7 @@ function normalizeIncomingMessages(messages: ChatMessage[]) {
|
||||
return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
|
||||
}
|
||||
|
||||
async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
|
||||
const args = WebSearchArgsSchema.parse(input);
|
||||
async function runExaWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
|
||||
const exa = exaClient();
|
||||
const response = await exa.search(args.query, {
|
||||
type: args.type ?? "auto",
|
||||
@@ -292,6 +295,7 @@ async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
|
||||
const results = Array.isArray(response?.results) ? response.results : [];
|
||||
return {
|
||||
ok: true,
|
||||
searchEngine: "exa",
|
||||
query: args.query,
|
||||
requestId: response?.requestId ?? null,
|
||||
results: results.map((result: any, index: number) => ({
|
||||
@@ -309,6 +313,40 @@ async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Runs the chat `web_search` tool against SearXNG and shapes the hits into a
 * ranked tool outcome.
 *
 * @param args Parsed `web_search` arguments.
 * @returns An outcome tagged `searchEngine: "searxng"` with 1-based ranks.
 */
async function runSearxngWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
  const { query, includeDomains, excludeDomains } = args;
  const numResults = args.numResults ?? DEFAULT_WEB_RESULTS;

  const response = await searchSearxng(query, { numResults, includeDomains, excludeDomains });

  const rankedResults = response.results.map((hit, position) => {
    // A single clipped highlight is derived from the summary when one exists.
    const highlights = hit.summary ? [clipText(hit.summary, 280)] : [];
    return {
      rank: position + 1,
      title: hit.title,
      url: hit.url,
      publishedDate: hit.publishedDate,
      author: null,
      summary: hit.summary,
      text: hit.text,
      highlights,
      engines: hit.engines,
    };
  });

  return {
    ok: true,
    searchEngine: "searxng",
    query,
    requestId: response.requestId,
    results: rankedResults,
  };
}
|
||||
|
||||
/**
 * Entry point for the chat `web_search` tool: validates the raw arguments,
 * then dispatches to the engine selected by `env.CHAT_WEB_SEARCH_ENGINE`
 * (SearXNG when set to `"searxng"`, Exa otherwise).
 *
 * @param input Unvalidated tool-call arguments.
 * @throws When `input` does not satisfy `WebSearchArgsSchema`.
 */
async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
  const args = WebSearchArgsSchema.parse(input);
  const runSearch = env.CHAT_WEB_SEARCH_ENGINE === "searxng" ? runSearxngWebSearchTool : runExaWebSearchTool;
  return runSearch(args);
}
|
||||
|
||||
function assertSafeFetchUrl(urlRaw: string) {
|
||||
const parsed = new URL(urlRaw);
|
||||
if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
|
||||
|
||||
160
server/src/search/searxng.ts
Normal file
160
server/src/search/searxng.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
import { env } from "../env.js";
|
||||
|
||||
/** Deadline, in milliseconds, applied to each SearXNG HTTP request. */
const SEARXNG_TIMEOUT_MS = 12_000;
|
||||
/** Value sent as the `categories` query parameter on every search request. */
const DEFAULT_SEARXNG_CATEGORIES = "general";
|
||||
|
||||
/** Caller-supplied options for a SearXNG search. */
export type SearxngSearchOptions = {
  /** Maximum number of results returned after domain filtering. */
  numResults: number;
  /** When non-empty, only results whose host matches one of these domains are kept. */
  includeDomains?: string[];
  /** Results whose host matches any of these domains are dropped. */
  excludeDomains?: string[];
};
|
||||
|
||||
/** One normalized SearXNG hit; fields the instance did not supply are `null`. */
export type SearxngSearchResult = {
  /** Result title with whitespace collapsed. */
  title: string | null;
  /** Result URL as reported by the instance. */
  url: string | null;
  /** Publication date string, when the instance provided one. */
  publishedDate: string | null;
  /** Snippet text, clipped to a longer limit than `text`. */
  summary: string | null;
  /** The same snippet, clipped to a shorter limit. */
  text: string | null;
  /** Names of the upstream engines that produced this hit. */
  engines: string[];
};
|
||||
|
||||
/** Result envelope returned by `searchSearxng`. */
export type SearxngSearchResponse = {
  /** The query exactly as passed by the caller (without added `site:` clauses). */
  query: string;
  /** Always `null` in this response shape. */
  requestId: null;
  /** Filtered and truncated hits, in the order the instance returned them. */
  results: SearxngSearchResult[];
};
|
||||
|
||||
/**
 * Truncates `input` to at most `maxCharacters` UTF-16 code units and appends
 * `...` when anything was cut. Strings already within the limit are returned
 * unchanged.
 *
 * The cut never lands between the two halves of a surrogate pair: if the last
 * kept unit would be a lone high surrogate it is dropped as well, so the
 * clipped text stays well-formed.
 *
 * @param input Text to clip.
 * @param maxCharacters Maximum number of UTF-16 code units to keep.
 * @returns The original text, or its clipped prefix followed by `...`.
 */
function clipText(input: string, maxCharacters: number): string {
  if (input.length <= maxCharacters) return input;

  let end = maxCharacters;
  if (end > 0) {
    const lastKept = input.charCodeAt(end - 1);
    // 0xD800–0xDBFF is the high-surrogate range; keeping it without its
    // partner would leave an unpaired code unit right before the ellipsis.
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  }
  return `${input.slice(0, end)}...`;
}
|
||||
|
||||
/**
 * Flattens text to a single line: carriage returns are removed outright, every
 * remaining run of whitespace becomes one space, and the ends are trimmed.
 */
function compactWhitespace(input: string) {
  const withoutCarriageReturns = input.replace(/\r/g, "");
  // Collapsing every whitespace run to one space subsumes any narrower
  // newline-specific clean-up, so a single pass is enough.
  const singleSpaced = withoutCarriageReturns.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
||||
|
||||
/**
 * Returns the configured SearXNG base URL, guaranteed to end with `/`.
 *
 * @throws Error when `env.SEARXNG_BASE_URL` is unset or empty.
 */
function requireSearxngBaseUrl() {
  const baseUrl = env.SEARXNG_BASE_URL;
  if (!baseUrl) {
    throw new Error("SEARXNG_BASE_URL not set");
  }
  if (baseUrl.endsWith("/")) return baseUrl;
  return `${baseUrl}/`;
}
|
||||
|
||||
/**
 * Reduces a user-supplied domain or URL to a bare lower-case hostname without
 * a leading `www.`.
 *
 * @param input Domain, host/path, or full URL.
 * @returns The hostname, or `null` when the input is blank or nothing usable
 *   remains. Input that cannot be parsed as a URL falls back to everything
 *   before the first `/`, `?` or `#`.
 */
function normalizeDomain(input: string) {
  const candidate = input.trim().toLowerCase();
  if (candidate === "") return null;

  // Bare hosts get a scheme so the URL parser can extract the hostname.
  const withScheme = candidate.includes("://") ? candidate : `https://${candidate}`;

  let hostname: string;
  try {
    hostname = new URL(withScheme).hostname;
  } catch {
    const head = candidate.split(/[/?#]/, 1)[0];
    return head?.replace(/^www\./, "") || null;
  }
  return hostname.replace(/^www\./, "");
}
|
||||
|
||||
/**
 * Normalizes a list of domains, dropping unusable entries and duplicates while
 * keeping first-seen order. `undefined` yields an empty list.
 */
function normalizeDomains(input: string[] | undefined) {
  const unique = new Set<string>();
  for (const raw of input ?? []) {
    const domain = normalizeDomain(raw);
    if (domain) unique.add(domain);
  }
  return Array.from(unique);
}
|
||||
|
||||
/**
 * Tests whether a URL's host is `domain` itself or one of its subdomains.
 * The host is compared lower-cased and without a leading `www.`.
 *
 * @param urlRaw Absolute URL, or `null`.
 * @param domain Bare domain to compare against.
 * @returns `false` for a missing, empty, or unparseable URL.
 */
function hostnameMatchesDomain(urlRaw: string | null, domain: string) {
  if (urlRaw === null || urlRaw === "") return false;

  let host: string;
  try {
    host = new URL(urlRaw).hostname;
  } catch {
    return false;
  }

  host = host.toLowerCase().replace(/^www\./, "");
  if (host === domain) return true;
  // The leading dot keeps "notexample.com" from matching "example.com".
  return host.endsWith(`.${domain}`);
}
|
||||
|
||||
/**
 * Applies the include/exclude domain lists to already-mapped results.
 * With a non-empty include list a result must match at least one entry; any
 * result matching an exclude entry is removed.
 */
function filterResultsByDomains(results: SearxngSearchResult[], options: SearxngSearchOptions) {
  const allowed = normalizeDomains(options.includeDomains);
  const blocked = normalizeDomains(options.excludeDomains);

  const matchesAny = (url: string | null, domains: string[]) =>
    domains.some((domain) => hostnameMatchesDomain(url, domain));

  return results.filter((result) => {
    const passesInclude = allowed.length === 0 || matchesAny(result.url, allowed);
    return passesInclude && !matchesAny(result.url, blocked);
  });
}
|
||||
|
||||
/**
 * Builds the `q` string: the user's query followed by `site:` clauses for
 * included domains (parenthesised and OR-joined when there are several) and a
 * `-site:` clause for each excluded domain. Empty parts are omitted.
 */
function buildSearxngQuery(query: string, options: SearxngSearchOptions) {
  const siteTerms = normalizeDomains(options.includeDomains).map((domain) => `site:${domain}`);
  const excludeTerms = normalizeDomains(options.excludeDomains).map((domain) => `-site:${domain}`);

  // Joining zero or one term yields "" or the bare term; only multi-term
  // lists need the surrounding parentheses.
  const joinedSites = siteTerms.join(" OR ");
  const includeClause = siteTerms.length > 1 ? `(${joinedSites})` : joinedSites;
  const excludeClause = excludeTerms.join(" ");

  const parts: string[] = [];
  for (const part of [query, includeClause, excludeClause]) {
    if (part) parts.push(part);
  }
  return parts.join(" ");
}
|
||||
|
||||
/**
 * Assembles the `search` endpoint URL under the configured base URL, carrying
 * the query plus fixed `categories`, `language`, `safesearch` and `format`
 * parameters.
 *
 * @throws Error when the SearXNG base URL is not configured.
 */
function buildSearchUrl(query: string, options: SearxngSearchOptions) {
  const endpoint = new URL("search", requireSearxngBaseUrl());

  const params: Array<[string, string]> = [
    ["q", buildSearxngQuery(query, options)],
    ["categories", DEFAULT_SEARXNG_CATEGORIES],
    ["language", "auto"],
    ["safesearch", "1"],
    ["format", "json"],
  ];
  for (const [key, value] of params) {
    endpoint.searchParams.set(key, value);
  }
  return endpoint;
}
|
||||
|
||||
/**
 * Issues a GET request to the SearXNG instance with a hard deadline.
 *
 * The deadline comes from `AbortSignal.timeout`, which stays armed after the
 * response headers arrive. A timer that is cleared as soon as `fetch()`
 * resolves would leave the caller's later body read (`json()` /
 * `arrayBuffer()`) unbounded; here a stalled body is aborted too.
 *
 * @param url Fully built request URL.
 * @param accept Value for the `Accept` request header.
 * @returns The response; the caller is responsible for consuming the body.
 * @throws When the request or body read exceeds `SEARXNG_TIMEOUT_MS`, or on
 *   network failure.
 */
async function fetchSearxng(url: URL, accept: string): Promise<Response> {
  return await fetch(url, {
    redirect: "follow",
    signal: AbortSignal.timeout(SEARXNG_TIMEOUT_MS),
    headers: {
      "User-Agent": "SybilBot/1.0 (+https://sybil.local)",
      Accept: accept,
    },
  });
}
|
||||
|
||||
/**
 * Returns `value` with whitespace compacted when it is a string with visible
 * content; anything else (non-strings, blank strings) becomes `null`.
 */
function stringOrNull(value: unknown) {
  if (typeof value === "string") {
    const compacted = compactWhitespace(value);
    if (compacted !== "") return compacted;
  }
  return null;
}
|
||||
|
||||
/**
 * Extracts the string entries of an array, compacting whitespace in each and
 * dropping those that end up empty. Non-array input yields `[]`.
 */
function stringArray(value: unknown) {
  const cleaned: string[] = [];
  if (Array.isArray(value)) {
    for (const item of value) {
      if (typeof item !== "string") continue;
      const compacted = compactWhitespace(item);
      if (compacted) cleaned.push(compacted);
    }
  }
  return cleaned;
}
|
||||
|
||||
/**
 * Converts one raw entry of the SearXNG JSON `results` array into a
 * `SearxngSearchResult`.
 *
 * The snippet is read from `content`, falling back to `snippet`; it feeds both
 * `summary` (clipped at 1,400 characters) and `text` (clipped at 700). The
 * date accepts either `publishedDate` or `published_date`, and `engines`
 * falls back to a single `engine` string.
 */
function mapJsonResult(result: any): SearxngSearchResult {
  const snippet = stringOrNull(result?.content) ?? stringOrNull(result?.snippet);
  const shortText = snippet === null ? null : clipText(snippet, 700);

  return {
    title: stringOrNull(result?.title),
    url: stringOrNull(result?.url),
    publishedDate: stringOrNull(result?.publishedDate) ?? stringOrNull(result?.published_date),
    summary: snippet === null ? null : clipText(snippet, 1_400),
    text: shortText,
    engines: stringArray(result?.engines ?? (typeof result?.engine === "string" ? [result.engine] : [])),
  };
}
|
||||
|
||||
/**
 * Runs a search against the configured SearXNG instance through its JSON API.
 *
 * @param query User query; domain restrictions from `options` are appended as
 *   search operators and also enforced on the returned results.
 * @param options Result limit and include/exclude domain lists.
 * @returns The original query, a `null` request id, and at most
 *   `options.numResults` filtered results.
 * @throws Error when the base URL is unset, the HTTP status is not OK, or the
 *   response is not `application/json`.
 */
export async function searchSearxng(query: string, options: SearxngSearchOptions): Promise<SearxngSearchResponse> {
  const response = await fetchSearxng(buildSearchUrl(query, options), "application/json");

  if (!response.ok) {
    // Drain the body before failing so the response is fully consumed.
    await response.arrayBuffer();
    throw new Error(`SearXNG JSON search failed with status ${response.status}. Verify search.formats includes json.`);
  }

  const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
  if (!contentType.includes("application/json")) {
    await response.arrayBuffer();
    throw new Error(`SearXNG JSON search returned ${contentType || "unknown content type"}.`);
  }

  const payload: any = await response.json();
  const rawResults: unknown[] = Array.isArray(payload?.results) ? payload.results : [];
  const mapped = rawResults.map((entry) => mapJsonResult(entry));
  const limited = filterResultsByDomains(mapped, options).slice(0, options.numResults);

  return { query, requestId: null, results: limited };
}
|
||||
Reference in New Issue
Block a user