Adds searxng support for tool calling
This commit is contained in:
@@ -13,6 +13,8 @@ services:
|
|||||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
|
||||||
XAI_API_KEY: ${XAI_API_KEY:-}
|
XAI_API_KEY: ${XAI_API_KEY:-}
|
||||||
EXA_API_KEY: ${EXA_API_KEY:-}
|
EXA_API_KEY: ${EXA_API_KEY:-}
|
||||||
|
CHAT_WEB_SEARCH_ENGINE: ${CHAT_WEB_SEARCH_ENGINE:-exa}
|
||||||
|
SEARXNG_BASE_URL: ${SEARXNG_BASE_URL:-}
|
||||||
volumes:
|
volumes:
|
||||||
- sybil_data:/data
|
- sybil_data:/data
|
||||||
expose:
|
expose:
|
||||||
|
|||||||
@@ -114,7 +114,7 @@ Behavior notes:
|
|||||||
- Server updates chat-level model metadata on each call: `lastUsedProvider`/`lastUsedModel`; first successful/failed call also initializes `initiatedProvider`/`initiatedModel` if unset.
|
- Server updates chat-level model metadata on each call: `lastUsedProvider`/`lastUsedModel`; first successful/failed call also initializes `initiatedProvider`/`initiatedModel` if unset.
|
||||||
- For `openai` and `xai`, backend enables tool use during chat completion with an internal system instruction.
|
- For `openai` and `xai`, backend enables tool use during chat completion with an internal system instruction.
|
||||||
- Available tool calls for chat: `web_search` and `fetch_url`.
|
- Available tool calls for chat: `web_search` and `fetch_url`.
|
||||||
- `web_search` uses Exa and returns ranked results with per-result summaries/snippets.
|
- `web_search` returns ranked results with per-result summaries/snippets. Its backend engine is selected by `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`.
|
||||||
- `fetch_url` fetches a URL and returns plaintext page content (HTML converted to text server-side).
|
- `fetch_url` fetches a URL and returns plaintext page content (HTML converted to text server-side).
|
||||||
- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output.
|
- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output.
|
||||||
- `anthropic` currently runs without server-managed tool calls.
|
- `anthropic` currently runs without server-managed tool calls.
|
||||||
@@ -161,6 +161,7 @@ Behavior notes:
|
|||||||
|
|
||||||
Search run notes:
|
Search run notes:
|
||||||
- Backend executes Exa search and Exa answer.
|
- Backend executes Exa search and Exa answer.
|
||||||
|
- Search mode is independent from chat `web_search` tool configuration and remains Exa-only.
|
||||||
- Persists answer text/citations + ranked results.
|
- Persists answer text/citations + ranked results.
|
||||||
- If both search and answer fail, endpoint returns an error.
|
- If both search and answer fail, endpoint returns an error.
|
||||||
|
|
||||||
|
|||||||
@@ -105,6 +105,7 @@ Event order:
|
|||||||
- `openai`: backend may execute internal tool calls (`web_search`, `fetch_url`) before producing final text.
|
- `openai`: backend may execute internal tool calls (`web_search`, `fetch_url`) before producing final text.
|
||||||
- `xai`: same tool-enabled behavior as OpenAI.
|
- `xai`: same tool-enabled behavior as OpenAI.
|
||||||
- `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`.
|
- `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`.
|
||||||
|
- `web_search` uses `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`. This only affects chat-mode tool calls, not search-mode endpoints.
|
||||||
|
|
||||||
Tool-enabled streaming notes (`openai`/`xai`):
|
Tool-enabled streaming notes (`openai`/`xai`):
|
||||||
- Stream still emits standard `meta`, `delta`, `done|error` events.
|
- Stream still emits standard `meta`, `delta`, `done|error` events.
|
||||||
|
|||||||
@@ -44,6 +44,8 @@ If `ADMIN_TOKEN` is not set, the server runs in open mode (dev).
|
|||||||
- `ANTHROPIC_API_KEY`
|
- `ANTHROPIC_API_KEY`
|
||||||
- `XAI_API_KEY`
|
- `XAI_API_KEY`
|
||||||
- `EXA_API_KEY`
|
- `EXA_API_KEY`
|
||||||
|
- `CHAT_WEB_SEARCH_ENGINE` (`exa` by default, or `searxng` for chat tool calls only)
|
||||||
|
- `SEARXNG_BASE_URL` (required when `CHAT_WEB_SEARCH_ENGINE=searxng`; instance must allow `format=json`)
|
||||||
|
|
||||||
## API
|
## API
|
||||||
- `GET /health`
|
- `GET /health`
|
||||||
|
|||||||
@@ -1,5 +1,24 @@
|
|||||||
|
import path from "node:path";
|
||||||
|
import { fileURLToPath } from "node:url";
|
||||||
|
import { config as loadDotenv } from "dotenv";
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
import "dotenv/config";
|
|
||||||
|
loadDotenv({ quiet: true });
|
||||||
|
loadDotenv({ path: path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../.env"), quiet: true });
|
||||||
|
|
||||||
|
/**
 * Optional URL environment value.
 *
 * A string that is empty after trimming is mapped to `undefined` before
 * validation, so a blank variable counts as "not set" instead of failing the
 * URL check. Non-string inputs are passed through to the inner schema untouched.
 */
const OptionalUrlSchema = z.preprocess((value) => {
  if (typeof value !== "string") return value;
  return value.trim() === "" ? undefined : value;
}, z.string().trim().url().optional());
|
||||||
|
|
||||||
|
/**
 * Search engine selector for the chat `web_search` tool.
 *
 * String input is trimmed and lower-cased before validation; a blank string
 * becomes `undefined` so the `"exa"` default applies. Non-string inputs are
 * handed to the enum schema as-is.
 */
const ChatWebSearchEngineSchema = z.preprocess((value) => {
  if (typeof value === "string") {
    const normalized = value.trim().toLowerCase();
    return normalized === "" ? undefined : normalized;
  }
  return value;
}, z.enum(["exa", "searxng"]).default("exa"));
|
||||||
|
|
||||||
const EnvSchema = z.object({
|
const EnvSchema = z.object({
|
||||||
PORT: z.coerce.number().int().positive().default(8787),
|
PORT: z.coerce.number().int().positive().default(8787),
|
||||||
@@ -13,6 +32,18 @@ const EnvSchema = z.object({
|
|||||||
ANTHROPIC_API_KEY: z.string().optional(),
|
ANTHROPIC_API_KEY: z.string().optional(),
|
||||||
XAI_API_KEY: z.string().optional(),
|
XAI_API_KEY: z.string().optional(),
|
||||||
EXA_API_KEY: z.string().optional(),
|
EXA_API_KEY: z.string().optional(),
|
||||||
|
|
||||||
|
// Chat-mode web_search tool configuration. Search mode remains Exa-only for now.
|
||||||
|
CHAT_WEB_SEARCH_ENGINE: ChatWebSearchEngineSchema,
|
||||||
|
SEARXNG_BASE_URL: OptionalUrlSchema,
|
||||||
|
}).superRefine((value, ctx) => {
|
||||||
|
if (value.CHAT_WEB_SEARCH_ENGINE === "searxng" && !value.SEARXNG_BASE_URL) {
|
||||||
|
ctx.addIssue({
|
||||||
|
code: "custom",
|
||||||
|
path: ["SEARXNG_BASE_URL"],
|
||||||
|
message: "SEARXNG_BASE_URL is required when CHAT_WEB_SEARCH_ENGINE=searxng",
|
||||||
|
});
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
export type Env = z.infer<typeof EnvSchema>;
|
export type Env = z.infer<typeof EnvSchema>;
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
import { convert as htmlToText } from "html-to-text";
|
import { convert as htmlToText } from "html-to-text";
|
||||||
import type OpenAI from "openai";
|
import type OpenAI from "openai";
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
|
import { env } from "../env.js";
|
||||||
import { exaClient } from "../search/exa.js";
|
import { exaClient } from "../search/exa.js";
|
||||||
|
import { searchSearxng } from "../search/searxng.js";
|
||||||
import type { ChatMessage } from "./types.js";
|
import type { ChatMessage } from "./types.js";
|
||||||
|
|
||||||
const MAX_TOOL_ROUNDS = 4;
|
const MAX_TOOL_ROUNDS = 4;
|
||||||
@@ -21,6 +23,8 @@ const WebSearchArgsSchema = z
|
|||||||
})
|
})
|
||||||
.strict();
|
.strict();
|
||||||
|
|
||||||
|
type WebSearchArgs = z.infer<typeof WebSearchArgsSchema>;
|
||||||
|
|
||||||
const FetchUrlArgsSchema = z
|
const FetchUrlArgsSchema = z
|
||||||
.object({
|
.object({
|
||||||
url: z.string().trim().url(),
|
url: z.string().trim().url(),
|
||||||
@@ -267,8 +271,7 @@ function normalizeIncomingMessages(messages: ChatMessage[]) {
|
|||||||
return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
|
return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
|
||||||
}
|
}
|
||||||
|
|
||||||
async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
|
async function runExaWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
|
||||||
const args = WebSearchArgsSchema.parse(input);
|
|
||||||
const exa = exaClient();
|
const exa = exaClient();
|
||||||
const response = await exa.search(args.query, {
|
const response = await exa.search(args.query, {
|
||||||
type: args.type ?? "auto",
|
type: args.type ?? "auto",
|
||||||
@@ -292,6 +295,7 @@ async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
|
|||||||
const results = Array.isArray(response?.results) ? response.results : [];
|
const results = Array.isArray(response?.results) ? response.results : [];
|
||||||
return {
|
return {
|
||||||
ok: true,
|
ok: true,
|
||||||
|
searchEngine: "exa",
|
||||||
query: args.query,
|
query: args.query,
|
||||||
requestId: response?.requestId ?? null,
|
requestId: response?.requestId ?? null,
|
||||||
results: results.map((result: any, index: number) => ({
|
results: results.map((result: any, index: number) => ({
|
||||||
@@ -309,6 +313,40 @@ async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Runs the chat `web_search` tool against SearXNG.
 *
 * Forwards the query and domain filters to `searchSearxng` (falling back to
 * `DEFAULT_WEB_RESULTS` when no result count was requested) and reshapes each
 * hit into the ranked result layout of the tool outcome. `author` is always
 * `null` here, and `highlights` holds a 280-character clip of the summary when
 * a summary exists.
 */
async function runSearxngWebSearchTool(args: WebSearchArgs): Promise<ToolRunOutcome> {
  const { query, numResults, includeDomains, excludeDomains } = args;
  const response = await searchSearxng(query, {
    numResults: numResults ?? DEFAULT_WEB_RESULTS,
    includeDomains,
    excludeDomains,
  });

  return {
    ok: true,
    searchEngine: "searxng",
    query,
    requestId: response.requestId,
    results: response.results.map(({ title, url, publishedDate, summary, text, engines }, index) => ({
      rank: index + 1,
      title,
      url,
      publishedDate,
      author: null,
      summary,
      text,
      highlights: summary ? [clipText(summary, 280)] : [],
      engines,
    })),
  };
}
|
||||||
|
|
||||||
|
/**
 * Entry point for the chat `web_search` tool.
 *
 * Validates the raw tool arguments with `WebSearchArgsSchema` (a parse failure
 * throws) and dispatches to the SearXNG runner when
 * `env.CHAT_WEB_SEARCH_ENGINE` is `"searxng"`, otherwise to the Exa runner.
 */
async function runWebSearchTool(input: unknown): Promise<ToolRunOutcome> {
  const args = WebSearchArgsSchema.parse(input);
  const runner = env.CHAT_WEB_SEARCH_ENGINE === "searxng" ? runSearxngWebSearchTool : runExaWebSearchTool;
  return runner(args);
}
|
||||||
|
|
||||||
function assertSafeFetchUrl(urlRaw: string) {
|
function assertSafeFetchUrl(urlRaw: string) {
|
||||||
const parsed = new URL(urlRaw);
|
const parsed = new URL(urlRaw);
|
||||||
if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
|
if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
|
||||||
|
|||||||
160
server/src/search/searxng.ts
Normal file
160
server/src/search/searxng.ts
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
import { env } from "../env.js";
|
||||||
|
|
||||||
|
const SEARXNG_TIMEOUT_MS = 12_000;
|
||||||
|
const DEFAULT_SEARXNG_CATEGORIES = "general";
|
||||||
|
|
||||||
|
/** Options accepted by `searchSearxng`. */
export type SearxngSearchOptions = {
  /** Upper bound on the number of results returned after domain filtering. */
  numResults: number;
  /** When non-empty, only results whose host matches one of these domains are kept. */
  includeDomains?: Array<string>;
  /** Results whose host matches any of these domains are dropped. */
  excludeDomains?: Array<string>;
};
|
||||||
|
|
||||||
|
/** One normalized SearXNG hit; fields the instance did not supply as usable strings are `null`. */
export type SearxngSearchResult = {
  /** Result title. */
  title: string | null;
  /** Result URL as reported by the instance. */
  url: string | null;
  /** Publication date string, passed through without parsing. */
  publishedDate: string | null;
  /** Snippet text for the result. */
  summary: string | null;
  /** Shorter clip of the same snippet. */
  text: string | null;
  /** Names of the upstream engines that produced this hit. */
  engines: Array<string>;
};
|
||||||
|
|
||||||
|
/** Value returned by `searchSearxng`. */
export type SearxngSearchResponse = {
  /** The query string exactly as the caller passed it (without any added site filters). */
  query: string;
  /** Always `null`; the field exists so the shape lines up with other search backends. */
  requestId: null;
  /** Filtered, truncated result list. */
  results: Array<SearxngSearchResult>;
};
|
||||||
|
|
||||||
|
/**
 * Truncates `input` to at most `maxCharacters` UTF-16 code units and appends
 * `"..."` when anything was cut off. Strings that already fit are returned
 * unchanged.
 *
 * The cut never lands between the two halves of a surrogate pair: if the last
 * kept code unit is a high surrogate it is dropped as well, so the clipped text
 * does not end in a lone surrogate (which would be serialized as U+FFFD).
 *
 * @param input Text to clip.
 * @param maxCharacters Maximum number of code units to keep before the ellipsis.
 * @returns The original string, or a clipped prefix followed by `"..."`.
 */
function clipText(input: string, maxCharacters: number) {
  if (input.length <= maxCharacters) return input;

  let end = maxCharacters;
  // charCodeAt yields NaN for out-of-range indexes, which fails both comparisons below.
  const lastKept = input.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;

  return `${input.slice(0, end)}...`;
}
|
||||||
|
|
||||||
|
/**
 * Flattens text to a single line: carriage returns are deleted outright, every
 * remaining run of whitespace (spaces, tabs, newlines) becomes one space, and
 * the result is trimmed.
 *
 * Only two passes are needed. Any intermediate newline tidying would be
 * overwritten by the final `\s+` collapse, which rewrites every whitespace run
 * regardless of its makeup.
 *
 * @param input Raw text.
 * @returns The whitespace-normalized text; may be the empty string.
 */
function compactWhitespace(input: string) {
  return input.replace(/\r/g, "").replace(/\s+/g, " ").trim();
}
|
||||||
|
|
||||||
|
/**
 * Returns the configured SearXNG base URL, guaranteed to end with `/` so that
 * relative paths resolve underneath it.
 *
 * @throws Error when `env.SEARXNG_BASE_URL` is unset or empty.
 */
function requireSearxngBaseUrl() {
  const baseUrl = env.SEARXNG_BASE_URL;
  if (!baseUrl) {
    throw new Error("SEARXNG_BASE_URL not set");
  }
  if (baseUrl.endsWith("/")) return baseUrl;
  return `${baseUrl}/`;
}
|
||||||
|
|
||||||
|
/**
 * Reduces a user-supplied domain or URL to a bare lower-case hostname without a
 * leading `www.`.
 *
 * Input lacking a scheme is parsed as if it were `https://<input>`. When URL
 * parsing fails, everything before the first `/`, `?` or `#` is used instead.
 *
 * @param input Domain name or URL.
 * @returns The normalized hostname, or `null` when nothing usable remains.
 */
function normalizeDomain(input: string) {
  const candidate = input.trim().toLowerCase();
  if (candidate === "") return null;

  try {
    const withScheme = candidate.includes("://") ? candidate : `https://${candidate}`;
    const { hostname } = new URL(withScheme);
    return hostname.replace(/^www\./, "");
  } catch {
    const [head] = candidate.split(/[/?#]/, 1);
    return head?.replace(/^www\./, "") || null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Normalizes a list of domains with `normalizeDomain`, discarding entries that
 * normalize to nothing and removing duplicates while keeping first-seen order.
 *
 * @param input Raw domain list; `undefined` is treated as empty.
 * @returns Unique normalized hostnames.
 */
function normalizeDomains(input: string[] | undefined) {
  const unique = new Set<string>();
  for (const raw of input ?? []) {
    const domain = normalizeDomain(raw);
    if (domain) unique.add(domain);
  }
  return Array.from(unique);
}
|
||||||
|
|
||||||
|
/**
 * Tells whether the host of `urlRaw` is `domain` itself or one of its
 * subdomains. The host is lower-cased and a leading `www.` is ignored.
 *
 * @param urlRaw Absolute URL to test; `null`, empty, or unparsable values never match.
 * @param domain Bare hostname to compare against (expected already normalized).
 * @returns `true` on an exact or subdomain match.
 */
function hostnameMatchesDomain(urlRaw: string | null, domain: string) {
  if (urlRaw === null || urlRaw === "") return false;

  let host: string;
  try {
    host = new URL(urlRaw).hostname;
  } catch {
    return false;
  }

  host = host.toLowerCase().replace(/^www\./, "");
  if (host === domain) return true;
  return host.endsWith(`.${domain}`);
}
|
||||||
|
|
||||||
|
/**
 * Applies the include/exclude domain filters to a result list.
 *
 * A result survives when the include list is empty or its URL matches at least
 * one included domain, and its URL matches no excluded domain. Results without
 * a parsable URL therefore only survive when no include list is given.
 *
 * @param results Results to filter; the array is not modified.
 * @param options Search options carrying the raw domain lists.
 * @returns A new array with the surviving results in their original order.
 */
function filterResultsByDomains(results: SearxngSearchResult[], options: SearxngSearchOptions) {
  const allowed = normalizeDomains(options.includeDomains);
  const blocked = normalizeDomains(options.excludeDomains);
  const matchesAny = (url: string | null, domains: string[]) =>
    domains.some((domain) => hostnameMatchesDomain(url, domain));

  return results.filter((result) => {
    const passesAllowList = allowed.length === 0 || matchesAny(result.url, allowed);
    return passesAllowList && !matchesAny(result.url, blocked);
  });
}
|
||||||
|
|
||||||
|
/**
 * Builds the `q` string sent to SearXNG by appending domain operators to the
 * user query.
 *
 * Included domains become `site:` terms (a single term as-is, several joined
 * with ` OR ` inside parentheses); excluded domains become `-site:` terms.
 * Empty pieces are left out and the rest are joined with single spaces.
 *
 * @param query The user's search text.
 * @param options Search options carrying the raw domain lists.
 * @returns The combined query string.
 */
function buildSearxngQuery(query: string, options: SearxngSearchOptions) {
  const siteTerms = normalizeDomains(options.includeDomains).map((domain) => `site:${domain}`);
  const excludeTerms = normalizeDomains(options.excludeDomains).map((domain) => `-site:${domain}`);

  // Zero terms join to "", one term joins to itself; only 2+ terms need grouping.
  const joinedSites = siteTerms.join(" OR ");
  const includeClause = siteTerms.length > 1 ? `(${joinedSites})` : joinedSites;

  const parts = [query, includeClause, excludeTerms.join(" ")];
  return parts.filter(Boolean).join(" ");
}
|
||||||
|
|
||||||
|
/**
 * Assembles the SearXNG `search` endpoint URL for a query.
 *
 * The path `search` is resolved against the configured base URL, and the query
 * parameters `q`, `categories`, `language`, `safesearch` and `format` are set.
 *
 * @param query The user's search text.
 * @param options Search options; the domain lists are folded into `q`.
 * @returns The fully parameterized request URL.
 * @throws Error when the SearXNG base URL is not configured.
 */
function buildSearchUrl(query: string, options: SearxngSearchOptions) {
  const endpoint = new URL("search", requireSearxngBaseUrl());
  const params: Array<[string, string]> = [
    ["q", buildSearxngQuery(query, options)],
    ["categories", DEFAULT_SEARXNG_CATEGORIES],
    ["language", "auto"],
    ["safesearch", "1"],
    ["format", "json"],
  ];
  for (const [key, value] of params) {
    endpoint.searchParams.set(key, value);
  }
  return endpoint;
}
|
||||||
|
|
||||||
|
/**
 * Issues a GET request to the SearXNG instance, following redirects and
 * identifying as the Sybil bot.
 *
 * The request carries a timeout signal of `SEARXNG_TIMEOUT_MS`. The signal stays
 * attached to the returned response, so the deadline covers reading the body as
 * well as waiting for headers. A timer that is cancelled as soon as `fetch()`
 * resolves would leave a stalled body read (`response.json()`,
 * `response.arrayBuffer()`) without any bound.
 *
 * @param url Fully built request URL.
 * @param accept Value for the `Accept` request header.
 * @returns The fetch `Response`; the caller is responsible for consuming the body.
 * @throws When the request fails or the timeout elapses (the rejection is a
 *   `TimeoutError` DOMException in the latter case).
 */
async function fetchSearxng(url: URL, accept: string) {
  return fetch(url, {
    redirect: "follow",
    signal: AbortSignal.timeout(SEARXNG_TIMEOUT_MS),
    headers: {
      "User-Agent": "SybilBot/1.0 (+https://sybil.local)",
      Accept: accept,
    },
  });
}
|
||||||
|
|
||||||
|
/**
 * Coerces an untyped JSON field to a clean string.
 *
 * @param value Any value taken from the response payload.
 * @returns The whitespace-compacted string, or `null` when `value` is not a
 *   string or is blank after compaction.
 */
function stringOrNull(value: unknown) {
  if (typeof value === "string") {
    const compacted = compactWhitespace(value);
    return compacted === "" ? null : compacted;
  }
  return null;
}
|
||||||
|
|
||||||
|
/**
 * Coerces an untyped JSON field to a list of clean strings.
 *
 * Non-array input yields an empty list. Inside an array, non-string items are
 * skipped, each string is whitespace-compacted, and blank results are dropped.
 *
 * @param value Any value taken from the response payload.
 * @returns The cleaned strings in their original order.
 */
function stringArray(value: unknown) {
  if (!Array.isArray(value)) return [];

  const cleaned: string[] = [];
  for (const item of value) {
    if (typeof item !== "string") continue;
    const compacted = compactWhitespace(item);
    if (compacted) cleaned.push(compacted);
  }
  return cleaned;
}
|
||||||
|
|
||||||
|
/**
 * Converts one raw entry of the SearXNG JSON `results` array into a
 * `SearxngSearchResult`.
 *
 * The snippet is taken from `content`, falling back to `snippet`; it is exposed
 * twice, clipped to 1400 characters as `summary` and to 700 as `text`. The
 * publication date is read from `publishedDate` or `published_date`. Engine
 * names come from `engines`, or from a single string `engine` when `engines`
 * is absent.
 *
 * @param result Untyped entry from the response payload.
 * @returns The normalized result; missing or non-string fields become `null`.
 */
function mapJsonResult(result: any): SearxngSearchResult {
  const snippet = stringOrNull(result?.content) ?? stringOrNull(result?.snippet);
  const hasSnippet = Boolean(snippet);

  return {
    title: stringOrNull(result?.title),
    url: stringOrNull(result?.url),
    publishedDate: stringOrNull(result?.publishedDate) ?? stringOrNull(result?.published_date),
    summary: hasSnippet && snippet ? clipText(snippet, 1_400) : null,
    text: hasSnippet && snippet ? clipText(snippet, 700) : null,
    engines: stringArray(result?.engines ?? (typeof result?.engine === "string" ? [result.engine] : [])),
  };
}
|
||||||
|
|
||||||
|
/**
 * Queries the configured SearXNG instance through its JSON API.
 *
 * Non-2xx responses and responses whose content type is not JSON are rejected;
 * in both cases the body is drained before the error is thrown. Successful
 * payloads are normalized entry by entry, filtered by the include/exclude
 * domain lists, and truncated to `options.numResults`.
 *
 * @param query The user's search text.
 * @param options Result cap and optional domain filters.
 * @returns The original query, a `null` request id, and the surviving results.
 * @throws Error when the instance answers with a failure status or a non-JSON body.
 */
export async function searchSearxng(query: string, options: SearxngSearchOptions): Promise<SearxngSearchResponse> {
  const response = await fetchSearxng(buildSearchUrl(query, options), "application/json");

  let failure: string | null = null;
  if (!response.ok) {
    failure = `SearXNG JSON search failed with status ${response.status}. Verify search.formats includes json.`;
  } else {
    const contentType = response.headers.get("content-type")?.toLowerCase() ?? "";
    if (!contentType.includes("application/json")) {
      failure = `SearXNG JSON search returned ${contentType || "unknown content type"}.`;
    }
  }
  if (failure !== null) {
    // Consume the unwanted body before reporting the failure.
    await response.arrayBuffer();
    throw new Error(failure);
  }

  const payload: any = await response.json();
  const mapped: SearxngSearchResult[] = Array.isArray(payload?.results) ? payload.results.map(mapJsonResult) : [];
  const kept = filterResultsByDomains(mapped, options);
  return { query, requestId: null, results: kept.slice(0, options.numResults) };
}
|
||||||
Reference in New Issue
Block a user