Adds SearXNG support for tool calling
This commit is contained in:
160
server/src/search/searxng.ts
Normal file
160
server/src/search/searxng.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
import { env } from "../env.js";
|
||||
|
||||
/** Milliseconds a SearXNG request may stay pending before it is aborted. */
const SEARXNG_TIMEOUT_MS = 12 * 1_000;

/** Value sent as the `categories` query parameter of every search request. */
const DEFAULT_SEARXNG_CATEGORIES = "general";
|
||||
|
||||
/** Caller-supplied options for a single SearXNG search. */
export type SearxngSearchOptions = {
  /** Maximum number of results returned once domain filtering has been applied. */
  numResults: number;
  /** When non-empty, only results whose host matches one of these domains are kept. */
  includeDomains?: string[];
  /** Results whose host matches any of these domains are dropped. */
  excludeDomains?: string[];
};
|
||||
|
||||
/** One search hit, normalised from a SearXNG JSON result entry. */
export type SearxngSearchResult = {
  /** Whitespace-compacted title, or `null` when missing or empty. */
  title: string | null;
  /** Result URL as reported by SearXNG, or `null` when missing or empty. */
  url: string | null;
  /** Publication date string as reported by SearXNG, or `null` when absent. */
  publishedDate: string | null;
  /** Result snippet, clipped to the longer of the two snippet lengths. */
  summary: string | null;
  /** Same snippet as `summary`, clipped to the shorter length. */
  text: string | null;
  /** Names of the engines that produced this hit; empty when unknown. */
  engines: string[];
};
|
||||
|
||||
/** Envelope returned by `searchSearxng`. */
export type SearxngSearchResponse = {
  /** The query exactly as the caller supplied it (without any added `site:` clauses). */
  query: string;
  /** Always `null`; this module never produces a request identifier. */
  requestId: null;
  /** Normalised, domain-filtered results, truncated to the requested count. */
  results: SearxngSearchResult[];
};
|
||||
|
||||
/**
 * Truncates `input` to at most `maxCharacters` UTF-16 code units and appends
 * `...` when something was cut off. Strings that already fit are returned
 * unchanged.
 *
 * @param input - Text to clip.
 * @param maxCharacters - Maximum number of code units kept before the ellipsis.
 * @returns The original text, or its clipped prefix followed by `...`.
 */
function clipText(input: string, maxCharacters: number): string {
  if (input.length <= maxCharacters) return input;

  let head = input.slice(0, maxCharacters);
  // Do not leave half of a surrogate pair behind: a trailing high surrogate
  // means the cut landed in the middle of an astral character (e.g. an emoji).
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }
  return `${head}...`;
}
|
||||
|
||||
/**
 * Collapses every run of whitespace (spaces, tabs, CR, LF, ...) into a single
 * space and trims both ends.
 *
 * Carriage returns are treated as ordinary whitespace rather than deleted, so
 * text using lone `\r` line breaks keeps its word boundaries.
 *
 * @param input - Text to normalise.
 * @returns The single-line, single-spaced form of `input`.
 */
function compactWhitespace(input: string): string {
  return input.replace(/\s+/g, " ").trim();
}
|
||||
|
||||
/**
 * Returns the configured SearXNG base URL, guaranteed to end with `/`.
 *
 * The trailing slash matters when a relative path is later resolved against
 * this value with `new URL(path, base)`: without it the last path segment of
 * the base would be replaced instead of extended.
 *
 * @throws Error when `SEARXNG_BASE_URL` is not configured.
 */
function requireSearxngBaseUrl() {
  const baseUrl = env.SEARXNG_BASE_URL;
  if (!baseUrl) {
    throw new Error("SEARXNG_BASE_URL not set");
  }
  if (baseUrl.endsWith("/")) {
    return baseUrl;
  }
  return `${baseUrl}/`;
}
|
||||
|
||||
/**
 * Reduces a user-supplied domain or URL to a bare, lower-case host name
 * without a leading `www.`.
 *
 * Accepts plain domains (`Example.com`), domains with a path
 * (`example.com/docs`) and full URLs (`https://www.example.com:8080/x`).
 *
 * @param input - Domain or URL as typed by the caller.
 * @returns The normalised host name, or `null` when no host can be extracted.
 */
function normalizeDomain(input: string): string | null {
  const trimmed = input.trim().toLowerCase();
  if (!trimmed) return null;

  try {
    const parsed = new URL(trimmed.includes("://") ? trimmed : `https://${trimmed}`);
    // Host-less URLs (e.g. `file:///path`) have an empty hostname; report
    // those as "no domain" rather than as an empty string.
    return parsed.hostname.replace(/^www\./, "") || null;
  } catch {
    // Best-effort fallback for input the URL parser rejects. Drop any scheme
    // first so that splitting on `/` yields the host part and not the scheme.
    const withoutScheme = trimmed.replace(/^[a-z][a-z0-9+.-]*:\/\//, "");
    return withoutScheme.split(/[/?#]/, 1)[0]?.replace(/^www\./, "") || null;
  }
}
|
||||
|
||||
/**
 * Normalises a list of domains, dropping entries that yield no host and
 * removing duplicates while keeping first-seen order.
 *
 * @param input - Raw domain list; `undefined` is treated as empty.
 * @returns Unique normalised host names.
 */
function normalizeDomains(input: string[] | undefined) {
  const unique = new Set<string>();
  for (const raw of input ?? []) {
    const domain = normalizeDomain(raw);
    if (domain) {
      unique.add(domain);
    }
  }
  return Array.from(unique);
}
|
||||
|
||||
/**
 * Tells whether the host of `urlRaw` is `domain` itself or one of its
 * subdomains. The host is lower-cased and stripped of a leading `www.` before
 * comparing; `domain` is compared as given.
 *
 * @param urlRaw - Absolute URL of a result, or `null`.
 * @param domain - Domain to test against.
 * @returns `false` for missing or unparseable URLs, otherwise the match result.
 */
function hostnameMatchesDomain(urlRaw: string | null, domain: string) {
  if (!urlRaw) return false;

  let host: string;
  try {
    host = new URL(urlRaw).hostname;
  } catch {
    // Not an absolute URL: it cannot belong to any domain.
    return false;
  }

  host = host.toLowerCase().replace(/^www\./, "");
  if (host === domain) return true;
  // The leading dot keeps `notexample.com` from matching `example.com`.
  return host.endsWith(`.${domain}`);
}
|
||||
|
||||
/**
 * Applies the include/exclude domain lists from `options` to `results`.
 *
 * A result is kept when it matches the include list (or that list is empty)
 * and matches nothing on the exclude list.
 *
 * @param results - Results to filter; the array itself is not modified.
 * @param options - Search options carrying the optional domain lists.
 * @returns A new array with the surviving results in their original order.
 */
function filterResultsByDomains(results: SearxngSearchResult[], options: SearxngSearchOptions) {
  const allowed = normalizeDomains(options.includeDomains);
  const blocked = normalizeDomains(options.excludeDomains);

  const matchesAny = (url: string | null, domains: string[]) =>
    domains.some((domain) => hostnameMatchesDomain(url, domain));

  return results.filter((result) => {
    const passesAllowList = allowed.length === 0 || matchesAny(result.url, allowed);
    return passesAllowList && !matchesAny(result.url, blocked);
  });
}
|
||||
|
||||
/**
 * Builds the query string sent to SearXNG by appending `site:` / `-site:`
 * operators for the requested domain restrictions.
 *
 * A single include domain becomes `site:a`; several become
 * `(site:a OR site:b)`. Each exclude domain becomes `-site:x`.
 *
 * @param query - The user's search terms.
 * @param options - Search options carrying the optional domain lists.
 * @returns The non-empty parts joined with single spaces.
 */
function buildSearxngQuery(query: string, options: SearxngSearchOptions) {
  const siteTerms = normalizeDomains(options.includeDomains).map((domain) => `site:${domain}`);
  const exclusionTerms = normalizeDomains(options.excludeDomains).map((domain) => `-site:${domain}`);

  // Parentheses are only needed to group an OR of several sites.
  const joinedSites = siteTerms.join(" OR ");
  const includeClause = siteTerms.length > 1 ? `(${joinedSites})` : joinedSites;
  const excludeClause = exclusionTerms.join(" ");

  const parts: string[] = [];
  for (const part of [query, includeClause, excludeClause]) {
    if (part) parts.push(part);
  }
  return parts.join(" ");
}
|
||||
|
||||
/**
 * Assembles the full `search` endpoint URL for a query, relative to the
 * configured SearXNG base URL, requesting JSON output.
 *
 * @param query - The user's search terms.
 * @param options - Search options used to add domain restrictions to the query.
 * @returns The request URL with all query parameters set.
 * @throws Error when the SearXNG base URL is not configured.
 */
function buildSearchUrl(query: string, options: SearxngSearchOptions) {
  const searchUrl = new URL("search", requireSearxngBaseUrl());

  const params: Array<[string, string]> = [
    ["q", buildSearxngQuery(query, options)],
    ["categories", DEFAULT_SEARXNG_CATEGORIES],
    ["language", "auto"],
    ["safesearch", "1"],
    ["format", "json"],
  ];
  for (const [name, value] of params) {
    searchUrl.searchParams.set(name, value);
  }

  return searchUrl;
}
|
||||
|
||||
/**
 * Fetches `url` with the bot's User-Agent and the given `Accept` header,
 * following redirects and aborting after `SEARXNG_TIMEOUT_MS`.
 *
 * The abort timer is cleared as soon as `fetch` settles, so the time limit
 * applies to obtaining the response, not to reading its body afterwards.
 *
 * @param url - Fully built request URL.
 * @param accept - Value for the `Accept` request header.
 * @returns The response as returned by `fetch`.
 */
async function fetchSearxng(url: URL, accept: string) {
  const abortController = new AbortController();
  const abortTimer = setTimeout(() => abortController.abort(), SEARXNG_TIMEOUT_MS);

  const headers = {
    "User-Agent": "SybilBot/1.0 (+https://sybil.local)",
    Accept: accept,
  };

  try {
    return await fetch(url, {
      redirect: "follow",
      signal: abortController.signal,
      headers,
    });
  } finally {
    // Always release the timer, whether the request succeeded or failed.
    clearTimeout(abortTimer);
  }
}
|
||||
|
||||
/**
 * Returns the whitespace-compacted form of `value` when it is a string with
 * visible content, and `null` for anything else (non-strings, empty or
 * whitespace-only strings).
 */
function stringOrNull(value: unknown) {
  if (typeof value === "string") {
    const compacted = compactWhitespace(value);
    if (compacted) return compacted;
  }
  return null;
}
|
||||
|
||||
/**
 * Extracts the usable strings from an untrusted value: non-arrays yield an
 * empty list, non-string items are skipped, and each remaining string is
 * whitespace-compacted and kept only if something is left.
 */
function stringArray(value: unknown) {
  const collected: string[] = [];
  if (!Array.isArray(value)) return collected;

  for (const item of value) {
    if (typeof item !== "string") continue;
    const compacted = compactWhitespace(item);
    if (compacted) collected.push(compacted);
  }
  return collected;
}
|
||||
|
||||
/**
 * Converts one raw entry of the SearXNG JSON `results` array into a
 * `SearxngSearchResult`, tolerating missing or wrongly typed fields.
 *
 * The snippet is read from `content` (falling back to `snippet`) and exposed
 * twice: as `summary` clipped to 1,400 characters and as `text` clipped to
 * 700. Engines come from `engines`, or from a single string `engine`.
 *
 * @param result - Untrusted entry from the parsed response.
 * @returns The normalised result; absent fields are `null` (or `[]` for engines).
 */
function mapJsonResult(result: any): SearxngSearchResult {
  const snippet = stringOrNull(result?.content) ?? stringOrNull(result?.snippet);
  const shortSnippet = snippet === null ? null : clipText(snippet, 700);
  const longSnippet = snippet === null ? null : clipText(snippet, 1_400);

  const fallbackEngines = typeof result?.engine === "string" ? [result.engine] : [];
  const rawEngines = result?.engines ?? fallbackEngines;

  return {
    title: stringOrNull(result?.title),
    url: stringOrNull(result?.url),
    publishedDate: stringOrNull(result?.publishedDate) ?? stringOrNull(result?.published_date),
    summary: longSnippet,
    text: shortSnippet,
    engines: stringArray(rawEngines),
  };
}
|
||||
|
||||
/**
 * Runs a search against the configured SearXNG instance through its JSON API
 * and returns the normalised, domain-filtered results.
 *
 * @param query - The user's search terms.
 * @param options - Result limit and optional include/exclude domain lists.
 * @returns The original query, a `null` request id and at most
 *   `options.numResults` results.
 * @throws Error when the base URL is not configured, when SearXNG answers
 *   with a non-2xx status, or when the response is not JSON. Network and
 *   timeout failures from `fetch` propagate unchanged.
 */
export async function searchSearxng(query: string, options: SearxngSearchOptions): Promise<SearxngSearchResponse> {
  const url = buildSearchUrl(query, options);
  const response = await fetchSearxng(url, "application/json");

  if (!response.ok) {
    // Drain the body so the connection can be reused. This is best-effort: a
    // failure here must not hide the more useful status error below.
    await response.arrayBuffer().catch(() => undefined);
    throw new Error(`SearXNG JSON search failed with status ${response.status}. Verify search.formats includes json.`);
  }

  const contentType = response.headers.get("content-type")?.toLowerCase() ?? "";
  if (!contentType.includes("application/json")) {
    // Same best-effort drain as above.
    await response.arrayBuffer().catch(() => undefined);
    throw new Error(`SearXNG JSON search returned ${contentType || "unknown content type"}.`);
  }

  const data: any = await response.json();
  const results: SearxngSearchResult[] = Array.isArray(data?.results)
    ? data.results.map((item: unknown) => mapJsonResult(item))
    : [];

  // A negative limit would make `slice` drop items from the tail instead of
  // returning nothing, so clamp it to zero.
  const limit = Math.max(0, Math.trunc(options.numResults));
  return { query, requestId: null, results: filterResultsByDomains(results, options).slice(0, limit) };
}
|
||||
Reference in New Issue
Block a user