Fix most web_fetch requests getting blocked by sending a real browser user agent

This commit is contained in:
2026-06-11 23:36:19 -07:00
parent 22aa652257
commit d7214c88ad
8 changed files with 403 additions and 54 deletions

View File

@@ -0,0 +1,26 @@
/**
 * User-Agent string sent with outbound requests. The value identifies the
 * client as desktop Chrome 130 on macOS rather than as a bot.
 */
export const CHROMIUM_USER_AGENT =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36";
/** Accept-Language value: US English first, then any English at q=0.9. */
export const BROWSER_ACCEPT_LANGUAGE = "en-US,en;q=0.9";
/**
 * Default Accept value used by `buildBrowserLikeNavigationHeaders`: HTML and
 * XHTML first, then XML and PDF at q=0.9, then anything else at q=0.8.
 */
export const FETCH_URL_ACCEPT =
"text/html,application/xhtml+xml,application/xml;q=0.9,application/pdf;q=0.9,*/*;q=0.8";
/**
 * Builds the base header set for a browser-like HTTP request.
 *
 * @param accept - Value placed verbatim in the `Accept` header.
 * @returns A freshly created record holding `User-Agent`, `Accept`, and
 *   `Accept-Language`, in that order.
 */
export function buildBrowserLikeRequestHeaders(accept: string): Record<string, string> {
  const requestHeaders: Record<string, string> = {};
  // Populate in the same order as a literal would, so iteration order is stable.
  requestHeaders["User-Agent"] = CHROMIUM_USER_AGENT;
  requestHeaders["Accept"] = accept;
  requestHeaders["Accept-Language"] = BROWSER_ACCEPT_LANGUAGE;
  return requestHeaders;
}
/**
 * Builds the header set for a browser-like document navigation request.
 *
 * Starts from the headers produced by `buildBrowserLikeRequestHeaders` and
 * layers `Upgrade-Insecure-Requests` plus the `Sec-Fetch-*` fields on top.
 *
 * @param accept - Value for the `Accept` header; defaults to `FETCH_URL_ACCEPT`.
 * @returns A new record containing the base headers followed by the
 *   navigation-specific headers.
 */
export function buildBrowserLikeNavigationHeaders(accept = FETCH_URL_ACCEPT): Record<string, string> {
  const baseHeaders = buildBrowserLikeRequestHeaders(accept);
  const navigationOnlyHeaders: Record<string, string> = {
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
  };
  // Copy into a fresh object so the base record is never mutated; later
  // sources win on key collisions, matching object-spread semantics.
  return Object.assign({}, baseHeaders, navigationOnlyHeaders);
}

View File

@@ -6,6 +6,7 @@ import { promisify } from "node:util";
import { convert as htmlToText } from "html-to-text";
import type OpenAI from "openai";
import { z } from "zod";
import { buildBrowserLikeNavigationHeaders } from "../browser-fetch-headers.js";
import { env } from "../env.js";
import { exaClient } from "../search/exa.js";
import { searchSearxng } from "../search/searxng.js";
@@ -570,10 +571,7 @@ async function runFetchUrlTool(input: unknown): Promise<ToolRunOutcome> {
response = await fetch(parsed.toString(), {
redirect: "follow",
signal: controller.signal,
headers: {
"User-Agent": "SybilBot/1.0 (+https://sybil.local)",
Accept: "text/html, text/plain, application/json;q=0.9, */*;q=0.5",
},
headers: buildBrowserLikeNavigationHeaders(),
});
} finally {
clearTimeout(timeout);

View File

@@ -1,3 +1,4 @@
import { buildBrowserLikeRequestHeaders } from "../browser-fetch-headers.js";
import { env } from "../env.js";
const SEARXNG_TIMEOUT_MS = 12_000;
@@ -106,10 +107,7 @@ async function fetchSearxng(url: URL, accept: string) {
return await fetch(url, {
redirect: "follow",
signal: controller.signal,
headers: {
"User-Agent": "SybilBot/1.0 (+https://sybil.local)",
Accept: accept,
},
headers: buildBrowserLikeRequestHeaders(accept),
});
} finally {
clearTimeout(timeout);