fix most web fetches (fetch_url) getting blocked by sending a real browser user agent

This commit is contained in:
2026-06-11 23:36:19 -07:00
parent 22aa652257
commit d7214c88ad
8 changed files with 403 additions and 54 deletions

View File

@@ -299,7 +299,7 @@ Behavior notes:
- For `anthropic`, image attachments are sent as Messages API `image` blocks using base64 source data; text attachments are added as `text` blocks.
- Available Sybil-managed tool calls for `openai` and `xai`: `web_search` and `fetch_url`. When `CHAT_CODEX_TOOL_ENABLED=true`, `codex_exec` is also available. When `CHAT_SHELL_TOOL_ENABLED=true`, `shell_exec` is also available.
- `web_search` returns ranked results with per-result summaries/snippets. Its backend engine is selected by `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`.
- `fetch_url` fetches a URL and returns plaintext page content (HTML converted to text server-side).
- `fetch_url` fetches a URL with browser-like navigation headers and returns plaintext page content (HTML converted to text server-side).
- `codex_exec` delegates coding, shell, repository inspection, and other complex software tasks to a persistent remote Codex CLI workspace over SSH. The server runs `codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check <non-interactive wrapped prompt>` on the configured devbox inside `CHAT_CODEX_REMOTE_WORKDIR`, with SSH stdin closed.
- `shell_exec` runs arbitrary non-interactive shell commands on the same configured devbox, starting in `CHAT_CODEX_REMOTE_WORKDIR`. It uses `bash -lc` when bash exists, otherwise `sh -lc`, closes SSH stdin, and does not run inside the Sybil server container.
- Devbox tool configuration:

View File

@@ -172,6 +172,7 @@ Terminal tool-call event:
- `openai`: backend uses OpenAI's Responses API and may execute internal function tool calls (`web_search`, `fetch_url`, optional `codex_exec`, and optional `shell_exec`) before producing final text.
- `xai`: backend uses xAI's OpenAI-compatible Chat Completions API and may execute the same internal tool calls before producing final text.
- `fetch_url` sends browser-like navigation headers for outbound URL requests to reduce false 403s from sites that reject generic server clients.
- `hermes-agent`: backend uses the configured Hermes Agent OpenAI-compatible Chat Completions API. Sybil does not add its own tool definitions for this provider; Hermes Agent handles its own tools server-side. Custom Hermes stream events are normalized away unless they produce text deltas in this SSE contract.
- `openai`: image attachments are sent as Responses `input_image` items; text attachments are sent as `input_text` items.
- `xai` and `hermes-agent`: image attachments are sent as Chat Completions content parts; text attachments are inlined as text parts.

View File

@@ -0,0 +1,26 @@
/** User-Agent string identifying the client as desktop Chrome 130 on macOS. */
export const CHROMIUM_USER_AGENT =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36";

/** `Accept-Language` value sent together with the browser User-Agent. */
export const BROWSER_ACCEPT_LANGUAGE = "en-US,en;q=0.9";

/**
 * Default `Accept` value for navigation-style fetches: HTML/XHTML first,
 * XML and PDF at q=0.9, anything else at q=0.8.
 */
export const FETCH_URL_ACCEPT =
  "text/html,application/xhtml+xml,application/xml;q=0.9,application/pdf;q=0.9,*/*;q=0.8";

/**
 * Builds the base browser-like header set: `User-Agent`, `Accept` and
 * `Accept-Language`.
 *
 * @param accept - Value to send in the `Accept` header.
 * @returns A fresh header map; callers may mutate it freely.
 */
export function buildBrowserLikeRequestHeaders(accept: string): Record<string, string> {
  const headers: Record<string, string> = {};
  headers["User-Agent"] = CHROMIUM_USER_AGENT;
  headers.Accept = accept;
  headers["Accept-Language"] = BROWSER_ACCEPT_LANGUAGE;
  return headers;
}

/**
 * Builds the base browser-like headers plus the extra headers a browser
 * attaches to a user-initiated, top-level document navigation
 * (`Upgrade-Insecure-Requests` and the `Sec-Fetch-*` family).
 *
 * @param accept - Value to send in the `Accept` header; defaults to
 *   {@link FETCH_URL_ACCEPT}.
 * @returns A fresh header map with the base headers first, followed by the
 *   navigation-only headers.
 */
export function buildBrowserLikeNavigationHeaders(accept = FETCH_URL_ACCEPT): Record<string, string> {
  const navigationOnly: Record<string, string> = {
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
  };
  // The base map is freshly allocated, so extending it in place is safe and
  // keeps the base headers ahead of the navigation-only ones.
  return Object.assign(buildBrowserLikeRequestHeaders(accept), navigationOnly);
}

View File

@@ -6,6 +6,7 @@ import { promisify } from "node:util";
import { convert as htmlToText } from "html-to-text";
import type OpenAI from "openai";
import { z } from "zod";
import { buildBrowserLikeNavigationHeaders } from "../browser-fetch-headers.js";
import { env } from "../env.js";
import { exaClient } from "../search/exa.js";
import { searchSearxng } from "../search/searxng.js";
@@ -570,10 +571,7 @@ async function runFetchUrlTool(input: unknown): Promise<ToolRunOutcome> {
response = await fetch(parsed.toString(), {
redirect: "follow",
signal: controller.signal,
headers: {
"User-Agent": "SybilBot/1.0 (+https://sybil.local)",
Accept: "text/html, text/plain, application/json;q=0.9, */*;q=0.5",
},
headers: buildBrowserLikeNavigationHeaders(),
});
} finally {
clearTimeout(timeout);

View File

@@ -1,3 +1,4 @@
import { buildBrowserLikeRequestHeaders } from "../browser-fetch-headers.js";
import { env } from "../env.js";
const SEARXNG_TIMEOUT_MS = 12_000;
@@ -106,10 +107,7 @@ async function fetchSearxng(url: URL, accept: string) {
return await fetch(url, {
redirect: "follow",
signal: controller.signal,
headers: {
"User-Agent": "SybilBot/1.0 (+https://sybil.local)",
Accept: accept,
},
headers: buildBrowserLikeRequestHeaders(accept),
});
} finally {
clearTimeout(timeout);

View File

@@ -2,6 +2,7 @@ import assert from "node:assert/strict";
import test from "node:test";
import {
runPlainChatCompletionsStream,
runToolAwareChatCompletions,
runToolAwareChatCompletionsStream,
runToolAwareOpenAIChatStream,
type ToolAwareStreamingEvent,
@@ -141,6 +142,79 @@ test("plain Chat Completions stream does not send Sybil-managed tools", async ()
assert.equal(events.at(-1)?.type === "done" ? events.at(-1)?.result.text : null, "Hi");
});
// Verifies that the fetch_url tool issues its outbound request with the full
// browser-like navigation header set.
test("fetch_url sends browser-like navigation headers", async () => {
  type RecordedFetch = { input: RequestInfo | URL; init?: RequestInit };

  const targetUrl = "https://www.bls.gov/news.release/pdf/cpi.pdf";
  const recordedFetches: RecordedFetch[] = [];
  const realFetch = globalThis.fetch;

  // Replace the global fetch with a stub that records every call and answers
  // with a small HTML document; the real fetch is restored in `finally`.
  const stubFetch = async (input: RequestInfo | URL, init?: RequestInit) => {
    recordedFetches.push({ input, init });
    return new Response("<!doctype html><title>CPI</title><main>Consumer price index</main>", {
      status: 200,
      headers: { "content-type": "text/html; charset=utf-8" },
    });
  };
  globalThis.fetch = stubFetch as typeof fetch;

  try {
    // First model turn: ask for a single fetch_url tool call.
    const toolCallTurn = () => ({
      choices: [
        {
          message: {
            tool_calls: [
              {
                id: "call_1",
                type: "function",
                function: {
                  name: "fetch_url",
                  arguments: JSON.stringify({ url: targetUrl }),
                },
              },
            ],
          },
        },
      ],
    });
    // Every later turn: plain final text.
    const finalTurn = () => ({
      choices: [{ message: { content: "Fetched" } }],
    });

    let completionCalls = 0;
    const client = {
      chat: {
        completions: {
          create: async () => {
            completionCalls += 1;
            return completionCalls === 1 ? toolCallTurn() : finalTurn();
          },
        },
      },
    };

    const result = await runToolAwareChatCompletions({
      client: client as any,
      model: "grok-test",
      messages: [{ role: "user", content: "Fetch CPI PDF" }],
    });

    const expectedHeaders = {
      "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
      Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,application/pdf;q=0.9,*/*;q=0.8",
      "Accept-Language": "en-US,en;q=0.9",
      "Upgrade-Insecure-Requests": "1",
      "Sec-Fetch-Dest": "document",
      "Sec-Fetch-Mode": "navigate",
      "Sec-Fetch-Site": "none",
      "Sec-Fetch-User": "?1",
    };

    assert.equal(result.text, "Fetched");
    assert.equal(recordedFetches.length, 1);
    const [onlyFetch] = recordedFetches;
    assert.equal(String(onlyFetch?.input), targetUrl);
    assert.deepEqual(onlyFetch?.init?.headers, expectedHeaders);
    assert.equal(result.toolEvents[0]?.status, "completed");
  } finally {
    globalThis.fetch = realFetch;
  }
});
test("OpenAI-compatible Chat Completions stream emits initiated and terminal tool call updates", async () => {
let requestCount = 0;
const client = {

View File

@@ -1,8 +1,10 @@
import { useMemo, useState } from "preact/hooks";
import type { JSX } from "preact";
import { cn } from "@/lib/utils";
import { ChatAttachmentList } from "@/components/chat/chat-attachment-list";
import { getMessageAttachments, type Message } from "@/lib/api";
import { MarkdownContent } from "@/components/markdown/markdown-content";
import { Globe2, Link2, Wrench } from "lucide-preact";
import { ChevronDown, ChevronUp, Globe2, Link2, Wrench } from "lucide-preact";
type Props = {
messages: Message[];
@@ -72,6 +74,17 @@ function formatToolTimestamp(...values: Array<string | null | undefined>) {
}
/** The three visual states a tool-call card can be rendered in. */
type ToolCallVisualState = "initiated" | "completed" | "failed";
/**
 * One entry in the rendered message list: either a single message, or a group
 * of tool messages rendered together as a stack (`key` identifies the group).
 */
type MessageRenderItem = { kind: "message"; message: Message } | { kind: "tool_group"; key: string; messages: Message[] };
/**
 * Inline style for a tool-call card: regular CSS properties plus the
 * `--tool-stack-*` custom properties read by the tool-call stack styles.
 * All custom properties are optional and carried as strings.
 */
type ToolStackStyle = JSX.CSSProperties & {
"--tool-stack-x"?: string;
"--tool-stack-y"?: string;
"--tool-stack-z"?: string;
"--tool-stack-scale"?: string;
"--tool-stack-opacity"?: string;
"--tool-stack-delay"?: string;
};
/** Maximum number of cards drawn in a collapsed tool-call stack. */
const COLLAPSED_TOOL_STACK_LIMIT = 4;
function getToolVisualState(metadata: ToolLogMetadata): ToolCallVisualState {
if (metadata.status === "failed") return "failed";
@@ -89,61 +102,222 @@ function getToolDetailLabel(message: Message, metadata: ToolLogMetadata, state:
.join(" • ");
}
/**
 * Converts the flat message list into render items, preserving order.
 *
 * Consecutive tool-log messages (role `"tool"` whose metadata is accepted by
 * `asToolLogMetadata`) are collected into a run. A run of two or more becomes
 * a single `tool_group` item keyed by the id of its first message; a run of
 * exactly one stays a plain `message` item. Every other message is emitted as
 * a `message` item.
 */
function buildMessageRenderItems(messages: Message[]) {
  const rendered: MessageRenderItem[] = [];
  let pendingTools: Message[] = [];

  // Emits whatever tool run has accumulated so far and starts a new one.
  function emitPendingTools() {
    const run = pendingTools;
    pendingTools = [];
    if (run.length === 0) return;
    rendered.push(
      run.length > 1
        ? { kind: "tool_group", key: run[0].id, messages: run }
        : { kind: "message", message: run[0] }
    );
  }

  messages.forEach((entry) => {
    const isToolLog = entry.role === "tool" && Boolean(asToolLogMetadata(entry.metadata));
    if (isToolLog) {
      pendingTools.push(entry);
      return;
    }
    // A non-tool message ends the current run before being emitted itself.
    emitPendingTools();
    rendered.push({ kind: "message", message: entry });
  });

  // The list may end in the middle of a tool run.
  emitPendingTools();
  return rendered;
}
/**
 * Computes the inline style for one card of a collapsed tool-call stack.
 *
 * @param depth - Position in the stack; 0 is the front card.
 * @param totalVisible - Number of cards drawn in the stack; used so that cards
 *   nearer the front get a higher `zIndex`.
 * @returns Custom properties for offset, scale, opacity and animation delay,
 *   plus `zIndex`. Scale never drops below 0.88 and opacity never below 0.48.
 */
function getToolStackStyle(depth: number, totalVisible: number): ToolStackStyle {
  const offsetX = depth * 9;
  const offsetY = depth * 8;
  const offsetZ = depth * -36;
  const scale = Math.max(0.88, 1 - depth * 0.035);
  const opacity = Math.max(0.48, 1 - depth * 0.15);
  const delayMs = depth * 44;

  const style: ToolStackStyle = {
    "--tool-stack-x": `${offsetX}px`,
    "--tool-stack-y": `${offsetY}px`,
    "--tool-stack-z": `${offsetZ}px`,
    "--tool-stack-scale": `${scale}`,
    "--tool-stack-opacity": `${opacity}`,
    "--tool-stack-delay": `${delayMs}ms`,
    zIndex: totalVisible - depth,
  };
  return style;
}
/**
 * Computes the inline style for one card of an expanded tool-call list.
 *
 * @param index - Position of the card in the list.
 * @returns The animation-delay custom property: 34ms per position, with the
 *   position capped at 6 so long lists do not keep adding delay.
 */
function getExpandedToolStyle(index: number): ToolStackStyle {
  const cappedIndex = Math.min(index, 6);
  const delayMs = cappedIndex * 34;
  return { "--tool-stack-delay": `${delayMs}ms` };
}
/**
 * Renders a single tool-call message as a card: an icon badge, a one-line
 * summary, and a label/detail row. Colours depend on the tool's visual state
 * (failed, initiated, or anything else).
 *
 * Returns `null` when the message metadata is not recognised by
 * `asToolLogMetadata`.
 *
 * @param message - The tool message to render.
 * @param className - Extra classes appended after the card's own classes.
 * @param style - Inline style forwarded to the card's root element.
 */
function ToolCallCard({
  message,
  className,
  style,
}: {
  message: Message;
  className?: string;
  style?: JSX.CSSProperties;
}) {
  const metadata = asToolLogMetadata(message.metadata);
  if (!metadata) return null;

  // Pick the icon for the tool; anything that is not search/fetch gets the wrench.
  const iconKind = getToolIconName(metadata.toolName ?? message.name);
  let Icon = Wrench;
  if (iconKind === "search") {
    Icon = Globe2;
  } else if (iconKind === "fetch") {
    Icon = Link2;
  }

  const state = getToolVisualState(metadata);
  const failed = state === "failed";
  const initiated = state === "initiated";
  const summary = getToolSummary(message, metadata);
  const label = getToolLabel(message, metadata);
  const detail = getToolDetailLabel(message, metadata, state);

  // Resolve the state-dependent colour classes once, up front.
  let cardTone: string;
  let badgeTone: string;
  let labelTone: string;
  if (failed) {
    cardTone = "border-rose-400/34 bg-[linear-gradient(90deg,hsl(350_72%_44%_/_0.18),hsl(342_66%_9%_/_0.72))]";
    badgeTone = "border-rose-400/34 bg-rose-400/13 text-rose-300";
    labelTone = "text-rose-300/85";
  } else if (initiated) {
    cardTone = "border-amber-300/34 bg-[linear-gradient(90deg,hsl(43_74%_30%_/_0.34),hsl(260_48%_13%_/_0.74))]";
    badgeTone = "border-amber-300/34 bg-amber-300/13 text-amber-200";
    labelTone = "text-amber-200/90";
  } else {
    cardTone = "border-cyan-400/34 bg-[linear-gradient(90deg,hsl(184_89%_21%_/_0.70),hsl(208_66%_12%_/_0.78))]";
    badgeTone = "border-cyan-300/34 bg-cyan-300/13 text-cyan-300";
    labelTone = "text-cyan-200/90";
  }
  const summaryTone = failed ? "text-rose-200" : "text-violet-50/95";

  const cardClassName = cn(
    "inline-flex min-w-0 items-start gap-3 overflow-hidden rounded-xl border px-3 py-2.5 shadow-[inset_0_1px_0_hsl(180_100%_88%_/_0.06)]",
    cardTone,
    className
  );
  const badgeClassName = cn(
    "mt-0.5 flex h-[30px] w-[30px] shrink-0 items-center justify-center rounded-lg border",
    badgeTone
  );

  return (
    <div className={cardClassName} style={style} title={`${summary}\n${label}${detail}`}>
      <span className={badgeClassName}>
        <Icon className="h-4 w-4" />
      </span>
      <span className="min-w-0 flex-1 space-y-1">
        <span className={cn("block truncate text-sm leading-5", summaryTone)}>{summary}</span>
        <span className="flex min-w-0 items-center gap-1.5 text-[11px] leading-4">
          <span className={cn("min-w-0 truncate font-semibold", labelTone)}>{label}</span>
          <span className="min-w-0 truncate text-violet-200/64">{detail}</span>
        </span>
      </span>
    </div>
  );
}
/**
 * Renders a group of tool-call messages either as a collapsed, overlapping
 * stack or as an expanded vertical list, with a round button to switch
 * between the two.
 *
 * Collapsed: only the last `COLLAPSED_TOOL_STACK_LIMIT` messages are drawn,
 * newest in front, and a `+N` badge reports how many are not drawn.
 * Expanded: every message is drawn in its original order.
 *
 * @param groupKey - Identifier passed back to `onToggle`.
 * @param messages - Tool messages belonging to this group, oldest first.
 * @param expanded - Whether the group is currently expanded.
 * @param onToggle - Called with `groupKey` when the toggle button is clicked.
 */
function ToolCallStack({
  groupKey,
  messages,
  expanded,
  onToggle,
}: {
  groupKey: string;
  messages: Message[];
  expanded: boolean;
  onToggle: (groupKey: string) => void;
}) {
  const total = messages.length;
  const countLabel = `${total} tool ${total === 1 ? "call" : "calls"}`;
  const handleToggle = () => onToggle(groupKey);

  if (!expanded) {
    // Newest message first, so index 0 is the front card of the stack.
    const stackedMessages = messages.slice(-COLLAPSED_TOOL_STACK_LIMIT).reverse();
    const hiddenCount = Math.max(0, total - stackedMessages.length);
    const expandLabel = `Expand ${countLabel}`;

    return (
      <div className="flex justify-start">
        <div className="tool-call-stack-shell relative inline-grid w-full max-w-[85%] min-w-0 pb-6 pr-9">
          {stackedMessages.map((stacked, depth) => (
            <ToolCallCard
              key={stacked.id}
              message={stacked}
              className={cn(
                "tool-call-stack-card col-start-1 row-start-1 w-full max-w-full",
                // Only the front card reacts to the pointer.
                depth > 0 && "pointer-events-none"
              )}
              style={getToolStackStyle(depth, stackedMessages.length)}
            />
          ))}
          {hiddenCount ? (
            <span className="absolute bottom-1 right-10 z-20 rounded-full border border-cyan-300/30 bg-slate-950/86 px-2 py-0.5 text-[10px] font-semibold leading-none text-cyan-100 shadow-sm">
              +{hiddenCount}
            </span>
          ) : null}
          <button
            type="button"
            className="tool-call-stack-toggle absolute -right-3 top-1/2 z-20 flex h-8 w-8 -translate-y-1/2 items-center justify-center rounded-full"
            aria-expanded="false"
            aria-label={expandLabel}
            title={expandLabel}
            onClick={handleToggle}
          >
            <ChevronDown className="h-4 w-4" />
          </button>
        </div>
      </div>
    );
  }

  const collapseLabel = `Collapse ${countLabel}`;

  return (
    <div className="flex justify-start">
      <div className="relative flex w-full max-w-[85%] flex-col gap-2.5 pr-5">
        <button
          type="button"
          className="tool-call-stack-toggle absolute -right-3 top-2 z-20 flex h-8 w-8 items-center justify-center rounded-full"
          aria-expanded="true"
          aria-label={collapseLabel}
          title={collapseLabel}
          onClick={handleToggle}
        >
          <ChevronUp className="h-4 w-4" />
        </button>
        {messages.map((listed, position) => (
          <ToolCallCard
            key={listed.id}
            message={listed}
            className="tool-call-stack-expanded-card w-full max-w-full"
            style={getExpandedToolStyle(position)}
          />
        ))}
      </div>
    </div>
  );
}
export function ChatMessagesPanel({ messages, isLoading, isSending }: Props) {
const hasPendingAssistant = messages.some((message) => message.id.startsWith("temp-assistant-") && message.content.trim().length === 0);
const renderItems = useMemo(() => buildMessageRenderItems(messages), [messages]);
const [expandedToolGroups, setExpandedToolGroups] = useState<Set<string>>(() => new Set());
const toggleToolGroup = (groupKey: string) => {
setExpandedToolGroups((current) => {
const next = new Set(current);
if (next.has(groupKey)) next.delete(groupKey);
else next.add(groupKey);
return next;
});
};
return (
<>
{isLoading && messages.length === 0 ? <p className="text-sm text-muted-foreground">Loading messages...</p> : null}
<div className="mx-auto max-w-4xl space-y-6">
{messages.map((message) => {
{renderItems.map((item) => {
if (item.kind === "tool_group") {
return (
<ToolCallStack
key={`tool-group-${item.key}`}
groupKey={item.key}
messages={item.messages}
expanded={expandedToolGroups.has(item.key)}
onToggle={toggleToolGroup}
/>
);
}
const { message } = item;
const toolLogMetadata = asToolLogMetadata(message.metadata);
if (message.role === "tool" && toolLogMetadata) {
const iconKind = getToolIconName(toolLogMetadata.toolName ?? message.name);
const Icon = iconKind === "search" ? Globe2 : iconKind === "fetch" ? Link2 : Wrench;
const toolState = getToolVisualState(toolLogMetadata);
const isFailed = toolState === "failed";
const isInitiated = toolState === "initiated";
const toolSummary = getToolSummary(message, toolLogMetadata);
const toolLabel = getToolLabel(message, toolLogMetadata);
const toolDetailLabel = getToolDetailLabel(message, toolLogMetadata, toolState);
return (
<div key={message.id} className="flex justify-start">
<div
className={cn(
"inline-flex max-w-[85%] min-w-0 items-start gap-3 overflow-hidden rounded-xl border px-3 py-2.5 shadow-[inset_0_1px_0_hsl(180_100%_88%_/_0.06)]",
isFailed
? "border-rose-400/34 bg-[linear-gradient(90deg,hsl(350_72%_44%_/_0.18),hsl(342_66%_9%_/_0.72))]"
: isInitiated
? "border-amber-300/34 bg-[linear-gradient(90deg,hsl(43_74%_30%_/_0.34),hsl(260_48%_13%_/_0.74))]"
: "border-cyan-400/34 bg-[linear-gradient(90deg,hsl(184_89%_21%_/_0.70),hsl(208_66%_12%_/_0.78))]"
)}
title={`${toolSummary}\n${toolLabel}${toolDetailLabel}`}
>
<span
className={cn(
"mt-0.5 flex h-[30px] w-[30px] shrink-0 items-center justify-center rounded-lg border",
isFailed
? "border-rose-400/34 bg-rose-400/13 text-rose-300"
: isInitiated
? "border-amber-300/34 bg-amber-300/13 text-amber-200"
: "border-cyan-300/34 bg-cyan-300/13 text-cyan-300"
)}
>
<Icon className="h-4 w-4" />
</span>
<span className="min-w-0 flex-1 space-y-1">
<span className={cn("block truncate text-sm leading-5", isFailed ? "text-rose-200" : "text-violet-50/95")}>
{toolSummary}
</span>
<span className="flex min-w-0 items-center gap-1.5 text-[11px] leading-4">
<span className={cn("min-w-0 truncate font-semibold", isFailed ? "text-rose-300/85" : isInitiated ? "text-amber-200/90" : "text-cyan-200/90")}>
{toolLabel}
</span>
<span className="min-w-0 truncate text-violet-200/64">{toolDetailLabel}</span>
</span>
</span>
</div>
<ToolCallCard message={message} className="max-w-[85%]" />
</div>
);
}

View File

@@ -140,6 +140,84 @@ textarea {
0 14px 36px hsl(240 80% 2% / 0.28);
}
/* Container of a collapsed tool-call stack: sets up the 3D perspective in
   which the stacked cards are positioned and keeps their stacking order
   isolated from the rest of the page. */
.tool-call-stack-shell {
perspective: 900px;
transform-style: preserve-3d;
isolation: isolate;
}
/* One card in the collapsed stack. Offset, scale, opacity and entry delay are
   read from the --tool-stack-* custom properties, falling back to "no offset,
   full size, fully opaque, no delay" when a property is not set. */
.tool-call-stack-card {
transform: translate3d(var(--tool-stack-x, 0), var(--tool-stack-y, 0), var(--tool-stack-z, 0)) scale(var(--tool-stack-scale, 1));
transform-origin: top left;
opacity: var(--tool-stack-opacity, 1);
animation: tool-call-stack-in 360ms cubic-bezier(0.18, 0.95, 0.28, 1) both;
animation-delay: var(--tool-stack-delay, 0ms);
will-change: transform, opacity;
}
/* One card in the expanded list: a short fade/slide-in, staggered through
   --tool-stack-delay. */
.tool-call-stack-expanded-card {
animation: tool-call-inline-in 220ms ease-out both;
animation-delay: var(--tool-stack-delay, 0ms);
}
/* Round expand/collapse button shown next to a tool-call stack. */
.tool-call-stack-toggle {
border: 1px solid hsl(188 82% 70% / 0.36);
background:
linear-gradient(180deg, hsl(230 36% 16% / 0.96), hsl(238 48% 7% / 0.96)),
hsl(236 48% 8%);
color: hsl(186 92% 86%);
box-shadow:
inset 0 1px 0 hsl(180 100% 88% / 0.08),
0 8px 22px hsl(235 72% 2% / 0.42);
transition:
border-color 160ms ease,
color 160ms ease,
transform 160ms ease,
filter 160ms ease;
}
/* Hover: brighter border and text. */
.tool-call-stack-toggle:hover {
border-color: hsl(188 92% 74% / 0.62);
color: hsl(184 100% 92%);
filter: brightness(1.08);
}
/* Keyboard focus ring. */
.tool-call-stack-toggle:focus-visible {
outline: 2px solid hsl(188 92% 72% / 0.9);
outline-offset: 2px;
}
/* Entry animation for stacked cards: starts transparent, lowered, pushed back
   and slightly tilted, and ends at the card's own resting transform and
   opacity (the same custom properties used by .tool-call-stack-card). */
@keyframes tool-call-stack-in {
from {
opacity: 0;
transform: translate3d(0, 0.85rem, -72px) scale(0.96) rotateX(-8deg);
}
to {
opacity: var(--tool-stack-opacity, 1);
transform: translate3d(var(--tool-stack-x, 0), var(--tool-stack-y, 0), var(--tool-stack-z, 0)) scale(var(--tool-stack-scale, 1)) rotateX(0);
}
}
/* Entry animation for expanded cards: fade in while sliding down slightly. */
@keyframes tool-call-inline-in {
from {
opacity: 0;
transform: translateY(-0.35rem);
}
to {
opacity: 1;
transform: translateY(0);
}
}
/* Users who ask for reduced motion get no entry animations; the cards' static
   transform and opacity declared above still apply. */
@media (prefers-reduced-motion: reduce) {
.tool-call-stack-card,
.tool-call-stack-expanded-card {
animation: none;
}
}
.md-content {
word-break: break-word;
}