Refactor LLM backend: split chat runners into per-protocol modules and add a provider backend registry

2026-06-13 12:02:22 -07:00
parent 7436544a69
commit 297b053a91
15 changed files with 1768 additions and 1068 deletions
--- a/server/tests/chat-tools-streaming.test.ts
+++ b/server/tests/chat-tools-streaming.test.ts
@@ -1,12 +1,9 @@
 import assert from "node:assert/strict";
 import test from "node:test";
-import {
-  runPlainChatCompletionsStream,
-  runToolAwareChatCompletions,
-  runToolAwareChatCompletionsStream,
-  runToolAwareOpenAIChatStream,
-  type ToolAwareStreamingEvent,
-} from "../src/llm/chat-tools.js";
+import { type ToolAwareStreamingEvent } from "../src/llm/chat-tools.js";
+import { completeWithChatCompletionsApi, streamWithChatCompletionsApi } from "../src/llm/protocols/chat-completions-api.js";
+import { completeWithMessagesApi, streamWithMessagesApi } from "../src/llm/protocols/messages-api.js";
+import { streamWithResponsesApi } from "../src/llm/protocols/responses-api.js";

 async function* streamFrom(events: any[]) {
  for (const event of events) {
@@ -23,7 +20,7 @@ async function collectEvents(iterable: AsyncIterable<ToolAwareStreamingEvent>) {
  return events;
 }

-test("OpenAI Responses stream emits text deltas as they arrive", async () => {
+test("Responses API stream emits text deltas as they arrive", async () => {
  const outputMessage = {
    id: "msg_1",
    type: "message",
@@ -53,7 +50,7 @@ test("OpenAI Responses stream emits text deltas as they arrive", async () => {
  };

  const events = await collectEvents(
-    runToolAwareOpenAIChatStream({
+    streamWithResponsesApi({
      client: client as any,
      model: "gpt-test",
      messages: [{ role: "user", content: "Say hello" }],
@@ -71,7 +68,7 @@ test("OpenAI Responses stream emits text deltas as they arrive", async () => {
  assert.equal(events.at(-1)?.type === "done" ? events.at(-1)?.result.text : null, "Hello");
 });

-test("OpenAI-compatible Chat Completions stream emits text deltas as they arrive", async () => {
+test("Chat Completions API stream emits text deltas as they arrive", async () => {
  const client = {
    chat: {
      completions: {
@@ -90,7 +87,7 @@ test("OpenAI-compatible Chat Completions stream emits text deltas as they arrive
  };

  const events = await collectEvents(
-    runToolAwareChatCompletionsStream({
+    streamWithChatCompletionsApi({
      client: client as any,
      model: "grok-test",
      messages: [{ role: "user", content: "Say hello" }],
@@ -125,10 +122,11 @@ test("plain Chat Completions stream does not send Sybil-managed tools", async ()
  };

  const events = await collectEvents(
-    runPlainChatCompletionsStream({
+    streamWithChatCompletionsApi({
      client: client as any,
      model: "hermes-agent",
      messages: [{ role: "user", content: "Say hi" }],
+      enabledTools: [],
    })
  );

@@ -189,7 +187,7 @@ test("fetch_url sends browser-like navigation headers", async () => {
      },
    };

-    const result = await runToolAwareChatCompletions({
+    const result = await completeWithChatCompletionsApi({
      client: client as any,
      model: "grok-test",
      messages: [{ role: "user", content: "Fetch CPI PDF" }],
@@ -215,7 +213,81 @@ test("fetch_url sends browser-like navigation headers", async () => {
  }
 });

-test("OpenAI-compatible Chat Completions stream emits initiated and terminal tool call updates", async () => {
+test("Messages API executes tool_use blocks and sends tool_result follow-up", async () => { // non-streaming path: one tool_use round-trip, then a final text reply
+  const originalFetch = globalThis.fetch; // saved so the finally block can restore it
+  const fetchCalls: Array<{ input: RequestInfo | URL; init?: RequestInit }> = [];
+  globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { // stub global fetch: record each call and return a fixed HTML page
+    fetchCalls.push({ input, init });
+    return new Response("<!doctype html><title>Example</title><main>Tool result body</main>", {
+      status: 200,
+      headers: { "content-type": "text/html; charset=utf-8" },
+    });
+  }) as typeof fetch;
+
+  try {
+    const requestBodies: any[] = []; // every body passed to the fake messages.create, in call order
+    const client = {
+      messages: {
+        create: async (body: any) => {
+          requestBodies.push(body);
+          if (requestBodies.length === 1) { // first request: reply with a fetch_url tool_use block
+            return {
+              content: [
+                {
+                  type: "tool_use",
+                  id: "toolu_1",
+                  name: "fetch_url",
+                  input: { url: "https://example.com/article" },
+                },
+              ],
+              usage: { input_tokens: 3, output_tokens: 2 },
+            };
+          }
+
+          return { // any later request: plain text reply
+            content: [{ type: "text", text: "Fetched" }],
+            usage: { input_tokens: 5, output_tokens: 1 },
+          };
+        },
+      },
+    };
+
+    const result = await completeWithMessagesApi({
+      client: client as any,
+      model: "claude-test",
+      messages: [{ role: "user", content: "Fetch the article" }],
+    });
+
+    assert.equal(result.text, "Fetched");
+    assert.equal(fetchCalls.length, 1); // the stubbed fetch was hit exactly once
+    assert.equal(String(fetchCalls[0]?.input), "https://example.com/article");
+    assert.equal(requestBodies.length, 2); // initial request plus one follow-up
+    assert.equal(requestBodies[0]?.model, "claude-test");
+    assert.equal(requestBodies[0]?.tool_choice?.type, "auto");
+    const fetchTool = requestBodies[0]?.tools?.find((tool: any) => tool.name === "fetch_url");
+    assert.equal(fetchTool?.input_schema?.type, "object");
+    assert.equal(fetchTool?.input_schema?.properties?.url?.type, "string");
+
+    const secondMessages = requestBodies[1]?.messages ?? []; // follow-up must end with the assistant tool_use turn, then a user tool_result turn
+    assert.equal(secondMessages.at(-2)?.role, "assistant");
+    assert.equal(secondMessages.at(-2)?.content?.[0]?.type, "tool_use");
+    assert.equal(secondMessages.at(-1)?.role, "user");
+    const toolResult = secondMessages.at(-1)?.content?.[0];
+    assert.equal(toolResult?.type, "tool_result");
+    assert.equal(toolResult?.tool_use_id, "toolu_1");
+    assert.equal(toolResult?.is_error, false);
+    assert.equal(JSON.parse(toolResult?.content ?? "{}").ok, true); // tool_result content is expected to be a JSON string
+    assert.equal(result.toolEvents[0]?.toolCallId, "toolu_1");
+    assert.equal(result.toolEvents[0]?.status, "completed");
+    assert.equal(result.usage?.inputTokens, 8); // 3 + 5 from the two stubbed responses
+    assert.equal(result.usage?.outputTokens, 3); // 2 + 1 from the two stubbed responses
+    assert.equal(result.usage?.totalTokens, 11); // 8 input + 3 output
+  } finally {
+    globalThis.fetch = originalFetch; // always undo the global fetch stub
+  }
+});
+
+test("Chat Completions API stream emits initiated and terminal tool call updates", async () => {
  let requestCount = 0;
  const client = {
    chat: {
@@ -256,7 +328,7 @@ test("OpenAI-compatible Chat Completions stream emits initiated and terminal too
  };

  const events = await collectEvents(
-    runToolAwareChatCompletionsStream({
+    streamWithChatCompletionsApi({
      client: client as any,
      model: "grok-test",
      messages: [{ role: "user", content: "Use a tool" }],
@@ -280,3 +352,122 @@ test("OpenAI-compatible Chat Completions stream emits initiated and terminal too
  assert.equal(typeof toolEvents[1]?.durationMs, "number");
  assert.equal(events.at(-1)?.type === "done" ? events.at(-1)?.result.text : null, "Done");
 });
+
+test("Messages API stream emits initiated and terminal tool call updates", async () => { // streaming path: text + unknown tool call, then a second stream with the final text
+  let requestCount = 0;
+  const requestBodies: any[] = []; // every body passed to the fake messages.create, in call order
+  const client = {
+    messages: {
+      create: async (body: any) => {
+        requestCount += 1;
+        requestBodies.push(body);
+        if (requestCount === 1) { // first stream: a text block followed by a tool_use block named "unknown_tool"
+          return streamFrom([
+            {
+              type: "message_start",
+              message: {
+                usage: { input_tokens: 3, output_tokens: 0 },
+              },
+            },
+            {
+              type: "content_block_start",
+              index: 0,
+              content_block: { type: "text", text: "" },
+            },
+            {
+              type: "content_block_delta",
+              index: 0,
+              delta: { type: "text_delta", text: "I'll check that." },
+            },
+            { type: "content_block_stop", index: 0 },
+            {
+              type: "content_block_start",
+              index: 1,
+              content_block: {
+                type: "tool_use",
+                id: "toolu_1",
+                name: "unknown_tool",
+                input: {},
+              },
+            },
+            {
+              type: "content_block_delta",
+              index: 1,
+              delta: { type: "input_json_delta", partial_json: "{\"query\":\"current weather\"}" }, // tool input arrives as a partial_json delta; content_block_start carried an empty input
+            },
+            { type: "content_block_stop", index: 1 },
+            {
+              type: "message_delta",
+              delta: { stop_reason: "tool_use", stop_sequence: null },
+              usage: { output_tokens: 2 },
+            },
+            { type: "message_stop" },
+          ]);
+        }
+
+        return streamFrom([ // any later request: a single "Done" text block ending with end_turn
+          {
+            type: "message_start",
+            message: {
+              usage: { input_tokens: 4, output_tokens: 0 },
+            },
+          },
+          {
+            type: "content_block_start",
+            index: 0,
+            content_block: { type: "text", text: "" },
+          },
+          {
+            type: "content_block_delta",
+            index: 0,
+            delta: { type: "text_delta", text: "Done" },
+          },
+          { type: "content_block_stop", index: 0 },
+          {
+            type: "message_delta",
+            delta: { stop_reason: "end_turn", stop_sequence: null },
+            usage: { output_tokens: 1 },
+          },
+          { type: "message_stop" },
+        ]);
+      },
+    },
+  };
+
+  const events = await collectEvents(
+    streamWithMessagesApi({
+      client: client as any,
+      model: "claude-test",
+      messages: [{ role: "user", content: "Use a tool" }],
+    })
+  );
+
+  assert.deepEqual(
+    events.map((event) => event.type),
+    ["tool_call", "tool_call", "delta", "done"] // NOTE(review): only one delta, after both tool_call events — implies the first stream's text is not emitted as a delta; confirm intended
+  );
+  assert.equal(requestBodies[0]?.stream, true);
+  assert.equal(requestBodies[0]?.tools?.some((tool: any) => tool.name === "fetch_url"), true);
+
+  const secondMessages = requestBodies[1]?.messages ?? []; // follow-up must replay the assistant turn (text + tool_use) and end with a tool_result
+  assert.equal(secondMessages.at(-2)?.role, "assistant");
+  assert.equal(secondMessages.at(-2)?.content?.[0]?.type, "text");
+  assert.equal(secondMessages.at(-2)?.content?.[0]?.text, "I'll check that.");
+  assert.equal(secondMessages.at(-2)?.content?.[1]?.type, "tool_use");
+  assert.deepEqual(secondMessages.at(-2)?.content?.[1]?.input, { query: "current weather" }); // matches the parsed partial_json from the first stream
+  const toolResult = secondMessages.at(-1)?.content?.[0];
+  assert.equal(toolResult?.type, "tool_result");
+  assert.equal(toolResult?.tool_use_id, "toolu_1");
+  assert.equal(toolResult?.is_error, true); // the unknown tool must be reported back as an error result
+  assert.match(JSON.parse(toolResult?.content ?? "{}").error ?? "", /Unknown tool: unknown_tool/);
+
+  const toolEvents = events.flatMap((event) => (event.type === "tool_call" ? [event.event] : [])); // unwrap the payload of each tool_call event
+  assert.equal(toolEvents[0]?.toolCallId, "toolu_1");
+  assert.equal(toolEvents[0]?.status, "initiated");
+  assert.equal(toolEvents[1]?.toolCallId, "toolu_1");
+  assert.equal(toolEvents[1]?.status, "failed");
+  assert.match(toolEvents[1]?.error ?? "", /Unknown tool: unknown_tool/);
+  assert.equal(events.at(-1)?.type === "done" ? events.at(-1)?.result.text : null, "Done");
+  assert.equal(events.at(-1)?.type === "done" ? events.at(-1)?.result.usage?.inputTokens : null, 7); // 3 + 4 from the two message_start events
+  assert.equal(events.at(-1)?.type === "done" ? events.at(-1)?.result.usage?.outputTokens : null, 3); // 2 + 1 from the two message_delta events
+});
--- a/server/tests/message-content.test.ts
+++ b/server/tests/message-content.test.ts
@@ -1,6 +1,6 @@
 import assert from "node:assert/strict";
 import test from "node:test";
-import { buildSystemPromptAugmentation, getAnthropicSystemPrompt } from "../src/llm/message-content.js";
+import { buildSystemPromptAugmentation, buildTopLevelSystemPrompt } from "../src/llm/message-content.js";

 test("system prompt augmentation includes date and default location", () => {
  const prompt = buildSystemPromptAugmentation(undefined, new Date("2026-05-24T15:30:00Z"));
@@ -14,8 +14,8 @@ test("system prompt augmentation uses provided user location", () => {
  assert.equal(prompt, "Current date: 2026-05-24.\nUser location: New York, NY.");
 });

-test("Anthropic system prompt includes runtime context with existing system messages", () => {
-  const prompt = getAnthropicSystemPrompt(
+test("top-level system prompt includes runtime context with existing system messages", () => {
+  const prompt = buildTopLevelSystemPrompt(
    [{ role: "system", content: "Use concise answers." }],
    "Los Angeles, CA"
  );
--- a/server/tests/provider-adapters.test.ts
+++ b/server/tests/provider-adapters.test.ts
@@ -0,0 +1,36 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { describeProviderChatBackend } from "../src/llm/provider-adapters.js";
+
+test("provider backend registry selects chat protocol and managed-tool mode", () => { // pins the expected descriptor for each provider + tool-list pair
+  assert.deepEqual(describeProviderChatBackend("openai", []), { // openai with no tools: chat-completions, unmanaged
+    provider: "openai",
+    protocol: "chat-completions",
+    managedTools: false,
+    enabledTools: [],
+  });
+  assert.deepEqual(describeProviderChatBackend("openai", ["web_search"]), { // openai with a tool: responses protocol, managed
+    provider: "openai",
+    protocol: "responses",
+    managedTools: true,
+    enabledTools: ["web_search"],
+  });
+  assert.deepEqual(describeProviderChatBackend("anthropic", ["web_search"]), { // anthropic with a tool: messages protocol, managed
+    provider: "anthropic",
+    protocol: "messages",
+    managedTools: true,
+    enabledTools: ["web_search"],
+  });
+  assert.deepEqual(describeProviderChatBackend("xai", ["web_search"]), { // xai with a tool: chat-completions, managed
+    provider: "xai",
+    protocol: "chat-completions",
+    managedTools: true,
+    enabledTools: ["web_search"],
+  });
+  assert.deepEqual(describeProviderChatBackend("hermes-agent", ["web_search"]), { // hermes-agent: the requested tool is expected to be dropped (enabledTools: [])
+    provider: "hermes-agent",
+    protocol: "chat-completions",
+    managedTools: false,
+    enabledTools: [],
+  });
+});