From 8b580fd3e188e3f59926f5a4288c1a1529cd81b3 Mon Sep 17 00:00:00 2001 From: James Magahern Date: Mon, 4 May 2026 21:52:39 -0700 Subject: [PATCH] add hermes agent provider --- docker-compose.example.yml | 3 + docs/api/rest.md | 21 ++--- docs/api/streaming-chat.md | 5 +- ios/AGENTS.md | 1 + .../Sybil/Sources/Sybil/SybilModels.swift | 2 + .../Sources/Sybil/SybilSettingsStore.swift | 5 +- .../Sybil/Sources/Sybil/SybilViewModel.swift | 14 +++- .../Sources/Sybil/SybilWorkspaceView.swift | 2 +- server/README.md | 5 +- server/prisma/schema.prisma | 1 + server/src/env.ts | 10 +++ server/src/llm/chat-tools.ts | 76 +++++++++++++++++++ server/src/llm/model-catalog.ts | 53 +++++++------ server/src/llm/multiplexer.ts | 25 +++++- server/src/llm/provider-ids.ts | 31 ++++++++ server/src/llm/providers.ts | 12 +++ server/src/llm/streaming.ts | 27 +++++-- server/src/llm/types.ts | 4 +- server/src/routes.ts | 27 ++++--- server/tests/chat-tools-streaming.test.ts | 35 +++++++++ server/tests/provider-ids.test.ts | 12 +++ tui/README.md | 2 +- tui/src/config.ts | 2 +- tui/src/index.ts | 16 +++- tui/src/types.ts | 4 +- web/src/App.tsx | 43 ++++++++--- web/src/lib/api.ts | 4 +- 27 files changed, 359 insertions(+), 83 deletions(-) create mode 100644 server/src/llm/provider-ids.ts create mode 100644 server/tests/provider-ids.test.ts diff --git a/docker-compose.example.yml b/docker-compose.example.yml index a346a00..b8db48c 100644 --- a/docker-compose.example.yml +++ b/docker-compose.example.yml @@ -12,6 +12,9 @@ services: OPENAI_API_KEY: ${OPENAI_API_KEY:-} ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-} XAI_API_KEY: ${XAI_API_KEY:-} + HERMES_AGENT_API_BASE_URL: ${HERMES_AGENT_API_BASE_URL:-http://127.0.0.1:8642/v1} + HERMES_AGENT_API_KEY: ${HERMES_AGENT_API_KEY:-} + HERMES_AGENT_MODEL: ${HERMES_AGENT_MODEL:-} EXA_API_KEY: ${EXA_API_KEY:-} CHAT_WEB_SEARCH_ENGINE: ${CHAT_WEB_SEARCH_ENGINE:-exa} SEARXNG_BASE_URL: ${SEARXNG_BASE_URL:-} diff --git a/docs/api/rest.md b/docs/api/rest.md index bae8e1b..7d537fc 100644 --- a/docs/api/rest.md +++ b/docs/api/rest.md @@ -33,11 +33,13 @@ Chat upload limits: "providers": { "openai": { "models": ["gpt-4.1-mini"], "loadedAt": "2026-02-14T00:00:00.000Z", "error": null }, "anthropic": { "models": ["claude-3-5-sonnet-latest"], "loadedAt": null, "error": null }, - "xai": { "models": ["grok-3-mini"], "loadedAt": null, "error": null } + "xai": { "models": ["grok-3-mini"], "loadedAt": null, "error": null }, + "hermes-agent": { "models": ["hermes-agent"], "loadedAt": null, "error": null } } } ``` - OpenAI model lists are filtered to models that are expected to work with the backend's Responses API implementation. +- `hermes-agent` is included only when `HERMES_AGENT_API_KEY` is configured. Set it to Hermes `API_SERVER_KEY`, or any non-empty value if that local server does not require auth. `HERMES_AGENT_API_BASE_URL` defaults to `http://127.0.0.1:8642/v1`; set `HERMES_AGENT_MODEL` only when you need an additional fallback/override model id. ## Active Runs @@ -65,7 +67,7 @@ Behavior notes: ```json { "title": "optional title", - "provider": "optional openai|anthropic|xai", + "provider": "optional openai|anthropic|xai|hermes-agent", "model": "optional model id", "messages": [ { @@ -152,7 +154,7 @@ Notes: ```json { "chatId": "optional-chat-id", - "provider": "openai|anthropic|xai", + "provider": "openai|anthropic|xai|hermes-agent", "model": "string", "messages": [ { @@ -206,11 +208,12 @@ Behavior notes: - Text files are forwarded as explicit text blocks rather than provider-managed file references. Large text attachments should already be truncated client-side before submission. - For `openai`, backend calls OpenAI's Responses API and enables internal tool use with an internal system instruction. - For `xai`, backend calls xAI's OpenAI-compatible Chat Completions API and enables internal tool use with the same internal system instruction. +- For `hermes-agent`, backend calls the configured Hermes Agent OpenAI-compatible Chat Completions API without adding Sybil-managed tool definitions; Hermes Agent handles its own tools server-side. - For `openai`, image attachments are sent as Responses `input_image` items and text attachments are sent as `input_text` items. -- For `xai`, image attachments are sent as Chat Completions content parts alongside text. +- For `xai` and `hermes-agent`, image attachments are sent as Chat Completions content parts alongside text. - For `openai`, Responses calls that can enter the server-managed tool loop use `store: true` so reasoning and function-call items can be passed between tool rounds. - For `anthropic`, image attachments are sent as Messages API `image` blocks using base64 source data; text attachments are added as `text` blocks. -- Available tool calls for chat: `web_search` and `fetch_url`. When `CHAT_CODEX_TOOL_ENABLED=true`, `codex_exec` is also available. When `CHAT_SHELL_TOOL_ENABLED=true`, `shell_exec` is also available. +- Available Sybil-managed tool calls for `openai` and `xai`: `web_search` and `fetch_url`. When `CHAT_CODEX_TOOL_ENABLED=true`, `codex_exec` is also available. When `CHAT_SHELL_TOOL_ENABLED=true`, `shell_exec` is also available. - `web_search` returns ranked results with per-result summaries/snippets. Its backend engine is selected by `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`. - `fetch_url` fetches a URL and returns plaintext page content (HTML converted to text server-side). - `codex_exec` delegates coding, shell, repository inspection, and other complex software tasks to a persistent remote Codex CLI workspace over SSH. The server runs `codex exec --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check ` on the configured devbox inside `CHAT_CODEX_REMOTE_WORKDIR`, with SSH stdin closed. @@ -311,9 +314,9 @@ Behavior notes: "title": null, "createdAt": "...", "updatedAt": "...", - "initiatedProvider": "openai|anthropic|xai|null", + "initiatedProvider": "openai|anthropic|xai|hermes-agent|null", "initiatedModel": "string|null", - "lastUsedProvider": "openai|anthropic|xai|null", + "lastUsedProvider": "openai|anthropic|xai|hermes-agent|null", "lastUsedModel": "string|null" } ``` @@ -359,9 +362,9 @@ Behavior notes: "title": null, "createdAt": "...", "updatedAt": "...", - "initiatedProvider": "openai|anthropic|xai|null", + "initiatedProvider": "openai|anthropic|xai|hermes-agent|null", "initiatedModel": "string|null", - "lastUsedProvider": "openai|anthropic|xai|null", + "lastUsedProvider": "openai|anthropic|xai|hermes-agent|null", "lastUsedModel": "string|null", "messages": [Message] } diff --git a/docs/api/streaming-chat.md b/docs/api/streaming-chat.md index c6dc6f4..e877212 100644 --- a/docs/api/streaming-chat.md +++ b/docs/api/streaming-chat.md @@ -21,7 +21,7 @@ Authentication: { "chatId": "optional-chat-id", "persist": true, - "provider": "openai|anthropic|xai", + "provider": "openai|anthropic|xai|hermes-agent", "model": "string", "messages": [ { @@ -152,8 +152,9 @@ For `persist: false` streams, `chatId` and `callId` are `null`. - `openai`: backend uses OpenAI's Responses API and may execute internal function tool calls (`web_search`, `fetch_url`, optional `codex_exec`, and optional `shell_exec`) before producing final text. - `xai`: backend uses xAI's OpenAI-compatible Chat Completions API and may execute the same internal tool calls before producing final text. +- `hermes-agent`: backend uses the configured Hermes Agent OpenAI-compatible Chat Completions API. Sybil does not add its own tool definitions for this provider; Hermes Agent handles its own tools server-side. Custom Hermes stream events are normalized away unless they produce text deltas in this SSE contract. - `openai`: image attachments are sent as Responses `input_image` items; text attachments are sent as `input_text` items. -- `xai`: image attachments are sent as Chat Completions content parts; text attachments are inlined as text parts. +- `xai` and `hermes-agent`: image attachments are sent as Chat Completions content parts; text attachments are inlined as text parts. - `openai`: Responses calls that can enter the server-managed tool loop use `store: true` so reasoning and function-call items can be passed between tool rounds. - `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`. Image attachments are sent as base64 `image` blocks and text attachments are appended as `text` blocks. - `web_search` uses `CHAT_WEB_SEARCH_ENGINE` (`exa` default, or `searxng` with `SEARXNG_BASE_URL` set). SearXNG mode requires the instance to allow `format=json`. This only affects chat-mode tool calls, not search-mode endpoints. diff --git a/ios/AGENTS.md b/ios/AGENTS.md index c6d45dd..16ebf3c 100644 --- a/ios/AGENTS.md +++ b/ios/AGENTS.md @@ -51,3 +51,4 @@ Instructions for work under `/Users/buzzert/src/sybil-2/ios`. - OpenAI: `gpt-4.1-mini` - Anthropic: `claude-3-5-sonnet-latest` - xAI: `grok-3-mini` + - Hermes Agent: `hermes-agent` diff --git a/ios/Packages/Sybil/Sources/Sybil/SybilModels.swift b/ios/Packages/Sybil/Sources/Sybil/SybilModels.swift index daf6421..998f718 100644 --- a/ios/Packages/Sybil/Sources/Sybil/SybilModels.swift +++ b/ios/Packages/Sybil/Sources/Sybil/SybilModels.swift @@ -4,12 +4,14 @@ public enum Provider: String, Codable, CaseIterable, Hashable, Sendable { case openai case anthropic case xai + case hermesAgent = "hermes-agent" public var displayName: String { switch self { case .openai: return "OpenAI" case .anthropic: return "Anthropic" case .xai: return "xAI" + case .hermesAgent: return "Hermes Agent" } } } diff --git a/ios/Packages/Sybil/Sources/Sybil/SybilSettingsStore.swift b/ios/Packages/Sybil/Sources/Sybil/SybilSettingsStore.swift index b638bf1..80c5e82 100644 --- a/ios/Packages/Sybil/Sources/Sybil/SybilSettingsStore.swift +++ b/ios/Packages/Sybil/Sources/Sybil/SybilSettingsStore.swift @@ -11,6 +11,7 @@ final class SybilSettingsStore { static let preferredOpenAIModel = "sybil.ios.preferredOpenAIModel" static let preferredAnthropicModel = "sybil.ios.preferredAnthropicModel" static let preferredXAIModel = "sybil.ios.preferredXAIModel" + static let preferredHermesAgentModel = "sybil.ios.preferredHermesAgentModel" } private let defaults: UserDefaults @@ -35,7 +36,8 @@ final class SybilSettingsStore { self.preferredModelByProvider = [ .openai: defaults.string(forKey: Keys.preferredOpenAIModel) ?? "gpt-4.1-mini", .anthropic: defaults.string(forKey: Keys.preferredAnthropicModel) ?? "claude-3-5-sonnet-latest", - .xai: defaults.string(forKey: Keys.preferredXAIModel) ?? "grok-3-mini" + .xai: defaults.string(forKey: Keys.preferredXAIModel) ?? "grok-3-mini", + .hermesAgent: defaults.string(forKey: Keys.preferredHermesAgentModel) ?? "hermes-agent" ] } @@ -53,6 +55,7 @@ final class SybilSettingsStore { defaults.set(preferredModelByProvider[.openai], forKey: Keys.preferredOpenAIModel) defaults.set(preferredModelByProvider[.anthropic], forKey: Keys.preferredAnthropicModel) defaults.set(preferredModelByProvider[.xai], forKey: Keys.preferredXAIModel) + defaults.set(preferredModelByProvider[.hermesAgent], forKey: Keys.preferredHermesAgentModel) } var trimmedTokenOrNil: String? { diff --git a/ios/Packages/Sybil/Sources/Sybil/SybilViewModel.swift b/ios/Packages/Sybil/Sources/Sybil/SybilViewModel.swift index 63da034..682567c 100644 --- a/ios/Packages/Sybil/Sources/Sybil/SybilViewModel.swift +++ b/ios/Packages/Sybil/Sources/Sybil/SybilViewModel.swift @@ -141,7 +141,8 @@ final class SybilViewModel { private let fallbackModels: [Provider: [String]] = [ .openai: ["gpt-4.1-mini"], .anthropic: ["claude-3-5-sonnet-latest"], - .xai: ["grok-3-mini"] + .xai: ["grok-3-mini"], + .hermesAgent: ["hermes-agent"] ] init( @@ -160,6 +161,12 @@ final class SybilViewModel { modelOptions(for: provider) } + var providerOptions: [Provider] { + Provider.allCases.filter { candidate in + candidate != .hermesAgent || modelCatalog[candidate] != nil + } + } + func modelOptions(for candidate: Provider) -> [String] { let serverModels = modelCatalog[candidate]?.models ?? [] if !serverModels.isEmpty { @@ -893,6 +900,11 @@ final class SybilViewModel { } private func syncModelSelectionWithServerCatalog() { + if !providerOptions.contains(provider), let firstProvider = providerOptions.first { + provider = firstProvider + settings.preferredProvider = firstProvider + } + if !providerModelOptions.contains(model), let first = providerModelOptions.first { model = first settings.preferredModelByProvider[provider] = first diff --git a/ios/Packages/Sybil/Sources/Sybil/SybilWorkspaceView.swift b/ios/Packages/Sybil/Sources/Sybil/SybilWorkspaceView.swift index 7dc1422..0da6781 100644 --- a/ios/Packages/Sybil/Sources/Sybil/SybilWorkspaceView.swift +++ b/ios/Packages/Sybil/Sources/Sybil/SybilWorkspaceView.swift @@ -495,7 +495,7 @@ struct SybilWorkspaceView: View { Divider() - ForEach(Provider.allCases, id: \.self) { candidate in + ForEach(viewModel.providerOptions, id: \.self) { candidate in Menu(candidate.displayName) { let models = viewModel.modelOptions(for: candidate) if models.isEmpty { diff --git a/server/README.md b/server/README.md index bd1dd75..cbbed45 100644 --- a/server/README.md +++ b/server/README.md @@ -1,7 +1,7 @@ # Sybil Server Backend API for: -- LLM multiplexer (OpenAI Responses / Anthropic / xAI Chat Completions-compatible Grok) +- LLM multiplexer (OpenAI Responses / Anthropic / xAI Chat Completions-compatible Grok / Hermes Agent) - Personal chat database (chats/messages + LLM call log) ## Stack @@ -43,6 +43,9 @@ If `ADMIN_TOKEN` is not set, the server runs in open mode (dev). - `OPENAI_API_KEY` - `ANTHROPIC_API_KEY` - `XAI_API_KEY` +- `HERMES_AGENT_API_BASE_URL` (`http://127.0.0.1:8642/v1` by default; include the `/v1` suffix) +- `HERMES_AGENT_API_KEY` (enables the Hermes Agent provider; set to Hermes `API_SERVER_KEY`, or any non-empty value if that local server does not require auth) +- `HERMES_AGENT_MODEL` (optional fallback/override model id; defaults client-side to `hermes-agent`) - `EXA_API_KEY` - `CHAT_WEB_SEARCH_ENGINE` (`exa` by default, or `searxng` for chat tool calls only) - `SEARXNG_BASE_URL` (required when `CHAT_WEB_SEARCH_ENGINE=searxng`; instance must allow `format=json`) diff --git a/server/prisma/schema.prisma b/server/prisma/schema.prisma index 39fbe5d..a9d3dd3 100644 --- a/server/prisma/schema.prisma +++ b/server/prisma/schema.prisma @@ -13,6 +13,7 @@ enum Provider { openai anthropic xai + hermes_agent @map("hermes-agent") } enum MessageRole { diff --git a/server/src/env.ts b/server/src/env.ts index dffb9ab..fa51ef9 100644 --- a/server/src/env.ts +++ b/server/src/env.ts @@ -11,6 +11,13 @@ const OptionalUrlSchema = z.preprocess( z.string().trim().url().optional() ); +const DEFAULT_HERMES_AGENT_API_BASE_URL = "http://127.0.0.1:8642/v1"; + +const HermesAgentApiBaseUrlSchema = z.preprocess( + (value) => (typeof value === "string" && value.trim() === "" ? undefined : value), + z.string().trim().url().default(DEFAULT_HERMES_AGENT_API_BASE_URL) +); + const ChatWebSearchEngineSchema = z.preprocess( (value) => { if (typeof value !== "string") return value; @@ -59,6 +66,9 @@ const EnvSchema = z.object({ OPENAI_API_KEY: z.string().optional(), ANTHROPIC_API_KEY: z.string().optional(), XAI_API_KEY: z.string().optional(), + HERMES_AGENT_API_BASE_URL: HermesAgentApiBaseUrlSchema, + HERMES_AGENT_API_KEY: OptionalTrimmedStringSchema, + HERMES_AGENT_MODEL: OptionalTrimmedStringSchema, EXA_API_KEY: z.string().optional(), // Chat-mode web_search tool configuration. Search mode remains Exa-only for now. diff --git a/server/src/llm/chat-tools.ts b/server/src/llm/chat-tools.ts index c663a73..0d32bbd 100644 --- a/server/src/llm/chat-tools.ts +++ b/server/src/llm/chat-tools.ts @@ -385,6 +385,10 @@ function normalizeIncomingMessages(messages: ChatMessage[]) { return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized]; } +function normalizePlainIncomingMessages(messages: ChatMessage[]) { + return messages.map((message) => buildOpenAIConversationMessage(message)); +} + function normalizeIncomingResponsesInput(messages: ChatMessage[]) { const normalized = messages.map((message) => buildOpenAIResponsesInputMessage(message)); @@ -853,6 +857,20 @@ function extractResponsesText(response: any, fallback = "") { return parts.join("") || fallback; } +function extractChatCompletionContent(message: any) { + if (typeof message?.content === "string") return message.content; + if (!Array.isArray(message?.content)) return ""; + + return message.content + .map((part: any) => { + if (typeof part === "string") return part; + if (typeof part?.text === "string") return part.text; + if (typeof part?.content === "string") return part.content; + return ""; + }) + .join(""); +} + function getUnstreamedText(finalText: string, streamedText: string) { if (!finalText) return ""; if (!streamedText) return finalText; @@ -1093,6 +1111,26 @@ export async function runToolAwareChatCompletions(params: ToolAwareCompletionPar }; } +export async function runPlainChatCompletions(params: ToolAwareCompletionParams): Promise { + const completion = await params.client.chat.completions.create({ + model: params.model, + messages: normalizePlainIncomingMessages(params.messages), + temperature: params.temperature, + max_tokens: params.maxTokens, + } as any); + + const usageAcc: Required = { inputTokens: 0, outputTokens: 0, totalTokens: 0 }; + const sawUsage = mergeUsage(usageAcc, completion?.usage); + const message = completion?.choices?.[0]?.message; + + return { + text: extractChatCompletionContent(message), + usage: sawUsage ? usageAcc : undefined, + raw: { response: completion, api: "chat.completions" }, + toolEvents: [], + }; +} + export async function* runToolAwareOpenAIChatStream( params: ToolAwareCompletionParams ): AsyncGenerator { @@ -1354,3 +1392,41 @@ export async function* runToolAwareChatCompletionsStream( }, }; } + +export async function* runPlainChatCompletionsStream( + params: ToolAwareCompletionParams +): AsyncGenerator { + const rawResponses: unknown[] = []; + const usageAcc: Required = { inputTokens: 0, outputTokens: 0, totalTokens: 0 }; + let sawUsage = false; + let text = ""; + + const stream = await params.client.chat.completions.create({ + model: params.model, + messages: normalizePlainIncomingMessages(params.messages), + temperature: params.temperature, + max_tokens: params.maxTokens, + stream: true, + } as any); + + for await (const chunk of stream as any as AsyncIterable) { + rawResponses.push(chunk); + sawUsage = mergeUsage(usageAcc, chunk?.usage) || sawUsage; + + const deltaText = chunk?.choices?.[0]?.delta?.content ?? ""; + if (typeof deltaText === "string" && deltaText.length) { + text += deltaText; + yield { type: "delta", text: deltaText }; + } + } + + yield { + type: "done", + result: { + text, + usage: sawUsage ? usageAcc : undefined, + raw: { streamed: true, responses: rawResponses, api: "chat.completions" }, + toolEvents: [], + }, + }; +} diff --git a/server/src/llm/model-catalog.ts b/server/src/llm/model-catalog.ts index b54978f..411eee2 100644 --- a/server/src/llm/model-catalog.ts +++ b/server/src/llm/model-catalog.ts @@ -1,5 +1,6 @@ import type { FastifyBaseLogger } from "fastify"; -import { anthropicClient, openaiClient, xaiClient } from "./providers.js"; +import { env } from "../env.js"; +import { anthropicClient, hermesAgentClient, isHermesAgentConfigured, openaiClient, xaiClient } from "./providers.js"; import type { Provider } from "./types.js"; export type ProviderModelSnapshot = { @@ -8,9 +9,9 @@ export type ProviderModelSnapshot = { error: string | null; }; -export type ModelCatalogSnapshot = Record; +export type ModelCatalogSnapshot = Partial>; -const providers: Provider[] = ["openai", "anthropic", "xai"]; +const baseProviders: Provider[] = ["openai", "anthropic", "xai"]; const MODEL_FETCH_TIMEOUT_MS = 15000; const modelCatalog: ModelCatalogSnapshot = { @@ -19,6 +20,10 @@ const modelCatalog: ModelCatalogSnapshot = { xai: { models: [], loadedAt: null, error: null }, }; +function getCatalogProviders(): Provider[] { + return isHermesAgentConfigured() ? [...baseProviders, "hermes-agent"] : baseProviders; +} + function uniqSorted(models: string[]) { return [...new Set(models.map((value) => value.trim()).filter(Boolean))].sort((a, b) => a.localeCompare(b)); } @@ -59,8 +64,15 @@ async function fetchProviderModels(provider: Provider) { return uniqSorted(page.data.map((model) => model.id)); } - const page = await xaiClient().models.list(); - return uniqSorted(page.data.map((model) => model.id)); + if (provider === "xai") { + const page = await xaiClient().models.list(); + return uniqSorted(page.data.map((model) => model.id)); + } + + const page = await hermesAgentClient().models.list(); + const models = page.data.map((model) => model.id); + if (env.HERMES_AGENT_MODEL) models.push(env.HERMES_AGENT_MODEL); + return uniqSorted(models); } async function refreshProviderModels(provider: Provider, logger?: FastifyBaseLogger) { @@ -75,7 +87,7 @@ async function refreshProviderModels(provider: Provider, logger?: FastifyBaseLog } catch (err: any) { const message = err?.message ?? String(err); modelCatalog[provider] = { - models: [], + models: provider === "hermes-agent" && env.HERMES_AGENT_MODEL ? [env.HERMES_AGENT_MODEL] : [], loadedAt: new Date().toISOString(), error: message, }; @@ -84,25 +96,18 @@ async function refreshProviderModels(provider: Provider, logger?: FastifyBaseLog } export async function warmModelCatalog(logger?: FastifyBaseLogger) { - await Promise.all(providers.map((provider) => refreshProviderModels(provider, logger))); + await Promise.all(getCatalogProviders().map((provider) => refreshProviderModels(provider, logger))); } export function getModelCatalogSnapshot(): ModelCatalogSnapshot { - return { - openai: { - models: [...modelCatalog.openai.models], - loadedAt: modelCatalog.openai.loadedAt, - error: modelCatalog.openai.error, - }, - anthropic: { - models: [...modelCatalog.anthropic.models], - loadedAt: modelCatalog.anthropic.loadedAt, - error: modelCatalog.anthropic.error, - }, - xai: { - models: [...modelCatalog.xai.models], - loadedAt: modelCatalog.xai.loadedAt, - error: modelCatalog.xai.error, - }, - }; + const snapshot: ModelCatalogSnapshot = {}; + for (const provider of getCatalogProviders()) { + const entry = modelCatalog[provider] ?? { models: [], loadedAt: null, error: null }; + snapshot[provider] = { + models: [...entry.models], + loadedAt: entry.loadedAt, + error: entry.error, + }; + } + return snapshot; } diff --git a/server/src/llm/multiplexer.ts b/server/src/llm/multiplexer.ts index c87e3d8..e2b39ed 100644 --- a/server/src/llm/multiplexer.ts +++ b/server/src/llm/multiplexer.ts @@ -1,13 +1,13 @@ import { performance } from "node:perf_hooks"; import { prisma } from "../db.js"; -import { anthropicClient, openaiClient, xaiClient } from "./providers.js"; -import { buildToolLogMessageData, runToolAwareChatCompletions, runToolAwareOpenAIChat } from "./chat-tools.js"; +import { anthropicClient, hermesAgentClient, openaiClient, xaiClient } from "./providers.js"; +import { buildToolLogMessageData, runPlainChatCompletions, runToolAwareChatCompletions, runToolAwareOpenAIChat } from "./chat-tools.js"; import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js"; +import { toPrismaProvider } from "./provider-ids.js"; import type { MultiplexRequest, MultiplexResponse, Provider } from "./types.js"; function asProviderEnum(p: Provider) { - // Prisma enum values match these strings. - return p; + return toPrismaProvider(p); } export async function runMultiplex(req: MultiplexRequest): Promise { @@ -84,6 +84,23 @@ export async function runMultiplex(req: MultiplexRequest): Promise buildToolLogMessageData(call.chatId, event)); + } else if (req.provider === "hermes-agent") { + const client = hermesAgentClient(); + const r = await runPlainChatCompletions({ + client, + model: req.model, + messages: req.messages, + temperature: req.temperature, + maxTokens: req.maxTokens, + logContext: { + provider: req.provider, + model: req.model, + chatId, + }, + }); + raw = r.raw; + outText = r.text; + usage = r.usage; } else if (req.provider === "anthropic") { const client = anthropicClient(); diff --git a/server/src/llm/provider-ids.ts b/server/src/llm/provider-ids.ts new file mode 100644 index 0000000..a79e634 --- /dev/null +++ b/server/src/llm/provider-ids.ts @@ -0,0 +1,31 @@ +import type { Provider } from "./types.js"; + +type PrismaProvider = Exclude | "hermes_agent"; + +export function toPrismaProvider(provider: Provider): PrismaProvider { + return provider === "hermes-agent" ? "hermes_agent" : provider; +} + +export function fromPrismaProvider(provider: unknown): Provider | null { + if (provider === null || provider === undefined) return null; + if (provider === "hermes_agent" || provider === "hermes-agent") return "hermes-agent"; + if (provider === "openai" || provider === "anthropic" || provider === "xai") return provider; + return null; +} + +export function serializeProviderFields>(value: T): T { + const next: Record = { ...value }; + if ("initiatedProvider" in next) { + next.initiatedProvider = fromPrismaProvider(next.initiatedProvider); + } + if ("lastUsedProvider" in next) { + next.lastUsedProvider = fromPrismaProvider(next.lastUsedProvider); + } + if ("provider" in next) { + next.provider = fromPrismaProvider(next.provider); + } + if (Array.isArray(next.calls)) { + next.calls = next.calls.map((call: Record) => serializeProviderFields(call)); + } + return next as T; +} diff --git a/server/src/llm/providers.ts b/server/src/llm/providers.ts index 31fd476..5340519 100644 --- a/server/src/llm/providers.ts +++ b/server/src/llm/providers.ts @@ -13,6 +13,18 @@ export function xaiClient() { return new OpenAI({ apiKey: env.XAI_API_KEY, baseURL: "https://api.x.ai/v1" }); } +export function isHermesAgentConfigured() { + return Boolean(env.HERMES_AGENT_API_KEY); +} + +export function hermesAgentClient() { + if (!env.HERMES_AGENT_API_KEY) throw new Error("HERMES_AGENT_API_KEY not set"); + return new OpenAI({ + apiKey: env.HERMES_AGENT_API_KEY, + baseURL: env.HERMES_AGENT_API_BASE_URL, + }); +} + export function anthropicClient() { if (!env.ANTHROPIC_API_KEY) throw new Error("ANTHROPIC_API_KEY not set"); return new Anthropic({ apiKey: env.ANTHROPIC_API_KEY }); diff --git a/server/src/llm/streaming.ts b/server/src/llm/streaming.ts index cbf0ac7..cbafb19 100644 --- a/server/src/llm/streaming.ts +++ b/server/src/llm/streaming.ts @@ -1,13 +1,15 @@ import { performance } from "node:perf_hooks"; import { prisma } from "../db.js"; -import { anthropicClient, openaiClient, xaiClient } from "./providers.js"; +import { anthropicClient, hermesAgentClient, openaiClient, xaiClient } from "./providers.js"; import { buildToolLogMessageData, + runPlainChatCompletionsStream, runToolAwareChatCompletionsStream, runToolAwareOpenAIChatStream, type ToolExecutionEvent, } from "./chat-tools.js"; import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js"; +import { toPrismaProvider } from "./provider-ids.js"; import type { MultiplexRequest, Provider } from "./types.js"; type StreamUsage = { @@ -38,7 +40,7 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator ? await prisma.llmCall.create({ data: { chatId, - provider: req.provider as any, + provider: toPrismaProvider(req.provider) as any, model: req.model, request: req as any, }, @@ -51,14 +53,14 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator prisma.chat.update({ where: { id: chatId }, data: { - lastUsedProvider: req.provider as any, + lastUsedProvider: toPrismaProvider(req.provider) as any, lastUsedModel: req.model, }, }), prisma.chat.updateMany({ where: { id: chatId, initiatedProvider: null }, data: { - initiatedProvider: req.provider as any, + initiatedProvider: toPrismaProvider(req.provider) as any, initiatedModel: req.model, }, }), @@ -72,8 +74,8 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator let raw: unknown = { streamed: true }; try { - if (req.provider === "openai" || req.provider === "xai") { - const client = req.provider === "openai" ? openaiClient() : xaiClient(); + if (req.provider === "openai" || req.provider === "xai" || req.provider === "hermes-agent") { + const client = req.provider === "openai" ? openaiClient() : req.provider === "xai" ? xaiClient() : hermesAgentClient(); const streamEvents = req.provider === "openai" ? runToolAwareOpenAIChatStream({ @@ -88,6 +90,19 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator chatId: chatId ?? undefined, }, }) + : req.provider === "hermes-agent" + ? runPlainChatCompletionsStream({ + client, + model: req.model, + messages: req.messages, + temperature: req.temperature, + maxTokens: req.maxTokens, + logContext: { + provider: req.provider, + model: req.model, + chatId: chatId ?? undefined, + }, + }) : runToolAwareChatCompletionsStream({ client, model: req.model, diff --git a/server/src/llm/types.ts b/server/src/llm/types.ts index 97490e7..618fdf4 100644 --- a/server/src/llm/types.ts +++ b/server/src/llm/types.ts @@ -1,4 +1,6 @@ -export type Provider = "openai" | "anthropic" | "xai"; +export const PROVIDERS = ["openai", "anthropic", "xai", "hermes-agent"] as const; + +export type Provider = (typeof PROVIDERS)[number]; export type ChatImageAttachment = { kind: "image"; diff --git a/server/src/routes.ts b/server/src/routes.ts index 338143b..139c517 100644 --- a/server/src/routes.ts +++ b/server/src/routes.ts @@ -10,9 +10,12 @@ import { runMultiplex } from "./llm/multiplexer.js"; import { runMultiplexStream, type StreamEvent } from "./llm/streaming.js"; import { getModelCatalogSnapshot } from "./llm/model-catalog.js"; import { openaiClient } from "./llm/providers.js"; +import { serializeProviderFields, toPrismaProvider } from "./llm/provider-ids.js"; import { exaClient } from "./search/exa.js"; import type { ChatAttachment } from "./llm/types.js"; +const ProviderSchema = z.enum(["openai", "anthropic", "xai", "hermes-agent"]); + type IncomingChatMessage = { role: "system" | "user" | "assistant" | "tool"; content: string; @@ -125,7 +128,7 @@ const CompletionStreamBody = z .object({ chatId: z.string().optional(), persist: z.boolean().optional(), - provider: z.enum(["openai", "anthropic", "xai"]), + provider: ProviderSchema, model: z.string().min(1), messages: z.array(CompletionMessageSchema), temperature: z.number().min(0).max(2).optional(), @@ -591,7 +594,7 @@ export async function registerRoutes(app: FastifyInstance) { lastUsedModel: true, }, }); - return { chats }; + return { chats: chats.map((chat) => serializeProviderFields(chat)) }; }); app.post("/v1/chats", async (req) => { @@ -599,7 +602,7 @@ export async function registerRoutes(app: FastifyInstance) { const Body = z .object({ title: z.string().optional(), - provider: z.enum(["openai", "anthropic", "xai"]).optional(), + provider: ProviderSchema.optional(), model: z.string().trim().min(1).optional(), messages: z.array(CompletionMessageSchema).optional(), }) @@ -625,9 +628,9 @@ export async function registerRoutes(app: FastifyInstance) { const chat = await prisma.chat.create({ data: { title: body.title, - initiatedProvider: body.provider as any, + initiatedProvider: body.provider ? (toPrismaProvider(body.provider) as any) : undefined, initiatedModel: body.model, - lastUsedProvider: body.provider as any, + lastUsedProvider: body.provider ? (toPrismaProvider(body.provider) as any) : undefined, lastUsedModel: body.model, messages: body.messages?.length ? { @@ -651,7 +654,7 @@ export async function registerRoutes(app: FastifyInstance) { lastUsedModel: true, }, }); - return { chat }; + return { chat: serializeProviderFields(chat) }; }); app.patch("/v1/chats/:chatId", async (req) => { @@ -682,7 +685,7 @@ export async function registerRoutes(app: FastifyInstance) { }, }); if (!chat) return app.httpErrors.notFound("chat not found"); - return { chat }; + return { chat: serializeProviderFields(chat) }; }); app.post("/v1/chats/title/suggest", async (req) => { @@ -707,7 +710,7 @@ export async function registerRoutes(app: FastifyInstance) { }, }); if (!existing) return app.httpErrors.notFound("chat not found"); - if (existing.title?.trim()) return { chat: existing }; + if (existing.title?.trim()) return { chat: serializeProviderFields(existing) }; const fallback = body.content.split(/\r?\n/)[0]?.trim().slice(0, 48) || "New chat"; const suggestedRaw = await generateChatTitle(body.content); @@ -728,7 +731,7 @@ export async function registerRoutes(app: FastifyInstance) { }, }); - return { chat }; + return { chat: serializeProviderFields(chat) }; }); app.delete("/v1/chats/:chatId", async (req) => { @@ -848,7 +851,7 @@ export async function registerRoutes(app: FastifyInstance) { }, }); - return { chat }; + return { chat: serializeProviderFields(chat) }; }); app.post("/v1/searches/:searchId/run", async (req) => { @@ -994,7 +997,7 @@ export async function registerRoutes(app: FastifyInstance) { include: { messages: { orderBy: { createdAt: "asc" } }, calls: { orderBy: { createdAt: "desc" } } }, }); if (!chat) return app.httpErrors.notFound("chat not found"); - return { chat }; + return { chat: serializeProviderFields(chat) }; }); app.post("/v1/chats/:chatId/messages", async (req) => { @@ -1041,7 +1044,7 @@ export async function registerRoutes(app: FastifyInstance) { const Body = z.object({ chatId: z.string().optional(), - provider: z.enum(["openai", "anthropic", "xai"]), + provider: ProviderSchema, model: z.string().min(1), messages: z.array(CompletionMessageSchema), temperature: z.number().min(0).max(2).optional(), diff --git a/server/tests/chat-tools-streaming.test.ts b/server/tests/chat-tools-streaming.test.ts index e727f88..4ab1fca 100644 --- a/server/tests/chat-tools-streaming.test.ts +++ b/server/tests/chat-tools-streaming.test.ts @@ -1,6 +1,7 @@ import assert from "node:assert/strict"; import test from "node:test"; import { + runPlainChatCompletionsStream, runToolAwareChatCompletionsStream, runToolAwareOpenAIChatStream, type ToolAwareStreamingEvent, @@ -105,3 +106,37 @@ test("OpenAI-compatible Chat Completions stream emits text deltas as they arrive ); assert.equal(events.at(-1)?.type === "done" ? events.at(-1)?.result.text : null, "Hello"); }); + +test("plain Chat Completions stream does not send Sybil-managed tools", async () => { + let requestBody: any = null; + const client = { + chat: { + completions: { + create: async (body: any) => { + requestBody = body; + return streamFrom([ + { choices: [{ delta: { content: "Hi" } }] }, + { choices: [{ delta: {}, finish_reason: "stop" }] }, + ]); + }, + }, + }, + }; + + const events = await collectEvents( + runPlainChatCompletionsStream({ + client: client as any, + model: "hermes-agent", + messages: [{ role: "user", content: "Say hi" }], + }) + ); + + assert.equal(requestBody.model, "hermes-agent"); + assert.equal(requestBody.stream, true); + assert.equal("tools" in requestBody, false); + assert.deepEqual( + events.map((event) => event.type), + ["delta", "done"] + ); + assert.equal(events.at(-1)?.type === "done" ? events.at(-1)?.result.text : null, "Hi"); +}); diff --git a/server/tests/provider-ids.test.ts b/server/tests/provider-ids.test.ts new file mode 100644 index 0000000..0a20829 --- /dev/null +++ b/server/tests/provider-ids.test.ts @@ -0,0 +1,12 @@ +import assert from "node:assert/strict"; +import test from "node:test"; +import { fromPrismaProvider, serializeProviderFields, toPrismaProvider } from "../src/llm/provider-ids.js"; + +test("Hermes Agent provider id maps between API and Prisma enum forms", () => { + assert.equal(toPrismaProvider("hermes-agent"), "hermes_agent"); + assert.equal(fromPrismaProvider("hermes_agent"), "hermes-agent"); + assert.deepEqual(serializeProviderFields({ initiatedProvider: "hermes_agent", lastUsedProvider: "xai" }), { + initiatedProvider: "hermes-agent", + lastUsedProvider: "xai", + }); +}); diff --git a/tui/README.md b/tui/README.md index 3746bd5..d4d8bb2 100644 --- a/tui/README.md +++ b/tui/README.md @@ -23,7 +23,7 @@ Configuration is environment-only (no in-app settings). - `SYBIL_TUI_API_BASE_URL`: API base URL. Default: `http://127.0.0.1:8787` - `SYBIL_TUI_ADMIN_TOKEN`: optional bearer token for token-mode servers -- `SYBIL_TUI_DEFAULT_PROVIDER`: `openai` | `anthropic` | `xai` (default: `openai`) +- `SYBIL_TUI_DEFAULT_PROVIDER`: `openai` | `anthropic` | `xai` | `hermes-agent` (default: `openai`) - `SYBIL_TUI_DEFAULT_MODEL`: optional default model name - `SYBIL_TUI_SEARCH_NUM_RESULTS`: results per search run (default: `10`) diff --git a/tui/src/config.ts b/tui/src/config.ts index f1084b0..e703cca 100644 --- a/tui/src/config.ts +++ b/tui/src/config.ts @@ -1,6 +1,6 @@ import type { Provider } from "./types.js"; -const PROVIDERS: Provider[] = ["openai", "anthropic", "xai"]; +const PROVIDERS: Provider[] = ["openai", "anthropic", "xai", "hermes-agent"]; function normalizeBaseUrl(value: string) { const trimmed = value.trim(); diff --git a/tui/src/index.ts b/tui/src/index.ts index d8b92ea..9c5af6c 100644 --- a/tui/src/index.ts +++ b/tui/src/index.ts @@ -39,11 +39,13 @@ type ToolLogMetadata = { resultPreview?: string | null; }; -const PROVIDERS: Provider[] = ["openai", "anthropic", "xai"]; +const BASE_PROVIDERS: Provider[] = ["openai", "anthropic", "xai"]; +const PROVIDERS: Provider[] = [...BASE_PROVIDERS, "hermes-agent"]; const PROVIDER_FALLBACK_MODELS: Record = { openai: ["gpt-4.1-mini"], anthropic: ["claude-3-5-sonnet-latest"], xai: ["grok-3-mini"], + "hermes-agent": ["hermes-agent"], }; const EMPTY_MODEL_CATALOG: ModelCatalogResponse["providers"] = { @@ -74,6 +76,7 @@ function getProviderLabel(provider: Provider | null | undefined) { if (provider === "openai") return "OpenAI"; if (provider === "anthropic") return "Anthropic"; if (provider === "xai") return "xAI"; + if (provider === "hermes-agent") return "Hermes Agent"; return ""; } @@ -159,6 +162,10 @@ function getModelOptions(catalog: ModelCatalogResponse["providers"], provider: P return PROVIDER_FALLBACK_MODELS[provider]; } +function getVisibleProviders(catalog: ModelCatalogResponse["providers"]) { + return PROVIDERS.filter((provider) => provider !== "hermes-agent" || catalog[provider] !== undefined); +} + function pickProviderModel(options: string[], preferred: string | null, fallback: string | null = null) { if (fallback && options.includes(fallback)) return fallback; if (preferred && options.includes(preferred)) return preferred; @@ -202,6 +209,7 @@ async function main() { openai: null, anthropic: null, xai: null, + "hermes-agent": null, }; let model: string = config.defaultModel ?? pickProviderModel(getModelOptions(modelCatalog, provider), null); let errorMessage: string | null = null; @@ -1257,8 +1265,10 @@ async function main() { } function cycleProvider() { - const currentIndex = PROVIDERS.indexOf(provider); - const nextProvider: Provider = PROVIDERS[(currentIndex + 1) % PROVIDERS.length] ?? "openai"; + const visibleProviders = getVisibleProviders(modelCatalog); + const cycleProviders = visibleProviders.length ? visibleProviders : BASE_PROVIDERS; + const currentIndex = Math.max(0, cycleProviders.indexOf(provider)); + const nextProvider: Provider = cycleProviders[(currentIndex + 1) % cycleProviders.length] ?? "openai"; provider = nextProvider; syncModelForProvider(); updateUI(); diff --git a/tui/src/types.ts b/tui/src/types.ts index e04480c..2af459c 100644 --- a/tui/src/types.ts +++ b/tui/src/types.ts @@ -1,4 +1,4 @@ -export type Provider = "openai" | "anthropic" | "xai"; +export type Provider = "openai" | "anthropic" | "xai" | "hermes-agent"; export type ProviderModelInfo = { models: string[]; @@ -7,7 +7,7 @@ export type ProviderModelInfo = { }; export type ModelCatalogResponse = { - providers: Record; + providers: Partial>; }; export type ChatSummary = { diff --git a/web/src/App.tsx b/web/src/App.tsx index 555ef49..43455d6 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -95,6 +95,7 @@ const PROVIDER_FALLBACK_MODELS: Record = { openai: ["gpt-4.1-mini"], anthropic: ["claude-3-5-sonnet-latest"], xai: ["grok-3-mini"], + "hermes-agent": ["hermes-agent"], }; const EMPTY_MODEL_CATALOG: ModelCatalogResponse["providers"] = { @@ -103,6 +104,9 @@ const EMPTY_MODEL_CATALOG: ModelCatalogResponse["providers"] = { xai: { models: [], loadedAt: null, error: null }, }; +const BASE_PROVIDERS: Provider[] = ["openai", "anthropic", "xai"]; +const ALL_PROVIDERS: Provider[] = [...BASE_PROVIDERS, "hermes-agent"]; + const MODEL_PREFERENCES_STORAGE_KEY = "sybil:modelPreferencesByProvider"; const QUICK_QUESTION_MODEL_SELECTION_STORAGE_KEY = "sybil:quickQuestionModelSelection"; @@ -117,6 +121,7 @@ const EMPTY_MODEL_PREFERENCES: ProviderModelPreferences = { openai: null, anthropic: null, xai: null, + "hermes-agent": null, }; const EMPTY_ACTIVE_RUNS: ActiveRunsState = { chats: {}, @@ -193,6 +198,10 @@ function getModelOptions(catalog: ModelCatalogResponse["providers"], provider: P return PROVIDER_FALLBACK_MODELS[provider]; } +function getVisibleProviders(catalog: ModelCatalogResponse["providers"]) { + return ALL_PROVIDERS.filter((provider) => provider !== "hermes-agent" || catalog[provider] !== undefined); +} + function getReplyScrollBufferHeight() { if (typeof window === "undefined") return REPLY_SCROLL_BUFFER_MIN; return Math.min( @@ -308,6 +317,8 @@ function loadStoredModelPreferences() { openai: typeof parsed.openai === "string" && parsed.openai.trim() ? parsed.openai.trim() : null, anthropic: typeof parsed.anthropic === "string" && parsed.anthropic.trim() ? parsed.anthropic.trim() : null, xai: typeof parsed.xai === "string" && parsed.xai.trim() ? parsed.xai.trim() : null, + "hermes-agent": + typeof parsed["hermes-agent"] === "string" && parsed["hermes-agent"].trim() ? parsed["hermes-agent"].trim() : null, }; } catch { return EMPTY_MODEL_PREFERENCES; @@ -315,17 +326,19 @@ function loadStoredModelPreferences() { } function normalizeStoredProvider(value: unknown): Provider { - return value === "anthropic" || value === "xai" || value === "openai" ? value : "openai"; + return value === "anthropic" || value === "xai" || value === "openai" || value === "hermes-agent" ? value : "openai"; } function normalizeStoredModelPreferences(value: unknown): ProviderModelPreferences { if (!value || typeof value !== "object" || Array.isArray(value)) return EMPTY_MODEL_PREFERENCES; const parsed = value as Partial>; - return { - openai: typeof parsed.openai === "string" && parsed.openai.trim() ? parsed.openai.trim() : null, - anthropic: typeof parsed.anthropic === "string" && parsed.anthropic.trim() ? parsed.anthropic.trim() : null, - xai: typeof parsed.xai === "string" && parsed.xai.trim() ? parsed.xai.trim() : null, - }; + return { + openai: typeof parsed.openai === "string" && parsed.openai.trim() ? parsed.openai.trim() : null, + anthropic: typeof parsed.anthropic === "string" && parsed.anthropic.trim() ? parsed.anthropic.trim() : null, + xai: typeof parsed.xai === "string" && parsed.xai.trim() ? parsed.xai.trim() : null, + "hermes-agent": + typeof parsed["hermes-agent"] === "string" && parsed["hermes-agent"].trim() ? parsed["hermes-agent"].trim() : null, + }; } function loadStoredQuickQuestionModelSelection(): QuickQuestionModelSelection { @@ -354,6 +367,7 @@ function getProviderLabel(provider: Provider | null | undefined) { if (provider === "openai") return "OpenAI"; if (provider === "anthropic") return "Anthropic"; if (provider === "xai") return "xAI"; + if (provider === "hermes-agent") return "Hermes Agent"; return ""; } @@ -963,6 +977,7 @@ export default function App() { const providerModelOptions = useMemo(() => getModelOptions(modelCatalog, provider), [modelCatalog, provider]); const quickProviderModelOptions = useMemo(() => getModelOptions(modelCatalog, quickProvider), [modelCatalog, quickProvider]); + const providerOptions = useMemo(() => getVisibleProviders(modelCatalog), [modelCatalog]); useEffect(() => { if (model.trim()) return; @@ -2512,9 +2527,11 @@ export default function App() { }} disabled={isActiveSelectionSending} > - - - + {providerOptions.map((candidate) => ( + + ))} - - - + {providerOptions.map((candidate) => ( + + ))} ; + providers: Partial>; }; export type ActiveRunsResponse = {