add hermes agent provider

This commit is contained in:
2026-05-04 21:52:39 -07:00
parent 195e157e1a
commit 8b580fd3e1
27 changed files with 359 additions and 83 deletions

View File

@@ -385,6 +385,10 @@ function normalizeIncomingMessages(messages: ChatMessage[]) {
return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
}
/**
 * Converts chat messages into OpenAI chat-completions messages.
 *
 * Every entry is passed through `buildOpenAIConversationMessage`; the result
 * keeps the input order and nothing else is added to the list.
 */
function normalizePlainIncomingMessages(messages: ChatMessage[]) {
  const converted = messages.map((entry) => buildOpenAIConversationMessage(entry));
  return converted;
}
function normalizeIncomingResponsesInput(messages: ChatMessage[]) {
const normalized = messages.map((message) => buildOpenAIResponsesInputMessage(message));
@@ -853,6 +857,20 @@ function extractResponsesText(response: any, fallback = "") {
return parts.join("") || fallback;
}
/**
 * Flattens the `content` of a chat-completion message into a single string.
 *
 * - A string `content` is returned unchanged.
 * - An array `content` is concatenated part by part: string parts are used
 *   as-is, otherwise a string `text` field wins over a string `content`
 *   field; parts with neither contribute nothing.
 * - Anything else (including a missing message) yields "".
 */
function extractChatCompletionContent(message: any) {
  const content = message?.content;
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  let combined = "";
  for (const piece of content) {
    if (typeof piece === "string") {
      combined += piece;
    } else if (typeof piece?.text === "string") {
      combined += piece.text;
    } else if (typeof piece?.content === "string") {
      combined += piece.content;
    }
  }
  return combined;
}
function getUnstreamedText(finalText: string, streamedText: string) {
if (!finalText) return "";
if (!streamedText) return finalText;
@@ -1093,6 +1111,26 @@ export async function runToolAwareChatCompletions(params: ToolAwareCompletionPar
};
}
/**
 * Runs a single non-streaming chat-completions request with no tool handling.
 *
 * Sends the model, the converted messages, `temperature`, and `maxTokens`
 * (as `max_tokens`), then reads the text from the first choice's message.
 *
 * @param params Client, model, messages and sampling options for the request.
 * @returns The extracted text, the usage totals when `mergeUsage` reports any,
 *   the raw completion tagged with `api: "chat.completions"`, and an empty
 *   `toolEvents` list.
 */
export async function runPlainChatCompletions(params: ToolAwareCompletionParams): Promise<ToolAwareCompletionResult> {
  const { client, model, messages, temperature, maxTokens } = params;

  const request = {
    model,
    messages: normalizePlainIncomingMessages(messages),
    temperature,
    max_tokens: maxTokens,
  };
  const completion = await client.chat.completions.create(request as any);

  // Usage is only reported back when the response actually carried some.
  const totals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  const hasUsage = mergeUsage(totals, completion?.usage);

  const firstMessage = completion?.choices?.[0]?.message;
  const text = extractChatCompletionContent(firstMessage);

  return {
    text,
    usage: hasUsage ? totals : undefined,
    raw: { response: completion, api: "chat.completions" },
    toolEvents: [],
  };
}
export async function* runToolAwareOpenAIChatStream(
params: ToolAwareCompletionParams
): AsyncGenerator<ToolAwareStreamingEvent> {
@@ -1354,3 +1392,41 @@ export async function* runToolAwareChatCompletionsStream(
},
};
}
/**
 * Streams a chat-completions request with no tool handling.
 *
 * Yields a `delta` event for every non-empty string found at
 * `choices[0].delta.content`, then a single `done` event carrying the
 * concatenated text, the usage totals (if any chunk contributed usage), every
 * raw chunk received, and an empty `toolEvents` list.
 *
 * NOTE(review): usage is only picked up when the server puts `usage` on the
 * stream chunks; this request does not set `stream_options` — confirm the
 * target endpoint emits usage without it.
 *
 * @param params Client, model, messages and sampling options for the request.
 */
export async function* runPlainChatCompletionsStream(
  params: ToolAwareCompletionParams
): AsyncGenerator<ToolAwareStreamingEvent> {
  const chunks: unknown[] = [];
  const totals: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
  let hasUsage = false;
  let fullText = "";

  const request = {
    model: params.model,
    messages: normalizePlainIncomingMessages(params.messages),
    temperature: params.temperature,
    max_tokens: params.maxTokens,
    stream: true,
  };
  const stream = await params.client.chat.completions.create(request as any);

  for await (const chunk of stream as any as AsyncIterable<any>) {
    chunks.push(chunk);

    // mergeUsage must run for every chunk; the flag just latches once set.
    if (mergeUsage(totals, chunk?.usage)) {
      hasUsage = true;
    }

    const piece = chunk?.choices?.[0]?.delta?.content;
    if (typeof piece !== "string" || piece.length === 0) continue;

    fullText += piece;
    yield { type: "delta", text: piece };
  }

  yield {
    type: "done",
    result: {
      text: fullText,
      usage: hasUsage ? totals : undefined,
      raw: { streamed: true, responses: chunks, api: "chat.completions" },
      toolEvents: [],
    },
  };
}

View File

@@ -1,5 +1,6 @@
import type { FastifyBaseLogger } from "fastify";
import { anthropicClient, openaiClient, xaiClient } from "./providers.js";
import { env } from "../env.js";
import { anthropicClient, hermesAgentClient, isHermesAgentConfigured, openaiClient, xaiClient } from "./providers.js";
import type { Provider } from "./types.js";
export type ProviderModelSnapshot = {
@@ -8,9 +9,9 @@ export type ProviderModelSnapshot = {
error: string | null;
};
export type ModelCatalogSnapshot = Record<Provider, ProviderModelSnapshot>;
export type ModelCatalogSnapshot = Partial<Record<Provider, ProviderModelSnapshot>>;
const providers: Provider[] = ["openai", "anthropic", "xai"];
const baseProviders: Provider[] = ["openai", "anthropic", "xai"];
const MODEL_FETCH_TIMEOUT_MS = 15000;
const modelCatalog: ModelCatalogSnapshot = {
@@ -19,6 +20,10 @@ const modelCatalog: ModelCatalogSnapshot = {
xai: { models: [], loadedAt: null, error: null },
};
/**
 * Lists the providers the model catalog should cover.
 *
 * Returns `baseProviders` itself when `isHermesAgentConfigured()` is false;
 * otherwise returns a new array with "hermes-agent" appended.
 */
function getCatalogProviders(): Provider[] {
  if (!isHermesAgentConfigured()) {
    return baseProviders;
  }
  return [...baseProviders, "hermes-agent"];
}
/**
 * Returns the distinct, non-empty model ids in `models`, trimmed of
 * surrounding whitespace and ordered with `localeCompare`.
 */
function uniqSorted(models: string[]) {
  const unique = new Set<string>();
  models.forEach((raw) => {
    const id = raw.trim();
    // Entries that are empty after trimming are dropped.
    if (id) unique.add(id);
  });
  return Array.from(unique).sort((left, right) => left.localeCompare(right));
}
@@ -59,8 +64,15 @@ async function fetchProviderModels(provider: Provider) {
return uniqSorted(page.data.map((model) => model.id));
}
const page = await xaiClient().models.list();
return uniqSorted(page.data.map((model) => model.id));
if (provider === "xai") {
const page = await xaiClient().models.list();
return uniqSorted(page.data.map((model) => model.id));
}
const page = await hermesAgentClient().models.list();
const models = page.data.map((model) => model.id);
if (env.HERMES_AGENT_MODEL) models.push(env.HERMES_AGENT_MODEL);
return uniqSorted(models);
}
async function refreshProviderModels(provider: Provider, logger?: FastifyBaseLogger) {
@@ -75,7 +87,7 @@ async function refreshProviderModels(provider: Provider, logger?: FastifyBaseLog
} catch (err: any) {
const message = err?.message ?? String(err);
modelCatalog[provider] = {
models: [],
models: provider === "hermes-agent" && env.HERMES_AGENT_MODEL ? [env.HERMES_AGENT_MODEL] : [],
loadedAt: new Date().toISOString(),
error: message,
};
@@ -84,25 +96,18 @@ async function refreshProviderModels(provider: Provider, logger?: FastifyBaseLog
}
export async function warmModelCatalog(logger?: FastifyBaseLogger) {
await Promise.all(providers.map((provider) => refreshProviderModels(provider, logger)));
await Promise.all(getCatalogProviders().map((provider) => refreshProviderModels(provider, logger)));
}
export function getModelCatalogSnapshot(): ModelCatalogSnapshot {
return {
openai: {
models: [...modelCatalog.openai.models],
loadedAt: modelCatalog.openai.loadedAt,
error: modelCatalog.openai.error,
},
anthropic: {
models: [...modelCatalog.anthropic.models],
loadedAt: modelCatalog.anthropic.loadedAt,
error: modelCatalog.anthropic.error,
},
xai: {
models: [...modelCatalog.xai.models],
loadedAt: modelCatalog.xai.loadedAt,
error: modelCatalog.xai.error,
},
};
const snapshot: ModelCatalogSnapshot = {};
for (const provider of getCatalogProviders()) {
const entry = modelCatalog[provider] ?? { models: [], loadedAt: null, error: null };
snapshot[provider] = {
models: [...entry.models],
loadedAt: entry.loadedAt,
error: entry.error,
};
}
return snapshot;
}

View File

@@ -1,13 +1,13 @@
import { performance } from "node:perf_hooks";
import { prisma } from "../db.js";
import { anthropicClient, openaiClient, xaiClient } from "./providers.js";
import { buildToolLogMessageData, runToolAwareChatCompletions, runToolAwareOpenAIChat } from "./chat-tools.js";
import { anthropicClient, hermesAgentClient, openaiClient, xaiClient } from "./providers.js";
import { buildToolLogMessageData, runPlainChatCompletions, runToolAwareChatCompletions, runToolAwareOpenAIChat } from "./chat-tools.js";
import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js";
import { toPrismaProvider } from "./provider-ids.js";
import type { MultiplexRequest, MultiplexResponse, Provider } from "./types.js";
function asProviderEnum(p: Provider) {
// Prisma enum values match these strings.
return p;
return toPrismaProvider(p);
}
export async function runMultiplex(req: MultiplexRequest): Promise<MultiplexResponse> {
@@ -84,6 +84,23 @@ export async function runMultiplex(req: MultiplexRequest): Promise<MultiplexResp
outText = r.text;
usage = r.usage;
toolMessages = r.toolEvents.map((event) => buildToolLogMessageData(call.chatId, event));
} else if (req.provider === "hermes-agent") {
const client = hermesAgentClient();
const r = await runPlainChatCompletions({
client,
model: req.model,
messages: req.messages,
temperature: req.temperature,
maxTokens: req.maxTokens,
logContext: {
provider: req.provider,
model: req.model,
chatId,
},
});
raw = r.raw;
outText = r.text;
usage = r.usage;
} else if (req.provider === "anthropic") {
const client = anthropicClient();

View File

@@ -0,0 +1,31 @@
import type { Provider } from "./types.js";
/** Provider id in its Prisma spelling: "hermes-agent" becomes "hermes_agent". */
type PrismaProvider = Exclude<Provider, "hermes-agent"> | "hermes_agent";

/**
 * Maps an application provider id to its Prisma spelling.
 *
 * Only "hermes-agent" changes (to "hermes_agent"); every other provider id is
 * returned as-is.
 */
export function toPrismaProvider(provider: Provider): PrismaProvider {
  if (provider === "hermes-agent") {
    return "hermes_agent";
  }
  return provider;
}
/**
 * Maps a stored provider value back to an application provider id.
 *
 * Accepts both "hermes_agent" and "hermes-agent" for the Hermes provider and
 * passes "openai", "anthropic" and "xai" through. Any other value, including
 * `null` and `undefined`, yields `null`.
 */
export function fromPrismaProvider(provider: unknown): Provider | null {
  switch (provider) {
    case "hermes_agent":
    case "hermes-agent":
      return "hermes-agent";
    case "openai":
      return "openai";
    case "anthropic":
      return "anthropic";
    case "xai":
      return "xai";
    default:
      return null;
  }
}
/**
 * Returns a shallow copy of `value` with its provider fields converted via
 * `fromPrismaProvider`.
 *
 * The fields `initiatedProvider`, `lastUsedProvider` and `provider` are
 * converted only when present on the copy. If `calls` is an array, each
 * element is processed recursively. The input object itself is not modified.
 */
export function serializeProviderFields<T extends Record<string, any>>(value: T): T {
  const copy: Record<string, any> = { ...value };

  for (const field of ["initiatedProvider", "lastUsedProvider", "provider"]) {
    if (field in copy) {
      copy[field] = fromPrismaProvider(copy[field]);
    }
  }

  const { calls } = copy;
  if (Array.isArray(calls)) {
    copy.calls = calls.map((call: Record<string, any>) => serializeProviderFields(call));
  }

  return copy as T;
}

View File

@@ -13,6 +13,18 @@ export function xaiClient() {
return new OpenAI({ apiKey: env.XAI_API_KEY, baseURL: "https://api.x.ai/v1" });
}
/** Reports whether `env.HERMES_AGENT_API_KEY` holds a truthy value. */
export function isHermesAgentConfigured() {
  return !!env.HERMES_AGENT_API_KEY;
}
/**
 * Builds an OpenAI SDK client pointed at the Hermes agent endpoint.
 *
 * NOTE(review): `env.HERMES_AGENT_API_BASE_URL` is passed through unchecked;
 * if it can be unset the SDK would presumably fall back to its own default
 * base URL — confirm env validation guarantees a value.
 *
 * @throws Error when `HERMES_AGENT_API_KEY` is not set.
 */
export function hermesAgentClient() {
  const apiKey = env.HERMES_AGENT_API_KEY;
  if (!apiKey) throw new Error("HERMES_AGENT_API_KEY not set");

  const baseURL = env.HERMES_AGENT_API_BASE_URL;
  return new OpenAI({ apiKey, baseURL });
}
export function anthropicClient() {
if (!env.ANTHROPIC_API_KEY) throw new Error("ANTHROPIC_API_KEY not set");
return new Anthropic({ apiKey: env.ANTHROPIC_API_KEY });

View File

@@ -1,13 +1,15 @@
import { performance } from "node:perf_hooks";
import { prisma } from "../db.js";
import { anthropicClient, openaiClient, xaiClient } from "./providers.js";
import { anthropicClient, hermesAgentClient, openaiClient, xaiClient } from "./providers.js";
import {
buildToolLogMessageData,
runPlainChatCompletionsStream,
runToolAwareChatCompletionsStream,
runToolAwareOpenAIChatStream,
type ToolExecutionEvent,
} from "./chat-tools.js";
import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js";
import { toPrismaProvider } from "./provider-ids.js";
import type { MultiplexRequest, Provider } from "./types.js";
type StreamUsage = {
@@ -38,7 +40,7 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
? await prisma.llmCall.create({
data: {
chatId,
provider: req.provider as any,
provider: toPrismaProvider(req.provider) as any,
model: req.model,
request: req as any,
},
@@ -51,14 +53,14 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
prisma.chat.update({
where: { id: chatId },
data: {
lastUsedProvider: req.provider as any,
lastUsedProvider: toPrismaProvider(req.provider) as any,
lastUsedModel: req.model,
},
}),
prisma.chat.updateMany({
where: { id: chatId, initiatedProvider: null },
data: {
initiatedProvider: req.provider as any,
initiatedProvider: toPrismaProvider(req.provider) as any,
initiatedModel: req.model,
},
}),
@@ -72,8 +74,8 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
let raw: unknown = { streamed: true };
try {
if (req.provider === "openai" || req.provider === "xai") {
const client = req.provider === "openai" ? openaiClient() : xaiClient();
if (req.provider === "openai" || req.provider === "xai" || req.provider === "hermes-agent") {
const client = req.provider === "openai" ? openaiClient() : req.provider === "xai" ? xaiClient() : hermesAgentClient();
const streamEvents =
req.provider === "openai"
? runToolAwareOpenAIChatStream({
@@ -88,6 +90,19 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
chatId: chatId ?? undefined,
},
})
: req.provider === "hermes-agent"
? runPlainChatCompletionsStream({
client,
model: req.model,
messages: req.messages,
temperature: req.temperature,
maxTokens: req.maxTokens,
logContext: {
provider: req.provider,
model: req.model,
chatId: chatId ?? undefined,
},
})
: runToolAwareChatCompletionsStream({
client,
model: req.model,

View File

@@ -1,4 +1,6 @@
export type Provider = "openai" | "anthropic" | "xai";
export const PROVIDERS = ["openai", "anthropic", "xai", "hermes-agent"] as const;
export type Provider = (typeof PROVIDERS)[number];
export type ChatImageAttachment = {
kind: "image";