import { performance } from "node:perf_hooks";
import { prisma } from "../db.js";
import { anthropicClient, openaiClient, xaiClient } from "./providers.js";
import type { MultiplexRequest, MultiplexResponse, Provider } from "./types.js";

/** `max_tokens` sent to Anthropic when the request does not set `maxTokens`. */
const DEFAULT_ANTHROPIC_MAX_TOKENS = 1024;

/** Normalized outcome of one provider call, before anything is persisted. */
type ProviderResult = {
  outText: string;
  usage: MultiplexResponse["usage"] | undefined;
  raw: unknown;
};

/**
 * Converts a normalized provider name into the value written to the database.
 * This is an identity function; it exists as a single place to adapt the value
 * should the stored enum ever diverge from the normalized names.
 */
function asProviderEnum(p: Provider): Provider {
  // Prisma enum values match these strings.
  return p;
}

/**
 * Extracts a storable message from an arbitrary thrown value.
 * Uses the `message` property when it is a string, otherwise stringifies the value.
 */
function errorMessage(e: unknown): string {
  if (e instanceof Error) return e.message;
  if (typeof e === "object" && e !== null && "message" in e) {
    const candidate = (e as { message: unknown }).message;
    if (typeof candidate === "string") return candidate;
  }
  return String(e);
}

/**
 * Sends the request through the chat-completions interface shared by the
 * OpenAI and xAI clients. The caller must have checked that `req.provider`
 * is `"openai"` or `"xai"`; any other value would select the xAI client.
 */
async function callOpenAiCompatible(req: MultiplexRequest): Promise<ProviderResult> {
  const client = req.provider === "openai" ? openaiClient() : xaiClient();
  const r = await client.chat.completions.create({
    model: req.model,
    // OpenAI SDK has very specific message union types; our normalized schema is compatible.
    messages: req.messages.map((m) => ({ role: m.role, content: m.content, name: m.name })) as any,
    temperature: req.temperature,
    max_tokens: req.maxTokens,
  });

  const usage = r.usage
    ? {
        inputTokens: r.usage.prompt_tokens,
        outputTokens: r.usage.completion_tokens,
        totalTokens: r.usage.total_tokens,
      }
    : undefined;

  // Only the first choice is used; a missing choice or content becomes an empty string.
  return { outText: r.choices?.[0]?.message?.content ?? "", usage, raw: r };
}

/**
 * Sends the request through the Anthropic messages interface.
 * System messages are lifted out of the message list into the separate
 * `system` field, and every remaining non-assistant role is sent as `"user"`.
 */
async function callAnthropic(req: MultiplexRequest): Promise<ProviderResult> {
  const client = anthropicClient();

  // Anthropic splits the system prompt out of the message list. All system
  // messages are joined (in order) so that none of them is silently dropped.
  const systemParts = req.messages.filter((m) => m.role === "system").map((m) => m.content);
  const system = systemParts.length > 0 ? systemParts.join("\n\n") : undefined;

  const msgs = req.messages
    .filter((m) => m.role !== "system")
    .map((m) => ({ role: m.role === "assistant" ? "assistant" : "user", content: m.content }));

  const r = await client.messages.create({
    model: req.model,
    system,
    max_tokens: req.maxTokens ?? DEFAULT_ANTHROPIC_MAX_TOKENS,
    temperature: req.temperature,
    messages: msgs as any,
  });

  // Concatenate the text blocks; blocks of any other type contribute nothing.
  const outText = r.content
    .map((c: any) => (c.type === "text" ? c.text : ""))
    .join("")
    .trim();

  // Anthropic usage (SDK typing varies by version)
  const ru: any = (r as any).usage;
  const usage = ru
    ? {
        inputTokens: ru.input_tokens,
        outputTokens: ru.output_tokens,
        // No total is read from the response, so it is derived from the two counts.
        totalTokens: (ru.input_tokens ?? 0) + (ru.output_tokens ?? 0),
      }
    : undefined;

  return { outText, usage, raw: r };
}

/**
 * Runs one chat request against the selected provider and records it.
 *
 * An `llmCall` row is created before the provider is contacted (creating a new
 * chat when `req.chatId` is absent). On success the assistant message and the
 * call's response, latency and token usage are written in one transaction. On
 * failure the error message and latency are written to the same row.
 *
 * @param req - Normalized request: provider, model, messages and sampling options.
 * @returns The assistant reply, token usage when reported, and the raw provider response.
 * @throws Rethrows whatever the provider call or the persistence step threw.
 *   An unrecognized provider throws `Error("unknown provider: ...")`.
 */
export async function runMultiplex(req: MultiplexRequest): Promise<MultiplexResponse> {
  const t0 = performance.now();

  // Persist call record early so we can attach errors.
  const call = await prisma.llmCall.create({
    data: {
      chatId: req.chatId ?? (await prisma.chat.create({ data: {} })).id,
      provider: asProviderEnum(req.provider) as any,
      model: req.model,
      request: req as any,
    },
    select: { id: true, chatId: true },
  });

  try {
    let result: ProviderResult;
    if (req.provider === "openai" || req.provider === "xai") {
      result = await callOpenAiCompatible(req);
    } else if (req.provider === "anthropic") {
      result = await callAnthropic(req);
    } else {
      throw new Error(`unknown provider: ${req.provider}`);
    }
    const { outText, usage, raw } = result;

    const latencyMs = Math.round(performance.now() - t0);

    // Store assistant message + call record
    await prisma.$transaction([
      prisma.message.create({
        data: {
          chatId: call.chatId,
          role: "assistant" as any,
          content: outText,
        },
      }),
      prisma.llmCall.update({
        where: { id: call.id },
        data: {
          response: raw as any,
          latencyMs,
          inputTokens: usage?.inputTokens,
          outputTokens: usage?.outputTokens,
          totalTokens: usage?.totalTokens,
        },
      }),
    ]);

    return {
      provider: req.provider,
      model: req.model,
      message: { role: "assistant", content: outText },
      usage,
      raw,
    };
  } catch (e: unknown) {
    const latencyMs = Math.round(performance.now() - t0);
    try {
      await prisma.llmCall.update({
        where: { id: call.id },
        data: {
          error: errorMessage(e),
          latencyMs,
        },
      });
    } catch (recordError: unknown) {
      // Best effort: failing to record the error must not replace the original
      // failure, which is what the caller needs to see.
      console.error(`failed to record error for llmCall ${call.id}:`, recordError);
    }
    throw e;
  }
}