quick question feature
This commit is contained in:
@@ -10,11 +10,17 @@ import {
|
||||
import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js";
|
||||
import type { MultiplexRequest, Provider } from "./types.js";
|
||||
|
||||
type StreamUsage = {
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
totalTokens?: number;
|
||||
};
|
||||
|
||||
export type StreamEvent =
|
||||
| { type: "meta"; chatId: string; callId: string; provider: Provider; model: string }
|
||||
| { type: "meta"; chatId: string | null; callId: string | null; provider: Provider; model: string }
|
||||
| { type: "tool_call"; event: ToolExecutionEvent }
|
||||
| { type: "delta"; text: string }
|
||||
| { type: "done"; text: string; usage?: { inputTokens?: number; outputTokens?: number; totalTokens?: number } }
|
||||
| { type: "done"; text: string; usage?: StreamUsage }
|
||||
| { type: "error"; message: string };
|
||||
|
||||
function getChatIdOrCreate(chatId?: string) {
|
||||
@@ -24,39 +30,45 @@ function getChatIdOrCreate(chatId?: string) {
|
||||
|
||||
export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator<StreamEvent> {
|
||||
const t0 = performance.now();
|
||||
const chatId = await getChatIdOrCreate(req.chatId);
|
||||
const shouldPersist = req.persist !== false;
|
||||
const chatId = shouldPersist ? await getChatIdOrCreate(req.chatId) : null;
|
||||
|
||||
const call = await prisma.llmCall.create({
|
||||
data: {
|
||||
chatId,
|
||||
provider: req.provider as any,
|
||||
model: req.model,
|
||||
request: req as any,
|
||||
},
|
||||
select: { id: true },
|
||||
});
|
||||
const call =
|
||||
shouldPersist && chatId
|
||||
? await prisma.llmCall.create({
|
||||
data: {
|
||||
chatId,
|
||||
provider: req.provider as any,
|
||||
model: req.model,
|
||||
request: req as any,
|
||||
},
|
||||
select: { id: true },
|
||||
})
|
||||
: null;
|
||||
|
||||
await prisma.$transaction([
|
||||
prisma.chat.update({
|
||||
where: { id: chatId },
|
||||
data: {
|
||||
lastUsedProvider: req.provider as any,
|
||||
lastUsedModel: req.model,
|
||||
},
|
||||
}),
|
||||
prisma.chat.updateMany({
|
||||
where: { id: chatId, initiatedProvider: null },
|
||||
data: {
|
||||
initiatedProvider: req.provider as any,
|
||||
initiatedModel: req.model,
|
||||
},
|
||||
}),
|
||||
]);
|
||||
if (shouldPersist && chatId) {
|
||||
await prisma.$transaction([
|
||||
prisma.chat.update({
|
||||
where: { id: chatId },
|
||||
data: {
|
||||
lastUsedProvider: req.provider as any,
|
||||
lastUsedModel: req.model,
|
||||
},
|
||||
}),
|
||||
prisma.chat.updateMany({
|
||||
where: { id: chatId, initiatedProvider: null },
|
||||
data: {
|
||||
initiatedProvider: req.provider as any,
|
||||
initiatedModel: req.model,
|
||||
},
|
||||
}),
|
||||
]);
|
||||
}
|
||||
|
||||
yield { type: "meta", chatId, callId: call.id, provider: req.provider, model: req.model };
|
||||
yield { type: "meta", chatId, callId: call?.id ?? null, provider: req.provider, model: req.model };
|
||||
|
||||
let text = "";
|
||||
let usage: StreamEvent extends any ? any : never;
|
||||
let usage: StreamUsage | undefined;
|
||||
let raw: unknown = { streamed: true };
|
||||
|
||||
try {
|
||||
@@ -73,7 +85,7 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
|
||||
logContext: {
|
||||
provider: req.provider,
|
||||
model: req.model,
|
||||
chatId,
|
||||
chatId: chatId ?? undefined,
|
||||
},
|
||||
})
|
||||
: runToolAwareChatCompletionsStream({
|
||||
@@ -85,7 +97,7 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
|
||||
logContext: {
|
||||
provider: req.provider,
|
||||
model: req.model,
|
||||
chatId,
|
||||
chatId: chatId ?? undefined,
|
||||
},
|
||||
});
|
||||
for await (const ev of streamEvents) {
|
||||
@@ -96,16 +108,18 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
|
||||
}
|
||||
|
||||
if (ev.type === "tool_call") {
|
||||
const toolMessage = buildToolLogMessageData(chatId, ev.event);
|
||||
await prisma.message.create({
|
||||
data: {
|
||||
chatId: toolMessage.chatId,
|
||||
role: toolMessage.role as any,
|
||||
content: toolMessage.content,
|
||||
name: toolMessage.name,
|
||||
metadata: toolMessage.metadata as any,
|
||||
},
|
||||
});
|
||||
if (shouldPersist && chatId) {
|
||||
const toolMessage = buildToolLogMessageData(chatId, ev.event);
|
||||
await prisma.message.create({
|
||||
data: {
|
||||
chatId: toolMessage.chatId,
|
||||
role: toolMessage.role as any,
|
||||
content: toolMessage.content,
|
||||
name: toolMessage.name,
|
||||
metadata: toolMessage.metadata as any,
|
||||
},
|
||||
});
|
||||
}
|
||||
yield { type: "tool_call", event: ev.event };
|
||||
continue;
|
||||
}
|
||||
@@ -156,32 +170,36 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
|
||||
|
||||
const latencyMs = Math.round(performance.now() - t0);
|
||||
|
||||
await prisma.$transaction(async (tx) => {
|
||||
await tx.message.create({
|
||||
data: { chatId, role: "assistant" as any, content: text },
|
||||
if (shouldPersist && chatId && call) {
|
||||
await prisma.$transaction(async (tx) => {
|
||||
await tx.message.create({
|
||||
data: { chatId, role: "assistant" as any, content: text },
|
||||
});
|
||||
await tx.llmCall.update({
|
||||
where: { id: call.id },
|
||||
data: {
|
||||
response: raw as any,
|
||||
latencyMs,
|
||||
inputTokens: usage?.inputTokens,
|
||||
outputTokens: usage?.outputTokens,
|
||||
totalTokens: usage?.totalTokens,
|
||||
},
|
||||
});
|
||||
});
|
||||
await tx.llmCall.update({
|
||||
where: { id: call.id },
|
||||
data: {
|
||||
response: raw as any,
|
||||
latencyMs,
|
||||
inputTokens: usage?.inputTokens,
|
||||
outputTokens: usage?.outputTokens,
|
||||
totalTokens: usage?.totalTokens,
|
||||
},
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
yield { type: "done", text, usage };
|
||||
} catch (e: any) {
|
||||
const latencyMs = Math.round(performance.now() - t0);
|
||||
await prisma.llmCall.update({
|
||||
where: { id: call.id },
|
||||
data: {
|
||||
error: e?.message ?? String(e),
|
||||
latencyMs,
|
||||
},
|
||||
});
|
||||
if (shouldPersist && call) {
|
||||
await prisma.llmCall.update({
|
||||
where: { id: call.id },
|
||||
data: {
|
||||
error: e?.message ?? String(e),
|
||||
latencyMs,
|
||||
},
|
||||
});
|
||||
}
|
||||
yield { type: "error", message: e?.message ?? String(e) };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ export type ChatMessage = {
|
||||
|
||||
export type MultiplexRequest = {
|
||||
chatId?: string;
|
||||
persist?: boolean;
|
||||
provider: Provider;
|
||||
model: string;
|
||||
messages: ChatMessage[];
|
||||
|
||||
Reference in New Issue
Block a user