backend, web: support for resuming streams
This commit is contained in:
59
server/src/active-streams.ts
Normal file
59
server/src/active-streams.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
export type SseStreamEvent = {
|
||||
event: string;
|
||||
data: unknown;
|
||||
};
|
||||
|
||||
type SseStreamListener = (event: SseStreamEvent) => void;
|
||||
|
||||
export class ActiveSseStream {
|
||||
private readonly events: SseStreamEvent[] = [];
|
||||
private readonly listeners = new Set<SseStreamListener>();
|
||||
private completed = false;
|
||||
private resolveDone!: () => void;
|
||||
|
||||
readonly done: Promise<void>;
|
||||
|
||||
constructor() {
|
||||
this.done = new Promise((resolve) => {
|
||||
this.resolveDone = resolve;
|
||||
});
|
||||
}
|
||||
|
||||
get isCompleted() {
|
||||
return this.completed;
|
||||
}
|
||||
|
||||
emit(event: string, data: unknown) {
|
||||
if (this.completed) return;
|
||||
const entry = { event, data };
|
||||
this.events.push(entry);
|
||||
for (const listener of this.listeners) {
|
||||
listener(entry);
|
||||
}
|
||||
}
|
||||
|
||||
complete(finalEvent?: SseStreamEvent) {
|
||||
if (this.completed) return;
|
||||
if (finalEvent) {
|
||||
this.emit(finalEvent.event, finalEvent.data);
|
||||
}
|
||||
this.completed = true;
|
||||
this.listeners.clear();
|
||||
this.resolveDone();
|
||||
}
|
||||
|
||||
subscribe(listener: SseStreamListener) {
|
||||
for (const event of this.events) {
|
||||
listener(event);
|
||||
}
|
||||
|
||||
if (this.completed) {
|
||||
return () => {};
|
||||
}
|
||||
|
||||
this.listeners.add(listener);
|
||||
return () => {
|
||||
this.listeners.delete(listener);
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,13 @@
|
||||
import { performance } from "node:perf_hooks";
|
||||
import { z } from "zod";
|
||||
import type { FastifyInstance } from "fastify";
|
||||
import type { FastifyInstance, FastifyReply, FastifyRequest } from "fastify";
|
||||
import { ActiveSseStream, type SseStreamEvent } from "./active-streams.js";
|
||||
import { prisma } from "./db.js";
|
||||
import { requireAdmin } from "./auth.js";
|
||||
import { env } from "./env.js";
|
||||
import { buildComparableAttachments } from "./llm/message-content.js";
|
||||
import { runMultiplex } from "./llm/multiplexer.js";
|
||||
import { runMultiplexStream } from "./llm/streaming.js";
|
||||
import { runMultiplexStream, type StreamEvent } from "./llm/streaming.js";
|
||||
import { getModelCatalogSnapshot } from "./llm/model-catalog.js";
|
||||
import { openaiClient } from "./llm/providers.js";
|
||||
import { exaClient } from "./search/exa.js";
|
||||
@@ -120,6 +121,26 @@ const CompletionMessageSchema = z
|
||||
}
|
||||
});
|
||||
|
||||
const CompletionStreamBody = z
|
||||
.object({
|
||||
chatId: z.string().optional(),
|
||||
persist: z.boolean().optional(),
|
||||
provider: z.enum(["openai", "anthropic", "xai"]),
|
||||
model: z.string().min(1),
|
||||
messages: z.array(CompletionMessageSchema),
|
||||
temperature: z.number().min(0).max(2).optional(),
|
||||
maxTokens: z.number().int().positive().optional(),
|
||||
})
|
||||
.superRefine((value, ctx) => {
|
||||
if (value.persist === false && value.chatId) {
|
||||
ctx.addIssue({
|
||||
code: z.ZodIssueCode.custom,
|
||||
message: "chatId must be omitted when persist is false",
|
||||
path: ["chatId"],
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
function mergeAttachmentsIntoMetadata(metadata: unknown, attachments?: ChatAttachment[]) {
|
||||
if (!attachments?.length) return metadata as any;
|
||||
if (!metadata || typeof metadata !== "object" || Array.isArray(metadata)) {
|
||||
@@ -293,6 +314,246 @@ function buildSseHeaders(originHeader: string | undefined) {
|
||||
return headers;
|
||||
}
|
||||
|
||||
type SearchRunRequest = z.infer<typeof SearchRunBody>;
|
||||
|
||||
const activeChatStreams = new Map<string, ActiveSseStream>();
|
||||
const activeSearchStreams = new Map<string, ActiveSseStream>();
|
||||
|
||||
function getErrorMessage(err: unknown) {
|
||||
return err instanceof Error ? err.message : String(err);
|
||||
}
|
||||
|
||||
function writeSseEvent(reply: FastifyReply, event: SseStreamEvent) {
|
||||
if (reply.raw.destroyed || reply.raw.writableEnded) return;
|
||||
reply.raw.write(`event: ${event.event}\n`);
|
||||
reply.raw.write(`data: ${JSON.stringify(event.data)}\n\n`);
|
||||
}
|
||||
|
||||
async function streamActiveRun(req: FastifyRequest, reply: FastifyReply, stream: ActiveSseStream) {
|
||||
reply.raw.writeHead(200, buildSseHeaders(typeof req.headers.origin === "string" ? req.headers.origin : undefined));
|
||||
reply.raw.flushHeaders?.();
|
||||
|
||||
let unsubscribe = () => {};
|
||||
let closed = false;
|
||||
const closedPromise = new Promise<void>((resolve) => {
|
||||
const onClose = () => {
|
||||
closed = true;
|
||||
unsubscribe();
|
||||
reply.raw.off("close", onClose);
|
||||
resolve();
|
||||
};
|
||||
reply.raw.on("close", onClose);
|
||||
stream.done.finally(() => {
|
||||
reply.raw.off("close", onClose);
|
||||
});
|
||||
});
|
||||
|
||||
unsubscribe = stream.subscribe((event) => writeSseEvent(reply, event));
|
||||
await Promise.race([stream.done, closedPromise]);
|
||||
unsubscribe();
|
||||
|
||||
if (!closed && !reply.raw.destroyed && !reply.raw.writableEnded) {
|
||||
reply.raw.end();
|
||||
}
|
||||
|
||||
return reply;
|
||||
}
|
||||
|
||||
function mapChatStreamEvent(ev: StreamEvent): SseStreamEvent {
|
||||
if (ev.type === "tool_call") return { event: "tool_call", data: ev.event };
|
||||
return { event: ev.type, data: ev };
|
||||
}
|
||||
|
||||
function startActiveChatStream(chatId: string, body: z.infer<typeof CompletionStreamBody>) {
|
||||
const stream = new ActiveSseStream();
|
||||
activeChatStreams.set(chatId, stream);
|
||||
|
||||
void (async () => {
|
||||
let sawTerminalEvent = false;
|
||||
try {
|
||||
for await (const ev of runMultiplexStream(body)) {
|
||||
const event = mapChatStreamEvent(ev);
|
||||
if (ev.type === "done" || ev.type === "error") {
|
||||
sawTerminalEvent = true;
|
||||
stream.complete(event);
|
||||
break;
|
||||
}
|
||||
stream.emit(event.event, event.data);
|
||||
}
|
||||
|
||||
if (!sawTerminalEvent) {
|
||||
stream.complete({ event: "error", data: { message: "chat stream ended unexpectedly" } });
|
||||
}
|
||||
} catch (err) {
|
||||
stream.complete({ event: "error", data: { message: getErrorMessage(err) } });
|
||||
} finally {
|
||||
activeChatStreams.delete(chatId);
|
||||
}
|
||||
})();
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
async function executeSearchRunStream(searchId: string, body: SearchRunRequest, stream: ActiveSseStream) {
|
||||
const startedAt = performance.now();
|
||||
const query = body.query?.trim();
|
||||
if (!query) {
|
||||
stream.complete({ event: "error", data: { message: "query is required" } });
|
||||
return;
|
||||
}
|
||||
|
||||
const normalizedTitle = body.title?.trim() || query.slice(0, 80);
|
||||
|
||||
try {
|
||||
const exa = exaClient();
|
||||
const searchPromise = exa.search(query, {
|
||||
type: body.type ?? "auto",
|
||||
numResults: body.numResults ?? 10,
|
||||
includeDomains: body.includeDomains,
|
||||
excludeDomains: body.excludeDomains,
|
||||
moderation: true,
|
||||
userLocation: "US",
|
||||
contents: false,
|
||||
} as any);
|
||||
const answerPromise = exa.answer(query, {
|
||||
text: true,
|
||||
model: "exa",
|
||||
userLocation: "US",
|
||||
});
|
||||
|
||||
let searchResponse: any | null = null;
|
||||
let answerResponse: any | null = null;
|
||||
let enrichedResults: any[] | null = null;
|
||||
let searchError: string | null = null;
|
||||
let answerError: string | null = null;
|
||||
|
||||
const searchSettled = searchPromise.then(
|
||||
async (value) => {
|
||||
searchResponse = value;
|
||||
const previewResults = (value?.results ?? []).map((result: any, index: number) => mapSearchResultPreview(result, index));
|
||||
stream.emit("search_results", {
|
||||
requestId: value?.requestId ?? null,
|
||||
results: previewResults,
|
||||
});
|
||||
|
||||
const urls = (value?.results ?? []).map((result: any) => result?.url).filter((url: string | undefined) => typeof url === "string");
|
||||
if (!urls.length) return;
|
||||
|
||||
try {
|
||||
const contentsResponse = await exa.getContents(urls, {
|
||||
text: { maxCharacters: 1200 },
|
||||
highlights: {
|
||||
query,
|
||||
maxCharacters: 320,
|
||||
numSentences: 2,
|
||||
highlightsPerUrl: 2,
|
||||
},
|
||||
} as any);
|
||||
const byUrl = new Map<string, any>();
|
||||
for (const contentItem of contentsResponse?.results ?? []) {
|
||||
byUrl.set(normalizeUrlForMatch(contentItem?.url), contentItem);
|
||||
}
|
||||
|
||||
enrichedResults = (value?.results ?? []).map((result: any) => {
|
||||
const contentItem = byUrl.get(normalizeUrlForMatch(result?.url));
|
||||
if (!contentItem) return result;
|
||||
return {
|
||||
...result,
|
||||
text: contentItem.text ?? result.text ?? null,
|
||||
highlights: Array.isArray(contentItem.highlights) ? contentItem.highlights : result.highlights ?? null,
|
||||
highlightScores: Array.isArray(contentItem.highlightScores) ? contentItem.highlightScores : result.highlightScores ?? null,
|
||||
};
|
||||
});
|
||||
|
||||
stream.emit("search_results", {
|
||||
requestId: value?.requestId ?? null,
|
||||
results: enrichedResults.map((result: any, index: number) => mapSearchResultPreview(result, index)),
|
||||
});
|
||||
} catch {
|
||||
// keep preview results if content enrichment fails
|
||||
}
|
||||
},
|
||||
(reason) => {
|
||||
searchError = reason?.message ?? String(reason);
|
||||
stream.emit("search_error", { error: searchError });
|
||||
}
|
||||
);
|
||||
|
||||
const answerSettled = answerPromise.then(
|
||||
(value) => {
|
||||
answerResponse = value;
|
||||
stream.emit("answer", {
|
||||
answerText: parseAnswerText(value),
|
||||
answerRequestId: value?.requestId ?? null,
|
||||
answerCitations: (value?.citations as any) ?? null,
|
||||
});
|
||||
},
|
||||
(reason) => {
|
||||
answerError = reason?.message ?? String(reason);
|
||||
stream.emit("answer_error", { error: answerError });
|
||||
}
|
||||
);
|
||||
|
||||
await Promise.all([searchSettled, answerSettled]);
|
||||
|
||||
const latencyMs = Math.round(performance.now() - startedAt);
|
||||
const persistedResults = enrichedResults ?? searchResponse?.results ?? [];
|
||||
const rows = persistedResults.map((result: any, index: number) => mapSearchResultRow(searchId, result, index));
|
||||
const answerText = parseAnswerText(answerResponse);
|
||||
|
||||
await prisma.$transaction(async (tx) => {
|
||||
await tx.search.update({
|
||||
where: { id: searchId },
|
||||
data: {
|
||||
query,
|
||||
title: normalizedTitle,
|
||||
requestId: searchResponse?.requestId ?? null,
|
||||
rawResponse: searchResponse as any,
|
||||
latencyMs,
|
||||
error: searchError,
|
||||
answerText,
|
||||
answerRequestId: answerResponse?.requestId ?? null,
|
||||
answerCitations: (answerResponse?.citations as any) ?? null,
|
||||
answerRawResponse: answerResponse as any,
|
||||
answerError,
|
||||
},
|
||||
});
|
||||
await tx.searchResult.deleteMany({ where: { searchId } });
|
||||
if (rows.length) {
|
||||
await tx.searchResult.createMany({ data: rows as any });
|
||||
}
|
||||
});
|
||||
|
||||
const search = await prisma.search.findUnique({
|
||||
where: { id: searchId },
|
||||
include: { results: { orderBy: { rank: "asc" } } },
|
||||
});
|
||||
if (!search) {
|
||||
stream.complete({ event: "error", data: { message: "search not found" } });
|
||||
} else {
|
||||
stream.complete({ event: "done", data: { search } });
|
||||
}
|
||||
} catch (err) {
|
||||
const message = getErrorMessage(err);
|
||||
try {
|
||||
await prisma.search.update({
|
||||
where: { id: searchId },
|
||||
data: {
|
||||
query,
|
||||
title: normalizedTitle,
|
||||
latencyMs: Math.round(performance.now() - startedAt),
|
||||
error: message,
|
||||
},
|
||||
});
|
||||
} catch {
|
||||
// keep the stream terminal event even if the backing search row disappeared
|
||||
}
|
||||
stream.complete({ event: "error", data: { message } });
|
||||
} finally {
|
||||
activeSearchStreams.delete(searchId);
|
||||
}
|
||||
}
|
||||
|
||||
export async function registerRoutes(app: FastifyInstance) {
|
||||
app.get("/health", { logLevel: "silent" }, async () => ({ ok: true }));
|
||||
|
||||
@@ -306,6 +567,14 @@ export async function registerRoutes(app: FastifyInstance) {
|
||||
return { providers: getModelCatalogSnapshot() };
|
||||
});
|
||||
|
||||
app.get("/v1/active-runs", async (req) => {
|
||||
requireAdmin(req);
|
||||
return {
|
||||
chats: Array.from(activeChatStreams.keys()),
|
||||
searches: Array.from(activeSearchStreams.keys()),
|
||||
};
|
||||
});
|
||||
|
||||
app.get("/v1/chats", async (req) => {
|
||||
requireAdmin(req);
|
||||
const chats = await prisma.chat.findMany({
|
||||
@@ -695,162 +964,24 @@ export async function registerRoutes(app: FastifyInstance) {
|
||||
const query = body.query?.trim() || existing.query?.trim();
|
||||
if (!query) return app.httpErrors.badRequest("query is required");
|
||||
|
||||
const startedAt = performance.now();
|
||||
const normalizedTitle = body.title?.trim() || query.slice(0, 80);
|
||||
|
||||
reply.raw.writeHead(200, buildSseHeaders(typeof req.headers.origin === "string" ? req.headers.origin : undefined));
|
||||
|
||||
const send = (event: string, data: any) => {
|
||||
if (reply.raw.writableEnded) return;
|
||||
reply.raw.write(`event: ${event}\n`);
|
||||
reply.raw.write(`data: ${JSON.stringify(data)}\n\n`);
|
||||
};
|
||||
|
||||
try {
|
||||
const exa = exaClient();
|
||||
const searchPromise = exa.search(query, {
|
||||
type: body.type ?? "auto",
|
||||
numResults: body.numResults ?? 10,
|
||||
includeDomains: body.includeDomains,
|
||||
excludeDomains: body.excludeDomains,
|
||||
moderation: true,
|
||||
userLocation: "US",
|
||||
contents: false,
|
||||
} as any);
|
||||
const answerPromise = exa.answer(query, {
|
||||
text: true,
|
||||
model: "exa",
|
||||
userLocation: "US",
|
||||
});
|
||||
|
||||
let searchResponse: any | null = null;
|
||||
let answerResponse: any | null = null;
|
||||
let enrichedResults: any[] | null = null;
|
||||
let searchError: string | null = null;
|
||||
let answerError: string | null = null;
|
||||
|
||||
const searchSettled = searchPromise.then(
|
||||
async (value) => {
|
||||
searchResponse = value;
|
||||
const previewResults = (value?.results ?? []).map((result: any, index: number) => mapSearchResultPreview(result, index));
|
||||
send("search_results", {
|
||||
requestId: value?.requestId ?? null,
|
||||
results: previewResults,
|
||||
});
|
||||
|
||||
const urls = (value?.results ?? []).map((result: any) => result?.url).filter((url: string | undefined) => typeof url === "string");
|
||||
if (!urls.length) return;
|
||||
|
||||
try {
|
||||
const contentsResponse = await exa.getContents(urls, {
|
||||
text: { maxCharacters: 1200 },
|
||||
highlights: {
|
||||
query,
|
||||
maxCharacters: 320,
|
||||
numSentences: 2,
|
||||
highlightsPerUrl: 2,
|
||||
},
|
||||
} as any);
|
||||
const byUrl = new Map<string, any>();
|
||||
for (const contentItem of contentsResponse?.results ?? []) {
|
||||
byUrl.set(normalizeUrlForMatch(contentItem?.url), contentItem);
|
||||
}
|
||||
|
||||
enrichedResults = (value?.results ?? []).map((result: any) => {
|
||||
const contentItem = byUrl.get(normalizeUrlForMatch(result?.url));
|
||||
if (!contentItem) return result;
|
||||
return {
|
||||
...result,
|
||||
text: contentItem.text ?? result.text ?? null,
|
||||
highlights: Array.isArray(contentItem.highlights) ? contentItem.highlights : result.highlights ?? null,
|
||||
highlightScores: Array.isArray(contentItem.highlightScores) ? contentItem.highlightScores : result.highlightScores ?? null,
|
||||
};
|
||||
});
|
||||
|
||||
send("search_results", {
|
||||
requestId: value?.requestId ?? null,
|
||||
results: enrichedResults.map((result: any, index: number) => mapSearchResultPreview(result, index)),
|
||||
});
|
||||
} catch {
|
||||
// keep preview results if content enrichment fails
|
||||
}
|
||||
},
|
||||
(reason) => {
|
||||
searchError = reason?.message ?? String(reason);
|
||||
send("search_error", { error: searchError });
|
||||
}
|
||||
);
|
||||
|
||||
const answerSettled = answerPromise.then(
|
||||
(value) => {
|
||||
answerResponse = value;
|
||||
send("answer", {
|
||||
answerText: parseAnswerText(value),
|
||||
answerRequestId: value?.requestId ?? null,
|
||||
answerCitations: (value?.citations as any) ?? null,
|
||||
});
|
||||
},
|
||||
(reason) => {
|
||||
answerError = reason?.message ?? String(reason);
|
||||
send("answer_error", { error: answerError });
|
||||
}
|
||||
);
|
||||
|
||||
await Promise.all([searchSettled, answerSettled]);
|
||||
|
||||
const latencyMs = Math.round(performance.now() - startedAt);
|
||||
const persistedResults = enrichedResults ?? searchResponse?.results ?? [];
|
||||
const rows = persistedResults.map((result: any, index: number) => mapSearchResultRow(searchId, result, index));
|
||||
const answerText = parseAnswerText(answerResponse);
|
||||
|
||||
await prisma.$transaction(async (tx) => {
|
||||
await tx.search.update({
|
||||
where: { id: searchId },
|
||||
data: {
|
||||
query,
|
||||
title: normalizedTitle,
|
||||
requestId: searchResponse?.requestId ?? null,
|
||||
rawResponse: searchResponse as any,
|
||||
latencyMs,
|
||||
error: searchError,
|
||||
answerText,
|
||||
answerRequestId: answerResponse?.requestId ?? null,
|
||||
answerCitations: (answerResponse?.citations as any) ?? null,
|
||||
answerRawResponse: answerResponse as any,
|
||||
answerError,
|
||||
},
|
||||
});
|
||||
await tx.searchResult.deleteMany({ where: { searchId } });
|
||||
if (rows.length) {
|
||||
await tx.searchResult.createMany({ data: rows as any });
|
||||
}
|
||||
});
|
||||
|
||||
const search = await prisma.search.findUnique({
|
||||
where: { id: searchId },
|
||||
include: { results: { orderBy: { rank: "asc" } } },
|
||||
});
|
||||
if (!search) {
|
||||
send("error", { message: "search not found" });
|
||||
} else {
|
||||
send("done", { search });
|
||||
}
|
||||
} catch (err: any) {
|
||||
await prisma.search.update({
|
||||
where: { id: searchId },
|
||||
data: {
|
||||
query,
|
||||
title: normalizedTitle,
|
||||
latencyMs: Math.round(performance.now() - startedAt),
|
||||
error: err?.message ?? String(err),
|
||||
},
|
||||
});
|
||||
send("error", { message: err?.message ?? String(err) });
|
||||
} finally {
|
||||
reply.raw.end();
|
||||
const existingStream = activeSearchStreams.get(searchId);
|
||||
if (existingStream) {
|
||||
return streamActiveRun(req, reply, existingStream);
|
||||
}
|
||||
|
||||
return reply;
|
||||
const stream = new ActiveSseStream();
|
||||
activeSearchStreams.set(searchId, stream);
|
||||
void executeSearchRunStream(searchId, { ...body, query }, stream);
|
||||
return streamActiveRun(req, reply, stream);
|
||||
});
|
||||
|
||||
app.post("/v1/searches/:searchId/run/stream/attach", async (req, reply) => {
|
||||
requireAdmin(req);
|
||||
const Params = z.object({ searchId: z.string() });
|
||||
const { searchId } = Params.parse(req.params);
|
||||
const stream = activeSearchStreams.get(searchId);
|
||||
if (!stream) return app.httpErrors.notFound("active search stream not found");
|
||||
return streamActiveRun(req, reply, stream);
|
||||
});
|
||||
|
||||
app.get("/v1/chats/:chatId", async (req) => {
|
||||
@@ -895,6 +1026,15 @@ export async function registerRoutes(app: FastifyInstance) {
|
||||
return { message: msg };
|
||||
});
|
||||
|
||||
app.post("/v1/chats/:chatId/stream/attach", async (req, reply) => {
|
||||
requireAdmin(req);
|
||||
const Params = z.object({ chatId: z.string() });
|
||||
const { chatId } = Params.parse(req.params);
|
||||
const stream = activeChatStreams.get(chatId);
|
||||
if (!stream) return app.httpErrors.notFound("active chat stream not found");
|
||||
return streamActiveRun(req, reply, stream);
|
||||
});
|
||||
|
||||
// Main: create a completion via provider+model and store everything.
|
||||
app.post("/v1/chat-completions", async (req) => {
|
||||
requireAdmin(req);
|
||||
@@ -935,27 +1075,7 @@ export async function registerRoutes(app: FastifyInstance) {
|
||||
app.post("/v1/chat-completions/stream", async (req, reply) => {
|
||||
requireAdmin(req);
|
||||
|
||||
const Body = z
|
||||
.object({
|
||||
chatId: z.string().optional(),
|
||||
persist: z.boolean().optional(),
|
||||
provider: z.enum(["openai", "anthropic", "xai"]),
|
||||
model: z.string().min(1),
|
||||
messages: z.array(CompletionMessageSchema),
|
||||
temperature: z.number().min(0).max(2).optional(),
|
||||
maxTokens: z.number().int().positive().optional(),
|
||||
})
|
||||
.superRefine((value, ctx) => {
|
||||
if (value.persist === false && value.chatId) {
|
||||
ctx.addIssue({
|
||||
code: z.ZodIssueCode.custom,
|
||||
message: "chatId must be omitted when persist is false",
|
||||
path: ["chatId"],
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
const parsed = Body.safeParse(req.body);
|
||||
const parsed = CompletionStreamBody.safeParse(req.body);
|
||||
if (!parsed.success) return app.httpErrors.badRequest(parsed.error.message);
|
||||
const body = parsed.data;
|
||||
|
||||
@@ -970,23 +1090,24 @@ export async function registerRoutes(app: FastifyInstance) {
|
||||
await storeNonAssistantMessages(body.chatId, body.messages);
|
||||
}
|
||||
|
||||
if (body.persist !== false && body.chatId) {
|
||||
if (activeChatStreams.has(body.chatId)) {
|
||||
return app.httpErrors.conflict("chat completion already running");
|
||||
}
|
||||
const stream = startActiveChatStream(body.chatId, body);
|
||||
return streamActiveRun(req, reply, stream);
|
||||
}
|
||||
|
||||
reply.raw.writeHead(200, buildSseHeaders(typeof req.headers.origin === "string" ? req.headers.origin : undefined));
|
||||
reply.raw.flushHeaders();
|
||||
|
||||
const send = (event: string, data: any) => {
|
||||
reply.raw.write(`event: ${event}\n`);
|
||||
reply.raw.write(`data: ${JSON.stringify(data)}\n\n`);
|
||||
};
|
||||
|
||||
for await (const ev of runMultiplexStream(body)) {
|
||||
if (ev.type === "meta") send("meta", ev);
|
||||
else if (ev.type === "tool_call") send("tool_call", ev.event);
|
||||
else if (ev.type === "delta") send("delta", ev);
|
||||
else if (ev.type === "done") send("done", ev);
|
||||
else if (ev.type === "error") send("error", ev);
|
||||
writeSseEvent(reply, mapChatStreamEvent(ev));
|
||||
}
|
||||
|
||||
reply.raw.end();
|
||||
if (!reply.raw.destroyed && !reply.raw.writableEnded) {
|
||||
reply.raw.end();
|
||||
}
|
||||
return reply;
|
||||
});
|
||||
}
|
||||
|
||||
34
server/tests/active-streams.test.ts
Normal file
34
server/tests/active-streams.test.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
import { ActiveSseStream, type SseStreamEvent } from "../src/active-streams.js";
|
||||
|
||||
test("ActiveSseStream replays buffered events to late subscribers", () => {
|
||||
const stream = new ActiveSseStream();
|
||||
stream.emit("delta", { text: "hel" });
|
||||
stream.emit("delta", { text: "lo" });
|
||||
|
||||
const events: SseStreamEvent[] = [];
|
||||
const unsubscribe = stream.subscribe((event) => events.push(event));
|
||||
unsubscribe();
|
||||
|
||||
assert.deepEqual(events, [
|
||||
{ event: "delta", data: { text: "hel" } },
|
||||
{ event: "delta", data: { text: "lo" } },
|
||||
]);
|
||||
});
|
||||
|
||||
test("ActiveSseStream replays terminal events after completion", async () => {
|
||||
const stream = new ActiveSseStream();
|
||||
stream.emit("delta", { text: "done" });
|
||||
stream.complete({ event: "done", data: { text: "done" } });
|
||||
await stream.done;
|
||||
|
||||
const events: SseStreamEvent[] = [];
|
||||
stream.subscribe((event) => events.push(event));
|
||||
|
||||
assert.equal(stream.isCompleted, true);
|
||||
assert.deepEqual(events, [
|
||||
{ event: "delta", data: { text: "done" } },
|
||||
{ event: "done", data: { text: "done" } },
|
||||
]);
|
||||
});
|
||||
Reference in New Issue
Block a user