diff --git a/docs/api/rest.md b/docs/api/rest.md
index bc03415..9678b95 100644
--- a/docs/api/rest.md
+++ b/docs/api/rest.md
@@ -191,7 +191,7 @@ Behavior notes:
 - `CHAT_CODEX_SSH_PRIVATE_KEY_B64=` (optional fallback when a volume mount is not practical)
 - `CHAT_CODEX_EXEC_TIMEOUT_MS=600000` (optional)
 - `CHAT_SHELL_EXEC_TIMEOUT_MS=120000` (optional)
-- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output.
+- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`). Streaming requests persist each completed tool call as its SSE `tool_call` event is emitted, then store the assistant output when the completion finishes.
 - `anthropic` currently runs without server-managed tool calls.
 
 ## Searches
diff --git a/docs/api/streaming-chat.md b/docs/api/streaming-chat.md
index f4133b0..456a1f2 100644
--- a/docs/api/streaming-chat.md
+++ b/docs/api/streaming-chat.md
@@ -150,6 +150,7 @@ Backend database remains source of truth.
 
 During stream:
 - Client may optimistically render accumulated `delta` text.
+- Backend persists each completed tool call as a `tool` message before emitting its `tool_call` SSE event, so chat detail refreshes can show completed tool calls while the assistant response is still running.
 
 On successful completion:
 - Backend persists assistant `Message` and updates `LlmCall` usage/latency in a transaction.
diff --git a/server/src/llm/streaming.ts b/server/src/llm/streaming.ts
index e94035d..9f23573 100644
--- a/server/src/llm/streaming.ts
+++ b/server/src/llm/streaming.ts
@@ -58,7 +58,6 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
   let text = "";
   let usage: StreamEvent extends any ? any : never;
   let raw: unknown = { streamed: true };
-  let toolMessages: ReturnType<typeof buildToolLogMessageData>[] = [];
 
   try {
     if (req.provider === "openai" || req.provider === "xai") {
@@ -97,7 +96,16 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
       }
 
       if (ev.type === "tool_call") {
-        toolMessages.push(buildToolLogMessageData(chatId, ev.event));
+        const toolMessage = buildToolLogMessageData(chatId, ev.event);
+        await prisma.message.create({
+          data: {
+            chatId: toolMessage.chatId,
+            role: toolMessage.role as any,
+            content: toolMessage.content,
+            name: toolMessage.name,
+            metadata: toolMessage.metadata as any,
+          },
+        });
         yield { type: "tool_call", event: ev.event };
         continue;
       }
@@ -149,17 +157,6 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
   const latencyMs = Math.round(performance.now() - t0);
 
   await prisma.$transaction(async (tx) => {
-    if (toolMessages.length) {
-      await tx.message.createMany({
-        data: toolMessages.map((message) => ({
-          chatId: message.chatId,
-          role: message.role as any,
-          content: message.content,
-          name: message.name,
-          metadata: message.metadata as any,
-        })),
-      });
-    }
     await tx.message.create({
       data: { chatId, role: "assistant" as any, content: text },
     });
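
Note on the `server/src/llm/streaming.ts` change: tool-message persistence moves out of the closing `$transaction`. Each `tool` message is now committed individually, before its `tool_call` SSE event is yielded, instead of being batched atomically with the assistant message at the end. The sketch below shows why that ordering matters from the client side. It is illustrative only: the endpoint path and `refreshChatDetail` helper are assumed names, not part of this diff; the `delta` and `tool_call` event types come from docs/api/streaming-chat.md.

```ts
// Illustrative SSE consumer (assumed endpoint path and helper names).
// It relies on the guarantee introduced above: the backend commits the
// `tool` message before emitting the `tool_call` event, so a refetch
// triggered by that event already includes the completed tool call.
async function consumeChatStream(chatId: string, payload: unknown): Promise<void> {
  const res = await fetch(`/api/chats/${chatId}/stream`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  const reader = res.body!.pipeThrough(new TextDecoderStream()).getReader();

  let buffer = "";
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += value;

    // SSE frames are separated by a blank line; keep any partial frame.
    const frames = buffer.split("\n\n");
    buffer = frames.pop() ?? "";

    for (const frame of frames) {
      const dataLine = frame.split("\n").find((line) => line.startsWith("data: "));
      if (!dataLine) continue;
      const ev = JSON.parse(dataLine.slice("data: ".length));

      if (ev.type === "delta") {
        // Optimistically append streamed text (see docs/api/streaming-chat.md).
      } else if (ev.type === "tool_call") {
        // Safe to refresh: the tool message is already in the database.
        await refreshChatDetail(chatId);
      }
    }
  }
}

// Assumed helper: re-reads chat detail from the REST API (source of truth).
declare function refreshChatDetail(chatId: string): Promise<void>;
```

One trade-off visible in the diff: because tool messages no longer participate in the final transaction, a stream that errors after a tool call leaves that tool message persisted without a matching assistant reply, absent cleanup elsewhere. That is consistent with the stated goal of surfacing completed tool calls while the response is still running.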