tool call in-flight resume

This commit is contained in:
2026-05-02 22:03:43 -07:00
parent 2c32ca66e2
commit cf9832ca3b
3 changed files with 12 additions and 14 deletions

View File

@@ -191,7 +191,7 @@ Behavior notes:
- `CHAT_CODEX_SSH_PRIVATE_KEY_B64=<base64-private-key>` (optional fallback when a volume mount is not practical)
- `CHAT_CODEX_EXEC_TIMEOUT_MS=600000` (optional)
- `CHAT_SHELL_EXEC_TIMEOUT_MS=120000` (optional)
- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output. - When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`). Streaming requests persist each completed tool call as its SSE `tool_call` event is emitted, then store the assistant output when the completion finishes.
- `anthropic` currently runs without server-managed tool calls.
## Searches

View File

@@ -150,6 +150,7 @@ Backend database remains source of truth.
During stream:
- Client may optimistically render accumulated `delta` text.
- Backend persists each completed tool call as a `tool` message before emitting its `tool_call` SSE event, so chat detail refreshes can show completed tool calls while the assistant response is still running.
On successful completion:
- Backend persists assistant `Message` and updates `LlmCall` usage/latency in a transaction.

View File

@@ -58,7 +58,6 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
let text = "";
let usage: StreamEvent extends any ? any : never;
let raw: unknown = { streamed: true };
let toolMessages: ReturnType<typeof buildToolLogMessageData>[] = [];
try {
if (req.provider === "openai" || req.provider === "xai") {
@@ -97,7 +96,16 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
}
if (ev.type === "tool_call") {
toolMessages.push(buildToolLogMessageData(chatId, ev.event)); const toolMessage = buildToolLogMessageData(chatId, ev.event);
await prisma.message.create({
data: {
chatId: toolMessage.chatId,
role: toolMessage.role as any,
content: toolMessage.content,
name: toolMessage.name,
metadata: toolMessage.metadata as any,
},
});
yield { type: "tool_call", event: ev.event };
continue;
}
@@ -149,17 +157,6 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
const latencyMs = Math.round(performance.now() - t0);
await prisma.$transaction(async (tx) => {
if (toolMessages.length) {
await tx.message.createMany({
data: toolMessages.map((message) => ({
chatId: message.chatId,
role: message.role as any,
content: message.content,
name: message.name,
metadata: message.metadata as any,
})),
});
}
await tx.message.create({
data: { chatId, role: "assistant" as any, content: text },
});