diff --git a/docs/api/rest.md b/docs/api/rest.md
index bc03415..9678b95 100644
--- a/docs/api/rest.md
+++ b/docs/api/rest.md
@@ -191,7 +191,7 @@ Behavior notes:
 - `CHAT_CODEX_SSH_PRIVATE_KEY_B64=` (optional fallback when a volume mount is not practical)
 - `CHAT_CODEX_EXEC_TIMEOUT_MS=600000` (optional)
 - `CHAT_SHELL_EXEC_TIMEOUT_MS=120000` (optional)
-- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output.
+- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`). Streaming requests persist each completed tool call as its SSE `tool_call` event is emitted, then store the assistant output when the completion finishes.
 - `anthropic` currently runs without server-managed tool calls.
 
 ## Searches
diff --git a/docs/api/streaming-chat.md b/docs/api/streaming-chat.md
index f4133b0..456a1f2 100644
--- a/docs/api/streaming-chat.md
+++ b/docs/api/streaming-chat.md
@@ -150,6 +150,7 @@ Backend database remains source of truth.
 
 During stream:
 - Client may optimistically render accumulated `delta` text.
+- Backend persists each completed tool call as a `tool` message before emitting its `tool_call` SSE event, so chat detail refreshes can show completed tool calls while the assistant response is still running.
 
 On successful completion:
 - Backend persists assistant `Message` and updates `LlmCall` usage/latency in a transaction.
diff --git a/server/src/llm/streaming.ts b/server/src/llm/streaming.ts
index e94035d..9f23573 100644
--- a/server/src/llm/streaming.ts
+++ b/server/src/llm/streaming.ts
@@ -58,7 +58,6 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
   let text = "";
   let usage: StreamEvent extends any ? any : never;
   let raw: unknown = { streamed: true };
-  let toolMessages: ReturnType<typeof buildToolLogMessageData>[] = [];
 
   try {
     if (req.provider === "openai" || req.provider === "xai") {
@@ -97,7 +96,16 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
       }
 
       if (ev.type === "tool_call") {
-        toolMessages.push(buildToolLogMessageData(chatId, ev.event));
+        const toolMessage = buildToolLogMessageData(chatId, ev.event);
+        await prisma.message.create({
+          data: {
+            chatId: toolMessage.chatId,
+            role: toolMessage.role as any,
+            content: toolMessage.content,
+            name: toolMessage.name,
+            metadata: toolMessage.metadata as any,
+          },
+        });
         yield { type: "tool_call", event: ev.event };
         continue;
       }
@@ -149,17 +157,6 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
   const latencyMs = Math.round(performance.now() - t0);
 
   await prisma.$transaction(async (tx) => {
-    if (toolMessages.length) {
-      await tx.message.createMany({
-        data: toolMessages.map((message) => ({
-          chatId: message.chatId,
-          role: message.role as any,
-          content: message.content,
-          name: message.name,
-          metadata: message.metadata as any,
-        })),
-      });
-    }
     await tx.message.create({
       data: { chatId, role: "assistant" as any, content: text },
     });
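
Note on the `server/src/llm/streaming.ts` change: tool-message persistence moves out of the closing `$transaction`. Each `tool` message is now committed individually, before its `tool_call` SSE event is yielded, instead of being batched atomically with the assistant message at the end. The sketch below shows why that ordering matters from the client side. It is illustrative only: the endpoint path and `refreshChatDetail` helper are assumed names, not part of this diff; the `delta` and `tool_call` event types come from docs/api/streaming-chat.md.

```ts
// Illustrative SSE consumer (assumed endpoint path and helper names).
// It relies on the guarantee introduced above: the backend commits the
// `tool` message before emitting the `tool_call` event, so a refetch
// triggered by that event already includes the completed tool call.
async function consumeChatStream(chatId: string, payload: unknown): Promise<void> {
  const res = await fetch(`/api/chats/${chatId}/stream`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  const reader = res.body!.pipeThrough(new TextDecoderStream()).getReader();

  let buffer = "";
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += value;

    // SSE frames are separated by a blank line; keep any partial frame.
    const frames = buffer.split("\n\n");
    buffer = frames.pop() ?? "";

    for (const frame of frames) {
      const dataLine = frame.split("\n").find((line) => line.startsWith("data: "));
      if (!dataLine) continue;
      const ev = JSON.parse(dataLine.slice("data: ".length));

      if (ev.type === "delta") {
        // Optimistically append streamed text (see docs/api/streaming-chat.md).
      } else if (ev.type === "tool_call") {
        // Safe to refresh: the tool message is already in the database.
        await refreshChatDetail(chatId);
      }
    }
  }
}

// Assumed helper: re-reads chat detail from the REST API (source of truth).
declare function refreshChatDetail(chatId: string): Promise<void>;
```

One trade-off visible in the diff: because tool messages no longer participate in the final transaction, a stream that errors after a tool call leaves that tool message persisted without a matching assistant reply, absent cleanup elsewhere. That is consistent with the stated goal of surfacing completed tool calls while the response is still running.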