tool call in-flight resume

This commit is contained in:
2026-05-02 22:03:43 -07:00
parent 2c32ca66e2
commit cf9832ca3b
3 changed files with 12 additions and 14 deletions

View File

@@ -191,7 +191,7 @@ Behavior notes:
- `CHAT_CODEX_SSH_PRIVATE_KEY_B64=<base64-private-key>` (optional fallback when a volume mount is not practical)
- `CHAT_CODEX_EXEC_TIMEOUT_MS=600000` (optional)
- `CHAT_SHELL_EXEC_TIMEOUT_MS=120000` (optional)
- When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`), then stores the assistant output. - When a tool call is executed, backend stores a chat `Message` with `role: "tool"` and tool metadata (`metadata.kind = "tool_call"`). Streaming requests persist each completed tool call as its SSE `tool_call` event is emitted, then store the assistant output when the completion finishes.
- `anthropic` currently runs without server-managed tool calls.
## Searches

View File

@@ -150,6 +150,7 @@ Backend database remains source of truth.
During stream:
- Client may optimistically render accumulated `delta` text.
- Backend persists each completed tool call as a `tool` message before emitting its `tool_call` SSE event, so chat detail refreshes can show completed tool calls while the assistant response is still running.
On successful completion:
- Backend persists assistant `Message` and updates `LlmCall` usage/latency in a transaction.

View File

@@ -58,7 +58,6 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
let text = "";
let usage: StreamEvent extends any ? any : never;
let raw: unknown = { streamed: true };
let toolMessages: ReturnType<typeof buildToolLogMessageData>[] = [];
try {
if (req.provider === "openai" || req.provider === "xai") {
@@ -97,7 +96,16 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
}
if (ev.type === "tool_call") {
toolMessages.push(buildToolLogMessageData(chatId, ev.event)); const toolMessage = buildToolLogMessageData(chatId, ev.event);
await prisma.message.create({
data: {
chatId: toolMessage.chatId,
role: toolMessage.role as any,
content: toolMessage.content,
name: toolMessage.name,
metadata: toolMessage.metadata as any,
},
});
yield { type: "tool_call", event: ev.event };
continue;
}
@@ -149,17 +157,6 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
const latencyMs = Math.round(performance.now() - t0);
await prisma.$transaction(async (tx) => {
if (toolMessages.length) {
await tx.message.createMany({
data: toolMessages.map((message) => ({
chatId: message.chatId,
role: message.role as any,
content: message.content,
name: message.name,
metadata: message.metadata as any,
})),
});
}
await tx.message.create({
data: { chatId, role: "assistant" as any, content: text },
});