From 29e340fd08cbcb21b0b1664f9b6dd6a46abdce57 Mon Sep 17 00:00:00 2001
From: James Magahern
Date: Sat, 2 May 2026 23:48:01 -0700
Subject: [PATCH] quick question feature

---
 docs/api/rest.md                            |  22 +-
 docs/api/streaming-chat.md                  |  26 +-
 server/src/llm/streaming.ts                 | 144 +++---
 server/src/llm/types.ts                     |   1 +
 server/src/routes.ts                        |  87 +++-
 web/src/App.tsx                             | 482 +++++++++++++++++-
 .../components/chat/chat-messages-panel.tsx |  75 ++-
 web/src/lib/api.ts                          |  17 +-
 8 files changed, 748 insertions(+), 106 deletions(-)

diff --git a/docs/api/rest.md b/docs/api/rest.md
index 9678b95..daf0c54 100644
--- a/docs/api/rest.md
+++ b/docs/api/rest.md
@@ -45,9 +45,29 @@ Chat upload limits:
 - Response: `{ "chats": ChatSummary[] }`
 
 ### `POST /v1/chats`
-- Body: `{ "title"?: string }`
+- Body:
+```json
+{
+  "title": "optional title",
+  "provider": "optional openai|anthropic|xai",
+  "model": "optional model id",
+  "messages": [
+    {
+      "role": "system|user|assistant|tool",
+      "content": "string",
+      "name": "optional",
+      "attachments": []
+    }
+  ]
+}
+```
 - Response: `{ "chat": ChatSummary }`
 
+Behavior notes:
+- `provider` and `model` must be supplied together when present.
+- When `provider`/`model` are supplied, the new chat initializes `initiatedProvider`/`initiatedModel` and `lastUsedProvider`/`lastUsedModel`.
+- Optional `messages` are inserted as the initial transcript. Attachment metadata uses the same schema and limits as chat completion messages.
+
 ### `PATCH /v1/chats/:chatId`
 - Body: `{ "title": string }`
 - Response: `{ "chat": ChatSummary }`
diff --git a/docs/api/streaming-chat.md b/docs/api/streaming-chat.md
index 2c08034..cd2e2dc 100644
--- a/docs/api/streaming-chat.md
+++ b/docs/api/streaming-chat.md
@@ -19,6 +19,7 @@ Authentication:
 ```json
 {
   "chatId": "optional-chat-id",
+  "persist": true,
   "provider": "openai|anthropic|xai",
   "model": "string",
   "messages": [
@@ -53,10 +54,12 @@
 ```
 
 Notes:
-- If `chatId` is omitted, backend creates a new chat.
+- `persist` defaults to `true`.
+- If `persist` is `true` and `chatId` is omitted, backend creates a new chat.
 - If `chatId` is provided, backend validates it exists.
-- Backend stores only new non-assistant input history rows to avoid duplicates.
-- Attachments are optional and are persisted under `message.metadata.attachments` on stored user messages.
+- If `persist` is `false`, `chatId` must be omitted. Backend does not create a chat and does not persist input messages, tool-call messages, assistant output, or `LlmCall` metadata.
+- For persisted streams, backend stores only new non-assistant input history rows to avoid duplicates.
+- Attachments are optional and are persisted under `message.metadata.attachments` on stored user messages when `persist` is `true`.
 
 ## Event Stream Contract
 
@@ -71,13 +74,15 @@ Event order:
 ```json
 {
   "type": "meta",
-  "chatId": "chat-id",
-  "callId": "llm-call-id",
+  "chatId": "chat-id-or-null",
+  "callId": "llm-call-id-or-null",
   "provider": "openai",
   "model": "gpt-4.1-mini"
 }
 ```
 
+For `persist: false` streams, `chatId` and `callId` are `null`.
+
 ### `delta`
 
 ```json
@@ -148,17 +153,22 @@ Tool-enabled streaming notes (`openai`/`xai`):
 
 Backend database remains source of truth.
 
-During stream:
+For persisted streams:
 - Client may optimistically render accumulated `delta` text.
 - Backend persists each completed tool call as a `tool` message before emitting its `tool_call` SSE event, so chat detail refreshes can show completed tool calls while the assistant response is still running.
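+
+As an illustration (payload fields abbreviated, values invented), a persisted stream that runs one tool is observed in this order:
+
+```
+meta       { "chatId": "c1", "callId": "l1", ... }
+tool_call  emitted only after the tool message row is persisted
+delta      repeated; concatenate the `text` fragments client-side
+done       { "text": "...", "usage": { ... } }
+```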
-On successful completion:
+On successful persisted completion:
 - Backend persists assistant `Message` and updates `LlmCall` usage/latency in a transaction.
 - Backend then emits `done`.
 
-On failure:
+On persisted failure:
 - Backend records call error and emits `error`.
 
+For `persist: false` streams:
+- Client may render the same `meta`, `tool_call`, `delta`, and terminal events.
+- Backend does not write any chat, message, tool-call log, assistant output, or call metadata rows.
+- `done.text` is the canonical assistant text if the client later imports the result into a saved chat.
+
 Client recommendation (for iOS/web):
 1. Render deltas in real time for UX.
 2. On `done`, refresh chat detail from REST (`GET /v1/chats/:chatId`) and use DB-backed data as canonical.
diff --git a/server/src/llm/streaming.ts b/server/src/llm/streaming.ts
index 9f23573..cbf0ac7 100644
--- a/server/src/llm/streaming.ts
+++ b/server/src/llm/streaming.ts
@@ -10,11 +10,17 @@ import {
 import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js";
 import type { MultiplexRequest, Provider } from "./types.js";
 
+type StreamUsage = {
+  inputTokens?: number;
+  outputTokens?: number;
+  totalTokens?: number;
+};
+
 export type StreamEvent =
-  | { type: "meta"; chatId: string; callId: string; provider: Provider; model: string }
+  | { type: "meta"; chatId: string | null; callId: string | null; provider: Provider; model: string }
   | { type: "tool_call"; event: ToolExecutionEvent }
   | { type: "delta"; text: string }
-  | { type: "done"; text: string; usage?: { inputTokens?: number; outputTokens?: number; totalTokens?: number } }
+  | { type: "done"; text: string; usage?: StreamUsage }
   | { type: "error"; message: string };
 
 function getChatIdOrCreate(chatId?: string) {
@@ -24,39 +30,45 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator<StreamEvent> {
   const t0 = performance.now();
 
-  const chatId = await getChatIdOrCreate(req.chatId);
+  const shouldPersist = req.persist !== false;
+  const chatId = shouldPersist ? await getChatIdOrCreate(req.chatId) : null;
 
-  const call = await prisma.llmCall.create({
-    data: {
-      chatId,
-      provider: req.provider as any,
-      model: req.model,
-      request: req as any,
-    },
-    select: { id: true },
-  });
+  const call =
+    shouldPersist && chatId
+      ? await prisma.llmCall.create({
+          data: {
+            chatId,
+            provider: req.provider as any,
+            model: req.model,
+            request: req as any,
+          },
+          select: { id: true },
+        })
+      : null;
 
-  await prisma.$transaction([
-    prisma.chat.update({
-      where: { id: chatId },
-      data: {
-        lastUsedProvider: req.provider as any,
-        lastUsedModel: req.model,
-      },
-    }),
-    prisma.chat.updateMany({
-      where: { id: chatId, initiatedProvider: null },
-      data: {
-        initiatedProvider: req.provider as any,
-        initiatedModel: req.model,
-      },
-    }),
-  ]);
+  if (shouldPersist && chatId) {
+    await prisma.$transaction([
+      prisma.chat.update({
+        where: { id: chatId },
+        data: {
+          lastUsedProvider: req.provider as any,
+          lastUsedModel: req.model,
+        },
+      }),
+      prisma.chat.updateMany({
+        where: { id: chatId, initiatedProvider: null },
+        data: {
+          initiatedProvider: req.provider as any,
+          initiatedModel: req.model,
+        },
+      }),
+    ]);
+  }
 
-  yield { type: "meta", chatId, callId: call.id, provider: req.provider, model: req.model };
+  yield { type: "meta", chatId, callId: call?.id ?? null, provider: req.provider, model: req.model };
 
   let text = "";
-  let usage: StreamEvent extends any ? any : never;
+  let usage: StreamUsage | undefined;
   let raw: unknown = { streamed: true };
 
   try {
@@ -73,7 +85,7 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator<StreamEvent> {
           logContext: {
             provider: req.provider,
             model: req.model,
-            chatId,
+            chatId: chatId ?? undefined,
           },
         })
       : runToolAwareChatCompletionsStream({
@@ -85,7 +97,7 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator<StreamEvent> {
           logContext: {
             provider: req.provider,
             model: req.model,
-            chatId,
+            chatId: chatId ?? undefined,
          },
        });
 
     for await (const ev of streamEvents) {
@@ -96,16 +108,18 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator<StreamEvent> {
       }
 
       if (ev.type === "tool_call") {
-        const toolMessage = buildToolLogMessageData(chatId, ev.event);
-        await prisma.message.create({
-          data: {
-            chatId: toolMessage.chatId,
-            role: toolMessage.role as any,
-            content: toolMessage.content,
-            name: toolMessage.name,
-            metadata: toolMessage.metadata as any,
-          },
-        });
+        if (shouldPersist && chatId) {
+          const toolMessage = buildToolLogMessageData(chatId, ev.event);
+          await prisma.message.create({
+            data: {
+              chatId: toolMessage.chatId,
+              role: toolMessage.role as any,
+              content: toolMessage.content,
+              name: toolMessage.name,
+              metadata: toolMessage.metadata as any,
+            },
+          });
+        }
         yield { type: "tool_call", event: ev.event };
         continue;
       }
@@ -156,32 +170,36 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator<StreamEvent> {
 
     const latencyMs = Math.round(performance.now() - t0);
 
-    await prisma.$transaction(async (tx) => {
-      await tx.message.create({
-        data: { chatId, role: "assistant" as any, content: text },
+    if (shouldPersist && chatId && call) {
+      await prisma.$transaction(async (tx) => {
+        await tx.message.create({
+          data: { chatId, role: "assistant" as any, content: text },
+        });
+        await tx.llmCall.update({
+          where: { id: call.id },
+          data: {
+            response: raw as any,
+            latencyMs,
+            inputTokens: usage?.inputTokens,
+            outputTokens: usage?.outputTokens,
+            totalTokens: usage?.totalTokens,
+          },
+        });
       });
-      await tx.llmCall.update({
-        where: { id: call.id },
-        data: {
-          response: raw as any,
-          latencyMs,
-          inputTokens: usage?.inputTokens,
-          outputTokens: usage?.outputTokens,
-          totalTokens: usage?.totalTokens,
-        },
-      });
-    });
+    }
 
     yield { type: "done", text, usage };
   } catch (e: any) {
     const latencyMs = Math.round(performance.now() - t0);
 
-    await prisma.llmCall.update({
-      where: { id: call.id },
-      data: {
-        error: e?.message ?? String(e),
-        latencyMs,
-      },
-    });
+    if (shouldPersist && call) {
+      await prisma.llmCall.update({
+        where: { id: call.id },
+        data: {
+          error: e?.message ?? String(e),
+          latencyMs,
+        },
+      });
+    }
 
     yield { type: "error", message: e?.message ?? String(e) };
   }
 }
diff --git a/server/src/llm/types.ts b/server/src/llm/types.ts
index 516c8c5..97490e7 100644
--- a/server/src/llm/types.ts
+++ b/server/src/llm/types.ts
@@ -30,6 +30,7 @@ export type ChatMessage = {
 
 export type MultiplexRequest = {
   chatId?: string;
+  persist?: boolean;
   provider: Provider;
   model: string;
   messages: ChatMessage[];
diff --git a/server/src/routes.ts b/server/src/routes.ts
index dc25184..c61aedb 100644
--- a/server/src/routes.ts
+++ b/server/src/routes.ts
@@ -327,10 +327,50 @@ export async function registerRoutes(app: FastifyInstance) {
   app.post("/v1/chats", async (req) => {
     requireAdmin(req);
 
-    const Body = z.object({ title: z.string().optional() });
-    const body = Body.parse(req.body ?? {});
+    const Body = z
+      .object({
+        title: z.string().optional(),
+        provider: z.enum(["openai", "anthropic", "xai"]).optional(),
+        model: z.string().trim().min(1).optional(),
+        messages: z.array(CompletionMessageSchema).optional(),
+      })
+      .superRefine((value, ctx) => {
+        if (value.provider && !value.model) {
+          ctx.addIssue({
+            code: z.ZodIssueCode.custom,
+            message: "model is required when provider is supplied",
+            path: ["model"],
+          });
+        }
+        if (!value.provider && value.model) {
+          ctx.addIssue({
+            code: z.ZodIssueCode.custom,
+            message: "provider is required when model is supplied",
+            path: ["provider"],
+          });
+        }
+      });
+    const parsed = Body.safeParse(req.body ?? {});
+    if (!parsed.success) return app.httpErrors.badRequest(parsed.error.message);
+    const body = parsed.data;
 
     const chat = await prisma.chat.create({
-      data: { title: body.title },
+      data: {
+        title: body.title,
+        initiatedProvider: body.provider as any,
+        initiatedModel: body.model,
+        lastUsedProvider: body.provider as any,
+        lastUsedModel: body.model,
+        messages: body.messages?.length
+          ? {
+              create: body.messages.map((message) => ({
+                role: message.role as any,
+                content: message.content,
+                name: message.name,
+                metadata: message.attachments?.length ? ({ attachments: message.attachments } as any) : undefined,
+              })),
+            }
+          : undefined,
+      },
       select: {
         id: true,
         title: true,
@@ -838,7 +878,9 @@ export async function registerRoutes(app: FastifyInstance) {
     });
 
     const { chatId } = Params.parse(req.params);
-    const body = Body.parse(req.body);
+    const parsed = Body.safeParse(req.body);
+    if (!parsed.success) return app.httpErrors.badRequest(parsed.error.message);
+    const body = parsed.data;
 
     const msg = await prisma.message.create({
       data: {
@@ -866,7 +908,9 @@ export async function registerRoutes(app: FastifyInstance) {
       maxTokens: z.number().int().positive().optional(),
     });
 
-    const body = Body.parse(req.body);
+    const parsed = Body.safeParse(req.body);
+    if (!parsed.success) return app.httpErrors.badRequest(parsed.error.message);
+    const body = parsed.data;
 
     // ensure chat exists if provided
     if (body.chatId) {
@@ -891,16 +935,29 @@ export async function registerRoutes(app: FastifyInstance) {
   app.post("/v1/chat-completions/stream", async (req, reply) => {
     requireAdmin(req);
 
-    const Body = z.object({
-      chatId: z.string().optional(),
-      provider: z.enum(["openai", "anthropic", "xai"]),
-      model: z.string().min(1),
-      messages: z.array(CompletionMessageSchema),
-      temperature: z.number().min(0).max(2).optional(),
-      maxTokens: z.number().int().positive().optional(),
-    });
+    const Body = z
+      .object({
+        chatId: z.string().optional(),
+        persist: z.boolean().optional(),
+        provider: z.enum(["openai", "anthropic", "xai"]),
+        model: z.string().min(1),
+        messages: z.array(CompletionMessageSchema),
+        temperature: z.number().min(0).max(2).optional(),
+        maxTokens: z.number().int().positive().optional(),
+      })
+      .superRefine((value, ctx) => {
+        if (value.persist === false && value.chatId) {
+          ctx.addIssue({
+            code: z.ZodIssueCode.custom,
+            message: "chatId must be omitted when persist is false",
+            path: ["chatId"],
+          });
+        }
+      });
 
-    const body = Body.parse(req.body);
+    const parsed = Body.safeParse(req.body);
+    if (!parsed.success) return app.httpErrors.badRequest(parsed.error.message);
+    const body = parsed.data;
 
     // ensure chat exists if provided
     if (body.chatId) {
@@ -909,7 +966,7 @@ export async function registerRoutes(app: FastifyInstance) {
     }
 
     // Store only new non-assistant messages to avoid duplicate history entries.
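+    // `persist` defaults to true; only an explicit `persist: false` skips writing input history.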
-    if (body.chatId) {
+    if (body.persist !== false && body.chatId) {
       await storeNonAssistantMessages(body.chatId, body.messages);
     }
 
diff --git a/web/src/App.tsx b/web/src/App.tsx
index c65efe9..e0f5d5a 100644
--- a/web/src/App.tsx
+++ b/web/src/App.tsx
@@ -1,5 +1,5 @@
 import { useEffect, useMemo, useRef, useState } from "preact/hooks";
-import { Check, ChevronDown, Globe2, Menu, MessageSquare, Paperclip, Plus, Search, SendHorizontal, Trash2 } from "lucide-preact";
+import { Check, ChevronDown, Globe2, Menu, MessageSquare, Paperclip, Plus, Rabbit, Search, SendHorizontal, Trash2, X } from "lucide-preact";
 import { Button } from "@/components/ui/button";
 import { Textarea } from "@/components/ui/textarea";
 import { Separator } from "@/components/ui/separator";
@@ -92,9 +92,15 @@ const EMPTY_MODEL_CATALOG: ModelCatalogResponse["providers"] = {
 };
 
 const MODEL_PREFERENCES_STORAGE_KEY = "sybil:modelPreferencesByProvider";
+const QUICK_QUESTION_MODEL_SELECTION_STORAGE_KEY = "sybil:quickQuestionModelSelection";
 
 type ProviderModelPreferences = Record<Provider, string | null>;
 
+type QuickQuestionModelSelection = {
+  provider: Provider;
+  modelPreferences: ProviderModelPreferences;
+};
+
 const EMPTY_MODEL_PREFERENCES: ProviderModelPreferences = {
   openai: null,
   anthropic: null,
@@ -292,6 +298,37 @@ function loadStoredModelPreferences() {
   }
 }
 
+function normalizeStoredProvider(value: unknown): Provider {
+  return value === "anthropic" || value === "xai" || value === "openai" ? value : "openai";
+}
+
+function normalizeStoredModelPreferences(value: unknown): ProviderModelPreferences {
+  if (!value || typeof value !== "object" || Array.isArray(value)) return EMPTY_MODEL_PREFERENCES;
+  const parsed = value as Partial<Record<Provider, unknown>>;
+  return {
+    openai: typeof parsed.openai === "string" && parsed.openai.trim() ? parsed.openai.trim() : null,
+    anthropic: typeof parsed.anthropic === "string" && parsed.anthropic.trim() ? parsed.anthropic.trim() : null,
+    xai: typeof parsed.xai === "string" && parsed.xai.trim() ? parsed.xai.trim() : null,
+  };
+}
+
+function loadStoredQuickQuestionModelSelection(): QuickQuestionModelSelection {
+  if (typeof window === "undefined") {
+    return { provider: "openai", modelPreferences: EMPTY_MODEL_PREFERENCES };
+  }
+  try {
+    const raw = window.localStorage.getItem(QUICK_QUESTION_MODEL_SELECTION_STORAGE_KEY);
+    if (!raw) return { provider: "openai", modelPreferences: EMPTY_MODEL_PREFERENCES };
+    const parsed = JSON.parse(raw) as { provider?: unknown; modelPreferences?: unknown };
+    return {
+      provider: normalizeStoredProvider(parsed.provider),
+      modelPreferences: normalizeStoredModelPreferences(parsed.modelPreferences),
+    };
+  } catch {
+    return { provider: "openai", modelPreferences: EMPTY_MODEL_PREFERENCES };
+  }
+}
+
 function pickProviderModel(options: string[], preferred: string | null) {
   if (preferred?.trim()) return preferred.trim();
   return options[0] ?? "";
@@ -620,6 +657,22 @@ export default function App() {
     const stored = loadStoredModelPreferences();
     return stored.openai ?? PROVIDER_FALLBACK_MODELS.openai[0];
   });
+  const [quickProvider, setQuickProvider] = useState(() => loadStoredQuickQuestionModelSelection().provider);
+  const [quickProviderModelPreferences, setQuickProviderModelPreferences] = useState(
+    () => loadStoredQuickQuestionModelSelection().modelPreferences
+  );
+  const [quickModel, setQuickModel] = useState(() => {
+    const stored = loadStoredQuickQuestionModelSelection();
+    return stored.modelPreferences[stored.provider] ?? PROVIDER_FALLBACK_MODELS[stored.provider][0];
+  });
+  const [isQuickQuestionOpen, setIsQuickQuestionOpen] = useState(false);
+  const [quickPrompt, setQuickPrompt] = useState("");
+  const [quickSubmittedPrompt, setQuickSubmittedPrompt] = useState<string | null>(null);
+  const [quickSubmittedModelSelection, setQuickSubmittedModelSelection] = useState<{ provider: Provider; model: string } | null>(null);
+  const [quickQuestionMessages, setQuickQuestionMessages] = useState<Message[]>([]);
+  const [isQuickQuestionSending, setIsQuickQuestionSending] = useState(false);
+  const [isConvertingQuickQuestion, setIsConvertingQuickQuestion] = useState(false);
+  const [quickQuestionError, setQuickQuestionError] = useState<string | null>(null);
   const [error, setError] = useState<string | null>(null);
   const [transcriptTailSpacerHeight, setTranscriptTailSpacerHeight] = useState(TRANSCRIPT_BOTTOM_GAP);
   const transcriptContainerRef = useRef(null);
@@ -631,6 +684,7 @@ export default function App() {
   const selectedItemRef = useRef(null);
   const pendingTitleGenerationRef = useRef<Set<string>>(new Set());
   const searchRunAbortRef = useRef<AbortController | null>(null);
+  const quickQuestionAbortRef = useRef<AbortController | null>(null);
   const searchRunCounterRef = useRef(0);
   const shouldAutoScrollRef = useRef(true);
   const wasSendingRef = useRef(false);
@@ -713,6 +767,12 @@ export default function App() {
     setPendingChatState(null);
     setComposer("");
     setPendingAttachments([]);
+    setIsQuickQuestionOpen(false);
+    setQuickPrompt("");
+    setQuickSubmittedPrompt(null);
+    setQuickSubmittedModelSelection(null);
+    setQuickQuestionMessages([]);
+    setQuickQuestionError(null);
     setError(null);
   };
 
@@ -846,6 +906,7 @@ export default function App() {
   }, [isAuthenticated, selectedItem]);
 
   const providerModelOptions = useMemo(() => getModelOptions(modelCatalog, provider), [modelCatalog, provider]);
+  const quickProviderModelOptions = useMemo(() => getModelOptions(modelCatalog, quickProvider), [modelCatalog, quickProvider]);
 
   useEffect(() => {
     if (model.trim()) return;
@@ -859,6 +920,46 @@ export default function App() {
     window.localStorage.setItem(MODEL_PREFERENCES_STORAGE_KEY, JSON.stringify(providerModelPreferences));
   }, [providerModelPreferences]);
 
+  useEffect(() => {
+    if (quickModel.trim()) return;
+    setQuickModel((current) => {
+      return current.trim() || pickProviderModel(quickProviderModelOptions, quickProviderModelPreferences[quickProvider]);
+    });
+  }, [quickModel, quickProvider, quickProviderModelOptions, quickProviderModelPreferences]);
+
+  useEffect(() => {
+    if (typeof window === "undefined") return;
+    window.localStorage.setItem(
+      QUICK_QUESTION_MODEL_SELECTION_STORAGE_KEY,
+      JSON.stringify({
+        provider: quickProvider,
+        modelPreferences: quickProviderModelPreferences,
+      } satisfies QuickQuestionModelSelection)
+    );
+  }, [quickProvider, quickProviderModelPreferences]);
+
+  useEffect(() => {
+    if (!isQuickQuestionOpen || typeof window === "undefined") return;
+    window.requestAnimationFrame(() => {
+      const textarea = document.getElementById("quick-question-input") as HTMLTextAreaElement | null;
+      if (!textarea) return;
+      textarea.focus();
+      textarea.style.height = "0px";
+      textarea.style.height = `${textarea.scrollHeight}px`;
+      if (textarea.value.length > 0) {
+        textarea.select();
+      }
+    });
+  }, [isQuickQuestionOpen]);
+
+  useEffect(() => {
+    if (typeof document === "undefined") return;
+    const textarea = document.getElementById("quick-question-input") as HTMLTextAreaElement | null;
+    if (!textarea) return;
+    textarea.style.height = "0px";
+    textarea.style.height = `${textarea.scrollHeight}px`;
+  }, [quickPrompt, isQuickQuestionOpen]);
+
   const selectedKey = selectedItem ? `${selectedItem.kind}:${selectedItem.id}` : null;
   const isChatReplyStreamingInView =
     isSending &&
@@ -933,6 +1034,8 @@ export default function App() {
     return () => {
       searchRunAbortRef.current?.abort();
       searchRunAbortRef.current = null;
+      quickQuestionAbortRef.current?.abort();
+      quickQuestionAbortRef.current = null;
     };
   }, []);
 
@@ -960,6 +1063,18 @@ export default function App() {
     }
     return (isSearchMode ? messages : pendingChatState.messages).filter(isDisplayableMessage);
   }, [isSearchMode, messages, pendingChatState, selectedItem]);
+  const quickAnswerText = useMemo(() => {
+    for (let index = quickQuestionMessages.length - 1; index >= 0; index -= 1) {
+      const message = quickQuestionMessages[index];
+      if (message.role === "assistant") return message.content;
+    }
+    return "";
+  }, [quickQuestionMessages]);
+  const canConvertQuickQuestion =
+    Boolean(quickSubmittedPrompt?.trim()) &&
+    Boolean(quickSubmittedModelSelection?.model.trim()) &&
+    Boolean(quickAnswerText.trim()) &&
+    !isQuickQuestionSending;
 
   const selectedChatSummary = useMemo(() => {
     if (!selectedItem || selectedItem.kind !== "chat") return null;
@@ -1028,6 +1143,12 @@ export default function App() {
     setIsMobileSidebarOpen(false);
   };
 
+  const handleOpenQuickQuestion = () => {
+    setQuickQuestionError(null);
+    setIsQuickQuestionOpen(true);
+    setIsMobileSidebarOpen(false);
+  };
+
   const handleCreateSearch = () => {
     setError(null);
     setContextMenu(null);
@@ -1068,6 +1189,15 @@ export default function App() {
       if (!hasPrimaryModifier || event.altKey) return;
       const key = event.key.toLowerCase();
 
+      if (key === "i" && !event.shiftKey) {
+        event.preventDefault();
+        setQuickQuestionError(null);
+        setIsQuickQuestionOpen((current) => !current);
+        return;
+      }
+
+      if (isQuickQuestionOpen) return;
+
       if (key === "j") {
         event.preventDefault();
         if (event.shiftKey) {
@@ -1087,7 +1217,7 @@ export default function App() {
 
     window.addEventListener("keydown", handleKeyDown);
    return () => window.removeEventListener("keydown", handleKeyDown);
-  }, [filteredSidebarItems, isAuthenticated]);
+  }, [filteredSidebarItems, isAuthenticated, isQuickQuestionOpen]);
 
   const openContextMenu = (event: MouseEvent, item: SidebarSelection) => {
     event.preventDefault();
@@ -1138,6 +1268,17 @@ export default function App() {
     };
   }, [contextMenu]);
 
+  useEffect(() => {
+    if (!isQuickQuestionOpen) return;
+    const handleKeyDown = (event: KeyboardEvent) => {
+      if (event.key !== "Escape") return;
+      event.preventDefault();
+      setIsQuickQuestionOpen(false);
+    };
+    window.addEventListener("keydown", handleKeyDown);
+    return () => window.removeEventListener("keydown", handleKeyDown);
+  }, [isQuickQuestionOpen]);
+
   const handleOpenAttachmentPicker = () => {
     fileInputRef.current?.click();
   };
@@ -1587,6 +1728,182 @@ export default function App() {
     }
   };
 
+  const handleSendQuickQuestion = async () => {
+    const content = quickPrompt.trim();
+    if (!content || isQuickQuestionSending || isConvertingQuickQuestion) return;
+
+    const selectedModel = quickModel.trim();
+    if (!selectedModel) {
+      setQuickQuestionError("No model available for selected provider");
+      return;
+    }
+
+    const now = new Date().toISOString();
+    const optimisticAssistantMessage: Message = {
+      id: `temp-assistant-quick-${Date.now()}`,
+      createdAt: now,
+      role: "assistant",
+      content: "",
+      name: null,
+      metadata: null,
+    };
+
+    quickQuestionAbortRef.current?.abort();
+    const abortController = new AbortController();
+    quickQuestionAbortRef.current = abortController;
+
+    setQuickQuestionError(null);
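+    // Snapshot the prompt and model selection that were actually sent, so a later
+    // "convert to chat" persists exactly this exchange even if the pickers change.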
+    setQuickSubmittedPrompt(content);
+    setQuickSubmittedModelSelection({ provider: quickProvider, model: selectedModel });
+    setQuickQuestionMessages([optimisticAssistantMessage]);
+    setIsQuickQuestionSending(true);
+
+    let streamErrorMessage: string | null = null;
+
+    try {
+      await runCompletionStream(
+        {
+          persist: false,
+          provider: quickProvider,
+          model: selectedModel,
+          messages: [{ role: "user", content }],
+        },
+        {
+          onToolCall: (payload) => {
+            setQuickQuestionMessages((current) => {
+              if (
+                current.some(
+                  (message) =>
+                    asToolLogMetadata(message.metadata)?.toolCallId === payload.toolCallId ||
+                    message.id === `temp-tool-${payload.toolCallId}`
+                )
+              ) {
+                return current;
+              }
+
+              const toolMessage = buildOptimisticToolMessage(payload);
+              const assistantIndex = current.findIndex(
+                (message, index, all) => index === all.length - 1 && message.id.startsWith("temp-assistant-quick-")
+              );
+              if (assistantIndex < 0) return current.concat(toolMessage);
+              return [
+                ...current.slice(0, assistantIndex),
+                toolMessage,
+                ...current.slice(assistantIndex),
+              ];
+            });
+          },
+          onDelta: (payload) => {
+            if (!payload.text) return;
+            setQuickQuestionMessages((current) => {
+              let updated = false;
+              const nextMessages = current.map((message, index, all) => {
+                const isTarget = index === all.length - 1 && message.id.startsWith("temp-assistant-quick-");
+                if (!isTarget) return message;
+                updated = true;
+                return { ...message, content: message.content + payload.text };
+              });
+              return updated ? nextMessages : current;
+            });
+          },
+          onDone: (payload) => {
+            setQuickQuestionMessages((current) => {
+              let updated = false;
+              const nextMessages = current.map((message, index, all) => {
+                const isTarget = index === all.length - 1 && message.id.startsWith("temp-assistant-quick-");
+                if (!isTarget) return message;
+                updated = true;
+                return { ...message, content: payload.text };
+              });
+              return updated ? nextMessages : current;
+            });
+          },
+          onError: (payload) => {
+            streamErrorMessage = payload.message;
+          },
+        },
+        { signal: abortController.signal }
+      );
+
+      if (streamErrorMessage) {
+        throw new Error(streamErrorMessage);
+      }
+    } catch (err) {
+      if (abortController.signal.aborted) return;
+      const message = err instanceof Error ? err.message : String(err);
+      if (message.includes("bearer token")) {
+        handleAuthFailure(message);
+      } else {
+        setQuickQuestionError(message);
+      }
+    } finally {
+      if (quickQuestionAbortRef.current === abortController) {
+        quickQuestionAbortRef.current = null;
+      }
+      if (!abortController.signal.aborted) {
+        setIsQuickQuestionSending(false);
+      }
+    }
+  };
+
+  const handleConvertQuickQuestionToChat = async () => {
+    const question = quickSubmittedPrompt?.trim();
+    const answer = quickAnswerText.trim();
+    const selection = quickSubmittedModelSelection;
+    if (!question || !answer || !selection || isQuickQuestionSending || isConvertingQuickQuestion) return;
+
+    setQuickQuestionError(null);
+    setIsConvertingQuickQuestion(true);
+
+    try {
+      const title = question.split(/\r?\n/)[0]?.trim().slice(0, 48) || "Quick question";
+      const chat = await createChat({
+        title,
+        provider: selection.provider,
+        model: selection.model,
+        messages: [
+          { role: "user", content: question },
+          { role: "assistant", content: answer },
+        ],
+      });
+
+      setDraftKind(null);
+      setPendingChatState(null);
+      setComposer("");
+      setPendingAttachments([]);
+      setIsQuickQuestionOpen(false);
+      setProvider(selection.provider);
+      setModel(selection.model);
+      setChats((current) => {
+        const withoutExisting = current.filter((existing) => existing.id !== chat.id);
+        return [chat, ...withoutExisting];
+      });
+      setSelectedItem({ kind: "chat", id: chat.id });
+      setSelectedChat({
+        id: chat.id,
+        title: chat.title,
+        createdAt: chat.createdAt,
+        updatedAt: chat.updatedAt,
+        initiatedProvider: chat.initiatedProvider,
+        initiatedModel: chat.initiatedModel,
+        lastUsedProvider: chat.lastUsedProvider,
+        lastUsedModel: chat.lastUsedModel,
+        messages: [],
+      });
+      setSelectedSearch(null);
+      await refreshCollections({ kind: "chat", id: chat.id });
+      await refreshChat(chat.id);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      if (message.includes("bearer token")) {
+        handleAuthFailure(message);
+      } else {
+        setQuickQuestionError(message);
+      }
+    } finally {
+      setIsConvertingQuickQuestion(false);
+    }
+  };
+
   const handleSend = async () => {
     const content = composer.trim();
     const attachments = pendingAttachments;
@@ -1683,13 +2000,30 @@ export default function App() {
 
[The body of this JSX hunk was garbled in extraction: the markup tags were stripped, leaving only bare `+`/`-` markers. The surviving fragments show a new header-bar trigger for the quick question dialog (a Rabbit icon button with a `{primaryShortcutModifier}+i` shortcut hint) and a modal wrapped in `{isQuickQuestionOpen ? ( ... ) : null}` that dismisses when the backdrop itself is clicked (`if (event.target === event.currentTarget) setIsQuickQuestionOpen(false);`), with a "Quick question" header, a close (X) button, the auto-sizing `quick-question-input` textarea, the streamed answer, and send / convert-to-chat controls. The hunks for web/src/components/chat/chat-messages-panel.tsx and web/src/lib/api.ts recorded in the diffstat are also missing from this excerpt.]
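A minimal sketch of what the stripped dialog markup plausibly looked like, reconstructed only from the state, handlers, and icon imports that survive above; the layout, class names, and button labels are assumptions, not the committed markup:

```tsx
{/* Hypothetical reconstruction -- the real hunk was lost; identifiers below exist in App.tsx, styling is guessed. */}
{isQuickQuestionOpen ? (
  <div
    role="dialog"
    aria-modal="true"
    className="fixed inset-0 z-50 flex items-start justify-center bg-black/40 pt-24"
    onMouseDown={(event) => {
      // Surviving line: dismiss only when the backdrop itself is clicked.
      if (event.target === event.currentTarget) setIsQuickQuestionOpen(false);
    }}
  >
    <section className="w-full max-w-xl rounded-lg border bg-background p-4 shadow-lg">
      <header className="flex items-center justify-between">
        <div className="flex items-center gap-2">
          <Rabbit className="h-4 w-4" aria-hidden="true" />
          <span className="font-medium">Quick question</span>
          <span className="text-xs text-muted-foreground">{primaryShortcutModifier}+i</span>
        </div>
        <Button variant="ghost" size="icon" onClick={() => setIsQuickQuestionOpen(false)}>
          <X className="h-4 w-4" />
        </Button>
      </header>
      {quickQuestionError ? <p className="text-sm text-destructive">{quickQuestionError}</p> : null}
      {quickAnswerText ? <div className="whitespace-pre-wrap text-sm">{quickAnswerText}</div> : null}
      <Textarea
        id="quick-question-input"
        value={quickPrompt}
        onInput={(event) => setQuickPrompt((event.target as HTMLTextAreaElement).value)}
        placeholder="Ask a quick question"
      />
      <div className="flex items-center justify-end gap-2">
        <Button
          variant="outline"
          onClick={handleConvertQuickQuestionToChat}
          disabled={!canConvertQuickQuestion || isConvertingQuickQuestion}
        >
          Convert to chat
        </Button>
        <Button onClick={handleSendQuickQuestion} disabled={isQuickQuestionSending || !quickPrompt.trim()}>
          <SendHorizontal className="h-4 w-4" />
        </Button>
      </div>
    </section>
  </div>
) : null}
```

The real markup may differ in structure; the load-bearing pieces (the `quick-question-input` id targeted by the focus/autosize effects, the backdrop dismiss, and the two action handlers) are taken directly from the surviving code.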