From 7ef2825c16dbb32bb432fc83c42c69eba38f20c9 Mon Sep 17 00:00:00 2001 From: James Magahern Date: Sat, 14 Feb 2026 21:20:14 -0800 Subject: [PATCH] docs --- AGENTS.md | 5 + docs/api/rest.md | 184 +++++++++++++++++++++++++++++++++++++ docs/api/streaming-chat.md | 131 ++++++++++++++++++++++++++ server/README.md | 5 + web/README.md | 8 +- 5 files changed, 331 insertions(+), 2 deletions(-) create mode 100644 docs/api/rest.md create mode 100644 docs/api/streaming-chat.md diff --git a/AGENTS.md b/AGENTS.md index bd8c5c2..95d7cd6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,2 +1,7 @@ # AGENTS.md +## Documentation Discipline + +- Keep API documentation current whenever backend/frontend API behavior changes. +- If request/response shapes, stream events, auth behavior, or endpoint semantics change, update docs in `docs/api/` in the same change. +- Treat `docs/api/rest.md` and `docs/api/streaming-chat.md` as the source-of-truth contract for non-web clients (including iOS). diff --git a/docs/api/rest.md b/docs/api/rest.md new file mode 100644 index 0000000..c86ae9d --- /dev/null +++ b/docs/api/rest.md @@ -0,0 +1,184 @@ +# REST API Contract + +Base URL: `/api` behind web proxy, or server root directly in local/dev. + +Authentication: +- If `ADMIN_TOKEN` is set on server, send `Authorization: Bearer <token>`. +- If `ADMIN_TOKEN` is unset, API is open for local/dev use. + +Content type: +- Requests with bodies use `application/json`. +- Responses are JSON unless noted otherwise. 
+ +## Health + Auth + +### `GET /health` +- Response: `{ "ok": true }` + +### `GET /v1/auth/session` +- Response: `{ "authenticated": true, "mode": "open" | "token" }` + +## Models + +### `GET /v1/models` +- Response: +```json +{ + "providers": { + "openai": { "models": ["gpt-4.1-mini"], "loadedAt": "2026-02-14T00:00:00.000Z", "error": null }, + "anthropic": { "models": ["claude-3-5-sonnet-latest"], "loadedAt": null, "error": null }, + "xai": { "models": ["grok-3-mini"], "loadedAt": null, "error": null } + } +} +``` + +## Chats + +### `GET /v1/chats` +- Response: `{ "chats": ChatSummary[] }` + +### `POST /v1/chats` +- Body: `{ "title"?: string }` +- Response: `{ "chat": ChatSummary }` + +### `DELETE /v1/chats/:chatId` +- Response: `{ "deleted": true }` +- Not found: `404 { "message": "chat not found" }` + +### `GET /v1/chats/:chatId` +- Response: `{ "chat": ChatDetail }` + +### `POST /v1/chats/:chatId/messages` +- Body: +```json +{ + "role": "system|user|assistant|tool", + "content": "string", + "name": "optional", + "metadata": {} +} +``` +- Response: `{ "message": Message }` + +## Chat Completions (non-streaming) + +### `POST /v1/chat-completions` +- Body: +```json +{ + "chatId": "optional-chat-id", + "provider": "openai|anthropic|xai", + "model": "string", + "messages": [ + { "role": "system|user|assistant|tool", "content": "string", "name": "optional" } + ], + "temperature": 0.2, + "maxTokens": 256 +} +``` +- Response: +```json +{ + "chatId": "chat-id-or-null", + "provider": "openai", + "model": "gpt-4.1-mini", + "message": { "role": "assistant", "content": "..." }, + "usage": { "inputTokens": 10, "outputTokens": 20, "totalTokens": 30 }, + "raw": {} +} +``` + +Behavior notes: +- If `chatId` is present, server validates chat existence. +- For `chatId` calls, server stores only *new* non-assistant messages from provided history to avoid duplicates. +- Server persists final assistant output and call metadata (`LlmCall`) in DB. 
+ +## Searches + +### `GET /v1/searches` +- Response: `{ "searches": SearchSummary[] }` + +### `POST /v1/searches` +- Body: `{ "title"?: string, "query"?: string }` +- Response: `{ "search": SearchSummary }` + +### `DELETE /v1/searches/:searchId` +- Response: `{ "deleted": true }` +- Not found: `404 { "message": "search not found" }` + +### `GET /v1/searches/:searchId` +- Response: `{ "search": SearchDetail }` + +### `POST /v1/searches/:searchId/run` +- Body: +```json +{ + "query": "optional override", + "title": "optional override", + "type": "auto|fast|deep|instant", + "numResults": 10, + "includeDomains": ["example.com"], + "excludeDomains": ["example.org"] +} +``` +- Response: `{ "search": SearchDetail }` + +Search run notes: +- Backend executes Exa search and Exa answer. +- Persists answer text/citations + ranked results. +- If both search and answer fail, endpoint returns an error. + +## Type Shapes + +`ChatSummary` +```json +{ "id": "...", "title": null, "createdAt": "...", "updatedAt": "..." } +``` + +`Message` +```json +{ + "id": "...", + "createdAt": "...", + "role": "system|user|assistant|tool", + "content": "...", + "name": null +} +``` + +`ChatDetail` +```json +{ + "id": "...", + "title": null, + "createdAt": "...", + "updatedAt": "...", + "messages": [Message] +} +``` + +`SearchSummary` +```json +{ "id": "...", "title": null, "query": null, "createdAt": "...", "updatedAt": "..." } +``` + +`SearchDetail` +```json +{ + "id": "...", + "title": "...", + "query": "...", + "createdAt": "...", + "updatedAt": "...", + "requestId": "...", + "latencyMs": 123, + "error": null, + "answerText": "...", + "answerRequestId": "...", + "answerCitations": [], + "answerError": null, + "results": [] +} +``` + +For streaming contracts, see `docs/api/streaming-chat.md`. 
diff --git a/docs/api/streaming-chat.md b/docs/api/streaming-chat.md new file mode 100644 index 0000000..3410fea --- /dev/null +++ b/docs/api/streaming-chat.md @@ -0,0 +1,131 @@ +# Streaming Chat API Contract + +This document defines the server-sent events (SSE) contract for chat completions. + +Endpoint: +- `POST /v1/chat-completions/stream` + +Transport: +- HTTP response uses `Content-Type: text/event-stream; charset=utf-8` +- Events are emitted in SSE format (`event: ...`, `data: ...`) +- Request body is JSON + +Authentication: +- Same as REST endpoints (`Authorization: Bearer <token>` when token mode is enabled) + +## Request Body + +```json +{ + "chatId": "optional-chat-id", + "provider": "openai|anthropic|xai", + "model": "string", + "messages": [ + { "role": "system|user|assistant|tool", "content": "string", "name": "optional" } + ], + "temperature": 0.2, + "maxTokens": 256 +} +``` + +Notes: +- If `chatId` is omitted, backend creates a new chat. +- If `chatId` is provided, backend validates it exists. +- Backend stores only new non-assistant input history rows to avoid duplicates. + +## Event Stream Contract + +Event order: +1. Exactly one `meta` +2. Zero or more `delta` +3. Exactly one terminal event: `done` or `error` + +### `meta` + +```json +{ + "type": "meta", + "chatId": "chat-id", + "callId": "llm-call-id", + "provider": "openai", + "model": "gpt-4.1-mini" +} +``` + +### `delta` + +```json +{ "type": "delta", "text": "next chunk" } +``` + +`text` may contain partial words, punctuation, or whitespace. + +### `done` + +```json +{ + "type": "done", + "text": "full assistant response", + "usage": { + "inputTokens": 123, + "outputTokens": 456, + "totalTokens": 579 + } +} +``` + +`usage` may be omitted when provider does not expose final token accounting for stream mode. 
+ +### `error` + +```json +{ "type": "error", "message": "provider timeout" } +``` + +## Provider Streaming Behavior + +- `openai`: streamed via OpenAI chat completion chunks; emits `delta` from `choices[0].delta.content`. +- `xai`: uses OpenAI-compatible API, same chunk extraction as OpenAI. +- `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`. + +## Persistence + Consistency Model + +Backend database remains source of truth. + +During stream: +- Client may optimistically render accumulated `delta` text. + +On successful completion: +- Backend persists assistant `Message` and updates `LlmCall` usage/latency in a transaction. +- Backend then emits `done`. + +On failure: +- Backend records call error and emits `error`. + +Client recommendation (for iOS/web): +1. Render deltas in real time for UX. +2. On `done`, refresh chat detail from REST (`GET /v1/chats/:chatId`) and use DB-backed data as canonical. +3. On `error`, preserve user input and show retry affordance. + +## SSE Parsing Rules + +- Concatenate multiple `data:` lines with newline before JSON parse. +- Event completes on blank line. +- Ignore unknown event names for forward compatibility. + +## Example Stream + +```text +event: meta +data: {"type":"meta","chatId":"c1","callId":"k1","provider":"openai","model":"gpt-4.1-mini"} + +event: delta +data: {"type":"delta","text":"Hello"} + +event: delta +data: {"type":"delta","text":" world"} + +event: done +data: {"type":"done","text":"Hello world"} + +``` diff --git a/server/README.md b/server/README.md index cfed71e..274219b 100644 --- a/server/README.md +++ b/server/README.md @@ -20,6 +20,10 @@ Migrations are applied automatically on server startup (`prisma migrate deploy`) Open docs: `http://localhost:8787/docs` +API contract docs for clients: +- `../docs/api/rest.md` +- `../docs/api/streaming-chat.md` + ## Run Modes - `npm run dev`: runs `src/index.ts` with `tsx` in watch mode (auto-restart on file changes). 
Use for local development. @@ -54,6 +58,7 @@ If `ADMIN_TOKEN` is not set, the server runs in open mode (dev). - `POST /v1/searches` - `GET /v1/searches/:searchId` - `POST /v1/searches/:searchId/run` +- `POST /v1/searches/:searchId/run/stream` (SSE) Search runs now execute both Exa `searchAndContents` and Exa `answer`, storing: - ranked search results (for result cards), and diff --git a/web/README.md b/web/README.md index 8d65185..ebbc8f6 100644 --- a/web/README.md +++ b/web/README.md @@ -38,8 +38,12 @@ Default dev URL: `http://localhost:5173` - Chat mode: transcript + provider/model controls. - Search mode: top AI answer block + Google-style Exa results view. - Composer adapts to the active item: - - Chat sends `POST /v1/chat-completions`. - - Search sends `POST /v1/searches/:searchId/run`. + - Chat sends `POST /v1/chat-completions/stream` (SSE). + - Search sends `POST /v1/searches/:searchId/run/stream` (SSE). + +Client API contract docs: +- `../docs/api/rest.md` +- `../docs/api/streaming-chat.md` ## Routes