From 7ef2825c16dbb32bb432fc83c42c69eba38f20c9 Mon Sep 17 00:00:00 2001 From: James Magahern Date: Sat, 14 Feb 2026 21:20:14 -0800 Subject: [PATCH] docs --- AGENTS.md | 5 + docs/api/rest.md | 184 +++++++++++++++++++++++++++++++++++++ docs/api/streaming-chat.md | 131 ++++++++++++++++++++++++++ server/README.md | 5 + web/README.md | 8 +- 5 files changed, 331 insertions(+), 2 deletions(-) create mode 100644 docs/api/rest.md create mode 100644 docs/api/streaming-chat.md diff --git a/AGENTS.md b/AGENTS.md index bd8c5c2..95d7cd6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,2 +1,7 @@ # AGENTS.md +## Documentation Discipline + +- Keep API documentation current whenever backend/frontend API behavior changes. +- If request/response shapes, stream events, auth behavior, or endpoint semantics change, update docs in `docs/api/` in the same change. +- Treat `docs/api/rest.md` and `docs/api/streaming-chat.md` as the source-of-truth contract for non-web clients (including iOS). diff --git a/docs/api/rest.md b/docs/api/rest.md new file mode 100644 index 0000000..c86ae9d --- /dev/null +++ b/docs/api/rest.md @@ -0,0 +1,184 @@ +# REST API Contract + +Base URL: `/api` behind web proxy, or server root directly in local/dev. + +Authentication: +- If `ADMIN_TOKEN` is set on server, send `Authorization: Bearer <token>`. +- If `ADMIN_TOKEN` is unset, API is open for local/dev use. + +Content type: +- Requests with bodies use `application/json`. +- Responses are JSON unless noted otherwise. 
+ +## Health + Auth + +### `GET /health` +- Response: `{ "ok": true }` + +### `GET /v1/auth/session` +- Response: `{ "authenticated": true, "mode": "open" | "token" }` + +## Models + +### `GET /v1/models` +- Response: +```json +{ + "providers": { + "openai": { "models": ["gpt-4.1-mini"], "loadedAt": "2026-02-14T00:00:00.000Z", "error": null }, + "anthropic": { "models": ["claude-3-5-sonnet-latest"], "loadedAt": null, "error": null }, + "xai": { "models": ["grok-3-mini"], "loadedAt": null, "error": null } + } +} +``` + +## Chats + +### `GET /v1/chats` +- Response: `{ "chats": ChatSummary[] }` + +### `POST /v1/chats` +- Body: `{ "title"?: string }` +- Response: `{ "chat": ChatSummary }` + +### `DELETE /v1/chats/:chatId` +- Response: `{ "deleted": true }` +- Not found: `404 { "message": "chat not found" }` + +### `GET /v1/chats/:chatId` +- Response: `{ "chat": ChatDetail }` + +### `POST /v1/chats/:chatId/messages` +- Body: +```json +{ + "role": "system|user|assistant|tool", + "content": "string", + "name": "optional", + "metadata": {} +} +``` +- Response: `{ "message": Message }` + +## Chat Completions (non-streaming) + +### `POST /v1/chat-completions` +- Body: +```json +{ + "chatId": "optional-chat-id", + "provider": "openai|anthropic|xai", + "model": "string", + "messages": [ + { "role": "system|user|assistant|tool", "content": "string", "name": "optional" } + ], + "temperature": 0.2, + "maxTokens": 256 +} +``` +- Response: +```json +{ + "chatId": "chat-id-or-null", + "provider": "openai", + "model": "gpt-4.1-mini", + "message": { "role": "assistant", "content": "..." }, + "usage": { "inputTokens": 10, "outputTokens": 20, "totalTokens": 30 }, + "raw": {} +} +``` + +Behavior notes: +- If `chatId` is present, server validates chat existence. +- For `chatId` calls, server stores only *new* non-assistant messages from provided history to avoid duplicates. +- Server persists final assistant output and call metadata (`LlmCall`) in DB. 
+ +## Searches + +### `GET /v1/searches` +- Response: `{ "searches": SearchSummary[] }` + +### `POST /v1/searches` +- Body: `{ "title"?: string, "query"?: string }` +- Response: `{ "search": SearchSummary }` + +### `DELETE /v1/searches/:searchId` +- Response: `{ "deleted": true }` +- Not found: `404 { "message": "search not found" }` + +### `GET /v1/searches/:searchId` +- Response: `{ "search": SearchDetail }` + +### `POST /v1/searches/:searchId/run` +- Body: +```json +{ + "query": "optional override", + "title": "optional override", + "type": "auto|fast|deep|instant", + "numResults": 10, + "includeDomains": ["example.com"], + "excludeDomains": ["example.org"] +} +``` +- Response: `{ "search": SearchDetail }` + +Search run notes: +- Backend executes Exa search and Exa answer. +- Persists answer text/citations + ranked results. +- If both search and answer fail, endpoint returns an error. + +## Type Shapes + +`ChatSummary` +```json +{ "id": "...", "title": null, "createdAt": "...", "updatedAt": "..." } +``` + +`Message` +```json +{ + "id": "...", + "createdAt": "...", + "role": "system|user|assistant|tool", + "content": "...", + "name": null +} +``` + +`ChatDetail` +```json +{ + "id": "...", + "title": null, + "createdAt": "...", + "updatedAt": "...", + "messages": [Message] +} +``` + +`SearchSummary` +```json +{ "id": "...", "title": null, "query": null, "createdAt": "...", "updatedAt": "..." } +``` + +`SearchDetail` +```json +{ + "id": "...", + "title": "...", + "query": "...", + "createdAt": "...", + "updatedAt": "...", + "requestId": "...", + "latencyMs": 123, + "error": null, + "answerText": "...", + "answerRequestId": "...", + "answerCitations": [], + "answerError": null, + "results": [] +} +``` + +For streaming contracts, see `docs/api/streaming-chat.md`. 
diff --git a/docs/api/streaming-chat.md b/docs/api/streaming-chat.md new file mode 100644 index 0000000..3410fea --- /dev/null +++ b/docs/api/streaming-chat.md @@ -0,0 +1,131 @@ +# Streaming Chat API Contract + +This document defines the server-sent events (SSE) contract for chat completions. + +Endpoint: +- `POST /v1/chat-completions/stream` + +Transport: +- HTTP response uses `Content-Type: text/event-stream; charset=utf-8` +- Events are emitted in SSE format (`event: ...`, `data: ...`) +- Request body is JSON + +Authentication: +- Same as REST endpoints (`Authorization: Bearer <token>` when token mode is enabled) + +## Request Body + +```json +{ + "chatId": "optional-chat-id", + "provider": "openai|anthropic|xai", + "model": "string", + "messages": [ + { "role": "system|user|assistant|tool", "content": "string", "name": "optional" } + ], + "temperature": 0.2, + "maxTokens": 256 +} +``` + +Notes: +- If `chatId` is omitted, backend creates a new chat. +- If `chatId` is provided, backend validates it exists. +- Backend stores only new non-assistant input history rows to avoid duplicates. + +## Event Stream Contract + +Event order: +1. Exactly one `meta` +2. Zero or more `delta` +3. Exactly one terminal event: `done` or `error` + +### `meta` + +```json +{ + "type": "meta", + "chatId": "chat-id", + "callId": "llm-call-id", + "provider": "openai", + "model": "gpt-4.1-mini" +} +``` + +### `delta` + +```json +{ "type": "delta", "text": "next chunk" } +``` + +`text` may contain partial words, punctuation, or whitespace. + +### `done` + +```json +{ + "type": "done", + "text": "full assistant response", + "usage": { + "inputTokens": 123, + "outputTokens": 456, + "totalTokens": 579 + } +} +``` + +`usage` may be omitted when provider does not expose final token accounting for stream mode. 
+ +### `error` + +```json +{ "type": "error", "message": "provider timeout" } +``` + +## Provider Streaming Behavior + +- `openai`: streamed via OpenAI chat completion chunks; emits `delta` from `choices[0].delta.content`. +- `xai`: uses OpenAI-compatible API, same chunk extraction as OpenAI. +- `anthropic`: streamed via event stream; emits `delta` from `content_block_delta` with `text_delta`. + +## Persistence + Consistency Model + +Backend database remains source of truth. + +During stream: +- Client may optimistically render accumulated `delta` text. + +On successful completion: +- Backend persists assistant `Message` and updates `LlmCall` usage/latency in a transaction. +- Backend then emits `done`. + +On failure: +- Backend records call error and emits `error`. + +Client recommendation (for iOS/web): +1. Render deltas in real time for UX. +2. On `done`, refresh chat detail from REST (`GET /v1/chats/:chatId`) and use DB-backed data as canonical. +3. On `error`, preserve user input and show retry affordance. + +## SSE Parsing Rules + +- Concatenate multiple `data:` lines with newline before JSON parse. +- Event completes on blank line. +- Ignore unknown event names for forward compatibility. + +## Example Stream + +```text +event: meta +data: {"type":"meta","chatId":"c1","callId":"k1","provider":"openai","model":"gpt-4.1-mini"} + +event: delta +data: {"type":"delta","text":"Hello"} + +event: delta +data: {"type":"delta","text":" world"} + +event: done +data: {"type":"done","text":"Hello world"} + +``` diff --git a/server/README.md b/server/README.md index cfed71e..274219b 100644 --- a/server/README.md +++ b/server/README.md @@ -20,6 +20,10 @@ Migrations are applied automatically on server startup (`prisma migrate deploy`) Open docs: `http://localhost:8787/docs` +API contract docs for clients: +- `../docs/api/rest.md` +- `../docs/api/streaming-chat.md` + ## Run Modes - `npm run dev`: runs `src/index.ts` with `tsx` in watch mode (auto-restart on file changes). 
Use for local development. @@ -54,6 +58,7 @@ If `ADMIN_TOKEN` is not set, the server runs in open mode (dev). - `POST /v1/searches` - `GET /v1/searches/:searchId` - `POST /v1/searches/:searchId/run` +- `POST /v1/searches/:searchId/run/stream` (SSE) Search runs now execute both Exa `searchAndContents` and Exa `answer`, storing: - ranked search results (for result cards), and diff --git a/web/README.md b/web/README.md index 8d65185..ebbc8f6 100644 --- a/web/README.md +++ b/web/README.md @@ -38,8 +38,12 @@ Default dev URL: `http://localhost:5173` - Chat mode: transcript + provider/model controls. - Search mode: top AI answer block + Google-style Exa results view. - Composer adapts to the active item: - - Chat sends `POST /v1/chat-completions`. - - Search sends `POST /v1/searches/:searchId/run`. + - Chat sends `POST /v1/chat-completions/stream` (SSE). + - Search sends `POST /v1/searches/:searchId/run/stream` (SSE). + +Client API contract docs: +- `../docs/api/rest.md` +- `../docs/api/streaming-chat.md` ## Routes