Add attachment support
This commit is contained in:
@@ -9,6 +9,7 @@ import { warmModelCatalog } from "./llm/model-catalog.js";
|
||||
import { registerRoutes } from "./routes.js";
|
||||
|
||||
const app = Fastify({
|
||||
bodyLimit: 32 * 1024 * 1024,
|
||||
disableRequestLogging: true,
|
||||
logger: {
|
||||
transport: {
|
||||
|
||||
@@ -4,6 +4,7 @@ import { z } from "zod";
|
||||
import { env } from "../env.js";
|
||||
import { exaClient } from "../search/exa.js";
|
||||
import { searchSearxng } from "../search/searxng.js";
|
||||
import { buildOpenAIConversationMessage } from "./message-content.js";
|
||||
import type { ChatMessage } from "./types.js";
|
||||
|
||||
const MAX_TOOL_ROUNDS = 4;
|
||||
@@ -250,23 +251,7 @@ function extractHtmlTitle(html: string) {
|
||||
}
|
||||
|
||||
function normalizeIncomingMessages(messages: ChatMessage[]) {
|
||||
const normalized = messages.map((m) => {
|
||||
if (m.role === "tool") {
|
||||
const name = m.name?.trim() || "tool";
|
||||
return {
|
||||
role: "user",
|
||||
content: `Tool output (${name}):\n${m.content}`,
|
||||
};
|
||||
}
|
||||
if (m.role === "assistant" || m.role === "system" || m.role === "user") {
|
||||
const out: any = { role: m.role, content: m.content };
|
||||
if (m.name && (m.role === "assistant" || m.role === "user")) {
|
||||
out.name = m.name;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
return { role: "user", content: m.content };
|
||||
});
|
||||
const normalized = messages.map((message) => buildOpenAIConversationMessage(message));
|
||||
|
||||
return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
|
||||
}
|
||||
|
||||
211
server/src/llm/message-content.ts
Normal file
211
server/src/llm/message-content.ts
Normal file
@@ -0,0 +1,211 @@
|
||||
import type { ChatAttachment, ChatImageAttachment, ChatMessage, ChatTextAttachment } from "./types.js";
|
||||
|
||||
function escapeAttribute(value: string) {
|
||||
return value.replace(/"/g, """);
|
||||
}
|
||||
|
||||
function getImageAttachments(message: ChatMessage) {
|
||||
return (message.attachments ?? []).filter((attachment): attachment is ChatImageAttachment => attachment.kind === "image");
|
||||
}
|
||||
|
||||
function getTextAttachments(message: ChatMessage) {
|
||||
return (message.attachments ?? []).filter((attachment): attachment is ChatTextAttachment => attachment.kind === "text");
|
||||
}
|
||||
|
||||
function buildImageSummaryText(attachments: ChatImageAttachment[]) {
|
||||
if (!attachments.length) return null;
|
||||
const label = attachments.length === 1 ? "Attached image" : "Attached images";
|
||||
return `${label}: ${attachments.map((attachment) => attachment.filename).join(", ")}.`;
|
||||
}
|
||||
|
||||
function buildTextAttachmentPrompt(attachment: ChatTextAttachment) {
|
||||
const truncationNote = attachment.truncated ? ' truncated="true"' : "";
|
||||
return [
|
||||
`Attached text file: ${attachment.filename}${attachment.truncated ? " (content truncated)" : ""}`,
|
||||
`<attached_file filename="${escapeAttribute(attachment.filename)}" mime_type="${escapeAttribute(attachment.mimeType)}"${truncationNote}>`,
|
||||
attachment.text,
|
||||
"</attached_file>",
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
function toOpenAIContent(message: ChatMessage) {
|
||||
const imageAttachments = getImageAttachments(message);
|
||||
const textAttachments = getTextAttachments(message);
|
||||
if (!imageAttachments.length && !textAttachments.length) {
|
||||
return message.content;
|
||||
}
|
||||
|
||||
const parts: Array<Record<string, unknown>> = [];
|
||||
|
||||
for (const attachment of imageAttachments) {
|
||||
parts.push({
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: attachment.dataUrl,
|
||||
detail: "auto",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const imageSummary = buildImageSummaryText(imageAttachments);
|
||||
if (imageSummary) {
|
||||
parts.push({ type: "text", text: imageSummary });
|
||||
}
|
||||
|
||||
for (const attachment of textAttachments) {
|
||||
parts.push({ type: "text", text: buildTextAttachmentPrompt(attachment) });
|
||||
}
|
||||
|
||||
if (message.content.trim()) {
|
||||
parts.push({ type: "text", text: message.content });
|
||||
}
|
||||
|
||||
if (parts.length === 1 && parts[0]?.type === "text" && typeof parts[0].text === "string") {
|
||||
return parts[0].text;
|
||||
}
|
||||
|
||||
return parts;
|
||||
}
|
||||
|
||||
function parseImageDataUrl(attachment: ChatImageAttachment) {
|
||||
const match = attachment.dataUrl.match(/^data:(image\/(?:png|jpeg));base64,([a-z0-9+/=\s]+)$/i);
|
||||
if (!match) {
|
||||
throw new Error(`Invalid image attachment data URL for '${attachment.filename}'.`);
|
||||
}
|
||||
|
||||
const mediaType = match[1].toLowerCase();
|
||||
if (mediaType !== attachment.mimeType) {
|
||||
throw new Error(`Image attachment MIME type mismatch for '${attachment.filename}'.`);
|
||||
}
|
||||
|
||||
return {
|
||||
mediaType,
|
||||
data: match[2].replace(/\s+/g, ""),
|
||||
};
|
||||
}
|
||||
|
||||
function toAnthropicContent(message: ChatMessage) {
|
||||
const imageAttachments = getImageAttachments(message);
|
||||
const textAttachments = getTextAttachments(message);
|
||||
if (!imageAttachments.length && !textAttachments.length) {
|
||||
return message.content;
|
||||
}
|
||||
|
||||
const blocks: Array<Record<string, unknown>> = [];
|
||||
|
||||
for (const attachment of imageAttachments) {
|
||||
const source = parseImageDataUrl(attachment);
|
||||
blocks.push({
|
||||
type: "image",
|
||||
source: {
|
||||
type: "base64",
|
||||
media_type: source.mediaType,
|
||||
data: source.data,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const imageSummary = buildImageSummaryText(imageAttachments);
|
||||
if (imageSummary) {
|
||||
blocks.push({ type: "text", text: imageSummary });
|
||||
}
|
||||
|
||||
for (const attachment of textAttachments) {
|
||||
blocks.push({ type: "text", text: buildTextAttachmentPrompt(attachment) });
|
||||
}
|
||||
|
||||
if (message.content.trim()) {
|
||||
blocks.push({ type: "text", text: message.content });
|
||||
}
|
||||
|
||||
if (blocks.length === 1 && blocks[0]?.type === "text" && typeof blocks[0].text === "string") {
|
||||
return blocks[0].text;
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
|
||||
export function buildOpenAIConversationMessage(message: ChatMessage) {
|
||||
if (message.role === "tool") {
|
||||
const name = message.name?.trim() || "tool";
|
||||
return {
|
||||
role: "user",
|
||||
content: `Tool output (${name}):\n${message.content}`,
|
||||
};
|
||||
}
|
||||
|
||||
const out: Record<string, unknown> = {
|
||||
role: message.role,
|
||||
content: toOpenAIContent(message),
|
||||
};
|
||||
|
||||
if (message.name && (message.role === "assistant" || message.role === "user")) {
|
||||
out.name = message.name;
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
export function getAnthropicSystemPrompt(messages: ChatMessage[]) {
|
||||
return messages.find((message) => message.role === "system")?.content;
|
||||
}
|
||||
|
||||
export function buildAnthropicConversationMessage(message: ChatMessage) {
|
||||
if (message.role === "system") {
|
||||
throw new Error("System messages must be handled separately for Anthropic.");
|
||||
}
|
||||
|
||||
if (message.role === "tool") {
|
||||
const name = message.name?.trim() || "tool";
|
||||
return {
|
||||
role: "user",
|
||||
content: `Tool output (${name}):\n${message.content}`,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
role: message.role === "assistant" ? "assistant" : "user",
|
||||
content: toAnthropicContent(message),
|
||||
};
|
||||
}
|
||||
|
||||
export function buildComparableAttachments(input: unknown): ChatAttachment[] {
|
||||
if (!Array.isArray(input)) return [];
|
||||
|
||||
const attachments: ChatAttachment[] = [];
|
||||
for (const entry of input) {
|
||||
if (!entry || typeof entry !== "object" || Array.isArray(entry)) continue;
|
||||
const record = entry as Record<string, unknown>;
|
||||
const kind = record.kind;
|
||||
const id = typeof record.id === "string" ? record.id : "";
|
||||
const filename = typeof record.filename === "string" ? record.filename : "";
|
||||
const mimeType = typeof record.mimeType === "string" ? record.mimeType : "";
|
||||
const sizeBytes = typeof record.sizeBytes === "number" ? record.sizeBytes : 0;
|
||||
|
||||
if (kind === "image" && typeof record.dataUrl === "string") {
|
||||
attachments.push({
|
||||
kind,
|
||||
id,
|
||||
filename,
|
||||
mimeType: mimeType === "image/png" ? "image/png" : "image/jpeg",
|
||||
sizeBytes,
|
||||
dataUrl: record.dataUrl,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
if (kind === "text" && typeof record.text === "string") {
|
||||
attachments.push({
|
||||
kind,
|
||||
id,
|
||||
filename,
|
||||
mimeType,
|
||||
sizeBytes,
|
||||
text: record.text,
|
||||
truncated: record.truncated === true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return attachments;
|
||||
}
|
||||
@@ -2,6 +2,7 @@ import { performance } from "node:perf_hooks";
|
||||
import { prisma } from "../db.js";
|
||||
import { anthropicClient, openaiClient, xaiClient } from "./providers.js";
|
||||
import { buildToolLogMessageData, runToolAwareOpenAIChat } from "./chat-tools.js";
|
||||
import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js";
|
||||
import type { MultiplexRequest, MultiplexResponse, Provider } from "./types.js";
|
||||
|
||||
function asProviderEnum(p: Provider) {
|
||||
@@ -68,11 +69,8 @@ export async function runMultiplex(req: MultiplexRequest): Promise<MultiplexResp
|
||||
} else if (req.provider === "anthropic") {
|
||||
const client = anthropicClient();
|
||||
|
||||
// Anthropic splits system prompt. We'll convert first system message into system string.
|
||||
const system = req.messages.find((m) => m.role === "system")?.content;
|
||||
const msgs = req.messages
|
||||
.filter((m) => m.role !== "system")
|
||||
.map((m) => ({ role: m.role === "assistant" ? "assistant" : "user", content: m.content }));
|
||||
const system = getAnthropicSystemPrompt(req.messages);
|
||||
const msgs = req.messages.filter((message) => message.role !== "system").map((message) => buildAnthropicConversationMessage(message));
|
||||
|
||||
const r = await client.messages.create({
|
||||
model: req.model,
|
||||
|
||||
@@ -2,6 +2,7 @@ import { performance } from "node:perf_hooks";
|
||||
import { prisma } from "../db.js";
|
||||
import { anthropicClient, openaiClient, xaiClient } from "./providers.js";
|
||||
import { buildToolLogMessageData, runToolAwareOpenAIChatStream, type ToolExecutionEvent } from "./chat-tools.js";
|
||||
import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js";
|
||||
import type { MultiplexRequest, Provider } from "./types.js";
|
||||
|
||||
export type StreamEvent =
|
||||
@@ -88,10 +89,8 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
|
||||
} else if (req.provider === "anthropic") {
|
||||
const client = anthropicClient();
|
||||
|
||||
const system = req.messages.find((m) => m.role === "system")?.content;
|
||||
const msgs = req.messages
|
||||
.filter((m) => m.role !== "system")
|
||||
.map((m) => ({ role: m.role === "assistant" ? "assistant" : "user", content: m.content }));
|
||||
const system = getAnthropicSystemPrompt(req.messages);
|
||||
const msgs = req.messages.filter((message) => message.role !== "system").map((message) => buildAnthropicConversationMessage(message));
|
||||
|
||||
const stream = await client.messages.create({
|
||||
model: req.model,
|
||||
|
||||
@@ -1,9 +1,31 @@
|
||||
export type Provider = "openai" | "anthropic" | "xai";
|
||||
|
||||
// An inline image attachment carried as a base64 data URL.
export type ChatImageAttachment = {
  kind: "image";
  id: string;
  filename: string;
  // Only PNG and JPEG are representable; other types are rejected/coerced upstream.
  mimeType: "image/png" | "image/jpeg";
  sizeBytes: number;
  // Expected shape: data:image/(png|jpeg);base64,<payload> — TODO confirm against validator.
  dataUrl: string;
};

// A plain-text file attachment whose contents are embedded directly.
export type ChatTextAttachment = {
  kind: "text";
  id: string;
  filename: string;
  mimeType: string;
  sizeBytes: number;
  text: string;
  // Set when the attachment text was cut short before being attached.
  truncated?: boolean;
};

// Discriminated union over the `kind` tag.
export type ChatAttachment = ChatImageAttachment | ChatTextAttachment;

// A single conversational turn, optionally carrying attachments.
export type ChatMessage = {
  role: "system" | "user" | "assistant" | "tool";
  content: string;
  name?: string;
  attachments?: ChatAttachment[];
};
|
||||
|
||||
export type MultiplexRequest = {
|
||||
|
||||
@@ -4,23 +4,33 @@ import type { FastifyInstance } from "fastify";
|
||||
import { prisma } from "./db.js";
|
||||
import { requireAdmin } from "./auth.js";
|
||||
import { env } from "./env.js";
|
||||
import { buildComparableAttachments } from "./llm/message-content.js";
|
||||
import { runMultiplex } from "./llm/multiplexer.js";
|
||||
import { runMultiplexStream } from "./llm/streaming.js";
|
||||
import { getModelCatalogSnapshot } from "./llm/model-catalog.js";
|
||||
import { openaiClient } from "./llm/providers.js";
|
||||
import { exaClient } from "./search/exa.js";
|
||||
import type { ChatAttachment } from "./llm/types.js";
|
||||
|
||||
// A chat message as supplied by the client request body, including any
// attachments to be persisted alongside it.
type IncomingChatMessage = {
  role: "system" | "user" | "assistant" | "tool";
  content: string;
  name?: string;
  attachments?: ChatAttachment[];
};
|
||||
|
||||
function sameMessage(
|
||||
a: { role: string; content: string; name?: string | null },
|
||||
b: { role: string; content: string; name?: string | null }
|
||||
a: { role: string; content: string; name?: string | null; metadata?: unknown },
|
||||
b: { role: string; content: string; name?: string | null; attachments?: ChatAttachment[] }
|
||||
) {
|
||||
return a.role === b.role && a.content === b.content && (a.name ?? null) === (b.name ?? null);
|
||||
const existingAttachments = JSON.stringify(buildComparableAttachments((a.metadata as Record<string, unknown> | null)?.attachments ?? null));
|
||||
const incomingAttachments = JSON.stringify(b.attachments ?? []);
|
||||
return (
|
||||
a.role === b.role &&
|
||||
a.content === b.content &&
|
||||
(a.name ?? null) === (b.name ?? null) &&
|
||||
existingAttachments === incomingAttachments
|
||||
);
|
||||
}
|
||||
|
||||
function isToolCallLogMetadata(value: unknown) {
|
||||
@@ -60,10 +70,67 @@ async function storeNonAssistantMessages(chatId: string, messages: IncomingChatM
|
||||
role: m.role as any,
|
||||
content: m.content,
|
||||
name: m.name,
|
||||
metadata: m.attachments?.length ? ({ attachments: m.attachments } as any) : undefined,
|
||||
})),
|
||||
});
|
||||
}
|
||||
|
||||
// Server-side limits on chat attachments.
const MAX_CHAT_ATTACHMENTS = 8;
const MAX_IMAGE_ATTACHMENT_BYTES = 6 * 1024 * 1024; // 6 MiB of raw image bytes
const MAX_TEXT_ATTACHMENT_CHARS = 200_000;
// Larger than MAX_IMAGE_ATTACHMENT_BYTES — presumably to allow for base64
// expansion plus the data-URL prefix; TODO confirm the exact headroom.
const MAX_IMAGE_DATA_URL_CHARS = 8_500_000;

// Validates one attachment; discriminated on the `kind` tag.
const ChatAttachmentSchema = z.discriminatedUnion("kind", [
  z.object({
    kind: z.literal("image"),
    id: z.string().trim().min(1).max(128),
    filename: z.string().trim().min(1).max(255),
    // Only PNG/JPEG images are accepted.
    mimeType: z.enum(["image/png", "image/jpeg"]),
    sizeBytes: z.number().int().positive().max(MAX_IMAGE_ATTACHMENT_BYTES),
    dataUrl: z
      .string()
      .max(MAX_IMAGE_DATA_URL_CHARS)
      .regex(/^data:image\/(?:png|jpeg);base64,[a-z0-9+/=\s]+$/i, "Invalid image data URL"),
  }),
  z.object({
    kind: z.literal("text"),
    id: z.string().trim().min(1).max(128),
    filename: z.string().trim().min(1).max(255),
    mimeType: z.string().trim().min(1).max(127),
    sizeBytes: z.number().int().positive().max(8 * 1024 * 1024),
    text: z.string().max(MAX_TEXT_ATTACHMENT_CHARS),
    truncated: z.boolean().optional(),
  }),
]);
|
||||
|
||||
// One chat message in a completion request, with optional attachments.
const CompletionMessageSchema = z
  .object({
    role: z.enum(["system", "user", "assistant", "tool"]),
    content: z.string(),
    name: z.string().optional(),
    attachments: z.array(ChatAttachmentSchema).max(MAX_CHAT_ATTACHMENTS).optional(),
  })
  .superRefine((value, ctx) => {
    // Attachments are only meaningful on user/assistant/system turns;
    // reject them on tool-output messages.
    if (value.attachments?.length && value.role === "tool") {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message: "Tool messages cannot include attachments.",
        path: ["attachments"],
      });
    }
  });
||||
|
||||
function mergeAttachmentsIntoMetadata(metadata: unknown, attachments?: ChatAttachment[]) {
|
||||
if (!attachments?.length) return metadata as any;
|
||||
if (!metadata || typeof metadata !== "object" || Array.isArray(metadata)) {
|
||||
return { attachments };
|
||||
}
|
||||
return {
|
||||
...(metadata as Record<string, unknown>),
|
||||
attachments,
|
||||
};
|
||||
}
|
||||
|
||||
const SearchRunBody = z.object({
|
||||
query: z.string().trim().min(1).optional(),
|
||||
title: z.string().trim().min(1).optional(),
|
||||
@@ -768,6 +835,7 @@ export async function registerRoutes(app: FastifyInstance) {
|
||||
content: z.string(),
|
||||
name: z.string().optional(),
|
||||
metadata: z.unknown().optional(),
|
||||
attachments: z.array(ChatAttachmentSchema).max(MAX_CHAT_ATTACHMENTS).optional(),
|
||||
});
|
||||
|
||||
const { chatId } = Params.parse(req.params);
|
||||
@@ -779,7 +847,7 @@ export async function registerRoutes(app: FastifyInstance) {
|
||||
role: body.role as any,
|
||||
content: body.content,
|
||||
name: body.name,
|
||||
metadata: body.metadata as any,
|
||||
metadata: mergeAttachmentsIntoMetadata(body.metadata, body.attachments) as any,
|
||||
},
|
||||
});
|
||||
|
||||
@@ -794,13 +862,7 @@ export async function registerRoutes(app: FastifyInstance) {
|
||||
chatId: z.string().optional(),
|
||||
provider: z.enum(["openai", "anthropic", "xai"]),
|
||||
model: z.string().min(1),
|
||||
messages: z.array(
|
||||
z.object({
|
||||
role: z.enum(["system", "user", "assistant", "tool"]),
|
||||
content: z.string(),
|
||||
name: z.string().optional(),
|
||||
})
|
||||
),
|
||||
messages: z.array(CompletionMessageSchema),
|
||||
temperature: z.number().min(0).max(2).optional(),
|
||||
maxTokens: z.number().int().positive().optional(),
|
||||
});
|
||||
@@ -834,13 +896,7 @@ export async function registerRoutes(app: FastifyInstance) {
|
||||
chatId: z.string().optional(),
|
||||
provider: z.enum(["openai", "anthropic", "xai"]),
|
||||
model: z.string().min(1),
|
||||
messages: z.array(
|
||||
z.object({
|
||||
role: z.enum(["system", "user", "assistant", "tool"]),
|
||||
content: z.string(),
|
||||
name: z.string().optional(),
|
||||
})
|
||||
),
|
||||
messages: z.array(CompletionMessageSchema),
|
||||
temperature: z.number().min(0).max(2).optional(),
|
||||
maxTokens: z.number().int().positive().optional(),
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user