Add attachment support

This commit is contained in:
2026-05-02 19:21:06 -07:00
parent 11e6875de9
commit 38da3cea72
15 changed files with 949 additions and 67 deletions

View File

@@ -4,6 +4,7 @@ import { z } from "zod";
import { env } from "../env.js";
import { exaClient } from "../search/exa.js";
import { searchSearxng } from "../search/searxng.js";
import { buildOpenAIConversationMessage } from "./message-content.js";
import type { ChatMessage } from "./types.js";
const MAX_TOOL_ROUNDS = 4;
@@ -250,23 +251,7 @@ function extractHtmlTitle(html: string) {
}
function normalizeIncomingMessages(messages: ChatMessage[]) {
const normalized = messages.map((m) => {
if (m.role === "tool") {
const name = m.name?.trim() || "tool";
return {
role: "user",
content: `Tool output (${name}):\n${m.content}`,
};
}
if (m.role === "assistant" || m.role === "system" || m.role === "user") {
const out: any = { role: m.role, content: m.content };
if (m.name && (m.role === "assistant" || m.role === "user")) {
out.name = m.name;
}
return out;
}
return { role: "user", content: m.content };
});
const normalized = messages.map((message) => buildOpenAIConversationMessage(message));
return [{ role: "system", content: CHAT_TOOL_SYSTEM_PROMPT }, ...normalized];
}

View File

@@ -0,0 +1,211 @@
import type { ChatAttachment, ChatImageAttachment, ChatMessage, ChatTextAttachment } from "./types.js";
function escapeAttribute(value: string) {
return value.replace(/"/g, """);
}
// Collect only the image attachments of a message (empty array when none).
function getImageAttachments(message: ChatMessage) {
  const images: ChatImageAttachment[] = [];
  for (const attachment of message.attachments ?? []) {
    if (attachment.kind === "image") {
      images.push(attachment);
    }
  }
  return images;
}
// Collect only the text-file attachments of a message (empty array when none).
function getTextAttachments(message: ChatMessage) {
  const texts: ChatTextAttachment[] = [];
  for (const attachment of message.attachments ?? []) {
    if (attachment.kind === "text") {
      texts.push(attachment);
    }
  }
  return texts;
}
// One-line human-readable summary of the attached image filenames,
// or null when there are no image attachments.
function buildImageSummaryText(attachments: ChatImageAttachment[]) {
  if (attachments.length === 0) {
    return null;
  }
  const names = attachments.map((attachment) => attachment.filename).join(", ");
  const label = attachments.length > 1 ? "Attached images" : "Attached image";
  return `${label}: ${names}.`;
}
// Wrap a text attachment in labelled pseudo-XML so the model can tell the
// file content apart from the user's own prose. A truncation marker is added
// both in the human-readable label and as an attribute.
function buildTextAttachmentPrompt(attachment: ChatTextAttachment) {
  const labelSuffix = attachment.truncated ? " (content truncated)" : "";
  const truncationNote = attachment.truncated ? ' truncated="true"' : "";
  const openTag =
    `<attached_file filename="${escapeAttribute(attachment.filename)}"` +
    ` mime_type="${escapeAttribute(attachment.mimeType)}"${truncationNote}>`;
  const lines = [
    `Attached text file: ${attachment.filename}${labelSuffix}`,
    openTag,
    attachment.text,
    "</attached_file>",
  ];
  return lines.join("\n");
}
function toOpenAIContent(message: ChatMessage) {
const imageAttachments = getImageAttachments(message);
const textAttachments = getTextAttachments(message);
if (!imageAttachments.length && !textAttachments.length) {
return message.content;
}
const parts: Array<Record<string, unknown>> = [];
for (const attachment of imageAttachments) {
parts.push({
type: "image_url",
image_url: {
url: attachment.dataUrl,
detail: "auto",
},
});
}
const imageSummary = buildImageSummaryText(imageAttachments);
if (imageSummary) {
parts.push({ type: "text", text: imageSummary });
}
for (const attachment of textAttachments) {
parts.push({ type: "text", text: buildTextAttachmentPrompt(attachment) });
}
if (message.content.trim()) {
parts.push({ type: "text", text: message.content });
}
if (parts.length === 1 && parts[0]?.type === "text" && typeof parts[0].text === "string") {
return parts[0].text;
}
return parts;
}
// Decode a png/jpeg base64 data URL, returning the lowercased media type and
// the base64 payload with embedded whitespace stripped. Throws when the URL
// is malformed or its media type disagrees with the attachment's declared
// mimeType.
function parseImageDataUrl(attachment: ChatImageAttachment) {
  const dataUrlPattern = /^data:(image\/(?:png|jpeg));base64,([a-z0-9+/=\s]+)$/i;
  const match = dataUrlPattern.exec(attachment.dataUrl);
  if (match === null) {
    throw new Error(`Invalid image attachment data URL for '${attachment.filename}'.`);
  }
  const mediaType = match[1].toLowerCase();
  if (mediaType !== attachment.mimeType) {
    throw new Error(`Image attachment MIME type mismatch for '${attachment.filename}'.`);
  }
  const data = match[2].replace(/\s+/g, "");
  return { mediaType, data };
}
function toAnthropicContent(message: ChatMessage) {
const imageAttachments = getImageAttachments(message);
const textAttachments = getTextAttachments(message);
if (!imageAttachments.length && !textAttachments.length) {
return message.content;
}
const blocks: Array<Record<string, unknown>> = [];
for (const attachment of imageAttachments) {
const source = parseImageDataUrl(attachment);
blocks.push({
type: "image",
source: {
type: "base64",
media_type: source.mediaType,
data: source.data,
},
});
}
const imageSummary = buildImageSummaryText(imageAttachments);
if (imageSummary) {
blocks.push({ type: "text", text: imageSummary });
}
for (const attachment of textAttachments) {
blocks.push({ type: "text", text: buildTextAttachmentPrompt(attachment) });
}
if (message.content.trim()) {
blocks.push({ type: "text", text: message.content });
}
if (blocks.length === 1 && blocks[0]?.type === "text" && typeof blocks[0].text === "string") {
return blocks[0].text;
}
return blocks;
}
// Convert one ChatMessage into the shape the OpenAI chat API expects.
// Tool messages are folded into user messages prefixed with the tool name;
// `name` is forwarded only for assistant/user roles.
export function buildOpenAIConversationMessage(message: ChatMessage) {
  if (message.role === "tool") {
    const toolName = message.name?.trim() || "tool";
    return {
      role: "user",
      content: `Tool output (${toolName}):\n${message.content}`,
    };
  }
  const result: Record<string, unknown> = {
    role: message.role,
    content: toOpenAIContent(message),
  };
  const roleAllowsName = message.role === "assistant" || message.role === "user";
  if (roleAllowsName && message.name) {
    result.name = message.name;
  }
  return result;
}
// Content of the first system message, or undefined when there is none.
// Anthropic takes the system prompt as a separate top-level parameter rather
// than as an in-band message.
export function getAnthropicSystemPrompt(messages: ChatMessage[]) {
  for (const message of messages) {
    if (message.role === "system") {
      return message.content;
    }
  }
  return undefined;
}
// Convert one non-system ChatMessage into Anthropic's message shape.
// Tool output is folded into a user message prefixed with the tool name;
// any role other than "assistant" maps to "user". System messages must be
// extracted first (see getAnthropicSystemPrompt) and are rejected here.
export function buildAnthropicConversationMessage(message: ChatMessage) {
  if (message.role === "system") {
    throw new Error("System messages must be handled separately for Anthropic.");
  }
  if (message.role === "tool") {
    const toolName = message.name?.trim() || "tool";
    return {
      role: "user",
      content: `Tool output (${toolName}):\n${message.content}`,
    };
  }
  const role = message.role === "assistant" ? "assistant" : "user";
  return { role, content: toAnthropicContent(message) };
}
// Defensively re-parse untyped attachment input (e.g. JSON from a client)
// into well-formed ChatAttachment values. Malformed entries are silently
// dropped; missing string/number fields fall back to ""/0.
export function buildComparableAttachments(input: unknown): ChatAttachment[] {
  if (!Array.isArray(input)) {
    return [];
  }
  const result: ChatAttachment[] = [];
  for (const candidate of input) {
    if (candidate === null || typeof candidate !== "object" || Array.isArray(candidate)) {
      continue;
    }
    const record = candidate as Record<string, unknown>;
    const id = typeof record.id === "string" ? record.id : "";
    const filename = typeof record.filename === "string" ? record.filename : "";
    const mimeType = typeof record.mimeType === "string" ? record.mimeType : "";
    const sizeBytes = typeof record.sizeBytes === "number" ? record.sizeBytes : 0;
    if (record.kind === "image" && typeof record.dataUrl === "string") {
      result.push({
        kind: "image",
        id,
        filename,
        // Anything other than PNG is normalized to JPEG to fit the
        // ChatImageAttachment mimeType union.
        mimeType: mimeType === "image/png" ? "image/png" : "image/jpeg",
        sizeBytes,
        dataUrl: record.dataUrl,
      });
    } else if (record.kind === "text" && typeof record.text === "string") {
      result.push({
        kind: "text",
        id,
        filename,
        mimeType,
        sizeBytes,
        text: record.text,
        truncated: record.truncated === true,
      });
    }
  }
  return result;
}

View File

@@ -2,6 +2,7 @@ import { performance } from "node:perf_hooks";
import { prisma } from "../db.js";
import { anthropicClient, openaiClient, xaiClient } from "./providers.js";
import { buildToolLogMessageData, runToolAwareOpenAIChat } from "./chat-tools.js";
import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js";
import type { MultiplexRequest, MultiplexResponse, Provider } from "./types.js";
function asProviderEnum(p: Provider) {
@@ -68,11 +69,8 @@ export async function runMultiplex(req: MultiplexRequest): Promise<MultiplexResp
} else if (req.provider === "anthropic") {
const client = anthropicClient();
// Anthropic splits system prompt. We'll convert first system message into system string.
const system = req.messages.find((m) => m.role === "system")?.content;
const msgs = req.messages
.filter((m) => m.role !== "system")
.map((m) => ({ role: m.role === "assistant" ? "assistant" : "user", content: m.content }));
const system = getAnthropicSystemPrompt(req.messages);
const msgs = req.messages.filter((message) => message.role !== "system").map((message) => buildAnthropicConversationMessage(message));
const r = await client.messages.create({
model: req.model,

View File

@@ -2,6 +2,7 @@ import { performance } from "node:perf_hooks";
import { prisma } from "../db.js";
import { anthropicClient, openaiClient, xaiClient } from "./providers.js";
import { buildToolLogMessageData, runToolAwareOpenAIChatStream, type ToolExecutionEvent } from "./chat-tools.js";
import { buildAnthropicConversationMessage, getAnthropicSystemPrompt } from "./message-content.js";
import type { MultiplexRequest, Provider } from "./types.js";
export type StreamEvent =
@@ -88,10 +89,8 @@ export async function* runMultiplexStream(req: MultiplexRequest): AsyncGenerator
} else if (req.provider === "anthropic") {
const client = anthropicClient();
const system = req.messages.find((m) => m.role === "system")?.content;
const msgs = req.messages
.filter((m) => m.role !== "system")
.map((m) => ({ role: m.role === "assistant" ? "assistant" : "user", content: m.content }));
const system = getAnthropicSystemPrompt(req.messages);
const msgs = req.messages.filter((message) => message.role !== "system").map((message) => buildAnthropicConversationMessage(message));
const stream = await client.messages.create({
model: req.model,

View File

@@ -1,9 +1,31 @@
/** LLM backends this service can multiplex across. */
export type Provider = "openai" | "anthropic" | "xai";
/** An image attached to a chat message, carried inline as a base64 data URL. */
export type ChatImageAttachment = {
kind: "image";
// Opaque identifier for this attachment.
id: string;
filename: string;
// Images are restricted to PNG or JPEG.
mimeType: "image/png" | "image/jpeg";
sizeBytes: number;
// Full `data:image/...;base64,...` URL holding the image bytes.
dataUrl: string;
};
/** A text file attached to a chat message, carried inline as a string. */
export type ChatTextAttachment = {
kind: "text";
id: string;
filename: string;
// Free-form MIME type — not restricted the way image attachments are.
mimeType: string;
sizeBytes: number;
// File content; may have been cut short, in which case `truncated` is true.
text: string;
truncated?: boolean;
};
/** Discriminated union over attachment variants (`kind` is the tag). */
export type ChatAttachment = ChatImageAttachment | ChatTextAttachment;
/** One conversation turn; `name` labels tool output, attachments are optional. */
export type ChatMessage = {
role: "system" | "user" | "assistant" | "tool";
content: string;
name?: string;
attachments?: ChatAttachment[];
};
export type MultiplexRequest = {