server: refresh model catalog daily

This commit is contained in:
2026-05-20 22:08:45 -07:00
parent 411790ee04
commit f79e5e02c5
3 changed files with 38 additions and 4 deletions

View File

@@ -40,6 +40,7 @@ Chat upload limits:
``` ```
- OpenAI model lists are filtered to models that are expected to work with the backend's Responses API implementation. - OpenAI model lists are filtered to models that are expected to work with the backend's Responses API implementation.
- `hermes-agent` is included only when `HERMES_AGENT_API_KEY` is configured. Set it to Hermes `API_SERVER_KEY`, or any non-empty value if that local server does not require auth. `HERMES_AGENT_API_BASE_URL` defaults to `http://127.0.0.1:8642/v1`; set `HERMES_AGENT_MODEL` only when you need an additional fallback/override model id. - `hermes-agent` is included only when `HERMES_AGENT_API_KEY` is configured. Set it to Hermes `API_SERVER_KEY`, or any non-empty value if that local server does not require auth. `HERMES_AGENT_API_BASE_URL` defaults to `http://127.0.0.1:8642/v1`; set `HERMES_AGENT_MODEL` only when you need an additional fallback/override model id.
- The backend loads provider model lists at startup and refreshes them about once every 24 hours. If a later provider refresh fails, the response keeps the last loaded model list for that provider and sets `error` to the latest failure message.
## Active Runs ## Active Runs

View File

@@ -5,7 +5,7 @@ import swaggerUI from "@fastify/swagger-ui";
import sensible from "@fastify/sensible"; import sensible from "@fastify/sensible";
import { env } from "./env.js"; import { env } from "./env.js";
import { ensureDatabaseReady } from "./db-init.js"; import { ensureDatabaseReady } from "./db-init.js";
import { warmModelCatalog } from "./llm/model-catalog.js"; import { startModelCatalogRefreshLoop, warmModelCatalog } from "./llm/model-catalog.js";
import { registerRoutes } from "./routes.js"; import { registerRoutes } from "./routes.js";
const app = Fastify({ const app = Fastify({
@@ -21,6 +21,7 @@ const app = Fastify({
await ensureDatabaseReady(app.log); await ensureDatabaseReady(app.log);
await warmModelCatalog(app.log); await warmModelCatalog(app.log);
const stopModelCatalogRefreshLoop = startModelCatalogRefreshLoop(app.log);
await app.register(cors, { await app.register(cors, {
origin: true, origin: true,
@@ -80,6 +81,10 @@ app.setErrorHandler((err, req, reply) => {
}); });
}); });
app.addHook("onClose", async () => {
stopModelCatalogRefreshLoop();
});
await registerRoutes(app); await registerRoutes(app);
await app.listen({ port: env.PORT, host: env.HOST }); await app.listen({ port: env.PORT, host: env.HOST });

View File

@@ -13,6 +13,7 @@ export type ModelCatalogSnapshot = Partial<Record<Provider, ProviderModelSnapsho
const baseProviders: Provider[] = ["openai", "anthropic", "xai"]; const baseProviders: Provider[] = ["openai", "anthropic", "xai"];
const MODEL_FETCH_TIMEOUT_MS = 15000; const MODEL_FETCH_TIMEOUT_MS = 15000;
const MODEL_CATALOG_REFRESH_INTERVAL_MS = 24 * 60 * 60 * 1000;
const modelCatalog: ModelCatalogSnapshot = { const modelCatalog: ModelCatalogSnapshot = {
openai: { models: [], loadedAt: null, error: null }, openai: { models: [], loadedAt: null, error: null },
@@ -20,6 +21,8 @@ const modelCatalog: ModelCatalogSnapshot = {
xai: { models: [], loadedAt: null, error: null }, xai: { models: [], loadedAt: null, error: null },
}; };
let catalogRefreshPromise: Promise<void> | null = null;
function getCatalogProviders(): Provider[] { function getCatalogProviders(): Provider[] {
return isHermesAgentConfigured() ? [...baseProviders, "hermes-agent"] : baseProviders; return isHermesAgentConfigured() ? [...baseProviders, "hermes-agent"] : baseProviders;
} }
@@ -86,17 +89,42 @@ async function refreshProviderModels(provider: Provider, logger?: FastifyBaseLog
logger?.info({ provider, modelCount: models.length }, "model catalog loaded"); logger?.info({ provider, modelCount: models.length }, "model catalog loaded");
} catch (err: any) { } catch (err: any) {
const message = err?.message ?? String(err); const message = err?.message ?? String(err);
const previous = modelCatalog[provider];
const fallbackModels = provider === "hermes-agent" && env.HERMES_AGENT_MODEL ? [env.HERMES_AGENT_MODEL] : [];
modelCatalog[provider] = { modelCatalog[provider] = {
models: provider === "hermes-agent" && env.HERMES_AGENT_MODEL ? [env.HERMES_AGENT_MODEL] : [], models: previous?.models.length ? previous.models : fallbackModels,
loadedAt: new Date().toISOString(), loadedAt: previous?.loadedAt ?? null,
error: message, error: message,
}; };
logger?.warn({ provider, err: message }, "failed to load provider model catalog"); logger?.warn({ provider, err: message }, "failed to load provider model catalog");
} }
} }
/**
 * Refreshes the model list of every provider returned by `getCatalogProviders()`.
 *
 * A call made while an earlier refresh is still pending returns that pending
 * promise instead of starting a second round of provider requests. The shared
 * promise is reset to `null` once it settles, so the next call starts fresh.
 *
 * @param logger Optional logger forwarded to each per-provider refresh.
 * @returns A promise that resolves with no value once all provider refreshes finish.
 */
export async function refreshModelCatalog(logger?: FastifyBaseLogger) {
  // Reuse the refresh that is already running, if any.
  const pending = catalogRefreshPromise;
  if (pending) {
    return pending;
  }

  // Start one refresh per provider; they run concurrently.
  const providerRefreshes = getCatalogProviders().map((provider) => refreshProviderModels(provider, logger));

  const started: Promise<void> = Promise.all(providerRefreshes)
    .then(() => undefined)
    .finally(() => {
      // Settled (either way): allow the next call to start a new refresh.
      catalogRefreshPromise = null;
    });

  catalogRefreshPromise = started;
  return started;
}
export async function warmModelCatalog(logger?: FastifyBaseLogger) { export async function warmModelCatalog(logger?: FastifyBaseLogger) {
await Promise.all(getCatalogProviders().map((provider) => refreshProviderModels(provider, logger))); await refreshModelCatalog(logger);
}
/**
 * Starts a repeating timer that calls `refreshModelCatalog` every
 * `MODEL_CATALOG_REFRESH_INTERVAL_MS` milliseconds.
 *
 * The first timer-driven refresh fires only after one full interval has
 * elapsed; this function does not trigger an immediate refresh itself.
 *
 * @param logger Optional logger forwarded to each refresh.
 * @returns A function that stops the loop by clearing the interval timer.
 */
export function startModelCatalogRefreshLoop(logger?: FastifyBaseLogger) {
const timer = setInterval(() => {
// Fire-and-forget: the returned promise is deliberately not awaited.
void refreshModelCatalog(logger);
}, MODEL_CATALOG_REFRESH_INTERVAL_MS);
// Call unref only if the timer object provides it. In Node.js this keeps the
// pending timer from being the sole reason the process stays alive.
timer.unref?.();
return () => {
clearInterval(timer);
};
} }
export function getModelCatalogSnapshot(): ModelCatalogSnapshot { export function getModelCatalogSnapshot(): ModelCatalogSnapshot {