diff --git a/docs/api/rest.md b/docs/api/rest.md index 09c9c44..d225143 100644 --- a/docs/api/rest.md +++ b/docs/api/rest.md @@ -40,6 +40,7 @@ Chat upload limits: ``` - OpenAI model lists are filtered to models that are expected to work with the backend's Responses API implementation. - `hermes-agent` is included only when `HERMES_AGENT_API_KEY` is configured. Set it to Hermes `API_SERVER_KEY`, or any non-empty value if that local server does not require auth. `HERMES_AGENT_API_BASE_URL` defaults to `http://127.0.0.1:8642/v1`; set `HERMES_AGENT_MODEL` only when you need an additional fallback/override model id. +- The backend loads provider model lists at startup and refreshes them about once every 24 hours. If a later provider refresh fails, the response keeps the last loaded model list for that provider and sets `error` to the latest failure message. ## Active Runs diff --git a/server/src/index.ts b/server/src/index.ts index e7c52bc..e79e9d1 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -5,7 +5,7 @@ import swaggerUI from "@fastify/swagger-ui"; import sensible from "@fastify/sensible"; import { env } from "./env.js"; import { ensureDatabaseReady } from "./db-init.js"; -import { warmModelCatalog } from "./llm/model-catalog.js"; +import { startModelCatalogRefreshLoop, warmModelCatalog } from "./llm/model-catalog.js"; import { registerRoutes } from "./routes.js"; const app = Fastify({ @@ -21,6 +21,7 @@ const app = Fastify({ await ensureDatabaseReady(app.log); await warmModelCatalog(app.log); +const stopModelCatalogRefreshLoop = startModelCatalogRefreshLoop(app.log); await app.register(cors, { origin: true, @@ -80,6 +81,10 @@ app.setErrorHandler((err, req, reply) => { }); }); +app.addHook("onClose", async () => { + stopModelCatalogRefreshLoop(); +}); + await registerRoutes(app); await app.listen({ port: env.PORT, host: env.HOST }); diff --git a/server/src/llm/model-catalog.ts b/server/src/llm/model-catalog.ts index 411eee2..5e18dc7 100644 --- 
a/server/src/llm/model-catalog.ts +++ b/server/src/llm/model-catalog.ts @@ -13,6 +13,7 @@ export type ModelCatalogSnapshot = Partial | null = null; + function getCatalogProviders(): Provider[] { return isHermesAgentConfigured() ? [...baseProviders, "hermes-agent"] : baseProviders; } @@ -86,17 +89,42 @@ async function refreshProviderModels(provider: Provider, logger?: FastifyBaseLog logger?.info({ provider, modelCount: models.length }, "model catalog loaded"); } catch (err: any) { const message = err?.message ?? String(err); + const previous = modelCatalog[provider]; + const fallbackModels = provider === "hermes-agent" && env.HERMES_AGENT_MODEL ? [env.HERMES_AGENT_MODEL] : []; modelCatalog[provider] = { - models: provider === "hermes-agent" && env.HERMES_AGENT_MODEL ? [env.HERMES_AGENT_MODEL] : [], - loadedAt: new Date().toISOString(), + models: previous?.models.length ? previous.models : fallbackModels, + loadedAt: previous?.loadedAt ?? null, error: message, }; logger?.warn({ provider, err: message }, "failed to load provider model catalog"); } } +/** Refreshes the model list of every provider returned by getCatalogProviders(), running refreshProviderModels for all of them through Promise.all. If a refresh is already in flight (catalogRefreshPromise is set), that same promise is returned instead of starting a second one. The promise resolves to undefined, and the in-flight marker is reset to null in finally once the refresh settles. @param logger Optional logger forwarded to refreshProviderModels. */ export async function refreshModelCatalog(logger?: FastifyBaseLogger) { + if (catalogRefreshPromise) return catalogRefreshPromise; + + catalogRefreshPromise = Promise.all(getCatalogProviders().map((provider) => refreshProviderModels(provider, logger))) + .then(() => undefined) + .finally(() => { + catalogRefreshPromise = null; + }); + + return catalogRefreshPromise; +} + /** Awaits one catalog refresh by delegating to refreshModelCatalog (previously an inline Promise.all over the providers). @param logger Optional logger forwarded to refreshModelCatalog. */ export async function warmModelCatalog(logger?: FastifyBaseLogger) { - await Promise.all(getCatalogProviders().map((provider) => refreshProviderModels(provider, logger))); + await refreshModelCatalog(logger); +} + +/** Starts a setInterval that calls refreshModelCatalog every MODEL_CATALOG_REFRESH_INTERVAL_MS; the returned promise is deliberately discarded with void. timer.unref is invoked when the timer object provides it (in Node.js an unref'd timer does not by itself keep the process running). @param logger Optional logger forwarded to each refresh. @returns A function that stops the loop by clearing the interval. */ export function startModelCatalogRefreshLoop(logger?: FastifyBaseLogger) { + const timer = setInterval(() => { + void refreshModelCatalog(logger); + }, MODEL_CATALOG_REFRESH_INTERVAL_MS); + timer.unref?.(); + + return () => { + clearInterval(timer); + }; } export function getModelCatalogSnapshot(): ModelCatalogSnapshot {