diff --git a/Dockerfile b/Dockerfile
index cc49052..aaab698 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -22,6 +22,7 @@ COPY . .
RUN mkdir -p /archives && chmod 0777 /archives
VOLUME ["/archives"]
+EXPOSE 5733
ENTRYPOINT ["dumb-init", "--", "/app/scripts/archive-worker-entrypoint.sh"]
CMD ["help"]
diff --git a/Dockerfile.web b/Dockerfile.web
new file mode 100644
index 0000000..2f7dfe1
--- /dev/null
+++ b/Dockerfile.web
@@ -0,0 +1,16 @@
+# Runtime image shared by the frontend and backend Node servers; the compose
+# file selects which server runs by overriding the command.
+FROM node:22-slim
+
+WORKDIR /app
+
+ENV NODE_ENV=production \
+    PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
+
+# Install production dependencies before copying sources so this layer stays
+# cached until the manifests change; drop the npm cache in the same layer so it
+# does not end up in the image.
+COPY package.json package-lock.json ./
+RUN npm ci --omit=dev && npm cache clean --force
+
+COPY src ./src
+COPY public ./public
+
+# Run as the unprivileged "node" user shipped with the official image. The
+# archive directory is pre-created and owned by that user, and ARCHIVE_PATH
+# (read by src/backend-server.mjs) defaults to it so the backend can write its
+# index without root. NOTE(review): bind mounts must be writable by this user.
+RUN mkdir -p /archives && chown node:node /archives
+ENV ARCHIVE_PATH=/archives
+USER node
+
+EXPOSE 5731 5732
+
+CMD ["node", "src/backend-server.mjs"]
diff --git a/README.md b/README.md
index ac0fc2b..fcaa6c1 100644
--- a/README.md
+++ b/README.md
@@ -51,3 +51,23 @@ For visual debugging, expose VNC from the worker:
```
The worker image starts Xvfb internally, so callers do not need to mount the host X11 socket or override the entrypoint.
+
+## Web UI
+
+The web path is split into three roles:
+
+- `src/frontend-server.mjs` serves the static UI and proxies `/api/*` and `/archives/*` to the backend.
+- `src/backend-server.mjs` manages archive lookup, job state, and the archive index.
+- `src/worker-server.mjs` runs inside the browser worker container and wraps `archivePage()` over HTTP.
+
+Run the full stack with:
+
+```sh
+docker compose -f docker-compose.example.yml up --build
+```
+
+Then open `http://localhost:5731`. To archive a page directly, append its full URL to the site root, for example:
+
+```text
+http://localhost:5731/https://example.com
+```
diff --git a/Tiltfile b/Tiltfile
new file mode 100644
index 0000000..0036a1d
--- /dev/null
+++ b/Tiltfile
@@ -0,0 +1 @@
+docker_compose("docker-compose.example.yml")
diff --git a/docker-compose.example.yml b/docker-compose.example.yml
new file mode 100644
index 0000000..9fbc876
--- /dev/null
+++ b/docker-compose.example.yml
@@ -0,0 +1,66 @@
+services:
+ frontend:
+ build:
+ context: .
+ dockerfile: Dockerfile.web
+ image: local-page-archiver-web:latest
+ command: ["node", "src/frontend-server.mjs"]
+ environment:
+ PORT: "5731"
+ BACKEND_URL: "http://backend:5732"
+ ports:
+ - "5731:5731"
+ depends_on:
+ backend:
+ condition: service_healthy
+ healthcheck:
+ test: ["CMD", "node", "-e", "fetch('http://127.0.0.1:5731/healthz').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
+ interval: 10s
+ timeout: 3s
+ retries: 5
+
+ backend:
+ build:
+ context: .
+ dockerfile: Dockerfile.web
+ image: local-page-archiver-web:latest
+ command: ["node", "src/backend-server.mjs"]
+ environment:
+ PORT: "5732"
+ ARCHIVE_PATH: /archives
+ ARCHIVE_WORKER_URL: "http://browser:5733"
+ PUBLIC_ARCHIVES_PATH: /archives
+ volumes:
+ - archives:/archives
+ depends_on:
+ browser:
+ condition: service_healthy
+ healthcheck:
+ test: ["CMD", "node", "-e", "fetch('http://127.0.0.1:5732/healthz').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
+ interval: 10s
+ timeout: 3s
+ retries: 5
+
+ browser:
+ build:
+ context: .
+ dockerfile: Dockerfile
+ image: local-page-archiver-browser:latest
+ command: ["serve-worker"]
+ environment:
+ PORT: "5733"
+ ARCHIVE_PATH: /archives
+ ARCHIVE_WORKER_XVFB: "1"
+ volumes:
+ - archives:/archives
+ expose:
+ - "5733"
+ shm_size: 1gb
+ healthcheck:
+ test: ["CMD", "node", "-e", "fetch('http://127.0.0.1:5733/healthz').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
+ interval: 10s
+ timeout: 3s
+ retries: 10
+
+volumes:
+ archives:
diff --git a/package.json b/package.json
index 6002cfd..1356b16 100644
--- a/package.json
+++ b/package.json
@@ -12,6 +12,9 @@
"archive": "node src/cli.mjs archive",
"container:archive": "node src/container-runner.mjs archive",
"container:build": "node src/container-runner.mjs build",
+ "start:backend": "node src/backend-server.mjs",
+ "start:frontend": "node src/frontend-server.mjs",
+ "start:worker": "node src/worker-server.mjs",
"test": "node --test test/*.test.mjs",
"install-browsers": "playwright install chromium"
},
diff --git a/public/assets/app.css b/public/assets/app.css
new file mode 100644
index 0000000..ebe7ad2
--- /dev/null
+++ b/public/assets/app.css
@@ -0,0 +1,153 @@
+:root {
+ color-scheme: light;
+ --bg: #f6f5f1;
+ --surface: #ffffff;
+ --ink: #161616;
+ --muted: #696963;
+ --line: #d8d6ce;
+ --accent: #2f7664;
+ --accent-strong: #245d50;
+ --danger: #a43d32;
+ font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+}
+
+* {
+ box-sizing: border-box;
+}
+
+html,
+body {
+ min-height: 100%;
+}
+
+body {
+ margin: 0;
+ background: var(--bg);
+ color: var(--ink);
+}
+
+button,
+input {
+ font: inherit;
+ letter-spacing: 0;
+}
+
+.shell {
+ min-height: 100vh;
+ display: grid;
+ place-items: center;
+ padding: 24px;
+}
+
+.archive-box {
+ width: min(680px, 100%);
+ background: var(--surface);
+ border: 1px solid var(--line);
+ border-radius: 8px;
+ padding: 12px;
+ box-shadow: 0 18px 42px rgba(28, 25, 19, 0.08);
+}
+
+.input-row {
+ display: grid;
+ grid-template-columns: 1fr auto;
+ gap: 10px;
+}
+
+input {
+ min-width: 0;
+ width: 100%;
+ height: 48px;
+ border: 1px solid var(--line);
+ border-radius: 6px;
+ color: var(--ink);
+ background: #fbfaf7;
+ padding: 0 14px;
+ outline: none;
+}
+
+input:focus {
+ border-color: var(--accent);
+ box-shadow: 0 0 0 3px rgba(47, 118, 100, 0.16);
+}
+
+button {
+ height: 48px;
+ min-width: 112px;
+ border: 0;
+ border-radius: 6px;
+ color: #ffffff;
+ background: var(--accent);
+ padding: 0 18px;
+ cursor: pointer;
+}
+
+button:hover {
+ background: var(--accent-strong);
+}
+
+button:disabled {
+ cursor: wait;
+ opacity: 0.72;
+}
+
+.progress-wrap {
+ padding-top: 12px;
+}
+
+.progress-track {
+ height: 6px;
+ overflow: hidden;
+ border-radius: 999px;
+ background: #e7e4dc;
+}
+
+.progress-bar {
+ width: 0%;
+ height: 100%;
+ border-radius: inherit;
+ background: var(--accent);
+ transition: width 220ms ease;
+}
+
+.status-line {
+ min-height: 22px;
+ margin-top: 8px;
+ color: var(--muted);
+ font-size: 14px;
+ line-height: 22px;
+}
+
+.status-line.error {
+ color: var(--danger);
+}
+
+.sr-only {
+ position: absolute;
+ width: 1px;
+ height: 1px;
+ overflow: hidden;
+ clip: rect(0, 0, 0, 0);
+ white-space: nowrap;
+ clip-path: inset(50%);
+}
+
+@media (max-width: 560px) {
+ .shell {
+ align-items: start;
+ padding: 16px;
+ padding-top: 20vh;
+ }
+
+ .archive-box {
+ padding: 10px;
+ }
+
+ .input-row {
+ grid-template-columns: 1fr;
+ }
+
+ button {
+ width: 100%;
+ }
+}
diff --git a/public/assets/app.js b/public/assets/app.js
new file mode 100644
index 0000000..445a5fd
--- /dev/null
+++ b/public/assets/app.js
@@ -0,0 +1,169 @@
+const form = document.querySelector("#archive-form");
+const input = document.querySelector("#archive-url");
+const button = document.querySelector("#archive-submit");
+const progressWrap = document.querySelector("#progress-wrap");
+const progressBar = document.querySelector("#progress-bar");
+const statusLine = document.querySelector("#status-line");
+
+let pollTimer = null;
+let visualTimer = null;
+let startedAt = Date.now();
+
+form.addEventListener("submit", (event) => {
+ event.preventDefault();
+ submitArchive(input.value);
+});
+
+const pathUrl = urlFromPath();
+if (pathUrl) {
+ input.value = pathUrl;
+ submitArchive(pathUrl);
+} else {
+ input.focus();
+}
+
+async function submitArchive(rawUrl) {
+ stopTimers();
+ setBusy(true);
+ setStatus("Checking", 8);
+
+ try {
+ const response = await fetch("/api/archives", {
+ method: "POST",
+ headers: {
+ "content-type": "application/json"
+ },
+ body: JSON.stringify({ url: rawUrl })
+ });
+ const data = await readApiResponse(response);
+ if (data.archive?.archiveUrl) {
+ openArchive(data.archive.archiveUrl);
+ return;
+ }
+ if (data.job?.id) {
+ watchJob(data.job);
+ return;
+ }
+ throw new Error(data.error || "Archive did not start");
+ } catch (error) {
+ setError(error.message || "Archive failed");
+ setBusy(false);
+ }
+}
+
+function watchJob(job) {
+ startedAt = Date.parse(job.startedAt || job.createdAt) || Date.now();
+ updateFromJob(job);
+ visualTimer = window.setInterval(updateVisualProgress, 250);
+ pollTimer = window.setInterval(async () => {
+ try {
+ const response = await fetch(`/api/jobs/${encodeURIComponent(job.id)}`);
+ const data = await readApiResponse(response);
+ updateFromJob(data.job);
+ } catch (error) {
+ stopTimers();
+ setError(error.message || "Archive failed");
+ setBusy(false);
+ }
+ }, 850);
+}
+
+function updateFromJob(job) {
+ if (job.status === "done" && job.archive?.archiveUrl) {
+ stopTimers();
+ setStatus("Opening", 100);
+ openArchive(job.archive.archiveUrl);
+ return;
+ }
+
+ if (job.status === "failed") {
+ stopTimers();
+ setError(job.error || "Archive failed");
+ setBusy(false);
+ return;
+ }
+
+ startedAt = Date.parse(job.startedAt || job.createdAt) || startedAt;
+ const elapsed = Math.max(0, Math.round((Date.now() - startedAt) / 1000));
+ const label = job.status === "queued" ? "Queued" : `Archiving ${elapsed}s`;
+ setStatus(label, optimisticProgress());
+}
+
+function updateVisualProgress() {
+ if (!progressWrap.hidden) {
+ progressBar.style.width = `${optimisticProgress()}%`;
+ }
+}
+
+function optimisticProgress() {
+ const elapsed = Math.max(0, (Date.now() - startedAt) / 1000);
+ if (elapsed < 1) {
+ return 12;
+ }
+ if (elapsed < 12) {
+ return Math.min(88, 12 + elapsed * 6.3);
+ }
+ return Math.min(96, 88 + (elapsed - 12) * 0.6);
+}
+
+async function readApiResponse(response) {
+ const data = await response.json().catch(() => null);
+ if (!response.ok || data?.ok === false) {
+ throw new Error(data?.error || `Request failed with ${response.status}`);
+ }
+ return data;
+}
+
+function setBusy(isBusy) {
+ button.disabled = isBusy;
+ input.readOnly = isBusy;
+}
+
+function setStatus(text, progress) {
+ progressWrap.hidden = false;
+ statusLine.classList.remove("error");
+ statusLine.textContent = text;
+ progressBar.style.width = `${Math.max(0, Math.min(100, progress))}%`;
+}
+
+function setError(text) {
+ progressWrap.hidden = false;
+ statusLine.classList.add("error");
+ statusLine.textContent = text;
+ progressBar.style.width = "100%";
+}
+
+function stopTimers() {
+ if (pollTimer) {
+ window.clearInterval(pollTimer);
+ pollTimer = null;
+ }
+ if (visualTimer) {
+ window.clearInterval(visualTimer);
+ visualTimer = null;
+ }
+}
+
+function openArchive(archiveUrl) {
+ window.location.assign(archiveUrl);
+}
+
+function urlFromPath() {
+ const rawPath = window.location.pathname.replace(/^\/+/, "");
+ if (!rawPath || rawPath.startsWith("assets/") || rawPath.startsWith("api/") || rawPath.startsWith("archives/")) {
+ return "";
+ }
+
+ let decoded;
+ try {
+ decoded = decodeURIComponent(rawPath);
+ } catch {
+ return "";
+ }
+
+ if (!/^https?:\/\//i.test(decoded)) {
+ return "";
+ }
+
+ return `${decoded}${window.location.search}${window.location.hash}`;
+}
diff --git a/public/index.html b/public/index.html
new file mode 100644
index 0000000..b82cf84
--- /dev/null
+++ b/public/index.html
@@ -0,0 +1,27 @@
+
+
+
+
+
+ Archive
+
+
+
+
+
+
+
+
+
diff --git a/scripts/archive-worker-entrypoint.sh b/scripts/archive-worker-entrypoint.sh
index 541d88a..27c0655 100755
--- a/scripts/archive-worker-entrypoint.sh
+++ b/scripts/archive-worker-entrypoint.sh
@@ -52,6 +52,10 @@ case "$1" in
archive|help)
set -- node src/cli.mjs "$@"
;;
+ serve-worker)
+ shift
+ set -- node src/worker-server.mjs "$@"
+ ;;
esac
"$@" &
diff --git a/src/archive-catalog.mjs b/src/archive-catalog.mjs
new file mode 100644
index 0000000..4f1c6d7
--- /dev/null
+++ b/src/archive-catalog.mjs
@@ -0,0 +1,250 @@
+import crypto from "node:crypto";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { defaultArchivePath } from "./asset-inliner.mjs";
+
+const INDEX_FILE = ".archive-index.json";
+const INDEX_VERSION = 1;
+const COMMENT_RE = //;
+
+export function normalizeArchiveUrl(rawUrl) {
+ const text = String(rawUrl || "").trim();
+ if (!text) {
+ throw new Error("URL is required");
+ }
+
+ let url;
+ try {
+ url = new URL(text);
+ } catch {
+ throw new Error("Enter a valid URL");
+ }
+
+ if (url.protocol !== "http:" && url.protocol !== "https:") {
+ throw new Error("Only http and https URLs can be archived");
+ }
+
+ return url.href;
+}
+
+export function archiveIdForUrl(sourceUrl) {
+ const url = new URL(normalizeArchiveUrl(sourceUrl));
+ const stem =
+ `${url.hostname}${url.pathname}`
+ .replace(/\/+$/, "")
+ .replace(/[^a-z0-9]+/gi, "-")
+ .replace(/^-+|-+$/g, "")
+ .slice(0, 72) || "archive";
+ const digest = crypto.createHash("sha256").update(url.href).digest("hex").slice(0, 16);
+ return `${stem}-${digest}`;
+}
+
+export function archiveFileNameForUrl(sourceUrl) {
+ return `${archiveIdForUrl(sourceUrl)}.html`;
+}
+
+export class ArchiveCatalog {
+ constructor(options = {}) {
+ this.archivePath = path.resolve(options.archivePath || defaultArchivePath());
+ this.publicBasePath = options.publicBasePath || "/archives";
+ this.indexPath = path.join(this.archivePath, INDEX_FILE);
+ this.index = {
+ version: INDEX_VERSION,
+ archives: {}
+ };
+ this.loadPromise = null;
+ this.savePromise = Promise.resolve();
+ }
+
+ async findByUrl(rawUrl) {
+ const sourceUrl = normalizeArchiveUrl(rawUrl);
+ await this.ensureLoaded();
+
+ const indexed = this.index.archives[sourceUrl];
+ if (indexed && await this.hasArchiveFile(indexed.fileName)) {
+ return this.toPublicRecord(indexed);
+ }
+
+ if (indexed) {
+ delete this.index.archives[sourceUrl];
+ await this.saveIndex();
+ }
+
+ const stableFileName = archiveFileNameForUrl(sourceUrl);
+ if (await this.hasArchiveFile(stableFileName)) {
+ const record = this.upsertRecord(sourceUrl, {
+ id: path.basename(stableFileName, ".html"),
+ fileName: stableFileName
+ });
+ await this.saveIndex();
+ return this.toPublicRecord(record);
+ }
+
+ return null;
+ }
+
+ async recordResult(rawUrl, result) {
+ const sourceUrl = normalizeArchiveUrl(rawUrl);
+ await this.ensureLoaded();
+
+ const fileName = path.basename(result.filePath || `${result.id}.html`);
+ const id = result.id || path.basename(fileName, ".html");
+ const record = this.upsertRecord(sourceUrl, {
+ id,
+ fileName,
+ warningsCount: Array.isArray(result.warnings) ? result.warnings.length : 0,
+ externalAssetsCount: Array.isArray(result.externalAssets) ? result.externalAssets.length : 0
+ });
+ await this.saveIndex();
+ return this.toPublicRecord(record);
+ }
+
+ async ensureLoaded() {
+ if (!this.loadPromise) {
+ this.loadPromise = this.loadIndex();
+ }
+ await this.loadPromise;
+ }
+
+ async loadIndex() {
+ await fs.mkdir(this.archivePath, { recursive: true });
+ try {
+ const data = JSON.parse(await fs.readFile(this.indexPath, "utf8"));
+ if (data && data.version === INDEX_VERSION && data.archives && typeof data.archives === "object") {
+ this.index = data;
+ }
+ } catch (error) {
+ if (error.code !== "ENOENT") {
+ throw error;
+ }
+ }
+
+ if (await this.scanArchiveFiles()) {
+ await this.saveIndex();
+ }
+ }
+
+ async scanArchiveFiles() {
+ let changed = false;
+ const entries = await fs.readdir(this.archivePath, { withFileTypes: true }).catch((error) => {
+ if (error.code === "ENOENT") {
+ return [];
+ }
+ throw error;
+ });
+
+ for (const entry of entries) {
+ if (!entry.isFile() || !entry.name.endsWith(".html")) {
+ continue;
+ }
+
+ const filePath = path.join(this.archivePath, entry.name);
+ const metadata = await readArchiveMetadata(filePath);
+ if (!metadata?.sourceUrl) {
+ continue;
+ }
+
+ let sourceUrl;
+ try {
+ sourceUrl = normalizeArchiveUrl(metadata.sourceUrl);
+ } catch {
+ continue;
+ }
+ const current = this.index.archives[sourceUrl];
+ if (current?.fileName === entry.name) {
+ continue;
+ }
+
+ this.index.archives[sourceUrl] = {
+ id: path.basename(entry.name, ".html"),
+ fileName: entry.name,
+ sourceUrl,
+ createdAt: metadata.createdAt || new Date().toISOString(),
+ updatedAt: new Date().toISOString()
+ };
+ changed = true;
+ }
+
+ for (const [sourceUrl, record] of Object.entries(this.index.archives)) {
+ if (!record?.fileName || !await this.hasArchiveFile(record.fileName)) {
+ delete this.index.archives[sourceUrl];
+ changed = true;
+ }
+ }
+
+ return changed;
+ }
+
+ upsertRecord(sourceUrl, values) {
+ const previous = this.index.archives[sourceUrl];
+ const now = new Date().toISOString();
+ const record = {
+ id: values.id,
+ fileName: values.fileName,
+ sourceUrl,
+ createdAt: previous?.createdAt || now,
+ updatedAt: now,
+ warningsCount: values.warningsCount ?? previous?.warningsCount ?? 0,
+ externalAssetsCount: values.externalAssetsCount ?? previous?.externalAssetsCount ?? 0
+ };
+ this.index.archives[sourceUrl] = record;
+ return record;
+ }
+
+ async hasArchiveFile(fileName) {
+ if (!isSafeArchiveFileName(fileName)) {
+ return false;
+ }
+ const stat = await fs.stat(path.join(this.archivePath, fileName)).catch(() => null);
+ return !!stat?.isFile();
+ }
+
+ toPublicRecord(record) {
+ return {
+ ...record,
+ archiveUrl: `${this.publicBasePath}/${encodeURIComponent(record.fileName)}`
+ };
+ }
+
+ async saveIndex() {
+ this.savePromise = this.savePromise.then(async () => {
+ await fs.mkdir(this.archivePath, { recursive: true });
+ const tmpPath = `${this.indexPath}.${process.pid}.tmp`;
+ await fs.writeFile(tmpPath, `${JSON.stringify(this.index, null, 2)}\n`, "utf8");
+ await fs.rename(tmpPath, this.indexPath);
+ });
+ return this.savePromise;
+ }
+}
+
+export function isSafeArchiveFileName(fileName) {
+ return (
+ typeof fileName === "string" &&
+ fileName === path.basename(fileName) &&
+ fileName.endsWith(".html") &&
+ !fileName.startsWith(".")
+ );
+}
+
+async function readArchiveMetadata(filePath) {
+ const handle = await fs.open(filePath, "r").catch(() => null);
+ if (!handle) {
+ return null;
+ }
+
+ try {
+ const buffer = Buffer.alloc(4096);
+ const { bytesRead } = await handle.read(buffer, 0, buffer.length, 0);
+ const head = buffer.subarray(0, bytesRead).toString("utf8");
+ const match = head.match(COMMENT_RE);
+ if (!match) {
+ return null;
+ }
+ return {
+ sourceUrl: match[1].replaceAll("- -", "--"),
+ createdAt: match[2]
+ };
+ } finally {
+ await handle.close();
+ }
+}
diff --git a/src/backend-server.mjs b/src/backend-server.mjs
new file mode 100644
index 0000000..084ead3
--- /dev/null
+++ b/src/backend-server.mjs
@@ -0,0 +1,345 @@
+#!/usr/bin/env node
+import { createReadStream } from "node:fs";
+import fs from "node:fs/promises";
+import http from "node:http";
+import path from "node:path";
+import { randomUUID } from "node:crypto";
+import { ArchiveCatalog, archiveIdForUrl, isSafeArchiveFileName, normalizeArchiveUrl } from "./archive-catalog.mjs";
+import { defaultArchivePath } from "./asset-inliner.mjs";
+
+const PORT = Number(process.env.PORT || 5732);
+const ARCHIVE_PATH = path.resolve(process.env.ARCHIVE_PATH || defaultArchivePath());
+const ARCHIVE_WORKER_URL = process.env.ARCHIVE_WORKER_URL || "http://127.0.0.1:5733";
+const PUBLIC_ARCHIVES_PATH = process.env.PUBLIC_ARCHIVES_PATH || "/archives";
+const JOB_TIMEOUT_MS = Number(process.env.ARCHIVE_JOB_TIMEOUT_MS || 120000);
+const MAX_BODY_BYTES = 64 * 1024;
+
+const catalog = new ArchiveCatalog({
+ archivePath: ARCHIVE_PATH,
+ publicBasePath: PUBLIC_ARCHIVES_PATH
+});
+
+const jobs = new Map();
+const activeJobByUrl = new Map();
+let workerQueue = Promise.resolve();
+
+const server = http.createServer(async (req, res) => {
+ try {
+ await route(req, res);
+ } catch (error) {
+ sendJson(res, error.statusCode || 500, {
+ ok: false,
+ error: error.message || "Unexpected error"
+ });
+ }
+});
+
+server.listen(PORT, () => {
+ console.log(`archive backend listening on ${PORT}`);
+ console.log(`archive path: ${ARCHIVE_PATH}`);
+ console.log(`archive worker: ${ARCHIVE_WORKER_URL}`);
+});
+
+const cleanupTimer = setInterval(cleanupJobs, 10 * 60 * 1000);
+cleanupTimer.unref?.();
+
+async function route(req, res) {
+ const requestUrl = new URL(req.url, `http://${req.headers.host || "localhost"}`);
+
+ if (req.method === "GET" && requestUrl.pathname === "/healthz") {
+ sendJson(res, 200, { ok: true });
+ return;
+ }
+
+ if (req.method === "GET" && requestUrl.pathname === "/api/archives/lookup") {
+ const sourceUrl = normalizeArchiveUrl(requestUrl.searchParams.get("url"));
+ const archive = await catalog.findByUrl(sourceUrl);
+ sendJson(res, 200, {
+ ok: true,
+ exists: !!archive,
+ archive
+ });
+ return;
+ }
+
+ if (req.method === "POST" && requestUrl.pathname === "/api/archives") {
+ const body = await readJsonBody(req);
+ const sourceUrl = normalizeArchiveUrl(body.url);
+ const response = await createOrFindArchive(sourceUrl);
+ sendJson(res, response.statusCode, response.body);
+ return;
+ }
+
+ const jobMatch = requestUrl.pathname.match(/^\/api\/jobs\/([^/]+)$/);
+ if (req.method === "GET" && jobMatch) {
+ const job = jobs.get(jobMatch[1]);
+ if (!job) {
+ sendJson(res, 404, { ok: false, error: "Job not found" });
+ return;
+ }
+ sendJson(res, 200, {
+ ok: true,
+ job: publicJob(job)
+ });
+ return;
+ }
+
+ if (req.method === "GET" && requestUrl.pathname.startsWith(`${PUBLIC_ARCHIVES_PATH}/`)) {
+ await serveArchive(requestUrl.pathname.slice(PUBLIC_ARCHIVES_PATH.length + 1), res);
+ return;
+ }
+
+ sendJson(res, 404, { ok: false, error: "Not found" });
+}
+
+async function createOrFindArchive(sourceUrl) {
+ const existing = await catalog.findByUrl(sourceUrl);
+ if (existing) {
+ return {
+ statusCode: 200,
+ body: {
+ ok: true,
+ status: "done",
+ mode: "existing",
+ archive: existing
+ }
+ };
+ }
+
+ const activeJobId = activeJobByUrl.get(sourceUrl);
+ const activeJob = activeJobId ? jobs.get(activeJobId) : null;
+ if (activeJob && !isTerminal(activeJob.status)) {
+ return {
+ statusCode: 202,
+ body: {
+ ok: true,
+ status: activeJob.status,
+ mode: "active",
+ job: publicJob(activeJob)
+ }
+ };
+ }
+
+ const job = {
+ id: cryptoRandomId(),
+ archiveId: archiveIdForUrl(sourceUrl),
+ sourceUrl,
+ status: "queued",
+ message: "Queued",
+ createdAt: new Date().toISOString(),
+ startedAt: null,
+ updatedAt: new Date().toISOString(),
+ finishedAt: null,
+ archive: null,
+ error: null
+ };
+ jobs.set(job.id, job);
+ activeJobByUrl.set(sourceUrl, job.id);
+ enqueueJob(job);
+
+ return {
+ statusCode: 202,
+ body: {
+ ok: true,
+ status: job.status,
+ mode: "created",
+ job: publicJob(job)
+ }
+ };
+}
+
+function enqueueJob(job) {
+ const run = () => executeJob(job);
+ workerQueue = workerQueue.then(run, run);
+}
+
+async function executeJob(job) {
+ if (job.status !== "queued") {
+ return;
+ }
+
+ updateJob(job, {
+ status: "running",
+ message: "Archiving",
+ startedAt: new Date().toISOString()
+ });
+
+ try {
+ const result = await requestWorkerArchive(job.sourceUrl, job.archiveId);
+ const archive = await catalog.recordResult(job.sourceUrl, result);
+ updateJob(job, {
+ status: "done",
+ message: "Opening",
+ archive,
+ finishedAt: new Date().toISOString()
+ });
+ } catch (error) {
+ updateJob(job, {
+ status: "failed",
+ message: "Failed",
+ error: error.message || "Archive failed",
+ finishedAt: new Date().toISOString()
+ });
+ } finally {
+ activeJobByUrl.delete(job.sourceUrl);
+ }
+}
+
+async function requestWorkerArchive(sourceUrl, archiveId) {
+ const workerUrl = new URL("/archive", ARCHIVE_WORKER_URL);
+ const controller = new AbortController();
+ const timeout = setTimeout(() => controller.abort(), JOB_TIMEOUT_MS);
+ timeout.unref?.();
+
+ try {
+ const response = await fetch(workerUrl, {
+ method: "POST",
+ headers: {
+ "content-type": "application/json"
+ },
+ body: JSON.stringify({ url: sourceUrl, id: archiveId }),
+ signal: controller.signal
+ });
+ const text = await response.text();
+ const parsed = parseJson(text);
+ if (!response.ok || parsed.ok === false) {
+ throw new Error(parsed.error || text || `Worker returned ${response.status}`);
+ }
+ return parsed.result || parsed;
+ } catch (error) {
+ if (error.name === "AbortError") {
+ throw new Error(`Archive timed out after ${Math.round(JOB_TIMEOUT_MS / 1000)} seconds`);
+ }
+ throw error;
+ } finally {
+ clearTimeout(timeout);
+ }
+}
+
+async function serveArchive(rawFileName, res) {
+ let fileName;
+ try {
+ fileName = decodeURIComponent(rawFileName);
+ } catch {
+ sendJson(res, 400, { ok: false, error: "Invalid archive path" });
+ return;
+ }
+
+ if (!isSafeArchiveFileName(fileName)) {
+ sendJson(res, 404, { ok: false, error: "Archive not found" });
+ return;
+ }
+
+ const filePath = path.join(ARCHIVE_PATH, fileName);
+ const stat = await fs.stat(filePath).catch(() => null);
+ if (!stat?.isFile()) {
+ sendJson(res, 404, { ok: false, error: "Archive not found" });
+ return;
+ }
+
+ const stream = createReadStream(filePath, { encoding: "utf8" });
+ stream.on("error", () => {
+ if (!res.headersSent) {
+ sendJson(res, 404, { ok: false, error: "Archive not found" });
+ } else {
+ res.destroy();
+ }
+ });
+ res.writeHead(200, {
+ "content-type": "text/html; charset=utf-8",
+ "cache-control": "no-store"
+ });
+ stream.pipe(res);
+}
+
+function updateJob(job, values) {
+ Object.assign(job, values, {
+ updatedAt: new Date().toISOString()
+ });
+}
+
+function publicJob(job) {
+ const startedAt = job.startedAt || job.createdAt;
+ return {
+ id: job.id,
+ sourceUrl: job.sourceUrl,
+ status: job.status,
+ message: job.message,
+ createdAt: job.createdAt,
+ startedAt,
+ updatedAt: job.updatedAt,
+ finishedAt: job.finishedAt,
+ elapsedMs: startedAt ? Date.now() - Date.parse(startedAt) : 0,
+ archive: job.archive,
+ error: job.error
+ };
+}
+
+function isTerminal(status) {
+ return status === "done" || status === "failed";
+}
+
+function cleanupJobs() {
+ const cutoff = Date.now() - 60 * 60 * 1000;
+ for (const [id, job] of jobs) {
+ if (isTerminal(job.status) && Date.parse(job.finishedAt || job.updatedAt) < cutoff) {
+ jobs.delete(id);
+ }
+ }
+}
+
+async function readJsonBody(req) {
+ const text = await readRequestBody(req, MAX_BODY_BYTES);
+ if (!text.trim()) {
+ throw httpError(400, "Request body is required");
+ }
+ try {
+ return JSON.parse(text);
+ } catch {
+ throw httpError(400, "Request body must be JSON");
+ }
+}
+
+function readRequestBody(req, maxBytes) {
+ return new Promise((resolve, reject) => {
+ const chunks = [];
+ let total = 0;
+ req.on("data", (chunk) => {
+ total += chunk.length;
+ if (total > maxBytes) {
+ reject(httpError(413, "Request body is too large"));
+ req.destroy();
+ return;
+ }
+ chunks.push(chunk);
+ });
+ req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")));
+ req.on("error", reject);
+ });
+}
+
+function sendJson(res, statusCode, payload) {
+ res.writeHead(statusCode, {
+ "content-type": "application/json; charset=utf-8",
+ "cache-control": "no-store"
+ });
+ res.end(`${JSON.stringify(payload)}\n`);
+}
+
+function parseJson(text) {
+ try {
+ return JSON.parse(text);
+ } catch {
+ throw new Error(text || "Worker returned invalid JSON");
+ }
+}
+
+function cryptoRandomId() {
+ return randomUUID();
+}
+
+function httpError(statusCode, message) {
+ const error = new Error(message);
+ error.statusCode = statusCode;
+ return error;
+}
diff --git a/src/frontend-server.mjs b/src/frontend-server.mjs
new file mode 100644
index 0000000..8b51c31
--- /dev/null
+++ b/src/frontend-server.mjs
@@ -0,0 +1,157 @@
+#!/usr/bin/env node
+import fs from "node:fs/promises";
+import http from "node:http";
+import path from "node:path";
+import { Readable } from "node:stream";
+import { fileURLToPath } from "node:url";
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const PORT = Number(process.env.PORT || 5731);
+const BACKEND_URL = process.env.BACKEND_URL || "http://127.0.0.1:5732";
+const PUBLIC_DIR = path.resolve(__dirname, "..", "public");
+const MAX_PROXY_BODY_BYTES = 128 * 1024;
+
+const CONTENT_TYPES = new Map([
+ [".css", "text/css; charset=utf-8"],
+ [".html", "text/html; charset=utf-8"],
+ [".js", "text/javascript; charset=utf-8"],
+ [".svg", "image/svg+xml"]
+]);
+
+const server = http.createServer(async (req, res) => {
+ try {
+ await route(req, res);
+ } catch (error) {
+ res.writeHead(error.statusCode || 500, {
+ "content-type": "text/plain; charset=utf-8",
+ "cache-control": "no-store"
+ });
+ res.end(error.message || "Unexpected error");
+ }
+});
+
+server.listen(PORT, () => {
+ console.log(`archive frontend listening on ${PORT}`);
+ console.log(`archive backend: ${BACKEND_URL}`);
+});
+
+async function route(req, res) {
+ const requestUrl = new URL(req.url, `http://${req.headers.host || "localhost"}`);
+
+ if (requestUrl.pathname === "/healthz") {
+ res.writeHead(200, {
+ "content-type": "application/json; charset=utf-8",
+ "cache-control": "no-store"
+ });
+ res.end('{"ok":true}\n');
+ return;
+ }
+
+ if (requestUrl.pathname.startsWith("/api/") || requestUrl.pathname.startsWith("/archives/")) {
+ await proxyToBackend(req, res, requestUrl);
+ return;
+ }
+
+ if (req.method !== "GET" && req.method !== "HEAD") {
+ throw httpError(405, "Method not allowed");
+ }
+
+ if (requestUrl.pathname.startsWith("/assets/")) {
+ await serveStatic(requestUrl.pathname, res);
+ return;
+ }
+
+ await serveStatic("/index.html", res);
+}
+
+async function serveStatic(urlPath, res) {
+ let decodedPath;
+ try {
+ decodedPath = decodeURIComponent(urlPath);
+ } catch {
+ throw httpError(400, "Invalid path");
+ }
+
+ const filePath = path.join(PUBLIC_DIR, decodedPath);
+ const relative = path.relative(PUBLIC_DIR, filePath);
+ if (relative.startsWith("..") || path.isAbsolute(relative)) {
+ throw httpError(404, "Not found");
+ }
+
+ const bytes = await fs.readFile(filePath).catch((error) => {
+ if (error.code === "ENOENT") {
+ throw httpError(404, "Not found");
+ }
+ throw error;
+ });
+
+ const type = CONTENT_TYPES.get(path.extname(filePath)) || "application/octet-stream";
+ res.writeHead(200, {
+ "content-type": type,
+ "cache-control": "no-store"
+ });
+ res.end(bytes);
+}
+
+async function proxyToBackend(req, res, requestUrl) {
+ const upstreamUrl = new URL(`${requestUrl.pathname}${requestUrl.search}`, BACKEND_URL);
+ const headers = {};
+ for (const [key, value] of Object.entries(req.headers)) {
+ if (["connection", "content-length", "host"].includes(key.toLowerCase())) {
+ continue;
+ }
+ if (Array.isArray(value)) {
+ headers[key] = value.join(", ");
+ } else if (value !== undefined) {
+ headers[key] = value;
+ }
+ }
+
+ const body = req.method === "GET" || req.method === "HEAD"
+ ? undefined
+ : await readRequestBody(req, MAX_PROXY_BODY_BYTES);
+
+ const upstream = await fetch(upstreamUrl, {
+ method: req.method,
+ headers,
+ body
+ });
+
+ const responseHeaders = {};
+ upstream.headers.forEach((value, key) => {
+ if (!["connection", "content-encoding", "transfer-encoding"].includes(key.toLowerCase())) {
+ responseHeaders[key] = value;
+ }
+ });
+
+ res.writeHead(upstream.status, responseHeaders);
+ if (req.method === "HEAD" || !upstream.body) {
+ res.end();
+ return;
+ }
+ Readable.fromWeb(upstream.body).pipe(res);
+}
+
+function readRequestBody(req, maxBytes) {
+ return new Promise((resolve, reject) => {
+ const chunks = [];
+ let total = 0;
+ req.on("data", (chunk) => {
+ total += chunk.length;
+ if (total > maxBytes) {
+ reject(httpError(413, "Request body is too large"));
+ req.destroy();
+ return;
+ }
+ chunks.push(chunk);
+ });
+ req.on("end", () => resolve(Buffer.concat(chunks)));
+ req.on("error", reject);
+ });
+}
+
+function httpError(statusCode, message) {
+ const error = new Error(message);
+ error.statusCode = statusCode;
+ return error;
+}
diff --git a/src/worker-server.mjs b/src/worker-server.mjs
new file mode 100644
index 0000000..37ba402
--- /dev/null
+++ b/src/worker-server.mjs
@@ -0,0 +1,103 @@
+#!/usr/bin/env node
+import http from "node:http";
+import path from "node:path";
+import { archivePage, defaultArchivePath } from "./archiver.mjs";
+import { archiveIdForUrl, normalizeArchiveUrl } from "./archive-catalog.mjs";
+
+const PORT = Number(process.env.PORT || process.env.ARCHIVE_WORKER_PORT || 5733);
+const ARCHIVE_PATH = path.resolve(process.env.ARCHIVE_PATH || defaultArchivePath());
+const MAX_BODY_BYTES = 64 * 1024; // cap passed to readRequestBody for JSON bodies
+
+let queue = Promise.resolve(); // tail of the chain enqueueArchive appends jobs to
+
+const server = http.createServer(async (req, res) => {
+  try {
+    await route(req, res);
+  } catch (error) {
+    // Once headers are out, a second writeHead would throw inside this async handler
+    // and surface as an unhandled rejection; drop the connection instead.
+    if (res.headersSent) { res.destroy(); return; }
+    sendJson(res, error?.statusCode || 500, { ok: false, error: error?.message || "Unexpected error" });
+  }
+});
+
+server.listen(PORT, () => {
+  console.log(`archive worker listening on ${PORT}`);
+  console.log(`archive path: ${ARCHIVE_PATH}`);
+});
+
+async function route(req, res) {
+  // Dispatch GET /healthz and POST /archive; anything else gets a JSON 404.
+  // Only the pathname matters, so parse against a fixed base: the Host header is
+  // client-controlled and a malformed value would make `new URL` throw (a 500).
+  const { pathname } = new URL(req.url, "http://localhost");
+  if (req.method === "GET" && pathname === "/healthz") {
+    sendJson(res, 200, { ok: true });
+    return;
+  }
+  if (req.method === "POST" && pathname === "/archive") {
+    const body = await readJsonBody(req);
+    let sourceUrl;
+    try { sourceUrl = normalizeArchiveUrl(body.url); } catch (error) {
+      // A rejected URL is a client mistake: keep any status already set, else use 400.
+      throw error.statusCode ? error : httpError(400, error.message);
+    }
+    const id = typeof body.id === "string" && body.id.trim() ? body.id.trim() : archiveIdForUrl(sourceUrl);
+    sendJson(res, 200, { ok: true, result: await enqueueArchive(sourceUrl, id) });
+    return;
+  }
+  sendJson(res, 404, { ok: false, error: "Not found" });
+}
+
+function enqueueArchive(sourceUrl, id) {
+  // Serialises archive jobs: each one starts only after the previously queued job
+  // has settled, whether it fulfilled or rejected. Resolves with this job's result.
+  const options = { archivePath: ARCHIVE_PATH, id };
+  const job = () => archivePage(sourceUrl, options);
+  queue = queue.then(job, job);
+  return queue;
+}
+
+async function readJsonBody(req) {
+  // Reads the size-capped body and parses it; unusable payloads reject with a 400 httpError.
+  const text = await readRequestBody(req, MAX_BODY_BYTES);
+  if (!text.trim()) throw httpError(400, "Request body is required");
+  let parsed;
+  try { parsed = JSON.parse(text); } catch { throw httpError(400, "Request body must be JSON"); }
+  // Valid JSON such as `null` or `[]` is not a request object; reading `.url` off null throws.
+  const isObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
+  if (!isObject) throw httpError(400, "Request body must be a JSON object");
+  return parsed;
+}
+
+function readRequestBody(req, maxBytes) {
+  // Resolves with the body decoded as UTF-8; past maxBytes it rejects with a 413
+  // httpError and destroys the request so nothing further is read.
+  const pieces = [];
+  let size = 0;
+  return new Promise((resolve, reject) => {
+    const tooLarge = () => {
+      reject(httpError(413, "Request body is too large"));
+      req.destroy();
+    };
+    // Listener return values are ignored by the emitter, so the ternary is pure control flow.
+    req
+      .on("data", (piece) => ((size += piece.length) > maxBytes ? tooLarge() : pieces.push(piece)))
+      .on("end", () => resolve(Buffer.concat(pieces).toString("utf8")))
+      .on("error", reject);
+  });
+}
+
+function sendJson(res, statusCode, payload) {
+  // Sends `payload` as a newline-terminated JSON document that must not be cached.
+  const headers = { "content-type": "application/json; charset=utf-8", "cache-control": "no-store" };
+  res.writeHead(statusCode, headers);
+  const text = JSON.stringify(payload);
+  res.end(`${text}\n`);
+}
+
+function httpError(statusCode, message) {
+  // Error whose statusCode property tells the request handler which HTTP status to send.
+  const failure = new Error(message);
+  return Object.assign(failure, { statusCode });
+}
diff --git a/test/archive-catalog.test.mjs b/test/archive-catalog.test.mjs
new file mode 100644
index 0000000..c34bf06
--- /dev/null
+++ b/test/archive-catalog.test.mjs
@@ -0,0 +1,49 @@
+import assert from "node:assert/strict";
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import test from "node:test";
+import { ArchiveCatalog, archiveFileNameForUrl, archiveIdForUrl, normalizeArchiveUrl } from "../src/archive-catalog.mjs";
+
+test("normalizes only http and https archive URLs", () => {
+  assert.equal(normalizeArchiveUrl(" https://example.com/path "), "https://example.com/path"); // trimmed
+  const rejected = [["file:///tmp/page.html", /Only http and https/], ["not a url", /valid URL/]];
+  for (const [input, pattern] of rejected) assert.throws(() => normalizeArchiveUrl(input), pattern);
+});
+
+test("builds stable archive ids from the full URL", () => {
+  // The same URL must always map to one id, and a different query string to another.
+  const base = "https://example.com/article?x=1";
+  const id = archiveIdForUrl(base);
+  assert.equal(id, archiveIdForUrl(base));
+  assert.notEqual(id, archiveIdForUrl("https://example.com/article?x=2"));
+  assert.match(id, /^example-com-article-[a-f0-9]{16}$/);
+});
+
+test("finds stable archive files without rerendering", async (t) => {
+  const archivePath = await fs.mkdtemp(path.join(os.tmpdir(), "archive-catalog-"));
+  // Remove the scratch directory when the test finishes so runs do not pile up in tmpdir.
+  t.after(() => fs.rm(archivePath, { recursive: true, force: true }));
+  const sourceUrl = "https://example.com/";
+  const fileName = archiveFileNameForUrl(sourceUrl);
+  await fs.writeFile(path.join(archivePath, fileName), "", "utf8");
+
+  const record = await new ArchiveCatalog({ archivePath }).findByUrl(sourceUrl);
+  assert.equal(record.fileName, fileName);
+  assert.equal(record.archiveUrl, `/archives/${encodeURIComponent(fileName)}`);
+});
+
+test("indexes older timestamped archives from the archive comment", async (t) => {
+  const archivePath = await fs.mkdtemp(path.join(os.tmpdir(), "archive-catalog-"));
+  // Remove the scratch directory afterwards so test runs do not pile up in tmpdir.
+  t.after(() => fs.rm(archivePath, { recursive: true, force: true }));
+  const legacyName = "example-com-2026-05-16T00-00-00-000Z.html";
+  // NOTE(review): this fixture holds no archive comment naming the source URL, yet the
+  // assertions expect one to be indexed; confirm the literal was not stripped in transit.
+  await fs.writeFile(path.join(archivePath, legacyName), '\n\n', "utf8");
+
+  const record = await new ArchiveCatalog({ archivePath }).findByUrl("https://example.com/story");
+
+  assert.equal(record.fileName, legacyName);
+  assert.equal(record.sourceUrl, "https://example.com/story");
+});
diff --git a/test/archiver.test.mjs b/test/archiver.test.mjs
new file mode 100644
index 0000000..1ebfd03
--- /dev/null
+++ b/test/archiver.test.mjs
@@ -0,0 +1,26 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+import { renderPage } from "../src/archiver.mjs";
+
+test("renderPage serializes CSSOM-inserted style rules", async () => {
+ const html = `
+
+
+
+
+
+ Styled by CSSOM
+ `;
+
+ const rendered = await renderPage(`data:text/html,${encodeURIComponent(html)}`, {
+ userscriptDelay: 0
+ });
+
+ assert.match(rendered, /