adds frontend
This commit is contained in:
@@ -22,6 +22,7 @@ COPY . .
|
||||
RUN mkdir -p /archives && chmod 0777 /archives
|
||||
|
||||
VOLUME ["/archives"]
|
||||
EXPOSE 5733
|
||||
|
||||
ENTRYPOINT ["dumb-init", "--", "/app/scripts/archive-worker-entrypoint.sh"]
|
||||
CMD ["help"]
|
||||
|
||||
16
Dockerfile.web
Normal file
16
Dockerfile.web
Normal file
@@ -0,0 +1,16 @@
|
||||
FROM node:22-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENV NODE_ENV=production
|
||||
ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
|
||||
|
||||
COPY package.json package-lock.json ./
|
||||
RUN npm ci --omit=dev
|
||||
|
||||
COPY src ./src
|
||||
COPY public ./public
|
||||
|
||||
EXPOSE 5731 5732
|
||||
|
||||
CMD ["node", "src/backend-server.mjs"]
|
||||
20
README.md
20
README.md
@@ -51,3 +51,23 @@ For visual debugging, expose VNC from the worker:
|
||||
```
|
||||
|
||||
The worker image starts Xvfb internally, so callers do not need to mount the host X11 socket or override the entrypoint.
|
||||
|
||||
## Web UI
|
||||
|
||||
The web path is split into three roles:
|
||||
|
||||
- `src/frontend-server.mjs` serves the static UI and proxies `/api/*` and `/archives/*` to the backend.
|
||||
- `src/backend-server.mjs` manages archive lookup, job state, and the archive index.
|
||||
- `src/worker-server.mjs` runs inside the browser worker container and wraps `archivePage()` over HTTP.
|
||||
|
||||
Run the full stack with:
|
||||
|
||||
```sh
|
||||
docker compose -f docker-compose.example.yml up --build
|
||||
```
|
||||
|
||||
Then open `http://localhost:5731`. Direct path archival is supported, for example:
|
||||
|
||||
```text
|
||||
http://localhost:5731/https://example.com
|
||||
```
|
||||
|
||||
66
docker-compose.example.yml
Normal file
66
docker-compose.example.yml
Normal file
@@ -0,0 +1,66 @@
|
||||
services:
|
||||
frontend:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.web
|
||||
image: local-page-archiver-web:latest
|
||||
command: ["node", "src/frontend-server.mjs"]
|
||||
environment:
|
||||
PORT: "5731"
|
||||
BACKEND_URL: "http://backend:5732"
|
||||
ports:
|
||||
- "5731:5731"
|
||||
depends_on:
|
||||
backend:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "-e", "fetch('http://127.0.0.1:5731/healthz').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
|
||||
interval: 10s
|
||||
timeout: 3s
|
||||
retries: 5
|
||||
|
||||
backend:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.web
|
||||
image: local-page-archiver-web:latest
|
||||
command: ["node", "src/backend-server.mjs"]
|
||||
environment:
|
||||
PORT: "5732"
|
||||
ARCHIVE_PATH: /archives
|
||||
ARCHIVE_WORKER_URL: "http://browser:5733"
|
||||
PUBLIC_ARCHIVES_PATH: /archives
|
||||
volumes:
|
||||
- archives:/archives
|
||||
depends_on:
|
||||
browser:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "-e", "fetch('http://127.0.0.1:5732/healthz').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
|
||||
interval: 10s
|
||||
timeout: 3s
|
||||
retries: 5
|
||||
|
||||
browser:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
image: local-page-archiver-browser:latest
|
||||
command: ["serve-worker"]
|
||||
environment:
|
||||
PORT: "5733"
|
||||
ARCHIVE_PATH: /archives
|
||||
ARCHIVE_WORKER_XVFB: "1"
|
||||
volumes:
|
||||
- archives:/archives
|
||||
expose:
|
||||
- "5733"
|
||||
shm_size: 1gb
|
||||
healthcheck:
|
||||
test: ["CMD", "node", "-e", "fetch('http://127.0.0.1:5733/healthz').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
|
||||
interval: 10s
|
||||
timeout: 3s
|
||||
retries: 10
|
||||
|
||||
volumes:
|
||||
archives:
|
||||
@@ -12,6 +12,9 @@
|
||||
"archive": "node src/cli.mjs archive",
|
||||
"container:archive": "node src/container-runner.mjs archive",
|
||||
"container:build": "node src/container-runner.mjs build",
|
||||
"start:backend": "node src/backend-server.mjs",
|
||||
"start:frontend": "node src/frontend-server.mjs",
|
||||
"start:worker": "node src/worker-server.mjs",
|
||||
"test": "node --test test/*.test.mjs",
|
||||
"install-browsers": "playwright install chromium"
|
||||
},
|
||||
|
||||
153
public/assets/app.css
Normal file
153
public/assets/app.css
Normal file
@@ -0,0 +1,153 @@
|
||||
:root {
|
||||
color-scheme: light;
|
||||
--bg: #f6f5f1;
|
||||
--surface: #ffffff;
|
||||
--ink: #161616;
|
||||
--muted: #696963;
|
||||
--line: #d8d6ce;
|
||||
--accent: #2f7664;
|
||||
--accent-strong: #245d50;
|
||||
--danger: #a43d32;
|
||||
font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
||||
}
|
||||
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
html,
|
||||
body {
|
||||
min-height: 100%;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
background: var(--bg);
|
||||
color: var(--ink);
|
||||
}
|
||||
|
||||
button,
|
||||
input {
|
||||
font: inherit;
|
||||
letter-spacing: 0;
|
||||
}
|
||||
|
||||
.shell {
|
||||
min-height: 100vh;
|
||||
display: grid;
|
||||
place-items: center;
|
||||
padding: 24px;
|
||||
}
|
||||
|
||||
.archive-box {
|
||||
width: min(680px, 100%);
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
padding: 12px;
|
||||
box-shadow: 0 18px 42px rgba(28, 25, 19, 0.08);
|
||||
}
|
||||
|
||||
.input-row {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr auto;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
input {
|
||||
min-width: 0;
|
||||
width: 100%;
|
||||
height: 48px;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 6px;
|
||||
color: var(--ink);
|
||||
background: #fbfaf7;
|
||||
padding: 0 14px;
|
||||
outline: none;
|
||||
}
|
||||
|
||||
input:focus {
|
||||
border-color: var(--accent);
|
||||
box-shadow: 0 0 0 3px rgba(47, 118, 100, 0.16);
|
||||
}
|
||||
|
||||
button {
|
||||
height: 48px;
|
||||
min-width: 112px;
|
||||
border: 0;
|
||||
border-radius: 6px;
|
||||
color: #ffffff;
|
||||
background: var(--accent);
|
||||
padding: 0 18px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
button:hover {
|
||||
background: var(--accent-strong);
|
||||
}
|
||||
|
||||
button:disabled {
|
||||
cursor: wait;
|
||||
opacity: 0.72;
|
||||
}
|
||||
|
||||
.progress-wrap {
|
||||
padding-top: 12px;
|
||||
}
|
||||
|
||||
.progress-track {
|
||||
height: 6px;
|
||||
overflow: hidden;
|
||||
border-radius: 999px;
|
||||
background: #e7e4dc;
|
||||
}
|
||||
|
||||
.progress-bar {
|
||||
width: 0%;
|
||||
height: 100%;
|
||||
border-radius: inherit;
|
||||
background: var(--accent);
|
||||
transition: width 220ms ease;
|
||||
}
|
||||
|
||||
.status-line {
|
||||
min-height: 22px;
|
||||
margin-top: 8px;
|
||||
color: var(--muted);
|
||||
font-size: 14px;
|
||||
line-height: 22px;
|
||||
}
|
||||
|
||||
.status-line.error {
|
||||
color: var(--danger);
|
||||
}
|
||||
|
||||
.sr-only {
|
||||
position: absolute;
|
||||
width: 1px;
|
||||
height: 1px;
|
||||
overflow: hidden;
|
||||
clip: rect(0, 0, 0, 0);
|
||||
white-space: nowrap;
|
||||
clip-path: inset(50%);
|
||||
}
|
||||
|
||||
@media (max-width: 560px) {
|
||||
.shell {
|
||||
align-items: start;
|
||||
padding: 16px;
|
||||
padding-top: 20vh;
|
||||
}
|
||||
|
||||
.archive-box {
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
.input-row {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
button {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
169
public/assets/app.js
Normal file
169
public/assets/app.js
Normal file
@@ -0,0 +1,169 @@
|
||||
const form = document.querySelector("#archive-form");
|
||||
const input = document.querySelector("#archive-url");
|
||||
const button = document.querySelector("#archive-submit");
|
||||
const progressWrap = document.querySelector("#progress-wrap");
|
||||
const progressBar = document.querySelector("#progress-bar");
|
||||
const statusLine = document.querySelector("#status-line");
|
||||
|
||||
let pollTimer = null;
|
||||
let visualTimer = null;
|
||||
let startedAt = Date.now();
|
||||
|
||||
form.addEventListener("submit", (event) => {
|
||||
event.preventDefault();
|
||||
submitArchive(input.value);
|
||||
});
|
||||
|
||||
const pathUrl = urlFromPath();
|
||||
if (pathUrl) {
|
||||
input.value = pathUrl;
|
||||
submitArchive(pathUrl);
|
||||
} else {
|
||||
input.focus();
|
||||
}
|
||||
|
||||
/**
 * Ask the API to archive `rawUrl` and react to the answer.
 *
 * The response either carries an existing archive (navigate to it right
 * away) or a job (start watching it). Anything else is reported as an error
 * and the form is unlocked again.
 *
 * @param {string} rawUrl URL exactly as typed or taken from the path.
 * @returns {Promise<void>}
 */
async function submitArchive(rawUrl) {
  stopTimers();
  setBusy(true);
  setStatus("Checking", 8);

  try {
    const requestInit = {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({ url: rawUrl })
    };
    const payload = await readApiResponse(await fetch("/api/archives", requestInit));

    const readyUrl = payload.archive?.archiveUrl;
    if (readyUrl) {
      openArchive(readyUrl);
    } else if (payload.job?.id) {
      watchJob(payload.job);
    } else {
      throw new Error(payload.error || "Archive did not start");
    }
  } catch (failure) {
    setError(failure.message || "Archive failed");
    setBusy(false);
  }
}
|
||||
|
||||
/**
 * Start tracking a running archive job: render its current state, animate
 * the progress bar, and poll the job endpoint until it settles.
 *
 * @param {{id: string, status?: string, startedAt?: string, createdAt?: string, archive?: {archiveUrl?: string}}} job
 *   Job snapshot returned by the API.
 * @returns {void}
 */
function watchJob(job) {
  startedAt = Date.parse(job.startedAt || job.createdAt) || Date.now();
  updateFromJob(job);

  // The snapshot may already be terminal; updateFromJob has handled it, so
  // starting timers now would leave them running with nothing to stop them.
  const alreadySettled =
    job.status === "failed" ||
    (job.status === "done" && Boolean(job.archive?.archiveUrl));
  if (alreadySettled) {
    return;
  }

  visualTimer = window.setInterval(updateVisualProgress, 250);

  let requestInFlight = false;
  const timerId = window.setInterval(async () => {
    // setInterval does not wait for async callbacks; skip this tick while
    // the previous request is still pending instead of stacking requests.
    if (requestInFlight) {
      return;
    }
    requestInFlight = true;
    try {
      const response = await fetch(`/api/jobs/${encodeURIComponent(job.id)}`);
      const data = await readApiResponse(response);
      // Ignore answers that arrive after polling was stopped or restarted.
      if (pollTimer !== timerId) {
        return;
      }
      updateFromJob(data.job);
    } catch (error) {
      if (pollTimer !== timerId) {
        return;
      }
      stopTimers();
      setError(error.message || "Archive failed");
      setBusy(false);
    } finally {
      requestInFlight = false;
    }
  }, 850);
  pollTimer = timerId;
}
|
||||
|
||||
/**
 * Reflect a job snapshot in the UI.
 *
 * - `done` with an archive URL: stop polling and navigate to the archive.
 * - `failed`, a missing snapshot, or `done` without an archive URL: stop
 *   polling, show an error and unlock the form.
 * - anything else: refresh the status label and progress bar.
 *
 * @param {{status?: string, startedAt?: string, createdAt?: string, error?: string, archive?: {archiveUrl?: string}}|null|undefined} job
 * @returns {void}
 */
function updateFromJob(job) {
  // A response without a job object cannot be rendered; treat it as a failure
  // instead of throwing a TypeError on the property reads below.
  if (!job) {
    stopTimers();
    setError("Archive failed");
    setBusy(false);
    return;
  }

  if (job.status === "done" && job.archive?.archiveUrl) {
    stopTimers();
    setStatus("Opening", 100);
    openArchive(job.archive.archiveUrl);
    return;
  }

  // "done" without an archive URL can never become openable, so it is
  // reported like a failure rather than being polled indefinitely.
  if (job.status === "failed" || job.status === "done") {
    stopTimers();
    setError(job.error || "Archive failed");
    setBusy(false);
    return;
  }

  startedAt = Date.parse(job.startedAt || job.createdAt) || startedAt;
  const elapsed = Math.max(0, Math.round((Date.now() - startedAt) / 1000));
  const label = job.status === "queued" ? "Queued" : `Archiving ${elapsed}s`;
  setStatus(label, optimisticProgress());
}
|
||||
|
||||
/**
 * Animation tick: while the progress area is visible, move the bar to the
 * current optimistic estimate.
 *
 * @returns {void}
 */
function updateVisualProgress() {
  if (progressWrap.hidden) {
    return;
  }
  const percent = optimisticProgress();
  progressBar.style.width = `${percent}%`;
}
|
||||
|
||||
/**
 * Estimate a progress percentage purely from the time elapsed since
 * `startedAt`: 12% during the first second, a fast ramp capped at 88% up to
 * twelve seconds, then a slow crawl that never exceeds 96%.
 *
 * @returns {number} Percentage to display.
 */
function optimisticProgress() {
  const seconds = Math.max(0, (Date.now() - startedAt) / 1000);
  const ramp = seconds < 12
    ? Math.min(88, 12 + seconds * 6.3)
    : Math.min(96, 88 + (seconds - 12) * 0.6);
  return seconds < 1 ? 12 : ramp;
}
|
||||
|
||||
/**
 * Decode an API response and turn every failure mode into a thrown Error.
 *
 * @param {{ok: boolean, status: number, json: () => Promise<unknown>}} response
 *   A fetch `Response` (or anything with the same three members).
 * @returns {Promise<object>} The decoded JSON object.
 * @throws {Error} When the HTTP status is not ok, when the payload carries
 *   `ok: false`, or when a successful response has no JSON object body.
 */
async function readApiResponse(response) {
  // A body that is not JSON must not mask the HTTP status, so parse failures
  // are folded into `null` and judged together with `response.ok` below.
  const data = await response.json().catch(() => null);
  if (!response.ok || data?.ok === false) {
    throw new Error(data?.error || `Request failed with ${response.status}`);
  }
  // Callers dereference the result immediately; a 2xx answer without a JSON
  // object would otherwise surface as "Cannot read properties of null".
  if (data === null || typeof data !== "object") {
    throw new Error("Server returned an invalid response");
  }
  return data;
}
|
||||
|
||||
/**
 * Lock or unlock the form while a request is being processed.
 *
 * @param {boolean} isBusy True to disable the submit button and make the
 *   URL field read-only; false to restore both.
 * @returns {void}
 */
function setBusy(isBusy) {
  input.readOnly = isBusy;
  button.disabled = isBusy;
}
|
||||
|
||||
/**
 * Show a normal (non-error) status message and set the progress bar.
 *
 * @param {string} text Message for the status line.
 * @param {number} progress Percentage; values outside 0-100 are clamped.
 * @returns {void}
 */
function setStatus(text, progress) {
  const clamped = Math.max(0, Math.min(100, progress));
  progressWrap.hidden = false;
  statusLine.classList.remove("error");
  statusLine.textContent = text;
  progressBar.style.width = `${clamped}%`;
}
|
||||
|
||||
/**
 * Show an error message in the status line (styled via the "error" class)
 * and fill the progress bar completely.
 *
 * @param {string} text Error message to display.
 * @returns {void}
 */
function setError(text) {
  progressWrap.hidden = false;

  const { classList } = statusLine;
  classList.add("error");
  statusLine.textContent = text;

  const fullWidth = "100%";
  progressBar.style.width = fullWidth;
}
|
||||
|
||||
/**
 * Cancel the polling and animation intervals, if any are active, and forget
 * their ids.
 *
 * @returns {void}
 */
function stopTimers() {
  const activePoll = pollTimer;
  if (activePoll) {
    pollTimer = null;
    window.clearInterval(activePoll);
  }

  const activeVisual = visualTimer;
  if (activeVisual) {
    visualTimer = null;
    window.clearInterval(activeVisual);
  }
}
|
||||
|
||||
/**
 * Navigate the current window to a finished archive.
 *
 * @param {string} archiveUrl URL of the archived page.
 * @returns {void}
 */
function openArchive(archiveUrl) {
  const { location } = window;
  location.assign(archiveUrl);
}
|
||||
|
||||
/**
 * Extract a target URL embedded in the page's own path, e.g.
 * `/https://example.com/page?x=1`.
 *
 * Two spellings are accepted:
 * - the target written literally after the leading slash; it is used
 *   verbatim so that escapes such as `%2F` or `%23` keep their meaning in
 *   the archived URL;
 * - the whole target percent-encoded into one path segment
 *   (`/https%3A%2F%2Fexample.com`); it is decoded once.
 *
 * The page's own query string and fragment are appended in both cases.
 *
 * @returns {string} The http(s) URL to archive, or "" when the path does not
 *   contain one (root path, app routes, other schemes, undecodable input).
 */
function urlFromPath() {
  const { pathname, search, hash } = window.location;
  const rawPath = pathname.replace(/^\/+/, "");
  const reservedPrefixes = ["assets/", "api/", "archives/"];
  if (!rawPath || reservedPrefixes.some((prefix) => rawPath.startsWith(prefix))) {
    return "";
  }

  const hasHttpScheme = (text) => /^https?:\/\//i.test(text);

  let target = rawPath;
  if (!hasHttpScheme(target)) {
    // Only a fully encoded target needs decoding. Decoding a literal target
    // would rewrite its escapes and reject it outright on a stray "%".
    try {
      target = decodeURIComponent(rawPath);
    } catch {
      return "";
    }
    if (!hasHttpScheme(target)) {
      return "";
    }
  }

  return `${target}${search}${hash}`;
}
|
||||
27
public/index.html
Normal file
27
public/index.html
Normal file
@@ -0,0 +1,27 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>Archive</title>
|
||||
<link rel="stylesheet" href="/assets/app.css">
|
||||
<script src="/assets/app.js" defer></script>
|
||||
</head>
|
||||
<body>
|
||||
<main class="shell">
|
||||
<form class="archive-box" id="archive-form" autocomplete="off">
|
||||
<label class="sr-only" for="archive-url">URL</label>
|
||||
<div class="input-row">
|
||||
<input id="archive-url" name="url" type="url" inputmode="url" spellcheck="false" autocomplete="url" placeholder="https://example.com" required>
|
||||
<button id="archive-submit" type="submit">Archive</button>
|
||||
</div>
|
||||
<div class="progress-wrap" id="progress-wrap" hidden>
|
||||
<div class="progress-track" aria-hidden="true">
|
||||
<div class="progress-bar" id="progress-bar"></div>
|
||||
</div>
|
||||
<div class="status-line" id="status-line" aria-live="polite"></div>
|
||||
</div>
|
||||
</form>
|
||||
</main>
|
||||
</body>
|
||||
</html>
|
||||
@@ -52,6 +52,10 @@ case "$1" in
|
||||
archive|help)
|
||||
set -- node src/cli.mjs "$@"
|
||||
;;
|
||||
serve-worker)
|
||||
shift
|
||||
set -- node src/worker-server.mjs "$@"
|
||||
;;
|
||||
esac
|
||||
|
||||
"$@" &
|
||||
|
||||
250
src/archive-catalog.mjs
Normal file
250
src/archive-catalog.mjs
Normal file
@@ -0,0 +1,250 @@
|
||||
import crypto from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { defaultArchivePath } from "./asset-inliner.mjs";
|
||||
|
||||
const INDEX_FILE = ".archive-index.json";
|
||||
const INDEX_VERSION = 1;
|
||||
const COMMENT_RE = /<!--\s*Archived locally\. Source: ([\s\S]*?)\. Created: ([^.]*(?:\.[0-9]+)?Z)\.\s*-->/;
|
||||
|
||||
/**
 * Validate a user-supplied URL and return its canonical string form.
 *
 * @param {unknown} rawUrl Value to interpret as a URL; surrounding whitespace
 *   is ignored.
 * @returns {string} `href` of the parsed URL.
 * @throws {Error} "URL is required" for empty input, "Enter a valid URL" when
 *   parsing fails, and "Only http and https URLs can be archived" for any
 *   other scheme.
 */
export function normalizeArchiveUrl(rawUrl) {
  const candidate = String(rawUrl || "").trim();
  if (candidate === "") {
    throw new Error("URL is required");
  }

  let parsed = null;
  try {
    parsed = new URL(candidate);
  } catch {
    throw new Error("Enter a valid URL");
  }

  const allowedProtocols = ["http:", "https:"];
  if (!allowedProtocols.includes(parsed.protocol)) {
    throw new Error("Only http and https URLs can be archived");
  }
  return parsed.href;
}
|
||||
|
||||
/**
 * Derive a stable, file-system friendly identifier for a URL.
 *
 * The id is a readable slug built from host and path (at most 72 characters,
 * "archive" when nothing usable remains) followed by the first 16 hex digits
 * of the SHA-256 of the normalized URL.
 *
 * @param {string} sourceUrl URL to identify; normalized first.
 * @returns {string} `<slug>-<digest>`.
 */
export function archiveIdForUrl(sourceUrl) {
  const { hostname, pathname, href } = new URL(normalizeArchiveUrl(sourceUrl));

  const slug = (hostname + pathname)
    .replace(/\/+$/, "")
    .replace(/[^a-z0-9]+/gi, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 72);
  const stem = slug || "archive";

  const hash = crypto.createHash("sha256");
  hash.update(href);
  const digest = hash.digest("hex").slice(0, 16);

  return `${stem}-${digest}`;
}
|
||||
|
||||
/**
 * File name under which the archive for `sourceUrl` is expected: the archive
 * id with an `.html` extension.
 *
 * @param {string} sourceUrl URL to archive.
 * @returns {string} File name (no directory part).
 */
export function archiveFileNameForUrl(sourceUrl) {
  const archiveId = archiveIdForUrl(sourceUrl);
  return [archiveId, "html"].join(".");
}
|
||||
|
||||
/**
 * Index of archived pages stored as `.html` files in a single directory.
 *
 * The index maps normalized source URLs to archive records and is persisted
 * as a JSON file inside the archive directory. It is treated as a cache of
 * what is on disk: it is reconciled with the directory when first loaded, and
 * entries whose file has disappeared are dropped.
 */
export class ArchiveCatalog {
  /**
   * @param {{archivePath?: string, publicBasePath?: string}} [options]
   *   `archivePath` is the directory holding archives (defaults to
   *   `defaultArchivePath()`); `publicBasePath` is the URL prefix used when
   *   building `archiveUrl` values (defaults to "/archives").
   */
  constructor(options = {}) {
    this.archivePath = path.resolve(options.archivePath || defaultArchivePath());
    this.publicBasePath = options.publicBasePath || "/archives";
    this.indexPath = path.join(this.archivePath, INDEX_FILE);
    this.index = {
      version: INDEX_VERSION,
      archives: {}
    };
    this.loadPromise = null;
    this.savePromise = Promise.resolve();
  }

  /**
   * Look up the archive for a URL.
   *
   * @param {string} rawUrl URL to look up; normalized first.
   * @returns {Promise<object|null>} Public record (with `archiveUrl`) or null
   *   when no archive file exists for the URL.
   * @throws {Error} When the URL is invalid.
   */
  async findByUrl(rawUrl) {
    const sourceUrl = normalizeArchiveUrl(rawUrl);
    await this.ensureLoaded();

    const indexed = this.index.archives[sourceUrl];
    if (indexed && await this.hasArchiveFile(indexed.fileName)) {
      return this.toPublicRecord(indexed);
    }

    // The indexed file is gone: forget the stale entry.
    if (indexed) {
      delete this.index.archives[sourceUrl];
      await this.saveIndex();
    }

    // Fall back to the deterministic file name in case the file exists but
    // was never indexed.
    const stableFileName = archiveFileNameForUrl(sourceUrl);
    if (await this.hasArchiveFile(stableFileName)) {
      const record = this.upsertRecord(sourceUrl, {
        id: path.basename(stableFileName, ".html"),
        fileName: stableFileName
      });
      await this.saveIndex();
      return this.toPublicRecord(record);
    }

    return null;
  }

  /**
   * Store the outcome of an archive run in the index.
   *
   * @param {string} rawUrl URL that was archived; normalized first.
   * @param {{id?: string, filePath?: string, warnings?: unknown[], externalAssets?: unknown[]}} result
   *   Result reported by the worker. Only the base name of `filePath` is
   *   used; `<id>.html` is the fallback.
   * @returns {Promise<object>} Public record (with `archiveUrl`).
   * @throws {Error} When the URL is invalid or the result does not identify a
   *   safe archive file name.
   */
  async recordResult(rawUrl, result) {
    const sourceUrl = normalizeArchiveUrl(rawUrl);
    await this.ensureLoaded();

    const declaredName = result?.filePath || (result?.id ? `${result.id}.html` : "");
    const fileName = path.basename(declaredName);
    // Without this check a result lacking both fields would be recorded as
    // "undefined.html", and unsafe names would yield URLs that never resolve.
    if (!isSafeArchiveFileName(fileName)) {
      throw new Error("Archive result has no usable file name");
    }

    const id = result.id || path.basename(fileName, ".html");
    const record = this.upsertRecord(sourceUrl, {
      id,
      fileName,
      warningsCount: Array.isArray(result.warnings) ? result.warnings.length : 0,
      externalAssetsCount: Array.isArray(result.externalAssets) ? result.externalAssets.length : 0
    });
    await this.saveIndex();
    return this.toPublicRecord(record);
  }

  /**
   * Load the index once; concurrent callers share the same load. A failed
   * load is not cached, so the next call tries again.
   *
   * @returns {Promise<void>}
   */
  async ensureLoaded() {
    if (!this.loadPromise) {
      this.loadPromise = this.loadIndex().catch((error) => {
        this.loadPromise = null;
        throw error;
      });
    }
    await this.loadPromise;
  }

  /**
   * Read the index file (if present and of the current version) and
   * reconcile it with the files in the archive directory.
   *
   * @returns {Promise<void>}
   */
  async loadIndex() {
    await fs.mkdir(this.archivePath, { recursive: true });

    let mustRewrite = false;
    try {
      const data = JSON.parse(await fs.readFile(this.indexPath, "utf8"));
      if (data && data.version === INDEX_VERSION && data.archives && typeof data.archives === "object") {
        this.index = data;
      }
    } catch (error) {
      if (error instanceof SyntaxError) {
        // The index can be rebuilt from the archive files, so a corrupt file
        // is replaced instead of taking the catalog down.
        mustRewrite = true;
      } else if (error.code !== "ENOENT") {
        throw error;
      }
    }

    const changed = await this.scanArchiveFiles();
    if (changed || mustRewrite) {
      await this.saveIndex();
    }
  }

  /**
   * Bring the in-memory index in line with the directory: add `.html` files
   * whose embedded metadata names a valid source URL, and drop records whose
   * file no longer exists.
   *
   * @returns {Promise<boolean>} True when the index was modified.
   */
  async scanArchiveFiles() {
    let changed = false;
    const entries = await fs.readdir(this.archivePath, { withFileTypes: true }).catch((error) => {
      if (error.code === "ENOENT") {
        return [];
      }
      throw error;
    });

    for (const entry of entries) {
      if (!entry.isFile() || !entry.name.endsWith(".html")) {
        continue;
      }

      const filePath = path.join(this.archivePath, entry.name);
      const metadata = await readArchiveMetadata(filePath);
      if (!metadata?.sourceUrl) {
        continue;
      }

      let sourceUrl;
      try {
        sourceUrl = normalizeArchiveUrl(metadata.sourceUrl);
      } catch {
        continue;
      }
      const current = this.index.archives[sourceUrl];
      if (current?.fileName === entry.name) {
        continue;
      }

      this.index.archives[sourceUrl] = {
        id: path.basename(entry.name, ".html"),
        fileName: entry.name,
        sourceUrl,
        createdAt: metadata.createdAt || new Date().toISOString(),
        updatedAt: new Date().toISOString()
      };
      changed = true;
    }

    for (const [sourceUrl, record] of Object.entries(this.index.archives)) {
      if (!record?.fileName || !await this.hasArchiveFile(record.fileName)) {
        delete this.index.archives[sourceUrl];
        changed = true;
      }
    }

    return changed;
  }

  /**
   * Create or replace the in-memory record for a URL. `createdAt` and any
   * counters not supplied in `values` are carried over from the previous
   * record.
   *
   * @param {string} sourceUrl Normalized source URL (index key).
   * @param {{id: string, fileName: string, warningsCount?: number, externalAssetsCount?: number}} values
   * @returns {object} The stored record.
   */
  upsertRecord(sourceUrl, values) {
    const previous = this.index.archives[sourceUrl];
    const now = new Date().toISOString();
    const record = {
      id: values.id,
      fileName: values.fileName,
      sourceUrl,
      createdAt: previous?.createdAt || now,
      updatedAt: now,
      warningsCount: values.warningsCount ?? previous?.warningsCount ?? 0,
      externalAssetsCount: values.externalAssetsCount ?? previous?.externalAssetsCount ?? 0
    };
    this.index.archives[sourceUrl] = record;
    return record;
  }

  /**
   * @param {unknown} fileName Candidate archive file name.
   * @returns {Promise<boolean>} True when the name is safe and refers to a
   *   regular file in the archive directory.
   */
  async hasArchiveFile(fileName) {
    if (!isSafeArchiveFileName(fileName)) {
      return false;
    }
    const stat = await fs.stat(path.join(this.archivePath, fileName)).catch(() => null);
    return !!stat?.isFile();
  }

  /**
   * @param {{fileName: string}} record Stored record.
   * @returns {object} Copy of the record extended with its public `archiveUrl`.
   */
  toPublicRecord(record) {
    return {
      ...record,
      archiveUrl: `${this.publicBasePath}/${encodeURIComponent(record.fileName)}`
    };
  }

  /**
   * Persist the index. Writes are serialized, and each one goes to a
   * temporary file that is then renamed over the index file.
   *
   * @returns {Promise<void>} Settles with the outcome of this write.
   */
  async saveIndex() {
    const pending = this.savePromise.then(async () => {
      await fs.mkdir(this.archivePath, { recursive: true });
      const tmpPath = `${this.indexPath}.${process.pid}.tmp`;
      await fs.writeFile(tmpPath, `${JSON.stringify(this.index, null, 2)}\n`, "utf8");
      await fs.rename(tmpPath, this.indexPath);
    });
    // The queue itself must never stay rejected: a rejected promise would
    // skip every later `.then` callback and make all future saves fail with
    // the first error. The caller still receives this write's own outcome.
    this.savePromise = pending.catch(() => {});
    return pending;
  }
}
|
||||
|
||||
/**
 * Decide whether a value may be used as an archive file name.
 *
 * @param {unknown} fileName Candidate name.
 * @returns {boolean} True only for strings that end in ".html", do not start
 *   with a dot, and contain no directory component.
 */
export function isSafeArchiveFileName(fileName) {
  if (typeof fileName !== "string") {
    return false;
  }
  if (fileName.startsWith(".") || !fileName.endsWith(".html")) {
    return false;
  }
  return path.basename(fileName) === fileName;
}
|
||||
|
||||
/**
 * Read the "Archived locally" marker comment from the first 4096 bytes of an
 * archive file.
 *
 * @param {string} filePath Path of the archive file.
 * @returns {Promise<{sourceUrl: string, createdAt: string}|null>} Source URL
 *   (with "- -" turned back into "--") and creation timestamp, or null when
 *   the file cannot be opened or carries no marker.
 */
async function readArchiveMetadata(filePath) {
  let handle;
  try {
    handle = await fs.open(filePath, "r");
  } catch {
    return null;
  }

  try {
    const chunk = Buffer.alloc(4096);
    const { bytesRead } = await handle.read(chunk, 0, chunk.length, 0);
    const found = COMMENT_RE.exec(chunk.toString("utf8", 0, bytesRead));
    if (found === null) {
      return null;
    }
    const [, escapedSource, createdAt] = found;
    return {
      sourceUrl: escapedSource.replaceAll("- -", "--"),
      createdAt
    };
  } finally {
    await handle.close();
  }
}
|
||||
345
src/backend-server.mjs
Normal file
345
src/backend-server.mjs
Normal file
@@ -0,0 +1,345 @@
|
||||
#!/usr/bin/env node
|
||||
import { createReadStream } from "node:fs";
|
||||
import fs from "node:fs/promises";
|
||||
import http from "node:http";
|
||||
import path from "node:path";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { ArchiveCatalog, archiveIdForUrl, isSafeArchiveFileName, normalizeArchiveUrl } from "./archive-catalog.mjs";
|
||||
import { defaultArchivePath } from "./asset-inliner.mjs";
|
||||
|
||||
const PORT = Number(process.env.PORT || 5732);
|
||||
const ARCHIVE_PATH = path.resolve(process.env.ARCHIVE_PATH || defaultArchivePath());
|
||||
const ARCHIVE_WORKER_URL = process.env.ARCHIVE_WORKER_URL || "http://127.0.0.1:5733";
|
||||
const PUBLIC_ARCHIVES_PATH = process.env.PUBLIC_ARCHIVES_PATH || "/archives";
|
||||
const JOB_TIMEOUT_MS = Number(process.env.ARCHIVE_JOB_TIMEOUT_MS || 120000);
|
||||
const MAX_BODY_BYTES = 64 * 1024;
|
||||
|
||||
const catalog = new ArchiveCatalog({
|
||||
archivePath: ARCHIVE_PATH,
|
||||
publicBasePath: PUBLIC_ARCHIVES_PATH
|
||||
});
|
||||
|
||||
const jobs = new Map();
|
||||
const activeJobByUrl = new Map();
|
||||
let workerQueue = Promise.resolve();
|
||||
|
||||
// Every request goes through `route`; anything it throws becomes a JSON error
// response using the error's `statusCode` (500 when it has none).
const server = http.createServer(async (req, res) => {
  try {
    await route(req, res);
  } catch (error) {
    // Once headers are out a second writeHead would throw inside this async
    // handler and surface as an unhandled rejection; all that can still be
    // done is to abort the response.
    if (res.headersSent) {
      res.destroy();
      return;
    }
    sendJson(res, error.statusCode || 500, {
      ok: false,
      error: error.message || "Unexpected error"
    });
  }
});
|
||||
|
||||
server.listen(PORT, () => {
|
||||
console.log(`archive backend listening on ${PORT}`);
|
||||
console.log(`archive path: ${ARCHIVE_PATH}`);
|
||||
console.log(`archive worker: ${ARCHIVE_WORKER_URL}`);
|
||||
});
|
||||
|
||||
const cleanupTimer = setInterval(cleanupJobs, 10 * 60 * 1000);
|
||||
cleanupTimer.unref?.();
|
||||
|
||||
/**
 * Dispatch one HTTP request.
 *
 * Routes:
 * - `GET /healthz`                      liveness probe
 * - `GET /api/archives/lookup?url=...`  look up an existing archive
 * - `POST /api/archives`                find an archive or start a job
 * - `GET /api/jobs/<id>`                job status
 * - `GET <PUBLIC_ARCHIVES_PATH>/<file>` archived HTML
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 * @returns {Promise<void>}
 * @throws {Error} Errors carrying `statusCode` 400 for malformed client
 *   input; other failures propagate unchanged.
 */
async function route(req, res) {
  let requestUrl;
  try {
    requestUrl = new URL(req.url, `http://${req.headers.host || "localhost"}`);
  } catch {
    throw httpError(400, "Invalid request URL");
  }
  const { pathname } = requestUrl;
  const isGet = req.method === "GET";

  if (isGet && pathname === "/healthz") {
    sendJson(res, 200, { ok: true });
    return;
  }

  if (isGet && pathname === "/api/archives/lookup") {
    const sourceUrl = sourceUrlOrBadRequest(requestUrl.searchParams.get("url"));
    const archive = await catalog.findByUrl(sourceUrl);
    sendJson(res, 200, {
      ok: true,
      exists: !!archive,
      archive
    });
    return;
  }

  if (req.method === "POST" && pathname === "/api/archives") {
    const body = await readJsonBody(req);
    // The body may be any JSON value (including null), hence the `?.`.
    const sourceUrl = sourceUrlOrBadRequest(body?.url);
    const response = await createOrFindArchive(sourceUrl);
    sendJson(res, response.statusCode, response.body);
    return;
  }

  const jobMatch = pathname.match(/^\/api\/jobs\/([^/]+)$/);
  if (isGet && jobMatch) {
    const job = jobs.get(jobMatch[1]);
    if (!job) {
      sendJson(res, 404, { ok: false, error: "Job not found" });
      return;
    }
    sendJson(res, 200, {
      ok: true,
      job: publicJob(job)
    });
    return;
  }

  if (isGet && pathname.startsWith(`${PUBLIC_ARCHIVES_PATH}/`)) {
    await serveArchive(pathname.slice(PUBLIC_ARCHIVES_PATH.length + 1), res);
    return;
  }

  sendJson(res, 404, { ok: false, error: "Not found" });
}

// Validation failures are the client's fault: report them as 400 instead of
// letting the plain Error fall through to the generic 500 handler.
function sourceUrlOrBadRequest(rawUrl) {
  try {
    return normalizeArchiveUrl(rawUrl);
  } catch (error) {
    throw httpError(400, error.message || "Invalid URL");
  }
}
|
||||
|
||||
/**
 * Resolve an archive request to one of three outcomes:
 * an archive that already exists (200, mode "existing"), a job that is
 * already underway for the same URL (202, mode "active"), or a freshly
 * queued job (202, mode "created").
 *
 * @param {string} sourceUrl Normalized URL to archive.
 * @returns {Promise<{statusCode: number, body: object}>} Status code and JSON
 *   body for the HTTP response.
 */
async function createOrFindArchive(sourceUrl) {
  // Shared shape of both "job accepted" answers.
  const accepted = (mode, job) => ({
    statusCode: 202,
    body: {
      ok: true,
      status: job.status,
      mode,
      job: publicJob(job)
    }
  });

  const existing = await catalog.findByUrl(sourceUrl);
  if (existing) {
    return {
      statusCode: 200,
      body: {
        ok: true,
        status: "done",
        mode: "existing",
        archive: existing
      }
    };
  }

  const runningId = activeJobByUrl.get(sourceUrl);
  const running = runningId ? jobs.get(runningId) : null;
  if (running && !isTerminal(running.status)) {
    return accepted("active", running);
  }

  const job = {
    id: cryptoRandomId(),
    archiveId: archiveIdForUrl(sourceUrl),
    sourceUrl,
    status: "queued",
    message: "Queued",
    createdAt: new Date().toISOString(),
    startedAt: null,
    updatedAt: new Date().toISOString(),
    finishedAt: null,
    archive: null,
    error: null
  };
  jobs.set(job.id, job);
  activeJobByUrl.set(sourceUrl, job.id);
  enqueueJob(job);

  return accepted("created", job);
}
|
||||
|
||||
/**
 * Append a job to the worker queue. Jobs run strictly one after another, and
 * a job starts regardless of how the previous queue entry settled.
 *
 * @param {object} job Job to execute when its turn comes.
 * @returns {void}
 */
function enqueueJob(job) {
  const startJob = () => executeJob(job);
  const previous = workerQueue;
  workerQueue = previous.then(startJob, startJob);
}
|
||||
|
||||
/**
 * Run one queued job: mark it running, ask the worker to archive the page,
 * record the result in the catalog and mark the job done, or failed with the
 * error message. The URL's "active job" entry is always released at the end.
 * Jobs that are no longer in the "queued" state are ignored.
 *
 * @param {{sourceUrl: string, archiveId: string, status: string}} job
 * @returns {Promise<void>} Never rejects because of a failed archive.
 */
async function executeJob(job) {
  if (job.status !== "queued") {
    return;
  }

  const timestamp = () => new Date().toISOString();
  updateJob(job, { status: "running", message: "Archiving", startedAt: timestamp() });

  try {
    const workerResult = await requestWorkerArchive(job.sourceUrl, job.archiveId);
    const archive = await catalog.recordResult(job.sourceUrl, workerResult);
    updateJob(job, { status: "done", message: "Opening", archive, finishedAt: timestamp() });
  } catch (failure) {
    updateJob(job, {
      status: "failed",
      message: "Failed",
      error: failure.message || "Archive failed",
      finishedAt: timestamp()
    });
  } finally {
    activeJobByUrl.delete(job.sourceUrl);
  }
}
|
||||
|
||||
/**
 * Ask the worker to archive one URL and return its result.
 *
 * Sends `POST /archive` to `ARCHIVE_WORKER_URL` with `{url, id}` and aborts
 * the request after `JOB_TIMEOUT_MS`.
 *
 * @param {string} sourceUrl URL to archive.
 * @param {string} archiveId Identifier the worker should use for the archive.
 * @returns {Promise<object>} `result` from the worker's answer, or the whole
 *   answer when it has no `result` member.
 * @throws {Error} On timeout, transport failure, a non-2xx status, an answer
 *   carrying `ok: false`, or an answer that is not a JSON object.
 */
async function requestWorkerArchive(sourceUrl, archiveId) {
  const workerUrl = new URL("/archive", ARCHIVE_WORKER_URL);
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), JOB_TIMEOUT_MS);
  timeout.unref?.();

  try {
    const response = await fetch(workerUrl, {
      method: "POST",
      headers: {
        "content-type": "application/json"
      },
      body: JSON.stringify({ url: sourceUrl, id: archiveId }),
      signal: controller.signal
    });
    const text = await response.text();
    const parsed = parseJson(text);
    // Valid JSON is not necessarily an object (`null`, a number, ...); guard
    // the property reads so such an answer is reported as what it is.
    const isObject = parsed !== null && typeof parsed === "object";
    if (!response.ok || (isObject && parsed.ok === false)) {
      throw new Error((isObject && parsed.error) || text || `Worker returned ${response.status}`);
    }
    if (!isObject) {
      throw new Error("Worker returned an invalid response");
    }
    return parsed.result || parsed;
  } catch (error) {
    if (error.name === "AbortError") {
      throw new Error(`Archive timed out after ${Math.round(JOB_TIMEOUT_MS / 1000)} seconds`);
    }
    throw error;
  } finally {
    clearTimeout(timeout);
  }
}
|
||||
|
||||
/**
 * Stream one archived HTML file to the client.
 *
 * @param {string} rawFileName Percent-encoded file name taken from the URL
 *   path below the public archives prefix.
 * @param {import("node:http").ServerResponse} res Response to write to.
 * @returns {Promise<void>} Resolves once streaming has been set up (or an
 *   error response has been sent); it does not wait for the transfer to end.
 */
async function serveArchive(rawFileName, res) {
  let fileName;
  try {
    fileName = decodeURIComponent(rawFileName);
  } catch {
    sendJson(res, 400, { ok: false, error: "Invalid archive path" });
    return;
  }

  // Only plain `*.html` names inside the archive directory are served.
  if (!isSafeArchiveFileName(fileName)) {
    sendJson(res, 404, { ok: false, error: "Archive not found" });
    return;
  }

  const filePath = path.join(ARCHIVE_PATH, fileName);
  const stat = await fs.stat(filePath).catch(() => null);
  if (!stat?.isFile()) {
    sendJson(res, 404, { ok: false, error: "Archive not found" });
    return;
  }

  // Stream raw bytes: decoding to strings and re-encoding would be wasted
  // work and would replace any byte sequence that is not valid UTF-8.
  const stream = createReadStream(filePath);

  // Commit to a 200 only after the file has really been opened, so a file
  // that vanished since the stat above can still be answered with a 404.
  stream.once("open", () => {
    res.writeHead(200, {
      "content-type": "text/html; charset=utf-8",
      "cache-control": "no-store"
    });
    stream.pipe(res);
  });
  stream.on("error", () => {
    if (!res.headersSent) {
      sendJson(res, 404, { ok: false, error: "Archive not found" });
    } else {
      res.destroy();
    }
  });
  // `pipe` does not destroy the source when the destination closes early;
  // release the file descriptor if the client disconnects mid-transfer.
  res.on("close", () => stream.destroy());
}
|
||||
|
||||
/**
 * Merge `values` into a job in place and stamp it with the current time.
 * The fresh `updatedAt` always wins over one supplied in `values`.
 *
 * @param {object} job Job to mutate.
 * @param {object} values Fields to overwrite.
 * @returns {void}
 */
function updateJob(job, values) {
  const stamp = new Date().toISOString();
  Object.assign(job, values);
  job.updatedAt = stamp;
}
|
||||
|
||||
/**
 * Build the client-facing view of a job.
 *
 * `startedAt` falls back to `createdAt` while the job is still queued.
 * `elapsedMs` is measured from that instant up to `finishedAt` for finished
 * jobs and up to now otherwise, so it stops growing once the job has
 * settled. It is never negative and is 0 when no usable timestamp exists.
 *
 * @param {object} job Internal job record.
 * @returns {object} Plain object safe to serialize to the client.
 */
function publicJob(job) {
  const startedAt = job.startedAt || job.createdAt;
  const startMs = startedAt ? Date.parse(startedAt) : Number.NaN;
  const endMs = job.finishedAt ? Date.parse(job.finishedAt) : Date.now();
  const elapsed = endMs - startMs;

  return {
    id: job.id,
    sourceUrl: job.sourceUrl,
    status: job.status,
    message: job.message,
    createdAt: job.createdAt,
    startedAt,
    updatedAt: job.updatedAt,
    finishedAt: job.finishedAt,
    elapsedMs: Number.isFinite(elapsed) ? Math.max(0, elapsed) : 0,
    archive: job.archive,
    error: job.error
  };
}
|
||||
|
||||
/**
 * @param {string} status Job status.
 * @returns {boolean} True for the final states "done" and "failed".
 */
function isTerminal(status) {
  const finalStates = ["done", "failed"];
  return finalStates.includes(status);
}
|
||||
|
||||
/**
 * Forget jobs that reached a final state more than one hour ago. The age is
 * taken from `finishedAt`, falling back to `updatedAt`.
 *
 * @returns {void}
 */
function cleanupJobs() {
  const oldestKept = Date.now() - 60 * 60 * 1000;
  const expiredIds = [...jobs]
    .filter(([, job]) => isTerminal(job.status) && Date.parse(job.finishedAt || job.updatedAt) < oldestKept)
    .map(([id]) => id);

  for (const id of expiredIds) {
    jobs.delete(id);
  }
}
|
||||
|
||||
/**
 * Read a request body (at most `MAX_BODY_BYTES`) and parse it as JSON.
 *
 * @param {import("node:http").IncomingMessage} req
 * @returns {Promise<unknown>} The parsed JSON value.
 * @throws {Error} With `statusCode` 400 when the body is blank or is not
 *   valid JSON; errors from reading the body propagate unchanged.
 */
async function readJsonBody(req) {
  const raw = await readRequestBody(req, MAX_BODY_BYTES);
  if (raw.trim() === "") {
    throw httpError(400, "Request body is required");
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    throw httpError(400, "Request body must be JSON");
  }
  return parsed;
}
|
||||
|
||||
/**
 * Collect a request body as UTF-8 text, refusing bodies over `maxBytes`.
 *
 * @param {import("node:http").IncomingMessage} req Request stream.
 * @param {number} maxBytes Largest accepted body size in bytes.
 * @returns {Promise<string>} The complete body.
 * @throws {Error} With `statusCode` 413 when the limit is exceeded; stream
 *   errors are passed through.
 */
function readRequestBody(req, maxBytes) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    let total = 0;
    let overflowed = false;

    req.on("data", (chunk) => {
      if (overflowed) {
        return;
      }
      total += chunk.length;
      if (total > maxBytes) {
        // Stop buffering but leave the connection alone: destroying the
        // request tears down the socket, and then the 413 answer could never
        // reach the client. Memory stays bounded because later chunks are
        // dropped above.
        overflowed = true;
        chunks.length = 0;
        reject(httpError(413, "Request body is too large"));
        return;
      }
      chunks.push(chunk);
    });
    req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")));
    req.on("error", reject);
  });
}
|
||||
|
||||
/**
 * Send `payload` as a newline-terminated JSON response that must not be cached.
 *
 * @param {object} res - Response object exposing `writeHead` and `end`.
 * @param {number} statusCode - HTTP status to send.
 * @param {*} payload - Value to serialise with `JSON.stringify`.
 * @returns {void}
 */
function sendJson(res, statusCode, payload) {
  const headers = {
    "content-type": "application/json; charset=utf-8",
    "cache-control": "no-store"
  };
  // Headers go out before serialisation, matching the established call order.
  res.writeHead(statusCode, headers);
  const line = `${JSON.stringify(payload)}\n`;
  res.end(line);
}
|
||||
|
||||
/**
 * Parse a JSON string, surfacing the raw text as the error message on failure.
 *
 * @param {string} text - Text expected to contain JSON.
 * @returns {*} The parsed value.
 * @throws {Error} With `text` as the message when parsing fails, or
 *   "Worker returned invalid JSON" when `text` is empty.
 */
function parseJson(text) {
  let value;
  try {
    value = JSON.parse(text);
  } catch {
    const message = text || "Worker returned invalid JSON";
    throw new Error(message);
  }
  return value;
}
|
||||
|
||||
/**
 * Produce a fresh random identifier.
 *
 * @returns {string} The value returned by `randomUUID()`.
 */
function cryptoRandomId() {
  const id = randomUUID();
  return id;
}
|
||||
|
||||
/**
 * Create an Error tagged with the HTTP status it should be reported as.
 *
 * @param {number} statusCode - HTTP status, stored on `error.statusCode`.
 * @param {string} message - Error message.
 * @returns {Error} The tagged error; the caller is expected to throw it.
 */
function httpError(statusCode, message) {
  return Object.assign(new Error(message), { statusCode });
}
|
||||
157
src/frontend-server.mjs
Normal file
157
src/frontend-server.mjs
Normal file
@@ -0,0 +1,157 @@
|
||||
#!/usr/bin/env node
|
||||
import fs from "node:fs/promises";
|
||||
import http from "node:http";
|
||||
import path from "node:path";
|
||||
import { Readable } from "node:stream";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
// Directory of this module; ES modules have no built-in __dirname.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Listening port: the PORT environment variable, else 5731.
const PORT = Number(process.env.PORT || 5731);

// Origin that /api/* and /archives/* requests are forwarded to.
const BACKEND_URL = process.env.BACKEND_URL || "http://127.0.0.1:5732";

// Static files live in ../public relative to this module.
const PUBLIC_DIR = path.resolve(__dirname, "..", "public");

// Upper bound on a request body buffered for proxying (128 KiB).
const MAX_PROXY_BODY_BYTES = 128 * 1024;

// File extension -> Content-Type for static files; unknown extensions are
// served as application/octet-stream by serveStatic.
const CONTENT_TYPES = new Map(Object.entries({
  ".css": "text/css; charset=utf-8",
  ".html": "text/html; charset=utf-8",
  ".js": "text/javascript; charset=utf-8",
  ".svg": "image/svg+xml"
}));

// Every request goes through route(); a failure becomes a plain-text response
// using the error's statusCode when it has one, otherwise 500.
const server = http.createServer((req, res) =>
  route(req, res).catch((error) => {
    const status = error.statusCode || 500;
    res.writeHead(status, {
      "content-type": "text/plain; charset=utf-8",
      "cache-control": "no-store"
    });
    res.end(error.message || "Unexpected error");
  })
);

server.listen(PORT, function onListening() {
  console.log(`archive frontend listening on ${PORT}`);
  console.log(`archive backend: ${BACKEND_URL}`);
});
|
||||
|
||||
/**
 * Dispatch a frontend request.
 *
 * - `/healthz` answers `{"ok":true}` for any method.
 * - `/api/*` and `/archives/*` are forwarded to the backend, any method.
 * - Everything else must be GET or HEAD: `/assets/*` serves that static file,
 *   and any other path serves `index.html`.
 *
 * @param {import("node:http").IncomingMessage} req - Incoming request.
 * @param {import("node:http").ServerResponse} res - Response to write to.
 * @returns {Promise<void>}
 * @throws A 400 `httpError` for an unparseable request target and a 405 for a
 *   non-GET/HEAD method on a static path; handler errors propagate.
 */
async function route(req, res) {
  // Only the path and query are used below, so the URL is resolved against a
  // fixed base. Building the base from the client's Host header let a
  // malformed Host turn every request into a 500.
  let requestUrl;
  try {
    requestUrl = new URL(req.url, "http://localhost");
  } catch {
    throw httpError(400, "Invalid request URL");
  }
  const { pathname } = requestUrl;

  if (pathname === "/healthz") {
    res.writeHead(200, {
      "content-type": "application/json; charset=utf-8",
      "cache-control": "no-store"
    });
    res.end('{"ok":true}\n');
    return;
  }

  if (pathname.startsWith("/api/") || pathname.startsWith("/archives/")) {
    await proxyToBackend(req, res, requestUrl);
    return;
  }

  if (req.method !== "GET" && req.method !== "HEAD") {
    throw httpError(405, "Method not allowed");
  }

  if (pathname.startsWith("/assets/")) {
    await serveStatic(pathname, res);
    return;
  }

  // Any other path gets the app shell.
  await serveStatic("/index.html", res);
}
|
||||
|
||||
/**
 * Serve one file from PUBLIC_DIR.
 *
 * @param {string} urlPath - URL path (still percent-encoded) relative to the
 *   public directory, e.g. "/assets/app.js".
 * @param {import("node:http").ServerResponse} res - Response to write to.
 * @returns {Promise<void>}
 * @throws A 400 `httpError` when the path cannot be decoded or contains a NUL
 *   byte; a 404 when it escapes PUBLIC_DIR or does not name a readable
 *   regular file; other filesystem errors propagate unchanged.
 */
async function serveStatic(urlPath, res) {
  let decodedPath;
  try {
    decodedPath = decodeURIComponent(urlPath);
  } catch {
    throw httpError(400, "Invalid path");
  }
  // fs rejects paths containing NUL with an argument error; report it as a
  // client error rather than letting it surface as a 500.
  if (decodedPath.includes("\0")) {
    throw httpError(400, "Invalid path");
  }

  const filePath = path.join(PUBLIC_DIR, decodedPath);
  const relative = path.relative(PUBLIC_DIR, filePath);
  // Match only a real parent-directory step, so a file merely named "..foo"
  // inside PUBLIC_DIR is not mistaken for an escape.
  const escapesRoot =
    relative === ".." ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);
  if (escapesRoot) {
    throw httpError(404, "Not found");
  }

  // A directory (EISDIR) or a path through a regular file (ENOTDIR) is just
  // as much "no such resource" as a missing file.
  const notFoundCodes = ["ENOENT", "EISDIR", "ENOTDIR"];
  const bytes = await fs.readFile(filePath).catch((error) => {
    if (notFoundCodes.includes(error.code)) {
      throw httpError(404, "Not found");
    }
    throw error;
  });

  // Extensions are matched case-insensitively so "APP.JS" gets a real type.
  const extension = path.extname(filePath).toLowerCase();
  const type = CONTENT_TYPES.get(extension) || "application/octet-stream";
  res.writeHead(200, {
    "content-type": type,
    "cache-control": "no-store"
  });
  res.end(bytes);
}
|
||||
|
||||
/**
 * Forward a request to the backend and relay its response to the client.
 *
 * Only the path and query of `requestUrl` are forwarded; the origin always
 * comes from BACKEND_URL. Bodies of non-GET/HEAD requests are buffered (capped
 * at MAX_PROXY_BODY_BYTES) before being sent upstream. The function returns
 * once the response body has started streaming, not when it has finished.
 *
 * @param {import("node:http").IncomingMessage} req - Client request.
 * @param {import("node:http").ServerResponse} res - Client response.
 * @param {URL} requestUrl - Parsed request target.
 * @returns {Promise<void>}
 * @throws A 502 `httpError` when the backend cannot be reached; errors from
 *   `readRequestBody` (such as 413) propagate unchanged.
 */
async function proxyToBackend(req, res, requestUrl) {
  const upstreamUrl = new URL(`${requestUrl.pathname}${requestUrl.search}`, BACKEND_URL);

  // Connection-scoped headers describe the client<->proxy hop and must not be
  // replayed upstream. Host and Content-Length are recomputed by fetch for the
  // new request, and Expect has already been handled on the client hop.
  const skippedRequestHeaders = new Set([
    "connection",
    "content-length",
    "expect",
    "host",
    "keep-alive",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade"
  ]);
  const headers = {};
  for (const [key, value] of Object.entries(req.headers)) {
    if (value === undefined || skippedRequestHeaders.has(key.toLowerCase())) {
      continue;
    }
    headers[key] = Array.isArray(value) ? value.join(", ") : value;
  }

  const hasBody = req.method !== "GET" && req.method !== "HEAD";
  const body = hasBody ? await readRequestBody(req, MAX_PROXY_BODY_BYTES) : undefined;

  let upstream;
  try {
    upstream = await fetch(upstreamUrl, {
      method: req.method,
      headers,
      body
    });
  } catch (cause) {
    // An unreachable backend is a gateway failure, not a fault of this server.
    const error = httpError(502, "Backend request failed");
    error.cause = cause;
    throw error;
  }

  // Content-Encoding is dropped because the relayed body is the decoded one.
  // The upstream Content-Length then describes the encoded size, so it must
  // be dropped too whenever an encoding was present.
  const skippedResponseHeaders = new Set([
    "connection",
    "content-encoding",
    "keep-alive",
    "transfer-encoding"
  ]);
  const bodyWasDecoded = upstream.headers.has("content-encoding");
  const responseHeaders = {};
  upstream.headers.forEach((value, key) => {
    const name = key.toLowerCase();
    if (skippedResponseHeaders.has(name)) {
      return;
    }
    if (name === "content-length" && bodyWasDecoded) {
      return;
    }
    responseHeaders[key] = value;
  });

  res.writeHead(upstream.status, responseHeaders);
  if (req.method === "HEAD" || !upstream.body) {
    res.end();
    return;
  }

  const upstreamBody = Readable.fromWeb(upstream.body);
  // pipe() does not forward errors, and an "error" event with no listener
  // would crash the process. Headers are already sent, so the only honest
  // signal left is to abort the client connection.
  upstreamBody.on("error", () => res.destroy());
  // Stop pulling from the backend once the client has gone away.
  res.on("close", () => upstreamBody.destroy());
  upstreamBody.pipe(res);
}
|
||||
|
||||
/**
 * Buffer an incoming request body as raw bytes, enforcing a size cap.
 *
 * @param {import("node:stream").Readable} req - Request stream to read.
 * @param {number} maxBytes - Largest body, in bytes, that is accepted.
 * @returns {Promise<Buffer>} The complete body.
 * @throws Rejects with a 413 `httpError` once more than `maxBytes` have
 *   arrived, or with the stream's own error if reading fails.
 */
function readRequestBody(req, maxBytes) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    let total = 0;

    const onData = (chunk) => {
      total += chunk.length;
      if (total <= maxBytes) {
        chunks.push(chunk);
        return;
      }
      // Stop buffering but leave the connection alive. Destroying the request
      // here would close the socket before the caller could write the 413, so
      // the client would see a reset instead of the error. The rest of the
      // body is drained and discarded instead.
      req.off("data", onData);
      chunks.length = 0;
      req.resume();
      reject(httpError(413, "Request body is too large"));
    };

    req.on("data", onData);
    // After a rejection this resolve is a no-op: the promise is already settled.
    req.on("end", () => resolve(Buffer.concat(chunks)));
    req.on("error", reject);
  });
}
|
||||
|
||||
/**
 * Build an Error that carries the HTTP status the server should answer with.
 *
 * @param {number} statusCode - HTTP status, exposed as `error.statusCode`.
 * @param {string} message - Error message, used as the response text.
 * @returns {Error} The tagged error (not thrown here).
 */
function httpError(statusCode, message) {
  const tagged = Object.assign(new Error(message), { statusCode });
  return tagged;
}
|
||||
103
src/worker-server.mjs
Normal file
103
src/worker-server.mjs
Normal file
@@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env node
|
||||
import http from "node:http";
|
||||
import path from "node:path";
|
||||
import { archivePage, defaultArchivePath } from "./archiver.mjs";
|
||||
import { archiveIdForUrl, normalizeArchiveUrl } from "./archive-catalog.mjs";
|
||||
|
||||
// Listening port: PORT, then ARCHIVE_WORKER_PORT, then the 5733 default.
const PORT = Number(process.env.PORT || process.env.ARCHIVE_WORKER_PORT || 5733);

// Absolute directory archives are written to.
const ARCHIVE_PATH = path.resolve(process.env.ARCHIVE_PATH || defaultArchivePath());

// Upper bound on an accepted request body (64 KiB).
const MAX_BODY_BYTES = 64 * 1024;

// Tail of the archive job chain; enqueueArchive appends to it so that jobs
// run one after another.
let queue = Promise.resolve();

// Every request goes through route(); a failure becomes a JSON error response
// using the error's statusCode when it has one, otherwise 500.
const server = http.createServer((req, res) =>
  route(req, res).catch((error) => {
    const status = error.statusCode || 500;
    sendJson(res, status, {
      ok: false,
      error: error.message || "Unexpected error"
    });
  })
);

server.listen(PORT, function onListening() {
  console.log(`archive worker listening on ${PORT}`);
  console.log(`archive path: ${ARCHIVE_PATH}`);
});
|
||||
|
||||
/**
 * Dispatch a worker request.
 *
 * - `GET /healthz` answers `{ ok: true }`.
 * - `POST /archive` expects a JSON object with a `url` and an optional
 *   non-blank string `id`. It queues the archive job, waits for it, and
 *   answers `{ ok: true, result }`.
 * - Anything else answers 404.
 *
 * @param {import("node:http").IncomingMessage} req - Incoming request.
 * @param {import("node:http").ServerResponse} res - Response to write to.
 * @returns {Promise<void>}
 * @throws A 400 `httpError` for an unparseable request target, a body that is
 *   not a JSON object, or a URL rejected by `normalizeArchiveUrl`; errors from
 *   the archive job itself propagate unchanged.
 */
async function route(req, res) {
  // Only the path is used, so resolve against a fixed base rather than the
  // client-supplied Host header, which may be malformed.
  let requestUrl;
  try {
    requestUrl = new URL(req.url, "http://localhost");
  } catch {
    throw httpError(400, "Invalid request URL");
  }
  const { pathname } = requestUrl;

  if (req.method === "GET" && pathname === "/healthz") {
    sendJson(res, 200, { ok: true });
    return;
  }

  if (req.method === "POST" && pathname === "/archive") {
    const body = await readJsonBody(req);
    // Valid JSON is not necessarily an object: `null`, numbers and arrays
    // must be refused before any property is read.
    if (body === null || typeof body !== "object" || Array.isArray(body)) {
      throw httpError(400, "Request body must be a JSON object");
    }

    let sourceUrl;
    try {
      sourceUrl = normalizeArchiveUrl(body.url);
    } catch (cause) {
      // A URL the normalizer refuses is a client mistake. Keep any status the
      // normalizer already chose, otherwise report 400 instead of 500.
      const error = httpError(cause.statusCode || 400, cause.message || "Invalid url");
      error.cause = cause;
      throw error;
    }

    const requestedId = typeof body.id === "string" ? body.id.trim() : "";
    const id = requestedId || archiveIdForUrl(sourceUrl);
    const result = await enqueueArchive(sourceUrl, id);
    sendJson(res, 200, {
      ok: true,
      result
    });
    return;
  }

  sendJson(res, 404, { ok: false, error: "Not found" });
}
|
||||
|
||||
/**
 * Append an archive job to the module-level `queue` so that jobs run one at
 * a time, in arrival order.
 *
 * The job starts once the previous one has settled, whether it succeeded or
 * failed, so one failure does not block the jobs behind it.
 *
 * @param {string} sourceUrl - URL to archive.
 * @param {string} id - Archive identifier passed to `archivePage`.
 * @returns {Promise<*>} Settles with the outcome of this job's `archivePage`
 *   call.
 */
function enqueueArchive(sourceUrl, id) {
  const startArchive = () => {
    const options = { archivePath: ARCHIVE_PATH, id };
    return archivePage(sourceUrl, options);
  };
  // The same starter serves as both fulfilment and rejection handler.
  const pending = queue.then(startArchive, startArchive);
  queue = pending;
  return pending;
}
|
||||
|
||||
/**
 * Read the request body (capped at MAX_BODY_BYTES) and decode it as JSON.
 *
 * @param {object} req - Incoming request, handed to `readRequestBody`.
 * @returns {Promise<*>} The parsed JSON value.
 * @throws A 400 `httpError` when the body is blank or is not valid JSON;
 *   errors from `readRequestBody` propagate unchanged.
 */
async function readJsonBody(req) {
  const payload = await readRequestBody(req, MAX_BODY_BYTES);
  const isBlank = payload.trim() === "";
  if (isBlank) {
    throw httpError(400, "Request body is required");
  }

  let decoded;
  try {
    decoded = JSON.parse(payload);
  } catch {
    throw httpError(400, "Request body must be JSON");
  }
  return decoded;
}
|
||||
|
||||
/**
 * Buffer an incoming request body and decode it as UTF-8, enforcing a size cap.
 *
 * @param {import("node:stream").Readable} req - Request stream to read.
 * @param {number} maxBytes - Largest body, in bytes, that is accepted.
 * @returns {Promise<string>} The complete body decoded as UTF-8.
 * @throws Rejects with a 413 `httpError` once more than `maxBytes` have
 *   arrived, or with the stream's own error if reading fails.
 */
function readRequestBody(req, maxBytes) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    let total = 0;

    const onData = (chunk) => {
      total += chunk.length;
      if (total <= maxBytes) {
        chunks.push(chunk);
        return;
      }
      // Stop buffering but leave the connection alive. Destroying the request
      // here would close the socket before the caller could write the 413, so
      // the client would see a reset instead of the error. The rest of the
      // body is drained and discarded instead.
      req.off("data", onData);
      chunks.length = 0;
      req.resume();
      reject(httpError(413, "Request body is too large"));
    };

    req.on("data", onData);
    // After a rejection this resolve is a no-op: the promise is already settled.
    req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")));
    req.on("error", reject);
  });
}
|
||||
|
||||
/**
 * Write a JSON response: uncacheable, UTF-8, terminated by a newline.
 *
 * @param {object} res - Response object exposing `writeHead` and `end`.
 * @param {number} statusCode - HTTP status to send.
 * @param {*} payload - Value to serialise with `JSON.stringify`.
 * @returns {void}
 */
function sendJson(res, statusCode, payload) {
  const jsonHeaders = {
    "content-type": "application/json; charset=utf-8",
    "cache-control": "no-store"
  };
  // Headers are written before the payload is serialised, as callers expect.
  res.writeHead(statusCode, jsonHeaders);
  const serialized = JSON.stringify(payload);
  res.end(`${serialized}\n`);
}
|
||||
|
||||
/**
 * Make an Error annotated with the HTTP status to report for it.
 *
 * @param {number} statusCode - HTTP status, stored as `error.statusCode`.
 * @param {string} message - Error message.
 * @returns {Error} The annotated error; throwing it is left to the caller.
 */
function httpError(statusCode, message) {
  const failure = new Error(message);
  return Object.assign(failure, { statusCode });
}
|
||||
49
test/archive-catalog.test.mjs
Normal file
49
test/archive-catalog.test.mjs
Normal file
@@ -0,0 +1,49 @@
|
||||
import assert from "node:assert/strict";
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import test from "node:test";
|
||||
import { ArchiveCatalog, archiveFileNameForUrl, archiveIdForUrl, normalizeArchiveUrl } from "../src/archive-catalog.mjs";
|
||||
|
||||
// Input is trimmed, and anything that is not a valid http/https URL is refused.
test("normalizes only http and https archive URLs", () => {
  assert.equal(normalizeArchiveUrl(" https://example.com/path "), "https://example.com/path");
  assert.throws(() => normalizeArchiveUrl("file:///tmp/page.html"), /Only http and https/);
  assert.throws(() => normalizeArchiveUrl("not a url"), /valid URL/);
});

// The id is deterministic for a URL and sensitive to its query string.
test("builds stable archive ids from the full URL", () => {
  const first = archiveIdForUrl("https://example.com/article?x=1");
  const second = archiveIdForUrl("https://example.com/article?x=1");
  const third = archiveIdForUrl("https://example.com/article?x=2");
  assert.equal(first, second);
  assert.notEqual(first, third);
  assert.match(first, /^example-com-article-[a-f0-9]{16}$/);
});

// A file already stored under the URL's stable name is found by lookup.
test("finds stable archive files without rerendering", async (t) => {
  const archivePath = await fs.mkdtemp(path.join(os.tmpdir(), "archive-catalog-"));
  // Remove the scratch directory so repeated runs do not pile up in tmpdir.
  t.after(() => fs.rm(archivePath, { recursive: true, force: true }));

  const sourceUrl = "https://example.com/";
  const fileName = archiveFileNameForUrl(sourceUrl);
  await fs.writeFile(path.join(archivePath, fileName), "<!doctype html>", "utf8");

  const catalog = new ArchiveCatalog({ archivePath });
  const record = await catalog.findByUrl(sourceUrl);

  assert.equal(record.fileName, fileName);
  assert.equal(record.archiveUrl, `/archives/${encodeURIComponent(fileName)}`);
});

// A timestamp-named file is matched through the source URL in its header comment.
test("indexes older timestamped archives from the archive comment", async (t) => {
  const archivePath = await fs.mkdtemp(path.join(os.tmpdir(), "archive-catalog-"));
  // Remove the scratch directory so repeated runs do not pile up in tmpdir.
  t.after(() => fs.rm(archivePath, { recursive: true, force: true }));

  await fs.writeFile(
    path.join(archivePath, "example-com-2026-05-16T00-00-00-000Z.html"),
    '<!doctype html>\n<!-- Archived locally. Source: https://example.com/story. Created: 2026-05-16T00:00:00.000Z. -->\n<html></html>',
    "utf8"
  );

  const catalog = new ArchiveCatalog({ archivePath });
  const record = await catalog.findByUrl("https://example.com/story");

  assert.equal(record.fileName, "example-com-2026-05-16T00-00-00-000Z.html");
  assert.equal(record.sourceUrl, "https://example.com/story");
});
|
||||
26
test/archiver.test.mjs
Normal file
26
test/archiver.test.mjs
Normal file
@@ -0,0 +1,26 @@
|
||||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
import { renderPage } from "../src/archiver.mjs";
|
||||
|
||||
// A rule added at runtime through CSSOM insertRule() never appears in the
// <style> element's text, so the renderer has to serialise it explicitly.
test("renderPage serializes CSSOM-inserted style rules", async () => {
  const html = `<!doctype html>
<html>
<head>
<style id="runtime-style"></style>
<script>
document
.getElementById("runtime-style")
.sheet
.insertRule(".runtime-rule { color: rgb(1, 2, 3); }", 0);
</script>
</head>
<body><div class="runtime-rule">Styled by CSSOM</div></body>
</html>`;

  // The page is loaded from a data: URL.
  const pageUrl = `data:text/html,${encodeURIComponent(html)}`;
  const renderOptions = { userscriptDelay: 0 };
  const rendered = await renderPage(pageUrl, renderOptions);

  // The rule must be inside the original <style> element, with its declaration.
  assert.match(rendered, /<style id="runtime-style">[\s\S]*\.runtime-rule/);
  assert.match(rendered, /color:\s*rgb\(1,\s*2,\s*3\)/);
});
|
||||
83
test/asset-inliner.test.mjs
Normal file
83
test/asset-inliner.test.mjs
Normal file
@@ -0,0 +1,83 @@
|
||||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
import { AssetInliner, splitSrcset } from "../src/asset-inliner.mjs";
|
||||
import { findExternalAssetRefs } from "../src/archiver.mjs";
|
||||
|
||||
// NOTE(review): both test names say the nested markup is "escaped", yet the
// srcdoc fixtures below contain raw quotes and angle brackets — confirm the
// fixtures were not entity-decoded somewhere along the way.
test("inlines real srcset attributes without reading escaped src text from srcdoc", async () => {
  const requested = [];
  const inliner = new AssetInliner();
  // Replace fetching with a stub that records each requested URL.
  inliner.fetchAsset = async (rawUrl) => {
    requested.push(rawUrl);
    return { bytes: Buffer.from("asset"), contentType: "image/png" };
  };

  const html = `
<img srcset="/small.png 1x, /large.png 2x">
<iframe srcdoc="<script src="https://js.stripe.com/v3/foo.js"></script><img src="/nested.png">"></iframe>
`;

  const output = await inliner.inlineHtml(html, "https://example.com/article");

  // Sorted because the order in which assets are fetched is not asserted.
  const expectedRequests = [
    "https://example.com/large.png",
    "https://example.com/nested.png",
    "https://example.com/small.png",
  ];
  assert.deepEqual(requested.sort(), expectedRequests);
  assert.doesNotMatch(output, /js\.stripe\.com/);
  assert.equal(inliner.warnings.length, 0);
});

test("external asset reporting ignores escaped nested attributes inside srcdoc", () => {
  const markup = `
<iframe srcdoc="<img src="https://tracker.example/pixel.gif">"></iframe>
<img src="https://cdn.example/picture.jpg">
`;
  const refs = findExternalAssetRefs(markup);

  // Only the top-level <img> is reported.
  assert.deepEqual(refs, ["https://cdn.example/picture.jpg"]);
});
|
||||
|
||||
// Image CDNs put commas inside URL paths (e.g. "w_120,c_limit"); srcset
// handling must not treat those as candidate separators.
test("srcset parsing keeps image CDN transform commas inside URLs", async () => {
  const candidates = [
    "https://media.example/photos/id/master/w_120,c_limit/photo.jpg 120w",
    "https://media.example/photos/id/master/w_240,c_limit/photo.jpg 240w"
  ];
  assert.deepEqual(splitSrcset(candidates.join(", ")), [
    "https://media.example/photos/id/master/w_120,c_limit/photo.jpg 120w",
    "https://media.example/photos/id/master/w_240,c_limit/photo.jpg 240w"
  ]);

  const requested = [];
  const inliner = new AssetInliner();
  // Replace fetching with a stub that records each requested URL.
  inliner.fetchAsset = async (rawUrl) => {
    requested.push(rawUrl);
    return { bytes: Buffer.from("asset"), contentType: "image/jpeg" };
  };

  await inliner.inlineSrcset(
    "https://media.example/photos/id/master/w_120,c_limit/photo.jpg 120w, https://media.example/photos/id/master/w_240,c_limit/photo.jpg 240w",
    "https://example.com/article"
  );

  // Each candidate URL is fetched whole, without its width descriptor.
  assert.deepEqual(requested, [
    "https://media.example/photos/id/master/w_120,c_limit/photo.jpg",
    "https://media.example/photos/id/master/w_240,c_limit/photo.jpg"
  ]);
});

// The data: candidate is not reported as external, and neither srcset nor
// imagesrcset is split on the commas inside a URL.
test("external asset reporting parses srcset-like attributes without splitting URL commas", () => {
  const markup = `
<img srcset="data:image/gif;base64,R0lGODlhAQABAAAAACw= 1x, https://cdn.example/image.jpg 2x">
<link rel="preload" as="image" imagesrcset="https://media.example/photos/id/master/w_120,c_limit/photo.jpg 120w, https://media.example/photos/id/master/w_240,c_limit/photo.jpg 240w">
`;
  const refs = findExternalAssetRefs(markup);

  assert.deepEqual(refs, [
    "https://cdn.example/image.jpg",
    "https://media.example/photos/id/master/w_120,c_limit/photo.jpg",
    "https://media.example/photos/id/master/w_240,c_limit/photo.jpg"
  ]);
});
|
||||
Reference in New Issue
Block a user