adds frontend
This commit is contained in:
250
src/archive-catalog.mjs
Normal file
250
src/archive-catalog.mjs
Normal file
@@ -0,0 +1,250 @@
|
||||
import crypto from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { defaultArchivePath } from "./asset-inliner.mjs";
|
||||
|
||||
/** File name of the JSON index kept inside the archive directory. */
const INDEX_FILE = ".archive-index.json";

/** Schema version written into the index; `loadIndex` only adopts a stored index with this version. */
const INDEX_VERSION = 1;

/**
 * Matches the "Archived locally" HTML comment searched for in the head of an
 * archive file. Capture 1 is the source URL text, capture 2 the creation
 * timestamp (ending in `Z`, optionally with fractional seconds).
 */
const COMMENT_RE =
  /<!--\s*Archived locally\. Source: ([\s\S]*?)\. Created: ([^.]*(?:\.[0-9]+)?Z)\.\s*-->/;
|
||||
|
||||
/**
 * Validate a caller-supplied URL and return its canonical `href`.
 *
 * @param {unknown} rawUrl Value to validate; falsy values count as missing.
 * @returns {string} The `href` of the parsed URL.
 * @throws {Error} When the value is empty, unparsable, or not http/https.
 */
export function normalizeArchiveUrl(rawUrl) {
  const candidate = String(rawUrl || "").trim();
  if (candidate === "") {
    throw new Error("URL is required");
  }

  let parsed = null;
  try {
    parsed = new URL(candidate);
  } catch {
    // Handled below so the caller sees a friendlier message.
  }
  if (parsed === null) {
    throw new Error("Enter a valid URL");
  }

  const allowedProtocols = ["http:", "https:"];
  if (!allowedProtocols.includes(parsed.protocol)) {
    throw new Error("Only http and https URLs can be archived");
  }

  return parsed.href;
}
|
||||
|
||||
/**
 * Derive a deterministic archive id for a source URL: a readable slug built
 * from host + path (at most 72 characters, "archive" when empty) followed by
 * the first 16 hex characters of the SHA-256 of the normalized URL.
 *
 * @param {string} sourceUrl URL to derive the id from.
 * @returns {string} `<slug>-<digest>`.
 * @throws {Error} When `normalizeArchiveUrl` rejects the URL.
 */
export function archiveIdForUrl(sourceUrl) {
  const parsed = new URL(normalizeArchiveUrl(sourceUrl));

  const slug = `${parsed.hostname}${parsed.pathname}`
    .replace(/\/+$/, "")
    .replace(/[^a-z0-9]+/gi, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 72);
  const stem = slug || "archive";

  const hash = crypto.createHash("sha256");
  hash.update(parsed.href);
  const digest = hash.digest("hex").slice(0, 16);

  return [stem, digest].join("-");
}
|
||||
|
||||
/**
 * File name under which the archive for `sourceUrl` is stored: the archive id
 * with an `.html` suffix.
 *
 * @param {string} sourceUrl URL to derive the file name from.
 * @returns {string} `<archive id>.html`.
 */
export function archiveFileNameForUrl(sourceUrl) {
  const archiveId = archiveIdForUrl(sourceUrl);
  return `${archiveId}.html`;
}
|
||||
|
||||
/**
 * URL-keyed index of archived pages stored as `.html` files in one directory.
 *
 * Records live in memory (`this.index.archives`, keyed by normalized source
 * URL) and are persisted to `INDEX_FILE` inside the archive directory. The
 * index is loaded lazily on first use and reconciled against the files that
 * are actually present.
 */
export class ArchiveCatalog {
  /**
   * @param {object} [options]
   * @param {string} [options.archivePath] Archive directory; defaults to `defaultArchivePath()`.
   * @param {string} [options.publicBasePath] URL prefix used to build `archiveUrl`; defaults to "/archives".
   */
  constructor(options = {}) {
    this.archivePath = path.resolve(options.archivePath || defaultArchivePath());
    this.publicBasePath = options.publicBasePath || "/archives";
    this.indexPath = path.join(this.archivePath, INDEX_FILE);
    this.index = {
      version: INDEX_VERSION,
      archives: {}
    };
    this.loadPromise = null;
    this.savePromise = Promise.resolve();
  }

  /**
   * Look up the archive recorded for a URL.
   *
   * A record whose file has disappeared is dropped from the index. When no
   * record exists but a file with the deterministic name for this URL is
   * present, a record is created for it.
   *
   * @param {string} rawUrl URL to look up.
   * @returns {Promise<object|null>} Public record (with `archiveUrl`) or `null`.
   * @throws {Error} When `normalizeArchiveUrl` rejects the URL.
   */
  async findByUrl(rawUrl) {
    const sourceUrl = normalizeArchiveUrl(rawUrl);
    await this.ensureLoaded();

    const indexed = this.index.archives[sourceUrl];
    if (indexed && await this.hasArchiveFile(indexed.fileName)) {
      return this.toPublicRecord(indexed);
    }

    if (indexed) {
      delete this.index.archives[sourceUrl];
      await this.saveIndex();
    }

    const stableFileName = archiveFileNameForUrl(sourceUrl);
    if (await this.hasArchiveFile(stableFileName)) {
      const record = this.upsertRecord(sourceUrl, {
        id: path.basename(stableFileName, ".html"),
        fileName: stableFileName
      });
      await this.saveIndex();
      return this.toPublicRecord(record);
    }

    return null;
  }

  /**
   * Record the outcome of an archive run for a URL and persist the index.
   *
   * @param {string} rawUrl URL that was archived.
   * @param {object} result Archiver result; `filePath` and/or `id` identify the
   *   file, `warnings` / `externalAssets` arrays are stored as counts.
   * @returns {Promise<object>} Public record (with `archiveUrl`).
   * @throws {Error} When the URL is invalid or the result names no usable file.
   */
  async recordResult(rawUrl, result) {
    const sourceUrl = normalizeArchiveUrl(rawUrl);
    await this.ensureLoaded();

    // Without either field the file name would become "undefined.html".
    if (!result?.filePath && !result?.id) {
      throw new Error("Archive result did not include a file path or id");
    }
    const fileName = path.basename(result.filePath || `${result.id}.html`);
    // Only names accepted by isSafeArchiveFileName can be found again later.
    if (!isSafeArchiveFileName(fileName)) {
      throw new Error(`Archive result has an unusable file name: ${fileName}`);
    }

    const id = result.id || path.basename(fileName, ".html");
    const record = this.upsertRecord(sourceUrl, {
      id,
      fileName,
      warningsCount: Array.isArray(result.warnings) ? result.warnings.length : 0,
      externalAssetsCount: Array.isArray(result.externalAssets) ? result.externalAssets.length : 0
    });
    await this.saveIndex();
    return this.toPublicRecord(record);
  }

  /**
   * Load the index once; concurrent callers share the same load.
   * A failed load is forgotten so that a later call retries instead of
   * re-throwing the cached failure forever.
   */
  async ensureLoaded() {
    if (!this.loadPromise) {
      this.loadPromise = this.loadIndex().catch((error) => {
        this.loadPromise = null;
        throw error;
      });
    }
    await this.loadPromise;
  }

  /**
   * Read the stored index (if any), then reconcile it with the directory
   * contents and persist the result when reconciliation changed anything.
   */
  async loadIndex() {
    await fs.mkdir(this.archivePath, { recursive: true });
    try {
      const data = JSON.parse(await fs.readFile(this.indexPath, "utf8"));
      const usable =
        data &&
        data.version === INDEX_VERSION &&
        data.archives &&
        typeof data.archives === "object" &&
        !Array.isArray(data.archives);
      if (usable) {
        this.index = data;
      }
    } catch (error) {
      // A missing or corrupt index is recoverable: the scan below rebuilds
      // records from the archive files themselves.
      const recoverable = error.code === "ENOENT" || error instanceof SyntaxError;
      if (!recoverable) {
        throw error;
      }
    }

    if (await this.scanArchiveFiles()) {
      await this.saveIndex();
    }
  }

  /**
   * Reconcile the in-memory index with the archive directory: add records for
   * archive files that carry source metadata and drop records whose file is
   * gone.
   *
   * @returns {Promise<boolean>} `true` when the index was modified.
   */
  async scanArchiveFiles() {
    let changed = false;
    const entries = await fs.readdir(this.archivePath, { withFileTypes: true }).catch((error) => {
      if (error.code === "ENOENT") {
        return [];
      }
      throw error;
    });

    for (const entry of entries) {
      // Names hasArchiveFile would reject must not be indexed; they would be
      // pruned again below and force a rewrite of the index on every load.
      if (!entry.isFile() || !isSafeArchiveFileName(entry.name)) {
        continue;
      }

      const filePath = path.join(this.archivePath, entry.name);
      const metadata = await readArchiveMetadata(filePath);
      if (!metadata?.sourceUrl) {
        continue;
      }

      let sourceUrl;
      try {
        sourceUrl = normalizeArchiveUrl(metadata.sourceUrl);
      } catch {
        continue;
      }

      const current = this.index.archives[sourceUrl];
      if (current?.fileName === entry.name) {
        continue;
      }
      // Another file already serves this URL and still exists: keep that
      // record (and its counts) instead of flipping between duplicates.
      if (current?.fileName && await this.hasArchiveFile(current.fileName)) {
        continue;
      }

      this.index.archives[sourceUrl] = {
        id: path.basename(entry.name, ".html"),
        fileName: entry.name,
        sourceUrl,
        createdAt: metadata.createdAt || new Date().toISOString(),
        updatedAt: new Date().toISOString()
      };
      changed = true;
    }

    for (const [sourceUrl, record] of Object.entries(this.index.archives)) {
      if (!record?.fileName || !await this.hasArchiveFile(record.fileName)) {
        delete this.index.archives[sourceUrl];
        changed = true;
      }
    }

    return changed;
  }

  /**
   * Create or replace the in-memory record for a URL. `createdAt` and any
   * counts not supplied in `values` are carried over from the previous record.
   *
   * @param {string} sourceUrl Normalized source URL (index key).
   * @param {{id: string, fileName: string, warningsCount?: number, externalAssetsCount?: number}} values
   * @returns {object} The stored record.
   */
  upsertRecord(sourceUrl, values) {
    const previous = this.index.archives[sourceUrl];
    const now = new Date().toISOString();
    const record = {
      id: values.id,
      fileName: values.fileName,
      sourceUrl,
      createdAt: previous?.createdAt || now,
      updatedAt: now,
      warningsCount: values.warningsCount ?? previous?.warningsCount ?? 0,
      externalAssetsCount: values.externalAssetsCount ?? previous?.externalAssetsCount ?? 0
    };
    this.index.archives[sourceUrl] = record;
    return record;
  }

  /**
   * @param {string} fileName Candidate archive file name.
   * @returns {Promise<boolean>} `true` when the name is safe and a regular
   *   file with that name exists in the archive directory.
   */
  async hasArchiveFile(fileName) {
    if (!isSafeArchiveFileName(fileName)) {
      return false;
    }
    const stat = await fs.stat(path.join(this.archivePath, fileName)).catch(() => null);
    return !!stat?.isFile();
  }

  /**
   * @param {object} record Index record.
   * @returns {object} Copy of the record with an added `archiveUrl`.
   */
  toPublicRecord(record) {
    return {
      ...record,
      archiveUrl: `${this.publicBasePath}/${encodeURIComponent(record.fileName)}`
    };
  }

  /**
   * Persist the index via a temp file and rename. Saves are serialized
   * through `this.savePromise`.
   *
   * @returns {Promise<void>} Settles with the outcome of THIS save.
   */
  async saveIndex() {
    const writeIndex = async () => {
      await fs.mkdir(this.archivePath, { recursive: true });
      const tmpPath = `${this.indexPath}.${process.pid}.tmp`;
      try {
        await fs.writeFile(tmpPath, `${JSON.stringify(this.index, null, 2)}\n`, "utf8");
        await fs.rename(tmpPath, this.indexPath);
      } catch (error) {
        // Best-effort cleanup; the original failure is what the caller needs.
        await fs.rm(tmpPath, { force: true }).catch(() => {});
        throw error;
      }
    };

    // Run after the previous save whether it succeeded or failed, so one
    // failed write does not cause every later save to be skipped.
    const pending = this.savePromise.then(writeIndex, writeIndex);
    // The queue tail never rejects; the caller still sees failures via `pending`.
    this.savePromise = pending.catch(() => {});
    return pending;
  }
}
|
||||
|
||||
/**
 * Decide whether a value may be used as an archive file name: a string that
 * has no directory component, ends in `.html`, and does not start with a dot.
 *
 * @param {unknown} fileName Candidate name.
 * @returns {boolean} `true` when the name passes every check.
 */
export function isSafeArchiveFileName(fileName) {
  if (typeof fileName !== "string") {
    return false;
  }
  if (path.basename(fileName) !== fileName) {
    return false;
  }
  if (!fileName.endsWith(".html")) {
    return false;
  }
  return !fileName.startsWith(".");
}
|
||||
|
||||
/**
 * Read the first 4096 bytes of an archive file and extract the source URL and
 * creation timestamp from its "Archived locally" comment.
 *
 * @param {string} filePath Path of the archive file.
 * @returns {Promise<{sourceUrl: string, createdAt: string}|null>} Metadata, or
 *   `null` when the file cannot be opened or carries no such comment.
 */
async function readArchiveMetadata(filePath) {
  let handle;
  try {
    handle = await fs.open(filePath, "r");
  } catch {
    return null;
  }
  if (!handle) {
    return null;
  }

  try {
    const head = Buffer.alloc(4096);
    const { bytesRead } = await handle.read(head, 0, head.length, 0);
    const found = COMMENT_RE.exec(head.toString("utf8", 0, bytesRead));
    if (found === null) {
      return null;
    }

    const [, rawSource, createdAt] = found;
    // "- -" is turned back into "--"; presumably the writer splits double
    // dashes so the URL cannot terminate the HTML comment (writer not visible here).
    return { sourceUrl: rawSource.replaceAll("- -", "--"), createdAt };
  } finally {
    await handle.close();
  }
}
|
||||
345
src/backend-server.mjs
Normal file
345
src/backend-server.mjs
Normal file
@@ -0,0 +1,345 @@
|
||||
#!/usr/bin/env node
|
||||
import { createReadStream } from "node:fs";
|
||||
import fs from "node:fs/promises";
|
||||
import http from "node:http";
|
||||
import path from "node:path";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { ArchiveCatalog, archiveIdForUrl, isSafeArchiveFileName, normalizeArchiveUrl } from "./archive-catalog.mjs";
|
||||
import { defaultArchivePath } from "./asset-inliner.mjs";
|
||||
|
||||
/** Largest delay Node timers honor; larger (or non-positive/NaN) delays are coerced to 1 ms. */
const MAX_TIMER_DELAY_MS = 2 ** 31 - 1;

/**
 * Parse a timeout from an environment value.
 *
 * @param {string|undefined} rawValue Raw environment value.
 * @param {number} fallbackMs Used when the value is missing, non-numeric or < 1.
 * @returns {number} A delay that `setTimeout` will honor as given.
 */
function readTimeoutMs(rawValue, fallbackMs) {
  const parsed = Number(rawValue);
  if (!Number.isFinite(parsed) || parsed < 1) {
    return fallbackMs;
  }
  return Math.min(parsed, MAX_TIMER_DELAY_MS);
}

const PORT = Number(process.env.PORT || 5732);
const ARCHIVE_PATH = path.resolve(process.env.ARCHIVE_PATH || defaultArchivePath());
const ARCHIVE_WORKER_URL = process.env.ARCHIVE_WORKER_URL || "http://127.0.0.1:5733";
const PUBLIC_ARCHIVES_PATH = process.env.PUBLIC_ARCHIVES_PATH || "/archives";
// A garbage value here would otherwise make every job "time out" after 1 ms.
const JOB_TIMEOUT_MS = readTimeoutMs(process.env.ARCHIVE_JOB_TIMEOUT_MS, 120000);
const MAX_BODY_BYTES = 64 * 1024;

const catalog = new ArchiveCatalog({
  archivePath: ARCHIVE_PATH,
  publicBasePath: PUBLIC_ARCHIVES_PATH
});

// Job records by job id, and the id of the job currently registered per source URL.
const jobs = new Map();
const activeJobByUrl = new Map();
// Tail of the promise chain that runs archive jobs one after another.
let workerQueue = Promise.resolve();

const server = http.createServer(async (req, res) => {
  try {
    await route(req, res);
  } catch (error) {
    if (res.headersSent) {
      // A second writeHead would throw inside this handler; just drop the connection.
      res.destroy();
      return;
    }
    sendJson(res, error.statusCode || 500, {
      ok: false,
      error: error.message || "Unexpected error"
    });
  }
});

server.listen(PORT, () => {
  console.log(`archive backend listening on ${PORT}`);
  console.log(`archive path: ${ARCHIVE_PATH}`);
  console.log(`archive worker: ${ARCHIVE_WORKER_URL}`);
});

// Periodically forget finished jobs; unref so the timer alone never keeps the process alive.
const cleanupTimer = setInterval(cleanupJobs, 10 * 60 * 1000);
cleanupTimer.unref?.();
|
||||
|
||||
/**
 * Dispatch one backend request.
 *
 * Routes: `GET /healthz`, `GET /api/archives/lookup?url=…`, `POST /api/archives`
 * (JSON body with `url`), `GET /api/jobs/<id>`, and `GET <PUBLIC_ARCHIVES_PATH>/<file>`.
 * Anything else is answered with a JSON 404.
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 * @throws {Error} Errors carrying `statusCode` (400 for bad client input).
 */
async function route(req, res) {
  let requestUrl;
  try {
    requestUrl = new URL(req.url, `http://${req.headers.host || "localhost"}`);
  } catch {
    // A malformed request target or Host header is the client's fault, not a 500.
    throw httpError(400, "Invalid request URL");
  }
  const { pathname } = requestUrl;

  // normalizeArchiveUrl throws plain Errors; tag them as 400 so invalid user
  // input is not reported as a server failure.
  const readSourceUrl = (rawUrl) => {
    try {
      return normalizeArchiveUrl(rawUrl);
    } catch (error) {
      throw httpError(400, error.message);
    }
  };

  if (req.method === "GET" && pathname === "/healthz") {
    sendJson(res, 200, { ok: true });
    return;
  }

  if (req.method === "GET" && pathname === "/api/archives/lookup") {
    const sourceUrl = readSourceUrl(requestUrl.searchParams.get("url"));
    const archive = await catalog.findByUrl(sourceUrl);
    sendJson(res, 200, {
      ok: true,
      exists: !!archive,
      archive
    });
    return;
  }

  if (req.method === "POST" && pathname === "/api/archives") {
    const body = await readJsonBody(req);
    // `body` may legitimately parse to null or a primitive; treat that as a missing URL.
    const sourceUrl = readSourceUrl(body?.url);
    const response = await createOrFindArchive(sourceUrl);
    sendJson(res, response.statusCode, response.body);
    return;
  }

  const jobMatch = pathname.match(/^\/api\/jobs\/([^/]+)$/);
  if (req.method === "GET" && jobMatch) {
    const job = jobs.get(jobMatch[1]);
    if (!job) {
      sendJson(res, 404, { ok: false, error: "Job not found" });
      return;
    }
    sendJson(res, 200, {
      ok: true,
      job: publicJob(job)
    });
    return;
  }

  if (req.method === "GET" && pathname.startsWith(`${PUBLIC_ARCHIVES_PATH}/`)) {
    await serveArchive(pathname.slice(PUBLIC_ARCHIVES_PATH.length + 1), res);
    return;
  }

  sendJson(res, 404, { ok: false, error: "Not found" });
}
|
||||
|
||||
/**
 * Resolve an archive request for a URL into an HTTP status and body.
 *
 * - Archive already in the catalog → 200, mode "existing".
 * - A non-terminal job is registered for the URL → 202, mode "active".
 * - Otherwise a new job is created and queued → 202, mode "created".
 *
 * @param {string} sourceUrl Normalized source URL.
 * @returns {Promise<{statusCode: number, body: object}>}
 */
async function createOrFindArchive(sourceUrl) {
  const archived = await catalog.findByUrl(sourceUrl);
  if (archived) {
    return {
      statusCode: 200,
      body: { ok: true, status: "done", mode: "existing", archive: archived }
    };
  }

  // Shared shape of both 202 answers.
  const accepted = (mode, job) => ({
    statusCode: 202,
    body: { ok: true, status: job.status, mode, job: publicJob(job) }
  });

  const pendingId = activeJobByUrl.get(sourceUrl);
  const pending = pendingId ? jobs.get(pendingId) : null;
  if (pending && !isTerminal(pending.status)) {
    return accepted("active", pending);
  }

  const job = {
    id: cryptoRandomId(),
    archiveId: archiveIdForUrl(sourceUrl),
    sourceUrl,
    status: "queued",
    message: "Queued",
    createdAt: new Date().toISOString(),
    startedAt: null,
    updatedAt: new Date().toISOString(),
    finishedAt: null,
    archive: null,
    error: null
  };
  jobs.set(job.id, job);
  activeJobByUrl.set(sourceUrl, job.id);
  enqueueJob(job);

  return accepted("created", job);
}
|
||||
|
||||
/**
 * Append a job to the serial worker queue. The job starts once the previous
 * queue entry has settled, whether it fulfilled or rejected.
 *
 * @param {object} job Job record to run.
 */
function enqueueJob(job) {
  function startJob() {
    return executeJob(job);
  }
  workerQueue = workerQueue.then(startJob, startJob);
}
|
||||
|
||||
/**
 * Run one queued job: ask the worker to archive the URL, record the result in
 * the catalog, and store the outcome ("done" or "failed") on the job. Jobs
 * that are no longer "queued" are skipped. The URL's active-job entry is
 * removed once the attempt is over.
 *
 * @param {object} job Job record; mutated in place.
 */
async function executeJob(job) {
  const stillQueued = job.status === "queued";
  if (!stillQueued) {
    return;
  }

  updateJob(job, {
    status: "running",
    message: "Archiving",
    startedAt: new Date().toISOString()
  });

  try {
    let outcome;
    try {
      const workerResult = await requestWorkerArchive(job.sourceUrl, job.archiveId);
      const archive = await catalog.recordResult(job.sourceUrl, workerResult);
      outcome = { status: "done", message: "Opening", archive };
    } catch (error) {
      outcome = {
        status: "failed",
        message: "Failed",
        error: error.message || "Archive failed"
      };
    }
    updateJob(job, { ...outcome, finishedAt: new Date().toISOString() });
  } finally {
    activeJobByUrl.delete(job.sourceUrl);
  }
}
|
||||
|
||||
/**
 * POST an archive request to the worker's `/archive` endpoint and return its
 * result. The request is aborted after `JOB_TIMEOUT_MS`.
 *
 * @param {string} sourceUrl URL to archive.
 * @param {string} archiveId Id the worker should use for the archive.
 * @returns {Promise<object>} `result` from the worker response, or the whole
 *   parsed response when it has no truthy `result`.
 * @throws {Error} On timeout, non-2xx status, `ok: false`, or an unparsable body.
 */
async function requestWorkerArchive(sourceUrl, archiveId) {
  const endpoint = new URL("/archive", ARCHIVE_WORKER_URL);
  const aborter = new AbortController();
  const timer = setTimeout(() => aborter.abort(), JOB_TIMEOUT_MS);
  timer.unref?.();

  const request = {
    method: "POST",
    headers: {
      "content-type": "application/json"
    },
    body: JSON.stringify({ url: sourceUrl, id: archiveId }),
    signal: aborter.signal
  };

  try {
    const response = await fetch(endpoint, request);
    const raw = await response.text();
    const payload = parseJson(raw);

    const failed = !response.ok || payload.ok === false;
    if (failed) {
      throw new Error(payload.error || raw || `Worker returned ${response.status}`);
    }
    return payload.result || payload;
  } catch (error) {
    if (error.name !== "AbortError") {
      throw error;
    }
    throw new Error(`Archive timed out after ${Math.round(JOB_TIMEOUT_MS / 1000)} seconds`);
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Stream one archive file to the client.
 *
 * Answers 400 when the name cannot be URL-decoded and 404 when the name is
 * not a safe archive file name or no such regular file exists.
 *
 * @param {string} rawFileName Still URL-encoded file name taken from the request path.
 * @param {import("node:http").ServerResponse} res
 */
async function serveArchive(rawFileName, res) {
  let fileName;
  try {
    fileName = decodeURIComponent(rawFileName);
  } catch {
    sendJson(res, 400, { ok: false, error: "Invalid archive path" });
    return;
  }

  if (!isSafeArchiveFileName(fileName)) {
    sendJson(res, 404, { ok: false, error: "Archive not found" });
    return;
  }

  const filePath = path.join(ARCHIVE_PATH, fileName);
  const stat = await fs.stat(filePath).catch(() => null);
  if (!stat?.isFile()) {
    sendJson(res, 404, { ok: false, error: "Archive not found" });
    return;
  }

  // Raw bytes are piped through; no decode/re-encode round trip is needed.
  const stream = createReadStream(filePath);

  // Commit to a 200 only once the file is really open, so a file that vanished
  // after the stat above can still be answered with the JSON 404 below.
  stream.once("open", () => {
    res.writeHead(200, {
      "content-type": "text/html; charset=utf-8",
      "cache-control": "no-store"
    });
    stream.pipe(res);
  });

  stream.on("error", () => {
    if (!res.headersSent) {
      sendJson(res, 404, { ok: false, error: "Archive not found" });
    } else {
      res.destroy();
    }
  });

  // pipe() does not destroy its source when the destination closes early;
  // release the file descriptor if the client disconnects mid-transfer.
  res.on("close", () => stream.destroy());
}
|
||||
|
||||
/**
 * Merge `values` into a job record and refresh its `updatedAt` timestamp
 * (which always wins over any `updatedAt` in `values`).
 *
 * @param {object} job Job record; mutated in place.
 * @param {object} values Fields to overwrite.
 */
function updateJob(job, values) {
  const stamp = { updatedAt: new Date().toISOString() };
  Object.assign(job, values, stamp);
}
|
||||
|
||||
/**
 * Build the client-facing view of a job.
 *
 * `startedAt` falls back to `createdAt` while the job has not started.
 * `elapsedMs` is measured from that instant to `finishedAt` when the job has
 * finished, otherwise to now, so the value stops growing once the job is over.
 * It is 0 when the timestamps cannot be parsed.
 *
 * @param {object} job Internal job record.
 * @returns {object} Public job fields.
 */
function publicJob(job) {
  const startedAt = job.startedAt || job.createdAt;

  const startMs = Date.parse(startedAt);
  const endMs = job.finishedAt ? Date.parse(job.finishedAt) : Date.now();
  const measurable = Number.isFinite(startMs) && Number.isFinite(endMs);
  const elapsedMs = measurable ? Math.max(0, endMs - startMs) : 0;

  return {
    id: job.id,
    sourceUrl: job.sourceUrl,
    status: job.status,
    message: job.message,
    createdAt: job.createdAt,
    startedAt,
    updatedAt: job.updatedAt,
    finishedAt: job.finishedAt,
    elapsedMs,
    archive: job.archive,
    error: job.error
  };
}
|
||||
|
||||
/**
 * @param {string} status Job status.
 * @returns {boolean} `true` for the final statuses "done" and "failed".
 */
function isTerminal(status) {
  const finalStatuses = ["done", "failed"];
  return finalStatuses.includes(status);
}
|
||||
|
||||
/**
 * Forget terminal jobs that finished (or, lacking `finishedAt`, were last
 * updated) more than one hour ago.
 */
function cleanupJobs() {
  const oneHourAgo = Date.now() - 60 * 60 * 1000;

  jobs.forEach((job, id) => {
    if (!isTerminal(job.status)) {
      return;
    }
    const settledAt = Date.parse(job.finishedAt || job.updatedAt);
    if (settledAt < oneHourAgo) {
      jobs.delete(id);
    }
  });
}
|
||||
|
||||
/**
 * Read the request body (at most `MAX_BODY_BYTES`) and parse it as JSON.
 *
 * @param {import("node:http").IncomingMessage} req
 * @returns {Promise<any>} The parsed JSON value.
 * @throws {Error} 400 when the body is blank or not valid JSON.
 */
async function readJsonBody(req) {
  const raw = await readRequestBody(req, MAX_BODY_BYTES);
  if (raw.trim().length === 0) {
    throw httpError(400, "Request body is required");
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    throw httpError(400, "Request body must be JSON");
  }
  return parsed;
}
|
||||
|
||||
/**
 * Collect a request body as a UTF-8 string.
 *
 * @param {import("node:stream").Readable} req Request stream.
 * @param {number} maxBytes Byte limit; exceeding it rejects with a 413 error
 *   and destroys the request.
 * @returns {Promise<string>} The complete body.
 */
function readRequestBody(req, maxBytes) {
  return new Promise((resolve, reject) => {
    const received = [];
    let byteCount = 0;

    const onData = (chunk) => {
      byteCount += chunk.length;
      if (byteCount <= maxBytes) {
        received.push(chunk);
        return;
      }
      reject(httpError(413, "Request body is too large"));
      req.destroy();
    };
    const onEnd = () => {
      resolve(Buffer.concat(received).toString("utf8"));
    };

    req.on("data", onData);
    req.on("end", onEnd);
    req.on("error", reject);
  });
}
|
||||
|
||||
/**
 * Send a newline-terminated JSON response that must not be cached.
 *
 * @param {import("node:http").ServerResponse} res
 * @param {number} statusCode HTTP status.
 * @param {any} payload Value to serialize.
 */
function sendJson(res, statusCode, payload) {
  const headers = {
    "content-type": "application/json; charset=utf-8",
    "cache-control": "no-store"
  };
  res.writeHead(statusCode, headers);

  const text = JSON.stringify(payload);
  res.end(`${text}\n`);
}
|
||||
|
||||
/**
 * Parse a worker response body as JSON.
 *
 * @param {string} text Raw response text.
 * @returns {any} The parsed value.
 * @throws {Error} With the raw text as message (or a generic message when the
 *   text is empty) if it is not valid JSON.
 */
function parseJson(text) {
  let value;
  try {
    value = JSON.parse(text);
  } catch {
    throw new Error(text || "Worker returned invalid JSON");
  }
  return value;
}
|
||||
|
||||
/**
 * @returns {string} A fresh random UUID, used as job id.
 */
function cryptoRandomId() {
  const jobId = randomUUID();
  return jobId;
}
|
||||
|
||||
/**
 * Create an Error tagged with the HTTP status the request handler should answer with.
 *
 * @param {number} statusCode HTTP status.
 * @param {string} message Error message sent to the client.
 * @returns {Error} Error carrying a `statusCode` property.
 */
function httpError(statusCode, message) {
  return Object.assign(new Error(message), { statusCode });
}
|
||||
157
src/frontend-server.mjs
Normal file
157
src/frontend-server.mjs
Normal file
@@ -0,0 +1,157 @@
|
||||
#!/usr/bin/env node
|
||||
import fs from "node:fs/promises";
|
||||
import http from "node:http";
|
||||
import path from "node:path";
|
||||
import { Readable } from "node:stream";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
// Directory of this module; ES modules have no built-in __dirname.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

const PORT = Number(process.env.PORT || 5731);
const BACKEND_URL = process.env.BACKEND_URL || "http://127.0.0.1:5732";
const PUBLIC_DIR = path.resolve(__dirname, "..", "public");
const MAX_PROXY_BODY_BYTES = 128 * 1024;

// Content type by file extension; other extensions are served as application/octet-stream.
const CONTENT_TYPES = new Map(
  Object.entries({
    ".css": "text/css; charset=utf-8",
    ".html": "text/html; charset=utf-8",
    ".js": "text/javascript; charset=utf-8",
    ".svg": "image/svg+xml"
  })
);

const server = http.createServer((req, res) => {
  // Any failure while routing is reported as plain text with the error's
  // status code (500 when it has none).
  route(req, res).catch((error) => {
    const headers = {
      "content-type": "text/plain; charset=utf-8",
      "cache-control": "no-store"
    };
    res.writeHead(error.statusCode || 500, headers);
    res.end(error.message || "Unexpected error");
  });
});

server.listen(PORT, () => {
  console.log(`archive frontend listening on ${PORT}`);
  console.log(`archive backend: ${BACKEND_URL}`);
});
|
||||
|
||||
/**
 * Dispatch one frontend request: answer `/healthz` directly, proxy `/api/…`
 * and `/archives/…` to the backend, serve files under `/assets/`, and serve
 * `index.html` for every other GET/HEAD path.
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 * @throws {Error} 405 for non-GET/HEAD requests to non-proxied paths.
 */
async function route(req, res) {
  const requestUrl = new URL(req.url, `http://${req.headers.host || "localhost"}`);
  const { pathname } = requestUrl;

  if (pathname === "/healthz") {
    res.writeHead(200, {
      "content-type": "application/json; charset=utf-8",
      "cache-control": "no-store"
    });
    res.end('{"ok":true}\n');
    return;
  }

  const proxied = ["/api/", "/archives/"].some((prefix) => pathname.startsWith(prefix));
  if (proxied) {
    await proxyToBackend(req, res, requestUrl);
    return;
  }

  const readOnly = req.method === "GET" || req.method === "HEAD";
  if (!readOnly) {
    throw httpError(405, "Method not allowed");
  }

  const staticPath = pathname.startsWith("/assets/") ? pathname : "/index.html";
  await serveStatic(staticPath, res);
}
|
||||
|
||||
/**
 * Serve one file from `PUBLIC_DIR`.
 *
 * @param {string} urlPath URL path (still percent-encoded) relative to the public root.
 * @param {import("node:http").ServerResponse} res
 * @throws {Error} 400 for an undecodable path or one containing NUL; 404 when
 *   the path escapes `PUBLIC_DIR`, does not exist, or is not a regular file.
 */
async function serveStatic(urlPath, res) {
  let decodedPath;
  try {
    decodedPath = decodeURIComponent(urlPath);
  } catch {
    throw httpError(400, "Invalid path");
  }
  // fs rejects paths containing NUL with an argument error that would
  // otherwise surface as a 500.
  if (decodedPath.includes("\0")) {
    throw httpError(400, "Invalid path");
  }

  const filePath = path.join(PUBLIC_DIR, decodedPath);
  const relative = path.relative(PUBLIC_DIR, filePath);
  // Only ".." itself or a leading "../" segment leaves the root; a file
  // merely named "..something" is still inside it.
  const escapesRoot =
    relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesRoot) {
    throw httpError(404, "Not found");
  }

  const bytes = await fs.readFile(filePath).catch((error) => {
    // Directories (e.g. "/assets/") and paths running through a file are
    // "not found" for the client, not server errors.
    if (["ENOENT", "EISDIR", "ENOTDIR"].includes(error.code)) {
      throw httpError(404, "Not found");
    }
    throw error;
  });

  const type = CONTENT_TYPES.get(path.extname(filePath)) || "application/octet-stream";
  res.writeHead(200, {
    "content-type": type,
    "cache-control": "no-store"
  });
  res.end(bytes);
}
|
||||
|
||||
// Request headers that describe this hop's connection or framing and must not
// be replayed on the upstream request (fetch sets its own host/length/framing).
const SKIPPED_REQUEST_HEADERS = new Set([
  "connection",
  "content-length",
  "expect",
  "host",
  "keep-alive",
  "proxy-connection",
  "te",
  "trailer",
  "transfer-encoding",
  "upgrade"
]);

// Response headers that no longer describe the body relayed to the client.
const SKIPPED_RESPONSE_HEADERS = new Set([
  "connection",
  "content-encoding",
  "keep-alive",
  "transfer-encoding"
]);

/**
 * Forward a request to the backend and relay its response.
 *
 * The caller only passes paths under `/api/` or `/archives/`, so the path
 * always resolves against `BACKEND_URL`'s origin.
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 * @param {URL} requestUrl Parsed request URL.
 * @throws {Error} 502 when the backend cannot be reached; 413 (from
 *   `readRequestBody`) when the body exceeds `MAX_PROXY_BODY_BYTES`.
 */
async function proxyToBackend(req, res, requestUrl) {
  const upstreamUrl = new URL(`${requestUrl.pathname}${requestUrl.search}`, BACKEND_URL);

  const headers = {};
  for (const [key, value] of Object.entries(req.headers)) {
    if (value === undefined || SKIPPED_REQUEST_HEADERS.has(key.toLowerCase())) {
      continue;
    }
    headers[key] = Array.isArray(value) ? value.join(", ") : value;
  }

  const hasBody = req.method !== "GET" && req.method !== "HEAD";
  const body = hasBody ? await readRequestBody(req, MAX_PROXY_BODY_BYTES) : undefined;

  // Stop the upstream request when the client goes away ("close" also fires
  // after a normal finish, where aborting is a no-op).
  const upstreamAbort = new AbortController();
  res.once("close", () => upstreamAbort.abort());

  let upstream;
  try {
    upstream = await fetch(upstreamUrl, {
      method: req.method,
      headers,
      body,
      signal: upstreamAbort.signal
    });
  } catch (error) {
    // A backend that is down or unreachable is a gateway problem, not a 500.
    const failure = httpError(502, "Archive backend is unavailable");
    failure.cause = error;
    throw failure;
  }

  // fetch hands over a decoded body, so an upstream content-length that
  // measured the encoded bytes would be wrong for what is relayed.
  const wasEncoded = upstream.headers.has("content-encoding");
  const responseHeaders = {};
  upstream.headers.forEach((value, key) => {
    const name = key.toLowerCase();
    if (SKIPPED_RESPONSE_HEADERS.has(name)) {
      return;
    }
    if (wasEncoded && name === "content-length") {
      return;
    }
    // Keep repeated headers (e.g. set-cookie) instead of overwriting them.
    const previous = responseHeaders[key];
    responseHeaders[key] = previous === undefined ? value : [].concat(previous, value);
  });

  res.writeHead(upstream.status, responseHeaders);
  if (req.method === "HEAD" || !upstream.body) {
    res.end();
    return;
  }

  const bodyStream = Readable.fromWeb(upstream.body);
  // Headers are already out; an unhandled stream error would take the process
  // down, so cut the connection to signal the truncated response.
  bodyStream.on("error", () => res.destroy());
  bodyStream.pipe(res);
}
|
||||
|
||||
/**
 * Collect a request body as a single Buffer.
 *
 * @param {import("node:stream").Readable} req Request stream.
 * @param {number} maxBytes Byte limit; exceeding it rejects with a 413 error
 *   and destroys the request.
 * @returns {Promise<Buffer>} The complete body.
 */
function readRequestBody(req, maxBytes) {
  return new Promise((resolve, reject) => {
    const pieces = [];
    let seenBytes = 0;

    function collect(chunk) {
      seenBytes += chunk.length;
      const withinLimit = seenBytes <= maxBytes;
      if (withinLimit) {
        pieces.push(chunk);
        return;
      }
      reject(httpError(413, "Request body is too large"));
      req.destroy();
    }

    function finish() {
      resolve(Buffer.concat(pieces));
    }

    req.on("data", collect);
    req.on("end", finish);
    req.on("error", reject);
  });
}
|
||||
|
||||
/**
 * Build an Error that carries the HTTP status the server should respond with.
 *
 * @param {number} statusCode HTTP status.
 * @param {string} message Text sent to the client.
 * @returns {Error} Error with a `statusCode` property.
 */
function httpError(statusCode, message) {
  return Object.assign(new Error(message), { statusCode });
}
|
||||
103
src/worker-server.mjs
Normal file
103
src/worker-server.mjs
Normal file
@@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env node
|
||||
import http from "node:http";
|
||||
import path from "node:path";
|
||||
import { archivePage, defaultArchivePath } from "./archiver.mjs";
|
||||
import { archiveIdForUrl, normalizeArchiveUrl } from "./archive-catalog.mjs";
|
||||
|
||||
const PORT = Number(process.env.PORT || process.env.ARCHIVE_WORKER_PORT || 5733);
const ARCHIVE_PATH = path.resolve(process.env.ARCHIVE_PATH || defaultArchivePath());
const MAX_BODY_BYTES = 64 * 1024;

// Tail of the promise chain that runs archive requests one at a time.
let queue = Promise.resolve();

const server = http.createServer((req, res) => {
  // Any failure while routing is reported as JSON with the error's status
  // code (500 when it has none).
  route(req, res).catch((error) => {
    const payload = {
      ok: false,
      error: error.message || "Unexpected error"
    };
    sendJson(res, error.statusCode || 500, payload);
  });
});

server.listen(PORT, () => {
  console.log(`archive worker listening on ${PORT}`);
  console.log(`archive path: ${ARCHIVE_PATH}`);
});
|
||||
|
||||
/**
 * Dispatch one worker request.
 *
 * Routes: `GET /healthz` and `POST /archive` (JSON body with `url` and an
 * optional `id`). Anything else is answered with a JSON 404.
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 * @throws {Error} 400 for a malformed request target, an invalid `url`, or an
 *   `id` that is not a plain name; any error raised by the archive run.
 */
async function route(req, res) {
  let requestUrl;
  try {
    requestUrl = new URL(req.url, `http://${req.headers.host || "localhost"}`);
  } catch {
    // A malformed request target or Host header is the client's fault, not a 500.
    throw httpError(400, "Invalid request URL");
  }

  if (req.method === "GET" && requestUrl.pathname === "/healthz") {
    sendJson(res, 200, { ok: true });
    return;
  }

  if (req.method === "POST" && requestUrl.pathname === "/archive") {
    const body = await readJsonBody(req);

    let sourceUrl;
    try {
      // `body` may parse to null or a primitive; that counts as a missing URL.
      sourceUrl = normalizeArchiveUrl(body?.url);
    } catch (error) {
      // Validation failures are plain Errors; report them as client errors.
      throw httpError(400, error.message);
    }

    const requestedId = typeof body?.id === "string" ? body.id.trim() : "";
    // NOTE(review): archivePage is not visible here; the id is assumed to end
    // up in a file name under ARCHIVE_PATH, so ids containing path separators,
    // NUL, or a leading dot are refused rather than passed through.
    const looksLikePath = /[\\/\u0000]/.test(requestedId) || requestedId.startsWith(".");
    if (requestedId && looksLikePath) {
      throw httpError(400, "Invalid archive id");
    }
    const id = requestedId || archiveIdForUrl(sourceUrl);

    const result = await enqueueArchive(sourceUrl, id);
    sendJson(res, 200, {
      ok: true,
      result
    });
    return;
  }

  sendJson(res, 404, { ok: false, error: "Not found" });
}
|
||||
|
||||
/**
 * Queue an archive run behind all previously queued runs; it starts once the
 * previous entry has settled, whether it fulfilled or rejected.
 *
 * @param {string} sourceUrl URL to archive.
 * @param {string} id Archive id handed to `archivePage`.
 * @returns {Promise<any>} Settles with the outcome of this run.
 */
function enqueueArchive(sourceUrl, id) {
  function archiveNext() {
    const options = { archivePath: ARCHIVE_PATH, id };
    return archivePage(sourceUrl, options);
  }

  queue = queue.then(archiveNext, archiveNext);
  return queue;
}
|
||||
|
||||
/**
 * Read the request body (capped at `MAX_BODY_BYTES`) and decode it as JSON.
 *
 * @param {import("node:http").IncomingMessage} req
 * @returns {Promise<any>} The decoded JSON value.
 * @throws {Error} 400 when the body is blank or is not valid JSON.
 */
async function readJsonBody(req) {
  const raw = await readRequestBody(req, MAX_BODY_BYTES);

  const isBlank = raw.trim() === "";
  if (isBlank) {
    throw httpError(400, "Request body is required");
  }

  let decoded;
  try {
    decoded = JSON.parse(raw);
  } catch {
    throw httpError(400, "Request body must be JSON");
  }
  return decoded;
}
|
||||
|
||||
/**
 * Buffer a request body and return it decoded as UTF-8.
 *
 * @param {import("node:stream").Readable} req Request stream.
 * @param {number} maxBytes Byte limit; exceeding it rejects with a 413 error
 *   and destroys the request.
 * @returns {Promise<string>} The complete body text.
 */
function readRequestBody(req, maxBytes) {
  return new Promise((resolve, reject) => {
    const parts = [];
    let size = 0;

    req.on("data", (chunk) => {
      size += chunk.length;
      const tooLarge = size > maxBytes;
      if (!tooLarge) {
        parts.push(chunk);
        return;
      }
      reject(httpError(413, "Request body is too large"));
      req.destroy();
    });

    req.on("end", () => {
      const whole = Buffer.concat(parts);
      resolve(whole.toString("utf8"));
    });

    req.on("error", reject);
  });
}
|
||||
|
||||
/**
 * Write a JSON response (newline-terminated, `no-store`).
 *
 * @param {import("node:http").ServerResponse} res
 * @param {number} statusCode HTTP status.
 * @param {any} payload Value to serialize.
 */
function sendJson(res, statusCode, payload) {
  const headers = {
    "content-type": "application/json; charset=utf-8",
    "cache-control": "no-store"
  };
  res.writeHead(statusCode, headers);

  const serialized = JSON.stringify(payload);
  res.end(`${serialized}\n`);
}
|
||||
|
||||
/**
 * Make an Error annotated with the HTTP status to respond with.
 *
 * @param {number} statusCode HTTP status.
 * @param {string} message Message reported to the client.
 * @returns {Error} Error with a `statusCode` property.
 */
function httpError(statusCode, message) {
  return Object.assign(new Error(message), { statusCode });
}
|
||||
Reference in New Issue
Block a user