#!/usr/bin/env node
import { realpathSync } from "node:fs";
import fs from "node:fs/promises";
import path from "node:path";
import { spawn } from "node:child_process";
import { pathToFileURL } from "node:url";
import { defaultArchivePath } from "./asset-inliner.mjs";

// Mount points used inside the worker container (always POSIX-style paths).
const CONTAINER_ARCHIVE_PATH = "/archives";
const CONTAINER_INPUT_PATH = "/input";
const DEFAULT_IMAGE = process.env.ARCHIVE_WORKER_IMAGE || "local-page-archiver:latest";
const DEFAULT_SHM_SIZE = "1g";

// CLI switches that never take a value. Listing them keeps `--json <input>`
// from swallowing the positional input as the switch's value.
const BOOLEAN_FLAGS = new Set(["build", "help", "json", "vnc"]);

/**
 * Archive a URL or local HTML file by running the worker image in a container.
 *
 * @param {string} input - URL, or path to an HTML file on the host. An existing
 *   host file is bind-mounted read-only into the container.
 * @param {object} [options]
 * @param {string} [options.runtime] - Container runtime binary; auto-detected when omitted.
 * @param {string} [options.image] - Worker image; defaults to DEFAULT_IMAGE.
 * @param {string} [options.archivePath] - Host output directory; created if missing.
 * @param {boolean} [options.build] - Build the worker image before running.
 * @param {string} [options.context] - Build context used when `build` is set.
 * @param {string} [options.shmSize] - Value for `--shm-size`; defaults to "1g".
 * @param {string} [options.network] - Value for `--network`.
 * @param {string} [options.name] - Value for `--name`.
 * @param {boolean} [options.vnc] - Publish the worker's port 5900 and set ARCHIVE_WORKER_VNC=1.
 * @param {number|string} [options.vncPort] - Host port for VNC; defaults to 5901.
 * @param {string} [options.id] - Forwarded to the worker as `--id`.
 * @returns {Promise<object>} The worker's JSON result with host-side `filePath`,
 *   `archivePath` and a `container` section describing the run.
 * @throws {Error} When input is missing, the worker exits non-zero, reports
 *   `ok: false`, or prints no parseable JSON. Worker failures carry `error.worker`
 *   (and `error.result` when JSON was parsed).
 */
export async function archiveWithContainer(input, options = {}) {
  if (!input) {
    throw new Error("Missing archive input URL or HTML file path");
  }

  const runtime = options.runtime || await detectContainerRuntime();
  const image = options.image || DEFAULT_IMAGE;
  const archivePath = path.resolve(options.archivePath || defaultArchivePath());
  await fs.mkdir(archivePath, { recursive: true });

  if (options.build === true) {
    await buildWorkerImage({
      runtime,
      image,
      context: options.context || process.cwd()
    });
  }

  const { containerInput, inputMount } = await resolveContainerInput(input);
  const runArgs = [
    "run",
    "--rm",
    "--shm-size",
    options.shmSize || DEFAULT_SHM_SIZE,
    "-e",
    `ARCHIVE_PATH=${CONTAINER_ARCHIVE_PATH}`,
    "-e",
    "ARCHIVE_WORKER_XVFB=1",
    "-v",
    `${archivePath}:${CONTAINER_ARCHIVE_PATH}`
  ];

  if (inputMount) {
    runArgs.push("-v", `${inputMount.host}:${inputMount.container}:ro`);
  }
  if (options.network) {
    runArgs.push("--network", options.network);
  }
  if (options.name) {
    runArgs.push("--name", options.name);
  }
  if (options.vnc === true) {
    const hostPort = String(options.vncPort || 5901);
    runArgs.push("-e", "ARCHIVE_WORKER_VNC=1", "-p", `${hostPort}:5900`);
  }

  // Everything after the image name is the worker's own command line.
  runArgs.push(image, "archive", containerInput, "--json");
  if (options.id) {
    runArgs.push("--id", options.id);
  }

  const worker = await runCapture(runtime, runArgs, { reject: false });

  let parsed;
  try {
    parsed = parseWorkerJson(worker.stdout);
  } catch (error) {
    if (worker.code !== 0) {
      // A failed worker's stderr is more useful than "no JSON found".
      const message = worker.stderr.trim() || error.message;
      const workerError = new Error(message, { cause: error });
      workerError.worker = worker;
      throw workerError;
    }
    throw error;
  }

  if (worker.code !== 0 || parsed.ok === false) {
    const message = parsed.error || worker.stderr.trim() || `Archive worker exited with ${worker.code}`;
    const error = new Error(message);
    error.worker = worker;
    error.result = parsed;
    throw error;
  }

  return mapContainerResult(parsed, { runtime, image, archivePath });
}

/**
 * Find a usable container runtime, preferring podman over docker.
 *
 * @returns {Promise<string>} "podman" or "docker".
 * @throws {Error} When neither responds successfully to `--version`.
 */
export async function detectContainerRuntime() {
  for (const runtime of ["podman", "docker"]) {
    if (await commandExists(runtime)) {
      return runtime;
    }
  }
  throw new Error("Neither podman nor docker is available on PATH");
}

/**
 * Check whether an image is known to the given runtime.
 *
 * @param {string} runtime - Runtime binary; "podman" uses `image exists`,
 *   anything else uses `image inspect`.
 * @param {string} image - Image reference to look up.
 * @returns {Promise<boolean>} True when the lookup command exits with 0.
 */
export async function imageExists(runtime, image) {
  const args = runtime === "podman"
    ? ["image", "exists", image]
    : ["image", "inspect", image];
  const result = await runCapture(runtime, args, { reject: false });
  return result.code === 0;
}

/**
 * Build the worker image, streaming the runtime's output to this process's stdio.
 *
 * @param {object} [params]
 * @param {string} [params.runtime] - Runtime binary; auto-detected when omitted.
 * @param {string} [params.image] - Tag to build; defaults to DEFAULT_IMAGE.
 * @param {string} [params.context] - Build context directory; defaults to the cwd.
 * @returns {Promise<void>}
 * @throws {Error} When the build command exits non-zero or cannot be spawned.
 */
export async function buildWorkerImage({ runtime, image = DEFAULT_IMAGE, context = process.cwd() } = {}) {
  const selectedRuntime = runtime || await detectContainerRuntime();
  await runInherited(selectedRuntime, ["build", "-t", image, context]);
}

/**
 * Translate the worker's container-side result into host-side paths.
 *
 * A `filePath` under the container archive mount is rewritten to the matching
 * location under `archivePath`; any other value is passed through unchanged.
 * The original container values are preserved under `container`.
 */
function mapContainerResult(result, { runtime, image, archivePath }) {
  const containerFilePath = result.filePath;
  let filePath = containerFilePath;
  if (containerFilePath?.startsWith(`${CONTAINER_ARCHIVE_PATH}/`)) {
    // Container paths are POSIX regardless of the host platform.
    const relative = path.posix.relative(CONTAINER_ARCHIVE_PATH, containerFilePath);
    filePath = path.join(archivePath, relative);
  }

  return {
    ...result,
    filePath,
    archivePath,
    container: {
      runtime,
      image,
      filePath: containerFilePath,
      archivePath: CONTAINER_ARCHIVE_PATH
    }
  };
}

/**
 * Decide how the input reaches the worker.
 *
 * URLs and anything that is not an existing regular file are passed through
 * unchanged with no mount. An existing file has its parent directory mounted at
 * CONTAINER_INPUT_PATH and is addressed by its basename inside the container.
 *
 * @returns {Promise<{containerInput: string, inputMount: ?{host: string, container: string}}>}
 */
async function resolveContainerInput(input) {
  if (isUrlLike(input)) {
    return { containerInput: input, inputMount: null };
  }

  const absolute = path.resolve(input);
  const stat = await fs.stat(absolute).catch(() => null);
  if (!stat?.isFile()) {
    return { containerInput: input, inputMount: null };
  }

  return {
    containerInput: path.posix.join(CONTAINER_INPUT_PATH, path.basename(absolute)),
    inputMount: {
      host: path.dirname(absolute),
      container: CONTAINER_INPUT_PATH
    }
  };
}

/**
 * Report whether a value starts with a URL scheme (`name:`).
 *
 * Windows drive paths such as `C:\page.html` or `C:/page.html` are excluded so
 * they are treated as local files rather than as a one-letter scheme.
 */
function isUrlLike(value) {
  if (/^[a-z]:[\\/]/i.test(value)) {
    return false;
  }
  return /^[a-z][a-z0-9+.-]*:/i.test(value);
}

/** Report whether `command --version` can be spawned and exits with 0. */
async function commandExists(command) {
  const result = await runCapture(command, ["--version"], { reject: false });
  return result.code === 0;
}

/**
 * Extract the worker's JSON result from its stdout.
 *
 * Scans lines from last to first and returns the first one that starts with
 * "{" and parses as JSON, so log output printed before the result is ignored.
 *
 * @throws {Error} When stdout is empty or no line parses as JSON.
 */
function parseWorkerJson(stdout) {
  const trimmed = stdout.trim();
  if (!trimmed) {
    throw new Error("Archive worker produced no JSON output");
  }

  const lines = trimmed.split(/\r?\n/).reverse();
  for (const line of lines) {
    const candidate = line.trim();
    if (!candidate.startsWith("{")) {
      continue;
    }
    try {
      return JSON.parse(candidate);
    } catch {
      // Keep looking; earlier lines may contain log output.
    }
  }

  throw new Error(`Archive worker output did not include JSON: ${trimmed.slice(0, 500)}`);
}

/**
 * Spawn a command and collect its stdout and stderr as strings.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the child.
 * @param {object} [options.env] - Environment; defaults to process.env.
 * @param {boolean} [options.reject] - Pass `false` to resolve instead of reject
 *   on failure; a spawn error then resolves with code 127 and the error message
 *   as stderr.
 * @returns {Promise<{code: ?number, stdout: string, stderr: string}>}
 */
function runCapture(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      cwd: options.cwd,
      env: options.env || process.env,
      stdio: ["ignore", "pipe", "pipe"]
    });

    // Decode as UTF-8 at the stream level so a multi-byte character split
    // across two chunks is not corrupted by per-chunk string conversion.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });

    child.on("error", (error) => {
      if (options.reject === false) {
        resolve({ code: 127, stdout, stderr: error.message });
      } else {
        reject(error);
      }
    });

    child.on("close", (code) => {
      const result = { code, stdout, stderr };
      if (code !== 0 && options.reject !== false) {
        const error = new Error(stderr.trim() || `${command} exited with ${code}`);
        error.result = result;
        reject(error);
      } else {
        resolve(result);
      }
    });
  });
}

/**
 * Spawn a command with stdio inherited from this process.
 *
 * @returns {Promise<void>} Resolves on exit code 0; rejects on a spawn error
 *   or any other exit code.
 */
function runInherited(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      cwd: options.cwd,
      env: options.env || process.env,
      stdio: "inherit"
    });
    child.on("error", reject);
    child.on("close", (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`${command} exited with ${code}`));
      }
    });
  });
}

/**
 * Parse `process.argv`-style arguments into a flat options object.
 *
 * - `argv[2]` is the command unless it starts with `--` (so a bare `--help` works).
 * - `--no-x` sets `x` to false.
 * - `--key=value` splits on the first `=` only, so values may contain `=`.
 * - Flags in BOOLEAN_FLAGS never consume the following argument.
 * - Other flags take the next argument as their value unless it starts with
 *   `--`, in which case they are set to `true`.
 *
 * @param {string[]} argv
 * @returns {{command: (string|undefined), positional: string[]}} Plus one
 *   property per parsed flag.
 */
function parseArgs(argv) {
  const first = argv[2];
  const hasCommand = first !== undefined && !first.startsWith("--");
  const args = { command: hasCommand ? first : undefined, positional: [] };

  for (let i = hasCommand ? 3 : 2; i < argv.length; i += 1) {
    const arg = argv[i];
    if (!arg.startsWith("--")) {
      args.positional.push(arg);
      continue;
    }

    const separator = arg.indexOf("=");
    const key = separator === -1 ? arg.slice(2) : arg.slice(2, separator);
    const inlineValue = separator === -1 ? undefined : arg.slice(separator + 1);

    if (key.startsWith("no-")) {
      args[key.slice(3)] = false;
    } else if (BOOLEAN_FLAGS.has(key)) {
      args[key] = inlineValue === undefined || !["false", "0"].includes(inlineValue);
    } else if (inlineValue !== undefined) {
      args[key] = inlineValue;
    } else if (i + 1 < argv.length && !argv[i + 1].startsWith("--")) {
      i += 1;
      args[key] = argv[i];
    } else {
      args[key] = true;
    }
  }

  return args;
}

/**
 * Read a value-taking option from parsed args.
 *
 * @returns {string|undefined} The value, or undefined when absent or negated.
 * @throws {Error} When the flag was given without a value.
 */
function optionValue(args, key) {
  const value = args[key];
  if (value === undefined || value === false) {
    return undefined;
  }
  if (value === true) {
    throw new Error(`--${key} requires a value`);
  }
  return value;
}

/** Print command-line help to stdout. */
function usage() {
  console.log(`Usage:
  node src/container-runner.mjs archive <url-or-html-file> [options]
  node src/container-runner.mjs build [options]

Options:
  --runtime <name>       Container runtime. Defaults to podman, then docker
  --image <image>        Worker image. Defaults to ${DEFAULT_IMAGE}
  --archive-path <dir>   Host output directory. Defaults to ARCHIVE_PATH or ${defaultArchivePath()}
  --id <id>              Output id/file stem
  --build                Build the worker image before archiving
  --vnc                  Expose x11vnc from the worker for debugging
  --vnc-port <port>      Host VNC port. Defaults to 5901
  --network <name>       Optional runtime network to attach
  --json                 Print machine-readable JSON`);
}

/**
 * CLI entry point: dispatches the `build` and `archive` commands.
 *
 * The command and input are validated before probing for a container runtime
 * so usage errors are reported even on machines without podman or docker.
 */
async function main() {
  const args = parseArgs(process.argv);
  if (!args.command || args.command === "help" || args.help) {
    usage();
    return;
  }
  if (args.command !== "build" && args.command !== "archive") {
    throw new Error(`Unknown command: ${args.command}`);
  }

  const input = args.positional[0];
  if (args.command === "archive" && !input) {
    usage();
    process.exitCode = 1;
    return;
  }

  const runtime = optionValue(args, "runtime") || await detectContainerRuntime();
  const image = optionValue(args, "image") || DEFAULT_IMAGE;

  if (args.command === "build") {
    await buildWorkerImage({ runtime, image });
    return;
  }

  const result = await archiveWithContainer(input, {
    runtime,
    image,
    archivePath: optionValue(args, "archive-path"),
    id: optionValue(args, "id"),
    build: args.build === true,
    vnc: args.vnc === true,
    vncPort: optionValue(args, "vnc-port"),
    network: optionValue(args, "network")
  });

  if (args.json === true) {
    console.log(JSON.stringify(result));
    return;
  }

  // The worker result is external data; tolerate missing list fields.
  const externalAssets = result.externalAssets ?? [];
  const warnings = result.warnings ?? [];

  console.log(`Archived: ${result.sourceUrl}`);
  console.log(`Output: ${result.filePath}`);
  console.log(`Worker: ${result.container.runtime} ${result.container.image}`);

  if (externalAssets.length) {
    console.log(`External asset references remaining: ${externalAssets.length}`);
    for (const ref of externalAssets.slice(0, 20)) {
      console.log(` ${ref}`);
    }
  } else {
    console.log("External asset references remaining: 0");
  }

  if (warnings.length) {
    console.log(`Warnings: ${warnings.length}`);
    for (const warning of warnings.slice(0, 20)) {
      console.log(` ${warning}`);
    }
  }
}

/**
 * Report whether this module is the script Node was started with.
 *
 * Compares against a proper file URL of the entry script's real path, so paths
 * with spaces, Windows paths and symlinked launchers all match.
 */
function isMainModule() {
  const entry = process.argv[1];
  if (!entry) {
    return false;
  }
  let resolved = entry;
  try {
    resolved = realpathSync(entry);
  } catch {
    // Entry path could not be resolved; compare against the raw path instead.
  }
  return import.meta.url === pathToFileURL(resolved).href;
}

if (isMainModule()) {
  main().catch((error) => {
    console.error(error.message);
    process.exitCode = 1;
  });
}