Files
sigilbox/src/container-runner.mjs

349 lines
9.5 KiB
JavaScript
Raw Normal View History

2026-05-15 23:40:39 -07:00
#!/usr/bin/env node
import fs from "node:fs/promises";
import path from "node:path";
import { spawn } from "node:child_process";
import { pathToFileURL } from "node:url";
import { defaultArchivePath } from "./asset-inliner.mjs";
// Path inside the worker container where the host output directory is mounted.
const CONTAINER_ARCHIVE_PATH = "/archives";
// Path inside the worker container where the directory of a local input file is mounted.
const CONTAINER_INPUT_PATH = "/input";
// Worker image tag; the ARCHIVE_WORKER_IMAGE environment variable overrides the built-in default.
const DEFAULT_IMAGE = process.env.ARCHIVE_WORKER_IMAGE || "local-page-archiver:latest";
// Value passed to the runtime's --shm-size flag when the caller does not provide one.
const DEFAULT_SHM_SIZE = "1g";
/**
 * Archive a URL or local HTML file by running the worker image in a container.
 *
 * @param {string} input - URL or path of an HTML file to archive.
 * @param {object} [options]
 * @param {string} [options.runtime] - Container runtime binary; auto-detected when omitted.
 * @param {string} [options.image] - Worker image; falls back to DEFAULT_IMAGE.
 * @param {string} [options.archivePath] - Host output directory; falls back to defaultArchivePath().
 * @param {boolean} [options.build] - When exactly `true`, build the image before running it.
 * @param {string} [options.context] - Build context used with `build`; defaults to the current directory.
 * @param {string} [options.shmSize] - Value for `--shm-size`; falls back to DEFAULT_SHM_SIZE.
 * @param {string} [options.network] - Value for `--network`.
 * @param {string} [options.name] - Value for `--name`.
 * @param {boolean} [options.vnc] - When exactly `true`, set ARCHIVE_WORKER_VNC=1 and publish container port 5900.
 * @param {string|number} [options.vncPort] - Host port mapped to 5900; defaults to 5901.
 * @param {string} [options.id] - Forwarded to the worker as `--id`.
 * @returns {Promise<object>} The worker's JSON result after mapContainerResult().
 * @throws {Error} When `input` is missing, the worker output holds no JSON, the worker
 *   exits non-zero, or the worker reports `ok: false`. Worker failures carry `.worker`
 *   (captured process result) and, when JSON was parsed, `.result`.
 */
export async function archiveWithContainer(input, options = {}) {
  if (!input) {
    throw new Error("Missing archive input URL or HTML file path");
  }

  const runtime = options.runtime || await detectContainerRuntime();
  const image = options.image || DEFAULT_IMAGE;
  const archivePath = path.resolve(options.archivePath || defaultArchivePath());
  await fs.mkdir(archivePath, { recursive: true });

  if (options.build === true) {
    await buildWorkerImage({ runtime, image, context: options.context || process.cwd() });
  }

  const { containerInput, inputMount } = await resolveContainerInput(input);

  // Runtime flags come first, then the image, then the worker's own command line.
  const runArgs = [
    "run",
    "--rm",
    "--shm-size", options.shmSize || DEFAULT_SHM_SIZE,
    "-e", `ARCHIVE_PATH=${CONTAINER_ARCHIVE_PATH}`,
    "-e", "ARCHIVE_WORKER_XVFB=1",
    "-v", `${archivePath}:${CONTAINER_ARCHIVE_PATH}`,
    // A local input file is exposed through a read-only mount of its directory.
    ...(inputMount ? ["-v", `${inputMount.host}:${inputMount.container}:ro`] : []),
    ...(options.network ? ["--network", options.network] : []),
    ...(options.name ? ["--name", options.name] : []),
    ...(options.vnc === true
      ? ["-e", "ARCHIVE_WORKER_VNC=1", "-p", `${String(options.vncPort || 5901)}:5900`]
      : []),
    image,
    "archive",
    containerInput,
    "--json",
    ...(options.id ? ["--id", options.id] : [])
  ];

  // reject: false so a failing worker can still be inspected for a JSON error report.
  const worker = await runCapture(runtime, runArgs, { reject: false });
  const exitedWithFailure = worker.code !== 0;

  let parsed;
  try {
    parsed = parseWorkerJson(worker.stdout);
  } catch (parseError) {
    if (!exitedWithFailure) {
      throw parseError;
    }
    // The worker failed without emitting JSON: surface its stderr when there is any.
    const workerError = new Error(worker.stderr.trim() || parseError.message);
    workerError.worker = worker;
    throw workerError;
  }

  if (exitedWithFailure || parsed.ok === false) {
    const failure = new Error(
      parsed.error || worker.stderr.trim() || `Archive worker exited with ${worker.code}`
    );
    failure.worker = worker;
    failure.result = parsed;
    throw failure;
  }

  return mapContainerResult(parsed, { runtime, image, archivePath });
}
/**
 * Pick the container runtime to use, preferring podman over docker.
 *
 * @returns {Promise<string>} "podman" or "docker", whichever is found first.
 * @throws {Error} When neither command is available.
 */
export async function detectContainerRuntime() {
  // Probed one at a time so docker is only checked when podman is missing.
  if (await commandExists("podman")) {
    return "podman";
  }
  if (await commandExists("docker")) {
    return "docker";
  }
  throw new Error("Neither podman nor docker is available on PATH");
}
/**
 * Check whether an image is known to the given container runtime.
 *
 * @param {string} runtime - Runtime binary to invoke.
 * @param {string} image - Image reference to look up.
 * @returns {Promise<boolean>} `true` when the lookup command exits with code 0.
 */
export async function imageExists(runtime, image) {
  // podman is queried with "image exists"; every other runtime with "image inspect".
  const subcommand = runtime === "podman" ? "exists" : "inspect";
  const { code } = await runCapture(runtime, ["image", subcommand, image], { reject: false });
  return code === 0;
}
/**
 * Build the worker image with the container runtime, streaming build output
 * to the current terminal.
 *
 * @param {object} [params]
 * @param {string} [params.runtime] - Runtime binary; auto-detected when falsy.
 * @param {string} [params.image] - Tag applied with `-t`; defaults to DEFAULT_IMAGE.
 * @param {string} [params.context] - Build context; defaults to the current directory.
 * @returns {Promise<void>}
 */
export async function buildWorkerImage({ runtime, image = DEFAULT_IMAGE, context = process.cwd() } = {}) {
  let builder = runtime;
  if (!builder) {
    builder = await detectContainerRuntime();
  }
  const buildArgs = ["build", "-t", image, context];
  await runInherited(builder, buildArgs);
}
/**
 * Translate a worker result from container paths to host paths.
 *
 * @param {object} result - Parsed worker JSON; `filePath` is read from it.
 * @param {object} hostInfo
 * @param {string} hostInfo.runtime - Runtime that ran the worker.
 * @param {string} hostInfo.image - Image that ran the worker.
 * @param {string} hostInfo.archivePath - Host directory mounted at CONTAINER_ARCHIVE_PATH.
 * @returns {object} Copy of `result` with host `filePath` and `archivePath`, plus a
 *   `container` record holding the runtime, image and the original container paths.
 */
function mapContainerResult(result, { runtime, image, archivePath }) {
  const { filePath: containerFilePath } = result;
  const containerPrefix = `${CONTAINER_ARCHIVE_PATH}/`;

  // Only paths under the container archive mount are rebased onto the host
  // directory; any other value is passed through unchanged.
  const filePath = containerFilePath?.startsWith(containerPrefix)
    ? path.join(archivePath, path.relative(CONTAINER_ARCHIVE_PATH, containerFilePath))
    : containerFilePath;

  const container = {
    runtime,
    image,
    filePath: containerFilePath,
    archivePath: CONTAINER_ARCHIVE_PATH
  };

  return { ...result, filePath, archivePath, container };
}
/**
 * Work out what to hand the worker as its input and whether a host directory
 * must be mounted for it.
 *
 * @param {string} input - URL or host file path.
 * @returns {Promise<{containerInput: string, inputMount: ?{host: string, container: string}}>}
 *   URL-like inputs and paths that are not existing regular files are passed
 *   through unchanged with no mount. An existing file is addressed by its base
 *   name under CONTAINER_INPUT_PATH, with its parent directory as the mount source.
 */
async function resolveContainerInput(input) {
  const passThrough = () => ({ containerInput: input, inputMount: null });

  if (isUrlLike(input)) {
    return passThrough();
  }

  const hostFile = path.resolve(input);
  // A failed stat (for example a missing path) is treated the same as "not a file".
  const info = await fs.stat(hostFile).catch(() => null);
  if (!info?.isFile()) {
    return passThrough();
  }

  const inputMount = {
    host: path.dirname(hostFile),
    container: CONTAINER_INPUT_PATH
  };
  // Container paths always use forward slashes, hence path.posix.
  const containerInput = path.posix.join(CONTAINER_INPUT_PATH, path.basename(hostFile));
  return { containerInput, inputMount };
}
/**
 * Report whether a value starts with a URL scheme (`https:`, `file:`, `data:` ...).
 *
 * The scheme must be at least two characters long: a single letter followed by
 * a colon is a Windows drive prefix (`C:\page.html`, `c:/page.html`), and such
 * inputs have to be handled as local files rather than URLs.
 *
 * @param {string} value - Candidate input.
 * @returns {boolean} `true` when the value begins with a multi-character scheme.
 */
function isUrlLike(value) {
  return /^[a-z][a-z0-9+.-]+:/i.test(value);
}
/**
 * Probe for a command by running it with `--version`.
 *
 * @param {string} command - Executable name or path.
 * @returns {Promise<boolean>} `true` when the probe exits with code 0.
 */
async function commandExists(command) {
  // reject: false turns a failed launch into a non-zero code instead of a rejection.
  const { code } = await runCapture(command, ["--version"], { reject: false });
  return code === 0;
}
/**
 * Extract the worker's JSON report from its captured stdout.
 *
 * Lines are examined from last to first; the first line that starts with `{`
 * and parses as JSON is returned, so log lines around the report are tolerated.
 *
 * @param {string} stdout - Captured standard output of the worker.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When the output is blank or no line parses as JSON.
 */
function parseWorkerJson(stdout) {
  const output = stdout.trim();
  if (output === "") {
    throw new Error("Archive worker produced no JSON output");
  }

  const rows = output.split(/\r?\n/);
  for (let index = rows.length - 1; index >= 0; index -= 1) {
    const text = rows[index].trim();
    if (text.startsWith("{")) {
      try {
        return JSON.parse(text);
      } catch {
        // Not valid JSON after all; an earlier line may still hold the report.
      }
    }
  }

  throw new Error(`Archive worker output did not include JSON: ${output.slice(0, 500)}`);
}
/**
 * Run a command and collect its stdout and stderr as text.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments for the executable.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the child.
 * @param {object} [options.env] - Environment for the child; defaults to process.env.
 * @param {boolean} [options.reject] - Pass `false` to resolve on failure instead of rejecting.
 * @returns {Promise<{code: ?number, stdout: string, stderr: string}>} Resolves on exit
 *   code 0, or on any outcome when `reject` is `false`. A spawn failure with
 *   `reject: false` resolves with code 127 and the error message as stderr.
 * @throws {Error} Rejects with the spawn error, or with an Error carrying `.result`
 *   when the child exits non-zero (unless `reject` is `false`).
 */
function runCapture(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const child = spawn(command, args, {
      cwd: options.cwd,
      env: options.env || process.env,
      stdio: ["ignore", "pipe", "pipe"]
    });
    let stdout = "";
    let stderr = "";

    // Decode through the stream's own decoder. Appending raw Buffer chunks
    // stringifies each one separately, which corrupts any multi-byte UTF-8
    // character that happens to be split across two chunks.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");

    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    child.on("error", (error) => {
      if (options.reject === false) {
        // 127 mirrors the shell's "command not found" exit status.
        resolve({ code: 127, stdout, stderr: error.message });
      } else {
        reject(error);
      }
    });
    child.on("close", (code) => {
      const result = { code, stdout, stderr };
      if (code !== 0 && options.reject !== false) {
        const error = new Error(stderr.trim() || `${command} exited with ${code}`);
        error.result = result;
        reject(error);
      } else {
        resolve(result);
      }
    });
  });
}
/**
 * Run a command with its stdio wired straight to the current process.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments for the executable.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the child.
 * @param {object} [options.env] - Environment for the child; defaults to process.env.
 * @returns {Promise<void>} Resolves when the child exits with code 0.
 * @throws {Error} Rejects with the spawn error, or with "<command> exited with <code>"
 *   for any other exit code.
 */
function runInherited(command, args, options = {}) {
  return new Promise((resolve, reject) => {
    const spawnOptions = {
      cwd: options.cwd,
      env: options.env || process.env,
      stdio: "inherit"
    };
    const child = spawn(command, args, spawnOptions);

    child.on("error", reject);
    child.on("close", (code) => {
      if (code !== 0) {
        reject(new Error(`${command} exited with ${code}`));
        return;
      }
      resolve();
    });
  });
}
/**
 * Parse a process-style argv array into a command, positionals and flags.
 *
 * `argv[2]` is the command; parsing of the rest starts at `argv[3]`.
 * Supported flag forms:
 *   --key=value   value is everything after the FIRST "=", so it may itself contain "="
 *   --key value   the next token is the value unless it starts with "--"
 *   --key         boolean true
 *   --no-key      boolean false stored under "key"
 * Flag names are stored as written, so `--archive-path` becomes the key "archive-path".
 *
 * @param {string[]} argv - Typically `process.argv`.
 * @returns {object} `{ command, positional, ...flags }`.
 */
function parseArgs(argv) {
  const args = {
    command: argv[2],
    positional: []
  };
  for (let i = 3; i < argv.length; i += 1) {
    const arg = argv[i];
    if (!arg.startsWith("--")) {
      args.positional.push(arg);
      continue;
    }
    // Split at the first "=" only. String.prototype.split with a limit of 2
    // drops the remainder, which truncated values such as "--id=a=b" to "a".
    const separator = arg.indexOf("=");
    const hasInlineValue = separator !== -1;
    const key = (hasInlineValue ? arg.slice(0, separator) : arg).slice(2);
    if (key.startsWith("no-")) {
      args[key.slice(3)] = false;
    } else if (hasInlineValue) {
      args[key] = arg.slice(separator + 1);
    } else if (i + 1 < argv.length && !argv[i + 1].startsWith("--")) {
      args[key] = argv[++i];
    } else {
      args[key] = true;
    }
  }
  return args;
}
/**
 * Print the command-line help text to stdout in a single console.log call.
 * The default image and default archive directory are interpolated into the text.
 */
function usage() {
  const helpLines = [
    "Usage:",
    "node src/container-runner.mjs archive <url-or-html-file> [options]",
    "node src/container-runner.mjs build [options]",
    "Options:",
    "--runtime <podman|docker> Container runtime. Defaults to podman, then docker",
    `--image <name> Worker image. Defaults to ${DEFAULT_IMAGE}`,
    `--archive-path <dir> Host output directory. Defaults to ARCHIVE_PATH or ${defaultArchivePath()}`,
    "--id <id> Output id/file stem",
    "--build Build the worker image before archiving",
    "--vnc Expose x11vnc from the worker for debugging",
    "--vnc-port <port> Host VNC port. Defaults to 5901",
    "--network <name> Optional runtime network to attach",
    "--json Print machine-readable JSON"
  ];
  console.log(helpLines.join("\n"));
}
/**
 * Command-line entry point: parse process.argv and run `help`, `build` or `archive`.
 *
 * - No command, `help`, or `--help`: print usage.
 * - `build`: build the worker image.
 * - `archive <input>`: archive the input, then print either the raw JSON result
 *   (`--json`) or a short report listing at most 20 external asset references
 *   and at most 20 warnings. A missing input prints usage and sets exit code 1.
 *
 * @returns {Promise<void>}
 * @throws {Error} For an unknown command, or whatever the invoked step throws.
 */
async function main() {
  const args = parseArgs(process.argv);
  const { command } = args;

  if (!command || command === "help" || args.help) {
    usage();
    return;
  }

  const runtime = args.runtime || await detectContainerRuntime();
  const image = args.image || DEFAULT_IMAGE;

  switch (command) {
    case "build":
      await buildWorkerImage({ runtime, image });
      return;
    case "archive":
      break;
    default:
      throw new Error(`Unknown command: ${command}`);
  }

  const [input] = args.positional;
  if (!input) {
    usage();
    process.exitCode = 1;
    return;
  }

  // Boolean switches only count when given as bare flags (parsed as `true`).
  const result = await archiveWithContainer(input, {
    runtime,
    image,
    archivePath: args["archive-path"],
    id: args.id,
    build: args.build === true,
    vnc: args.vnc === true,
    vncPort: args["vnc-port"],
    network: args.network
  });

  if (args.json === true) {
    console.log(JSON.stringify(result));
    return;
  }

  // Print up to 20 entries, one per line, each prefixed with a space.
  const printFirstEntries = (entries) => {
    entries.slice(0, 20).forEach((entry) => {
      console.log(` ${entry}`);
    });
  };

  console.log(`Archived: ${result.sourceUrl}`);
  console.log(`Output: ${result.filePath}`);
  console.log(`Worker: ${result.container.runtime} ${result.container.image}`);

  if (result.externalAssets.length) {
    console.log(`External asset references remaining: ${result.externalAssets.length}`);
    printFirstEntries(result.externalAssets);
  } else {
    console.log("External asset references remaining: 0");
  }

  if (result.warnings.length) {
    console.log(`Warnings: ${result.warnings.length}`);
    printFirstEntries(result.warnings);
  }
}
// Run the CLI only when this module is the process entry point, not when it is
// imported as a library. The script path is converted with pathToFileURL so the
// comparison also holds for paths that need percent-encoding (spaces, "#",
// non-ASCII characters) and for Windows drive paths, where a hand-built
// "file://" + path string never equals import.meta.url. process.argv[1] can be
// undefined (for example under `node -e`), hence the guard before converting it.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  main().catch((error) => {
    console.error(error.message);
    process.exitCode = 1;
  });
}