import fs from "node:fs/promises";
import path from "node:path";
import { fileURLToPath, pathToFileURL } from "node:url";
/**
 * Default User-Agent string; it identifies as desktop Chrome 130 on macOS.
 * NOTE(review): presumably sent with outgoing HTTP requests when the caller
 * supplies no user agent of its own — the consumers are outside this view.
 */
export const DEFAULT_USER_AGENT =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36";
/**
 * Set of MIME types (no parameters such as `; charset=...`, all lowercase).
 * NOTE(review): judging by the name, these are presumably the content types
 * handled as text rather than as binary data — confirm against the callers,
 * which are not visible in this part of the file.
 */
const TEXT_TYPES = new Set([
"application/javascript",
"application/json",
"application/ld+json",
"application/xml",
"image/svg+xml",
"text/css",
"text/html",
"text/javascript",
"text/plain",
"text/xml"
]);
/**
 * Lookup table from file extension to MIME type.
 * Keys are lowercase and include the leading dot (e.g. ".png"), so a caller
 * must normalize an extension to that form before calling `.get()`.
 * Extensions not listed here yield `undefined` from `.get()`.
 */
const MIME_BY_EXT = new Map([
[".apng", "image/apng"],
[".avif", "image/avif"],
[".css", "text/css"],
[".gif", "image/gif"],
[".html", "text/html"],
[".ico", "image/x-icon"],
[".jpeg", "image/jpeg"],
[".jpg", "image/jpeg"],
[".js", "text/javascript"],
[".json", "application/json"],
[".m4a", "audio/mp4"],
[".mp3", "audio/mpeg"],
[".mp4", "video/mp4"],
[".otf", "font/otf"],
[".png", "image/png"],
[".svg", "image/svg+xml"],
[".ttf", "font/ttf"],
[".webm", "video/webm"],
[".webp", "image/webp"],
[".woff", "font/woff"],
[".woff2", "font/woff2"],
[".xml", "application/xml"]
]);
/**
 * Inline `data:` URI holding a tiny base64-encoded GIF (the GIF89a header
 * declares a 1×1 image).
 * NOTE(review): presumably substituted for images that cannot be embedded in
 * the archive — confirm against the callers, which are not visible here.
 */
const TRANSPARENT_IMAGE_DATA_URI =
"data:image/gif;base64,R0lGODlhAQABAAAAACwAAAAAAQABAAA=";
/**
 * Pick the default directory path for archives.
 *
 * Resolution order:
 *   1. `ARCHIVE_PATH` from the environment, when set and non-empty.
 *   2. `<TMPDIR>/local-page-archives`, when `TMPDIR` is set and non-empty.
 *   3. `/tmp/local-page-archives`.
 *
 * The directory is not created or checked here; only the path is computed.
 *
 * @returns {string} The archive directory path.
 */
export function defaultArchivePath() {
  const { ARCHIVE_PATH: explicitPath, TMPDIR: tempRoot } = process.env;
  if (explicitPath) {
    return explicitPath;
  }
  return path.join(tempRoot || "/tmp", "local-page-archives");
}
/**
 * Tell whether a value begins with an `http://` or `https://` prefix.
 *
 * The check is case-insensitive and purely textual: it does not validate the
 * rest of the URL. Non-string values are coerced to a string by the regex test.
 *
 * @param {string} value - Candidate URL text.
 * @returns {boolean} `true` when the value starts with an HTTP(S) scheme.
 */
export function isHttpUrl(value) {
  const httpPrefix = /^https?:\/\//i;
  return httpPrefix.test(value);
}
/**
 * Tell whether a value begins with a `file://` prefix.
 *
 * The check is case-insensitive and purely textual: it does not validate the
 * rest of the URL. Non-string values are coerced to a string by the regex test.
 *
 * @param {string} value - Candidate URL text.
 * @returns {boolean} `true` when the value starts with the file scheme.
 */
export function isFileUrl(value) {
  const filePrefix = /^file:\/\//i;
  return filePrefix.test(value);
}
/**
 * Normalize user input (a URL or a filesystem path) into URL text.
 *
 * Input that already starts with a URL scheme (`https:`, `file:`, `data:`, ...)
 * is returned untouched. Anything else is treated as a filesystem path,
 * resolved against the current working directory, and converted to a
 * `file://` URL.
 *
 * @param {string} input - A URL string or a relative/absolute file path.
 * @returns {string} The input itself when it carries a scheme, otherwise the
 *   `file://` URL of the resolved path.
 */
export function inputToUrl(input) {
  // A scheme must be at least two characters long here. A lone letter before
  // the colon is a Windows drive prefix ("C:\pages\x.html", "C:/x", "C:x"),
  // not a URL scheme, and must go through the path branch below.
  const hasUrlScheme = /^[a-z][a-z0-9+.-]+:/i.test(input);
  if (hasUrlScheme) {
    return input;
  }
  return pathToFileURL(path.resolve(input)).href;
}
/**
 * Build a filesystem-friendly, timestamped slug for a URL.
 *
 * The stem is the URL's hostname plus pathname with trailing slashes removed,
 * every run of non-alphanumeric characters collapsed to a single `-`,
 * leading/trailing hyphens stripped, and the result capped at 90 characters.
 * When nothing is left (e.g. `file:///`), the stem is `"archive"`. The current
 * time is appended as an ISO-8601 string with `:` and `.` replaced by `-`.
 *
 * @param {string} inputUrl - Absolute URL to derive the slug from.
 * @returns {string} A slug shaped like `<stem>-YYYY-MM-DDTHH-MM-SS-mmmZ`.
 * @throws {TypeError} When `inputUrl` cannot be parsed by `new URL()`.
 */
export function slugForUrl(inputUrl) {
  const { hostname, pathname } = new URL(inputUrl);
  const stem =
    `${hostname}${pathname}`
      .replace(/\/+$/, "")
      .replace(/[^a-z0-9]+/gi, "-")
      .replace(/^-+|-+$/g, "")
      .slice(0, 90)
      // Truncating can cut right after a separator; strip it again so the
      // stem never ends in "-" (which would yield "--" before the timestamp).
      .replace(/-+$/, "") || "archive";
  const stamp = new Date().toISOString().replace(/[:.]/g, "-");
  return `${stem}-${stamp}`;
}
export function findEffectiveBase(html, fallbackBaseUrl) {
const match = html.match(/