stealth
This commit is contained in:
630
src/archiver.mjs
630
src/archiver.mjs
@@ -1,6 +1,7 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { createRequire } from "node:module";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import {
|
||||
AssetInliner,
|
||||
DEFAULT_USER_AGENT,
|
||||
@@ -12,15 +13,595 @@ import {
|
||||
} from "./asset-inliner.mjs";
|
||||
|
||||
// CommonJS-style loader; used to resolve Playwright lazily at call time.
const require = createRequire(import.meta.url);
// Directory of this module (ES modules have no built-in __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Upper bound, in milliseconds, for the initial page navigation.
const PAGE_TIMEOUT_MS = 60000;
// NOTE(review): presumably the budget for the network-idle wait; its consumer is not visible here.
const NETWORK_IDLE_TIMEOUT_MS = 5000;
// Viewport handed to every browser context. The object previously listed
// `width` twice (1024, then 1366); only the last value ever took effect, so
// the stale entry is dropped.
const VIEWPORT = {
  width: 1366,
  height: 768
};
|
||||
|
||||
export { DEFAULT_USER_AGENT, defaultArchivePath };
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Privacy filters integration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Root of the optional privacy-filter assets, one level above this module.
const PRIVACY_FILTERS_DIR = path.join(__dirname, "..", "privacy-filters");

// True once at least one filter asset (rule list or userscript) has loaded.
let privacyFiltersAvailable = false;
// Parsed adblock rules; stays empty when the filter list cannot be read.
let filterRules = { blockRules: [], allowRules: [], cosmeticRules: [] };
let userScriptData = []; // { file, content, matches, excludes }

/**
 * Load the adblock filter list and the bundled userscripts from
 * PRIVACY_FILTERS_DIR into module state.
 *
 * Loading is best-effort and never throws: each asset is read independently,
 * so a single missing userscript no longer discards the filter list (or the
 * other userscripts). State is rebuilt from scratch on every call, so calling
 * this twice does not duplicate userscript entries.
 *
 * @returns {Promise<void>}
 */
async function loadPrivacyFilters() {
  const filterPath = path.join(PRIVACY_FILTERS_DIR, "bpc-paywall-filter.txt");
  const userscriptDir = path.join(PRIVACY_FILTERS_DIR, "userscript");
  const userScriptFiles = [
    "bpc.en.user.js",
    "bpc.de.user.js",
    "bpc.es.pt.user.js",
    "bpc.fi.se.user.js",
    "bpc.fr.user.js",
    "bpc.it.user.js",
    "bpc.nl.user.js",
    "bpc.pl.user.js"
  ];

  let loadedRules = null;
  try {
    loadedRules = parseFilterRules(await fs.readFile(filterPath, "utf8"));
  } catch {
    // Filter list missing or unreadable; archive without network/cosmetic rules.
  }

  // Read all userscripts in parallel; keep the ones that loaded, in list order.
  const settled = await Promise.allSettled(
    userScriptFiles.map(async (file) => {
      const content = await fs.readFile(path.join(userscriptDir, file), "utf8");
      return { file, content, ...parseUserScriptMetadata(content) };
    })
  );
  const loadedScripts = settled
    .filter((result) => result.status === "fulfilled")
    .map((result) => result.value);

  if (loadedRules !== null) {
    filterRules = loadedRules;
  }
  userScriptData = loadedScripts;
  privacyFiltersAvailable = loadedRules !== null || loadedScripts.length > 0;
}
|
||||
|
||||
await loadPrivacyFilters();
|
||||
|
||||
// --- Adblock filter parsing ------------------------------------------------
|
||||
|
||||
/**
 * Split an adblock-style filter list into network block rules, network
 * exception rules and cosmetic CSS rules.
 *
 * Skipped lines: blanks, `!` comments, everything between `!#if` and
 * `!#endif`, cosmetic exceptions (`#@#`) and scriptlet injections (`##+js`).
 *
 * @param {string} content - Raw text of the filter list.
 * @returns {{blockRules: object[], allowRules: object[], cosmeticRules: {domains: string, css: string}[]}}
 */
function parseFilterRules(content) {
  const parsed = { blockRules: [], allowRules: [], cosmeticRules: [] };
  let skippingConditional = false;

  for (const raw of content.split("\n")) {
    const text = raw.trim();
    if (text === "") continue;

    // Preprocessor directives toggle a skip region; the directives themselves are dropped.
    if (text.startsWith("!#if")) {
      skippingConditional = true;
      continue;
    }
    if (text.startsWith("!#endif")) {
      skippingConditional = false;
      continue;
    }
    // Any other `!` line is a comment or an unsupported directive.
    if (skippingConditional || text.startsWith("!")) continue;

    // Cosmetic exceptions are not supported.
    if (text.includes("#@#")) continue;

    // `@@` marks a network exception (allow) rule.
    if (text.startsWith("@@")) {
      const exception = parseNetworkRule(text.slice(2));
      if (exception) parsed.allowRules.push(exception);
      continue;
    }

    // `domains##selector` is a cosmetic filter.
    const separatorAt = text.indexOf("##");
    if (separatorAt >= 0) {
      const selector = text.slice(separatorAt + 2);
      const css = selector.startsWith("+js") ? null : cosmeticSelectorToCss(selector);
      if (css) {
        parsed.cosmeticRules.push({ domains: text.slice(0, separatorAt), css });
      }
      continue;
    }

    // Everything else is treated as a blocking network rule.
    const blocking = parseNetworkRule(text);
    if (blocking) parsed.blockRules.push(blocking);
  }

  return parsed;
}
|
||||
|
||||
/**
 * Parse one adblock network rule (without any leading `@@`) into a rule object.
 *
 * Supported pattern shapes:
 *   - `||host/path` -> `{ kind: "domain", domain, path, ... }`
 *   - `/regex/`     -> `{ kind: "regex", regex, ... }`
 * Anything else returns null (unsupported).
 *
 * Fixes over the previous version:
 *   - The option detector now accepts `.` and `_`, so `$domain=example.com`
 *     is recognised as an option list instead of being left in the pattern.
 *   - A regex rule must be delimited by `/` on both ends; a plain path such as
 *     `/ads/banner.js` is no longer misread as the regex `ads`.
 *   - The local holding the rule path no longer shadows the `path` module.
 *
 * @param {string} line - Rule text, e.g. `||example.com^$script,domain=a.com|~b.a.com`.
 * @returns {object|null} Parsed rule, or null when the pattern is unsupported.
 */
function parseNetworkRule(line) {
  let options = [];
  let pattern = line;

  // Options follow the last `$`, but only if that tail looks like an option
  // list; a `$` inside a regex body (e.g. `/foo$/`) leaves a tail with `/`.
  const lastDollar = line.lastIndexOf("$");
  if (lastDollar > 0) {
    const optsStr = line.slice(lastDollar + 1);
    if (/^[a-z0-9,=~\-|._]+$/i.test(optsStr)) {
      options = optsStr.split(",");
      pattern = line.slice(0, lastDollar);
    }
  }

  if (!pattern) return null;

  const knownTypes = ["script", "stylesheet", "image", "media", "xmlhttprequest", "other", "inline-script"];
  const type = options.find((o) => knownTypes.includes(o));
  const isThirdParty = options.includes("third-party");
  const isFirstParty = options.includes("~third-party");
  const important = options.includes("important");

  // `domain=a.com|~b.com` restricts the rule by the page issuing the request.
  const includeDomains = [];
  const excludeDomains = [];
  const domainOpt = options.find((o) => o.startsWith("domain="));
  if (domainOpt) {
    for (const d of domainOpt.slice("domain=".length).split("|")) {
      if (d.startsWith("~")) {
        excludeDomains.push(d.slice(1));
      } else {
        includeDomains.push(d);
      }
    }
  }

  const shared = { type, isThirdParty, isFirstParty, includeDomains, excludeDomains, important };

  if (pattern.startsWith("||")) {
    // Drop a trailing `^` separator, then split host from path at the first `/`.
    const domainPath = pattern.slice(2).replace(/\^$/, "");
    const [domain, ...pathParts] = domainPath.split("/");
    const rulePath = pathParts.length > 0 ? "/" + pathParts.join("/") : "";
    return {
      kind: "domain",
      domain,
      path: rulePath,
      type,
      isThirdParty,
      isFirstParty,
      includeDomains,
      excludeDomains,
      important
    };
  }

  if (pattern.length > 2 && pattern.startsWith("/") && pattern.endsWith("/")) {
    return { kind: "regex", regex: pattern.slice(1, -1), ...shared };
  }

  return null;
}
|
||||
|
||||
/**
 * Turn the selector part of a cosmetic filter into a CSS rule string.
 *
 * - `sel:style(decls)` becomes `sel { decls }`.
 * - Selectors using procedural operators that plain CSS cannot express
 *   return null.
 * - Every other selector becomes a `display: none !important` rule.
 *
 * @param {string} selector - Text after `##` in a cosmetic filter.
 * @returns {string|null} A CSS rule, or null when the selector is unsupported.
 */
function cosmeticSelectorToCss(selector) {
  const styled = /:style\((.+)\)$/.exec(selector);
  if (styled !== null) {
    const target = selector.slice(0, selector.lastIndexOf(":style("));
    return `${target} { ${styled[1]} }`;
  }

  const unsupportedMarkers = [
    ":remove()",
    ":matches-css",
    ":matches-media",
    ":xpath(",
    ":upward(",
    ":matches-path"
  ];
  if (unsupportedMarkers.some((marker) => selector.includes(marker))) {
    return null;
  }

  return `${selector} { display: none !important; }`;
}
|
||||
|
||||
/**
 * Decide whether a cosmetic filter's domain list applies to a hostname.
 *
 * The spec is a comma-separated list; entries prefixed with `~` are
 * exclusions. A hostname matches an entry when it equals it or is one of its
 * subdomains.
 *
 * Previously, as soon as any `~` entry was present the positive entries were
 * ignored, so `example.com,~shop.example.com` applied to every site except
 * shop.example.com. Exclusions and inclusions are now both enforced.
 *
 * @param {string} domainSpec - Text before `##`; empty or `*` means "all sites".
 * @param {string} hostname - Hostname of the page being archived.
 * @returns {boolean} True when the rule should be applied on `hostname`.
 */
function matchesCosmeticDomains(domainSpec, hostname) {
  if (!domainSpec || domainSpec === "*") return true;

  const covers = (domain) => hostname === domain || hostname.endsWith("." + domain);

  const included = [];
  const excluded = [];
  for (const entry of domainSpec.split(",")) {
    const domain = entry.trim();
    if (!domain) continue;
    if (domain.startsWith("~")) {
      excluded.push(domain.slice(1));
    } else {
      included.push(domain);
    }
  }

  // An exclusion always wins.
  if (excluded.some(covers)) return false;
  // With only exclusions listed, the rule is generic for every other site.
  return included.length === 0 || included.some(covers);
}
|
||||
|
||||
/**
 * Test a single parsed network rule against one request.
 *
 * Fixes over the previous version:
 *   - `||example.com` now also matches subdomains such as cdn.example.com.
 *   - `xmlhttprequest` rules match both the `xhr` and `fetch` resource types.
 *   - Rule text is regex-escaped before being compiled, so characters like
 *     `+`, `(` or `[` match literally instead of mis-matching or throwing.
 *   - Path rules are tested against pathname + query string, so rules that
 *     contain `?` can match.
 *
 * @param {string} url - Full request URL.
 * @param {URL} urlObj - Parsed form of `url`.
 * @param {string} hostname - Hostname of the request.
 * @param {string} resourceType - Resource type string reported for the request.
 * @param {string} sourceHostname - Hostname of the page being archived.
 * @param {object} rule - Rule object as produced by parseNetworkRule.
 * @returns {boolean} True when the rule applies to the request.
 */
function matchesNetworkRule(url, urlObj, hostname, resourceType, sourceHostname, rule) {
  const sameOrSubdomain = (host, domain) => host === domain || host.endsWith("." + domain);
  // Escapes every regex metacharacter except `*`, which is the rule wildcard.
  const escapeRegex = (text) => text.replace(/[.+?^${}()|[\]\\]/g, "\\$&");

  // `domain=` option: restrict by the page that issues the request.
  if (
    rule.includeDomains.length > 0 &&
    !rule.includeDomains.some((d) => sameOrSubdomain(sourceHostname, d))
  ) {
    return false;
  }
  if (rule.excludeDomains.some((d) => sameOrSubdomain(sourceHostname, d))) {
    return false;
  }

  if (rule.type) {
    // Adblock type option -> accepted browser resource types.
    const typeMap = {
      script: ["script"],
      stylesheet: ["stylesheet"],
      image: ["image"],
      media: ["media"],
      xmlhttprequest: ["xhr", "fetch"],
      other: ["other"],
      "inline-script": ["script"]
    };
    const accepted = typeMap[rule.type];
    if (accepted && !accepted.includes(resourceType)) {
      return false;
    }
  }

  if (rule.isThirdParty || rule.isFirstParty) {
    // Hostname-suffix heuristic: a request is first-party when it targets the
    // page's host or one of its subdomains.
    const isThirdParty = !sameOrSubdomain(hostname, sourceHostname);
    if (rule.isThirdParty && !isThirdParty) return false;
    if (rule.isFirstParty && isThirdParty) return false;
  }

  if (rule.kind === "domain") {
    // `*` inside the host matches within a single label.
    const domainRe = new RegExp(
      "^(?:.*\\.)?" + escapeRegex(rule.domain).replace(/\*/g, "[^.]*") + "$",
      "i"
    );
    if (!domainRe.test(hostname)) return false;

    if (rule.path) {
      // `^` separators are ignored; `*` matches any run of characters.
      const pathSource = escapeRegex(rule.path.replace(/\^/g, "")).replace(/\*/g, ".*");
      const pathRe = new RegExp("^" + pathSource, "i");
      if (!pathRe.test(urlObj.pathname + (urlObj.search || ""))) return false;
    }
    return true;
  }

  if (rule.kind === "regex") {
    try {
      return new RegExp(rule.regex, "i").test(url);
    } catch {
      // The list contained a regex this engine cannot compile; treat as no match.
      return false;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Decide whether a request should be blocked according to the loaded rules.
 *
 * Precedence:
 *   1. a matching block rule flagged `important` blocks, overriding exceptions;
 *   2. a matching exception (`@@`) rule allows the request;
 *   3. any other matching block rule blocks;
 *   4. otherwise the request is allowed.
 *
 * The previous leading guard compared the full URL with the bare page
 * hostname. That could only be true for strings that are not absolute URLs,
 * which the URL parse below already answers with `false`, so it was removed.
 * The `important` flag was parsed but never consulted; it is honoured now.
 *
 * @param {string} url - Absolute request URL.
 * @param {string} resourceType - Resource type string reported for the request.
 * @param {string} sourceHostname - Hostname of the page being archived.
 * @returns {boolean} True when the request must be aborted.
 */
function shouldBlockRequest(url, resourceType, sourceHostname) {
  let urlObj;
  try {
    urlObj = new URL(url);
  } catch {
    // Not an absolute URL; nothing to match against, so let it through.
    return false;
  }
  const hostname = urlObj.hostname;

  const applies = (rule) =>
    matchesNetworkRule(url, urlObj, hostname, resourceType, sourceHostname, rule);

  if (filterRules.blockRules.some((rule) => rule.important && applies(rule))) {
    return true;
  }
  if (filterRules.allowRules.some((rule) => applies(rule))) {
    return false;
  }
  return filterRules.blockRules.some((rule) => applies(rule));
}
|
||||
|
||||
// --- Userscript metadata parsing -------------------------------------------
|
||||
|
||||
/**
 * Extract the `@match` and `@exclude` URL patterns from a userscript's
 * `// ==UserScript== ... // ==/UserScript==` header.
 *
 * @param {string} content - Full userscript source.
 * @returns {{matches: string[], excludes: string[]}} Patterns in file order;
 *   both lists are empty when no metadata header is found.
 */
function parseUserScriptMetadata(content) {
  const matches = [];
  const excludes = [];

  const header = /\/\/\s*==UserScript==([\s\S]*?)\/\/\s*==\/UserScript==/.exec(content);
  if (header === null) {
    return { matches, excludes };
  }

  for (const entry of header[1].split("\n")) {
    // A line is classified as @match first; @exclude is only tried otherwise.
    const included = /@match\s+(.+)/.exec(entry);
    if (included !== null) {
      matches.push(included[1].trim());
      continue;
    }
    const excluded = /@exclude\s+(.+)/.exec(entry);
    if (excluded !== null) {
      excludes.push(excluded[1].trim());
    }
  }

  return { matches, excludes };
}
|
||||
|
||||
/**
 * Check a URL against a userscript match pattern of the form
 * `scheme://host/path`, e.g. `*://*.example.com/*`.
 *
 * The scheme must be `*` or equal the URL's scheme. Host and path are
 * delegated to matchHost and matchPath. A pattern without `://`, an
 * unparsable URL, or any error raised while matching yields false.
 *
 * @param {string} url - Absolute URL to test.
 * @param {string} pattern - Match pattern.
 * @returns {boolean} True when the URL satisfies the pattern.
 */
function urlMatchesPattern(url, pattern) {
  try {
    const target = new URL(url);

    const schemeEnd = pattern.indexOf("://");
    if (schemeEnd < 0) return false;

    // URL.protocol carries a trailing ":" which the pattern scheme does not.
    const scheme = pattern.slice(0, schemeEnd);
    if (scheme !== "*" && scheme !== target.protocol.slice(0, -1)) return false;

    // Everything after "://" is host, optionally followed by a path.
    const remainder = pattern.slice(schemeEnd + 3);
    const firstSlash = remainder.indexOf("/");
    const hasPath = firstSlash >= 0;
    const hostPattern = hasPath ? remainder.slice(0, firstSlash) : remainder;
    const pathPattern = hasPath ? remainder.slice(firstSlash) : "/";

    return matchHost(target.hostname, hostPattern) && matchPath(target.pathname, pathPattern);
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Match a hostname against the host part of a userscript match pattern.
 *
 * `*` matches any host; `*.example.com` matches example.com and all of its
 * subdomains; anything else must equal the hostname exactly.
 *
 * @param {string} hostname - Hostname of the URL under test.
 * @param {string} pattern - Host portion of the match pattern.
 * @returns {boolean}
 */
function matchHost(hostname, pattern) {
  if (pattern === "*") return true;
  if (!pattern.startsWith("*.")) return hostname === pattern;

  const base = pattern.slice(2);
  return hostname === base || hostname.endsWith(`.${base}`);
}
|
||||
|
||||
/**
 * Match a URL pathname against the path part of a userscript match pattern.
 *
 * Glob semantics: `*` matches any run of characters and `?` matches exactly
 * one character. The comparison is anchored at both ends and is
 * case-insensitive.
 *
 * All other regex metacharacters in the pattern are now escaped. Previously
 * only `.` was, so patterns containing `+`, `(`, `[` and similar either
 * matched the wrong paths or made the RegExp constructor throw.
 *
 * @param {string} pathname - Pathname of the URL under test.
 * @param {string} pattern - Path portion of the match pattern (starts with `/`).
 * @returns {boolean}
 */
function matchPath(pathname, pattern) {
  if (pattern === "/*") return true;

  const source = pattern
    .replace(/[.+^${}()|[\]\\]/g, "\\$&") // literal everything except the glob characters
    .replace(/\*/g, ".*")
    .replace(/\?/g, ".");

  return new RegExp(`^${source}$`, "i").test(pathname);
}
|
||||
|
||||
/**
 * Decide whether a userscript applies to a URL: at least one `matches`
 * pattern must accept the URL and no `excludes` pattern may accept it.
 *
 * @param {string} url - URL of the page being archived.
 * @param {{matches: string[], excludes: string[]}} meta - Parsed userscript metadata.
 * @returns {boolean}
 */
function shouldInjectUserScript(url, meta) {
  const accepts = (pattern) => urlMatchesPattern(url, pattern);
  return meta.matches.some(accepts) && !meta.excludes.some(accepts);
}
|
||||
|
||||
// --- Browser helpers -------------------------------------------------------
|
||||
|
||||
/**
 * Resolve the `playwright` package lazily through the module-level `require`.
 *
 * @returns {object} The Playwright module.
 * @throws {Error} With install instructions when the package cannot be
 *   loaded; the underlying failure is attached as `cause` so its stack and
 *   error code are not lost.
 */
function loadPlaywright() {
  try {
    return require("playwright");
  } catch (error) {
    throw new Error(
      `Playwright is required. Run "npm install" and "npm run install-browsers". Original error: ${error.message}`,
      { cause: error }
    );
  }
}
|
||||
|
||||
// Manual stealth evasions, meant to be registered as a browser init script.
//
// The patches run immediately when the script is evaluated. They used to be
// deferred to DOMContentLoaded whenever the document was still loading, which
// let page scripts observe the unpatched navigator first. The
// permissions.query wrapper also called the native method without its
// receiver, which throws "Illegal invocation"; it is bound now.
const STEALTH_INIT_SCRIPT = `
(() => {
  try {
    // Override webdriver getter without using delete (can crash renderer)
    if (navigator.webdriver !== undefined) {
      Object.defineProperty(navigator, 'webdriver', {
        get: () => undefined,
        configurable: true,
        enumerable: true
      });
    }
  } catch (e) {}

  try {
    if (!window.chrome) {
      window.chrome = { runtime: {} };
    } else if (!window.chrome.runtime) {
      window.chrome.runtime = {};
    }
  } catch (e) {}

  try {
    const permissions = window.navigator.permissions;
    const originalQuery = permissions?.query;
    if (originalQuery) {
      // Keep the native method bound to its receiver.
      const boundQuery = originalQuery.bind(permissions);
      permissions.query = (parameters) => (
        parameters.name === 'notifications'
          ? Promise.resolve({ state: Notification.permission })
          : boundQuery(parameters)
      );
    }
  } catch (e) {}
})();
`;
|
||||
|
||||
/**
 * Build the Chromium command-line switches used when launching the browser.
 *
 * NOTE(review): the list disables the sandbox and web security; confirm that
 * is acceptable for the pages this tool is pointed at.
 *
 * @param {boolean} headless - When truthy, `--headless=new` is appended.
 * @returns {string[]} Switches in launch order.
 */
function buildLaunchArgs(headless) {
  const baseArgs = [
    "--disable-blink-features=AutomationControlled",
    "--disable-web-security",
    "--disable-features=IsolateOrigins,site-per-process",
    "--disable-site-isolation-trials",
    "--disable-infobars",
    "--no-sandbox",
    "--disable-setuid-sandbox",
    "--disable-dev-shm-usage",
    "--disable-accelerated-2d-canvas",
    "--disable-gpu",
    "--window-size=1366,768"
  ];

  return headless ? [...baseArgs, "--headless=new"] : baseArgs;
}
|
||||
|
||||
/**
 * List the default launcher switches that should be omitted at launch.
 *
 * @returns {string[]} A fresh array on every call.
 */
function buildIgnoreDefaultArgs() {
  const suppressed = ["--enable-automation"];
  return suppressed;
}
|
||||
|
||||
// --- Page helpers ----------------------------------------------------------
|
||||
|
||||
/**
 * Install a catch-all route on the page that aborts requests matched by the
 * loaded block rules. Does nothing when no filters or block rules are loaded.
 *
 * Navigation requests are never blocked. The decision fails open: if
 * evaluating the rules throws, the request continues.
 *
 * The handler is now async and awaits `route.abort()` / `route.continue()`.
 * Before, those promises were left floating, so a rejection (for example when
 * the page closes mid-request) became an unhandled rejection, and the catch
 * path could try to continue a route that had already been handled.
 *
 * @param {object} page - Playwright page to attach the route to.
 * @param {string} sourceHostname - Hostname of the page being archived.
 * @returns {Promise<void>}
 */
async function setupRequestBlocking(page, sourceHostname) {
  if (!privacyFiltersAvailable || filterRules.blockRules.length === 0) return;

  await page.route("**/*", async (route) => {
    let block = false;
    try {
      const request = route.request();
      block =
        !request.isNavigationRequest() &&
        shouldBlockRequest(request.url(), request.resourceType(), sourceHostname);
    } catch {
      // Rule evaluation failed; let the request through.
      block = false;
    }

    try {
      if (block) {
        await route.abort("blockedbyclient");
      } else {
        await route.continue();
      }
    } catch {
      // The route could not be resolved (e.g. the page is gone); nothing left to do.
    }
  });
}
|
||||
|
||||
/**
 * Add one <style> tag containing every cosmetic rule whose domain list
 * matches the given hostname. Does nothing when filters are unavailable or no
 * rule applies; injection errors are ignored.
 *
 * @param {object} page - Playwright page to style.
 * @param {string} hostname - Hostname of the page being archived.
 * @returns {Promise<void>}
 */
async function injectCosmeticFilters(page, hostname) {
  if (!privacyFiltersAvailable) return;
  if (filterRules.cosmeticRules.length === 0) return;

  const applicableCss = filterRules.cosmeticRules
    .filter((rule) => matchesCosmeticDomains(rule.domains, hostname))
    .map((rule) => rule.css);
  if (applicableCss.length === 0) return;

  try {
    await page.addStyleTag({ content: applicableCss.join("\n") });
  } catch {
    // Ignore cosmetic injection failures.
  }
}
|
||||
|
||||
// Page-side shim that defines `window.GM.xmlHttpRequest` on top of fetch()
// when no `GM` global exists. The response passed to `details.onload` carries
// status, statusText, responseText and a CRLF-joined responseHeaders string;
// failures are passed to `details.onerror`.
const GM_MOCK = `
if (typeof GM === "undefined") {
  window.GM = {
    xmlHttpRequest: function(details) {
      fetch(details.url, {
        method: details.method || "GET",
        headers: details.headers || {},
        body: details.data || null
      })
        .then(response => response.text().then(text => ({
          status: response.status,
          statusText: response.statusText,
          responseText: text,
          responseHeaders: Array.from(response.headers.entries())
            .map(([k, v]) => k + ": " + v).join("\\r\\n")
        })))
        .then(obj => {
          if (details.onload) details.onload(obj);
        })
        .catch(err => {
          if (details.onerror) details.onerror(err);
        });
    }
  };
}
`;
|
||||
|
||||
/**
 * Inject the GM API shim followed by every loaded userscript whose metadata
 * matches the source URL. Does nothing when filters are unavailable or no
 * script matches.
 *
 * If the shim itself cannot be injected, no userscript is attempted. A
 * failure of an individual userscript is ignored and the rest still run.
 *
 * @param {object} page - Playwright page to inject into.
 * @param {string} sourceUrl - URL of the page being archived.
 * @returns {Promise<void>}
 */
async function injectPrivacyUserScripts(page, sourceUrl) {
  if (!privacyFiltersAvailable || userScriptData.length === 0) return;

  const applicable = userScriptData.filter((script) => shouldInjectUserScript(sourceUrl, script));
  if (applicable.length === 0) return;

  // The shim goes first so the userscripts find `GM` already defined.
  const payloads = [GM_MOCK, ...applicable.map((script) => script.content)];
  for (const [index, content] of payloads.entries()) {
    try {
      await page.addScriptTag({ content });
    } catch {
      // Without the shim (index 0) the userscripts are skipped altogether.
      if (index === 0) return;
    }
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Archiving
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export async function archivePage(input, options = {}) {
|
||||
const sourceUrl = inputToUrl(input);
|
||||
const archivePath = options.archivePath || defaultArchivePath();
|
||||
@@ -29,7 +610,7 @@ export async function archivePage(input, options = {}) {
|
||||
|
||||
await fs.mkdir(archivePath, { recursive: true });
|
||||
|
||||
const renderedHtml = await renderPage(sourceUrl);
|
||||
const renderedHtml = await renderPage(sourceUrl, options);
|
||||
const baseUrl = findEffectiveBase(renderedHtml, sourceUrl);
|
||||
const inliner = new AssetInliner({
|
||||
userAgent: DEFAULT_USER_AGENT,
|
||||
@@ -50,21 +631,48 @@ export async function archivePage(input, options = {}) {
|
||||
};
|
||||
}
|
||||
|
||||
export async function renderPage(sourceUrl) {
|
||||
export async function renderPage(sourceUrl, options = {}) {
|
||||
const playwright = loadPlaywright();
|
||||
const browser = await playwright.chromium.launch({ headless: true });
|
||||
|
||||
const hasDisplay = !!(process.env.DISPLAY || process.env.WAYLAND_DISPLAY);
|
||||
const headless = options.headless !== false && !hasDisplay;
|
||||
|
||||
const browser = await playwright.chromium.launch({
|
||||
headless,
|
||||
args: buildLaunchArgs(headless),
|
||||
ignoreDefaultArgs: buildIgnoreDefaultArgs()
|
||||
});
|
||||
|
||||
try {
|
||||
const context = await browser.newContext({
|
||||
userAgent: DEFAULT_USER_AGENT,
|
||||
viewport: VIEWPORT
|
||||
userAgent: options.userAgent || DEFAULT_USER_AGENT,
|
||||
viewport: VIEWPORT,
|
||||
locale: options.locale || "en-US",
|
||||
timezoneId: options.timezoneId || "America/New_York"
|
||||
});
|
||||
|
||||
// Inject stealth evasions into every new page before any scripts run.
|
||||
await context.addInitScript(STEALTH_INIT_SCRIPT);
|
||||
|
||||
const page = await context.newPage();
|
||||
const sourceHostname = new URL(sourceUrl).hostname;
|
||||
|
||||
// Block paywall/tracker requests before the page loads.
|
||||
await setupRequestBlocking(page, sourceHostname);
|
||||
|
||||
await page.goto(sourceUrl, {
|
||||
waitUntil: "domcontentloaded",
|
||||
timeout: PAGE_TIMEOUT_MS
|
||||
});
|
||||
|
||||
// Inject cosmetic CSS and userscripts to strip paywalls / ads.
|
||||
await injectCosmeticFilters(page, sourceHostname);
|
||||
await injectPrivacyUserScripts(page, sourceUrl);
|
||||
|
||||
// Give the userscripts a moment to run their setTimeout callbacks.
|
||||
const userscriptDelay = options.userscriptDelay || 2000;
|
||||
await page.waitForTimeout(userscriptDelay);
|
||||
|
||||
await waitForNetworkIdle(page);
|
||||
await snapshotLoadedResourceUrls(page);
|
||||
|
||||
@@ -112,16 +720,6 @@ async function snapshotLoadedResourceUrls(page) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Resolve the `playwright` package lazily through the module-level `require`.
 *
 * @returns {object} The Playwright module.
 * @throws {Error} With install instructions when the package cannot be
 *   loaded; the underlying failure is attached as `cause` so its stack and
 *   error code are not lost.
 */
function loadPlaywright() {
  try {
    return require("playwright");
  } catch (error) {
    throw new Error(
      `Playwright is required. Run "npm install" and "npm run install-browsers". Original error: ${error.message}`,
      { cause: error }
    );
  }
}
|
||||
|
||||
function addArchiveComment(html, sourceUrl) {
|
||||
const safeSource = String(sourceUrl).replaceAll("--", "- -");
|
||||
const comment = `<!-- Archived locally. Source: ${safeSource}. Created: ${new Date().toISOString()}. -->`;
|
||||
|
||||
Reference in New Issue
Block a user