2026-05-14 08:12:13 -07:00
|
|
|
import fs from "node:fs/promises";
|
|
|
|
|
import path from "node:path";
|
|
|
|
|
import { createRequire } from "node:module";
|
2026-05-15 09:12:28 -07:00
|
|
|
import { fileURLToPath } from "node:url";
|
2026-05-14 08:12:13 -07:00
|
|
|
import {
|
|
|
|
|
AssetInliner,
|
|
|
|
|
DEFAULT_USER_AGENT,
|
|
|
|
|
defaultArchivePath,
|
|
|
|
|
findEffectiveBase,
|
|
|
|
|
inputToUrl,
|
|
|
|
|
isHttpUrl,
|
2026-05-16 16:05:32 -07:00
|
|
|
splitSrcset,
|
2026-05-15 01:00:27 -07:00
|
|
|
slugForUrl
|
2026-05-14 08:12:13 -07:00
|
|
|
} from "./asset-inliner.mjs";
|
|
|
|
|
|
|
|
|
|
// ES modules have no built-in `require`; create one that resolves relative to this file.
const require = createRequire(import.meta.url);

// ES modules have no built-in `__dirname`; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
2026-05-15 01:00:27 -07:00
|
|
|
// Timeouts in milliseconds. NOTE(review): the names suggest a page-load limit and a
// network-idle wait; their consumers are outside this section.
const PAGE_TIMEOUT_MS = 60_000;
const NETWORK_IDLE_TIMEOUT_MS = 5_000;

// Viewport dimensions in pixels.
const VIEWPORT = { width: 1366, height: 768 };
|
2026-05-14 08:12:13 -07:00
|
|
|
|
2026-05-16 22:32:30 -07:00
|
|
|
// CSS selectors for consent / cookie / adblock-notice containers. They are joined
// into a single hide rule by COMMON_ANNOYANCE_CSS.
const COMMON_ANNOYANCE_SELECTORS = [
  '[id^="sp_message_container_"]',
  'iframe[id^="sp_message_iframe_"]',
  'iframe[title*="consent" i]',
  'iframe[title*="privacy manager" i]',
  '#onetrust-consent-sdk',
  '#onetrust-banner-sdk',
  '#didomi-host',
  '#qc-cmp2-container',
  '.qc-cmp2-container',
  '#CybotCookiebotDialog',
  '.iubenda-cs-container',
  '#cmpwrapper',
  '[id^="cmpbox"]',
  '.fc-consent-root',
  '.fc-dialog-container',
  '[aria-modal="true"][id*="consent" i]',
  '[aria-modal="true"][id*="cookie" i]',
  '[role="dialog"][aria-label*="cookie" i]',
  '[role="dialog"][aria-label*="consent" i]',
  '[id*="cookie-banner" i]',
  '[class*="cookie-banner" i]',
  '[id*="cookie-consent" i]',
  '[class*="cookie-consent" i]',
  '[id*="cookie-notice" i]',
  '[class*="cookie-notice" i]',
  '[id*="cookie-popup" i]',
  '[class*="cookie-popup" i]',
  '[id*="adblock" i]',
  '[class*="adblock" i]',
  '[id*="ad-block" i]',
  '[class*="ad-block" i]'
];
|
|
|
|
|
|
|
|
|
|
// Class names associated with consent dialogs on the root elements.
// NOTE(review): the consumer of this list is outside this section; COMMON_ANNOYANCE_CSS
// hard-codes four of these on html/body ("ot-sdk-show-settings" is not among them).
const COMMON_ANNOYANCE_ROOT_CLASSES = [
  "sp-message-open",
  "didomi-popup-open",
  "qc-cmp-ui-showing",
  "ot-sdk-show-settings",
  "iubenda-cs-visible"
];
|
|
|
|
|
|
|
|
|
|
// Stylesheet text with two rules: (1) hide every element matched by
// COMMON_ANNOYANCE_SELECTORS, and (2) force `overflow: auto` / `position: static`
// on html/body elements carrying the listed consent-dialog classes.
const COMMON_ANNOYANCE_CSS = `
${COMMON_ANNOYANCE_SELECTORS.join(",\n")} {
  display: none !important;
  visibility: hidden !important;
  pointer-events: none !important;
}

html.sp-message-open,
body.sp-message-open,
html.didomi-popup-open,
body.didomi-popup-open,
html.qc-cmp-ui-showing,
body.qc-cmp-ui-showing,
html.iubenda-cs-visible,
body.iubenda-cs-visible {
  overflow: auto !important;
  position: static !important;
}
`;
|
|
|
|
|
|
2026-05-14 08:12:13 -07:00
|
|
|
export { DEFAULT_USER_AGENT, defaultArchivePath };
|
|
|
|
|
|
2026-05-15 09:12:28 -07:00
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// Privacy filters integration
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
// Directory holding the bundled filter lists and userscripts: a sibling of this
// module's directory.
const PRIVACY_FILTERS_DIR = path.join(__dirname, "..", "privacy-filters");

// Filter lists read by loadPrivacyFilters(), in load order. `file` is relative to
// PRIVACY_FILTERS_DIR. Every list except the paywall one lives at `lists/<id>.txt`.
const FILTER_LIST_FILES = [
  { id: "bpc-paywall", file: "bpc-paywall-filter.txt" },
  ...[
    "easylist",
    "ublock-filters",
    "easylist-cookie",
    "ublock-annoyances",
    "ublock-cookies"
  ].map((id) => ({ id, file: path.join("lists", `${id}.txt`) }))
];
|
2026-05-15 09:12:28 -07:00
|
|
|
|
|
|
|
|
// Module state populated by loadPrivacyFilters().

// True once at least one network rule, cosmetic rule, or userscript has been loaded.
let privacyFiltersAvailable = false;

// Merged rule set from every filter list that could be read.
let filterRules = emptyFilterRules();

// One entry per loaded userscript.
let userScriptData = []; // { file, content, matches, excludes }

// Source text of the shared userscript helper file ("bpc_func.js").
let userScriptRequireContent = "";
|
2026-05-15 09:12:28 -07:00
|
|
|
|
|
|
|
|
/**
 * Loads the bundled filter lists and paywall userscripts into module state
 * (`filterRules`, `userScriptRequireContent`, `userScriptData`,
 * `privacyFiltersAvailable`).
 *
 * Loading is best-effort and never throws. A missing individual filter list is
 * skipped. A missing userscript file, or any other failure, leaves
 * `privacyFiltersAvailable` unchanged; a missing file does so silently, while
 * any other failure is reported as a warning rather than being swallowed.
 *
 * @returns {Promise<void>}
 */
async function loadPrivacyFilters() {
  try {
    const filterSets = [];
    for (const list of FILTER_LIST_FILES) {
      try {
        const filterPath = path.join(PRIVACY_FILTERS_DIR, list.file);
        const filterContent = await fs.readFile(filterPath, "utf8");
        filterSets.push(parseFilterRules(filterContent, { source: list.id }));
      } catch (error) {
        // Individual lists are optional; anything other than "file not found" is a real failure.
        if (error?.code !== "ENOENT") {
          throw error;
        }
      }
    }
    filterRules = mergeFilterRules(filterSets);

    const userscriptDir = path.join(PRIVACY_FILTERS_DIR, "userscript");
    userScriptRequireContent = await fs.readFile(path.join(userscriptDir, "bpc_func.js"), "utf8");

    const userScriptFiles = [
      "bpc.en.user.js",
      "bpc.de.user.js",
      "bpc.es.pt.user.js",
      "bpc.fi.se.user.js",
      "bpc.fr.user.js",
      "bpc.it.user.js",
      "bpc.nl.user.js",
      "bpc.pl.user.js"
    ];

    // The reads are independent, so run them in parallel. Promise.all keeps the
    // results in the same order as userScriptFiles.
    userScriptData = await Promise.all(
      userScriptFiles.map(async (file) => {
        const content = await fs.readFile(path.join(userscriptDir, file), "utf8");
        return { file, content, ...parseUserScriptMetadata(content) };
      })
    );

    privacyFiltersAvailable =
      filterRules.blockRules.length > 0 ||
      filterRules.importantBlockRules.length > 0 ||
      filterRules.allowRules.length > 0 ||
      filterRules.importantAllowRules.length > 0 ||
      filterRules.cosmeticRules.length > 0 ||
      userScriptData.length > 0;
  } catch (error) {
    // Privacy filters directory missing or unreadable; archive without them.
    // A missing file is expected, but any other error (including a bug in the
    // parsers) must not vanish silently.
    if (error?.code !== "ENOENT") {
      console.warn(`Privacy filters could not be loaded; archiving without them: ${error?.message ?? error}`);
    }
  }
}
|
|
|
|
|
|
|
|
|
|
// --- Adblock filter parsing ------------------------------------------------

// Cosmetic-rule separators recognised by findCosmeticSeparator(). When several
// tokens start at the same position the longest one wins, so "#@#" is not
// mistaken for "##".
const COSMETIC_SEPARATORS = [
  { token: "#@?#", kind: "cosmeticException", extended: true },
  { token: "#@$#", kind: "styleException" },
  { token: "#@%#", kind: "scriptException" },
  { token: "#@^", kind: "htmlException" },
  { token: "#@#", kind: "cosmeticException" },
  { token: "#?#", kind: "extendedCosmetic", extended: true },
  { token: "#$#", kind: "style" },
  { token: "#%#", kind: "script" },
  { token: "#^", kind: "html" },
  { token: "##", kind: "cosmetic" }
];

// Maps a lower-cased filter option name to the canonical resource type recorded
// on a rule by parseNetworkOptions().
const RESOURCE_TYPE_ALIASES = new Map([
  ["beacon", "ping"],
  ["css", "stylesheet"],
  ["doc", "document"],
  ["document", "document"],
  ["fetch", "xmlhttprequest"],
  ["font", "font"],
  ["frame", "subdocument"],
  ["image", "image"],
  ["inline-script", "inline-script"],
  ["media", "media"],
  ["object", "object"],
  ["object-subrequest", "object"],
  ["other", "other"],
  ["ping", "ping"],
  ["script", "script"],
  ["stylesheet", "stylesheet"],
  ["subdocument", "subdocument"],
  ["websocket", "websocket"],
  ["xhr", "xmlhttprequest"],
  ["xmlhttprequest", "xmlhttprequest"]
]);

// Option names that make parseNetworkOptions() mark a rule as skipped.
const SKIP_NETWORK_OPTION_NAMES = new Set([
  "cookie",
  "csp",
  "cname",
  "denyallow",
  "ehide",
  "elemhide",
  "ghide",
  "genericblock",
  "generichide",
  "header",
  "ipaddress",
  "jsonprune",
  "method",
  "permissions",
  "popunder",
  "popup",
  "queryprune",
  "redirect",
  "redirect-rule",
  "removeparam",
  "replace",
  "rewrite",
  "shide",
  "specifichide",
  "uritransform",
  "urlskip",
  "webrtc",
  "xmlprune"
]);

// Two-label public suffixes.
// NOTE(review): presumably used for registrable-domain / third-party detection;
// the consumer is outside this section.
const MULTI_PART_PUBLIC_SUFFIXES = new Set([
  "ac.uk",
  "co.jp",
  "co.nz",
  "co.uk",
  "com.au",
  "com.br",
  "com.mx",
  "com.tr",
  "com.tw",
  "com.cn",
  "net.au",
  "net.nz",
  "org.au",
  "org.nz",
  "org.uk"
]);

// Load the filters only after the lookup tables above are initialised. The
// parsers invoked by loadPrivacyFilters() read COSMETIC_SEPARATORS,
// RESOURCE_TYPE_ALIASES and SKIP_NETWORK_OPTION_NAMES; `const` bindings are in the
// temporal dead zone until their declaration has executed, so awaiting the load
// above them made every real rule throw a ReferenceError that was then swallowed.
// Keep this call below any module-level `const` the parsers depend on.
await loadPrivacyFilters();
|
|
|
|
|
|
|
|
|
|
/**
 * Creates a fresh, empty rule container.
 *
 * @returns {object} An object with six empty rule arrays, an empty `badFilterKeys`
 *   Set, an empty `sourceFiles` array, and four `*RuleIndex` slots set to `null`.
 */
function emptyFilterRules() {
  const rules = {};
  const ruleLists = [
    "blockRules",
    "importantBlockRules",
    "allowRules",
    "importantAllowRules",
    "cosmeticRules",
    "cosmeticExceptionRules"
  ];
  for (const name of ruleLists) {
    rules[name] = [];
  }
  rules.badFilterKeys = new Set();
  rules.sourceFiles = [];

  // Indexes stay null until finalizeFilterRules() builds them.
  const indexSlots = [
    "blockRuleIndex",
    "importantBlockRuleIndex",
    "allowRuleIndex",
    "importantAllowRuleIndex"
  ];
  for (const name of indexSlots) {
    rules[name] = null;
  }
  return rules;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Merges several parsed rule sets into one finalized rule set.
 *
 * @param {Iterable<object>} filterSets Rule sets shaped like the result of
 *   emptyFilterRules() / parseFilterRules().
 * @returns {object} A new rule set holding the rules of every input set in order,
 *   passed through finalizeFilterRules(). That call drops network rules whose key
 *   appears in any set's `badFilterKeys` and builds the lookup indexes.
 */
function mergeFilterRules(filterSets) {
  const merged = emptyFilterRules();
  const listNames = [
    "blockRules",
    "importantBlockRules",
    "allowRules",
    "importantAllowRules",
    "cosmeticRules",
    "cosmeticExceptionRules",
    "sourceFiles"
  ];

  for (const set of filterSets) {
    for (const name of listNames) {
      const target = merged[name];
      // Append one element at a time. `target.push(...bigArray)` passes every
      // element as a call argument and throws a RangeError on very large lists.
      for (const item of set[name]) {
        target.push(item);
      }
    }
    for (const key of set.badFilterKeys) {
      merged.badFilterKeys.add(key);
    }
  }

  // finalizeFilterRules() applies the combined badfilter keys to all four network
  // lists, so no separate pruning pass is needed here.
  return finalizeFilterRules(merged);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Parses the text of an adblock-style filter list into a finalized rule set.
 *
 * Skipped lines: blank lines, comment lines (leading "!"), header lines
 * (leading "["), and everything between `!#if` and its matching `!#endif`.
 * Each remaining line is tried as a cosmetic rule first and otherwise as a
 * network rule ("@@" marks an exception).
 *
 * @param {string} content Raw filter list text.
 * @param {{ source?: string }} [options] `source` is recorded in `sourceFiles`
 *   and attached to every parsed rule.
 * @returns {object} Rule set as produced by finalizeFilterRules().
 */
export function parseFilterRules(content, options = {}) {
  const parsedRules = emptyFilterRules();
  if (options.source) {
    parsedRules.sourceFiles.push(options.source);
  }

  let openIfBlocks = 0;

  for (const rawLine of content.split("\n")) {
    const line = rawLine.trim();
    if (line === "") continue;

    if (line.startsWith("!#if")) {
      openIfBlocks += 1;
      continue;
    }
    if (line.startsWith("!#endif")) {
      // Clamp at zero so a stray "!#endif" cannot unbalance later blocks.
      openIfBlocks = Math.max(0, openIfBlocks - 1);
      continue;
    }

    // "!" also covers every other "!#" preprocessor directive.
    const isCommentOrHeader = line.startsWith("!") || line.startsWith("[");
    if (openIfBlocks > 0 || isCommentOrHeader) continue;

    // parseCosmeticFilterLine: object = usable rule, false = cosmetic syntax we do
    // not support (drop the line), null = not a cosmetic rule at all.
    const cosmetic = parseCosmeticFilterLine(line, options.source);
    if (cosmetic || cosmetic === false) {
      if (cosmetic && cosmetic.kind === "cosmeticException") {
        parsedRules.cosmeticExceptionRules.push(cosmetic);
      } else if (cosmetic && cosmetic.kind === "cosmetic") {
        parsedRules.cosmeticRules.push(cosmetic);
      }
      continue;
    }

    const isException = line.startsWith("@@");
    const rule = parseNetworkRule(isException ? line.slice(2) : line, {
      exception: isException,
      source: options.source
    });
    if (!rule) continue;

    if (rule.badfilter) {
      parsedRules.badFilterKeys.add(rule.key);
      continue;
    }

    let listName;
    if (isException) {
      listName = rule.important ? "importantAllowRules" : "allowRules";
    } else {
      listName = rule.important ? "importantBlockRules" : "blockRules";
    }
    parsedRules[listName].push(rule);
  }

  return finalizeFilterRules(parsedRules);
}
|
2026-05-15 09:12:28 -07:00
|
|
|
|
2026-05-16 22:07:39 -07:00
|
|
|
/**
 * Applies `$badfilter` keys and builds the per-list lookup indexes, in place.
 *
 * @param {object} rules Rule set shaped like the result of emptyFilterRules().
 * @returns {object} The same `rules` object, with the four network lists pruned
 *   and the four `*RuleIndex` properties populated.
 */
function finalizeFilterRules(rules) {
  const networkLists = ["blockRules", "importantBlockRules", "allowRules", "importantAllowRules"];

  if (rules.badFilterKeys.size > 0) {
    for (const listName of networkLists) {
      rules[listName] = rules[listName].filter((rule) => !rules.badFilterKeys.has(rule.key));
    }
  }

  for (const listName of networkLists) {
    // "blockRules" -> "blockRuleIndex", and so on.
    const indexName = `${listName.slice(0, -1)}Index`;
    rules[indexName] = buildNetworkRuleIndex(rules[listName]);
  }
  return rules;
}
|
2026-05-15 09:12:28 -07:00
|
|
|
|
2026-05-16 22:07:39 -07:00
|
|
|
/**
 * Groups network rules for lookup.
 *
 * @param {Iterable<object>} rules Compiled network rules.
 * @returns {{ byDomain: Map<string, object[]>, wildcardDomainRules: object[], otherRules: object[] }}
 *   `byDomain` holds "domain" rules keyed by their exact domain,
 *   `wildcardDomainRules` holds "domain" rules whose domain contains "*", and
 *   `otherRules` holds every rule of another kind. Input order is kept in each group.
 */
function buildNetworkRuleIndex(rules) {
  const index = {
    byDomain: new Map(),
    wildcardDomainRules: [],
    otherRules: []
  };

  for (const rule of rules) {
    if (rule.kind !== "domain") {
      index.otherRules.push(rule);
    } else if (rule.domain.includes("*")) {
      index.wildcardDomainRules.push(rule);
    } else {
      const existing = index.byDomain.get(rule.domain);
      if (existing) {
        existing.push(rule);
      } else {
        index.byDomain.set(rule.domain, [rule]);
      }
    }
  }

  return index;
}
|
2026-05-15 09:12:28 -07:00
|
|
|
|
2026-05-16 22:07:39 -07:00
|
|
|
/**
 * Tries to interpret a filter line as a cosmetic rule.
 *
 * @param {string} line Trimmed filter line.
 * @param {string} [source] Identifier of the list the line came from.
 * @returns {object|false|null} `null` when the line contains no cosmetic
 *   separator; `false` when it does but the rule is empty or unsupported; otherwise
 *   a `{ kind: "cosmeticException", domains, selector, source }` or
 *   `{ kind: "cosmetic", domains, selector, css, source }` record.
 */
function parseCosmeticFilterLine(line, source) {
  const separator = findCosmeticSeparator(line);
  if (!separator) return null;

  const { index, token, kind, extended } = separator;
  const domains = line.slice(0, index);
  const body = line.slice(index + token.length).trim();
  if (!body) return false;

  if (kind === "cosmeticException") {
    return { kind: "cosmeticException", domains, selector: cosmeticSelectorKey(body), source };
  }

  // Only "#$#" style rules and plain "##" rules can be turned into CSS.
  let css;
  if (kind === "style") {
    css = adguardStyleRuleToCss(body);
  } else if (kind === "cosmetic" && !extended) {
    css = cosmeticSelectorToCss(body);
  } else {
    return false;
  }

  if (!css) return false;
  return { kind: "cosmetic", domains, selector: cosmeticSelectorKey(body), css, source };
}
|
2026-05-15 09:12:28 -07:00
|
|
|
|
2026-05-16 22:07:39 -07:00
|
|
|
/**
 * Finds the cosmetic separator that occurs earliest in a line.
 *
 * @param {string} line Filter line.
 * @returns {object|null} A copy of the winning COSMETIC_SEPARATORS entry with its
 *   `index` in the line added, or `null` when no token occurs. When two tokens
 *   start at the same index, the longer one wins.
 */
function findCosmeticSeparator(line) {
  return COSMETIC_SEPARATORS.reduce((best, candidate) => {
    const index = line.indexOf(candidate.token);
    if (index < 0) return best;

    const isBetter =
      best === null ||
      index < best.index ||
      (index === best.index && candidate.token.length > best.token.length);
    return isBetter ? { ...candidate, index } : best;
  }, null);
}
|
2026-05-15 09:12:28 -07:00
|
|
|
|
2026-05-16 22:07:39 -07:00
|
|
|
/**
 * Normalizes a selector so that equal selectors compare equal as strings:
 * surrounding whitespace is removed and inner whitespace runs become one space.
 *
 * @param {string} selector
 * @returns {string}
 */
function cosmeticSelectorKey(selector) {
  const words = selector.trim().split(/\s+/);
  return words.join(" ");
}
|
|
|
|
|
|
2026-05-16 22:07:39 -07:00
|
|
|
/**
 * Parses one network filter line (without any leading "@@").
 *
 * @param {string} line Pattern, optionally followed by "$" and options.
 * @param {{ exception?: boolean, source?: string }} [options]
 * @returns {object|null} `{ badfilter: true, key }` for a `$badfilter` line;
 *   `null` when the pattern is empty, an option marks the rule as skipped, or the
 *   pattern cannot be compiled; otherwise the compiled pattern merged with the
 *   parsed option fields, `key` and `source`.
 */
function parseNetworkRule(line, options = {}) {
  const { pattern, options: optionTokens } = splitNetworkOptions(line);
  const parsed = parseNetworkOptions(optionTokens);
  const key = networkRuleKey(pattern, parsed.optionsForKey, options.exception);

  // A badfilter line only contributes its key; it is never compiled.
  if (parsed.badfilter) {
    return { badfilter: true, key };
  }
  if (!pattern || parsed.skip) {
    return null;
  }

  const compiled = compileNetworkPattern(pattern, parsed.matchCase);
  if (!compiled) {
    return null;
  }

  const {
    types,
    excludedTypes,
    isThirdParty,
    isFirstParty,
    includeDomains,
    excludeDomains,
    includeTargetDomains,
    excludeTargetDomains,
    important
  } = parsed;

  return {
    ...compiled,
    key,
    types,
    excludedTypes,
    isThirdParty,
    isFirstParty,
    includeDomains,
    excludeDomains,
    includeTargetDomains,
    excludeTargetDomains,
    important,
    source: options.source
  };
}
|
2026-05-15 09:12:28 -07:00
|
|
|
|
2026-05-16 22:07:39 -07:00
|
|
|
/**
 * Splits a network filter line into its pattern and its option tokens.
 *
 * The text after the last "$" is treated as options only when that "$" is not
 * the first character and the text looks like an option list; otherwise the
 * whole line is the pattern.
 *
 * @param {string} line
 * @returns {{ pattern: string, options: string[] }}
 */
function splitNetworkOptions(line) {
  const dollarAt = line.lastIndexOf("$");
  if (dollarAt > 0) {
    const optionText = line.slice(dollarAt + 1);
    if (looksLikeFilterOptions(optionText)) {
      return {
        pattern: line.slice(0, dollarAt),
        options: splitFilterOptions(optionText)
      };
    }
  }
  return { pattern: line, options: [] };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Heuristically decides whether the text after a "$" is a filter option list
 * rather than part of the URL pattern.
 *
 * The text must be non-empty and contain no whitespace, and its first
 * comma-separated entry must be an optional "~" followed by either an
 * identifier-like option name (letter first, then letters, digits, "_" or "-",
 * ending at "=" or the end of the entry) or one of the party aliases "1p" / "3p".
 * The aliases need their own branch because they start with a digit; without it,
 * a rule such as `||example.com^$3p` would keep "$3p" inside its pattern.
 *
 * @param {string} optionText Text following the last "$" of a filter line.
 * @returns {boolean}
 */
function looksLikeFilterOptions(optionText) {
  if (!optionText || /\s/.test(optionText)) return false;
  const firstOption = optionText.split(",", 1)[0];
  return /^~?(?:[a-z][a-z0-9_-]*(?:=|$)|[13]p$)/i.test(firstOption);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Splits an option list on commas, trimming each entry and dropping empty ones.
 *
 * @param {string} optionText
 * @returns {string[]}
 */
function splitFilterOptions(optionText) {
  const options = [];
  for (const piece of optionText.split(",")) {
    const option = piece.trim();
    if (option) {
      options.push(option);
    }
  }
  return options;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Interprets the option tokens of a network rule.
 *
 * @param {Iterable<string>} options Option tokens such as "script", "~3p" or
 *   "domain=a.com|~b.com".
 * @returns {object} Parsed flags and lists. `optionsForKey` holds every token
 *   except `badfilter`, in input order. `skip` is set when an option is listed in
 *   SKIP_NETWORK_OPTION_NAMES or is an unrecognised `name=value` option.
 *   Unrecognised bare options are ignored.
 */
function parseNetworkOptions(options) {
  const parsed = {
    types: [],
    excludedTypes: [],
    isThirdParty: false,
    isFirstParty: false,
    includeDomains: [],
    excludeDomains: [],
    includeTargetDomains: [],
    excludeTargetDomains: [],
    important: false,
    matchCase: false,
    badfilter: false,
    skip: false,
    optionsForKey: []
  };

  for (const rawOption of options) {
    const option = rawOption.trim();
    if (!option) continue;

    const negated = option.startsWith("~");
    const optionBody = negated ? option.slice(1) : option;
    const eqIndex = optionBody.indexOf("=");
    const hasValue = eqIndex >= 0;
    const name = (hasValue ? optionBody.slice(0, eqIndex) : optionBody).toLowerCase();
    const value = hasValue ? optionBody.slice(eqIndex + 1) : "";

    // badfilter is deliberately left out of the key so that the key matches the
    // rule it is meant to disable.
    if (name === "badfilter") {
      parsed.badfilter = true;
      continue;
    }
    parsed.optionsForKey.push(option);

    switch (name) {
      case "important":
        parsed.important = true;
        break;

      case "match-case":
        parsed.matchCase = true;
        break;

      case "third-party":
      case "3p":
      case "strict3p":
        parsed.isThirdParty = !negated;
        parsed.isFirstParty = negated;
        break;

      case "first-party":
      case "1p":
      case "strict1p":
        parsed.isFirstParty = !negated;
        parsed.isThirdParty = negated;
        break;

      case "domain":
      case "from": {
        const { include, exclude } = parseDomainOptionValue(value);
        parsed.includeDomains.push(...include);
        parsed.excludeDomains.push(...exclude);
        break;
      }

      case "to": {
        const { include, exclude } = parseDomainOptionValue(value);
        parsed.includeTargetDomains.push(...include);
        parsed.excludeTargetDomains.push(...exclude);
        break;
      }

      default: {
        const resourceType = RESOURCE_TYPE_ALIASES.get(name);
        if (resourceType) {
          (negated ? parsed.excludedTypes : parsed.types).push(resourceType);
        } else if (SKIP_NETWORK_OPTION_NAMES.has(name) || hasValue) {
          parsed.skip = true;
        }
      }
    }
  }

  return parsed;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Parses the value of a `domain=` / `from=` / `to=` option.
 *
 * @param {string} value "|"-separated domains; a leading "~" marks an exclusion.
 * @returns {{ include: string[], exclude: string[] }} Lower-cased, trimmed
 *   domains; exclusions are stored without their "~".
 */
function parseDomainOptionValue(value) {
  const result = { include: [], exclude: [] };
  if (!value) {
    return result;
  }

  value
    .split("|")
    .map((entry) => entry.trim().toLowerCase())
    .filter(Boolean)
    .forEach((domain) => {
      if (domain.startsWith("~")) {
        result.exclude.push(domain.slice(1));
      } else {
        result.include.push(domain);
      }
    });

  return result;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the identity key of a network rule: optional "@@", the pattern, and,
 * when there are options, "$" followed by the options joined with ",".
 *
 * @param {string} pattern
 * @param {string[]} options Option tokens in their original order.
 * @param {boolean} [exception] Whether the rule is an exception rule.
 * @returns {string}
 */
function networkRuleKey(pattern, options, exception) {
  const prefix = exception ? "@@" : "";
  const suffix = options.length > 0 ? "$" + options.join(",") : "";
  return prefix + pattern + suffix;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Compiles a network filter pattern into a matchable form.
 *
 * @param {string} pattern Pattern without options.
 * @param {boolean} matchCase When false, regexes are built case-insensitive.
 * @returns {object|null} The result of parseDomainAnchoredPattern() for "||"
 *   patterns; `{ kind: "regex", regex }` for patterns wrapped in "/"; otherwise
 *   `{ kind: "pattern", regex }`. `null` when the regex cannot be built.
 */
function compileNetworkPattern(pattern, matchCase) {
  const flags = matchCase ? "" : "i";

  if (pattern.startsWith("||")) {
    return parseDomainAnchoredPattern(pattern, flags);
  }

  const isRegexLiteral = pattern.length > 1 && pattern.startsWith("/") && pattern.endsWith("/");
  const kind = isRegexLiteral ? "regex" : "pattern";
  try {
    const source = isRegexLiteral ? pattern.slice(1, -1) : adblockPatternToRegex(pattern);
    return { kind, regex: new RegExp(source, flags) };
  } catch {
    // An invalid regular expression makes the rule unusable.
    return null;
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Parses a "||domain..." pattern into a domain rule.
 *
 * Accepted forms after the domain: nothing, "^", "/path", or "^/path". Anything
 * else yields `null`, as does an empty domain or one containing a backslash,
 * bracket, brace or parenthesis.
 *
 * @param {string} pattern Pattern starting with "||".
 * @param {string} flags RegExp flags for the path regex.
 * @returns {{ kind: "domain", domain: string, path: string, pathRegex: RegExp|null }|null}
 */
function parseDomainAnchoredPattern(pattern, flags) {
  const rest = pattern.slice(2);

  // The domain runs up to the first "/" or "^", or to the end of the pattern.
  const boundary = rest.search(/[/^]/);
  const domainEnd = boundary === -1 ? rest.length : boundary;

  const domain = rest.slice(0, domainEnd).toLowerCase();
  if (!domain || /[\\[\]{}()]/.test(domain)) {
    return null;
  }

  const suffix = rest.slice(domainEnd);
  let pathPart = "";
  if (suffix.startsWith("/")) {
    pathPart = suffix;
  } else if (suffix.startsWith("^/")) {
    pathPart = suffix.slice(1);
  } else if (suffix !== "" && suffix !== "^") {
    return null;
  }

  let pathRegex = null;
  if (pathPart) {
    try {
      pathRegex = new RegExp("^" + adblockPatternToRegex(pathPart), flags);
    } catch {
      return null;
    }
  }

  return { kind: "domain", domain, path: pathPart, pathRegex };
}
|
|
|
|
|
|
2026-05-16 22:07:39 -07:00
|
|
|
/**
 * Accepts the body of a "#$#" style rule as CSS when it looks like a single-line
 * `selector { declarations }` rule.
 *
 * @param {string} rule Rule body.
 * @returns {string|null} The rule unchanged, or `null` when it lacks "{" or "}"
 *   or contains a line break.
 */
function adguardStyleRuleToCss(rule) {
  const hasBlock = rule.includes("{") && rule.includes("}");
  const isSingleLine = !/[\r\n]/.test(rule);
  return hasBlock && isSingleLine ? rule : null;
}
|
|
|
|
|
|
2026-05-15 09:12:28 -07:00
|
|
|
/**
 * Converts the body of a "##" cosmetic rule into a CSS rule.
 *
 * - Scriptlet ("+js...") and HTML-filter ("^...") bodies are rejected.
 * - `selector:remove()` and plain selectors become `selector { display: none !important; }`.
 * - `selector:style(declarations)` becomes `selector { declarations }`.
 *
 * @param {string} selector Rule body.
 * @returns {string|null} CSS rule text, or `null` when the body is empty or
 *   unsupported, or when its `:style()` declarations contain a brace.
 */
function cosmeticSelectorToCss(selector) {
  const trimmed = selector.trim();
  if (!trimmed || trimmed.startsWith("+js") || trimmed.startsWith("^")) {
    return null;
  }

  const removeSuffix = ":remove()";
  if (trimmed.endsWith(removeSuffix)) {
    const baseSelector = trimmed.slice(0, -removeSuffix.length);
    return isSupportedCosmeticSelector(baseSelector)
      ? `${baseSelector} { display: none !important; }`
      : null;
  }

  const styleMatch = trimmed.match(/:style\((.+)\)$/);
  if (styleMatch) {
    // Take the base selector from the same match that produced the declarations,
    // so the two pieces always partition the input.
    const baseSelector = trimmed.slice(0, styleMatch.index);
    const declarations = styleMatch[1];
    // The declarations are spliced into a stylesheet. A brace could close this
    // rule and start another, so apply the same restriction as for selectors.
    if (/[{}]/.test(declarations)) {
      return null;
    }
    return isSupportedCosmeticSelector(baseSelector)
      ? `${baseSelector} { ${declarations} }`
      : null;
  }

  return isSupportedCosmeticSelector(trimmed)
    ? `${trimmed} { display: none !important; }`
    : null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reports whether a selector can be emitted as-is into a stylesheet.
 *
 * @param {string} selector
 * @returns {boolean} `false` for an empty selector, one containing a line break
 *   or a brace, or one using any of the listed procedural / extended operators
 *   (compared case-insensitively); `true` otherwise.
 */
function isSupportedCosmeticSelector(selector) {
  if (!selector) return false;
  if (/[\r\n{}]/.test(selector)) return false;

  const lowered = selector.toLowerCase();
  const unsupportedTokens = [
    ":-abp-contains(",
    ":-abp-has(",
    ":-abp-properties(",
    ":contains(",
    ":has-text(",
    ":matches-attr(",
    ":matches-css",
    ":matches-media",
    ":matches-path",
    ":min-text-length(",
    ":others()",
    ":remove()",
    ":upward(",
    ":watch-attr(",
    ":xpath("
  ];
  for (const token of unsupportedTokens) {
    if (lowered.includes(token)) {
      return false;
    }
  }
  return true;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collects the CSS of every cosmetic rule that applies to a hostname.
 *
 * A rule is left out when an exception rule that also applies to the hostname
 * has the same normalized selector.
 *
 * @param {{ cosmeticRules?: object[], cosmeticExceptionRules?: object[] }} rules
 * @param {string} hostname Page hostname; compared case-insensitively.
 * @returns {string[]} CSS rule texts, in rule order.
 */
export function getCosmeticCssForHostname(rules, hostname) {
  const host = String(hostname || "").toLowerCase();
  const appliesToHost = (rule) => matchesCosmeticDomains(rule.domains, host);

  const disabledSelectors = new Set(
    (rules.cosmeticExceptionRules || [])
      .filter(appliesToHost)
      .map((exception) => exception.selector)
  );

  return (rules.cosmeticRules || [])
    .filter((rule) => appliesToHost(rule) && !disabledSelectors.has(rule.selector))
    .map((rule) => rule.css);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Checks a cosmetic rule's domain list against a hostname.
 *
 * @param {string} domainSpec Comma-separated domains; "~" marks an exclusion.
 *   An empty spec or "*" applies everywhere.
 * @param {string} hostname
 * @returns {boolean} `false` when any exclusion matches; otherwise `true` when
 *   there are no positive entries or at least one of them matches.
 */
function matchesCosmeticDomains(domainSpec, hostname) {
  if (!domainSpec || domainSpec === "*") return true;

  const entries = domainSpec
    .split(",")
    .map((entry) => entry.trim().toLowerCase())
    .filter(Boolean);
  const negatives = entries.filter((entry) => entry.startsWith("~")).map((entry) => entry.slice(1));
  const positives = entries.filter((entry) => !entry.startsWith("~"));

  const matchesHost = (domain) => domainMatchesPattern(hostname, domain);

  if (negatives.some(matchesHost)) {
    return false;
  }
  return positives.length === 0 || positives.some(matchesHost);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Tests one compiled network rule against a request.
 *
 * @param {string} url Full request URL.
 * @param {URL} urlObj Parsed form of `url` (`pathname` and `search` are read).
 * @param {string} hostname Request hostname.
 * @param {string} resourceType Request resource type.
 * @param {string} sourceHostname Hostname of the page issuing the request.
 * @param {object} rule Rule produced by parseNetworkRule().
 * @returns {boolean} `true` when the domain, type and party constraints all hold
 *   and the rule's pattern matches.
 */
function matchesNetworkRule(url, urlObj, hostname, resourceType, sourceHostname, rule) {
  const sourceHost = String(sourceHostname || "").toLowerCase();
  const matchesSource = (domain) => domainMatchesPattern(sourceHost, domain);
  const matchesTarget = (domain) => domainMatchesPattern(hostname, domain);
  const matchesType = (type) => resourceTypeMatches(type, resourceType);

  // Source-page constraints ($domain= / $from=).
  if (rule.includeDomains.length > 0 && !rule.includeDomains.some(matchesSource)) return false;
  if (rule.excludeDomains.some(matchesSource)) return false;

  // Request-target constraints ($to=).
  if (rule.includeTargetDomains.length > 0 && !rule.includeTargetDomains.some(matchesTarget)) return false;
  if (rule.excludeTargetDomains.some(matchesTarget)) return false;

  // Resource-type constraints.
  if (rule.excludedTypes.some(matchesType)) return false;
  if (rule.types.length > 0 && !rule.types.some(matchesType)) return false;

  // Party constraints.
  const thirdParty = isThirdPartyRequest(hostname, sourceHost);
  if (rule.isThirdParty && !thirdParty) return false;
  if (rule.isFirstParty && thirdParty) return false;

  switch (rule.kind) {
    case "domain":
      if (!domainPatternMatches(hostname, rule.domain)) return false;
      return !rule.pathRegex || rule.pathRegex.test(urlObj.pathname + urlObj.search);
    case "regex":
    case "pattern":
      return rule.regex.test(url);
    default:
      return false;
  }
}
|
|
|
|
|
|
2026-05-15 09:25:19 -07:00
|
|
|
/**
 * Reports whether a filter's resource type covers a request's resource type.
 *
 * Most filter types match only the identically named request type. The
 * exceptions are "inline-script" (matches "script"), "ping" (also "fetch"),
 * "subdocument" (also "document") and "xmlhttprequest" (also "fetch" and "xhr").
 *
 * Implemented as a switch so that nothing is allocated per call on the matching
 * hot path and so that unknown filter types, including names inherited from
 * Object.prototype such as "constructor", simply return `false`.
 *
 * @param {string} filterType Canonical filter resource type.
 * @param {string} resourceType Resource type of the request.
 * @returns {boolean}
 */
function resourceTypeMatches(filterType, resourceType) {
  switch (filterType) {
    case "document":
    case "font":
    case "image":
    case "media":
    case "object":
    case "other":
    case "script":
    case "stylesheet":
    case "websocket":
      return resourceType === filterType;
    case "inline-script":
      return resourceType === "script";
    case "ping":
      return resourceType === "ping" || resourceType === "fetch";
    case "subdocument":
      return resourceType === "document" || resourceType === "subdocument";
    case "xmlhttprequest":
      return resourceType === "fetch" || resourceType === "xhr" || resourceType === "xmlhttprequest";
    default:
      return false;
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Checks a hostname against the domain part of a domain-anchored rule.
 *
 * A trailing "^" on the pattern is dropped and the pattern is lowercased.
 * Patterns without "*" match the exact host or any subdomain of it; patterns
 * with "*" are delegated to domainMatchesPattern.
 *
 * @param {string} hostname - Request hostname (compared as given, not lowercased here).
 * @param {string} pattern - Domain pattern from the rule.
 * @returns {boolean} True when the hostname is covered by the pattern.
 */
function domainPatternMatches(hostname, pattern) {
  const wanted = pattern.replace(/\^$/, "").toLowerCase();
  if (wanted === "") {
    return false;
  }
  if (wanted.includes("*")) {
    return domainMatchesPattern(hostname, wanted);
  }
  if (hostname === wanted) {
    return true;
  }
  return hostname.endsWith("." + wanted);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Case-insensitive hostname match against a domain pattern that may contain "*".
 *
 * - An empty pattern never matches; a bare "*" always matches.
 * - Without "*", the pattern matches the host itself or any subdomain.
 * - With "*", each "*" becomes ".*"; a pattern that does not start with "*"
 *   must begin at the start of the host or right after a dot.
 *
 * @param {string} hostname - Hostname to test (nullish is treated as "").
 * @param {string} pattern - Domain pattern; a trailing "^" is ignored.
 * @returns {boolean} True when the hostname matches.
 */
function domainMatchesPattern(hostname, pattern) {
  const host = String(hostname || "").toLowerCase();
  const wanted = String(pattern || "").replace(/\^$/, "").toLowerCase();

  if (wanted === "") {
    return false;
  }
  if (wanted === "*") {
    return true;
  }
  if (!wanted.includes("*")) {
    return host === wanted || host.endsWith("." + wanted);
  }

  // Escape every literal piece, then stitch the pieces together with ".*".
  const escapeLiteral = (text) => text.replace(/[|\\{}()[\]^$+?.]/g, "\\$&");
  const body = wanted
    .split("*")
    .map((piece) => escapeLiteral(piece))
    .join(".*");
  const prefix = wanted.startsWith("*") ? "^" : "(?:^|\\.)";
  return new RegExp(prefix + body + "$", "i").test(host);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Decides whether a request host belongs to a different site than the page host.
 *
 * When either hostname is missing the two values are simply compared. Otherwise
 * the registrable domains are compared; if one of them cannot be determined the
 * check falls back to "same host or subdomain of the source host".
 *
 * @param {string} hostname - Hostname of the request.
 * @param {string} sourceHostname - Hostname of the page being archived.
 * @returns {boolean} True when the request is considered third-party.
 */
function isThirdPartyRequest(hostname, sourceHostname) {
  const bothKnown = Boolean(hostname) && Boolean(sourceHostname);
  if (!bothKnown) {
    return hostname !== sourceHostname;
  }

  const requestSite = registrableDomain(hostname);
  const sourceSite = registrableDomain(sourceHostname);
  if (requestSite && sourceSite) {
    return requestSite !== sourceSite;
  }

  const sameHost = hostname === sourceHostname;
  return !sameHost && !hostname.endsWith("." + sourceHostname);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Approximates the registrable domain ("site") of a hostname.
 *
 * The hostname is lowercased and one trailing dot is removed. Empty values,
 * dotted-quad IPv4 literals, "localhost" and hosts with at most two labels are
 * returned as normalized. Longer hosts keep their last two labels, or their
 * last three when the final two labels are listed in MULTI_PART_PUBLIC_SUFFIXES.
 *
 * @param {string} hostname - Hostname to reduce.
 * @returns {string} The normalized host or its registrable-domain approximation.
 */
function registrableDomain(hostname) {
  const host = String(hostname || "").toLowerCase().replace(/\.$/, "");
  const isIpv4Literal = /^\d{1,3}(?:\.\d{1,3}){3}$/.test(host);
  if (host === "" || isIpv4Literal || host === "localhost") {
    return host;
  }

  const labels = host.split(".").filter(Boolean);
  if (labels.length <= 2) {
    return host;
  }

  // At this point there are at least three labels, so keeping three is always possible.
  const lastTwo = labels.slice(-2).join(".");
  const keep = MULTI_PART_PUBLIC_SUFFIXES.has(lastTwo) ? 3 : 2;
  return labels.slice(-keep).join(".");
}
|
|
|
|
|
|
|
|
|
|
/**
 * Translates an adblock-style URL pattern into regular-expression source text.
 *
 * A single leading "|" anchors the match at the start and a single trailing "|"
 * anchors it at the end. Inside the pattern "*" becomes ".*", "^" becomes a
 * separator placeholder (any character outside letters, digits, "_", ".", "%",
 * "-", or the end of the input) and every other character is escaped.
 *
 * @param {string} pattern - Raw pattern text from a filter rule.
 * @returns {string} Source string suitable for `new RegExp(...)`.
 */
function adblockPatternToRegex(pattern) {
  const anchoredStart = pattern.startsWith("|");
  let body = anchoredStart ? pattern.slice(1) : pattern;

  const anchoredEnd = body.endsWith("|");
  if (anchoredEnd) {
    body = body.slice(0, -1);
  }

  const translate = (ch) => {
    switch (ch) {
      case "*":
        return ".*";
      case "^":
        return "(?:[^A-Za-z0-9_.%-]|$)";
      default:
        return ch.replace(/[|\\{}()[\]^$+?.]/g, "\\$&");
    }
  };

  const source = Array.from(body, (ch) => translate(ch)).join("");
  return (anchoredStart ? "^" : "") + source + (anchoredEnd ? "$" : "");
}
|
|
|
|
|
|
2026-05-16 22:07:39 -07:00
|
|
|
/**
 * Evaluates a request URL against a compiled rule set and reports whether it should be blocked.
 *
 * Rule tiers are consulted in precedence order and the first matching rule decides:
 * important allow (do not block), important block (block), allow (do not block),
 * block (block). URLs that cannot be parsed are never blocked.
 *
 * @param {object} rules - Rule set exposing the four rule lists and their optional indexes.
 * @param {string} url - Absolute URL of the request.
 * @param {string} resourceType - Resource type of the request.
 * @param {string} sourceHostname - Hostname of the page that issued the request.
 * @returns {boolean} True when the request should be blocked.
 */
export function shouldBlockRequestWithRules(rules, url, resourceType, sourceHostname) {
  // NOTE(review): this guard compares a full URL with a bare hostname, so it only
  // triggers for scheme-less input; confirm whether a first-party exemption was intended.
  if (url === sourceHostname) {
    return false;
  }
  if (sourceHostname && url.startsWith(sourceHostname + "/")) {
    return false;
  }

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return false;
  }
  const { hostname } = parsed;

  // [rule list key, index key, verdict when a rule in the tier matches]
  const tiers = [
    ["importantAllowRules", "importantAllowRuleIndex", false],
    ["importantBlockRules", "importantBlockRuleIndex", true],
    ["allowRules", "allowRuleIndex", false],
    ["blockRules", "blockRuleIndex", true]
  ];

  for (const [listKey, indexKey, verdict] of tiers) {
    for (const rule of networkRuleCandidates(rules[listKey], rules[indexKey], hostname)) {
      if (matchesNetworkRule(url, parsed, hostname, resourceType, sourceHostname, rule)) {
        return verdict;
      }
    }
  }

  return false;
}
|
|
|
|
|
|
2026-05-16 22:07:39 -07:00
|
|
|
/**
 * Convenience wrapper that evaluates a request against the module-level `filterRules`.
 *
 * @param {string} url - Absolute URL of the request.
 * @param {string} resourceType - Resource type of the request.
 * @param {string} sourceHostname - Hostname of the page that issued the request.
 * @returns {boolean} Result of shouldBlockRequestWithRules for the loaded rules.
 */
function shouldBlockRequest(url, resourceType, sourceHostname) {
  const activeRules = filterRules;
  return shouldBlockRequestWithRules(activeRules, url, resourceType, sourceHostname);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Lazily yields the rules worth testing for a hostname.
 *
 * Without an index every rule in `rules` is yielded. With an index, the rules
 * bucketed under each suffix of the hostname come first, followed by the
 * index's wildcard-domain rules and then its remaining rules.
 *
 * @param {Iterable<object>} [rules=[]] - Full rule list, used only when no index is given.
 * @param {object} [index] - Optional index with `byDomain`, `wildcardDomainRules`, `otherRules`.
 * @param {string} hostname - Hostname of the request.
 * @yields {object} Candidate rules in evaluation order.
 */
function* networkRuleCandidates(rules = [], index, hostname) {
  if (index) {
    for (const suffix of hostnameSuffixes(hostname)) {
      const bucket = index.byDomain.get(suffix);
      if (!bucket) {
        continue;
      }
      yield* bucket;
    }
    yield* index.wildcardDomainRules;
    yield* index.otherRules;
  } else {
    yield* rules;
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Lists every dot-separated suffix of a hostname, longest first.
 *
 * @param {string} hostname - Hostname to expand (nullish or empty yields `[""]`).
 * @returns {string[]} Lowercased suffixes, e.g. "a.b.c" gives ["a.b.c", "b.c", "c"].
 */
function hostnameSuffixes(hostname) {
  const host = String(hostname || "").toLowerCase();
  if (host === "") {
    return [""];
  }

  const labels = host.split(".").filter(Boolean);
  return labels.map((_, position) => labels.slice(position).join("."));
}
|
|
|
|
|
|
2026-05-15 09:12:28 -07:00
|
|
|
// --- Userscript metadata parsing -------------------------------------------
|
|
|
|
|
|
|
|
|
|
/**
 * Extracts the `@match` and `@exclude` patterns from a userscript metadata block.
 *
 * Only lines of the form `// @match <pattern>` or `// @exclude <pattern>` inside
 * the `==UserScript==` block are considered. The directive must be the first
 * thing after the comment marker, so directive-like text inside another value
 * (for example a description mentioning "@match") is not mistaken for a pattern.
 *
 * @param {string} content - Full userscript source text.
 * @returns {{matches: string[], excludes: string[]}} Patterns in order of appearance;
 *   both lists are empty when no metadata block is present.
 */
function parseUserScriptMetadata(content) {
  const matches = [];
  const excludes = [];

  const metaBlock = content.match(/\/\/\s*==UserScript==([\s\S]*?)\/\/\s*==\/UserScript==/);
  if (!metaBlock) {
    return { matches, excludes };
  }

  // Anchored at the start of the line so the directive key cannot be matched mid-value.
  const directive = /^\s*\/\/\s*@(match|exclude)\s+(.+)/;
  for (const line of metaBlock[1].split("\n")) {
    const found = directive.exec(line);
    if (!found) {
      continue;
    }
    const target = found[1] === "match" ? matches : excludes;
    target.push(found[2].trim());
  }

  return { matches, excludes };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Tests a URL against a userscript `@match`-style pattern.
 *
 * Patterns look like `*://*.example.com/*` or `http://example.com/path`: a
 * scheme (or "*"), "://", a host pattern and an optional path pattern that
 * defaults to "/". Only the URL's scheme, hostname and pathname take part in
 * the comparison. Any error while parsing or matching yields `false`.
 *
 * @param {string} url - Absolute URL to test.
 * @param {string} pattern - Match pattern.
 * @returns {boolean} True when scheme, host and path all match.
 */
function urlMatchesPattern(url, pattern) {
  try {
    const target = new URL(url);

    const schemeEnd = pattern.indexOf("://");
    if (schemeEnd < 0) {
      return false;
    }

    // Scheme: "*" accepts anything, otherwise it must equal the URL scheme without the colon.
    const patternScheme = pattern.slice(0, schemeEnd);
    const urlScheme = target.protocol.slice(0, -1);
    if (patternScheme !== "*" && patternScheme !== urlScheme) {
      return false;
    }

    // Everything after "://" is "<host>[/<path>]".
    const hostAndPath = pattern.slice(schemeEnd + 3);
    const pathStart = hostAndPath.indexOf("/");
    const hasPath = pathStart >= 0;
    const patternHost = hasPath ? hostAndPath.slice(0, pathStart) : hostAndPath;
    const patternPath = hasPath ? hostAndPath.slice(pathStart) : "/";

    if (!matchHost(target.hostname, patternHost)) {
      return false;
    }
    if (!matchPath(target.pathname, patternPath)) {
      return false;
    }
    return true;
  } catch {
    return false;
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Matches a hostname against the host part of a match pattern.
 *
 * "*" accepts every host, "*.example.com" accepts example.com and all of its
 * subdomains, and anything else must equal the hostname exactly.
 *
 * @param {string} hostname - Hostname of the URL.
 * @param {string} pattern - Host pattern.
 * @returns {boolean} True when the hostname is accepted.
 */
function matchHost(hostname, pattern) {
  if (pattern === "*") {
    return true;
  }
  if (!pattern.startsWith("*.")) {
    return hostname === pattern;
  }

  const base = pattern.slice(2);
  return hostname === base || hostname.endsWith("." + base);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Matches a URL pathname against the path part of a match pattern.
 *
 * "*" stands for any run of characters and "?" for exactly one character; all
 * other characters are matched literally, so regex metacharacters in a path
 * (for example "/c++/" or "/a(b)") no longer alter or break the generated
 * expression. The comparison is case-insensitive and covers the whole pathname.
 *
 * @param {string} pathname - Pathname of the URL under test.
 * @param {string} pattern - Path glob, beginning with "/".
 * @returns {boolean} True when the pathname matches the pattern.
 */
function matchPath(pathname, pattern) {
  if (pattern === "/*") {
    return true;
  }

  // Translate the glob one character at a time so only "*" and "?" act as wildcards.
  const source = Array.from(pattern, (ch) => {
    if (ch === "*") {
      return ".*";
    }
    if (ch === "?") {
      return ".";
    }
    return ch.replace(/[|\\{}()[\]^$+.]/g, "\\$&");
  }).join("");

  return new RegExp(`^${source}$`, "i").test(pathname);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Decides whether a userscript applies to a page URL.
 *
 * The script applies when at least one of its `matches` patterns accepts the
 * URL and none of its `excludes` patterns does.
 *
 * @param {string} url - URL of the page being archived.
 * @param {{matches: Iterable<string>, excludes: Iterable<string>}} meta - Parsed metadata.
 * @returns {boolean} True when the script should be injected.
 */
function shouldInjectUserScript(url, meta) {
  // Stops at the first pattern that accepts the URL.
  const anyPatternHits = (patterns) => {
    for (const pattern of patterns) {
      if (urlMatchesPattern(url, pattern)) {
        return true;
      }
    }
    return false;
  };

  if (!anyPatternHits(meta.matches)) {
    return false;
  }
  return !anyPatternHits(meta.excludes);
}
|
|
|
|
|
|
|
|
|
|
// --- Browser helpers -------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/**
 * Loads the Playwright package on demand.
 *
 * @returns {object} The Playwright module as returned by `require("playwright")`.
 * @throws {Error} With installation instructions when loading fails; the
 *   underlying error is preserved on `cause` so its stack is not lost.
 */
function loadPlaywright() {
  try {
    return require("playwright");
  } catch (error) {
    throw new Error(
      `Playwright is required. Run "npm install" and "npm run install-browsers". Original error: ${error.message}`,
      { cause: error }
    );
  }
}
|
|
|
|
|
|
|
|
|
|
// Manual stealth evasions, registered as a browser-context init script.
// The script hides `navigator.webdriver`, makes sure `window.chrome.runtime`
// exists and answers "notifications" permission queries from
// `Notification.permission`. All other permission queries are forwarded to the
// native `permissions.query` with its receiver preserved; calling the detached
// method would fail with an "Illegal invocation" TypeError.
// NOTE(review): while the document is still loading the patches are deferred to
// DOMContentLoaded, i.e. they may land after early page scripts have run — confirm
// that this deferral is intentional.
const STEALTH_INIT_SCRIPT = `
(() => {
  const patchNavigator = () => {
    try {
      // Override webdriver getter without using delete (can crash renderer)
      if (navigator.webdriver !== undefined) {
        Object.defineProperty(navigator, 'webdriver', {
          get: () => undefined,
          configurable: true,
          enumerable: true
        });
      }
    } catch (e) {}

    try {
      if (!window.chrome) {
        window.chrome = { runtime: {} };
      } else if (!window.chrome.runtime) {
        window.chrome.runtime = {};
      }
    } catch (e) {}

    try {
      const permissions = window.navigator.permissions;
      const originalQuery = permissions?.query;
      if (originalQuery) {
        permissions.query = (parameters) => (
          parameters?.name === 'notifications'
            ? Promise.resolve({ state: Notification.permission })
            : originalQuery.call(permissions, parameters)
        );
      }
    } catch (e) {}
  };

  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', patchNavigator);
  } else {
    patchNavigator();
  }
})();
`;
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the Chromium command-line switches used for every launch.
 *
 * @param {boolean} headless - When truthy, "--headless=new" is appended.
 * @returns {string[]} Launch arguments in a fixed order.
 */
function buildLaunchArgs(headless) {
  const baseArgs = [
    "--disable-blink-features=AutomationControlled",
    "--disable-web-security",
    "--disable-features=IsolateOrigins,site-per-process",
    "--disable-site-isolation-trials",
    "--disable-infobars",
    "--no-sandbox",
    "--disable-setuid-sandbox",
    "--disable-dev-shm-usage",
    "--disable-accelerated-2d-canvas",
    "--disable-gpu",
    "--window-size=1366,768"
  ];

  return headless ? [...baseArgs, "--headless=new"] : baseArgs;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Lists the default launcher switches that should be left out at launch.
 *
 * @returns {string[]} A fresh array containing "--enable-automation".
 */
function buildIgnoreDefaultArgs() {
  const suppressedDefaults = ["--enable-automation"];
  return suppressedDefaults;
}
|
|
|
|
|
|
|
|
|
|
// --- Page helpers ----------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/**
 * Installs a catch-all route handler that aborts requests matched by the filter rules.
 *
 * Nothing is installed when the privacy filters are unavailable or contain no
 * block rules. Main-frame navigations are always let through. If deciding on a
 * request fails for any reason the request is allowed (fail open).
 *
 * The promises returned by `route.abort()` / `route.continue()` are awaited and
 * their failures ignored, so a request that is already gone (for example because
 * the page closed mid-flight) cannot surface as an unhandled rejection.
 *
 * @param {object} page - Playwright page to instrument.
 * @param {string} sourceHostname - Hostname of the page being archived.
 * @returns {Promise<void>} Resolves once the route handler is registered.
 */
async function setupRequestBlocking(page, sourceHostname) {
  if (
    !privacyFiltersAvailable ||
    (filterRules.blockRules.length === 0 && filterRules.importantBlockRules.length === 0)
  ) {
    return;
  }

  await page.route("**/*", async (route) => {
    let block = false;
    try {
      const request = route.request();
      const isMainNavigation = request.isNavigationRequest() && request.frame() === page.mainFrame();
      block = !isMainNavigation && shouldBlockRequest(request.url(), request.resourceType(), sourceHostname);
    } catch {
      // Could not classify the request; let it through rather than break the page.
      block = false;
    }

    try {
      if (block) {
        await route.abort("blockedbyclient");
      } else {
        await route.continue();
      }
    } catch {
      // The route was already handled or the page went away; nothing left to do.
    }
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Adds a style tag that hides common annoyances plus any hostname-specific cosmetic rules.
 *
 * The built-in annoyance CSS is always part of the sheet; filter-list cosmetic
 * CSS is appended only when the privacy filters are loaded and contain cosmetic
 * rules. Failures while adding the style tag are ignored.
 *
 * @param {object} page - Playwright page receiving the style tag.
 * @param {string} hostname - Hostname used to select cosmetic rules.
 * @returns {Promise<void>}
 */
async function injectCosmeticFilters(page, hostname) {
  const cssChunks = [COMMON_ANNOYANCE_CSS];

  const hasCosmeticRules = privacyFiltersAvailable && filterRules.cosmeticRules.length > 0;
  if (hasCosmeticRules) {
    cssChunks.push(...getCosmeticCssForHostname(filterRules, hostname));
  }

  if (cssChunks.length === 0) {
    return;
  }

  try {
    await page.addStyleTag({ content: cssChunks.join("\n") });
  } catch {
    // Ignore cosmetic injection failures.
  }
}
|
|
|
|
|
|
2026-05-16 22:32:30 -07:00
|
|
|
/**
 * Removes known consent/overlay elements from the live page and undoes scroll locking.
 *
 * Inside the page, every element matching one of COMMON_ANNOYANCE_SELECTORS is
 * removed. On `<html>` and `<body>` the COMMON_ANNOYANCE_ROOT_CLASSES are
 * dropped, the `data-previous-scroll-y` attribute is removed, an inline
 * `overflow` of hidden/clip is cleared, and an inline `position: fixed` is
 * cleared together with `top`, `left` and `right`. Any failure is ignored.
 *
 * @param {object} page - Playwright page to clean up.
 * @returns {Promise<void>}
 */
async function removeCommonAnnoyances(page) {
  // Runs in the browser; it must not reference anything from this module's scope.
  const cleanupInPage = ({ selectors, rootClasses }) => {
    for (const selector of selectors) {
      try {
        document.querySelectorAll(selector).forEach((element) => element.remove());
      } catch {
        // Ignore selectors unsupported by the current browser.
      }
    }

    const roots = [document.documentElement, document.body].filter(Boolean);
    for (const root of roots) {
      root.classList.remove(...rootClasses);
      root.removeAttribute("data-previous-scroll-y");

      const inlineOverflow = root.style.overflow || "";
      const inlinePosition = root.style.position || "";
      if (/hidden|clip/i.test(inlineOverflow)) {
        root.style.removeProperty("overflow");
      }
      if (/fixed/i.test(inlinePosition)) {
        for (const property of ["position", "top", "left", "right"]) {
          root.style.removeProperty(property);
        }
      }
    }
  };

  const payload = {
    selectors: COMMON_ANNOYANCE_SELECTORS,
    rootClasses: COMMON_ANNOYANCE_ROOT_CLASSES
  };

  try {
    await page.evaluate(cleanupInPage, payload);
  } catch {
    // Ignore cleanup failures; the archive is still useful.
  }
}
|
|
|
|
|
|
2026-05-15 09:12:28 -07:00
|
|
|
// Minimal `GM.xmlHttpRequest` shim, injected as page script text before userscripts.
// It is installed only when no `GM` object exists and is implemented on top of `fetch`.
// The object handed to `onload` carries status, statusText, responseText and
// responseHeaders, plus `readyState` (always 4) and `finalUrl` (the response URL).
// Success and failure handlers are attached in a single `then(...)` so that only a
// failed request reaches `onerror`; an exception thrown by the caller's own `onload`
// is no longer reported as a request error.
const GM_MOCK = `
if (typeof GM === "undefined") {
  window.GM = {
    xmlHttpRequest: function(details) {
      fetch(details.url, {
        method: details.method || "GET",
        headers: details.headers || {},
        body: details.data || null
      })
        .then(response => response.text().then(text => ({
          readyState: 4,
          finalUrl: response.url,
          status: response.status,
          statusText: response.statusText,
          responseText: text,
          responseHeaders: Array.from(response.headers.entries())
            .map(([k, v]) => k + ": " + v).join("\\r\\n")
        })))
        .then(
          obj => {
            if (details.onload) details.onload(obj);
          },
          err => {
            if (details.onerror) details.onerror(err);
          }
        );
    }
  };
}
`;
|
|
|
|
|
|
|
|
|
|
/**
 * Injects the privacy userscripts that apply to the page, preceded by their prerequisites.
 *
 * Nothing happens when the privacy filters are unavailable, no userscripts are
 * loaded, or none of them matches `sourceUrl`. Otherwise the GM API mock and,
 * when present, the shared `@require` content are injected first; if either of
 * those fails no userscript is injected. Individual userscript failures are ignored.
 *
 * @param {object} page - Playwright page to inject into.
 * @param {string} sourceUrl - URL of the page, used to select matching userscripts.
 * @returns {Promise<void>}
 */
async function injectPrivacyUserScripts(page, sourceUrl) {
  if (!privacyFiltersAvailable || userScriptData.length === 0) {
    return;
  }

  const applicable = userScriptData.filter((script) => shouldInjectUserScript(sourceUrl, script));
  if (applicable.length === 0) {
    return;
  }

  // Inject GM API mock first.
  try {
    await page.addScriptTag({ content: GM_MOCK });
    const requiredLibrary = userScriptRequireContent;
    if (requiredLibrary) {
      await page.addScriptTag({ content: requiredLibrary });
    }
  } catch {
    return;
  }

  // Inject only matching userscripts.
  for (const script of applicable) {
    try {
      await page.addScriptTag({ content: script.content });
    } catch {
      // Ignore injection failures for individual scripts.
    }
  }
}
|
|
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
// Archiving
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
|
|
2026-05-14 08:12:13 -07:00
|
|
|
/**
 * Renders a page, inlines its assets and writes the result as one HTML file.
 *
 * @param {string} input - URL or other input accepted by `inputToUrl`.
 * @param {object} [options] - Optional settings; `archivePath` and `id` override the
 *   defaults, and the whole object is forwarded to `renderPage`.
 * @returns {Promise<object>} `id`, `filePath`, `sourceUrl`, `archivePath`, the inliner's
 *   `warnings`, and `externalAssets` (references still pointing outside the file).
 */
export async function archivePage(input, options = {}) {
  const sourceUrl = inputToUrl(input);
  const archivePath = options.archivePath || defaultArchivePath();
  const id = options.id || slugForUrl(sourceUrl);
  const filePath = path.join(archivePath, `${id}.html`);

  // Make sure the destination exists before the (slow) render starts.
  await fs.mkdir(archivePath, { recursive: true });

  const renderedHtml = await renderPage(sourceUrl, options);
  const baseUrl = findEffectiveBase(renderedHtml, sourceUrl);
  const sourceHostname = new URL(sourceUrl).hostname;

  // Assets fetched while inlining go through the same block rules as live requests.
  const shouldBlockAsset = (assetUrl, resourceType) =>
    shouldBlockRequest(assetUrl, resourceType, sourceHostname);
  const inliner = new AssetInliner({
    userAgent: DEFAULT_USER_AGENT,
    referer: isHttpUrl(sourceUrl) ? sourceUrl : undefined,
    shouldBlockAsset
  });

  const inlinedHtml = await inliner.inlineHtml(renderedHtml, baseUrl);
  const finalHtml = addArchiveComment(inlinedHtml, sourceUrl);
  await fs.writeFile(filePath, finalHtml, "utf8");

  const externalAssets = findExternalAssetRefs(finalHtml);
  return {
    id,
    filePath,
    sourceUrl,
    archivePath,
    warnings: inliner.warnings,
    externalAssets
  };
}
|
|
|
|
|
|
2026-05-15 09:12:28 -07:00
|
|
|
/**
 * Loads a URL in Chromium, applies blocking and cleanup, and returns the resulting HTML.
 *
 * Steps, in order: launch the browser, create a context with the stealth init
 * script, install request blocking, navigate (waiting for DOMContentLoaded),
 * inject cosmetic CSS and userscripts, wait `userscriptDelay` milliseconds, wait
 * for the network to go idle, remove common annoyances, snapshot resource URLs
 * and runtime styles, then serialize the page. The browser is always closed.
 *
 * @param {string} sourceUrl - Absolute URL to render.
 * @param {object} [options] - Optional settings.
 * @param {boolean} [options.headless] - Pass `false` to force a headed browser. A headed
 *   browser is also used whenever DISPLAY or WAYLAND_DISPLAY is set.
 * @param {string} [options.userAgent] - User agent override.
 * @param {string} [options.locale] - Context locale, default "en-US".
 * @param {string} [options.timezoneId] - Context time zone, default "America/New_York".
 * @param {number} [options.userscriptDelay] - Milliseconds to wait after injecting
 *   userscripts; defaults to 2000, and an explicit `0` disables the wait.
 * @returns {Promise<string>} Serialized HTML of the rendered page.
 */
export async function renderPage(sourceUrl, options = {}) {
  const playwright = loadPlaywright();

  const hasDisplay = !!(process.env.DISPLAY || process.env.WAYLAND_DISPLAY);
  const headless = options.headless !== false && !hasDisplay;

  const browser = await playwright.chromium.launch({
    headless,
    args: buildLaunchArgs(headless),
    ignoreDefaultArgs: buildIgnoreDefaultArgs()
  });

  try {
    const context = await browser.newContext({
      userAgent: options.userAgent || DEFAULT_USER_AGENT,
      viewport: VIEWPORT,
      locale: options.locale || "en-US",
      timezoneId: options.timezoneId || "America/New_York"
    });

    // Inject stealth evasions into every new page before any scripts run.
    await context.addInitScript(STEALTH_INIT_SCRIPT);

    const page = await context.newPage();
    const sourceHostname = new URL(sourceUrl).hostname;

    // Block paywall/tracker requests before the page loads.
    await setupRequestBlocking(page, sourceHostname);

    await page.goto(sourceUrl, {
      waitUntil: "domcontentloaded",
      timeout: PAGE_TIMEOUT_MS
    });

    // Inject cosmetic CSS and userscripts to strip paywalls / ads.
    await injectCosmeticFilters(page, sourceHostname);
    await injectPrivacyUserScripts(page, sourceUrl);

    // Give the userscripts a moment to run their setTimeout callbacks.
    // `??` keeps an explicit 0 instead of replacing it with the default.
    const userscriptDelay = options.userscriptDelay ?? 2000;
    await page.waitForTimeout(userscriptDelay);

    await waitForNetworkIdle(page);
    await removeCommonAnnoyances(page);
    await snapshotLoadedResourceUrls(page);
    await snapshotRuntimeStyles(page);

    return await page.content();
  } finally {
    await browser.close();
  }
}
|
|
|
|
|
|
2026-05-15 01:00:27 -07:00
|
|
|
/**
 * Waits for the page to reach the "networkidle" load state, bounded by
 * NETWORK_IDLE_TIMEOUT_MS. A timeout or any other failure is ignored.
 *
 * @param {object} page - Playwright page to wait on.
 * @returns {Promise<void>}
 */
async function waitForNetworkIdle(page) {
  const waitOptions = { timeout: NETWORK_IDLE_TIMEOUT_MS };
  try {
    await page.waitForLoadState("networkidle", waitOptions);
  } catch {
    // Some pages keep sockets open; the DOM snapshot is still useful.
  }
}
|
|
|
|
|
|
2026-05-15 01:00:27 -07:00
|
|
|
/**
 * Pins the resources the browser actually loaded into the DOM before serialization.
 *
 * In the page: every `<img>`, `<video>` and `<audio>` with a non-empty
 * `currentSrc` gets that value written to its `src` attribute, and every iframe
 * whose document is accessible is converted to `srcdoc` (with its `src` removed).
 * Frames whose document cannot be read are left untouched.
 *
 * @param {object} page - Playwright page to snapshot.
 * @returns {Promise<void>}
 */
async function snapshotLoadedResourceUrls(page) {
  // Runs in the browser; it must not reference anything from this module's scope.
  const pinInPage = () => {
    const pinCurrentSource = (element) => {
      if (element.currentSrc) {
        element.setAttribute("src", element.currentSrc);
      }
    };

    for (const selector of ["img", "video,audio"]) {
      document.querySelectorAll(selector).forEach((element) => pinCurrentSource(element));
    }

    document.querySelectorAll("iframe").forEach((frame) => {
      try {
        const frameDocument = frame.contentDocument;
        if (frameDocument?.documentElement) {
          const markup = "<!doctype html>" + frameDocument.documentElement.outerHTML;
          frame.setAttribute("srcdoc", markup);
          frame.removeAttribute("src");
        }
      } catch {
        // Cross-origin frames are handled later by the asset inliner when possible.
      }
    });
  };

  await page.evaluate(pinInPage);
}
|
|
|
|
|
|
2026-05-16 16:05:32 -07:00
|
|
|
/**
 * Materializes CSS that exists only in the CSSOM so it survives HTML serialization.
 *
 * In the page: for each document stylesheet with readable, non-blank rules whose
 * owner is a `<style>` element with blank text, the serialized rules are written
 * into that element. Each adopted stylesheet with non-blank rules is appended to
 * `<head>` as a new `<style data-archiver-adopted-stylesheet="<index>">`.
 * Stylesheets whose rules cannot be read are skipped.
 *
 * @param {object} page - Playwright page to snapshot.
 * @returns {Promise<void>}
 */
async function snapshotRuntimeStyles(page) {
  // Runs in the browser; it must not reference anything from this module's scope.
  const materializeInPage = () => {
    const cssTextOf = (sheet) => {
      try {
        return Array.from(sheet.cssRules || [], (rule) => rule.cssText).join("\n");
      } catch {
        return "";
      }
    };

    for (const sheet of Array.from(document.styleSheets)) {
      const css = cssTextOf(sheet);
      if (!css.trim()) {
        continue;
      }

      const owner = sheet.ownerNode;
      const ownerIsBlankStyle = owner instanceof HTMLStyleElement && !owner.textContent.trim();
      if (ownerIsBlankStyle) {
        owner.textContent = css;
      }
    }

    const adopted = Array.from(document.adoptedStyleSheets || []);
    for (const [index, sheet] of adopted.entries()) {
      const css = cssTextOf(sheet);
      if (!css.trim()) {
        continue;
      }
      const style = document.createElement("style");
      style.setAttribute("data-archiver-adopted-stylesheet", String(index));
      style.textContent = css;
      document.head.appendChild(style);
    }
  };

  await page.evaluate(materializeInPage);
}
|
|
|
|
|
|
2026-05-15 01:00:27 -07:00
|
|
|
/**
 * Stamps the archived HTML with a comment naming its source and creation time.
 *
 * The comment is placed right after the first doctype declaration; when the
 * document has none, `<!doctype html>` is prepended. Adjacent hyphens in the
 * source are separated with spaces so that no "--" sequence, of any length, can
 * appear inside the comment.
 *
 * @param {string} html - Document markup.
 * @param {string} sourceUrl - Original location of the page.
 * @returns {string} Markup including the archive comment.
 */
function addArchiveComment(html, sourceUrl) {
  // A lookahead breaks up every pair in a hyphen run ("---" becomes "- - -"),
  // which a plain pairwise replacement would leave partially intact.
  const safeSource = String(sourceUrl).replace(/-(?=-)/g, "- ");
  const comment = `<!-- Archived locally. Source: ${safeSource}. Created: ${new Date().toISOString()}. -->`;

  if (/<!doctype/i.test(html)) {
    return html.replace(/<!doctype[^>]*>/i, (doctype) => `${doctype}\n${comment}`);
  }
  return `<!doctype html>\n${comment}\n${html}`;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Lists asset references in the HTML that still point outside the document.
 *
 * Scans three places:
 *  - `src`, `srcset`, `poster` and `data` on img/source/audio/video/track/embed/
 *    object/input/iframe tags,
 *  - `href` and `imagesrcset` on `<link>` tags whose `rel` names a stylesheet,
 *    icon, apple-touch-icon, image_src or preload,
 *  - every CSS `url(...)` occurrence anywhere in the markup.
 * References judged self-contained by `isSelfContainedAssetRef` are skipped.
 *
 * Only `srcset`-style attributes are treated as candidate lists. `src`, `poster`
 * and `data` hold a single URL each and are recorded whole, so URLs that contain
 * commas are not broken into fragments.
 *
 * @param {string} html - Final archived markup.
 * @returns {string[]} Unique references, sorted.
 */
export function findExternalAssetRefs(html) {
  const refs = new Set();
  const addIfExternal = (candidate) => {
    if (candidate && !isSelfContainedAssetRef(candidate)) {
      refs.add(candidate);
    }
  };

  const assetTagPattern = /<(?:img|source|audio|video|track|embed|object|input|iframe)\b[^>]*>/gi;
  for (const match of html.matchAll(assetTagPattern)) {
    const tag = match[0];
    for (const attr of ["src", "srcset", "poster", "data"]) {
      const value = readAttribute(tag, attr);
      if (!value) {
        continue;
      }
      if (attr === "srcset") {
        addSrcsetRefs(refs, value);
        continue;
      }
      addIfExternal(value.trim());
    }
  }

  const linkPattern = /<link\b[^>]*>/gi;
  for (const match of html.matchAll(linkPattern)) {
    const tag = match[0];
    const rel = readAttribute(tag, "rel") || "";
    if (!/\b(?:stylesheet|icon|apple-touch-icon|image_src|preload)\b/i.test(rel)) {
      continue;
    }
    addIfExternal(readAttribute(tag, "href"));
    const imageSrcset = readAttribute(tag, "imagesrcset");
    if (imageSrcset) {
      addSrcsetRefs(refs, imageSrcset);
    }
  }

  const cssUrlPattern = /url\(\s*(["']?)([^"')]+)\1\s*\)/gi;
  for (const match of html.matchAll(cssUrlPattern)) {
    addIfExternal(cleanCssUrl(match[2]));
  }

  return Array.from(refs).sort();
}
|
|
|
|
|
|
2026-05-16 16:05:32 -07:00
|
|
|
/**
 * Adds the URL of every non-self-contained `srcset` candidate to `refs`.
 *
 * @param {Set<string>} refs - Collection that receives the URLs (mutated).
 * @param {string} srcset - Raw `srcset` / `imagesrcset` attribute value.
 * @returns {void}
 */
function addSrcsetRefs(refs, srcset) {
  for (const entry of splitSrcset(srcset)) {
    // Each candidate is "<url> [descriptor]"; only the URL part is of interest.
    const [url] = entry.trim().split(/\s+/);
    if (!url || isSelfContainedAssetRef(url)) {
      continue;
    }
    refs.add(url);
  }
}
|
|
|
|
|
|
2026-05-14 08:12:13 -07:00
|
|
|
/**
 * Reports whether a reference needs no external fetch.
 *
 * After cleaning with `cleanCssUrl`, a reference counts as self-contained when
 * it is empty, is a fragment ("#..." or percent-encoded "%23..."), or uses the
 * data:, about:, javascript:, mailto: or tel: scheme.
 *
 * @param {string} value - Raw reference taken from markup or CSS.
 * @returns {boolean} True when the reference is self-contained.
 */
function isSelfContainedAssetRef(value) {
  const ref = cleanCssUrl(value);
  if (!ref) {
    return true;
  }
  if (ref.startsWith("#") || /^%23/i.test(ref)) {
    return true;
  }
  return /^(?:data|about|javascript|mailto|tel):/i.test(ref);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the value of an attribute from an opening tag's source text.
 *
 * @param {string} tag - Opening tag markup, e.g. `<img src="a.png">`.
 * @param {string} attr - Attribute name to look up.
 * @returns {string} The value found by `findAttribute`, or "" when the attribute is absent.
 */
function readAttribute(tag, attr) {
  const located = findAttribute(tag, attr);
  if (!located) {
    return "";
  }
  return located.value;
}
|
2026-05-14 09:11:05 -07:00
|
|
|
|
|
|
|
|
/**
 * Normalizes a URL captured from markup or CSS.
 *
 * The value is trimmed, a small set of HTML character references is decoded and
 * one pair of matching surrounding quotes is stripped.
 *
 * Decoding happens in a single pass, so text produced by decoding one reference
 * is never decoded a second time (an escaped ampersand followed by "quot;" stays
 * a literal reference instead of collapsing into a quote).
 *
 * NOTE(review): the reference names in the previous revision were not recoverable
 * from the available source; the set below (amp, quot, apos, #39, #x27) is an
 * assumption covering the ampersand and both quote characters — confirm.
 *
 * @param {string} value - Raw URL text.
 * @returns {string} Cleaned URL text.
 */
function cleanCssUrl(value) {
  // Keyed by the reference name without its leading ampersand and trailing semicolon.
  const referenceText = {
    amp: "&",
    quot: '"',
    apos: "'",
    "#39": "'",
    "#x27": "'"
  };

  const decoded = String(value)
    .trim()
    .replace(/&(amp|quot|apos|#39|#x27);/gi, (whole, name) => referenceText[name.toLowerCase()]);

  const quote = decoded[0];
  if ((quote === '"' || quote === "'") && decoded.at(-1) === quote) {
    return decoded.slice(1, -1).trim();
  }
  return decoded;
}
|
2026-05-15 09:25:19 -07:00
|
|
|
|
|
|
|
|
/**
 * Locates an attribute inside the source text of an opening tag.
 *
 * The tag is scanned left to right after the tag name. Attribute names are
 * compared case-insensitively. Values may be double-quoted, single-quoted or
 * unquoted; an attribute without "=" has the value "". Scanning stops at the
 * first ">" or "/" found where an attribute name would begin.
 *
 * @param {string} openingTag - Markup of the opening tag, starting with "<".
 * @param {string} attr - Attribute name to find.
 * @returns {{start: number, end: number, value: string} | null} Offsets of the
 *   attribute within `openingTag` (`end` is exclusive and includes a closing
 *   quote) and its raw value, or `null` when the attribute is not present.
 */
function findAttribute(openingTag, attr) {
  const wanted = attr.toLowerCase();
  const length = openingTag.length;
  const tagName = openingTag.match(/^<[^\s/>]+/);
  let cursor = tagName ? tagName[0].length : 1;

  // Moves the cursor forward while the current character satisfies the pattern.
  const advanceWhile = (pattern) => {
    while (cursor < length && pattern.test(openingTag[cursor])) {
      cursor += 1;
    }
  };

  while (cursor < length) {
    advanceWhile(/\s/);
    if (cursor >= length) {
      return null;
    }
    const lead = openingTag[cursor];
    if (lead === ">" || lead === "/") {
      return null;
    }

    // Attribute name: everything up to whitespace, "=", "/" or ">".
    const start = cursor;
    advanceWhile(/[^\s=/>]/);
    const name = openingTag.slice(start, cursor);
    advanceWhile(/\s/);

    let value = "";
    if (openingTag[cursor] === "=") {
      cursor += 1;
      advanceWhile(/\s/);

      const opener = openingTag[cursor];
      if (opener === '"' || opener === "'") {
        // Quoted value: runs to the matching quote, or to the end if it is missing.
        const valueStart = cursor + 1;
        const closing = openingTag.indexOf(opener, valueStart);
        const valueEnd = closing === -1 ? length : closing;
        value = openingTag.slice(valueStart, valueEnd);
        cursor = closing === -1 ? length : closing + 1;
      } else {
        // Unquoted value: runs to the next whitespace or ">".
        const valueStart = cursor;
        advanceWhile(/[^\s>]/);
        value = openingTag.slice(valueStart, cursor);
      }
    }

    if (name.toLowerCase() === wanted) {
      return { start, end: cursor, value };
    }
  }

  return null;
}
|