Add EasyList filter support

This commit is contained in:
2026-05-16 22:07:39 -07:00
parent f4f1a7a78d
commit 46444b193b
12 changed files with 171818 additions and 228 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -144,6 +144,7 @@ export class AssetInliner {
this.referer = options.referer;
this.maxAssetBytes = options.maxAssetBytes || 30 * 1024 * 1024;
this.maxInlineStyleBytes = options.maxInlineStyleBytes || 128 * 1024;
this.shouldBlockAsset = options.shouldBlockAsset || null;
this.cache = new Map();
this.warnings = [];
}
@@ -214,7 +215,7 @@ export class AssetInliner {
if (!absolute || absolute.startsWith("data:")) {
return "";
}
const css = await this.fetchText(absolute, baseUrl);
const css = await this.fetchText(absolute, baseUrl, "stylesheet");
if (css == null) {
return "";
}
@@ -242,7 +243,7 @@ export class AssetInliner {
}
}
const dataUri = await this.toDataUri(href, baseUrl);
const dataUri = await this.toDataUri(href, baseUrl, linkResourceType(asValue));
if (!dataUri) {
return "";
}
@@ -251,12 +252,13 @@ export class AssetInliner {
async rewriteMediaAttributes(tag, baseUrl) {
let output = tag;
const tagName = getTagName(output);
for (const attr of ["src", "poster", "data"]) {
const value = getAttribute(output, attr);
if (!value) {
continue;
}
const dataUri = await this.toDataUri(value, baseUrl);
const dataUri = await this.toDataUri(value, baseUrl, mediaResourceType(tagName, attr));
if (dataUri) {
output = setAttribute(output, attr, dataUri);
} else {
@@ -291,7 +293,7 @@ export class AssetInliner {
if (!absolute || absolute.startsWith("data:")) {
return tag;
}
const text = await this.fetchText(absolute, baseUrl);
const text = await this.fetchText(absolute, baseUrl, "subdocument");
if (text != null) {
const inlined = await this.inlineHtml(text, absolute, { depth: depth + 1 });
let rewritten = removeAttribute(tag, "src");
@@ -308,7 +310,7 @@ export class AssetInliner {
const rewritten = [];
for (const candidate of candidates) {
const [urlPart, ...descriptor] = candidate.split(/\s+/);
const dataUri = await this.toDataUri(urlPart, baseUrl);
const dataUri = await this.toDataUri(urlPart, baseUrl, "image");
rewritten.push([dataUri || TRANSPARENT_IMAGE_DATA_URI, ...descriptor].join(" "));
}
return rewritten.join(", ");
@@ -323,7 +325,7 @@ export class AssetInliner {
if (!absolute || absolute.startsWith("data:")) {
return "";
}
const imported = await this.fetchText(absolute, baseUrl);
const imported = await this.fetchText(absolute, baseUrl, "stylesheet");
if (imported == null) {
return "";
}
@@ -336,33 +338,34 @@ export class AssetInliner {
if (!raw || raw.startsWith("#") || /^%23/i.test(raw) || /^(?:data|blob|about|javascript):/i.test(raw)) {
return match[0];
}
const dataUri = await this.toDataUri(raw, baseUrl);
const dataUri = await this.toDataUri(raw, baseUrl, cssResourceType(raw, baseUrl));
return dataUri ? `url("${dataUri}")` : "url(about:blank)";
});
return output;
}
async toDataUri(rawUrl, baseUrl) {
async toDataUri(rawUrl, baseUrl, resourceType = "other") {
const absolute = resolveUrl(rawUrl, baseUrl);
if (!absolute || absolute.startsWith("data:")) {
return absolute;
}
if (this.cache.has(absolute)) {
return this.cache.get(absolute);
const cacheKey = `${resourceType}:${absolute}`;
if (this.cache.has(cacheKey)) {
return this.cache.get(cacheKey);
}
const asset = await this.fetchAsset(absolute, baseUrl);
const asset = await this.fetchAsset(absolute, baseUrl, resourceType);
if (!asset) {
this.cache.set(absolute, null);
this.cache.set(cacheKey, null);
return null;
}
const dataUri = `data:${asset.contentType};base64,${asset.bytes.toString("base64")}`;
this.cache.set(absolute, dataUri);
this.cache.set(cacheKey, dataUri);
return dataUri;
}
async fetchText(rawUrl, baseUrl) {
const asset = await this.fetchAsset(rawUrl, baseUrl);
async fetchText(rawUrl, baseUrl, resourceType = "other") {
const asset = await this.fetchAsset(rawUrl, baseUrl, resourceType);
if (!asset) {
return null;
}
@@ -373,12 +376,15 @@ export class AssetInliner {
return asset.bytes.toString("utf8");
}
async fetchAsset(rawUrl, baseUrl) {
async fetchAsset(rawUrl, baseUrl, resourceType = "other") {
const absolute = resolveUrl(rawUrl, baseUrl);
if (!absolute || absolute.startsWith("data:")) {
return null;
}
try {
if (this.shouldBlockAsset?.(absolute, resourceType)) {
return null;
}
if (isFileUrl(absolute)) {
const filePath = fileURLToPath(absolute);
const bytes = await fs.readFile(filePath);
@@ -512,6 +518,43 @@ function mimeFromUrl(rawUrl) {
return MIME_BY_EXT.get(path.extname(pathname).toLowerCase()) || "application/octet-stream";
}
/**
 * Map the `as` value of a `<link>` element to a resource-type label.
 *
 * The comparison is case-insensitive and ignores surrounding whitespace.
 * Destinations that are clearly not images (`object`, `embed`, `track`,
 * `fetch`) get the same labels that `mediaResourceType` uses for the
 * equivalent elements, instead of falling through to `"image"`.
 *
 * @param {string|null|undefined} asValue - Raw `as` attribute value, if any.
 * @returns {string} One of "font", "script", "stylesheet", "subdocument",
 *   "media", "object", "other" or "image" (the default when `as` is missing
 *   or unrecognized).
 */
function linkResourceType(asValue) {
  const normalized = String(asValue || "").trim().toLowerCase();
  switch (normalized) {
    case "font":
      return "font";
    case "script":
      return "script";
    case "style":
      return "stylesheet";
    case "document":
      return "subdocument";
    case "audio":
    case "video":
      return "media";
    case "object":
    case "embed":
      return "object";
    case "track":
    case "fetch":
      // Neither is an image; keep them out of image-specific matching.
      return "other";
    default:
      // Links without a recognized `as` value keep the original "image" label.
      return "image";
  }
}
/**
 * Pick a resource-type label for a URL-bearing attribute on a media-like tag.
 *
 * The `poster` attribute is checked before the audio/video check: a poster is
 * a still image even though it sits on a `<video>` element. Previously the
 * audio/video branch ran first, so a video poster was labelled "media".
 *
 * @param {string} tagName - Lower-case tag name (as produced by `getTagName`).
 * @param {string} attr - Attribute being rewritten (e.g. "src", "poster", "data").
 * @returns {string} "subdocument", "object", "image", "media" or "other".
 */
function mediaResourceType(tagName, attr) {
  if (tagName === "iframe") {
    return "subdocument";
  }
  if (tagName === "object" || tagName === "embed") {
    return "object";
  }
  // Must precede the audio/video branch, otherwise it can never match a video poster.
  if (attr === "poster") {
    return "image";
  }
  if (tagName === "audio" || tagName === "video") {
    return "media";
  }
  if (tagName === "track") {
    return "other";
  }
  return "image";
}
/**
 * Guess a resource-type label for a CSS `url(...)` reference from its file
 * extension.
 *
 * The URL is resolved against `baseUrl` with `resolveUrl`; when that yields
 * nothing the raw value is used. If the result cannot be parsed as a URL, the
 * query string and fragment are stripped by hand so that references such as
 * `font.eot?#iefix` still expose their real extension.
 *
 * @param {string} rawUrl - URL text taken from the stylesheet.
 * @param {string} baseUrl - Base URL used to resolve `rawUrl`.
 * @returns {string} "font", "media", or "image" (the default).
 */
function cssResourceType(rawUrl, baseUrl) {
  const absolute = resolveUrl(rawUrl, baseUrl) || rawUrl;
  let pathname;
  try {
    pathname = new URL(absolute).pathname;
  } catch {
    // Not a parseable absolute URL: fall back to the raw text without its
    // query/fragment, which would otherwise be glued onto the extension.
    pathname = String(absolute).split(/[?#]/, 1)[0];
  }
  const ext = path.extname(pathname).toLowerCase();
  if ([".woff", ".woff2", ".ttf", ".otf", ".eot"].includes(ext)) {
    return "font";
  }
  if ([".mp4", ".webm", ".mp3", ".m4a", ".ogg", ".ogv", ".wav"].includes(ext)) {
    return "media";
  }
  return "image";
}
/**
 * Extract the lower-cased tag name from markup that begins with `<name`.
 *
 * @param {string} markup - Markup text whose first character should be `<`.
 * @returns {string} The tag name in lower case, or "" when the text does not
 *   start with an opening tag.
 */
function getTagName(markup) {
  const found = markup.match(/^<([a-z0-9:-]+)/i);
  if (found === null) {
    return "";
  }
  const [, name] = found;
  return name.toLowerCase();
}
function getAttribute(tag, attr) {
const openingTag = getOpeningTag(tag);
if (!openingTag) {