Add EasyList filter support
This commit is contained in:
980
src/archiver.mjs
980
src/archiver.mjs
File diff suppressed because it is too large
Load Diff
@@ -144,6 +144,7 @@ export class AssetInliner {
|
||||
this.referer = options.referer;
|
||||
this.maxAssetBytes = options.maxAssetBytes || 30 * 1024 * 1024;
|
||||
this.maxInlineStyleBytes = options.maxInlineStyleBytes || 128 * 1024;
|
||||
this.shouldBlockAsset = options.shouldBlockAsset || null;
|
||||
this.cache = new Map();
|
||||
this.warnings = [];
|
||||
}
|
||||
@@ -214,7 +215,7 @@ export class AssetInliner {
|
||||
if (!absolute || absolute.startsWith("data:")) {
|
||||
return "";
|
||||
}
|
||||
const css = await this.fetchText(absolute, baseUrl);
|
||||
const css = await this.fetchText(absolute, baseUrl, "stylesheet");
|
||||
if (css == null) {
|
||||
return "";
|
||||
}
|
||||
@@ -242,7 +243,7 @@ export class AssetInliner {
|
||||
}
|
||||
}
|
||||
|
||||
const dataUri = await this.toDataUri(href, baseUrl);
|
||||
const dataUri = await this.toDataUri(href, baseUrl, linkResourceType(asValue));
|
||||
if (!dataUri) {
|
||||
return "";
|
||||
}
|
||||
@@ -251,12 +252,13 @@ export class AssetInliner {
|
||||
|
||||
async rewriteMediaAttributes(tag, baseUrl) {
|
||||
let output = tag;
|
||||
const tagName = getTagName(output);
|
||||
for (const attr of ["src", "poster", "data"]) {
|
||||
const value = getAttribute(output, attr);
|
||||
if (!value) {
|
||||
continue;
|
||||
}
|
||||
const dataUri = await this.toDataUri(value, baseUrl);
|
||||
const dataUri = await this.toDataUri(value, baseUrl, mediaResourceType(tagName, attr));
|
||||
if (dataUri) {
|
||||
output = setAttribute(output, attr, dataUri);
|
||||
} else {
|
||||
@@ -291,7 +293,7 @@ export class AssetInliner {
|
||||
if (!absolute || absolute.startsWith("data:")) {
|
||||
return tag;
|
||||
}
|
||||
const text = await this.fetchText(absolute, baseUrl);
|
||||
const text = await this.fetchText(absolute, baseUrl, "subdocument");
|
||||
if (text != null) {
|
||||
const inlined = await this.inlineHtml(text, absolute, { depth: depth + 1 });
|
||||
let rewritten = removeAttribute(tag, "src");
|
||||
@@ -308,7 +310,7 @@ export class AssetInliner {
|
||||
const rewritten = [];
|
||||
for (const candidate of candidates) {
|
||||
const [urlPart, ...descriptor] = candidate.split(/\s+/);
|
||||
const dataUri = await this.toDataUri(urlPart, baseUrl);
|
||||
const dataUri = await this.toDataUri(urlPart, baseUrl, "image");
|
||||
rewritten.push([dataUri || TRANSPARENT_IMAGE_DATA_URI, ...descriptor].join(" "));
|
||||
}
|
||||
return rewritten.join(", ");
|
||||
@@ -323,7 +325,7 @@ export class AssetInliner {
|
||||
if (!absolute || absolute.startsWith("data:")) {
|
||||
return "";
|
||||
}
|
||||
const imported = await this.fetchText(absolute, baseUrl);
|
||||
const imported = await this.fetchText(absolute, baseUrl, "stylesheet");
|
||||
if (imported == null) {
|
||||
return "";
|
||||
}
|
||||
@@ -336,33 +338,34 @@ export class AssetInliner {
|
||||
if (!raw || raw.startsWith("#") || /^%23/i.test(raw) || /^(?:data|blob|about|javascript):/i.test(raw)) {
|
||||
return match[0];
|
||||
}
|
||||
const dataUri = await this.toDataUri(raw, baseUrl);
|
||||
const dataUri = await this.toDataUri(raw, baseUrl, cssResourceType(raw, baseUrl));
|
||||
return dataUri ? `url("${dataUri}")` : "url(about:blank)";
|
||||
});
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
async toDataUri(rawUrl, baseUrl) {
|
||||
async toDataUri(rawUrl, baseUrl, resourceType = "other") {
|
||||
const absolute = resolveUrl(rawUrl, baseUrl);
|
||||
if (!absolute || absolute.startsWith("data:")) {
|
||||
return absolute;
|
||||
}
|
||||
if (this.cache.has(absolute)) {
|
||||
return this.cache.get(absolute);
|
||||
const cacheKey = `${resourceType}:${absolute}`;
|
||||
if (this.cache.has(cacheKey)) {
|
||||
return this.cache.get(cacheKey);
|
||||
}
|
||||
const asset = await this.fetchAsset(absolute, baseUrl);
|
||||
const asset = await this.fetchAsset(absolute, baseUrl, resourceType);
|
||||
if (!asset) {
|
||||
this.cache.set(absolute, null);
|
||||
this.cache.set(cacheKey, null);
|
||||
return null;
|
||||
}
|
||||
const dataUri = `data:${asset.contentType};base64,${asset.bytes.toString("base64")}`;
|
||||
this.cache.set(absolute, dataUri);
|
||||
this.cache.set(cacheKey, dataUri);
|
||||
return dataUri;
|
||||
}
|
||||
|
||||
async fetchText(rawUrl, baseUrl) {
|
||||
const asset = await this.fetchAsset(rawUrl, baseUrl);
|
||||
async fetchText(rawUrl, baseUrl, resourceType = "other") {
|
||||
const asset = await this.fetchAsset(rawUrl, baseUrl, resourceType);
|
||||
if (!asset) {
|
||||
return null;
|
||||
}
|
||||
@@ -373,12 +376,15 @@ export class AssetInliner {
|
||||
return asset.bytes.toString("utf8");
|
||||
}
|
||||
|
||||
async fetchAsset(rawUrl, baseUrl) {
|
||||
async fetchAsset(rawUrl, baseUrl, resourceType = "other") {
|
||||
const absolute = resolveUrl(rawUrl, baseUrl);
|
||||
if (!absolute || absolute.startsWith("data:")) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
if (this.shouldBlockAsset?.(absolute, resourceType)) {
|
||||
return null;
|
||||
}
|
||||
if (isFileUrl(absolute)) {
|
||||
const filePath = fileURLToPath(absolute);
|
||||
const bytes = await fs.readFile(filePath);
|
||||
@@ -512,6 +518,43 @@ function mimeFromUrl(rawUrl) {
|
||||
return MIME_BY_EXT.get(path.extname(pathname).toLowerCase()) || "application/octet-stream";
|
||||
}
|
||||
|
||||
/**
 * Translate the `as` hint of a `<link>` element into the resource-type label
 * handed to the asset-blocking callback.
 *
 * @param {*} asValue - Raw `as` attribute value; falsy values are treated as "".
 * @returns {string} One of "font", "script", "stylesheet", "subdocument",
 *   "media", or "image" (the fallback for every other value).
 */
function linkResourceType(asValue) {
  const hint = String(asValue || "").toLowerCase();

  switch (hint) {
    case "font":
    case "script":
      // These hints already match the resource-type label verbatim.
      return hint;
    case "style":
      return "stylesheet";
    case "document":
      return "subdocument";
    case "audio":
    case "video":
      return "media";
    default:
      // Anything else, including a missing hint, is classified as an image.
      return "image";
  }
}
|
||||
|
||||
/**
 * Classify the resource referenced by a `src`, `poster`, or `data` attribute
 * so the asset-blocking callback receives the right resource-type label.
 *
 * @param {string} tagName - Lower-case element name (e.g. "img", "video").
 * @param {string} attr - Attribute that carries the URL.
 * @returns {string} One of "subdocument", "object", "image", "media", "other".
 */
function mediaResourceType(tagName, attr) {
  if (tagName === "iframe") return "subdocument";
  if (tagName === "object" || tagName === "embed") return "object";
  // A poster frame is a still image even though it lives on <video>, so this
  // must be tested before the audio/video branch; otherwise it is unreachable.
  if (attr === "poster") return "image";
  if (tagName === "audio" || tagName === "video") return "media";
  // <source src> is only valid inside <audio>/<video>; <picture> sources use
  // srcset, which is rewritten separately.
  if (tagName === "source" && attr === "src") return "media";
  if (tagName === "track") return "other";
  return "image";
}
|
||||
|
||||
/**
 * Guess the resource type of a CSS `url(...)` reference from its file
 * extension.
 *
 * @param {string} rawUrl - URL exactly as written in the stylesheet.
 * @param {string} baseUrl - Base against which `rawUrl` is resolved.
 * @returns {string} "font", "media", or "image" (the fallback).
 */
function cssResourceType(rawUrl, baseUrl) {
  const target = resolveUrl(rawUrl, baseUrl) || rawUrl;

  let urlPath;
  try {
    urlPath = new URL(target).pathname;
  } catch {
    // Not parseable as a URL: inspect the raw string instead.
    urlPath = target;
  }

  switch (path.extname(urlPath).toLowerCase()) {
    case ".woff":
    case ".woff2":
    case ".ttf":
    case ".otf":
      return "font";
    case ".mp4":
    case ".webm":
    case ".mp3":
    case ".m4a":
      return "media";
    default:
      return "image";
  }
}
|
||||
|
||||
/**
 * Extract the lower-cased element name from markup that begins with an
 * opening tag.
 *
 * @param {string} markup - Text expected to start with `<name`.
 * @returns {string} The tag name in lower case, or "" when `markup` does not
 *   start with `<` followed by a name character.
 */
function getTagName(markup) {
  const found = markup.match(/^<([a-z0-9:-]+)/i);
  if (!found) {
    return "";
  }
  return found[1].toLowerCase();
}
|
||||
|
||||
function getAttribute(tag, attr) {
|
||||
const openingTag = getOpeningTag(tag);
|
||||
if (!openingTag) {
|
||||
|
||||
Reference in New Issue
Block a user