Fixes for inline css (CSSOM)
This commit is contained in:
@@ -9,6 +9,7 @@ import {
|
||||
findEffectiveBase,
|
||||
inputToUrl,
|
||||
isHttpUrl,
|
||||
splitSrcset,
|
||||
slugForUrl
|
||||
} from "./asset-inliner.mjs";
|
||||
|
||||
@@ -762,6 +763,7 @@ export async function renderPage(sourceUrl, options = {}) {
|
||||
|
||||
await waitForNetworkIdle(page);
|
||||
await snapshotLoadedResourceUrls(page);
|
||||
await snapshotRuntimeStyles(page);
|
||||
|
||||
return await page.content();
|
||||
} finally {
|
||||
@@ -807,6 +809,44 @@ async function snapshotLoadedResourceUrls(page) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Copy CSS that exists only in the CSSOM back into the DOM so that a later
 * serialization of the page markup still carries it.
 *
 * Two cases are handled:
 *  - style sheets owned by a `<style>` element whose text is empty/blank get
 *    their serialized rules written into that element's `textContent`;
 *  - each non-empty entry of `document.adoptedStyleSheets` is appended as a new
 *    `<style data-archiver-adopted-stylesheet="<index>">` element.
 *
 * @param {object} page - Object exposing `evaluate(fn)`; the callback uses
 *   `document` and `HTMLStyleElement`, so it is expected to run in a page
 *   context (presumably a Puppeteer/Playwright-style page — confirm at caller).
 * @returns {Promise<void>} Resolves once the callback has finished.
 */
async function snapshotRuntimeStyles(page) {
  // Everything the callback needs is declared inside it: it must not close
  // over anything from this module's scope.
  await page.evaluate(() => {
    const serializeRules = (sheet) => {
      try {
        return Array.from(sheet.cssRules || [])
          .map((rule) => rule.cssText)
          .join("\n");
      } catch {
        // Best effort: reading `cssRules` may throw (e.g. for sheets the page
        // is not allowed to inspect); such sheets are treated as empty.
        return "";
      }
    };

    for (const sheet of Array.from(document.styleSheets)) {
      const css = serializeRules(sheet);
      if (!css.trim()) {
        continue;
      }

      // Only fill <style> elements that have no text of their own, so
      // authored inline CSS is never overwritten.
      const owner = sheet.ownerNode;
      if (owner instanceof HTMLStyleElement && !owner.textContent.trim()) {
        owner.textContent = css;
      }
    }

    // Documents without a <head> would make `document.head.appendChild`
    // throw and reject the whole evaluate call; fall back to the root element
    // and skip the adopted-sheet step when there is no container at all.
    const container = document.head || document.documentElement;
    if (!container) {
      return;
    }

    const adoptedStyleSheets = Array.from(document.adoptedStyleSheets || []);
    adoptedStyleSheets.forEach((sheet, index) => {
      const css = serializeRules(sheet);
      if (!css.trim()) {
        return;
      }
      const style = document.createElement("style");
      // The index records the sheet's position in adoptedStyleSheets.
      style.setAttribute("data-archiver-adopted-stylesheet", String(index));
      style.textContent = css;
      container.appendChild(style);
    });
  });
}
|
||||
|
||||
function addArchiveComment(html, sourceUrl) {
|
||||
const safeSource = String(sourceUrl).replaceAll("--", "- -");
|
||||
const comment = `<!-- Archived locally. Source: ${safeSource}. Created: ${new Date().toISOString()}. -->`;
|
||||
@@ -823,7 +863,14 @@ export function findExternalAssetRefs(html) {
|
||||
const tag = match[0];
|
||||
for (const attr of ["src", "srcset", "poster", "data"]) {
|
||||
const value = readAttribute(tag, attr);
|
||||
if (!value || isSelfContainedAssetRef(value)) {
|
||||
if (!value) {
|
||||
continue;
|
||||
}
|
||||
if (attr === "srcset") {
|
||||
addSrcsetRefs(refs, value);
|
||||
continue;
|
||||
}
|
||||
if (isSelfContainedAssetRef(value)) {
|
||||
continue;
|
||||
}
|
||||
for (const part of value.split(",")) {
|
||||
@@ -846,6 +893,10 @@ export function findExternalAssetRefs(html) {
|
||||
if (href && !isSelfContainedAssetRef(href)) {
|
||||
refs.add(href);
|
||||
}
|
||||
const imageSrcset = readAttribute(tag, "imagesrcset");
|
||||
if (imageSrcset) {
|
||||
addSrcsetRefs(refs, imageSrcset);
|
||||
}
|
||||
}
|
||||
|
||||
const cssUrlPattern = /url\(\s*(["']?)([^"')]+)\1\s*\)/gi;
|
||||
@@ -859,6 +910,15 @@ export function findExternalAssetRefs(html) {
|
||||
return Array.from(refs).sort();
|
||||
}
|
||||
|
||||
/**
 * Collect the URL of every candidate in a srcset-style attribute value.
 *
 * The value is split with `splitSrcset`; the first whitespace-delimited token
 * of each candidate is taken as its URL and added to `refs` unless it is empty
 * or `isSelfContainedAssetRef` reports it as self-contained.
 *
 * @param {{ add: Function }} refs - Collection that receives the URLs (mutated in place).
 * @param {string} srcset - Raw attribute value to scan.
 * @returns {void}
 */
function addSrcsetRefs(refs, srcset) {
  for (const entry of splitSrcset(srcset)) {
    // Descriptors (if any) follow the URL, so only the leading token matters.
    const [url] = entry.trim().split(/\s+/);
    if (!url || isSelfContainedAssetRef(url)) {
      continue;
    }
    refs.add(url);
  }
}
|
||||
|
||||
function isSelfContainedAssetRef(value) {
|
||||
const trimmed = cleanCssUrl(value);
|
||||
return (
|
||||
|
||||
@@ -91,6 +91,7 @@ export function resolveUrl(rawUrl, baseUrl) {
|
||||
const trimmed = htmlDecode(rawUrl.trim());
|
||||
if (
|
||||
!trimmed ||
|
||||
/^(?:undefined|null)$/i.test(trimmed) ||
|
||||
trimmed.startsWith("#") ||
|
||||
/^%23/i.test(trimmed) ||
|
||||
/^(?:about|blob|data|javascript|mailto|tel):/i.test(trimmed)
|
||||
@@ -233,11 +234,19 @@ export class AssetInliner {
|
||||
if (/\bpreload\b/i.test(rel) && /^style$/i.test(asValue)) {
|
||||
return "";
|
||||
}
|
||||
let output = tag;
|
||||
if (/\bpreload\b/i.test(rel) && /^image$/i.test(asValue)) {
|
||||
const imageSrcset = getAttribute(output, "imagesrcset");
|
||||
if (imageSrcset) {
|
||||
output = setAttribute(output, "imagesrcset", await this.inlineSrcset(imageSrcset, baseUrl));
|
||||
}
|
||||
}
|
||||
|
||||
const dataUri = await this.toDataUri(href, baseUrl);
|
||||
if (!dataUri) {
|
||||
return "";
|
||||
}
|
||||
return setAttribute(tag, "href", dataUri);
|
||||
return setAttribute(output, "href", dataUri);
|
||||
}
|
||||
|
||||
async rewriteMediaAttributes(tag, baseUrl) {
|
||||
@@ -293,8 +302,7 @@ export class AssetInliner {
|
||||
}
|
||||
|
||||
async inlineSrcset(value, baseUrl) {
|
||||
const candidates = value
|
||||
.split(",")
|
||||
const candidates = splitSrcset(value)
|
||||
.map((part) => part.trim())
|
||||
.filter(Boolean);
|
||||
const rewritten = [];
|
||||
@@ -425,6 +433,75 @@ export class AssetInliner {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Split a srcset-style attribute value into its trimmed, non-empty candidates.
 *
 * A comma only ends a candidate when it sits outside quotes and parentheses
 * and `isSrcsetCandidateSeparator` accepts it; any other comma is kept as part
 * of the current candidate. Whitespace directly after an accepted separator
 * is skipped.
 *
 * @param {string} value - Raw attribute value.
 * @returns {string[]} Candidates in source order, each trimmed.
 */
export function splitSrcset(value) {
  const pieces = [];
  let buffer = "";
  let openQuote = "";
  let depth = 0;

  // Store the pending candidate (if it has any non-blank text) and reset.
  const flush = () => {
    const text = buffer.trim();
    if (text) {
      pieces.push(text);
    }
    buffer = "";
  };

  for (let pos = 0; pos < value.length; pos += 1) {
    const ch = value[pos];

    if (openQuote !== "") {
      // Inside a quoted run nothing is special except the matching quote.
      if (ch === openQuote) {
        openQuote = "";
      }
    } else if (ch === '"' || ch === "'") {
      openQuote = ch;
    } else if (ch === "(") {
      depth += 1;
    } else if (ch === ")" && depth > 0) {
      depth -= 1;
    } else if (ch === "," && depth === 0 && isSrcsetCandidateSeparator(value, pos, buffer)) {
      flush();
      // Swallow the whitespace that follows the separator.
      while (pos + 1 < value.length && /\s/.test(value[pos + 1])) {
        pos += 1;
      }
      continue;
    }

    // Every character that is not an accepted separator belongs to the candidate.
    buffer += ch;
  }

  flush();
  return pieces;
}
|
||||
|
||||
/**
 * Decide whether the comma at `commaIndex` ends a srcset candidate or is part
 * of the candidate's URL (as in `data:image/png;base64,...`).
 *
 * The comma is a separator when:
 *  - nothing but whitespace follows it, or
 *  - it is immediately followed by whitespace, or
 *  - the text collected so far is a URL followed by a descriptor
 *    (number + `w`, `x` or `h`, matched case-insensitively).
 *
 * @param {string} value - The full attribute value being split.
 * @param {number} commaIndex - Index of the comma within `value`.
 * @param {string} currentCandidate - Text accumulated for the candidate so far.
 * @returns {boolean} True when the comma separates two candidates.
 */
function isSrcsetCandidateSeparator(value, commaIndex, currentCandidate) {
  const after = value.slice(commaIndex + 1);
  if (!after.trim() || /^\s/.test(after)) {
    return true;
  }

  const tokens = currentCandidate.trim().split(/\s+/);
  // A descriptor can only follow a URL token. With a single token the comma is
  // still inside the URL, even if that token happens to look like "100w".
  if (tokens.length < 2) {
    return false;
  }

  // Accept every floating-point spelling HTML allows for descriptors:
  // "2", "1.5", ".5" and exponent forms such as "1e1".
  const descriptorPattern = /^(?:\d+(?:\.\d+)?|\.\d+)(?:e[+-]?\d+)?[wxh]$/i;
  return descriptorPattern.test(tokens.at(-1));
}
|
||||
|
||||
function mimeFromUrl(rawUrl) {
|
||||
let pathname = rawUrl;
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user