Fixes for inline CSS (CSSOM)

This commit is contained in:
2026-05-16 16:05:32 -07:00
parent 422994ad07
commit 40c63dc4e2
10 changed files with 301 additions and 103 deletions

View File

@@ -9,6 +9,7 @@ import {
findEffectiveBase,
inputToUrl,
isHttpUrl,
splitSrcset,
slugForUrl
} from "./asset-inliner.mjs";
@@ -762,6 +763,7 @@ export async function renderPage(sourceUrl, options = {}) {
await waitForNetworkIdle(page);
await snapshotLoadedResourceUrls(page);
await snapshotRuntimeStyles(page);
return await page.content();
} finally {
@@ -807,6 +809,44 @@ async function snapshotLoadedResourceUrls(page) {
});
}
/**
 * Copies styles that exist only in the CSSOM into the DOM so that a later
 * HTML serialization of the page still contains them.
 *
 * Inside the page it does two things:
 *  1. For each entry of `document.styleSheets` whose owner is a `<style>`
 *     element with blank text but a non-empty rule list, writes the
 *     serialized rules into the element's text content.
 *  2. For each enabled entry of `document.adoptedStyleSheets` with a
 *     non-empty rule list, appends a
 *     `<style data-archiver-adopted-stylesheet="<index>">` element holding
 *     the serialized rules. The sheet's media query, when present, is copied
 *     to the element's `media` attribute so conditional sheets stay
 *     conditional. Disabled sheets are skipped because they do not affect
 *     rendering at snapshot time.
 *
 * @param {object} page - Object exposing `evaluate(fn)`; presumably a
 *   Puppeteer/Playwright-style page that runs `fn` in the browser context.
 * @returns {Promise<void>} Resolves once the in-page callback has finished.
 */
async function snapshotRuntimeStyles(page) {
  // NOTE(review): the callback is kept fully self-contained (no references to
  // module scope) on the assumption that `evaluate` ships it to the browser.
  await page.evaluate(() => {
    const serializeRules = (sheet) => {
      try {
        return Array.from(sheet.cssRules || [], (rule) => rule.cssText).join("\n");
      } catch {
        // Deliberate best-effort: if the rule list cannot be read, treat the
        // sheet as empty instead of aborting the whole snapshot.
        return "";
      }
    };

    for (const sheet of Array.from(document.styleSheets)) {
      const css = serializeRules(sheet);
      if (!css.trim()) {
        continue;
      }
      const owner = sheet.ownerNode;
      // Only fill <style> elements that have no text of their own; existing
      // text is never overwritten.
      if (owner instanceof HTMLStyleElement && !owner.textContent.trim()) {
        owner.textContent = css;
      }
    }

    // Fall back to the root element if the document has no <head>.
    const host = document.head || document.documentElement;
    Array.from(document.adoptedStyleSheets || []).forEach((sheet, index) => {
      if (sheet.disabled) {
        return;
      }
      const css = serializeRules(sheet);
      if (!css.trim()) {
        return;
      }
      const style = document.createElement("style");
      // The index is the sheet's position in adoptedStyleSheets, so skipped
      // sheets leave gaps in the numbering.
      style.setAttribute("data-archiver-adopted-stylesheet", String(index));
      const mediaText = sheet.media?.mediaText?.trim() ?? "";
      if (mediaText) {
        style.setAttribute("media", mediaText);
      }
      style.textContent = css;
      host.appendChild(style);
    });
  });
}
function addArchiveComment(html, sourceUrl) {
const safeSource = String(sourceUrl).replaceAll("--", "- -");
const comment = `<!-- Archived locally. Source: ${safeSource}. Created: ${new Date().toISOString()}. -->`;
@@ -823,7 +863,14 @@ export function findExternalAssetRefs(html) {
const tag = match[0];
for (const attr of ["src", "srcset", "poster", "data"]) {
const value = readAttribute(tag, attr);
if (!value || isSelfContainedAssetRef(value)) {
if (!value) {
continue;
}
if (attr === "srcset") {
addSrcsetRefs(refs, value);
continue;
}
if (isSelfContainedAssetRef(value)) {
continue;
}
for (const part of value.split(",")) {
@@ -846,6 +893,10 @@ export function findExternalAssetRefs(html) {
if (href && !isSelfContainedAssetRef(href)) {
refs.add(href);
}
const imageSrcset = readAttribute(tag, "imagesrcset");
if (imageSrcset) {
addSrcsetRefs(refs, imageSrcset);
}
}
const cssUrlPattern = /url\(\s*(["']?)([^"')]+)\1\s*\)/gi;
@@ -859,6 +910,15 @@ export function findExternalAssetRefs(html) {
return Array.from(refs).sort();
}
/**
 * Adds the URL of every srcset candidate to `refs`, skipping candidates that
 * `isSelfContainedAssetRef` reports as self-contained.
 *
 * @param {{ add: Function }} refs - Collector with an `add` method
 *   (presumably a Set; verify against the caller).
 * @param {string} srcset - Raw `srcset` / `imagesrcset` attribute value.
 * @returns {void}
 */
function addSrcsetRefs(refs, srcset) {
  for (const entry of splitSrcset(srcset)) {
    // The URL is the first whitespace-delimited token; the rest is descriptors.
    const [url] = entry.trim().split(/\s+/);
    if (!url || isSelfContainedAssetRef(url)) {
      continue;
    }
    refs.add(url);
  }
}
function isSelfContainedAssetRef(value) {
const trimmed = cleanCssUrl(value);
return (

View File

@@ -91,6 +91,7 @@ export function resolveUrl(rawUrl, baseUrl) {
const trimmed = htmlDecode(rawUrl.trim());
if (
!trimmed ||
/^(?:undefined|null)$/i.test(trimmed) ||
trimmed.startsWith("#") ||
/^%23/i.test(trimmed) ||
/^(?:about|blob|data|javascript|mailto|tel):/i.test(trimmed)
@@ -233,11 +234,19 @@ export class AssetInliner {
if (/\bpreload\b/i.test(rel) && /^style$/i.test(asValue)) {
return "";
}
let output = tag;
if (/\bpreload\b/i.test(rel) && /^image$/i.test(asValue)) {
const imageSrcset = getAttribute(output, "imagesrcset");
if (imageSrcset) {
output = setAttribute(output, "imagesrcset", await this.inlineSrcset(imageSrcset, baseUrl));
}
}
const dataUri = await this.toDataUri(href, baseUrl);
if (!dataUri) {
return "";
}
return setAttribute(tag, "href", dataUri);
return setAttribute(output, "href", dataUri);
}
async rewriteMediaAttributes(tag, baseUrl) {
@@ -293,8 +302,7 @@ export class AssetInliner {
}
async inlineSrcset(value, baseUrl) {
const candidates = value
.split(",")
const candidates = splitSrcset(value)
.map((part) => part.trim())
.filter(Boolean);
const rewritten = [];
@@ -425,6 +433,75 @@ export class AssetInliner {
}
}
/**
 * Splits a srcset-style attribute value into its individual candidates.
 *
 * A comma only ends a candidate when it is outside quotes, outside
 * parentheses, and `isSrcsetCandidateSeparator` accepts it; every other comma
 * is kept as part of the current candidate. Each returned candidate is
 * trimmed and blank candidates are dropped.
 *
 * @param {string} value - Raw attribute value.
 * @returns {string[]} Trimmed, non-empty candidate strings in source order.
 */
export function splitSrcset(value) {
  const result = [];
  let buffer = "";
  let openQuote = "";
  let depth = 0;

  // Emit the pending candidate (if it is not blank) and start a new one.
  const flush = () => {
    const trimmed = buffer.trim();
    if (trimmed) {
      result.push(trimmed);
    }
    buffer = "";
  };

  let pos = 0;
  while (pos < value.length) {
    const char = value[pos];
    pos += 1;

    // Inside a quoted run everything is literal until the matching quote.
    if (openQuote) {
      buffer += char;
      if (char === openQuote) {
        openQuote = "";
      }
      continue;
    }

    // `pos - 1` is the index of the comma itself; the predicate is consulted
    // only for commas at parenthesis depth zero.
    const endsCandidate =
      char === "," && depth === 0 && isSrcsetCandidateSeparator(value, pos - 1, buffer);
    if (endsCandidate) {
      flush();
      // Swallow the whitespace that follows the separator.
      while (pos < value.length && /\s/.test(value[pos])) {
        pos += 1;
      }
      continue;
    }

    if (char === '"' || char === "'") {
      openQuote = char;
    } else if (char === "(") {
      depth += 1;
    } else if (char === ")" && depth > 0) {
      depth -= 1;
    }
    buffer += char;
  }

  flush();
  return result;
}
/**
 * Decides whether the comma at `commaIndex` ends the current srcset candidate
 * rather than belonging to the candidate's URL.
 *
 * The comma is a separator when nothing but whitespace follows it, when it is
 * immediately followed by whitespace, or when the last whitespace-delimited
 * token of the candidate collected so far looks like a descriptor (a number
 * with an optional fraction followed by `w`, `x` or `h`, case-insensitive).
 *
 * @param {string} value - Full attribute value being split.
 * @param {number} commaIndex - Index of the comma within `value`.
 * @param {string} currentCandidate - Text collected for the candidate so far.
 * @returns {boolean} `true` if the comma separates two candidates.
 */
function isSrcsetCandidateSeparator(value, commaIndex, currentCandidate) {
  const rest = value.slice(commaIndex + 1);
  const nothingFollows = rest.trim() === "";
  const whitespaceFollows = /^\s/.test(rest);
  if (nothingFollows || whitespaceFollows) {
    return true;
  }

  // A comma glued to the next character only separates candidates when the
  // text before it already ends in a descriptor.
  const lastToken = currentCandidate.trim().split(/\s+/).pop() ?? "";
  return /^(?:\d+(?:\.\d+)?[wxh]|\d+(?:\.\d+)?x)$/i.test(lastToken);
}
function mimeFromUrl(rawUrl) {
let pathname = rawUrl;
try {