diff --git a/.changeset/pretty-candles-chew.md b/.changeset/pretty-candles-chew.md new file mode 100644 index 0000000000..2ee0a25f23 --- /dev/null +++ b/.changeset/pretty-candles-chew.md @@ -0,0 +1,8 @@ +--- +"rrweb-snapshot": minor +"rrweb": minor +"rrdom": patch +"@rrweb/types": patch +--- + +Added support for Asset Event and capturing many different types of assets (not just img#src) diff --git a/.changeset/yellow-vans-protect.md b/.changeset/yellow-vans-protect.md new file mode 100644 index 0000000000..28249aa8f0 --- /dev/null +++ b/.changeset/yellow-vans-protect.md @@ -0,0 +1,8 @@ +--- +"rrweb-snapshot": major +"@rrweb/types": patch +--- + +`NodeType` enum was moved from rrweb-snapshot to @rrweb/types +The following types were moved from rrweb-snapshot to @rrweb/types: `documentNode`, `documentTypeNode`, `attributes`, `legacyAttributes`, `elementNode`, `textNode`, `cdataNode`, `commentNode`, `serializedNode`, `serializedNodeWithId` and `DataURLOptions` +`inlineImages` config option is deprecated and in `rrweb` is an alias for `captureAssets` config option diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index e47ee3c9c2..239350cf40 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -75,12 +75,12 @@ jobs: runs-on: ubuntu-latest name: Format Code steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: repository: ${{ github.event.pull_request.head.repo.full_name }} ref: ${{ github.head_ref }} - name: Setup Node - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 with: node-version: lts/* cache: 'yarn' diff --git a/docs/assets.md b/docs/assets.md new file mode 100644 index 0000000000..2ef4f05b28 --- /dev/null +++ b/docs/assets.md @@ -0,0 +1,66 @@ +# Asset Capture Methods & Configuration in rrweb + +[rrweb](https://rrweb.io/) is a JavaScript library that allows you to record and replay user interactions on your website. 
It provides various configuration options for capturing assets (such as images) during the recording process. In this document, we will explore the different asset capture methods and their configuration options in rrweb. + +## Asset Events + +Assets are a new type of event that embodies a serialized version of an HTTP resource captured during snapshotting. Some examples are images, media files and stylesheets. Resources can be fetched externally (from cache) in the case of an href, or internally for `blob:` URLs and same-origin stylesheets. Asset events are emitted subsequent to either a FullSnapshot or an IncrementalSnapshot (mutation), and although they may have a later timestamp, during replay they are rebuilt as part of the snapshot that they are associated with. In the case where e.g. a stylesheet is referenced at the time of a FullSnapshot, but hasn't been downloaded yet, there can be a subsequent mutation event with a later timestamp which, along with the asset event, can recreate the experience of a network-delayed load of the stylesheet. + +## Assets to mitigate stylesheet processing cost + +In the case of stylesheets, rrweb does some record-time processing in order to serialize the CSS rules, which previously had a negative effect on the initial page loading times and how quickly the FullSnapshot was taken (see https://pagespeed.web.dev/). This processing is now deferred until the browser is idle (using `requestIdleCallback` where available) and performed asynchronously, with the result emitted later (within up to `processStylesheetsWithin` ms). There is no corresponding delay on the replay side so long as the stylesheet has been successfully emitted. + +## Asset Capture Configuration + +The `captureAssets` configuration option allows you to customize the asset capture process. It is an object with the following properties: + +- `objectURLs` (default: `true`): This property specifies whether to capture same-origin `blob:` assets using object URLs. Object URLs are created using the `URL.createObjectURL()` method. 
Setting `objectURLs` to `true` enables the capture of object URLs. + +- `origins` (default: `false`): This property determines which origins to capture assets from. It can have the following values: + + - `false` or `[]`: Disables capturing any assets apart from object URLs, stylesheets (unless set to false) and images (if that setting is turned on). + - `true`: Captures assets from all origins. + - `[origin1, origin2, ...]`: Captures assets only from the specified origins. For example, `origins: ['https://s3.example.com/']` captures all assets from the origin `https://s3.example.com/`. + +- `images` (default: `false` or `true` if `inlineImages` is true in `rrweb.record` config): When set, this option turns on asset capturing for all images irrespective of their origin. When this configuration option is false, images may still be captured if their `src` URL matches the `origins` setting above. + +- `stylesheets` (default: `'without-fetch'`): When set to `true`, this turns on capturing of all stylesheets and style elements via the asset system irrespective of origin. The default of `'without-fetch'` is designed to match with the previous `inlineStylesheet` behaviour, whereas the `true` value allows stylesheets which are otherwise inaccessible due to CORS restrictions to be captured via a fetch call, which will normally use the browser cache. If a stylesheet matches via the `origins` config above, it will be captured even under the default `'without-fetch'` setting (either directly or via fetch); an explicit `stylesheets: false` still disables capture. + +- `stylesheetsRuleThreshold` (default: `0`): Only invoke the asset system for stylesheets with more than this number of rules. Defaults to zero (rather than say 100) as it only looks at the 'outer' rules (e.g. could have a single media rule which nests 1000s of sub rules). This default may be increased based on feedback. 
+ +- `processStylesheetsWithin` (default: `2000`): This property defines the maximum time in milliseconds that the browser should delay before processing stylesheets. Inline ` +`); + + const callback = vi.fn(); + serializeNode(el, callback); + expect(callback).toBeCalledTimes(1); + expect(callback).toHaveBeenCalledWith({ + element: el.querySelector('style'), + attr: 'css_text', + styleId: 1, + value: 'http://localhost:3000/', + }); + }); + + it('should detect style depending on if stylesheetsRuleThreshold is met', () => { + const el = render(`
+ + +
`); + + const callback = vi.fn(); + const stylesheetsRuleThreshold = 2; + const inlineImages = undefined; + serializeNode(el, callback, inlineImages, stylesheetsRuleThreshold); + expect(callback).toBeCalledTimes(1); + }); +}); diff --git a/packages/rrweb-snapshot/test/utils.test.ts b/packages/rrweb-snapshot/test/utils.test.ts index c422223bed..d744794221 100644 --- a/packages/rrweb-snapshot/test/utils.test.ts +++ b/packages/rrweb-snapshot/test/utils.test.ts @@ -2,14 +2,17 @@ * @vitest-environment jsdom */ import { describe, it, test, expect } from 'vitest'; -import { NodeType, serializedNode } from '../src/types'; +import { NodeType } from '@rrweb/types'; import { escapeImportStatement, extractFileExtension, fixSafariColons, + shouldIgnoreAsset, + isAttributeCapturable, + shouldCaptureAsset, isNodeMetaEqual, } from '../src/utils'; -import type { serializedNodeWithId } from 'rrweb-snapshot'; +import type { serializedNode, serializedNodeWithId } from '@rrweb/types'; describe('utils', () => { describe('isNodeMetaEqual()', () => { @@ -153,6 +156,7 @@ describe('utils', () => { expect(isNodeMetaEqual(element2, element3)).toBeFalsy(); }); }); + describe('extractFileExtension', () => { test('absolute path', () => { const path = 'https://example.com/styles/main.css'; @@ -280,4 +284,162 @@ describe('utils', () => { expect(out3).toEqual('[data-aa\\:other] { color: red; }'); }); }); + + describe('shouldIgnoreAsset()', () => { + it(`should ignore assets when config not specified`, () => { + expect(shouldIgnoreAsset('http://example.com', {})).toBe(true); + }); + + it(`should not ignore matching origin`, () => { + expect( + shouldIgnoreAsset('http://example.com/', { + origins: ['http://example.com'], + }), + ).toBe(false); + }); + + it(`should ignore mismatched origin`, () => { + expect( + shouldIgnoreAsset('http://123.com/', { + origins: ['http://example.com'], + }), + ).toBe(true); + }); + + it(`should ignore malformed url`, () => { + expect( + shouldIgnoreAsset('http:', { 
origins: ['http://example.com'] }), + ).toBe(true); + }); + + it(`should ignore malformed url even with origins: true`, () => { + expect(shouldIgnoreAsset('http:', { origins: true })).toBe(true); + }); + }); + + describe('isAttributeCapturable()', () => { + const validAttributeCombinations = [ + ['img', ['src', 'srcset']], + ['video', ['src']], + ['audio', ['src']], + ['embed', ['src']], + ['source', ['src']], + ['track', ['src']], + ['input', ['src']], + ['object', ['src']], + ] as const; + + const invalidAttributeCombinations = [ + ['img', ['href']], + ['script', ['href']], + ['link', ['src']], + ['video', ['href']], + ['audio', ['href']], + ['div', ['src']], + ['source', ['href']], + ['track', ['href']], + ['input', ['href']], + ['iframe', ['href']], + ['object', ['href']], + ['link', ['href']], // without rel="stylesheet" + ] as const; + + validAttributeCombinations.forEach(([tagName, attributes]) => { + const element = document.createElement(tagName); + attributes.forEach((attribute) => { + it(`should correctly identify <${tagName} ${attribute}> as capturable`, () => { + expect(isAttributeCapturable(element, attribute)).toBe(true); + }); + }); + }); + + invalidAttributeCombinations.forEach(([tagName, attributes]) => { + const element = document.createElement(tagName); + attributes.forEach((attribute) => { + it(`should correctly identify <${tagName} ${attribute}> as NOT capturable`, () => { + expect(isAttributeCapturable(element, attribute)).toBe(false); + }); + }); + }); + + it(`should correctly identify as capturable if inlineStylesheet == 'all'`, () => { + const element = document.createElement('link'); + element.setAttribute('rel', 'StyleSheet'); + + // pretend it has loaded but isn't CORS accessible + Object.defineProperty(element, 'sheet', { + value: true, + }); + + const ca = { + objectURLs: false, + origins: false, + }; + expect( + shouldCaptureAsset(element, 'href', 'https://example.com/style.css', { + ...ca, + stylesheets: false, + }), + 
).toBe(false); + expect( + shouldCaptureAsset(element, 'href', 'https://example.com/style.css', { + ...ca, + stylesheets: 'without-fetch', + }), + ).toBe(false); // this is false for backwards compatibility + expect( + shouldCaptureAsset(element, 'href', 'https://example.com/style.css', { + ...ca, + stylesheets: true, + }), + ).toBe(true); + }); + + it(`should not identify as capturable if it hasn't loaded yet`, () => { + const element = document.createElement('link'); + element.setAttribute('rel', 'StyleSheet'); + expect( + shouldCaptureAsset(element, 'href', 'https://example.com/style.css', { + objectURLs: false, + origins: false, + stylesheets: true, + }), + ).toBe(false); // will capture as mutation when it loads + }); + + it(`should correctly identify stylesheet as capturable due to origin match, but respect a hard stylesheets=false`, () => { + const element = document.createElement('link'); + element.setAttribute('rel', 'StyleSheet'); + + // pretend it has loaded but isn't CORS accessible + Object.defineProperty(element, 'sheet', { + value: true, + }); + + const ca = { + objectURLs: false, + origins: ['https://example.com'], + }; + expect( + shouldCaptureAsset( + element, + 'href', + 'https://example.com/style.css', + ca, // stylesheets undefined (not actually possible from rrweb) + ), + ).toBe(true); + expect( + shouldCaptureAsset(element, 'href', 'https://example.com/style.css', { + ...ca, + stylesheets: 'without-fetch', // the default from rrweb + }), + ).toBe(true); // because of origins + expect( + shouldCaptureAsset(element, 'href', 'https://example.com/style.css', { + ...ca, + stylesheets: false, // explicit off, override origins + }), + ).toBe(false); + }); + }); }); diff --git a/packages/rrweb-snapshot/tsconfig.json b/packages/rrweb-snapshot/tsconfig.json index 67a5bdab7d..e561253cfd 100644 --- a/packages/rrweb-snapshot/tsconfig.json +++ b/packages/rrweb-snapshot/tsconfig.json @@ -1,12 +1,21 @@ { "extends": "../../tsconfig.base.json", - "include": 
["src"], - "exclude": ["vite.config.ts", "vitest.config.ts", "test"], + "include": [ + "src" + ], + "exclude": [ + "vite.config.ts", + "vitest.config.ts", + "test" + ], "compilerOptions": { "rootDir": "src", "tsBuildInfoFile": "./tsconfig.tsbuildinfo" }, "references": [ + { + "path": "../types" + }, { "path": "../utils" } diff --git a/packages/rrweb/package.json b/packages/rrweb/package.json index 2623a517ff..22c7ccdb2d 100644 --- a/packages/rrweb/package.json +++ b/packages/rrweb/package.json @@ -77,7 +77,8 @@ "tslib": "^2.3.1", "typescript": "^5.4.5", "vite": "^5.3.1", - "vite-plugin-dts": "^3.9.1" + "vite-plugin-dts": "^3.9.1", + "vitest": "^1.6.0" }, "dependencies": { "@rrweb/types": "^2.0.0-alpha.17", diff --git a/packages/rrweb/scripts/stream.js b/packages/rrweb/scripts/stream.js index b4e63a7d31..7cf55cabf3 100644 --- a/packages/rrweb/scripts/stream.js +++ b/packages/rrweb/scripts/stream.js @@ -49,7 +49,10 @@ async function injectRecording(frame, serverURL) { recordCanvas: false, recordCrossOriginIframes: true, collectFonts: true, - inlineImages: true, + captureAssets: { + objectURLs: true, + origins: true, + }, }); })(); }); diff --git a/packages/rrweb/src/record/iframe-manager.ts b/packages/rrweb/src/record/iframe-manager.ts index 20553feaf4..d5a1e5ba43 100644 --- a/packages/rrweb/src/record/iframe-manager.ts +++ b/packages/rrweb/src/record/iframe-manager.ts @@ -1,11 +1,12 @@ -import type { Mirror, serializedNodeWithId } from 'rrweb-snapshot'; -import { genId, NodeType } from 'rrweb-snapshot'; +import type { Mirror } from 'rrweb-snapshot'; +import { genId } from 'rrweb-snapshot'; import type { CrossOriginIframeMessageEvent } from '../types'; import CrossOriginIframeMirror from './cross-origin-iframe-mirror'; -import { EventType, IncrementalSource } from '@rrweb/types'; +import { EventType, NodeType, IncrementalSource } from '@rrweb/types'; import type { eventWithTime, eventWithoutTime, + serializedNodeWithId, mutationCallBack, } from '@rrweb/types'; import 
type { StylesheetManager } from './stylesheet-manager'; @@ -61,6 +62,7 @@ export class IframeManager { iframeEl: HTMLIFrameElement, childSn: serializedNodeWithId, ) { + // a mutation rather than an asset event so that we record the timestamp that the iframe was loaded this.mutationCb({ adds: [ { diff --git a/packages/rrweb/src/record/index.ts b/packages/rrweb/src/record/index.ts index 1308c378a6..7610927c98 100644 --- a/packages/rrweb/src/record/index.ts +++ b/packages/rrweb/src/record/index.ts @@ -27,6 +27,9 @@ import { type scrollCallback, type canvasMutationParam, type adoptedStyleSheetParam, + type assetParam, + type asset, + type assetStatus, } from '@rrweb/types'; import type { CrossOriginIframeMessageEventContent } from '../types'; import { IframeManager } from './iframe-manager'; @@ -40,11 +43,13 @@ import { unregisterErrorHandler, } from './error-handler'; import dom from '@rrweb/utils'; +import AssetManager from './observers/asset-manager'; let wrappedEmit!: (e: eventWithoutTime, isCheckout?: boolean) => void; let takeFullSnapshot!: (isCheckout?: boolean) => void; let canvasManager!: CanvasManager; +let assetManager!: AssetManager; let recording = false; // Multiple tools (i.e. 
MooTools, Prototype.js) override Array.from and drop support for the 2nd parameter @@ -95,12 +100,30 @@ function record( userTriggeredOnInput = false, collectFonts = false, inlineImages = false, + captureAssets = { + objectURLs: true, + origins: false, + }, plugins, keepIframeSrcFn = () => false, ignoreCSSAttributes = new Set([]), errorHandler, } = options; + if (inlineImages) { + captureAssets.images = inlineImages; + } + if (captureAssets.stylesheets === undefined) { + if (inlineStylesheet === 'all') { + captureAssets.stylesheets = true; + } else if (inlineStylesheet) { + // the prior default setting + captureAssets.stylesheets = 'without-fetch'; + } else { + captureAssets.stylesheets = false; + } + } + registerErrorHandler(errorHandler); const inEmittingFrame = recordCrossOriginIframes @@ -279,6 +302,12 @@ function record( }, }); + const wrappedAssetEmit = (p: assetParam) => + wrappedEmit({ + type: EventType.Asset, + data: p, + }); + const wrappedAdoptedStyleSheetEmit = (a: adoptedStyleSheetParam) => wrappedEmit({ type: EventType.IncrementalSnapshot, @@ -327,6 +356,12 @@ function record( dataURLOptions, }); + assetManager = new AssetManager({ + mutationCb: wrappedAssetEmit, + win: window, + captureAssets, + }); + const shadowDomManager = new ShadowDomManager({ mutationCb: wrappedMutationEmit, scrollCb: wrappedScrollEmit, @@ -338,10 +373,10 @@ function record( inlineStylesheet, maskInputOptions, dataURLOptions, + captureAssets, maskTextFn, maskInputFn, recordCanvas, - inlineImages, sampling, slimDOMOptions, iframeManager, @@ -349,6 +384,7 @@ function record( canvasManager, keepIframeSrcFn, processedNodeManager, + assetManager, }, mirror, }); @@ -374,6 +410,8 @@ function record( shadowDomManager.init(); + const capturedAssetStatuses: assetStatus[] = []; + mutationBuffers.forEach((buf) => buf.lock()); // don't allow any mirror modifications during snapshotting const node = snapshot(document, { mirror, @@ -381,14 +419,14 @@ function record( blockSelector, 
maskTextClass, maskTextSelector, - inlineStylesheet, + inlineStylesheet: Boolean(inlineStylesheet), // 'all' value can be discarded as has already been transferred into `captureAssets` maskAllInputs: maskInputOptions, maskTextFn, maskInputFn, slimDOM: slimDOMOptions, dataURLOptions, + captureAssets, recordCanvas, - inlineImages, onSerialize: (n) => { if (isSerializedIframe(n, mirror)) { iframeManager.addIframe(n as HTMLIFrameElement); @@ -408,20 +446,32 @@ function record( onStylesheetLoad: (linkEl, childSn) => { stylesheetManager.attachLinkElement(linkEl, childSn); }, + onAssetDetected: (asset: asset) => { + const assetStatus = assetManager.capture(asset); + if (Array.isArray(assetStatus)) { + // removeme when we just capture one asset from srcset + capturedAssetStatuses.push(...assetStatus); + } else { + capturedAssetStatuses.push(assetStatus); + } + }, keepIframeSrcFn, }); if (!node) { return console.warn('Failed to snapshot the document'); } - + const data: any = { + node, + initialOffset: getWindowScroll(window), + }; + if (capturedAssetStatuses.length) { + data.capturedAssetStatuses = capturedAssetStatuses; + } wrappedEmit( { type: EventType.FullSnapshot, - data: { - node, - initialOffset: getWindowScroll(window), - }, + data, }, isCheckout, ); @@ -536,7 +586,6 @@ function record( sampling, recordDOM, recordCanvas, - inlineImages, userTriggeredOnInput, collectFonts, doc, @@ -545,6 +594,7 @@ function record( keepIframeSrcFn, blockSelector, slimDOMOptions, + captureAssets, dataURLOptions, mirror, iframeManager, @@ -552,6 +602,7 @@ function record( shadowDomManager, processedNodeManager, canvasManager, + assetManager, ignoreCSSAttributes, plugins: plugins diff --git a/packages/rrweb/src/record/mutation.ts b/packages/rrweb/src/record/mutation.ts index 42170b4940..3c8c0cc03e 100644 --- a/packages/rrweb/src/record/mutation.ts +++ b/packages/rrweb/src/record/mutation.ts @@ -1,4 +1,6 @@ import { + absolutifyURLs, + getHref, serializeNodeWithId, transformAttribute, 
IGNORED_NODE, @@ -19,6 +21,7 @@ import type { removedNodeMutation, addedNodeMutation, Optional, + asset, } from '@rrweb/types'; import { isBlocked, @@ -33,6 +36,7 @@ import { closestElementOfNode, } from '../utils'; import dom from '@rrweb/utils'; +import { isProcessingStyleElement } from './observers/asset-manager'; type DoubleLinkedListNode = { previous: DoubleLinkedListNode | null; @@ -182,7 +186,7 @@ export default class MutationBuffer { private maskInputFn: observerParam['maskInputFn']; private keepIframeSrcFn: observerParam['keepIframeSrcFn']; private recordCanvas: observerParam['recordCanvas']; - private inlineImages: observerParam['inlineImages']; + private captureAssets: observerParam['captureAssets']; private slimDOMOptions: observerParam['slimDOMOptions']; private dataURLOptions: observerParam['dataURLOptions']; private doc: observerParam['doc']; @@ -193,6 +197,7 @@ export default class MutationBuffer { private canvasManager: observerParam['canvasManager']; private processedNodeManager: observerParam['processedNodeManager']; private unattachedDoc: HTMLDocument; + private assetManager: observerParam['assetManager']; public init(options: MutationBufferParam) { ( @@ -207,8 +212,8 @@ export default class MutationBuffer { 'maskTextFn', 'maskInputFn', 'keepIframeSrcFn', + 'captureAssets', 'recordCanvas', - 'inlineImages', 'slimDOMOptions', 'dataURLOptions', 'doc', @@ -218,6 +223,7 @@ export default class MutationBuffer { 'shadowDomManager', 'canvasManager', 'processedNodeManager', + 'assetManager', ] as const ).forEach((key) => { // just a type trick, the runtime result is correct @@ -312,6 +318,7 @@ export default class MutationBuffer { if (parentId === -1 || nextId === -1) { return addList.addNode(n); } + const sn = serializeNodeWithId(n, { doc: this.doc, mirror: this.mirror, @@ -321,14 +328,17 @@ export default class MutationBuffer { maskTextSelector: this.maskTextSelector, skipChild: true, newlyAddedElement: true, - inlineStylesheet: this.inlineStylesheet, 
+ inlineStylesheet: Boolean(this.inlineStylesheet), maskInputOptions: this.maskInputOptions, maskTextFn: this.maskTextFn, maskInputFn: this.maskInputFn, slimDOMOptions: this.slimDOMOptions, dataURLOptions: this.dataURLOptions, + captureAssets: { + ...this.captureAssets, + _fromMutation: true, + }, recordCanvas: this.recordCanvas, - inlineImages: this.inlineImages, onSerialize: (currentN) => { if (isSerializedIframe(currentN, this.mirror)) { this.iframeManager.addIframe(currentN as HTMLIFrameElement); @@ -351,6 +361,9 @@ export default class MutationBuffer { this.stylesheetManager.attachLinkElement(link, childSn); }, cssCaptured, + onAssetDetected: (asset: asset) => { + this.assetManager.capture(asset); + }, }); if (sn) { adds.push({ @@ -453,13 +466,24 @@ export default class MutationBuffer { .map((text) => { const n = text.node; const parent = dom.parentNode(n); - if (parent && (parent as Element).tagName === 'TEXTAREA') { - // the node is being ignored as it isn't in the mirror, so shift mutation to attributes on parent textarea - this.genTextAreaValueMutation(parent as HTMLTextAreaElement); + let value = text.value; + if (parent) { + const parentEl = parent as Element; + if (parentEl.tagName === 'TEXTAREA') { + // the node is being ignored as it isn't in the mirror, so shift mutation to attributes on parent textarea + this.genTextAreaValueMutation(parentEl as HTMLTextAreaElement); + } else if (parentEl.tagName === 'STYLE') { + if (isProcessingStyleElement(parentEl)) { + // stylesheet hasn't been captured as an asset yet, ignore this mutation + return { id: -1, value: null }; + } else { + value = absolutifyURLs(value, getHref(this.doc)); + } + } } return { id: this.mirror.getId(n), - value: text.value, + value, }; }) // no need to include them on added elements, as they have just been serialized with up to date attribubtes @@ -629,12 +653,30 @@ export default class MutationBuffer { if (!ignoreAttribute(target.tagName, attributeName, value)) { // overwrite 
attribute if the mutations was triggered in same time - item.attributes[attributeName] = transformAttribute( + const transformedValue = transformAttribute( this.doc, toLowerCase(target.tagName), toLowerCase(attributeName), value, ); + if ( + transformedValue && + this.assetManager.shouldCapture( + target, + attributeName, + transformedValue, + this.captureAssets, + ) + ) { + this.assetManager.capture({ + element: target, + attr: attributeName, + value: transformedValue, + }); + attributeName = `rr_captured_${attributeName}`; + } + item.attributes[attributeName] = transformedValue; + if (attributeName === 'style') { if (!this.unattachedDoc) { try { @@ -695,6 +737,10 @@ export default class MutationBuffer { this.genTextAreaValueMutation(m.target as HTMLTextAreaElement); return; // any removedNodes won't have been in mirror either } + if (isProcessingStyleElement(m.target as HTMLElement)) { + // stylesheet hasn't been captured as an asset yet, don't need to record child mutations + return; + } m.addedNodes.forEach((n) => this.genAdds(n, m.target)); m.removedNodes.forEach((n) => { diff --git a/packages/rrweb/src/record/observers/asset-manager.ts b/packages/rrweb/src/record/observers/asset-manager.ts new file mode 100644 index 0000000000..f1241114ab --- /dev/null +++ b/packages/rrweb/src/record/observers/asset-manager.ts @@ -0,0 +1,352 @@ +import type { + IWindow, + SerializedCanvasArg, + SerializedCssTextArg, + eventWithTime, + listenerHandler, + asset, + captureAssetsParam, + assetStatus, +} from '@rrweb/types'; +import type { assetCallback } from '@rrweb/types'; +import { encode } from 'base64-arraybuffer'; + +import { patch } from '../../utils'; + +import type { recordOptions, ProcessingStyleElement } from '../../types'; +import { + getSourcesFromSrcset, + shouldCaptureAsset, + stringifyCssRules, + absolutifyURLs, + splitCssText, +} from 'rrweb-snapshot'; + +export function isProcessingStyleElement( + el: Element, +): el is ProcessingStyleElement { + return 
'__rrProcessingStylesheet' in el; +} + +export default class AssetManager { + private urlObjectMap = new Map(); + private urlTextMap = new Map(); + private capturedURLs = new Set(); + private capturingURLs = new Set(); + private failedURLs = new Set(); + private resetHandlers: listenerHandler[] = []; + private mutationCb: assetCallback; + public readonly config: Exclude< + recordOptions['captureAssets'], + undefined + >; + + public reset() { + this.urlObjectMap.clear(); + this.urlTextMap.clear(); + this.capturedURLs.clear(); + this.capturingURLs.clear(); + this.failedURLs.clear(); + this.resetHandlers.forEach((h) => h()); + } + + constructor(options: { + mutationCb: assetCallback; + win: IWindow; + captureAssets: Exclude< + recordOptions['captureAssets'], + undefined + >; + }) { + const { win } = options; + + this.mutationCb = options.mutationCb; + this.config = options.captureAssets; + + const urlObjectMap = this.urlObjectMap; + + if (this.config.objectURLs || this.config.images) { + try { + // monkeypatching allows us to store object blobs when they are created + // so that we don't have to perform a slower `fetch` in order to serialize them + const restoreHandler = patch( + win.URL, + 'createObjectURL', + function (original: (obj: File | Blob | MediaSource) => string) { + return function (obj: File | Blob | MediaSource) { + const url = original.apply(this, [obj]); + urlObjectMap.set(url, obj); + return url; + }; + }, + ); + this.resetHandlers.push(restoreHandler); + } catch { + console.error('failed to patch URL.createObjectURL'); + } + + try { + const restoreHandler = patch( + win.URL, + 'revokeObjectURL', + function (original: (objectURL: string) => void) { + return function (objectURL: string) { + urlObjectMap.delete(objectURL); + return original.apply(this, [objectURL]); + }; + }, + ); + this.resetHandlers.push(restoreHandler); + } catch { + console.error('failed to patch URL.revokeObjectURL'); + } + } + } + + public async getURLObject( + url: string, + ): 
Promise { + const object = this.urlObjectMap.get(url); + if (object) { + return object; + } + const text = this.urlTextMap.get(url); + if (text) { + return text; + } + + try { + const response = await fetch(url); + const contentType = response.headers.get('content-type'); + if (contentType && contentType.includes('text/css')) { + return await response.text(); + } else { + return await response.blob(); + } + } catch (e) { + console.warn(`getURLObject failed for ${url}`); + throw e; + } + } + + private captureStylesheet( + sheetBaseHref: string, + el: HTMLLinkElement | HTMLStyleElement, + styleId?: number, + ): assetStatus { + let cssRules: CSSRuleList; + let url = sheetBaseHref; // linkEl.href for a link element + if (styleId) { + url += `#rr_style_el:${styleId}`; + } + try { + cssRules = el.sheet!.cssRules; + } catch (e) { + if (el.tagName === 'STYLE') { + // sheetBaseHref represents the document url the style element is embedded in so can't be fetched + return { + url, + status: 'refused', + }; + } + if (this.capturedURLs.has(url)) { + return { + url, + status: 'captured', + }; + } else if (this.capturingURLs.has(url)) { + return { + url, + status: 'capturing', + }; + } else if (this.failedURLs.has(url)) { + return { + url, + status: 'error', + }; + } + this.capturingURLs.add(url); + // stylesheet could not be found or + // is not readable due to CORS, fallback to fetch + void this.getURLObject(url) + .then((cssText) => { + this.capturedURLs.add(url); + this.capturingURLs.delete(url); + + if (cssText && typeof cssText === 'string') { + const payload: SerializedCssTextArg = { + rr_type: 'CssText', + cssTexts: [absolutifyURLs(cssText, sheetBaseHref)], + }; + this.mutationCb({ + url, + payload, + }); + } + }) + .catch(this.fetchCatcher(url)); + return { + url, + status: 'capturing', // 'processing' ? 
+ }; + } + const processStylesheet = () => { + cssRules = el.sheet!.cssRules; // update, as a mutation may have since occurred + const cssText = stringifyCssRules(cssRules, sheetBaseHref); + const payload: SerializedCssTextArg = { + rr_type: 'CssText', + cssTexts: [cssText], + }; + if (styleId) { + if (el.childNodes.length > 1) { + payload.cssTexts = splitCssText(cssText, el as HTMLStyleElement); + } + this.mutationCb({ + url, + payload, + }); + } else { + this.mutationCb({ + url: sheetBaseHref, + payload, + }); + } + if (isProcessingStyleElement(el)) { + delete el.__rrProcessingStylesheet; + } + }; + let timeout = this.config.processStylesheetsWithin; + if (!timeout && timeout !== 0) { + timeout = 2000; + } + if (window.requestIdleCallback !== undefined && timeout > 0) { + if (el.tagName === 'STYLE') { + // mark it so mutations on it can be ignored until processed + (el as ProcessingStyleElement).__rrProcessingStylesheet = true; + // process inline style elements before external links + // as they are more integral to the page and more likely + // to only appear on this page (can't be reconstructed if lost) + timeout = Math.floor(timeout / 2); + } + // try not to clog up main thread + requestIdleCallback(processStylesheet, { + timeout, + }); + return { + url, + status: 'capturing', // 'processing' ? 
+ timeout, + }; + } else { + processStylesheet(); + return { + url, + status: 'captured', + }; + } + } + + public capture(asset: asset): assetStatus | assetStatus[] { + if ('sheet' in asset.element) { + return this.captureStylesheet( + asset.value, + asset.element as HTMLStyleElement | HTMLLinkElement, + asset.styleId, + ); + } else if (asset.attr === 'srcset') { + const statuses: assetStatus[] = []; + getSourcesFromSrcset(asset.value).forEach((url) => { + statuses.push(this.captureUrl(url)); + }); + return statuses; + } else { + return this.captureUrl(asset.value); + } + } + + private captureUrl(url: string): assetStatus { + if (this.capturedURLs.has(url)) { + return { + url, + status: 'captured', + }; + } else if (this.capturingURLs.has(url)) { + return { + url, + status: 'capturing', + }; + } else if (this.failedURLs.has(url)) { + return { + url, + status: 'error', + }; + } + this.capturingURLs.add(url); + void this.getURLObject(url) + .then(async (object) => { + if (object) { + let payload: SerializedCanvasArg; + if (object instanceof File || object instanceof Blob) { + const arrayBuffer = await object.arrayBuffer(); + const base64 = encode(arrayBuffer); // cpu intensive, probably good idea to move all of this to a webworker + + payload = { + rr_type: 'Blob', + type: object.type, + data: [ + { + rr_type: 'ArrayBuffer', + base64, // base64 + }, + ], + }; + + this.capturedURLs.add(url); + this.capturingURLs.delete(url); + + this.mutationCb({ + url, + payload, + }); + } + } + }) + .catch(this.fetchCatcher(url)); + + return { + url, + status: 'capturing', + }; + } + + private fetchCatcher(url: string) { + return (e: unknown) => { + let message = ''; + if (e instanceof Error) { + message = e.message; + } else if (typeof e === 'string') { + message = e; + } else if (e && typeof e === 'object' && 'toString' in e) { + message = (e as { toString(): string }).toString(); + } + this.mutationCb({ + url, + failed: { + message, + }, + }); + + this.failedURLs.add(url); + 
this.capturingURLs.delete(url); + }; + } + + public shouldCapture( + n: Element, + attribute: string, + value: string, + config: captureAssetsParam, + ): boolean { + return shouldCaptureAsset(n, attribute, value, config); + } +} diff --git a/packages/rrweb/src/record/observers/canvas/canvas-manager.ts b/packages/rrweb/src/record/observers/canvas/canvas-manager.ts index 6e6bfdf1aa..82c756ba24 100644 --- a/packages/rrweb/src/record/observers/canvas/canvas-manager.ts +++ b/packages/rrweb/src/record/observers/canvas/canvas-manager.ts @@ -1,4 +1,4 @@ -import type { ICanvas, Mirror, DataURLOptions } from 'rrweb-snapshot'; +import type { ICanvas, Mirror } from 'rrweb-snapshot'; import type { blockClass, canvasManagerMutationCallback, @@ -8,6 +8,7 @@ import type { IWindow, listenerHandler, CanvasArg, + DataURLOptions, } from '@rrweb/types'; import { isBlocked } from '../../../utils'; import { CanvasContext } from '@rrweb/types'; diff --git a/packages/rrweb/src/record/stylesheet-manager.ts b/packages/rrweb/src/record/stylesheet-manager.ts index c2bbacc6ff..4b410e0a12 100644 --- a/packages/rrweb/src/record/stylesheet-manager.ts +++ b/packages/rrweb/src/record/stylesheet-manager.ts @@ -1,6 +1,7 @@ -import type { elementNode, serializedNodeWithId } from 'rrweb-snapshot'; import { stringifyRule } from 'rrweb-snapshot'; import type { + elementNode, + serializedNodeWithId, adoptedStyleSheetCallback, adoptedStyleSheetParam, attributeMutation, @@ -26,7 +27,11 @@ export class StylesheetManager { linkEl: HTMLLinkElement, childSn: serializedNodeWithId, ) { - if ('_cssText' in (childSn as elementNode).attributes) + // a mutation rather than an asset event so that we record the timestamp that the stylesheet was loaded + if ( + '_cssText' in (childSn as elementNode).attributes || + 'rr_captured_href' in (childSn as elementNode).attributes + ) { this.mutationCb({ adds: [], removes: [], @@ -39,6 +44,7 @@ export class StylesheetManager { }, ], }); + } this.trackLinkElement(linkEl); } diff 
--git a/packages/rrweb/src/record/workers/image-bitmap-data-url-worker.ts b/packages/rrweb/src/record/workers/image-bitmap-data-url-worker.ts index 374edfe1b0..b5aed49634 100644 --- a/packages/rrweb/src/record/workers/image-bitmap-data-url-worker.ts +++ b/packages/rrweb/src/record/workers/image-bitmap-data-url-worker.ts @@ -1,6 +1,6 @@ import { encode } from 'base64-arraybuffer'; -import type { DataURLOptions } from 'rrweb-snapshot'; import type { + DataURLOptions, ImageBitmapDataURLWorkerParams, ImageBitmapDataURLWorkerResponse, } from '@rrweb/types'; diff --git a/packages/rrweb/src/replay/asset-manager/index.ts b/packages/rrweb/src/replay/asset-manager/index.ts new file mode 100644 index 0000000000..0861c3fa84 --- /dev/null +++ b/packages/rrweb/src/replay/asset-manager/index.ts @@ -0,0 +1,330 @@ +import type { + RebuildAssetManagerFinalStatus, + RebuildAssetManagerInterface, + RebuildAssetManagerStatus, + assetEvent, + SerializedCssTextArg, + SerializedCanvasArg, + serializedElementNodeWithId, +} from '@rrweb/types'; +import { deserializeArg } from '../canvas/deserialize-args'; +import { + getSourcesFromSrcset, + buildStyleNode, + type BuildCache, +} from 'rrweb-snapshot'; +import type { RRElement } from 'rrdom'; +import { updateSrcset } from './update-srcset'; + +export default class AssetManager implements RebuildAssetManagerInterface { + private originalToObjectURLMap: Map> = new Map(); + private urlToStylesheetMap: Map> = new Map(); + private nodeIdAttributeHijackedMap: Map> = + new Map(); + private loadingURLs: Set = new Set(); + private failedURLs: Set = new Set(); + private callbackMap: Map< + string, + Array<(status: RebuildAssetManagerFinalStatus) => void> + > = new Map(); + private liveMode: boolean; + private cache: BuildCache; + public expectedAssets: Set | null = null; + public replayerApproxTs: number = 0; + + constructor({ liveMode, cache }: { liveMode: boolean; cache: BuildCache }) { + this.liveMode = liveMode; + this.cache = cache; + } + + public 
async add(event: assetEvent & { timestamp: number }) { + const { data } = event; + const { url, payload, failed } = { payload: false, failed: false, ...data }; + if (failed) { + this.failedURLs.add(url); + this.executeCallbacks(url, { status: 'failed' }); + return; + } + if (this.loadingURLs.has(url)) { + return; + } + this.loadingURLs.add(url); + if (this.expectedAssets !== null) { + this.expectedAssets.delete(url); + } + + // tracks if deserializing did anything, not really needed for AssetManager + const status = { + isUnchanged: true, + }; + + if (payload.rr_type === 'CssText') { + const cssPayload = payload as SerializedCssTextArg; + let assets = this.urlToStylesheetMap.get(url); + if (!assets) { + assets = new Map(); + this.urlToStylesheetMap.set(url, assets); + } + assets.set(event.timestamp, cssPayload.cssTexts); + this.loadingURLs.delete(url); + this.failedURLs.delete(url); + this.executeCallbacks(url, { + status: 'loaded', + url, + cssTexts: cssPayload.cssTexts, + }); + } else { + // TODO: extract the logic only needed for assets from deserializeArg + const result = (await deserializeArg( + new Map(), + null, + status, + )(payload as SerializedCanvasArg)) as Blob | MediaSource; + const objectURL = URL.createObjectURL(result); + let assets = this.originalToObjectURLMap.get(url); + if (!assets) { + assets = new Map(); + this.originalToObjectURLMap.set(url, assets); + } + assets.set(event.timestamp, objectURL); + this.loadingURLs.delete(url); + this.failedURLs.delete(url); + this.executeCallbacks(url, { status: 'loaded', url: objectURL }); + } + } + + private executeCallbacks( + url: string, + status: RebuildAssetManagerFinalStatus, + ) { + const callbacks = this.callbackMap.get(url); + while (callbacks && callbacks.length > 0) { + const callback = callbacks.pop(); + if (!callback) { + break; + } + callback(status); + } + } + + // TODO: turn this into a true promise that throws if the asset fails to load + public async whenReady(url: string): Promise { + 
const currentStatus = this.get(url); + if ( + currentStatus.status === 'loaded' || + currentStatus.status === 'failed' + ) { + return currentStatus; + } else if ( + currentStatus.status === 'unknown' && + this.expectedAssets !== null && + this.expectedAssets.size === 0 && + !this.liveMode + ) { + // we don't expect assets to arrive later + return { + status: 'failed', + }; + } + let resolve: (status: RebuildAssetManagerFinalStatus) => void; + const promise = new Promise((r) => { + resolve = r; + }); + if (!this.callbackMap.has(url)) { + this.callbackMap.set(url, []); + } + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + this.callbackMap.get(url)!.push(resolve!); + + return promise; + } + + public get(url: string): RebuildAssetManagerStatus { + let tsResult: Map | Map | undefined; + tsResult = this.urlToStylesheetMap.get(url); + if (!tsResult) { + tsResult = this.originalToObjectURLMap.get(url); + } + if (tsResult) { + let result; + let bestTs: number | null = null; + // pick the asset with a timestamp closest to the current replayer value + // preferring ones that loaded after (assuming these are the ones that + // were triggered by the most recently played snapshot) + tsResult.forEach((value, ts) => { + if (bestTs === null) { + result = value; + bestTs = ts; + } else if (this.replayerApproxTs <= ts) { + if (bestTs < this.replayerApproxTs || ts < bestTs) { + result = value; + bestTs = ts; + } + } else if (bestTs < ts) { + result = value; + bestTs = ts; + } + }); + if (result === undefined) { + // satisfy typings + } else if (this.urlToStylesheetMap.has(url)) { + return { + status: 'loaded', + url, + cssTexts: result, + }; + } else { + return { + status: 'loaded', + url: result, + }; + } + } + + if (this.loadingURLs.has(url)) { + return { + status: 'loading', + }; + } + + if (this.failedURLs.has(url)) { + return { + status: 'failed', + }; + } + + return { + status: 'unknown', + }; + } + + public async manageAttribute( + node: RRElement | 
Element, + nodeId: number, + attribute: string, + serializedValue: string, + serializedNode?: serializedElementNodeWithId, + ): Promise { + const preloadedStatus = this.get(serializedValue); + + let isCssTextElement = false; + if (node.nodeName === 'STYLE') { + // includes s (these are recreated as