diff --git a/packages/rrweb-snapshot/.gitignore b/packages/rrweb-snapshot/.gitignore
index 3365d49249..29746744d1 100644
--- a/packages/rrweb-snapshot/.gitignore
+++ b/packages/rrweb-snapshot/.gitignore
@@ -6,3 +6,4 @@ dist
 es
 lib
 temp
+*-snapshot.json
diff --git a/packages/rrweb-snapshot/package.json b/packages/rrweb-snapshot/package.json
index 1a4cb19cd4..c1d24545d0 100644
--- a/packages/rrweb-snapshot/package.json
+++ b/packages/rrweb-snapshot/package.json
@@ -9,6 +9,9 @@
     "test:watch": "jest --watch",
     "bundle": "rollup --config",
     "bundle:es-only": "cross-env ES_ONLY=true rollup --config",
+    "bundle:browser-only": "cross-env BROWSER_ONLY=true rollup --config",
+    "rebuild": "npm run bundle:browser-only && node scripts/rebuild.js",
+    "snapshot": "npm run bundle:browser-only && node scripts/snapshot.js",
     "dev": "yarn bundle:es-only --watch",
     "typings": "tsc -d --declarationDir typings",
     "prepublish": "npm run typings && npm run bundle"
@@ -49,7 +52,7 @@
     "jest": "^27.2.4",
     "jest-snapshot": "^23.6.0",
     "jsdom": "^16.4.0",
-    "puppeteer": "^1.15.0",
+    "puppeteer": "^13.5.1",
     "rollup": "^2.45.2",
     "rollup-plugin-terser": "^7.0.2",
     "ts-jest": "^27.0.5",
diff --git a/packages/rrweb-snapshot/rollup.config.js b/packages/rrweb-snapshot/rollup.config.js
index 6a8b0a0933..21a0e9ac85 100644
--- a/packages/rrweb-snapshot/rollup.config.js
+++ b/packages/rrweb-snapshot/rollup.config.js
@@ -6,7 +6,8 @@ function toMinPath(path) {
   return path.replace(/\.js$/, '.min.js');
 }
 
-let configs = [
+let configs = [];
+let es_configs = [
   // ES module - for building rrweb
   {
     input: './src/index.ts',
@@ -19,7 +20,7 @@ let configs = [
     ],
   },
 ];
-let extra_configs = [
+let browser_configs = [
   // browser
   {
     input: './src/index.ts',
@@ -32,6 +33,8 @@ let extra_configs = [
       },
     ],
   },
+];
+let extra_configs = [
   {
     input: './src/index.ts',
     plugins: [typescript(), terser()],
@@ -69,8 +72,12 @@ let extra_configs = [
   },
 ];
 
-if (!process.env.ES_ONLY) {
-  configs.push(...extra_configs);
+if (process.env.ES_ONLY) {
+  configs = es_configs;
+} else if (process.env.BROWSER_ONLY) {
+  configs = browser_configs;
+} else {
+  configs.push(...es_configs, ...browser_configs, ...extra_configs);
 }
 
 export default configs;
diff --git a/packages/rrweb-snapshot/scripts/rebuild.js b/packages/rrweb-snapshot/scripts/rebuild.js
new file mode 100644
index 0000000000..f5c1ea2d25
--- /dev/null
+++ b/packages/rrweb-snapshot/scripts/rebuild.js
@@ -0,0 +1,131 @@
+/*
+  Usage: node scripts/rebuild.js rrweb-snapshot.json [wait-seconds]
+  The script can also load GZIP compressed files, eg: .json.gz
+  wait-seconds (default: 60) is how long the rebuilt page is kept open.
+*/
+const fs = require('fs');
+const puppeteer = require('puppeteer');
+const { promisify } = require('util');
+const { unzip } = require('zlib');
+
+// assume that rrWeb script is in this folder
+const rrWeb = './dist/rrweb-snapshot.js';
+const rrFile = process.argv[2];
+const waitSec = parseInt(process.argv[3] || '60', 10);
+const PAGE_TIMEOUT = 5000;
+let HTML_FILE = 'temp.html';
+
+/** Returns a promise that resolves after `time` milliseconds. */
+function delay(time) {
+  return new Promise((resolve) => setTimeout(resolve, time));
+}
+
+/**
+ * Builds a file-name friendly slug: characters other than ASCII letters,
+ * digits, spaces and dashes become dashes, spaces become dashes, runs of
+ * dashes are collapsed and trailing dashes are dropped.
+ */
+function sluggify(str) {
+  return str
+    .replace(/[^a-zA-Z0-9 -]/gi, '-')
+    .replace(/ /g, '-')
+    .replace(/-+/g, '-')
+    .replace(/-+$/, '');
+}
+
+/**
+ * Renders a console-message argument handle as text, evaluated in the
+ * handle's own execution context: strings are returned unchanged, any
+ * other value as `<typeof>=<value>`.
+ */
+function describe(jsHandle) {
+  return jsHandle.executionContext().evaluate((obj) => {
+    return typeof obj === 'string' ? obj : `${typeof obj}=${obj}`;
+  }, jsHandle);
+}
+
+// best-effort removal of the generated HTML file when the process exits
+process.on('exit', function() {
+  try {
+    fs.unlinkSync(HTML_FILE);
+    console.log(`Removed temp HTML file: ${HTML_FILE}`);
+  } catch (err) {
+    console.error(err);
+  }
+});
+
+(async function main() {
+  const browser = await puppeteer.launch({
+    args: [
+      '--disable-breakpad',
+      '--disable-default-apps',
+      '--disable-full-history-sync',
+      '--disable-notifications',
+      '--disable-speech-api',
+      '--disable-translate',
+      '--disable-web-security',
+      '--ignore-gpu-blacklist',
+      '--mute-audio',
+      '--no-default-browser-check',
+      '--no-pings',
+      '--start-maximized',
+    ],
+    defaultViewport: null,
+    headless: false,
+  });
+  browser.on('disconnected', process.exit);
+  const page = await browser.newPage();
+
+  // listen to the browser console messages and scan objects
+  page.on('console', async (msg) => {
+    const args = await Promise.all(msg.args().map((arg) => describe(arg)));
+    let text = '';
+    for (let i = 1; i < args.length; ++i) {
+      text += `${args[i]} `;
+    }
+    console.log(`CONSOLE ${msg.type()} :: ${msg.text()}`);
+    if (text.trim()) {
+      console.log(text.trim());
+    }
+  });
+
+  // restoring snapshots shouldn't need internet
+  // enable internet to discover potential issues
+  await page.setOfflineMode(true);
+  // restore shouldn't need JS
+  await page.setJavaScriptEnabled(false);
+
+  await page.setContent('');
+  const rrCode = await fs.promises.readFile(rrWeb, { encoding: 'utf8' });
+  let snap = await fs.promises.readFile(rrFile);
+  if (rrFile.endsWith('.gz')) {
+    snap = (await promisify(unzip)(snap)).toString();
+  }
+
+  await page.evaluate(`(function(){
+    console.log('Restoring the snaphot...');
+    ${rrCode};
+    rrwebSnapshot.rebuild(${snap}, {doc: document});
+    console.log('Snaphot restored!');
+    for (let s of document.getElementsByTagName("noscript")) {
+      // Hide all restored noscript tags
+      s.style.display = "none";
+    }
+  })();`);
+
+  await page.waitForSelector('*', { timeout: PAGE_TIMEOUT });
+
+  const htm = await page.content();
+  // name the output file after the page title; fall back to "temp" when
+  // the rebuilt document has no non-empty <title>
+  const match = /<title>(.+)<\/title>/i.exec(htm);
+  const title = match ? match[1] : 'temp';
+  HTML_FILE = `${sluggify(title)}.html`;
+  await fs.promises.writeFile(HTML_FILE, htm.trim(), { encoding: 'utf8' });
+  console.log(`Written temp HTML file: ${HTML_FILE}`);
+
+  await page.goto(`file://${process.cwd()}/${HTML_FILE}`, { waitUntil: 'networkidle0', timeout: PAGE_TIMEOUT });
+  await delay(waitSec * 1000);
+
+  await browser.close();
+})();
diff --git a/packages/rrweb-snapshot/scripts/snapshot.js b/packages/rrweb-snapshot/scripts/snapshot.js
new file mode 100644
index 0000000000..2eec532d18
--- /dev/null
+++ b/packages/rrweb-snapshot/scripts/snapshot.js
@@ -0,0 +1,105 @@
+/*
+  Usage: node scripts/snapshot.js 'https://example.com/whatever' [optional-output.json]
+  Some pages will load resources lazily, so it makes sense to scroll the page a bit,
+  to collect all the resources in the snapshot.
+*/
+const fs = require('fs');
+const puppeteer = require('puppeteer');
+
+// assume that rrWeb script is in this folder
+const rrWeb = './dist/rrweb-snapshot.js';
+const url = process.argv[2];
+const out = process.argv[3] || 'rrweb-snapshot.json';
+
+const PAGE_TIMEOUT = 25000;
+const PAGE_DELAY = 2500;
+const IMG_LOAD_TIMEOUT = 5000;
+
+/** Returns a promise that resolves after `time` milliseconds. */
+function delay(time) {
+  return new Promise((resolve) => setTimeout(resolve, time));
+}
+
+(async function main() {
+  const browser = await puppeteer.launch({
+    args: [
+      '--disable-breakpad',
+      '--disable-default-apps',
+      '--disable-features=IsolateOrigins,site-per-process',
+      '--disable-full-history-sync',
+      '--disable-notifications',
+      '--disable-renderer-backgrounding',
+      '--disable-site-isolation-trials',
+      '--disable-speech-api',
+      '--disable-translate',
+      '--disable-web-security',
+      '--ignore-gpu-blacklist',
+      '--mute-audio',
+      '--no-default-browser-check',
+      '--no-pings',
+      '--start-maximized',
+    ],
+    defaultViewport: null,
+    headless: false,
+  });
+
+  browser.on('disconnected', process.exit);
+  const page = await browser.newPage();
+  page.on('console', (msg) => console.log(`CONSOLE ${msg.type()} :: ${msg.text()}`));
+  const rrCode = await fs.promises.readFile(rrWeb, { encoding: 'utf8' });
+
+  try {
+    await page.goto(url, { waitUntil: 'networkidle0', timeout: PAGE_TIMEOUT });
+    await delay(PAGE_DELAY);
+  } catch (err) {
+    console.error(err);
+    await browser.close();
+    return;
+  }
+
+  // hack all images with img.crossOrigin="anonymous" before calling snapshot
+  // this is MANDATORY to capture images from websites that host images on CDNs
+  await page.evaluate((timeout) => {
+    const selectors = Array.from(document.getElementsByTagName('img'));
+    const reloadP = Promise.allSettled(
+      selectors.map((img) => {
+        const p = new Promise((resolve, reject) => {
+          img.addEventListener('load', () => {
+            console.log(`re-loaded <img src=${img.currentSrc}>`);
+            resolve(true);
+          });
+          img.addEventListener('error', reject);
+        });
+        img.loading = 'eager';
+        img.crossOrigin = 'anonymous';
+        return p;
+      }),
+    );
+    return Promise.race([
+      reloadP,
+      new Promise((resolve) => {
+        setTimeout(() => {
+          resolve('Timed out!');
+        }, timeout);
+      }),
+    ]);
+  }, IMG_LOAD_TIMEOUT);
+
+  const snapshot = await page.evaluate(function (rrCode) {
+    console.log('Taking the snaphot...');
+    eval(rrCode);
+    return JSON.stringify(
+      rrwebSnapshot.snapshot(document, {
+        recordCanvas: true,
+        inlineImages: true,
+        inlineStylesheet: true,
+        dataURLOptions: { type: 'image/webp', quality: 0.8 },
+      })[0],
+    );
+  }, rrCode);
+
+  await fs.promises.writeFile(out, snapshot, { encoding: 'utf8' });
+  console.log(`Output file: "${out}" was saved`);
+
+  await browser.close();
+})();