Include screenshots of affected elements in report (#181)
* Add screenshots of HTML elements to report

* Add PDF screenshotting logic and a function to save screenshots in the result dir

* Modify EJS templates to include screenshots in the report

* Implement -a flag to include/exclude the additional element screenshot feature

* Fix CSV generation when allIssues is empty

---------

Co-authored-by: jodichoo <[email protected]>
greyguy21 and jodichoo authored Sep 21, 2023
1 parent a5f0e9b commit a4e9a21
Showing 16 changed files with 1,972 additions and 82 deletions.
6 changes: 6 additions & 0 deletions README.md
@@ -274,6 +274,12 @@ Options:
domains to exclude from accessibility scan
separated by new line
[string] [default: "exclusions.txt"]
-a, --additional Additional features to include in the report:
screenshots - Include element screenshots in the generated report
none - Exclude all additional features in the generated report
[string] [choices: "screenshots", "none"] [default: "screenshots"]
Examples:
To scan sitemap of website: node cli.js -c [ 1 | Sitemap ] -d <device> -u <url_link> -w <viewportWidth>
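For reference, the new flag composes with the invocation style already documented: a scan that skips element screenshots would append -a none, e.g. node cli.js -c [ 2 | Website ] -d <device> -u <url_link> -a none (the scanner choice and placeholders here are illustrative, following the README's own example format).
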
12 changes: 12 additions & 0 deletions cli.js
@@ -213,6 +213,18 @@ Usage: node cli.js -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
}
return option;
})
// TODO: include/exclude screenshots
.coerce('a', option => {
const { choices } = cliOptions.a;
if (!choices.includes(option)) {
printMessage(
[`Invalid value for additional. Please provide valid keywords: ${choices.join(", ")}.`],
messageOptions,
);
process.exit(1);
}
return option;
})
.check(argvs => {
if (argvs.scanner === 'custom' && argvs.maxpages) {
throw new Error('-p or --maxpages is only available in website and sitemap scans.');
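A note on the pattern above: yargs applies .coerce to the parsed value before .check validations run, so the handler doubles as a validator with a friendlier exit. A minimal standalone sketch of the same idea (not part of the commit; option wiring simplified):

import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';

const choices = ['screenshots', 'none'];

const argv = yargs(hideBin(process.argv))
  .option('a', { alias: 'additional', type: 'string', default: 'screenshots' })
  .coerce('a', value => {
    // Reject anything outside the allowed keywords, mirroring cli.js above.
    if (!choices.includes(value)) {
      console.error(`Invalid value for additional. Valid keywords: ${choices.join(', ')}.`);
      process.exit(1);
    }
    return value;
  })
  .parseSync();

console.log(argv.a); // 'screenshots' unless `-a none` was passed
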
7 changes: 5 additions & 2 deletions combine.js
@@ -10,7 +10,6 @@ import { consoleLogger, silentLogger } from './logs.js';
const combineRun = async (details, deviceToScan) => {
const envDetails = { ...details };

// eslint-disable-next-line prettier/prettier
const {
type,
url,
@@ -29,6 +28,7 @@ const combineRun = async (details, deviceToScan) => {
needsReviewItems,
fileTypes,
blacklistedPatternsFilename,
includeScreenshots,
} = envDetails;

process.env.CRAWLEE_LOG_LEVEL = 'ERROR';
@@ -79,6 +79,7 @@ const combineRun = async (details, deviceToScan) => {
needsReviewItems,
fileTypes,
blacklistedPatterns,
includeScreenshots,
);
break;

@@ -96,6 +97,7 @@ const combineRun = async (details, deviceToScan) => {
needsReviewItems,
fileTypes,
blacklistedPatterns,
includeScreenshots,
);
break;

@@ -119,7 +121,8 @@ const combineRun = async (details, deviceToScan) => {
type,
deviceToScan,
urlsCrawled.scanned,
pagesNotScanned
pagesNotScanned,
browser
);
const [name, email] = nameEmail.split(':');
await submitForm(
9 changes: 9 additions & 0 deletions constants/cliFunctions.js
@@ -125,6 +125,15 @@ export const cliOptions = {
default: 'exclusions.txt',
demandOption: false,
},
a: {
alias: 'additional',
describe: 'Additional features to include in the report: \nscreenshots - Include element screenshots in the generated report \nnone - Exclude all additional features in the generated report',
type: 'string',
default: 'screenshots',
choices: ['screenshots', 'none'],
requiresArg: true,
demandOption: false,
}
};

export const configureReportSetting = isEnabled => {
2 changes: 2 additions & 0 deletions constants/common.js
@@ -472,6 +472,7 @@ export const prepareData = argv => {
needsReviewItems,
fileTypes,
blacklistedPatternsFilename,
additional,
} = argv;

// construct filename for scan results
@@ -499,6 +500,7 @@
randomToken: resultFilename,
fileTypes,
blacklistedPatternsFilename,
includeScreenshots: !(additional === 'none'),
};
};

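The -a keyword is collapsed into a boolean at this point, so any value other than none (today that is only screenshots) turns the feature on. A quick illustration of the mapping (not part of the commit):

const includeScreenshots = additional => !(additional === 'none');

console.log(includeScreenshots('screenshots')); // true
console.log(includeScreenshots('none'));        // false
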
14 changes: 11 additions & 3 deletions crawlers/commonCrawlerFunc.js
@@ -4,6 +4,8 @@ import crawlee, { playwrightUtils } from 'crawlee';
import axe from 'axe-core';
import { axeScript, guiInfoStatusTypes, saflyIconSelector } from '../constants/constants.js';
import { guiInfoLog } from '../logs.js';
import { takeScreenshotForHTMLElements } from '../screenshotFunc/htmlScreenshotFunc.js';
import fs from 'fs';

export const filterAxeResults = (needsReview, results, pageTitle) => {
const { violations, passes, incomplete, url } = results;
@@ -34,15 +36,16 @@ export const filterAxeResults = (needsReview, results, pageTitle) => {
}

const addTo = (category, node) => {
const { html, failureSummary } = node;
const { html, failureSummary, screenshotPath } = node;
if (!(rule in category.rules)) {
category.rules[rule] = { description, helpUrl, conformance, totalItems: 0, items: [] };
}
const message = displayNeedsReview
? failureSummary.slice(failureSummary.indexOf('\n') + 1).trim()
: failureSummary;
// add in screenshot path
category.rules[rule].items.push(
displayNeedsReview ? { html, message, displayNeedsReview } : { html, message },
displayNeedsReview ? { html, message, screenshotPath, displayNeedsReview } : { html, message, screenshotPath },
);
category.rules[rule].totalItems += 1;
category.totalItems += 1;
@@ -93,7 +96,7 @@ export const filterAxeResults = (needsReview, results, pageTitle) => {
};
};

export const runAxeScript = async (needsReview, page, selectors = []) => {
export const runAxeScript = async (needsReview, includeScreenshots, page, randomToken, selectors = []) => {
await crawlee.playwrightUtils.injectFile(page, axeScript);

const results = await page.evaluate(
@@ -118,6 +121,11 @@
{ selectors, saflyIconSelector },
);

if (includeScreenshots) {
results.violations = await takeScreenshotForHTMLElements(results.violations, page, randomToken);
if (needsReview) results.incomplete = await takeScreenshotForHTMLElements(results.incomplete, page, randomToken);
}

const pageTitle = await page.evaluate(() => document.title);
return filterAxeResults(needsReview, results, pageTitle);
};
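takeScreenshotForHTMLElements itself lives in screenshotFunc/htmlScreenshotFunc.js, one of the 16 changed files but not reproduced in this excerpt. A plausible minimal sketch, assuming it maps each axe-core target selector to a Playwright locator, saves an element screenshot under the scan's result directory, and records the path on the node (directory layout and naming are assumptions):

import fs from 'fs';
import path from 'path';

// Hypothetical sketch; the committed implementation may differ.
export const takeScreenshotForHTMLElements = async (violations, page, randomToken) => {
  const screenshotsDir = path.join(randomToken, 'elemScreenshots', 'html');
  fs.mkdirSync(screenshotsDir, { recursive: true });
  let counter = 0;
  for (const violation of violations) {
    for (const node of violation.nodes) {
      const selector = node.target[0]; // axe-core reports a CSS selector per node
      const screenshotPath = path.join(screenshotsDir, `${violation.id}-${counter}.png`);
      try {
        await page.locator(selector).first().screenshot({ path: screenshotPath });
        node.screenshotPath = screenshotPath; // later picked up by filterAxeResults
        counter += 1;
      } catch {
        // Element may be hidden or detached; leave screenshotPath unset.
      }
    }
  }
  return violations;
};
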
14 changes: 11 additions & 3 deletions crawlers/crawlDomain.js
@@ -16,7 +16,7 @@ import {
isSkippedUrl,
} from '../constants/common.js';
import { areLinksEqual } from '../utils.js';
import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
import { handlePdfDownload, runPdfScan, mapPdfScanResults, doPdfScreenshots } from './pdfScanFunc.js';
import fs from 'fs';
import { guiInfoLog } from '../logs.js';
import { chromium } from 'playwright';
@@ -34,6 +34,7 @@ const crawlDomain = async (
needsReviewItems,
fileTypes,
blacklistedPatterns,
includeScreenshots,
) => {
let needsReview = needsReviewItems;
const isScanHtml = ['all', 'html-only'].includes(fileTypes);
@@ -209,7 +210,7 @@
isBasicAuth = false;
} else if (location.host.includes(host)) {
if (isScanHtml) {
const results = await runAxeScript(needsReview, page);
const results = await runAxeScript(needsReview, includeScreenshots, page, randomToken);
guiInfoLog(guiInfoStatusTypes.SCANNED, {
numScanned: urlsCrawled.scanned.length,
urlScanned: request.url,
@@ -283,7 +284,14 @@
await runPdfScan(randomToken);

// transform result format
const pdfResults = mapPdfScanResults(randomToken, uuidToPdfMapping);
const pdfResults = await mapPdfScanResults(randomToken, uuidToPdfMapping);

// get screenshots from pdf docs
if (includeScreenshots) {
await Promise.all(pdfResults.map(
async result => await doPdfScreenshots(randomToken, result)
));
}

// push results for each pdf document to key value store
await Promise.all(pdfResults.map(result => dataset.pushData(result)));
13 changes: 11 additions & 2 deletions crawlers/crawlSitemap.js
@@ -19,6 +19,7 @@ import { areLinksEqual, isWhitelistedContentType } from '../utils.js';
import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
import fs from 'fs';
import { guiInfoLog } from '../logs.js';
import { doPdfScreenshots } from './pdfScanFunc.js';

const crawlSitemap = async (
sitemapUrl,
@@ -32,6 +33,7 @@ const crawlSitemap = async (
needsReviewItems,
fileTypes,
blacklistedPatterns,
includeScreenshots,
) => {
let needsReview = needsReviewItems;
const isScanHtml = ['all', 'html-only'].includes(fileTypes);
@@ -141,7 +143,7 @@
pagesCrawled += 1;

if (isScanHtml && status === 200 && isWhitelistedContentType(contentType)) {
const results = await runAxeScript(needsReview, page);
const results = await runAxeScript(needsReview, includeScreenshots, page, randomToken);
guiInfoLog(guiInfoStatusTypes.SCANNED, {
numScanned: urlsCrawled.scanned.length,
urlScanned: request.url,
@@ -208,7 +210,7 @@
await runPdfScan(randomToken);

// transform result format
const pdfResults = mapPdfScanResults(randomToken, uuidToPdfMapping);
const pdfResults = await mapPdfScanResults(randomToken, uuidToPdfMapping);

// get screenshots from pdf docs
if (includeScreenshots) {
await Promise.all(pdfResults.map(
async result => await doPdfScreenshots(randomToken, result)
));
}

// push results for each pdf document to key value store
await Promise.all(pdfResults.map(result => dataset.pushData(result)));
47 changes: 29 additions & 18 deletions crawlers/pdfScanFunc.js
@@ -1,12 +1,14 @@
import constants, { getExecutablePath, guiInfoStatusTypes } from '../constants/constants.js';
import { exec, spawnSync } from 'child_process';
import { spawnSync } from 'child_process';
import { globSync } from 'glob';
import { consoleLogger, guiInfoLog, silentLogger } from '../logs.js';
import fs from 'fs';
import { randomUUID } from 'crypto';
import { createRequire } from 'module';
import os from 'os';
import path from 'path';
import { getPageFromContext, getPdfScreenshots } from '../screenshotFunc/pdfScreenshotFunc.js';
import { ensureDirSync } from 'fs-extra';

const require = createRequire(import.meta.url);

@@ -164,7 +166,7 @@ export const runPdfScan = async randomToken => {
};

// transform results from veraPDF to desired format for report
export const mapPdfScanResults = (randomToken, uuidToUrlMapping) => {
export const mapPdfScanResults = async (randomToken, uuidToUrlMapping) => {
const intermediateFolder = randomToken;
const intermediateResultPath = `${intermediateFolder}/${constants.pdfScanResultFileName}`;

@@ -203,9 +205,11 @@ export const mapPdfScanResults = (randomToken, uuidToUrlMapping) => {
.split('.')[0];
const url = uuidToUrlMapping[uuid];
const pageTitle = decodeURI(url).split('/').pop();
const filePath = `${randomToken}/${uuid}.pdf`;

translated.url = url;
translated.pageTitle = pageTitle;
translated.filePath = filePath;

if (!validationResult) {
// check for error in scan
@@ -225,7 +229,7 @@
const { specification, testNumber, clause } = rule;

if (isRuleExcluded(rule)) continue;
const [ruleId, transformedRule] = transformRule(rule);
const [ruleId, transformedRule] = await transformRule(rule, filePath);

// ignore if violation is not in the meta file
const meta = errorMeta[specification][clause][testNumber]?.STATUS ?? 'ignore';
Expand All @@ -240,20 +244,7 @@ export const mapPdfScanResults = (randomToken, uuidToUrlMapping) => {
return resultsList;
};

const getPageFromContext = context => {
const path = context.split('/');
let pageNumber = -1;
if (context?.includes('pages') && path[path.length - 1].startsWith('pages')) {
path.forEach(nodeString => {
if (nodeString.includes('pages')) {
pageNumber = parseInt(nodeString.split(/[[\]]/)[1], 10) + 1;
}
});
}
return pageNumber;
};

const transformRule = rule => {
const transformRule = async (rule, filePath) => {
// get specific rule
const transformed = {};
const { specification, description, clause, testNumber, checks } = rule;
@@ -271,9 +262,29 @@

for (let checkIdx = 0; checkIdx < checks.length; checkIdx++) {
const { errorMessage, context } = checks[checkIdx];
transformed.items.push({ message: errorMessage, page: getPageFromContext(context) });
const page = await getPageFromContext(context, filePath);
transformed.items.push({ message: errorMessage, page, context });
}
const ruleId = `pdf-${specification}-${clause}-${testNumber}`.replaceAll(' ', '_');

return [ruleId, transformed];
};

export const doPdfScreenshots = async (randomToken, result) => {
const { filePath, pageTitle } = result;
const formattedPageTitle = pageTitle.replaceAll(" ", "_").split('.')[0];
const screenshotsDir = path.join(randomToken, 'elemScreenshots', 'pdf');

ensureDirSync(screenshotsDir);

for (const category of ['mustFix', 'goodToFix']) {
const ruleItems = Object.entries(result[category].rules);
for (const [ruleId, ruleInfo] of ruleItems) {
const { items } = ruleInfo;
const filename = `${formattedPageTitle}-${category}-${ruleId}`;
const screenshotPath = path.join(screenshotsDir, filename);
const newItems = await getPdfScreenshots(filePath, items, screenshotPath);
ruleInfo.items = newItems;
}
}
};
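
Pieced together from mapPdfScanResults and transformRule above, the object handed to doPdfScreenshots has roughly the shape below (values are illustrative; getPdfScreenshots comes from screenshotFunc/pdfScreenshotFunc.js, which is not shown in this excerpt):

// Illustrative only: field names inferred from the functions above.
const result = {
  url: 'https://example.com/annual-report.pdf',
  pageTitle: 'annual-report.pdf',
  filePath: 'resultDir/someUuid.pdf',
  mustFix: {
    rules: {
      'pdf-ISO_14289-1-7.1-3': {
        items: [{ message: 'a veraPDF error message', page: 2, context: 'root/document[0]/pages[1]' }],
      },
    },
  },
  goodToFix: { rules: {} },
};

await doPdfScreenshots('resultDir', result);
// Screenshots land under resultDir/elemScreenshots/pdf, and each rule's
// items array is replaced by whatever getPdfScreenshots returns.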