Skip to content

Commit

Permalink
feat: CustomEnqueueLinksByClickingElements and fixes (#348)
Browse files Browse the repository at this point in the history
* CustomEnqueueLinksByClickingElements
* Minor refactor to request handler
* Logic copied over from crawlDomain to crawlSitemap
* Removal of custom flow 1.0 scan mode
* Sitemap crawl basic auth bug
* WCAG links in allIssues JSON

---------

Co-authored-by: angyonghaseyo <[email protected]>
  • Loading branch information
joshualai9922 and angyonghaseyo authored Jun 6, 2024
1 parent 00ac9ce commit 624b6b8
Show file tree
Hide file tree
Showing 10 changed files with 314 additions and 1,217 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,9 @@ port> -u <url> OPTIONS
Options:
--help Show help [boolean]
-c, --scanner Type of scan, 1) sitemap, 2) website crawl,
3) custom flow, 4) custom flow 2.0, 5) int
3) custom flow, 4) intelligent
elligent
[required] [choices: "sitemap", "website", "custom", "custom2", "intelligent"]
[required] [choices: "sitemap", "website", "custom", "intelligent"]
-u, --url Website URL you want to scan
[string] [required]
-d, --customDevice Device you want to scan [string]
Expand Down
30 changes: 9 additions & 21 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import {
import constants from './constants/constants.js';
import { cliOptions, messageOptions } from './constants/cliFunctions.js';
import combineRun from './combine.js';
import playwrightAxeGenerator from './playwrightAxeGenerator.js';
import { silentLogger } from './logs.js';
import { fileURLToPath } from 'url';
import path from 'path';
Expand Down Expand Up @@ -242,7 +241,7 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
return allHeaders;
})
.check(argvs => {
if ((argvs.scanner === 'custom' || argvs.scanner === 'custom2') && argvs.maxpages) {
if ((argvs.scanner === 'custom') && argvs.maxpages) {
throw new Error('-p or --maxpages is only available in website and sitemap scans.');
}
return true;
Expand All @@ -257,16 +256,15 @@ Usage: npm run cli -- -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
.epilogue('').argv;

const scanInit = async (argvs: Answers): Promise<void> => {
let isNewCustomFlow = false;
if (constants.scannerTypes[argvs.scanner] === constants.scannerTypes.custom2) {
argvs.scanner = constants.scannerTypes.custom;
isNewCustomFlow = true;
let isCustomFlow = false;
if (constants.scannerTypes[argvs.scanner] === constants.scannerTypes.custom) {
isCustomFlow = true;
} else {
argvs.headless = argvs.headless === 'yes';
argvs.followRobots = argvs.followRobots === 'yes';
argvs.safeMode = argvs.safeMode === 'yes';
argvs.scanner = constants.scannerTypes[argvs.scanner];
}
argvs.scanner = constants.scannerTypes[argvs.scanner];
argvs.browserToRun = constants.browserTypes[argvs.browserToRun];

// let chromeDataDir = null;
Expand Down Expand Up @@ -298,7 +296,7 @@ const scanInit = async (argvs: Answers): Promise<void> => {
argvs.browserToRun,
clonedDataDir,
argvs.playwrightDeviceDetailsObject,
isNewCustomFlow,
isCustomFlow,
argvs.header,
);
switch (res.status) {
Expand Down Expand Up @@ -385,19 +383,9 @@ const scanInit = async (argvs: Answers): Promise<void> => {

printMessage([`Purple A11y version: ${appVersion}`, 'Starting scan...'], messageOptions);

if (argvs.scanner === constants.scannerTypes.custom && !isNewCustomFlow) {
try {
await playwrightAxeGenerator(data);
} catch (error) {
silentLogger.error(error);
printMessage([
`An error has occurred when running the custom flow scan. Please see above and errors.txt for more details.`,
]);
process.exit(2);
}
} else {
await combineRun(data, screenToScan);
}

await combineRun(data, screenToScan);


// Delete cloned directory
process.env.PURPLE_A11Y_VERBOSE
Expand Down
2 changes: 1 addition & 1 deletion src/constants/cliFunctions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export const cliOptions: { [key: string]: Options } = {
c: {
alias: 'scanner',
describe:
'Type of scan, 1) sitemap, 2) website crawl, 3) custom flow, 4) custom flow 2.0, 5) intelligent',
'Type of scan, 1) sitemap, 2) website crawl, 3) custom flow, 5) intelligent',
choices: Object.keys(constants.scannerTypes),
demandOption: true,
},
Expand Down
32 changes: 20 additions & 12 deletions src/constants/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ export const getUrlMessage = scanner => {
switch (scanner) {
case constants.scannerTypes.website:
case constants.scannerTypes.custom:
case constants.scannerTypes.custom2:
return 'Please enter URL of website: ';
case constants.scannerTypes.sitemap:
return 'Please enter URL or file path to sitemap, or drag and drop a sitemap file here: ';
Expand Down Expand Up @@ -262,7 +261,7 @@ export const sanitizeUrlInput = url => {
return data;
};

const requestToUrl = async (url, isNewCustomFlow, extraHTTPHeaders) => {
const requestToUrl = async (url, isCustomFlow, extraHTTPHeaders) => {
// User-Agent is modified to emulate a browser to handle cases where some sites ban non browser agents, resulting in a 403 error
const res = {};
const parsedUrl = new URL(url);
Expand All @@ -281,7 +280,8 @@ const requestToUrl = async (url, isNewCustomFlow, extraHTTPHeaders) => {
timeout: 5000,
})
.then(async response => {
const redirectUrl = response.request.res.responseUrl;
let redirectUrl = response.request.res.responseUrl;
redirectUrl = new URL(redirectUrl).href;
res.status = constants.urlCheckStatuses.success.code;
let data;
if (typeof response.data === 'string' || response.data instanceof String) {
Expand All @@ -304,7 +304,7 @@ const requestToUrl = async (url, isNewCustomFlow, extraHTTPHeaders) => {

const hasMetaRefresh = metaRefreshMatch && metaRefreshMatch.length > 1;

if (redirectUrl != null && (hasMetaRefresh || !isNewCustomFlow)) {
if (redirectUrl != null && (hasMetaRefresh || !isCustomFlow)) {
res.url = redirectUrl;
} else {
res.url = url;
Expand Down Expand Up @@ -356,12 +356,12 @@ const requestToUrl = async (url, isNewCustomFlow, extraHTTPHeaders) => {
return res;
};

const checkUrlConnectivity = async (url, isNewCustomFlow, extraHTTPHeaders) => {
const checkUrlConnectivity = async (url, isCustomFlow, extraHTTPHeaders) => {
const data = sanitizeUrlInput(url);

if (data.isValid) {
// Validate the connectivity of URL if the string format is url format
const res = await requestToUrl(data.url, isNewCustomFlow, extraHTTPHeaders);
const res = await requestToUrl(data.url, isCustomFlow, extraHTTPHeaders);
return res;
}

Expand All @@ -374,7 +374,7 @@ const checkUrlConnectivityWithBrowser = async (
browserToRun,
clonedDataDir,
playwrightDeviceDetailsObject,
isNewCustomFlow,
isCustomFlow,
extraHTTPHeaders,
) => {
const res = {};
Expand Down Expand Up @@ -441,7 +441,7 @@ const checkUrlConnectivityWithBrowser = async (
}

// set redirect link or final url
if (isNewCustomFlow) {
if (isCustomFlow) {
res.url = url;
} else {
res.url = page.url();
Expand Down Expand Up @@ -490,7 +490,7 @@ export const checkUrl = async (
browser,
clonedDataDir,
playwrightDeviceDetailsObject,
isNewCustomFlow,
isCustomFlow,
extraHTTPHeaders,
) => {
let res;
Expand All @@ -500,19 +500,19 @@ export const checkUrl = async (
browser,
clonedDataDir,
playwrightDeviceDetailsObject,
isNewCustomFlow,
isCustomFlow,
extraHTTPHeaders,
);
} else {
res = await checkUrlConnectivity(url, isNewCustomFlow, extraHTTPHeaders);
res = await checkUrlConnectivity(url, isCustomFlow, extraHTTPHeaders);
if (res.status === constants.urlCheckStatuses.axiosTimeout.code) {
if (browser || constants.launcher === webkit) {
res = await checkUrlConnectivityWithBrowser(
url,
browser,
clonedDataDir,
playwrightDeviceDetailsObject,
isNewCustomFlow,
isCustomFlow,
extraHTTPHeaders,
);
}
Expand Down Expand Up @@ -1711,3 +1711,11 @@ export const urlWithoutAuth = (url: string): URL => {
parsedUrl.password = '';
return parsedUrl;
};

/**
 * Waits until the page reaches the `load` or the `networkidle` load state, or
 * until `timeout` milliseconds have elapsed, whichever happens first.
 *
 * The timeout is a soft cap: when it wins the race the returned promise
 * resolves (it does not reject), so callers simply continue with the page in
 * whatever state it is in.
 *
 * @param page - Object exposing `waitForLoadState(state)`; presumably a
 *   Playwright `Page` (confirm against callers).
 * @param timeout - Maximum time to wait, in milliseconds. Defaults to 10000.
 * @returns A promise that settles with the first of the three contenders. It
 *   rejects only if one of the `waitForLoadState` calls rejects before
 *   anything else settles.
 */
export const waitForPageLoaded = async (page, timeout = 10000) => {
  // Start both load-state waits first, in the same order as before, so a
  // synchronous failure here surfaces before any timer is created.
  const loadStates = [page.waitForLoadState('load'), page.waitForLoadState('networkidle')];

  let timer: ReturnType<typeof setTimeout> | undefined;
  const timedOut = new Promise(resolve => {
    timer = setTimeout(resolve, timeout);
  });

  try {
    return await Promise.race([...loadStates, timedOut]);
  } finally {
    // Without this the fallback timer stays pending for the full `timeout`
    // even when the page loaded immediately, keeping the event loop alive and
    // accumulating one stray timer per call.
    clearTimeout(timer);
  }
};
6 changes: 4 additions & 2 deletions src/constants/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,6 @@ const scannerTypes = {
sitemap: 'Sitemap',
website: 'Website',
custom: 'Custom',
custom2: 'Custom2',
intelligent: 'Intelligent',
};

Expand Down Expand Up @@ -307,19 +306,22 @@ const wcagLinks = {
'WCAG 1.3.1': 'https://www.w3.org/TR/WCAG21/#info-and-relationships',
// 'WCAG 1.3.4': 'https://www.w3.org/TR/WCAG21/#orientation', - TODO: review for veraPDF
'WCAG 1.3.5': 'https://www.w3.org/TR/WCAG21/#use-of-color',
'WCAG 1.4.1': 'https://www.w3.org/TR/WCAG21/#use-of-color',
'WCAG 1.4.2': 'https://www.w3.org/TR/WCAG21/#audio-control',
'WCAG 1.4.3': 'https://www.w3.org/TR/WCAG21/#contrast-minimum',
'WCAG 1.4.4': 'https://www.w3.org/TR/WCAG21/#resize-text',
// 'WCAG 1.4.10': 'https://www.w3.org/TR/WCAG21/#reflow', - TODO: review for veraPDF
'WCAG 1.4.12': 'https://www.w3.org/TR/WCAG21/#text-spacing',
'WCAG 2.1.1': 'https://www.w3.org/TR/WCAG21/#pause-stop-hide',
'WCAG 2.2.1': 'https://www.w3.org/TR/WCAG21/#timing-adjustable',
'WCAG 2.2.2': 'https://www.w3.org/TR/WCAG21/#pause-stop-hide',
'WCAG 2.4.1': 'https://www.w3.org/TR/WCAG21/#bypass-blocks',
'WCAG 2.4.2': 'https://www.w3.org/TR/WCAG21/#page-titled',
'WCAG 2.4.4': 'https://www.w3.org/TR/WCAG21/#link-purpose-in-context',
'WCAG 3.1.1': 'https://www.w3.org/TR/WCAG21/#language-of-page',
'WCAG 3.1.2': 'https://www.w3.org/TR/WCAG21/#labels-or-instructions',
'WCAG 4.1.1': 'https://www.w3.org/TR/WCAG21/#parsing',
'WCAG 4.1.2': 'https://www.w3.org/TR/WCAG21/#name-role-value',
'WCAG 4.1.2': 'https://www.w3.org/TR/WCAG21/#name-role-value'
};

const urlCheckStatuses = {
Expand Down
Loading

0 comments on commit 624b6b8

Please sign in to comment.