Skip to content
This repository has been archived by the owner on Dec 17, 2021. It is now read-only.

Add basic Lighthouse scan #320

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .snyk
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Snyk (https://snyk.io) policy file, patches or ignores known vulnerabilities.
version: v1.14.1
ignore: {}
patch: {}
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ Append columns to each row with metadata about the scan itself, such as how long
* `trustymail`: The `trustymail` command, available from the [`trustymail`](https://github.com/dhs-ncats/trustymail) Python package from the [Department of Homeland Security's NCATS team](https://github.com/dhs-ncats). (Override path by setting the `TRUSTYMAIL_PATH` environment variable.)
* `third_parties` - What third party web services are in use, using [headless Chrome](https://developers.google.com/web/updates/2017/04/headless-chrome) to trap outgoing requests. (See documentation for [using](#headless-chrome) or [writing](#developing-chrome-scanners) Chrome-based scanners.)
* `a11y` - Accessibility issues, using [`pa11y`](https://github.com/pa11y/pa11y).
* `lighthouse` - Page audits (accessibility, performance, and mobile-friendliness checks), using [Google Lighthouse](https://developers.google.com/web/tools/lighthouse).
* `noop` - Test scanner (no-op) used for development and debugging. Does nothing.

### Parallelization
Expand Down
65 changes: 65 additions & 0 deletions lighthouse.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env node

/**
* Lighthouse scanner
* This module is meant to orchestrate parallel Lighthouse scans over one headless
* Chrome instance; for now it runs a single scan of a hard-coded URL and prints
* the resulting report.
*/

const chromeLauncher = require('chrome-launcher');
const lighthouse = require('lighthouse');
const puppeteer = require('puppeteer');


/**
 * Starts a headless Chrome through Puppeteer.
 *
 * @returns {Promise<object>} Whatever `puppeteer.launch` resolves to.
 */
const getBrowser = async () => {
  // TODO: Let executable path be overrideable.
  // executablePath: config.executablePath,
  const launchOptions = {
    headless: true,
    ignoreHTTPSErrors: true,
    args: ['--no-sandbox', '--disable-gpu', '--single-process'],
  };
  return puppeteer.launch(launchOptions);
};

/**
 * Launches a dedicated Chrome via chrome-launcher, runs Lighthouse against
 * `url` on that instance, and shuts Chrome down again.
 *
 * @param {string} url - Page to audit.
 * @param {object} opts - Lighthouse flags; `opts.chromeFlags` is also forwarded
 *   to chrome-launcher. The object itself is not modified.
 * @param {?object} [config=null] - Optional Lighthouse config, passed through.
 * @returns {Promise<object>} The `lhr` property of the Lighthouse result.
 */
async function launchChromeAndRunLighthouse(url, opts, config = null) {
  const chrome = await chromeLauncher.launch({ chromeFlags: opts.chromeFlags });
  try {
    // Put the port on a copy so the caller's options object stays untouched.
    const results = await lighthouse(url, { ...opts, port: chrome.port }, config);
    return results.lhr;
  } finally {
    // Also runs when Lighthouse fails; otherwise the Chrome process is leaked.
    await chrome.kill();
  }
}

// Default options: the Chrome flags used when launching via chrome-launcher.
const opts = { chromeFlags: ['--headless', '--no-sandbox'] };

/*
function configure() {
let port;
chromeLauncher.launch({
chromeFlags: ['--headless', '--no-sandbox']
}).then(chrome => {
port = chrome.port
});
}

launchChromeAndRunLighthouse('https://www.whitehouse.gov', opts).then(results => {
console.log(results);
});
*/

// Script entry point: run one Lighthouse scan against a hard-coded URL on the
// Puppeteer-managed browser and print the resulting report.
getBrowser()
  .then(async (browser) => {
    try {
      const url = 'https://www.whitehouse.gov';
      const { lhr } = await lighthouse(url, {
        // Point Lighthouse at the debugging port of the running browser.
        port: new URL(browser.wsEndpoint()).port,
        output: 'json',
        logLevel: 'info',
      });
      console.log(lhr);
    } finally {
      // Close the browser even if the scan failed, so Chrome is not leaked.
      await browser.close();
    }
  })
  .catch((err) => {
    // Report launch/scan failures instead of leaving an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
  });
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
"name": "domain-scan-headless-lambda",
"description": "Dependencies for building Lambda containers for headless Chrome in domain-scan.",
"dependencies": {
"puppeteer": "^2.0.0",
"lighthouse": "^5.6.0",
"puppeteer": "^2.1.1",
"tar": "^5.0.5"
}
}
50 changes: 50 additions & 0 deletions scanners/lighthouse.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
'use strict';

const lighthouse = require('lighthouse');


// IDs of the Lighthouse audits requested by this scanner.
const LIGHTHOUSE_AUDITS = [
  'color-contrast', 'font-size', 'image-alt', 'input-image-alt',
  'performance-budget', 'speed-index', 'tap-targets', 'timing-budget',
  'total-byte-weight', 'unminified-css', 'unminified-javascript',
  'uses-text-compression', 'viewport',
];


// JS entry point for Lighthouse scan.
module.exports = {
scan: async (domain, environment, options, browser, page) => {
const url = 'https://' + domain;
try {
const output = await lighthouse(url, {
port: (new URL(browser.wsEndpoint())).port,
onlyAudits: LIGHTHOUSE_AUDITS,

disableStorageReset: false,
saveAssets: false,
listAllAudits: false,
listTraceCategories: false,
printConfig: false,
output: [ 'json' ],
chromeFlags: '',
enableErrorReporting: false,
logLevel: 'silent',
outputPath: 'stdout',
});
return output.lhr.audits;

} catch (exc) {
console.log('problem scanning ' + domain + ' ' + exc.message);
return null;
}
}
}
128 changes: 128 additions & 0 deletions scanners/lighthouse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""
Implements a Google Lighthouse scan.

https://developers.google.com/web/tools/lighthouse

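The scan itself runs in `lighthouse.js` through headless Chrome (see
`scan_headless` below). The `LIGHTHOUSE_PATH` environment variable (default:
`lighthouse`) is only needed by the commented-out CLI implementation.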
"""

import os


# Lambda execution is untested; the flag is left off.
lambda_support = False

# Marks this as a JS-based scan using headless Chrome: the scan method is
# defined in lighthouse.js rather than in this module.
scan_headless = True

# Lighthouse executable; override with the LIGHTHOUSE_PATH environment variable.
LIGHTHOUSE_PATH = os.getenv('LIGHTHOUSE_PATH', 'lighthouse')

# IDs of the Lighthouse audits of interest.
LIGHTHOUSE_AUDITS = [
    'color-contrast', 'font-size', 'image-alt', 'input-image-alt',
    'performance-budget', 'tap-targets', 'timing-budget',
    'total-byte-weight', 'unminified-css', 'unminified-javascript',
    'uses-text-compression', 'viewport', 'speed-index',
]

# Optional Chrome location from the environment (None when unset).
CHROME_PATH = os.getenv('CHROME_PATH')


# Default number of workers for this scan type; a --workers flag overrides it.
workers = 1


# Required CSV row conversion function. Usually one row, can be more.
#
# Run locally.
def to_rows(data):
    """Convert Lighthouse audit results into CSV rows, one row per audit.

    Args:
        data: Mapping of audit name to audit result dict. Each result must
            provide 'id', 'description', 'title', 'score' and
            'scoreDisplayMode'. An 'error' entry, if present, is skipped.
            May be None or empty.

    Returns:
        A list of rows whose columns line up with `headers`; an empty list
        when there is no data.
    """
    # The JS scan returns null when Lighthouse fails; presumably that reaches
    # this function as None, so treat missing data as "no rows" rather than
    # raising AttributeError.
    if not data:
        return []

    return [[
        audit['id'],
        audit['description'],
        audit['title'],
        audit['score'],
        audit['scoreDisplayMode']
    ] for name, audit in data.items() if name != 'error']


# CSV headers for each row of data. Referenced locally.
headers = ['ID', 'Description', 'Title', 'Score', 'Score Display Mode']


#
# Below is an implementation that will spawn Lighthouse via its cli rather than
# use a Puppeteer-managed headless Chrome.
#

# def _url_for_domain(domain: str, cache_dir: str):
# if domain.startswith('http://') or domain.startswith('https://'):
# return domain

# # If we have data from pshtt, use the canonical endpoint.
# canonical = utils.domain_canonical(domain, cache_dir=cache_dir)
# if canonical:
# return canonical

# # Otherwise, well, whatever.
# return 'http://' + domain

# Required scan function. This is the meat of the scanner, where things
# that use the network or are otherwise expensive would go.
#
# Runs locally or in the cloud (Lambda).
# def scan(domain: str, environment: dict, options: dict) -> dict:
# logging.debug('Scan function called with options: %s', options)

# cache_dir = options.get('_', {}).get('cache_dir', './cache')

# url = _url_for_domain(domain, cache_dir)
# lighthouse_cmd = ' '.join([
# LIGHTHOUSE_PATH,
# url,
# '--quiet',
# '--output=json',
# '--chrome-flags="--headless --no-sandbox"',
# *(f'--only-audits={audit}' for audit in LIGHTHOUSE_AUDITS),
# ])

# logging.info('Running Lighthouse CLI...')

# try:
# response = subprocess.check_output(
# lighthouse_cmd,
# stderr=subprocess.STDOUT,
# shell=True, env=None
# )
# raw = str(response, encoding='UTF-8')
# logging.info('Done running Lighthouse CLI')
# return json.loads(raw)['audits']
# except subprocess.CalledProcessError:
# logging.warning("Error running Lighthouse scan for URL %s." % url)
# return {}

# TODO: Add ability to override default LIGHTHOUSE_AUDITS
# Optional handler for custom CLI parameters. Takes the args (as a list of
# strings) and returns a dict of the options values and names that the scanner
# expects, and a list of the arguments it didn't know how to parse.
#
# Should return a dict of the options parsed by this parser (not a mutated form
# of the opts that are passed to it) and a list of the remaining args that it
# didn't recognize.
# def handle_scanner_args(args, opts) -> Tuple[dict, list]:
# parser = ArgumentParser(prefix_chars='--')
# parser.add_argument('--noop-delay', nargs=1)
# parsed, unknown = parser.parse_known_args(args)
# dicted = vars(parsed)
# should_be_single = ['noop_delay']
# dicted = make_values_single(dicted, should_be_single)
# dicted['noop_delay'] = int(dicted['noop_delay'], 10)
# return dicted, unknown
8 changes: 4 additions & 4 deletions utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,12 +416,12 @@ def try_command(command):
return False


def scan(command, env=None, allowed_return_codes=[]):
def scan(command, env=None, allowed_return_codes=[], shell=False):
try:
response = subprocess.check_output(
command,
stderr=subprocess.STDOUT,
shell=False, env=env
shell=shell, env=env
)
return str(response, encoding='UTF-8')
except subprocess.CalledProcessError as exc:
Expand Down Expand Up @@ -707,5 +707,5 @@ def suffix_pattern(suffixes):
return re.compile("(?:%s)$" % center)


def flatten(l):
return list(chain.from_iterable(l))
def flatten(lst):
    """Return one flat list holding the items of every iterable in `lst`.

    Only one level of nesting is removed.
    """
    flat = []
    for inner in lst:
        flat.extend(inner)
    return flat