Skip to content
This repository has been archived by the owner on Dec 17, 2021. It is now read-only.

Commit

Permalink
Add in a first-pass at a node.js-based scanner.
Browse files Browse the repository at this point in the history
  • Loading branch information
danielnaab committed May 14, 2020
1 parent 63a195c commit 47aca99
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 77 deletions.
4 changes: 4 additions & 0 deletions .snyk
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Snyk (https://snyk.io) policy file, patches or ignores known vulnerabilities.
version: v1.14.1
ignore: {}
patch: {}
65 changes: 65 additions & 0 deletions lighthouse.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env node

/**
* Lighthouse scanner
* This module orchestrates parallel Lighthouse scans over one headless Chrome
* instance.
*/

const chromeLauncher = require('chrome-launcher');
const lighthouse = require('lighthouse');
const puppeteer = require('puppeteer');


/**
 * Launch a Chrome instance through Puppeteer for Lighthouse to attach to.
 *
 * @param {object} [launchOptions={}] Extra options passed to
 *   `puppeteer.launch`, shallow-merged over the defaults below (for example
 *   `{executablePath: '/usr/bin/chromium'}`). A supplied `args` array replaces
 *   the default `args` array rather than being appended to it.
 * @returns {Promise<object>} Resolves with whatever `puppeteer.launch`
 *   resolves with.
 */
const getBrowser = async (launchOptions = {}) => {
  const defaults = {
    headless: true,
    ignoreHTTPSErrors: true,
    args: [
      '--no-sandbox',
      '--disable-gpu',
      '--single-process'
    ]
  };
  // `defaults` is created fresh on every call, so merging into it is safe and
  // leaves the caller's object untouched.
  return puppeteer.launch(Object.assign(defaults, launchOptions));
};

/**
 * Launch a dedicated Chrome via chrome-launcher, run Lighthouse against `url`
 * on that instance's port, and shut Chrome down again.
 *
 * Chrome is killed whether or not the Lighthouse run succeeds, and the
 * caller's `opts` object is not modified (the port is added to a copy).
 *
 * @param {string} url URL handed to Lighthouse.
 * @param {object} opts Lighthouse options; `opts.chromeFlags` is also passed
 *   to chrome-launcher.
 * @param {?object} [config=null] Lighthouse config, passed through unchanged.
 * @returns {Promise<object>} Resolves with the `lhr` property of the
 *   Lighthouse result; rejects if launching, scanning, or killing fails.
 */
async function launchChromeAndRunLighthouse(url, opts, config = null) {
  const chrome = await chromeLauncher.launch({chromeFlags: opts.chromeFlags});
  try {
    const runOptions = Object.assign({}, opts, {port: chrome.port});
    const results = await lighthouse(url, runOptions, config);
    return results.lhr;
  } finally {
    // Always release the Chrome process, even when the scan rejects.
    await chrome.kill();
  }
}

// Default options object: the Chrome flags request a headless run with the
// sandbox disabled.
const opts = {
  chromeFlags: [
    '--headless',
    '--no-sandbox',
  ],
};

/*
function configure() {
let port;
chromeLauncher.launch({
chromeFlags: ['--headless', '--no-sandbox']
}).then(chrome => {
port = chrome.port
});
}
launchChromeAndRunLighthouse('https://www.whitehouse.gov', opts).then(results => {
console.log(results);
});
*/

// Script entry point: launch the browser, run one Lighthouse scan against a
// fixed URL over the browser's debugging port, and print the report (lhr).
getBrowser()
  .then(async browser => {
    const url = 'https://www.whitehouse.gov';
    try {
      const {lhr} = await lighthouse(url, {
        // Lighthouse attaches to the already-running browser using the port
        // taken from its WebSocket endpoint URL.
        port: (new URL(browser.wsEndpoint())).port,
        output: 'json',
        logLevel: 'info',
      });
      console.log(lhr);
    } finally {
      // Close the browser even when the scan throws, so no Chrome process is
      // left behind.
      await browser.close();
    }
  })
  .catch(err => {
    // Report the failure instead of leaving an unhandled rejection, and make
    // the process exit with a non-zero status.
    console.error(err);
    process.exitCode = 1;
  });
51 changes: 51 additions & 0 deletions scanners/lighthouse.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
'use strict';

const lighthouse = require('lighthouse');


// IDs of the Lighthouse audits to run; passed to Lighthouse as `onlyAudits`.
const LIGHTHOUSE_AUDITS = [
  'color-contrast', 'font-size', 'image-alt', 'input-image-alt',
  'performance-budget', 'speed-index', 'tap-targets', 'timing-budget',
  'total-byte-weight', 'unminified-css', 'unminified-javascript',
  'uses-text-compression', 'viewport',
];


// JS entry point for Lighthouse scan.
module.exports = {
scan: async (domain, environment, options, browser, page) => {
const url = 'https://' + domain;
try {
const output = await lighthouse(url, {
port: (new URL(browser.wsEndpoint())).port,
onlyAudits: LIGHTHOUSE_AUDITS,

disableStorageReset: false,
saveAssets: false,
listAllAudits: false,
listTraceCategories: false,
printConfig: false,
output: [ 'json' ],
chromeFlags: '',
enableErrorReporting: false,
logLevel: 'silent',
outputPath: 'stdout',
});
return output.lhr.audits;

} catch (exc) {
return {
error: exc.message
}
}
}
}
135 changes: 58 additions & 77 deletions scanners/lighthouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@
To use, set the `LIGHTHOUSE_PATH` environment variable to the Lighthouse path.
"""


import json
import logging
import os
import subprocess

from utils import utils

# Lambda execution is switched off for this scanner (noted as untested).
lambda_support = False

# Marks this scanner as a JS-based scan that uses headless Chrome; the scan
# method itself lives in lighthouse.js.
scan_headless = True

# Lighthouse executable, overridable via the LIGHTHOUSE_PATH environment
# variable; defaults to `lighthouse`.
LIGHTHOUSE_PATH = os.getenv('LIGHTHOUSE_PATH', 'lighthouse')
LIGHTHOUSE_AUDITS = [
Expand All @@ -39,78 +41,6 @@
workers = 1


# Optional one-time initialization for all scans.
# If defined, any data returned will be passed to every scan instance and used
# to update the environment dict for that instance
# Will halt scan execution if it returns False or raises an exception.
#
# Run locally.
# def init(environment: dict, options: dict) -> dict:
# logging.debug("Init function.")

# #cache_dir = options.get('_', {}).get('cache_dir', './cache')

# return {'constant': 12345}


# Optional one-time initialization per-scan. If defined, any data
# returned will be passed to the instance for that domain and used to update
# the environment dict for that particular domain.
#
# Run locally.
# def init_domain(domain: str, environment: dict, options: dict) -> dict:
# logging.debug("Init function for %s." % domain)
# return {'variable': domain}


def _url_for_domain(domain: str, cache_dir: str):
    """Pick the URL to scan for ``domain``.

    A value that already starts with ``http://`` or ``https://`` is returned
    unchanged. Otherwise the result of ``utils.domain_canonical`` (looked up
    with ``cache_dir``) is used when it is truthy, falling back to plain
    ``http://<domain>``.
    """
    if domain.startswith(('http://', 'https://')):
        return domain

    # Prefer the canonical endpoint (pshtt data) when one is available;
    # otherwise fall back to a plain http:// URL.
    return utils.domain_canonical(domain, cache_dir=cache_dir) or 'http://' + domain


# Required scan function. This is the meat of the scanner, where things
# that use the network or are otherwise expensive would go.
#
# Runs locally or in the cloud (Lambda).
def scan(domain: str, environment: dict, options: dict) -> dict:
    """Run the Lighthouse CLI against ``domain`` and return its audits.

    The command is passed to ``subprocess`` as an argument list rather than as
    a shell string, so characters in the domain/URL cannot be interpreted by a
    shell.

    Args:
        domain: Domain or URL to scan; resolved through ``_url_for_domain``.
        environment: Not used by this function.
        options: Scan options; ``options['_']['cache_dir']`` (default
            ``'./cache'``) is the cache directory used for URL resolution.

    Returns:
        The ``audits`` entry of Lighthouse's JSON output, or ``{}`` when the
        CLI exits with an error or cannot be started.
    """
    import shlex  # local import: only needed to split LIGHTHOUSE_PATH

    logging.debug('Scan function called with options: %s', options)

    cache_dir = options.get('_', {}).get('cache_dir', './cache')

    url = _url_for_domain(domain, cache_dir)
    lighthouse_cmd = [
        # LIGHTHOUSE_PATH may hold a command with arguments; split it the way
        # the shell previously did.
        *shlex.split(LIGHTHOUSE_PATH),
        url,
        '--quiet',
        '--output=json',
        # One argv entry; no shell quoting is needed without a shell.
        '--chrome-flags=--headless --no-sandbox',
        *(f'--only-audits={audit}' for audit in LIGHTHOUSE_AUDITS),
    ]

    logging.info('Running Lighthouse CLI...')

    try:
        response = subprocess.check_output(
            lighthouse_cmd,
            stderr=subprocess.STDOUT,
        )
        raw = str(response, encoding='UTF-8')
        logging.info('Done running Lighthouse CLI')
        return json.loads(raw)['audits']
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable binary, which the shell
        # used to report as a non-zero exit status.
        logging.warning('Error running Lighthouse scan for URL %s.', url)
        return {}


# Required CSV row conversion function. Usually one row, can be more.
#
# Run locally.
Expand All @@ -128,6 +58,57 @@ def to_rows(data):
headers = ['ID', 'Description', 'Title', 'Score', 'Score Display Mode']


#
# Below is an implementation that will spawn Lighthouse via its cli rather than
# use a Puppeteer-managed headless Chrome.
#

# def _url_for_domain(domain: str, cache_dir: str):
# if domain.startswith('http://') or domain.startswith('https://'):
# return domain

# # If we have data from pshtt, use the canonical endpoint.
# canonical = utils.domain_canonical(domain, cache_dir=cache_dir)
# if canonical:
# return canonical

# # Otherwise, well, whatever.
# return 'http://' + domain

# Required scan function. This is the meat of the scanner, where things
# that use the network or are otherwise expensive would go.
#
# Runs locally or in the cloud (Lambda).
# def scan(domain: str, environment: dict, options: dict) -> dict:
# logging.debug('Scan function called with options: %s', options)

# cache_dir = options.get('_', {}).get('cache_dir', './cache')

# url = _url_for_domain(domain, cache_dir)
# lighthouse_cmd = ' '.join([
# LIGHTHOUSE_PATH,
# url,
# '--quiet',
# '--output=json',
# '--chrome-flags="--headless --no-sandbox"',
# *(f'--only-audits={audit}' for audit in LIGHTHOUSE_AUDITS),
# ])

# logging.info('Running Lighthouse CLI...')

# try:
# response = subprocess.check_output(
# lighthouse_cmd,
# stderr=subprocess.STDOUT,
# shell=True, env=None
# )
# raw = str(response, encoding='UTF-8')
# logging.info('Done running Lighthouse CLI')
# return json.loads(raw)['audits']
# except subprocess.CalledProcessError:
# logging.warning("Error running Lighthouse scan for URL %s." % url)
# return {}

# TODO: Add ability to override default LIGHTHOUSE_AUDITS
# Optional handler for custom CLI parameters. Takes the args (as a list of
# strings) and returns a dict of the options values and names that the scanner
Expand Down

0 comments on commit 47aca99

Please sign in to comment.