From e587db405731a9549ef1d4feb0915f68c7d19a08 Mon Sep 17 00:00:00 2001 From: Shane Frasier Date: Tue, 27 Mar 2018 16:37:05 -0400 Subject: [PATCH 1/5] Removing explicit dependency on sslyze --- requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index c64d8936..bd132480 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,8 +21,6 @@ ipython # Scanners ############ -# sslyze -sslyze>=1.3.4,<1.4.0 cryptography # a11y From d91194e7986a81be28bae7af4313a130d9594012 Mon Sep 17 00:00:00 2001 From: Shane Frasier Date: Sat, 31 Mar 2018 18:00:51 -0400 Subject: [PATCH 2/5] Adding requirements-scanners.txt, which new users can use to install dependencies for all scanners --- requirements-scanners.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 requirements-scanners.txt diff --git a/requirements-scanners.txt b/requirements-scanners.txt new file mode 100644 index 00000000..4bc97f8f --- /dev/null +++ b/requirements-scanners.txt @@ -0,0 +1,18 @@ +############ +# Scanners +############ + +cryptography + +# a11y +pyyaml +requests + +# pshtt +git+https://github.com/dhs-ncats/pshtt.git#egg=pshtt + +# trustymail +git+https://github.com/dhs-ncats/trustymail.git#egg=trustymail + +# sslyze +sslyze>=1.3.4,<1.4.0 From a5b315725894cdadaa93e02ac528b0287f5c5335 Mon Sep 17 00:00:00 2001 From: Shane Frasier Date: Sun, 1 Apr 2018 09:19:57 -0400 Subject: [PATCH 3/5] Removing a11y and cryptography requirements, since they have moved to requirements-scanners.txt --- requirements.txt | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index bd132480..702cc7c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,17 +17,6 @@ boto3 ipython -############ -# Scanners -############ - -cryptography - -# a11y -pyyaml -requests - - ############ # Gatherers ############ From ac660614ba8a9f6ae158df4339167f9caef3a42a Mon Sep 17 00:00:00 2001 From: Eric Mill Date: Sun, 1 Apr 2018 14:10:19 -0400 Subject: [PATCH 4/5] Continue separating out requirements by purpose. --- lambda/requirements-lambda.txt | 6 ++++++ requirements-dev.txt | 5 +++++ requirements-gatherers.txt | 3 +++ requirements-scanners.txt | 6 +----- requirements.txt | 38 ++++++++-------------------------- 5 files changed, 24 insertions(+), 34 deletions(-) create mode 100644 lambda/requirements-lambda.txt create mode 100644 requirements-dev.txt create mode 100644 requirements-gatherers.txt diff --git a/lambda/requirements-lambda.txt b/lambda/requirements-lambda.txt new file mode 100644 index 00000000..18061732 --- /dev/null +++ b/lambda/requirements-lambda.txt @@ -0,0 +1,6 @@ +# These are the only dependencies which must get packaged +# into every (Python-based) Lambda container build. + +strict-rfc3339 +publicsuffix + diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..0963f1f4 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,5 @@ +# These requirements are useful or needed when developing +# domain-scan itself. + +ipython +pytest diff --git a/requirements-gatherers.txt b/requirements-gatherers.txt new file mode 100644 index 00000000..e374b3c1 --- /dev/null +++ b/requirements-gatherers.txt @@ -0,0 +1,3 @@ +# censys +google-cloud-bigquery +google-auth-oauthlib diff --git a/requirements-scanners.txt b/requirements-scanners.txt index 4bc97f8f..d519ecac 100644 --- a/requirements-scanners.txt +++ b/requirements-scanners.txt @@ -1,8 +1,3 @@ -############ -# Scanners -############ - -cryptography # a11y pyyaml @@ -16,3 +11,4 @@ git+https://github.com/dhs-ncats/trustymail.git#egg=trustymail # sslyze sslyze>=1.3.4,<1.4.0 +cryptography diff --git a/requirements.txt b/requirements.txt index 702cc7c4..f462502f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,33 +1,13 @@ -########################## -# Common (Local + Lambda) -########################## -# -# These are the only dependencies which must get packaged -# into every (Python-based) Lambda container build. - -strict-rfc3339 -publicsuffix +######## +# NOTE: Any requirements which are needed for Python-based Lambda function +# packaging should also be listed in lambda/requirements-lambda.txt. +# Those are in the bottom section below. -################# -# Common (Local) -################# - +# Used by parts of domain-scan, but not used inside of Lambda function +# invocation. boto3 -ipython - - -############ -# Gatherers -############ -# censys -google-cloud-bigquery -google-auth-oauthlib - - -############ -# Tests -############ - -pytest +# Used in Lanbda functions. Also copied to lambda/requirements-lambda.txt. +strict-rfc3339 +publicsuffix From 2a8c71109134094432246afa0005292e3aedfbbb Mon Sep 17 00:00:00 2001 From: Eric Mill Date: Sun, 1 Apr 2018 14:22:07 -0400 Subject: [PATCH 5/5] adjust remote build, document new reqs splitting --- README.md | 41 ++++++++++++++++++---------------- lambda/remote_build.sh | 2 +- lambda/requirements-lambda.txt | 1 - requirements-gatherers.txt | 3 +++ requirements-scanners.txt | 10 +++++---- requirements.txt | 2 +- 6 files changed, 33 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 10f5acbb..42e89bd2 100644 --- a/README.md +++ b/README.md @@ -21,23 +21,26 @@ There is also built-in support for using **headless Chrome** to efficiently meas ### Requirements -`domain-scan` requires **Python 3.5 and up**. To install dependencies: +`domain-scan` requires **Python 3.5 and up**. + +To install **core dependencies**: ```bash pip install -r requirements.txt ``` -This will automatically allow the use of two scanners: +You can install scanner- or gatherer-specific dependencies yourself. Or, you can "quick start" by just **installing all dependencies for all scanners and/or all gatherers**: -* `pshtt` - A scanner that uses the [`pshtt`](https://github.com/dhs-ncats/pshtt) Python package from the [Department of Homeland Security's NCATS team](https://github.com/dhs-ncats). -* `sslyze` - A scanner that uses the [`sslyze`](https://github.com/nabla-c0d3/sslyze) Python package maintained by Alban Diquet. +```bash +pip install -r requirements-scanners.txt +pip install -r requirements-gatherers.txt +``` -Other individual scanners will require additional externally installed dependencies: - -* `trustymail`: The `trustymail` command, available from the [`trustymail`](https://github.com/dhs-ncats/trustymail) Python package from the [Department of Homeland Security's NCATS team](https://github.com/dhs-ncats). (Override path by setting the `TRUSTYMAIL_PATH` environment variable.) -* `a11y`: The `pa11y` command, available from the [`pa11y`](https://www.npmjs.com/package/pa11y) Node package. (Override path by setting the `PA11Y_PATH` environment variable.) -* `third_parties`: The `phantomas` command, available from the [`phantomas`](https://www.npmjs.com/package/phantomas) Node package. (Override path by setting the `PHANTOMAS_PATH` environment variable.) +If you plan on **developing/testing domain-scan itself**, install development requirements: +```bash +pip install -r requirements-dev.txt +``` ### Usage @@ -65,7 +68,16 @@ Append columns to each row with metadata about the scan itself, such as how long ./scan example.com --scan=pshtt --meta ``` -##### Parallelization +### Scanners + +* `pshtt` - A scanner that uses the [`pshtt`](https://github.com/dhs-ncats/pshtt) Python package from the [Department of Homeland Security's NCATS team](https://github.com/dhs-ncats). +* `sslyze` - A scanner that uses the [`sslyze`](https://github.com/nabla-c0d3/sslyze) Python package maintained by Alban Diquet. +* `trustymail`: The `trustymail` command, available from the [`trustymail`](https://github.com/dhs-ncats/trustymail) Python package from the [Department of Homeland Security's NCATS team](https://github.com/dhs-ncats). (Override path by setting the `TRUSTYMAIL_PATH` environment variable.) +* `third_parties` - What third party web services are in use, using [headless Chrome](https://developers.google.com/web/updates/2017/04/headless-chrome) to trap outgoing requests. (See documentation for [using](#headless-chrome) or [writing](#developing-chrome-scanners) Chrome-based scanners.) +* `a11y` - Accessibility issues, using [`pa11y`](https://github.com/pa11y/pa11y). +* `noop` - Test scanner (no-op) used for development and debugging. Does nothing. + +### Parallelization It's important to understand that **scans run in parallel by default**, and **data is streamed to disk immediately** after each scan is done. @@ -117,15 +129,6 @@ See [`docs/lambda.md`](`docs/lambda.md`) for how to build and deploy Lambda-base ### Options -**Scanners:** - -* `pshtt` - HTTP/HTTPS/HSTS configuration, using [`pshtt`](https://github.com/dhs-ncats/pshtt). -* `trustymail` - MX/SPF/STARTTLS/DMARC configuration, using [`trustymail`](https://github.com/dhs-ncats/trustymail). -* `sslyze` - TLS/SSL configuration, using [`sslyze`](https://github.com/nabla-c0d3/sslyze). -* `third_parties` - What third party web services are in use, using [headless Chrome](https://developers.google.com/web/updates/2017/04/headless-chrome) to trap outgoing requests. (See documentation for [using](#headless-chrome) or [writing](#developing-chrome-scanners) Chrome-based scanners.) -* `a11y` - Accessibility issues, using [`pa11y`](https://github.com/pa11y/pa11y). -* `noop` - Test scanner (no-op) used for development and debugging. Does nothing. - **General options:** * `--scan` - **Required.** Comma-separated names of one or more scanners. diff --git a/lambda/remote_build.sh b/lambda/remote_build.sh index 4c3e2ef0..c5edde8e 100755 --- a/lambda/remote_build.sh +++ b/lambda/remote_build.sh @@ -42,7 +42,7 @@ pip install . cd .. cd domain-scan -pip install -r requirements.txt +pip install -r lambda/requirements-lambda.txt cd .. deactivate diff --git a/lambda/requirements-lambda.txt b/lambda/requirements-lambda.txt index 18061732..1ca82012 100644 --- a/lambda/requirements-lambda.txt +++ b/lambda/requirements-lambda.txt @@ -3,4 +3,3 @@ strict-rfc3339 publicsuffix - diff --git a/requirements-gatherers.txt b/requirements-gatherers.txt index e374b3c1..91b850da 100644 --- a/requirements-gatherers.txt +++ b/requirements-gatherers.txt @@ -1,3 +1,6 @@ +### +# Requirements used by specific gatherers. + # censys google-cloud-bigquery google-auth-oauthlib diff --git a/requirements-scanners.txt b/requirements-scanners.txt index d519ecac..44ec8416 100644 --- a/requirements-scanners.txt +++ b/requirements-scanners.txt @@ -1,7 +1,5 @@ - -# a11y -pyyaml -requests +### +# Requirements used by specific scanners. # pshtt git+https://github.com/dhs-ncats/pshtt.git#egg=pshtt @@ -12,3 +10,7 @@ git+https://github.com/dhs-ncats/trustymail.git#egg=trustymail # sslyze sslyze>=1.3.4,<1.4.0 cryptography + +# a11y / csp +pyyaml +requests diff --git a/requirements.txt b/requirements.txt index f462502f..37e38281 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,6 @@ # invocation. boto3 -# Used in Lanbda functions. Also copied to lambda/requirements-lambda.txt. +# Used in Lambda functions. Also copied to lambda/requirements-lambda.txt. strict-rfc3339 publicsuffix