From b207c4b7965bd728ce8de82ccdf8adfeff1466a9 Mon Sep 17 00:00:00 2001 From: Ajin Abraham Date: Mon, 4 Nov 2024 19:09:06 -0800 Subject: [PATCH] sarif qa (#122) --- njsscan/__init__.py | 2 +- njsscan/formatters/sarif.py | 159 ++++++++++++++---------------------- tox.ini | 3 + 3 files changed, 66 insertions(+), 98 deletions(-) diff --git a/njsscan/__init__.py b/njsscan/__init__.py index 1716c21..067cc43 100644 --- a/njsscan/__init__.py +++ b/njsscan/__init__.py @@ -6,7 +6,7 @@ __title__ = 'njsscan' __authors__ = 'Ajin Abraham' __copyright__ = f'Copyright {datetime.now().year} Ajin Abraham, OpenSecurity' -__version__ = '0.4.0' +__version__ = '0.4.1' __version_info__ = tuple(int(i) for i in __version__.split('.')) __all__ = [ '__title__', diff --git a/njsscan/formatters/sarif.py b/njsscan/formatters/sarif.py index 3662950..8b3a6ff 100644 --- a/njsscan/formatters/sarif.py +++ b/njsscan/formatters/sarif.py @@ -1,33 +1,13 @@ # -*- coding: utf_8 -*- -"""Sarif output format. +"""SARIF output formatter for NodeJS scan results. -Based on https://github.com/microsoft/bandit-sarif-formatter/ -blob/master/bandit_sarif_formatter/formatter.py +Based on https://github.com/microsoft/ +bandit-sarif-formatter/blob/master/ +bandit_sarif_formatter/formatter.py +MIT License, Copyright (c) Microsoft Corporation. -Copyright (c) Microsoft. All Rights Reserved. -MIT License - -Copyright (c) Microsoft Corporation. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE """ -from datetime import datetime +from datetime import datetime, timezone from pathlib import PurePath import urllib.parse as urlparse @@ -35,19 +15,15 @@ from jschema_to_python.to_json import to_json - TS_FORMAT = '%Y-%m-%dT%H:%M:%SZ' def level_from_severity(severity): - if severity == 'ERROR': - return 'error' - elif severity == 'WARNING': - return 'warning' - elif severity == 'INFO': - return 'note' - else: - return 'none' + return { + 'ERROR': 'error', + 'WARNING': 'warning', + 'INFO': 'note', + }.get(severity, 'none') def to_uri(file_path): @@ -55,72 +31,53 @@ def to_uri(file_path): if pure_path.is_absolute(): return pure_path.as_uri() else: - posix_path = pure_path.as_posix() # Replace backslashes with slashes. - return urlparse.quote(posix_path) # %-encode special characters. + return urlparse.quote(pure_path.as_posix()) -def get_rule_name(rule_id): - normalized = [] - noms = rule_id.split('_') - for nom in noms: - normalized.append(nom.capitalize()) - return ''.join(normalized) +def format_rule_name(rule_id): + return ''.join(word.capitalize() for word in rule_id.split('_')) def add_results(scan_results, run): if run.results is None: run.results = [] - res = {} - res.update(scan_results.get('nodejs', [])) - res.update(scan_results.get('templates', [])) + combined_results = { + **scan_results.get('nodejs', {}), + **scan_results.get('templates', {})} rules = {} rule_indices = {} - for rule_id, issue_dict in res.items(): + for rule_id, issue_dict in combined_results.items(): if 'files' not in issue_dict: - # no missing controls in sarif continue - result = create_result(rule_id, issue_dict, rules, rule_indices) + result = create_result( + rule_id, + issue_dict, + rules, + rule_indices) run.results.append(result) - if len(rules) > 0: + if rules: run.tool.driver.rules = list(rules.values()) def create_result(rule_id, issue_dict, rules, rule_indices): - if rule_id in rules: - rule = rules[rule_id] - rule_index = rule_indices[rule_id] - else: - doc = 'https://ajinabraham.github.io/nodejsscan/#{}'.format(rule_id) + rule, rule_index = rules.get(rule_id), rule_indices.get(rule_id) + + if not rule: + doc = ('https://ajinabraham.' + f'github.io/nodejsscan/#{rule_id}') cwe_id = issue_dict['metadata']['cwe'].split(':')[0].lower() rule = om.ReportingDescriptor( id=rule_id, - name=get_rule_name(rule_id), + name=format_rule_name(rule_id), help_uri=doc, - properties={ - 'tags': ['security', f'external/cwe/{cwe_id}'], - }, - ) + properties={'tags': ['security', f'external/cwe/{cwe_id}']}) rule_index = len(rules) rules[rule_id] = rule rule_indices[rule_id] = rule_index - locations = [] - for item in issue_dict['files']: - physical_location = om.PhysicalLocation( - artifact_location=om.ArtifactLocation( - uri=to_uri(item['file_path'])), - ) - physical_location.region = om.Region( - start_line=item['match_lines'][0], - end_line=item['match_lines'][1], - start_column=item['match_position'][0], - end_column=item['match_position'][1], - snippet=om.ArtifactContent(text=item['match_string']), - ) - locations.append(om.Location(physical_location=physical_location)) - + locations = [create_location(item) for item in issue_dict['files']] return om.Result( rule_id=rule.id, rule_index=rule_index, @@ -129,39 +86,47 @@ def create_result(rule_id, issue_dict, rules, rule_indices): locations=locations, properties={ 'owasp-web': issue_dict['metadata']['owasp-web'], - 'cwe': issue_dict['metadata']['cwe'], - }, - ) + 'cwe': issue_dict['metadata']['cwe']}) + + +def create_location(item): + return om.Location( + physical_location=om.PhysicalLocation( + artifact_location=om.ArtifactLocation(uri=to_uri(item['file_path'])), + region=om.Region( + start_line=item['match_lines'][0], + end_line=item['match_lines'][1], + start_column=item['match_position'][0], + end_column=item['match_position'][1], + snippet=om.ArtifactContent(text=item['match_string'])))) def sarif_output(outfile, scan_results, njsscan_version): log = om.SarifLog( - schema_uri=('https://raw.githubusercontent.com/oasis-tcs/' - 'sarif-spec/master/Schemata/sarif-schema-2.1.0.json'), + schema_uri=('https://raw.githubusercontent.com/' + 'oasis-tcs/sarif-spec/master/Schemata/' + 'sarif-schema-2.1.0.json'), version='2.1.0', - runs=[ - om.Run( - tool=om.Tool(driver=om.ToolComponent( - name='nodejsscan', - information_uri='https://github.com/ajinabraham/njsscan', - semantic_version=njsscan_version, - version=njsscan_version), - ), - invocations=[ - om.Invocation( - end_time_utc=datetime.utcnow().strftime(TS_FORMAT), - execution_successful=True, - ), - ], - ), - ], - ) + runs=[om.Run( + tool=om.Tool(driver=om.ToolComponent( + name='nodejsscan', + information_uri='https://github.com/ajinabraham/njsscan', + semantic_version=njsscan_version, + version=njsscan_version, + )), + invocations=[om.Invocation( + end_time_utc=datetime.now( + timezone.utc).strftime(TS_FORMAT), + execution_successful=True, + )])]) run = log.runs[0] add_results(scan_results, run) json_out = to_json(log) + if outfile: with open(outfile, 'w') as of: of.write(json_out) else: print(json_out) + return json_out diff --git a/tox.ini b/tox.ini index 95b5e88..1a1d136 100644 --- a/tox.ini +++ b/tox.ini @@ -16,6 +16,7 @@ setenv = skip_install = true deps = pydocstyle + autopep8 flake8 flake8-broken-line flake8-bugbear @@ -32,6 +33,7 @@ deps = pep8-naming radon commands = + autopep8 --recursive --in-place njsscan tests flake8 setup.py njsscan tests [testenv:bandit] @@ -97,3 +99,4 @@ ignore = R701, # Function too complex radon_max_cc = 10 +max-line-length = 88 \ No newline at end of file