From b207c4b7965bd728ce8de82ccdf8adfeff1466a9 Mon Sep 17 00:00:00 2001
From: Ajin Abraham <ajin25@gmail.com>
Date: Mon, 4 Nov 2024 19:09:06 -0800
Subject: [PATCH] Refactor SARIF formatter and bump version to 0.4.1 (#122)

---
 njsscan/__init__.py         |   2 +-
 njsscan/formatters/sarif.py | 159 ++++++++++++++----------------------
 tox.ini                     |   3 +
 3 files changed, 66 insertions(+), 98 deletions(-)

diff --git a/njsscan/__init__.py b/njsscan/__init__.py
index 1716c21..067cc43 100644
--- a/njsscan/__init__.py
+++ b/njsscan/__init__.py
@@ -6,7 +6,7 @@
 __title__ = 'njsscan'
 __authors__ = 'Ajin Abraham'
 __copyright__ = f'Copyright {datetime.now().year} Ajin Abraham, OpenSecurity'
-__version__ = '0.4.0'
+__version__ = '0.4.1'
 __version_info__ = tuple(int(i) for i in __version__.split('.'))
 __all__ = [
     '__title__',
diff --git a/njsscan/formatters/sarif.py b/njsscan/formatters/sarif.py
index 3662950..8b3a6ff 100644
--- a/njsscan/formatters/sarif.py
+++ b/njsscan/formatters/sarif.py
@@ -1,33 +1,13 @@
 # -*- coding: utf_8 -*-
-"""Sarif output format.
+"""SARIF output formatter for NodeJS scan results.
 
-Based on https://github.com/microsoft/bandit-sarif-formatter/
-blob/master/bandit_sarif_formatter/formatter.py
+Based on https://github.com/microsoft/
+bandit-sarif-formatter/blob/master/
+bandit_sarif_formatter/formatter.py
+MIT License, Copyright (c) Microsoft Corporation.
 
-Copyright (c) Microsoft.  All Rights Reserved.
-MIT License
-
-Copyright (c) Microsoft Corporation.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE
 """
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import PurePath
 import urllib.parse as urlparse
 
@@ -35,19 +15,15 @@
 
 from jschema_to_python.to_json import to_json
 
-
 TS_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
 
 
 def level_from_severity(severity):
-    if severity == 'ERROR':
-        return 'error'
-    elif severity == 'WARNING':
-        return 'warning'
-    elif severity == 'INFO':
-        return 'note'
-    else:
-        return 'none'
+    return {
+        'ERROR': 'error',
+        'WARNING': 'warning',
+        'INFO': 'note',
+    }.get(severity, 'none')
 
 
 def to_uri(file_path):
@@ -55,72 +31,53 @@ def to_uri(file_path):
     if pure_path.is_absolute():
         return pure_path.as_uri()
     else:
-        posix_path = pure_path.as_posix()  # Replace backslashes with slashes.
-        return urlparse.quote(posix_path)  # %-encode special characters.
+        return urlparse.quote(pure_path.as_posix())
 
 
-def get_rule_name(rule_id):
-    normalized = []
-    noms = rule_id.split('_')
-    for nom in noms:
-        normalized.append(nom.capitalize())
-    return ''.join(normalized)
+def format_rule_name(rule_id):
+    return ''.join(word.capitalize() for word in rule_id.split('_'))
 
 
 def add_results(scan_results, run):
     if run.results is None:
         run.results = []
-    res = {}
-    res.update(scan_results.get('nodejs', []))
-    res.update(scan_results.get('templates', []))
+    combined_results = {
+        **scan_results.get('nodejs', {}),
+        **scan_results.get('templates', {})}
     rules = {}
     rule_indices = {}
 
-    for rule_id, issue_dict in res.items():
+    for rule_id, issue_dict in combined_results.items():
         if 'files' not in issue_dict:
-            # no missing controls in sarif
             continue
-        result = create_result(rule_id, issue_dict, rules, rule_indices)
+        result = create_result(
+            rule_id,
+            issue_dict,
+            rules,
+            rule_indices)
         run.results.append(result)
 
-    if len(rules) > 0:
+    if rules:
         run.tool.driver.rules = list(rules.values())
 
 
 def create_result(rule_id, issue_dict, rules, rule_indices):
-    if rule_id in rules:
-        rule = rules[rule_id]
-        rule_index = rule_indices[rule_id]
-    else:
-        doc = 'https://ajinabraham.github.io/nodejsscan/#{}'.format(rule_id)
+    rule, rule_index = rules.get(rule_id), rule_indices.get(rule_id)
+
+    if not rule:
+        doc = ('https://ajinabraham.'
+               f'github.io/nodejsscan/#{rule_id}')
         cwe_id = issue_dict['metadata']['cwe'].split(':')[0].lower()
         rule = om.ReportingDescriptor(
             id=rule_id,
-            name=get_rule_name(rule_id),
+            name=format_rule_name(rule_id),
             help_uri=doc,
-            properties={
-                'tags': ['security', f'external/cwe/{cwe_id}'],
-            },
-        )
+            properties={'tags': ['security', f'external/cwe/{cwe_id}']})
         rule_index = len(rules)
         rules[rule_id] = rule
         rule_indices[rule_id] = rule_index
 
-    locations = []
-    for item in issue_dict['files']:
-        physical_location = om.PhysicalLocation(
-            artifact_location=om.ArtifactLocation(
-                uri=to_uri(item['file_path'])),
-        )
-        physical_location.region = om.Region(
-            start_line=item['match_lines'][0],
-            end_line=item['match_lines'][1],
-            start_column=item['match_position'][0],
-            end_column=item['match_position'][1],
-            snippet=om.ArtifactContent(text=item['match_string']),
-        )
-        locations.append(om.Location(physical_location=physical_location))
-
+    locations = [create_location(item) for item in issue_dict['files']]
     return om.Result(
         rule_id=rule.id,
         rule_index=rule_index,
@@ -129,39 +86,47 @@ def create_result(rule_id, issue_dict, rules, rule_indices):
         locations=locations,
         properties={
             'owasp-web': issue_dict['metadata']['owasp-web'],
-            'cwe': issue_dict['metadata']['cwe'],
-        },
-    )
+            'cwe': issue_dict['metadata']['cwe']})
+
+
+def create_location(item):
+    return om.Location(
+        physical_location=om.PhysicalLocation(
+            artifact_location=om.ArtifactLocation(uri=to_uri(item['file_path'])),
+            region=om.Region(
+                start_line=item['match_lines'][0],
+                end_line=item['match_lines'][1],
+                start_column=item['match_position'][0],
+                end_column=item['match_position'][1],
+                snippet=om.ArtifactContent(text=item['match_string']))))
 
 
 def sarif_output(outfile, scan_results, njsscan_version):
     log = om.SarifLog(
-        schema_uri=('https://raw.githubusercontent.com/oasis-tcs/'
-                    'sarif-spec/master/Schemata/sarif-schema-2.1.0.json'),
+        schema_uri=('https://raw.githubusercontent.com/'
+                    'oasis-tcs/sarif-spec/master/Schemata/'
+                    'sarif-schema-2.1.0.json'),
         version='2.1.0',
-        runs=[
-            om.Run(
-                tool=om.Tool(driver=om.ToolComponent(
-                    name='nodejsscan',
-                    information_uri='https://github.com/ajinabraham/njsscan',
-                    semantic_version=njsscan_version,
-                    version=njsscan_version),
-                ),
-                invocations=[
-                    om.Invocation(
-                        end_time_utc=datetime.utcnow().strftime(TS_FORMAT),
-                        execution_successful=True,
-                    ),
-                ],
-            ),
-        ],
-    )
+        runs=[om.Run(
+            tool=om.Tool(driver=om.ToolComponent(
+                name='nodejsscan',
+                information_uri='https://github.com/ajinabraham/njsscan',
+                semantic_version=njsscan_version,
+                version=njsscan_version,
+            )),
+            invocations=[om.Invocation(
+                end_time_utc=datetime.now(
+                    timezone.utc).strftime(TS_FORMAT),
+                execution_successful=True,
+            )])])
     run = log.runs[0]
     add_results(scan_results, run)
     json_out = to_json(log)
+
     if outfile:
         with open(outfile, 'w') as of:
             of.write(json_out)
     else:
         print(json_out)
+
     return json_out
diff --git a/tox.ini b/tox.ini
index 95b5e88..1a1d136 100644
--- a/tox.ini
+++ b/tox.ini
@@ -16,6 +16,7 @@ setenv =
 skip_install = true
 deps =
     pydocstyle
+    autopep8
     flake8
     flake8-broken-line
     flake8-bugbear
@@ -32,6 +33,7 @@ deps =
     pep8-naming
     radon
 commands =
+    autopep8 --recursive --in-place njsscan tests
     flake8 setup.py njsscan tests
 
 [testenv:bandit]
@@ -97,3 +99,4 @@ ignore =
     R701,
     # Function too complex
 radon_max_cc = 10
+max-line-length = 88