# Copyright (C) 2014 Red Hat, Inc.
#
# This file is part of csmock.
#
# csmock is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# csmock is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csmock.  If not, see <http://www.gnu.org/licenses/>.

"""csmock plug-in that runs `semgrep scan` inside the mock chroot.

Contents of py/plugins/semgrep.py as introduced by the patch
"plugins/semgrep: add csmock semgrep plugin"
(commit 67c318d0829f101a1c9eaa34569829a9f55a1020, Yuguang Wang,
Tue, 2 Jan 2024 17:10:54 +0800).

Resolves: https://issues.redhat.com/browse/OSH-57
Closes: https://github.com/csutils/csmock/pull/149
"""

import os
import shutil

# disable metrics to be sent to semgrep cloud
SEMGREP_SEND_METRICS = "off"

# FIXME: should we use a dedicated repo to maintain semgrep rules?
SEMGREP_RULES_REPO = "https://git.prodsec.redhat.com/prodsec-services/masssast"

SEMGREP_SCAN_DIR = "/builddir/build/BUILD"

SEMGREP_SCAN_OUTPUT = "/builddir/semgrep-scan-results.sarif"

SEMGREP_RULES_CACHE_DIR = "/var/tmp/csmock/semgrep"

SEMGREP_SCAN_LOG = "/builddir/semgrep-scan.log"

FILTER_CMD = f"csgrep '%s' --mode=json --prepend-path-prefix={SEMGREP_SCAN_DIR}/ > '%s'"

# exit codes of `semgrep scan` that are reported as errors by scan_hook
_SCAN_EXIT_CODE_ERRORS = {
    123: "Indiscriminate errors reported on standard error.",
    124: "Command line parsing errors.",
    125: "Unexpected internal errors (bugs).",
}


def _clear_directory(path):
    """Remove every entry inside *path* (hidden ones included), keeping *path*.

    Raises OSError if an entry cannot be removed.
    """
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_dir(follow_symlinks=False):
                shutil.rmtree(entry.path)
            else:
                os.unlink(entry.path)


class PluginProps:
    """Static properties of the semgrep plug-in."""

    def __init__(self):
        self.description = (
            "A fast, open-source, static analysis engine for finding bugs, "
            "detecting dependency vulnerabilities, and enforcing code standards."
        )

        # include this plug-in in `csmock --all-tools`
        self.stable = True


class Plugin:
    """Registers the hooks that fetch semgrep rules, run the scan and convert its results."""

    def __init__(self):
        self.enabled = False

        # path of the semgrep executable, set by the pre-mock hook
        self.semgrep_bin = None

    def get_props(self):
        """Return a fresh PluginProps instance."""
        return PluginProps()

    def enable(self):
        """Mark the plug-in as enabled so that handle_args() registers its hooks."""
        self.enabled = True

    def init_parser(self, parser):
        """Add the --semgrep-* command-line options to *parser*."""
        parser.add_argument(
            "--semgrep-metrics", default=SEMGREP_SEND_METRICS,
            help="configures how usage metrics are sent to the Semgrep server")

        parser.add_argument(
            "--semgrep-rules-repo", default=SEMGREP_RULES_REPO,
            help="downstream semgrep rules repo")

        parser.add_argument(
            "--semgrep-rules-refresh", action="store_true",
            help="force clone/refresh latest rules from downstream repo")

        parser.add_argument(
            "--semgrep-scan-opts",
            help="space-separated list of additional options passed to the 'semgrep scan' command")

    def handle_args(self, parser, args, props):
        """Register the semgrep hooks on *props*; do nothing unless the plug-in is enabled.

        Appends one hook to each of props.pre_mock_hooks, props.post_install_hooks
        and props.post_process_hooks.  The hooks read the parsed *args* when they run.
        """
        if not self.enabled:
            return

        # download semgrep rules from downstream repo
        def fetch_semgrep_rules_hook(results, props):
            """Install semgrep, populate the rules cache and record the semgrep version.

            Returns 0 on success and a non-zero exit code on failure.
            """
            # use results.exec_cmd to install semgrep cli using pip
            ec = results.exec_cmd("python3 -m pip install --user semgrep", shell=True)
            if ec != 0:
                results.error("failed to install semgrep cli using pip")
                # nothing below can work without the semgrep cli
                return ec

            try:
                # make sure the cache directory exists
                os.makedirs(SEMGREP_RULES_CACHE_DIR, mode=0o755, exist_ok=True)
            except OSError:
                results.error(f"failed to create semgrep cache directory: {SEMGREP_RULES_CACHE_DIR}")
                return 1

            # check whether we can reuse previously downloaded semgrep rules
            if not args.semgrep_rules_refresh and os.listdir(SEMGREP_RULES_CACHE_DIR):
                results.print_with_ts(f"reusing previously downloaded semgrep rules: {SEMGREP_RULES_CACHE_DIR}")
            else:
                # Remove previously downloaded semgrep repo/rules.  This is done
                # in-process because a "dir/*" glob in an argv list is never
                # expanded (and a shell glob would skip ".git"), which would make
                # the following `git clone` fail on a non-empty directory.
                try:
                    _clear_directory(SEMGREP_RULES_CACHE_DIR)
                except OSError:
                    results.error(f"failed to clean semgrep cache directory: {SEMGREP_RULES_CACHE_DIR}")
                    return 1

                # fetch semgrep rules from downstream repo (argv list, so that
                # the repo URL is passed as a single argument)
                ec = results.exec_cmd(["git", "clone", args.semgrep_rules_repo, SEMGREP_RULES_CACHE_DIR])
                if ec != 0:
                    results.error("failed to fetch semgrep rules from downstream repo")
                    return ec

            # query version of semgrep
            # NOTE(review): the path is resolved against the current working
            # directory, and scan_hook later runs it inside the chroot --
            # confirm that this matches where `pip install --user` puts it.
            semgrep_bin_dir = os.path.abspath("./.local/bin")
            self.semgrep_bin = f"{semgrep_bin_dir}/semgrep"
            (ec, out) = results.get_cmd_output([self.semgrep_bin, "--version"], shell=False)
            if ec != 0:
                results.error("failed to query semgrep cli version", ec=ec)
                return ec

            # parse and record the version of semgrep cli (first whitespace-
            # separated token, so that a trailing newline is not recorded)
            fields = out.split()
            version = fields[0] if fields else ""
            results.ini_writer.append("analyzer-version-semgrep-cli", version)

            # copy semgrep rules into the chroot
            props.copy_in_files += [SEMGREP_RULES_CACHE_DIR]

            # get the results out of the chroot
            props.copy_out_files += [SEMGREP_SCAN_OUTPUT, SEMGREP_SCAN_LOG]
            return 0

        props.pre_mock_hooks += [fetch_semgrep_rules_hook]

        def scan_hook(results, mock, props):
            """Run `semgrep scan` in the chroot and report its error exit codes; always returns 0."""
            # command to run semgrep scan
            cmd = (f"{self.semgrep_bin} scan --metrics={args.semgrep_metrics} --sarif"
                   f" --config={SEMGREP_RULES_CACHE_DIR}/rules")

            # append additional options passed to the 'semgrep scan' command
            if args.semgrep_scan_opts:
                cmd += f" {args.semgrep_scan_opts}"

            # eventually append the target directory to be scanned
            cmd += f" --output={SEMGREP_SCAN_OUTPUT} {SEMGREP_SCAN_DIR} 2>{SEMGREP_SCAN_LOG}"

            # run semgrep scan
            ec = mock.exec_chroot_cmd(cmd)

            # report the known failure exit codes of semgrep scan
            msg = _SCAN_EXIT_CODE_ERRORS.get(ec)
            if msg is not None:
                results.error(msg)
            return 0

        # run semgrep scan after successful build
        props.post_install_hooks += [scan_hook]

        # convert the results into the csdiff's JSON format
        def filter_hook(results):
            """Convert the SARIF output with csgrep; returns 0 when there is no output to convert."""
            src = results.dbgdir_raw + SEMGREP_SCAN_OUTPUT
            if not os.path.exists(src):
                return 0
            dst = "%s/semgrep-scan-results.json" % results.dbgdir_uni
            cmd = FILTER_CMD % (src, dst)
            return results.exec_cmd(cmd, shell=True)

        props.post_process_hooks += [filter_hook]