From 22679e16d21971939d1138c88f5bca0823bc2908 Mon Sep 17 00:00:00 2001 From: Justin Ibarra Date: Tue, 3 May 2022 12:30:11 -0800 Subject: [PATCH] Add delta command to determine changes to endpoint rules between tags (#1943) * update git tag loader to be compatible with lock validation * add diff command * default to query for missing rules --- detection_rules/devtools.py | 110 +++++++++++++++++++++---- detection_rules/rule_loader.py | 37 ++++++--- detection_rules/schemas/definitions.py | 1 + detection_rules/version_lock.py | 27 +++++- 4 files changed, 143 insertions(+), 32 deletions(-) diff --git a/detection_rules/devtools.py b/detection_rules/devtools.py index 511a4f7a4f8..d14588cba33 100644 --- a/detection_rules/devtools.py +++ b/detection_rules/devtools.py @@ -15,6 +15,7 @@ import time import typing import urllib.parse +from collections import defaultdict from pathlib import Path from typing import Dict, Optional, Tuple, List @@ -22,6 +23,7 @@ import requests.exceptions import yaml from elasticsearch import Elasticsearch +from eql.table import Table from kibana.connector import Kibana from . 
import rule_loader, utils @@ -33,7 +35,7 @@ from .misc import PYTHON_LICENSE, add_client, client_error from .packaging import PACKAGE_FILE, RELEASE_DIR, CURRENT_RELEASE_PATH, Package, current_stack_version from .version_lock import default_version_lock -from .rule import AnyRuleData, BaseRuleData, QueryRuleData, TOMLRule +from .rule import AnyRuleData, BaseRuleData, DeprecatedRule, QueryRuleData, ThreatMapping, TOMLRule from .rule_loader import RuleCollection, production_filter from .schemas import definitions from .semver import Version @@ -94,21 +96,9 @@ def build_release(config_file, update_version_lock: bool, generate_navigator: bo return package -@dev_group.command('build-integration-docs') -@click.argument('registry-version') -@click.option('--pre', required=True, help='Tag for pre-existing rules') -@click.option('--post', required=True, help='Tag for rules post updates') -@click.option('--directory', '-d', type=Path, required=True, help='Output directory to save docs to') -@click.option('--force', '-f', is_flag=True, help='Bypass the confirmation prompt') -@click.option('--remote', '-r', default='origin', help='Override the remote from "origin"') -@click.pass_context -def build_integration_docs(ctx: click.Context, registry_version: str, pre: str, post: str, directory: Path, force: bool, - remote: Optional[str] = 'origin') -> IntegrationSecurityDocs: +def get_release_diff(pre: str, post: str, remote: Optional[str] = 'origin' + ) -> (Dict[str, TOMLRule], Dict[str, TOMLRule], Dict[str, DeprecatedRule]): """Build documents from two git tags for an integration package.""" - if not force: - if not click.confirm(f'This will refresh tags and may overwrite local tags for: {pre} and {post}. 
Continue?'): - ctx.exit(1) - pre_rules = RuleCollection() pre_rules.load_git_tag(pre, remote, skip_query_validation=True) @@ -124,9 +114,28 @@ def build_integration_docs(ctx: click.Context, registry_version: str, pre: str, click.echo(' - ' + '\n - '.join([str(p) for p in post_rules.errors])) rules_changes = pre_rules.compare_collections(post_rules) + return rules_changes + +@dev_group.command('build-integration-docs') +@click.argument('registry-version') +@click.option('--pre', required=True, help='Tag for pre-existing rules') +@click.option('--post', required=True, help='Tag for rules post updates') +@click.option('--directory', '-d', type=Path, required=True, help='Output directory to save docs to') +@click.option('--force', '-f', is_flag=True, help='Bypass the confirmation prompt') +@click.option('--remote', '-r', default='origin', help='Override the remote from "origin"') +@click.pass_context +def build_integration_docs(ctx: click.Context, registry_version: str, pre: str, post: str, directory: Path, force: bool, + remote: Optional[str] = 'origin') -> IntegrationSecurityDocs: + """Build documents from two git tags for an integration package.""" + if not force: + if not click.confirm(f'This will refresh tags and may overwrite local tags for: {pre} and {post}. 
Continue?'): + ctx.exit(1) + + rules_changes = get_release_diff(pre, post, remote) docs = IntegrationSecurityDocs(registry_version, directory, True, *rules_changes) package_dir = docs.generate() + click.echo(f'Generated documents saved to: {package_dir}') updated, new, deprecated = rules_changes click.echo(f'- {len(updated)} updated rules') @@ -812,6 +821,76 @@ def raw_permalink(raw_link): return generated_urls +@dev_group.group('diff') +def diff_group(): + """Commands for statistics on changes and diffs.""" + + +@diff_group.command('endpoint-by-attack') +@click.option('--pre', required=True, help='Tag for pre-existing rules') +@click.option('--post', required=True, help='Tag for rules post updates') +@click.option('--force', '-f', is_flag=True, help='Bypass the confirmation prompt') +@click.option('--remote', '-r', default='origin', help='Override the remote from "origin"') +@click.pass_context +def endpoint_by_attack(ctx: click.Context, pre: str, post: str, force: bool, remote: Optional[str] = 'origin'): + """Rule diffs across tagged branches, broken down by ATT&CK tactics.""" + if not force: + if not click.confirm(f'This will refresh tags and may overwrite local tags for: {pre} and {post}. 
Continue?'): + ctx.exit(1) + + changed, new, deprecated = get_release_diff(pre, post, remote) + oses = ('windows', 'linux', 'macos') + + def delta_stats(rule_map) -> List[dict]: + stats = defaultdict(lambda: defaultdict(int)) + os_totals = defaultdict(int) + tactic_totals = defaultdict(int) + + for rule_id, rule in rule_map.items(): + threat = rule.contents.data.get('threat') + os_types = [i.lower() for i in rule.contents.data.get('tags') or [] if i.lower() in oses] + if not threat or not os_types: + continue + + if isinstance(threat[0], dict): + tactics = sorted(set(e['tactic']['name'] for e in threat)) + else: + tactics = ThreatMapping.flatten(threat).tactic_names + for tactic in tactics: + tactic_totals[tactic] += 1 + for os_type in os_types: + os_totals[os_type] += 1 + stats[tactic][os_type] += 1 + + # structure stats for table + rows = [] + for tac, stat in stats.items(): + row = {'tactic': tac, 'total': tactic_totals[tac]} + for os_type, count in stat.items(): + row[os_type] = count + rows.append(row) + + rows.append(dict(tactic='total_by_os', **os_totals)) + + return rows + + fields = ['tactic', 'linux', 'macos', 'windows', 'total'] + + changed_stats = delta_stats(changed) + table = Table.from_list(fields, changed_stats) + click.echo(f'Changed rules {len(changed)}\n{table}\n') + + new_stats = delta_stats(new) + table = Table.from_list(fields, new_stats) + click.echo(f'New rules {len(new)}\n{table}\n') + + dep_stats = delta_stats(deprecated) + table = Table.from_list(fields, dep_stats) + click.echo(f'Deprecated rules {len(deprecated)}\n{table}\n') + + return changed_stats, new_stats, dep_stats + + @dev_group.group('test') def test_group(): """Commands for testing against stack resources.""" @@ -898,7 +977,6 @@ def rule_event_search(ctx, rule, date_range, count, max_results, verbose, def rule_survey(ctx: click.Context, query, date_range, dump_file, hide_zero_counts, hide_errors, elasticsearch_client: Elasticsearch = None, kibana_client: Kibana = None): 
"""Survey rule counts.""" - from eql.table import Table from kibana.resources import Signal from .main import search_rules diff --git a/detection_rules/rule_loader.py b/detection_rules/rule_loader.py index 448f7889e48..79364ff613e 100644 --- a/detection_rules/rule_loader.py +++ b/detection_rules/rule_loader.py @@ -270,20 +270,14 @@ def load_file(self, path: Path) -> Union[TOMLRule, DeprecatedRule]: def load_git_tag(self, branch: str, remote: Optional[str] = None, skip_query_validation=False): """Load rules from a Git branch.""" - from .version_lock import VersionLock - - commit_hash, v_lock, d_lock = load_locks_from_tag(remote, branch) - - v_lock_name_prefix = f'{remote}/' if remote else '' - v_lock_name = f'{v_lock_name_prefix}{branch}-{commit_hash}' - - version_lock = VersionLock(version_lock=v_lock, deprecated_lock=d_lock, name=v_lock_name) - self._version_lock = version_lock + from .version_lock import VersionLock, add_rule_types_to_lock git = utils.make_git() rules_dir = DEFAULT_RULES_DIR.relative_to(get_path(".")) paths = git("ls-tree", "-r", "--name-only", branch, rules_dir).splitlines() + rule_contents = [] + rule_map = {} for path in paths: path = Path(path) if path.suffix != ".toml": @@ -295,6 +289,23 @@ def load_git_tag(self, branch: str, remote: Optional[str] = None, skip_query_val if skip_query_validation: toml_dict['metadata']['query_schema_validation'] = False + rule_contents.append((toml_dict, path)) + rule_map[toml_dict['rule']['rule_id']] = toml_dict + + commit_hash, v_lock, d_lock = load_locks_from_tag(remote, branch) + + v_lock_name_prefix = f'{remote}/' if remote else '' + v_lock_name = f'{v_lock_name_prefix}{branch}-{commit_hash}' + + # For backwards compatibility with tagged branches that existed before the types were added and validation + # enforced, we will need to manually add the rule types to the version lock allow them to pass validation. 
+ v_lock = add_rule_types_to_lock(v_lock, rule_map) + + version_lock = VersionLock(version_lock=v_lock, deprecated_lock=d_lock, name=v_lock_name) + self._version_lock = version_lock + + for rule_content in rule_contents: + toml_dict, path = rule_content try: self.load_dict(toml_dict, path) except ValidationError as e: @@ -345,10 +356,10 @@ def compare_collections(self, other: 'RuleCollection' new_rules = {} newly_deprecated = {} - pre_versions_hash = utils.dict_hash(self._version_lock.version_lock) - post_versions_hash = utils.dict_hash(other._version_lock.version_lock) - pre_deprecated_hash = utils.dict_hash(self._version_lock.deprecated_lock) - post_deprecated_hash = utils.dict_hash(other._version_lock.deprecated_lock) + pre_versions_hash = utils.dict_hash(self._version_lock.version_lock.to_dict()) + post_versions_hash = utils.dict_hash(other._version_lock.version_lock.to_dict()) + pre_deprecated_hash = utils.dict_hash(self._version_lock.deprecated_lock.to_dict()) + post_deprecated_hash = utils.dict_hash(other._version_lock.deprecated_lock.to_dict()) if pre_versions_hash == post_versions_hash and pre_deprecated_hash == post_deprecated_hash: return changed_rules, new_rules, newly_deprecated diff --git a/detection_rules/schemas/definitions.py b/detection_rules/schemas/definitions.py index 4a082e83063..19821e866e1 100644 --- a/detection_rules/schemas/definitions.py +++ b/detection_rules/schemas/definitions.py @@ -36,6 +36,7 @@ # we had a bad rule ID make it in before tightening up the pattern, and so we have to let it bypass KNOWN_BAD_RULE_IDS = Literal['119c8877-8613-416d-a98a-96b6664ee73a5'] +KNOWN_BAD_DEPRECATED_DATES = Literal['2021-03-03'] OPERATORS = ['equals'] TIMELINE_TEMPLATES: Final[dict] = { diff --git a/detection_rules/version_lock.py b/detection_rules/version_lock.py index 42d727a2d95..564efe3ccd5 100644 --- a/detection_rules/version_lock.py +++ b/detection_rules/version_lock.py @@ -56,7 +56,7 @@ def __getitem__(self, item) -> VersionLockFileEntry: 
@dataclass(frozen=True) class DeprecatedRulesEntry(MarshmallowDataclassMixin): """Schema for rule entry in the deprecated rules file.""" - deprecation_date: definitions.Date + deprecation_date: Union[definitions.Date, definitions.KNOWN_BAD_DEPRECATED_DATES] rule_name: definitions.RuleName stack_version: definitions.SemVer @@ -92,6 +92,27 @@ def load_versions() -> dict: return version_lock_file.to_dict() +# for tagged branches which existed before the types were added and validation enforced, we will need to manually add +# them to allow them to pass validation. These will only ever currently be loaded via the RuleCollection.load_git_tag +# method, which is primarily for generating diffs across releases, so there is no risk to versioning +def add_rule_types_to_lock(lock_contents: dict, rule_map: Dict[str, dict]): + """Add the rule type to entries in the lock file, if missing.""" + for rule_id, lock in lock_contents.items(): + rule = rule_map.get(rule_id, {}) + + # this defaults to query if the rule is not found - it is just for validation so should not impact + rule_type = rule.get('rule', {}).get('type', 'query') + + # the type is a bit less important than the structure to pass validation + lock['type'] = rule_type + + if 'previous' in lock: + for _, prev_lock in lock['previous'].items(): + prev_lock['type'] = rule_type + + return lock_contents + + class VersionLock: """Version handling for rule files and collections.""" @@ -108,12 +129,12 @@ def __init__(self, version_lock_file: Optional[Path] = None, deprecated_lock_fil if version_lock_file: self.version_lock = VersionLockFile.load_from_file(version_lock_file) else: - self.version_lock = VersionLockFile.from_dict(version_lock) + self.version_lock = VersionLockFile.from_dict(dict(data=version_lock)) if deprecated_lock_file: self.deprecated_lock = DeprecatedRulesFile.load_from_file(deprecated_lock_file) else: - self.deprecated_lock = 
DeprecatedRulesFile.from_dict(dict(data=deprecated_lock)) @staticmethod def save_file(path: Path, lock_file: Union[VersionLockFile, DeprecatedRulesFile]):