@dataclass
class ContextPropagation:
    """
    Specification of a context propagation that the user wishes us to use.

    Context propagations are used to propagate context breadcrumbs from one
    frame in an issue to another.

    `code` is the issue code that the context/breadcrumb is derived from.
    `pattern` is the pattern that the breadcrumb should match.
    `frame_type` is the type of the frame the context/breadcrumb will be
    derived from; it must be "precondition" or "postcondition".
    `ignore_callee_port_and_location` indicates whether the callee port and
    location should be ignored when finding frames to propagate the
    context/breadcrumb to.

    Raises ValueError on construction if `frame_type` is not one of the two
    supported values.
    """

    code: int
    pattern: str
    frame_type: Literal["precondition", "postcondition"]
    ignore_callee_port_and_location: bool

    def __post_init__(self) -> None:
        # `Literal` is only checked statically, while these values are read
        # from JSON metadata. Reject unknown frame types here so that a typo
        # cannot be silently interpreted as the "other" frame type by a
        # consumer that only compares against one of the two values.
        if self.frame_type not in ("precondition", "postcondition"):
            raise ValueError(
                f"Invalid frame_type {self.frame_type!r} for context propagation"
                + f" of code {self.code}: expected 'precondition' or"
                + " 'postcondition'"
            )
def _parse_context_propagation_specs(
    metadata_json: Dict[str, Any], key: str
) -> List[ContextPropagation]:
    """
    Flatten the context propagation specs stored under `key` in the metadata.

    Each entry under `key` carries a `code` and a list of
    `context_propagations`; each of those carries a `pattern`, an
    `ignore_callee_port_and_location` flag and a list of `frame_types`. One
    `ContextPropagation` is produced per (code, propagation, frame type)
    combination, in the order they appear. A missing `key` yields an empty
    list; a missing field inside an entry raises `KeyError`.
    """
    results: List[ContextPropagation] = []
    for spec in metadata_json.get(key, []):
        issue_code = spec["code"]
        for propagation in spec["context_propagations"]:
            # Read every field before expanding so that a malformed entry
            # fails even when its `frame_types` list is empty.
            frame_types = propagation["frame_types"]
            feature_pattern = propagation["pattern"]
            ignore_port_and_location = propagation[
                "ignore_callee_port_and_location"
            ]
            results.extend(
                ContextPropagation(
                    code=issue_code,
                    pattern=feature_pattern,
                    frame_type=frame_type,
                    ignore_callee_port_and_location=ignore_port_and_location,
                )
                for frame_type in frame_types
            )
    return results
def __init__(
    self,
    context_propagation: ContextPropagation,
) -> None:
    """
    Configure the step from a single context propagation specification.

    The issue code, feature pattern and the callee port/location flag are
    copied from `context_propagation`. Its `frame_type` selects the trace
    kind: "precondition" maps to `TraceKind.PRECONDITION`, any other value
    maps to `TraceKind.POSTCONDITION`.
    """
    super().__init__()
    # pyre-fixme[13]: Attribute `summary` is never initialized.
    self.summary: Summary
    # pyre-fixme[13]: Attribute `graph` is never initialized.
    self.graph: TraceGraph

    # Settings taken from the specification.
    self.issue_code: int = context_propagation.code
    self.feature_pattern: str = context_propagation.pattern
    self.ignore_callee_port_and_location: bool = (
        context_propagation.ignore_callee_port_and_location
    )
    if context_propagation.frame_type == "precondition":
        frame_kind = TraceKind.PRECONDITION
    else:
        frame_kind = TraceKind.POSTCONDITION
    self.frame_kind: TraceKind = frame_kind

    # Counters reported in the log message at the end of `run`.
    self.leaf_features_added = 0
    self.leaf_frames = 0

    # Per-frame, per-taint-kind state; entries are created on first access.
    # pyre-fixme[8]: Expected `Dict[FrameID, TaintKindToState]` for param 1
    self.visited: Dict[FrameID, TaintKindToState] = defaultdict(
        lambda: defaultdict(PerTaintKindState)
    )
+ + " Callee port and location will" + + f"{'' if self.ignore_callee_port_and_location else ' not'} be ignored" ) for instance in graph.get_issue_instances(): @@ -238,7 +245,8 @@ def run(self, input: TraceGraph, summary: Summary) -> Tuple[TraceGraph, Summary] self._add_contextual_features_to_frame(trace_frame, features) self._add_contextual_features_to_neighbor_frames(trace_frame, features) log.info( - f"Added {self.leaf_features_added} features to {self.leaf_frames} trace frames" + f"Added {self.leaf_features_added} features to {self.leaf_frames}" + + " trace frames. " ) return graph, summary