Skip to content

Commit

Permalink
Parse context propagation from the metadata
Browse files Browse the repository at this point in the history
Summary: Parse the context propagation specs from the analysis metadata into `ContextPropagation` objects and pass them to the `PropagateContextToLeafFrames` pipeline step.

Reviewed By: fahndrich

Differential Revision: D64304874

fbshipit-source-id: 91cfb125aa72965202145f391e0818428240819c
  • Loading branch information
Abishek V Ashok authored and facebook-github-bot committed Oct 30, 2024
1 parent 151fbaf commit 4626140
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 13 deletions.
63 changes: 62 additions & 1 deletion sapp/analysis_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dataclasses import dataclass
from glob import glob
from pathlib import Path
from typing import Any, Dict, IO, Iterable, List, Optional, Set
from typing import Any, Dict, IO, Iterable, List, Literal, Optional, Set

from .sharded_files import ShardedFile

Expand Down Expand Up @@ -63,6 +63,28 @@ class PartialFlowToMark:
feature: str


@dataclass
class ContextPropagation:
    """
    A single user-supplied rule for propagating a context breadcrumb from one
    frame of an issue to other frames.

    Fields:
      code: the issue code that the context/breadcrumb is derived from.
      pattern: the pattern that the breadcrumb should match.
      frame_type: the type of frame the context/breadcrumb is derived from,
        either "precondition" or "postcondition".
      ignore_callee_port_and_location: whether the callee port and location
        are ignored when finding frames to propagate the context/breadcrumb
        to.
    """

    code: int
    pattern: str
    frame_type: Literal["precondition", "postcondition"]
    ignore_callee_port_and_location: bool


@dataclass
class Metadata:
# Used to relativize paths in the results
Expand All @@ -81,6 +103,9 @@ class Metadata:
partial_flows_to_mark: List[PartialFlowToMark] = dataclasses.field(
default_factory=list
)
context_propagation_specs: List[ContextPropagation] = dataclasses.field(
default_factory=list
)

def merge(self, o: "Metadata") -> "Metadata":
return Metadata(
Expand All @@ -96,6 +121,8 @@ def merge(self, o: "Metadata") -> "Metadata":
+ o.class_type_intervals_filenames,
category_coverage=self.category_coverage, # should all be the same
partial_flows_to_mark=self.partial_flows_to_mark + o.partial_flows_to_mark,
context_propagation_specs=self.context_propagation_specs
+ o.context_propagation_specs,
)


Expand Down Expand Up @@ -200,6 +227,9 @@ def from_directories(cls, directories: List[str]) -> "AnalysisOutput":
partial_flows_to_mark = _parse_partial_flows_to_mark(
metadata, "partial_flows"
)
context_propagation_specs = _parse_context_propagation_specs(
metadata, "context_propagation_specs"
)
this_metadata = Metadata(
analysis_tool_version=metadata["version"],
commit_hash=metadata.get("commit"),
Expand All @@ -212,6 +242,7 @@ def from_directories(cls, directories: List[str]) -> "AnalysisOutput":
class_type_intervals_filenames=class_type_intervals_filenames,
category_coverage=metadata.get("category_coverage", []),
partial_flows_to_mark=partial_flows_to_mark,
context_propagation_specs=context_propagation_specs,
)
if not main_metadata:
main_metadata = this_metadata
Expand Down Expand Up @@ -256,6 +287,9 @@ def from_directory(cls, directory: str) -> "AnalysisOutput":
metadata, "class_type_intervals_filename", directory
)
partial_flows_to_mark = _parse_partial_flows_to_mark(metadata, "partial_flows")
context_propagation_specs = _parse_context_propagation_specs(
metadata, "context_propagation_specs"
)
return cls(
directory=directory,
filename_specs=filename_specs,
Expand All @@ -272,6 +306,7 @@ def from_directory(cls, directory: str) -> "AnalysisOutput":
class_type_intervals_filenames=class_type_intervals_filenames,
category_coverage=metadata.get("category_coverage", []),
partial_flows_to_mark=partial_flows_to_mark,
context_propagation_specs=context_propagation_specs,
),
)

Expand Down Expand Up @@ -360,3 +395,29 @@ def _parse_partial_flows_to_mark(
)
)
return parsed


def _parse_context_propagation_specs(
    metadata_json: Dict[str, Any], key: str
) -> List[ContextPropagation]:
    """
    Flatten the context propagation specs stored under `key` in the metadata
    into a list of `ContextPropagation` objects.

    Each spec provides an issue `code` and a list of `context_propagations`.
    Each of those provides a `pattern`, a list of `frame_types` and,
    optionally, `ignore_callee_port_and_location` (treated as False when
    absent). One `ContextPropagation` is produced per frame type of each
    propagation, in input order. A missing `key` yields an empty list.

    Raises:
        KeyError: if `code`, `context_propagations`, `frame_types` or
            `pattern` is missing from an entry.
        ValueError: if a frame type is neither "precondition" nor
            "postcondition".
    """
    # `ContextPropagation.frame_type` is a Literal, but Literal is not
    # enforced at runtime and the JSON is untyped, so check the value here
    # rather than let a misspelt frame type slip through unnoticed.
    valid_frame_types = ("precondition", "postcondition")
    parsed: List[ContextPropagation] = []
    for spec in metadata_json.get(key, []):
        code = spec["code"]
        for propagation in spec["context_propagations"]:
            frame_types = propagation["frame_types"]
            pattern = propagation["pattern"]
            ignore_callee_port_and_location = propagation.get(
                "ignore_callee_port_and_location", False
            )
            for frame_type in frame_types:
                if frame_type not in valid_frame_types:
                    raise ValueError(
                        f"Invalid frame type {frame_type!r} in `{key}` for"
                        f" issue code {code}; expected one of"
                        f" {valid_frame_types}"
                    )
                parsed.append(
                    ContextPropagation(
                        code=code,
                        pattern=pattern,
                        frame_type=frame_type,
                        ignore_callee_port_and_location=ignore_callee_port_and_location,
                    )
                )
    return parsed
32 changes: 20 additions & 12 deletions sapp/pipeline/propagate_context_to_leaf_frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from collections import defaultdict, deque
from typing import Dict, List, Set, Tuple

from ..analysis_output import ContextPropagation
from ..models import IssueInstance, SharedTextKind, TraceFrame, TraceKind
from ..trace_graph import TraceGraph
from . import PipelineStep, Summary
Expand All @@ -33,26 +34,29 @@ class PropagateContextToLeafFrames(PipelineStep[TraceGraph, TraceGraph]):

def __init__(
self,
issue_code: int,
feature_pattern: str,
frame_kind: TraceKind,
ignore_callee_port: bool = False,
context_propagation: ContextPropagation,
) -> None:
super().__init__()
# pyre-fixme[13]: Attribute `summary` is never initialized.
self.summary: Summary
# pyre-fixme[13]: Attribute `graph` is never initialized.
self.graph: TraceGraph
self.feature_pattern = feature_pattern
self.issue_code = issue_code
self.frame_kind = frame_kind
# pyre-fixme[8]: Expected `Dict[FrameID, TaintKindToState]` for 1st param but got `defaultdict`.
self.feature_pattern: str = context_propagation.pattern
self.issue_code: int = context_propagation.code
self.frame_kind: TraceKind = (
TraceKind.PRECONDITION
if context_propagation.frame_type == "precondition"
else TraceKind.POSTCONDITION
)
# pyre-fixme[8]: Expected `Dict[FrameID, TaintKindToState]` for param 1
self.visited: Dict[FrameID, TaintKindToState] = defaultdict(
lambda: defaultdict(lambda: PerTaintKindState())
)
self.leaf_features_added = 0
self.leaf_frames = 0
self.ignore_callee_port = ignore_callee_port
self.ignore_callee_port_and_location: bool = (
context_propagation.ignore_callee_port_and_location
)

def _subtract_kinds(
self,
Expand Down Expand Up @@ -204,7 +208,7 @@ def _add_contextual_features_to_neighbor_frames(
candidate.callee_port != trace_frame.callee_port
or candidate.callee_location != trace_frame.callee_location
)
and (not self.ignore_callee_port)
and (not self.ignore_callee_port_and_location)
):
continue

Expand All @@ -217,7 +221,10 @@ def run(self, input: TraceGraph, summary: Summary) -> Tuple[TraceGraph, Summary]
self.graph = graph

log.info(
f"Propagating feature {self.feature_pattern} in issues {self.issue_code} to {self.frame_kind} leaves"
f"Propagating feature {self.feature_pattern} in issues"
+ f" {self.issue_code} to {self.frame_kind} leaves."
+ " Callee port and location will"
+ f"{'' if self.ignore_callee_port_and_location else ' not'} be ignored"
)

for instance in graph.get_issue_instances():
Expand All @@ -238,7 +245,8 @@ def run(self, input: TraceGraph, summary: Summary) -> Tuple[TraceGraph, Summary]
self._add_contextual_features_to_frame(trace_frame, features)
self._add_contextual_features_to_neighbor_frames(trace_frame, features)
log.info(
f"Added {self.leaf_features_added} features to {self.leaf_frames} trace frames"
f"Added {self.leaf_features_added} features to {self.leaf_frames}"
+ " trace frames. "
)

return graph, summary

0 comments on commit 4626140

Please sign in to comment.