Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds SARIF traces support to SAPP #93

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions sapp/sarif.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ def __init__(
"pysa": (5000, 6000),
}
driver_json = {}
if tool == "pysa":
self.tool = tool
the-storm marked this conversation as resolved.
Show resolved Hide resolved
if self.tool == "pysa":
driver_json["name"] = "Pysa"
driver_json["informationUri"] = "https://github.com/facebook/pyre-check/"

Expand All @@ -64,11 +65,26 @@ def __init__(
for rule in tool_warning_messages:
rules_json.append({"id": str(rule.code), "name": rule.message})
driver_json["rules"] = rules_json
elif self.tool == "mariana-trench":
driver_json["name"] = "Mariana Trench"
driver_json[
"informationUri"
] = "https://github.com/facebook/mariana-trench/"

tool_warning_messages = get_warning_message_range(
session,
self._tool_warning_code_ranges[self.tool][0],
self._tool_warning_code_ranges[self.tool][1],
)
rules_json = []
for rule in tool_warning_messages:
rules_json.append({"id": str(rule.code), "name": rule.message})
driver_json["rules"] = rules_json
the-storm marked this conversation as resolved.
Show resolved Hide resolved
else:
raise NotImplementedError

self.driver = driver_json
self.results = [issue.to_sarif() for issue in filtered_issues]
self.results = [issue.to_sarif(session, self.tool) for issue in filtered_issues]

def to_json(self, indent: int = 2) -> str:
return json.dumps(self, cls=SARIFEncoder, indent=indent)
Expand Down
7 changes: 7 additions & 0 deletions sapp/sarif_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,10 @@ def __str__(self) -> str:
str,
],
]
SARIFPhyicalLocationObject: TypeAlias = Dict[
str, Union[SARIFRegionObject, Dict[str, str]]
]
SARIFCodeflowLocationObject: TypeAlias = Dict[
str, Union[SARIFPhyicalLocationObject, Dict[str, str]]
]
SARIFCodeflowsObject: TypeAlias = List[Dict[str, List[Dict[str, List]]]]
7 changes: 6 additions & 1 deletion sapp/ui/issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,9 @@ def to_json(
],
}

def to_sarif(self, severity_level: str = "warning") -> SARIFResult:
def to_sarif(
self, session: Session, tool: str, severity_level: str = "warning"
) -> SARIFResult:
sarif_result = {
"ruleId": str(self.code),
"level": str(SARIFSeverityLevel(severity_level)),
Expand All @@ -250,6 +252,9 @@ def to_sarif(self, severity_level: str = "warning") -> SARIFResult:
}
],
}
from . import trace

sarif_result["codeFlows"] = trace.to_sarif(session, self, tool, True)
return sarif_result

def __hash__(self) -> int:
Expand Down
130 changes: 128 additions & 2 deletions sapp/ui/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,18 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import Any, Dict, List, NamedTuple, Optional, Sequence, Set, Tuple, Union
from typing import (
Any,
Dict,
Iterable,
List,
NamedTuple,
Optional,
Sequence,
Set,
Tuple,
Union,
)

import graphene
from graphql.execution.base import ResolveInfo
Expand All @@ -20,7 +31,13 @@
TraceFrameLeafAssoc,
TraceKind,
)
from ..sarif_types import (
SARIFCodeflowLocationObject,
SARIFCodeflowsObject,
SARIFPhyicalLocationObject,
)
from . import run
from .issues import IssueQueryResult

FilenameText: AliasedClass = aliased(SharedText)
CallableText: AliasedClass = aliased(SharedText)
Expand Down Expand Up @@ -103,6 +120,25 @@ def from_record(
def is_leaf(self) -> bool:
return self.callee_port in LEAF_NAMES

def get_clean_caller(self, tool: str) -> str:
return self._clean_callable_name(self.caller, tool)

def get_clean_callee(self, tool: str) -> str:
return self._clean_callable_name(self.callee, tool)

def _clean_callable_name(self, name: str, tool: str, depth: int = 1) -> str:
the-storm marked this conversation as resolved.
Show resolved Hide resolved
if name in LEAF_NAMES:
return name
if tool == "mariana-trench":
package_and_class, method_name = name.split(".")
method_name = method_name.split(":")[0] # parse
the-storm marked this conversation as resolved.
Show resolved Hide resolved
package_and_class = package_and_class.strip(";").split("/")[
depth * -1
] # parse Lpackage1/package2/class;.
the-storm marked this conversation as resolved.
Show resolved Hide resolved
return f"{package_and_class}.{method_name}"
else:
return name


class TraceTuple(NamedTuple):
trace_frame: TraceFrameQueryResult
Expand Down Expand Up @@ -168,7 +204,6 @@ def initial_frames(
issue_id: DBID,
kind: TraceKind,
) -> List[TraceFrameQueryResult]:

records = list(
session.query(
TraceFrame.id,
Expand Down Expand Up @@ -403,3 +438,94 @@ def trace_kind_to_shared_text_kind(trace_kind: Optional[TraceKind]) -> SharedTex
return SharedTextKind.SINK

raise AssertionError(f"{trace_kind} is invalid")


def to_sarif(
session: Session, issue: IssueQueryResult, tool: str, output_features: bool = False
) -> SARIFCodeflowsObject:
postcondition_initial_frames = initial_frames(
session,
issue.issue_instance_id,
TraceKind.POSTCONDITION,
)
precondition_initial_frames = initial_frames(
session,
issue.issue_instance_id,
TraceKind.PRECONDITION,
)
postcondition_navigation = navigate_trace_frames(
session,
postcondition_initial_frames,
set(issue.source_kinds),
set(issue.sink_kinds),
)
precondition_navigation = navigate_trace_frames(
session,
precondition_initial_frames,
set(issue.source_kinds),
set(issue.sink_kinds),
)
the-storm marked this conversation as resolved.
Show resolved Hide resolved
trace_tuples = _create_trace_tuples(
reversed(postcondition_navigation)
) + _create_trace_tuples(precondition_navigation)
codeflows = [{"threadFlows": [{"locations": []}]}]
nesting_level = 0
for t in trace_tuples:
location = _sarif_codeflow_location_from_trace_tuple(
t.trace_frame, tool, nesting_level, True
the-storm marked this conversation as resolved.
Show resolved Hide resolved
)
codeflows[0]["threadFlows"][0]["locations"].append(location)
nesting_level += 1

return codeflows
the-storm marked this conversation as resolved.
Show resolved Hide resolved


def _create_trace_tuples(
navigation: Iterable[Tuple[TraceFrameQueryResult, int]]
) -> List[TraceTuple]:
return [
TraceTuple(
trace_frame=trace_frame,
branches=branches,
missing=trace_frame.caller == "",
)
for trace_frame, branches in navigation
]


def _sarif_codeflow_location_from_trace_tuple(
trace_frame: TraceFrameQueryResult,
tool: str,
nesting_level: int = 1,
output_features: bool = False,
) -> Dict[str, Union[SARIFCodeflowLocationObject, Dict[str, str]]]:
features_str = titos = ""
if output_features:
frame_features = [
text.contents
for text in trace_frame.shared_texts
if text.kind is SharedTextKind.FEATURE
]
if frame_features:
features_str = f"features: {frame_features}"
if trace_frame.titos and len(trace_frame.titos.split(";")) > 0:
titos = f"via {len(trace_frame.titos.split(';'))} propagators"
the-storm marked this conversation as resolved.
Show resolved Hide resolved
trace_region = {}
if trace_frame.callee_location:
trace_region = trace_frame.callee_location.to_sarif()
location = {
the-storm marked this conversation as resolved.
Show resolved Hide resolved
"location": {
"physicalLocation": {
"artifactLocation": {
"uri": trace_frame.filename,
"uriBaseId": "%SRCROOT%",
},
"region": trace_region,
},
"message": {
"text": f"flow from {trace_frame.get_clean_caller(tool)}(...{trace_frame.caller_port}...) -[into]-> {trace_frame.get_clean_callee(tool)}(...{trace_frame.callee_port}...) {titos} {features_str}".strip()
},
},
"nestingLevel": nesting_level,
}
return location
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we move all the logic regarding sarif into sarif.py?

Copy link
Contributor Author

@the-storm the-storm Jul 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which part exactly you'd like to move to sarif.py. The to_sarif function body just find the traces and calls _sarif_codeflow_location_from_trace_tuple. I don't want to move the latter to sarif.py because it is specific to traces. It is a private function that takes a trace_frame, tool and return the sarif codeflows object.

Copy link
Contributor

@arthaud arthaud Jul 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well IMO anything related to sarif should go into sarif.py. If we start putting function from every format into trace.py it will get overwhelming soon.
trace.py should provide a clean API that allows any formatter to get the data and do it's formatting.
From what I see, everything we use in the sarif functions is public? Tell me if I'm wrong.