Skip to content

Commit

Permalink
Update the marker assignment policy (#139)
Browse files Browse the repository at this point in the history
## What Changed?

Updates the marker assignment algorithm. It now attaches to a node `n`

1. all markers for `typeof(n)`
2. If `children(n).is_empty()` (determined by flowistry’s `PlaceInfo`)
then we must be a terminal place where subplaces aren’t tracked, meaning
that they won’t be marked via rule 1 and we have to overtaint, so
include all markers from `subtypes(typeof(n))`
3. Add all markers from `n.iter_projections().map(typeof)` to mark
ourselves if we are a field of a tainted type.

## Why Does It Need To?

This implements an updated marker policy. It simplifies the semantics
and makes them sound hopefully. This is possible now because we have
much better field sensitivity. Markers are now attached to every node of
the marked type as well as
- nodes which are children (e.g. fields) of a marked type
- nodes which are parents of the marked type **if** it the children of
this node are not separately tracked (overapproximation)

Mostly implements #129 

## Checklist

- [x] Above description has been filled out so that upon quash merge we
have a
  good record of what changed.
- [x] New functions, methods, types are documented. Old documentation is
updated
  if necessary
- [ ] Documentation in Notion has been updated
- [x] Tests for new behaviors are provided
  - [ ] New test suites (if any) ave been added to the CI tests (in
`.github/workflows/rust.yml`) either as compiler test or integration
test.
*Or* justification for their omission from CI has been provided in this
PR
    description.

---------

Co-authored-by: Will Crichton <[email protected]>
  • Loading branch information
JustusAdam and willcrichton authored Apr 6, 2024
1 parent b2e091c commit 593e192
Show file tree
Hide file tree
Showing 13 changed files with 727 additions and 125 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ rustc_utils = { git = "https://github.com/JustusAdam/rustc_plugin", rev = "89bc7
rustc_plugin = { git = "https://github.com/JustusAdam/rustc_plugin", rev = "89bc7b4979c8513a097068626b90b5b0e57f4917" }
# rustc_plugin = { path = "../rustc_plugin/crates/rustc_plugin" }
# rustc_utils = { path = "../rustc_plugin/crates/rustc_utils" }
flowistry = { git = "https://github.com/brownsys/flowistry", rev = "a2ccfca2e6b5668ffd246eddc6abaf4d6e440a35", default-features = false }

[profile.release]
debug = true
Expand Down
2 changes: 1 addition & 1 deletion crates/flowistry_pdg_construction/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ flowistry_pdg = { version = "0.1.0", path = "../flowistry_pdg", features = [
"rustc",
] }
#flowistry = { path = "../../../flowistry/crates/flowistry", default-features = false }
flowistry = { git = "https://github.com/brownsys/flowistry", rev = "a2ccfca2e6b5668ffd246eddc6abaf4d6e440a35", default-features = false }
flowistry = { workspace = true }

[dev-dependencies]
rustc_utils = { workspace = true, features = ["indexical", "test"] }
Expand Down
1 change: 0 additions & 1 deletion crates/flowistry_pdg_construction/src/construct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,6 @@ impl<'tcx> GraphConstructor<'tcx> {
);
true
} else {
dbg!(self.tcx.def_path_str(def_id));
false
}
}
Expand Down
18 changes: 8 additions & 10 deletions crates/paralegal-flow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ paralegal-spdg = { path = "../paralegal-spdg", features = ["rustc"] }

flowistry_pdg_construction = { path = "../flowistry_pdg_construction" }
flowistry_pdg = { path = "../flowistry_pdg" }
flowistry = { workspace = true }

rustc_utils = { workspace = true }
rustc_plugin = { workspace = true }
Expand All @@ -39,23 +40,20 @@ petgraph = { workspace = true }
humantime = "2"
strum = { workspace = true }
enum-map = "2.7"


#dot = "0.1"
dot = { git = "https://github.com/JustusAdam/dot-rust", rev = "ff2b42ceda98c639c8ea3cbfc56b83d6e06e8106" }
#dot = {path = "../../clones/dot-rust" }

serial_test = "2.0.0"
itertools = "0.12"
anyhow = "1.0.72"
thiserror = "1"
serde_bare = "0.5.0"
serde_json = "1"
toml = "0.7"

#dot = "0.1"
dot = { git = "https://github.com/JustusAdam/dot-rust", rev = "ff2b42ceda98c639c8ea3cbfc56b83d6e06e8106" }
#dot = {path = "../../clones/dot-rust" }

# This is just for pinning this dependency
camino = "= 1.0.9"
serial_test = "2.0.0"
itertools = "0.12"
anyhow = "1.0.72"
thiserror = "1"

[build-dependencies]
chrono = "0.4"
Expand Down
82 changes: 46 additions & 36 deletions crates/paralegal-flow/src/ana/graph_converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ use crate::{
utils::*,
DefId, HashMap, HashSet, MarkerCtx,
};
use flowistry::mir::placeinfo::PlaceInfo;
use flowistry_pdg::SourceUse;
use paralegal_spdg::{Node, SPDGStats};
use rustc_utils::cache::Cache;

use std::{
cell::RefCell,
Expand Down Expand Up @@ -64,14 +66,18 @@ pub struct GraphConverter<'tcx, 'a, C> {
call_string_resolver: call_string_resolver::CallStringResolver<'tcx>,
stats: SPDGStats,
analyzed_functions: HashSet<LocalDefId>,
place_info_cache: PlaceInfoCache<'tcx>,
}

pub type PlaceInfoCache<'tcx> = Rc<Cache<LocalDefId, PlaceInfo<'tcx>>>;

impl<'a, 'tcx, C: Extend<DefId>> GraphConverter<'tcx, 'a, C> {
/// Initialize a new converter by creating an initial PDG using flowistry.
pub fn new_with_flowistry(
generator: &'a SPDGGenerator<'tcx>,
known_def_ids: &'a mut C,
target: FnToAnalyze,
place_info_cache: PlaceInfoCache<'tcx>,
) -> Result<Self> {
let local_def_id = target.def_id.expect_local();
let start = Instant::now();
Expand Down Expand Up @@ -101,6 +107,7 @@ impl<'a, 'tcx, C: Extend<DefId>> GraphConverter<'tcx, 'a, C> {
call_string_resolver: CallStringResolver::new(generator.tcx, local_def_id),
stats,
analyzed_functions,
place_info_cache,
})
}

Expand Down Expand Up @@ -146,6 +153,16 @@ impl<'a, 'tcx, C: Extend<DefId>> GraphConverter<'tcx, 'a, C> {
}
}

fn place_info(&self, def_id: LocalDefId) -> &PlaceInfo<'tcx> {
self.place_info_cache.get(def_id, |_| {
PlaceInfo::build(
self.tcx(),
def_id.to_def_id(),
&self.tcx().body_for_def_id(def_id).unwrap(),
)
})
}

/// Find direct annotations on this node and register them in the marker map.
fn node_annotations(&mut self, old_node: Node, weight: &DepNode<'tcx>) {
let leaf_loc = weight.at.leaf();
Expand Down Expand Up @@ -280,8 +297,8 @@ impl<'a, 'tcx, C: Extend<DefId>> GraphConverter<'tcx, 'a, C> {
fn determine_place_type(
&self,
at: CallString,
place: mir::Place<'tcx>,
) -> mir::tcx::PlaceTy<'tcx> {
place: mir::PlaceRef<'tcx>,
) -> Option<mir::tcx::PlaceTy<'tcx>> {
let tcx = self.tcx();
let locations = at.iter_from_root().collect::<Vec<_>>();
let (last, mut rest) = locations.split_last().unwrap();
Expand All @@ -299,7 +316,9 @@ impl<'a, 'tcx, C: Extend<DefId>> GraphConverter<'tcx, 'a, C> {
// Flowistry sometimes tracks subplaces instead but we want the marker
// from the base place.
let place = if self.entrypoint_is_async() && place.local.as_u32() == 1 && rest.len() == 1 {
assert!(place.projection.len() >= 1, "{place:?} at {rest:?}");
if place.projection.is_empty() {
return None;
}
// in the case of targeting the top-level async closure (e.g. async args)
// we'll keep the first projection.
mir::Place {
Expand All @@ -315,7 +334,7 @@ impl<'a, 'tcx, C: Extend<DefId>> GraphConverter<'tcx, 'a, C> {
// Thread through each caller to recover generic arguments
let body = tcx.body_for_def_id(last.function).unwrap();
let raw_ty = place.ty(&body.body, tcx);
*resolution.try_monomorphize(tcx, ty::ParamEnv::reveal_all(), &raw_ty)
Some(*resolution.try_monomorphize(tcx, ty::ParamEnv::reveal_all(), &raw_ty))
}

/// Fetch annotations item identified by this `id`.
Expand Down Expand Up @@ -352,30 +371,17 @@ impl<'a, 'tcx, C: Extend<DefId>> GraphConverter<'tcx, 'a, C> {
fn handle_node_types(&mut self, old_node: Node, weight: &DepNode<'tcx>) {
let i = self.new_node_for(old_node);

let is_controller_argument =
matches!(self.try_as_root(weight.at), Some(l) if l.location == RichLocation::Start);

if self
.dep_graph
.graph
.edges_directed(old_node, Direction::Incoming)
.any(|e| e.weight().target_use.is_return() && e.weight().source_use.is_argument())
{
assert!(
weight.place.projection.is_empty(),
"{:?} at {} has projection",
weight.place,
weight.at
);
} else if !is_controller_argument {
let Some(place_ty) = self.determine_place_type(weight.at, weight.place.as_ref()) else {
return;
};
let place_info = self.place_info(weight.at.leaf().function);
let deep = !place_info.children(weight.place).is_empty();
let mut node_types = self.type_is_marked(place_ty, deep).collect::<HashSet<_>>();
for (p, _) in weight.place.iter_projections() {
if let Some(place_ty) = self.determine_place_type(weight.at, p) {
node_types.extend(self.type_is_marked(place_ty, false));
}
}

let place_ty = self.determine_place_type(weight.at, weight.place);

let is_external_call_source = weight.at.leaf().location != RichLocation::End;

let node_types = self.type_is_marked(place_ty, is_external_call_source);
self.known_def_ids.extend(node_types.iter().copied());
let tcx = self.tcx();
if !node_types.is_empty() {
Expand Down Expand Up @@ -614,18 +620,22 @@ impl<'a, 'tcx, C: Extend<DefId>> GraphConverter<'tcx, 'a, C> {
}

/// Return the (sub)types of this type that are marked.
fn type_is_marked(&self, typ: mir::tcx::PlaceTy<'tcx>, walk: bool) -> Vec<TypeId> {
if walk {
self.marker_ctx()
.all_type_markers(typ.ty)
.map(|t| t.1 .1)
.collect()
fn type_is_marked(
&'a self,
typ: mir::tcx::PlaceTy<'tcx>,
deep: bool,
) -> impl Iterator<Item = TypeId> + 'a {
if deep {
Either::Left(self.marker_ctx().deep_type_markers(typ.ty).iter().copied())
} else {
self.marker_ctx()
.type_has_surface_markers(typ.ty)
.into_iter()
.collect()
Either::Right(self.marker_ctx().shallow_type_markers(typ.ty))
}
.map(|(d, _)| d)

// self.marker_ctx()
// .all_type_markers(typ.ty)
// .map(|t| t.1 .1)
// .collect()
}

/// Similar to `CallString::is_at_root`, but takes into account top-level
Expand Down
11 changes: 9 additions & 2 deletions crates/paralegal-flow/src/ana/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ mod inline_judge;

use graph_converter::GraphConverter;

use self::inline_judge::InlineJudge;
use self::{graph_converter::PlaceInfoCache, inline_judge::InlineJudge};

/// Read-only database of information the analysis needs.
///
Expand All @@ -37,6 +37,7 @@ pub struct SPDGGenerator<'tcx> {
pub opts: &'static crate::Args,
pub tcx: TyCtxt<'tcx>,
stats: Stats,
place_info_cache: PlaceInfoCache<'tcx>,
}

impl<'tcx> SPDGGenerator<'tcx> {
Expand All @@ -51,6 +52,7 @@ impl<'tcx> SPDGGenerator<'tcx> {
opts,
tcx,
stats,
place_info_cache: Default::default(),
}
}

Expand All @@ -71,7 +73,12 @@ impl<'tcx> SPDGGenerator<'tcx> {
info!("Handling target {}", self.tcx.def_path_str(target.def_id));
let local_def_id = target.def_id.expect_local();

let converter = GraphConverter::new_with_flowistry(self, known_def_ids, target)?;
let converter = GraphConverter::new_with_flowistry(
self,
known_def_ids,
target,
self.place_info_cache.clone(),
)?;
let spdg = converter.make_spdg();

Ok((local_def_id, spdg))
Expand Down
Loading

0 comments on commit 593e192

Please sign in to comment.