diff --git a/crates/paralegal-flow/src/ana/graph_converter.rs b/crates/paralegal-flow/src/ana/graph_converter.rs index ee1019e332..dd617fe6a1 100644 --- a/crates/paralegal-flow/src/ana/graph_converter.rs +++ b/crates/paralegal-flow/src/ana/graph_converter.rs @@ -464,7 +464,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { let body = &tcx.body_for_def_id(at.function).unwrap().body; let node_span = body.local_decls[weight.place.local].source_info.span; - let new_idx = self.register_node( + self.register_node( i, NodeInfo { at: weight.at, @@ -472,20 +472,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { span: src_loc_for_span(node_span, tcx), }, ); - trace!( - "Node {new_idx:?}\n description: {:?}\n at: {at}\n stmt: {}", - weight.place, - match at.location { - RichLocation::Location(loc) => { - match body.stmt_at(loc) { - Either::Left(s) => format!("{:?}", s.kind), - Either::Right(s) => format!("{:?}", s.kind), - } - } - RichLocation::End => "end".to_string(), - RichLocation::Start => "start".to_string(), - } - ); self.node_annotations(i, weight); self.handle_node_types(i, weight); @@ -526,11 +512,6 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { Either::Right(self.marker_ctx().shallow_type_markers(typ.ty)) } .map(|(d, _)| d) - - // self.marker_ctx() - // .all_type_markers(typ.ty) - // .map(|t| t.1 .1) - // .collect() } /// Similar to `CallString::is_at_root`, but takes into account top-level @@ -551,8 +532,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { /// TODO: Include mutable inputs fn determine_return(&self) -> Box<[Node]> { // In async functions - let return_candidates = self - .spdg + self.spdg .node_references() .filter(|n| { let weight = n.weight(); @@ -560,11 +540,7 @@ impl<'a, 'tcx, C: Extend> GraphConverter<'tcx, 'a, C> { matches!(self.try_as_root(at), Some(l) if l.location == RichLocation::End) }) .map(|n| n.id()) - .collect::>(); - if return_candidates.len() != 1 { - warn!("Found many candidates for the return: {return_candidates:?}."); - } - return_candidates + .collect() } /// Determine the set if nodes corresponding to the inputs to the diff --git a/crates/paralegal-flow/src/lib.rs b/crates/paralegal-flow/src/lib.rs index 5d12b87b69..078d32a5af 100644 --- a/crates/paralegal-flow/src/lib.rs +++ b/crates/paralegal-flow/src/lib.rs @@ -85,7 +85,6 @@ pub mod dbg; mod discover; mod stats; //mod sah; -pub mod serializers; #[macro_use] pub mod utils; pub mod consts; diff --git a/crates/paralegal-flow/src/serializers.rs b/crates/paralegal-flow/src/serializers.rs deleted file mode 100644 index ade9d4bb1d..0000000000 --- a/crates/paralegal-flow/src/serializers.rs +++ /dev/null @@ -1,163 +0,0 @@ -//! [`serde`] serializers, most used for JSON debugging output in [`crate::dbg`]. -//! -//! The proxy structs are foreign serializers for their non-proxy counterparts, -//! see for more information. As a naming -//! convention `Proxy` is used to (de)serialize `` e.g. -//! [`BasicBlockProxy`] (de)serializes a [`mir::BasicBlock`]. -//! -//! Be aware that in some cases serialization is not bidirectional (usually if -//! there is a lifetime parameter in the serialized type). For instance -//! [`GlobalLocation`] can be serialized, but only a [`RawGlobalLocation`] can -//! be deserialized. -//! -//! Some types (such as [`mir::Body`]) first have to be explicitly transformed -//! into the respective proxy type. In the case of [`mir::Body`] this can be -//! done with [`BodyProxy::from_body_with_normalize`] -use paralegal_spdg::{rustc_portable::DefId, Identifier}; -use serde::Deserialize; - -use crate::{ - mir, - rust::TyCtxt, - serde::{Serialize, Serializer}, - utils::{extract_places, read_places_with_provenance, DfppBodyExt}, - Either, HashMap, HashSet, -}; - -#[derive(Debug, Serialize, Deserialize)] -pub struct InstructionProxy { - #[serde(with = "paralegal_spdg::rustc_proxies::Location")] - pub location: mir::Location, - pub contents: String, - pub places: HashSet, -} - -/// A serializable version of a `mir::Body`. -/// -/// Be aware that this transports less information than the actual `mir::Body`. -/// It records for each [`mir::Location`] a string representation of the -/// statement or terminator at that location and a set of [`mir::Place`]s that -/// are mentioned in the statement/terminator, also represented as strings -/// (though using the efficient, interned [`Identifier`]s). -/// -/// Construct one with [`Self::from_body_with_normalize`]. -#[derive(Debug, Serialize, Deserialize)] -pub struct BodyProxy(pub Vec); - -fn iter_stmts<'a, 'tcx>( - b: &'a mir::Body<'tcx>, -) -> impl Iterator< - Item = ( - mir::Location, - Either<&'a mir::Statement<'tcx>, &'a mir::Terminator<'tcx>>, - ), -> { - b.basic_blocks.iter_enumerated().flat_map(|(block, bbdat)| { - bbdat - .statements - .iter() - .enumerate() - .map(move |(statement_index, stmt)| { - ( - mir::Location { - block, - statement_index, - }, - Either::Left(stmt), - ) - }) - .chain(std::iter::once(( - mir::Location { - block, - statement_index: bbdat.statements.len(), - }, - Either::Right(bbdat.terminator()), - ))) - }) -} - -impl<'tcx> From<&mir::Body<'tcx>> for BodyProxy { - fn from(body: &mir::Body<'tcx>) -> Self { - Self( - iter_stmts(body) - .map(|(location, stmt)| InstructionProxy { - location, - contents: stmt.either(|s| format!("{:?}", s.kind), |t| format!("{:?}", t.kind)), - places: extract_places(location, body, false) - .into_iter() - .map(|p| Identifier::new_intern(&format!("{p:?}"))) - .collect(), - }) - .collect::>(), - ) - } -} - -impl BodyProxy { - /// Create a serializable version of a `mir::Body` by stringifying - /// everything. - /// - /// Includes, as the set of places for each statements the read places with - /// provenance as calculated by [`read_places_with_provenance`]. - pub fn from_body_with_normalize<'tcx>(body: &mir::Body<'tcx>, tcx: TyCtxt<'tcx>) -> Self { - Self( - iter_stmts(body) - .map(|(location, stmt)| InstructionProxy { - location, - contents: stmt.either(|s| format!("{:?}", s.kind), |t| format!("{:?}", t.kind)), - places: read_places_with_provenance( - location, - &body.stmt_at_better_err(location), - tcx, - ) - .map(|p| Identifier::new_intern(&format!("{p:?}"))) - .collect(), - }) - .collect::>(), - ) - } -} - -/// This exists because of serde's restrictions on how you derive serializers. -/// [`BodyIdProxy`] can be used to serialize a [`BodyId`](hir::BodyId) but if -/// the [`BodyId`](hir::BodyId) is used as e.g. a key in a map or in a vector it -/// does not dispatch to the remote impl on [`BodyIdProxy`]. Implementing the -/// serializers for the map or vector by hand is annoying so instead you can map -/// over the datastructure, wrap each [`BodyId`](hir::BodyId) in this proxy type -/// and then dispatch to the `serialize` impl for the reconstructed data -/// structure. -#[derive(Serialize, Deserialize)] -pub struct BodyIdProxy2(#[serde(with = "paralegal_spdg::rustc_proxies::DefId")] pub DefId); - -/// A serializable version of [`mir::Body`]s, mapped to their [`hir::BodyId`] so -/// that you can resolve the body belonging to a global location (see -/// [`IsGlobalLocation::function`]). -pub struct Bodies(pub HashMap); - -impl Serialize for Bodies { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - self.0 - .iter() - .map(|(bid, (name, b))| (BodyIdProxy2(*bid), *name, b)) - .collect::>() - .serialize(serializer) - } -} - -impl<'de> Deserialize<'de> for Bodies { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - Vec::deserialize(deserializer).map(|v| { - Bodies( - v.into_iter() - .map(|(BodyIdProxy2(bid), s, v)| (bid, (s, v))) - .collect(), - ) - }) - } -} diff --git a/crates/paralegal-flow/src/utils/mod.rs b/crates/paralegal-flow/src/utils/mod.rs index 60377e620c..5fb0f66bde 100644 --- a/crates/paralegal-flow/src/utils/mod.rs +++ b/crates/paralegal-flow/src/utils/mod.rs @@ -16,7 +16,7 @@ use crate::{ hir_id::HirId, BodyId, }, - mir::{self, Location, Place, ProjectionElem, Statement, Terminator}, + mir::{self, Location, Place, ProjectionElem}, rustc_borrowck::consumers::BodyWithBorrowckFacts, rustc_data_structures::intern::Interned, rustc_span::Span as RustSpan, @@ -461,19 +461,6 @@ impl<'tcx, F: FnMut(&mir::Place<'tcx>)> mir::visit::Visitor<'tcx> for PlaceVisit } } -/// Return the places that are read in this statements and possible ref/deref -/// un-layerings of those places. -/// -/// XXX(Justus) This part of the algorithms/API I am a bit hazy about. I haven't -/// quite worked out what this soundly means myself. -pub fn read_places_with_provenance<'tcx>( - l: Location, - stmt: &Either<&Statement<'tcx>, &Terminator<'tcx>>, - tcx: TyCtxt<'tcx>, -) -> impl Iterator> { - places_read(tcx, l, stmt, None).flat_map(move |place| place.provenance(tcx).into_iter()) -} - pub enum Overlap<'tcx> { Equal, Independent, @@ -571,31 +558,6 @@ impl<'hir> NodeExt<'hir> for hir::Node<'hir> { } } -/// Old version of [`places_read`], should be considered deprecated. -pub fn extract_places<'tcx>( - l: mir::Location, - body: &mir::Body<'tcx>, - exclude_return_places_from_call: bool, -) -> HashSet> { - use mir::visit::Visitor; - let mut places = HashSet::new(); - let mut vis = PlaceVisitor(|p: &mir::Place<'tcx>| { - places.insert(*p); - }); - match body.stmt_at_better_err(l) { - Either::Right(mir::Terminator { - kind: mir::TerminatorKind::Call { func, args, .. }, - .. - }) if exclude_return_places_from_call => std::iter::once(func) - .chain(args.iter()) - .for_each(|o| vis.visit_operand(o, l)), - _ => body.basic_blocks[l.block] - .visitable(l.statement_index) - .apply(l, &mut vis), - }; - places -} - /// A trait for types that can be converted into a [`mir::LocalDefId`] via /// [`TyCtxt`]. pub trait IntoLocalDefId { @@ -630,52 +592,6 @@ impl IntoLocalDefId for &'_ D { } } -/// Get the name of the function for this body as an `Ident`. This handles such -/// cases correctly where the function in question has no proper name, as is the -/// case for closures. -/// -/// You should probably use [`unique_and_terse_body_name_pls`] instead, as it -/// avoids name clashes. -pub fn body_name_pls(tcx: TyCtxt, id: I) -> Ident { - let map = tcx.hir(); - let def_id = id.into_local_def_id(tcx); - let node = map.find_by_def_id(def_id).unwrap(); - node.ident() - .or_else(|| { - matches!( - node, - hir::Node::Expr(hir::Expr { - kind: hir::ExprKind::Closure(..), - .. - }) - ) - .then(|| { - let owner = map.enclosing_body_owner(map.local_def_id_to_hir_id(def_id)); - Ident::from_str(&(body_name_pls(tcx, owner).to_string() + "_closure")) - }) - }) - .unwrap_or_else(|| panic!("Node {node:?} could not be named")) -} - -/// Gives a string name for this i that is free of name clashes, as it -/// includes a hash of the id. -pub fn unique_and_terse_body_name_pls(tcx: TyCtxt, id: I) -> Symbol { - let def_id = id.into_local_def_id(tcx); - let ident = body_name_pls(tcx, def_id); - Symbol::intern(&format!("{}_{}", ident.name, ShortHash::new(def_id))) -} - -/// Create a file for dumping an `ext` kind of output for `id`. The name of the -/// resulting file avoids clashes but is also descriptive (uses the resolved -/// name of `id`). -pub fn dump_file_pls( - tcx: TyCtxt, - id: I, - ext: &str, -) -> std::io::Result { - File::create(format!("{}.{ext}", unique_and_terse_body_name_pls(tcx, id))) -} - pub trait ProjectionElemExt { fn may_be_indirect(self) -> bool; } @@ -692,98 +608,6 @@ impl ProjectionElemExt for ProjectionElem { } } -/// Return all places mentioned at this location that are *read*. Which means -/// that if a `Place` is not read but assigned (e.g. the return place of a -/// function call), it will not be in the result set. -pub fn places_read<'tcx>( - tcx: TyCtxt<'tcx>, - location: mir::Location, - stmt: &Either<&mir::Statement<'tcx>, &mir::Terminator<'tcx>>, - read_after: Option>, -) -> impl Iterator> { - use mir::visit::Visitor; - let mut places = HashSet::new(); - let mut vis = PlaceVisitor(|p: &mir::Place<'tcx>| { - places.insert(*p); - }); - match stmt { - // TODO: This needs to deal with fields!! - Either::Left(mir::Statement { - kind: mir::StatementKind::Assign(a), - .. - }) => { - let mut proj = a.0.iter_projections(); - // We advance the iterator from the end until we find a projection - // that might not return the full object, e.g. field access or - // indexing. - // - // `iter_projections` returns an iterator of successively more - // projections, e.g. it starts with the local itself, like `_1` and - // then adds on e.g. `*_1`, `(*_1).foo` etc. - // - // We advance from the end because we want to basically drop - // everything that is more specific. As an example if you had - // `*((*_1).foo) = bla` then only the `foo` field gets modified, so - // `_1` *and* `*_1` should still be considered read, but we can't - // just do "filter" or the last `*` will cause `((*_1).foo, *)` to - // end up in the result as well (leakage). - let last_field_proj = proj.rfind(|pl| pl.1.may_be_indirect()); - // Now we iterate over the rest, including the field projection we - // found, because we only consider the first part of the tuple (a - // `PlaceRef`) which contains a place *up to* the projection in the - // second part of the tuple (which is what our condition was on)> - for pl in proj.chain(last_field_proj.into_iter()) { - vis.visit_place( - &::from_ref(pl.0, tcx), - mir::visit::PlaceContext::MutatingUse(mir::visit::MutatingUseContext::Store), - location, - ); - } - if let mir::Rvalue::Aggregate(_, ops) = &a.1 { - match handle_aggregate_assign(a.0, &a.1, tcx, &ops.raw, read_after) { - Ok(place) => vis.visit_place( - &place, - mir::visit::PlaceContext::NonMutatingUse( - mir::visit::NonMutatingUseContext::Move, - ), - location, - ), - Err(e) => { - debug!("handle_aggregate_assign threw {e}"); - vis.visit_rvalue(&a.1, location); - } - } - } else { - vis.visit_rvalue(&a.1, location); - } - } - Either::Right(term) => vis.visit_terminator(term, location), // TODO this is not correct - _ => (), - }; - places.into_iter() -} - -fn handle_aggregate_assign<'tcx>( - place: mir::Place<'tcx>, - _rvalue: &mir::Rvalue<'tcx>, - tcx: TyCtxt<'tcx>, - ops: &[mir::Operand<'tcx>], - read_after: Option>, -) -> Result, &'static str> { - let read_after = read_after.ok_or("no read after provided")?; - let inner_project = &read_after.projection[place.projection.len()..]; - let (field, rest_project) = inner_project.split_first().ok_or("projection too short")?; - let f = if let mir::ProjectionElem::Field(f, _) = field { - f - } else { - return Err("Not a field projection"); - }; - Ok(ops[f.as_usize()] - .place() - .ok_or("Constant")? - .project_deeper(rest_project, tcx)) -} - /// Brother to [`IntoLocalDefId`], converts the id type to a [`DefId`] using [`TyCtxt`] pub trait IntoDefId { fn into_def_id(self, tcx: TyCtxt) -> DefId; @@ -877,32 +701,6 @@ pub fn identifier_for_item(tcx: TyCtxt, did: D) -> I ) } -/// Creates an `Identifier` for this `HirId` -pub fn unique_identifier_for_item(tcx: TyCtxt, did: D) -> Identifier { - let did = did.into_def_id(tcx); - let get_parent = || unique_identifier_for_item(tcx, tcx.parent(did)); - Identifier::new_intern(&format!( - "{}_{}", - tcx.opt_item_name(did) - .map(|n| n.to_string()) - .or_else(|| { - use hir::def::DefKind::*; - match tcx.def_kind(did) { - OpaqueTy => Some("opaque".to_string()), - Closure => Some(format!("{}_closure", get_parent())), - Generator => Some(format!("{}_generator", get_parent())), - _ => None, - } - }) - .unwrap_or_else(|| panic!( - "Could not name {} {:?}", - tcx.def_path_debug_str(did), - tcx.def_kind(did) - )), - ShortHash::new(did), - )) -} - #[derive(Error, Debug)] pub enum BodyResolutionError { #[error("not a function-like object")] @@ -996,26 +794,6 @@ impl<'tcx> TyCtxtExt<'tcx> for TyCtxt<'tcx> { } } -/// A struct that can be used to apply a [`FnMut`] to every [`Place`] in a MIR -/// object via the [`MutVisitor`](mir::visit::MutVisitor) trait. Crucial -/// difference to [`PlaceVisitor`] is that this function can alter the place -/// itself. -pub struct RePlacer<'tcx, F>(TyCtxt<'tcx>, F); - -impl<'tcx, F: FnMut(&mut mir::Place<'tcx>)> mir::visit::MutVisitor<'tcx> for RePlacer<'tcx, F> { - fn tcx<'a>(&'a self) -> TyCtxt<'tcx> { - self.0 - } - fn visit_place( - &mut self, - place: &mut mir::Place<'tcx>, - _context: mir::visit::PlaceContext, - _location: mir::Location, - ) { - self.1(place) - } -} - /// Conveniently create a vector of [`Symbol`]s. This way you can just write /// `sym_vec!["s1", "s2", ...]` and this macro will make sure to call /// [`Symbol::intern`]