diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 84f589b65..691ac5f07 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -1,16 +1,9 @@ +use crate::annotate::AnnotationBuffer; use crate::borrowck; -use crate::context::AnalysisCtxt; -use crate::context::AnalysisCtxtData; -use crate::context::FlagSet; -use crate::context::GlobalAnalysisCtxt; -use crate::context::GlobalAssignment; -use crate::context::LFnSig; -use crate::context::LTy; -use crate::context::LTyCtxt; -use crate::context::LocalAssignment; -use crate::context::PermissionSet; -use crate::context::PointerId; -use crate::context::PointerInfo; +use crate::context::{ + self, AnalysisCtxt, AnalysisCtxtData, FlagSet, GlobalAnalysisCtxt, GlobalAssignment, LFnSig, + LTy, LTyCtxt, LocalAssignment, PermissionSet, PointerId, PointerInfo, +}; use crate::dataflow; use crate::dataflow::DataflowConstraints; use crate::equiv::GlobalEquivSet; @@ -57,8 +50,7 @@ use rustc_middle::ty::Ty; use rustc_middle::ty::TyCtxt; use rustc_middle::ty::TyKind; use rustc_middle::ty::WithOptConstParam; -use rustc_span::Span; -use rustc_span::Symbol; +use rustc_span::{Span, Symbol}; use std::collections::HashMap; use std::collections::HashSet; use std::env; @@ -115,6 +107,10 @@ impl MaybeUnset { pub fn take(&mut self) -> T { self.0.take().expect("value is not set") } + + pub fn is_set(&self) -> bool { + self.0.is_some() + } } impl Deref for MaybeUnset { @@ -664,19 +660,19 @@ fn run(tcx: TyCtxt) { pointee_type::generate_constraints(&acx, &mir) })); - let pointee_constraints = match r { - Ok(x) => x, + let mut info = FuncInfo::default(); + let local_pointee_types = LocalPointerTable::new(acx.num_pointers()); + info.acx_data.set(acx.into_data()); + + match r { + Ok(pointee_constraints) => { + info.pointee_constraints.set(pointee_constraints); + } Err(pd) => { gacx.mark_fn_failed(ldid.to_def_id(), pd); - continue; } - }; - - let local_pointee_types = LocalPointerTable::new(acx.num_pointers()); + } - let mut info = FuncInfo::default(); - info.acx_data.set(acx.into_data()); - info.pointee_constraints.set(pointee_constraints); info.local_pointee_types.set(local_pointee_types); info.recent_writes.set(RecentWrites::new(&mir)); func_info.insert(ldid, info); @@ -1101,6 +1097,9 @@ fn run(tcx: TyCtxt) { field_ltys, ); })); + + info.acx_data.set(acx.into_data()); + match r { Ok(()) => {} Err(pd) => { @@ -1108,8 +1107,6 @@ fn run(tcx: TyCtxt) { continue; } } - - info.acx_data.set(acx.into_data()); } let mut num_changed = 0; @@ -1156,6 +1153,9 @@ fn run(tcx: TyCtxt) { acx.check_string_literal_perms(&asn); })); + + info.acx_data.set(acx.into_data()); + match r { Ok(()) => {} Err(pd) => { @@ -1163,8 +1163,6 @@ fn run(tcx: TyCtxt) { continue; } } - - info.acx_data.set(acx.into_data()); } // Check that these perms haven't changed. @@ -1233,6 +1231,9 @@ fn run(tcx: TyCtxt) { // for a single function makes FileCheck tests easier to write. let mut func_reports = HashMap::::new(); + // Buffer for annotations, which are inserted inline as comments when rewriting. + let mut ann = AnnotationBuffer::new(tcx); + // Generate rewrites for all functions. let mut all_rewrites = Vec::new(); @@ -1310,6 +1311,9 @@ fn run(tcx: TyCtxt) { all_rewrites.extend(expr_rewrites); all_rewrites.extend(ty_rewrites); })); + + info.acx_data.set(acx.into_data()); + match r { Ok(()) => {} Err(pd) => { @@ -1317,8 +1321,6 @@ fn run(tcx: TyCtxt) { continue; } } - - info.acx_data.set(acx.into_data()); } // This call never panics, which is important because this is the fallback if the more @@ -1456,6 +1458,54 @@ fn run(tcx: TyCtxt) { if let Some(report) = func_reports.remove(&ldid) { eprintln!("{}", report); } + + info.acx_data.set(acx.into_data()); + } + + // Generate annotations for all functions. + for ldid in tcx.hir().body_owners() { + // Skip any body owners that aren't present in `func_info`, and also get the info itself. + let info = match func_info.get_mut(&ldid) { + Some(x) => x, + None => continue, + }; + + if !info.acx_data.is_set() { + continue; + } + + let ldid_const = WithOptConstParam::unknown(ldid); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); + let asn = gasn.and(&mut info.lasn); + + // Generate inline annotations for pointer-typed locals + for (local, decl) in mir.local_decls.iter_enumerated() { + let span = local_span(decl); + let mut ptrs = Vec::new(); + let ty_str = context::print_ty_with_pointer_labels(acx.local_tys[local], |ptr| { + if ptr.is_none() { + return String::new(); + } + ptrs.push(ptr); + format!("{{{}}}", ptr) + }); + if ptrs.is_empty() { + continue; + } + // TODO: emit addr_of when it's nontrivial + // TODO: emit pointee_types when nontrivial + ann.emit(span, format_args!("typeof({:?}) = {}", local, ty_str)); + for ptr in ptrs { + ann.emit( + span, + format_args!(" {} = {:?}, {:?}", ptr, asn.perms()[ptr], asn.flags()[ptr]), + ); + } + } + + info.acx_data.set(acx.into_data()); } // Print results for `static` items. @@ -1492,6 +1542,33 @@ fn run(tcx: TyCtxt) { let ty_flags = gasn.flags[pid]; eprintln!("{name:}: ({pid}) perms = {ty_perms:?}, flags = {ty_flags:?}"); } + + // Emit annotations for fields + let span = match tcx.def_ident_span(did) { + Some(x) => x, + None => { + warn!("field {:?} has no def_ident_span to annotate", did); + continue; + } + }; + let mut ptrs = Vec::new(); + let ty_str = context::print_ty_with_pointer_labels(field_lty, |ptr| { + if ptr.is_none() { + return String::new(); + } + ptrs.push(ptr); + format!("{{{}}}", ptr) + }); + if ptrs.len() == 0 { + continue; + } + ann.emit(span, format_args!("typeof({}) = {}", name, ty_str)); + for ptr in ptrs { + ann.emit( + span, + format_args!(" {} = {:?}, {:?}", ptr, gasn.perms[ptr], gasn.flags[ptr]), + ); + } } let mut adt_dids = gacx.adt_metadata.table.keys().cloned().collect::>(); @@ -1506,14 +1583,23 @@ fn run(tcx: TyCtxt) { // Apply rewrites // ---------------------------------- + let annotations = ann.finish(); + // Apply rewrite to all functions at once. let mut update_files = rewrite::UpdateFiles::No; - if let Ok(val) = env::var("C2RUST_ANALYZE_REWRITE_IN_PLACE") { - if val == "1" { - update_files = rewrite::UpdateFiles::Yes; + if let Ok(val) = env::var("C2RUST_ANALYZE_REWRITE_MODE") { + match val.as_str() { + "none" => {} + "inplace" => { + update_files = rewrite::UpdateFiles::InPlace; + } + "alongside" => { + update_files = rewrite::UpdateFiles::Alongside; + } + _ => panic!("bad value {:?} for C2RUST_ANALYZE_REWRITE_MODE", val), } } - rewrite::apply_rewrites(tcx, all_rewrites, update_files); + rewrite::apply_rewrites(tcx, all_rewrites, annotations, update_files); // ---------------------------------- // Report caught panics @@ -1612,7 +1698,7 @@ fn make_sig_fixed(gasn: &mut GlobalAssignment, lsig: &LFnSig) { } } -fn describe_local(tcx: TyCtxt, decl: &LocalDecl) -> String { +fn local_span(decl: &LocalDecl) -> Span { let mut span = decl.source_info.span; if let Some(ref info) = decl.local_info { if let LocalInfo::User(ref binding_form) = **info { @@ -1622,6 +1708,11 @@ fn describe_local(tcx: TyCtxt, decl: &LocalDecl) -> String { } } } + span +} + +fn describe_local(tcx: TyCtxt, decl: &LocalDecl) -> String { + let span = local_span(decl); describe_span(tcx, span) } diff --git a/c2rust-analyze/src/annotate.rs b/c2rust-analyze/src/annotate.rs new file mode 100644 index 000000000..aaaa9ec34 --- /dev/null +++ b/c2rust-analyze/src/annotate.rs @@ -0,0 +1,73 @@ +use log::warn; +use rustc_middle::ty::TyCtxt; +use rustc_span::{FileName, Span}; +use std::collections::HashMap; +use std::fmt::Display; + +pub struct AnnotationBuffer<'tcx> { + tcx: TyCtxt<'tcx>, + /// Map from `file_idx` to a list of annotations as `(line_number, text)` pairs. + m: HashMap>, +} + +impl<'tcx> AnnotationBuffer<'tcx> { + pub fn new(tcx: TyCtxt<'tcx>) -> AnnotationBuffer<'tcx> { + AnnotationBuffer { + tcx, + m: HashMap::new(), + } + } + + pub fn _clear(&mut self) { + self.m.clear(); + } + + pub fn emit(&mut self, span: Span, msg: impl Display) { + if span.is_dummy() { + // `DUMMY_SP` covers the range `BytePos(0) .. BytePos(0)`. Whichever file happens to + // be added to the `SourceMap` first will be assigned a range starting at `BytePos(0)`, + // so the `SourceFile` lookup below would attach the annotation to that file. Rather + // than letting the annotation be attached to an arbitrary file, we warn and discard + // it. + warn!("discarding annotation on DUMMY_SP: {}", msg); + return; + } + + let sm = self.tcx.sess.source_map(); + + let span = span.source_callsite(); + let pos = span.lo(); + let file_idx = sm.lookup_source_file_idx(pos); + let sf = &sm.files()[file_idx]; + let line = sf.lookup_line(pos).unwrap_or(0); + + let src = sm + .span_to_snippet(span) + .unwrap_or_else(|_| "".into()); + let src = src.split_ascii_whitespace().collect::>().join(" "); + let (src1, src2, src3) = if src.len() > 20 { + (&src[..15], " ... ", &src[src.len() - 5..]) + } else { + (&src[..], "", "") + }; + self.m.entry(file_idx).or_insert_with(Vec::new).push(( + line, + format!("{}: {}{}{}: {}", line + 1, src1, src2, src3, msg), + )); + } + + pub fn finish(self) -> HashMap> { + let mut m = HashMap::new(); + let sm = self.tcx.sess.source_map(); + for (file_idx, v) in self.m { + let sf = &sm.files()[file_idx]; + let old = m.insert(sf.name.clone(), v); + assert!( + old.is_none(), + "found multiple SourceFiles named {:?}", + sf.name + ); + } + m + } +} diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index 81cecd1fb..85701a3fb 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -35,7 +35,7 @@ use rustc_middle::ty::TyCtxt; use rustc_middle::ty::TyKind; use rustc_type_ir::RegionKind::{ReEarlyBound, ReStatic}; use std::collections::{HashMap, HashSet}; -use std::fmt::Debug; +use std::fmt::{Debug, Write as _}; use std::ops::Index; bitflags! { @@ -1399,3 +1399,127 @@ impl Assignment<'_> { ) } } + +/// Print an `LTy` as a string, using the provided callback to print the labels on each pointer and +/// reference type. +/// +/// Note this completely omits any labels on non-pointer types. +pub fn print_ty_with_pointer_labels( + lty: LabeledTy, + mut f: impl FnMut(L) -> String, +) -> String { + let mut out = String::new(); + print_ty_with_pointer_labels_into(&mut out, lty, &mut f); + out +} + +pub fn print_ty_with_pointer_labels_into( + dest: &mut String, + lty: LabeledTy, + f: &mut impl FnMut(L) -> String, +) { + use rustc_type_ir::TyKind::*; + match lty.ty.kind() { + // Types with no arguments + Bool | Char | Int(_) | Uint(_) | Float(_) | Str | Foreign(_) | Never => { + write!(dest, "{:?}", lty.ty).unwrap(); + } + + // Types with arguments + Adt(adt_def, _substs) => { + write!(dest, "{:?}", adt_def.did()).unwrap(); + if lty.args.len() != 0 { + dest.push('<'); + // TODO: region args + for (i, &arg_lty) in lty.args.iter().enumerate() { + if i > 0 { + dest.push_str(", "); + } + print_ty_with_pointer_labels_into(dest, arg_lty, f); + } + dest.push('>'); + } + } + &Array(_elem, len) => { + dest.push('['); + print_ty_with_pointer_labels_into(dest, lty.args[0], f); + write!(dest, "; {:?}]", len).unwrap(); + } + &Slice(_elem) => { + dest.push('['); + print_ty_with_pointer_labels_into(dest, lty.args[0], f); + dest.push(']'); + } + RawPtr(mty) => { + if mty.mutbl == Mutability::Not { + dest.push_str("*const "); + } else { + dest.push_str("*mut "); + } + let s = f(lty.label); + if s.len() > 0 { + dest.push_str(&s); + dest.push_str(" "); + } + print_ty_with_pointer_labels_into(dest, lty.args[0], f); + } + &Ref(_rg, _ty, mutbl) => { + let s = f(lty.label); + if mutbl == Mutability::Not { + dest.push_str("&"); + if s.len() > 0 { + dest.push(' '); + } + } else { + dest.push_str("&mut "); + } + if s.len() > 0 { + dest.push_str(&s); + dest.push_str(" "); + } + print_ty_with_pointer_labels_into(dest, lty.args[0], f); + } + FnDef(def_id, _substs) => { + write!(dest, "{:?}", def_id).unwrap(); + if lty.args.len() != 0 { + dest.push('<'); + // TODO: region args + for (i, &arg_lty) in lty.args.iter().enumerate() { + if i > 0 { + dest.push_str(", "); + } + print_ty_with_pointer_labels_into(dest, arg_lty, f); + } + dest.push('>'); + } + } + FnPtr(_) => { + let (ret_lty, arg_ltys) = lty.args.split_last().unwrap(); + dest.push_str("fn("); + for (i, &arg_lty) in arg_ltys.iter().enumerate() { + if i > 0 { + dest.push_str(", "); + } + print_ty_with_pointer_labels_into(dest, arg_lty, f); + } + dest.push_str(") -> "); + print_ty_with_pointer_labels_into(dest, ret_lty, f); + } + Tuple(_) => { + dest.push_str("("); + for (i, &arg_lty) in lty.args.iter().enumerate() { + if i > 0 { + dest.push_str(", "); + } + print_ty_with_pointer_labels_into(dest, arg_lty, f); + } + dest.push_str(")"); + } + + // Types that aren't actually supported by this code yet + Dynamic(..) | Closure(..) | Generator(..) | GeneratorWitness(..) | Projection(..) + | Opaque(..) | Param(..) | Bound(..) | Placeholder(..) | Infer(..) | Error(..) => { + write!(dest, "unknown:{:?}", lty.ty).unwrap(); + } + } +} diff --git a/c2rust-analyze/src/main.rs b/c2rust-analyze/src/main.rs index afd5d2843..fd3befdd8 100644 --- a/c2rust-analyze/src/main.rs +++ b/c2rust-analyze/src/main.rs @@ -16,6 +16,7 @@ extern crate rustc_target; extern crate rustc_type_ir; mod analyze; +mod annotate; mod borrowck; mod context; mod dataflow; @@ -37,7 +38,7 @@ use analyze::AnalysisCallbacks; use anyhow::anyhow; use anyhow::ensure; use anyhow::Context; -use clap::{ArgAction, Parser}; +use clap::{ArgAction, Parser, ValueEnum}; use rustc_driver::RunCompiler; use rustc_driver::TimePassesCallbacks; use rustc_session::config::CrateType; @@ -77,10 +78,16 @@ struct Args { /// from this list will be marked non-rewritable (`FIXED`). #[clap(long, action(ArgAction::Append))] rewrite_paths: Vec, - /// Rewrite source files on disk. The default is to print the rewritten source code to stdout - /// as part of the tool's debug output. - #[clap(long)] + + /// Whether to rewrite source files on disk. The default is to print the rewritten source code + /// to stdout as part of the tool's debug output. + #[clap(long, value_enum)] + rewrite_mode: Option, + + /// Synonym for `--rewrite-mode inplace`, kept around for backward compatibility. + #[clap(long, hide(true), conflicts_with("rewrite_mode"))] rewrite_in_place: bool, + /// Use `todo!()` placeholders in shims for casts that must be implemented manually. /// /// When a function requires a shim, and the shim requires a cast that can't be generated @@ -113,6 +120,19 @@ struct InterceptedCargoArgs { extra_args: Vec, } +#[derive(Clone, Copy, Debug, ValueEnum)] +enum RewriteMode { + /// Do not write rewritten code to disk. + #[value(name = "none")] + None, + /// Apply rewrites to the original source files in-place. + #[value(name = "inplace")] + InPlace, + /// Save rewritten code to a separate file alongside each source file. + #[value(name = "alongside")] + Alongside, +} + fn exit_with_status(status: ExitStatus) { process::exit(status.code().unwrap_or(1)) } @@ -364,6 +384,7 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> { let Args { rustflags, rewrite_paths, + mut rewrite_mode, rewrite_in_place, use_manual_shims, fixed_defs_list, @@ -380,6 +401,13 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> { let manifest_path = manifest_path.as_deref(); let _manifest_dir = manifest_path.and_then(|path| path.parent()); + if rewrite_in_place { + // `rewrite_in_place` and `rewrite_mode` are annotated as conflicting options, so if both + // are set, `Args::parse()` should have exited with an error. + assert!(rewrite_mode.is_none()); + rewrite_mode = Some(RewriteMode::InPlace); + } + set_rust_toolchain()?; // Resolve the sysroot once in the [`cargo_wrapper`] @@ -412,8 +440,13 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> { cmd.env("C2RUST_ANALYZE_REWRITE_PATHS", rewrite_paths); } - if rewrite_in_place { - cmd.env("C2RUST_ANALYZE_REWRITE_IN_PLACE", "1"); + if let Some(rewrite_mode) = rewrite_mode { + let val = match rewrite_mode { + RewriteMode::None => "none", + RewriteMode::InPlace => "inplace", + RewriteMode::Alongside => "alongside", + }; + cmd.env("C2RUST_ANALYZE_REWRITE_MODE", val); } if use_manual_shims { diff --git a/c2rust-analyze/src/rewrite/apply.rs b/c2rust-analyze/src/rewrite/apply.rs index 7944139df..bd9d9d4ad 100644 --- a/c2rust-analyze/src/rewrite/apply.rs +++ b/c2rust-analyze/src/rewrite/apply.rs @@ -2,7 +2,7 @@ use crate::rewrite::Rewrite; use rustc_hir::Mutability; use rustc_span::source_map::{FileName, SourceMap}; use rustc_span::{BytePos, SourceFile, Span, SyntaxContext}; -use std::cmp::Reverse; +use std::cmp::{self, Reverse}; use std::collections::HashMap; use std::convert::Infallible; use std::fmt; @@ -212,6 +212,10 @@ pub trait Sink { type Error; const PARENTHESIZE_EXPRS: bool; fn emit_str(&mut self, s: &str) -> Result<(), Self::Error>; + /// Emit a string from the original file into the output. `s` is a contiguous substring of the + /// original, and the start of `s` is on line `line`. If the first character of `s` is `'\n'`, + /// then it's the newline separating `line` from `line + 1`. + fn emit_orig_str(&mut self, s: &str, line: usize) -> Result<(), Self::Error>; fn emit_fmt(&mut self, args: fmt::Arguments) -> Result<(), Self::Error>; fn emit_expr(&mut self) -> Result<(), Self::Error>; fn emit_sub(&mut self, idx: usize, span: Span) -> Result<(), Self::Error>; @@ -496,7 +500,7 @@ struct RewriteTreeSink<'a, F> { rt: Option<&'a RewriteTree>, } -impl<'a, F: FnMut(&str)> RewriteTreeSink<'a, F> { +impl<'a, F: FnMut(&str, Option)> RewriteTreeSink<'a, F> { fn new(file: &'a SourceFile, emit: &'a mut F) -> RewriteTreeSink<'a, F> { RewriteTreeSink { file, @@ -529,7 +533,12 @@ impl<'a, F: FnMut(&str)> RewriteTreeSink<'a, F> { // so subtract the file's start to obtain indices within its data. let lo_in_file = lo - self.file.start_pos; let hi_in_file = hi - self.file.start_pos; - self.emit_str(&src[lo_in_file.0 as usize..hi_in_file.0 as usize]) + let s = &src[lo_in_file.0 as usize..hi_in_file.0 as usize]; + if let Some(line) = self.file.lookup_line(lo) { + self.emit_orig_str(s, line) + } else { + self.emit_str(s) + } } fn emit_span_with_rewrites( @@ -555,16 +564,20 @@ impl<'a, F: FnMut(&str)> RewriteTreeSink<'a, F> { } } -impl<'a, F: FnMut(&str)> Sink for RewriteTreeSink<'a, F> { +impl<'a, F: FnMut(&str, Option)> Sink for RewriteTreeSink<'a, F> { type Error = Infallible; const PARENTHESIZE_EXPRS: bool = true; fn emit_str(&mut self, s: &str) -> Result<(), Self::Error> { - (self.emit)(s); + (self.emit)(s, None); + Ok(()) + } + fn emit_orig_str(&mut self, s: &str, line: usize) -> Result<(), Self::Error> { + (self.emit)(s, Some(line)); Ok(()) } fn emit_fmt(&mut self, args: fmt::Arguments) -> Result<(), Self::Error> { - (self.emit)(&format!("{args}")); + (self.emit)(&format!("{args}"), None); Ok(()) } fn emit_expr(&mut self) -> Result<(), Self::Error> { @@ -580,12 +593,54 @@ impl<'a, F: FnMut(&str)> Sink for RewriteTreeSink<'a, F> { } } +#[derive(Debug, Default)] +struct LineMapBuilder { + /// Map from line indices in the original source code to line indices in `buf`. For each input + /// line, we give the index of the first output line containing some unmodified portion of the + /// input line. + v: Vec>, +} + +impl LineMapBuilder { + /// Record that part of input line `i` is found in output line `j`. + pub fn record(&mut self, i: usize, j: usize) { + if i >= self.v.len() { + self.v.resize(i + 1, None); + } + self.v[i] = Some(self.v[i].map_or(j, |old_j| cmp::min(j, old_j))); + } + + pub fn finish(self) -> Vec { + // If an input line is missing from the output, find the next non-missing input line and + // use its output index instead. This way, we always have somewhere to attach annotations + // for any line. + let mut out = Vec::with_capacity(self.v.len()); + for (i, j) in self.v.into_iter().enumerate() { + if let Some(j) = j { + // Reuse `j` to fill in for any previous `None` entries, then push a `j` for input + // line `i` itself. + out.resize(i + 1, j); + } + // Otherwise, do nothing. + } + out + } +} + +pub struct FileRewrite { + /// The rewritten source code for this file. + pub new_src: String, + /// For each input line in the original source code, this gives the line number within + /// `new_src` of the first output line that contains some part of the input line. + pub line_map: Vec, +} + /// Apply rewrites `rws` to the source files covered by their `Span`s. Returns a map giving the /// rewritten source code for each file that contains at least one rewritten `Span`. pub fn apply_rewrites( source_map: &SourceMap, rws: Vec<(Span, Rewrite)>, -) -> HashMap { +) -> HashMap { let (rts, errs) = RewriteTree::build(rws); for (span, rw, err) in errs { eprintln!( @@ -594,7 +649,7 @@ pub fn apply_rewrites( ); } - let mut new_src = HashMap::new(); + let mut file_rewrites = HashMap::new(); let mut rts = &rts as &[RewriteTree]; while !rts.is_empty() { let file = source_map.lookup_source_file(rts[0].span.lo()); @@ -607,15 +662,37 @@ pub fn apply_rewrites( rts = rest; let mut buf = String::new(); - let mut emit = |s: &str| buf.push_str(s); + // Number of newlines in `buf`. + let mut buf_line = 0; + let mut line_map = LineMapBuilder::default(); + let mut emit = |s: &str, line| { + if let Some(mut line) = line { + line_map.record(line, buf_line); + for _ in s.matches('\n') { + line += 1; + buf_line += 1; + line_map.record(line, buf_line); + } + } else { + buf_line += s.matches('\n').count(); + } + buf.push_str(s); + }; + let mut sink = RewriteTreeSink::new(&file, &mut emit); let file_span = Span::new(file.start_pos, file.end_pos, SyntaxContext::root(), None); sink.emit_span_with_rewrites(file_span, file_rts).unwrap(); - new_src.insert(file.name.clone(), buf); + file_rewrites.insert( + file.name.clone(), + FileRewrite { + new_src: buf, + line_map: line_map.finish(), + }, + ); } - new_src + file_rewrites } #[cfg(test)] diff --git a/c2rust-analyze/src/rewrite/mod.rs b/c2rust-analyze/src/rewrite/mod.rs index 607870565..e6eb35ebc 100644 --- a/c2rust-analyze/src/rewrite/mod.rs +++ b/c2rust-analyze/src/rewrite/mod.rs @@ -26,6 +26,7 @@ use rustc_hir::Mutability; use rustc_middle::ty::TyCtxt; use rustc_span::{FileName, Span}; +use std::collections::HashMap; use std::fmt; use std::fs; @@ -229,6 +230,9 @@ impl apply::Sink for FormatterSink<'_, '_> { fn emit_str(&mut self, s: &str) -> fmt::Result { self.0.write_str(s) } + fn emit_orig_str(&mut self, s: &str, _line: usize) -> fmt::Result { + self.0.write_str(s) + } fn emit_fmt(&mut self, args: fmt::Arguments) -> fmt::Result { self.0.write_fmt(args) } @@ -243,17 +247,78 @@ impl apply::Sink for FormatterSink<'_, '_> { } } +/// Return a copy of `src` with `annotations` added as comments. If `line_map` is provided, the +/// line numbers in `annotations` are remapped using `line_map` before the annotations are inserted +/// in `src`. +fn add_annotations( + src: String, + line_map: Option<&[usize]>, + mut annotations: Vec<(usize, String)>, +) -> String { + if annotations.is_empty() { + return src; + } + + let map_line = |i: usize| -> usize { + if let Some(line_map) = line_map { + line_map.get(i).copied().unwrap_or(usize::MAX) + } else { + i + } + }; + + // Stable sort by input line, preserving the order in which annotations were added. + annotations.sort_by_key(|&(line, _)| line); + // The `usize` in each `annotations` entry is an input line number. Map it to an output line + // number using `line_map`. Input lines with no matching output line are placed at the end. + for &mut (ref mut line, _) in &mut annotations { + *line = map_line(*line); + } + // Now stable sort by output line, preserving the order of input line if several input lines + // map to one output line, and also preserving the order in which annotations were added. + annotations.sort_by_key(|&(line, _)| line); + + let mut out = String::with_capacity(src.len()); + let mut idx = 0; + for (j, line) in src.lines().enumerate() { + out.push_str(line); + out.push('\n'); + // Now emit all annotations associated with line `j`. + while let Some((_, ann)) = annotations.get(idx).filter(|&&(line, _)| line == j) { + idx += 1; + let indent_len = line.len() - line.trim_start().len(); + out.push_str(&line[..indent_len]); + out.push_str("// "); + out.push_str(ann); + out.push('\n'); + } + } + + // Emit any leftover annotations at the end of the file. + while let Some((_, ann)) = annotations.get(idx) { + idx += 1; + out.push_str("// "); + out.push_str(ann); + out.push('\n'); + } + + out +} + #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] pub enum UpdateFiles { No, - Yes, + InPlace, + Alongside, } -pub fn apply_rewrites(tcx: TyCtxt, rewrites: Vec<(Span, Rewrite)>, update_files: UpdateFiles) { - // TODO: emit new source code properly instead of just printing - let new_src = apply::apply_rewrites(tcx.sess.source_map(), rewrites); - - for (filename, src) in new_src { +pub fn apply_rewrites( + tcx: TyCtxt, + rewrites: Vec<(Span, Rewrite)>, + mut annotations: HashMap>, + update_files: UpdateFiles, +) { + let emit = |filename, src: String| { println!("\n\n ===== BEGIN {:?} =====", filename); for line in src.lines() { // Omit filecheck directives from the debug output, as filecheck can get confused due @@ -267,10 +332,15 @@ pub fn apply_rewrites(tcx: TyCtxt, rewrites: Vec<(Span, Rewrite)>, update_files: } println!(" ===== END {:?} =====", filename); - if update_files == UpdateFiles::Yes { + if matches!(update_files, UpdateFiles::InPlace | UpdateFiles::Alongside) { let mut path_ok = false; if let FileName::Real(ref rfn) = filename { if let Some(path) = rfn.local_path() { + let path = match update_files { + UpdateFiles::InPlace => path.to_owned(), + UpdateFiles::Alongside => path.with_extension("new.rs"), + _ => unreachable!(), + }; fs::write(path, src).unwrap(); path_ok = true; } @@ -279,6 +349,29 @@ pub fn apply_rewrites(tcx: TyCtxt, rewrites: Vec<(Span, Rewrite)>, update_files: log::warn!("couldn't write to non-real file {filename:?}"); } } + }; + + let new_src = apply::apply_rewrites(tcx.sess.source_map(), rewrites); + for (filename, file_rw) in new_src { + let annotations = annotations.remove(&filename).unwrap_or(Vec::new()); + let new_src = add_annotations(file_rw.new_src, Some(&file_rw.line_map), annotations); + emit(filename, new_src); + } + + // Also emit files that have annotations but no rewrites. + if annotations.len() > 0 { + let mut leftover_annotations = annotations.into_iter().collect::>(); + leftover_annotations.sort(); + let sm = tcx.sess.source_map(); + for (filename, annotations) in leftover_annotations { + let sf = sm.get_source_file(&filename).unwrap(); + let src = match sf.src { + Some(ref x) => String::clone(x), + None => continue, + }; + let src = add_annotations(src, None, annotations); + emit(filename, src); + } } } diff --git a/c2rust-analyze/tests/filecheck/insertion_sort_driver.rs b/c2rust-analyze/tests/filecheck/insertion_sort_driver.rs index 6b650ee59..1e392a316 100644 --- a/c2rust-analyze/tests/filecheck/insertion_sort_driver.rs +++ b/c2rust-analyze/tests/filecheck/insertion_sort_driver.rs @@ -13,7 +13,8 @@ pub unsafe extern "C" fn insertion_sort(n: libc::c_int, p: *mut libc::c_int) { // CHECK: let tmp: {{.*}} = *&(&(&*(p))[((i as isize) as usize) ..])[0]; let tmp: libc::c_int = *p.offset(i as isize); let mut j: libc::c_int = i; - // CHECK-NOT: p.offset + // `p.offset` should not appear, except in inline annotation comments. + // CHECK-NOT: {{^[^/]*}}p.offset while j > 0 as libc::c_int && *p.offset((j - 1 as libc::c_int) as isize) > tmp { *p.offset(j as isize) = @@ -28,10 +29,9 @@ pub unsafe extern "C" fn insertion_sort(n: libc::c_int, p: *mut libc::c_int) { // CHECK-LABEL: pub unsafe extern "C" fn check_eq // CHECK-SAME: p: &'h0 [(libc::c_int)] -// CHECK-NEXT: q: &'h1 [(libc::c_int)] +// CHECK-SAME: q: &'h1 [(libc::c_int)] #[no_mangle] -pub unsafe extern "C" fn check_eq(n: libc::c_int, p: *mut libc::c_int, - q: *mut libc::c_int) { +pub unsafe extern "C" fn check_eq(n: libc::c_int, p: *mut libc::c_int, q: *mut libc::c_int) { let mut i: libc::c_int = 0 as libc::c_int; while i < n { //assert!(*p.offset(i as isize) == *q.offset(i as isize));