diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 691ac5f07..88d0bea5b 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -1,8 +1,9 @@ use crate::annotate::AnnotationBuffer; use crate::borrowck; use crate::context::{ - self, AnalysisCtxt, AnalysisCtxtData, FlagSet, GlobalAnalysisCtxt, GlobalAssignment, LFnSig, - LTy, LTyCtxt, LocalAssignment, PermissionSet, PointerId, PointerInfo, + self, AnalysisCtxt, AnalysisCtxtData, DontRewriteFieldReason, DontRewriteFnReason, + DontRewriteStaticReason, FlagSet, GlobalAnalysisCtxt, GlobalAssignment, LFnSig, LTy, LTyCtxt, + LocalAssignment, PermissionSet, PointerId, PointerInfo, }; use crate::dataflow; use crate::dataflow::DataflowConstraints; @@ -32,19 +33,11 @@ use rustc_hir::def_id::DefIndex; use rustc_hir::def_id::LocalDefId; use rustc_hir::definitions::DefPathData; use rustc_index::vec::IndexVec; -use rustc_middle::mir::visit::Visitor; -use rustc_middle::mir::AggregateKind; -use rustc_middle::mir::BindingForm; -use rustc_middle::mir::Body; -use rustc_middle::mir::Constant; -use rustc_middle::mir::Local; -use rustc_middle::mir::LocalDecl; -use rustc_middle::mir::LocalInfo; -use rustc_middle::mir::LocalKind; -use rustc_middle::mir::Location; -use rustc_middle::mir::Operand; -use rustc_middle::mir::Rvalue; -use rustc_middle::mir::StatementKind; +use rustc_middle::mir::visit::{PlaceContext, Visitor}; +use rustc_middle::mir::{ + AggregateKind, BindingForm, Body, Constant, Local, LocalDecl, LocalInfo, LocalKind, Location, + Operand, Place, PlaceElem, PlaceRef, Rvalue, StatementKind, +}; use rustc_middle::ty::GenericArgKind; use rustc_middle::ty::Ty; use rustc_middle::ty::TyCtxt; @@ -564,6 +557,8 @@ fn run(tcx: TyCtxt) { eprintln!(" {:?}", ldid); } + populate_field_users(&mut gacx, &all_fn_ldids); + // ---------------------------------- // Label all global types // ---------------------------------- @@ -621,7 +616,7 @@ fn run(tcx: TyCtxt) { // ---------------------------------- for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -669,7 +664,7 @@ fn run(tcx: TyCtxt) { info.pointee_constraints.set(pointee_constraints); } Err(pd) => { - gacx.mark_fn_failed(ldid.to_def_id(), pd); + gacx.mark_fn_failed(ldid.to_def_id(), DontRewriteFnReason::POINTEE_INVALID, pd); } } @@ -698,7 +693,7 @@ fn run(tcx: TyCtxt) { } for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -716,7 +711,7 @@ fn run(tcx: TyCtxt) { // Print results for debugging for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -742,7 +737,7 @@ fn run(tcx: TyCtxt) { // computed during this the process is kept around for use in later passes. let mut global_equiv = GlobalEquivSet::new(gacx.num_pointers()); for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -763,7 +758,7 @@ fn run(tcx: TyCtxt) { Ok(x) => x, Err(pd) => { info.acx_data.set(acx.into_data()); - gacx.mark_fn_failed(ldid.to_def_id(), pd); + gacx.mark_fn_failed(ldid.to_def_id(), DontRewriteFnReason::DATAFLOW_INVALID, pd); continue; } }; @@ -796,7 +791,7 @@ fn run(tcx: TyCtxt) { gacx.remap_pointers(&global_equiv_map, global_counter); for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -979,8 +974,6 @@ fn run(tcx: TyCtxt) { // Items in the "fixed defs" list have all pointers in their types set to `FIXED`. For // testing, putting #[c2rust_analyze_test::fixed_signature] on an item has the same effect. - // - // Functions in the list are also added to `gacx.fns_fixed`. for ldid in tcx.hir_crate_items(()).definitions() { // TODO (HACK): `Clone::clone` impls are omitted from `fn_sigs` and cause a panic below. if is_impl_clone(tcx, ldid.to_def_id()) { @@ -996,7 +989,8 @@ fn run(tcx: TyCtxt) { None => panic!("missing fn_sig for {:?}", ldid), }; make_sig_fixed(&mut gasn, lsig); - gacx.fns_fixed.insert(ldid.to_def_id()); + gacx.dont_rewrite_fns + .add(ldid.to_def_id(), DontRewriteFnReason::USER_REQUEST); } DefKind::Struct | DefKind::Enum | DefKind::Union => { @@ -1016,6 +1010,8 @@ fn run(tcx: TyCtxt) { None => panic!("missing field_lty for {:?}", ldid), }; make_ty_fixed(&mut gasn, lty); + gacx.dont_rewrite_fields + .add(field.did, DontRewriteFieldReason::USER_REQUEST); } } } @@ -1034,6 +1030,8 @@ fn run(tcx: TyCtxt) { if !ptr.is_none() { gasn.flags[ptr].insert(FlagSet::FIXED); } + gacx.dont_rewrite_statics + .add(ldid.to_def_id(), DontRewriteStaticReason::USER_REQUEST); } _ => {} @@ -1052,6 +1050,7 @@ fn run(tcx: TyCtxt) { } gacx.mark_fn_failed( ldid.to_def_id(), + DontRewriteFnReason::FAKE_INVALID_FOR_TESTING, PanicDetail::new("explicit fail_before_analysis for testing".to_owned()), ); } @@ -1069,7 +1068,7 @@ fn run(tcx: TyCtxt) { let old_gasn = gasn.clone(); for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -1103,7 +1102,11 @@ fn run(tcx: TyCtxt) { match r { Ok(()) => {} Err(pd) => { - gacx.mark_fn_failed(ldid.to_def_id(), pd); + gacx.mark_fn_failed( + ldid.to_def_id(), + DontRewriteFnReason::BORROWCK_INVALID, + pd, + ); continue; } } @@ -1136,7 +1139,7 @@ fn run(tcx: TyCtxt) { // Do final processing on each function. for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -1159,7 +1162,11 @@ fn run(tcx: TyCtxt) { match r { Ok(()) => {} Err(pd) => { - gacx.mark_fn_failed(ldid.to_def_id(), pd); + gacx.mark_fn_failed( + ldid.to_def_id(), + DontRewriteFnReason::MISC_ANALYSIS_INVALID, + pd, + ); continue; } } @@ -1223,6 +1230,7 @@ fn run(tcx: TyCtxt) { } gacx.mark_fn_failed( ldid.to_def_id(), + DontRewriteFnReason::FAKE_INVALID_FOR_TESTING, PanicDetail::new("explicit fail_before_rewriting for testing".to_owned()), ); } @@ -1246,30 +1254,20 @@ fn run(tcx: TyCtxt) { let manual_shim_casts = manual_shim_casts; // It may take multiple tries to reach a state where all rewrites succeed. - loop { + for i in 0.. { + assert!(i < 100); func_reports.clear(); all_rewrites.clear(); eprintln!("\n--- start rewriting ---"); - // Before generating rewrites, add the FIXED flag to the signatures of all functions that - // failed analysis. - // - // The set of failed functions is monotonically nondecreasing throughout this loop, so - // there's no need to worry about potentially removing `FIXED` from some functions. - for did in gacx.iter_fns_failed() { - let lsig = gacx.fn_sigs[&did]; - for sig_lty in lsig.inputs_and_output() { - for lty in sig_lty.iter() { - let ptr = lty.label; - if !ptr.is_none() { - gasn.flags[ptr].insert(FlagSet::FIXED); - } - } - } - } + // Update non-rewritten items first. This has two purposes. First, it clears the + // `new_keys()` lists, which we check at the end of the loop to see whether we've reached a + // fixpoint. Second, doing this adds the `FIXED` flag to pointers that we shouldn't + // rewrite, such as pointers in the signatures of non-rewritten functions. + process_new_dont_rewrite_items(&mut gacx, &mut gasn); for &ldid in &all_fn_ldids { - if gacx.fn_skip_rewrite(ldid.to_def_id()) { + if gacx.dont_rewrite_fn(ldid.to_def_id()) { continue; } @@ -1278,7 +1276,7 @@ fn run(tcx: TyCtxt) { let name = tcx.item_name(ldid.to_def_id()); let mir = tcx.mir_built(ldid_const); let mir = mir.borrow(); - let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); + let mut acx = gacx.function_context_with_data(&mir, info.acx_data.take()); let asn = gasn.and(&mut info.lasn); let pointee_types = global_pointee_types.and(info.local_pointee_types.get()); @@ -1291,8 +1289,14 @@ fn run(tcx: TyCtxt) { } let hir_body_id = tcx.hir().body_owned_by(ldid); - let expr_rewrites = - rewrite::gen_expr_rewrites(&acx, &asn, pointee_types, &mir, hir_body_id); + let expr_rewrites = rewrite::gen_expr_rewrites( + &mut acx, + &asn, + pointee_types, + ldid.to_def_id(), + &mir, + hir_body_id, + ); let ty_rewrites = rewrite::gen_ty_rewrites(&acx, &asn, pointee_types, &mir, ldid); // Print rewrites let report = func_reports.entry(ldid).or_default(); @@ -1317,7 +1321,7 @@ fn run(tcx: TyCtxt) { match r { Ok(()) => {} Err(pd) => { - gacx.mark_fn_failed(ldid.to_def_id(), pd); + gacx.mark_fn_failed(ldid.to_def_id(), DontRewriteFnReason::REWRITE_INVALID, pd); continue; } } @@ -1329,7 +1333,6 @@ fn run(tcx: TyCtxt) { all_rewrites.extend(shim_call_rewrites); // Generate shims for functions that need them. - let mut any_failed = false; for def_id in shim_fn_def_ids { let r = panic_detail::catch_unwind(AssertUnwindSafe(|| { all_rewrites.push(rewrite::gen_shim_definition_rewrite( @@ -1342,14 +1345,17 @@ fn run(tcx: TyCtxt) { match r { Ok(()) => {} Err(pd) => { - gacx.mark_fn_failed(def_id, pd); - any_failed = true; + gacx.mark_fn_failed(def_id, DontRewriteFnReason::SHIM_GENERATION_FAILED, pd); continue; } } } - if !any_failed { + // Exit the loop upon reaching a fixpoint. + let any_new_dont_rewrite_keys = !gacx.dont_rewrite_fns.new_keys().is_empty() + || !gacx.dont_rewrite_statics.new_keys().is_empty() + || !gacx.dont_rewrite_fields.new_keys().is_empty(); + if !any_new_dont_rewrite_keys { break; } } @@ -1422,7 +1428,7 @@ fn run(tcx: TyCtxt) { None => continue, }; - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -1617,14 +1623,30 @@ fn run(tcx: TyCtxt) { } eprintln!("\nerror summary:"); - for ldid in tcx.hir().body_owners() { - if let Some(detail) = gacx.fns_failed.get(&ldid.to_def_id()) { - eprintln!( - "analysis of {:?} failed: {}", - ldid, - detail.to_string_short() - ); - } + fn sorted_def_ids(it: impl IntoIterator) -> Vec { + let mut v = it.into_iter().collect::>(); + v.sort(); + v + } + for def_id in sorted_def_ids(gacx.dont_rewrite_fns.keys()) { + let opt_detail = gacx.fns_failed.get(&def_id); + let flags = gacx.dont_rewrite_fns.get(def_id); + assert!(opt_detail.is_some() || !flags.is_empty()); + let detail_str = match opt_detail { + Some(detail) => detail.to_string_short(), + None => "(no panic)".into(), + }; + eprintln!("analysis of {def_id:?} failed: {flags:?}, {detail_str}"); + } + + for def_id in sorted_def_ids(gacx.dont_rewrite_statics.keys()) { + let flags = gacx.dont_rewrite_statics.get(def_id); + eprintln!("analysis of {def_id:?} failed: {flags:?}"); + } + + for def_id in sorted_def_ids(gacx.dont_rewrite_fields.keys()) { + let flags = gacx.dont_rewrite_fields.get(def_id); + eprintln!("analysis of {def_id:?} failed: {flags:?}"); } eprintln!( @@ -1935,6 +1957,133 @@ fn for_each_callee(tcx: TyCtxt, ldid: LocalDefId, f: impl FnMut(LocalDefId)) { CalleeVisitor { tcx, mir, f }.visit_body(mir); } +/// Call `f` for each field mentioned in a place projection within the body of `ldid`. +fn for_each_field_use(tcx: TyCtxt, ldid: LocalDefId, f: impl FnMut(DefId)) { + let ldid_const = WithOptConstParam::unknown(ldid); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + let mir: &Body = &mir; + + struct FieldUseVisitor<'a, 'tcx, F> { + tcx: TyCtxt<'tcx>, + mir: &'a Body<'tcx>, + f: F, + } + + impl<'tcx, F: FnMut(DefId)> Visitor<'tcx> for FieldUseVisitor<'_, 'tcx, F> { + fn visit_place( + &mut self, + place: &Place<'tcx>, + _context: PlaceContext, + _location: Location, + ) { + for (i, elem) in place.projection.iter().enumerate() { + let field_idx = match elem { + PlaceElem::Field(x, _) => x, + _ => continue, + }; + // Build a `PlaceRef` with all the projections up to, but not including, `elem`. + let place_ref = PlaceRef { + local: place.local, + projection: &place.projection[..i], + }; + let adt_pty = place_ref.ty(self.mir, self.tcx); + let adt_def = match adt_pty.ty.ty_adt_def() { + Some(x) => x, + // `PlaceElem::Field` also works on tuple types, which don't have an `AdtDef`. + None => continue, + }; + let variant_def = match adt_pty.variant_index { + None => adt_def.non_enum_variant(), + Some(i) => adt_def.variant(i), + }; + let field_def = &variant_def.fields[field_idx.index()]; + (self.f)(field_def.did); + } + } + } + + FieldUseVisitor { tcx, mir, f }.visit_body(mir); +} + +/// Populate `gacx.field_users` and `gacx.fn_fields_used`. +fn populate_field_users(gacx: &mut GlobalAnalysisCtxt, fn_ldids: &[LocalDefId]) { + let mut field_users = HashMap::new(); + let mut seen = HashSet::new(); + + for &ldid in fn_ldids { + let mut fn_fields = Vec::new(); + let mut fn_seen = HashSet::new(); + for_each_field_use(gacx.tcx, ldid, |field_def_id| { + if let Some(field_ldid) = field_def_id.as_local() { + if seen.insert((field_ldid, ldid)) { + field_users + .entry(field_ldid) + .or_insert_with(Vec::new) + .push(ldid); + } + + if fn_seen.insert(field_ldid) { + fn_fields.push(field_ldid); + } + } + }); + gacx.fn_fields_used.insert(ldid, fn_fields); + } + + for (k, v) in field_users { + gacx.field_users.insert(k, v); + } +} + +/// Call `take_new_keys()` on `gacx.dont_rewrite_{fns,statics,fields}` and process the results. +/// This involves adding `FIXED` to some pointers and maybe propagating `DontRewrite` flags to +/// other items. +fn process_new_dont_rewrite_items(gacx: &mut GlobalAnalysisCtxt, gasn: &mut GlobalAssignment) { + for i in 0.. { + assert!(i < 20); + let mut found_any = false; + + for did in gacx.dont_rewrite_fns.take_new_keys() { + found_any = true; + let lsig = &gacx.fn_sigs[&did]; + make_sig_fixed(gasn, lsig); + + let ldid = match did.as_local() { + Some(x) => x, + None => continue, + }; + + for &field_ldid in gacx.fn_fields_used.get(ldid) { + gacx.dont_rewrite_fields.add( + field_ldid.to_def_id(), + DontRewriteFieldReason::NON_REWRITTEN_USE, + ); + } + + // TODO: callers/callees + } + + for did in gacx.dont_rewrite_statics.take_new_keys() { + found_any = true; + let lty = gacx.static_tys[&did]; + make_ty_fixed(gasn, lty); + } + + for did in gacx.dont_rewrite_fields.take_new_keys() { + found_any = true; + let lty = gacx.field_ltys[&did]; + make_ty_fixed(gasn, lty); + } + + // The previous steps can cause more items to become non-rewritten. Keep going until + // there's no more work to do. + if !found_any { + break; + } + } +} + pub struct AnalysisCallbacks; impl rustc_driver::Callbacks for AnalysisCallbacks { diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index 85701a3fb..24f7ac7cc 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -34,9 +34,12 @@ use rustc_middle::ty::Ty; use rustc_middle::ty::TyCtxt; use rustc_middle::ty::TyKind; use rustc_type_ir::RegionKind::{ReEarlyBound, ReStatic}; -use std::collections::{HashMap, HashSet}; +use std::collections::hash_map::{Entry, HashMap}; +use std::collections::HashSet; use std::fmt::{Debug, Write as _}; -use std::ops::Index; +use std::hash::Hash; +use std::mem; +use std::ops::{BitOr, Index, Range}; bitflags! { /// Permissions are created such that we allow dropping permissions in any assignment. @@ -186,6 +189,74 @@ bitflags! { } } +bitflags! { + /// Flags indicating reasons why a function isn't being rewritten. + #[derive(Default)] + pub struct DontRewriteFnReason: u16 { + /// The user requested that this function be left unchanged. + const USER_REQUEST = 1 << 0; + /// The function contains an unsupported int-to-pointer cast. + const INT_TO_PTR_CAST = 1 << 1; + /// The function calls an extern function that has no safe rewrite. + const EXTERN_CALL = 1 << 2; + /// The function calls another local function that isn't being rewritten. + const NON_REWRITTEN_CALLEE = 1 << 3; + /// The function uses `Cell` pointers whose types are too complex for the current rewrite + /// rules. + const COMPLEX_CELL = 1 << 4; + /// The function contains a pointer-to-pointer cast that isn't covered by rewrite rules. + const PTR_TO_PTR_CAST = 1 << 5; + /// The function dereferences a pointer that would remain raw after rewriting. + const RAW_PTR_DEREF = 1 << 6; + /// Calling this function from non-rewritten code requires a shim, but shim generation + /// failed. + const SHIM_GENERATION_FAILED = 1 << 7; + + /// Pointee analysis results for this function are invalid. + const POINTEE_INVALID = 1 << 10; + /// Dataflow analysis results for this function are invalid. + const DATAFLOW_INVALID = 1 << 11; + /// Borrowcheck/Polonius analysis results for this function are invalid. + const BORROWCK_INVALID = 1 << 12; + /// Results of some other analysis for this function are invalid. + const MISC_ANALYSIS_INVALID = 1 << 13; + /// The set of rewrites generated for this function is invalid or incomplete. + const REWRITE_INVALID = 1 << 14; + /// Analysis results for this function are valid, but were marked as invalid anyway in + /// order to test error recovery. + const FAKE_INVALID_FOR_TESTING = 1 << 15; + + const ANALYSIS_INVALID_MASK = Self::POINTEE_INVALID.bits + | Self::DATAFLOW_INVALID.bits + | Self::BORROWCK_INVALID.bits + | Self::MISC_ANALYSIS_INVALID.bits + | Self::REWRITE_INVALID.bits + | Self::FAKE_INVALID_FOR_TESTING.bits; + } +} + +bitflags! { + /// Flags indicating reasons why a static isn't being rewritten. + #[derive(Default)] + pub struct DontRewriteStaticReason: u16 { + /// The user requested that this static be left unchanged. + const USER_REQUEST = 0x0001; + /// The static is used in a function that isn't being rewritten. + const NON_REWRITTEN_USE = 0x0002; + } +} + +bitflags! { + /// Flags indicating reasons why an ADT field isn't being rewritten. + #[derive(Default)] + pub struct DontRewriteFieldReason: u16 { + /// The user requested that this field be left unchanged. + const USER_REQUEST = 0x0001; + /// The field is used in a function that isn't being rewritten. + const NON_REWRITTEN_USE = 0x0002; + } +} + pub use crate::pointer_id::PointerId; pub type LTy<'tcx> = LabeledTy<'tcx, PointerId>; @@ -317,6 +388,7 @@ pub struct GlobalAnalysisCtxt<'tcx> { ptr_info: GlobalPointerTable, pub fn_sigs: HashMap>, + pub fn_fields_used: MultiMap, /// A map of all [`KnownFn`]s as determined by [`all_known_fns`]. /// @@ -326,14 +398,16 @@ pub struct GlobalAnalysisCtxt<'tcx> { /// [`name`]: KnownFn::name known_fns: HashMap<&'static str, &'static KnownFn>, + pub dont_rewrite_fns: FlagMap, + pub dont_rewrite_statics: FlagMap, + pub dont_rewrite_fields: FlagMap, + /// `DefId`s of functions where analysis failed, and a [`PanicDetail`] explaining the reason /// for each failure. pub fns_failed: HashMap, - /// `DefId`s of functions that were marked "fixed" (non-rewritable) through command-line - /// arguments. - pub fns_fixed: HashSet, pub field_ltys: HashMap>, + pub field_users: MultiMap, pub static_tys: HashMap>, pub addr_of_static: HashMap, @@ -696,13 +770,17 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { lcx: LabeledTyCtxt::new(tcx), ptr_info: GlobalPointerTable::empty(), fn_sigs: HashMap::new(), + fn_fields_used: MultiMap::new(), known_fns: all_known_fns() .iter() .map(|known_fn| (known_fn.name, known_fn)) .collect(), + dont_rewrite_fns: FlagMap::new(), + dont_rewrite_statics: FlagMap::new(), + dont_rewrite_fields: FlagMap::new(), fns_failed: HashMap::new(), - fns_fixed: HashSet::new(), field_ltys: HashMap::new(), + field_users: MultiMap::new(), static_tys: HashMap::new(), addr_of_static: HashMap::new(), adt_metadata: AdtMetadataTable::default(), @@ -761,10 +839,14 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { lcx, ref mut ptr_info, ref mut fn_sigs, + fn_fields_used: _, known_fns: _, + dont_rewrite_fns: _, + dont_rewrite_statics: _, + dont_rewrite_fields: _, fns_failed: _, - fns_fixed: _, ref mut field_ltys, + field_users: _, ref mut static_tys, ref mut addr_of_static, adt_metadata: _, @@ -820,39 +902,15 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { } } - pub fn fn_failed(&self, did: DefId) -> bool { - self.fns_failed.contains_key(&did) - } - - pub fn mark_fn_failed(&mut self, did: DefId, detail: PanicDetail) { - if self.fns_failed.contains_key(&did) { - return; - } - - self.fns_failed.insert(did, detail); - } - - pub fn iter_fns_failed(&self) -> impl Iterator + '_ { - self.fns_failed.keys().copied() - } - - pub fn fn_skip_rewrite(&self, did: DefId) -> bool { - self.fn_failed(did) || self.fns_fixed.contains(&did) + pub fn mark_fn_failed(&mut self, did: DefId, reason: DontRewriteFnReason, detail: PanicDetail) { + self.dont_rewrite_fns.add(did, reason); + // Insert `detail` if there isn't yet an entry for this `DefId`. + self.fns_failed.entry(did).or_insert(detail); } /// Iterate over the `DefId`s of all functions that should skip rewriting. pub fn iter_fns_skip_rewrite<'a>(&'a self) -> impl Iterator + 'a { - // This let binding avoids a lifetime error with the closure and return-position `impl - // Trait`. - let fns_fixed = &self.fns_fixed; - // If the same `DefId` is in both `fns_failed` and `fns_fixed`, be sure to return it only - // once. - fns_fixed.iter().copied().chain( - self.fns_failed - .keys() - .copied() - .filter(move |did| !fns_fixed.contains(&did)), - ) + self.dont_rewrite_fns.keys() } pub fn known_fn(&self, def_id: DefId) -> Option<&'static KnownFn> { @@ -880,6 +938,17 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { .filter_map(|(&def_id, fn_sig)| Some((fn_sig, self.known_fn(def_id)?))) .flat_map(|(fn_sig, known_fn)| known_fn.ptr_perms(fn_sig)) } + + /// Check whether the function with the given `def_id` has been marked as non-rewritable. + pub fn dont_rewrite_fn(&self, def_id: DefId) -> bool { + self.dont_rewrite_fns.contains(def_id) + } + /// Check whether analysis failed for the function with the given `def_id`. + pub fn fn_analysis_invalid(&self, def_id: DefId) -> bool { + self.dont_rewrite_fns + .get(def_id) + .intersects(DontRewriteFnReason::ANALYSIS_INVALID_MASK) + } } impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> { @@ -1523,3 +1592,97 @@ pub fn print_ty_with_pointer_labels_into( } } } + +/// Map for associating flags (such as `DontRewriteFnReason`) with keys (such as `DefId`). +pub struct FlagMap { + /// Stores the current flags for each key. If no flags are set, the entry is omitted; that is, + /// for every entry `(k, v)`, it's always the case that `v != V::default()`. + m: HashMap, + /// Keys that were added to `m` since the last call to `take_new()`. Specifically, this + /// includes every `k` for which `get(k)` was `V::default()` but `get(k)` now has a different, + /// non-default value. + new: Vec, +} + +impl FlagMap +where + K: Copy + Hash + Eq, + V: Copy + Default + Eq + BitOr, +{ + pub fn new() -> FlagMap { + FlagMap { + m: HashMap::new(), + new: Vec::new(), + } + } + + pub fn add(&mut self, k: K, v: V) { + if v == V::default() { + return; + } + let cur = self.m.entry(k).or_default(); + if *cur == V::default() { + self.new.push(k); + } + *cur = *cur | v; + } + + /// Get the current flags for `k`. Returns `V::default()` if no flags have been set. + pub fn get(&self, k: K) -> V { + self.m.get(&k).copied().unwrap_or_default() + } + + pub fn contains(&self, k: K) -> bool { + self.m.contains_key(&k) + } + + pub fn new_keys(&self) -> &[K] { + &self.new + } + + pub fn take_new_keys(&mut self) -> Vec { + mem::take(&mut self.new) + } + + pub fn keys<'a>(&'a self) -> impl Iterator + 'a { + self.m.keys().copied() + } +} + +/// A map from keys to lists of values, with a compact representation. +#[derive(Clone, Debug)] +pub struct MultiMap { + defs: HashMap>, + users: Vec, +} + +impl MultiMap +where + K: Copy + Hash + Eq + Debug, +{ + pub fn new() -> MultiMap { + MultiMap { + defs: HashMap::new(), + users: Vec::new(), + } + } + + pub fn insert(&mut self, def: K, users: impl IntoIterator) { + let e = match self.defs.entry(def) { + Entry::Vacant(e) => e, + Entry::Occupied(_) => panic!("duplicate entry for {def:?}"), + }; + let start = self.users.len(); + self.users.extend(users); + let end = self.users.len(); + e.insert(start..end); + } + + pub fn get(&self, def: K) -> &[V] { + let range = match self.defs.get(&def) { + Some(x) => x, + None => return &[], + }; + &self.users[range.clone()] + } +} diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index cb2027e1e..e5b391752 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -7,7 +7,7 @@ //! all adjustments, as this would make even non-rewritten code extremely verbose, so we try to //! materialize adjustments only on code that's subject to some rewrite. -use crate::context::{AnalysisCtxt, Assignment, FlagSet, LTy, PermissionSet}; +use crate::context::{AnalysisCtxt, Assignment, DontRewriteFnReason, FlagSet, LTy, PermissionSet}; use crate::panic_detail; use crate::pointee_type::PointeeTypes; use crate::pointer_id::{PointerId, PointerTable}; @@ -128,6 +128,7 @@ struct ExprRewriteVisitor<'a, 'tcx> { mir: &'a Body<'tcx>, loc: Location, sub_loc: Vec, + errors: DontRewriteFnReason, } impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { @@ -152,9 +153,14 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { statement_index: 0, }, sub_loc: Vec::new(), + errors: DontRewriteFnReason::empty(), } } + fn err(&mut self, reason: DontRewriteFnReason) { + self.errors.insert(reason); + } + fn enter R, R>(&mut self, sub: SubLoc, f: F) -> R { self.sub_loc.push(sub); let r = f(self); @@ -240,6 +246,10 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { if !flags.contains(FlagSet::FIXED) { let desc = type_desc::perms_to_desc(local_lty.ty, perms, flags); if desc.own == Ownership::Cell { + if pl.projection.len() > 1 || desc.qty != Quantity::Single { + // NYI: `Cell` inside structs, arrays, or ptr-to-ptr + self.err(DontRewriteFnReason::COMPLEX_CELL); + } // this is an assignment like `*x = 2` but `x` has CELL permissions self.emit(RewriteKind::CellSet); } @@ -256,6 +266,10 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { let desc = type_desc::local_perms_to_desc(local_ty, perms, flags); if desc.own == Ownership::Cell { // this is an assignment like `let x = 2` but `x` has CELL permissions + if !pl.projection.is_empty() || desc.qty != Quantity::Single { + // NYI: `Cell` inside structs, arrays, or ptr-to-ptr + self.err(DontRewriteFnReason::COMPLEX_CELL); + } self.enter_rvalue(|v| v.emit(RewriteKind::CellNew)) } @@ -269,6 +283,10 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { if !flags.contains(FlagSet::FIXED) && flags.contains(FlagSet::CELL) { // this is an assignment like `let x = *y` but `y` has CELL permissions + if pl.projection.len() > 1 || desc.qty != Quantity::Single { + // NYI: `Cell` inside structs, arrays, or ptr-to-ptr + self.err(DontRewriteFnReason::COMPLEX_CELL); + } self.enter_rvalue(|v| v.emit(RewriteKind::CellGet)) } } @@ -1025,7 +1043,7 @@ pub fn gen_mir_rewrites<'tcx>( asn: &Assignment, pointee_types: PointerTable>, mir: &Body<'tcx>, -) -> HashMap> { +) -> (HashMap>, DontRewriteFnReason) { let mut out = HashMap::new(); let mut v = ExprRewriteVisitor::new(acx, asn, pointee_types, &mut out, mir); @@ -1048,5 +1066,6 @@ pub fn gen_mir_rewrites<'tcx>( } } - out + let errors = v.errors; + (out, errors) } diff --git a/c2rust-analyze/src/rewrite/expr/mod.rs b/c2rust-analyze/src/rewrite/expr/mod.rs index dda17fe74..4bb7612ac 100644 --- a/c2rust-analyze/src/rewrite/expr/mod.rs +++ b/c2rust-analyze/src/rewrite/expr/mod.rs @@ -2,6 +2,7 @@ use crate::context::{AnalysisCtxt, Assignment}; use crate::pointee_type::PointeeTypes; use crate::pointer_id::PointerTable; use crate::rewrite::Rewrite; +use rustc_hir::def_id::DefId; use rustc_hir::BodyId; use rustc_middle::mir::Body; use rustc_span::Span; @@ -17,13 +18,17 @@ pub use self::convert::convert_cast_rewrite; pub use self::mir_op::CastBuilder; pub fn gen_expr_rewrites<'tcx>( - acx: &AnalysisCtxt<'_, 'tcx>, + acx: &mut AnalysisCtxt<'_, 'tcx>, asn: &Assignment, pointee_types: PointerTable>, + def_id: DefId, mir: &Body<'tcx>, hir_body_id: BodyId, ) -> Vec<(Span, Rewrite)> { - let mir_rewrites = mir_op::gen_mir_rewrites(acx, asn, pointee_types, mir); + let (mir_rewrites, errors) = mir_op::gen_mir_rewrites(acx, asn, pointee_types, mir); + if !errors.is_empty() { + acx.gacx.dont_rewrite_fns.add(def_id, errors); + } let unlower_map = unlower::unlower(acx.tcx(), mir, hir_body_id); let rewrites_by_expr = distribute::distribute(acx.tcx(), unlower_map, mir_rewrites); let address_of_rewrites = hir_only_casts::remove_hir_only_casts(acx.tcx(), hir_body_id, |ex| { diff --git a/c2rust-analyze/tests/filecheck/test_attrs.rs b/c2rust-analyze/tests/filecheck/test_attrs.rs index 7d5a73ac6..d76e77059 100644 --- a/c2rust-analyze/tests/filecheck/test_attrs.rs +++ b/c2rust-analyze/tests/filecheck/test_attrs.rs @@ -17,13 +17,13 @@ fn g(x: *mut i32) -> *mut i32 { x } -// CHECK: analysis of DefId({{.*}} ~ test_attrs[{{.*}}]::h) failed: [unknown]: explicit fail_before_analysis for testing +// CHECK: analysis of DefId({{.*}} ~ test_attrs[{{.*}}]::h) failed: FAKE_INVALID_FOR_TESTING, [unknown]: explicit fail_before_analysis for testing #[c2rust_analyze_test::fail_before_analysis] fn h(x: *mut i32) -> *mut i32 { x } -// CHECK: analysis of DefId({{.*}} ~ test_attrs[{{.*}}]::i) failed: [unknown]: explicit fail_before_rewriting for testing +// CHECK: analysis of DefId({{.*}} ~ test_attrs[{{.*}}]::i) failed: FAKE_INVALID_FOR_TESTING, [unknown]: explicit fail_before_rewriting for testing #[c2rust_analyze_test::fail_before_rewriting] fn i(x: *mut i32) -> *mut i32 { x