From b2e6d8f7ce1cc50c4adbfd9abc4c57f4e0477fc0 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Tue, 23 Jul 2024 11:42:22 -0700 Subject: [PATCH 01/45] Instrumentation for delayed UB stemming from uninitialized memory --- .../compiler_interface.rs | 7 + .../delayed_ub/delayed_ub_visitor.rs | 152 ++++ .../delayed_ub/instrumentation_visitor.rs | 102 +++ .../transform/check_uninit/delayed_ub/mod.rs | 148 ++++ .../delayed_ub/points_to_analysis.rs | 664 ++++++++++++++++++ .../delayed_ub/points_to_graph.rs | 218 ++++++ .../kani_middle/transform/check_uninit/mod.rs | 164 ++--- .../transform/check_uninit/ptr_uninit/mod.rs | 128 ++++ .../{ => ptr_uninit}/uninit_visitor.rs | 197 +----- .../check_uninit/relevant_instruction.rs | 130 ++++ .../transform/check_uninit/ty_layout.rs | 33 + .../src/kani_middle/transform/internal_mir.rs | 654 +++++++++++++++++ .../kani_middle/transform/kani_intrinsics.rs | 7 +- .../src/kani_middle/transform/mod.rs | 5 +- kani-compiler/src/main.rs | 1 + .../uninit/access-padding-via-cast/expected | 2 +- .../uninit/delayed-ub-transmute/expected | 2 +- tests/expected/uninit/delayed-ub/expected | 2 +- tests/expected/uninit/intrinsics/expected | 10 +- 19 files changed, 2322 insertions(+), 304 deletions(-) create mode 100644 kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/delayed_ub_visitor.rs create mode 100644 kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs create mode 100644 kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs create mode 100644 kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs create mode 100644 kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs create mode 100644 kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs rename kani-compiler/src/kani_middle/transform/check_uninit/{ => ptr_uninit}/uninit_visitor.rs (77%) create mode 100644 
kani-compiler/src/kani_middle/transform/check_uninit/relevant_instruction.rs create mode 100644 kani-compiler/src/kani_middle/transform/internal_mir.rs diff --git a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs index 986cd00e32a5..6b73b25cab67 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs @@ -115,6 +115,13 @@ impl GotocCodegenBackend { call_graph, ); + // Re-collect reachable items after global transformations were applied. This is necessary + // since global pass could add extra calls to instrumentation. + let (items, _) = with_timer( + || collect_reachable_items(tcx, &mut transformer, starting_items), + "codegen reachability analysis (second pass)", + ); + // Follow rustc naming convention (cx is abbrev for context). // https://rustc-dev-guide.rust-lang.org/conventions.html#naming-conventions let mut gcx = diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/delayed_ub_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/delayed_ub_visitor.rs new file mode 100644 index 000000000000..cde02c8b82a8 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/delayed_ub_visitor.rs @@ -0,0 +1,152 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! This module contains the visitor responsible for collecting initial analysis targets for delayed +//! UB instrumentation. 
+ +use stable_mir::{ + mir::{ + mono::{Instance, InstanceKind}, + visit::Location, + Body, CastKind, LocalDecl, MirVisitor, Mutability, NonDivergingIntrinsic, Operand, Place, + Rvalue, Statement, StatementKind, Terminator, TerminatorKind, + }, + ty::{RigidTy, TyKind}, +}; + +use crate::kani_middle::transform::check_uninit::ty_layout::tys_layout_compatible; + +/// Visitor that finds initial analysis targets for delayed UB instrumentation. For our purposes, +/// analysis targets are *pointers* to places reading and writing from which should be tracked. +pub struct DelayedUbVisitor { + body: Body, + delayed_ub_targets: Vec<Place>, +} + +impl DelayedUbVisitor { + pub fn new(body: Body) -> Self { + Self { body, delayed_ub_targets: vec![] } + } + + pub fn into_targets(self) -> Vec<Place> { + self.delayed_ub_targets + } +} + +impl MirVisitor for DelayedUbVisitor { + fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) { + if let Rvalue::Cast(kind, operand, ty) = rvalue { + let operand_ty = operand.ty(self.body.locals()).unwrap(); + match kind { + CastKind::Transmute | CastKind::PtrToPtr => { + if let ( + RigidTy::RawPtr(from_ty, Mutability::Mut), + RigidTy::RawPtr(to_ty, Mutability::Mut), + ) = (operand_ty.kind().rigid().unwrap(), ty.kind().rigid().unwrap()) + { + match operand { + Operand::Copy(place) | Operand::Move(place) => { + if !tys_layout_compatible(from_ty, to_ty) { + self.delayed_ub_targets.push(place.clone()); + } + } + Operand::Constant(_) => { + unimplemented!( + "Delayed UB in presence of constants is not yet supported." 
+ ) + } + } + } + } + _ => {} + }; + } + self.super_rvalue(rvalue, location); + } + + fn visit_statement(&mut self, stmt: &Statement, location: Location) { + if let StatementKind::Intrinsic(NonDivergingIntrinsic::CopyNonOverlapping(copy)) = + &stmt.kind + { + match &copy.dst { + Operand::Copy(place) | Operand::Move(place) => { + self.delayed_ub_targets.push(place.clone()); + } + Operand::Constant(_) => { + unimplemented!("Delayed UB in presence of constants is not yet supported.") + } + } + } + self.super_statement(stmt, location); + } + + fn visit_terminator(&mut self, term: &Terminator, location: Location) { + if let TerminatorKind::Call { func, args, .. } = &term.kind { + let instance = match try_resolve_instance(self.body.locals(), func) { + Ok(instance) => instance, + Err(reason) => { + panic!("{reason}"); + } + }; + if instance.kind == InstanceKind::Intrinsic { + match instance.intrinsic_name().unwrap().as_str() { + "copy" => { + assert_eq!(args.len(), 3, "Unexpected number of arguments for `copy`"); + assert!(matches!( + args[0].ty(self.body.locals()).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + assert!(matches!( + args[1].ty(self.body.locals()).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + // Here, `dst` is the second argument. + match &args[1] { + Operand::Copy(place) | Operand::Move(place) => { + self.delayed_ub_targets.push(place.clone()); + } + Operand::Constant(_) => unimplemented!( + "Delayed UB in presence of constants is not yet supported." 
+ ), + } + } + "volatile_copy_memory" | "volatile_copy_nonoverlapping_memory" => { + assert_eq!( + args.len(), + 3, + "Unexpected number of arguments for `volatile_copy`" + ); + assert!(matches!( + args[0].ty(self.body.locals()).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + assert!(matches!( + args[1].ty(self.body.locals()).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + // Here, `dst` is the first argument. + match &args[0] { + Operand::Copy(place) | Operand::Move(place) => { + self.delayed_ub_targets.push(place.clone()); + } + Operand::Constant(_) => unimplemented!( + "Delayed UB in presence of constants is not yet supported." + ), + } + } + _ => {} + } + } + } + self.super_terminator(term, location); + } +} + +/// Try retrieving instance for the given function operand. +fn try_resolve_instance(locals: &[LocalDecl], func: &Operand) -> Result<Instance, String> { + let ty = func.ty(locals).unwrap(); + match ty.kind() { + TyKind::RigidTy(RigidTy::FnDef(def, args)) => Ok(Instance::resolve(def, &args).unwrap()), + _ => Err(format!("Kani does not support reasoning about arguments to `{ty:?}`.")), + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs new file mode 100644 index 000000000000..f8d60debca10 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -0,0 +1,102 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! Visitor that collects all instructions relevant to uninitialized memory access caused by delayed +//! UB. In practice, that means collecting all instructions where the place is featured. 
+ +use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; +use crate::kani_middle::transform::check_uninit::relevant_instruction::{ + InitRelevantInstruction, MemoryInitOp, +}; +use crate::kani_middle::transform::check_uninit::TargetFinder; + +use stable_mir::mir::visit::{Location, PlaceContext}; +use stable_mir::mir::{BasicBlockIdx, MirVisitor, Operand, Place, ProjectionElem, Statement}; + +pub struct DelayedUbTargetVisitor<'a> { + /// Whether we should skip the next instruction, since it might've been instrumented already. + /// When we instrument an instruction, we partition the basic block, and the instruction that + /// may trigger UB becomes the first instruction of the basic block, which we need to skip + /// later. + skip_next: bool, + /// The instruction being visited at a given point. + current: SourceInstruction, + /// The target instruction that should be verified. + pub target: Option<InitRelevantInstruction>, + /// The list of places we should be looking for, ignoring others. 
+ place_filter: &'a [Place], +} + +impl<'a> TargetFinder for DelayedUbTargetVisitor<'a> { + fn find_next( + body: &MutableBody, + bb: BasicBlockIdx, + skip_first: bool, + place_filter: &[Place], + ) -> Option<InitRelevantInstruction> { + let mut visitor = DelayedUbTargetVisitor { + skip_next: skip_first, + current: SourceInstruction::Statement { idx: 0, bb }, + target: None, + place_filter, + }; + visitor.visit_basic_block(&body.blocks()[bb]); + visitor.target + } +} + +impl<'a> DelayedUbTargetVisitor<'a> { + fn push_target(&mut self, source_op: MemoryInitOp) { + let target = self.target.get_or_insert_with(|| InitRelevantInstruction { + source: self.current, + after_instruction: vec![], + before_instruction: vec![], + }); + target.push_operation(source_op); + } +} + +impl<'a> MirVisitor for DelayedUbTargetVisitor<'a> { + fn visit_statement(&mut self, stmt: &Statement, location: Location) { + if self.skip_next { + self.skip_next = false; + } else if self.target.is_none() { + // Check all inner places. + self.super_statement(stmt, location); + // Switch to the next statement. + let SourceInstruction::Statement { idx, bb } = self.current else { unreachable!() }; + self.current = SourceInstruction::Statement { idx: idx + 1, bb }; + } + } + + fn visit_place(&mut self, place: &Place, ptx: PlaceContext, location: Location) { + // Match the place by its local. + if self + .place_filter + .iter() + .any(|instrumented_place| instrumented_place.local == place.local) + { + let deref_projection_detected = place + .projection + .iter() + .any(|projection_elem| matches!(projection_elem, ProjectionElem::Deref)); + // We should only track the place itself, not whatever it gets dereferenced to. + if !deref_projection_detected { + // If we are mutating the place, initialize it. + if ptx.is_mutating() { + self.push_target(MemoryInitOp::SetRef { + operand: Operand::Copy(place.clone()), + value: true, + position: InsertPosition::After, + }); + } else { + // Otherwise, check its initialization. 
+ self.push_target(MemoryInitOp::CheckRef { + operand: Operand::Copy(place.clone()), + }); + } + } + } + self.super_place(place, ptx, location) + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs new file mode 100644 index 000000000000..499fe95e6b15 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -0,0 +1,148 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Global transformation pass that injects checks that catch delayed UB caused by uninitialized memory. + +use std::collections::HashMap; +use std::collections::HashSet; + +use crate::args::ExtraChecks; +use crate::kani_middle::reachability::CallGraph; +use crate::kani_middle::transform::body::CheckType; +use crate::kani_middle::transform::body::MutableBody; +use crate::kani_middle::transform::check_uninit::UninitInstrumenter; +use crate::kani_middle::transform::internal_mir::RustcInternalMir; +use crate::kani_middle::transform::BodyTransformation; +use crate::kani_middle::transform::GlobalPass; +use crate::kani_middle::transform::TransformationResult; +use crate::kani_queries::QueryDb; +use delayed_ub_visitor::DelayedUbVisitor; +use instrumentation_visitor::DelayedUbTargetVisitor; +use points_to_analysis::PointsToAnalysis; +use points_to_graph::PlaceOrAlloc; +use rustc_middle::ty::TyCtxt; +use rustc_smir::rustc_internal; +use stable_mir::mir::mono::{Instance, MonoItem}; +use stable_mir::mir::MirVisitor; +use stable_mir::mir::Place; +use stable_mir::ty::FnDef; +use stable_mir::CrateDef; + +mod delayed_ub_visitor; +mod instrumentation_visitor; +mod points_to_analysis; +mod points_to_graph; + +#[derive(Debug)] +pub struct DelayedUbPass { + pub check_type: CheckType, + pub mem_init_fn_cache: HashMap<&'static str, FnDef>, +} + +impl DelayedUbPass { + pub fn new(check_type: CheckType) -> Self { + Self { check_type, 
mem_init_fn_cache: HashMap::new() } + } +} + +impl GlobalPass for DelayedUbPass { + fn is_enabled(&self, query_db: &QueryDb) -> bool { + let args = query_db.args(); + args.ub_check.contains(&ExtraChecks::Uninit) + } + + fn transform( + &mut self, + tcx: TyCtxt, + call_graph: &CallGraph, + starting_items: &[MonoItem], + instances: Vec<Instance>, + transformer: &mut BodyTransformation, + ) { + // Collect all analysis targets (pointers to places reading and writing from which should be + // tracked). + let targets: HashSet<_> = instances + .iter() + .flat_map(|instance| { + let def_id = rustc_internal::internal(tcx, instance.def.def_id()); + let body = instance.body().unwrap(); + let mut visitor = DelayedUbVisitor::new(body.clone()); + visitor.visit_body(&body); + // Convert all places into the format of aliasing graph for later comparison. + visitor.into_targets().into_iter().map(move |place| { + PlaceOrAlloc::Place(rustc_internal::internal(tcx, place)).with_def_id(def_id) + }) + }) + .collect(); + + // Only perform this analysis if there is something to analyze. + if !targets.is_empty() { + let mut places_need_instrumentation = HashSet::new(); + // Analyze aliasing for every harness. + for entry_item in starting_items { + // Convert each entry function into instance, if possible. + let entry_fn = match entry_item { + MonoItem::Fn(instance) => Some(*instance), + MonoItem::Static(static_def) => { + let instance: Instance = (*static_def).into(); + instance.has_body().then_some(instance) + } + MonoItem::GlobalAsm(_) => None, + }; + if let Some(instance) = entry_fn { + let body = instance.body().unwrap(); + // Dataflow analysis does not yet work with StableMIR, so need to perform backward + // conversion. 
+ let internal_body = body.internal_mir(tcx); + let internal_def_id = rustc_internal::internal(tcx, instance.def.def_id()); + let results = PointsToAnalysis::run( + internal_body.clone(), + tcx, + internal_def_id, + call_graph, + &instances, + transformer, + ); + // Since analysis targets are *pointers*, need to get its followers for instrumentation. + for target in targets.iter() { + places_need_instrumentation.extend(results.pointees_of(target)); + } + } + } + + // Instrument each instance based on the final targets we found. + for instance in instances { + let internal_def_id = rustc_internal::internal(tcx, instance.def.def_id()); + let mut instrumenter = UninitInstrumenter { + check_type: self.check_type.clone(), + mem_init_fn_cache: &mut self.mem_init_fn_cache, + }; + // Retrieve the body with all local instrumentation passes applied. + let new_body = MutableBody::from(transformer.body(tcx, instance)); + // Retrieve all places we need to instrument in the appropriate format. + let place_filter: Vec<Place> = places_need_instrumentation + .iter() + .filter(|place| { + // Make sure only places from the current instance are included. + place.has_def_id(internal_def_id) + }) + .filter_map(|global_place_or_alloc| { + match global_place_or_alloc.without_def_id() { + PlaceOrAlloc::Alloc(_) => None, // Allocations cannot be read directly, so we need not worry about them. + PlaceOrAlloc::Place(place) => Some(rustc_internal::stable(place)), // Convert back to StableMIR. + } + }) + .collect(); + // Finally, instrument. + let (instrumentation_added, body) = instrumenter + .instrument::<DelayedUbTargetVisitor>(tcx, new_body, instance, &place_filter); + // If some instrumentation has been performed, update the cached body in the local transformer. 
+ if instrumentation_added { + transformer.cache.entry(instance).and_modify(|transformation_result| { + *transformation_result = TransformationResult::Modified(body.into()); + }); + } + } + } + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs new file mode 100644 index 000000000000..48590c9fcea7 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs @@ -0,0 +1,664 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Implementation of points-to analysis using Rust's native dataflow framework. This provides +//! necessary aliasing information for instrumenting delayed UB later on. + +use crate::kani_middle::{ + reachability::CallGraph, + transform::{ + check_uninit::delayed_ub::points_to_graph::{ + GlobalPlaceOrAlloc, PlaceOrAlloc, PointsToGraph, + }, + internal_mir::RustcInternalMir, + BodyTransformation, + }, +}; +use rustc_ast::Mutability; +use rustc_hir::def_id::DefId; +use rustc_middle::{ + mir::{ + BasicBlock, BinOp, Body, CallReturnPlaces, Location, NonDivergingIntrinsic, Operand, Place, + ProjectionElem, Rvalue, Statement, StatementKind, Terminator, TerminatorEdges, + TerminatorKind, + }, + ty::{Instance, InstanceKind, List, ParamEnv, TyCtxt, TyKind}, +}; +use rustc_mir_dataflow::{Analysis, AnalysisDomain, Forward, JoinSemiLattice}; +use rustc_smir::rustc_internal; +use rustc_span::source_map::Spanned; +use stable_mir::mir::mono::Instance as StableInstance; +use std::collections::HashSet; + +/// Main points-to analysis object. Since this one will be created anew for each instance analysis, +/// we need to make sure big data structures are not copied unnecessarily. 
+pub struct PointsToAnalysis<'a, 'b, 'c, 'tcx> { + def_id: DefId, + body: Body<'tcx>, + tcx: TyCtxt<'tcx>, + call_graph: &'a CallGraph, + instances: &'b Vec<StableInstance>, + transformer: &'c mut BodyTransformation, +} + +impl<'a, 'b, 'c, 'tcx> PointsToAnalysis<'a, 'b, 'c, 'tcx> { + /// Perform the analysis on a body, outputting the graph containing aliasing information of the + /// body itself and any body reachable from it. + pub fn run( + body: Body<'tcx>, + tcx: TyCtxt<'tcx>, + def_id: DefId, + call_graph: &'a CallGraph, + instances: &'b Vec<StableInstance>, + transformer: &'c mut BodyTransformation, + ) -> PointsToGraph<'tcx> { + let analysis = Self { body: body.clone(), tcx, def_id, call_graph, instances, transformer }; + let mut cursor = + analysis.into_engine(tcx, &body).iterate_to_fixpoint().into_results_cursor(&body); + let mut results = PointsToGraph::new(&body, def_id); + for (idx, _) in body.basic_blocks.iter().enumerate() { + cursor.seek_to_block_end(idx.into()); + results.join(cursor.get()); + } + results + } +} + +impl<'a, 'b, 'c, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { + type Domain = PointsToGraph<'tcx>; + + type Direction = Forward; + + const NAME: &'static str = "PointsToAnalysis"; + + /// Dataflow state instantiated at the beginning of each basic block. + fn bottom_value(&self, body: &Body<'tcx>) -> Self::Domain { + PointsToGraph::new(body, self.def_id) + } + + /// Dataflow state instantiated at the entry into the body, for us this coincides with the + /// bottom value, so we don't need to do anything. + fn initialize_start_block( + &self, + _body: &rustc_middle::mir::Body<'tcx>, + _state: &mut Self::Domain, + ) { + } +} + +impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { + /// Update current dataflow state based on the information we can infer from the given + /// statement. 
+ fn apply_statement_effect( + &mut self, + state: &mut Self::Domain, + statement: &Statement<'tcx>, + _location: Location, + ) { + // The only two statements that can introduce new aliasing information are assignments and + // copies using `copy_nonoverlapping`. + match &statement.kind { + StatementKind::Assign(assign_box) => { + let (place, rvalue) = *assign_box.clone(); + // Lvalue is `place`, which is already available to us. + let lvalue_set = state.follow_from_place(place, self.def_id); + // Determine all places which the newly created rvalue could point to. + let rvalue_set = match rvalue { + // Using the operand unchanged. + Rvalue::Use(operand) + | Rvalue::ShallowInitBox(operand, _) + | Rvalue::Cast(_, operand, _) + | Rvalue::Repeat(operand, ..) => self.find_operand_pointees(state, operand), + Rvalue::Ref(_, _, place) | Rvalue::AddressOf(_, place) => { + // Here, a reference to a place is created, which leaves the place + // unchanged. + state.follow_from_place(place, self.def_id) + } + Rvalue::BinaryOp(bin_op, operands) => { + match bin_op { + BinOp::Offset => { + // Offsetting a pointer should still be within the boundaries of the + // same object, so we can simply use the operand unchanged. + let (ptr, _) = *operands.clone(); + match ptr { + Operand::Copy(place) | Operand::Move(place) => { + state.follow(&state.follow_from_place(place, self.def_id)) + } + Operand::Constant(_) => { + unreachable!("Pointer in offset should not be a constant.") + } + } + } + BinOp::Add + | BinOp::AddUnchecked + | BinOp::AddWithOverflow + | BinOp::Sub + | BinOp::SubUnchecked + | BinOp::SubWithOverflow + | BinOp::Mul + | BinOp::MulUnchecked + | BinOp::MulWithOverflow + | BinOp::Div + | BinOp::Rem + | BinOp::BitXor + | BinOp::BitAnd + | BinOp::BitOr + | BinOp::Shl + | BinOp::ShlUnchecked + | BinOp::Shr + | BinOp::ShrUnchecked => { + // While unlikely, those could be pointer addresses, so we need to + // track them. 
We assume that even shifted addresses will be within + // the same original object. + let (l_operand, r_operand) = *operands.clone(); + let l_operand_set = match l_operand { + Operand::Copy(place) | Operand::Move(place) => { + state.follow(&state.follow_from_place(place, self.def_id)) + } + Operand::Constant(_) => HashSet::new(), + }; + let r_operand_set = match r_operand { + Operand::Copy(place) | Operand::Move(place) => { + state.follow(&state.follow_from_place(place, self.def_id)) + } + Operand::Constant(_) => HashSet::new(), + }; + l_operand_set.union(&r_operand_set).cloned().collect() + } + BinOp::Eq + | BinOp::Lt + | BinOp::Le + | BinOp::Ne + | BinOp::Ge + | BinOp::Gt + | BinOp::Cmp => { + // None of those could yield an address as the result. + HashSet::new() + } + } + } + Rvalue::UnaryOp(_, operand) => { + // The same story from BinOp applies here, too. Need to track those things. + match operand { + Operand::Copy(place) | Operand::Move(place) => { + state.follow(&state.follow_from_place(place, self.def_id)) + } + Operand::Constant(_) => HashSet::new(), + } + } + Rvalue::Len(..) | Rvalue::NullaryOp(..) | Rvalue::Discriminant(..) => { + // All of those should yield a constant. + HashSet::new() + } + Rvalue::Aggregate(_, operands) => { + // Conservatively find a union of all places mentioned here. + let places = operands + .into_iter() + .filter_map(|operand| { + match operand { + Operand::Copy(place) | Operand::Move(place) => { + // Simply add a constant here. + let place_or_alloc: PlaceOrAlloc = place.into(); + Some(place_or_alloc.with_def_id(self.def_id)) + } + Operand::Constant(_) => { + // This is a constant, the aliasing state is empty + None + } + } + }) + .collect(); + state.follow(&places) + } + Rvalue::CopyForDeref(place) => { + // Use a place unchanged. 
+ state.follow(&state.follow_from_place(place, self.def_id)) + } + Rvalue::ThreadLocalRef(_) => { + unimplemented!("Delayed UB analysis in Kani does not support statics.") + } + }; + // Create an edge between all places which could be lvalue and all places rvalue + // could be pointing to. + state.extend(&lvalue_set, &rvalue_set); + } + StatementKind::Intrinsic(non_diverging_intrinsic) => { + match *non_diverging_intrinsic.clone() { + NonDivergingIntrinsic::CopyNonOverlapping(copy_nonoverlapping) => { + // Copy between `*const a` and `*mut b` is semantically equivalent to *b = + // *a with respect to aliasing. + self.apply_copy_effect( + state, + copy_nonoverlapping.src.clone(), + copy_nonoverlapping.dst.clone(), + ); + } + NonDivergingIntrinsic::Assume(..) => { /* This is a no-op. */ } + } + } + StatementKind::FakeRead(..) + | StatementKind::SetDiscriminant { .. } + | StatementKind::Deinit(..) + | StatementKind::StorageLive(..) + | StatementKind::StorageDead(..) + | StatementKind::Retag(..) + | StatementKind::PlaceMention(..) + | StatementKind::AscribeUserType(..) + | StatementKind::Coverage(..) + | StatementKind::ConstEvalCounter + | StatementKind::Nop => { /* This is a no-op with regard to aliasing. */ } + } + } + + fn apply_terminator_effect<'mir>( + &mut self, + state: &mut Self::Domain, + terminator: &'mir Terminator<'tcx>, + _location: Location, + ) -> TerminatorEdges<'mir, 'tcx> { + if let TerminatorKind::Call { func, args, destination, .. } = &terminator.kind { + let instance = match try_resolve_instance(&self.body, func, self.tcx) { + Ok(instance) => instance, + Err(reason) => { + unimplemented!("{reason}") + } + }; + match instance.def { + // Intrinsics could introduce aliasing edges we care about. + InstanceKind::Intrinsic(def_id) => { + match self.tcx.intrinsic(def_id).unwrap().name.to_string().as_str() { + name if name.starts_with("atomic") => { + match name { + // All `atomic_cxchg` intrinsics take `dst, old, src` as arguments. 
+ // This is equivalent to `destination = *dst; *dst = src`. + name if name.starts_with("atomic_cxchg") => { + assert_eq!( + args.len(), + 3, + "Unexpected number of arguments for `{name}`" + ); + assert!(matches!( + args[0].node.ty(&self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + let src_set = match args[2].node { + Operand::Copy(place) | Operand::Move(place) => { + state.follow_from_place(place, self.def_id) + } + Operand::Constant(_) => HashSet::new(), + }; + let dst_set = match args[0].node { + Operand::Copy(place) | Operand::Move(place) => state + .follow_from_place( + place.project_deeper( + &[ProjectionElem::Deref], + self.tcx, + ), + self.def_id, + ), + Operand::Constant(_) => { + unreachable!("pointer cannot be a constant") + } + }; + let destination_set = + state.follow_from_place(*destination, self.def_id); + state.extend(&destination_set, &state.follow(&dst_set)); + state.extend(&dst_set, &state.follow(&src_set)); + } + // All `atomic_load` intrinsics take `src` as an argument. + // This is equivalent to `destination = *src`. + name if name.starts_with("atomic_load") => { + assert_eq!( + args.len(), + 1, + "Unexpected number of arguments for `{name}`" + ); + assert!(matches!( + args[0].node.ty(&self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Not) + )); + let src_set = match args[0].node { + Operand::Copy(place) | Operand::Move(place) => state + .follow_from_place( + place.project_deeper( + &[ProjectionElem::Deref], + self.tcx, + ), + self.def_id, + ), + Operand::Constant(_) => { + unreachable!("pointer cannot be a constant") + } + }; + let destination_set = + state.follow_from_place(*destination, self.def_id); + state.extend(&destination_set, &state.follow(&src_set)); + } + // All `atomic_store` intrinsics take `dst, val` as arguments. + // This is equivalent to `*dst = val`. 
+ name if name.starts_with("atomic_store") => { + assert_eq!( + args.len(), + 2, + "Unexpected number of arguments for `{name}`" + ); + assert!(matches!( + args[0].node.ty(&self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + let dst_set = match args[0].node { + Operand::Copy(place) | Operand::Move(place) => state + .follow_from_place( + place.project_deeper( + &[ProjectionElem::Deref], + self.tcx, + ), + self.def_id, + ), + Operand::Constant(_) => { + unreachable!("pointer cannot be a constant") + } + }; + let val_set = match args[1].node { + Operand::Copy(place) | Operand::Move(place) => { + state.follow_from_place(place, self.def_id) + } + Operand::Constant(_) => HashSet::new(), + }; + state.extend(&dst_set, &state.follow(&val_set)); + } + // All other `atomic` intrinsics take `dst, src` as arguments. + // This is equivalent to `destination = *dst; *dst = src`. + _ => { + assert_eq!( + args.len(), + 2, + "Unexpected number of arguments for `{name}`" + ); + assert!(matches!( + args[0].node.ty(&self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + let src_set = match args[1].node { + Operand::Copy(place) | Operand::Move(place) => { + state.follow_from_place(place, self.def_id) + } + Operand::Constant(_) => HashSet::new(), + }; + let dst_set = match args[0].node { + Operand::Copy(place) | Operand::Move(place) => state + .follow_from_place( + place.project_deeper( + &[ProjectionElem::Deref], + self.tcx, + ), + self.def_id, + ), + Operand::Constant(_) => { + unreachable!("pointer cannot be a constant") + } + }; + let destination_set = + state.follow_from_place(*destination, self.def_id); + state.extend(&destination_set, &state.follow(&dst_set)); + state.extend(&dst_set, &state.follow(&src_set)); + } + }; + } + // Similar to `copy_nonoverlapping`, argument order is `src`, `dst`, `count`. 
+ "copy" => { + assert_eq!(args.len(), 3, "Unexpected number of arguments for `copy`"); + assert!(matches!( + args[0].node.ty(&self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Not) + )); + assert!(matches!( + args[1].node.ty(&self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + self.apply_copy_effect( + state, + args[0].node.clone(), + args[1].node.clone(), + ); + } + // Similar to `copy_nonoverlapping`, argument order is `dst`, `src`, `count`. + "volatile_copy_memory" | "volatile_copy_nonoverlapping_memory" => { + assert_eq!(args.len(), 3, "Unexpected number of arguments for `copy`"); + assert!(matches!( + args[0].node.ty(&self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + assert!(matches!( + args[1].node.ty(&self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Not) + )); + self.apply_copy_effect( + state, + args[1].node.clone(), + args[0].node.clone(), + ); + } + // Semantically equivalent to dest = *a + "volatile_load" | "unaligned_volatile_load" => { + assert_eq!( + args.len(), + 1, + "Unexpected number of arguments for `volatile_load`" + ); + assert!(matches!( + args[0].node.ty(&self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Not) + )); + // Destination of the return value. + let lvalue_set = state.follow_from_place(*destination, self.def_id); + let rvalue_set = match args[0].node { + // Need to add an additional dereference, since the value is loaded, not pointer. + Operand::Copy(place) | Operand::Move(place) => state + .follow_from_place( + place.project_deeper(&[ProjectionElem::Deref], self.tcx), + self.def_id, + ), + Operand::Constant(_) => HashSet::new(), + }; + state.extend(&lvalue_set, &state.follow(&rvalue_set)); + } + // Semantically equivalent *a = b. 
+ "volatile_store" | "unaligned_volatile_store" => { + assert_eq!( + args.len(), + 2, + "Unexpected number of arguments for `volatile_store`" + ); + assert!(matches!( + args[0].node.ty(&self.body, self.tcx).kind(), + TyKind::RawPtr(_, Mutability::Mut) + )); + let lvalue_set = match args[0].node { + // Need to add an additional dereference, since storing into the dereference. + Operand::Copy(place) | Operand::Move(place) => state + .follow_from_place( + place.project_deeper(&[ProjectionElem::Deref], self.tcx), + self.def_id, + ), + Operand::Constant(_) => { + unreachable!("pointer should not be a constant") + } + }; + let rvalue_set = match args[1].node { + Operand::Copy(place) | Operand::Move(place) => { + state.follow_from_place(place, self.def_id) + } + Operand::Constant(_) => HashSet::new(), + }; + state.extend(&lvalue_set, &state.follow(&rvalue_set)); + } + _ => { + // TODO: go through the list of intrinsics and make sure none have + // slipped; I am sure we still missing some. + if self.tcx.is_mir_available(def_id) { + self.apply_regular_call_effect(state, instance, args, destination); + } + } + } + } + _ => { + if self.tcx.is_foreign_item(instance.def_id()) { + match self + .tcx + .def_path_str_with_args(instance.def_id(), instance.args) + .as_str() + { + // This is an internal function responsible for heap allocation, + // which creates a new node we need to add to the points-to graph. + "alloc::alloc::__rust_alloc" | "alloc::alloc::__rust_alloc_zeroed" => { + let lvalue_set = state.follow_from_place(*destination, self.def_id); + let rvalue_set = HashSet::from([ + PlaceOrAlloc::new_alloc().with_def_id(self.def_id) + ]); + state.extend(&lvalue_set, &rvalue_set); + } + _ => {} + } + } else { + self.apply_regular_call_effect(state, instance, args, destination); + } + } + } + }; + terminator.edges() + } + + // We probably should not care about this. 
+ fn apply_call_return_effect( + &mut self, + _state: &mut Self::Domain, + _block: BasicBlock, + _return_places: CallReturnPlaces<'_, 'tcx>, + ) { + } +} + +/// Try retrieving instance for the given function operand. +fn try_resolve_instance<'tcx>( + body: &Body<'tcx>, + func: &Operand<'tcx>, + tcx: TyCtxt<'tcx>, +) -> Result, String> { + let ty = func.ty(body, tcx); + match ty.kind() { + TyKind::FnDef(def, args) => { + match Instance::try_resolve(tcx, ParamEnv::reveal_all(), *def, &args) { + Ok(Some(instance)) => Ok(instance), + _ => Err(format!("Kani does not support reasoning about arguments to `{ty:?}`.")), + } + } + _ => Err(format!("Kani does not support reasoning about arguments to `{ty:?}`.")), + } +} + +impl<'a, 'b, 'c, 'tcx> PointsToAnalysis<'a, 'b, 'c, 'tcx> { + // Update the analysis state according to the operation, which is semantically equivalent to `*to = *from`. + fn apply_copy_effect( + &self, + state: &mut PointsToGraph<'tcx>, + from: Operand<'tcx>, + to: Operand<'tcx>, + ) { + let lvalue_set = match to { + Operand::Copy(place) | Operand::Move(place) => state.follow_from_place( + place.project_deeper(&[ProjectionElem::Deref], self.tcx), + self.def_id, + ), + Operand::Constant(_) => { + unreachable!("pointer cannot be a constant") + } + }; + let rvalue_set = match from { + Operand::Copy(place) | Operand::Move(place) => state.follow_from_place( + place.project_deeper(&[ProjectionElem::Deref], self.tcx), + self.def_id, + ), + Operand::Constant(_) => { + unreachable!("pointer cannot be a constant") + } + }; + state.extend(&lvalue_set, &state.follow(&rvalue_set)); + } + + // Find all places where the operand could point to at the current stage of the program. + fn find_operand_pointees( + &self, + state: &mut PointsToGraph<'tcx>, + operand: Operand<'tcx>, + ) -> HashSet> { + match operand { + Operand::Copy(place) | Operand::Move(place) => { + // Find all places which are pointed to by the place. 
+ state.follow(&state.follow_from_place(place, self.def_id)) + } + Operand::Constant(_) => { + // Constants do not point to anything, the aliasing state is empty. + HashSet::new() + } + } + } + + // Update the analysis state according to the regular function call. + fn apply_regular_call_effect( + &mut self, + state: &mut PointsToGraph<'tcx>, + instance: Instance, + args: &[Spanned>], + destination: &Place<'tcx>, + ) { + // Here we simply call another function, so need to retrieve internal + // body for it. + let new_body = { + let internal_instance = rustc_internal::stable(instance); + assert!(self.instances.contains(&internal_instance)); + let stable_body = self.transformer.body(self.tcx, rustc_internal::stable(instance)); + stable_body.internal_mir(self.tcx) + }; + + // Recursively run the analysis and join the results into the current state. + let new_result = PointsToAnalysis::run( + new_body, + self.tcx, + instance.def_id(), + self.call_graph, + self.instances, + self.transformer, + ); + state.join(&new_result); + + // One missing link is the connections between the arguments in the + // caller and parameters in the callee, add it to the graph. + // + // TODO: this is probably wrong if the arguments are passed via spread, + // as in with closures, so we would need to fix that. + for (i, arg) in args.iter().enumerate() { + match &arg.node { + Operand::Copy(place) | Operand::Move(place) => { + let lvalue_set = HashSet::from([PlaceOrAlloc::Place(Place { + local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. + projection: List::empty(), + }) + .with_def_id(instance.def_id())]); + let rvalue_set = state.follow_from_place(*place, self.def_id); + state.extend(&lvalue_set, &state.follow(&rvalue_set)); + } + Operand::Constant(_) => {} + } + } + // Similarly, need to connect the return value to the return + // destination. 
+ let lvalue_set = state.follow_from_place(*destination, self.def_id); + let rvalue_set = HashSet::from([PlaceOrAlloc::Place(Place { + local: 0usize.into(), + projection: List::empty(), + }) + .with_def_id(instance.def_id())]); + state.extend(&lvalue_set, &state.follow(&rvalue_set)); + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs new file mode 100644 index 000000000000..bc4c918b0578 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs @@ -0,0 +1,218 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Graph data structure to store the results of points-to analysis. + +use rustc_hir::def_id::DefId; +use rustc_middle::{ + mir::{Body, Place, ProjectionElem}, + ty::List, +}; +use rustc_mir_dataflow::{fmt::DebugWithContext, JoinSemiLattice}; +use std::{ + collections::{HashMap, HashSet, VecDeque}, + sync::atomic::{AtomicUsize, Ordering}, +}; + +/// A node in the points-to graph, which could be a place on the stack or a heap allocation. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub enum PlaceOrAlloc<'tcx> { + Alloc(usize), + Place(Place<'tcx>), +} + +/// A node tagged with a DefId, to differentiate between places across different functions. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub struct GlobalPlaceOrAlloc<'tcx> { + def_id: DefId, + place_or_alloc: PlaceOrAlloc<'tcx>, +} + +impl<'tcx> GlobalPlaceOrAlloc<'tcx> { + /// Check if the node has a given DefId. + pub fn has_def_id(&self, def_id: DefId) -> bool { + self.def_id == def_id + } + + /// Remove DefId from the node. 
+ pub fn without_def_id(&self) -> PlaceOrAlloc<'tcx> { + self.place_or_alloc + } +} + +impl<'tcx> From> for PlaceOrAlloc<'tcx> { + fn from(value: Place<'tcx>) -> Self { + PlaceOrAlloc::Place(value) + } +} + +impl<'tcx> PlaceOrAlloc<'tcx> { + /// Generate a new alloc with increasing allocation id. + pub fn new_alloc() -> Self { + static NEXT_ALLOC_ID: AtomicUsize = AtomicUsize::new(0); + PlaceOrAlloc::Alloc(NEXT_ALLOC_ID.fetch_add(1, Ordering::Relaxed)) + } + + /// Tag the node with a DefId. + pub fn with_def_id(&self, def_id: DefId) -> GlobalPlaceOrAlloc<'tcx> { + GlobalPlaceOrAlloc { def_id, place_or_alloc: *self } + } +} + +/// Graph data structure that stores the current results of the points-to analysis. The graph is +/// directed, so having an edge between two places means that one is pointing to the other. For +/// example, `a = &b` would translate to `a --> b` and `a = b` to `a --> {all pointees of b}`. +/// +/// Note that the aliasing is stored between places with no projections, which is sound but can be +/// imprecise. I.e., an edge between two places in the graph could mean that some scalar sub-places +/// (e.g. _1.0) of the places alias, too, but not the deref ones. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct PointsToGraph<'tcx> { + /// A hash map of node --> {nodes} edges. + edges: HashMap, HashSet>>, +} + +impl<'tcx> PointsToGraph<'tcx> { + /// Create a new graph, adding all existing places without projections from a body. + pub fn new(body: &Body, def_id: DefId) -> Self { + let places = (0..body.local_decls.len()).map(|local| { + let place: PlaceOrAlloc = + Place { local: local.into(), projection: List::empty() }.into(); + (place.with_def_id(def_id), HashSet::new()) + }); + Self { edges: HashMap::from_iter(places) } + } + + /// Collect all nodes which have incoming edges from `nodes`.
+ pub fn follow( + &self, + nodes: &HashSet>, + ) -> HashSet> { + nodes.iter().flat_map(|node| self.edges.get(node).cloned().unwrap_or_default()).collect() + } + + /// For each node in `from`, add an edge to each node in `to`. + pub fn extend( + &mut self, + from: &HashSet>, + to: &HashSet>, + ) { + for node in from.iter() { + let node_pointees = self.edges.entry(*node).or_default(); + node_pointees.extend(to.iter()); + } + } + + /// Collect all scalar places to which a given place can alias. This is needed to resolve all + /// deref-like projections. + pub fn follow_from_place( + &self, + place: Place<'tcx>, + current_def_id: DefId, + ) -> HashSet> { + let place_or_alloc: PlaceOrAlloc = + Place { local: place.local, projection: List::empty() }.into(); + let mut node_set = HashSet::from([place_or_alloc.with_def_id(current_def_id)]); + for projection in place.projection { + match projection { + ProjectionElem::Deref => { + node_set = self.follow(&node_set); + } + ProjectionElem::Field(..) + | ProjectionElem::Index(..) + | ProjectionElem::ConstantIndex { .. } + | ProjectionElem::Subslice { .. } + | ProjectionElem::Downcast(..) + | ProjectionElem::OpaqueCast(..) + | ProjectionElem::Subtype(..) => { + /* These operations are no-ops w.r.t. aliasing since we are tracking it on a per-object basis. */ + } + } + } + node_set + } + + /// Dump the graph into a file using the graphviz format for later visualization.
+ pub fn dump(&self, file_path: &str) { + let nodes: Vec = self + .edges + .keys() + .map(|from| format!("\t\"{:?}:{:?}\"", from.def_id, from.place_or_alloc)) + .collect(); + let nodes_str = nodes.join("\n"); + let edges: Vec = self + .edges + .iter() + .flat_map(|(from, to)| { + let from = format!("\"{:?}:{:?}\"", from.def_id, from.place_or_alloc); + to.iter().map(move |to| { + let to = format!("\"{:?}:{:?}\"", to.def_id, to.place_or_alloc); + format!("\t{} -> {}", from.clone(), to) + }) + }) + .collect(); + let edges_str = edges.join("\n"); + std::fs::write(file_path, format!("digraph {{\n{}\n{}\n}}", nodes_str, edges_str)).unwrap(); + } + + /// Find a transitive closure of the graph starting from a given place. + pub fn transitive_closure( + &self, + target: &GlobalPlaceOrAlloc<'tcx>, + ) -> HashSet> { + let mut result = HashSet::new(); + let mut queue = VecDeque::from([*target]); + while !queue.is_empty() { + let next_target = queue.pop_front().unwrap(); + if !result.contains(&next_target) { + let outgoing_edges = self.edges.get(&next_target).unwrap(); + queue.extend(outgoing_edges.iter()); + result.insert(next_target); + } + } + result + } + + /// Retrieve all places to which a given place is pointing. + pub fn pointees_of( + &self, + target: &GlobalPlaceOrAlloc<'tcx>, + ) -> HashSet> { + self.edges + .get(&target) + .expect(format!("unable to retrieve {:?} from points-to graph", target).as_str()) + .clone() + } +} + +/// Since we are performing the analysis using a dataflow, we need to implement a proper monotonic +/// join operation. In our case, this is a simple union of two graphs. This "lattice" is finite, +/// because in the worst case all places will alias to all places, in which case the join will be a +/// no-op. +impl<'tcx> JoinSemiLattice for PointsToGraph<'tcx> { + fn join(&mut self, other: &Self) -> bool { + let mut updated = false; + // Check every node in the other graph.
+ for (from, to) in other.edges.iter() { + // If node already exists in the original graph. + if self.edges.contains_key(from) { + // Check if there are any edges that are in the other graph but not in the original + // graph. + if to.difference(self.edges.get(from).unwrap()).count() != 0 { + updated = true; + } + // Add all edges to the original graph. + self.edges.get_mut(from).unwrap().extend(to.iter()); + } else { + // If node does not exist, add the node and all edges from it. + self.edges.insert(*from, to.clone()); + updated = true; + } + } + updated + } +} + +/// This is a requirement for the fixpoint solver, and there is no derive macro for this, so +/// implement it manually. +impl<'tcx, C> DebugWithContext for PointsToGraph<'tcx> {} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs index 4f94f94d17f1..279229a7d9d0 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs @@ -1,33 +1,41 @@ // Copyright Kani Contributors // SPDX-License-Identifier: Apache-2.0 OR MIT // -//! Implement a transformation pass that instruments the code to detect possible UB due to -//! the accesses to uninitialized memory. +//! Module containing multiple transformation passes that instrument the code to detect possible UB +//! due to the accesses to uninitialized memory. 
-use crate::args::ExtraChecks; use crate::kani_middle::find_fn_def; -use crate::kani_middle::transform::body::{ - CheckType, InsertPosition, MutableBody, SourceInstruction, -}; -use crate::kani_middle::transform::{TransformPass, TransformationType}; -use crate::kani_queries::QueryDb; +use crate::kani_middle::transform::body::CheckType; +use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; +use relevant_instruction::{InitRelevantInstruction, MemoryInitOp}; use rustc_middle::ty::TyCtxt; use rustc_smir::rustc_internal; use stable_mir::mir::mono::Instance; -use stable_mir::mir::{AggregateKind, Body, ConstOperand, Mutability, Operand, Place, Rvalue}; +use stable_mir::mir::{ + AggregateKind, BasicBlockIdx, ConstOperand, Mutability, Operand, Place, Rvalue, +}; use stable_mir::ty::{ FnDef, GenericArgKind, GenericArgs, MirConst, RigidTy, Ty, TyConst, TyKind, UintTy, }; use stable_mir::CrateDef; use std::collections::{HashMap, HashSet}; -use std::fmt::Debug; -use tracing::{debug, trace}; +pub use ty_layout::{PointeeInfo, PointeeLayout}; + +pub(crate) mod delayed_ub; +pub(crate) mod ptr_uninit; +mod relevant_instruction; mod ty_layout; -mod uninit_visitor; -pub use ty_layout::{PointeeInfo, PointeeLayout}; -use uninit_visitor::{CheckUninitVisitor, InitRelevantInstruction, MemoryInitOp}; +/// Trait that the instrumentation target providers must implement to work with the instrumenter. +trait TargetFinder { + fn find_next( + body: &MutableBody, + bb: BasicBlockIdx, + skip_first: bool, + place_filter: &[Place], + ) -> Option; +} // Function bodies of those functions will not be instrumented as not to cause infinite recursion. const SKIPPED_DIAGNOSTIC_ITEMS: &[&str] = &[ @@ -41,33 +49,24 @@ const SKIPPED_DIAGNOSTIC_ITEMS: &[&str] = &[ "KaniSetStrPtrInitialized", ]; -/// Instrument the code with checks for uninitialized memory. +/// Instruments the code with checks for uninitialized memory, agnostic to the source of targets. 
#[derive(Debug)] -pub struct UninitPass { +pub struct UninitInstrumenter<'a> { pub check_type: CheckType, /// Used to cache FnDef lookups of injected memory initialization functions. - pub mem_init_fn_cache: HashMap<&'static str, FnDef>, + pub mem_init_fn_cache: &'a mut HashMap<&'static str, FnDef>, } -impl TransformPass for UninitPass { - fn transformation_type() -> TransformationType - where - Self: Sized, - { - TransformationType::Instrumentation - } - - fn is_enabled(&self, query_db: &QueryDb) -> bool - where - Self: Sized, - { - let args = query_db.args(); - args.ub_check.contains(&ExtraChecks::Uninit) - } - - fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { - trace!(function=?instance.name(), "transform"); - +impl<'a> UninitInstrumenter<'a> { + /// Instrument a body with memory initialization checks, the visitor that generates + /// instrumentation targets must be provided via a TF type parameter. + fn instrument( + &mut self, + tcx: TyCtxt, + mut body: MutableBody, + instance: Instance, + place_filter: &[Place], + ) -> (bool, MutableBody) { // Need to break infinite recursion when memory initialization checks are inserted, so the // internal functions responsible for memory initialization are skipped. if tcx @@ -80,13 +79,7 @@ impl TransformPass for UninitPass { return (false, body); } - let mut new_body = MutableBody::from(body); - let orig_len = new_body.blocks().len(); - - // Inject a call to set-up memory initialization state if the function is a harness. - if is_harness(instance, tcx) { - inject_memory_init_setup(&mut new_body, tcx, &mut self.mem_init_fn_cache); - } + let orig_len = body.blocks().len(); // Set of basic block indices for which analyzing first statement should be skipped. // @@ -100,21 +93,19 @@ impl TransformPass for UninitPass { // Do not cache body.blocks().len() since it will change as we add new checks. 
let mut bb_idx = 0; - while bb_idx < new_body.blocks().len() { + while bb_idx < body.blocks().len() { if let Some(candidate) = - CheckUninitVisitor::find_next(&new_body, bb_idx, skip_first.contains(&bb_idx)) + TF::find_next(&body, bb_idx, skip_first.contains(&bb_idx), place_filter) { - self.build_check_for_instruction(tcx, &mut new_body, candidate, &mut skip_first); + self.build_check_for_instruction(tcx, &mut body, candidate, &mut skip_first); bb_idx += 1 } else { bb_idx += 1; }; } - (orig_len != new_body.blocks().len(), new_body.into()) + (orig_len != body.blocks().len(), body) } -} -impl UninitPass { /// Inject memory initialization checks for each operation in an instruction. fn build_check_for_instruction( &mut self, @@ -123,7 +114,6 @@ impl UninitPass { instruction: InitRelevantInstruction, skip_first: &mut HashSet, ) { - debug!(?instruction, "build_check"); let mut source = instruction.source; for operation in instruction.before_instruction { self.build_check_for_operation(tcx, body, &mut source, operation, skip_first); @@ -175,7 +165,9 @@ impl UninitPass { }; match operation { - MemoryInitOp::CheckSliceChunk { .. } | MemoryInitOp::Check { .. } => { + MemoryInitOp::CheckSliceChunk { .. } + | MemoryInitOp::Check { .. } + | MemoryInitOp::CheckRef { .. } => { self.build_get_and_check(tcx, body, source, operation, pointee_ty_info, skip_first) } MemoryInitOp::SetSliceChunk { .. } @@ -211,7 +203,7 @@ impl UninitPass { // Depending on whether accessing the known number of elements in the slice, need to // pass is as an argument. let (diagnostic, args) = match &operation { - MemoryInitOp::Check { .. } => { + MemoryInitOp::Check { .. } | MemoryInitOp::CheckRef { .. } => { let diagnostic = "KaniIsPtrInitialized"; let args = vec![ptr_operand.clone(), layout_operand]; (diagnostic, args) @@ -275,14 +267,22 @@ impl UninitPass { // Make sure all non-padding bytes are initialized. 
collect_skipped(&operation, body, skip_first); - let ptr_operand_ty = ptr_operand.ty(body.locals()).unwrap(); + // Find the real operand type for a good error message. + let operand_ty = match &operation { + MemoryInitOp::Check { operand } + | MemoryInitOp::CheckSliceChunk { operand, .. } + | MemoryInitOp::CheckRef { operand } => operand.ty(body.locals()).unwrap(), + _ => unreachable!(), + }; body.add_check( tcx, &self.check_type, source, operation.position(), ret_place.local, - &format!("Undefined Behavior: Reading from an uninitialized pointer of type `{ptr_operand_ty}`"), + &format!( + "Undefined Behavior: Reading from an uninitialized pointer of type `{operand_ty}`" + ), ) } @@ -483,59 +483,3 @@ pub fn resolve_mem_init_fn(fn_def: FnDef, layout_size: usize, associated_type: T ) .unwrap() } - -/// Checks if the instance is a harness -- an entry point of Kani analysis. -fn is_harness(instance: Instance, tcx: TyCtxt) -> bool { - let harness_identifiers = [ - vec![ - rustc_span::symbol::Symbol::intern("kanitool"), - rustc_span::symbol::Symbol::intern("proof_for_contract"), - ], - vec![ - rustc_span::symbol::Symbol::intern("kanitool"), - rustc_span::symbol::Symbol::intern("proof"), - ], - ]; - harness_identifiers.iter().any(|attr_path| { - tcx.has_attrs_with_path(rustc_internal::internal(tcx, instance.def.def_id()), attr_path) - }) -} - -/// Inject an initial call to set-up memory initialization tracking. -fn inject_memory_init_setup( - new_body: &mut MutableBody, - tcx: TyCtxt, - mem_init_fn_cache: &mut HashMap<&'static str, FnDef>, -) { - // First statement or terminator in the harness. - let mut source = if !new_body.blocks()[0].statements.is_empty() { - SourceInstruction::Statement { idx: 0, bb: 0 } - } else { - SourceInstruction::Terminator { bb: 0 } - }; - - // Dummy return place. 
- let ret_place = Place { - local: new_body.new_local( - Ty::new_tuple(&[]), - source.span(new_body.blocks()), - Mutability::Not, - ), - projection: vec![], - }; - - // Resolve the instance and inject a call to set-up the memory initialization state. - let memory_initialization_init = Instance::resolve( - get_mem_init_fn_def(tcx, "KaniInitializeMemoryInitializationState", mem_init_fn_cache), - &GenericArgs(vec![]), - ) - .unwrap(); - - new_body.add_call( - &memory_initialization_init, - &mut source, - InsertPosition::Before, - vec![], - ret_place, - ); -} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs new file mode 100644 index 000000000000..14dbfa3929c4 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs @@ -0,0 +1,128 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! A transformation pass that instruments the code to detect possible UB due to the accesses to +//! uninitialized memory via raw pointers. + +use crate::args::ExtraChecks; +use crate::kani_middle::transform::body::{ + CheckType, InsertPosition, MutableBody, SourceInstruction, +}; +use crate::kani_middle::transform::check_uninit::{get_mem_init_fn_def, UninitInstrumenter}; +use crate::kani_middle::transform::{TransformPass, TransformationType}; +use crate::kani_queries::QueryDb; +use rustc_middle::ty::TyCtxt; +use rustc_smir::rustc_internal; +use stable_mir::mir::mono::Instance; +use stable_mir::mir::{Body, Mutability, Place}; +use stable_mir::ty::{FnDef, GenericArgs, Ty}; +use stable_mir::CrateDef; +use std::collections::HashMap; +use std::fmt::Debug; +use tracing::trace; + +mod uninit_visitor; + +/// Top-level pass that instruments the code with checks for uninitialized memory access through raw +/// pointers. 
+#[derive(Debug)] +pub struct UninitPass { + pub check_type: CheckType, + pub mem_init_fn_cache: HashMap<&'static str, FnDef>, +} + +impl TransformPass for UninitPass { + fn transformation_type() -> TransformationType + where + Self: Sized, + { + TransformationType::Instrumentation + } + + fn is_enabled(&self, query_db: &QueryDb) -> bool + where + Self: Sized, + { + let args = query_db.args(); + args.ub_check.contains(&ExtraChecks::Uninit) + } + + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + trace!(function=?instance.name(), "transform"); + + let mut changed = false; + let mut new_body = MutableBody::from(body); + + // Inject a call to set-up memory initialization state if the function is a harness. + if is_harness(instance, tcx) { + inject_memory_init_setup(&mut new_body, tcx, &mut self.mem_init_fn_cache); + changed = true; + } + + // Call a helper that performs the actual instrumentation. + let mut instrumenter = UninitInstrumenter { + check_type: self.check_type.clone(), + mem_init_fn_cache: &mut self.mem_init_fn_cache, + }; + let (instrumentation_added, body) = instrumenter + .instrument::(tcx, new_body, instance, &[]); + + (changed || instrumentation_added, body.into()) + } +} + +/// Checks if the instance is a harness -- an entry point of Kani analysis. +fn is_harness(instance: Instance, tcx: TyCtxt) -> bool { + let harness_identifiers = [ + vec![ + rustc_span::symbol::Symbol::intern("kanitool"), + rustc_span::symbol::Symbol::intern("proof_for_contract"), + ], + vec![ + rustc_span::symbol::Symbol::intern("kanitool"), + rustc_span::symbol::Symbol::intern("proof"), + ], + ]; + harness_identifiers.iter().any(|attr_path| { + tcx.has_attrs_with_path(rustc_internal::internal(tcx, instance.def.def_id()), attr_path) + }) +} + +/// Inject an initial call to set-up memory initialization tracking. 
+fn inject_memory_init_setup( + new_body: &mut MutableBody, + tcx: TyCtxt, + mem_init_fn_cache: &mut HashMap<&'static str, FnDef>, +) { + // First statement or terminator in the harness. + let mut source = if !new_body.blocks()[0].statements.is_empty() { + SourceInstruction::Statement { idx: 0, bb: 0 } + } else { + SourceInstruction::Terminator { bb: 0 } + }; + + // Dummy return place. + let ret_place = Place { + local: new_body.new_local( + Ty::new_tuple(&[]), + source.span(new_body.blocks()), + Mutability::Not, + ), + projection: vec![], + }; + + // Resolve the instance and inject a call to set-up the memory initialization state. + let memory_initialization_init = Instance::resolve( + get_mem_init_fn_def(tcx, "KaniInitializeMemoryInitializationState", mem_init_fn_cache), + &GenericArgs(vec![]), + ) + .unwrap(); + + new_body.add_call( + &memory_initialization_init, + &mut source, + InsertPosition::Before, + vec![], + ret_place, + ); +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs similarity index 77% rename from kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs rename to kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs index 4c768aa2ee81..91a7d526508f 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs @@ -4,6 +4,11 @@ //! Visitor that collects all instructions relevant to uninitialized memory access. 
use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; +use crate::kani_middle::transform::check_uninit::relevant_instruction::{ + InitRelevantInstruction, MemoryInitOp, +}; +use crate::kani_middle::transform::check_uninit::ty_layout::tys_layout_compatible; +use crate::kani_middle::transform::check_uninit::TargetFinder; use stable_mir::mir::alloc::GlobalAlloc; use stable_mir::mir::mono::{Instance, InstanceKind}; use stable_mir::mir::visit::{Location, PlaceContext}; @@ -12,123 +17,7 @@ use stable_mir::mir::{ Place, PointerCoercion, ProjectionElem, Rvalue, Statement, StatementKind, Terminator, TerminatorKind, }; -use stable_mir::ty::{ConstantKind, RigidTy, Ty, TyKind}; -use strum_macros::AsRefStr; - -use super::{PointeeInfo, PointeeLayout}; - -/// Memory initialization operations: set or get memory initialization state for a given pointer. -#[derive(AsRefStr, Clone, Debug)] -pub enum MemoryInitOp { - /// Check memory initialization of data bytes in a memory region starting from the pointer - /// `operand` and of length `sizeof(operand)` bytes. - Check { operand: Operand }, - /// Set memory initialization state of data bytes in a memory region starting from the pointer - /// `operand` and of length `sizeof(operand)` bytes. - Set { operand: Operand, value: bool, position: InsertPosition }, - /// Check memory initialization of data bytes in a memory region starting from the pointer - /// `operand` and of length `count * sizeof(operand)` bytes. - CheckSliceChunk { operand: Operand, count: Operand }, - /// Set memory initialization state of data bytes in a memory region starting from the pointer - /// `operand` and of length `count * sizeof(operand)` bytes. - SetSliceChunk { operand: Operand, count: Operand, value: bool, position: InsertPosition }, - /// Set memory initialization of data bytes in a memory region starting from the reference to - /// `operand` and of length `sizeof(operand)` bytes. 
- SetRef { operand: Operand, value: bool, position: InsertPosition }, - /// Unsupported memory initialization operation. - Unsupported { reason: String }, - /// Operation that trivially accesses uninitialized memory, results in injecting `assert!(false)`. - TriviallyUnsafe { reason: String }, -} - -impl MemoryInitOp { - /// Produce an operand for the relevant memory initialization related operation. This is mostly - /// required so that the analysis can create a new local to take a reference in - /// `MemoryInitOp::SetRef`. - pub fn mk_operand(&self, body: &mut MutableBody, source: &mut SourceInstruction) -> Operand { - match self { - MemoryInitOp::Check { operand, .. } - | MemoryInitOp::Set { operand, .. } - | MemoryInitOp::CheckSliceChunk { operand, .. } - | MemoryInitOp::SetSliceChunk { operand, .. } => operand.clone(), - MemoryInitOp::SetRef { operand, .. } => Operand::Copy(Place { - local: { - let place = match operand { - Operand::Copy(place) | Operand::Move(place) => place, - Operand::Constant(_) => unreachable!(), - }; - body.new_assignment( - Rvalue::AddressOf(Mutability::Not, place.clone()), - source, - self.position(), - ) - }, - projection: vec![], - }), - MemoryInitOp::Unsupported { .. } | MemoryInitOp::TriviallyUnsafe { .. } => { - unreachable!() - } - } - } - - pub fn expect_count(&self) -> Operand { - match self { - MemoryInitOp::CheckSliceChunk { count, .. } - | MemoryInitOp::SetSliceChunk { count, .. } => count.clone(), - MemoryInitOp::Check { .. } - | MemoryInitOp::Set { .. } - | MemoryInitOp::SetRef { .. } - | MemoryInitOp::Unsupported { .. } - | MemoryInitOp::TriviallyUnsafe { .. } => unreachable!(), - } - } - - pub fn expect_value(&self) -> bool { - match self { - MemoryInitOp::Set { value, .. } - | MemoryInitOp::SetSliceChunk { value, .. } - | MemoryInitOp::SetRef { value, .. } => *value, - MemoryInitOp::Check { .. } - | MemoryInitOp::CheckSliceChunk { .. } - | MemoryInitOp::Unsupported { .. } - | MemoryInitOp::TriviallyUnsafe { .. 
} => unreachable!(), - } - } - - pub fn position(&self) -> InsertPosition { - match self { - MemoryInitOp::Set { position, .. } - | MemoryInitOp::SetSliceChunk { position, .. } - | MemoryInitOp::SetRef { position, .. } => *position, - MemoryInitOp::Check { .. } - | MemoryInitOp::CheckSliceChunk { .. } - | MemoryInitOp::Unsupported { .. } - | MemoryInitOp::TriviallyUnsafe { .. } => InsertPosition::Before, - } - } -} - -/// Represents an instruction in the source code together with all memory initialization checks/sets -/// that are connected to the memory used in this instruction and whether they should be inserted -/// before or after the instruction. -#[derive(Clone, Debug)] -pub struct InitRelevantInstruction { - /// The instruction that affects the state of the memory. - pub source: SourceInstruction, - /// All memory-related operations that should happen after the instruction. - pub before_instruction: Vec, - /// All memory-related operations that should happen after the instruction. 
- pub after_instruction: Vec, -} - -impl InitRelevantInstruction { - pub fn push_operation(&mut self, source_op: MemoryInitOp) { - match source_op.position() { - InsertPosition::Before => self.before_instruction.push(source_op), - InsertPosition::After => self.after_instruction.push(source_op), - } - } -} +use stable_mir::ty::{ConstantKind, RigidTy, TyKind}; pub struct CheckUninitVisitor<'a> { locals: &'a [LocalDecl], @@ -145,11 +34,12 @@ pub struct CheckUninitVisitor<'a> { bb: BasicBlockIdx, } -impl<'a> CheckUninitVisitor<'a> { - pub fn find_next( - body: &'a MutableBody, +impl<'a> TargetFinder for CheckUninitVisitor<'a> { + fn find_next( + body: &MutableBody, bb: BasicBlockIdx, skip_first: bool, + _place_filter: &[Place], ) -> Option { let mut visitor = CheckUninitVisitor { locals: body.locals(), @@ -161,7 +51,9 @@ impl<'a> CheckUninitVisitor<'a> { visitor.visit_basic_block(&body.blocks()[bb]); visitor.target } +} +impl<'a> CheckUninitVisitor<'a> { fn push_target(&mut self, source_op: MemoryInitOp) { let target = self.target.get_or_insert_with(|| InitRelevantInstruction { source: self.current, @@ -186,7 +78,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { operand: copy.src.clone(), count: copy.count.clone(), }); - // Destimation is a *mut T so it gets initialized. + // Destination is a *mut T so it gets initialized. self.push_target(MemoryInitOp::SetSliceChunk { operand: copy.dst.clone(), count: copy.count.clone(), @@ -572,37 +464,9 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { } } } - CastKind::PtrToPtr => { - let operand_ty = operand.ty(&self.locals).unwrap(); - if let ( - RigidTy::RawPtr(from_ty, Mutability::Mut), - RigidTy::RawPtr(to_ty, Mutability::Mut), - ) = (operand_ty.kind().rigid().unwrap(), ty.kind().rigid().unwrap()) - { - if !tys_layout_compatible(from_ty, to_ty) { - // If casting from a mutable pointer to a mutable pointer with - // different layouts, delayed UB could occur. 
- self.push_target(MemoryInitOp::Unsupported { - reason: "Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB.".to_string(), - }); - } - } - } CastKind::Transmute => { let operand_ty = operand.ty(&self.locals).unwrap(); - if let ( - RigidTy::RawPtr(from_ty, Mutability::Mut), - RigidTy::RawPtr(to_ty, Mutability::Mut), - ) = (operand_ty.kind().rigid().unwrap(), ty.kind().rigid().unwrap()) - { - if !tys_layout_compatible(from_ty, to_ty) { - // If casting from a mutable pointer to a mutable pointer with different - // layouts, delayed UB could occur. - self.push_target(MemoryInitOp::Unsupported { - reason: "Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB.".to_string(), - }); - } - } else if !tys_layout_compatible(&operand_ty, &ty) { + if !tys_layout_compatible(&operand_ty, &ty) { // If transmuting between two types of incompatible layouts, padding // bytes are exposed, which is UB. self.push_target(MemoryInitOp::TriviallyUnsafe { @@ -773,36 +637,3 @@ fn try_resolve_instance(locals: &[LocalDecl], func: &Operand) -> Result bool { - // Retrieve layouts to assess compatibility. - let from_ty_info = PointeeInfo::from_ty(*from_ty); - let to_ty_info = PointeeInfo::from_ty(*to_ty); - if let (Ok(from_ty_info), Ok(to_ty_info)) = (from_ty_info, to_ty_info) { - let from_ty_layout = match from_ty_info.layout() { - PointeeLayout::Sized { layout } => layout, - PointeeLayout::Slice { element_layout } => element_layout, - PointeeLayout::TraitObject => return false, - }; - let to_ty_layout = match to_ty_info.layout() { - PointeeLayout::Sized { layout } => layout, - PointeeLayout::Slice { element_layout } => element_layout, - PointeeLayout::TraitObject => return false, - }; - // Ensure `to_ty_layout` does not have a larger size. - if to_ty_layout.len() <= from_ty_layout.len() { - // Check data and padding bytes pair-wise. 
- if from_ty_layout.iter().zip(to_ty_layout.iter()).all( - |(from_ty_layout_byte, to_ty_layout_byte)| { - // Make sure all data and padding bytes match. - from_ty_layout_byte == to_ty_layout_byte - }, - ) { - return true; - } - } - }; - false -} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/relevant_instruction.rs b/kani-compiler/src/kani_middle/transform/check_uninit/relevant_instruction.rs new file mode 100644 index 000000000000..417dee46d9c1 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/relevant_instruction.rs @@ -0,0 +1,130 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! Module containing data structures used in identifying places that need instrumentation and the +//! character of instrumentation needed. + +use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; +use stable_mir::mir::{Mutability, Operand, Place, Rvalue}; +use strum_macros::AsRefStr; + +/// Memory initialization operations: set or get memory initialization state for a given pointer. +#[derive(AsRefStr, Clone, Debug)] +pub enum MemoryInitOp { + /// Check memory initialization of data bytes in a memory region starting from the pointer + /// `operand` and of length `sizeof(operand)` bytes. + Check { operand: Operand }, + /// Set memory initialization state of data bytes in a memory region starting from the pointer + /// `operand` and of length `sizeof(operand)` bytes. + Set { operand: Operand, value: bool, position: InsertPosition }, + /// Check memory initialization of data bytes in a memory region starting from the pointer + /// `operand` and of length `count * sizeof(operand)` bytes. + CheckSliceChunk { operand: Operand, count: Operand }, + /// Set memory initialization state of data bytes in a memory region starting from the pointer + /// `operand` and of length `count * sizeof(operand)` bytes. 
+ SetSliceChunk { operand: Operand, count: Operand, value: bool, position: InsertPosition }, + /// Check memory initialization of data bytes in a memory region starting from the reference to + /// `operand` and of length `sizeof(operand)` bytes. + CheckRef { operand: Operand }, + /// Set memory initialization of data bytes in a memory region starting from the reference to + /// `operand` and of length `sizeof(operand)` bytes. + SetRef { operand: Operand, value: bool, position: InsertPosition }, + /// Unsupported memory initialization operation. + Unsupported { reason: String }, + /// Operation that trivially accesses uninitialized memory, results in injecting `assert!(false)`. + TriviallyUnsafe { reason: String }, +} + +impl MemoryInitOp { + /// Produce an operand for the relevant memory initialization related operation. This is mostly + /// required so that the analysis can create a new local to take a reference in + /// `MemoryInitOp::SetRef`. + pub fn mk_operand(&self, body: &mut MutableBody, source: &mut SourceInstruction) -> Operand { + match self { + MemoryInitOp::Check { operand, .. } + | MemoryInitOp::Set { operand, .. } + | MemoryInitOp::CheckSliceChunk { operand, .. } + | MemoryInitOp::SetSliceChunk { operand, .. } => operand.clone(), + MemoryInitOp::CheckRef { operand, .. } | MemoryInitOp::SetRef { operand, .. } => { + Operand::Copy(Place { + local: { + let place = match operand { + Operand::Copy(place) | Operand::Move(place) => place, + Operand::Constant(_) => unreachable!(), + }; + body.new_assignment( + Rvalue::AddressOf(Mutability::Not, place.clone()), + source, + self.position(), + ) + }, + projection: vec![], + }) + } + MemoryInitOp::Unsupported { .. } | MemoryInitOp::TriviallyUnsafe { .. } => { + unreachable!() + } + } + } + + pub fn expect_count(&self) -> Operand { + match self { + MemoryInitOp::CheckSliceChunk { count, .. } + | MemoryInitOp::SetSliceChunk { count, .. } => count.clone(), + MemoryInitOp::Check { .. } + | MemoryInitOp::Set { ..
} + | MemoryInitOp::CheckRef { .. } + | MemoryInitOp::SetRef { .. } + | MemoryInitOp::Unsupported { .. } + | MemoryInitOp::TriviallyUnsafe { .. } => unreachable!(), + } + } + + pub fn expect_value(&self) -> bool { + match self { + MemoryInitOp::Set { value, .. } + | MemoryInitOp::SetSliceChunk { value, .. } + | MemoryInitOp::SetRef { value, .. } => *value, + MemoryInitOp::Check { .. } + | MemoryInitOp::CheckSliceChunk { .. } + | MemoryInitOp::CheckRef { .. } + | MemoryInitOp::Unsupported { .. } + | MemoryInitOp::TriviallyUnsafe { .. } => unreachable!(), + } + } + + pub fn position(&self) -> InsertPosition { + match self { + MemoryInitOp::Set { position, .. } + | MemoryInitOp::SetSliceChunk { position, .. } + | MemoryInitOp::SetRef { position, .. } => *position, + MemoryInitOp::Check { .. } + | MemoryInitOp::CheckSliceChunk { .. } + | MemoryInitOp::CheckRef { .. } + | MemoryInitOp::Unsupported { .. } + | MemoryInitOp::TriviallyUnsafe { .. } => InsertPosition::Before, + } + } +} + +/// Represents an instruction in the source code together with all memory initialization checks/sets +/// that are connected to the memory used in this instruction and whether they should be inserted +/// before or after the instruction. +#[derive(Clone, Debug)] +pub struct InitRelevantInstruction { + /// The instruction that affects the state of the memory. + pub source: SourceInstruction, + /// All memory-related operations that should happen before the instruction. + pub before_instruction: Vec, + /// All memory-related operations that should happen after the instruction.
+ pub after_instruction: Vec, +} + +impl InitRelevantInstruction { + pub fn push_operation(&mut self, source_op: MemoryInitOp) { + match source_op.position() { + InsertPosition::Before => self.before_instruction.push(source_op), + InsertPosition::After => self.after_instruction.push(source_op), + } + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs index 09116230af80..e8f1c85cabcd 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs @@ -332,3 +332,36 @@ fn data_bytes_for_ty( FieldsShape::Array { .. } => Ok(vec![]), } } + +/// Returns true if `to_ty` has a smaller or equal size and the same padding bytes as `from_ty` up until +/// its size. +pub fn tys_layout_compatible(from_ty: &Ty, to_ty: &Ty) -> bool { + // Retrieve layouts to assess compatibility. + let from_ty_info = PointeeInfo::from_ty(*from_ty); + let to_ty_info = PointeeInfo::from_ty(*to_ty); + if let (Ok(from_ty_info), Ok(to_ty_info)) = (from_ty_info, to_ty_info) { + let from_ty_layout = match from_ty_info.layout() { + PointeeLayout::Sized { layout } => layout, + PointeeLayout::Slice { element_layout } => element_layout, + PointeeLayout::TraitObject => return false, + }; + let to_ty_layout = match to_ty_info.layout() { + PointeeLayout::Sized { layout } => layout, + PointeeLayout::Slice { element_layout } => element_layout, + PointeeLayout::TraitObject => return false, + }; + // Ensure `to_ty_layout` does not have a larger size. + if to_ty_layout.len() <= from_ty_layout.len() { + // Check data and padding bytes pair-wise. + if from_ty_layout.iter().zip(to_ty_layout.iter()).all( + |(from_ty_layout_byte, to_ty_layout_byte)| { + // Make sure all data and padding bytes match. 
+ from_ty_layout_byte == to_ty_layout_byte + }, + ) { + return true; + } + } + }; + false +} diff --git a/kani-compiler/src/kani_middle/transform/internal_mir.rs b/kani-compiler/src/kani_middle/transform/internal_mir.rs new file mode 100644 index 000000000000..ca2dfd957fdd --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/internal_mir.rs @@ -0,0 +1,654 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! This file contains conversions from stable MIR data structures to their internal +//! counterparts. This is primarily done to facilitate using dataflow analysis, which does not yet +//! support StableMIR. + +use rustc_middle::ty::{self as rustc_ty, TyCtxt}; +use rustc_smir::rustc_internal::internal; +use stable_mir::mir::{ + AggregateKind, AssertMessage, Body, BorrowKind, CastKind, ConstOperand, CopyNonOverlapping, + CoroutineDesugaring, CoroutineKind, CoroutineSource, FakeBorrowKind, FakeReadCause, LocalDecl, + MutBorrowKind, NonDivergingIntrinsic, NullOp, Operand, PointerCoercion, RetagKind, Rvalue, + Statement, StatementKind, SwitchTargets, Terminator, TerminatorKind, UnwindAction, + UserTypeProjection, Variance, +}; + +pub trait RustcInternalMir { + type T<'tcx>; + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx>; +} + +impl RustcInternalMir for AggregateKind { + type T<'tcx> = rustc_middle::mir::AggregateKind<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + AggregateKind::Array(ty) => rustc_middle::mir::AggregateKind::Array(internal(tcx, ty)), + AggregateKind::Tuple => rustc_middle::mir::AggregateKind::Tuple, + AggregateKind::Adt( + adt_def, + variant_idx, + generic_args, + maybe_user_type_annotation_index, + maybe_field_idx, + ) => rustc_middle::mir::AggregateKind::Adt( + internal(tcx, adt_def.0), + internal(tcx, variant_idx), + internal(tcx, generic_args), + maybe_user_type_annotation_index
.map(rustc_middle::ty::UserTypeAnnotationIndex::from_usize), + maybe_field_idx.map(rustc_target::abi::FieldIdx::from_usize), + ), + AggregateKind::Closure(closure_def, generic_args) => { + rustc_middle::mir::AggregateKind::Closure( + internal(tcx, closure_def.0), + internal(tcx, generic_args), + ) + } + AggregateKind::Coroutine(coroutine_def, generic_args, _) => { + rustc_middle::mir::AggregateKind::Coroutine( + internal(tcx, coroutine_def.0), + internal(tcx, generic_args), + ) + } + AggregateKind::RawPtr(ty, mutability) => rustc_middle::mir::AggregateKind::RawPtr( + internal(tcx, ty), + internal(tcx, mutability), + ), + } + } +} + +impl RustcInternalMir for ConstOperand { + type T<'tcx> = rustc_middle::mir::ConstOperand<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::ConstOperand { + span: internal(tcx, self.span), + user_ty: self.user_ty.map(rustc_ty::UserTypeAnnotationIndex::from_usize), + const_: internal(tcx, self.const_.clone()), + } + } +} + +impl RustcInternalMir for Operand { + type T<'tcx> = rustc_middle::mir::Operand<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + Operand::Copy(place) => rustc_middle::mir::Operand::Copy(internal(tcx, place)), + Operand::Move(place) => rustc_middle::mir::Operand::Move(internal(tcx, place)), + Operand::Constant(const_operand) => { + rustc_middle::mir::Operand::Constant(Box::new(const_operand.internal_mir(tcx))) + } + } + } +} + +impl RustcInternalMir for PointerCoercion { + type T<'tcx> = rustc_middle::ty::adjustment::PointerCoercion; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + PointerCoercion::ReifyFnPointer => { + rustc_middle::ty::adjustment::PointerCoercion::ReifyFnPointer + } + PointerCoercion::UnsafeFnPointer => { + rustc_middle::ty::adjustment::PointerCoercion::UnsafeFnPointer + } + PointerCoercion::ClosureFnPointer(safety) => { + 
rustc_middle::ty::adjustment::PointerCoercion::ClosureFnPointer(internal( + tcx, safety, + )) + } + PointerCoercion::MutToConstPointer => { + rustc_middle::ty::adjustment::PointerCoercion::MutToConstPointer + } + PointerCoercion::ArrayToPointer => { + rustc_middle::ty::adjustment::PointerCoercion::ArrayToPointer + } + PointerCoercion::Unsize => rustc_middle::ty::adjustment::PointerCoercion::Unsize, + } + } +} + +impl RustcInternalMir for CastKind { + type T<'tcx> = rustc_middle::mir::CastKind; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + CastKind::PointerExposeAddress => rustc_middle::mir::CastKind::PointerExposeProvenance, + CastKind::PointerWithExposedProvenance => { + rustc_middle::mir::CastKind::PointerWithExposedProvenance + } + CastKind::PointerCoercion(ptr_coercion) => { + rustc_middle::mir::CastKind::PointerCoercion(ptr_coercion.internal_mir(tcx)) + } + CastKind::DynStar => rustc_middle::mir::CastKind::DynStar, + CastKind::IntToInt => rustc_middle::mir::CastKind::IntToInt, + CastKind::FloatToInt => rustc_middle::mir::CastKind::FloatToInt, + CastKind::FloatToFloat => rustc_middle::mir::CastKind::FloatToFloat, + CastKind::IntToFloat => rustc_middle::mir::CastKind::IntToFloat, + CastKind::PtrToPtr => rustc_middle::mir::CastKind::PtrToPtr, + CastKind::FnPtrToPtr => rustc_middle::mir::CastKind::FnPtrToPtr, + CastKind::Transmute => rustc_middle::mir::CastKind::Transmute, + } + } +} + +impl RustcInternalMir for FakeBorrowKind { + type T<'tcx> = rustc_middle::mir::FakeBorrowKind; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + FakeBorrowKind::Deep => rustc_middle::mir::FakeBorrowKind::Deep, + FakeBorrowKind::Shallow => rustc_middle::mir::FakeBorrowKind::Shallow, + } + } +} + +impl RustcInternalMir for MutBorrowKind { + type T<'tcx> = rustc_middle::mir::MutBorrowKind; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + MutBorrowKind::Default => 
rustc_middle::mir::MutBorrowKind::Default, + MutBorrowKind::TwoPhaseBorrow => rustc_middle::mir::MutBorrowKind::TwoPhaseBorrow, + MutBorrowKind::ClosureCapture => rustc_middle::mir::MutBorrowKind::ClosureCapture, + } + } +} + +impl RustcInternalMir for BorrowKind { + type T<'tcx> = rustc_middle::mir::BorrowKind; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + BorrowKind::Shared => rustc_middle::mir::BorrowKind::Shared, + BorrowKind::Fake(fake_borrow_kind) => { + rustc_middle::mir::BorrowKind::Fake(fake_borrow_kind.internal_mir(tcx)) + } + BorrowKind::Mut { kind } => { + rustc_middle::mir::BorrowKind::Mut { kind: kind.internal_mir(tcx) } + } + } + } +} + +impl RustcInternalMir for NullOp { + type T<'tcx> = rustc_middle::mir::NullOp<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + NullOp::SizeOf => rustc_middle::mir::NullOp::SizeOf, + NullOp::AlignOf => rustc_middle::mir::NullOp::AlignOf, + NullOp::OffsetOf(offsets) => rustc_middle::mir::NullOp::OffsetOf( + tcx.mk_offset_of( + offsets + .iter() + .map(|(variant_idx, field_idx)| { + ( + internal(tcx, variant_idx), + rustc_target::abi::FieldIdx::from_usize(*field_idx), + ) + }) + .collect::>() + .as_slice(), + ), + ), + NullOp::UbChecks => rustc_middle::mir::NullOp::UbChecks, + } + } +} + +impl RustcInternalMir for Rvalue { + type T<'tcx> = rustc_middle::mir::Rvalue<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + Rvalue::AddressOf(mutability, place) => rustc_middle::mir::Rvalue::AddressOf( + internal(tcx, mutability), + internal(tcx, place), + ), + Rvalue::Aggregate(aggregate_kind, operands) => rustc_middle::mir::Rvalue::Aggregate( + Box::new(aggregate_kind.internal_mir(tcx)), + rustc_index::IndexVec::from_raw( + operands.iter().map(|operand| operand.internal_mir(tcx)).collect(), + ), + ), + Rvalue::BinaryOp(bin_op, left_operand, right_operand) + | Rvalue::CheckedBinaryOp(bin_op, 
left_operand, right_operand) => { + rustc_middle::mir::Rvalue::BinaryOp( + internal(tcx, bin_op), + Box::new((left_operand.internal_mir(tcx), right_operand.internal_mir(tcx))), + ) + } + Rvalue::Cast(cast_kind, operand, ty) => rustc_middle::mir::Rvalue::Cast( + cast_kind.internal_mir(tcx), + operand.internal_mir(tcx), + internal(tcx, ty), + ), + Rvalue::CopyForDeref(place) => { + rustc_middle::mir::Rvalue::CopyForDeref(internal(tcx, place)) + } + Rvalue::Discriminant(place) => { + rustc_middle::mir::Rvalue::Discriminant(internal(tcx, place)) + } + Rvalue::Len(place) => rustc_middle::mir::Rvalue::Len(internal(tcx, place)), + Rvalue::Ref(region, borrow_kind, place) => rustc_middle::mir::Rvalue::Ref( + internal(tcx, region), + borrow_kind.internal_mir(tcx), + internal(tcx, place), + ), + Rvalue::Repeat(operand, ty_const) => rustc_middle::mir::Rvalue::Repeat( + operand.internal_mir(tcx), + internal(tcx, ty_const), + ), + Rvalue::ShallowInitBox(operand, ty) => rustc_middle::mir::Rvalue::ShallowInitBox( + operand.internal_mir(tcx), + internal(tcx, ty), + ), + Rvalue::ThreadLocalRef(crate_item) => { + rustc_middle::mir::Rvalue::ThreadLocalRef(internal(tcx, crate_item.0)) + } + Rvalue::NullaryOp(null_op, ty) => { + rustc_middle::mir::Rvalue::NullaryOp(null_op.internal_mir(tcx), internal(tcx, ty)) + } + Rvalue::UnaryOp(un_op, operand) => { + rustc_middle::mir::Rvalue::UnaryOp(internal(tcx, un_op), operand.internal_mir(tcx)) + } + Rvalue::Use(operand) => rustc_middle::mir::Rvalue::Use(operand.internal_mir(tcx)), + } + } +} + +impl RustcInternalMir for FakeReadCause { + type T<'tcx> = rustc_middle::mir::FakeReadCause; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + FakeReadCause::ForMatchGuard => rustc_middle::mir::FakeReadCause::ForMatchGuard, + FakeReadCause::ForMatchedPlace(_opaque) => { + unimplemented!("cannot convert back from an opaque field") + } + FakeReadCause::ForGuardBinding => 
rustc_middle::mir::FakeReadCause::ForGuardBinding, + FakeReadCause::ForLet(_opaque) => { + unimplemented!("cannot convert back from an opaque field") + } + FakeReadCause::ForIndex => rustc_middle::mir::FakeReadCause::ForIndex, + } + } +} + +impl RustcInternalMir for RetagKind { + type T<'tcx> = rustc_middle::mir::RetagKind; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + RetagKind::FnEntry => rustc_middle::mir::RetagKind::FnEntry, + RetagKind::TwoPhase => rustc_middle::mir::RetagKind::TwoPhase, + RetagKind::Raw => rustc_middle::mir::RetagKind::Raw, + RetagKind::Default => rustc_middle::mir::RetagKind::Default, + } + } +} + +impl RustcInternalMir for UserTypeProjection { + type T<'tcx> = rustc_middle::mir::UserTypeProjection; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + unimplemented!("cannot convert back from an opaque field") + } +} + +impl RustcInternalMir for Variance { + type T<'tcx> = rustc_middle::ty::Variance; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + Variance::Covariant => rustc_middle::ty::Variance::Covariant, + Variance::Invariant => rustc_middle::ty::Variance::Invariant, + Variance::Contravariant => rustc_middle::ty::Variance::Contravariant, + Variance::Bivariant => rustc_middle::ty::Variance::Bivariant, + } + } +} + +impl RustcInternalMir for CopyNonOverlapping { + type T<'tcx> = rustc_middle::mir::CopyNonOverlapping<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::CopyNonOverlapping { + src: self.src.internal_mir(tcx), + dst: self.dst.internal_mir(tcx), + count: self.count.internal_mir(tcx), + } + } +} + +impl RustcInternalMir for NonDivergingIntrinsic { + type T<'tcx> = rustc_middle::mir::NonDivergingIntrinsic<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + NonDivergingIntrinsic::Assume(operand) => { + 
rustc_middle::mir::NonDivergingIntrinsic::Assume(operand.internal_mir(tcx)) + } + NonDivergingIntrinsic::CopyNonOverlapping(copy_non_overlapping) => { + rustc_middle::mir::NonDivergingIntrinsic::CopyNonOverlapping( + copy_non_overlapping.internal_mir(tcx), + ) + } + } + } +} + +impl RustcInternalMir for StatementKind { + type T<'tcx> = rustc_middle::mir::StatementKind<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + StatementKind::Assign(place, rvalue) => rustc_middle::mir::StatementKind::Assign( + Box::new((internal(tcx, place), rvalue.internal_mir(tcx))), + ), + StatementKind::FakeRead(fake_read_cause, place) => { + rustc_middle::mir::StatementKind::FakeRead(Box::new(( + fake_read_cause.internal_mir(tcx), + internal(tcx, place), + ))) + } + StatementKind::SetDiscriminant { place, variant_index } => { + rustc_middle::mir::StatementKind::SetDiscriminant { + place: internal(tcx, place).into(), + variant_index: internal(tcx, variant_index), + } + } + StatementKind::Deinit(place) => { + rustc_middle::mir::StatementKind::Deinit(internal(tcx, place).into()) + } + StatementKind::StorageLive(local) => rustc_middle::mir::StatementKind::StorageLive( + rustc_middle::mir::Local::from_usize(*local), + ), + StatementKind::StorageDead(local) => rustc_middle::mir::StatementKind::StorageDead( + rustc_middle::mir::Local::from_usize(*local), + ), + StatementKind::Retag(retag_kind, place) => rustc_middle::mir::StatementKind::Retag( + retag_kind.internal_mir(tcx), + internal(tcx, place).into(), + ), + StatementKind::PlaceMention(place) => { + rustc_middle::mir::StatementKind::PlaceMention(Box::new(internal(tcx, place))) + } + StatementKind::AscribeUserType { place, projections, variance } => { + rustc_middle::mir::StatementKind::AscribeUserType( + Box::new((internal(tcx, place), projections.internal_mir(tcx))), + variance.internal_mir(tcx), + ) + } + StatementKind::Coverage(_coverage_kind) => { + unimplemented!("cannot convert back from an 
opaque field") + } + StatementKind::Intrinsic(non_diverging_intrinsic) => { + rustc_middle::mir::StatementKind::Intrinsic( + non_diverging_intrinsic.internal_mir(tcx).into(), + ) + } + StatementKind::ConstEvalCounter => rustc_middle::mir::StatementKind::ConstEvalCounter, + StatementKind::Nop => rustc_middle::mir::StatementKind::Nop, + } + } +} + +impl RustcInternalMir for Statement { + type T<'tcx> = rustc_middle::mir::Statement<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::Statement { + source_info: rustc_middle::mir::SourceInfo::outermost(internal(tcx, self.span)), + kind: self.kind.internal_mir(tcx), + } + } +} + +impl RustcInternalMir for UnwindAction { + type T<'tcx> = rustc_middle::mir::UnwindAction; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + UnwindAction::Continue => rustc_middle::mir::UnwindAction::Continue, + UnwindAction::Unreachable => rustc_middle::mir::UnwindAction::Unreachable, + UnwindAction::Terminate => rustc_middle::mir::UnwindAction::Terminate( + rustc_middle::mir::UnwindTerminateReason::Abi, + ), + UnwindAction::Cleanup(basic_block_idx) => rustc_middle::mir::UnwindAction::Cleanup( + rustc_middle::mir::BasicBlock::from_usize(*basic_block_idx), + ), + } + } +} + +impl RustcInternalMir for SwitchTargets { + type T<'tcx> = rustc_middle::mir::SwitchTargets; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::SwitchTargets::new( + self.branches().map(|(value, basic_block_idx)| { + (value, rustc_middle::mir::BasicBlock::from_usize(basic_block_idx)) + }), + rustc_middle::mir::BasicBlock::from_usize(self.otherwise()), + ) + } +} + +impl RustcInternalMir for CoroutineDesugaring { + type T<'tcx> = rustc_hir::CoroutineDesugaring; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + CoroutineDesugaring::Async => rustc_hir::CoroutineDesugaring::Async, + CoroutineDesugaring::Gen => 
rustc_hir::CoroutineDesugaring::Gen, + CoroutineDesugaring::AsyncGen => rustc_hir::CoroutineDesugaring::AsyncGen, + } + } +} + +impl RustcInternalMir for CoroutineSource { + type T<'tcx> = rustc_hir::CoroutineSource; + + fn internal_mir<'tcx>(&self, _tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + CoroutineSource::Block => rustc_hir::CoroutineSource::Block, + CoroutineSource::Closure => rustc_hir::CoroutineSource::Closure, + CoroutineSource::Fn => rustc_hir::CoroutineSource::Fn, + } + } +} + +impl RustcInternalMir for CoroutineKind { + type T<'tcx> = rustc_hir::CoroutineKind; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + CoroutineKind::Desugared(coroutine_desugaring, coroutine_source) => { + rustc_hir::CoroutineKind::Desugared( + coroutine_desugaring.internal_mir(tcx), + coroutine_source.internal_mir(tcx), + ) + } + CoroutineKind::Coroutine(movability) => { + rustc_hir::CoroutineKind::Coroutine(internal(tcx, movability)) + } + } + } +} + +impl RustcInternalMir for AssertMessage { + type T<'tcx> = rustc_middle::mir::AssertMessage<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + AssertMessage::BoundsCheck { len, index } => { + rustc_middle::mir::AssertMessage::BoundsCheck { + len: len.internal_mir(tcx), + index: index.internal_mir(tcx), + } + } + AssertMessage::Overflow(bin_op, left_operand, right_operand) => { + rustc_middle::mir::AssertMessage::Overflow( + internal(tcx, bin_op), + left_operand.internal_mir(tcx), + right_operand.internal_mir(tcx), + ) + } + AssertMessage::OverflowNeg(operand) => { + rustc_middle::mir::AssertMessage::OverflowNeg(operand.internal_mir(tcx)) + } + AssertMessage::DivisionByZero(operand) => { + rustc_middle::mir::AssertMessage::DivisionByZero(operand.internal_mir(tcx)) + } + AssertMessage::RemainderByZero(operand) => { + rustc_middle::mir::AssertMessage::RemainderByZero(operand.internal_mir(tcx)) + } + 
AssertMessage::ResumedAfterReturn(coroutine_kind) => { + rustc_middle::mir::AssertMessage::ResumedAfterReturn( + coroutine_kind.internal_mir(tcx), + ) + } + AssertMessage::ResumedAfterPanic(coroutine_kind) => { + rustc_middle::mir::AssertMessage::ResumedAfterPanic( + coroutine_kind.internal_mir(tcx), + ) + } + AssertMessage::MisalignedPointerDereference { required, found } => { + rustc_middle::mir::AssertMessage::MisalignedPointerDereference { + required: required.internal_mir(tcx), + found: found.internal_mir(tcx), + } + } + } + } +} + +impl RustcInternalMir for TerminatorKind { + type T<'tcx> = rustc_middle::mir::TerminatorKind<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + match self { + TerminatorKind::Goto { target } => rustc_middle::mir::TerminatorKind::Goto { + target: rustc_middle::mir::BasicBlock::from_usize(*target), + }, + TerminatorKind::SwitchInt { discr, targets } => { + rustc_middle::mir::TerminatorKind::SwitchInt { + discr: discr.internal_mir(tcx), + targets: targets.internal_mir(tcx), + } + } + TerminatorKind::Resume => rustc_middle::mir::TerminatorKind::UnwindResume, + TerminatorKind::Abort => rustc_middle::mir::TerminatorKind::UnwindTerminate( + rustc_middle::mir::UnwindTerminateReason::Abi, + ), + TerminatorKind::Return => rustc_middle::mir::TerminatorKind::Return, + TerminatorKind::Unreachable => rustc_middle::mir::TerminatorKind::Unreachable, + TerminatorKind::Drop { place, target, unwind } => { + rustc_middle::mir::TerminatorKind::Drop { + place: internal(tcx, place), + target: rustc_middle::mir::BasicBlock::from_usize(*target), + unwind: unwind.internal_mir(tcx), + replace: false, + } + } + TerminatorKind::Call { func, args, destination, target, unwind } => { + rustc_middle::mir::TerminatorKind::Call { + func: func.internal_mir(tcx), + args: Box::from_iter( + args.iter().map(|arg| { + rustc_span::source_map::dummy_spanned(arg.internal_mir(tcx)) + }), + ), + destination: internal(tcx, destination), + target: 
target.map(|basic_block_idx| { + rustc_middle::mir::BasicBlock::from_usize(basic_block_idx) + }), + unwind: unwind.internal_mir(tcx), + call_source: rustc_middle::mir::CallSource::Normal, + fn_span: rustc_span::DUMMY_SP, + } + } + TerminatorKind::Assert { cond, expected, msg, target, unwind } => { + rustc_middle::mir::TerminatorKind::Assert { + cond: cond.internal_mir(tcx), + expected: *expected, + msg: Box::new(msg.internal_mir(tcx)), + target: rustc_middle::mir::BasicBlock::from_usize(*target), + unwind: unwind.internal_mir(tcx), + } + } + TerminatorKind::InlineAsm { .. } => todo!(), + } + } +} + +impl RustcInternalMir for Terminator { + type T<'tcx> = rustc_middle::mir::Terminator<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::Terminator { + source_info: rustc_middle::mir::SourceInfo::outermost(internal(tcx, self.span)), + kind: self.kind.internal_mir(tcx), + } + } +} + +impl RustcInternalMir for LocalDecl { + type T<'tcx> = rustc_middle::mir::LocalDecl<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + rustc_middle::mir::LocalDecl { + mutability: internal(tcx, self.mutability), + local_info: rustc_middle::mir::ClearCrossCrate::Set(Box::new( + rustc_middle::mir::LocalInfo::Boring, + )), + ty: internal(tcx, self.ty), + user_ty: None, + source_info: rustc_middle::mir::SourceInfo::outermost(internal(tcx, self.span)), + } + } +} + +impl RustcInternalMir for Body { + type T<'tcx> = rustc_middle::mir::Body<'tcx>; + + fn internal_mir<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Self::T<'tcx> { + let internal_basic_blocks = rustc_index::IndexVec::from_raw( + self.blocks + .iter() + .map(|stable_basic_block| rustc_middle::mir::BasicBlockData { + statements: stable_basic_block + .statements + .iter() + .map(|statement| statement.internal_mir(tcx)) + .collect(), + terminator: Some(stable_basic_block.terminator.internal_mir(tcx)), + is_cleanup: false, + }) + .collect(), + ); + let local_decls = 
rustc_index::IndexVec::from_raw( + self.locals().iter().map(|local_decl| local_decl.internal_mir(tcx)).collect(), + ); + rustc_middle::mir::Body::new( + rustc_middle::mir::MirSource::item(rustc_hir::def_id::CRATE_DEF_ID.to_def_id()), + internal_basic_blocks, + rustc_index::IndexVec::new(), + local_decls, + rustc_index::IndexVec::new(), + self.arg_locals().len(), + Vec::new(), + rustc_span::DUMMY_SP, + None, + None, + ) + } +} diff --git a/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs b/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs index c4534bf11b4d..6ebe2c2e09a1 100644 --- a/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs +++ b/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs @@ -13,6 +13,9 @@ use crate::kani_middle::transform::body::{ CheckType, InsertPosition, MutableBody, SourceInstruction, }; use crate::kani_middle::transform::check_uninit::PointeeInfo; +use crate::kani_middle::transform::check_uninit::{ + get_mem_init_fn_def, mk_layout_operand, resolve_mem_init_fn, PointeeLayout, +}; use crate::kani_middle::transform::check_values::{build_limits, ty_validity_per_offset}; use crate::kani_middle::transform::{TransformPass, TransformationType}; use crate::kani_queries::QueryDb; @@ -28,10 +31,6 @@ use std::fmt::Debug; use strum_macros::AsRefStr; use tracing::trace; -use super::check_uninit::{ - get_mem_init_fn_def, mk_layout_operand, resolve_mem_init_fn, PointeeLayout, -}; - /// Generate the body for a few Kani intrinsics. 
#[derive(Debug)] pub struct IntrinsicGeneratorPass { diff --git a/kani-compiler/src/kani_middle/transform/mod.rs b/kani-compiler/src/kani_middle/transform/mod.rs index 5b497b09619d..ce2f1742f71f 100644 --- a/kani-compiler/src/kani_middle/transform/mod.rs +++ b/kani-compiler/src/kani_middle/transform/mod.rs @@ -19,7 +19,8 @@ use crate::kani_middle::codegen_units::CodegenUnit; use crate::kani_middle::reachability::CallGraph; use crate::kani_middle::transform::body::CheckType; -use crate::kani_middle::transform::check_uninit::UninitPass; +use crate::kani_middle::transform::check_uninit::delayed_ub::DelayedUbPass; +use crate::kani_middle::transform::check_uninit::ptr_uninit::UninitPass; use crate::kani_middle::transform::check_values::ValidValuePass; use crate::kani_middle::transform::contracts::AnyModifiesPass; use crate::kani_middle::transform::kani_intrinsics::IntrinsicGeneratorPass; @@ -37,6 +38,7 @@ mod check_uninit; mod check_values; mod contracts; mod dump_mir_pass; +mod internal_mir; mod kani_intrinsics; mod stubs; @@ -191,6 +193,7 @@ impl GlobalPasses { pub fn new(queries: &QueryDb, tcx: TyCtxt) -> Self { let mut global_passes = GlobalPasses { global_passes: vec![] }; global_passes.add_global_pass(queries, DumpMirPass::new(tcx)); + global_passes.add_global_pass(queries, DelayedUbPass::new(CheckType::new_assert(tcx))); global_passes } diff --git a/kani-compiler/src/main.rs b/kani-compiler/src/main.rs index d2f8cf17e9e7..7f1fb144a09b 100644 --- a/kani-compiler/src/main.rs +++ b/kani-compiler/src/main.rs @@ -27,6 +27,7 @@ extern crate rustc_index; extern crate rustc_interface; extern crate rustc_metadata; extern crate rustc_middle; +extern crate rustc_mir_dataflow; extern crate rustc_session; extern crate rustc_smir; extern crate rustc_span; diff --git a/tests/expected/uninit/access-padding-via-cast/expected b/tests/expected/uninit/access-padding-via-cast/expected index e02883b26cdf..12c5c0a4a439 100644 --- a/tests/expected/uninit/access-padding-via-cast/expected 
+++ b/tests/expected/uninit/access-padding-via-cast/expected @@ -1,4 +1,4 @@ -Failed Checks: Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB. +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*mut [u8; 4]` VERIFICATION:- FAILED diff --git a/tests/expected/uninit/delayed-ub-transmute/expected b/tests/expected/uninit/delayed-ub-transmute/expected index e02883b26cdf..960efcdade40 100644 --- a/tests/expected/uninit/delayed-ub-transmute/expected +++ b/tests/expected/uninit/delayed-ub-transmute/expected @@ -1,4 +1,4 @@ -Failed Checks: Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB. +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` VERIFICATION:- FAILED diff --git a/tests/expected/uninit/delayed-ub/expected b/tests/expected/uninit/delayed-ub/expected index e02883b26cdf..960efcdade40 100644 --- a/tests/expected/uninit/delayed-ub/expected +++ b/tests/expected/uninit/delayed-ub/expected @@ -1,4 +1,4 @@ -Failed Checks: Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB. +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` VERIFICATION:- FAILED diff --git a/tests/expected/uninit/intrinsics/expected b/tests/expected/uninit/intrinsics/expected index ffa98b6f1140..ab83be097444 100644 --- a/tests/expected/uninit/intrinsics/expected +++ b/tests/expected/uninit/intrinsics/expected @@ -1,17 +1,21 @@ Checking harness check_typed_swap_safe... -Failed Checks: Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB. +Failed Checks: Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit. 
+ +Failed Checks: Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit. VERIFICATION:- FAILED Checking harness check_typed_swap... -Failed Checks: Kani does not support reasoning about memory initialization in presence of mutable raw pointer casts that could cause delayed UB. - Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*mut u8` Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*mut u8` +Failed Checks: Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit. + +Failed Checks: Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit. + VERIFICATION:- FAILED Checking harness check_volatile_store_and_load_safe... From 40c804732abe72c0bf37e4cabe9676dad3fad2fa Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Wed, 24 Jul 2024 09:29:23 -0700 Subject: [PATCH 02/45] Add minimal statics support, clean up atomics --- ...b_visitor.rs => initial_target_visitor.rs} | 34 +-- .../delayed_ub/instrumentation_visitor.rs | 96 +++--- .../transform/check_uninit/delayed_ub/mod.rs | 49 ++-- .../delayed_ub/points_to_analysis.rs | 274 +++++------------- .../delayed_ub/points_to_graph.rs | 93 +++--- .../kani_middle/transform/check_uninit/mod.rs | 8 +- .../transform/check_uninit/ptr_uninit/mod.rs | 5 +- .../check_uninit/ptr_uninit/uninit_visitor.rs | 74 ++--- 8 files changed, 268 insertions(+), 365 deletions(-) rename kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/{delayed_ub_visitor.rs => initial_target_visitor.rs} (81%) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/delayed_ub_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs similarity index 81% rename from kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/delayed_ub_visitor.rs rename to 
kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs index cde02c8b82a8..fd48952ef366 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/delayed_ub_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs @@ -18,22 +18,22 @@ use crate::kani_middle::transform::check_uninit::ty_layout::tys_layout_compatibl /// Visitor that finds initial analysis targets for delayed UB instrumentation. For our purposes, /// analysis targets are *pointers* to places reading and writing from which should be tracked. -pub struct DelayedUbVisitor { +pub struct InitialTargetVisitor { body: Body, - delayed_ub_targets: Vec, + targets: Vec, } -impl DelayedUbVisitor { +impl InitialTargetVisitor { pub fn new(body: Body) -> Self { - Self { body, delayed_ub_targets: vec![] } + Self { body, targets: vec![] } } pub fn into_targets(self) -> Vec { - self.delayed_ub_targets + self.targets } } -impl MirVisitor for DelayedUbVisitor { +impl MirVisitor for InitialTargetVisitor { fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) { if let Rvalue::Cast(kind, operand, ty) = rvalue { let operand_ty = operand.ty(self.body.locals()).unwrap(); @@ -47,13 +47,11 @@ impl MirVisitor for DelayedUbVisitor { match operand { Operand::Copy(place) | Operand::Move(place) => { if !tys_layout_compatible(from_ty, to_ty) { - self.delayed_ub_targets.push(place.clone()); + self.targets.push(place.clone()); } } Operand::Constant(_) => { - unimplemented!( - "Delayed UB in presence of constants is not yet supported." 
- ) + unreachable!("cannot be a constant") } } } @@ -70,10 +68,10 @@ impl MirVisitor for DelayedUbVisitor { { match ©.dst { Operand::Copy(place) | Operand::Move(place) => { - self.delayed_ub_targets.push(place.clone()); + self.targets.push(place.clone()); } Operand::Constant(_) => { - unimplemented!("Delayed UB in presence of constants is not yet supported.") + unreachable!("cannot be a constant") } } } @@ -103,11 +101,9 @@ impl MirVisitor for DelayedUbVisitor { // Here, `dst` is the second argument. match &args[1] { Operand::Copy(place) | Operand::Move(place) => { - self.delayed_ub_targets.push(place.clone()); + self.targets.push(place.clone()); } - Operand::Constant(_) => unimplemented!( - "Delayed UB in presence of constants is not yet supported." - ), + Operand::Constant(_) => unreachable!("cannot be a constant"), } } "volatile_copy_memory" | "volatile_copy_nonoverlapping_memory" => { @@ -127,11 +123,9 @@ impl MirVisitor for DelayedUbVisitor { // Here, `dst` is the first argument. match &args[0] { Operand::Copy(place) | Operand::Move(place) => { - self.delayed_ub_targets.push(place.clone()); + self.targets.push(place.clone()); } - Operand::Constant(_) => unimplemented!( - "Delayed UB in presence of constants is not yet supported." - ), + Operand::Constant(_) => unreachable!("cannot be a constant"), } } _ => {} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs index f8d60debca10..2fb43172e05e 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -5,15 +5,21 @@ //! UB. In practice, that means collecting all instructions where the place is featured. 
use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; +use crate::kani_middle::transform::check_uninit::delayed_ub::points_to_graph::{ + GlobalMemLoc, PointsToGraph, +}; use crate::kani_middle::transform::check_uninit::relevant_instruction::{ InitRelevantInstruction, MemoryInitOp, }; use crate::kani_middle::transform::check_uninit::TargetFinder; - +use rustc_hir::def_id::DefId as InternalDefId; +use rustc_middle::ty::TyCtxt; +use rustc_smir::rustc_internal; use stable_mir::mir::visit::{Location, PlaceContext}; -use stable_mir::mir::{BasicBlockIdx, MirVisitor, Operand, Place, ProjectionElem, Statement}; +use stable_mir::mir::{BasicBlockIdx, MirVisitor, Operand, Place, Statement}; +use std::collections::HashSet; -pub struct DelayedUbTargetVisitor<'a> { +pub struct InstrumentationVisitor<'a, 'tcx> { /// Whether we should skip the next instruction, since it might've been instrumented already. /// When we instrument an instruction, we partition the basic block, and the instruction that /// may trigger UB becomes the first instruction of the basic block, which we need to skip @@ -23,29 +29,46 @@ pub struct DelayedUbTargetVisitor<'a> { current: SourceInstruction, /// The target instruction that should be verified. pub target: Option, - /// The list of places we should be looking for, ignoring others. - place_filter: &'a [Place], + /// Aliasing analysis data. 
+ points_to: &'a PointsToGraph<'tcx>, + /// The list of places we should be looking for, ignoring others + analysis_targets: &'a HashSet>, + current_def_id: InternalDefId, + tcx: TyCtxt<'tcx>, } -impl<'a> TargetFinder for DelayedUbTargetVisitor<'a> { +impl<'a, 'tcx> TargetFinder for InstrumentationVisitor<'a, 'tcx> { fn find_next( + &mut self, body: &MutableBody, bb: BasicBlockIdx, skip_first: bool, - place_filter: &[Place], ) -> Option { - let mut visitor = DelayedUbTargetVisitor { - skip_next: skip_first, - current: SourceInstruction::Statement { idx: 0, bb }, - target: None, - place_filter, - }; - visitor.visit_basic_block(&body.blocks()[bb]); - visitor.target + self.skip_next = skip_first; + self.current = SourceInstruction::Statement { idx: 0, bb }; + self.target = None; + self.visit_basic_block(&body.blocks()[bb]); + self.target.clone() } } -impl<'a> DelayedUbTargetVisitor<'a> { +impl<'a, 'tcx> InstrumentationVisitor<'a, 'tcx> { + pub fn new( + points_to: &'a PointsToGraph<'tcx>, + analysis_targets: &'a HashSet>, + current_def_id: InternalDefId, + tcx: TyCtxt<'tcx>, + ) -> Self { + Self { + skip_next: false, + current: SourceInstruction::Statement { idx: 0, bb: 0 }, + target: None, + points_to, + analysis_targets, + current_def_id, + tcx, + } + } fn push_target(&mut self, source_op: MemoryInitOp) { let target = self.target.get_or_insert_with(|| InitRelevantInstruction { source: self.current, @@ -56,7 +79,7 @@ impl<'a> DelayedUbTargetVisitor<'a> { } } -impl<'a> MirVisitor for DelayedUbTargetVisitor<'a> { +impl<'a, 'tcx> MirVisitor for InstrumentationVisitor<'a, 'tcx> { fn visit_statement(&mut self, stmt: &Statement, location: Location) { if self.skip_next { self.skip_next = false; @@ -70,31 +93,24 @@ impl<'a> MirVisitor for DelayedUbTargetVisitor<'a> { } fn visit_place(&mut self, place: &Place, ptx: PlaceContext, location: Location) { - // Match the place by its local. 
+ // Match the place by whatever it is pointing to and find an intersection with the targets. if self - .place_filter - .iter() - .any(|instrumented_place| instrumented_place.local == place.local) + .points_to + .follow_from_place(rustc_internal::internal(self.tcx, place), self.current_def_id) + .intersection(&self.analysis_targets) + .count() + != 0 { - let deref_projection_detected = place - .projection - .iter() - .any(|projection_elem| matches!(projection_elem, ProjectionElem::Deref)); - // We should only track the place itself, not whatever it gets dereferenced to. - if !deref_projection_detected { - // If we are mutating the place, initialize it. - if ptx.is_mutating() { - self.push_target(MemoryInitOp::SetRef { - operand: Operand::Copy(place.clone()), - value: true, - position: InsertPosition::After, - }); - } else { - // Otherwise, check its initialization. - self.push_target(MemoryInitOp::CheckRef { - operand: Operand::Copy(place.clone()), - }); - } + // If we are mutating the place, initialize it. + if ptx.is_mutating() { + self.push_target(MemoryInitOp::SetRef { + operand: Operand::Copy(place.clone()), + value: true, + position: InsertPosition::After, + }); + } else { + // Otherwise, check its initialization. 
+ self.push_target(MemoryInitOp::CheckRef { operand: Operand::Copy(place.clone()) }); } } self.super_place(place, ptx, location) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index 499fe95e6b15..3ab1c2b69d2e 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -16,19 +16,20 @@ use crate::kani_middle::transform::BodyTransformation; use crate::kani_middle::transform::GlobalPass; use crate::kani_middle::transform::TransformationResult; use crate::kani_queries::QueryDb; -use delayed_ub_visitor::DelayedUbVisitor; -use instrumentation_visitor::DelayedUbTargetVisitor; +use initial_target_visitor::InitialTargetVisitor; +use instrumentation_visitor::InstrumentationVisitor; use points_to_analysis::PointsToAnalysis; -use points_to_graph::PlaceOrAlloc; +use points_to_graph::LocalMemLoc; +use points_to_graph::PointsToGraph; use rustc_middle::ty::TyCtxt; +use rustc_mir_dataflow::JoinSemiLattice; use rustc_smir::rustc_internal; use stable_mir::mir::mono::{Instance, MonoItem}; use stable_mir::mir::MirVisitor; -use stable_mir::mir::Place; use stable_mir::ty::FnDef; use stable_mir::CrateDef; -mod delayed_ub_visitor; +mod initial_target_visitor; mod instrumentation_visitor; mod points_to_analysis; mod points_to_graph; @@ -66,18 +67,19 @@ impl GlobalPass for DelayedUbPass { .flat_map(|instance| { let def_id = rustc_internal::internal(tcx, instance.def.def_id()); let body = instance.body().unwrap(); - let mut visitor = DelayedUbVisitor::new(body.clone()); + let mut visitor = InitialTargetVisitor::new(body.clone()); visitor.visit_body(&body); // Convert all places into the format of aliasing graph for later comparison. 
visitor.into_targets().into_iter().map(move |place| { - PlaceOrAlloc::Place(rustc_internal::internal(tcx, place)).with_def_id(def_id) + LocalMemLoc::Place(rustc_internal::internal(tcx, place)).with_def_id(def_id) }) }) .collect(); // Only perform this analysis if there is something to analyze. if !targets.is_empty() { - let mut places_need_instrumentation = HashSet::new(); + let mut analysis_targets = HashSet::new(); + let mut global_points_to_graph = PointsToGraph::empty(); // Analyze aliasing for every harness. for entry_item in starting_items { // Convert each entry function into instance, if possible. @@ -105,8 +107,9 @@ impl GlobalPass for DelayedUbPass { ); // Since analysis targets are *pointers*, need to get its followers for instrumentation. for target in targets.iter() { - places_need_instrumentation.extend(results.pointees_of(target)); + analysis_targets.extend(results.pointees_of(target)); } + global_points_to_graph.join(&results); } } @@ -118,24 +121,16 @@ impl GlobalPass for DelayedUbPass { mem_init_fn_cache: &mut self.mem_init_fn_cache, }; // Retrieve the body with all local instrumentation passes applied. - let new_body = MutableBody::from(transformer.body(tcx, instance)); - // Retrieve all places we need to instrument in the appropriate format. - let place_filter: Vec = places_need_instrumentation - .iter() - .filter(|place| { - // Make sure only places from the current instance are included. - place.has_def_id(internal_def_id) - }) - .filter_map(|global_place_or_alloc| { - match global_place_or_alloc.without_def_id() { - PlaceOrAlloc::Alloc(_) => None, // Allocations cannot be read directly, so we need not worry about them. - PlaceOrAlloc::Place(place) => Some(rustc_internal::stable(place)), // Convert back to StableMIR. - } - }) - .collect(); - // Finally, instrument. 
- let (instrumentation_added, body) = instrumenter - .instrument::(tcx, new_body, instance, &place_filter); + let body = MutableBody::from(transformer.body(tcx, instance)); + // Instrument for delayed UB. + let target_finder = InstrumentationVisitor::new( + &global_points_to_graph, + &analysis_targets, + internal_def_id, + tcx, + ); + let (instrumentation_added, body) = + instrumenter.instrument(tcx, body, instance, target_finder); // If some instrumentation has been performed, update the cached body in the local transformer. if instrumentation_added { transformer.cache.entry(instance).and_modify(|transformation_result| { diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs index 48590c9fcea7..9cc1d591debc 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs @@ -7,9 +7,7 @@ use crate::kani_middle::{ reachability::CallGraph, transform::{ - check_uninit::delayed_ub::points_to_graph::{ - GlobalPlaceOrAlloc, PlaceOrAlloc, PointsToGraph, - }, + check_uninit::delayed_ub::points_to_graph::{GlobalMemLoc, LocalMemLoc, PointsToGraph}, internal_mir::RustcInternalMir, BodyTransformation, }, @@ -32,16 +30,16 @@ use std::collections::HashSet; /// Main points-to analysis object. Since this one will be created anew for each instance analysis, /// we need to make sure big data structures are not copied unnecessarily. 
-pub struct PointsToAnalysis<'a, 'b, 'c, 'tcx> { +pub struct PointsToAnalysis<'a, 'tcx> { def_id: DefId, body: Body<'tcx>, tcx: TyCtxt<'tcx>, call_graph: &'a CallGraph, - instances: &'b Vec, - transformer: &'c mut BodyTransformation, + instances: &'a Vec, + transformer: &'a mut BodyTransformation, } -impl<'a, 'b, 'c, 'tcx> PointsToAnalysis<'a, 'b, 'c, 'tcx> { +impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { /// Perform the analysis on a body, outputting the graph containing aliasing information of the /// body itself and any body reachable from it. pub fn run( @@ -49,13 +47,13 @@ impl<'a, 'b, 'c, 'tcx> PointsToAnalysis<'a, 'b, 'c, 'tcx> { tcx: TyCtxt<'tcx>, def_id: DefId, call_graph: &'a CallGraph, - instances: &'b Vec, - transformer: &'c mut BodyTransformation, + instances: &'a Vec, + transformer: &'a mut BodyTransformation, ) -> PointsToGraph<'tcx> { let analysis = Self { body: body.clone(), tcx, def_id, call_graph, instances, transformer }; let mut cursor = analysis.into_engine(tcx, &body).iterate_to_fixpoint().into_results_cursor(&body); - let mut results = PointsToGraph::new(&body, def_id); + let mut results = PointsToGraph::from_body(&body, def_id); for (idx, _) in body.basic_blocks.iter().enumerate() { cursor.seek_to_block_end(idx.into()); results.join(cursor.get()); @@ -64,7 +62,7 @@ impl<'a, 'b, 'c, 'tcx> PointsToAnalysis<'a, 'b, 'c, 'tcx> { } } -impl<'a, 'b, 'c, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { +impl<'a, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'tcx> { type Domain = PointsToGraph<'tcx>; type Direction = Forward; @@ -73,7 +71,7 @@ impl<'a, 'b, 'c, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tc /// Dataflow state instantiated at the beginning of each basic block. 
fn bottom_value(&self, body: &Body<'tcx>) -> Self::Domain { - PointsToGraph::new(body, self.def_id) + PointsToGraph::from_body(body, self.def_id) } /// Dataflow state instantiated at the entry into the body, for us this coincides with the @@ -86,7 +84,7 @@ impl<'a, 'b, 'c, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tc } } -impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { +impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { /// Update current dataflow state based on the information we can infer from the given /// statement. fn apply_statement_effect( @@ -108,7 +106,7 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { Rvalue::Use(operand) | Rvalue::ShallowInitBox(operand, _) | Rvalue::Cast(_, operand, _) - | Rvalue::Repeat(operand, ..) => self.find_operand_pointees(state, operand), + | Rvalue::Repeat(operand, ..) => self.follow_rvalue(state, operand), Rvalue::Ref(_, _, place) | Rvalue::AddressOf(_, place) => { // Here, a reference to a place is created, which leaves the place // unchanged. @@ -120,14 +118,7 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { // Offsetting a pointer should still be within the boundaries of the // same object, so we can simply use the operand unchanged. let (ptr, _) = *operands.clone(); - match ptr { - Operand::Copy(place) | Operand::Move(place) => { - state.follow(&state.follow_from_place(place, self.def_id)) - } - Operand::Constant(_) => { - unreachable!("Pointer in offset should not be a constant.") - } - } + self.follow_rvalue(state, ptr) } BinOp::Add | BinOp::AddUnchecked @@ -151,18 +142,8 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { // track them. We assume that even shifted addresses will be within // the same original object. 
let (l_operand, r_operand) = *operands.clone(); - let l_operand_set = match l_operand { - Operand::Copy(place) | Operand::Move(place) => { - state.follow(&state.follow_from_place(place, self.def_id)) - } - Operand::Constant(_) => HashSet::new(), - }; - let r_operand_set = match r_operand { - Operand::Copy(place) | Operand::Move(place) => { - state.follow(&state.follow_from_place(place, self.def_id)) - } - Operand::Constant(_) => HashSet::new(), - }; + let l_operand_set = self.follow_rvalue(state, l_operand); + let r_operand_set = self.follow_rvalue(state, r_operand); l_operand_set.union(&r_operand_set).cloned().collect() } BinOp::Eq @@ -179,12 +160,7 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { } Rvalue::UnaryOp(_, operand) => { // The same story from BinOp applies here, too. Need to track those things. - match operand { - Operand::Copy(place) | Operand::Move(place) => { - state.follow(&state.follow_from_place(place, self.def_id)) - } - Operand::Constant(_) => HashSet::new(), - } + self.follow_rvalue(state, operand) } Rvalue::Len(..) | Rvalue::NullaryOp(..) | Rvalue::Discriminant(..) => { // All of those should yield a constant. @@ -192,31 +168,16 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { } Rvalue::Aggregate(_, operands) => { // Conservatively find a union of all places mentioned here. - let places = operands + operands .into_iter() - .filter_map(|operand| { - match operand { - Operand::Copy(place) | Operand::Move(place) => { - // Simply add a constant here. - let place_or_alloc: PlaceOrAlloc = place.into(); - Some(place_or_alloc.with_def_id(self.def_id)) - } - Operand::Constant(_) => { - // This is a constant, the aliasing state is empty - None - } - } - }) - .collect(); - state.follow(&places) + .flat_map(|operand| self.follow_rvalue(state, operand)) + .collect() } Rvalue::CopyForDeref(place) => { // Use a place unchanged. 
state.follow(&state.follow_from_place(place, self.def_id)) } - Rvalue::ThreadLocalRef(_) => { - unimplemented!("Delayed UB analysis in Kani does not support statics.") - } + Rvalue::ThreadLocalRef(def_id) => HashSet::from([GlobalMemLoc::Global(def_id)]), }; // Create an edge between all places which could be lvalue and all places rvalue // could be pointing to. @@ -281,29 +242,12 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { args[0].node.ty(&self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); - let src_set = match args[2].node { - Operand::Copy(place) | Operand::Move(place) => { - state.follow_from_place(place, self.def_id) - } - Operand::Constant(_) => HashSet::new(), - }; - let dst_set = match args[0].node { - Operand::Copy(place) | Operand::Move(place) => state - .follow_from_place( - place.project_deeper( - &[ProjectionElem::Deref], - self.tcx, - ), - self.def_id, - ), - Operand::Constant(_) => { - unreachable!("pointer cannot be a constant") - } - }; + let src_set = self.follow_rvalue(state, args[2].node.clone()); + let dst_set = self.follow_deref(state, args[0].node.clone()); let destination_set = state.follow_from_place(*destination, self.def_id); state.extend(&destination_set, &state.follow(&dst_set)); - state.extend(&dst_set, &state.follow(&src_set)); + state.extend(&dst_set, &src_set); } // All `atomic_load` intrinsics take `src` as an argument. // This is equivalent to `destination = *src`. 
@@ -317,19 +261,7 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { args[0].node.ty(&self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Not) )); - let src_set = match args[0].node { - Operand::Copy(place) | Operand::Move(place) => state - .follow_from_place( - place.project_deeper( - &[ProjectionElem::Deref], - self.tcx, - ), - self.def_id, - ), - Operand::Constant(_) => { - unreachable!("pointer cannot be a constant") - } - }; + let src_set = self.follow_deref(state, args[0].node.clone()); let destination_set = state.follow_from_place(*destination, self.def_id); state.extend(&destination_set, &state.follow(&src_set)); @@ -346,26 +278,9 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { args[0].node.ty(&self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); - let dst_set = match args[0].node { - Operand::Copy(place) | Operand::Move(place) => state - .follow_from_place( - place.project_deeper( - &[ProjectionElem::Deref], - self.tcx, - ), - self.def_id, - ), - Operand::Constant(_) => { - unreachable!("pointer cannot be a constant") - } - }; - let val_set = match args[1].node { - Operand::Copy(place) | Operand::Move(place) => { - state.follow_from_place(place, self.def_id) - } - Operand::Constant(_) => HashSet::new(), - }; - state.extend(&dst_set, &state.follow(&val_set)); + let dst_set = self.follow_deref(state, args[0].node.clone()); + let val_set = self.follow_rvalue(state, args[1].node.clone()); + state.extend(&dst_set, &val_set); } // All other `atomic` intrinsics take `dst, src` as arguments. // This is equivalent to `destination = *dst; *dst = src`. 
@@ -379,29 +294,12 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { args[0].node.ty(&self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); - let src_set = match args[1].node { - Operand::Copy(place) | Operand::Move(place) => { - state.follow_from_place(place, self.def_id) - } - Operand::Constant(_) => HashSet::new(), - }; - let dst_set = match args[0].node { - Operand::Copy(place) | Operand::Move(place) => state - .follow_from_place( - place.project_deeper( - &[ProjectionElem::Deref], - self.tcx, - ), - self.def_id, - ), - Operand::Constant(_) => { - unreachable!("pointer cannot be a constant") - } - }; + let src_set = self.follow_rvalue(state, args[1].node.clone()); + let dst_set = self.follow_deref(state, args[0].node.clone()); let destination_set = state.follow_from_place(*destination, self.def_id); state.extend(&destination_set, &state.follow(&dst_set)); - state.extend(&dst_set, &state.follow(&src_set)); + state.extend(&dst_set, &src_set); } }; } @@ -452,15 +350,7 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { )); // Destination of the return value. let lvalue_set = state.follow_from_place(*destination, self.def_id); - let rvalue_set = match args[0].node { - // Need to add an additional dereference, since the value is loaded, not pointer. - Operand::Copy(place) | Operand::Move(place) => state - .follow_from_place( - place.project_deeper(&[ProjectionElem::Deref], self.tcx), - self.def_id, - ), - Operand::Constant(_) => HashSet::new(), - }; + let rvalue_set = self.follow_deref(state, args[0].node.clone()); state.extend(&lvalue_set, &state.follow(&rvalue_set)); } // Semantically equivalent *a = b. 
@@ -474,24 +364,9 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { args[0].node.ty(&self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); - let lvalue_set = match args[0].node { - // Need to add an additional dereference, since storing into the dereference. - Operand::Copy(place) | Operand::Move(place) => state - .follow_from_place( - place.project_deeper(&[ProjectionElem::Deref], self.tcx), - self.def_id, - ), - Operand::Constant(_) => { - unreachable!("pointer should not be a constant") - } - }; - let rvalue_set = match args[1].node { - Operand::Copy(place) | Operand::Move(place) => { - state.follow_from_place(place, self.def_id) - } - Operand::Constant(_) => HashSet::new(), - }; - state.extend(&lvalue_set, &state.follow(&rvalue_set)); + let lvalue_set = self.follow_deref(state, args[0].node.clone()); + let rvalue_set = self.follow_rvalue(state, args[1].node.clone()); + state.extend(&lvalue_set, &rvalue_set); } _ => { // TODO: go through the list of intrinsics and make sure none have @@ -514,7 +389,7 @@ impl<'a, 'b, 'c, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'b, 'c, 'tcx> { "alloc::alloc::__rust_alloc" | "alloc::alloc::__rust_alloc_zeroed" => { let lvalue_set = state.follow_from_place(*destination, self.def_id); let rvalue_set = HashSet::from([ - PlaceOrAlloc::new_alloc().with_def_id(self.def_id) + LocalMemLoc::new_alloc().with_def_id(self.def_id) ]); state.extend(&lvalue_set, &rvalue_set); } @@ -557,7 +432,7 @@ fn try_resolve_instance<'tcx>( } } -impl<'a, 'b, 'c, 'tcx> PointsToAnalysis<'a, 'b, 'c, 'tcx> { +impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { // Update the analysis state according to the operation, which is semantically equivalent to `*to = *from`. 
fn apply_copy_effect( &self, @@ -565,41 +440,51 @@ impl<'a, 'b, 'c, 'tcx> PointsToAnalysis<'a, 'b, 'c, 'tcx> { from: Operand<'tcx>, to: Operand<'tcx>, ) { - let lvalue_set = match to { - Operand::Copy(place) | Operand::Move(place) => state.follow_from_place( - place.project_deeper(&[ProjectionElem::Deref], self.tcx), - self.def_id, - ), - Operand::Constant(_) => { - unreachable!("pointer cannot be a constant") - } - }; - let rvalue_set = match from { - Operand::Copy(place) | Operand::Move(place) => state.follow_from_place( - place.project_deeper(&[ProjectionElem::Deref], self.tcx), - self.def_id, - ), - Operand::Constant(_) => { - unreachable!("pointer cannot be a constant") - } - }; + let lvalue_set = self.follow_deref(state, to); + let rvalue_set = self.follow_deref(state, from); state.extend(&lvalue_set, &state.follow(&rvalue_set)); } // Find all places where the operand could point to at the current stage of the program. - fn find_operand_pointees( + fn follow_rvalue( &self, state: &mut PointsToGraph<'tcx>, operand: Operand<'tcx>, - ) -> HashSet<GlobalPlaceOrAlloc<'tcx>> { + ) -> HashSet<GlobalMemLoc<'tcx>> { match operand { Operand::Copy(place) | Operand::Move(place) => { // Find all places which are pointed to by the place. state.follow(&state.follow_from_place(place, self.def_id)) } - Operand::Constant(_) => { - // Constants do not point to anything, the aliasing state is empty. - HashSet::new() + Operand::Constant(const_operand) => { + // Constants could point to a static, so need to check for that. + if let Some(static_def_id) = const_operand.check_static_ptr(self.tcx) { + HashSet::from([GlobalMemLoc::Global(static_def_id)]) + } else { + HashSet::new() + } + } + } + } + + // Find all places where the deref of the operand could point to at the current stage of the program. 
+ fn follow_deref( + &self, + state: &mut PointsToGraph<'tcx>, + operand: Operand<'tcx>, + ) -> HashSet<GlobalMemLoc<'tcx>> { + match operand { + Operand::Copy(place) | Operand::Move(place) => state.follow_from_place( + place.project_deeper(&[ProjectionElem::Deref], self.tcx), + self.def_id, + ), + Operand::Constant(const_operand) => { + // Constants could point to a static, so need to check for that. + if let Some(static_def_id) = const_operand.check_static_ptr(self.tcx) { + HashSet::from([GlobalMemLoc::Global(static_def_id)]) + } else { + HashSet::new() + } } } } @@ -638,23 +523,18 @@ impl<'a, 'b, 'c, 'tcx> PointsToAnalysis<'a, 'b, 'c, 'tcx> { // TODO: this is probably wrong if the arguments are passed via spread, // as in with closures, so we would need to fix that. for (i, arg) in args.iter().enumerate() { - match &arg.node { - Operand::Copy(place) | Operand::Move(place) => { - let lvalue_set = HashSet::from([PlaceOrAlloc::Place(Place { - local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. - projection: List::empty(), - }) - .with_def_id(instance.def_id())]); - let rvalue_set = state.follow_from_place(*place, self.def_id); - state.extend(&lvalue_set, &state.follow(&rvalue_set)); - } - Operand::Constant(_) => {} - } + let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { + local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. + projection: List::empty(), + }) + .with_def_id(instance.def_id())]); + let rvalue_set = self.follow_rvalue(state, arg.node.clone()); + state.extend(&lvalue_set, &rvalue_set); } // Similarly, need to connect the return value to the return // destination. 
let lvalue_set = state.follow_from_place(*destination, self.def_id); - let rvalue_set = HashSet::from([PlaceOrAlloc::Place(Place { + let rvalue_set = HashSet::from([LocalMemLoc::Place(Place { local: 0usize.into(), projection: List::empty(), }) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs index bc4c918b0578..2f61a8aaebb6 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs @@ -16,46 +16,50 @@ use std::{ /// A node in the points-to graph, which could be a place on the stack or a heap allocation. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] -pub enum PlaceOrAlloc<'tcx> { +pub enum LocalMemLoc<'tcx> { Alloc(usize), Place(Place<'tcx>), } /// A node tagged with a DefId, to differentiate between places across different functions. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] -pub struct GlobalPlaceOrAlloc<'tcx> { - def_id: DefId, - place_or_alloc: PlaceOrAlloc<'tcx>, +pub enum GlobalMemLoc<'tcx> { + Local(DefId, LocalMemLoc<'tcx>), + Global(DefId), } -impl<'tcx> GlobalPlaceOrAlloc<'tcx> { - /// Check if the node has a given DefId. - pub fn has_def_id(&self, def_id: DefId) -> bool { - self.def_id == def_id +impl<'tcx> GlobalMemLoc<'tcx> { + /// Returns DefId of the memory location. + pub fn def_id(&self) -> DefId { + match self { + GlobalMemLoc::Local(def_id, _) | GlobalMemLoc::Global(def_id) => *def_id, + } } - /// Remove DefId from the node. 
- pub fn without_def_id(&self) -> PlaceOrAlloc<'tcx> { - self.place_or_alloc + pub fn maybe_local_mem_loc(&self) -> Option<LocalMemLoc<'tcx>> { + match self { + GlobalMemLoc::Local(_, mem_loc) => Some(*mem_loc), + GlobalMemLoc::Global(_) => None, + } } } -impl<'tcx> From<Place<'tcx>> for PlaceOrAlloc<'tcx> { +impl<'tcx> From<Place<'tcx>> for LocalMemLoc<'tcx> { fn from(value: Place<'tcx>) -> Self { - PlaceOrAlloc::Place(value) + LocalMemLoc::Place(value) } } -impl<'tcx> PlaceOrAlloc<'tcx> { +impl<'tcx> LocalMemLoc<'tcx> { /// Generate a new alloc with increasing allocation id. pub fn new_alloc() -> Self { static NEXT_ALLOC_ID: AtomicUsize = AtomicUsize::new(0); - PlaceOrAlloc::Alloc(NEXT_ALLOC_ID.fetch_add(1, Ordering::Relaxed)) + LocalMemLoc::Alloc(NEXT_ALLOC_ID.fetch_add(1, Ordering::Relaxed)) } /// Tag the node with a DefId. - pub fn with_def_id(&self, def_id: DefId) -> GlobalPlaceOrAlloc<'tcx> { - GlobalPlaceOrAlloc { def_id, place_or_alloc: *self } + pub fn with_def_id(&self, def_id: DefId) -> GlobalMemLoc<'tcx> { + GlobalMemLoc::Local(def_id, *self) } } @@ -69,14 +73,18 @@ impl<'tcx> PlaceOrAlloc<'tcx> { #[derive(Clone, Debug, PartialEq, Eq)] pub struct PointsToGraph<'tcx> { /// A hash map of node --> {nodes} edges. - edges: HashMap<GlobalPlaceOrAlloc<'tcx>, HashSet<GlobalPlaceOrAlloc<'tcx>>>, + edges: HashMap<GlobalMemLoc<'tcx>, HashSet<GlobalMemLoc<'tcx>>>, } impl<'tcx> PointsToGraph<'tcx> { + pub fn empty() -> Self { + Self { edges: HashMap::new() } + } + /// Create a new graph, adding all existing places without projections from a body. - pub fn new(body: &Body, def_id: DefId) -> Self { + pub fn from_body(body: &Body, def_id: DefId) -> Self { let places = (0..body.local_decls.len()).map(|local| { - let place: PlaceOrAlloc = + let place: LocalMemLoc = Place { local: local.into(), projection: List::empty() }.into(); (place.with_def_id(def_id), HashSet::new()) }); @@ -84,19 +92,12 @@ impl<'tcx> PointsToGraph<'tcx> { } /// Collect all nodes which have incoming edges from `nodes`. 
- pub fn follow( - &self, - nodes: &HashSet<GlobalPlaceOrAlloc<'tcx>>, - ) -> HashSet<GlobalPlaceOrAlloc<'tcx>> { + pub fn follow(&self, nodes: &HashSet<GlobalMemLoc<'tcx>>) -> HashSet<GlobalMemLoc<'tcx>> { nodes.iter().flat_map(|node| self.edges.get(node).cloned().unwrap_or_default()).collect() } /// For each node in `from`, add an edge to each node in `to`. - pub fn extend( - &mut self, - from: &HashSet<GlobalPlaceOrAlloc<'tcx>>, - to: &HashSet<GlobalPlaceOrAlloc<'tcx>>, - ) { + pub fn extend(&mut self, from: &HashSet<GlobalMemLoc<'tcx>>, to: &HashSet<GlobalMemLoc<'tcx>>) { for node in from.iter() { let node_pointees = self.edges.entry(*node).or_default(); node_pointees.extend(to.iter()); @@ -109,8 +110,8 @@ impl<'tcx> PointsToGraph<'tcx> { &self, place: Place<'tcx>, current_def_id: DefId, - ) -> HashSet<GlobalPlaceOrAlloc<'tcx>> { - let place_or_alloc: PlaceOrAlloc = + ) -> HashSet<GlobalMemLoc<'tcx>> { + let place_or_alloc: LocalMemLoc = Place { local: place.local, projection: List::empty() }.into(); let mut node_set = HashSet::from([place_or_alloc.with_def_id(current_def_id)]); for projection in place.projection { @@ -137,16 +138,30 @@ impl<'tcx> PointsToGraph<'tcx> { let nodes: Vec<String> = self .edges .keys() - .map(|from| format!("\t\"{:?}:{:?}\"", from.def_id, from.place_or_alloc)) + .map(|from| { + format!( + "\t\"{:?}:{:?}\"", + from.def_id(), + from.maybe_local_mem_loc().unwrap_or(LocalMemLoc::Alloc(0)) + ) + }) .collect(); let nodes_str = nodes.join("\n"); let edges: Vec<String> = self .edges .iter() .flat_map(|(from, to)| { - let from = format!("\"{:?}:{:?}\"", from.def_id, from.place_or_alloc); + let from = format!( + "\"{:?}:{:?}\"", + from.def_id(), + from.maybe_local_mem_loc().unwrap_or(LocalMemLoc::Alloc(0)) + ); to.iter().map(move |to| { - let to = format!("\"{:?}:{:?}\"", to.def_id, to.place_or_alloc); + let to = format!( + "\"{:?}:{:?}\"", + to.def_id(), + to.maybe_local_mem_loc().unwrap_or(LocalMemLoc::Alloc(0)) + ); format!("\t{} -> {}", from.clone(), to) }) }) @@ -156,10 +171,7 @@ impl<'tcx> PointsToGraph<'tcx> { } /// Find a transitive closure of the graph starting from a given place. 
- pub fn transitive_closure( - &self, - target: &GlobalPlaceOrAlloc<'tcx>, - ) -> HashSet> { + pub fn transitive_closure(&self, target: &GlobalMemLoc<'tcx>) -> HashSet> { let mut result = HashSet::new(); let mut queue = VecDeque::from([*target]); while !queue.is_empty() { @@ -174,10 +186,7 @@ impl<'tcx> PointsToGraph<'tcx> { } /// Retrieve all places to which a given place is pointing to. - pub fn pointees_of( - &self, - target: &GlobalPlaceOrAlloc<'tcx>, - ) -> HashSet> { + pub fn pointees_of(&self, target: &GlobalMemLoc<'tcx>) -> HashSet> { self.edges .get(&target) .expect(format!("unable to retrieve {:?} from points-to graph", target).as_str()) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs index 279229a7d9d0..7ad53b8a419e 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs @@ -30,10 +30,10 @@ mod ty_layout; /// Trait that the instrumentation target providers must implement to work with the instrumenter. trait TargetFinder { fn find_next( + &mut self, body: &MutableBody, bb: BasicBlockIdx, skip_first: bool, - place_filter: &[Place], ) -> Option; } @@ -60,12 +60,12 @@ pub struct UninitInstrumenter<'a> { impl<'a> UninitInstrumenter<'a> { /// Instrument a body with memory initialization checks, the visitor that generates /// instrumentation targets must be provided via a TF type parameter. - fn instrument( + fn instrument( &mut self, tcx: TyCtxt, mut body: MutableBody, instance: Instance, - place_filter: &[Place], + mut target_finder: impl TargetFinder, ) -> (bool, MutableBody) { // Need to break infinite recursion when memory initialization checks are inserted, so the // internal functions responsible for memory initialization are skipped. 
@@ -95,7 +95,7 @@ impl<'a> UninitInstrumenter<'a> { let mut bb_idx = 0; while bb_idx < body.blocks().len() { if let Some(candidate) = - TF::find_next(&body, bb_idx, skip_first.contains(&bb_idx), place_filter) + target_finder.find_next(&body, bb_idx, skip_first.contains(&bb_idx)) { self.build_check_for_instruction(tcx, &mut body, candidate, &mut skip_first); bb_idx += 1 diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs index 14dbfa3929c4..be400233c78f 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs @@ -20,6 +20,7 @@ use stable_mir::CrateDef; use std::collections::HashMap; use std::fmt::Debug; use tracing::trace; +use uninit_visitor::CheckUninitVisitor; mod uninit_visitor; @@ -64,8 +65,8 @@ impl TransformPass for UninitPass { check_type: self.check_type.clone(), mem_init_fn_cache: &mut self.mem_init_fn_cache, }; - let (instrumentation_added, body) = instrumenter - .instrument::(tcx, new_body, instance, &[]); + let (instrumentation_added, body) = + instrumenter.instrument(tcx, new_body, instance, CheckUninitVisitor::new()); (changed || instrumentation_added, body.into()) } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs index 91a7d526508f..6cede48c36cc 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs @@ -19,8 +19,8 @@ use stable_mir::mir::{ }; use stable_mir::ty::{ConstantKind, RigidTy, TyKind}; -pub struct CheckUninitVisitor<'a> { - locals: &'a [LocalDecl], +pub struct CheckUninitVisitor { + locals: Vec, /// Whether we should skip the next instruction, since it might've been instrumented 
already. /// When we instrument an instruction, we partition the basic block, and the instruction that /// may trigger UB becomes the first instruction of the basic block, which we need to skip @@ -34,26 +34,34 @@ pub struct CheckUninitVisitor<'a> { bb: BasicBlockIdx, } -impl<'a> TargetFinder for CheckUninitVisitor<'a> { +impl TargetFinder for CheckUninitVisitor { fn find_next( + &mut self, body: &MutableBody, bb: BasicBlockIdx, skip_first: bool, - _place_filter: &[Place], ) -> Option<InitRelevantInstruction> { - let mut visitor = CheckUninitVisitor { - locals: body.locals(), - skip_next: skip_first, - current: SourceInstruction::Statement { idx: 0, bb }, - target: None, - bb, - }; - visitor.visit_basic_block(&body.blocks()[bb]); - visitor.target + self.locals = body.locals().to_vec(); + self.skip_next = skip_first; + self.current = SourceInstruction::Statement { idx: 0, bb }; + self.target = None; + self.bb = bb; + self.visit_basic_block(&body.blocks()[bb]); + self.target.clone() } } -impl<'a> CheckUninitVisitor<'a> { +impl CheckUninitVisitor { + pub fn new() -> Self { + Self { + locals: vec![], + skip_next: false, + current: SourceInstruction::Statement { idx: 0, bb: 0 }, + target: None, + bb: 0, + } + } + fn push_target(&mut self, source_op: MemoryInitOp) { let target = self.target.get_or_insert_with(|| InitRelevantInstruction { source: self.current, @@ -64,7 +72,7 @@ impl<'a> CheckUninitVisitor<'a> { } } -impl<'a> MirVisitor for CheckUninitVisitor<'a> { +impl MirVisitor for CheckUninitVisitor { fn visit_statement(&mut self, stmt: &Statement, location: Location) { if self.skip_next { self.skip_next = false; @@ -100,7 +108,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { // if it points to initialized memory. 
if *projection_elem == ProjectionElem::Deref { if let TyKind::RigidTy(RigidTy::RawPtr(..)) = - place_to_add_projections.ty(&self.locals).unwrap().kind() + place_to_add_projections.ty(&&self.locals).unwrap().kind() { self.push_target(MemoryInitOp::Check { operand: Operand::Copy(place_to_add_projections.clone()), @@ -109,7 +117,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { } place_to_add_projections.projection.push(projection_elem.clone()); } - if place_without_deref.ty(&self.locals).unwrap().kind().is_raw_ptr() { + if place_without_deref.ty(&&self.locals).unwrap().kind().is_raw_ptr() { self.push_target(MemoryInitOp::Set { operand: Operand::Copy(place_without_deref), value: true, @@ -118,7 +126,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { } } // Check whether Rvalue creates a new initialized pointer previously not captured inside shadow memory. - if place.ty(&self.locals).unwrap().kind().is_raw_ptr() { + if place.ty(&&self.locals).unwrap().kind().is_raw_ptr() { if let Rvalue::AddressOf(..) = rvalue { self.push_target(MemoryInitOp::Set { operand: Operand::Copy(place.clone()), @@ -160,7 +168,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { match &term.kind { TerminatorKind::Call { func, args, destination, .. 
} => { self.super_terminator(term, location); - let instance = match try_resolve_instance(self.locals, func) { + let instance = match try_resolve_instance(&self.locals, func) { Ok(instance) => instance, Err(reason) => { self.super_terminator(term, location); @@ -189,7 +197,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `{name}`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(..)) )); self.push_target(MemoryInitOp::Check { @@ -203,11 +211,11 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `compare_bytes`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) )); assert!(matches!( - args[1].ty(self.locals).unwrap().kind(), + args[1].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) )); self.push_target(MemoryInitOp::CheckSliceChunk { @@ -228,11 +236,11 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `copy`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) )); assert!(matches!( - args[1].ty(self.locals).unwrap().kind(), + args[1].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) )); self.push_target(MemoryInitOp::CheckSliceChunk { @@ -253,11 +261,11 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `typed_swap`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) )); assert!(matches!( - args[1].ty(self.locals).unwrap().kind(), + args[1].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) 
)); self.push_target(MemoryInitOp::Check { @@ -274,7 +282,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `volatile_load`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) )); self.push_target(MemoryInitOp::Check { @@ -288,7 +296,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `volatile_store`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) )); self.push_target(MemoryInitOp::Set { @@ -304,7 +312,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { "Unexpected number of arguments for `write_bytes`" ); assert!(matches!( - args[0].ty(self.locals).unwrap().kind(), + args[0].ty(&self.locals).unwrap().kind(), TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) )); self.push_target(MemoryInitOp::SetSliceChunk { @@ -355,13 +363,13 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { } TerminatorKind::Drop { place, .. } => { self.super_terminator(term, location); - let place_ty = place.ty(&self.locals).unwrap(); + let place_ty = place.ty(&&self.locals).unwrap(); // When drop is codegen'ed for types that could define their own dropping // behavior, a reference is taken to the place which is later implicitly coerced // to a pointer. Hence, we need to bless this pointer as initialized. 
match place - .ty(&self.locals) + .ty(&&self.locals) .unwrap() .kind() .rigid() @@ -403,7 +411,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { Place { local: place.local, projection: place.projection[..idx].to_vec() }; match elem { ProjectionElem::Deref => { - let ptr_ty = intermediate_place.ty(self.locals).unwrap(); + let ptr_ty = intermediate_place.ty(&self.locals).unwrap(); if ptr_ty.kind().is_raw_ptr() { self.push_target(MemoryInitOp::Check { operand: Operand::Copy(intermediate_place.clone()), @@ -465,7 +473,7 @@ impl<'a> MirVisitor for CheckUninitVisitor<'a> { } } CastKind::Transmute => { - let operand_ty = operand.ty(&self.locals).unwrap(); + let operand_ty = operand.ty(&&self.locals).unwrap(); if !tys_layout_compatible(&operand_ty, &ty) { // If transmuting between two types of incompatible layouts, padding // bytes are exposed, which is UB. From 9df425a4e90ea4baaea3f47efcf6bd93a0fbe04e Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Wed, 24 Jul 2024 11:37:08 -0700 Subject: [PATCH 03/45] Add tests for different ways to trigger delayed UB --- .../expected/uninit/delayed-ub/delayed-ub.rs | 73 ++++++++++++++++++- tests/expected/uninit/delayed-ub/expected | 34 ++++++++- 2 files changed, 102 insertions(+), 5 deletions(-) diff --git a/tests/expected/uninit/delayed-ub/delayed-ub.rs b/tests/expected/uninit/delayed-ub/delayed-ub.rs index bfed0a1f39a1..2e956856a334 100644 --- a/tests/expected/uninit/delayed-ub/delayed-ub.rs +++ b/tests/expected/uninit/delayed-ub/delayed-ub.rs @@ -2,13 +2,78 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT // kani-flags: -Z ghost-state -Z uninit-checks -/// Checks that Kani rejects mutable pointer casts between types of different padding. +//! Checks that Kani catches instances of delayed UB. + +/// Delayed UB via casted mutable pointer write. +#[kani::proof] +fn delayed_ub() { + unsafe { + let mut value: u128 = 0; + // Cast between two pointers of different padding. 
+ let ptr = &mut value as *mut _ as *mut (u8, u32, u64); + *ptr = (4, 4, 4); + let c: u128 = value; // UB: This reads a padding value! + } +} + +/// Delayed UB via transmuted mutable pointer write. +#[kani::proof] +fn delayed_ub_transmute() { + unsafe { + let mut value: u128 = 0; + // Transmute between two pointers of different padding. + let ptr: *mut (u8, u32, u64) = std::mem::transmute(&mut value as *mut _); + *ptr = (4, 4, 4); + let c: u128 = value; // UB: This reads a padding value! + } +} + +static mut VALUE: u128 = 42; + +/// Delayed UB via mutable pointer write into a static. +#[kani::proof] +fn delayed_ub_static() { + unsafe { + let v_ref = &mut VALUE; + // Cast reference to static to a pointer of different padding. + let ptr = &mut VALUE as *mut _ as *mut (u8, u32, u64); + *ptr = (4, 4, 4); + assert!(*v_ref > 0); // UB: This reads a padding value! + } +} + +/// Helper to launder the pointer while keeping the address. +unsafe fn launder(ptr: *mut (u8, u32, u64)) -> *mut (u8, u32, u64) { + let a = ptr; + let b = a as *const u128; + let c: *mut i128 = std::mem::transmute(b); + let d = c as usize; + let e = d + 1; + let f = e - 1; + return f as *mut (u8, u32, u64); +} + +/// Delayed UB via mutable pointer write with additional laundering. +#[kani::proof] +fn delayed_ub_laundered() { + unsafe { + let mut value: u128 = 0; + let ptr = &mut value as *mut _ as *mut (u8, u32, u64); + // Pass pointer around in an attempt to remove the association. + let ptr = launder(ptr); + *ptr = (4, 4, 4); + assert!(value > 0); // UB: This reads a padding value! + } +} + +/// Delayed UB via mutable pointer write using `copy_nonoverlapping` under the hood. #[kani::proof] -fn invalid_value() { +fn delayed_ub_copy() { unsafe { let mut value: u128 = 0; let ptr = &mut value as *mut _ as *mut (u8, u32, u64); - *ptr = (4, 4, 4); // This assignment itself does not cause UB... - let c: u128 = value; // ...but this reads a padding value! 
+ // Use `copy_nonoverlapping` in an attempt to remove the taint. + std::ptr::write(ptr, (4, 4, 4)); + assert!(value > 0); // UB: This reads a padding value! } } diff --git a/tests/expected/uninit/delayed-ub/expected b/tests/expected/uninit/delayed-ub/expected index 960efcdade40..c493a56e228c 100644 --- a/tests/expected/uninit/delayed-ub/expected +++ b/tests/expected/uninit/delayed-ub/expected @@ -1,5 +1,37 @@ +Checking harness delayed_ub_copy... + +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` + +VERIFICATION:- FAILED + +Checking harness delayed_ub_laundered... + +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` + +VERIFICATION:- FAILED + +Checking harness delayed_ub_static... + +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` + +VERIFICATION:- FAILED + +Checking harness delayed_ub_transmute... + +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` + +VERIFICATION:- FAILED + +Checking harness delayed_ub... + Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` VERIFICATION:- FAILED -Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file +Summary: +Verification failed for - delayed_ub_copy +Verification failed for - delayed_ub_laundered +Verification failed for - delayed_ub_static +Verification failed for - delayed_ub_transmute +Verification failed for - delayed_ub +Complete - 0 successfully verified harnesses, 5 failures, 5 total. 
From 268885fea3f2545b224ecfbf2474c396a1080460 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Wed, 24 Jul 2024 14:08:19 -0700 Subject: [PATCH 04/45] Make sure statics can also be analysis targets --- .../delayed_ub/initial_target_visitor.rs | 66 ++++++++++--------- .../transform/check_uninit/delayed_ub/mod.rs | 12 +++- 2 files changed, 44 insertions(+), 34 deletions(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs index fd48952ef366..9908e2990934 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs @@ -6,21 +6,29 @@ use stable_mir::{ mir::{ + alloc::GlobalAlloc, mono::{Instance, InstanceKind}, visit::Location, Body, CastKind, LocalDecl, MirVisitor, Mutability, NonDivergingIntrinsic, Operand, Place, Rvalue, Statement, StatementKind, Terminator, TerminatorKind, }, - ty::{RigidTy, TyKind}, + ty::{ConstantKind, RigidTy, TyKind}, + CrateDef, DefId, }; use crate::kani_middle::transform::check_uninit::ty_layout::tys_layout_compatible; +/// Pointer, write through which might trigger delayed UB. +pub enum AnalysisTarget { + Place(Place), + Static(DefId), +} + /// Visitor that finds initial analysis targets for delayed UB instrumentation. For our purposes, /// analysis targets are *pointers* to places reading and writing from which should be tracked. 
pub struct InitialTargetVisitor { body: Body, - targets: Vec<Place>, + targets: Vec<AnalysisTarget>, } impl InitialTargetVisitor { @@ -28,9 +36,27 @@ impl InitialTargetVisitor { Self { body, targets: vec![] } } - pub fn into_targets(self) -> Vec<Place> { + pub fn into_targets(self) -> Vec<AnalysisTarget> { self.targets } + + pub fn push_operand(&mut self, operand: &Operand) { + match operand { + Operand::Copy(place) | Operand::Move(place) => { + self.targets.push(AnalysisTarget::Place(place.clone())); + } + Operand::Constant(constant) => { + // Extract the static from the constant. + if let ConstantKind::Allocated(allocation) = constant.const_.kind() { + for (_, prov) in &allocation.provenance.ptrs { + if let GlobalAlloc::Static(static_def) = GlobalAlloc::from(prov.0) { + self.targets.push(AnalysisTarget::Static(static_def.def_id())); + }; + } + } + } + } + } } impl MirVisitor for InitialTargetVisitor { @@ -44,15 +70,8 @@ impl MirVisitor for InitialTargetVisitor { RigidTy::RawPtr(to_ty, Mutability::Mut), ) = (operand_ty.kind().rigid().unwrap(), ty.kind().rigid().unwrap()) { - match operand { - Operand::Copy(place) | Operand::Move(place) => { - if !tys_layout_compatible(from_ty, to_ty) { - self.targets.push(place.clone()); - } - } - Operand::Constant(_) => { - unreachable!("cannot be a constant") - } + if !tys_layout_compatible(from_ty, to_ty) { + self.push_operand(operand); } } } @@ -66,14 +85,7 @@ impl MirVisitor for InitialTargetVisitor { if let StatementKind::Intrinsic(NonDivergingIntrinsic::CopyNonOverlapping(copy)) = &stmt.kind { - match &copy.dst { - Operand::Copy(place) | Operand::Move(place) => { - self.targets.push(place.clone()); - } - Operand::Constant(_) => { - unreachable!("cannot be a constant") - } - } + self.push_operand(&copy.dst); } self.super_statement(stmt, location); } @@ -99,12 +111,7 @@ impl MirVisitor for InitialTargetVisitor { TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) )); // Here, `dst` is the second argument. 
- match &args[1] { - Operand::Copy(place) | Operand::Move(place) => { - self.targets.push(place.clone()); - } - Operand::Constant(_) => unreachable!("cannot be a constant"), - } + self.push_operand(&args[1]); } "volatile_copy_memory" | "volatile_copy_nonoverlapping_memory" => { assert_eq!( @@ -121,12 +128,7 @@ impl MirVisitor for InitialTargetVisitor { TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) )); // Here, `dst` is the first argument. - match &args[0] { - Operand::Copy(place) | Operand::Move(place) => { - self.targets.push(place.clone()); - } - Operand::Constant(_) => unreachable!("cannot be a constant"), - } + self.push_operand(&args[0]); } _ => {} } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index 3ab1c2b69d2e..46881cb0c89c 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -16,9 +16,11 @@ use crate::kani_middle::transform::BodyTransformation; use crate::kani_middle::transform::GlobalPass; use crate::kani_middle::transform::TransformationResult; use crate::kani_queries::QueryDb; +use initial_target_visitor::AnalysisTarget; use initial_target_visitor::InitialTargetVisitor; use instrumentation_visitor::InstrumentationVisitor; use points_to_analysis::PointsToAnalysis; +use points_to_graph::GlobalMemLoc; use points_to_graph::LocalMemLoc; use points_to_graph::PointsToGraph; use rustc_middle::ty::TyCtxt; @@ -70,8 +72,14 @@ impl GlobalPass for DelayedUbPass { let mut visitor = InitialTargetVisitor::new(body.clone()); visitor.visit_body(&body); // Convert all places into the format of aliasing graph for later comparison. 
- visitor.into_targets().into_iter().map(move |place| { - LocalMemLoc::Place(rustc_internal::internal(tcx, place)).with_def_id(def_id) + visitor.into_targets().into_iter().map(move |analysis_target| match analysis_target + { + AnalysisTarget::Place(place) => { + LocalMemLoc::Place(rustc_internal::internal(tcx, place)).with_def_id(def_id) + } + AnalysisTarget::Static(def_id) => { + GlobalMemLoc::Global(rustc_internal::internal(tcx, def_id)) + } }) }) .collect(); From 2d99fface54027743323c43ba5267c0555f3314d Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Wed, 24 Jul 2024 15:21:05 -0700 Subject: [PATCH 05/45] Add support for closures, fix function call handling --- .../transform/check_uninit/delayed_ub/mod.rs | 1 + .../delayed_ub/points_to_analysis.rs | 71 +++++++++++++------ .../expected/uninit/delayed-ub/delayed-ub.rs | 32 +++++++-- tests/expected/uninit/delayed-ub/expected | 9 ++- 4 files changed, 86 insertions(+), 27 deletions(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index 46881cb0c89c..a1da69b3bfba 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -112,6 +112,7 @@ impl GlobalPass for DelayedUbPass { call_graph, &instances, transformer, + &PointsToGraph::empty(), ); // Since analysis targets are *pointers*, need to get its followers for instrumentation. 
for target in targets.iter() { diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs index 9cc1d591debc..2fae0b4f51df 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs @@ -37,6 +37,7 @@ pub struct PointsToAnalysis<'a, 'tcx> { call_graph: &'a CallGraph, instances: &'a Vec<Instance>, transformer: &'a mut BodyTransformation, + initial_graph: &'a PointsToGraph<'tcx>, } impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { @@ -49,8 +50,17 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { call_graph: &'a CallGraph, instances: &'a Vec<Instance>, transformer: &'a mut BodyTransformation, + initial_graph: &'a PointsToGraph<'tcx>, ) -> PointsToGraph<'tcx> { - let analysis = Self { body: body.clone(), tcx, def_id, call_graph, instances, transformer }; + let analysis = Self { + body: body.clone(), + tcx, + def_id, + call_graph, + instances, + transformer, + initial_graph, + }; let mut cursor = analysis.into_engine(tcx, &body).iterate_to_fixpoint().into_results_cursor(&body); let mut results = PointsToGraph::from_body(&body, def_id); @@ -70,17 +80,18 @@ impl<'a, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'tcx> { const NAME: &'static str = "PointsToAnalysis"; /// Dataflow state instantiated at the beginning of each basic block. - fn bottom_value(&self, body: &Body<'tcx>) -> Self::Domain { - PointsToGraph::from_body(body, self.def_id) + fn bottom_value(&self, _body: &Body<'tcx>) -> Self::Domain { + PointsToGraph::empty() } - /// Dataflow state instantiated at the entry into the body, for us this coincides with the - /// bottom value, so we don't need to do anything. + /// Dataflow state instantiated at the entry into the body, this should be the current dataflow + /// graph. 
fn initialize_start_block( &self, _body: &rustc_middle::mir::Body<'tcx>, - _state: &mut Self::Domain, + state: &mut Self::Domain, ) { + state.join(&self.initial_graph); } } @@ -506,6 +517,36 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { stable_body.internal_mir(self.tcx) }; + // One missing link is the connections between the arguments in the + // caller and parameters in the callee, add it to the graph. + match new_body.spread_arg { + Some(spread_arg) => { + let spread_arg_operand = args[spread_arg.as_usize()].node.clone(); + for i in 0..new_body.arg_count { + let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { + local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. + projection: List::empty(), + }) + .with_def_id(instance.def_id())]); + // This conservatively assumes all arguments alias to all parameters. This can be + // improved by supporting scalar places. + let rvalue_set = self.follow_rvalue(state, spread_arg_operand.clone()); + state.extend(&lvalue_set, &rvalue_set); + } + } + None => { + for (i, arg) in args.iter().enumerate() { + let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { + local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. + projection: List::empty(), + }) + .with_def_id(instance.def_id())]); + let rvalue_set = self.follow_rvalue(state, arg.node.clone()); + state.extend(&lvalue_set, &rvalue_set); + } + } + } + // Recursively run the analysis and join the results into the current state. let new_result = PointsToAnalysis::run( new_body, @@ -514,25 +555,11 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { self.call_graph, self.instances, self.transformer, + state, ); state.join(&new_result); - // One missing link is the connections between the arguments in the - // caller and parameters in the callee, add it to the graph. 
- // - // TODO: this is probably wrong if the arguments are passed via spread, - // as in with closures, so we would need to fix that. - for (i, arg) in args.iter().enumerate() { - let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { - local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. - projection: List::empty(), - }) - .with_def_id(instance.def_id())]); - let rvalue_set = self.follow_rvalue(state, arg.node.clone()); - state.extend(&lvalue_set, &rvalue_set); - } - // Similarly, need to connect the return value to the return - // destination. + // Now, need to connect the return value to the return destination. let lvalue_set = state.follow_from_place(*destination, self.def_id); let rvalue_set = HashSet::from([LocalMemLoc::Place(Place { local: 0usize.into(), diff --git a/tests/expected/uninit/delayed-ub/delayed-ub.rs b/tests/expected/uninit/delayed-ub/delayed-ub.rs index 2e956856a334..b27a3c484eab 100644 --- a/tests/expected/uninit/delayed-ub/delayed-ub.rs +++ b/tests/expected/uninit/delayed-ub/delayed-ub.rs @@ -43,14 +43,14 @@ fn delayed_ub_static() { } /// Helper to launder the pointer while keeping the address. -unsafe fn launder(ptr: *mut (u8, u32, u64)) -> *mut (u8, u32, u64) { +unsafe fn launder(ptr: *mut u128) -> *mut u128 { let a = ptr; let b = a as *const u128; let c: *mut i128 = std::mem::transmute(b); let d = c as usize; let e = d + 1; let f = e - 1; - return f as *mut (u8, u32, u64); + return f as *mut u128; } /// Delayed UB via mutable pointer write with additional laundering. @@ -58,14 +58,38 @@ unsafe fn launder(ptr: *mut (u8, u32, u64)) -> *mut (u8, u32, u64) { fn delayed_ub_laundered() { unsafe { let mut value: u128 = 0; - let ptr = &mut value as *mut _ as *mut (u8, u32, u64); + let ptr = &mut value as *mut u128; // Pass pointer around in an attempt to remove the association. 
- let ptr = launder(ptr); + let ptr = launder(ptr) as *mut (u8, u32, u64); *ptr = (4, 4, 4); assert!(value > 0); // UB: This reads a padding value! } } +/// Delayed UB via mutable pointer write with additional laundering but via closure. +#[kani::proof] +fn delayed_ub_closure_laundered() { + unsafe { + let mut value: u128 = 0; + let ptr = &mut value as *mut u128; + // Add extra args to test spread_arg. + let launder = |arg1: bool, arg2: bool, arg3: bool, ptr: *mut u128| -> *mut u128 { + let a = ptr; + let b = a as *const u128; + let c: *mut i128 = std::mem::transmute(b); + let d = c as usize; + let e = d + 1; + let f = e - 1; + return f as *mut u128; + }; + // Pass pointer around in an attempt to remove the association. + let ptr = launder(false, true, false, ptr) as *mut (u8, u32, u64); + *ptr = (4, 4, 4); + assert!(value > 0); // UB: This reads a padding value! + } +} + + /// Delayed UB via mutable pointer write using `copy_nonoverlapping` under the hood. #[kani::proof] fn delayed_ub_copy() { diff --git a/tests/expected/uninit/delayed-ub/expected b/tests/expected/uninit/delayed-ub/expected index c493a56e228c..ae55f9cbc793 100644 --- a/tests/expected/uninit/delayed-ub/expected +++ b/tests/expected/uninit/delayed-ub/expected @@ -4,6 +4,12 @@ Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type VERIFICATION:- FAILED +Checking harness delayed_ub_closure_laundered... + +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` + +VERIFICATION:- FAILED + Checking harness delayed_ub_laundered... 
Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` @@ -30,8 +36,9 @@ VERIFICATION:- FAILED Summary: Verification failed for - delayed_ub_copy +Verification failed for - delayed_ub_closure_laundered Verification failed for - delayed_ub_laundered Verification failed for - delayed_ub_static Verification failed for - delayed_ub_transmute Verification failed for - delayed_ub -Complete - 0 successfully verified harnesses, 5 failures, 5 total. +Complete - 0 successfully verified harnesses, 6 failures, 6 total. From 7d69b0285f88dc63689aa6b6fa8a42046961360e Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Wed, 24 Jul 2024 15:22:27 -0700 Subject: [PATCH 06/45] Formatting nit --- tests/expected/uninit/delayed-ub/delayed-ub.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/expected/uninit/delayed-ub/delayed-ub.rs b/tests/expected/uninit/delayed-ub/delayed-ub.rs index b27a3c484eab..73fb446ff7a0 100644 --- a/tests/expected/uninit/delayed-ub/delayed-ub.rs +++ b/tests/expected/uninit/delayed-ub/delayed-ub.rs @@ -89,7 +89,6 @@ fn delayed_ub_closure_laundered() { } } - /// Delayed UB via mutable pointer write using `copy_nonoverlapping` under the hood. 
#[kani::proof] fn delayed_ub_copy() { From 6810535a94dce842b958d3440e8536985c26520b Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Thu, 25 Jul 2024 09:38:27 -0700 Subject: [PATCH 07/45] Fix bugs with inter-function analysis, add more tests --- .../transform/check_uninit/delayed_ub/mod.rs | 17 +++- .../delayed_ub/points_to_analysis.rs | 92 ++++++++++++------- .../delayed_ub/points_to_graph.rs | 42 +++------ .../expected/uninit/delayed-ub/delayed-ub.rs | 23 +++++ tests/expected/uninit/delayed-ub/expected | 9 +- 5 files changed, 114 insertions(+), 69 deletions(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index a1da69b3bfba..d8c025aba145 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -25,6 +25,7 @@ use points_to_graph::LocalMemLoc; use points_to_graph::PointsToGraph; use rustc_middle::ty::TyCtxt; use rustc_mir_dataflow::JoinSemiLattice; +use rustc_session::config::OutputType; use rustc_smir::rustc_internal; use stable_mir::mir::mono::{Instance, MonoItem}; use stable_mir::mir::MirVisitor; @@ -112,16 +113,22 @@ impl GlobalPass for DelayedUbPass { call_graph, &instances, transformer, - &PointsToGraph::empty(), + PointsToGraph::empty(), ); - // Since analysis targets are *pointers*, need to get its followers for instrumentation. - for target in targets.iter() { - analysis_targets.extend(results.pointees_of(target)); - } global_points_to_graph.join(&results); } } + // Since analysis targets are *pointers*, need to get its followers for instrumentation. + for target in targets.iter() { + analysis_targets.extend(global_points_to_graph.pointees_of(target)); + } + + // If we are generating MIR, generate the points-to graph as well. 
+ if tcx.sess.opts.output_types.contains_key(&OutputType::Mir) { + global_points_to_graph.dump("points-to.dot"); + } + // Instrument each instance based on the final targets we found. for instance in instances { let internal_def_id = rustc_internal::internal(tcx, instance.def.def_id()); diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs index 2fae0b4f51df..55e3ef06bbef 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs @@ -37,7 +37,7 @@ pub struct PointsToAnalysis<'a, 'tcx> { call_graph: &'a CallGraph, instances: &'a Vec, transformer: &'a mut BodyTransformation, - initial_graph: &'a PointsToGraph<'tcx>, + initial_graph: PointsToGraph<'tcx>, } impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { @@ -50,7 +50,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { call_graph: &'a CallGraph, instances: &'a Vec, transformer: &'a mut BodyTransformation, - initial_graph: &'a PointsToGraph<'tcx>, + initial_graph: PointsToGraph<'tcx>, ) -> PointsToGraph<'tcx> { let analysis = Self { body: body.clone(), @@ -63,7 +63,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { }; let mut cursor = analysis.into_engine(tcx, &body).iterate_to_fixpoint().into_results_cursor(&body); - let mut results = PointsToGraph::from_body(&body, def_id); + let mut results = PointsToGraph::empty(); for (idx, _) in body.basic_blocks.iter().enumerate() { cursor.seek_to_block_end(idx.into()); results.join(cursor.get()); @@ -88,10 +88,11 @@ impl<'a, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'tcx> { /// graph. 
fn initialize_start_block( &self, - _body: &rustc_middle::mir::Body<'tcx>, + body: &rustc_middle::mir::Body<'tcx>, state: &mut Self::Domain, ) { state.join(&self.initial_graph); + state.join(&PointsToGraph::from_body(body, self.def_id)); } } @@ -508,8 +509,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { args: &[Spanned>], destination: &Place<'tcx>, ) { - // Here we simply call another function, so need to retrieve internal - // body for it. + // Here we simply call another function, so need to retrieve internal body for it. let new_body = { let internal_instance = rustc_internal::stable(instance); assert!(self.instances.contains(&internal_instance)); @@ -517,37 +517,60 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { stable_body.internal_mir(self.tcx) }; - // One missing link is the connections between the arguments in the - // caller and parameters in the callee, add it to the graph. - match new_body.spread_arg { - Some(spread_arg) => { - let spread_arg_operand = args[spread_arg.as_usize()].node.clone(); - for i in 0..new_body.arg_count { - let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { - local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. - projection: List::empty(), - }) - .with_def_id(instance.def_id())]); - // This conservatively assumes all arguments alias to all parameters. This can be - // improved by supporting scalar places. - let rvalue_set = self.follow_rvalue(state, spread_arg_operand.clone()); - state.extend(&lvalue_set, &rvalue_set); + // In order to be efficient, create a new graph for the function call analysis, which only + // contains arguments and anything transitively reachable from them. 
+ let mut initial_graph = PointsToGraph::empty(); + for arg in args.iter() { + match arg.node { + Operand::Copy(place) | Operand::Move(place) => { + initial_graph.join( + &state.transitive_closure(state.follow_from_place(place, self.def_id)), + ); } + Operand::Constant(_) => {} } - None => { - for (i, arg) in args.iter().enumerate() { - let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { - local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. - projection: List::empty(), - }) - .with_def_id(instance.def_id())]); - let rvalue_set = self.follow_rvalue(state, arg.node.clone()); - state.extend(&lvalue_set, &rvalue_set); - } + } + + // A missing link is the connections between the arguments in the caller and parameters in + // the callee, add it to the graph. + if self.tcx.is_closure_like(instance.def.def_id()) { + // This means we encountered a closure call. + assert!(args.len() == 2); + // First, connect all upvars. + let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { + local: 1usize.into(), + projection: List::empty(), + }) + .with_def_id(instance.def_id())]); + let rvalue_set = self.follow_rvalue(state, args[0].node.clone()); + initial_graph.extend(&lvalue_set, &rvalue_set); + // Then, connect the argument tuple to each of the spread arguments. + let spread_arg_operand = args[1].node.clone(); + for i in 0..new_body.arg_count { + let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { + local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. + projection: List::empty(), + }) + .with_def_id(instance.def_id())]); + // This conservatively assumes all arguments alias to all parameters. This can be + // improved by supporting scalar places. + let rvalue_set = self.follow_rvalue(state, spread_arg_operand.clone()); + initial_graph.extend(&lvalue_set, &rvalue_set); + } + } else { + // Otherwise, simply connect all arguments to parameters. 
+ for (i, arg) in args.iter().enumerate() { + let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { + local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. + projection: List::empty(), + }) + .with_def_id(instance.def_id())]); + let rvalue_set = self.follow_rvalue(state, arg.node.clone()); + initial_graph.extend(&lvalue_set, &rvalue_set); } } - // Recursively run the analysis and join the results into the current state. + // Run the analysis. let new_result = PointsToAnalysis::run( new_body, self.tcx, @@ -555,11 +578,12 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { self.call_graph, self.instances, self.transformer, - state, + initial_graph, ); + // Merge the results into the current state. state.join(&new_result); - // Now, need to connect the return value to the return destination. + // Connect the return value to the return destination. let lvalue_set = state.follow_from_place(*destination, self.def_id); let rvalue_set = HashSet::from([LocalMemLoc::Place(Place { local: 0usize.into(), diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs index 2f61a8aaebb6..d8a1b3208f6c 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs @@ -135,33 +135,16 @@ impl<'tcx> PointsToGraph<'tcx> { /// Dump the graph into a file using the graphviz format for later visualization. 
pub fn dump(&self, file_path: &str) { - let nodes: Vec = self - .edges - .keys() - .map(|from| { - format!( - "\t\"{:?}:{:?}\"", - from.def_id(), - from.maybe_local_mem_loc().unwrap_or(LocalMemLoc::Alloc(0)) - ) - }) - .collect(); + let nodes: Vec = + self.edges.keys().map(|from| format!("\t\"{:?}\"", from)).collect(); let nodes_str = nodes.join("\n"); let edges: Vec = self .edges .iter() .flat_map(|(from, to)| { - let from = format!( - "\"{:?}:{:?}\"", - from.def_id(), - from.maybe_local_mem_loc().unwrap_or(LocalMemLoc::Alloc(0)) - ); + let from = format!("\"{:?}\"", from); to.iter().map(move |to| { - let to = format!( - "\"{:?}:{:?}\"", - to.def_id(), - to.maybe_local_mem_loc().unwrap_or(LocalMemLoc::Alloc(0)) - ); + let to = format!("\"{:?}\"", to); format!("\t{} -> {}", from.clone(), to) }) }) @@ -171,15 +154,16 @@ impl<'tcx> PointsToGraph<'tcx> { } /// Find a transitive closure of the graph starting from a given place. - pub fn transitive_closure(&self, target: &GlobalMemLoc<'tcx>) -> HashSet> { - let mut result = HashSet::new(); - let mut queue = VecDeque::from([*target]); + pub fn transitive_closure(&self, targets: HashSet>) -> PointsToGraph<'tcx> { + let mut result = PointsToGraph::empty(); + let mut queue = VecDeque::from_iter(targets.into_iter()); while !queue.is_empty() { let next_target = queue.pop_front().unwrap(); - if !result.contains(&next_target) { - let outgoing_edges = self.edges.get(&next_target).unwrap(); + if !result.edges.contains_key(&next_target) { + let outgoing_edges = + self.edges.get(&next_target).cloned().unwrap_or(HashSet::new()); queue.extend(outgoing_edges.iter()); - result.insert(next_target); + result.edges.insert(next_target, outgoing_edges.clone()); } } result @@ -209,9 +193,9 @@ impl<'tcx> JoinSemiLattice for PointsToGraph<'tcx> { // graph. if to.difference(self.edges.get(from).unwrap()).count() != 0 { updated = true; + // Add all edges to the original graph. 
+ self.edges.get_mut(from).unwrap().extend(to.iter()); } - // Add all edges to the original graph. - self.edges.get_mut(from).unwrap().extend(to.iter()); } else { // If node does not exist, add the node and all edges from it. self.edges.insert(*from, to.clone()); diff --git a/tests/expected/uninit/delayed-ub/delayed-ub.rs b/tests/expected/uninit/delayed-ub/delayed-ub.rs index 73fb446ff7a0..74196d144f0a 100644 --- a/tests/expected/uninit/delayed-ub/delayed-ub.rs +++ b/tests/expected/uninit/delayed-ub/delayed-ub.rs @@ -89,6 +89,29 @@ fn delayed_ub_closure_laundered() { } } +/// Delayed UB via mutable pointer write with additional laundering but via closure captures. +#[kani::proof] +fn delayed_ub_closure_capture_laundered() { + unsafe { + let mut value: u128 = 0; + let ptr = &mut value as *mut u128; + // Add extra args to test spread_arg. + let launder = |arg1: bool, arg2: bool, arg3: bool| -> *mut u128 { + let a = ptr; + let b = a as *const u128; + let c: *mut i128 = std::mem::transmute(b); + let d = c as usize; + let e = d + 1; + let f = e - 1; + return f as *mut u128; + }; + // Pass pointer around in an attempt to remove the association. + let ptr = launder(false, true, false) as *mut (u8, u32, u64); + *ptr = (4, 4, 4); + assert!(value > 0); // UB: This reads a padding value! + } +} + /// Delayed UB via mutable pointer write using `copy_nonoverlapping` under the hood. #[kani::proof] fn delayed_ub_copy() { diff --git a/tests/expected/uninit/delayed-ub/expected b/tests/expected/uninit/delayed-ub/expected index ae55f9cbc793..d0145966b002 100644 --- a/tests/expected/uninit/delayed-ub/expected +++ b/tests/expected/uninit/delayed-ub/expected @@ -4,6 +4,12 @@ Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type VERIFICATION:- FAILED +Checking harness delayed_ub_closure_capture_laundered... 
+ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` + +VERIFICATION:- FAILED + Checking harness delayed_ub_closure_laundered... Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` @@ -36,9 +42,10 @@ VERIFICATION:- FAILED Summary: Verification failed for - delayed_ub_copy +Verification failed for - delayed_ub_closure_capture_laundered Verification failed for - delayed_ub_closure_laundered Verification failed for - delayed_ub_laundered Verification failed for - delayed_ub_static Verification failed for - delayed_ub_transmute Verification failed for - delayed_ub -Complete - 0 successfully verified harnesses, 6 failures, 6 total. +Complete - 0 successfully verified harnesses, 7 failures, 7 total. From a508e6277e1b17b1b8ac01ccdbac388f310d4fff Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Thu, 25 Jul 2024 11:23:43 -0700 Subject: [PATCH 08/45] Fix methods changed by merge --- .../transform/check_uninit/delayed_ub/points_to_graph.rs | 8 ++++---- .../kani_middle/transform/check_uninit/ptr_uninit/mod.rs | 2 +- .../transform/check_uninit/relevant_instruction.rs | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs index d8a1b3208f6c..00a3bd779d3c 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs @@ -156,15 +156,15 @@ impl<'tcx> PointsToGraph<'tcx> { /// Find a transitive closure of the graph starting from a given place. 
pub fn transitive_closure(&self, targets: HashSet>) -> PointsToGraph<'tcx> { let mut result = PointsToGraph::empty(); - let mut queue = VecDeque::from_iter(targets.into_iter()); + let mut queue = VecDeque::from_iter(targets); while !queue.is_empty() { let next_target = queue.pop_front().unwrap(); - if !result.edges.contains_key(&next_target) { + result.edges.entry(next_target).or_insert_with(|| { let outgoing_edges = self.edges.get(&next_target).cloned().unwrap_or(HashSet::new()); queue.extend(outgoing_edges.iter()); - result.edges.insert(next_target, outgoing_edges.clone()); - } + outgoing_edges.clone() + }); } result } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs index be400233c78f..dc462079a6bc 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs @@ -119,7 +119,7 @@ fn inject_memory_init_setup( ) .unwrap(); - new_body.add_call( + new_body.insert_call( &memory_initialization_init, &mut source, InsertPosition::Before, diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/relevant_instruction.rs b/kani-compiler/src/kani_middle/transform/check_uninit/relevant_instruction.rs index 417dee46d9c1..3bc5b534a23b 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/relevant_instruction.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/relevant_instruction.rs @@ -52,7 +52,7 @@ impl MemoryInitOp { Operand::Copy(place) | Operand::Move(place) => place, Operand::Constant(_) => unreachable!(), }; - body.new_assignment( + body.insert_assignment( Rvalue::AddressOf(Mutability::Not, place.clone()), source, self.position(), From bf1ffc6eecb44421e0736105552ed2d700b4066b Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 26 Jul 2024 09:25:48 -0700 Subject: [PATCH 09/45] Mitigate issues with non-deterministic memory 
initialization instrumentation --- .../compiler_interface.rs | 4 +-- .../transform/check_uninit/delayed_ub/mod.rs | 2 -- .../delayed_ub/points_to_analysis.rs | 29 +++---------------- .../delayed_ub/points_to_graph.rs | 8 +++-- .../src/kani_middle/transform/mod.rs | 2 +- 5 files changed, 13 insertions(+), 32 deletions(-) diff --git a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs index 6b73b25cab67..a850003b91b5 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs @@ -267,8 +267,8 @@ impl CodegenBackend for GotocCodegenBackend { for unit in units.iter() { // We reset the body cache for now because each codegen unit has different // configurations that affect how we transform the instance body. - let mut transformer = BodyTransformation::new(&queries, tcx, &unit); for harness in &unit.harnesses { + let transformer = BodyTransformation::new(&queries, tcx, &unit); let model_path = units.harness_model_path(*harness).unwrap(); let contract_metadata = contract_metadata_for_harness(tcx, harness.def.def_id()).unwrap(); @@ -280,7 +280,7 @@ impl CodegenBackend for GotocCodegenBackend { contract_metadata, transformer, ); - transformer = results.extend(gcx, items, None); + results.extend(gcx, items, None); if let Some(assigns_contract) = contract_info { modifies_instances.push((*harness, assigns_contract)); } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index d8c025aba145..e390b8ec030d 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -111,8 +111,6 @@ impl GlobalPass for DelayedUbPass { tcx, internal_def_id, call_graph, - &instances, - transformer, PointsToGraph::empty(), ); 
global_points_to_graph.join(&results); diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs index 55e3ef06bbef..971ec1b108a8 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs @@ -9,7 +9,6 @@ use crate::kani_middle::{ transform::{ check_uninit::delayed_ub::points_to_graph::{GlobalMemLoc, LocalMemLoc, PointsToGraph}, internal_mir::RustcInternalMir, - BodyTransformation, }, }; use rustc_ast::Mutability; @@ -25,7 +24,6 @@ use rustc_middle::{ use rustc_mir_dataflow::{Analysis, AnalysisDomain, Forward, JoinSemiLattice}; use rustc_smir::rustc_internal; use rustc_span::source_map::Spanned; -use stable_mir::mir::mono::Instance as StableInstance; use std::collections::HashSet; /// Main points-to analysis object. Since this one will be created anew for each instance analysis, @@ -35,8 +33,6 @@ pub struct PointsToAnalysis<'a, 'tcx> { body: Body<'tcx>, tcx: TyCtxt<'tcx>, call_graph: &'a CallGraph, - instances: &'a Vec, - transformer: &'a mut BodyTransformation, initial_graph: PointsToGraph<'tcx>, } @@ -48,19 +44,9 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { tcx: TyCtxt<'tcx>, def_id: DefId, call_graph: &'a CallGraph, - instances: &'a Vec, - transformer: &'a mut BodyTransformation, initial_graph: PointsToGraph<'tcx>, ) -> PointsToGraph<'tcx> { - let analysis = Self { - body: body.clone(), - tcx, - def_id, - call_graph, - instances, - transformer, - initial_graph, - }; + let analysis = Self { body: body.clone(), tcx, def_id, call_graph, initial_graph }; let mut cursor = analysis.into_engine(tcx, &body).iterate_to_fixpoint().into_results_cursor(&body); let mut results = PointsToGraph::empty(); @@ -86,11 +72,7 @@ impl<'a, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'tcx> { /// Dataflow state 
instantiated at the entry into the body, this should be the current dataflow /// graph. - fn initialize_start_block( - &self, - body: &rustc_middle::mir::Body<'tcx>, - state: &mut Self::Domain, - ) { + fn initialize_start_block(&self, body: &Body<'tcx>, state: &mut Self::Domain) { state.join(&self.initial_graph); state.join(&PointsToGraph::from_body(body, self.def_id)); } @@ -511,9 +493,8 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { ) { // Here we simply call another function, so need to retrieve internal body for it. let new_body = { - let internal_instance = rustc_internal::stable(instance); - assert!(self.instances.contains(&internal_instance)); - let stable_body = self.transformer.body(self.tcx, rustc_internal::stable(instance)); + let stable_instance = rustc_internal::stable(instance); + let stable_body = stable_instance.body().unwrap(); stable_body.internal_mir(self.tcx) }; @@ -576,8 +557,6 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { self.tcx, instance.def_id(), self.call_graph, - self.instances, - self.transformer, initial_graph, ); // Merge the results into the current state. diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs index 00a3bd779d3c..abf8615784e8 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs @@ -135,10 +135,12 @@ impl<'tcx> PointsToGraph<'tcx> { /// Dump the graph into a file using the graphviz format for later visualization. 
pub fn dump(&self, file_path: &str) { - let nodes: Vec = + let mut nodes: Vec = self.edges.keys().map(|from| format!("\t\"{:?}\"", from)).collect(); + nodes.sort(); let nodes_str = nodes.join("\n"); - let edges: Vec = self + + let mut edges: Vec = self .edges .iter() .flat_map(|(from, to)| { @@ -149,7 +151,9 @@ impl<'tcx> PointsToGraph<'tcx> { }) }) .collect(); + edges.sort(); let edges_str = edges.join("\n"); + std::fs::write(file_path, format!("digraph {{\n{}\n{}\n}}", nodes_str, edges_str)).unwrap(); } diff --git a/kani-compiler/src/kani_middle/transform/mod.rs b/kani-compiler/src/kani_middle/transform/mod.rs index ce2f1742f71f..5c772b5a0d38 100644 --- a/kani-compiler/src/kani_middle/transform/mod.rs +++ b/kani-compiler/src/kani_middle/transform/mod.rs @@ -192,8 +192,8 @@ pub struct GlobalPasses { impl GlobalPasses { pub fn new(queries: &QueryDb, tcx: TyCtxt) -> Self { let mut global_passes = GlobalPasses { global_passes: vec![] }; - global_passes.add_global_pass(queries, DumpMirPass::new(tcx)); global_passes.add_global_pass(queries, DelayedUbPass::new(CheckType::new_assert(tcx))); + global_passes.add_global_pass(queries, DumpMirPass::new(tcx)); global_passes } From 85a550a53d373d593c72d814ac17d27549bb5f37 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 26 Jul 2024 11:24:09 -0700 Subject: [PATCH 10/45] Correctness fixes and performance improvements --- .../transform/check_uninit/delayed_ub/mod.rs | 3 +- .../delayed_ub/points_to_analysis.rs | 28 ++++++++------- .../delayed_ub/points_to_graph.rs | 34 +++++++++++++------ 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index e390b8ec030d..e31aa8826ab7 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -24,7 +24,6 @@ use 
points_to_graph::GlobalMemLoc; use points_to_graph::LocalMemLoc; use points_to_graph::PointsToGraph; use rustc_middle::ty::TyCtxt; -use rustc_mir_dataflow::JoinSemiLattice; use rustc_session::config::OutputType; use rustc_smir::rustc_internal; use stable_mir::mir::mono::{Instance, MonoItem}; @@ -113,7 +112,7 @@ impl GlobalPass for DelayedUbPass { call_graph, PointsToGraph::empty(), ); - global_points_to_graph.join(&results); + global_points_to_graph.consume(results); } } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs index 971ec1b108a8..c18719c380c8 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs @@ -21,7 +21,7 @@ use rustc_middle::{ }, ty::{Instance, InstanceKind, List, ParamEnv, TyCtxt, TyKind}, }; -use rustc_mir_dataflow::{Analysis, AnalysisDomain, Forward, JoinSemiLattice}; +use rustc_mir_dataflow::{Analysis, AnalysisDomain, Forward}; use rustc_smir::rustc_internal; use rustc_span::source_map::Spanned; use std::collections::HashSet; @@ -50,9 +50,11 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { let mut cursor = analysis.into_engine(tcx, &body).iterate_to_fixpoint().into_results_cursor(&body); let mut results = PointsToGraph::empty(); - for (idx, _) in body.basic_blocks.iter().enumerate() { - cursor.seek_to_block_end(idx.into()); - results.join(cursor.get()); + for (idx, bb) in body.basic_blocks.iter().enumerate() { + if let TerminatorKind::Return = bb.terminator().kind { + cursor.seek_to_block_end(idx.into()); + results.consume(cursor.get().clone()); + } } results } @@ -73,8 +75,8 @@ impl<'a, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'tcx> { /// Dataflow state instantiated at the entry into the body, this should be the current dataflow /// graph. 
fn initialize_start_block(&self, body: &Body<'tcx>, state: &mut Self::Domain) { - state.join(&self.initial_graph); - state.join(&PointsToGraph::from_body(body, self.def_id)); + state.consume(self.initial_graph.clone()); + state.consume(PointsToGraph::from_body(body, self.def_id)); } } @@ -209,7 +211,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { &mut self, state: &mut Self::Domain, terminator: &'mir Terminator<'tcx>, - _location: Location, + location: Location, ) -> TerminatorEdges<'mir, 'tcx> { if let TerminatorKind::Call { func, args, destination, .. } = &terminator.kind { let instance = match try_resolve_instance(&self.body, func, self.tcx) { @@ -382,9 +384,9 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { // which creates a new node we need to add to the points-to graph. "alloc::alloc::__rust_alloc" | "alloc::alloc::__rust_alloc_zeroed" => { let lvalue_set = state.follow_from_place(*destination, self.def_id); - let rvalue_set = HashSet::from([ - LocalMemLoc::new_alloc().with_def_id(self.def_id) - ]); + let rvalue_set = + HashSet::from([LocalMemLoc::new_alloc(self.def_id, location) + .with_def_id(self.def_id)]); state.extend(&lvalue_set, &rvalue_set); } _ => {} @@ -504,8 +506,8 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { for arg in args.iter() { match arg.node { Operand::Copy(place) | Operand::Move(place) => { - initial_graph.join( - &state.transitive_closure(state.follow_from_place(place, self.def_id)), + initial_graph.consume( + state.transitive_closure(state.follow_from_place(place, self.def_id)), ); } Operand::Constant(_) => {} @@ -560,7 +562,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { initial_graph, ); // Merge the results into the current state. - state.join(&new_result); + state.consume(new_result); // Connect the return value to the return destination. 
let lvalue_set = state.follow_from_place(*destination, self.def_id); diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs index abf8615784e8..e6c28fa48a8b 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs @@ -5,19 +5,17 @@ use rustc_hir::def_id::DefId; use rustc_middle::{ - mir::{Body, Place, ProjectionElem}, + mir::{Body, Location, Place, ProjectionElem}, ty::List, }; use rustc_mir_dataflow::{fmt::DebugWithContext, JoinSemiLattice}; -use std::{ - collections::{HashMap, HashSet, VecDeque}, - sync::atomic::{AtomicUsize, Ordering}, -}; +use std::collections::{HashMap, HashSet, VecDeque}; /// A node in the points-to graph, which could be a place on the stack or a heap allocation. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] pub enum LocalMemLoc<'tcx> { - Alloc(usize), + /// Using a combination of DefId + Location implements allocation-site abstraction. + Alloc(DefId, Location), Place(Place<'tcx>), } @@ -52,9 +50,8 @@ impl<'tcx> From> for LocalMemLoc<'tcx> { impl<'tcx> LocalMemLoc<'tcx> { /// Generate a new alloc with increasing allocation id. - pub fn new_alloc() -> Self { - static NEXT_ALLOC_ID: AtomicUsize = AtomicUsize::new(0); - LocalMemLoc::Alloc(NEXT_ALLOC_ID.fetch_add(1, Ordering::Relaxed)) + pub fn new_alloc(def_id: DefId, location: Location) -> Self { + LocalMemLoc::Alloc(def_id, location) } /// Tag the node with a DefId. @@ -160,7 +157,12 @@ impl<'tcx> PointsToGraph<'tcx> { /// Find a transitive closure of the graph starting from a given place. pub fn transitive_closure(&self, targets: HashSet>) -> PointsToGraph<'tcx> { let mut result = PointsToGraph::empty(); + // Working queue. let mut queue = VecDeque::from_iter(targets); + // Add all statics, as they can be accessed at any point. 
+ let statics = self.edges.keys().filter(|node| matches!(node, GlobalMemLoc::Global(_))); + queue.extend(statics); + // Add all entries. while !queue.is_empty() { let next_target = queue.pop_front().unwrap(); result.edges.entry(next_target).or_insert_with(|| { @@ -180,6 +182,14 @@ impl<'tcx> PointsToGraph<'tcx> { .expect(format!("unable to retrieve {:?} from points-to graph", target).as_str()) .clone() } + + // Merge the other graph into self, consuming it. + pub fn consume(&mut self, other: PointsToGraph<'tcx>) { + for (from, to) in other.edges { + let existing_to = self.edges.entry(from).or_default(); + existing_to.extend(to); + } + } } /// Since we are performing the analysis using a dataflow, we need to implement a proper monotonous @@ -195,10 +205,12 @@ impl<'tcx> JoinSemiLattice for PointsToGraph<'tcx> { if self.edges.contains_key(from) { // Check if there are any edges that are in the other graph but not in the original // graph. - if to.difference(self.edges.get(from).unwrap()).count() != 0 { + let difference: HashSet<_> = + to.difference(self.edges.get(from).unwrap()).cloned().collect(); + if difference.len() != 0 { updated = true; // Add all edges to the original graph. - self.edges.get_mut(from).unwrap().extend(to.iter()); + self.edges.get_mut(from).unwrap().extend(difference); } } else { // If node does not exist, add the node and all edges from it. 
From e597689143c3360520ca0a9391f4398af6aa5d23 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 26 Jul 2024 12:10:04 -0700 Subject: [PATCH 11/45] Add more tests, remove unnecessary nodes from the graph --- .../delayed_ub/points_to_analysis.rs | 3 +- .../delayed_ub/points_to_graph.rs | 17 +------- .../delayed-ub-transmute.rs | 14 ------- .../uninit/delayed-ub-transmute/expected | 5 --- .../expected/uninit/delayed-ub/delayed-ub.rs | 41 +++++++++++++++++++ tests/expected/uninit/delayed-ub/expected | 16 +++++++- 6 files changed, 59 insertions(+), 37 deletions(-) delete mode 100644 tests/expected/uninit/delayed-ub-transmute/delayed-ub-transmute.rs delete mode 100644 tests/expected/uninit/delayed-ub-transmute/expected diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs index c18719c380c8..75eabc754ba5 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs @@ -74,9 +74,8 @@ impl<'a, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'tcx> { /// Dataflow state instantiated at the entry into the body, this should be the current dataflow /// graph. 
- fn initialize_start_block(&self, body: &Body<'tcx>, state: &mut Self::Domain) { + fn initialize_start_block(&self, _body: &Body<'tcx>, state: &mut Self::Domain) { state.consume(self.initial_graph.clone()); - state.consume(PointsToGraph::from_body(body, self.def_id)); } } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs index e6c28fa48a8b..82f059b20716 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs @@ -5,7 +5,7 @@ use rustc_hir::def_id::DefId; use rustc_middle::{ - mir::{Body, Location, Place, ProjectionElem}, + mir::{Location, Place, ProjectionElem}, ty::List, }; use rustc_mir_dataflow::{fmt::DebugWithContext, JoinSemiLattice}; @@ -78,16 +78,6 @@ impl<'tcx> PointsToGraph<'tcx> { Self { edges: HashMap::new() } } - /// Create a new graph, adding all existing places without projections from a body. - pub fn from_body(body: &Body, def_id: DefId) -> Self { - let places = (0..body.local_decls.len()).map(|local| { - let place: LocalMemLoc = - Place { local: local.into(), projection: List::empty() }.into(); - (place.with_def_id(def_id), HashSet::new()) - }); - Self { edges: HashMap::from_iter(places) } - } - /// Collect all nodes which have incoming edges from `nodes`. pub fn follow(&self, nodes: &HashSet>) -> HashSet> { nodes.iter().flat_map(|node| self.edges.get(node).cloned().unwrap_or_default()).collect() @@ -177,10 +167,7 @@ impl<'tcx> PointsToGraph<'tcx> { /// Retrieve all places to which a given place is pointing to. 
pub fn pointees_of(&self, target: &GlobalMemLoc<'tcx>) -> HashSet> { - self.edges - .get(&target) - .expect(format!("unable to retrieve {:?} from points-to graph", target).as_str()) - .clone() + self.edges.get(&target).unwrap_or(&HashSet::new()).clone() } // Merge the other graph into self, consuming it. diff --git a/tests/expected/uninit/delayed-ub-transmute/delayed-ub-transmute.rs b/tests/expected/uninit/delayed-ub-transmute/delayed-ub-transmute.rs deleted file mode 100644 index df769e39a8b2..000000000000 --- a/tests/expected/uninit/delayed-ub-transmute/delayed-ub-transmute.rs +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright Kani Contributors -// SPDX-License-Identifier: Apache-2.0 OR MIT -// kani-flags: -Z uninit-checks - -/// Checks that Kani rejects mutable pointer casts between types of different padding. -#[kani::proof] -fn invalid_value() { - unsafe { - let mut value: u128 = 0; - let ptr: *mut (u8, u32, u64) = std::mem::transmute(&mut value as *mut _); - *ptr = (4, 4, 4); // This assignment itself does not cause UB... - let c: u128 = value; // ...but this reads a padding value! - } -} diff --git a/tests/expected/uninit/delayed-ub-transmute/expected b/tests/expected/uninit/delayed-ub-transmute/expected deleted file mode 100644 index 960efcdade40..000000000000 --- a/tests/expected/uninit/delayed-ub-transmute/expected +++ /dev/null @@ -1,5 +0,0 @@ -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` - -VERIFICATION:- FAILED - -Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/delayed-ub/delayed-ub.rs b/tests/expected/uninit/delayed-ub/delayed-ub.rs index 74196d144f0a..feee4bcd161f 100644 --- a/tests/expected/uninit/delayed-ub/delayed-ub.rs +++ b/tests/expected/uninit/delayed-ub/delayed-ub.rs @@ -123,3 +123,44 @@ fn delayed_ub_copy() { assert!(value > 0); // UB: This reads a padding value! 
} } + +struct S { + u: U, +} + +struct U { + value1: u128, + value2: u64, + value3: u32, +} + +struct Inner(*mut T); + +/// Delayed UB via mutable pointer write into inner fields of structs. +#[kani::proof] +fn delayed_ub_structs() { + unsafe { + // Create a convoluted struct. + let mut s: S = S { u: U { value1: 0, value2: 0, value3: 0 } }; + // Get a pointer to an inner field of the struct. Then, cast between two pointers of + // different padding. + let inner = Inner(&mut s.u.value2 as *mut _); + let inner_cast = Inner(inner.0 as *mut (u8, u32)); + let ptr = inner_cast.0; + *ptr = (4, 4); + let u: U = s.u; // UB: This reads a padding value inside the inner struct! + } +} + +/// Delayed UB via mutable pointer write into a slice element. +#[kani::proof] +fn delayed_ub_slices() { + unsafe { + // Create an array. + let mut arr = [0u128; 4]; + // Get a pointer to a part of the array. + let ptr = &mut arr[0..2][0..1][0] as *mut _ as *mut (u8, u32); + *ptr = (4, 4); + let arr_copy = arr; // UB: This reads a padding value inside the array! + } +} diff --git a/tests/expected/uninit/delayed-ub/expected b/tests/expected/uninit/delayed-ub/expected index d0145966b002..06dc9e24ea12 100644 --- a/tests/expected/uninit/delayed-ub/expected +++ b/tests/expected/uninit/delayed-ub/expected @@ -1,3 +1,15 @@ +Checking harness delayed_ub_slices... + +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `[u128; 4]` + +VERIFICATION:- FAILED + +Checking harness delayed_ub_structs... + +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `U` + +VERIFICATION:- FAILED + Checking harness delayed_ub_copy... 
Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` @@ -41,6 +53,8 @@ Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type VERIFICATION:- FAILED Summary: +Verification failed for - delayed_ub_slices +Verification failed for - delayed_ub_structs Verification failed for - delayed_ub_copy Verification failed for - delayed_ub_closure_capture_laundered Verification failed for - delayed_ub_closure_laundered @@ -48,4 +62,4 @@ Verification failed for - delayed_ub_laundered Verification failed for - delayed_ub_static Verification failed for - delayed_ub_transmute Verification failed for - delayed_ub -Complete - 0 successfully verified harnesses, 7 failures, 7 total. +Complete - 0 successfully verified harnesses, 9 failures, 9 total. From afb9df7bff0b3b6a9aa9019513950ebd6b4b78b4 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 26 Jul 2024 12:11:10 -0700 Subject: [PATCH 12/45] Apply clippy suggestion --- .../transform/check_uninit/delayed_ub/points_to_graph.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs index 82f059b20716..f35b2f00f86a 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs @@ -194,7 +194,7 @@ impl<'tcx> JoinSemiLattice for PointsToGraph<'tcx> { // graph. let difference: HashSet<_> = to.difference(self.edges.get(from).unwrap()).cloned().collect(); - if difference.len() != 0 { + if !difference.is_empty() { updated = true; // Add all edges to the original graph. 
self.edges.get_mut(from).unwrap().extend(difference); From 08494ed36df36b2ebc53a48435f0ffd870b8d506 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 26 Jul 2024 12:52:30 -0700 Subject: [PATCH 13/45] Fix an issue with terminator handling in `InstrumentationVisitor` --- kani-compiler/src/kani_middle/transform/body.rs | 6 ++++++ .../check_uninit/delayed_ub/instrumentation_visitor.rs | 9 ++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/kani-compiler/src/kani_middle/transform/body.rs b/kani-compiler/src/kani_middle/transform/body.rs index d3f2afc15d31..e3de35588475 100644 --- a/kani-compiler/src/kani_middle/transform/body.rs +++ b/kani-compiler/src/kani_middle/transform/body.rs @@ -469,6 +469,12 @@ impl SourceInstruction { SourceInstruction::Terminator { bb } => blocks[bb].terminator.span, } } + + pub fn bb(&self) -> usize { + match self { + SourceInstruction::Statement { bb, .. } | SourceInstruction::Terminator { bb } => *bb, + } + } } fn find_instance(tcx: TyCtxt, diagnostic: &str) -> Option { diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs index 2fb43172e05e..21fb3f42bc77 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -16,7 +16,7 @@ use rustc_hir::def_id::DefId as InternalDefId; use rustc_middle::ty::TyCtxt; use rustc_smir::rustc_internal; use stable_mir::mir::visit::{Location, PlaceContext}; -use stable_mir::mir::{BasicBlockIdx, MirVisitor, Operand, Place, Statement}; +use stable_mir::mir::{BasicBlockIdx, MirVisitor, Operand, Place, Statement, Terminator}; use std::collections::HashSet; pub struct InstrumentationVisitor<'a, 'tcx> { @@ -92,6 +92,13 @@ impl<'a, 'tcx> MirVisitor for InstrumentationVisitor<'a, 'tcx> { } } + fn 
visit_terminator(&mut self, term: &Terminator, location: Location) { + if !(self.skip_next || self.target.is_some()) { + self.current = SourceInstruction::Terminator { bb: self.current.bb() }; + self.super_terminator(term, location); + } + } + fn visit_place(&mut self, place: &Place, ptx: PlaceContext, location: Location) { // Match the place by whatever it is pointing to and find an intersection with the targets. if self From 0f11c9558676b755efb6ba149b47583bc006c060 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 29 Jul 2024 08:41:55 -0700 Subject: [PATCH 14/45] Fix expected test output files --- tests/expected/uninit/delayed-ub/expected | 72 +++++++------------ tests/expected/uninit/intrinsics/expected | 88 +++++++++-------------- 2 files changed, 60 insertions(+), 100 deletions(-) diff --git a/tests/expected/uninit/delayed-ub/expected b/tests/expected/uninit/delayed-ub/expected index 06dc9e24ea12..46b6ababe85d 100644 --- a/tests/expected/uninit/delayed-ub/expected +++ b/tests/expected/uninit/delayed-ub/expected @@ -1,56 +1,38 @@ -Checking harness delayed_ub_slices... +delayed_ub_slices.assertion.4\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `[u128; 4]`" -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `[u128; 4]` +delayed_ub_structs.assertion.2\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `U`" -VERIFICATION:- FAILED +delayed_ub_copy.assertion.1\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`" -Checking harness delayed_ub_structs... 
+delayed_ub_closure_capture_laundered.assertion.2\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`" -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `U` +delayed_ub_closure_laundered.assertion.2\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`" -VERIFICATION:- FAILED +delayed_ub_laundered.assertion.2\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`" -Checking harness delayed_ub_copy... +delayed_ub_static.assertion.4\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`" -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` +delayed_ub_transmute.assertion.2\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`" -VERIFICATION:- FAILED - -Checking harness delayed_ub_closure_capture_laundered... - -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` - -VERIFICATION:- FAILED - -Checking harness delayed_ub_closure_laundered... - -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` - -VERIFICATION:- FAILED - -Checking harness delayed_ub_laundered... - -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` - -VERIFICATION:- FAILED - -Checking harness delayed_ub_static... - -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` - -VERIFICATION:- FAILED - -Checking harness delayed_ub_transmute... - -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` - -VERIFICATION:- FAILED - -Checking harness delayed_ub... 
- -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `u128` - -VERIFICATION:- FAILED +delayed_ub.assertion.2\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `u128`" Summary: Verification failed for - delayed_ub_slices diff --git a/tests/expected/uninit/intrinsics/expected b/tests/expected/uninit/intrinsics/expected index ab83be097444..cf34d305608b 100644 --- a/tests/expected/uninit/intrinsics/expected +++ b/tests/expected/uninit/intrinsics/expected @@ -1,68 +1,46 @@ -Checking harness check_typed_swap_safe... +std::ptr::read::>.assertion.1\ + - Status: FAILURE\ + - Description: "Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit." -Failed Checks: Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit. +std::ptr::read::>.assertion.2\ + - Status: FAILURE\ + - Description: "Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit." -Failed Checks: Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit. +std::ptr::write::>.assertion.1\ + - Status: FAILURE\ + - Description: "Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit." -VERIFICATION:- FAILED +std::ptr::write::>.assertion.2\ + - Status: FAILURE\ + - Description: "Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit." -Checking harness check_typed_swap... 
+check_typed_swap.assertion.1\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*mut u8`" -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*mut u8` +check_typed_swap.assertion.2\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*mut u8`" -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*mut u8` +check_volatile_load.assertion.1\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`" -Failed Checks: Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit. +check_compare_bytes.assertion.1\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`" -Failed Checks: Kani currently doesn't support checking memory initialization for pointers to `std::mem::MaybeUninit. +check_compare_bytes.assertion.2\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`" -VERIFICATION:- FAILED +std::intrinsics::copy::.assertion.1\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`" -Checking harness check_volatile_store_and_load_safe... - -VERIFICATION:- SUCCESSFUL - -Checking harness check_volatile_load... - -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8` - -VERIFICATION:- FAILED - -Checking harness check_write_bytes_safe... - -VERIFICATION:- SUCCESSFUL - -Checking harness check_compare_bytes_safe... - -VERIFICATION:- SUCCESSFUL - -Checking harness check_compare_bytes... 
- -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8` - -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8` - -VERIFICATION:- FAILED - -Checking harness check_copy_safe... - -VERIFICATION:- SUCCESSFUL - -Checking harness check_copy... - -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8` - -VERIFICATION:- FAILED - -Checking harness check_copy_nonoverlapping_safe... - -VERIFICATION:- SUCCESSFUL - -Checking harness check_copy_nonoverlapping... - -Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8` - -VERIFICATION:- FAILED +std::intrinsics::copy_nonoverlapping::.assertion.1\ + - Status: FAILURE\ + - Description: "Undefined Behavior: Reading from an uninitialized pointer of type `*const u8`" Summary: Verification failed for - check_typed_swap_safe From c5d25ed933602d4d0fb40f6463245b8466568ccb Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 29 Jul 2024 08:50:33 -0700 Subject: [PATCH 15/45] More efficient intersection handling --- .../check_uninit/delayed_ub/instrumentation_visitor.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs index 21fb3f42bc77..f57d9511c922 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -105,8 +105,8 @@ impl<'a, 'tcx> MirVisitor for InstrumentationVisitor<'a, 'tcx> { .points_to .follow_from_place(rustc_internal::internal(self.tcx, place), self.current_def_id) .intersection(&self.analysis_targets) - .count() - != 0 + .next() + .is_some() { // If we are mutating the place, initialize it. 
if ptx.is_mutating() { From e7bb5b851667f04800ff390dd73d6df7f4025b71 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 29 Jul 2024 10:02:13 -0700 Subject: [PATCH 16/45] Add a better explanation for running reachability analysis twice --- .../src/codegen_cprover_gotoc/compiler_interface.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs index a850003b91b5..7416e997d1f7 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs @@ -87,6 +87,13 @@ impl GotocCodegenBackend { check_contract: Option, mut transformer: BodyTransformation, ) -> (GotocCtx<'tcx>, Vec, Option) { + // This runs reachability analysis before global passes are applied. + // + // Alternatively, we could run reachability only once after the global passes are applied + // and resolve the necessary dependencies inside the passes on the fly. This, however, has a + // disadvantage of not having a precomputed call graph for the global passes to use. The + // call graph could be used, for example, in resolving function pointer or vtable calls for + // global passes that need this. 
let (items, call_graph) = with_timer( || collect_reachable_items(tcx, &mut transformer, starting_items), "codegen reachability analysis", From 1792a6894228383b3fca7be5b0fecb7f4c857bad Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 29 Jul 2024 13:25:22 -0700 Subject: [PATCH 17/45] Move points-to analysis into a separate module, organize imports --- .../compiler_interface.rs | 2 +- kani-compiler/src/kani_middle/mod.rs | 1 + .../src/kani_middle/points_to/mod.rs | 11 ++++++ .../points_to_analysis.rs | 6 +-- .../points_to_graph.rs | 0 .../delayed_ub/initial_target_visitor.rs | 3 +- .../delayed_ub/instrumentation_visitor.rs | 22 ++++++----- .../transform/check_uninit/delayed_ub/mod.rs | 37 +++++++++---------- .../kani_middle/transform/check_uninit/mod.rs | 27 ++++++++------ .../transform/check_uninit/ptr_uninit/mod.rs | 17 +++++---- .../check_uninit/ptr_uninit/uninit_visitor.rs | 30 ++++++++------- .../transform/check_uninit/ty_layout.rs | 10 +++-- .../src/kani_middle/transform/mod.rs | 5 ++- 13 files changed, 96 insertions(+), 75 deletions(-) create mode 100644 kani-compiler/src/kani_middle/points_to/mod.rs rename kani-compiler/src/kani_middle/{transform/check_uninit/delayed_ub => points_to}/points_to_analysis.rs (99%) rename kani-compiler/src/kani_middle/{transform/check_uninit/delayed_ub => points_to}/points_to_graph.rs (100%) diff --git a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs index 7416e997d1f7..6770cebbee4c 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs @@ -87,7 +87,7 @@ impl GotocCodegenBackend { check_contract: Option, mut transformer: BodyTransformation, ) -> (GotocCtx<'tcx>, Vec, Option) { - // This runs reachability analysis before global passes are applied. + // This runs reachability analysis before global passes are applied. 
// // Alternatively, we could run reachability only once after the global passes are applied // and resolve the necessary dependencies inside the passes on the fly. This, however, has a diff --git a/kani-compiler/src/kani_middle/mod.rs b/kani-compiler/src/kani_middle/mod.rs index a7a512c86de3..a5d077d9c16e 100644 --- a/kani-compiler/src/kani_middle/mod.rs +++ b/kani-compiler/src/kani_middle/mod.rs @@ -32,6 +32,7 @@ pub mod codegen_units; pub mod coercion; mod intrinsics; pub mod metadata; +pub mod points_to; pub mod provide; pub mod reachability; pub mod resolve; diff --git a/kani-compiler/src/kani_middle/points_to/mod.rs b/kani-compiler/src/kani_middle/points_to/mod.rs new file mode 100644 index 000000000000..ab7bb7223f1e --- /dev/null +++ b/kani-compiler/src/kani_middle/points_to/mod.rs @@ -0,0 +1,11 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! This module contains points-to analysis primitives, such as the graph and types representing its +//! nodes, and the analysis itself. + +mod points_to_analysis; +mod points_to_graph; + +pub use points_to_analysis::PointsToAnalysis; +pub use points_to_graph::{GlobalMemLoc, LocalMemLoc, PointsToGraph}; diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs similarity index 99% rename from kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs rename to kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 75eabc754ba5..5d9faf96acfc 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -5,11 +5,9 @@ //! necessary aliasing information for instrumenting delayed UB later on. 
use crate::kani_middle::{ + points_to::{GlobalMemLoc, LocalMemLoc, PointsToGraph}, reachability::CallGraph, - transform::{ - check_uninit::delayed_ub::points_to_graph::{GlobalMemLoc, LocalMemLoc, PointsToGraph}, - internal_mir::RustcInternalMir, - }, + transform::RustcInternalMir, }; use rustc_ast::Mutability; use rustc_hir::def_id::DefId; diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs similarity index 100% rename from kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/points_to_graph.rs rename to kani-compiler/src/kani_middle/points_to/points_to_graph.rs diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs index 9908e2990934..7a8a9de90b07 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs @@ -4,6 +4,7 @@ //! This module contains the visitor responsible for collecting initial analysis targets for delayed //! UB instrumentation. +use crate::kani_middle::transform::check_uninit::ty_layout::tys_layout_compatible; use stable_mir::{ mir::{ alloc::GlobalAlloc, @@ -16,8 +17,6 @@ use stable_mir::{ CrateDef, DefId, }; -use crate::kani_middle::transform::check_uninit::ty_layout::tys_layout_compatible; - /// Pointer, write through which might trigger delayed UB. 
pub enum AnalysisTarget { Place(Place), diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs index f57d9511c922..6882244994c9 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -4,19 +4,23 @@ //! Visitor that collects all instructions relevant to uninitialized memory access caused by delayed //! UB. In practice, that means collecting all instructions where the place is featured. -use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; -use crate::kani_middle::transform::check_uninit::delayed_ub::points_to_graph::{ - GlobalMemLoc, PointsToGraph, +use crate::kani_middle::{ + points_to::{GlobalMemLoc, PointsToGraph}, + transform::{ + body::{InsertPosition, MutableBody, SourceInstruction}, + check_uninit::{ + relevant_instruction::{InitRelevantInstruction, MemoryInitOp}, + TargetFinder, + }, + }, }; -use crate::kani_middle::transform::check_uninit::relevant_instruction::{ - InitRelevantInstruction, MemoryInitOp, -}; -use crate::kani_middle::transform::check_uninit::TargetFinder; use rustc_hir::def_id::DefId as InternalDefId; use rustc_middle::ty::TyCtxt; use rustc_smir::rustc_internal; -use stable_mir::mir::visit::{Location, PlaceContext}; -use stable_mir::mir::{BasicBlockIdx, MirVisitor, Operand, Place, Statement, Terminator}; +use stable_mir::mir::{ + visit::{Location, PlaceContext}, + BasicBlockIdx, MirVisitor, Operand, Place, Statement, Terminator, +}; use std::collections::HashSet; pub struct InstrumentationVisitor<'a, 'tcx> { diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index e31aa8826ab7..73eceabeaf1d 100644 --- 
a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -7,34 +7,31 @@ use std::collections::HashMap; use std::collections::HashSet; use crate::args::ExtraChecks; -use crate::kani_middle::reachability::CallGraph; -use crate::kani_middle::transform::body::CheckType; -use crate::kani_middle::transform::body::MutableBody; -use crate::kani_middle::transform::check_uninit::UninitInstrumenter; -use crate::kani_middle::transform::internal_mir::RustcInternalMir; -use crate::kani_middle::transform::BodyTransformation; -use crate::kani_middle::transform::GlobalPass; -use crate::kani_middle::transform::TransformationResult; +use crate::kani_middle::{ + points_to::{GlobalMemLoc, LocalMemLoc, PointsToAnalysis, PointsToGraph}, + reachability::CallGraph, + transform::{ + body::{CheckType, MutableBody}, + check_uninit::UninitInstrumenter, + internal_mir::RustcInternalMir, + BodyTransformation, GlobalPass, TransformationResult, + }, +}; use crate::kani_queries::QueryDb; -use initial_target_visitor::AnalysisTarget; -use initial_target_visitor::InitialTargetVisitor; +use initial_target_visitor::{AnalysisTarget, InitialTargetVisitor}; use instrumentation_visitor::InstrumentationVisitor; -use points_to_analysis::PointsToAnalysis; -use points_to_graph::GlobalMemLoc; -use points_to_graph::LocalMemLoc; -use points_to_graph::PointsToGraph; use rustc_middle::ty::TyCtxt; use rustc_session::config::OutputType; use rustc_smir::rustc_internal; -use stable_mir::mir::mono::{Instance, MonoItem}; -use stable_mir::mir::MirVisitor; -use stable_mir::ty::FnDef; -use stable_mir::CrateDef; +use stable_mir::{ + mir::mono::{Instance, MonoItem}, + mir::MirVisitor, + ty::FnDef, + CrateDef, +}; mod initial_target_visitor; mod instrumentation_visitor; -mod points_to_analysis; -mod points_to_graph; #[derive(Debug)] pub struct DelayedUbPass { diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs 
b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs index 82ef3df94361..5c7194f879d1 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs @@ -4,26 +4,29 @@ //! Module containing multiple transformation passes that instrument the code to detect possible UB //! due to the accesses to uninitialized memory. -use crate::kani_middle::find_fn_def; -use crate::kani_middle::transform::body::CheckType; -use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; +use crate::kani_middle::{ + find_fn_def, + transform::body::{CheckType, InsertPosition, MutableBody, SourceInstruction}, +}; use relevant_instruction::{InitRelevantInstruction, MemoryInitOp}; use rustc_middle::ty::TyCtxt; use rustc_smir::rustc_internal; -use stable_mir::mir::mono::Instance; -use stable_mir::mir::{ - AggregateKind, BasicBlockIdx, ConstOperand, Mutability, Operand, Place, Rvalue, -}; -use stable_mir::ty::{ - FnDef, GenericArgKind, GenericArgs, MirConst, RigidTy, Ty, TyConst, TyKind, UintTy, +use stable_mir::{ + mir::{ + mono::Instance, AggregateKind, BasicBlockIdx, ConstOperand, Mutability, Operand, Place, + Rvalue, + }, + ty::{FnDef, GenericArgKind, GenericArgs, MirConst, RigidTy, Ty, TyConst, TyKind, UintTy}, + CrateDef, }; -use stable_mir::CrateDef; use std::collections::{HashMap, HashSet}; +pub use delayed_ub::DelayedUbPass; +pub use ptr_uninit::UninitPass; pub use ty_layout::{PointeeInfo, PointeeLayout}; -pub(crate) mod delayed_ub; -pub(crate) mod ptr_uninit; +mod delayed_ub; +mod ptr_uninit; mod relevant_instruction; mod ty_layout; diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs index dc462079a6bc..af2753ea7175 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs +++ 
b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/mod.rs @@ -5,18 +5,19 @@ //! uninitialized memory via raw pointers. use crate::args::ExtraChecks; -use crate::kani_middle::transform::body::{ - CheckType, InsertPosition, MutableBody, SourceInstruction, +use crate::kani_middle::transform::{ + body::{CheckType, InsertPosition, MutableBody, SourceInstruction}, + check_uninit::{get_mem_init_fn_def, UninitInstrumenter}, + TransformPass, TransformationType, }; -use crate::kani_middle::transform::check_uninit::{get_mem_init_fn_def, UninitInstrumenter}; -use crate::kani_middle::transform::{TransformPass, TransformationType}; use crate::kani_queries::QueryDb; use rustc_middle::ty::TyCtxt; use rustc_smir::rustc_internal; -use stable_mir::mir::mono::Instance; -use stable_mir::mir::{Body, Mutability, Place}; -use stable_mir::ty::{FnDef, GenericArgs, Ty}; -use stable_mir::CrateDef; +use stable_mir::{ + mir::{mono::Instance, Body, Mutability, Place}, + ty::{FnDef, GenericArgs, Ty}, + CrateDef, +}; use std::collections::HashMap; use std::fmt::Debug; use tracing::trace; diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs index 6cede48c36cc..36ec0f59e8d1 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs @@ -3,21 +3,25 @@ // //! Visitor that collects all instructions relevant to uninitialized memory access. 
-use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; -use crate::kani_middle::transform::check_uninit::relevant_instruction::{ - InitRelevantInstruction, MemoryInitOp, +use crate::kani_middle::transform::{ + body::{InsertPosition, MutableBody, SourceInstruction}, + check_uninit::{ + relevant_instruction::{InitRelevantInstruction, MemoryInitOp}, + ty_layout::tys_layout_compatible, + TargetFinder, + }, }; -use crate::kani_middle::transform::check_uninit::ty_layout::tys_layout_compatible; -use crate::kani_middle::transform::check_uninit::TargetFinder; -use stable_mir::mir::alloc::GlobalAlloc; -use stable_mir::mir::mono::{Instance, InstanceKind}; -use stable_mir::mir::visit::{Location, PlaceContext}; -use stable_mir::mir::{ - BasicBlockIdx, CastKind, LocalDecl, MirVisitor, Mutability, NonDivergingIntrinsic, Operand, - Place, PointerCoercion, ProjectionElem, Rvalue, Statement, StatementKind, Terminator, - TerminatorKind, +use stable_mir::{ + mir::{ + alloc::GlobalAlloc, + mono::{Instance, InstanceKind}, + visit::{Location, PlaceContext}, + BasicBlockIdx, CastKind, LocalDecl, MirVisitor, Mutability, NonDivergingIntrinsic, Operand, + Place, PointerCoercion, ProjectionElem, Rvalue, Statement, StatementKind, Terminator, + TerminatorKind, + }, + ty::{ConstantKind, RigidTy, TyKind}, }; -use stable_mir::ty::{ConstantKind, RigidTy, TyKind}; pub struct CheckUninitVisitor { locals: Vec, diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs index e8f1c85cabcd..5d0cf940ca14 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs @@ -3,10 +3,12 @@ // //! Utility functions that help calculate type layout. 
-use stable_mir::abi::{FieldsShape, Scalar, TagEncoding, ValueAbi, VariantsShape}; -use stable_mir::target::{MachineInfo, MachineSize}; -use stable_mir::ty::{AdtKind, IndexedVal, RigidTy, Ty, TyKind, UintTy}; -use stable_mir::CrateDef; +use stable_mir::{ + abi::{FieldsShape, Scalar, TagEncoding, ValueAbi, VariantsShape}, + target::{MachineInfo, MachineSize}, + ty::{AdtKind, IndexedVal, RigidTy, Ty, TyKind, UintTy}, + CrateDef, +}; /// Represents a chunk of data bytes in a data structure. #[derive(Clone, Debug, Eq, PartialEq, Hash)] diff --git a/kani-compiler/src/kani_middle/transform/mod.rs b/kani-compiler/src/kani_middle/transform/mod.rs index 5c772b5a0d38..bdd2f0778c57 100644 --- a/kani-compiler/src/kani_middle/transform/mod.rs +++ b/kani-compiler/src/kani_middle/transform/mod.rs @@ -19,8 +19,7 @@ use crate::kani_middle::codegen_units::CodegenUnit; use crate::kani_middle::reachability::CallGraph; use crate::kani_middle::transform::body::CheckType; -use crate::kani_middle::transform::check_uninit::delayed_ub::DelayedUbPass; -use crate::kani_middle::transform::check_uninit::ptr_uninit::UninitPass; +use crate::kani_middle::transform::check_uninit::{DelayedUbPass, UninitPass}; use crate::kani_middle::transform::check_values::ValidValuePass; use crate::kani_middle::transform::contracts::AnyModifiesPass; use crate::kani_middle::transform::kani_intrinsics::IntrinsicGeneratorPass; @@ -33,6 +32,8 @@ use stable_mir::mir::Body; use std::collections::HashMap; use std::fmt::Debug; +pub use internal_mir::RustcInternalMir; + pub(crate) mod body; mod check_uninit; mod check_values; From f4601c1407a5e76a543059cebbb03c21ab42cc7a Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 29 Jul 2024 13:28:24 -0700 Subject: [PATCH 18/45] Use StaticDef as analysis target --- .../check_uninit/delayed_ub/initial_target_visitor.rs | 7 +++---- .../kani_middle/transform/check_uninit/delayed_ub/mod.rs | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git 
a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs index 7a8a9de90b07..f84e1499d319 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs @@ -8,19 +8,18 @@ use crate::kani_middle::transform::check_uninit::ty_layout::tys_layout_compatibl use stable_mir::{ mir::{ alloc::GlobalAlloc, - mono::{Instance, InstanceKind}, + mono::{Instance, InstanceKind, StaticDef}, visit::Location, Body, CastKind, LocalDecl, MirVisitor, Mutability, NonDivergingIntrinsic, Operand, Place, Rvalue, Statement, StatementKind, Terminator, TerminatorKind, }, ty::{ConstantKind, RigidTy, TyKind}, - CrateDef, DefId, }; /// Pointer, write through which might trigger delayed UB. pub enum AnalysisTarget { Place(Place), - Static(DefId), + Static(StaticDef), } /// Visitor that finds initial analysis targets for delayed UB instrumentation. 
For our purposes, @@ -49,7 +48,7 @@ impl InitialTargetVisitor { if let ConstantKind::Allocated(allocation) = constant.const_.kind() { for (_, prov) in &allocation.provenance.ptrs { if let GlobalAlloc::Static(static_def) = GlobalAlloc::from(prov.0) { - self.targets.push(AnalysisTarget::Static(static_def.def_id())); + self.targets.push(AnalysisTarget::Static(static_def)); }; } } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index 73eceabeaf1d..2198d8823174 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -74,8 +74,8 @@ impl GlobalPass for DelayedUbPass { AnalysisTarget::Place(place) => { LocalMemLoc::Place(rustc_internal::internal(tcx, place)).with_def_id(def_id) } - AnalysisTarget::Static(def_id) => { - GlobalMemLoc::Global(rustc_internal::internal(tcx, def_id)) + AnalysisTarget::Static(static_def) => { + GlobalMemLoc::Global(rustc_internal::internal(tcx, static_def)) } }) }) From 118642621cd8a6389e3392ad412046cf9c006c4d Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 29 Jul 2024 13:31:58 -0700 Subject: [PATCH 19/45] Add a comment in `initial_target_visitor.rs` --- .../check_uninit/delayed_ub/initial_target_visitor.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs index f84e1499d319..6d9927423df6 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs @@ -57,6 +57,9 @@ impl InitialTargetVisitor { } } +/// We implement MirVisitor to facilitate target finding, we look for: +/// - pointer casts where pointees have 
different padding; +/// - calls to `copy`-like intrinsics. impl MirVisitor for InitialTargetVisitor { fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) { if let Rvalue::Cast(kind, operand, ty) = rvalue { From cf877b164497ed97d922b67bd22ab1cc4e30d22c Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 29 Jul 2024 14:53:23 -0700 Subject: [PATCH 20/45] Fix small bug in delayed UB instrumentation visitor --- .../check_uninit/delayed_ub/instrumentation_visitor.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs index 6882244994c9..a1d914fb96d0 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -90,10 +90,10 @@ impl<'a, 'tcx> MirVisitor for InstrumentationVisitor<'a, 'tcx> { } else if self.target.is_none() { // Check all inner places. self.super_statement(stmt, location); - // Switch to the next statement. - let SourceInstruction::Statement { idx, bb } = self.current else { unreachable!() }; - self.current = SourceInstruction::Statement { idx: idx + 1, bb }; } + // Switch to the next statement. 
+ let SourceInstruction::Statement { idx, bb } = self.current else { unreachable!() }; + self.current = SourceInstruction::Statement { idx: idx + 1, bb }; } fn visit_terminator(&mut self, term: &Terminator, location: Location) { From 0a6be41715831470a86ef18ec41caa5d32ff7879 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 29 Jul 2024 15:08:22 -0700 Subject: [PATCH 21/45] Systematic ptr/ref transmute handling --- .../delayed_ub/initial_target_visitor.rs | 20 ++++++++----- .../check_uninit/ptr_uninit/uninit_visitor.rs | 29 +++++++++++++++++-- .../transform/check_uninit/ty_layout.rs | 22 ++++++++++---- 3 files changed, 56 insertions(+), 15 deletions(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs index 6d9927423df6..3b35fe7e888e 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs @@ -4,7 +4,7 @@ //! This module contains the visitor responsible for collecting initial analysis targets for delayed //! UB instrumentation. 
-use crate::kani_middle::transform::check_uninit::ty_layout::tys_layout_compatible; +use crate::kani_middle::transform::check_uninit::ty_layout::tys_layout_equal_to_size; use stable_mir::{ mir::{ alloc::GlobalAlloc, @@ -66,12 +66,18 @@ impl MirVisitor for InitialTargetVisitor { let operand_ty = operand.ty(self.body.locals()).unwrap(); match kind { CastKind::Transmute | CastKind::PtrToPtr => { - if let ( - RigidTy::RawPtr(from_ty, Mutability::Mut), - RigidTy::RawPtr(to_ty, Mutability::Mut), - ) = (operand_ty.kind().rigid().unwrap(), ty.kind().rigid().unwrap()) - { - if !tys_layout_compatible(from_ty, to_ty) { + let operand_ty_kind = operand_ty.kind(); + let from_ty = match operand_ty_kind.rigid().unwrap() { + RigidTy::RawPtr(ty, _) | RigidTy::Ref(_, ty, _) => Some(ty), + _ => None, + }; + let ty_kind = ty.kind(); + let to_ty = match ty_kind.rigid().unwrap() { + RigidTy::RawPtr(ty, _) | RigidTy::Ref(_, ty, _) => Some(ty), + _ => None, + }; + if let (Some(from_ty), Some(to_ty)) = (from_ty, to_ty) { + if !tys_layout_equal_to_size(from_ty, to_ty) { self.push_operand(operand); } } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs index 36ec0f59e8d1..02e153dc1caf 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs @@ -7,7 +7,7 @@ use crate::kani_middle::transform::{ body::{InsertPosition, MutableBody, SourceInstruction}, check_uninit::{ relevant_instruction::{InitRelevantInstruction, MemoryInitOp}, - ty_layout::tys_layout_compatible, + ty_layout::tys_layout_compatible_to_size, TargetFinder, }, }; @@ -477,14 +477,37 @@ impl MirVisitor for CheckUninitVisitor { } } CastKind::Transmute => { - let operand_ty = operand.ty(&&self.locals).unwrap(); - if !tys_layout_compatible(&operand_ty, &ty) { + let operand_ty = 
operand.ty(&self.locals).unwrap(); + if !tys_layout_compatible_to_size(&operand_ty, &ty) { // If transmuting between two types of incompatible layouts, padding // bytes are exposed, which is UB. self.push_target(MemoryInitOp::TriviallyUnsafe { reason: "Transmuting between types of incompatible layouts." .to_string(), }); + } else if let ( + TyKind::RigidTy(RigidTy::Ref(_, from_ty, _)), + TyKind::RigidTy(RigidTy::Ref(_, to_ty, _)), + ) = (operand_ty.kind(), ty.kind()) + { + if !tys_layout_compatible_to_size(&from_ty, &to_ty) { + // Since references are supposed to always be initialized for its type, + // transmuting between two references of incompatible layout is UB. + self.push_target(MemoryInitOp::TriviallyUnsafe { + reason: "Transmuting between references pointing to types of incompatible layouts." + .to_string(), + }); + } + } else if let ( + TyKind::RigidTy(RigidTy::RawPtr(from_ty, _)), + TyKind::RigidTy(RigidTy::Ref(_, to_ty, _)), + ) = (operand_ty.kind(), ty.kind()) + { + // Assert that we can only cast this way if types are the same. + assert!(from_ty == to_ty); + // When transmuting from a raw pointer to a reference, need to check that + // the value pointed by the raw pointer is initialized. + self.push_target(MemoryInitOp::Check { operand: operand.clone() }); } } _ => {} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs index 5d0cf940ca14..8a162d5944d3 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs @@ -335,9 +335,21 @@ fn data_bytes_for_ty( } } -/// Returns true if `to_ty` has a smaller or equal size and the same padding bytes as `from_ty` up until -/// its size. -pub fn tys_layout_compatible(from_ty: &Ty, to_ty: &Ty) -> bool { +/// Returns true if `to_ty` has a smaller or equal size and padding bytes in `from_ty` are padding +/// bytes in `to_ty`. 
+pub fn tys_layout_compatible_to_size(from_ty: &Ty, to_ty: &Ty) -> bool { + tys_layout_cmp_to_size(from_ty, to_ty, |from_byte, to_byte| from_byte || !to_byte) +} + +/// Returns true if `to_ty` has a smaller or equal size and the same padding bytes as `from_ty` up +/// until its size. +pub fn tys_layout_equal_to_size(from_ty: &Ty, to_ty: &Ty) -> bool { + tys_layout_cmp_to_size(from_ty, to_ty, |from_byte, to_byte| from_byte == to_byte) +} + +/// Returns true if `to_ty` has a smaller or equal size and comparator function returns true for all +/// byte initialization value pairs up to size. +fn tys_layout_cmp_to_size(from_ty: &Ty, to_ty: &Ty, cmp: impl Fn(bool, bool) -> bool) -> bool { // Retrieve layouts to assess compatibility. let from_ty_info = PointeeInfo::from_ty(*from_ty); let to_ty_info = PointeeInfo::from_ty(*to_ty); @@ -357,8 +369,8 @@ pub fn tys_layout_compatible(from_ty: &Ty, to_ty: &Ty) -> bool { // Check data and padding bytes pair-wise. if from_ty_layout.iter().zip(to_ty_layout.iter()).all( |(from_ty_layout_byte, to_ty_layout_byte)| { - // Make sure all data and padding bytes match. - from_ty_layout_byte == to_ty_layout_byte + // Run comparator on each pair. 
+ cmp(*from_ty_layout_byte, *to_ty_layout_byte) }, ) { return true; From f527f2eb50562ae2b7dd0c90b1997a687d88c7d8 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 29 Jul 2024 15:18:15 -0700 Subject: [PATCH 22/45] Changes from code review --- .../check_uninit/delayed_ub/initial_target_visitor.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs index 3b35fe7e888e..d8456082c600 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs @@ -99,12 +99,7 @@ impl MirVisitor for InitialTargetVisitor { fn visit_terminator(&mut self, term: &Terminator, location: Location) { if let TerminatorKind::Call { func, args, .. } = &term.kind { - let instance = match try_resolve_instance(self.body.locals(), func) { - Ok(instance) => instance, - Err(reason) => { - panic!("{reason}"); - } - }; + let instance = try_resolve_instance(self.body.locals(), func).unwrap(); if instance.kind == InstanceKind::Intrinsic { match instance.intrinsic_name().unwrap().as_str() { "copy" => { @@ -150,6 +145,6 @@ fn try_resolve_instance(locals: &[LocalDecl], func: &Operand) -> Result Ok(Instance::resolve(def, &args).unwrap()), - _ => Err(format!("Kani does not support reasoning about arguments to `{ty:?}`.")), + _ => Err(format!("Kani was not able to resolve the instance of the function operand `{ty:?}`. Currently, memory initialization checks in presence of function pointers and vtable calls are not supported. 
For more information about planned support, see https://github.com/model-checking/kani/issues/3300.")), } } From 50ef746c7de46105d2eb3877c7a5db9d36aa9fa4 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 29 Jul 2024 15:31:39 -0700 Subject: [PATCH 23/45] Formatting change --- .../check_uninit/delayed_ub/initial_target_visitor.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs index d8456082c600..11ac412703ae 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/initial_target_visitor.rs @@ -145,6 +145,8 @@ fn try_resolve_instance(locals: &[LocalDecl], func: &Operand) -> Result Ok(Instance::resolve(def, &args).unwrap()), - _ => Err(format!("Kani was not able to resolve the instance of the function operand `{ty:?}`. Currently, memory initialization checks in presence of function pointers and vtable calls are not supported. For more information about planned support, see https://github.com/model-checking/kani/issues/3300.")), + _ => Err(format!( + "Kani was not able to resolve the instance of the function operand `{ty:?}`. Currently, memory initialization checks in presence of function pointers and vtable calls are not supported. For more information about planned support, see https://github.com/model-checking/kani/issues/3300." 
+ )), } } From bd7e960b1991a750cffcb5b0957dd72f25f7bf6d Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 29 Jul 2024 15:31:55 -0700 Subject: [PATCH 24/45] Link to the closed StableMIR PR --- kani-compiler/src/kani_middle/transform/internal_mir.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kani-compiler/src/kani_middle/transform/internal_mir.rs b/kani-compiler/src/kani_middle/transform/internal_mir.rs index ca2dfd957fdd..0dcf7d47c13a 100644 --- a/kani-compiler/src/kani_middle/transform/internal_mir.rs +++ b/kani-compiler/src/kani_middle/transform/internal_mir.rs @@ -3,7 +3,9 @@ //! This file contains conversions between from stable MIR data structures to its internal //! counterparts. This is primarily done to facilitate using dataflow analysis, which does not yet -//! support StableMIR. +//! support StableMIR. We tried to contribute this back to StableMIR, but faced some push back since +//! other maintainers wanted to keep the conversions minimal. For more information, see +//! 
https://github.com/rust-lang/rust/pull/127782 use rustc_middle::ty::{self as rustc_ty, TyCtxt}; use rustc_smir::rustc_internal::internal; From 076f5ad4097d11b6d9c4d78dcef4501bffd9b95f Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Tue, 30 Jul 2024 12:12:41 -0700 Subject: [PATCH 25/45] Handle spurious failures caused by delayed UB instrumentation --- .../delayed_ub/instrumentation_visitor.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs index a1d914fb96d0..eb4121fa9d08 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -19,7 +19,7 @@ use rustc_middle::ty::TyCtxt; use rustc_smir::rustc_internal; use stable_mir::mir::{ visit::{Location, PlaceContext}, - BasicBlockIdx, MirVisitor, Operand, Place, Statement, Terminator, + BasicBlockIdx, MirVisitor, Operand, Place, Rvalue, Statement, Terminator, }; use std::collections::HashSet; @@ -103,6 +103,15 @@ impl<'a, 'tcx> MirVisitor for InstrumentationVisitor<'a, 'tcx> { } } + fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) { + match rvalue { + Rvalue::AddressOf(..) | Rvalue::Ref(..) => { + // These operations are always legitimate for us. + } + _ => self.super_rvalue(rvalue, location), + } + } + fn visit_place(&mut self, place: &Place, ptx: PlaceContext, location: Location) { // Match the place by whatever it is pointing to and find an intersection with the targets. 
if self From b5b76608f98adf82ab942f0e22b03d94c9337e3e Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Wed, 31 Jul 2024 12:45:31 -0700 Subject: [PATCH 26/45] Add more comments to the points_to_analysis --- .../points_to/points_to_analysis.rs | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 5d9faf96acfc..fac369aea864 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -1,8 +1,25 @@ // Copyright Kani Contributors // SPDX-License-Identifier: Apache-2.0 OR MIT -//! Implementation of points-to analysis using Rust's native dataflow framework. This provides +//! Implementation of the points-to analysis using Rust's native dataflow framework. This provides //! necessary aliasing information for instrumenting delayed UB later on. +//! +//! The analysis uses Rust's dataflow framework by implementing appropriate traits to leverage the +//! existing fixpoint solver infrastructure. The main trait responsible for the dataflow analysis +//! behavior is `rustc_mir_dataflow::Analysis`: it provides two methods that are responsible for +//! handling statements and terminators, which we implement. +//! +//! The analysis proceeds by looking at each instruction in the dataflow order and collecting all +//! possible aliasing relations that the instruction introduces. If a terminator is a function call, +//! the analysis recurs into the function and then joins the information retrieved from it into the +//! original graph. +//! +//! For each instruction, the analysis first resolves dereference projections for each place to +//! determine which places it could point to. This is done by finding a set of successors in the +//! graph for each dereference projection. +//! +//! 
Then, the analysis adds the appropriate edges into the points-to graph. It proceeds until there +//! is no new information to be discovered. use crate::kani_middle::{ points_to::{GlobalMemLoc, LocalMemLoc, PointsToGraph}, @@ -30,7 +47,12 @@ pub struct PointsToAnalysis<'a, 'tcx> { def_id: DefId, body: Body<'tcx>, tcx: TyCtxt<'tcx>, + /// This will be used in the future to resolve function pointer and vtable calls. Currently, we + /// can resolve call graph edges just by looking at the terminators and erroring if we can't + /// resolve the callee. call_graph: &'a CallGraph, + /// This graph should contain a subset of the points-to graph reachable from function arguments. + /// For the entry function it will be empty. initial_graph: PointsToGraph<'tcx>, } @@ -45,12 +67,20 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { initial_graph: PointsToGraph<'tcx>, ) -> PointsToGraph<'tcx> { let analysis = Self { body: body.clone(), tcx, def_id, call_graph, initial_graph }; + // This creates a fixpoint solver using the initial graph, the body, and extra information + // and produces the cursor, which contains dataflow state for each instruction in the body. let mut cursor = analysis.into_engine(tcx, &body).iterate_to_fixpoint().into_results_cursor(&body); + // We collect dataflow state at each `Return` terminator to determine the full aliasing + // graph for the function. This is sound since those are the only places where the function + // finishes, so the dataflow state at those places will be a union of dataflow states + // preceding to it, which means every possible execution is taken into account. let mut results = PointsToGraph::empty(); for (idx, bb) in body.basic_blocks.iter().enumerate() { if let TerminatorKind::Return = bb.terminator().kind { + // Switch the cursor to the end of the block ending with `Return`. cursor.seek_to_block_end(idx.into()); + // Retrieve the dataflow state and join into the results graph. 
results.consume(cursor.get().clone()); } } From ac6591db7a69a4dae6e196a83440f06e51af7cd8 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Wed, 31 Jul 2024 14:44:47 -0700 Subject: [PATCH 27/45] Add more comments to the analysis --- .../points_to/points_to_analysis.rs | 61 ++++++++++++------- .../kani_middle/points_to/points_to_graph.rs | 8 ++- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index fac369aea864..156127e84a39 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -3,23 +3,26 @@ //! Implementation of the points-to analysis using Rust's native dataflow framework. This provides //! necessary aliasing information for instrumenting delayed UB later on. -//! +//! //! The analysis uses Rust's dataflow framework by implementing appropriate traits to leverage the //! existing fixpoint solver infrastructure. The main trait responsible for the dataflow analysis //! behavior is `rustc_mir_dataflow::Analysis`: it provides two methods that are responsible for //! handling statements and terminators, which we implement. -//! +//! //! The analysis proceeds by looking at each instruction in the dataflow order and collecting all //! possible aliasing relations that the instruction introduces. If a terminator is a function call, //! the analysis recurs into the function and then joins the information retrieved from it into the //! original graph. -//! +//! //! For each instruction, the analysis first resolves dereference projections for each place to //! determine which places it could point to. This is done by finding a set of successors in the //! graph for each dereference projection. -//! +//! //! Then, the analysis adds the appropriate edges into the points-to graph. It proceeds until there //! is no new information to be discovered. 
+//! +//! Currently, the analysis is not field-sensitive: e.g., if a field of a place aliases to some +//! other place, we treat it as if the place itself aliases to another place. use crate::kani_middle::{ points_to::{GlobalMemLoc, LocalMemLoc, PointsToGraph}, @@ -41,8 +44,7 @@ use rustc_smir::rustc_internal; use rustc_span::source_map::Spanned; use std::collections::HashSet; -/// Main points-to analysis object. Since this one will be created anew for each instance analysis, -/// we need to make sure big data structures are not copied unnecessarily. +/// Main points-to analysis object. pub struct PointsToAnalysis<'a, 'tcx> { def_id: DefId, body: Body<'tcx>, @@ -52,7 +54,7 @@ pub struct PointsToAnalysis<'a, 'tcx> { /// resolve the callee. call_graph: &'a CallGraph, /// This graph should contain a subset of the points-to graph reachable from function arguments. - /// For the entry function it will be empty. + /// For the entry function it will be empty (as it supposedly does not have any parameters). initial_graph: PointsToGraph<'tcx>, } @@ -68,7 +70,8 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { ) -> PointsToGraph<'tcx> { let analysis = Self { body: body.clone(), tcx, def_id, call_graph, initial_graph }; // This creates a fixpoint solver using the initial graph, the body, and extra information - // and produces the cursor, which contains dataflow state for each instruction in the body. + // and solves the dataflow problem, producing the cursor, which contains dataflow state for + // each instruction in the body. let mut cursor = analysis.into_engine(tcx, &body).iterate_to_fixpoint().into_results_cursor(&body); // We collect dataflow state at each `Return` terminator to determine the full aliasing @@ -89,18 +92,20 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { } impl<'a, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'tcx> { + /// Dataflow state at each instruction. 
type Domain = PointsToGraph<'tcx>; type Direction = Forward; const NAME: &'static str = "PointsToAnalysis"; - /// Dataflow state instantiated at the beginning of each basic block. + /// Dataflow state instantiated at the beginning of each basic block, before the state from + /// previous basic blocks gets joined into it. fn bottom_value(&self, _body: &Body<'tcx>) -> Self::Domain { PointsToGraph::empty() } - /// Dataflow state instantiated at the entry into the body, this should be the current dataflow + /// Dataflow state instantiated at the entry into the body; this should be the initial dataflow /// graph. fn initialize_start_block(&self, _body: &Body<'tcx>, state: &mut Self::Domain) { state.consume(self.initial_graph.clone()); @@ -121,11 +126,12 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { match &statement.kind { StatementKind::Assign(assign_box) => { let (place, rvalue) = *assign_box.clone(); - // Lvalue is `place`, which is already available to us. + // Resolve all dereference projections for the lvalue. let lvalue_set = state.follow_from_place(place, self.def_id); - // Determine all places which the newly created rvalue could point to. + // Determine all places rvalue could point to. let rvalue_set = match rvalue { - // Using the operand unchanged. + // Using the operand unchanged requires determining where it could point, which + // `follow_rvalue` does. Rvalue::Use(operand) | Rvalue::ShallowInitBox(operand, _) | Rvalue::Cast(_, operand, _) @@ -190,17 +196,21 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { HashSet::new() } Rvalue::Aggregate(_, operands) => { - // Conservatively find a union of all places mentioned here. + // Conservatively find a union of all places mentioned here and resolve + // their pointees. operands .into_iter() .flat_map(|operand| self.follow_rvalue(state, operand)) .collect() } Rvalue::CopyForDeref(place) => { - // Use a place unchanged. + // Resolve pointees of a place. 
state.follow(&state.follow_from_place(place, self.def_id)) } - Rvalue::ThreadLocalRef(def_id) => HashSet::from([GlobalMemLoc::Global(def_id)]), + Rvalue::ThreadLocalRef(def_id) => { + // We store a def_id of a static. + HashSet::from([GlobalMemLoc::Global(def_id)]) + } }; // Create an edge between all places which could be lvalue and all places rvalue // could be pointing to. @@ -241,6 +251,9 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { location: Location, ) -> TerminatorEdges<'mir, 'tcx> { if let TerminatorKind::Call { func, args, destination, .. } = &terminator.kind { + // Attempt to resolve callee. For now, we panic if the callee cannot be resolved (e.g., + // if a function pointer call is used), but we could leverage the call graph to resolve + // it. let instance = match try_resolve_instance(&self.body, func, self.tcx) { Ok(instance) => instance, Err(reason) => { @@ -248,7 +261,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { } }; match instance.def { - // Intrinsics could introduce aliasing edges we care about. + // Intrinsics could introduce aliasing edges we care about, so need to handle them. InstanceKind::Intrinsic(def_id) => { match self.tcx.intrinsic(def_id).unwrap().name.to_string().as_str() { name if name.starts_with("atomic") => { @@ -419,6 +432,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { _ => {} } } else { + // Otherwise, handle this as a regular function call. self.apply_regular_call_effect(state, instance, args, destination); } } @@ -427,7 +441,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { terminator.edges() } - // We probably should not care about this. + /// We don't care about this and just need to implement this to implement the trait. 
fn apply_call_return_effect( &mut self, _state: &mut Self::Domain, @@ -456,7 +470,7 @@ fn try_resolve_instance<'tcx>( } impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { - // Update the analysis state according to the operation, which is semantically equivalent to `*to = *from`. + /// Update the analysis state according to the operation, which is semantically equivalent to `*to = *from`. fn apply_copy_effect( &self, state: &mut PointsToGraph<'tcx>, @@ -468,7 +482,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { state.extend(&lvalue_set, &state.follow(&rvalue_set)); } - // Find all places where the operand could point to at the current stage of the program. + /// Find all places where the operand could point to at the current stage of the program. fn follow_rvalue( &self, state: &mut PointsToGraph<'tcx>, @@ -490,7 +504,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { } } - // Find all places where the deref of the operand could point to at the current stage of the program. + /// Find all places where the deref of the operand could point to at the current stage of the program. fn follow_deref( &self, state: &mut PointsToGraph<'tcx>, @@ -512,7 +526,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { } } - // Update the analysis state according to the regular function call. + /// Update the analysis state according to the regular function call. fn apply_regular_call_effect( &mut self, state: &mut PointsToGraph<'tcx>, @@ -528,7 +542,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { }; // In order to be efficient, create a new graph for the function call analysis, which only - // contains arguments and anything transitively reachable from them. + // contains arguments and statics and anything transitively reachable from them. let mut initial_graph = PointsToGraph::empty(); for arg in args.iter() { match arg.node { @@ -545,6 +559,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { // the callee, add it to the graph. 
if self.tcx.is_closure_like(instance.def.def_id()) { // This means we encountered a closure call. + // Sanity check. The first argument is the closure itself and the second argument is the tupled arguments from the caller. assert!(args.len() == 2); // First, connect all upvars. let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index f35b2f00f86a..79bf81c38fd0 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -34,6 +34,7 @@ impl<'tcx> GlobalMemLoc<'tcx> { } } + /// Returns LocalMemLoc of the memory location if available. pub fn maybe_local_mem_loc(&self) -> Option> { match self { GlobalMemLoc::Local(_, mem_loc) => Some(*mem_loc), @@ -49,7 +50,7 @@ impl<'tcx> From> for LocalMemLoc<'tcx> { } impl<'tcx> LocalMemLoc<'tcx> { - /// Generate a new alloc with increasing allocation id. + /// Register a new heap allocation site. pub fn new_alloc(def_id: DefId, location: Location) -> Self { LocalMemLoc::Alloc(def_id, location) } @@ -92,7 +93,7 @@ impl<'tcx> PointsToGraph<'tcx> { } /// Collect all scalar places to which a given place can alias. This is needed to resolve all - /// deref-like projections. + /// dereference projections. pub fn follow_from_place( &self, place: Place<'tcx>, @@ -144,7 +145,8 @@ impl<'tcx> PointsToGraph<'tcx> { std::fs::write(file_path, format!("digraph {{\n{}\n{}\n}}", nodes_str, edges_str)).unwrap(); } - /// Find a transitive closure of the graph starting from a given place. + /// Find a transitive closure of the graph starting from a set of given locations; this also + /// includes statics. pub fn transitive_closure(&self, targets: HashSet>) -> PointsToGraph<'tcx> { let mut result = PointsToGraph::empty(); // Working queue. 
From bedaa4670df7b5cfc163b159f2b8e1256c063b00 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Thu, 1 Aug 2024 15:04:48 -0700 Subject: [PATCH 28/45] Wrap `PointsToAnalysis::run` into a separate function --- kani-compiler/src/kani_middle/points_to/mod.rs | 2 +- .../src/kani_middle/points_to/points_to_analysis.rs | 13 ++++++++++++- .../transform/check_uninit/delayed_ub/mod.rs | 5 ++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/mod.rs b/kani-compiler/src/kani_middle/points_to/mod.rs index ab7bb7223f1e..269937a22143 100644 --- a/kani-compiler/src/kani_middle/points_to/mod.rs +++ b/kani-compiler/src/kani_middle/points_to/mod.rs @@ -7,5 +7,5 @@ mod points_to_analysis; mod points_to_graph; -pub use points_to_analysis::PointsToAnalysis; +pub use points_to_analysis::run_points_to_analysis; pub use points_to_graph::{GlobalMemLoc, LocalMemLoc, PointsToGraph}; diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 156127e84a39..0a7f0e99e89d 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -45,7 +45,7 @@ use rustc_span::source_map::Spanned; use std::collections::HashSet; /// Main points-to analysis object. -pub struct PointsToAnalysis<'a, 'tcx> { +struct PointsToAnalysis<'a, 'tcx> { def_id: DefId, body: Body<'tcx>, tcx: TyCtxt<'tcx>, @@ -58,6 +58,17 @@ pub struct PointsToAnalysis<'a, 'tcx> { initial_graph: PointsToGraph<'tcx>, } +/// Public points-to analysis entry point. Performs the analysis on a body, outputting the graph +/// containing aliasing information of the body itself and any body reachable from it. 
+pub fn run_points_to_analysis<'tcx>( + body: Body<'tcx>, + tcx: TyCtxt<'tcx>, + def_id: DefId, + call_graph: &CallGraph, +) -> PointsToGraph<'tcx> { + PointsToAnalysis::run(body, tcx, def_id, call_graph, PointsToGraph::empty()) +} + impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { /// Perform the analysis on a body, outputting the graph containing aliasing information of the /// body itself and any body reachable from it. diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index 2198d8823174..a1989d87175b 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -8,7 +8,7 @@ use std::collections::HashSet; use crate::args::ExtraChecks; use crate::kani_middle::{ - points_to::{GlobalMemLoc, LocalMemLoc, PointsToAnalysis, PointsToGraph}, + points_to::{run_points_to_analysis, GlobalMemLoc, LocalMemLoc, PointsToGraph}, reachability::CallGraph, transform::{ body::{CheckType, MutableBody}, @@ -102,12 +102,11 @@ impl GlobalPass for DelayedUbPass { // conversion. 
let internal_body = body.internal_mir(tcx); let internal_def_id = rustc_internal::internal(tcx, instance.def.def_id()); - let results = PointsToAnalysis::run( + let results = run_points_to_analysis( internal_body.clone(), tcx, internal_def_id, call_graph, - PointsToGraph::empty(), ); global_points_to_graph.consume(results); } From 421b13627d4a75bd4896c0b8a048f7fbefd6f4bb Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Thu, 1 Aug 2024 15:15:07 -0700 Subject: [PATCH 29/45] Remove unnecessary body copy --- .../points_to/points_to_analysis.rs | 34 +++++++++---------- .../transform/check_uninit/delayed_ub/mod.rs | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 0a7f0e99e89d..241a02d53f61 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -47,7 +47,7 @@ use std::collections::HashSet; /// Main points-to analysis object. struct PointsToAnalysis<'a, 'tcx> { def_id: DefId, - body: Body<'tcx>, + body: &'a Body<'tcx>, tcx: TyCtxt<'tcx>, /// This will be used in the future to resolve function pointer and vtable calls. Currently, we /// can resolve call graph edges just by looking at the terminators and erroring if we can't @@ -61,7 +61,7 @@ struct PointsToAnalysis<'a, 'tcx> { /// Public points-to analysis entry point. Performs the analysis on a body, outputting the graph /// containing aliasing information of the body itself and any body reachable from it. pub fn run_points_to_analysis<'tcx>( - body: Body<'tcx>, + body: &Body<'tcx>, tcx: TyCtxt<'tcx>, def_id: DefId, call_graph: &CallGraph, @@ -73,18 +73,18 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { /// Perform the analysis on a body, outputting the graph containing aliasing information of the /// body itself and any body reachable from it. 
pub fn run( - body: Body<'tcx>, + body: &'a Body<'tcx>, tcx: TyCtxt<'tcx>, def_id: DefId, call_graph: &'a CallGraph, initial_graph: PointsToGraph<'tcx>, ) -> PointsToGraph<'tcx> { - let analysis = Self { body: body.clone(), tcx, def_id, call_graph, initial_graph }; + let analysis = Self { body, tcx, def_id, call_graph, initial_graph }; // This creates a fixpoint solver using the initial graph, the body, and extra information // and solves the dataflow problem, producing the cursor, which contains dataflow state for // each instruction in the body. let mut cursor = - analysis.into_engine(tcx, &body).iterate_to_fixpoint().into_results_cursor(&body); + analysis.into_engine(tcx, body).iterate_to_fixpoint().into_results_cursor(body); // We collect dataflow state at each `Return` terminator to determine the full aliasing // graph for the function. This is sound since those are the only places where the function // finishes, so the dataflow state at those places will be a union of dataflow states @@ -265,7 +265,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { // Attempt to resolve callee. For now, we panic if the callee cannot be resolved (e.g., // if a function pointer call is used), but we could leverage the call graph to resolve // it. 
- let instance = match try_resolve_instance(&self.body, func, self.tcx) { + let instance = match try_resolve_instance(self.body, func, self.tcx) { Ok(instance) => instance, Err(reason) => { unimplemented!("{reason}") @@ -286,7 +286,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { "Unexpected number of arguments for `{name}`" ); assert!(matches!( - args[0].node.ty(&self.body, self.tcx).kind(), + args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); let src_set = self.follow_rvalue(state, args[2].node.clone()); @@ -305,7 +305,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { "Unexpected number of arguments for `{name}`" ); assert!(matches!( - args[0].node.ty(&self.body, self.tcx).kind(), + args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Not) )); let src_set = self.follow_deref(state, args[0].node.clone()); @@ -322,7 +322,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { "Unexpected number of arguments for `{name}`" ); assert!(matches!( - args[0].node.ty(&self.body, self.tcx).kind(), + args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); let dst_set = self.follow_deref(state, args[0].node.clone()); @@ -338,7 +338,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { "Unexpected number of arguments for `{name}`" ); assert!(matches!( - args[0].node.ty(&self.body, self.tcx).kind(), + args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); let src_set = self.follow_rvalue(state, args[1].node.clone()); @@ -354,11 +354,11 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { "copy" => { assert_eq!(args.len(), 3, "Unexpected number of arguments for `copy`"); assert!(matches!( - args[0].node.ty(&self.body, self.tcx).kind(), + args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Not) )); assert!(matches!( - args[1].node.ty(&self.body, self.tcx).kind(), + 
args[1].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); self.apply_copy_effect( @@ -371,11 +371,11 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { "volatile_copy_memory" | "volatile_copy_nonoverlapping_memory" => { assert_eq!(args.len(), 3, "Unexpected number of arguments for `copy`"); assert!(matches!( - args[0].node.ty(&self.body, self.tcx).kind(), + args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); assert!(matches!( - args[1].node.ty(&self.body, self.tcx).kind(), + args[1].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Not) )); self.apply_copy_effect( @@ -392,7 +392,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { "Unexpected number of arguments for `volatile_load`" ); assert!(matches!( - args[0].node.ty(&self.body, self.tcx).kind(), + args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Not) )); // Destination of the return value. @@ -408,7 +408,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { "Unexpected number of arguments for `volatile_store`" ); assert!(matches!( - args[0].node.ty(&self.body, self.tcx).kind(), + args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); let lvalue_set = self.follow_deref(state, args[0].node.clone()); @@ -608,7 +608,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { // Run the analysis. 
let new_result = PointsToAnalysis::run( - new_body, + &new_body, self.tcx, instance.def_id(), self.call_graph, diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index a1989d87175b..a8d51674ded9 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -103,7 +103,7 @@ impl GlobalPass for DelayedUbPass { let internal_body = body.internal_mir(tcx); let internal_def_id = rustc_internal::internal(tcx, instance.def.def_id()); let results = run_points_to_analysis( - internal_body.clone(), + &internal_body, tcx, internal_def_id, call_graph, From 3b3fdda56eb65ac73f99a51354edbc7e1a383e21 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Thu, 1 Aug 2024 15:27:18 -0700 Subject: [PATCH 30/45] Update the documentation for `PointsToGraph` --- .../kani_middle/points_to/points_to_graph.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index 79bf81c38fd0..e6ba84d0fc8c 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -62,12 +62,21 @@ impl<'tcx> LocalMemLoc<'tcx> { } /// Graph data structure that stores the current results of the point-to analysis. The graph is -/// directed, so having an edge between two places means that one is pointing to the other. For -/// example, `a = &b` would translate to `a --> b` and `a = b` to `a --> {all pointees of b}`. +/// directed, so having an edge between two places means that one is pointing to the other. 
+/// +/// For example: +/// - `a = &b` would translate to `a --> b` +/// - `a = b` would translate to `a --> {all pointees of b}` (if `a` and `b` are pointers / +/// references) /// -/// Note that the aliasing is stored between places with no projections, which is sound but can be -/// imprecise. I.e., if two places have an edge in the graph, could mean that some scalar sub-places -/// (e.g. _1.0) of the places alias, too, but not the deref ones. +/// Note that the aliasing is not field-sensitive, since the nodes in the graph are places with no +/// projections, which is sound but can be imprecise. +/// +/// For example: +/// ``` +/// let ref_pair = (&a, &b); // Will add `ref_pair --> (a | b)` edges into the graph. +/// let first = ref_pair.0; // Will add `first --> (a | b)`, which is an overapproximation. +/// ``` #[derive(Clone, Debug, PartialEq, Eq)] pub struct PointsToGraph<'tcx> { /// A hash map of node --> {nodes} edges. edges: HashMap, HashSet>>, } From a2e03c6411fc87a2abd3c33d3dee113487a787c8 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Thu, 1 Aug 2024 15:29:17 -0700 Subject: [PATCH 31/45] Rename `GlobalMemLoc -> MemLoc` --- .../src/kani_middle/points_to/mod.rs | 2 +- .../points_to/points_to_analysis.rs | 12 ++++---- .../kani_middle/points_to/points_to_graph.rs | 28 +++++++++---------- .../delayed_ub/instrumentation_visitor.rs | 6 ++-- .../transform/check_uninit/delayed_ub/mod.rs | 4 +-- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/mod.rs b/kani-compiler/src/kani_middle/points_to/mod.rs index 269937a22143..73ae459639f8 100644 --- a/kani-compiler/src/kani_middle/points_to/mod.rs +++ b/kani-compiler/src/kani_middle/points_to/mod.rs @@ -8,4 +8,4 @@ mod points_to_analysis; mod points_to_graph; pub use points_to_analysis::run_points_to_analysis; -pub use points_to_graph::{GlobalMemLoc, LocalMemLoc, PointsToGraph}; +pub use points_to_graph::{MemLoc, LocalMemLoc, PointsToGraph}; diff --git 
a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 241a02d53f61..dac0b6477144 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -25,7 +25,7 @@ //! other place, we treat it as if the place itself aliases to another place. use crate::kani_middle::{ - points_to::{GlobalMemLoc, LocalMemLoc, PointsToGraph}, + points_to::{MemLoc, LocalMemLoc, PointsToGraph}, reachability::CallGraph, transform::RustcInternalMir, }; @@ -220,7 +220,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { } Rvalue::ThreadLocalRef(def_id) => { // We store a def_id of a static. - HashSet::from([GlobalMemLoc::Global(def_id)]) + HashSet::from([MemLoc::Global(def_id)]) } }; // Create an edge between all places which could be lvalue and all places rvalue @@ -498,7 +498,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { &self, state: &mut PointsToGraph<'tcx>, operand: Operand<'tcx>, - ) -> HashSet> { + ) -> HashSet> { match operand { Operand::Copy(place) | Operand::Move(place) => { // Find all places which are pointed to by the place. @@ -507,7 +507,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { Operand::Constant(const_operand) => { // Constants could point to a static, so need to check for that. 
if let Some(static_def_id) = const_operand.check_static_ptr(self.tcx) { - HashSet::from([GlobalMemLoc::Global(static_def_id)]) + HashSet::from([MemLoc::Global(static_def_id)]) } else { HashSet::new() } @@ -520,7 +520,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { &self, state: &mut PointsToGraph<'tcx>, operand: Operand<'tcx>, - ) -> HashSet> { + ) -> HashSet> { match operand { Operand::Copy(place) | Operand::Move(place) => state.follow_from_place( place.project_deeper(&[ProjectionElem::Deref], self.tcx), @@ -529,7 +529,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { Operand::Constant(const_operand) => { // Constants could point to a static, so need to check for that. if let Some(static_def_id) = const_operand.check_static_ptr(self.tcx) { - HashSet::from([GlobalMemLoc::Global(static_def_id)]) + HashSet::from([MemLoc::Global(static_def_id)]) } else { HashSet::new() } diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index e6ba84d0fc8c..d6f8c186c7fd 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -21,24 +21,24 @@ pub enum LocalMemLoc<'tcx> { /// A node tagged with a DefId, to differentiate between places across different functions. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] -pub enum GlobalMemLoc<'tcx> { +pub enum MemLoc<'tcx> { Local(DefId, LocalMemLoc<'tcx>), Global(DefId), } -impl<'tcx> GlobalMemLoc<'tcx> { +impl<'tcx> MemLoc<'tcx> { /// Returns DefId of the memory location. pub fn def_id(&self) -> DefId { match self { - GlobalMemLoc::Local(def_id, _) | GlobalMemLoc::Global(def_id) => *def_id, + MemLoc::Local(def_id, _) | MemLoc::Global(def_id) => *def_id, } } /// Returns LocalMemLoc of the memory location if available. 
pub fn maybe_local_mem_loc(&self) -> Option> { match self { - GlobalMemLoc::Local(_, mem_loc) => Some(*mem_loc), - GlobalMemLoc::Global(_) => None, + MemLoc::Local(_, mem_loc) => Some(*mem_loc), + MemLoc::Global(_) => None, } } } @@ -56,8 +56,8 @@ impl<'tcx> LocalMemLoc<'tcx> { } /// Tag the node with a DefId. - pub fn with_def_id(&self, def_id: DefId) -> GlobalMemLoc<'tcx> { - GlobalMemLoc::Local(def_id, *self) + pub fn with_def_id(&self, def_id: DefId) -> MemLoc<'tcx> { + MemLoc::Local(def_id, *self) } } @@ -80,7 +80,7 @@ impl<'tcx> LocalMemLoc<'tcx> { #[derive(Clone, Debug, PartialEq, Eq)] pub struct PointsToGraph<'tcx> { /// A hash map of node --> {nodes} edges. - edges: HashMap, HashSet>>, + edges: HashMap, HashSet>>, } impl<'tcx> PointsToGraph<'tcx> { @@ -89,12 +89,12 @@ impl<'tcx> PointsToGraph<'tcx> { } /// Collect all nodes which have incoming edges from `nodes`. - pub fn follow(&self, nodes: &HashSet>) -> HashSet> { + pub fn follow(&self, nodes: &HashSet>) -> HashSet> { nodes.iter().flat_map(|node| self.edges.get(node).cloned().unwrap_or_default()).collect() } /// For each node in `from`, add an edge to each node in `to`. - pub fn extend(&mut self, from: &HashSet>, to: &HashSet>) { + pub fn extend(&mut self, from: &HashSet>, to: &HashSet>) { for node in from.iter() { let node_pointees = self.edges.entry(*node).or_default(); node_pointees.extend(to.iter()); @@ -107,7 +107,7 @@ impl<'tcx> PointsToGraph<'tcx> { &self, place: Place<'tcx>, current_def_id: DefId, - ) -> HashSet> { + ) -> HashSet> { let place_or_alloc: LocalMemLoc = Place { local: place.local, projection: List::empty() }.into(); let mut node_set = HashSet::from([place_or_alloc.with_def_id(current_def_id)]); @@ -156,12 +156,12 @@ impl<'tcx> PointsToGraph<'tcx> { /// Find a transitive closure of the graph starting from a set of given locations; this also /// includes statics. 
- pub fn transitive_closure(&self, targets: HashSet>) -> PointsToGraph<'tcx> { + pub fn transitive_closure(&self, targets: HashSet>) -> PointsToGraph<'tcx> { let mut result = PointsToGraph::empty(); // Working queue. let mut queue = VecDeque::from_iter(targets); // Add all statics, as they can be accessed at any point. - let statics = self.edges.keys().filter(|node| matches!(node, GlobalMemLoc::Global(_))); + let statics = self.edges.keys().filter(|node| matches!(node, MemLoc::Global(_))); queue.extend(statics); // Add all entries. while !queue.is_empty() { @@ -177,7 +177,7 @@ impl<'tcx> PointsToGraph<'tcx> { } /// Retrieve all places to which a given place is pointing to. - pub fn pointees_of(&self, target: &GlobalMemLoc<'tcx>) -> HashSet> { + pub fn pointees_of(&self, target: &MemLoc<'tcx>) -> HashSet> { self.edges.get(&target).unwrap_or(&HashSet::new()).clone() } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs index eb4121fa9d08..91d216993564 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -5,7 +5,7 @@ //! UB. In practice, that means collecting all instructions where the place is featured. use crate::kani_middle::{ - points_to::{GlobalMemLoc, PointsToGraph}, + points_to::{MemLoc, PointsToGraph}, transform::{ body::{InsertPosition, MutableBody, SourceInstruction}, check_uninit::{ @@ -36,7 +36,7 @@ pub struct InstrumentationVisitor<'a, 'tcx> { /// Aliasing analysis data. 
points_to: &'a PointsToGraph<'tcx>, /// The list of places we should be looking for, ignoring others - analysis_targets: &'a HashSet>, + analysis_targets: &'a HashSet>, current_def_id: InternalDefId, tcx: TyCtxt<'tcx>, } @@ -59,7 +59,7 @@ impl<'a, 'tcx> TargetFinder for InstrumentationVisitor<'a, 'tcx> { impl<'a, 'tcx> InstrumentationVisitor<'a, 'tcx> { pub fn new( points_to: &'a PointsToGraph<'tcx>, - analysis_targets: &'a HashSet>, + analysis_targets: &'a HashSet>, current_def_id: InternalDefId, tcx: TyCtxt<'tcx>, ) -> Self { diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index a8d51674ded9..11b67babe2a8 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -8,7 +8,7 @@ use std::collections::HashSet; use crate::args::ExtraChecks; use crate::kani_middle::{ - points_to::{run_points_to_analysis, GlobalMemLoc, LocalMemLoc, PointsToGraph}, + points_to::{run_points_to_analysis, MemLoc, LocalMemLoc, PointsToGraph}, reachability::CallGraph, transform::{ body::{CheckType, MutableBody}, @@ -75,7 +75,7 @@ impl GlobalPass for DelayedUbPass { LocalMemLoc::Place(rustc_internal::internal(tcx, place)).with_def_id(def_id) } AnalysisTarget::Static(static_def) => { - GlobalMemLoc::Global(rustc_internal::internal(tcx, static_def)) + MemLoc::Global(rustc_internal::internal(tcx, static_def)) } }) }) From 186abe3bfee4d1428756e9d095c4f4b996b57610 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Thu, 1 Aug 2024 15:31:15 -0700 Subject: [PATCH 32/45] Rename `MemLoc::Global` -> `MemLoc::Static` --- .../src/kani_middle/points_to/points_to_analysis.rs | 6 +++--- .../src/kani_middle/points_to/points_to_graph.rs | 8 ++++---- .../kani_middle/transform/check_uninit/delayed_ub/mod.rs | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git 
a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index dac0b6477144..146cc5e51d9b 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -220,7 +220,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { } Rvalue::ThreadLocalRef(def_id) => { // We store a def_id of a static. - HashSet::from([MemLoc::Global(def_id)]) + HashSet::from([MemLoc::Static(def_id)]) } }; // Create an edge between all places which could be lvalue and all places rvalue @@ -507,7 +507,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { Operand::Constant(const_operand) => { // Constants could point to a static, so need to check for that. if let Some(static_def_id) = const_operand.check_static_ptr(self.tcx) { - HashSet::from([MemLoc::Global(static_def_id)]) + HashSet::from([MemLoc::Static(static_def_id)]) } else { HashSet::new() } @@ -529,7 +529,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { Operand::Constant(const_operand) => { // Constants could point to a static, so need to check for that. if let Some(static_def_id) = const_operand.check_static_ptr(self.tcx) { - HashSet::from([MemLoc::Global(static_def_id)]) + HashSet::from([MemLoc::Static(static_def_id)]) } else { HashSet::new() } diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index d6f8c186c7fd..4e8295cad253 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -23,14 +23,14 @@ pub enum LocalMemLoc<'tcx> { #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] pub enum MemLoc<'tcx> { Local(DefId, LocalMemLoc<'tcx>), - Global(DefId), + Static(DefId), } impl<'tcx> MemLoc<'tcx> { /// Returns DefId of the memory location. 
pub fn def_id(&self) -> DefId { match self { - MemLoc::Local(def_id, _) | MemLoc::Global(def_id) => *def_id, + MemLoc::Local(def_id, _) | MemLoc::Static(def_id) => *def_id, } } @@ -38,7 +38,7 @@ impl<'tcx> MemLoc<'tcx> { pub fn maybe_local_mem_loc(&self) -> Option> { match self { MemLoc::Local(_, mem_loc) => Some(*mem_loc), - MemLoc::Global(_) => None, + MemLoc::Static(_) => None, } } } @@ -161,7 +161,7 @@ impl<'tcx> PointsToGraph<'tcx> { // Working queue. let mut queue = VecDeque::from_iter(targets); // Add all statics, as they can be accessed at any point. - let statics = self.edges.keys().filter(|node| matches!(node, MemLoc::Global(_))); + let statics = self.edges.keys().filter(|node| matches!(node, MemLoc::Static(_))); queue.extend(statics); // Add all entries. while !queue.is_empty() { diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index 11b67babe2a8..963fcff1ff84 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -75,7 +75,7 @@ impl GlobalPass for DelayedUbPass { LocalMemLoc::Place(rustc_internal::internal(tcx, place)).with_def_id(def_id) } AnalysisTarget::Static(static_def) => { - MemLoc::Global(rustc_internal::internal(tcx, static_def)) + MemLoc::Static(rustc_internal::internal(tcx, static_def)) } }) }) From 6de9bfd60858a2f4250e61b9ffc61d859e445ac4 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Thu, 1 Aug 2024 15:36:56 -0700 Subject: [PATCH 33/45] Expand on the doc comment of `LocalMemLoc::Alloc` --- kani-compiler/src/kani_middle/points_to/points_to_graph.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index 4e8295cad253..e8b4b4b0aebe 100644 --- 
a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -14,7 +14,8 @@ use std::collections::{HashMap, HashSet, VecDeque}; /// A node in the points-to graph, which could be a place on the stack or a heap allocation. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] pub enum LocalMemLoc<'tcx> { - /// Using a combination of DefId + Location implements allocation-site abstraction. + /// Using a combination of DefId of the function where the allocation took place + Location + /// implements allocation-site abstraction. Alloc(DefId, Location), Place(Place<'tcx>), } From a0ef351aacc207a25de42b29a00892c391753f95 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Thu, 1 Aug 2024 16:07:24 -0700 Subject: [PATCH 34/45] More changes to `PointsToGraph` --- .../src/kani_middle/points_to/mod.rs | 2 +- .../points_to/points_to_analysis.rs | 124 +++++++++--------- .../kani_middle/points_to/points_to_graph.rs | 74 ++++------- .../delayed_ub/instrumentation_visitor.rs | 11 +- .../transform/check_uninit/delayed_ub/mod.rs | 29 ++-- 5 files changed, 104 insertions(+), 136 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/mod.rs b/kani-compiler/src/kani_middle/points_to/mod.rs index 73ae459639f8..21e8bdffc7b8 100644 --- a/kani-compiler/src/kani_middle/points_to/mod.rs +++ b/kani-compiler/src/kani_middle/points_to/mod.rs @@ -8,4 +8,4 @@ mod points_to_analysis; mod points_to_graph; pub use points_to_analysis::run_points_to_analysis; -pub use points_to_graph::{MemLoc, LocalMemLoc, PointsToGraph}; +pub use points_to_graph::{MemLoc, PointsToGraph}; diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 146cc5e51d9b..9e8b9176fc63 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -25,12 +25,11 @@ //! 
other place, we treat it as if the place itself aliases to another place. use crate::kani_middle::{ - points_to::{MemLoc, LocalMemLoc, PointsToGraph}, + points_to::{MemLoc, PointsToGraph}, reachability::CallGraph, transform::RustcInternalMir, }; use rustc_ast::Mutability; -use rustc_hir::def_id::DefId; use rustc_middle::{ mir::{ BasicBlock, BinOp, Body, CallReturnPlaces, Location, NonDivergingIntrinsic, Operand, Place, @@ -46,7 +45,7 @@ use std::collections::HashSet; /// Main points-to analysis object. struct PointsToAnalysis<'a, 'tcx> { - def_id: DefId, + instance: Instance<'tcx>, body: &'a Body<'tcx>, tcx: TyCtxt<'tcx>, /// This will be used in the future to resolve function pointer and vtable calls. Currently, we @@ -63,10 +62,10 @@ struct PointsToAnalysis<'a, 'tcx> { pub fn run_points_to_analysis<'tcx>( body: &Body<'tcx>, tcx: TyCtxt<'tcx>, - def_id: DefId, + instance: Instance<'tcx>, call_graph: &CallGraph, ) -> PointsToGraph<'tcx> { - PointsToAnalysis::run(body, tcx, def_id, call_graph, PointsToGraph::empty()) + PointsToAnalysis::run(body, tcx, instance, call_graph, PointsToGraph::empty()) } impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { @@ -75,11 +74,11 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { pub fn run( body: &'a Body<'tcx>, tcx: TyCtxt<'tcx>, - def_id: DefId, + instance: Instance<'tcx>, call_graph: &'a CallGraph, initial_graph: PointsToGraph<'tcx>, ) -> PointsToGraph<'tcx> { - let analysis = Self { body, tcx, def_id, call_graph, initial_graph }; + let analysis = Self { body, tcx, instance, call_graph, initial_graph }; // This creates a fixpoint solver using the initial graph, the body, and extra information // and solves the dataflow problem, producing the cursor, which contains dataflow state for // each instruction in the body. @@ -95,7 +94,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { // Switch the cursor to the end of the block ending with `Return`. 
cursor.seek_to_block_end(idx.into()); // Retrieve the dataflow state and join into the results graph. - results.consume(cursor.get().clone()); + results.merge(cursor.get().clone()); } } results @@ -119,7 +118,7 @@ impl<'a, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'tcx> { /// Dataflow state instantiated at the entry into the body; this should be the initial dataflow /// graph. fn initialize_start_block(&self, _body: &Body<'tcx>, state: &mut Self::Domain) { - state.consume(self.initial_graph.clone()); + state.merge(self.initial_graph.clone()); } } @@ -138,7 +137,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { StatementKind::Assign(assign_box) => { let (place, rvalue) = *assign_box.clone(); // Resolve all dereference projections for the lvalue. - let lvalue_set = state.follow_from_place(place, self.def_id); + let lvalue_set = state.follow_from_place(place, self.instance); // Determine all places rvalue could point to. let rvalue_set = match rvalue { // Using the operand unchanged requires determining where it could point, which @@ -150,7 +149,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { Rvalue::Ref(_, _, place) | Rvalue::AddressOf(_, place) => { // Here, a reference to a place is created, which leaves the place // unchanged. - state.follow_from_place(place, self.def_id) + state.follow_from_place(place, self.instance) } Rvalue::BinaryOp(bin_op, operands) => { match bin_op { @@ -216,11 +215,11 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { } Rvalue::CopyForDeref(place) => { // Resolve pointees of a place. - state.follow(&state.follow_from_place(place, self.def_id)) + state.successors(&state.follow_from_place(place, self.instance)) } Rvalue::ThreadLocalRef(def_id) => { // We store a def_id of a static. 
- HashSet::from([MemLoc::Static(def_id)]) + HashSet::from([MemLoc::new_static_allocation(def_id)]) } }; // Create an edge between all places which could be lvalue and all places rvalue @@ -292,8 +291,8 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { let src_set = self.follow_rvalue(state, args[2].node.clone()); let dst_set = self.follow_deref(state, args[0].node.clone()); let destination_set = - state.follow_from_place(*destination, self.def_id); - state.extend(&destination_set, &state.follow(&dst_set)); + state.follow_from_place(*destination, self.instance); + state.extend(&destination_set, &state.successors(&dst_set)); state.extend(&dst_set, &src_set); } // All `atomic_load` intrinsics take `src` as an argument. @@ -310,8 +309,8 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { )); let src_set = self.follow_deref(state, args[0].node.clone()); let destination_set = - state.follow_from_place(*destination, self.def_id); - state.extend(&destination_set, &state.follow(&src_set)); + state.follow_from_place(*destination, self.instance); + state.extend(&destination_set, &state.successors(&src_set)); } // All `atomic_store` intrinsics take `dst, val` as arguments. // This is equivalent to `*dst = val`. @@ -344,8 +343,8 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { let src_set = self.follow_rvalue(state, args[1].node.clone()); let dst_set = self.follow_deref(state, args[0].node.clone()); let destination_set = - state.follow_from_place(*destination, self.def_id); - state.extend(&destination_set, &state.follow(&dst_set)); + state.follow_from_place(*destination, self.instance); + state.extend(&destination_set, &state.successors(&dst_set)); state.extend(&dst_set, &src_set); } }; @@ -396,9 +395,9 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { TyKind::RawPtr(_, Mutability::Not) )); // Destination of the return value. 
- let lvalue_set = state.follow_from_place(*destination, self.def_id); + let lvalue_set = state.follow_from_place(*destination, self.instance); let rvalue_set = self.follow_deref(state, args[0].node.clone()); - state.extend(&lvalue_set, &state.follow(&rvalue_set)); + state.extend(&lvalue_set, &state.successors(&rvalue_set)); } // Semantically equivalent *a = b. "volatile_store" | "unaligned_volatile_store" => { @@ -434,10 +433,12 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { // This is an internal function responsible for heap allocation, // which creates a new node we need to add to the points-to graph. "alloc::alloc::__rust_alloc" | "alloc::alloc::__rust_alloc_zeroed" => { - let lvalue_set = state.follow_from_place(*destination, self.def_id); - let rvalue_set = - HashSet::from([LocalMemLoc::new_alloc(self.def_id, location) - .with_def_id(self.def_id)]); + let lvalue_set = + state.follow_from_place(*destination, self.instance); + let rvalue_set = HashSet::from([MemLoc::new_heap_allocation( + self.instance, + location, + )]); state.extend(&lvalue_set, &rvalue_set); } _ => {} @@ -490,7 +491,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { ) { let lvalue_set = self.follow_deref(state, to); let rvalue_set = self.follow_deref(state, from); - state.extend(&lvalue_set, &state.follow(&rvalue_set)); + state.extend(&lvalue_set, &state.successors(&rvalue_set)); } /// Find all places where the operand could point to at the current stage of the program. @@ -502,12 +503,12 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { match operand { Operand::Copy(place) | Operand::Move(place) => { // Find all places which are pointed to by the place. - state.follow(&state.follow_from_place(place, self.def_id)) + state.successors(&state.follow_from_place(place, self.instance)) } Operand::Constant(const_operand) => { // Constants could point to a static, so need to check for that. 
if let Some(static_def_id) = const_operand.check_static_ptr(self.tcx) { - HashSet::from([MemLoc::Static(static_def_id)]) + HashSet::from([MemLoc::new_static_allocation(static_def_id)]) } else { HashSet::new() } @@ -524,12 +525,12 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { match operand { Operand::Copy(place) | Operand::Move(place) => state.follow_from_place( place.project_deeper(&[ProjectionElem::Deref], self.tcx), - self.def_id, + self.instance, ), Operand::Constant(const_operand) => { // Constants could point to a static, so need to check for that. if let Some(static_def_id) = const_operand.check_static_ptr(self.tcx) { - HashSet::from([MemLoc::Static(static_def_id)]) + HashSet::from([MemLoc::new_static_allocation(static_def_id)]) } else { HashSet::new() } @@ -541,7 +542,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { fn apply_regular_call_effect( &mut self, state: &mut PointsToGraph<'tcx>, - instance: Instance, + instance: Instance<'tcx>, args: &[Spanned>], destination: &Place<'tcx>, ) { @@ -558,8 +559,8 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { for arg in args.iter() { match arg.node { Operand::Copy(place) | Operand::Move(place) => { - initial_graph.consume( - state.transitive_closure(state.follow_from_place(place, self.def_id)), + initial_graph.merge( + state.transitive_closure(state.follow_from_place(place, self.instance)), ); } Operand::Constant(_) => {} @@ -573,21 +574,22 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { // Sanity check. The first argument is the closure itself and the second argument is the tupled arguments from the caller. assert!(args.len() == 2); // First, connect all upvars. 
- let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { - local: 1usize.into(), - projection: List::empty(), - }) - .with_def_id(instance.def_id())]); + let lvalue_set = HashSet::from([MemLoc::new_stack_allocation( + instance, + Place { local: 1usize.into(), projection: List::empty() }, + )]); let rvalue_set = self.follow_rvalue(state, args[0].node.clone()); initial_graph.extend(&lvalue_set, &rvalue_set); // Then, connect the argument tuple to each of the spread arguments. let spread_arg_operand = args[1].node.clone(); for i in 0..new_body.arg_count { - let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { - local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. - projection: List::empty(), - }) - .with_def_id(instance.def_id())]); + let lvalue_set = HashSet::from([MemLoc::new_stack_allocation( + instance, + Place { + local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. + projection: List::empty(), + }, + )]); // This conservatively assumes all arguments alias to all parameters. This can be // improved by supporting scalar places. let rvalue_set = self.follow_rvalue(state, spread_arg_operand.clone()); @@ -596,34 +598,30 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { } else { // Otherwise, simply connect all arguments to parameters. for (i, arg) in args.iter().enumerate() { - let lvalue_set = HashSet::from([LocalMemLoc::Place(Place { - local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. - projection: List::empty(), - }) - .with_def_id(instance.def_id())]); + let lvalue_set = HashSet::from([MemLoc::new_stack_allocation( + instance, + Place { + local: (i + 1).into(), // Since arguments in the callee are starting with 1, account for that. + projection: List::empty(), + }, + )]); let rvalue_set = self.follow_rvalue(state, arg.node.clone()); initial_graph.extend(&lvalue_set, &rvalue_set); } } // Run the analysis. 
- let new_result = PointsToAnalysis::run( - &new_body, - self.tcx, - instance.def_id(), - self.call_graph, - initial_graph, - ); + let new_result = + PointsToAnalysis::run(&new_body, self.tcx, instance, self.call_graph, initial_graph); // Merge the results into the current state. - state.consume(new_result); + state.merge(new_result); // Connect the return value to the return destination. - let lvalue_set = state.follow_from_place(*destination, self.def_id); - let rvalue_set = HashSet::from([LocalMemLoc::Place(Place { - local: 0usize.into(), - projection: List::empty(), - }) - .with_def_id(instance.def_id())]); - state.extend(&lvalue_set, &state.follow(&rvalue_set)); + let lvalue_set = state.follow_from_place(*destination, self.instance); + let rvalue_set = HashSet::from([MemLoc::new_stack_allocation( + instance, + Place { local: 0usize.into(), projection: List::empty() }, + )]); + state.extend(&lvalue_set, &state.successors(&rvalue_set)); } } diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index e8b4b4b0aebe..26f0850df72a 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -6,65 +6,41 @@ use rustc_hir::def_id::DefId; use rustc_middle::{ mir::{Location, Place, ProjectionElem}, - ty::List, + ty::{Instance, List}, }; use rustc_mir_dataflow::{fmt::DebugWithContext, JoinSemiLattice}; use std::collections::{HashMap, HashSet, VecDeque}; -/// A node in the points-to graph, which could be a place on the stack or a heap allocation. -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] -pub enum LocalMemLoc<'tcx> { - /// Using a combination of DefId of the function where the allocation took place + Location - /// implements allocation-site abstraction. - Alloc(DefId, Location), - Place(Place<'tcx>), -} - -/// A node tagged with a DefId, to differentiate between places across different functions. 
+/// A node in the points-to graph, which could be a place on the stack, a heap allocation, or a static. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] pub enum MemLoc<'tcx> { - Local(DefId, LocalMemLoc<'tcx>), + Stack(Instance<'tcx>, Place<'tcx>), + /// Using a combination of the instance of the function where the allocation took place and the + /// location of the allocation inside this function implements allocation-site abstraction. + Heap(Instance<'tcx>, Location), Static(DefId), } impl<'tcx> MemLoc<'tcx> { - /// Returns DefId of the memory location. - pub fn def_id(&self) -> DefId { - match self { - MemLoc::Local(def_id, _) | MemLoc::Static(def_id) => *def_id, - } + /// Create a memory location representing a new heap allocation site. + pub fn new_heap_allocation(instance: Instance<'tcx>, location: Location) -> Self { + MemLoc::Heap(instance, location) } - /// Returns LocalMemLoc of the memory location if available. - pub fn maybe_local_mem_loc(&self) -> Option> { - match self { - MemLoc::Local(_, mem_loc) => Some(*mem_loc), - MemLoc::Static(_) => None, - } + /// Create a memory location representing a new stack allocation. + pub fn new_stack_allocation(instance: Instance<'tcx>, place: Place<'tcx>) -> Self { + MemLoc::Stack(instance, place) } -} -impl<'tcx> From> for LocalMemLoc<'tcx> { - fn from(value: Place<'tcx>) -> Self { - LocalMemLoc::Place(value) - } -} - -impl<'tcx> LocalMemLoc<'tcx> { - /// Register a new heap allocation site. - pub fn new_alloc(def_id: DefId, location: Location) -> Self { - LocalMemLoc::Alloc(def_id, location) - } - - /// Tag the node with a DefId. - pub fn with_def_id(&self, def_id: DefId) -> MemLoc<'tcx> { - MemLoc::Local(def_id, *self) + /// Create a memory location representing a new static allocation. + pub fn new_static_allocation(static_def: DefId) -> Self { + MemLoc::Static(static_def) } } /// Graph data structure that stores the current results of the point-to analysis. 
The graph is -/// directed, so having an edge between two places means that one is pointing to the other. -/// +/// directed, so having an edge between two places means that one is pointing to the other. +/// /// For example: /// - `a = &b` would translate to `a --> b` /// - `a = b` would translate to `a --> {all pointees of b}` (if `a` and `b` are pointers / @@ -72,7 +48,7 @@ impl<'tcx> LocalMemLoc<'tcx> { /// /// Note that the aliasing is not field-sensitive, since the nodes in the graph are places with no /// projections, which is sound but can be imprecise. -/// +/// /// For example: /// ``` /// let ref_pair = (&a, &b); // Will add `ref_pair --> (a | b)` edges into the graph. @@ -90,7 +66,7 @@ impl<'tcx> PointsToGraph<'tcx> { } /// Collect all nodes which have incoming edges from `nodes`. - pub fn follow(&self, nodes: &HashSet>) -> HashSet> { + pub fn successors(&self, nodes: &HashSet>) -> HashSet> { nodes.iter().flat_map(|node| self.edges.get(node).cloned().unwrap_or_default()).collect() } @@ -107,15 +83,15 @@ impl<'tcx> PointsToGraph<'tcx> { pub fn follow_from_place( &self, place: Place<'tcx>, - current_def_id: DefId, + instance: Instance<'tcx>, ) -> HashSet> { - let place_or_alloc: LocalMemLoc = - Place { local: place.local, projection: List::empty() }.into(); - let mut node_set = HashSet::from([place_or_alloc.with_def_id(current_def_id)]); + let place_without_projections = Place { local: place.local, projection: List::empty() }; + let mut node_set = + HashSet::from([MemLoc::new_stack_allocation(instance, place_without_projections)]); for projection in place.projection { match projection { ProjectionElem::Deref => { - node_set = self.follow(&node_set); + node_set = self.successors(&node_set); } ProjectionElem::Field(..) | ProjectionElem::Index(..) @@ -183,7 +159,7 @@ impl<'tcx> PointsToGraph<'tcx> { } // Merge the other graph into self, consuming it. 
- pub fn consume(&mut self, other: PointsToGraph<'tcx>) { + pub fn merge(&mut self, other: PointsToGraph<'tcx>) { for (from, to) in other.edges { let existing_to = self.edges.entry(from).or_default(); existing_to.extend(to); diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs index 91d216993564..1831264c2ec4 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -14,8 +14,7 @@ use crate::kani_middle::{ }, }, }; -use rustc_hir::def_id::DefId as InternalDefId; -use rustc_middle::ty::TyCtxt; +use rustc_middle::ty::{Instance as InternalInstance, TyCtxt}; use rustc_smir::rustc_internal; use stable_mir::mir::{ visit::{Location, PlaceContext}, @@ -37,7 +36,7 @@ pub struct InstrumentationVisitor<'a, 'tcx> { points_to: &'a PointsToGraph<'tcx>, /// The list of places we should be looking for, ignoring others analysis_targets: &'a HashSet>, - current_def_id: InternalDefId, + current_instance: InternalInstance<'tcx>, tcx: TyCtxt<'tcx>, } @@ -60,7 +59,7 @@ impl<'a, 'tcx> InstrumentationVisitor<'a, 'tcx> { pub fn new( points_to: &'a PointsToGraph<'tcx>, analysis_targets: &'a HashSet>, - current_def_id: InternalDefId, + current_instance: InternalInstance<'tcx>, tcx: TyCtxt<'tcx>, ) -> Self { Self { @@ -69,7 +68,7 @@ impl<'a, 'tcx> InstrumentationVisitor<'a, 'tcx> { target: None, points_to, analysis_targets, - current_def_id, + current_instance, tcx, } } @@ -116,7 +115,7 @@ impl<'a, 'tcx> MirVisitor for InstrumentationVisitor<'a, 'tcx> { // Match the place by whatever it is pointing to and find an intersection with the targets. 
if self .points_to - .follow_from_place(rustc_internal::internal(self.tcx, place), self.current_def_id) + .follow_from_place(rustc_internal::internal(self.tcx, place), self.current_instance) .intersection(&self.analysis_targets) .next() .is_some() diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index 963fcff1ff84..f223474045c4 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -8,7 +8,7 @@ use std::collections::HashSet; use crate::args::ExtraChecks; use crate::kani_middle::{ - points_to::{run_points_to_analysis, MemLoc, LocalMemLoc, PointsToGraph}, + points_to::{run_points_to_analysis, MemLoc, PointsToGraph}, reachability::CallGraph, transform::{ body::{CheckType, MutableBody}, @@ -27,7 +27,6 @@ use stable_mir::{ mir::mono::{Instance, MonoItem}, mir::MirVisitor, ty::FnDef, - CrateDef, }; mod initial_target_visitor; @@ -64,18 +63,18 @@ impl GlobalPass for DelayedUbPass { let targets: HashSet<_> = instances .iter() .flat_map(|instance| { - let def_id = rustc_internal::internal(tcx, instance.def.def_id()); let body = instance.body().unwrap(); let mut visitor = InitialTargetVisitor::new(body.clone()); visitor.visit_body(&body); // Convert all places into the format of aliasing graph for later comparison. 
visitor.into_targets().into_iter().map(move |analysis_target| match analysis_target { - AnalysisTarget::Place(place) => { - LocalMemLoc::Place(rustc_internal::internal(tcx, place)).with_def_id(def_id) - } + AnalysisTarget::Place(place) => MemLoc::new_stack_allocation( + rustc_internal::internal(tcx, instance), + rustc_internal::internal(tcx, place), + ), AnalysisTarget::Static(static_def) => { - MemLoc::Static(rustc_internal::internal(tcx, static_def)) + MemLoc::new_static_allocation(rustc_internal::internal(tcx, static_def)) } }) }) @@ -101,14 +100,10 @@ impl GlobalPass for DelayedUbPass { // Dataflow analysis does not yet work with StableMIR, so need to perform backward // conversion. let internal_body = body.internal_mir(tcx); - let internal_def_id = rustc_internal::internal(tcx, instance.def.def_id()); - let results = run_points_to_analysis( - &internal_body, - tcx, - internal_def_id, - call_graph, - ); - global_points_to_graph.consume(results); + let internal_instance = rustc_internal::internal(tcx, instance); + let results = + run_points_to_analysis(&internal_body, tcx, internal_instance, call_graph); + global_points_to_graph.merge(results); } } @@ -124,7 +119,7 @@ impl GlobalPass for DelayedUbPass { // Instrument each instance based on the final targets we found. 
for instance in instances { - let internal_def_id = rustc_internal::internal(tcx, instance.def.def_id()); + let internal_instance = rustc_internal::internal(tcx, instance); let mut instrumenter = UninitInstrumenter { check_type: self.check_type.clone(), mem_init_fn_cache: &mut self.mem_init_fn_cache, @@ -135,7 +130,7 @@ impl GlobalPass for DelayedUbPass { let target_finder = InstrumentationVisitor::new( &global_points_to_graph, &analysis_targets, - internal_def_id, + internal_instance, tcx, ); let (instrumentation_added, body) = From 5114525a7e2752d71972e518b0dffc633eac98ce Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Thu, 1 Aug 2024 16:12:00 -0700 Subject: [PATCH 35/45] Remove confusing references to "scalar places" --- .../src/kani_middle/points_to/points_to_analysis.rs | 3 +-- kani-compiler/src/kani_middle/points_to/points_to_graph.rs | 7 +++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 9e8b9176fc63..9c1daa5227ad 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -590,8 +590,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { projection: List::empty(), }, )]); - // This conservatively assumes all arguments alias to all parameters. This can be - // improved by supporting scalar places. + // This conservatively assumes all arguments alias to all parameters. 
let rvalue_set = self.follow_rvalue(state, spread_arg_operand.clone()); initial_graph.extend(&lvalue_set, &rvalue_set); } diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index 26f0850df72a..9e49a74ee7a1 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -78,8 +78,11 @@ impl<'tcx> PointsToGraph<'tcx> { } } - /// Collect all scalar places to which a given place can alias. This is needed to resolve all - /// dereference projections. + /// Collect all places to which a given place can alias. + /// + /// We automatically resolve dereference projections here (by finding successors for each + /// dereference projection we encounter), which is valid as long as we do it for every place we + /// add to the graph. pub fn follow_from_place( &self, place: Place<'tcx>, From 759a540cbf8379a7a294a869fe5b25aec80d249d Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Thu, 1 Aug 2024 16:13:12 -0700 Subject: [PATCH 36/45] Accept suggestion from code review Co-authored-by: Celina G. Val --- kani-compiler/src/kani_middle/points_to/points_to_graph.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index 9e49a74ee7a1..c086ba97394b 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -144,8 +144,7 @@ impl<'tcx> PointsToGraph<'tcx> { let statics = self.edges.keys().filter(|node| matches!(node, MemLoc::Static(_))); queue.extend(statics); // Add all entries. 
- while !queue.is_empty() { - let next_target = queue.pop_front().unwrap(); + while let Some(next_target) = queue.pop_front() { result.edges.entry(next_target).or_insert_with(|| { let outgoing_edges = self.edges.get(&next_target).cloned().unwrap_or(HashSet::new()); From c7eed14cfbf1a179060e68eec25cd48dd983f011 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 2 Aug 2024 08:44:31 -0700 Subject: [PATCH 37/45] Formatting change --- kani-compiler/src/kani_middle/points_to/points_to_graph.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index c086ba97394b..0f2771be3d74 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -78,11 +78,11 @@ impl<'tcx> PointsToGraph<'tcx> { } } - /// Collect all places to which a given place can alias. - /// + /// Collect all places to which a given place can alias. + /// /// We automatically resolve dereference projections here (by finding successors for each /// dereference projection we encounter), which is valid as long as we do it for every place we - /// add to the graph. + /// add to the graph. 
pub fn follow_from_place( &self, place: Place<'tcx>, From 136d5a230918c8328a5b591cb4fa5a779120808f Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 2 Aug 2024 09:05:34 -0700 Subject: [PATCH 38/45] Remove `merge` method duplicate --- .../points_to/points_to_analysis.rs | 12 ++++---- .../kani_middle/points_to/points_to_graph.rs | 29 ++++--------------- .../transform/check_uninit/delayed_ub/mod.rs | 3 +- 3 files changed, 13 insertions(+), 31 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 9c1daa5227ad..e3aa70287ea5 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -38,7 +38,7 @@ use rustc_middle::{ }, ty::{Instance, InstanceKind, List, ParamEnv, TyCtxt, TyKind}, }; -use rustc_mir_dataflow::{Analysis, AnalysisDomain, Forward}; +use rustc_mir_dataflow::{Analysis, AnalysisDomain, Forward, JoinSemiLattice}; use rustc_smir::rustc_internal; use rustc_span::source_map::Spanned; use std::collections::HashSet; @@ -94,7 +94,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { // Switch the cursor to the end of the block ending with `Return`. cursor.seek_to_block_end(idx.into()); // Retrieve the dataflow state and join into the results graph. - results.merge(cursor.get().clone()); + results.join(&cursor.get().clone()); } } results @@ -118,7 +118,7 @@ impl<'a, 'tcx> AnalysisDomain<'tcx> for PointsToAnalysis<'a, 'tcx> { /// Dataflow state instantiated at the entry into the body; this should be the initial dataflow /// graph. 
fn initialize_start_block(&self, _body: &Body<'tcx>, state: &mut Self::Domain) { - state.merge(self.initial_graph.clone()); + state.join(&self.initial_graph.clone()); } } @@ -559,8 +559,8 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { for arg in args.iter() { match arg.node { Operand::Copy(place) | Operand::Move(place) => { - initial_graph.merge( - state.transitive_closure(state.follow_from_place(place, self.instance)), + initial_graph.join( + &state.transitive_closure(state.follow_from_place(place, self.instance)), ); } Operand::Constant(_) => {} @@ -613,7 +613,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { let new_result = PointsToAnalysis::run(&new_body, self.tcx, instance, self.call_graph, initial_graph); // Merge the results into the current state. - state.merge(new_result); + state.join(&new_result); // Connect the return value to the return destination. let lvalue_set = state.follow_from_place(*destination, self.instance); diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index 0f2771be3d74..3c2cb8ff09d9 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -159,14 +159,6 @@ impl<'tcx> PointsToGraph<'tcx> { pub fn pointees_of(&self, target: &MemLoc<'tcx>) -> HashSet> { self.edges.get(&target).unwrap_or(&HashSet::new()).clone() } - - // Merge the other graph into self, consuming it. - pub fn merge(&mut self, other: PointsToGraph<'tcx>) { - for (from, to) in other.edges { - let existing_to = self.edges.entry(from).or_default(); - existing_to.extend(to); - } - } } /// Since we are performing the analysis using a dataflow, we need to implement a proper monotonous @@ -178,22 +170,11 @@ impl<'tcx> JoinSemiLattice for PointsToGraph<'tcx> { let mut updated = false; // Check every node in the other graph. for (from, to) in other.edges.iter() { - // If node already exists in the original graph. 
- if self.edges.contains_key(from) { - // Check if there are any edges that are in the other graph but not in the original - // graph. - let difference: HashSet<_> = - to.difference(self.edges.get(from).unwrap()).cloned().collect(); - if !difference.is_empty() { - updated = true; - // Add all edges to the original graph. - self.edges.get_mut(from).unwrap().extend(difference); - } - } else { - // If node does not exist, add the node and all edges from it. - self.edges.insert(*from, to.clone()); - updated = true; - } + let existing_to = self.edges.entry(from.clone()).or_default(); + let initial_size = existing_to.len(); + existing_to.extend(to); + let new_size = existing_to.len(); + updated |= initial_size != new_size; } updated } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index f223474045c4..f9b306859e86 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -21,6 +21,7 @@ use crate::kani_queries::QueryDb; use initial_target_visitor::{AnalysisTarget, InitialTargetVisitor}; use instrumentation_visitor::InstrumentationVisitor; use rustc_middle::ty::TyCtxt; +use rustc_mir_dataflow::JoinSemiLattice; use rustc_session::config::OutputType; use rustc_smir::rustc_internal; use stable_mir::{ @@ -103,7 +104,7 @@ impl GlobalPass for DelayedUbPass { let internal_instance = rustc_internal::internal(tcx, instance); let results = run_points_to_analysis(&internal_body, tcx, internal_instance, call_graph); - global_points_to_graph.merge(results); + global_points_to_graph.join(&results); } } From 7cd2f80fe453a912f59e73f95e46b0de3adc6a54 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 2 Aug 2024 09:13:10 -0700 Subject: [PATCH 39/45] Accept suggestion from clippy --- kani-compiler/src/kani_middle/points_to/points_to_graph.rs | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index 3c2cb8ff09d9..81cc29782173 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -170,7 +170,7 @@ impl<'tcx> JoinSemiLattice for PointsToGraph<'tcx> { let mut updated = false; // Check every node in the other graph. for (from, to) in other.edges.iter() { - let existing_to = self.edges.entry(from.clone()).or_default(); + let existing_to = self.edges.entry(*from).or_default(); let initial_size = existing_to.len(); existing_to.extend(to); let new_size = existing_to.len(); From 54bea658fe884c823049e4e5581eac78d3bcb215 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 2 Aug 2024 14:39:29 -0700 Subject: [PATCH 40/45] Documentation changes / renaming --- .../points_to/points_to_analysis.rs | 86 ++++++++++--------- .../kani_middle/points_to/points_to_graph.rs | 5 +- .../delayed_ub/instrumentation_visitor.rs | 2 +- .../transform/check_uninit/delayed_ub/mod.rs | 2 +- 4 files changed, 52 insertions(+), 43 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index e3aa70287ea5..0601dc14c55c 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -137,19 +137,19 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { StatementKind::Assign(assign_box) => { let (place, rvalue) = *assign_box.clone(); // Resolve all dereference projections for the lvalue. - let lvalue_set = state.follow_from_place(place, self.instance); + let lvalue_set = state.resolve_place(place, self.instance); // Determine all places rvalue could point to. 
let rvalue_set = match rvalue { // Using the operand unchanged requires determining where it could point, which - // `follow_rvalue` does. + // `successors_for_operand` does. Rvalue::Use(operand) | Rvalue::ShallowInitBox(operand, _) | Rvalue::Cast(_, operand, _) - | Rvalue::Repeat(operand, ..) => self.follow_rvalue(state, operand), - Rvalue::Ref(_, _, place) | Rvalue::AddressOf(_, place) => { + | Rvalue::Repeat(operand, ..) => self.successors_for_operand(state, operand), + Rvalue::Ref(_, _, ref_place) | Rvalue::AddressOf(_, ref_place) => { // Here, a reference to a place is created, which leaves the place // unchanged. - state.follow_from_place(place, self.instance) + state.resolve_place(ref_place, self.instance) } Rvalue::BinaryOp(bin_op, operands) => { match bin_op { @@ -157,7 +157,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { // Offsetting a pointer should still be within the boundaries of the // same object, so we can simply use the operand unchanged. let (ptr, _) = *operands.clone(); - self.follow_rvalue(state, ptr) + self.successors_for_operand(state, ptr) } BinOp::Add | BinOp::AddUnchecked @@ -181,8 +181,8 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { // track them. We assume that even shifted addresses will be within // the same original object. let (l_operand, r_operand) = *operands.clone(); - let l_operand_set = self.follow_rvalue(state, l_operand); - let r_operand_set = self.follow_rvalue(state, r_operand); + let l_operand_set = self.successors_for_operand(state, l_operand); + let r_operand_set = self.successors_for_operand(state, r_operand); l_operand_set.union(&r_operand_set).cloned().collect() } BinOp::Eq @@ -199,7 +199,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { } Rvalue::UnaryOp(_, operand) => { // The same story from BinOp applies here, too. Need to track those things. - self.follow_rvalue(state, operand) + self.successors_for_operand(state, operand) } Rvalue::Len(..) 
| Rvalue::NullaryOp(..) | Rvalue::Discriminant(..) => { // All of those should yield a constant. @@ -210,12 +210,12 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { // their pointees. operands .into_iter() - .flat_map(|operand| self.follow_rvalue(state, operand)) + .flat_map(|operand| self.successors_for_operand(state, operand)) .collect() } Rvalue::CopyForDeref(place) => { // Resolve pointees of a place. - state.successors(&state.follow_from_place(place, self.instance)) + state.successors(&state.resolve_place(place, self.instance)) } Rvalue::ThreadLocalRef(def_id) => { // We store a def_id of a static. @@ -288,10 +288,12 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); - let src_set = self.follow_rvalue(state, args[2].node.clone()); - let dst_set = self.follow_deref(state, args[0].node.clone()); + let src_set = + self.successors_for_operand(state, args[2].node.clone()); + let dst_set = + self.successors_for_deref(state, args[0].node.clone()); let destination_set = - state.follow_from_place(*destination, self.instance); + state.resolve_place(*destination, self.instance); state.extend(&destination_set, &state.successors(&dst_set)); state.extend(&dst_set, &src_set); } @@ -307,9 +309,10 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Not) )); - let src_set = self.follow_deref(state, args[0].node.clone()); + let src_set = + self.successors_for_deref(state, args[0].node.clone()); let destination_set = - state.follow_from_place(*destination, self.instance); + state.resolve_place(*destination, self.instance); state.extend(&destination_set, &state.successors(&src_set)); } // All `atomic_store` intrinsics take `dst, val` as arguments. 
@@ -324,8 +327,10 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); - let dst_set = self.follow_deref(state, args[0].node.clone()); - let val_set = self.follow_rvalue(state, args[1].node.clone()); + let dst_set = + self.successors_for_deref(state, args[0].node.clone()); + let val_set = + self.successors_for_operand(state, args[1].node.clone()); state.extend(&dst_set, &val_set); } // All other `atomic` intrinsics take `dst, src` as arguments. @@ -340,10 +345,12 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); - let src_set = self.follow_rvalue(state, args[1].node.clone()); - let dst_set = self.follow_deref(state, args[0].node.clone()); + let src_set = + self.successors_for_operand(state, args[1].node.clone()); + let dst_set = + self.successors_for_deref(state, args[0].node.clone()); let destination_set = - state.follow_from_place(*destination, self.instance); + state.resolve_place(*destination, self.instance); state.extend(&destination_set, &state.successors(&dst_set)); state.extend(&dst_set, &src_set); } @@ -395,8 +402,8 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { TyKind::RawPtr(_, Mutability::Not) )); // Destination of the return value. - let lvalue_set = state.follow_from_place(*destination, self.instance); - let rvalue_set = self.follow_deref(state, args[0].node.clone()); + let lvalue_set = state.resolve_place(*destination, self.instance); + let rvalue_set = self.successors_for_deref(state, args[0].node.clone()); state.extend(&lvalue_set, &state.successors(&rvalue_set)); } // Semantically equivalent *a = b. 
@@ -410,8 +417,9 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { args[0].node.ty(self.body, self.tcx).kind(), TyKind::RawPtr(_, Mutability::Mut) )); - let lvalue_set = self.follow_deref(state, args[0].node.clone()); - let rvalue_set = self.follow_rvalue(state, args[1].node.clone()); + let lvalue_set = self.successors_for_deref(state, args[0].node.clone()); + let rvalue_set = + self.successors_for_operand(state, args[1].node.clone()); state.extend(&lvalue_set, &rvalue_set); } _ => { @@ -433,8 +441,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { // This is an internal function responsible for heap allocation, // which creates a new node we need to add to the points-to graph. "alloc::alloc::__rust_alloc" | "alloc::alloc::__rust_alloc_zeroed" => { - let lvalue_set = - state.follow_from_place(*destination, self.instance); + let lvalue_set = state.resolve_place(*destination, self.instance); let rvalue_set = HashSet::from([MemLoc::new_heap_allocation( self.instance, location, @@ -489,13 +496,13 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { from: Operand<'tcx>, to: Operand<'tcx>, ) { - let lvalue_set = self.follow_deref(state, to); - let rvalue_set = self.follow_deref(state, from); + let lvalue_set = self.successors_for_deref(state, to); + let rvalue_set = self.successors_for_deref(state, from); state.extend(&lvalue_set, &state.successors(&rvalue_set)); } /// Find all places where the operand could point to at the current stage of the program. - fn follow_rvalue( + fn successors_for_operand( &self, state: &mut PointsToGraph<'tcx>, operand: Operand<'tcx>, @@ -503,7 +510,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { match operand { Operand::Copy(place) | Operand::Move(place) => { // Find all places which are pointed to by the place. 
- state.successors(&state.follow_from_place(place, self.instance)) + state.successors(&state.resolve_place(place, self.instance)) } Operand::Constant(const_operand) => { // Constants could point to a static, so need to check for that. @@ -517,13 +524,13 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { } /// Find all places where the deref of the operand could point to at the current stage of the program. - fn follow_deref( + fn successors_for_deref( &self, state: &mut PointsToGraph<'tcx>, operand: Operand<'tcx>, ) -> HashSet> { match operand { - Operand::Copy(place) | Operand::Move(place) => state.follow_from_place( + Operand::Copy(place) | Operand::Move(place) => state.resolve_place( place.project_deeper(&[ProjectionElem::Deref], self.tcx), self.instance, ), @@ -559,9 +566,8 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { for arg in args.iter() { match arg.node { Operand::Copy(place) | Operand::Move(place) => { - initial_graph.join( - &state.transitive_closure(state.follow_from_place(place, self.instance)), - ); + initial_graph + .join(&state.transitive_closure(state.resolve_place(place, self.instance))); } Operand::Constant(_) => {} } @@ -578,7 +584,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { instance, Place { local: 1usize.into(), projection: List::empty() }, )]); - let rvalue_set = self.follow_rvalue(state, args[0].node.clone()); + let rvalue_set = self.successors_for_operand(state, args[0].node.clone()); initial_graph.extend(&lvalue_set, &rvalue_set); // Then, connect the argument tuple to each of the spread arguments. let spread_arg_operand = args[1].node.clone(); @@ -591,7 +597,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { }, )]); // This conservatively assumes all arguments alias to all parameters. 
- let rvalue_set = self.follow_rvalue(state, spread_arg_operand.clone()); + let rvalue_set = self.successors_for_operand(state, spread_arg_operand.clone()); initial_graph.extend(&lvalue_set, &rvalue_set); } } else { @@ -604,7 +610,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { projection: List::empty(), }, )]); - let rvalue_set = self.follow_rvalue(state, arg.node.clone()); + let rvalue_set = self.successors_for_operand(state, arg.node.clone()); initial_graph.extend(&lvalue_set, &rvalue_set); } } @@ -616,7 +622,7 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { state.join(&new_result); // Connect the return value to the return destination. - let lvalue_set = state.follow_from_place(*destination, self.instance); + let lvalue_set = state.resolve_place(*destination, self.instance); let rvalue_set = HashSet::from([MemLoc::new_stack_allocation( instance, Place { local: 0usize.into(), projection: List::empty() }, diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index 81cc29782173..f86b1b0fe46f 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -14,6 +14,9 @@ use std::collections::{HashMap, HashSet, VecDeque}; /// A node in the points-to graph, which could be a place on the stack, a heap allocation, or a static. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] pub enum MemLoc<'tcx> { + /// Notice that the type of `Place` here is not restricted to references or pointers. For + /// example, we propagate aliasing information for values derived from casting a pointer to a + /// usize in order to ensure soundness, as it could later be casted back to a pointer. Stack(Instance<'tcx>, Place<'tcx>), /// Using a combination of the instance of the function where the allocation took place and the /// location of the allocation inside this function implements allocation-site abstraction. 
@@ -83,7 +86,7 @@ impl<'tcx> PointsToGraph<'tcx> { /// We automatically resolve dereference projections here (by finding successors for each /// dereference projection we encounter), which is valid as long as we do it for every place we /// add to the graph. - pub fn follow_from_place( + pub fn resolve_place( &self, place: Place<'tcx>, instance: Instance<'tcx>, diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs index 1831264c2ec4..d041e0e3023b 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -115,7 +115,7 @@ impl<'a, 'tcx> MirVisitor for InstrumentationVisitor<'a, 'tcx> { // Match the place by whatever it is pointing to and find an intersection with the targets. if self .points_to - .follow_from_place(rustc_internal::internal(self.tcx, place), self.current_instance) + .resolve_place(rustc_internal::internal(self.tcx, place), self.current_instance) .intersection(&self.analysis_targets) .next() .is_some() diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index f9b306859e86..b6c5a6cf59c8 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -108,7 +108,7 @@ impl GlobalPass for DelayedUbPass { } } - // Since analysis targets are *pointers*, need to get its followers for instrumentation. + // Since analysis targets are *pointers*, need to get its successors for instrumentation. 
for target in targets.iter() { analysis_targets.extend(global_points_to_graph.pointees_of(target)); } From 3d43def9950ef745ecb3c15c30ff24121c22811f Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 2 Aug 2024 14:50:50 -0700 Subject: [PATCH 41/45] Accept suggestions for error messages --- .../src/kani_middle/points_to/points_to_analysis.rs | 13 +++++++------ .../check_uninit/ptr_uninit/uninit_visitor.rs | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 0601dc14c55c..32f4a8304f2c 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -40,7 +40,7 @@ use rustc_middle::{ }; use rustc_mir_dataflow::{Analysis, AnalysisDomain, Forward, JoinSemiLattice}; use rustc_smir::rustc_internal; -use rustc_span::source_map::Spanned; +use rustc_span::{source_map::Spanned, DUMMY_SP}; use std::collections::HashSet; /// Main points-to analysis object. @@ -479,12 +479,13 @@ fn try_resolve_instance<'tcx>( let ty = func.ty(body, tcx); match ty.kind() { TyKind::FnDef(def, args) => { - match Instance::try_resolve(tcx, ParamEnv::reveal_all(), *def, &args) { - Ok(Some(instance)) => Ok(instance), - _ => Err(format!("Kani does not support reasoning about arguments to `{ty:?}`.")), - } + // Span here is used for error-reporting, which we don't expect to encounter anyway, so + // it is ok to use a dummy. + Ok(Instance::expect_resolve(tcx, ParamEnv::reveal_all(), *def, &args, DUMMY_SP)) } - _ => Err(format!("Kani does not support reasoning about arguments to `{ty:?}`.")), + _ => Err(format!( + "Kani was not able to resolve the instance of the function operand `{ty:?}`. Currently, memory initialization checks in presence of function pointers and vtable calls are not supported. 
For more information about planned support, see https://github.com/model-checking/kani/issues/3300." + )), } } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs index 02e153dc1caf..837e14abc886 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ptr_uninit/uninit_visitor.rs @@ -668,7 +668,7 @@ fn try_resolve_instance(locals: &[LocalDecl], func: &Operand) -> Result Ok(Instance::resolve(def, &args).unwrap()), _ => Err(format!( - "Kani does not support reasoning about memory initialization of arguments to `{ty:?}`." + "Kani was not able to resolve the instance of the function operand `{ty:?}`. Currently, memory initialization checks in presence of function pointers and vtable calls are not supported. For more information about planned support, see https://github.com/model-checking/kani/issues/3300." )), } } From 9deb5a3e5bfc311c0e455bfebb1926c5e2a5a8f1 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 2 Aug 2024 14:55:22 -0700 Subject: [PATCH 42/45] Move `successors_for_rvalue` into a separate function --- .../points_to/points_to_analysis.rs | 175 +++++++++--------- 1 file changed, 92 insertions(+), 83 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 32f4a8304f2c..4284a644343c 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -139,89 +139,7 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { // Resolve all dereference projections for the lvalue. let lvalue_set = state.resolve_place(place, self.instance); // Determine all places rvalue could point to. 
- let rvalue_set = match rvalue { - // Using the operand unchanged requires determining where it could point, which - // `successors_for_operand` does. - Rvalue::Use(operand) - | Rvalue::ShallowInitBox(operand, _) - | Rvalue::Cast(_, operand, _) - | Rvalue::Repeat(operand, ..) => self.successors_for_operand(state, operand), - Rvalue::Ref(_, _, ref_place) | Rvalue::AddressOf(_, ref_place) => { - // Here, a reference to a place is created, which leaves the place - // unchanged. - state.resolve_place(ref_place, self.instance) - } - Rvalue::BinaryOp(bin_op, operands) => { - match bin_op { - BinOp::Offset => { - // Offsetting a pointer should still be within the boundaries of the - // same object, so we can simply use the operand unchanged. - let (ptr, _) = *operands.clone(); - self.successors_for_operand(state, ptr) - } - BinOp::Add - | BinOp::AddUnchecked - | BinOp::AddWithOverflow - | BinOp::Sub - | BinOp::SubUnchecked - | BinOp::SubWithOverflow - | BinOp::Mul - | BinOp::MulUnchecked - | BinOp::MulWithOverflow - | BinOp::Div - | BinOp::Rem - | BinOp::BitXor - | BinOp::BitAnd - | BinOp::BitOr - | BinOp::Shl - | BinOp::ShlUnchecked - | BinOp::Shr - | BinOp::ShrUnchecked => { - // While unlikely, those could be pointer addresses, so we need to - // track them. We assume that even shifted addresses will be within - // the same original object. - let (l_operand, r_operand) = *operands.clone(); - let l_operand_set = self.successors_for_operand(state, l_operand); - let r_operand_set = self.successors_for_operand(state, r_operand); - l_operand_set.union(&r_operand_set).cloned().collect() - } - BinOp::Eq - | BinOp::Lt - | BinOp::Le - | BinOp::Ne - | BinOp::Ge - | BinOp::Gt - | BinOp::Cmp => { - // None of those could yield an address as the result. - HashSet::new() - } - } - } - Rvalue::UnaryOp(_, operand) => { - // The same story from BinOp applies here, too. Need to track those things. - self.successors_for_operand(state, operand) - } - Rvalue::Len(..) 
| Rvalue::NullaryOp(..) | Rvalue::Discriminant(..) => { - // All of those should yield a constant. - HashSet::new() - } - Rvalue::Aggregate(_, operands) => { - // Conservatively find a union of all places mentioned here and resolve - // their pointees. - operands - .into_iter() - .flat_map(|operand| self.successors_for_operand(state, operand)) - .collect() - } - Rvalue::CopyForDeref(place) => { - // Resolve pointees of a place. - state.successors(&state.resolve_place(place, self.instance)) - } - Rvalue::ThreadLocalRef(def_id) => { - // We store a def_id of a static. - HashSet::from([MemLoc::new_static_allocation(def_id)]) - } - }; + let rvalue_set = self.successors_for_rvalue(state, rvalue); // Create an edge between all places which could be lvalue and all places rvalue // could be pointing to. state.extend(&lvalue_set, &rvalue_set); @@ -630,4 +548,95 @@ impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { )]); state.extend(&lvalue_set, &state.successors(&rvalue_set)); } + + /// Find all places where the rvalue could point to at the current stage of the program. + fn successors_for_rvalue( + &self, + state: &mut PointsToGraph<'tcx>, + rvalue: Rvalue<'tcx>, + ) -> HashSet> { + match rvalue { + // Using the operand unchanged requires determining where it could point, which + // `successors_for_operand` does. + Rvalue::Use(operand) + | Rvalue::ShallowInitBox(operand, _) + | Rvalue::Cast(_, operand, _) + | Rvalue::Repeat(operand, ..) => self.successors_for_operand(state, operand), + Rvalue::Ref(_, _, ref_place) | Rvalue::AddressOf(_, ref_place) => { + // Here, a reference to a place is created, which leaves the place + // unchanged. + state.resolve_place(ref_place, self.instance) + } + Rvalue::BinaryOp(bin_op, operands) => { + match bin_op { + BinOp::Offset => { + // Offsetting a pointer should still be within the boundaries of the + // same object, so we can simply use the operand unchanged. 
+ let (ptr, _) = *operands.clone(); + self.successors_for_operand(state, ptr) + } + BinOp::Add + | BinOp::AddUnchecked + | BinOp::AddWithOverflow + | BinOp::Sub + | BinOp::SubUnchecked + | BinOp::SubWithOverflow + | BinOp::Mul + | BinOp::MulUnchecked + | BinOp::MulWithOverflow + | BinOp::Div + | BinOp::Rem + | BinOp::BitXor + | BinOp::BitAnd + | BinOp::BitOr + | BinOp::Shl + | BinOp::ShlUnchecked + | BinOp::Shr + | BinOp::ShrUnchecked => { + // While unlikely, those could be pointer addresses, so we need to + // track them. We assume that even shifted addresses will be within + // the same original object. + let (l_operand, r_operand) = *operands.clone(); + let l_operand_set = self.successors_for_operand(state, l_operand); + let r_operand_set = self.successors_for_operand(state, r_operand); + l_operand_set.union(&r_operand_set).cloned().collect() + } + BinOp::Eq + | BinOp::Lt + | BinOp::Le + | BinOp::Ne + | BinOp::Ge + | BinOp::Gt + | BinOp::Cmp => { + // None of those could yield an address as the result. + HashSet::new() + } + } + } + Rvalue::UnaryOp(_, operand) => { + // The same story from BinOp applies here, too. Need to track those things. + self.successors_for_operand(state, operand) + } + Rvalue::Len(..) | Rvalue::NullaryOp(..) | Rvalue::Discriminant(..) => { + // All of those should yield a constant. + HashSet::new() + } + Rvalue::Aggregate(_, operands) => { + // Conservatively find a union of all places mentioned here and resolve + // their pointees. + operands + .into_iter() + .flat_map(|operand| self.successors_for_operand(state, operand)) + .collect() + } + Rvalue::CopyForDeref(place) => { + // Resolve pointees of a place. + state.successors(&state.resolve_place(place, self.instance)) + } + Rvalue::ThreadLocalRef(def_id) => { + // We store a def_id of a static. 
+ HashSet::from([MemLoc::new_static_allocation(def_id)]) + } + } + } } From 700033e9819fc5812bfcfeba225af5a45152a538 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 2 Aug 2024 14:57:47 -0700 Subject: [PATCH 43/45] Add a comment about intrinsics --- .../src/kani_middle/points_to/points_to_analysis.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 4284a644343c..568cafd141cb 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -341,8 +341,9 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { state.extend(&lvalue_set, &rvalue_set); } _ => { - // TODO: go through the list of intrinsics and make sure none have - // slipped; I am sure we still missing some. + // TODO: this probably does not handle all relevant intrinsics, so more + // need to be added. 
For more information, see: + // https://github.com/model-checking/kani/issues/3300 if self.tcx.is_mir_available(def_id) { self.apply_regular_call_effect(state, instance, args, destination); } From 4cba01e1a7874d53ff56a75cf80fdcace0a44b40 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 2 Aug 2024 15:34:54 -0700 Subject: [PATCH 44/45] Restrict use of all internal MIR types to `points_to` module --- .../points_to/points_to_analysis.rs | 17 +++++++-- .../kani_middle/points_to/points_to_graph.rs | 36 ++++++++++++++++++- .../delayed_ub/instrumentation_visitor.rs | 10 +++--- .../transform/check_uninit/delayed_ub/mod.rs | 21 ++++------- 4 files changed, 60 insertions(+), 24 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 568cafd141cb..32b7c1fd6f6e 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -41,6 +41,7 @@ use rustc_middle::{ use rustc_mir_dataflow::{Analysis, AnalysisDomain, Forward, JoinSemiLattice}; use rustc_smir::rustc_internal; use rustc_span::{source_map::Spanned, DUMMY_SP}; +use stable_mir::mir::{mono::Instance as StableInstance, Body as StableBody}; use std::collections::HashSet; /// Main points-to analysis object. @@ -60,12 +61,22 @@ struct PointsToAnalysis<'a, 'tcx> { /// Public points-to analysis entry point. Performs the analysis on a body, outputting the graph /// containing aliasing information of the body itself and any body reachable from it. pub fn run_points_to_analysis<'tcx>( - body: &Body<'tcx>, + body: &StableBody, tcx: TyCtxt<'tcx>, - instance: Instance<'tcx>, + instance: StableInstance, call_graph: &CallGraph, ) -> PointsToGraph<'tcx> { - PointsToAnalysis::run(body, tcx, instance, call_graph, PointsToGraph::empty()) + // Dataflow analysis does not yet work with StableMIR, so need to perform backward + // conversion. 
+ let internal_instance = rustc_internal::internal(tcx, instance); + let internal_body = body.internal_mir(tcx); + PointsToAnalysis::run( + &internal_body, + tcx, + internal_instance, + call_graph, + PointsToGraph::empty(), + ) } impl<'a, 'tcx> PointsToAnalysis<'a, 'tcx> { diff --git a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs index f86b1b0fe46f..d2e80f24c737 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_graph.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_graph.rs @@ -6,9 +6,14 @@ use rustc_hir::def_id::DefId; use rustc_middle::{ mir::{Location, Place, ProjectionElem}, - ty::{Instance, List}, + ty::{Instance, List, TyCtxt}, }; use rustc_mir_dataflow::{fmt::DebugWithContext, JoinSemiLattice}; +use rustc_smir::rustc_internal; +use stable_mir::mir::{ + mono::{Instance as StableInstance, StaticDef}, + Place as StablePlace, +}; use std::collections::{HashMap, HashSet, VecDeque}; /// A node in the points-to graph, which could be a place on the stack, a heap allocation, or a static. @@ -39,6 +44,23 @@ impl<'tcx> MemLoc<'tcx> { pub fn new_static_allocation(static_def: DefId) -> Self { MemLoc::Static(static_def) } + + /// Create a memory location representing a new stack allocation from StableMIR values. + pub fn from_stable_stack_allocation( + instance: StableInstance, + place: StablePlace, + tcx: TyCtxt<'tcx>, + ) -> Self { + let internal_instance = rustc_internal::internal(tcx, instance); + let internal_place = rustc_internal::internal(tcx, place); + Self::new_stack_allocation(internal_instance, internal_place) + } + + /// Create a memory location representing a new static allocation from StableMIR values. 
+ pub fn from_stable_static_allocation(static_def: StaticDef, tcx: TyCtxt<'tcx>) -> Self { + let static_def_id = rustc_internal::internal(tcx, static_def); + Self::new_static_allocation(static_def_id) + } } /// Graph data structure that stores the current results of the point-to analysis. The graph is @@ -113,6 +135,18 @@ impl<'tcx> PointsToGraph<'tcx> { node_set } + /// Stable interface for `resolve_place`. + pub fn resolve_place_stable( + &self, + place: StablePlace, + instance: StableInstance, + tcx: TyCtxt<'tcx>, + ) -> HashSet> { + let internal_place = rustc_internal::internal(tcx, place); + let internal_instance = rustc_internal::internal(tcx, instance); + self.resolve_place(internal_place, internal_instance) + } + /// Dump the graph into a file using the graphviz format for later visualization. pub fn dump(&self, file_path: &str) { let mut nodes: Vec = diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs index d041e0e3023b..f295fc76d4bf 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/instrumentation_visitor.rs @@ -14,9 +14,9 @@ use crate::kani_middle::{ }, }, }; -use rustc_middle::ty::{Instance as InternalInstance, TyCtxt}; -use rustc_smir::rustc_internal; +use rustc_middle::ty::TyCtxt; use stable_mir::mir::{ + mono::Instance, visit::{Location, PlaceContext}, BasicBlockIdx, MirVisitor, Operand, Place, Rvalue, Statement, Terminator, }; @@ -36,7 +36,7 @@ pub struct InstrumentationVisitor<'a, 'tcx> { points_to: &'a PointsToGraph<'tcx>, /// The list of places we should be looking for, ignoring others analysis_targets: &'a HashSet>, - current_instance: InternalInstance<'tcx>, + current_instance: Instance, tcx: TyCtxt<'tcx>, } @@ -59,7 +59,7 @@ impl<'a, 'tcx> InstrumentationVisitor<'a, 'tcx> { pub 
fn new( points_to: &'a PointsToGraph<'tcx>, analysis_targets: &'a HashSet>, - current_instance: InternalInstance<'tcx>, + current_instance: Instance, tcx: TyCtxt<'tcx>, ) -> Self { Self { @@ -115,7 +115,7 @@ impl<'a, 'tcx> MirVisitor for InstrumentationVisitor<'a, 'tcx> { // Match the place by whatever it is pointing to and find an intersection with the targets. if self .points_to - .resolve_place(rustc_internal::internal(self.tcx, place), self.current_instance) + .resolve_place_stable(place.clone(), self.current_instance, self.tcx) .intersection(&self.analysis_targets) .next() .is_some() diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs index b6c5a6cf59c8..6b488569813f 100644 --- a/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs +++ b/kani-compiler/src/kani_middle/transform/check_uninit/delayed_ub/mod.rs @@ -13,7 +13,6 @@ use crate::kani_middle::{ transform::{ body::{CheckType, MutableBody}, check_uninit::UninitInstrumenter, - internal_mir::RustcInternalMir, BodyTransformation, GlobalPass, TransformationResult, }, }; @@ -23,7 +22,6 @@ use instrumentation_visitor::InstrumentationVisitor; use rustc_middle::ty::TyCtxt; use rustc_mir_dataflow::JoinSemiLattice; use rustc_session::config::OutputType; -use rustc_smir::rustc_internal; use stable_mir::{ mir::mono::{Instance, MonoItem}, mir::MirVisitor, @@ -70,12 +68,11 @@ impl GlobalPass for DelayedUbPass { // Convert all places into the format of aliasing graph for later comparison. 
visitor.into_targets().into_iter().map(move |analysis_target| match analysis_target { - AnalysisTarget::Place(place) => MemLoc::new_stack_allocation( - rustc_internal::internal(tcx, instance), - rustc_internal::internal(tcx, place), - ), + AnalysisTarget::Place(place) => { + MemLoc::from_stable_stack_allocation(*instance, place, tcx) + } AnalysisTarget::Static(static_def) => { - MemLoc::new_static_allocation(rustc_internal::internal(tcx, static_def)) + MemLoc::from_stable_static_allocation(static_def, tcx) } }) }) @@ -98,12 +95,7 @@ impl GlobalPass for DelayedUbPass { }; if let Some(instance) = entry_fn { let body = instance.body().unwrap(); - // Dataflow analysis does not yet work with StableMIR, so need to perform backward - // conversion. - let internal_body = body.internal_mir(tcx); - let internal_instance = rustc_internal::internal(tcx, instance); - let results = - run_points_to_analysis(&internal_body, tcx, internal_instance, call_graph); + let results = run_points_to_analysis(&body, tcx, instance, call_graph); global_points_to_graph.join(&results); } } @@ -120,7 +112,6 @@ impl GlobalPass for DelayedUbPass { // Instrument each instance based on the final targets we found. 
for instance in instances { - let internal_instance = rustc_internal::internal(tcx, instance); let mut instrumenter = UninitInstrumenter { check_type: self.check_type.clone(), mem_init_fn_cache: &mut self.mem_init_fn_cache, @@ -131,7 +122,7 @@ impl GlobalPass for DelayedUbPass { let target_finder = InstrumentationVisitor::new( &global_points_to_graph, &analysis_targets, - internal_instance, + instance, tcx, ); let (instrumentation_added, body) = From 8671a38b4d8d150b1ce1ff1b80b2b7c7ccd11af6 Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Mon, 5 Aug 2024 08:48:36 -0700 Subject: [PATCH 45/45] Small comment update --- kani-compiler/src/kani_middle/points_to/points_to_analysis.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs index 32b7c1fd6f6e..640318ccb584 100644 --- a/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs +++ b/kani-compiler/src/kani_middle/points_to/points_to_analysis.rs @@ -158,8 +158,8 @@ impl<'a, 'tcx> Analysis<'tcx> for PointsToAnalysis<'a, 'tcx> { StatementKind::Intrinsic(non_diverging_intrinsic) => { match *non_diverging_intrinsic.clone() { NonDivergingIntrinsic::CopyNonOverlapping(copy_nonoverlapping) => { - // Copy between `*const a` and `*mut b` is semantically equivalent to *b = - // *a with respect to aliasing. + // Copy between the values pointed by `*const a` and `*mut b` is + // semantically equivalent to *b = *a with respect to aliasing. self.apply_copy_effect( state, copy_nonoverlapping.src.clone(),