From 96d1147fc3b59bf71ac6618aca957ac6ba40073a Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Fri, 19 Jul 2024 08:56:01 -0700 Subject: [PATCH] Support for disabling automatically generated pointer checks to avoid reinstrumentation (#3344) Recently added memory initialization checks (see #3300 for an overview) code suffers from re-instrumentation due to automatically added pointer checks to the instrumentation code. This PR adds an internal `kanitool::disable_checks` attribute to disable automatically generated CBMC checks inside internal instrumentation. This, in turn, relies on CBMC pragmas (https://www.cprover.org/cprover-manual/properties/#usinggotoinstrument). By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 and MIT licenses. --- cprover_bindings/src/goto_program/location.rs | 35 +++++++--- cprover_bindings/src/irep/to_irep.rs | 46 ++++++++++--- .../src/codegen_cprover_gotoc/codegen/span.rs | 68 +++++++++++++++++++ kani-compiler/src/kani_middle/attributes.rs | 14 +++- library/kani/src/mem_init.rs | 17 ++++- .../alloc-zeroed.rs | 22 ++++++ .../mem-init-reinstrumentation/config.yml | 4 ++ .../mem-init-reinstrumentation.expected | 1 + .../mem-init-reinstrumentation.sh | 20 ++++++ 9 files changed, 204 insertions(+), 23 deletions(-) create mode 100644 tests/script-based-pre/mem-init-reinstrumentation/alloc-zeroed.rs create mode 100644 tests/script-based-pre/mem-init-reinstrumentation/config.yml create mode 100644 tests/script-based-pre/mem-init-reinstrumentation/mem-init-reinstrumentation.expected create mode 100755 tests/script-based-pre/mem-init-reinstrumentation/mem-init-reinstrumentation.sh diff --git a/cprover_bindings/src/goto_program/location.rs b/cprover_bindings/src/goto_program/location.rs index 4097d075276d..9a2a046f6387 100644 --- a/cprover_bindings/src/goto_program/location.rs +++ b/cprover_bindings/src/goto_program/location.rs @@ -19,6 +19,7 @@ pub enum Location { start_col: Option, end_line: u64, end_col: Option, + pragmas: &'static [&'static str], // CBMC `#pragma check` statements per location. }, /// Location for Statements that use Property Class and Description - Assert, Assume, Cover etc. Property { @@ -28,6 +29,7 @@ pub enum Location { col: Option, comment: InternedString, property_class: InternedString, + pragmas: &'static [&'static str], // CBMC `#pragma check` statements per location. }, /// Covers cases where Location Details are unknown or set as None but Property Class is needed. PropertyUnknownLocation { comment: InternedString, property_class: InternedString }, @@ -99,6 +101,7 @@ impl Location { start_col: Option, end_line: T, end_col: Option, + pragmas: &'static [&'static str], ) -> Location where T: TryInto, @@ -117,6 +120,7 @@ impl Location { start_col: start_col_into, end_line: end_line_into, end_col: end_col_into, + pragmas, } } @@ -128,6 +132,7 @@ impl Location { col: Option, comment: U, property_name: U, + pragmas: &'static [&'static str], ) -> Location where T: TryInto, @@ -140,7 +145,7 @@ impl Location { let function = function.intern(); let property_class = property_name.into(); let comment = comment.into(); - Location::Property { file, function, line, col, comment, property_class } + Location::Property { file, function, line, col, comment, property_class, pragmas } } /// Create a Property type Location from an already existing Location type @@ -157,17 +162,25 @@ impl Location { None, comment.into(), property_name.into(), + &[], + ), + Location::Loc { + file, + function, + start_line, + start_col, + end_line: _, + end_col: _, + pragmas, + } => Location::property_location( + file.into(), + function.intern(), + start_line, + start_col, + comment.into(), + property_name.into(), + pragmas, ), - Location::Loc { file, function, start_line, start_col, end_line: _, end_col: _ } => { - Location::property_location( - file.into(), - function.intern(), - start_line, - start_col, - comment.into(), - property_name.into(), - ) - } Location::Property { .. } => location, Location::PropertyUnknownLocation { .. } => location, // This converts None type Locations to PropertyUnknownLocation type which inserts Property Class and Description diff --git a/cprover_bindings/src/irep/to_irep.rs b/cprover_bindings/src/irep/to_irep.rs index 4b5c86350172..d1e84669121d 100644 --- a/cprover_bindings/src/irep/to_irep.rs +++ b/cprover_bindings/src/irep/to_irep.rs @@ -389,15 +389,32 @@ impl ToIrep for Location { (IrepId::Function, Irep::just_string_id(function_name.to_string())), ]) .with_named_sub_option(IrepId::Line, line.map(Irep::just_int_id)), - Location::Loc { file, function, start_line, start_col, end_line: _, end_col: _ } => { - Irep::just_named_sub(linear_map![ - (IrepId::File, Irep::just_string_id(file.to_string())), - (IrepId::Line, Irep::just_int_id(*start_line)), - ]) - .with_named_sub_option(IrepId::Column, start_col.map(Irep::just_int_id)) - .with_named_sub_option(IrepId::Function, function.map(Irep::just_string_id)) - } - Location::Property { file, function, line, col, property_class, comment } => { + Location::Loc { + file, + function, + start_line, + start_col, + end_line: _, + end_col: _, + pragmas, + } => Irep::just_named_sub(linear_map![ + (IrepId::File, Irep::just_string_id(file.to_string())), + (IrepId::Line, Irep::just_int_id(*start_line)), + ]) + .with_named_sub_option(IrepId::Column, start_col.map(Irep::just_int_id)) + .with_named_sub_option(IrepId::Function, function.map(Irep::just_string_id)) + .with_named_sub_option( + IrepId::Pragma, + Some(Irep::just_named_sub( + pragmas + .iter() + .map(|pragma| { + (IrepId::from_string(*pragma), Irep::just_id(IrepId::EmptyString)) + }) + .collect(), + )), + ), + Location::Property { file, function, line, col, property_class, comment, pragmas } => { Irep::just_named_sub(linear_map![ (IrepId::File, Irep::just_string_id(file.to_string())), (IrepId::Line, Irep::just_int_id(*line)), @@ -409,6 +426,17 @@ impl ToIrep for Location { IrepId::PropertyClass, Irep::just_string_id(property_class.to_string()), ) + .with_named_sub_option( + IrepId::Pragma, + Some(Irep::just_named_sub( + pragmas + .iter() + .map(|pragma| { + (IrepId::from_string(*pragma), Irep::just_id(IrepId::EmptyString)) + }) + .collect(), + )), + ) } Location::PropertyUnknownLocation { property_class, comment } => { Irep::just_named_sub(linear_map![ diff --git a/kani-compiler/src/codegen_cprover_gotoc/codegen/span.rs b/kani-compiler/src/codegen_cprover_gotoc/codegen/span.rs index aadb4fbebed9..41a3da11324b 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/codegen/span.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/codegen/span.rs @@ -5,9 +5,31 @@ use crate::codegen_cprover_gotoc::GotocCtx; use cbmc::goto_program::Location; +use lazy_static::lazy_static; +use rustc_ast::Attribute; use rustc_smir::rustc_internal; use rustc_span::Span; use stable_mir::ty::Span as SpanStable; +use std::collections::HashMap; + +lazy_static! { + /// Pragmas key-value store to prevent CBMC from generating automatic checks. + /// This list is taken from https://github.com/diffblue/cbmc/blob/develop/regression/cbmc/pragma_cprover_enable_all/main.c. + static ref PRAGMAS: HashMap<&'static str, &'static str> = + [("bounds", "disable:bounds-check"), + ("pointer", "disable:pointer-check"), + ("div-by-zero", "disable:div-by-zero-check"), + ("float-div-by-zero", "disable:float-div-by-zero-check"), + ("enum-range", "disable:enum-range-check"), + ("signed-overflow", "disable:signed-overflow-check"), + ("unsigned-overflow", "disable:unsigned-overflow-check"), + ("pointer-overflow", "disable:pointer-overflow-check"), + ("float-overflow", "disable:float-overflow-check"), + ("conversion", "disable:conversion-check"), + ("undefined-shift", "disable:undefined-shift-check"), + ("nan", "disable:nan-check"), + ("pointer-primitive", "disable:pointer-primitive-check")].iter().copied().collect(); +} impl<'tcx> GotocCtx<'tcx> { pub fn codegen_span(&self, sp: &Span) -> Location { @@ -15,6 +37,36 @@ impl<'tcx> GotocCtx<'tcx> { } pub fn codegen_span_stable(&self, sp: SpanStable) -> Location { + // Attribute to mark functions as where automatic pointer checks should not be generated. + let should_skip_ptr_checks_attr = vec![ + rustc_span::symbol::Symbol::intern("kanitool"), + rustc_span::symbol::Symbol::intern("disable_checks"), + ]; + let pragmas: &'static [&str] = { + let disabled_checks: Vec<_> = self + .current_fn + .as_ref() + .map(|current_fn| { + let instance = current_fn.instance(); + self.tcx + .get_attrs_by_path(instance.def.def_id(), &should_skip_ptr_checks_attr) + .collect() + }) + .unwrap_or_default(); + disabled_checks + .iter() + .map(|attr| { + let arg = parse_word(attr).expect( + "incorrect value passed to `disable_checks`, expected a single identifier", + ); + *PRAGMAS.get(arg.as_str()).expect(format!( + "attempting to disable an unexisting check, the possible options are {:?}", + PRAGMAS.keys() + ).as_str()) + }) + .collect::>() + .leak() // This is to preserve `Location` being Copy, but could blow up the memory utilization of compiler. + }; let loc = sp.get_lines(); Location::new( sp.get_filename().to_string(), @@ -23,6 +75,7 @@ impl<'tcx> GotocCtx<'tcx> { Some(loc.start_col), loc.end_line, Some(loc.end_col), + pragmas, ) } @@ -39,3 +92,18 @@ impl<'tcx> GotocCtx<'tcx> { self.codegen_span(&topmost) } } + +/// Extracts the single argument from the attribute provided as a string. +/// For example, `disable_checks(foo)` return `Some("foo")` +fn parse_word(attr: &Attribute) -> Option { + // Vector of meta items , that contain the arguments given the attribute + let attr_args = attr.meta_item_list()?; + // Only extracts one string ident as a string + if attr_args.len() == 1 { + attr_args[0].ident().map(|ident| ident.to_string()) + } + // Return none if there are no attributes or if there's too many attributes + else { + None + } +} diff --git a/kani-compiler/src/kani_middle/attributes.rs b/kani-compiler/src/kani_middle/attributes.rs index 84ec8d627c59..9eb6d3d6ee4e 100644 --- a/kani-compiler/src/kani_middle/attributes.rs +++ b/kani-compiler/src/kani_middle/attributes.rs @@ -74,6 +74,9 @@ enum KaniAttributeKind { /// We use this attribute to properly instantiate `kani::any_modifies` in /// cases when recursion is present given our contracts instrumentation. Recursion, + /// Used to mark functions where generating automatic pointer checks should be disabled. This is + /// used later to automatically attach pragma statements to locations. + DisableChecks, } impl KaniAttributeKind { @@ -93,7 +96,8 @@ impl KaniAttributeKind { | KaniAttributeKind::CheckedWith | KaniAttributeKind::Modifies | KaniAttributeKind::InnerCheck - | KaniAttributeKind::IsContractGenerated => false, + | KaniAttributeKind::IsContractGenerated + | KaniAttributeKind::DisableChecks => false, } } @@ -382,6 +386,10 @@ impl<'tcx> KaniAttributes<'tcx> { KaniAttributeKind::InnerCheck => { self.inner_check(); } + KaniAttributeKind::DisableChecks => { + // Ignored here, because it should be an internal attribute. Actual validation + // happens when pragmas are generated. + } } } } @@ -490,6 +498,10 @@ impl<'tcx> KaniAttributes<'tcx> { | KaniAttributeKind::InnerCheck | KaniAttributeKind::ReplacedWith => { self.tcx.dcx().span_err(self.tcx.def_span(self.item), format!("Contracts are not supported on harnesses. (Found the kani-internal contract attribute `{}`)", kind.as_ref())); + }, + KaniAttributeKind::DisableChecks => { + // Internal attribute which shouldn't exist here. + unreachable!() } }; harness diff --git a/library/kani/src/mem_init.rs b/library/kani/src/mem_init.rs index a09e515d7e17..88847e9c4f3c 100644 --- a/library/kani/src/mem_init.rs +++ b/library/kani/src/mem_init.rs @@ -50,6 +50,7 @@ impl MemoryInitializationState { /// /// Such definition is necessary since both tracked object and tracked offset are chosen /// non-deterministically. + #[kanitool::disable_checks(pointer)] pub fn get( &mut self, ptr: *const u8, @@ -61,7 +62,7 @@ impl MemoryInitializationState { && self.tracked_offset >= offset && self.tracked_offset < offset + LAYOUT_SIZE { - !layout[(self.tracked_offset - offset) % LAYOUT_SIZE] || self.value + !layout[self.tracked_offset - offset] || self.value } else { true } @@ -73,6 +74,7 @@ impl MemoryInitializationState { /// /// Such definition is necessary since both tracked object and tracked offset are chosen /// non-deterministically. + #[kanitool::disable_checks(pointer)] pub fn set( &mut self, ptr: *const u8, @@ -85,7 +87,7 @@ impl MemoryInitializationState { && self.tracked_offset >= offset && self.tracked_offset < offset + LAYOUT_SIZE { - self.value = layout[(self.tracked_offset - offset) % LAYOUT_SIZE] && value; + self.value = layout[self.tracked_offset - offset] && value; } } @@ -95,6 +97,7 @@ impl MemoryInitializationState { /// /// Such definition is necessary since both tracked object and tracked offset are chosen /// non-deterministically. + #[kanitool::disable_checks(pointer)] pub fn get_slice( &mut self, ptr: *const u8, @@ -119,6 +122,7 @@ impl MemoryInitializationState { /// /// Such definition is necessary since both tracked object and tracked offset are chosen /// non-deterministically. + #[kanitool::disable_checks(pointer)] pub fn set_slice( &mut self, ptr: *const u8, @@ -142,6 +146,7 @@ impl MemoryInitializationState { static mut MEM_INIT_STATE: MemoryInitializationState = MemoryInitializationState::new(); /// Set tracked object and tracked offset to a non-deterministic value. +#[kanitool::disable_checks(pointer)] #[rustc_diagnostic_item = "KaniInitializeMemoryInitializationState"] fn initialize_memory_initialization_state() { unsafe { @@ -152,6 +157,7 @@ fn initialize_memory_initialization_state() { } /// Get initialization state of `num_elts` items laid out according to the `layout` starting at address `ptr`. +#[kanitool::disable_checks(pointer)] #[rustc_diagnostic_item = "KaniIsPtrInitialized"] fn is_ptr_initialized( ptr: *const T, @@ -165,6 +171,7 @@ fn is_ptr_initialized( } /// Set initialization state to `value` for `num_elts` items laid out according to the `layout` starting at address `ptr`. +#[kanitool::disable_checks(pointer)] #[rustc_diagnostic_item = "KaniSetPtrInitialized"] fn set_ptr_initialized( ptr: *const T, @@ -181,6 +188,7 @@ fn set_ptr_initialized( } /// Get initialization state of `num_elts` items laid out according to the `layout` starting at address `ptr`. +#[kanitool::disable_checks(pointer)] #[rustc_diagnostic_item = "KaniIsSliceChunkPtrInitialized"] fn is_slice_chunk_ptr_initialized( ptr: *const T, @@ -195,6 +203,7 @@ fn is_slice_chunk_ptr_initialized( } /// Set initialization state to `value` for `num_elts` items laid out according to the `layout` starting at address `ptr`. +#[kanitool::disable_checks(pointer)] #[rustc_diagnostic_item = "KaniSetSliceChunkPtrInitialized"] fn set_slice_chunk_ptr_initialized( ptr: *const T, @@ -212,6 +221,7 @@ fn set_slice_chunk_ptr_initialized( } /// Get initialization state of the slice, items of which are laid out according to the `layout` starting at address `ptr`. +#[kanitool::disable_checks(pointer)] #[rustc_diagnostic_item = "KaniIsSlicePtrInitialized"] fn is_slice_ptr_initialized( ptr: *const [T], @@ -225,6 +235,7 @@ fn is_slice_ptr_initialized( } /// Set initialization state of the slice, items of which are laid out according to the `layout` starting at address `ptr` to `value`. +#[kanitool::disable_checks(pointer)] #[rustc_diagnostic_item = "KaniSetSlicePtrInitialized"] fn set_slice_ptr_initialized( ptr: *const [T], @@ -241,6 +252,7 @@ fn set_slice_ptr_initialized( } /// Get initialization state of the string slice, items of which are laid out according to the `layout` starting at address `ptr`. +#[kanitool::disable_checks(pointer)] #[rustc_diagnostic_item = "KaniIsStrPtrInitialized"] fn is_str_ptr_initialized( ptr: *const str, @@ -254,6 +266,7 @@ fn is_str_ptr_initialized( } /// Set initialization state of the string slice, items of which are laid out according to the `layout` starting at address `ptr` to `value`. +#[kanitool::disable_checks(pointer)] #[rustc_diagnostic_item = "KaniSetStrPtrInitialized"] fn set_str_ptr_initialized( ptr: *const str, diff --git a/tests/script-based-pre/mem-init-reinstrumentation/alloc-zeroed.rs b/tests/script-based-pre/mem-init-reinstrumentation/alloc-zeroed.rs new file mode 100644 index 000000000000..891a97fd8c22 --- /dev/null +++ b/tests/script-based-pre/mem-init-reinstrumentation/alloc-zeroed.rs @@ -0,0 +1,22 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z uninit-checks + +use std::alloc::{alloc_zeroed, Layout}; +use std::slice::from_raw_parts; + +#[kani::proof] +fn alloc_zeroed_to_slice() { + let layout = Layout::from_size_align(32, 8).unwrap(); + unsafe { + // This returns initialized memory, so any further accesses are valid. + let ptr = alloc_zeroed(layout); + *ptr = 0x41; + *ptr.add(1) = 0x42; + *ptr.add(2) = 0x43; + *ptr.add(3) = 0x44; + *ptr.add(16) = 0x00; + let _slice1 = from_raw_parts(ptr, 16); + let _slice2 = from_raw_parts(ptr.add(16), 16); + } +} diff --git a/tests/script-based-pre/mem-init-reinstrumentation/config.yml b/tests/script-based-pre/mem-init-reinstrumentation/config.yml new file mode 100644 index 000000000000..ef61a9171954 --- /dev/null +++ b/tests/script-based-pre/mem-init-reinstrumentation/config.yml @@ -0,0 +1,4 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT +script: mem-init-reinstrumentation.sh +expected: mem-init-reinstrumentation.expected diff --git a/tests/script-based-pre/mem-init-reinstrumentation/mem-init-reinstrumentation.expected b/tests/script-based-pre/mem-init-reinstrumentation/mem-init-reinstrumentation.expected new file mode 100644 index 000000000000..59b64322ad50 --- /dev/null +++ b/tests/script-based-pre/mem-init-reinstrumentation/mem-init-reinstrumentation.expected @@ -0,0 +1 @@ +success: no pointer checks are detected in initialized memory instrumentaiton diff --git a/tests/script-based-pre/mem-init-reinstrumentation/mem-init-reinstrumentation.sh b/tests/script-based-pre/mem-init-reinstrumentation/mem-init-reinstrumentation.sh new file mode 100755 index 000000000000..f859864d1c9a --- /dev/null +++ b/tests/script-based-pre/mem-init-reinstrumentation/mem-init-reinstrumentation.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT + +set -u + +KANI_OUTPUT=`kani -Z uninit-checks alloc-zeroed.rs` +echo "$KANI_OUTPUT" | egrep -q "kani::mem_init::.*pointer_dereference" +INSTRUMENTATION_DETECTED=$? + +if [[ $INSTRUMENTATION_DETECTED == 0 ]]; then + echo "failed: pointer checks are detected in initialized memory instrumentaiton" + exit 1 +elif [[ $INSTRUMENTATION_DETECTED == 1 ]]; then + echo "success: no pointer checks are detected in initialized memory instrumentaiton" + exit 0 +else + echo "failed: error occured when runnning egrep" + exit 0 +fi