From 5436969bff6bb67d2412b118b12c9486dfa67b38 Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Tue, 27 Feb 2024 08:48:33 +0100 Subject: [PATCH] lib: RuntimeMemoryImage: support relocatable object files Add initial support for the analysis of relocatable object files. These files do not include information about the runtime memory layout, i.e., segments. Ghidra handles such files by assigning addresses to sections in `ElfProgramBuilder.processSectionHeaders`. The algorithm is essentially: ``` base_address <- 0x100_000 for each section in section_headers do: if not section.is_alloc() or section.sh_type == SHT_NULL or section.sh_size == 0 then: continue end if alignment <- section.sh_addralign section.address <- align(base_address, alignment) base_address <- section.address + section.sh_size end for ``` Importantly, the above is not the exact algorithm used by Ghidra, but only an approximation that aims to be correct for the invocation of Ghidra with the pcode extractor plugin. Add an implementation of this algorithm to `MemorySegment` and use it from `RuntimeMemoryImage::new` when a relocatable object file is given. --- .../runtime_memory_image.rs | 115 +++++++++++++++--- src/cwe_checker_lib/src/utils/binary.rs | 26 ++++ src/cwe_checker_lib/src/utils/ghidra.rs | 7 +- src/cwe_checker_lib/src/utils/mod.rs | 19 --- 4 files changed, 129 insertions(+), 38 deletions(-) diff --git a/src/cwe_checker_lib/src/intermediate_representation/runtime_memory_image.rs b/src/cwe_checker_lib/src/intermediate_representation/runtime_memory_image.rs index 1a574676d..31318c9d8 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/runtime_memory_image.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/runtime_memory_image.rs @@ -12,6 +12,10 @@ pub struct RuntimeMemoryImage { } impl RuntimeMemoryImage { + /// Base address of the first [`MemorySegment`] when mapping relocatable + /// object files. 
+ pub const ELF_REL_BASE_ADDRESS: u64 = 0x100_000; + /// Generate a runtime memory image containing no memory segments. /// Primarily useful in situations where any access to global memory would be an error. pub fn empty(is_little_endian: bool) -> RuntimeMemoryImage { @@ -25,24 +29,14 @@ impl RuntimeMemoryImage { /// /// The function can parse ELF and PE files as input. pub fn new(binary: &[u8]) -> Result { - let parsed_object = Object::parse(binary)?; - - match parsed_object { - Object::Elf(elf_file) => { - let mut memory_segments = Vec::new(); - for header in elf_file.program_headers.iter() { - if header.p_type == elf::program_header::PT_LOAD { - memory_segments.push(MemorySegment::from_elf_segment(binary, header)); - } + match Object::parse(binary)? { + Object::Elf(elf_file) => match elf_file.header.e_type { + elf::header::ET_REL => Self::from_elf_sections(binary, elf_file), + elf::header::ET_DYN | elf::header::ET_EXEC => { + Self::from_elf_segments(binary, elf_file) } - if memory_segments.is_empty() { - return Err(anyhow!("No loadable segments found")); - } - Ok(RuntimeMemoryImage { - memory_segments, - is_little_endian: elf_file.header.endianness().unwrap().is_little(), - }) - } + ty => Err(anyhow!("Unsupported ELF type: e_type {}", ty)), + }, Object::PE(pe_file) => { let mut memory_segments = Vec::new(); for header in pe_file.sections.iter() { @@ -65,6 +59,64 @@ impl RuntimeMemoryImage { } } + /// Generate a runtime memory image for an executable ELF file or shared object. 
+ fn from_elf_segments(binary: &[u8], elf_file: elf::Elf) -> Result { + let mut memory_segments = Vec::new(); + + for header in elf_file.program_headers.iter() { + if header.p_type == elf::program_header::PT_LOAD { + memory_segments.push(MemorySegment::from_elf_segment(binary, header)); + } + } + + if memory_segments.is_empty() { + return Err(anyhow!("No loadable segments found")); + } + + Ok(Self { + memory_segments, + is_little_endian: elf_file.header.endianness().unwrap().is_little(), + }) + } + + /// Generate a runtime memory image for a relocatable object file. + /// + /// These files do not contain information about the expected memory layout. + /// Ghidra implements a basic loader that essentially concatenates all + /// `SHF_ALLOC` sections that are not `SHT_NULL`. They are placed in memory + /// as close as possible while respecting their alignment at a fixed + /// address. + /// + /// It is important that this implementation stays in sync with + /// `processSectionHeaders` in [`ElfProgramBuilder`] for the cases that we + /// care about. + /// + /// [`ElfProgramBuilder`]: https://github.com/NationalSecurityAgency/ghidra/blob/master/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/ElfProgramBuilder.java + fn from_elf_sections(binary: &[u8], elf_file: elf::Elf) -> Result { + let mut next_base = Self::ELF_REL_BASE_ADDRESS; + + Ok(Self { + memory_segments: elf_file + .section_headers + .iter() + .filter_map(|section_header| { + if section_header.is_alloc() + && section_header.sh_type != elf::section_header::SHT_NULL + && section_header.sh_size != 0 + { + let mem_seg = + MemorySegment::from_elf_section(binary, next_base, section_header); + next_base = mem_seg.base_address + mem_seg.bytes.len() as u64; + Some(mem_seg) + } else { + None + } + }) + .collect(), + is_little_endian: elf_file.header.endianness().unwrap().is_little(), + }) + } + /// Generate a runtime memory image for a bare metal binary. 
/// /// The generated runtime memory image contains: @@ -108,6 +160,35 @@ impl RuntimeMemoryImage { }) } + /// Get the base address for the image of a binary when loaded into memory. + pub fn get_base_address(binary: &[u8]) -> Result { + match Object::parse(binary)? { + Object::Elf(elf_file) => match elf_file.header.e_type { + elf::header::ET_REL => Ok(Self::ELF_REL_BASE_ADDRESS), + elf::header::ET_DYN | elf::header::ET_EXEC => { + elf_file + .program_headers + .iter() + .find_map(|header| { + let vm_range = header.vm_range(); + if !vm_range.is_empty() + && header.p_type == goblin::elf::program_header::PT_LOAD + { + // The loadable segments have to occur in order in the program header table. + // So the start address of the first loadable segment is the base offset of the binary. + Some(vm_range.start as u64) + } else { + None + } + }) + .context("No loadable segment bounds found.") + } + ty => Err(anyhow!("Unsupported ELF type: e_type {}", ty)), + }, + _ => Err(anyhow!("Binary type not yet supported")), + } + } + /// Return whether values in the memory image should be interpreted in little-endian /// or big-endian byte order. pub fn is_little_endian_byte_order(&self) -> bool { diff --git a/src/cwe_checker_lib/src/utils/binary.rs b/src/cwe_checker_lib/src/utils/binary.rs index fbea74697..0d230f61f 100644 --- a/src/cwe_checker_lib/src/utils/binary.rs +++ b/src/cwe_checker_lib/src/utils/binary.rs @@ -70,6 +70,32 @@ pub struct MemorySegment { } impl MemorySegment { + /// Generate a segment from a section header of a relocatable ELF object + /// file. 
+ pub fn from_elf_section( + binary: &[u8], + base_address: u64, + section_header: &elf::SectionHeader, + ) -> Self { + let bytes: Vec = match section_header.file_range() { + Some(range) => binary[range].to_vec(), + // `SHT_NOBITS` + None => core::iter::repeat(0) + .take(section_header.sh_size as usize) + .collect(), + }; + let alignment = section_header.sh_addralign.next_power_of_two(); + Self { + bytes, + base_address: base_address.next_multiple_of(alignment), + // ELF format specification does not allow for Declaration of + // sections as non-readable. + read_flag: true, + write_flag: section_header.is_writable(), + execute_flag: section_header.is_executable(), + } + } + /// Generate a segment from a program header of an ELF file. pub fn from_elf_segment(binary: &[u8], program_header: &elf::ProgramHeader) -> MemorySegment { let mut bytes: Vec = binary[program_header.file_range()].to_vec(); diff --git a/src/cwe_checker_lib/src/utils/ghidra.rs b/src/cwe_checker_lib/src/utils/ghidra.rs index ed88e68db..bfc868bfb 100644 --- a/src/cwe_checker_lib/src/utils/ghidra.rs +++ b/src/cwe_checker_lib/src/utils/ghidra.rs @@ -3,7 +3,10 @@ use crate::prelude::*; use crate::utils::binary::BareMetalConfig; use crate::utils::{get_ghidra_plugin_path, read_config_file}; -use crate::{intermediate_representation::Project, utils::log::LogMessage}; +use crate::{ + intermediate_representation::{Project, RuntimeMemoryImage}, + utils::log::LogMessage, +}; use directories::ProjectDirs; use nix::{sys::stat, unistd}; use std::path::{Path, PathBuf}; @@ -54,7 +57,7 @@ fn parse_pcode_project_to_ir_project( .as_ref() .map(|config| config.parse_binary_base_address()); let mut log_messages = pcode_project.normalize(); - let project: Project = match crate::utils::get_binary_base_address(binary) { + let project: Project = match RuntimeMemoryImage::get_base_address(binary) { Ok(binary_base_address) => pcode_project.into_ir_project(binary_base_address), Err(_err) => { if let Some(binary_base_address) = 
bare_metal_base_address_opt { diff --git a/src/cwe_checker_lib/src/utils/mod.rs b/src/cwe_checker_lib/src/utils/mod.rs index 881028d7f..38fccc043 100644 --- a/src/cwe_checker_lib/src/utils/mod.rs +++ b/src/cwe_checker_lib/src/utils/mod.rs @@ -27,22 +27,3 @@ pub fn get_ghidra_plugin_path(plugin_name: &str) -> std::path::PathBuf { let data_dir = project_dirs.data_dir(); data_dir.join("ghidra").join(plugin_name) } - -/// Get the base address for the image of a binary when loaded into memory. -pub fn get_binary_base_address(binary: &[u8]) -> Result { - use goblin::Object; - match Object::parse(binary)? { - Object::Elf(elf_file) => { - for header in elf_file.program_headers.iter() { - let vm_range = header.vm_range(); - if !vm_range.is_empty() && header.p_type == goblin::elf::program_header::PT_LOAD { - // The loadable segments have to occur in order in the program header table. - // So the start address of the first loadable segment is the base offset of the binary. - return Ok(vm_range.start as u64); - } - } - Err(anyhow!("No loadable segment bounds found.")) - } - _ => Err(anyhow!("Binary type not yet supported")), - } -}