Skip to content

Commit

Permalink
lib: RuntimeMemoryImage: support relocatable object files
Browse files Browse the repository at this point in the history
Add initial support for the analysis of relocatable object files. These
files do not include information about the runtime memory layout, i.e.,
segments. Ghidra handles such files by assigning addresses to sections
in `ElfProgramBuilder.processSectionHeaders`. The algorithm is
essentially:

```
base_address <- 0x100_000

for each section in section_headers do:
  if not section.is_alloc() or section.sh_type == SHT_NULL or
     section.sh_size == 0 then:
     continue
  end if

  alignment <- section.sh_addralign

  section.address <- align(base_address, alignment)

  base_address <- section.address + section.sh_size
end for
```

Importantly, the above is not the exact algorithm used by Ghidra, but only
an approximation that aims to be correct for the invocation of Ghidra
with the pcode extractor plugin.

Add an implementation of this algorithm to `MemorySegment` and use it
from `RuntimeMemoryImage::new` when a relocatable object file is given.
  • Loading branch information
Valentin Obst committed Feb 27, 2024
1 parent b086b47 commit 5436969
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ pub struct RuntimeMemoryImage {
}

impl RuntimeMemoryImage {
/// Base address of the first [`MemorySegment`] when mapping relocatable
/// object files.
///
/// Section addresses for relocatable objects are assigned starting from
/// this fixed value, and `get_base_address` reports it as the base address
/// of `ET_REL` inputs.
pub const ELF_REL_BASE_ADDRESS: u64 = 0x100_000;

/// Generate a runtime memory image containing no memory segments.
/// Primarily useful in situations where any access to global memory would be an error.
pub fn empty(is_little_endian: bool) -> RuntimeMemoryImage {
Expand All @@ -25,24 +29,14 @@ impl RuntimeMemoryImage {
///
/// The function can parse ELF and PE files as input.
pub fn new(binary: &[u8]) -> Result<Self, Error> {
let parsed_object = Object::parse(binary)?;

match parsed_object {
Object::Elf(elf_file) => {
let mut memory_segments = Vec::new();
for header in elf_file.program_headers.iter() {
if header.p_type == elf::program_header::PT_LOAD {
memory_segments.push(MemorySegment::from_elf_segment(binary, header));
}
match Object::parse(binary)? {
Object::Elf(elf_file) => match elf_file.header.e_type {
elf::header::ET_REL => Self::from_elf_sections(binary, elf_file),
elf::header::ET_DYN | elf::header::ET_EXEC => {
Self::from_elf_segments(binary, elf_file)
}
if memory_segments.is_empty() {
return Err(anyhow!("No loadable segments found"));
}
Ok(RuntimeMemoryImage {
memory_segments,
is_little_endian: elf_file.header.endianness().unwrap().is_little(),
})
}
ty => Err(anyhow!("Unsupported ELF type: e_type {}", ty)),
},
Object::PE(pe_file) => {
let mut memory_segments = Vec::new();
for header in pe_file.sections.iter() {
Expand All @@ -65,6 +59,64 @@ impl RuntimeMemoryImage {
}
}

/// Generate a runtime memory image for an executable ELF file or shared object.
fn from_elf_segments(binary: &[u8], elf_file: elf::Elf) -> Result<Self, Error> {
let mut memory_segments = Vec::new();

for header in elf_file.program_headers.iter() {
if header.p_type == elf::program_header::PT_LOAD {
memory_segments.push(MemorySegment::from_elf_segment(binary, header));
}
}

if memory_segments.is_empty() {
return Err(anyhow!("No loadable segments found"));
}

Ok(Self {
memory_segments,
is_little_endian: elf_file.header.endianness().unwrap().is_little(),
})
}

/// Generate a runtime memory image for a relocatable object file.
///
/// These files do not contain information about the expected memory layout.
/// Ghidra implements a basic loader that essentially concatenates all
/// `SHF_ALLOC` sections that are not `SHT_NULL` and not empty. Starting at
/// [`Self::ELF_REL_BASE_ADDRESS`], the sections are placed in section header
/// order, each one as close to its predecessor as its alignment permits.
///
/// It is important that this implementation stays in sync with
/// `processSectionHeaders` in [`ElfProgramBuilder`] for the cases that we
/// care about.
///
/// # Errors
///
/// Returns an error if the byte order cannot be determined from the ELF
/// header.
///
/// [`ElfProgramBuilder`]: https://github.com/NationalSecurityAgency/ghidra/blob/master/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/ElfProgramBuilder.java
fn from_elf_sections(binary: &[u8], elf_file: elf::Elf) -> Result<Self, Error> {
    // Lowest address at which the next mapped section may start.
    let mut next_base = Self::ELF_REL_BASE_ADDRESS;
    let mut memory_segments = Vec::new();

    for section_header in elf_file.section_headers.iter() {
        let is_mapped = section_header.is_alloc()
            && section_header.sh_type != elf::section_header::SHT_NULL
            && section_header.sh_size != 0;
        if !is_mapped {
            continue;
        }

        let mem_seg = MemorySegment::from_elf_section(binary, next_base, section_header);
        // Continue right behind the (aligned) segment that was just placed.
        next_base = mem_seg.base_address + mem_seg.bytes.len() as u64;
        memory_segments.push(mem_seg);
    }

    Ok(Self {
        memory_segments,
        // Propagate an undeterminable byte order as an error instead of
        // panicking inside a function that already returns `Result`.
        is_little_endian: elf_file.header.endianness()?.is_little(),
    })
}

/// Generate a runtime memory image for a bare metal binary.
///
/// The generated runtime memory image contains:
Expand Down Expand Up @@ -108,6 +160,35 @@ impl RuntimeMemoryImage {
})
}

/// Determine the address at which the image of `binary` starts once it is
/// loaded into memory.
///
/// * Relocatable ELF objects (`ET_REL`) always yield
///   [`Self::ELF_REL_BASE_ADDRESS`].
/// * Executables and shared objects (`ET_EXEC`, `ET_DYN`) yield the start of
///   the first `PT_LOAD` program header with a non-empty address range.
///
/// # Errors
///
/// Returns an error if the input cannot be parsed, is not an ELF file, has
/// an unsupported ELF type, or has no loadable segment with a non-empty
/// address range.
pub fn get_base_address(binary: &[u8]) -> Result<u64, Error> {
    let elf_file = match Object::parse(binary)? {
        Object::Elf(elf_file) => elf_file,
        _ => return Err(anyhow!("Binary type not yet supported")),
    };

    match elf_file.header.e_type {
        elf::header::ET_REL => Ok(Self::ELF_REL_BASE_ADDRESS),
        elf::header::ET_DYN | elf::header::ET_EXEC => elf_file
            .program_headers
            .iter()
            .filter(|header| header.p_type == goblin::elf::program_header::PT_LOAD)
            .map(|header| header.vm_range())
            // Loadable segments appear in order in the program header
            // table, so the first non-empty one marks the base of the image.
            .find(|vm_range| !vm_range.is_empty())
            .map(|vm_range| vm_range.start as u64)
            .context("No loadable segment bounds found."),
        ty => Err(anyhow!("Unsupported ELF type: e_type {}", ty)),
    }
}

/// Return whether values in the memory image should be interpreted in little-endian
/// or big-endian byte order.
pub fn is_little_endian_byte_order(&self) -> bool {
Expand Down
26 changes: 26 additions & 0 deletions src/cwe_checker_lib/src/utils/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,32 @@ pub struct MemorySegment {
}

impl MemorySegment {
/// Build a memory segment from a section header of a relocatable ELF object
/// file.
///
/// `base_address` is the lowest address at which the section may be placed.
/// The segment's actual base address is `base_address` rounded up to the
/// alignment requested by the section header.
pub fn from_elf_section(
    binary: &[u8],
    base_address: u64,
    section_header: &elf::SectionHeader,
) -> Self {
    let bytes: Vec<u8> = if let Some(range) = section_header.file_range() {
        binary[range].to_vec()
    } else {
        // No backing file contents (`SHT_NOBITS`): the section is
        // represented by `sh_size` zero bytes.
        vec![0u8; section_header.sh_size as usize]
    };

    // Rounds the requested alignment up to a power of two; an alignment of
    // zero becomes one.
    let alignment = section_header.sh_addralign.next_power_of_two();
    let aligned_base = base_address.next_multiple_of(alignment);

    Self {
        bytes,
        base_address: aligned_base,
        // The ELF format specification does not allow sections to be
        // declared as non-readable.
        read_flag: true,
        write_flag: section_header.is_writable(),
        execute_flag: section_header.is_executable(),
    }
}

/// Generate a segment from a program header of an ELF file.
pub fn from_elf_segment(binary: &[u8], program_header: &elf::ProgramHeader) -> MemorySegment {
let mut bytes: Vec<u8> = binary[program_header.file_range()].to_vec();
Expand Down
7 changes: 5 additions & 2 deletions src/cwe_checker_lib/src/utils/ghidra.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
use crate::prelude::*;
use crate::utils::binary::BareMetalConfig;
use crate::utils::{get_ghidra_plugin_path, read_config_file};
use crate::{intermediate_representation::Project, utils::log::LogMessage};
use crate::{
intermediate_representation::{Project, RuntimeMemoryImage},
utils::log::LogMessage,
};
use directories::ProjectDirs;
use nix::{sys::stat, unistd};
use std::path::{Path, PathBuf};
Expand Down Expand Up @@ -54,7 +57,7 @@ fn parse_pcode_project_to_ir_project(
.as_ref()
.map(|config| config.parse_binary_base_address());
let mut log_messages = pcode_project.normalize();
let project: Project = match crate::utils::get_binary_base_address(binary) {
let project: Project = match RuntimeMemoryImage::get_base_address(binary) {
Ok(binary_base_address) => pcode_project.into_ir_project(binary_base_address),
Err(_err) => {
if let Some(binary_base_address) = bare_metal_base_address_opt {
Expand Down
19 changes: 0 additions & 19 deletions src/cwe_checker_lib/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,3 @@ pub fn get_ghidra_plugin_path(plugin_name: &str) -> std::path::PathBuf {
let data_dir = project_dirs.data_dir();
data_dir.join("ghidra").join(plugin_name)
}

/// Determine the address at which the image of `binary` starts once it is
/// loaded into memory.
///
/// Only ELF files are supported. The result is the start of the first
/// `PT_LOAD` program header with a non-empty address range.
///
/// # Errors
///
/// Returns an error if the input cannot be parsed, is not an ELF file, or
/// has no loadable segment with a non-empty address range.
pub fn get_binary_base_address(binary: &[u8]) -> Result<u64, Error> {
    use goblin::Object;

    let elf_file = match Object::parse(binary)? {
        Object::Elf(elf_file) => elf_file,
        _ => return Err(anyhow!("Binary type not yet supported")),
    };

    elf_file
        .program_headers
        .iter()
        .filter(|header| header.p_type == goblin::elf::program_header::PT_LOAD)
        .map(|header| header.vm_range())
        // Loadable segments appear in order in the program header table, so
        // the first non-empty one marks the base of the image.
        .find(|vm_range| !vm_range.is_empty())
        .map(|vm_range| vm_range.start as u64)
        .ok_or_else(|| anyhow!("No loadable segment bounds found."))
}

0 comments on commit 5436969

Please sign in to comment.