diff --git a/Cargo.toml b/Cargo.toml index 124748a..75d57be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,17 +4,17 @@ categories = ["command-line-utilities", "development-tools::debugging"] description = "A fast execution trace symbolizer for Windows that runs on all major platforms and doesn't depend on any Microsoft libraries." include = ["/Cargo.toml", "/LICENSE", "/src/**", "README.md"] version = "0.1.0" -authors.workspace = true -license.workspace = true -rust-version.workspace = true -repository.workspace = true -keywords.workspace = true -edition.workspace = true +authors = ["Axel '0vercl0k' Souchet"] +license = "MIT" +rust-version = "1.70" +repository = "https://github.com/0vercl0k/symbolizer-rs" +keywords = ["windows", "kernel", "crash-dump", "symbols", "pdb"] +edition = "2021" [dependencies] anyhow = "1.0" clap = { version = "4.5", features = ["derive"] } -symbolizer = { path = "crates/symbolizer" } +addr-symbolizer = { path = "../addr-symbolizer" } env_logger = "0.11" itoa = "1.0" kdmp-parser = "0.3" @@ -22,12 +22,3 @@ kdmp-parser = "0.3" [profile.release] debug = true panic = "abort" - -[workspace] -members = ["crates/*"] -package.authors = ["Axel '0vercl0k' Souchet"] -package.license = "MIT" -package.rust-version = "1.70" -package.repository = "https://github.com/0vercl0k/symbolizer-rs" -package.keywords = ["windows", "kernel", "crash-dump", "symbols", "pdb"] -package.edition = "2021" diff --git a/crates/symbolizer/Cargo.toml b/crates/symbolizer/Cargo.toml deleted file mode 100644 index 4ca1d36..0000000 --- a/crates/symbolizer/Cargo.toml +++ /dev/null @@ -1,31 +0,0 @@ -[package] -name = "symbolizer" -version = "0.1.0" -# categories = ["command-line-utilities", "development-tools::debugging"] -# description = "A fast execution trace symbolizer for Windows that runs on all major platforms and doesn't depend on any Microsoft libraries." -include = ["/Cargo.toml", "/LICENSE", "/src/**", "README.md"] -authors.workspace = true -license.workspace = true -rust-version.workspace = true -repository.workspace = true -keywords.workspace = true -edition.workspace = true - -[dependencies] -pdb = "0.8" -log = "0.4" -msvc-demangler = "0.10" -ureq = { version = "2.9", default-features = false, features = ["tls", "gzip"] } -thiserror = "1.0.61" -anyhow = "1.0.86" - -[dev-dependencies] -kdmp-parser = "0.3" -udmp-parser = "0.2" -object = { version = "0.36.0", default-features = false, features = [ - "read", - "read_core", - "pe", - "std", -] } -clap = { version = "4.5", features = ["derive"] } diff --git a/crates/symbolizer/README.md b/crates/symbolizer/README.md deleted file mode 100644 index e69de29..0000000 diff --git a/crates/symbolizer/examples/symbolize-dump.rs b/crates/symbolizer/examples/symbolize-dump.rs deleted file mode 100644 index 7b99d36..0000000 --- a/crates/symbolizer/examples/symbolize-dump.rs +++ /dev/null @@ -1,160 +0,0 @@ -// Axel '0vercl0k' Souchet -use std::cmp::min; -use std::env; -use std::io::{self, Write}; -use std::path::PathBuf; - -use anyhow::Result; -use clap::Parser; -use kdmp_parser::KernelDumpParser; -use symbolizer::{AddrSpace, Builder, Module}; -use udmp_parser::UserDumpParser; - -/// The command line arguments. -#[derive(Debug, Parser)] -#[command(about = "Symbolize an address from a user or kernel dump file.")] -enum CliArgs { - User { dump: PathBuf, addr: String }, - Kernel { dump: PathBuf, addr: String }, -} - -/// Parse the `_NT_SYMBOL_PATH` environment variable to try the path of a symbol -/// cache. -fn sympath() -> Option { - let env = env::var("_NT_SYMBOL_PATH").ok()?; - - if !env.starts_with("srv*") { - return None; - } - - let sympath = env.strip_prefix("srv*").unwrap(); - let sympath = PathBuf::from(sympath.split('*').next().unwrap()); - - if sympath.is_dir() { - Some(sympath) - } else { - None - } -} - -fn user(dmp: UserDumpParser, addr: u64) -> Result<()> { - #[derive(Debug)] - struct UserDumpAddrSpace<'a>(UserDumpParser<'a>); - impl<'a> AddrSpace for UserDumpAddrSpace<'a> { - fn read_at(&mut self, addr: u64, mut buf: &mut [u8]) -> io::Result { - let mut cur_addr = addr; - let mut read_len = 0; - while read_len < buf.len() { - let Some(block) = self.0.get_mem_block(addr) else { - return Err(io::Error::new( - io::ErrorKind::Unsupported, - format!("no mem block found for {addr:#x}"), - )); - }; - - let Some(data) = block.data_from(cur_addr) else { - panic!(); - }; - - let left = buf.len() - read_len; - let len = min(data.len(), left); - buf.write_all(&data[..len]).unwrap(); - cur_addr += u64::try_from(len).unwrap(); - read_len += len; - } - - Ok(read_len) - } - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { - match self.read_at(addr, buf) { - Ok(sz) => Ok(Some(sz)), - Err(_) => Ok(None), - } - } - } - - let modules = dmp - .modules() - .values() - .map(|module| { - Module::new( - module.path.file_name().unwrap().to_string_lossy(), - module.start_addr(), - module.end_addr(), - ) - }) - .collect::>(); - - let mut wrapper = UserDumpAddrSpace(dmp); - let mut symb = Builder::default() - .modules(modules) - .msft_symsrv() - .symcache(sympath().expect("define a _NT_SYMBOL_PATH")) - .build()?; - - let mut s = Vec::new(); - symb.full(&mut wrapper, addr, &mut s)?; - println!("{addr:#x}: {}", String::from_utf8(s)?); - - Ok(()) -} - -fn kernel(dmp: KernelDumpParser, addr: u64) -> Result<()> { - #[derive(Debug)] - struct KernelDumpAdrSpace<'a>(&'a KernelDumpParser); - impl<'a> AddrSpace for KernelDumpAdrSpace<'a> { - fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result { - self.0 - .virt_read(addr.into(), buf) - .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) - } - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { - self.0 - .try_virt_read(addr.into(), buf) - .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) - } - } - - let mut modules = Vec::new(); - for (at, name) in dmp.user_modules().chain(dmp.kernel_modules()) { - let (_, filename) = name.rsplit_once('\\').unwrap_or((name, name)); - modules.push(Module::new( - filename.to_string(), - at.start.into(), - at.end.into(), - )); - } - - let mut wrapper = KernelDumpAdrSpace(&dmp); - let mut symb = Builder::default() - .modules(modules) - .msft_symsrv() - .symcache(sympath().expect("define a _NT_SYMBOL_PATH")) - .build()?; - - let mut s = Vec::new(); - symb.full(&mut wrapper, addr, &mut s)?; - println!("{addr:#x}: {}", String::from_utf8(s)?); - - Ok(()) -} - -fn hex(x: &str) -> Result { - let no_backtick = x.replace('`', ""); - let no_prefix = no_backtick.strip_prefix("0x").unwrap_or(x); - - Ok(u64::from_str_radix(no_prefix, 16)?) -} - -fn main() -> Result<()> { - // Parse the CLI arguments. - let args = CliArgs::parse(); - match args { - CliArgs::User { dump, addr } => user(UserDumpParser::new(dump)?, hex(&addr)?), - CliArgs::Kernel { dump, addr } => kernel(KernelDumpParser::new(dump)?, hex(&addr)?), - }?; - - Ok(()) -} diff --git a/crates/symbolizer/src/addr_space.rs b/crates/symbolizer/src/addr_space.rs deleted file mode 100644 index 765ff15..0000000 --- a/crates/symbolizer/src/addr_space.rs +++ /dev/null @@ -1,66 +0,0 @@ -// Axel '0vercl0k' Souchet - May 30 2024 -use core::slice; -use std::io; -use std::mem::{self, MaybeUninit}; - -pub trait AddrSpace { - fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result; - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result>; - - fn read_exact_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result<()> { - let size = self.read_at(addr, buf)?; - - if size != buf.len() { - Err(io::Error::new( - io::ErrorKind::Other, - format!("could read only {size} bytes instead of {}", buf.len()), - )) - } else { - Ok(()) - } - } - - fn try_read_exact_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { - let Some(size) = self.try_read_at(addr, buf)? else { - return Ok(None); - }; - - if size != buf.len() { - Err(io::Error::new( - io::ErrorKind::Other, - format!("could read only {size} bytes instead of {}", buf.len()), - )) - } else { - Ok(Some(())) - } - } - - fn read_struct_at(&mut self, addr: u64) -> io::Result - where - S: Copy, - { - let mut t = MaybeUninit::uninit(); - let size_of_t = mem::size_of_val(&t); - let slice_over_t = - unsafe { slice::from_raw_parts_mut(t.as_mut_ptr() as *mut u8, size_of_t) }; - - self.read_exact_at(addr, slice_over_t)?; - - Ok(unsafe { t.assume_init() }) - } - - fn try_read_struct_at(&mut self, addr: u64) -> io::Result> - where - S: Copy, - { - let mut t: MaybeUninit = MaybeUninit::uninit(); - let size_of_t = mem::size_of_val(&t); - let slice_over_t = - unsafe { slice::from_raw_parts_mut(t.as_mut_ptr() as *mut u8, size_of_t) }; - - Ok(self - .try_read_exact_at(addr, slice_over_t)? - .map(|_| unsafe { t.assume_init() })) - } -} diff --git a/crates/symbolizer/src/builder.rs b/crates/symbolizer/src/builder.rs deleted file mode 100644 index c30bcc7..0000000 --- a/crates/symbolizer/src/builder.rs +++ /dev/null @@ -1,82 +0,0 @@ -// Axel '0vercl0k' Souchet - June 7 2024 -use std::path::{Path, PathBuf}; - -use anyhow::anyhow; - -use crate::symbolizer::{Config, PdbLookupMode}; -use crate::{Module, Result, Symbolizer}; - -#[derive(Default)] -pub struct NoSymcache; - -pub struct Symcache(PathBuf); - -/// Builder for [`Symbolizer`]. -#[derive(Default, Debug)] -pub struct Builder { - symcache: SC, - modules: Vec, - mode: PdbLookupMode, -} - -impl Builder { - pub fn msft_symsrv(self) -> Builder { - self.online(vec!["https://msdl.microsoft.com/download/symbols/"]) - } - - pub fn online(self, symsrvs: impl IntoIterator>) -> Builder { - let Self { - symcache, modules, .. - } = self; - - Builder { - symcache, - modules, - mode: PdbLookupMode::Online { - symsrvs: symsrvs.into_iter().map(Into::into).collect(), - }, - } - } -} - -impl Builder { - pub fn symcache(self, cache: impl AsRef) -> Builder { - let Self { modules, mode, .. } = self; - - Builder { - symcache: Symcache(cache.as_ref().to_path_buf()), - modules, - mode, - } - } -} - -impl Builder { - pub fn modules(mut self, modules: impl IntoIterator) -> Self { - self.modules = modules.into_iter().collect(); - - self - } -} - -impl Builder { - pub fn build(self) -> Result { - let Self { - symcache, - modules, - mode, - } = self; - - if !symcache.0.exists() { - return Err(anyhow!("symcache {:?} does not exist", symcache.0).into()); - } - - let config = Config { - symcache: symcache.0, - modules, - mode, - }; - - Symbolizer::new(config) - } -} diff --git a/crates/symbolizer/src/error.rs b/crates/symbolizer/src/error.rs deleted file mode 100644 index a1f3ebe..0000000 --- a/crates/symbolizer/src/error.rs +++ /dev/null @@ -1,40 +0,0 @@ -// Axel '0vercl0k' Souchet - May 27 2024 -use std::io; -use std::num::{ParseIntError, TryFromIntError}; -use std::path::PathBuf; -use std::str::Utf8Error; -use std::string::FromUtf8Error; - -use pdb::PdbInternalSectionOffset; -use thiserror::Error; - -pub type Result = std::result::Result; - -#[derive(Error, Debug)] -pub enum Error { - #[error("failed to get rva from symbol {0} / {1:?}")] - SymbolRva(String, PdbInternalSectionOffset), - #[error("pdb error: {0}")] - Pdb(#[from] pdb::Error), - #[error("from int error: {0}")] - FromInt(#[from] TryFromIntError), - #[error("parse int error: {0}")] - ParseInt(#[from] ParseIntError), - #[error("utf8: {0}")] - Utf8(#[from] Utf8Error), - #[error("from utf8: {0}")] - FromUtf8(#[from] FromUtf8Error), - #[error("pdb path {0:?} does not have a filename")] - PdbPathNoName(PathBuf), - #[error("failed to perform an i/o: {0}")] - Io(#[from] io::Error), - #[error("failed to download pdb {pdb_url}: {e}")] - DownloadPdb { - pdb_url: String, - e: Box, - }, - #[error("the module path is either 0 or larger than reasonable")] - CodeViewInvalidPath, - #[error("{0}")] - Anyhow(#[from] anyhow::Error), -} diff --git a/crates/symbolizer/src/guid.rs b/crates/symbolizer/src/guid.rs deleted file mode 100644 index 5bd682d..0000000 --- a/crates/symbolizer/src/guid.rs +++ /dev/null @@ -1,145 +0,0 @@ -// Axel '0vercl0k' Souchet - February 20 2024 -//! This module contains the implementation of the [`Guid`] type. -use std::fmt::Display; -use std::str::FromStr; - -use anyhow::anyhow; - -use crate::Error; - -/// A GUID. -#[derive(Default, Debug, PartialEq, Eq, Hash, Clone, Copy)] -pub struct Guid { - d0: u32, - d1: u16, - d2: u16, - d3: [u8; 8], -} - -impl FromStr for Guid { - type Err = Error; - - fn from_str(s: &str) -> Result { - if s.len() != 32 { - return Err(anyhow!("the guid str ({s:?}) should be 32 bytes long").into()); - } - - let mut bytes = [0; 16]; - for (n, chunk) in s.as_bytes().chunks_exact(2).enumerate() { - let s = std::str::from_utf8(chunk)?; - bytes[n] = u8::from_str_radix(s, 16)?; - } - - let d0 = u32::from_be_bytes(bytes[0..4].try_into().unwrap()); - let d1 = u16::from_be_bytes(bytes[4..6].try_into().unwrap()); - let d2 = u16::from_be_bytes(bytes[6..8].try_into().unwrap()); - let d3 = bytes[8..].try_into().unwrap(); - - Ok(Self { d0, d1, d2, d3 }) - } -} - -impl From<[u8; 16]> for Guid { - fn from(value: [u8; 16]) -> Self { - let d0 = u32::from_le_bytes(value[0..4].try_into().unwrap()); - let d1 = u16::from_le_bytes(value[4..6].try_into().unwrap()); - let d2 = u16::from_le_bytes(value[6..8].try_into().unwrap()); - let d3 = value[8..].try_into().unwrap(); - - Self { d0, d1, d2, d3 } - } -} - -impl Display for Guid { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( - "{:08X}{:04X}{:04X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}", - self.d0, - self.d1, - self.d2, - self.d3[0], - self.d3[1], - self.d3[2], - self.d3[3], - self.d3[4], - self.d3[5], - self.d3[6], - self.d3[7] - )) - } -} - -#[cfg(test)] -mod tests { - use std::str::FromStr; - - use crate::Guid; - - const NTDLL_GUID: Guid = Guid { - d0: 0x8d5d5ed5, - d1: 0xd5b8, - d2: 0xaa60, - d3: [0x9a, 0x82, 0x60, 0x0c, 0x14, 0xe3, 0x00, 0x4d], - }; - - #[test] - fn malformed_guids() { - assert!(Guid::from_str("8D5D5ED5D5B8AA609A82600C14E3004D1").is_err()); - assert!(Guid::from_str("8D5D5ED5D5B8AA609A82600C14E3004").is_err()); - } - - #[test] - fn non_hex_guids() { - assert!(Guid::from_str("8D5D5ED5D5B8AA609A82600C14E3004Z").is_err()); - } - - #[test] - fn str() { - // 0:000> lmvm ntdll - // Browse full module list - // start end module name - // 00007ff9`aa450000 00007ff9`aa667000 ntdll (pdb symbols) - // c:\dbg\sym\ntdll.pdb\8D5D5ED5D5B8AA609A82600C14E3004D1\ntdll.pdb - assert_eq!( - "8D5D5ED5D5B8AA609A82600C14E3004D".parse::().unwrap(), - NTDLL_GUID - ) - } - - #[test] - fn from() { - // 0:000> !dh ntdll - // ... - // SECTION HEADER #5 - // .rdata name - // 4D210 virtual size - // 132000 virtual address - // 4E000 size of raw data - // 132000 file pointer to raw data - // 0 file pointer to relocation table - // 0 file pointer to line numbers - // 0 number of relocations - // 0 number of line numbers - // 40000040 flags - // Initialized Data - // (no align specified) - // Read Only - // ... - // Debug Directories(4) - // Type Size Address Pointer - // cv 22 15b880 15b880 Format: RSDS, guid, 1, ntdll.pdb - // - // 0:000> db ntdll+15b880 - // 00007ff9`aa5ab880 52 53 44 53 d5 5e 5d 8d-b8 d5 60 aa 9a 82 60 0c - // RSDS.^]...`...`. 00007ff9`aa5ab890 14 e3 00 4d 01 00 00 00-6e 74 64 - // 6c 6c 2e 70 64 ...M....ntdll.pd - - assert_eq!( - Guid::from([ - 0xd5, 0x5e, 0x5d, 0x8d, 0xb8, 0xd5, 0x60, 0xaa, 0x9a, 0x82, 0x60, 0x0c, 0x14, 0xe3, - 0x00, 0x4d - ]), - NTDLL_GUID - ) - } -} diff --git a/crates/symbolizer/src/lib.rs b/crates/symbolizer/src/lib.rs deleted file mode 100644 index f94e1bf..0000000 --- a/crates/symbolizer/src/lib.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Axel '0vercl0k' Souchet - May 26th 2024 -mod addr_space; -mod builder; -mod error; -mod guid; -mod misc; -mod modules; -mod pdbcache; -mod pe; -mod stats; -mod symbolizer; - -pub use addr_space::AddrSpace; -pub use builder::Builder; -pub use error::{Error, Result}; -pub use guid::Guid; -pub use modules::{Module, Modules}; -pub use pe::PdbId; -pub use stats::Stats; -pub use symbolizer::Symbolizer; diff --git a/crates/symbolizer/src/misc.rs b/crates/symbolizer/src/misc.rs deleted file mode 100644 index 8df4116..0000000 --- a/crates/symbolizer/src/misc.rs +++ /dev/null @@ -1,130 +0,0 @@ -// Axel '0vercl0k' Souchet - February 23 2024 -//! This module contains the implementation of a bunch of misc utility functions -//! that didn't really fit anywhere else. - -/// A relative address. -pub type Rva = u32; - -/// Convert an `u64` into an hex string. -/// -/// Highly inspired by 'Fast unsigned integer to hex string' by Johnny Lee: -/// - -pub fn fast_hex64(buffer: &mut [u8; 16], u: u64) -> &[u8] { - let mut x = u as u128; - - // Arrange each digit into their own byte. Each byte will become the ascii - // character representing its digit. For example, we want to arrange: - // - `0x00000000_00000000_DEADBEEF_BAADC0DE` into - // - `0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E`. - // - // Here's a step by step using `0xDEADBEEF_BAADC0DE`: - // 1. `x = 0x00000000_DEADBEEF_00000000_BAADC0DE` - // 2. `x = 0xDEAD0000_BEEF0000_BAAD0000_C0DE0000` - // 3. `x = 0x00DE00AD_00BE00EF_00BA00AD_00C000DE` - // 4. `x = 0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E` - // - // Let's start the dance.. - x = (x & 0xFFFFFFFF_00000000) << 32 | x; - x = ((x & 0xFFFF0000_00000000_FFFF0000) << 32) | ((x & 0xFFFF_00000000_0000FFFF) << 16); - x = ((x & 0xFF0000_00FF0000_00FF0000_00FF0000) >> 16) - | ((x & 0xFF000000_FF000000_FF000000_FF000000) >> 8); - x = ((x & 0xF000F0_00F000F0_00F000F0_00F000F0) << 4) | (x & 0xF000F_000F000F_000F000F_000F000F); - - // This creates a mask where there'll be a 0x01 byte for each digit that is - // alpha. For example, for `0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E` we want: - // `0x01010101_01010101_01010101_01000101`. The trick is to add 0x06 to each - // byte; if the digit is 0x0A..0x0F, adding 0x06 will give 0x10..0x15 (notice - // the leading '1'). Note that we need to ADD, not an OR :). At this point, - // right shifting by 4 bits means to position that leading '1' in the lower - // nibble which is then 'grabbed' via the masking with 0x01.. - let mask = - ((x + 0x06060606_06060606_06060606_06060606) >> 4) & 0x01010101_01010101_01010101_01010101; - - // Turn each digit into their ASCII equivalent by setting the high nibble of - // each byte to 0x3. `0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E` becomes - // `0x3D3E3A3D_3B3E3E3F_3B3A3A3D_3C303D3E`. - x |= 0x30303030_30303030_30303030_30303030; - - // The last step is to adjust the ASCII byte for every digit that was in - // 0xA..0xF. We basically add to each of those bytes `0x27` to make them lower - // case alpha ASCII. - // For example: - // - `0x01010101_01010101_01010101_01000101 * 0x27 = - // 0x27272727_27272727_27272727_27002727` - // - `0x3D3E3A3D_3B3E3E3F_3B3A3A3D_3C303D3E + - // 0x27272727_27272727_27272727_27002727` = - // `0x64656164_62656566_62616164_63306465` - // - // Why `0x27`? Well, if we have the digit 'a', we end up with `0x3a`. ASCII - // character for 'a' is `0x61`, so `0x61 - 0x3a = 0x27`. - x += 0x27 * mask; - - // Transform the integer into a slice of bytes. - buffer.copy_from_slice(&x.to_be_bytes()); - - // We're done! - buffer -} - -/// Convert an `u32` into an hex string. -/// -/// Highly inspired by 'Fast unsigned integer to hex string' by Johnny Lee: -/// - -/// -/// Adapted to not bother shuffling the bytes in little endian; we simply read -/// the final integer as big endian. -pub fn fast_hex32(buffer: &mut [u8; 8], u: u32) -> &[u8] { - let mut x = u as u64; - - // Here's a step by step using `0xDEADBEEF`: - // 1. `x = 0x0000DEAD_0000BEEF` - // 2. `x = 0xDE00AD00_BE00EF00` - // 3. `x = 0x0D0E0A0D_0B0E0E0F` - x = (x & 0xFFFF0000) << 16 | x; - x = ((x & 0x0000FF00_0000FF00) << 16) | ((x & 0x000000FF_000000FF) << 8); - x = ((x & 0xF000F000_F000F000) >> 4) | ((x & 0x0F000F00_0F000F00) >> 8); - - let mask = ((x + 0x06060606_06060606) >> 4) & 0x01010101_01010101; - x |= 0x30303030_30303030; - x += 0x27 * mask; - - buffer.copy_from_slice(&x.to_be_bytes()); - - buffer -} - -#[cfg(test)] -mod tests { - use super::{fast_hex32, fast_hex64}; - - #[test] - fn hex32() { - let mut buffer = [0; 8]; - let out = fast_hex32(&mut buffer, 0xdeadbeef); - assert_eq!(out, &[b'd', b'e', b'a', b'd', b'b', b'e', b'e', b'f']); - let out = fast_hex32(&mut buffer, 0xdead); - assert_eq!(out, &[b'0', b'0', b'0', b'0', b'd', b'e', b'a', b'd']); - let out = fast_hex32(&mut buffer, 0x0); - assert_eq!(out, &[b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0']); - } - - #[test] - fn hex64() { - let mut buffer = [0; 16]; - let out = fast_hex64(&mut buffer, 0xdeadbeef_baadc0de); - assert_eq!(out, &[ - b'd', b'e', b'a', b'd', b'b', b'e', b'e', b'f', b'b', b'a', b'a', b'd', b'c', b'0', - b'd', b'e' - ]); - let out = fast_hex64(&mut buffer, 0xdeadbeef); - assert_eq!(out, &[ - b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'd', b'e', b'a', b'd', b'b', b'e', - b'e', b'f' - ]); - let out = fast_hex64(&mut buffer, 0x0); - assert_eq!(out, &[ - b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', - b'0', b'0' - ]); - } -} diff --git a/crates/symbolizer/src/modules.rs b/crates/symbolizer/src/modules.rs deleted file mode 100644 index 7411cb0..0000000 --- a/crates/symbolizer/src/modules.rs +++ /dev/null @@ -1,98 +0,0 @@ -// Axel '0vercl0k' Souchet - February 23 2024 -//! This module contains the implementation of the [`Module`] type which is used -//! across the codebase. -use std::ops::Range; - -use crate::misc::Rva; - -/// A user or kernel module. -#[derive(Debug, Default, Clone)] -pub struct Module { - /// Where the module is loaded into virtual memory. - pub at: Range, - /// The name of the module. - pub name: String, -} - -impl Module { - /// Create a [`Module`]. - pub fn new(name: impl Into, start: u64, end: u64) -> Self { - Module { - name: name.into(), - at: start..end, - } - } - - /// Calculate an rva from an `addr` contained in this module. - pub fn rva(&self, addr: u64) -> Rva { - debug_assert!(self.at.contains(&addr)); - - let offset = addr - self.at.start; - assert!(offset <= u32::MAX.into()); - - offset as Rva - } -} - -/// A list of modules. -#[derive(Debug, Default)] -pub struct Modules(Vec); - -impl Modules { - /// Create a [`Modules`]. - pub fn new(mut modules: Vec) -> Self { - // Order the modules by their end addresses. - modules.sort_unstable_by_key(|e| e.at.end); - - Self(modules) - } - - /// Find the module that contains this address. - pub fn find(&self, addr: u64) -> Option<&Module> { - // Find the index of the first module that might contain `addr`. - let idx = self.0.partition_point(|m| m.at.end <= addr); - - // At this point there's several cases to handle. - // - // `partition_point` returns the len of the vector if it couldn't - // partition in two. This means that `addr` cannot possibly be contained by any - // of the modules we have, so we're done. - if idx == self.0.len() { - return None; - } - - // We found the first module that has an end address larger than `addr`. This - // doesn't mean the module contains the address though. Imagine `addr` = - // `0xdeadbeef`, and `module.at` = `[0xefefefef, 0xefefefef+1]`. - let module = &self.0[idx]; - - // For this reason, we'll make sure the `addr` is in fact included, otherwise - // it's not a match. - if module.at.contains(&addr) { - Some(module) - } else { - None - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn basics() { - let modules = Modules::new(vec![ - Module::new("foo".to_string(), 0x1_000, 0x2_000), - Module::new("foobar".to_string(), 0x2_000, 0x3_000), - Module::new("bar".to_string(), 0x4_000, 0x5_000), - ]); - - assert!(modules.find(1).is_none()); - assert_eq!(modules.find(0x1_000).unwrap().name, "foo"); - assert_eq!(modules.find(0x2_000).unwrap().name, "foobar"); - assert!(modules.find(0x3_000).is_none()); - assert_eq!(modules.find(0x4_fff).unwrap().name, "bar"); - assert!(modules.find(0x6_000).is_none()); - } -} diff --git a/crates/symbolizer/src/pdbcache.rs b/crates/symbolizer/src/pdbcache.rs deleted file mode 100644 index 591f050..0000000 --- a/crates/symbolizer/src/pdbcache.rs +++ /dev/null @@ -1,551 +0,0 @@ -// Axel '0vercl0k' Souchet - February 23 2024 -//! This module contains the implementation of the [`PdbCache`] which is the -//! object that keeps track of all the information needed to symbolize an -//! address. It extracts it out of a PDB file and doesn't require it to be -//! around. -use std::borrow::Cow; -use std::collections::BTreeMap; -use std::fmt::Debug; -use std::fs::File; -use std::ops::Range; -use std::path::Path; - -use anyhow::{anyhow, Context}; -use log::{trace, warn}; -use pdb::{ - AddressMap, FallibleIterator, LineProgram, PdbInternalSectionOffset, ProcedureSymbol, - StringTable, Symbol, -}; - -use crate::error::Result; -use crate::modules::Module; - -/// A PDB opened via file access. -type Pdb<'p> = pdb::PDB<'p, File>; -/// A relative virtual address. -type Rva = u32; -/// A vector of lines. -type Lines = Vec; - -/// A line of source code. -/// -/// It maps an offset in the function (like offset -/// `0x1122`) to a line number in a file (like `foo.c:1336`). -#[derive(Default, Debug)] -struct Line { - /// Offset from the start of the function it's part of. - offset: u32, - /// The line number. - number: Rva, - /// Most lines in a function are part of the same file which is stored in - /// the [`SourceInfo`] which contains the lines info. But in case, this line - /// is stored in a different file, this is its path. - override_path: Option, -} - -impl Line { - /// Build a [`Line`]. - fn new(offset: Rva, number: u32, override_path: Option) -> Self { - Self { - offset, - number, - override_path, - } - } -} - -/// Information related to source code. -/// -/// It contains the path to the source code file as well as a mapping between -/// offsets to line number. -#[derive(Debug, Default)] -struct SourceInfo { - path: String, - lines: Lines, -} - -impl SourceInfo { - /// Build a [`SourceInfo`]. - fn new(path: String, lines: Lines) -> Self { - // We assume we have at least one entry in the vector. - assert!(!lines.is_empty()); - - Self { path, lines } - } - - /// Find the line number associated to a raw offset from inside a function. - pub fn line(&self, offset: Rva) -> &Line { - self.lines - .iter() - .find(|&line| offset < line.offset) - .unwrap_or(self.lines.last().unwrap()) - } -} - -/// A function. -/// -/// It has a name and if available, information related to the file where the -/// function is implemented as well as the line of code. -#[derive(Default, Debug)] -struct FuncSymbol { - pub name: String, - pub source_info: Option, -} - -impl FuncSymbol { - fn new(name: String, source_info: Option) -> Self { - Self { name, source_info } - } -} - -impl From for FuncSymbol { - fn from(value: BuilderEntry) -> Self { - FuncSymbol::new(value.name, value.source_info) - } -} - -/// A PDB cache. -/// -/// It basically is a data-structure that stores all the information about the -/// functions defined in a module. It extracts everything it can off a PDB and -/// then toss it as a PDB file is larger than a [`PdbCache`] (as we don't care -/// about types, variables, etc.). -pub struct PdbCache { - module_name: String, - addrs: Vec>, - symbols: Vec, -} - -impl Debug for PdbCache { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PdbCache") - .field("module_name", &self.module_name) - .finish_non_exhaustive() - } -} - -impl PdbCache { - fn new(module_name: String, mut symbols: Vec<(Range, FuncSymbol)>) -> Self { - symbols.sort_unstable_by_key(|(range, _)| range.end); - let (addrs, symbols) = symbols.into_iter().unzip(); - - Self { - module_name, - addrs, - symbols, - } - } - - /// Find a symbol that contains `rva`. - fn find_sym(&self, rva: Rva) -> Option<(Rva, &FuncSymbol)> { - let idx = self.addrs.partition_point(|probe| probe.end <= rva); - if idx == self.addrs.len() { - return None; - } - - let range = &self.addrs[idx]; - let func = &self.symbols[idx]; - - if range.contains(&rva) { - Some((range.start, func)) - } else { - None - } - } - - /// Symbolize a raw address. - /// - /// This pulls as much information as possible and use any private symbols - /// if there were any. - pub fn symbolize(&self, rva: Rva) -> Result { - // Find the function in which this `rva` is in. - let Some((func_rva, func_symbol)) = self.find_sym(rva) else { - // If we can't find one, we'll just return `module.dll+rva`. - return Ok(format!("{}+{:#x}", self.module_name, rva)); - }; - - debug_assert!( - rva >= func_rva, - "The function RVA should always be smaller or equal to the instruction RVA" - ); - - // Calculate the instruction offset. - let instr_offset = rva - func_rva; - - // Generate the symbolized version. - let symbolized = if let Some(source_info) = &func_symbol.source_info { - // If we have knowledge about in which source file this is implemented and at - // what line number, then let's use it.. - let line = source_info.line(instr_offset); - let path = line.override_path.as_ref().unwrap_or(&source_info.path); - - format!( - "{}!{}+{instr_offset:#x} [{path} @ {}]", - self.module_name, func_symbol.name, line.number - ) - } else { - // ..or do without if it's not present. - format!( - "{}!{}+{instr_offset:#x}", - self.module_name, func_symbol.name - ) - }; - - Ok(symbolized) - } -} - -#[derive(Debug)] -struct BuilderEntry { - name: String, - len: Option, - source_info: Option, -} - -impl BuilderEntry { - fn new(name: String, len: Option, source_info: Option) -> Self { - Self { - name, - len, - source_info, - } - } - - fn with_name(name: String) -> Self { - Self::new(name, None, None) - } - - fn len(&self) -> Option { - self.len - } -} - -/// A [`PdbCache`] builder. -/// -/// Ultimately, we try to get as much information possible on modules with what -/// we have. Sometimes, we have public symbols, something we have private -/// symbols and.. sometimes we have nothing (just its PE). If we're dealing with -/// just information extracted from the PE or the public symbols, we have no -/// available information regarding function sizes. -/// -/// To work around this issue, what we do is we aggregate all the information in -/// a data structure ordered by the function address. Once we're done, we walk -/// this data structure and we calculate the size of the current function by -/// 'filling the hole' up to the next function. This is innacurate but is the -/// only heuristic I had in store. -/// -/// Once we have a list of functions with assigned sizes, we can finally build -/// the [`PdbCache`] structure. -#[derive(Debug)] -pub struct PdbCacheBuilder<'module> { - /// The module for which this symbol cache is for. - module: &'module Module, - /// Basically all the information we've extracted so far. - /// - /// The key is the [`Rva`] of where the module starts, and the value is a - /// [`BuilderEntry`] which describes the symbol with more details. - symbols: BTreeMap, -} - -impl<'module> PdbCacheBuilder<'module> { - pub fn new(module: &'module Module) -> Self { - Self { - module, - symbols: BTreeMap::new(), - } - } - - /// Ingest a bunch of symbols. - /// - /// The key is the start [`Rva`] of the symbol, and the value is its name. - /// This is used to ingest for example a list of functions acquired from the - /// EAT of a module. - pub fn ingest(&mut self, symbols: impl Iterator) { - for (start, name) in symbols { - self.symbols.insert(start, BuilderEntry::with_name(name)); - } - } - - /// Parse a [`ProcedureSymbol`]. - fn parse_procedure_symbol( - &mut self, - proc: &ProcedureSymbol, - address_map: &AddressMap, - string_table: &StringTable, - line_program: &LineProgram, - ) -> Result<()> { - let proc_name = proc.name.to_string(); - let Some(pdb::Rva(proc_rva)) = proc.offset.to_rva(address_map) else { - warn!( - "failed to get rva for procedure symbol {} / {:?}, skipping", - proc_name, proc.offset - ); - - return Ok(()); - }; - - let mut lines_it = line_program.lines_for_symbol(proc.offset); - let mut main_path = None; - let mut lines = Lines::new(); - while let Some(line) = lines_it.next()? { - let Some(pdb::Rva(line_rva)) = line.offset.to_rva(address_map) else { - warn!( - "failed to get rva for procedure symbol {} / {:?}, skipping", - proc_name, proc.offset - ); - continue; - }; - - let file_info = line_program.get_file_info(line.file_index)?; - let override_path = if main_path.is_none() { - main_path = Some(file_info.name.to_string_lossy(string_table)?.into_owned()); - - None - } else { - let new_path = file_info.name.to_string_lossy(string_table)?; - if main_path.as_ref().unwrap() != &new_path { - Some(new_path.into_owned()) - } else { - None - } - }; - - if line_rva < proc_rva { - warn!( - "symbol {} has confusing line information, skipping", - proc_name - ); - return Ok(()); - } - - let line_offset = line_rva - proc_rva; - lines.push(Line::new(line_offset, line.line_start, override_path)); - } - - self.ingest_symbol( - address_map, - proc_name, - proc.offset, - Some(proc.len), - main_path.map(|p| SourceInfo::new(p, lines)), - ) - } - - /// Ingest a symbol with a name. - fn ingest_symbol_with_name( - &mut self, - address_map: &AddressMap, - name: Cow, - offset: PdbInternalSectionOffset, - ) -> Result<()> { - self.ingest_symbol(address_map, name, offset, None, None) - } - - /// Ingest a symbol with a name and a length. - fn ingest_symbol_with_len( - &mut self, - address_map: &AddressMap, - name: Cow, - offset: PdbInternalSectionOffset, - len: u32, - ) -> Result<()> { - self.ingest_symbol(address_map, name, offset, Some(len), None) - } - - /// Ingest a symbol. - /// - /// Some symbols have a length, some don't, some have source information, - /// some don't. - fn ingest_symbol( - &mut self, - address_map: &AddressMap, - name: Cow, - offset: PdbInternalSectionOffset, - len: Option, - source_info: Option, - ) -> Result<()> { - use msvc_demangler::DemangleFlags as DF; - let undecorated_name = if name.as_bytes().starts_with(b"?") { - // Demangle the name if it starts by a '?'. - match msvc_demangler::demangle(&name, DF::NAME_ONLY) { - Ok(o) => o, - Err(e) => { - // Let's log the failures as warning because we might care one day? - warn!("failed to demangle {name}: {e}"); - - // But if it failed, returning the mangled name is better than nothing. - name.into_owned() - } - } - } else { - // If it isn't a mangled name, then do.. nothing! - name.into() - }; - - // Get the RVA.. - let pdb::Rva(rva) = offset.to_rva(address_map).ok_or_else(|| { - anyhow!( - "failed to get rva from symbol {undecorated_name} / {:?}, skipping", - offset - ) - })?; - - //.. and build an entry for this function. - if let Some(prev) = self - .symbols - .insert(rva, BuilderEntry::new(undecorated_name, len, source_info)) - { - warn!("symbol {prev:?} in dbi has a duplicate at {rva:#x}, skipping"); - } - - Ok(()) - } - - /// Parse a [`Symbol`]. - fn parse_symbol( - &mut self, - address_map: &AddressMap, - symbol: &Symbol, - extra: Option<(&StringTable, &LineProgram)>, - ) -> Result<()> { - use pdb::SymbolData as SD; - match symbol.parse()? { - SD::Procedure(procedure) => { - let (string_table, line_program) = extra.unwrap(); - self.parse_procedure_symbol(&procedure, address_map, string_table, line_program)?; - } - SD::Public(public) => { - self.ingest_symbol_with_name(address_map, public.name.to_string(), public.offset)?; - } - SD::Thunk(thunk) => { - self.ingest_symbol_with_len( - address_map, - thunk.name.to_string(), - thunk.offset, - thunk.len.into(), - )?; - } - _ => {} - }; - - Ok(()) - } - - /// Parse the debug information stream which is where private symbols are - /// stored in. - fn parse_dbi(&mut self, pdb: &mut Pdb, address_map: &AddressMap) -> Result<()> { - // If we don't have a string table, there is no point in parsing the debug - // information stream. - let Ok(string_table) = pdb.string_table() else { - return Ok(()); - }; - - // Grab the debug information stream.. - let dbi = pdb.debug_information().context("failed to get dbi")?; - // ..and grab / walk through the 'modules'. - let mut module_it = dbi.modules()?; - while let Some(module) = module_it.next()? { - // Get information about the module; such as its path, its symbols, etc. - let Some(info) = pdb.module_info(&module)? else { - warn!("no module info: {:?}", &module); - continue; - }; - - let program = info.line_program()?; - let mut sym_it = info.symbols()?; - while let Some(symbol) = sym_it.next()? { - if let Err(e) = - self.parse_symbol(address_map, &symbol, Some((&string_table, &program))) - { - warn!("parsing {symbol:?} failed with {e:?}, ignoring"); - } - } - } - - Ok(()) - } - - /// Parse the global symbols stream where public symbols are stored at. - fn parse_global_symbols_table( - &mut self, - pdb: &mut Pdb, - address_map: &AddressMap, - ) -> Result<()> { - let global_symbols = pdb.global_symbols()?; - let mut symbol_it = global_symbols.iter(); - while let Some(symbol) = symbol_it.next()? { - if let Err(e) = self.parse_symbol(address_map, &symbol, None) { - warn!("parsing {symbol:?} failed with {e:?}, ignoring"); - } - } - - Ok(()) - } - - /// Ingest a PDB file stored on the file system. - pub fn ingest_pdb(&mut self, pdb_path: impl AsRef) -> Result<()> { - // Open the PDB file. - let pdb_path = pdb_path.as_ref(); - let pdb_file = - File::open(pdb_path).with_context(|| format!("failed to open pdb {pdb_path:?}"))?; - let mut pdb = - Pdb::open(pdb_file).with_context(|| format!("failed to parse pdb {pdb_path:?}"))?; - - trace!("ingesting {pdb_path:?}.."); - - let address_map = pdb.address_map()?; - // Parse and extract all the bits we need from the private symbols first. We do - // this first, because procedures have a length field which isn't the case for - // global symbols. And if there's duplicates, then we'd rather have the entry - // that gives us the exact procedure length instead of us guessing. - self.parse_dbi(&mut pdb, &address_map) - .map_err(|e| anyhow!("failed to parse private symbols: {e:?}"))?; - - // Parse and extract all the bits we need from the global symbols.. - self.parse_global_symbols_table(&mut pdb, &address_map) - .map_err(|e| anyhow!("failed to parse public symbols: {e:?}"))?; - - Ok(()) - } - - /// Build a [`PdbCache`]. - pub fn build(mut self) -> Result { - // Walk the map of ordered RVA with their associated names and assign lengths to - // each of the functions. Some function have a length and some don't. If a - // length is specified, then we'll use it; otherwise we'll assign one ourselves. - let mut functions = Vec::with_capacity(self.symbols.len()); - while let Some((start, entry)) = self.symbols.pop_first() { - let end = if let Some(len) = entry.len() { - // If we have a length, then use it! - start - .checked_add(len) - .ok_or(anyhow!("overflow w/ symbol range"))? - } else { - // If we don't have one, the length of the current function is basically up to - // the next entry. - // - // For example imagine the below: - // - RVA: 0, Name: foo - // - RVA: 5, Name: bar - // - // In that case, we consider the first function to be spanning [0..4], and - // [5..module size] for the second one. - - // If we didn't pop the last value, then just check the one that follows. - if let Some((&end, _)) = self.symbols.first_key_value() { - end - } else { - debug_assert!(self.module.at.end > self.module.at.start); - - // If we popped the last value, just use the module end as the end of the range. - u32::try_from(self.module.at.end - self.module.at.start) - .context("failed to make the module's end into a rva")? - } - }; - - functions.push((Range { start, end }, entry.into())); - } - - Ok(PdbCache::new(self.module.name.clone(), functions)) - } -} diff --git a/crates/symbolizer/src/pe.rs b/crates/symbolizer/src/pe.rs deleted file mode 100644 index bd1774a..0000000 --- a/crates/symbolizer/src/pe.rs +++ /dev/null @@ -1,559 +0,0 @@ -// Axel '0vercl0k' Souchet - February 19 2024 -//! This module contains the implementation of the PE parsing we do. -use std::fmt::Display; -use std::mem; -use std::ops::Range; -use std::path::PathBuf; - -use anyhow::{anyhow, Context}; -use log::debug; - -use crate::addr_space::AddrSpace; -use crate::guid::Guid; -use crate::misc::Rva; -use crate::{Error as E, Result}; - -/// The IMAGE_DOS_HEADER. -#[derive(Default, Debug, Clone, Copy)] -#[repr(C, packed(2))] -pub struct ImageDosHeader { - pub e_magic: u16, - pub e_cblp: u16, - pub e_cp: u16, - pub e_crlc: u16, - pub e_cparhdr: u16, - pub e_minalloc: u16, - pub e_maxalloc: u16, - pub e_ss: u16, - pub e_sp: u16, - pub e_csum: u16, - pub e_ip: u16, - pub e_cs: u16, - pub e_lfarlc: u16, - pub e_ovno: u16, - pub e_res: [u16; 4], - pub e_oemid: u16, - pub e_oeminfo: u16, - pub e_res2: [u16; 10], - pub e_lfanew: i32, -} - -/// The IMAGE_NT_HEADERS. -#[derive(Default, Debug, Clone, Copy)] -#[repr(C)] -struct NtHeaders { - signature: u32, - file_hdr: ImageFileHeader, -} - -/// The IMAGE_FILE_HEADER. -#[derive(Default, Debug, Clone, Copy)] -#[repr(C)] -pub struct ImageFileHeader { - pub machine: u16, - pub number_of_sections: u16, - pub time_date_stamp: u32, - pub pointer_to_symbol_table: u32, - pub number_of_symbols: u32, - pub size_of_optional_header: u16, - pub characteristics: u16, -} - -/// The IMAGE_DATA_DIRECTORY. -#[derive(Debug, Default, Clone, Copy)] -#[repr(C)] -pub struct ImageDataDirectory { - pub virtual_address: u32, - pub size: u32, -} - -/// The IMAGE_OPTIONAL_HEADER32. -#[derive(Debug, Default)] -#[repr(C)] -pub struct ImageOptionalHeader32 { - pub magic: u16, - pub major_linker_version: u8, - pub minor_linker_version: u8, - pub size_of_code: u32, - pub size_of_initialized_data: u32, - pub size_of_uninitialized_data: u32, - pub address_of_entry_point: u32, - pub base_of_code: u32, - pub base_of_data: u32, - pub image_base: u32, - pub section_alignment: u32, - pub file_alignment: u32, - pub major_operating_system_version: u16, - pub minor_operating_system_version: u16, - pub major_image_version: u16, - pub minor_image_version: u16, - pub major_subsystem_version: u16, - pub minor_subsystem_version: u16, - pub win32_version_value: u32, - pub size_of_image: u32, - pub size_of_headers: u32, - pub check_sum: u32, - pub subsystem: u16, - pub dll_characteristics: u16, - pub size_of_stack_reserve: u32, - pub size_of_stack_commit: u32, - pub size_of_heap_reserve: u32, - pub size_of_heap_commit: u32, - pub loader_flags: u32, - pub number_of_rva_and_sizes: u32, - pub data_directory: [ImageDataDirectory; 16], -} - -/// The IMAGE_OPTIONAL_HEADER64. -#[derive(Debug, Default, Clone, Copy)] -#[repr(C, packed(4))] -pub struct ImageOptionalHeader64 { - pub magic: u16, - pub major_linker_version: u8, - pub minor_linker_version: u8, - pub size_of_code: u32, - pub size_of_initialized_data: u32, - pub size_of_uninitialized_data: u32, - pub address_of_entry_point: u32, - pub base_of_code: u32, - pub image_base: u64, - pub section_alignment: u32, - pub file_alignment: u32, - pub major_operating_system_version: u16, - pub minor_operating_system_version: u16, - pub major_image_version: u16, - pub minor_image_version: u16, - pub major_subsystem_version: u16, - pub minor_subsystem_version: u16, - pub win32_version_value: u32, - pub size_of_image: u32, - pub size_of_headers: u32, - pub check_sum: u32, - pub subsystem: u16, - pub dll_characteristics: u16, - pub size_of_stack_reserve: u64, - pub size_of_stack_commit: u64, - pub size_of_heap_reserve: u64, - pub size_of_heap_commit: u64, - pub loader_flags: u32, - pub number_of_rva_and_sizes: u32, - pub data_directory: [ImageDataDirectory; 16], -} - -/// The IMAGE_DEBUG_DIRECTORY. -#[derive(Default, Debug, Clone, Copy)] -#[repr(C)] -pub struct ImageDebugDirectory { - pub characteristics: u32, - pub time_date_stamp: u32, - pub major_version: u16, - pub minor_version: u16, - pub type_: u32, - pub size_of_data: u32, - pub address_of_raw_data: u32, - pub pointer_to_raw_data: u32, -} - -/// The IMAGE_EXPORT_DIRECTORY. -#[derive(Default, Debug, Clone, Copy)] -#[repr(C)] -pub struct ImageExportDirectory { - pub characteristics: u32, - pub time_date_stamp: u32, - pub major_version: u16, - pub minor_version: u16, - pub name: u32, - pub base: u32, - pub number_of_functions: u32, - pub number_of_names: u32, - pub address_of_functions: u32, - pub address_of_names: u32, - pub address_of_name_ordinals: u32, -} - -/// The code view information. -#[derive(Debug, Default, Clone, Copy)] -#[repr(C)] -pub struct Codeview { - pub signature: u32, - pub guid: [u8; 16], - pub age: u32, - // name follows -} - -pub const IMAGE_NT_SIGNATURE: u32 = 17744; -pub const IMAGE_FILE_MACHINE_AMD64: u16 = 34404; -pub const IMAGE_DIRECTORY_ENTRY_EXPORT: usize = 0; -pub const IMAGE_DIRECTORY_ENTRY_DEBUG: usize = 6; - -pub const IMAGE_DEBUG_TYPE_CODEVIEW: u32 = 2; - -/// A PDB identifier. -/// -/// To download a PDB off Microsoft's Symbol Server, we need three pieces of -/// information: the pdb name, a guid and its age. -#[derive(Debug, Default, PartialEq, Eq, Hash, Clone)] -pub struct PdbId { - pub path: PathBuf, - pub guid: Guid, - pub age: u32, -} - -impl Display for PdbId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("{:?}:{}:{:x}", self.path, self.guid, self.age)) - } -} - -impl PdbId { - pub fn new(path: impl Into, guid: Guid, age: u32) -> Result { - let path = path.into(); - if path.file_name().is_none() { - return Err(E::PdbPathNoName(path)); - } - - Ok(Self { path, guid, age }) - } - - pub fn name(&self) -> String { - self.path - .file_name() - .unwrap() - .to_string_lossy() - .into_owned() - } -} - -/// Calculate the absolute address of an array entry based on a base address, -/// the RVA of the array, the entry index and the size of an entry. -pub fn array_offset(base: u64, rva_array: u32, idx: u32, entry_size: usize) -> Option { - let offset = idx.checked_mul(entry_size.try_into().ok()?)?; - let rva = rva_array.checked_add(offset)?; - - base.checked_add(rva.into()) -} - -/// Read a NULL terminated string from the dump file at a specific address. -pub fn read_string( - addr_space: &mut impl AddrSpace, - mut addr: u64, - max: usize, -) -> Result> { - let mut s = String::new(); - let mut terminated = false; - for _ in 0..max { - let mut buf = [0]; - let Some(()) = addr_space - .try_read_exact_at(addr, &mut buf) - .context("failed reading null terminated string")? - else { - return Ok(None); - }; - - let c = buf[0]; - if c == 0 { - terminated = true; - break; - } - - s.push(c.into()); - addr += 1; - } - - if !terminated && s.len() == max { - s.push_str("..."); - } - - Ok(Some(s)) -} - -/// A parsed PE headers. -/// -/// We are only interested in the PDB identifier and the Export Address Table. -#[derive(Debug, Default)] -pub struct Pe { - pub pdb_id: Option, - pub exports: Vec<(Rva, String)>, -} - -impl Pe { - pub fn new(addr_space: &mut impl AddrSpace, base: u64) -> Result { - // All right let's parse the PE. - debug!("parsing PE @ {:#x}", base); - - // Read the DOS/NT headers. - let dos_hdr = addr_space - .read_struct_at::(base) - .context("failed to read ImageDosHeader")?; - let nt_hdr_addr = base - .checked_add(dos_hdr.e_lfanew.try_into().unwrap()) - .ok_or(anyhow!("overflow w/ e_lfanew"))?; - let nt_hdr = addr_space - .read_struct_at::(nt_hdr_addr) - .context("failed to read Ntheaders")?; - - // Let's verify the signature.. - if nt_hdr.signature != IMAGE_NT_SIGNATURE { - return Err(anyhow!("wrong PE signature for {base:#x}").into()); - } - - // ..and let's ignore non x64 PEs. - if nt_hdr.file_hdr.machine != IMAGE_FILE_MACHINE_AMD64 { - return Err(anyhow!("wrong architecture for {base:#x}").into()); - } - - // Now locate the optional header, and check that it looks big enough. - let opt_hdr_addr = nt_hdr_addr - .checked_add(mem::size_of_val(&nt_hdr).try_into().unwrap()) - .ok_or(anyhow!("overflow w/ nt_hdr"))?; - let opt_hdr_size = nt_hdr.file_hdr.size_of_optional_header as usize; - debug!("parsing optional hdr @ {:#x}", opt_hdr_addr); - - // If it's not big enough, let's bail. - if opt_hdr_size < mem::size_of::() { - return Err(anyhow!("optional header's size is too small").into()); - } - - // Read the IMAGE_OPTIONAL_HEADER64. - let opt_hdr = addr_space - .read_struct_at::(opt_hdr_addr) - .with_context(|| "failed to read ImageOptionalHeader64")?; - - // Read the PDB information if there's any. - let pdb_id = Self::try_parse_debug_dir(addr_space, base, &opt_hdr)?; - - // Read the EXPORT table if there's any. - let exports = match Self::try_parse_export_dir(addr_space, base, &opt_hdr) { - Ok(o) => o, - // Err(E::DumpParserError(KdmpParserError::AddrTranslation(_))) => None, - Err(e) => return Err(e), - } - .unwrap_or_default(); - - Ok(Self { pdb_id, exports }) - } - - fn try_parse_debug_dir( - addr_space: &mut impl AddrSpace, - base: u64, - opt_hdr: &ImageOptionalHeader64, - ) -> Result> { - // Let's check if there's an ImageDebugDirectory. - let debug_data_dir = opt_hdr.data_directory[IMAGE_DIRECTORY_ENTRY_DEBUG]; - if usize::try_from(debug_data_dir.size).unwrap() < mem::size_of::() { - debug!("debug dir is too small"); - return Ok(None); - } - - // Read it. - let debug_dir_addr = base - .checked_add(debug_data_dir.virtual_address.into()) - .ok_or(anyhow!("overflow w/ debug_data_dir"))?; - let Some(debug_dir) = - addr_space.try_read_struct_at::(debug_dir_addr)? - else { - debug!( - "failed to read ImageDebugDirectory {debug_dir_addr:#x} because of mem translation" - ); - return Ok(None); - }; - - // If it's not a codeview type.. I don't know what to do, so let's bail. - if debug_dir.type_ != IMAGE_DEBUG_TYPE_CODEVIEW { - debug!("debug dir is not a codeview"); - return Ok(None); - } - - // Let's make sure it's big enough to back a codeview structure. - if usize::try_from(debug_dir.size_of_data).unwrap() < mem::size_of::() { - debug!("codeview too small"); - return Ok(None); - } - - // Let's read it. - let codeview_addr = base - .checked_add(debug_dir.address_of_raw_data.into()) - .ok_or(anyhow!("overflow w/ debug_dir"))?; - let Some(codeview) = addr_space.try_read_struct_at::(codeview_addr)? else { - debug!("failed to read codeview {codeview_addr:#x} because of mem translation"); - return Ok(None); - }; - - // The codeview structure is followed by a NULL terminated string which is the - // module name. - let leftover = - usize::try_from(debug_dir.size_of_data).unwrap() - mem::size_of::(); - if leftover == 0 || leftover > 256 { - return Err(E::CodeViewInvalidPath); - } - - // Allocate space for it, and read it. - let mut file_name = vec![0; leftover]; - let file_name_addr = array_offset( - base, - debug_dir.address_of_raw_data, - 1, - mem::size_of::(), - ) - .ok_or(anyhow!("overflow w/ debug_dir filename"))?; - - let Some(amount) = addr_space.try_read_at(file_name_addr, &mut file_name)? else { - return Ok(None); - }; - - // The last character is supposed to be a NULL byte, bail if it's not there. - if *file_name.last().unwrap() != 0 { - return Err(anyhow!("the module path doesn't end with a NULL byte").into()); - } - - file_name.resize(amount - 1, 0); - - // All right, at this point we have everything we need: the PDB name / GUID / - // age. Those are the three piece of information we need to download a PDB - // off Microsoft's symbol server. - let path = PathBuf::from(String::from_utf8(file_name)?); - - Ok(Some(PdbId::new(path, codeview.guid.into(), codeview.age)?)) - } - - fn try_parse_export_dir( - addr_space: &mut impl AddrSpace, - base: u64, - opt_hdr: &ImageOptionalHeader64, - ) -> Result>> { - // Let's check if there's an EAT. - debug!("parsing EAT.."); - let export_data_dir = opt_hdr.data_directory[IMAGE_DIRECTORY_ENTRY_EXPORT]; - if usize::try_from(export_data_dir.size)? < mem::size_of::() { - debug!("export dir is too small"); - return Ok(None); - } - - // Read it. - let export_dir_addr = base - .checked_add(u64::from(export_data_dir.virtual_address)) - .ok_or(anyhow!("export_data_dir"))?; - let Some(export_dir) = - addr_space.try_read_struct_at::(export_dir_addr)? - else { - debug!("failed to read ImageExportDirectory {export_dir_addr:#x} because of mem translation"); - return Ok(None); - }; - - // Read the ordinal / name arrays. - // """ - // The export name pointer table is an array of addresses (RVAs) into the export - // name table. The pointers are 32 bits each and are relative to the image base. - // The pointers are ordered lexically to allow binary searches. - // An export name is defined only if the export name pointer table contains a - // pointer to it. """ - let n_names = export_dir.number_of_names; - let addr_of_names = export_dir.address_of_names; - // """ - // The export ordinal table is an array of 16-bit unbiased indexes into the - // export address table. Ordinals are biased by the Ordinal Base field of the - // export directory table. In other words, the ordinal base must be subtracted - // from the ordinals to obtain true indexes into the export address table. - // """ - let addr_of_ords = export_dir.address_of_name_ordinals; - let mut names = Vec::with_capacity(n_names.try_into()?); - let mut ords = Vec::with_capacity(names.len()); - for name_idx in 0..n_names { - // Read the name RVA's.. - let name_rva_addr = array_offset(base, addr_of_names, name_idx, mem::size_of::()) - .ok_or(anyhow!("name_rva_addr"))?; - let Some(name_rva) = addr_space - .try_read_struct_at::(name_rva_addr) - .with_context(|| "failed to read EAT's name array".to_string())? - else { - debug!( - "failed to read EAT's name array {name_rva_addr:#x} because of mem translation" - ); - return Ok(None); - }; - - let name_addr = base - .checked_add(name_rva.into()) - .ok_or(anyhow!("overflow w/ name_addr"))?; - // ..then read the string in memory. - let Some(name) = read_string(addr_space, name_addr, 64)? else { - debug!("failed to read export's name #{name_idx}"); - return Ok(None); - }; - names.push(name); - - // Read the ordinal. - let ord_addr = array_offset(base, addr_of_ords, name_idx, mem::size_of::()) - .ok_or(anyhow!("ord_addr"))?; - let Some(ord) = addr_space - .try_read_struct_at::(ord_addr) - .context("failed to read EAT's ord array")? - else { - debug!("failed to read EAT's ord array {ord_addr:#x} because of mem translation"); - return Ok(None); - }; - ords.push(ord); - } - - debug!("read {n_names} names"); - - // Read the address array. - // - // """ - // The export address table contains the address of exported entry points and - // exported data and absolutes. An ordinal number is used as an index into the - // export address table. - // """ - let addr_of_functs = export_dir.address_of_functions; - let n_functs = export_dir.number_of_functions; - let mut address_rvas = Vec::with_capacity(n_functs.try_into()?); - for addr_idx in 0..n_functs { - // Read the RVA. - let address_rva_addr = - array_offset(base, addr_of_functs, addr_idx, mem::size_of::()) - .ok_or(anyhow!("overflow w/ address_rva_addr"))?; - - let Some(address_rva) = addr_space - .try_read_struct_at::(address_rva_addr) - .with_context(|| "failed to read EAT's address array".to_string())? - else { - debug!("failed to read EAT's address array {address_rva_addr:#x} because of mem translation"); - return Ok(None); - }; - - address_rvas.push(address_rva); - } - - debug!("read {n_functs} addresses"); - - // Time to build the EAT. - let eat_range = Range { - start: export_data_dir.virtual_address, - end: export_data_dir - .virtual_address - .checked_add(export_data_dir.size) - .ok_or(anyhow!("overflow w/ export data dir size"))?, - }; - - let mut exports = Vec::with_capacity(address_rvas.len()); - for (unbiased_ordinal, addr_rva) in address_rvas.drain(..).enumerate() { - let ordinal = unbiased_ordinal - .checked_add(export_dir.base.try_into()?) - .ok_or(anyhow!("overflow w/ biased_ordinal"))?; - let name = ords - .iter() - .position(|&o| usize::from(o) == unbiased_ordinal) - .map(|name_idx| names[name_idx].clone()) - .unwrap_or_else(|| format!("ORD#{ordinal}")); - - let forwarder = eat_range.contains(&addr_rva); - if !forwarder { - exports.push((addr_rva, name.clone())); - } - } - - debug!("built table w/ {} entries", exports.len()); - - Ok(Some(exports)) - } -} diff --git a/crates/symbolizer/src/stats.rs b/crates/symbolizer/src/stats.rs deleted file mode 100644 index e22cd4c..0000000 --- a/crates/symbolizer/src/stats.rs +++ /dev/null @@ -1,67 +0,0 @@ -// Axel '0vercl0k' Souchet - April 21 2024 -//! This module contains the [`Stats`] type that is used to keep track of -//! various statistics when symbolizing. -use std::cell::RefCell; -use std::collections::HashMap; -use std::fmt::Debug; - -use crate::pe::PdbId; - -#[derive(Debug, Default)] -pub struct StatsBuilder { - inner: RefCell, -} - -/// Various statistics that the symbolizer keeps track of. -#[derive(Default, Clone, Debug)] -pub struct Stats { - /// The number of addresses symbolized. - pub n_addrs: u64, - /// The PDB identifiers that have been downloaded & the associated file size - /// in bytes. - pub downloaded: HashMap, - /// The number of time the address cache was a hit. - pub cache_hit: u64, -} - -impl Stats { - pub fn did_download(&self, pdb_id: PdbId) -> bool { - self.downloaded.contains_key(&pdb_id) - } - - pub fn amount_downloaded(&self) -> u64 { - let mut total = 0u64; - for value in self.downloaded.values() { - total = total.saturating_add(*value); - } - - total - } - - pub fn amount_pdb_downloaded(&self) -> usize { - self.downloaded.len() - } -} - -impl StatsBuilder { - pub fn build(&self) -> Stats { - self.inner.borrow().clone() - } - - pub fn downloaded_file(&self, pdb_id: PdbId, size: u64) { - assert!(self - .inner - .borrow_mut() - .downloaded - .insert(pdb_id, size) - .is_none()); - } - - pub fn addr_symbolized(&self) { - self.inner.borrow_mut().n_addrs += 1; - } - - pub fn cache_hit(&self) { - self.inner.borrow_mut().cache_hit += 1; - } -} diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs deleted file mode 100644 index f1c8a16..0000000 --- a/crates/symbolizer/src/symbolizer.rs +++ /dev/null @@ -1,450 +0,0 @@ -// Axel '0vercl0k' Souchet - February 20 2024 -//! This module contains the implementation of the [`Symbolizer`] which is the -//! object that is able to symbolize files using PDB information if available. -use std::cell::RefCell; -use std::collections::{hash_map, HashMap}; -use std::fs::{self, File}; -use std::hash::{BuildHasher, Hasher}; -use std::io::{self, BufWriter, Write}; -use std::ops::Range; -use std::path::{Path, PathBuf}; -use std::rc::Rc; - -use anyhow::{anyhow, Context}; -use log::{debug, trace, warn}; - -use crate::addr_space::AddrSpace; -use crate::builder::{Builder, NoSymcache}; -use crate::misc::{fast_hex32, fast_hex64}; -use crate::modules::{Module, Modules}; -use crate::pdbcache::{PdbCache, PdbCacheBuilder}; -use crate::pe::{PdbId, Pe}; -use crate::stats::{Stats, StatsBuilder}; -use crate::{Error as E, Result}; - -/// Format a path to find a PDB in a symbol cache. -/// -/// Here is an example: -/// ```text -/// C:\work\dbg\sym\ntfs.pdb\64D20DCBA29FFC0CD355FFE7440EC5F81\ntfs.pdb -/// ^^^^^^^^^^^^^^^ ^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^ -/// cache path PDB name PDB GUID & PDB Age PDB name -/// ``` -pub fn format_pdb_path(symsrv_cache: &Path, pdb_id: &PdbId) -> PathBuf { - let pdb_name = pdb_id.name(); - symsrv_cache - .join(&pdb_name) - .join(format!("{}{:x}", pdb_id.guid, pdb_id.age,)) - .join(&pdb_name) -} - -/// Format a URL to find a PDB on an HTTP symbol server. -pub fn format_pdb_url(symsrv: &str, pdb_id: &PdbId) -> String { - // It seems that Chrome's symsrv server only accepts the GUID/age part as - // uppercase hex, so let's use that. - format!( - "{symsrv}/{}/{}{:x}/{}", - pdb_id.name(), - pdb_id.guid, - pdb_id.age, - pdb_id.name() - ) -} - -/// Download a PDB file from a candidate symbol servers. -/// -/// The code iterates through every symbol servers, and stops as soon as it was -/// able to download a matching file. -pub fn try_download_from_guid( - symsrvs: &Vec, - sympath_dir: impl AsRef, - pdb_id: &PdbId, -) -> Result> { - // Give a try to each of the symbol servers. - for symsrv in symsrvs { - debug!( - "trying to download pdb for {} from {}..", - pdb_id.name(), - symsrv - ); - - // The way a symbol path is structured is that there is a directory per module.. - let sympath_dir = sympath_dir.as_ref(); - let pdb_root_dir = sympath_dir.join(pdb_id.name()); - - // ..and inside, there is a directory per version of the PDB.. - let pdb_dir = pdb_root_dir.join(format!("{}{:x}", pdb_id.guid, pdb_id.age)); - - // ..and finally the PDB file itself. - let pdb_path = pdb_dir.join(pdb_id.name()); - - // The file doesn't exist on the file system, so let's try to download it from a - // symbol server. - let pdb_url = format_pdb_url(symsrv, pdb_id); - let resp = match ureq::get(&pdb_url).call() { - Ok(o) => o, - // If we get a 404, it means that the server doesn't know about this file. So we'll skip - // to the next symbol server. - Err(ureq::Error::Status(404, ..)) => { - warn!("got a 404 for {pdb_url}"); - continue; - } - // If we received any other errors, well that's not expected so let's bail. - Err(e) => { - return Err(E::DownloadPdb { - pdb_url, - e: e.into(), - }); - } - }; - - // If the server knows about this file, it is time to create the directory - // structure in which we'll download the file into. - if !(pdb_root_dir.try_exists()?) { - debug!("creating {pdb_root_dir:?}.."); - fs::create_dir(&pdb_root_dir) - .with_context(|| format!("failed to create base pdb dir {pdb_root_dir:?}"))?; - } - - if !pdb_dir.try_exists()? { - debug!("creating {pdb_dir:?}.."); - fs::create_dir(&pdb_dir) - .with_context(|| format!("failed to create pdb dir {pdb_dir:?}"))?; - } - - // Finally, we can download and save the file. - let file = - File::create(&pdb_path).with_context(|| format!("failed to create {pdb_path:?}"))?; - - io::copy(&mut resp.into_reader(), &mut BufWriter::new(file))?; - - debug!("downloaded to {pdb_path:?}"); - return Ok(Some(pdb_path)); - } - - Ok(None) -} - -/// Where did we find this PDB? On the file-system somewhere, in a local symbol -/// cache or downloaded on a symbol server. -/// -/// This is used mainly to account for statistics; how many files were -/// downloaded, etc. -enum PdbKind { - /// The PDB file was found on the file system but no in a symbol cache. - Local, - /// The PDB file was found on the file system in a local symbol cache. - LocalCache, - /// The PDB file was downloaded on a remote symbol server. - Download, -} - -/// Try to find a PDB file online or locally from a [`PdbId`]. -fn get_pdb( - sympath: &Path, - symsrvs: &Vec, - pdb_id: &PdbId, - offline: bool, -) -> Result> { - // Let's see if the path exists locally.. - if pdb_id.path.is_file() { - // .. if it does, this is a 'Local' PDB. - return Ok(Some((pdb_id.path.clone(), PdbKind::Local))); - } - - // Now, let's see if it's in the local cache.. - let local_path = format_pdb_path(sympath, pdb_id); - if local_path.is_file() { - // .. if it does, this is a 'LocalCache' PDB. - return Ok(Some((local_path, PdbKind::LocalCache))); - } - - // If we're offline, let's just skip the downloading part. - if offline { - return Ok(None); - } - - // We didn't find a PDB on disk, so last resort is to try to download it. - let downloaded_path = try_download_from_guid(symsrvs, sympath, pdb_id)?; - - Ok(downloaded_path.map(|p| (p, PdbKind::Download))) -} - -/// A simple 'hasher' that uses the input bytes as a hash. -/// -/// This is used for the cache HashMap used in the [`Symbolizer`]. We are -/// caching symbol addresses and so we know those addresses are unique and do -/// not need to be hashed. -#[derive(Default)] -struct IdentityHasher { - h: u64, -} - -impl Hasher for IdentityHasher { - fn finish(&self) -> u64 { - self.h - } - - fn write(&mut self, bytes: &[u8]) { - debug_assert_eq!(bytes.len(), 8); - - self.h = u64::from_le_bytes(bytes.try_into().unwrap()); - } -} - -impl BuildHasher for IdentityHasher { - type Hasher = Self; - - fn build_hasher(&self) -> Self::Hasher { - Self::default() - } -} - -#[derive(Debug, Default)] -pub enum PdbLookupMode { - #[default] - Offline, - Online { - /// List of symbol servers to try to download PDBs from when needed. - symsrvs: Vec, - }, -} - -/// Configuration for the [`Symbolizer`]. -#[derive(Debug)] -pub struct Config { - /// Path to the local PDB symbol cache where PDBs will be - /// downloaded into, or where we'll look for cached PDBs. - pub symcache: PathBuf, - /// This is the list of kernel / user modules read from the kernel crash - /// dump. - pub modules: Vec, - /// Which mode are we using for PDB lookups? Online or Offline? - pub mode: PdbLookupMode, -} - -/// The [`Symbolizer`] is the main object that glues all the logic. -/// -/// It downloads, parses PDB information, and symbolizes. -pub struct Symbolizer { - /// Keep track of some statistics such as the number of lines symbolized, - /// PDB downloaded, etc. - stats: StatsBuilder, - /// This is a path to the local PDB symbol cache where PDBs will be - /// downloaded into / where some are available. - symcache: PathBuf, - /// This is the list of kernel / user modules read from the kernel crash - /// dump. - modules: Modules, - /// List of symbol servers to try to download PDBs from when needed. - symsrvs: Vec, - /// Caches addresses to symbols. This allows us to not have to symbolize an - /// address again. - addr_cache: RefCell, IdentityHasher>>, - /// Each parsed module is stored in this cache. We parse PDBs, etc. only - /// once and then the [`PdbCache`] is used to query. - pdb_caches: RefCell, Rc>>, - offline: bool, -} - -impl Symbolizer { - pub fn builder() -> Builder { - Builder::default() - } - - /// Create a [`Symbolizer`]. - pub fn new(config: Config) -> Result { - let (offline, symsrvs) = match config.mode { - PdbLookupMode::Offline => - // If the user wants offline, then let's do that.. - { - (true, vec![]) - } - PdbLookupMode::Online { symsrvs } => { - // ..otherwise, we'll try to resolve a DNS and see what happens. If we can't do - // that, then we'll assume we're offline and turn the offline mode. - // Otherwise, we'll assume we have online access and attempt to download PDBs. - let offline = ureq::get("https://www.google.com/").call().is_err(); - if offline { - debug!("Turning on 'offline' mode as you seem to not have internet access.."); - } - - (offline, symsrvs) - } - }; - - if !config.symcache.is_dir() { - return Err(anyhow!("{:?} directory does not exist", config.symcache))?; - } - - Ok(Self { - stats: Default::default(), - symcache: config.symcache, - modules: Modules::new(config.modules), - symsrvs, - addr_cache: Default::default(), - pdb_caches: Default::default(), - offline, - }) - } - - /// Get [`Stats`]. - pub fn stats(&self) -> Stats { - self.stats.build() - } - - /// Get the [`PdbCache`] for a specified `addr`. - fn module_pdbcache(&self, addr: u64) -> Option> { - self.pdb_caches.borrow().iter().find_map(|(k, v)| { - if k.contains(&addr) { - Some(v.clone()) - } else { - None - } - }) - } - - /// Try to symbolize an address. - /// - /// If there's a [`PdbCache`] already created, then ask it to symbolize. - /// Otherwise, this will create a [`PdbCache`], try to find a PDB (locally - /// or remotely) and extract every bit of relevant information for us. - /// Finally, the result will be kept around to symbolize addresses in that - /// module faster in the future. - fn try_symbolize_addr_from_pdbs( - &self, - addr_space: &mut impl AddrSpace, - addr: u64, - ) -> Result>> { - trace!("symbolizing address {addr:#x}.."); - let Some(module) = self.modules.find(addr) else { - trace!("address {addr:#x} doesn't belong to any module"); - return Ok(None); - }; - - trace!("address {addr:#x} found in {}", module.name); - - // Do we have a cache already ready to go? - if let Some(pdbcache) = self.module_pdbcache(addr) { - return Ok(Some(Rc::new(pdbcache.symbolize(module.rva(addr))?))); - } - - // Otherwise, let's make one. - let mut builder = PdbCacheBuilder::new(module); - - // Let's start by parsing the PE to get its exports, and PDB information if - // there's any. - let pe = Pe::new(addr_space, module.at.start)?; - - // Ingest the EAT. - builder.ingest(pe.exports.into_iter()); - - // .. and see if it has PDB information. - if let Some(pdb_id) = pe.pdb_id { - trace!("Get PDB information for {module:?}/{pdb_id}.."); - - // Try to get a PDB.. - let pdb_path = get_pdb(&self.symcache, &self.symsrvs, &pdb_id, self.offline)?; - - // .. and ingest it if we have one. - if let Some((pdb_path, pdb_kind)) = pdb_path { - if matches!(pdb_kind, PdbKind::Download) { - self.stats - .downloaded_file(pdb_id, pdb_path.metadata()?.len()) - } - - builder.ingest_pdb(pdb_path)?; - } - } - - // Build the cache.. - let pdbcache = builder.build()?; - - // .. symbolize `addr`.. - let line = pdbcache - .symbolize(module.rva(addr)) - .with_context(|| format!("failed to symbolize {addr:#x}"))?; - - // .. and store the sym cache to be used for next time we need to symbolize an - // address from this module. - assert!(self - .pdb_caches - .borrow_mut() - .insert(module.at.clone(), Rc::new(pdbcache)) - .is_none()); - - Ok(Some(Rc::new(line))) - } - - /// Try to symbolize an address. - /// - /// If the address has been symbolized before, it will be in the - /// `addr_cache` already. If not, we need to take the slow path and ask the - /// right [`PdbCache`] which might require to create one in the first place. - fn try_symbolize_addr( - &self, - addr_space: &mut impl AddrSpace, - addr: u64, - ) -> Result>> { - match self.addr_cache.borrow_mut().entry(addr) { - hash_map::Entry::Occupied(o) => { - self.stats.cache_hit(); - return Ok(Some(o.get().clone())); - } - hash_map::Entry::Vacant(v) => { - let Some(symbol) = self.try_symbolize_addr_from_pdbs(addr_space, addr)? else { - return Ok(None); - }; - - v.insert(symbol); - } - }; - - Ok(self.addr_cache.borrow().get(&addr).cloned()) - } - - /// Symbolize `addr` in the `module+offset` style and write the result into - /// `output`. - pub fn modoff(&mut self, addr: u64, output: &mut impl Write) -> Result<()> { - let mut buffer = [0; 16]; - if let Some(module) = self.modules.find(addr) { - output.write_all(module.name.as_bytes())?; - output.write_all(&[b'+', b'0', b'x'])?; - - output.write_all(fast_hex32( - &mut buffer[0..8].try_into().unwrap(), - module.rva(addr), - )) - } else { - output.write_all(&[b'0', b'x'])?; - - output.write_all(fast_hex64(&mut buffer, addr)) - } - .context("failed to write symbolized value to output")?; - - self.stats.addr_symbolized(); - - Ok(()) - } - - /// Symbolize `addr` in the `module!function+offset` style and write the - /// result into `output`. - pub fn full( - &mut self, - addr_space: &mut impl AddrSpace, - addr: u64, - output: &mut impl Write, - ) -> Result<()> { - match self.try_symbolize_addr(addr_space, addr)? { - Some(sym) => { - output - .write_all(sym.as_bytes()) - .context("failed to write symbolized value to output")?; - - self.stats.addr_symbolized(); - Ok(()) - } - None => self.modoff(addr, output), - } - } -} diff --git a/crates/symbolizer/testdatas/mrt100.dll b/crates/symbolizer/testdatas/mrt100.dll deleted file mode 100644 index a986193..0000000 Binary files a/crates/symbolizer/testdatas/mrt100.dll and /dev/null differ diff --git a/crates/symbolizer/testdatas/mrt100.raw b/crates/symbolizer/testdatas/mrt100.raw deleted file mode 100644 index d4ae68d..0000000 Binary files a/crates/symbolizer/testdatas/mrt100.raw and /dev/null differ diff --git a/crates/symbolizer/tests/basics.rs b/crates/symbolizer/tests/basics.rs deleted file mode 100644 index 7be510d..0000000 --- a/crates/symbolizer/tests/basics.rs +++ /dev/null @@ -1,304 +0,0 @@ -// Axel '0vercl0k' Souchet - May 30 2024 -use std::cmp::min; -use std::env::temp_dir; -use std::fs::{self, File}; -use std::io::{self, Read, Seek, Write}; -use std::path::{Path, PathBuf}; - -use object::read::pe::PeFile64; -use object::{NativeEndian, ReadCache, ReadRef}; -use symbolizer::{AddrSpace, Builder, Module, PdbId}; -use udmp_parser::UserDumpParser; - -const EXPECTED_LEN: u64 = 0x90_00; -const EXPECTED_RAW: [(u64, &str, &str); 4] = [ - ( - 0x19_50, - "mrt100!GetManagedRuntimeService+0x0", - "mrt100+0x00001950", - ), - ( - 0x19_30, - "mrt100!ManagedRuntimeServices::SetWerDataBuffer+0x0", - "mrt100+0x00001930", - ), - (EXPECTED_LEN, "0x0000000000009000", "0x0000000000009000"), - (0xdeadbeef, "0x00000000deadbeef", "0x00000000deadbeef"), -]; - -fn testdata(name: &str) -> PathBuf { - PathBuf::from(&env!("CARGO_MANIFEST_DIR")) - .join("testdatas") - .join(name) -} - -fn symcache(name: &str) -> PathBuf { - let cache = temp_dir().join(name); - let _ = fs::remove_dir_all(&cache); - let _ = fs::create_dir(&cache); - - cache -} - -#[derive(Debug)] -struct RawAddressSpace { - raw: File, - len: u64, -} - -impl RawAddressSpace { - fn new(path: &impl AsRef) -> io::Result { - let raw = File::open(path)?; - let metadata = raw.metadata()?; - let len = metadata.len(); - - Ok(Self { raw, len }) - } - - fn len(&self) -> u64 { - self.len - } -} - -impl AddrSpace for RawAddressSpace { - fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> std::io::Result { - Seek::seek(&mut self.raw, io::SeekFrom::Start(addr))?; - - Read::read(&mut self.raw, buf) - } - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> std::io::Result> { - self.read_at(addr, buf).map(Some) - } -} - -#[test] -fn raw_virt() { - let mut raw_addr_space = RawAddressSpace::new(&testdata("mrt100.raw")).unwrap(); - let len = raw_addr_space.len(); - - let mut symb = Builder::default() - .modules(vec![Module::new("mrt100", 0x0, len)]) - .msft_symsrv() - .symcache(symcache("basics")) - .build() - .unwrap(); - - for (addr, expected_full, expected_modoff) in EXPECTED_RAW { - let mut full = Vec::new(); - symb.full(&mut raw_addr_space, addr, &mut full).unwrap(); - assert_eq!(String::from_utf8(full).unwrap(), expected_full); - - let mut modoff = Vec::new(); - symb.modoff(addr, &mut modoff).unwrap(); - assert_eq!(String::from_utf8(modoff).unwrap(), expected_modoff); - } - - let stats = symb.stats(); - assert_eq!(stats.amount_pdb_downloaded(), 1); - assert!(stats.did_download( - PdbId::new( - "mrt100.pdb", - "A20DA44BF08DB27D2BA0928F79447C7D".parse().unwrap(), - 1 - ) - .unwrap() - )); -} - -#[derive(Debug)] -struct FileAddressSpace<'data> { - pe: PeFile64<'data, &'data ReadCache>, - virt_len: u64, -} - -impl<'data> FileAddressSpace<'data> { - fn new(cache: &'data ReadCache) -> io::Result { - let pe = - PeFile64::parse(cache).map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e))?; - - let virt_len = pe - .nt_headers() - .optional_header - .size_of_image - .get(NativeEndian) - .into(); - - Ok(Self { pe, virt_len }) - } - - fn len(&self) -> u64 { - self.virt_len - } -} - -impl<'data> AddrSpace for FileAddressSpace<'data> { - fn read_at(&mut self, addr: u64, mut buf: &mut [u8]) -> std::io::Result { - if addr >= self.virt_len { - return Err(io::Error::new( - io::ErrorKind::Unsupported, - format!("{addr:#x} vs {:#x} is oob", self.virt_len), - )); - } - - let data = match self - .pe - .section_table() - .pe_data_at(self.pe.data(), addr.try_into().unwrap()) - { - Some(data) => data, - None => self - .pe - .data() - .read_slice_at(addr, buf.len()) - .map_err(|_| io::Error::new(io::ErrorKind::Unsupported, "read_slice_at"))?, - }; - - buf.write(data) - } - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> std::io::Result> { - self.read_at(addr, buf).map(Some) - } -} - -#[test] -fn raw_file() { - let file = File::open(testdata("mrt100.dll")).unwrap(); - let cache = ReadCache::new(file); - let mut file_addr_space = FileAddressSpace::new(&cache).unwrap(); - let len = file_addr_space.len(); - - let mut symb = Builder::default() - .modules(vec![Module::new("mrt100", 0x0, len)]) - .online(vec!["https://msdl.microsoft.com/download/symbols/"]) - .symcache(symcache("basics")) - .build() - .unwrap(); - - for (addr, expected_full, expected_modoff) in EXPECTED_RAW { - let mut full = Vec::new(); - symb.full(&mut file_addr_space, addr, &mut full).unwrap(); - assert_eq!(String::from_utf8(full).unwrap(), expected_full); - - let mut modoff = Vec::new(); - symb.modoff(addr, &mut modoff).unwrap(); - assert_eq!(String::from_utf8(modoff).unwrap(), expected_modoff); - } - - let stats = symb.stats(); - assert_eq!(stats.amount_pdb_downloaded(), 1); - assert!(stats.did_download( - PdbId::new( - "mrt100.pdb", - "A20DA44BF08DB27D2BA0928F79447C7D".parse().unwrap(), - 1 - ) - .unwrap() - )); -} - -#[derive(Debug)] -struct UserDumpAddrSpace<'a>(UserDumpParser<'a>); - -impl<'a> AddrSpace for UserDumpAddrSpace<'a> { - fn read_at(&mut self, addr: u64, mut buf: &mut [u8]) -> io::Result { - let mut cur_addr = addr; - let mut read_len = 0; - while read_len < buf.len() { - let Some(block) = self.0.get_mem_block(addr) else { - return Err(io::Error::new( - io::ErrorKind::Unsupported, - format!("no mem block found for {addr:#x}"), - )); - }; - - let Some(data) = block.data_from(cur_addr) else { - panic!(); - }; - - let left = buf.len() - read_len; - let len = min(data.len(), left); - buf.write_all(&data[..len]).unwrap(); - cur_addr += u64::try_from(len).unwrap(); - read_len += len; - } - - Ok(read_len) - } - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { - match self.read_at(addr, buf) { - Ok(sz) => Ok(Some(sz)), - Err(_) => Ok(None), - } - } -} - -#[test] -fn user_dump() { - let dump = UserDumpParser::new(testdata("udmp.dmp")).unwrap(); - let modules = dump - .modules() - .values() - .map(|module| { - Module::new( - module.path.file_name().unwrap().to_string_lossy(), - module.start_addr(), - module.end_addr(), - ) - }) - .collect::>(); - - let mut udmp_addr_space = UserDumpAddrSpace(dump); - let mut symb = Builder::default() - .modules(modules.clone()) - .msft_symsrv() - .symcache(symcache("basics")) - .build() - .unwrap(); - - // 0:000> u 00007ff9`aa4f8eb2 - // ntdll!EvtIntReportEventWorker$fin$0+0x2: - // 00007ff9`aa4f8eb2 4883ec50 sub rsp,50h - let mut output = Vec::new(); - symb.full(&mut udmp_addr_space, 0x7ff9aa4f8eb2, &mut output) - .unwrap(); - assert_eq!( - String::from_utf8(output).unwrap(), - "ntdll.dll!EvtIntReportEventWorker$fin$0+0x2" - ); - - let stats = symb.stats(); - assert_eq!(stats.amount_pdb_downloaded(), 1); - assert!(stats.did_download( - PdbId::new( - "ntdll.pdb", - "8D5D5ED5D5B8AA609A82600C14E3004D".parse().unwrap(), - 1 - ) - .unwrap() - )); - - drop(symb); - let mut symb_offline = Builder::default() - .symcache(symcache("basics")) - .modules(modules) - .build() - .unwrap(); - - // 0:000> u 00007ff9`aa4f8eb2 - // ntdll!EvtIntReportEventWorker$fin$0+0x2: - // 00007ff9`aa4f8eb2 4883ec50 sub rsp,50h - let mut output = Vec::new(); - symb_offline - .full(&mut udmp_addr_space, 0x7ff9aa4f8eb2, &mut output) - .unwrap(); - assert_ne!( - String::from_utf8(output).unwrap(), - "ntdll.dll!EvtIntReportEventWorker$fin$0+0x2" - ); - - let stats = symb_offline.stats(); - assert_eq!(stats.amount_pdb_downloaded(), 0); -}