diff --git a/.github/workflows/symbolizer-rs.yml b/.github/workflows/symbolizer-rs.yml index 1e2ee74..6986384 100644 --- a/.github/workflows/symbolizer-rs.yml +++ b/.github/workflows/symbolizer-rs.yml @@ -32,7 +32,7 @@ jobs: - name: cargo clippy env: RUSTFLAGS: "-Dwarnings" - run: cargo clippy + run: cargo clippy --workspace --tests --examples doc: name: doc @@ -65,16 +65,16 @@ jobs: run: rustup default stable - name: cargo test - run: cargo test + run: cargo test --workspace - name: cargo test release - run: cargo test --release + run: cargo test --release --workspace - name: cargo check - run: cargo check + run: cargo check --workspace --examples --tests - name: cargo build - run: cargo build --release + run: cargo build --release --workspace --examples --tests - name: Upload artifacts uses: actions/upload-artifact@v4 diff --git a/Cargo.toml b/Cargo.toml index f3e9e7f..8223c6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,34 +1,24 @@ [package] name = "symbolizer-rs" -version = "0.1.0" -edition = "2021" -authors = ["Axel '0vercl0k' Souchet"] categories = ["command-line-utilities", "development-tools::debugging"] description = "A fast execution trace symbolizer for Windows that runs on all major platforms and doesn't depend on any Microsoft libraries." include = ["/Cargo.toml", "/LICENSE", "/src/**", "README.md"] -keywords = ["windows", "kernel", "crash-dump", "symbols", "pdb"] +version = "0.1.0" +authors = ["Axel '0vercl0k' Souchet"] license = "MIT" -repository = "https://github.com/0vercl0k/symbolizer-rs" rust-version = "1.70" +repository = "https://github.com/0vercl0k/symbolizer-rs" +keywords = ["windows", "kernel", "crash-dump", "symbols", "pdb"] +edition = "2021" [dependencies] anyhow = "1.0" -pdb = "0.8" -log = "0.4" -env_logger = "0.11" clap = { version = "4.5", features = ["derive"] } -msvc-demangler = "0.10" -ureq = { version = "2.9", default-features = false, features = [ - "tls", - "gzip", -] } -kdmp-parser = "0.2" -itoa = "1.0.11" +addr-symbolizer = { version = "0.1" } +env_logger = "0.11" +itoa = "1.0" +kdmp-parser = "0.5" [profile.release] debug = true panic = "abort" - -[[bin]] -name = "symbolizer-rs" -path = "src/main.rs" diff --git a/src/guid.rs b/src/guid.rs deleted file mode 100644 index 25da4d6..0000000 --- a/src/guid.rs +++ /dev/null @@ -1,42 +0,0 @@ -// Axel '0vercl0k' Souchet - February 20 2024 -//! This module contains the implementation of the [`Guid`] type. -use std::fmt::Display; - -/// A GUID. -#[derive(Default, Debug)] -pub struct Guid { - d0: u32, - d1: u16, - d2: u16, - d3: [u8; 8], -} - -impl From<[u8; 16]> for Guid { - fn from(value: [u8; 16]) -> Self { - let d0 = u32::from_le_bytes(value[0..4].try_into().unwrap()); - let d1 = u16::from_le_bytes(value[4..6].try_into().unwrap()); - let d2 = u16::from_le_bytes(value[6..8].try_into().unwrap()); - let d3 = value[8..].try_into().unwrap(); - - Self { d0, d1, d2, d3 } - } -} - -impl Display for Guid { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( - "{:08X}{:04X}{:04X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}", - self.d0, - self.d1, - self.d2, - self.d3[0], - self.d3[1], - self.d3[2], - self.d3[3], - self.d3[4], - self.d3[5], - self.d3[6], - self.d3[7] - )) - } -} diff --git a/src/hex_addrs_iter.rs b/src/hex_addrs_iter.rs index c946925..0bc6e0f 100644 --- a/src/hex_addrs_iter.rs +++ b/src/hex_addrs_iter.rs @@ -75,7 +75,7 @@ fn fast_hex_str_to_u32(hex: [u8; 8]) -> u32 { /// Convert the `slice` of an hexadecimal string into an integer. fn hex_slice(slice: &[u8]) -> Result { - let slice = slice.strip_prefix(&[b'0', b'x']).unwrap_or(slice); + let slice = slice.strip_prefix(b"0x").unwrap_or(slice); if slice.len() > 16 { bail!("{slice:?} has more digits than supported (16)"); } @@ -165,7 +165,7 @@ where if let Some(last_range) = self.last_range { let last_slice = &self.buf[last_range]; // Be nice, and ignore a potential trailing end of line.. - let last_slice = last_slice.strip_suffix(&[b'\n']).unwrap_or(last_slice); + let last_slice = last_slice.strip_suffix(b"\n").unwrap_or(last_slice); // ..and if there's a carriage return right before, let's ignore this one as // well. let last_slice = last_slice @@ -214,7 +214,7 @@ where // what we return where the next slice starts at Some(idx) => { let without_lf = &parse_slice[..idx]; - let without_cr = without_lf.strip_suffix(&[b'\r']); + let without_cr = without_lf.strip_suffix(b"\r"); (without_cr.unwrap_or(without_lf), idx + 1) } @@ -232,7 +232,7 @@ where .with_context(|| anyhow!("failed to turn {addr_str:?} into an integer")) { Ok(o) => o, - Err(e) => return Some(Err(e)), + e => return Some(e), }; // If we hit the EOF, let's record the last range of data we'll consume. @@ -265,9 +265,7 @@ where mod tests { use std::io::BufReader; - use anyhow::Result; - - use super::HexAddressesIterator; + use super::{HexAddressesIterator, Result}; #[test] fn t1() { diff --git a/src/human.rs b/src/human.rs index 9eb1892..1865b51 100644 --- a/src/human.rs +++ b/src/human.rs @@ -24,7 +24,7 @@ pub trait ToHuman: Sized + Copy { /// Blanket implementation for all the `T` that have what we need. impl ToHuman for T where - T: Into, + T: TryInto, T: Copy, { } @@ -34,12 +34,12 @@ pub struct HumanTime(T); impl Display for HumanTime where - T: Into, + T: TryInto, T: Copy, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut unit = "s"; - let mut time = self.0.into() as f64; + let mut time = self.0.try_into().map_err(|_| std::fmt::Error)? as f64; let m = 60f64; let h = m * m; let d = h * 24.0; @@ -92,12 +92,12 @@ pub struct HumanNumber(T); impl Display for HumanNumber where - T: Into, + T: TryInto, T: Copy, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut unit = ""; - let mut size = self.0.into() as f64; + let mut size = self.0.try_into().map_err(|_| std::fmt::Error)? as f64; let k = 1_000f64; let m = k * k; let b = m * k; diff --git a/src/main.rs b/src/main.rs index 72f3e5b..3c4307a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,27 +1,113 @@ // Axel '0vercl0k' Souchet - February 19 2024 #![doc = include_str!("../README.md")] -mod guid; -mod hex_addrs_iter; -mod human; -mod misc; -mod modules; -mod pdbcache; -mod pe; -mod stats; -mod symbolizer; - -use std::io::Write; -use std::path::PathBuf; -use std::{fs, io}; - -use anyhow::{bail, Context, Result}; +use std::fmt::Display; +use std::fs::File; +use std::io::{stdout, BufReader, BufWriter, Write}; +use std::path::{Path, PathBuf}; +use std::time::Instant; +use std::{env, fs, io}; + +use addr_symbolizer::{AddrSpace, Builder as SymbolizerBuilder, Module, Symbolizer}; +use anyhow::{anyhow, bail, Context, Result}; use clap::{ArgAction, Parser, ValueEnum}; use kdmp_parser::KernelDumpParser; -use misc::sympath; -use symbolizer::Symbolizer; + +mod hex_addrs_iter; +mod human; + +use hex_addrs_iter::HexAddressesIterator; +use human::ToHuman; + +#[derive(Debug)] +struct StatsBuilder { + start: Instant, + n_files: u64, +} + +impl Default for StatsBuilder { + fn default() -> Self { + Self { + start: Instant::now(), + n_files: 0, + } + } +} + +impl StatsBuilder { + pub fn done_file(&mut self) { + self.n_files += 1; + } + + pub fn stop(self, symbolizer: Symbolizer) -> Stats { + Stats { + time: self.start.elapsed().as_secs(), + n_files: self.n_files, + symbolizer_stats: symbolizer.stats(), + } + } +} + +struct Stats { + time: u64, + n_files: u64, + symbolizer_stats: addr_symbolizer::Stats, +} + +impl Display for Stats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "✓ Successfully symbolized {} lines across {} files in {} ({}% cache hits", + self.symbolizer_stats.n_addrs.human_number(), + self.n_files.human_number(), + self.time.human_time(), + percentage( + self.symbolizer_stats.cache_hit, + self.symbolizer_stats.n_addrs + ) + )?; + + let size_downloaded = self.symbolizer_stats.amount_downloaded(); + if size_downloaded > 0 { + write!( + f, + ", downloaded {} / {} PDBs)", + size_downloaded.human_bytes(), + self.symbolizer_stats.amount_pdb_downloaded().human_number() + ) + } else { + write!(f, ")") + } + } +} + +#[derive(Debug)] +struct AddrSpaceWrapper { + parser: KernelDumpParser, +} + +impl AddrSpaceWrapper { + fn new(parser: KernelDumpParser) -> Self { + Self { parser } + } +} + +impl AddrSpace for AddrSpaceWrapper { + fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result { + self.parser + .virt_read(addr.into(), buf) + .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) + } + + fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { + self.parser + .try_virt_read(addr.into(), buf) + .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) + } +} /// The style of the symbols. -#[derive(Default, Debug, ValueEnum, Clone)] +#[derive(Default, Debug, Clone, ValueEnum)] enum SymbolStyle { /// Module + offset style like `foo.dll+0x11`. Modoff, @@ -50,7 +136,7 @@ struct CliArgs { skip: usize, /// The maximum amount of lines to process per file. #[arg(short, long, default_value = "20000000")] - max: Option, + limit: Option, /// The symbolization style (mod+offset or mod!f+offset). #[arg(long, default_value = "full")] style: SymbolStyle, @@ -67,6 +153,9 @@ struct CliArgs { /// parsed if present. #[arg(long)] symcache: Option, + /// Import PDBs found in the specified directories into the symbol cache. + #[arg(long)] + import_pdbs: Option>, /// The size in bytes of the buffer used to write data into the output /// files. #[arg(long, default_value_t = 3 * 1024 * 1024)] @@ -74,6 +163,137 @@ struct CliArgs { /// The size in bytes of the buffer used to read data from the input files. #[arg(long, default_value_t = 1024 * 1024)] in_buffer_size: usize, + /// Don't try to download PDBs off the network. + #[arg(long, default_value_t = false)] + offline: bool, +} + +/// Calculate a percentage value. +pub fn percentage(how_many: u64, how_many_total: u64) -> u32 { + assert!( + how_many_total > 0, + "{how_many_total} needs to be bigger than 0" + ); + + ((how_many * 1_00) / how_many_total) as u32 +} + +/// Parse the `_NT_SYMBOL_PATH` environment variable to try the path of a symbol +/// cache. +fn sympath() -> Option { + let env = env::var("_NT_SYMBOL_PATH").ok()?; + + if !env.starts_with("srv*") { + return None; + } + + let sympath = env.strip_prefix("srv*").unwrap(); + let sympath = PathBuf::from(sympath.split('*').next().unwrap()); + + if sympath.is_dir() { + Some(sympath) + } else { + None + } +} + +/// Create the output file from an input. +/// +/// This logic was moved into a function to be able to handle the `--overwrite` +/// logic and to handle the case when `output` is a directory path and not a +/// file path. In that case, we will create a file with the same input file +/// name, but with a specific suffix. +fn get_output_file(args: &CliArgs, input: &Path, output: &Path) -> Result { + let output_path = if output.is_dir() { + // If the output is a directory, then we'll create a file that has the same file + // name as the input, but with a suffix. + let path = input.with_extension("symbolized.txt"); + let filename = path.file_name().ok_or_else(|| anyhow!("no file name"))?; + + output.join(filename) + } else { + // If the output path is already a file path, then we'll use it as is. + output.into() + }; + + // If the output exists, we'll want the user to tell us to overwrite those + // files. + if output_path.exists() && !args.overwrite { + // If they don't we will bail. + bail!( + "{} already exists, run with --overwrite", + output_path.display() + ); + } + + // We can now create the output file! + File::create(output_path.clone()) + .with_context(|| format!("failed to create output file {output_path:?}")) +} + +/// Process an input file and symbolize every line. +fn symbolize_file( + symbolizer: &mut Symbolizer, + addr_space: &mut impl AddrSpace, + trace_path: impl AsRef, + args: &CliArgs, +) -> Result { + let trace_path = trace_path.as_ref(); + let input = File::open(trace_path) + .with_context(|| format!("failed to open {}", trace_path.display()))?; + + let writer: Box = match &args.output { + Some(output) => Box::new(get_output_file(args, trace_path, output)?), + None => Box::new(stdout()), + }; + + let mut output = BufWriter::with_capacity(args.out_buffer_size, writer); + let mut line_number = 1 + args.skip; + let mut lines_symbolized = 1; + let limit = args.limit.unwrap_or(usize::MAX); + let reader = BufReader::with_capacity(args.in_buffer_size, input); + for addr in HexAddressesIterator::new(reader).skip(args.skip) { + let addr = addr.with_context(|| { + format!( + "failed to get hex addr from l{line_number} of {}", + trace_path.display() + ) + })?; + + if args.line_numbers { + let mut buffer = itoa::Buffer::new(); + output.write_all(b"l")?; + output.write_all(buffer.format(line_number).as_bytes())?; + output.write_all(b": ")?; + } + + match args.style { + SymbolStyle::Modoff => symbolizer.modoff(addr, &mut output), + SymbolStyle::Full => symbolizer.full(addr_space, addr, &mut output), + } + .with_context(|| { + format!( + "failed to symbolize l{line_number} of {}", + trace_path.display() + ) + })?; + + output.write_all(b"\n")?; + + if lines_symbolized >= limit { + println!( + "Hit maximum line limit {} for {}", + limit, + trace_path.display() + ); + break; + } + + lines_symbolized += 1; + line_number += 1; + } + + Ok(lines_symbolized) } fn main() -> Result<()> { @@ -105,7 +325,7 @@ fn main() -> Result<()> { // We need to parse the crash-dump to figure out where drivers / user-modules // are loaded at, and to read enough information out of the PE to download PDB // files ourselves. - let parser = KernelDumpParser::new(&crash_dump_path).context("failed to create dump parser")?; + let parser = KernelDumpParser::new(crash_dump_path).context("failed to create dump parser")?; // Figure out what is the symbol path we should be using. We will use the one // specified by the user, or will try to find one in the `_NT_SYMBOL_PATH` @@ -117,10 +337,32 @@ fn main() -> Result<()> { bail!("no sympath"); }; + let mut modules = Vec::new(); + for (at, name) in parser.user_modules().chain(parser.kernel_modules()) { + let (_, filename) = name.rsplit_once('\\').unwrap_or((name, name)); + modules.push(Module::new( + filename.to_string(), + at.start.into(), + at.end.into(), + )); + } + // All right, ready to create the symbolizer. - let mut symbolizer = Symbolizer::new(symcache, parser, args.symsrv.clone())?; + let mut wrapper = AddrSpaceWrapper::new(parser); + let mut builder = SymbolizerBuilder::default() + .modules(modules) + .symcache(symcache)?; + + if let Some(import_pdbs) = &args.import_pdbs { + builder = builder.import_pdbs(import_pdbs.iter())?; + } + + if !args.offline { + builder = builder.online(args.symsrv.iter()); + } + + let mut symbolizer = builder.build()?; - symbolizer.start_stopwatch(); let paths = if args.trace.is_dir() { // If we received a path to a directory as input, then we will try to symbolize // every file inside that directory.. @@ -135,16 +377,18 @@ fn main() -> Result<()> { vec![args.trace.clone()] }; + let mut stats_builder = StatsBuilder::default(); let total = paths.len(); for (idx, path) in paths.into_iter().enumerate() { print!("\x1B[2K\r"); - symbolizer.process_file(&path, &args)?; + symbolize_file(&mut symbolizer, &mut wrapper, &path, &args)?; + stats_builder.done_file(); print!("[{}/{total}] {} done", idx + 1, path.display()); io::stdout().flush()?; } // Grab a few stats before exiting! - let stats = symbolizer.stop_stopwatch(); + let stats = stats_builder.stop(symbolizer); println!("\x1B[2K\r{stats}"); Ok(()) diff --git a/src/misc.rs b/src/misc.rs deleted file mode 100644 index 5a708e1..0000000 --- a/src/misc.rs +++ /dev/null @@ -1,161 +0,0 @@ -// Axel '0vercl0k' Souchet - February 23 2024 -//! This module contains the implementation of a bunch of misc utility functions -//! that didn't really fit anywhere else. -use std::env; -use std::path::PathBuf; - -/// A relative address. -pub type Rva = u32; - -/// Parse the `_NT_SYMBOL_PATH` environment variable to try the path of a symbol -/// cache. -pub fn sympath() -> Option { - let env = env::var("_NT_SYMBOL_PATH").ok()?; - - if !env.starts_with("srv*") { - return None; - } - - let sympath = env.strip_prefix("srv*").unwrap(); - let sympath = PathBuf::from(sympath.split('*').next().unwrap()); - - if sympath.is_dir() { - Some(sympath) - } else { - None - } -} - -/// Calculate a percentage value. -pub fn percentage(how_many: u64, how_many_total: u64) -> u32 { - assert!( - how_many_total > 0, - "{how_many_total} needs to be bigger than 0" - ); - - ((how_many * 1_00) / how_many_total) as u32 -} - -/// Convert an `u64` into an hex string. -/// -/// Highly inspired by 'Fast unsigned integer to hex string' by Johnny Lee: -/// - -pub fn fast_hex64(buffer: &mut [u8; 16], u: u64) -> &[u8] { - let mut x = u as u128; - - // Arrange each digit into their own byte. Each byte will become the ascii - // character representing its digit. For example, we want to arrange: - // - `0x00000000_00000000_DEADBEEF_BAADC0DE` into - // - `0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E`. - // - // Here's a step by step using `0xDEADBEEF_BAADC0DE`: - // 1. `x = 0x00000000_DEADBEEF_00000000_BAADC0DE` - // 2. `x = 0xDEAD0000_BEEF0000_BAAD0000_C0DE0000` - // 3. `x = 0x00DE00AD_00BE00EF_00BA00AD_00C000DE` - // 4. `x = 0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E` - // - // Let's start the dance.. - x = (x & 0xFFFFFFFF_00000000) << 32 | x; - x = ((x & 0xFFFF0000_00000000_FFFF0000) << 32) | ((x & 0xFFFF_00000000_0000FFFF) << 16); - x = ((x & 0xFF0000_00FF0000_00FF0000_00FF0000) >> 16) - | ((x & 0xFF000000_FF000000_FF000000_FF000000) >> 8); - x = ((x & 0xF000F0_00F000F0_00F000F0_00F000F0) << 4) | (x & 0xF000F_000F000F_000F000F_000F000F); - - // This creates a mask where there'll be a 0x01 byte for each digit that is - // alpha. For example, for `0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E` we want: - // `0x01010101_01010101_01010101_01000101`. The trick is to add 0x06 to each - // byte; if the digit is 0x0A..0x0F, adding 0x06 will give 0x10..0x15 (notice - // the leading '1'). Note that we need to ADD, not an OR :). At this point, - // right shifting by 4 bits means to position that leading '1' in the lower - // nibble which is then 'grabbed' via the masking with 0x01.. - let mask = - ((x + 0x06060606_06060606_06060606_06060606) >> 4) & 0x01010101_01010101_01010101_01010101; - - // Turn each digit into their ASCII equivalent by setting the high nibble of - // each byte to 0x3. `0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E` becomes - // `0x3D3E3A3D_3B3E3E3F_3B3A3A3D_3C303D3E`. - x |= 0x30303030_30303030_30303030_30303030; - - // The last step is to adjust the ASCII byte for every digit that was in - // 0xA..0xF. We basically add to each of those bytes `0x27` to make them lower - // case alpha ASCII. - // For example: - // - `0x01010101_01010101_01010101_01000101 * 0x27 = - // 0x27272727_27272727_27272727_27002727` - // - `0x3D3E3A3D_3B3E3E3F_3B3A3A3D_3C303D3E + - // 0x27272727_27272727_27272727_27002727` = - // `0x64656164_62656566_62616164_63306465` - // - // Why `0x27`? Well, if we have the digit 'a', we end up with `0x3a`. ASCII - // character for 'a' is `0x61`, so `0x61 - 0x3a = 0x27`. - x += 0x27 * mask; - - // Transform the integer into a slice of bytes. - buffer.copy_from_slice(&x.to_be_bytes()); - - // We're done! - buffer -} - -/// Convert an `u32` into an hex string. -/// -/// Highly inspired by 'Fast unsigned integer to hex string' by Johnny Lee: -/// - -/// -/// Adapted to not bother shuffling the bytes in little endian; we simply read -/// the final integer as big endian. -pub fn fast_hex32(buffer: &mut [u8; 8], u: u32) -> &[u8] { - let mut x = u as u64; - - // Here's a step by step using `0xDEADBEEF`: - // 1. `x = 0x0000DEAD_0000BEEF` - // 2. `x = 0xDE00AD00_BE00EF00` - // 3. `x = 0x0D0E0A0D_0B0E0E0F` - x = (x & 0xFFFF0000) << 16 | x; - x = ((x & 0x0000FF00_0000FF00) << 16) | ((x & 0x000000FF_000000FF) << 8); - x = ((x & 0xF000F000_F000F000) >> 4) | ((x & 0x0F000F00_0F000F00) >> 8); - - let mask = ((x + 0x06060606_06060606) >> 4) & 0x01010101_01010101; - x |= 0x30303030_30303030; - x += 0x27 * mask; - - buffer.copy_from_slice(&x.to_be_bytes()); - - buffer -} - -#[cfg(test)] -mod tests { - use super::{fast_hex32, fast_hex64}; - - #[test] - fn hex32() { - let mut buffer = [0; 8]; - let out = fast_hex32(&mut buffer, 0xdeadbeef); - assert_eq!(out, &[b'd', b'e', b'a', b'd', b'b', b'e', b'e', b'f']); - let out = fast_hex32(&mut buffer, 0xdead); - assert_eq!(out, &[b'0', b'0', b'0', b'0', b'd', b'e', b'a', b'd']); - let out = fast_hex32(&mut buffer, 0x0); - assert_eq!(out, &[b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0']); - } - - #[test] - fn hex64() { - let mut buffer = [0; 16]; - let out = fast_hex64(&mut buffer, 0xdeadbeef_baadc0de); - assert_eq!(out, &[ - b'd', b'e', b'a', b'd', b'b', b'e', b'e', b'f', b'b', b'a', b'a', b'd', b'c', b'0', - b'd', b'e' - ]); - let out = fast_hex64(&mut buffer, 0xdeadbeef); - assert_eq!(out, &[ - b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'd', b'e', b'a', b'd', b'b', b'e', - b'e', b'f' - ]); - let out = fast_hex64(&mut buffer, 0x0); - assert_eq!(out, &[ - b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', - b'0', b'0' - ]); - } -} diff --git a/src/modules.rs b/src/modules.rs deleted file mode 100644 index e91ce37..0000000 --- a/src/modules.rs +++ /dev/null @@ -1,98 +0,0 @@ -// Axel '0vercl0k' Souchet - February 23 2024 -//! This module contains the implementation of the [`Module`] type which is used -//! across the codebase. -use std::ops::Range; - -use crate::misc::Rva; - -/// A user or kernel module. -#[derive(Debug, Default)] -pub struct Module { - /// Where the module is loaded into virtual memory. - pub at: Range, - /// The name of the module. - pub name: String, -} - -impl Module { - /// Create a [`Module`]. - pub fn new(name: String, start: u64, end: u64) -> Self { - Module { - name, - at: start..end, - } - } - - /// Calculate an [`Rva`] from an `addr` contained in this module. - pub fn rva(&self, addr: u64) -> Rva { - debug_assert!(self.at.contains(&addr)); - - let offset = addr - self.at.start; - assert!(offset <= u32::MAX.into()); - - offset as Rva - } -} - -/// A list of modules. -#[derive(Debug, Default)] -pub struct Modules(Vec); - -impl Modules { - /// Create a [`Modules`]. - pub fn new(mut modules: Vec) -> Self { - // Order the modules by their end addresses. - modules.sort_unstable_by_key(|e| e.at.end); - - Self(modules) - } - - /// Find the module that contains this address. - pub fn find(&self, addr: u64) -> Option<&Module> { - // Find the index of the first module that might contain `addr`. - let idx = self.0.partition_point(|m| m.at.end <= addr); - - // At this point there's several cases to handle. - // - // `partition_point` returns the len of the vector if it couldn't - // partition in two. This means that `addr` cannot possibly be contained by any - // of the modules we have, so we're done. - if idx == self.0.len() { - return None; - } - - // We found the first module that has an end address larger than `addr`. This - // doesn't mean the module contains the address though. Imagine `addr` = - // `0xdeadbeef`, and `module.at` = `[0xefefefef, 0xefefefef+1]`. - let module = &self.0[idx]; - - // For this reason, we'll make sure the `addr` is in fact included, otherwise - // it's not a match. - if module.at.contains(&addr) { - Some(module) - } else { - None - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn basics() { - let modules = Modules::new(vec![ - Module::new("foo".to_string(), 0x1_000, 0x2_000), - Module::new("foobar".to_string(), 0x2_000, 0x3_000), - Module::new("bar".to_string(), 0x4_000, 0x5_000), - ]); - - assert!(modules.find(1).is_none()); - assert_eq!(modules.find(0x1_000).unwrap().name, "foo"); - assert_eq!(modules.find(0x2_000).unwrap().name, "foobar"); - assert!(modules.find(0x3_000).is_none()); - assert_eq!(modules.find(0x4_fff).unwrap().name, "bar"); - assert!(modules.find(0x6_000).is_none()); - } -} diff --git a/src/pdbcache.rs b/src/pdbcache.rs deleted file mode 100644 index f75707c..0000000 --- a/src/pdbcache.rs +++ /dev/null @@ -1,548 +0,0 @@ -// Axel '0vercl0k' Souchet - February 23 2024 -//! This module contains the implementation of the [`PdbCache`] which is the -//! object that keeps track of all the information needed to symbolize an -//! address. It extracts it out of a PDB file and doesn't require it to be -//! around. -use std::borrow::Cow; -use std::collections::BTreeMap; -use std::fmt::Debug; -use std::fs::File; -use std::ops::Range; -use std::path::Path; - -use anyhow::{anyhow, Context, Result}; -use log::{trace, warn}; -use pdb::{ - AddressMap, FallibleIterator, LineProgram, PdbInternalSectionOffset, ProcedureSymbol, - StringTable, Symbol, -}; - -use crate::modules::Module; - -/// A PDB opened via file access. -type Pdb<'p> = pdb::PDB<'p, File>; -/// A relative virtual address. -type Rva = u32; -/// A vector of lines. -type Lines = Vec; - -/// A line of source code. -/// -/// It maps an offset in the function (like offset -/// `0x1122`) to a line number in a file (like `foo.c:1336`). -#[derive(Default, Debug)] -struct Line { - /// Offset from the start of the function it's part of. - offset: u32, - /// The line number. - number: Rva, - /// Most lines in a function are part of the same file which is stored in - /// the [`SourceInfo`] which contains the lines info. But in case, this line - /// is stored in a different file, this is its path. - override_path: Option, -} - -impl Line { - /// Build a [`Line`]. - fn new(offset: Rva, number: u32, override_path: Option) -> Self { - Self { - offset, - number, - override_path, - } - } -} - -/// Information related to source code. -/// -/// It contains the path to the source code file as well as a mapping between -/// offsets to line number. -#[derive(Debug, Default)] -struct SourceInfo { - path: String, - lines: Lines, -} - -impl SourceInfo { - /// Build a [`SourceInfo`]. - fn new(path: String, lines: Lines) -> Self { - // We assume we have at least one entry in the vector. - assert!(!lines.is_empty()); - - Self { path, lines } - } - - /// Find the line number associated to a raw offset from inside a function. - pub fn line(&self, offset: Rva) -> &Line { - self.lines - .iter() - .find(|&line| offset < line.offset) - .unwrap_or(self.lines.last().unwrap()) - } -} - -/// A function. -/// -/// It has a name and if available, information related to the file where the -/// function is implemented as well as the line of code. -#[derive(Default, Debug)] -struct FuncSymbol { - pub name: String, - pub source_info: Option, -} - -impl FuncSymbol { - fn new(name: String, source_info: Option) -> Self { - Self { name, source_info } - } -} - -impl From for FuncSymbol { - fn from(value: BuilderEntry) -> Self { - FuncSymbol::new(value.name, value.source_info) - } -} - -/// A PDB cache. -/// -/// It basically is a data-structure that stores all the information about the -/// functions defined in a module. It extracts everything it can off a PDB and -/// then toss it as a PDB file is larger than a [`PdbCache`] (as we don't care -/// about types, variables, etc.). -pub struct PdbCache { - module_name: String, - addrs: Vec>, - symbols: Vec, -} - -impl Debug for PdbCache { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PdbCache") - .field("module_name", &self.module_name) - .finish_non_exhaustive() - } -} - -impl PdbCache { - fn new(module_name: String, mut symbols: Vec<(Range, FuncSymbol)>) -> Self { - symbols.sort_unstable_by_key(|(range, _)| range.end); - let (addrs, symbols) = symbols.into_iter().unzip(); - - Self { - module_name, - addrs, - symbols, - } - } - - /// Find a symbol that contains `rva`. - fn find_sym(&self, rva: Rva) -> Option<(Rva, &FuncSymbol)> { - let idx = self.addrs.partition_point(|probe| probe.end <= rva); - if idx == self.addrs.len() { - return None; - } - - let range = &self.addrs[idx]; - let func = &self.symbols[idx]; - - if range.contains(&rva) { - Some((range.start, func)) - } else { - None - } - } - - /// Symbolize a raw address. - /// - /// This pulls as much information as possible and use any private symbols - /// if there were any. - pub fn symbolize(&self, rva: Rva) -> Result { - // Find the function in which this `rva` is in. - let Some((func_rva, func_symbol)) = self.find_sym(rva) else { - // If we can't find one, we'll just return `module.dll+rva`. - return Ok(format!("{}+{:#x}", self.module_name, rva)); - }; - - debug_assert!( - rva >= func_rva, - "The function RVA should always be smaller or equal to the instruction RVA" - ); - - // Calculate the instruction offset. - let instr_offset = rva - func_rva; - - // Generate the symbolized version. - let symbolized = if let Some(source_info) = &func_symbol.source_info { - // If we have knowledge about in which source file this is implemented and at - // what line number, then let's use it.. - let line = source_info.line(instr_offset); - let path = line.override_path.as_ref().unwrap_or(&source_info.path); - - format!( - "{}!{}+{instr_offset:#x} [{path} @ {}]", - self.module_name, func_symbol.name, line.number - ) - } else { - // ..or do without if it's not present. - format!( - "{}!{}+{instr_offset:#x}", - self.module_name, func_symbol.name - ) - }; - - Ok(symbolized) - } -} - -#[derive(Debug)] -struct BuilderEntry { - name: String, - len: Option, - source_info: Option, -} - -impl BuilderEntry { - fn new(name: String, len: Option, source_info: Option) -> Self { - Self { - name, - len, - source_info, - } - } - - fn with_name(name: String) -> Self { - Self::new(name, None, None) - } - - fn len(&self) -> Option { - self.len - } -} - -/// A [`PdbCache`] builder. -/// -/// Ultimately, we try to get as much information possible on modules with what -/// we have. Sometimes, we have public symbols, something we have private -/// symbols and.. sometimes we have nothing (just its PE). If we're dealing with -/// just information extracted from the PE or the public symbols, we have no -/// available information regarding function sizes. -/// -/// To work around this issue, what we do is we aggregate all the information in -/// a data structure ordered by the function address. Once we're done, we walk -/// this data structure and we calculate the size of the current function by -/// 'filling the hole' up to the next function. This is innacurate but is the -/// only heuristic I had in store. -/// -/// Once we have a list of functions with assigned sizes, we can finally build -/// the [`PdbCache`] structure. -#[derive(Debug)] -pub struct PdbCacheBuilder<'module> { - /// The module for which this symbol cache is for. - module: &'module Module, - /// Basically all the information we've extracted so far. - /// - /// The key is the [`Rva`] of where the module starts, and the value is a - /// [`BuilderEntry`] which describes the symbol with more details. - symbols: BTreeMap, -} - -impl<'module> PdbCacheBuilder<'module> { - pub fn new(module: &'module Module) -> Self { - Self { - module, - symbols: BTreeMap::new(), - } - } - - /// Ingest a bunch of symbols. - /// - /// The key is the start [`Rva`] of the symbol, and the value is its name. - /// This is used to ingest for example a list of functions acquired from the - /// EAT of a module. - pub fn ingest(&mut self, symbols: impl Iterator) { - for (start, name) in symbols { - self.symbols.insert(start, BuilderEntry::with_name(name)); - } - } - - /// Parse a [`ProcedureSymbol`]. - fn parse_procedure_symbol( - &mut self, - proc: &ProcedureSymbol, - address_map: &AddressMap, - string_table: &StringTable, - line_program: &LineProgram, - ) -> Result<()> { - let proc_name = proc.name.to_string(); - let Some(pdb::Rva(proc_rva)) = proc.offset.to_rva(address_map) else { - warn!( - "failed to get rva for procedure symbol {} / {:?}, skipping", - proc_name, proc.offset - ); - - return Ok(()); - }; - - let mut lines_it = line_program.lines_for_symbol(proc.offset); - let mut main_path = None; - let mut lines = Lines::new(); - while let Some(line) = lines_it.next()? { - let Some(pdb::Rva(line_rva)) = line.offset.to_rva(address_map) else { - warn!( - "failed to get rva for procedure symbol {} / {:?}, skipping", - proc_name, proc.offset - ); - continue; - }; - - let file_info = line_program.get_file_info(line.file_index)?; - let override_path = if main_path.is_none() { - main_path = Some(file_info.name.to_string_lossy(string_table)?.into_owned()); - - None - } else { - let new_path = file_info.name.to_string_lossy(string_table)?; - if main_path.as_ref().unwrap() != &new_path { - Some(new_path.into_owned()) - } else { - None - } - }; - - if line_rva < proc_rva { - warn!( - "symbol {} has confusing line information, skipping", - proc_name - ); - return Ok(()); - } - - let line_offset = line_rva - proc_rva; - lines.push(Line::new(line_offset, line.line_start, override_path)); - } - - self.ingest_symbol( - address_map, - proc_name, - proc.offset, - Some(proc.len), - main_path.map(|p| SourceInfo::new(p, lines)), - ) - } - - /// Ingest a symbol with a name. - fn ingest_symbol_with_name( - &mut self, - address_map: &AddressMap, - name: Cow, - offset: PdbInternalSectionOffset, - ) -> Result<()> { - self.ingest_symbol(address_map, name, offset, None, None) - } - - /// Ingest a symbol with a name and a length. - fn ingest_symbol_with_len( - &mut self, - address_map: &AddressMap, - name: Cow, - offset: PdbInternalSectionOffset, - len: u32, - ) -> Result<()> { - self.ingest_symbol(address_map, name, offset, Some(len), None) - } - - /// Ingest a symbol. - /// - /// Some symbols have a length, some don't, some have source information, - /// some don't. - fn ingest_symbol( - &mut self, - address_map: &AddressMap, - name: Cow, - offset: PdbInternalSectionOffset, - len: Option, - source_info: Option, - ) -> Result<()> { - use msvc_demangler::DemangleFlags as E; - let undecorated_name = if name.as_bytes().starts_with(b"?") { - // Demangle the name if it starts by a '?'. - match msvc_demangler::demangle(&name, E::NAME_ONLY) { - Ok(o) => o, - Err(e) => { - // Let's log the failures as warning because we might care one day? - warn!("failed to demangle {name}: {e}"); - - // But if it failed, returning the mangled name is better than nothing. - name.into_owned() - } - } - } else { - // If it isn't a mangled name, then do.. nothing! - name.into() - }; - - // Get the RVA.. - let pdb::Rva(rva) = offset.to_rva(address_map).ok_or_else(|| { - anyhow!( - "failed to get rva from symbol {undecorated_name} / {:?}, skipping", - offset - ) - })?; - - //.. and build an entry for this function. - if let Some(prev) = self - .symbols - .insert(rva, BuilderEntry::new(undecorated_name, len, source_info)) - { - warn!("symbol {prev:?} in dbi has a duplicate at {rva:#x}, skipping"); - } - - Ok(()) - } - - /// Parse a [`Symbol`]. - fn parse_symbol( - &mut self, - address_map: &AddressMap, - symbol: &Symbol, - extra: Option<(&StringTable, &LineProgram)>, - ) -> Result<()> { - use pdb::SymbolData as SD; - match symbol.parse()? { - SD::Procedure(procedure) => { - let (string_table, line_program) = extra.unwrap(); - self.parse_procedure_symbol(&procedure, address_map, string_table, line_program)?; - } - SD::Public(public) => { - self.ingest_symbol_with_name(address_map, public.name.to_string(), public.offset)?; - } - SD::Thunk(thunk) => { - self.ingest_symbol_with_len( - address_map, - thunk.name.to_string(), - thunk.offset, - thunk.len.into(), - )?; - } - _ => {} - }; - - Ok(()) - } - - /// Parse the debug information stream which is where private symbols are - /// stored in. - fn parse_dbi(&mut self, pdb: &mut Pdb, address_map: &AddressMap) -> Result<()> { - // If we don't have a string table, there is no point in parsing the debug - // information stream. - let Ok(string_table) = pdb.string_table() else { - return Ok(()); - }; - - // Grab the debug information stream.. - let dbi = pdb.debug_information().context("failed to get dbi")?; - // ..and grab / walk through the 'modules'. - let mut module_it = dbi.modules()?; - while let Some(module) = module_it.next()? { - // Get information about the module; such as its path, its symbols, etc. - let Some(info) = pdb.module_info(&module)? else { - warn!("no module info: {:?}", &module); - continue; - }; - - let program = info.line_program()?; - let mut sym_it = info.symbols()?; - while let Some(symbol) = sym_it.next()? { - if let Err(e) = - self.parse_symbol(address_map, &symbol, Some((&string_table, &program))) - { - warn!("parsing {symbol:?} failed with {e:?}, ignoring"); - } - } - } - - Ok(()) - } - - /// Parse the global symbols stream where public symbols are stored at. - fn parse_global_symbols_table( - &mut self, - pdb: &mut Pdb, - address_map: &AddressMap, - ) -> Result<()> { - let global_symbols = pdb.global_symbols()?; - let mut symbol_it = global_symbols.iter(); - while let Some(symbol) = symbol_it.next()? { - if let Err(e) = self.parse_symbol(address_map, &symbol, None) { - warn!("parsing {symbol:?} failed with {e:?}, ignoring"); - } - } - - Ok(()) - } - - /// Ingest a PDB file stored on the file system. - pub fn ingest_pdb(&mut self, pdb_path: impl AsRef) -> Result<()> { - // Open the PDB file. - let pdb_path = pdb_path.as_ref(); - let pdb_file = - File::open(pdb_path).with_context(|| format!("failed to open pdb {pdb_path:?}"))?; - let mut pdb = - Pdb::open(pdb_file).with_context(|| format!("failed to parse pdb {pdb_path:?}"))?; - - trace!("ingesting {pdb_path:?}.."); - - let address_map = pdb.address_map()?; - // Parse and extract all the bits we need from the private symbols first. We do - // this first, because procedures have a length field which isn't the case for - // global symbols. And if there's duplicates, then we'd rather have the entry - // that gives us the exact procedure length instead of us guessing. - self.parse_dbi(&mut pdb, &address_map) - .context("failed to parse private symbols")?; - - // Parse and extract all the bits we need from the global symbols.. - self.parse_global_symbols_table(&mut pdb, &address_map) - .context("failed to parse public symbols") - } - - /// Build a [`PdbCache`]. - pub fn build(mut self) -> Result { - // Walk the map of ordered RVA with their associated names and assign lengths to - // each of the functions. Some function have a length and some don't. If a - // length is specified, then we'll use it; otherwise we'll assign one ourselves. - let mut functions = Vec::with_capacity(self.symbols.len()); - while let Some((start, entry)) = self.symbols.pop_first() { - let end = if let Some(len) = entry.len() { - // If we have a length, then use it! - start - .checked_add(len) - .ok_or_else(|| anyhow!("overflow w/ symbol range"))? - } else { - // If we don't have one, the length of the current function is basically up to - // the next entry. - // - // For example imagine the below: - // - RVA: 0, Name: foo - // - RVA: 5, Name: bar - // - // In that case, we consider the first function to be spanning [0..4], and - // [5..module size] for the second one. - - // If we didn't pop the last value, then just check the one that follows. - if let Some((&end, _)) = self.symbols.first_key_value() { - end - } else { - debug_assert!(self.module.at.end > self.module.at.start); - - // If we popped the last value, just use the module end as the end of the range. - u32::try_from(self.module.at.end - self.module.at.start) - .context("failed to make the module's end into a rva")? - } - }; - - functions.push((Range { start, end }, entry.into())); - } - - Ok(PdbCache::new(self.module.name.clone(), functions)) - } -} diff --git a/src/pe.rs b/src/pe.rs deleted file mode 100644 index 41353e8..0000000 --- a/src/pe.rs +++ /dev/null @@ -1,554 +0,0 @@ -// Axel '0vercl0k' Souchet - February 19 2024 -//! This module contains the implementation of the PE parsing we do. -use std::fmt::Display; -use std::mem; -use std::ops::Range; -use std::path::PathBuf; - -use anyhow::{anyhow, bail, Context, Result}; -use kdmp_parser::{Gva, KdmpParserError, KernelDumpParser}; -use log::debug; - -use crate::guid::Guid; -use crate::misc::Rva; - -/// The IMAGE_DOS_HEADER. -#[derive(Default, Debug)] -#[repr(C, packed(2))] -pub struct ImageDosHeader { - pub e_magic: u16, - pub e_cblp: u16, - pub e_cp: u16, - pub e_crlc: u16, - pub e_cparhdr: u16, - pub e_minalloc: u16, - pub e_maxalloc: u16, - pub e_ss: u16, - pub e_sp: u16, - pub e_csum: u16, - pub e_ip: u16, - pub e_cs: u16, - pub e_lfarlc: u16, - pub e_ovno: u16, - pub e_res: [u16; 4], - pub e_oemid: u16, - pub e_oeminfo: u16, - pub e_res2: [u16; 10], - pub e_lfanew: i32, -} - -/// The IMAGE_NT_HEADERS. -#[derive(Default, Debug)] -#[repr(C)] -struct NtHeaders { - signature: u32, - file_hdr: ImageFileHeader, -} - -/// The IMAGE_FILE_HEADER. -#[derive(Default, Debug)] -#[repr(C)] -pub struct ImageFileHeader { - pub machine: u16, - pub number_of_sections: u16, - pub time_date_stamp: u32, - pub pointer_to_symbol_table: u32, - pub number_of_symbols: u32, - pub size_of_optional_header: u16, - pub characteristics: u16, -} - -/// The IMAGE_DATA_DIRECTORY. -#[derive(Debug, Default, Clone, Copy)] -#[repr(C)] -pub struct ImageDataDirectory { - pub virtual_address: u32, - pub size: u32, -} - -/// The IMAGE_OPTIONAL_HEADER32. -#[derive(Debug, Default)] -#[repr(C)] -pub struct ImageOptionalHeader32 { - pub magic: u16, - pub major_linker_version: u8, - pub minor_linker_version: u8, - pub size_of_code: u32, - pub size_of_initialized_data: u32, - pub size_of_uninitialized_data: u32, - pub address_of_entry_point: u32, - pub base_of_code: u32, - pub base_of_data: u32, - pub image_base: u32, - pub section_alignment: u32, - pub file_alignment: u32, - pub major_operating_system_version: u16, - pub minor_operating_system_version: u16, - pub major_image_version: u16, - pub minor_image_version: u16, - pub major_subsystem_version: u16, - pub minor_subsystem_version: u16, - pub win32_version_value: u32, - pub size_of_image: u32, - pub size_of_headers: u32, - pub check_sum: u32, - pub subsystem: u16, - pub dll_characteristics: u16, - pub size_of_stack_reserve: u32, - pub size_of_stack_commit: u32, - pub size_of_heap_reserve: u32, - pub size_of_heap_commit: u32, - pub loader_flags: u32, - pub number_of_rva_and_sizes: u32, - pub data_directory: [ImageDataDirectory; 16], -} - -/// The IMAGE_OPTIONAL_HEADER64. -#[derive(Debug, Default)] -#[repr(C, packed(4))] -pub struct ImageOptionalHeader64 { - pub magic: u16, - pub major_linker_version: u8, - pub minor_linker_version: u8, - pub size_of_code: u32, - pub size_of_initialized_data: u32, - pub size_of_uninitialized_data: u32, - pub address_of_entry_point: u32, - pub base_of_code: u32, - pub image_base: u64, - pub section_alignment: u32, - pub file_alignment: u32, - pub major_operating_system_version: u16, - pub minor_operating_system_version: u16, - pub major_image_version: u16, - pub minor_image_version: u16, - pub major_subsystem_version: u16, - pub minor_subsystem_version: u16, - pub win32_version_value: u32, - pub size_of_image: u32, - pub size_of_headers: u32, - pub check_sum: u32, - pub subsystem: u16, - pub dll_characteristics: u16, - pub size_of_stack_reserve: u64, - pub size_of_stack_commit: u64, - pub size_of_heap_reserve: u64, - pub size_of_heap_commit: u64, - pub loader_flags: u32, - pub number_of_rva_and_sizes: u32, - pub data_directory: [ImageDataDirectory; 16], -} - -/// The IMAGE_DEBUG_DIRECTORY. -#[derive(Default, Debug)] -#[repr(C)] -pub struct ImageDebugDirectory { - pub characteristics: u32, - pub time_date_stamp: u32, - pub major_version: u16, - pub minor_version: u16, - pub type_: u32, - pub size_of_data: u32, - pub address_of_raw_data: u32, - pub pointer_to_raw_data: u32, -} - -/// The IMAGE_EXPORT_DIRECTORY. -#[derive(Default, Debug)] -#[repr(C)] -pub struct ImageExportDirectory { - pub characteristics: u32, - pub time_date_stamp: u32, - pub major_version: u16, - pub minor_version: u16, - pub name: u32, - pub base: u32, - pub number_of_functions: u32, - pub number_of_names: u32, - pub address_of_functions: u32, - pub address_of_names: u32, - pub address_of_name_ordinals: u32, -} - -/// The code view information. -#[derive(Debug, Default)] -#[repr(C)] -pub struct Codeview { - pub signature: u32, - pub guid: [u8; 16], - pub age: u32, - // name follows -} - -pub const IMAGE_NT_SIGNATURE: u32 = 17744; -pub const IMAGE_FILE_MACHINE_AMD64: u16 = 34404; -pub const IMAGE_DIRECTORY_ENTRY_EXPORT: usize = 0; -pub const IMAGE_DIRECTORY_ENTRY_DEBUG: usize = 6; - -pub const IMAGE_DEBUG_TYPE_CODEVIEW: u32 = 2; - -/// A PDB identifier. -/// -/// To download a PDB off Microsoft's Symbol Server, we need three pieces of -/// information: the pdb name, a guid and its age. -#[derive(Debug, Default)] -pub struct PdbId { - pub path: PathBuf, - pub guid: Guid, - pub age: u32, -} - -impl Display for PdbId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("{:?}:{}:{:x}", self.path, self.guid, self.age)) - } -} - -impl PdbId { - pub fn new(path: PathBuf, guid: Guid, age: u32) -> Result { - if path.file_name().is_none() { - bail!("pdb path {path:?} does not have a filename"); - } - - Ok(Self { path, guid, age }) - } - - pub fn name(&self) -> String { - self.path - .file_name() - .unwrap() - .to_string_lossy() - .into_owned() - } -} - -/// Calculate the absolute address of an array entry based on a base address, -/// the RVA of the array, the entry index and the size of an entry. -pub fn array_offset(base: u64, rva_array: u32, idx: u32, entry_size: usize) -> Option { - let offset = idx.checked_mul(entry_size.try_into().ok()?)?; - let rva = rva_array.checked_add(offset)?; - - base.checked_add(rva.into()) -} - -/// Read a NULL terminated string from the dump file at a specific address. -pub fn read_string(parser: &KernelDumpParser, addr: u64, max: usize) -> Result { - let mut s = String::new(); - let mut terminated = false; - let mut gva = Gva::new(addr); - for _ in 0..max { - let mut buf = [0]; - parser - .virt_read_exact(gva, &mut buf) - .with_context(|| "failed reading null terminated string".to_string())?; - - let c = buf[0]; - if c == 0 { - terminated = true; - break; - } - - s.push(c.into()); - gva += 1.into(); - } - - if !terminated && s.len() == max { - s.push_str("..."); - } - - Ok(s) -} - -/// A parsed PE headers. -/// -/// We are only interested in the PDB identifier and the Export Address Table. -#[derive(Debug, Default)] -pub struct Pe { - pub pdb_id: Option, - pub exports: Vec<(Rva, String)>, -} - -impl Pe { - fn try_parse_debug_dir( - parser: &KernelDumpParser, - base: u64, - opt_hdr: &ImageOptionalHeader64, - ) -> Result> { - // Let's check if there's an ImageDebugDirectory. - let debug_data_dir = opt_hdr.data_directory[IMAGE_DIRECTORY_ENTRY_DEBUG]; - if usize::try_from(debug_data_dir.size).unwrap() < mem::size_of::() { - debug!("debug dir is too small"); - return Ok(None); - } - - // Read it. - let debug_dir_addr = base - .checked_add(debug_data_dir.virtual_address.into()) - .ok_or_else(|| anyhow!("overflow with debug_data_dir"))?; - let Some(debug_dir) = - parser.try_virt_read_struct::(debug_dir_addr.into())? - else { - debug!( - "failed to read ImageDebugDirectory {debug_dir_addr:#x} because of mem translation" - ); - return Ok(None); - }; - - // If it's not a codeview type.. I don't know what to do, so let's bail. - if debug_dir.type_ != IMAGE_DEBUG_TYPE_CODEVIEW { - debug!("debug dir is not a codeview"); - return Ok(None); - } - - // Let's make sure it's big enough to back a codeview structure. - if usize::try_from(debug_dir.size_of_data).unwrap() < mem::size_of::() { - debug!("codeview too small"); - return Ok(None); - } - - // Let's read it. - let codeview_addr = base - .checked_add(debug_dir.address_of_raw_data.into()) - .ok_or_else(|| anyhow!("overflow with debug_dir"))?; - let Some(codeview) = parser.try_virt_read_struct::(codeview_addr.into())? else { - debug!("failed to read codeview {codeview_addr:#x} because of mem translation"); - return Ok(None); - }; - - // The codeview structure is followed by a NULL terminated string which is the - // module name. - let leftover = - usize::try_from(debug_dir.size_of_data).unwrap() - mem::size_of::(); - if leftover == 0 || leftover > 256 { - bail!("the module path is either 0 or larger than reasonable"); - } - - // Allocate space for it, and read it. - let mut file_name = vec![0; leftover]; - let file_name_addr = array_offset( - base, - debug_dir.address_of_raw_data, - 1, - mem::size_of::(), - ) - .ok_or_else(|| anyhow!("oveflow with debug_dir"))?; - - let Some(amount) = parser.try_virt_read(file_name_addr.into(), &mut file_name)? else { - return Ok(None); - }; - - // The last character is supposed to be a NULL byte, bail if it's not there. - if *file_name.last().unwrap() != 0 { - bail!("the module path doesn't end with a NULL byte"); - } - - file_name.resize(amount - 1, 0); - - // All right, at this point we have everything we need: the PDB name / GUID / - // age. Those are the three piece of information we need to download a PDB - // off Microsoft's symbol server. - let path = PathBuf::from(String::from_utf8(file_name)?); - - Ok(Some(PdbId::new(path, codeview.guid.into(), codeview.age)?)) - } - - fn try_parse_export_dir( - parser: &KernelDumpParser, - base: u64, - opt_hdr: &ImageOptionalHeader64, - ) -> Result>> { - // Let's check if there's an EAT. - debug!("parsing EAT.."); - let export_data_dir = opt_hdr.data_directory[IMAGE_DIRECTORY_ENTRY_EXPORT]; - if usize::try_from(export_data_dir.size)? < mem::size_of::() { - debug!("export dir is too small"); - return Ok(None); - } - - // Read it. - let export_dir_addr = base - .checked_add(u64::from(export_data_dir.virtual_address)) - .ok_or_else(|| anyhow!("overflow with export_data_dir"))?; - let Some(export_dir) = - parser.try_virt_read_struct::(export_dir_addr.into())? - else { - debug!("failed to read ImageExportDirectory {export_dir_addr:#x} because of mem translation"); - return Ok(None); - }; - - // Read the ordinal / name arrays. - // """ - // The export name pointer table is an array of addresses (RVAs) into the export - // name table. The pointers are 32 bits each and are relative to the image base. - // The pointers are ordered lexically to allow binary searches. - // An export name is defined only if the export name pointer table contains a - // pointer to it. """ - let n_names = export_dir.number_of_names; - let addr_of_names = export_dir.address_of_names; - // """ - // The export ordinal table is an array of 16-bit unbiased indexes into the - // export address table. Ordinals are biased by the Ordinal Base field of the - // export directory table. In other words, the ordinal base must be subtracted - // from the ordinals to obtain true indexes into the export address table. - // """ - let addr_of_ords = export_dir.address_of_name_ordinals; - let mut names = Vec::with_capacity(n_names.try_into()?); - let mut ords = Vec::with_capacity(names.len()); - for name_idx in 0..n_names { - // Read the name RVA's.. - let name_rva_addr = array_offset(base, addr_of_names, name_idx, mem::size_of::()) - .ok_or_else(|| anyhow!("overflow with name_rva_addr"))?; - let Some(name_rva) = parser - .try_virt_read_struct::(name_rva_addr.into()) - .with_context(|| "failed to read EAT's name array".to_string())? - else { - debug!( - "failed to read EAT's name array {name_rva_addr:#x} because of mem translation" - ); - return Ok(None); - }; - - let name_addr = base - .checked_add(name_rva.into()) - .ok_or_else(|| anyhow!("overflow with name_addr"))?; - // ..then read the string in memory. - let name = read_string(parser, name_addr, 64)?; - names.push(name); - - // Read the ordinal. - let ord_addr = array_offset(base, addr_of_ords, name_idx, mem::size_of::()) - .ok_or_else(|| anyhow!("overflow with ord_addr"))?; - let Some(ord) = parser - .try_virt_read_struct::(ord_addr.into()) - .with_context(|| "failed to read EAT's ord array".to_string())? - else { - debug!("failed to read EAT's ord array {ord_addr:#x} because of mem translation"); - return Ok(None); - }; - ords.push(ord); - } - - debug!("read {n_names} names"); - - // Read the address array. - // - // """ - // The export address table contains the address of exported entry points and - // exported data and absolutes. An ordinal number is used as an index into the - // export address table. - // """ - let addr_of_functs = export_dir.address_of_functions; - let n_functs = export_dir.number_of_functions; - let mut address_rvas = Vec::with_capacity(n_functs.try_into()?); - for addr_idx in 0..n_functs { - // Read the RVA. - let address_rva_addr = - array_offset(base, addr_of_functs, addr_idx, mem::size_of::()) - .ok_or_else(|| anyhow!("overflow with address_rva_addr"))?; - - let Some(address_rva) = parser - .try_virt_read_struct::(address_rva_addr.into()) - .with_context(|| "failed to read EAT's address array".to_string())? - else { - debug!("failed to read EAT's address array {address_rva_addr:#x} because of mem translation"); - return Ok(None); - }; - - address_rvas.push(address_rva); - } - - debug!("read {n_functs} addresses"); - - // Time to build the EAT. - let eat_range = Range { - start: export_data_dir.virtual_address, - end: export_data_dir - .virtual_address - .checked_add(export_data_dir.size) - .ok_or_else(|| anyhow!("overflow with export data dir size"))?, - }; - - let mut exports = Vec::with_capacity(address_rvas.len()); - for (unbiased_ordinal, addr_rva) in address_rvas.drain(..).enumerate() { - let ordinal = unbiased_ordinal - .checked_add(export_dir.base.try_into()?) - .ok_or_else(|| anyhow!("overflow with biased_ordinal"))?; - let name = ords - .iter() - .position(|&o| usize::from(o) == unbiased_ordinal) - .map(|name_idx| names[name_idx].clone()) - .unwrap_or_else(|| format!("ORD#{ordinal}")); - - let forwarder = eat_range.contains(&addr_rva); - if !forwarder { - exports.push((addr_rva, name.clone())); - } - } - - debug!("built table w/ {} entries", exports.len()); - - Ok(Some(exports)) - } - - pub fn new(parser: &KernelDumpParser, base: u64) -> Result { - // All right let's parse the PE. - debug!("parsing PE @ {:#x}", base); - - // Read the DOS/NT headers. - let dos_hdr = parser - .virt_read_struct::(base.into()) - .with_context(|| "failed to read ImageDosHeader")?; - let nt_hdr_addr = base - .checked_add(dos_hdr.e_lfanew.try_into().unwrap()) - .ok_or_else(|| anyhow!("overflow with e_lfanew"))?; - let nt_hdr = parser.virt_read_struct::(nt_hdr_addr.into())?; - - // Let's verify the signature.. - if nt_hdr.signature != IMAGE_NT_SIGNATURE { - bail!("wrong PE signature for {base:#x}"); - } - - // ..and let's ignore non x64 PEs. - if nt_hdr.file_hdr.machine != IMAGE_FILE_MACHINE_AMD64 { - bail!("wrong architecture for {base:#x}"); - } - - // Now locate the optional header, and check that it looks big enough. - let opt_hdr_addr = nt_hdr_addr - .checked_add(mem::size_of_val(&nt_hdr).try_into().unwrap()) - .ok_or_else(|| anyhow!("overflow with nt_hdr"))?; - let opt_hdr_size = nt_hdr.file_hdr.size_of_optional_header as usize; - debug!("parsing optional hdr @ {:#x}", opt_hdr_addr); - - // If it's not big enough, let's bail. - if opt_hdr_size < mem::size_of::() { - bail!("optional header's size is too small"); - } - - // Read the IMAGE_OPTIONAL_HEADER64. - let opt_hdr = parser - .virt_read_struct::(opt_hdr_addr.into()) - .with_context(|| "failed to read ImageOptionalHeader64")?; - - // Read the PDB information if there's any. - let pdb_id = Self::try_parse_debug_dir(parser, base, &opt_hdr)?; - - // Read the EXPORT table if there's any. - let exports = match Self::try_parse_export_dir(parser, base, &opt_hdr) { - Ok(o) => o, - Err(e) => { - let Some(kdmp) = e.downcast_ref::() else { - return Err(e); - }; - - match kdmp { - KdmpParserError::AddrTranslation(..) => None, - _ => return Err(e), - } - } - } - .unwrap_or_default(); - - Ok(Self { pdb_id, exports }) - } -} diff --git a/src/stats.rs b/src/stats.rs deleted file mode 100644 index c23b6dc..0000000 --- a/src/stats.rs +++ /dev/null @@ -1,88 +0,0 @@ -// Axel '0vercl0k' Souchet - April 21 2024 -//! This module contains the [`Stats`] type that is used to keep track of -//! various statistics when symbolizing. -use std::cell::RefCell; -use std::fmt::{Debug, Display}; -use std::time::Instant; - -use crate::human::ToHuman; -use crate::misc::percentage; - -#[derive(Debug)] -pub struct StatsBuilder { - start: RefCell, - inner: RefCell, -} - -impl Default for StatsBuilder { - fn default() -> Self { - Self { - start: RefCell::new(Instant::now()), - inner: Default::default(), - } - } -} - -#[derive(Default, Clone, Copy, Debug)] -pub struct Stats { - time: u64, - n_files: u64, - n_lines: u64, - n_downloads: u64, - size_downloaded: u64, - cache_hit: u64, -} - -impl StatsBuilder { - pub fn start(&self) { - self.start.replace_with(|_| Instant::now()); - } - - pub fn stop(&self) -> Stats { - let elapsed = self.start.borrow().elapsed(); - let mut stats = *self.inner.borrow(); - stats.time = elapsed.as_secs(); - - stats - } - - pub fn done_file(&self, n: u64) { - let mut inner = self.inner.borrow_mut(); - inner.n_files += 1; - inner.n_lines += n; - } - - pub fn downloaded_file(&self, size: u64) { - let mut inner = self.inner.borrow_mut(); - inner.n_downloads += 1; - inner.size_downloaded += size; - } - - pub fn cache_hit(&self) { - self.inner.borrow_mut().cache_hit += 1; - } -} - -impl Display for Stats { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "✓ Successfully symbolized {} lines across {} files in {} ({}% cache hits", - self.n_lines.human_number(), - self.n_files.human_number(), - self.time.human_time(), - percentage(self.cache_hit, self.n_lines) - )?; - - if self.size_downloaded > 0 { - writeln!( - f, - ", downloaded {} / {} PDBs)", - self.size_downloaded.human_bytes(), - self.n_downloads.human_number() - ) - } else { - writeln!(f, ")") - } - } -} diff --git a/src/symbolizer.rs b/src/symbolizer.rs deleted file mode 100644 index b3e9cee..0000000 --- a/src/symbolizer.rs +++ /dev/null @@ -1,500 +0,0 @@ -// Axel '0vercl0k' Souchet - February 20 2024 -//! This module contains the implementation of the [`Symbolizer`] which is the -//! object that is able to symbolize files using PDB information if available. -use std::cell::RefCell; -use std::collections::{hash_map, HashMap}; -use std::fs::{self, File}; -use std::hash::{BuildHasher, Hasher}; -use std::io::{self, stdout, BufReader, BufWriter, Write}; -use std::ops::Range; -use std::path::{Path, PathBuf}; -use std::rc::Rc; - -use anyhow::{anyhow, bail, Context, Result}; -use kdmp_parser::KernelDumpParser; -use log::{debug, trace, warn}; - -use crate::hex_addrs_iter::HexAddressesIterator; -use crate::misc::{fast_hex32, fast_hex64}; -use crate::modules::{Module, Modules}; -use crate::pdbcache::{PdbCache, PdbCacheBuilder}; -use crate::pe::{PdbId, Pe}; -use crate::stats::{Stats, StatsBuilder}; -use crate::CliArgs; - -/// Format a path to find a PDB in a symbol cache. -/// -/// Here is an example: -/// ```text -/// C:\work\dbg\sym\ntfs.pdb\64D20DCBA29FFC0CD355FFE7440EC5F81\ntfs.pdb -/// ^^^^^^^^^^^^^^^ ^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^ -/// cache path PDB name PDB GUID & PDB Age PDB name -/// ``` -pub fn format_pdb_path(symsrv_cache: &Path, pdb_id: &PdbId) -> PathBuf { - let pdb_name = pdb_id.name(); - symsrv_cache - .join(&pdb_name) - .join(format!("{}{:x}", pdb_id.guid, pdb_id.age,)) - .join(&pdb_name) -} - -/// Format a URL to find a PDB on an HTTP symbol server. -pub fn format_pdb_url(symsrv: &str, pdb_id: &PdbId) -> String { - // It seems that Chrome's symsrv server only accepts the GUID/age part as - // uppercase hex, so let's use that. - format!( - "{symsrv}/{}/{}{:x}/{}", - pdb_id.name(), - pdb_id.guid, - pdb_id.age, - pdb_id.name() - ) -} - -/// Download a PDB file from a candidate symbol servers. -/// -/// The code iterates through every symbol servers, and stops as soon as it was -/// able to download a matching file. -pub fn try_download_from_guid( - symsrvs: &Vec, - sympath_dir: impl AsRef, - pdb_id: &PdbId, -) -> Result> { - // Give a try to each of the symbol servers. - for symsrv in symsrvs { - debug!( - "trying to download pdb for {} from {}..", - pdb_id.name(), - symsrv - ); - - // The way a symbol path is structured is that there is a directory per module.. - let sympath_dir = sympath_dir.as_ref(); - let pdb_root_dir = sympath_dir.join(pdb_id.name()); - - // ..and inside, there is a directory per version of the PDB.. - let pdb_dir = pdb_root_dir.join(format!("{}{:x}", pdb_id.guid, pdb_id.age)); - - // ..and finally the PDB file itself. - let pdb_path = pdb_dir.join(pdb_id.name()); - - // The file doesn't exist on the file system, so let's try to download it from a - // symbol server. - let pdb_url = format_pdb_url(symsrv, pdb_id); - let resp = match ureq::get(&pdb_url).call() { - Ok(o) => o, - // If we get a 404, it means that the server doesn't know about this file. So we'll skip - // to the next symbol server. - Err(ureq::Error::Status(404, ..)) => { - warn!("got a 404 for {pdb_url}"); - continue; - } - // If we received any other errors, well that's not expected so let's bail. - Err(e) => bail!("failed to download pdb {pdb_url}: {e}"), - }; - - // If the server knows about this file, it is time to create the directory - // structure in which we'll download the file into. - if !(pdb_root_dir.try_exists()?) { - debug!("creating {pdb_root_dir:?}.."); - fs::create_dir(&pdb_root_dir) - .with_context(|| format!("failed to create base pdb dir {pdb_root_dir:?}"))?; - } - - if !pdb_dir.try_exists()? { - debug!("creating {pdb_dir:?}.."); - fs::create_dir(&pdb_dir) - .with_context(|| format!("failed to create pdb dir {pdb_dir:?}"))?; - } - - // Finally, we can download and save the file. - let file = - File::create(&pdb_path).with_context(|| format!("failed to create {pdb_path:?}"))?; - - io::copy(&mut resp.into_reader(), &mut BufWriter::new(file))?; - - debug!("downloaded to {pdb_path:?}"); - return Ok(Some(pdb_path)); - } - - Ok(None) -} - -/// Create the output file from an input. -/// -/// This logic was moved into a function to be able to handle the `--overwrite` -/// logic and to handle the case when `output` is a directory path and not a -/// file path. In that case, we will create a file with the same input file -/// name, but with a specific suffix. -fn get_output_file(args: &CliArgs, input: &Path, output: &Path) -> Result { - let output_path = if output.is_dir() { - // If the output is a directory, then we'll create a file that has the same file - // name as the input, but with a suffix. - let path = input.with_extension("symbolized.txt"); - let filename = path.file_name().ok_or_else(|| anyhow!("no file name"))?; - - output.join(filename) - } else { - // If the output path is already a file path, then we'll use it as is. - output.into() - }; - - // If the output exists, we'll want the user to tell us to overwrite those - // files. - if output_path.exists() && !args.overwrite { - // If they don't we will bail. - bail!( - "{} already exists, run with --overwrite", - output_path.display() - ); - } - - // We can now create the output file! - File::create(output_path.clone()) - .with_context(|| format!("failed to create output file {output_path:?}")) -} - -/// Where did we find this PDB? On the file-system somewhere, in a local symbol -/// cache or downloaded on a symbol server. -/// -/// This is used mainly to account for statistics; how many files were -/// downloaded, etc. -enum PdbKind { - /// The PDB file was found on the file system but no in a symbol cache. - Local, - /// The PDB file was found on the file system in a local symbol cache. - LocalCache, - /// The PDB file was downloaded on a remote symbol server. - Download, -} - -/// Try to find a PDB file online or locally from a [`PdbId`]. -fn get_pdb( - sympath: &Path, - symsrvs: &Vec, - pdb_id: &PdbId, -) -> Result> { - // Let's see if the path exists locally.. - if pdb_id.path.is_file() { - // .. if it does, this is a 'Local' PDB. - return Ok(Some((pdb_id.path.clone(), PdbKind::Local))); - } - - // Now, let's see if it's in the local cache.. - let local_path = format_pdb_path(sympath, pdb_id); - if local_path.is_file() { - // .. if it does, this is a 'LocalCache' PDB. - return Ok(Some((local_path, PdbKind::LocalCache))); - } - - // The last resort is to try to download it... - let downloaded_path = try_download_from_guid(symsrvs, sympath, pdb_id) - .with_context(|| format!("failed to download PDB for {pdb_id}"))?; - - Ok(downloaded_path.map(|p| (p, PdbKind::Download))) -} - -/// A simple 'hasher' that uses the input bytes as a hash. -/// -/// This is used for the cache HashMap used in the [`Symbolizer`]. We are -/// caching symbol addresses and so we know those addresses are unique and do -/// not need to be hashed. -#[derive(Default)] -struct IdentityHasher { - h: u64, -} - -impl Hasher for IdentityHasher { - fn finish(&self) -> u64 { - self.h - } - - fn write(&mut self, bytes: &[u8]) { - debug_assert_eq!(bytes.len(), 8); - - self.h = u64::from_le_bytes(bytes.try_into().unwrap()); - } -} - -impl BuildHasher for IdentityHasher { - type Hasher = Self; - - fn build_hasher(&self) -> Self::Hasher { - Self::default() - } -} - -/// The [`Symbolizer`] is the main object that glues all the logic. -/// -/// It downloads, parses PDB information, and symbolizes. -pub struct Symbolizer { - /// Keep track of some statistics regarding the number of lines symbolized, - /// PDB downloaded, etc. - stats: StatsBuilder, - /// This is a path to the local PDB symbol cache where PDBs will be - /// downloaded into / where some are available. - symcache: PathBuf, - /// This is the list of kernel / user modules read from the kernel crash - /// dump. - modules: Modules, - /// The kernel dump parser. We need this to be able to read PDB identifiers - /// out of the PE headers, as well as reading the export tables of those - /// modules. - parser: KernelDumpParser, - /// List of symbol servers to try to download PDBs from when needed. - symsrvs: Vec, - /// Caches addresses to symbols. This allows us to not have to symbolize an - /// address again. - addr_cache: RefCell, IdentityHasher>>, - /// Each parsed module is stored in this cache. We parse PDBs, etc. only - /// once and then the [`PdbCache`] is used to query. - pdb_caches: RefCell, Rc>>, -} - -impl Symbolizer { - /// Create a symbolizer. - /// - /// The `symcache` is used both for reading existing PDBs as well as writing - /// the newly downloaded ones, the `parser` is used to enumerate the kernel - /// / user modules loaded at the crash-dump time as well as reading PDB - /// identifiers off the modules' PE headers, and the HTTP symbol servers are - /// a list of servers that will get contacted to try to find one that knows - /// about a specific PDB file. - pub fn new( - symcache: impl AsRef, - parser: KernelDumpParser, - symsrvs: Vec, - ) -> Result { - // Read both the user & kernel modules from the dump file. - let mut modules = Vec::new(); - for (at, name) in parser.user_modules().chain(parser.kernel_modules()) { - let (_, filename) = name.rsplit_once('\\').unwrap_or((name, name)); - modules.push(Module::new( - filename.to_string(), - at.start.into(), - at.end.into(), - )); - } - - Ok(Self { - stats: Default::default(), - symcache: symcache.as_ref().to_path_buf(), - modules: Modules::new(modules), - parser, - symsrvs, - addr_cache: Default::default(), - pdb_caches: Default::default(), - }) - } - - /// Start the stopwatch. - pub fn start_stopwatch(&self) { - self.stats.start() - } - - /// Stop the stopwatch and get a copy of the [`Stats`]. - pub fn stop_stopwatch(self) -> Stats { - self.stats.stop() - } - - /// Get the [`PdbCache`] for a specified `addr`. - pub fn module_pdbcache(&self, addr: u64) -> Option> { - self.pdb_caches.borrow().iter().find_map(|(k, v)| { - if k.contains(&addr) { - Some(v.clone()) - } else { - None - } - }) - } - - /// Try to symbolize an address. - /// - /// If there's a [`PdbCache`] already created, then ask it to symbolize. - /// Otherwise, this will create a [`PdbCache`], try to find a PDB (locally - /// or remotely) and extract every bit of relevant information for us. - /// Finally, the result will be kept around to symbolize addresses in that - /// module faster in the future. - pub fn try_symbolize_addr_from_pdbs(&self, addr: u64) -> Result>> { - trace!("symbolizing address {addr:#x}.."); - let Some(module) = self.modules.find(addr) else { - trace!("address {addr:#x} doesn't belong to any module"); - return Ok(None); - }; - - trace!("address {addr:#x} found in {}", module.name); - - // Do we have a cache already ready to go? - if let Some(pdbcache) = self.module_pdbcache(addr) { - return Ok(Some(Rc::new(pdbcache.symbolize(module.rva(addr))?))); - } - - // Otherwise, let's make one. - let mut builder = PdbCacheBuilder::new(module); - - // Let's start by parsing the PE to get its exports, and PDB information if - // there's any. - let pe = Pe::new(&self.parser, module.at.start)?; - - // Ingest the EAT. - builder.ingest(pe.exports.into_iter()); - - // .. and see if it has PDB information. - trace!("Get PDB information for {module:?}.."); - - if let Some(pdb_id) = pe.pdb_id { - // Try to get a PDB.. - let pdb_path = get_pdb(&self.symcache, &self.symsrvs, &pdb_id)?; - - // .. and ingest it if we have one. - if let Some((pdb_path, pdb_kind)) = pdb_path { - if matches!(pdb_kind, PdbKind::Download) { - self.stats.downloaded_file(pdb_path.metadata()?.len()) - } - - builder.ingest_pdb(pdb_path)?; - } - } - - // Build the cache.. - let pdbcache = builder.build()?; - - // .. symbolize `addr`.. - let line = pdbcache - .symbolize(module.rva(addr)) - .with_context(|| format!("failed to symbolize {addr:#x}"))?; - - // .. and store the sym cache to be used for next time we need to symbolize an - // address from this module. - assert!(self - .pdb_caches - .borrow_mut() - .insert(module.at.clone(), Rc::new(pdbcache)) - .is_none()); - - Ok(Some(Rc::new(line))) - } - - /// Try to symbolize an address. - /// - /// If the address has been symbolized before, it will be in the - /// `addr_cache` already. If not, we need to take the slow path and ask the - /// right [`PdbCache`] which might require to create one in the first place. - pub fn try_symbolize_addr(&self, addr: u64) -> Result>> { - match self.addr_cache.borrow_mut().entry(addr) { - hash_map::Entry::Occupied(o) => { - self.stats.cache_hit(); - return Ok(Some(o.get().clone())); - } - hash_map::Entry::Vacant(v) => { - let Some(symbol) = self.try_symbolize_addr_from_pdbs(addr)? else { - return Ok(None); - }; - - v.insert(symbol); - } - }; - - Ok(self.addr_cache.borrow().get(&addr).cloned()) - } - - /// Symbolize `addr` in the `module+offset` style and write the result into - /// `output`. - fn modoff(&mut self, output: &mut impl Write, addr: u64) -> Result<()> { - let mut buffer = [0; 16]; - if let Some(module) = self.modules.find(addr) { - output.write_all(module.name.as_bytes())?; - output.write_all(&[b'+', b'0', b'x'])?; - - output.write_all(fast_hex32( - &mut buffer[0..8].try_into().unwrap(), - module.rva(addr), - )) - } else { - output.write_all(&[b'0', b'x'])?; - - output.write_all(fast_hex64(&mut buffer, addr)) - } - .context("failed to write symbolized value to output")?; - - output - .write_all(&[b'\n']) - .context("failed to write line feed modoff addr") - } - - /// Symbolize `addr` in the `module!function+offset` style and write the - /// result into `output`. - fn full(&mut self, output: &mut impl Write, addr: u64) -> Result<()> { - match self.try_symbolize_addr(addr)? { - Some(sym) => { - output - .write_all(sym.as_bytes()) - .context("failed to write symbolized value to output")?; - output - .write_all(&[b'\n']) - .context("failed to write line feed") - } - None => self.modoff(output, addr), - } - } - - /// Process an input file and symbolize every line. - pub fn process_file(&mut self, trace_path: impl AsRef, args: &CliArgs) -> Result { - let trace_path = trace_path.as_ref(); - let input = File::open(trace_path) - .with_context(|| format!("failed to open {}", trace_path.display()))?; - - let writer: Box = match &args.output { - Some(output) => Box::new(get_output_file(args, trace_path, output)?), - None => Box::new(stdout()), - }; - - let mut output = BufWriter::with_capacity(args.out_buffer_size, writer); - let mut line_number = 1 + args.skip; - let mut lines_symbolized = 1; - let max_line = args.max.unwrap_or(usize::MAX); - let reader = BufReader::with_capacity(args.in_buffer_size, input); - for addr in HexAddressesIterator::new(reader).skip(args.skip) { - let addr = addr.with_context(|| { - format!( - "failed to get hex addr from l{line_number} of {}", - trace_path.display() - ) - })?; - - if args.line_numbers { - let mut buffer = itoa::Buffer::new(); - output.write_all(&[b'l'])?; - output.write_all(buffer.format(line_number).as_bytes())?; - output.write_all(&[b':', b' '])?; - } - - match args.style { - crate::SymbolStyle::Modoff => self.modoff(&mut output, addr), - crate::SymbolStyle::Full => self.full(&mut output, addr), - } - .with_context(|| { - format!( - "failed to symbolize l{line_number} of {}", - trace_path.display() - ) - })?; - - if lines_symbolized >= max_line { - println!( - "Hit maximum line limit {} for {}", - max_line, - trace_path.display() - ); - break; - } - - lines_symbolized += 1; - line_number += 1; - } - - self.stats.done_file(lines_symbolized.try_into()?); - - Ok(lines_symbolized) - } -}