From 41f8303f3044f250528da6f028cb1763690f32e0 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Sun, 26 May 2024 21:33:33 -0700 Subject: [PATCH 01/28] starting to break things up --- Cargo.toml | 13 +---- src/lib.rs | 13 +++++ src/main.rs | 145 ++++++++++++++++++++++++++++++++++++++++------ src/misc.rs | 21 ------- src/symbolizer.rs | 110 +++-------------------------------- 5 files changed, 149 insertions(+), 153 deletions(-) create mode 100644 src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index f3e9e7f..4f4ccf4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,22 +13,15 @@ rust-version = "1.70" [dependencies] anyhow = "1.0" +clap = { version = "4.5", features = ["derive"] } +itoa = "1.0.11" pdb = "0.8" log = "0.4" env_logger = "0.11" -clap = { version = "4.5", features = ["derive"] } msvc-demangler = "0.10" -ureq = { version = "2.9", default-features = false, features = [ - "tls", - "gzip", -] } +ureq = { version = "2.9", default-features = false, features = ["tls", "gzip"] } kdmp-parser = "0.2" -itoa = "1.0.11" [profile.release] debug = true panic = "abort" - -[[bin]] -name = "symbolizer-rs" -path = "src/main.rs" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..12dd3f6 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,13 @@ +// Axel '0vercl0k' Souchet - May 26th 2024 +mod guid; +mod hex_addrs_iter; +mod human; +mod misc; +mod modules; +mod pdbcache; +mod pe; +mod stats; +mod symbolizer; + +pub use hex_addrs_iter::HexAddressesIterator; +pub use symbolizer::Symbolizer; diff --git a/src/main.rs b/src/main.rs index 72f3e5b..e4ff84b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,27 +1,36 @@ // Axel '0vercl0k' Souchet - February 19 2024 #![doc = include_str!("../README.md")] -mod guid; -mod hex_addrs_iter; -mod human; -mod misc; -mod modules; -mod pdbcache; -mod pe; -mod stats; -mod symbolizer; - -use std::io::Write; -use std::path::PathBuf; -use std::{fs, io}; - -use anyhow::{bail, Context, Result}; +use std::fs::File; +use std::io::{stdout, BufReader, BufWriter, Write}; +use std::path::{Path, PathBuf}; +use std::{env, fs, io}; + +use anyhow::{anyhow, bail, Context, Result}; use clap::{ArgAction, Parser, ValueEnum}; use kdmp_parser::KernelDumpParser; -use misc::sympath; -use symbolizer::Symbolizer; +use symbolizer_rs::{HexAddressesIterator, Symbolizer}; + +/// Parse the `_NT_SYMBOL_PATH` environment variable to try the path of a symbol +/// cache. +fn sympath() -> Option { + let env = env::var("_NT_SYMBOL_PATH").ok()?; + + if !env.starts_with("srv*") { + return None; + } + + let sympath = env.strip_prefix("srv*").unwrap(); + let sympath = PathBuf::from(sympath.split('*').next().unwrap()); + + if sympath.is_dir() { + Some(sympath) + } else { + None + } +} /// The style of the symbols. -#[derive(Default, Debug, ValueEnum, Clone)] +#[derive(Default, Debug, Clone, ValueEnum)] enum SymbolStyle { /// Module + offset style like `foo.dll+0x11`. Modoff, @@ -76,6 +85,104 @@ struct CliArgs { in_buffer_size: usize, } +/// Create the output file from an input. +/// +/// This logic was moved into a function to be able to handle the `--overwrite` +/// logic and to handle the case when `output` is a directory path and not a +/// file path. In that case, we will create a file with the same input file +/// name, but with a specific suffix. +fn get_output_file(args: &CliArgs, input: &Path, output: &Path) -> Result { + let output_path = if output.is_dir() { + // If the output is a directory, then we'll create a file that has the same file + // name as the input, but with a suffix. + let path = input.with_extension("symbolized.txt"); + let filename = path.file_name().ok_or_else(|| anyhow!("no file name"))?; + + output.join(filename) + } else { + // If the output path is already a file path, then we'll use it as is. + output.into() + }; + + // If the output exists, we'll want the user to tell us to overwrite those + // files. + if output_path.exists() && !args.overwrite { + // If they don't we will bail. + bail!( + "{} already exists, run with --overwrite", + output_path.display() + ); + } + + // We can now create the output file! + File::create(output_path.clone()) + .with_context(|| format!("failed to create output file {output_path:?}")) +} + +/// Process an input file and symbolize every line. +fn symbolize_file( + symbolizer: &mut Symbolizer, + trace_path: impl AsRef, + args: &CliArgs, +) -> Result { + let trace_path = trace_path.as_ref(); + let input = File::open(trace_path) + .with_context(|| format!("failed to open {}", trace_path.display()))?; + + let writer: Box = match &args.output { + Some(output) => Box::new(get_output_file(args, trace_path, output)?), + None => Box::new(stdout()), + }; + + let mut output = BufWriter::with_capacity(args.out_buffer_size, writer); + let mut line_number = 1 + args.skip; + let mut lines_symbolized = 1; + let max_line = args.max.unwrap_or(usize::MAX); + let reader = BufReader::with_capacity(args.in_buffer_size, input); + for addr in HexAddressesIterator::new(reader).skip(args.skip) { + let addr = addr.with_context(|| { + format!( + "failed to get hex addr from l{line_number} of {}", + trace_path.display() + ) + })?; + + if args.line_numbers { + let mut buffer = itoa::Buffer::new(); + output.write_all(&[b'l'])?; + output.write_all(buffer.format(line_number).as_bytes())?; + output.write_all(&[b':', b' '])?; + } + + match args.style { + SymbolStyle::Modoff => symbolizer.modoff(&mut output, addr), + SymbolStyle::Full => symbolizer.full(&mut output, addr), + } + .with_context(|| { + format!( + "failed to symbolize l{line_number} of {}", + trace_path.display() + ) + })?; + + if lines_symbolized >= max_line { + println!( + "Hit maximum line limit {} for {}", + max_line, + trace_path.display() + ); + break; + } + + lines_symbolized += 1; + line_number += 1; + } + + // symbolizer.stats.done_file(lines_symbolized.try_into()?); + + Ok(lines_symbolized) +} + fn main() -> Result<()> { #[cfg(debug_assertions)] env_logger::init(); @@ -138,7 +245,7 @@ fn main() -> Result<()> { let total = paths.len(); for (idx, path) in paths.into_iter().enumerate() { print!("\x1B[2K\r"); - symbolizer.process_file(&path, &args)?; + symbolize_file(&mut symbolizer, &path, &args)?; print!("[{}/{total}] {} done", idx + 1, path.display()); io::stdout().flush()?; } diff --git a/src/misc.rs b/src/misc.rs index 5a708e1..4af5a0d 100644 --- a/src/misc.rs +++ b/src/misc.rs @@ -1,31 +1,10 @@ // Axel '0vercl0k' Souchet - February 23 2024 //! This module contains the implementation of a bunch of misc utility functions //! that didn't really fit anywhere else. -use std::env; -use std::path::PathBuf; /// A relative address. pub type Rva = u32; -/// Parse the `_NT_SYMBOL_PATH` environment variable to try the path of a symbol -/// cache. -pub fn sympath() -> Option { - let env = env::var("_NT_SYMBOL_PATH").ok()?; - - if !env.starts_with("srv*") { - return None; - } - - let sympath = env.strip_prefix("srv*").unwrap(); - let sympath = PathBuf::from(sympath.split('*').next().unwrap()); - - if sympath.is_dir() { - Some(sympath) - } else { - None - } -} - /// Calculate a percentage value. pub fn percentage(how_many: u64, how_many_total: u64) -> u32 { assert!( diff --git a/src/symbolizer.rs b/src/symbolizer.rs index b3e9cee..f7f9a35 100644 --- a/src/symbolizer.rs +++ b/src/symbolizer.rs @@ -5,22 +5,20 @@ use std::cell::RefCell; use std::collections::{hash_map, HashMap}; use std::fs::{self, File}; use std::hash::{BuildHasher, Hasher}; -use std::io::{self, stdout, BufReader, BufWriter, Write}; +use std::io::{self, BufWriter, Write}; use std::ops::Range; use std::path::{Path, PathBuf}; use std::rc::Rc; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::{bail, Context, Result}; use kdmp_parser::KernelDumpParser; use log::{debug, trace, warn}; -use crate::hex_addrs_iter::HexAddressesIterator; use crate::misc::{fast_hex32, fast_hex64}; use crate::modules::{Module, Modules}; use crate::pdbcache::{PdbCache, PdbCacheBuilder}; use crate::pe::{PdbId, Pe}; use crate::stats::{Stats, StatsBuilder}; -use crate::CliArgs; /// Format a path to find a PDB in a symbol cache. /// @@ -120,40 +118,6 @@ pub fn try_download_from_guid( Ok(None) } -/// Create the output file from an input. -/// -/// This logic was moved into a function to be able to handle the `--overwrite` -/// logic and to handle the case when `output` is a directory path and not a -/// file path. In that case, we will create a file with the same input file -/// name, but with a specific suffix. -fn get_output_file(args: &CliArgs, input: &Path, output: &Path) -> Result { - let output_path = if output.is_dir() { - // If the output is a directory, then we'll create a file that has the same file - // name as the input, but with a suffix. - let path = input.with_extension("symbolized.txt"); - let filename = path.file_name().ok_or_else(|| anyhow!("no file name"))?; - - output.join(filename) - } else { - // If the output path is already a file path, then we'll use it as is. - output.into() - }; - - // If the output exists, we'll want the user to tell us to overwrite those - // files. - if output_path.exists() && !args.overwrite { - // If they don't we will bail. - bail!( - "{} already exists, run with --overwrite", - output_path.display() - ); - } - - // We can now create the output file! - File::create(output_path.clone()) - .with_context(|| format!("failed to create output file {output_path:?}")) -} - /// Where did we find this PDB? On the file-system somewhere, in a local symbol /// cache or downloaded on a symbol server. /// @@ -298,7 +262,7 @@ impl Symbolizer { } /// Get the [`PdbCache`] for a specified `addr`. - pub fn module_pdbcache(&self, addr: u64) -> Option> { + fn module_pdbcache(&self, addr: u64) -> Option> { self.pdb_caches.borrow().iter().find_map(|(k, v)| { if k.contains(&addr) { Some(v.clone()) @@ -315,7 +279,7 @@ impl Symbolizer { /// or remotely) and extract every bit of relevant information for us. /// Finally, the result will be kept around to symbolize addresses in that /// module faster in the future. - pub fn try_symbolize_addr_from_pdbs(&self, addr: u64) -> Result>> { + fn try_symbolize_addr_from_pdbs(&self, addr: u64) -> Result>> { trace!("symbolizing address {addr:#x}.."); let Some(module) = self.modules.find(addr) else { trace!("address {addr:#x} doesn't belong to any module"); @@ -380,7 +344,7 @@ impl Symbolizer { /// If the address has been symbolized before, it will be in the /// `addr_cache` already. If not, we need to take the slow path and ask the /// right [`PdbCache`] which might require to create one in the first place. - pub fn try_symbolize_addr(&self, addr: u64) -> Result>> { + fn try_symbolize_addr(&self, addr: u64) -> Result>> { match self.addr_cache.borrow_mut().entry(addr) { hash_map::Entry::Occupied(o) => { self.stats.cache_hit(); @@ -400,7 +364,7 @@ impl Symbolizer { /// Symbolize `addr` in the `module+offset` style and write the result into /// `output`. - fn modoff(&mut self, output: &mut impl Write, addr: u64) -> Result<()> { + pub fn modoff(&mut self, output: &mut impl Write, addr: u64) -> Result<()> { let mut buffer = [0; 16]; if let Some(module) = self.modules.find(addr) { output.write_all(module.name.as_bytes())?; @@ -424,7 +388,7 @@ impl Symbolizer { /// Symbolize `addr` in the `module!function+offset` style and write the /// result into `output`. - fn full(&mut self, output: &mut impl Write, addr: u64) -> Result<()> { + pub fn full(&mut self, output: &mut impl Write, addr: u64) -> Result<()> { match self.try_symbolize_addr(addr)? { Some(sym) => { output @@ -437,64 +401,4 @@ impl Symbolizer { None => self.modoff(output, addr), } } - - /// Process an input file and symbolize every line. - pub fn process_file(&mut self, trace_path: impl AsRef, args: &CliArgs) -> Result { - let trace_path = trace_path.as_ref(); - let input = File::open(trace_path) - .with_context(|| format!("failed to open {}", trace_path.display()))?; - - let writer: Box = match &args.output { - Some(output) => Box::new(get_output_file(args, trace_path, output)?), - None => Box::new(stdout()), - }; - - let mut output = BufWriter::with_capacity(args.out_buffer_size, writer); - let mut line_number = 1 + args.skip; - let mut lines_symbolized = 1; - let max_line = args.max.unwrap_or(usize::MAX); - let reader = BufReader::with_capacity(args.in_buffer_size, input); - for addr in HexAddressesIterator::new(reader).skip(args.skip) { - let addr = addr.with_context(|| { - format!( - "failed to get hex addr from l{line_number} of {}", - trace_path.display() - ) - })?; - - if args.line_numbers { - let mut buffer = itoa::Buffer::new(); - output.write_all(&[b'l'])?; - output.write_all(buffer.format(line_number).as_bytes())?; - output.write_all(&[b':', b' '])?; - } - - match args.style { - crate::SymbolStyle::Modoff => self.modoff(&mut output, addr), - crate::SymbolStyle::Full => self.full(&mut output, addr), - } - .with_context(|| { - format!( - "failed to symbolize l{line_number} of {}", - trace_path.display() - ) - })?; - - if lines_symbolized >= max_line { - println!( - "Hit maximum line limit {} for {}", - max_line, - trace_path.display() - ); - break; - } - - lines_symbolized += 1; - line_number += 1; - } - - self.stats.done_file(lines_symbolized.try_into()?); - - Ok(lines_symbolized) - } } From c0d26ff60b2778b25b344cd3a0593d6cc7192a0a Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Sun, 26 May 2024 21:41:52 -0700 Subject: [PATCH 02/28] relax version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4f4ccf4..325303f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ rust-version = "1.70" [dependencies] anyhow = "1.0" clap = { version = "4.5", features = ["derive"] } -itoa = "1.0.11" +itoa = "1.0" pdb = "0.8" log = "0.4" env_logger = "0.11" From 75d98fb9a9306b8736bdb359a592758b4ad01d90 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Tue, 28 May 2024 21:14:44 -0700 Subject: [PATCH 03/28] rework things --- Cargo.toml | 32 +++++++----- crates/symbolizer/Cargo.toml | 21 ++++++++ crates/symbolizer/README.md | 0 crates/symbolizer/src/error.rs | 40 ++++++++++++++ {src => crates/symbolizer/src}/guid.rs | 0 {src => crates/symbolizer/src}/human.rs | 0 {src => crates/symbolizer/src}/lib.rs | 4 +- {src => crates/symbolizer/src}/misc.rs | 0 {src => crates/symbolizer/src}/modules.rs | 0 {src => crates/symbolizer/src}/pdbcache.rs | 15 +++--- {src => crates/symbolizer/src}/pe.rs | 55 +++++++++----------- {src => crates/symbolizer/src}/stats.rs | 0 {src => crates/symbolizer/src}/symbolizer.rs | 24 ++++++--- src/hex_addrs_iter.rs | 8 ++- src/main.rs | 8 ++- 15 files changed, 141 insertions(+), 66 deletions(-) create mode 100644 crates/symbolizer/Cargo.toml create mode 100644 crates/symbolizer/README.md create mode 100644 crates/symbolizer/src/error.rs rename {src => crates/symbolizer/src}/guid.rs (100%) rename {src => crates/symbolizer/src}/human.rs (100%) rename {src => crates/symbolizer/src}/lib.rs (71%) rename {src => crates/symbolizer/src}/misc.rs (100%) rename {src => crates/symbolizer/src}/modules.rs (100%) rename {src => crates/symbolizer/src}/pdbcache.rs (97%) rename {src => crates/symbolizer/src}/pe.rs (90%) rename {src => crates/symbolizer/src}/stats.rs (100%) rename {src => crates/symbolizer/src}/symbolizer.rs (96%) diff --git a/Cargo.toml b/Cargo.toml index 325303f..314d652 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,27 +1,33 @@ [package] name = "symbolizer-rs" -version = "0.1.0" -edition = "2021" -authors = ["Axel '0vercl0k' Souchet"] categories = ["command-line-utilities", "development-tools::debugging"] description = "A fast execution trace symbolizer for Windows that runs on all major platforms and doesn't depend on any Microsoft libraries." include = ["/Cargo.toml", "/LICENSE", "/src/**", "README.md"] -keywords = ["windows", "kernel", "crash-dump", "symbols", "pdb"] -license = "MIT" -repository = "https://github.com/0vercl0k/symbolizer-rs" -rust-version = "1.70" +version = "0.1.0" +authors.workspace = true +license.workspace = true +rust-version.workspace = true +repository.workspace = true +keywords.workspace = true +edition.workspace = true [dependencies] anyhow = "1.0" clap = { version = "4.5", features = ["derive"] } -itoa = "1.0" -pdb = "0.8" -log = "0.4" -env_logger = "0.11" -msvc-demangler = "0.10" -ureq = { version = "2.9", default-features = false, features = ["tls", "gzip"] } +symbolizer = { path = "crates/symbolizer" } kdmp-parser = "0.2" +env_logger = "0.11" +itoa = "1.0" [profile.release] debug = true panic = "abort" + +[workspace] +members = ["crates/*"] +package.authors = ["Axel '0vercl0k' Souchet"] +package.license = "MIT" +package.rust-version = "1.70" +package.repository = "https://github.com/0vercl0k/symbolizer-rs" +package.keywords = ["windows", "kernel", "crash-dump", "symbols", "pdb"] +package.edition = "2021" diff --git a/crates/symbolizer/Cargo.toml b/crates/symbolizer/Cargo.toml new file mode 100644 index 0000000..3a57b41 --- /dev/null +++ b/crates/symbolizer/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "symbolizer" +version = "0.1.0" +# categories = ["command-line-utilities", "development-tools::debugging"] +# description = "A fast execution trace symbolizer for Windows that runs on all major platforms and doesn't depend on any Microsoft libraries." +include = ["/Cargo.toml", "/LICENSE", "/src/**", "README.md"] +authors.workspace = true +license.workspace = true +rust-version.workspace = true +repository.workspace = true +keywords.workspace = true +edition.workspace = true + +[dependencies] +pdb = "0.8" +log = "0.4" +msvc-demangler = "0.10" +ureq = { version = "2.9", default-features = false, features = ["tls", "gzip"] } +kdmp-parser = "0.2" +thiserror = "1.0.61" +anyhow = "1.0.86" diff --git a/crates/symbolizer/README.md b/crates/symbolizer/README.md new file mode 100644 index 0000000..e69de29 diff --git a/crates/symbolizer/src/error.rs b/crates/symbolizer/src/error.rs new file mode 100644 index 0000000..dfcd3be --- /dev/null +++ b/crates/symbolizer/src/error.rs @@ -0,0 +1,40 @@ +// Axel '0vercl0k' Souchet - May 27 2024 +use std::io; +use std::num::TryFromIntError; +use std::path::PathBuf; +use std::string::FromUtf8Error; + +use kdmp_parser::KdmpParserError; +use pdb::PdbInternalSectionOffset; +use thiserror::Error; + +pub type Result = std::result::Result; + +#[derive(Error, Debug)] +pub enum Error { + #[error("failed to get rva from symbol {0} / {1:?}")] + SymbolRva(String, PdbInternalSectionOffset), + #[error("pdb error: {0}")] + Pdb(#[from] pdb::Error), + #[error("from int error: {0}")] + FromIntError(#[from] TryFromIntError), + #[error("utf8: {0}")] + Utf8(#[from] FromUtf8Error), + #[error("pdb path {0:?} does not have a filename")] + PdbPathNoName(PathBuf), + #[error("failed to perform an i/o: {0}")] + Io(#[from] io::Error), + #[error("failed to download pdb {pdb_url}: {e}")] + DownloadPdb { + pdb_url: String, + e: Box, + }, + #[error("misc: {0}")] + Misc(Box), + #[error("the module path is either 0 or larger than reasonable")] + CodeViewInvalidPath, + #[error("kdmp parser")] + DumpParserError(#[from] KdmpParserError), + #[error("{0}")] + Anyhow(#[from] anyhow::Error), +} diff --git a/src/guid.rs b/crates/symbolizer/src/guid.rs similarity index 100% rename from src/guid.rs rename to crates/symbolizer/src/guid.rs diff --git a/src/human.rs b/crates/symbolizer/src/human.rs similarity index 100% rename from src/human.rs rename to crates/symbolizer/src/human.rs diff --git a/src/lib.rs b/crates/symbolizer/src/lib.rs similarity index 71% rename from src/lib.rs rename to crates/symbolizer/src/lib.rs index 12dd3f6..8e69e4f 100644 --- a/src/lib.rs +++ b/crates/symbolizer/src/lib.rs @@ -1,6 +1,6 @@ // Axel '0vercl0k' Souchet - May 26th 2024 +mod error; mod guid; -mod hex_addrs_iter; mod human; mod misc; mod modules; @@ -9,5 +9,5 @@ mod pe; mod stats; mod symbolizer; -pub use hex_addrs_iter::HexAddressesIterator; +pub use error::{Error, Result}; pub use symbolizer::Symbolizer; diff --git a/src/misc.rs b/crates/symbolizer/src/misc.rs similarity index 100% rename from src/misc.rs rename to crates/symbolizer/src/misc.rs diff --git a/src/modules.rs b/crates/symbolizer/src/modules.rs similarity index 100% rename from src/modules.rs rename to crates/symbolizer/src/modules.rs diff --git a/src/pdbcache.rs b/crates/symbolizer/src/pdbcache.rs similarity index 97% rename from src/pdbcache.rs rename to crates/symbolizer/src/pdbcache.rs index f75707c..591f050 100644 --- a/src/pdbcache.rs +++ b/crates/symbolizer/src/pdbcache.rs @@ -10,13 +10,14 @@ use std::fs::File; use std::ops::Range; use std::path::Path; -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, Context}; use log::{trace, warn}; use pdb::{ AddressMap, FallibleIterator, LineProgram, PdbInternalSectionOffset, ProcedureSymbol, StringTable, Symbol, }; +use crate::error::Result; use crate::modules::Module; /// A PDB opened via file access. @@ -363,10 +364,10 @@ impl<'module> PdbCacheBuilder<'module> { len: Option, source_info: Option, ) -> Result<()> { - use msvc_demangler::DemangleFlags as E; + use msvc_demangler::DemangleFlags as DF; let undecorated_name = if name.as_bytes().starts_with(b"?") { // Demangle the name if it starts by a '?'. - match msvc_demangler::demangle(&name, E::NAME_ONLY) { + match msvc_demangler::demangle(&name, DF::NAME_ONLY) { Ok(o) => o, Err(e) => { // Let's log the failures as warning because we might care one day? @@ -498,11 +499,13 @@ impl<'module> PdbCacheBuilder<'module> { // global symbols. And if there's duplicates, then we'd rather have the entry // that gives us the exact procedure length instead of us guessing. self.parse_dbi(&mut pdb, &address_map) - .context("failed to parse private symbols")?; + .map_err(|e| anyhow!("failed to parse private symbols: {e:?}"))?; // Parse and extract all the bits we need from the global symbols.. self.parse_global_symbols_table(&mut pdb, &address_map) - .context("failed to parse public symbols") + .map_err(|e| anyhow!("failed to parse public symbols: {e:?}"))?; + + Ok(()) } /// Build a [`PdbCache`]. @@ -516,7 +519,7 @@ impl<'module> PdbCacheBuilder<'module> { // If we have a length, then use it! start .checked_add(len) - .ok_or_else(|| anyhow!("overflow w/ symbol range"))? + .ok_or(anyhow!("overflow w/ symbol range"))? } else { // If we don't have one, the length of the current function is basically up to // the next entry. diff --git a/src/pe.rs b/crates/symbolizer/src/pe.rs similarity index 90% rename from src/pe.rs rename to crates/symbolizer/src/pe.rs index 41353e8..850cb0b 100644 --- a/src/pe.rs +++ b/crates/symbolizer/src/pe.rs @@ -5,12 +5,13 @@ use std::mem; use std::ops::Range; use std::path::PathBuf; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::{anyhow, Context}; use kdmp_parser::{Gva, KdmpParserError, KernelDumpParser}; use log::debug; use crate::guid::Guid; use crate::misc::Rva; +use crate::{Error as E, Result}; /// The IMAGE_DOS_HEADER. #[derive(Default, Debug)] @@ -207,7 +208,7 @@ impl Display for PdbId { impl PdbId { pub fn new(path: PathBuf, guid: Guid, age: u32) -> Result { if path.file_name().is_none() { - bail!("pdb path {path:?} does not have a filename"); + return Err(E::PdbPathNoName(path)); } Ok(Self { path, guid, age }) @@ -240,7 +241,7 @@ pub fn read_string(parser: &KernelDumpParser, addr: u64, max: usize) -> Result(debug_dir_addr.into())? else { @@ -309,7 +310,7 @@ impl Pe { // Let's read it. let codeview_addr = base .checked_add(debug_dir.address_of_raw_data.into()) - .ok_or_else(|| anyhow!("overflow with debug_dir"))?; + .ok_or(anyhow!("overflow w/ debug_dir"))?; let Some(codeview) = parser.try_virt_read_struct::(codeview_addr.into())? else { debug!("failed to read codeview {codeview_addr:#x} because of mem translation"); return Ok(None); @@ -320,7 +321,7 @@ impl Pe { let leftover = usize::try_from(debug_dir.size_of_data).unwrap() - mem::size_of::(); if leftover == 0 || leftover > 256 { - bail!("the module path is either 0 or larger than reasonable"); + return Err(E::CodeViewInvalidPath); } // Allocate space for it, and read it. @@ -331,7 +332,7 @@ impl Pe { 1, mem::size_of::(), ) - .ok_or_else(|| anyhow!("oveflow with debug_dir"))?; + .ok_or(anyhow!("overflow w/ debug_dir filename"))?; let Some(amount) = parser.try_virt_read(file_name_addr.into(), &mut file_name)? else { return Ok(None); @@ -339,7 +340,7 @@ impl Pe { // The last character is supposed to be a NULL byte, bail if it's not there. if *file_name.last().unwrap() != 0 { - bail!("the module path doesn't end with a NULL byte"); + return Err(anyhow!("the module path doesn't end with a NULL byte").into()); } file_name.resize(amount - 1, 0); @@ -368,7 +369,7 @@ impl Pe { // Read it. let export_dir_addr = base .checked_add(u64::from(export_data_dir.virtual_address)) - .ok_or_else(|| anyhow!("overflow with export_data_dir"))?; + .ok_or(anyhow!("export_data_dir"))?; let Some(export_dir) = parser.try_virt_read_struct::(export_dir_addr.into())? else { @@ -397,7 +398,7 @@ impl Pe { for name_idx in 0..n_names { // Read the name RVA's.. let name_rva_addr = array_offset(base, addr_of_names, name_idx, mem::size_of::()) - .ok_or_else(|| anyhow!("overflow with name_rva_addr"))?; + .ok_or(anyhow!("name_rva_addr"))?; let Some(name_rva) = parser .try_virt_read_struct::(name_rva_addr.into()) .with_context(|| "failed to read EAT's name array".to_string())? @@ -410,17 +411,17 @@ impl Pe { let name_addr = base .checked_add(name_rva.into()) - .ok_or_else(|| anyhow!("overflow with name_addr"))?; + .ok_or(anyhow!("overflow w/ name_addr"))?; // ..then read the string in memory. let name = read_string(parser, name_addr, 64)?; names.push(name); // Read the ordinal. let ord_addr = array_offset(base, addr_of_ords, name_idx, mem::size_of::()) - .ok_or_else(|| anyhow!("overflow with ord_addr"))?; + .ok_or(anyhow!("ord_addr"))?; let Some(ord) = parser .try_virt_read_struct::(ord_addr.into()) - .with_context(|| "failed to read EAT's ord array".to_string())? + .context("failed to read EAT's ord array")? else { debug!("failed to read EAT's ord array {ord_addr:#x} because of mem translation"); return Ok(None); @@ -444,7 +445,7 @@ impl Pe { // Read the RVA. let address_rva_addr = array_offset(base, addr_of_functs, addr_idx, mem::size_of::()) - .ok_or_else(|| anyhow!("overflow with address_rva_addr"))?; + .ok_or(anyhow!("overflow w/ address_rva_addr"))?; let Some(address_rva) = parser .try_virt_read_struct::(address_rva_addr.into()) @@ -465,14 +466,14 @@ impl Pe { end: export_data_dir .virtual_address .checked_add(export_data_dir.size) - .ok_or_else(|| anyhow!("overflow with export data dir size"))?, + .ok_or(anyhow!("overflow w/ export data dir size"))?, }; let mut exports = Vec::with_capacity(address_rvas.len()); for (unbiased_ordinal, addr_rva) in address_rvas.drain(..).enumerate() { let ordinal = unbiased_ordinal .checked_add(export_dir.base.try_into()?) - .ok_or_else(|| anyhow!("overflow with biased_ordinal"))?; + .ok_or(anyhow!("overflow w/ biased_ordinal"))?; let name = ords .iter() .position(|&o| usize::from(o) == unbiased_ordinal) @@ -500,29 +501,29 @@ impl Pe { .with_context(|| "failed to read ImageDosHeader")?; let nt_hdr_addr = base .checked_add(dos_hdr.e_lfanew.try_into().unwrap()) - .ok_or_else(|| anyhow!("overflow with e_lfanew"))?; + .ok_or(anyhow!("overflow w/ e_lfanew"))?; let nt_hdr = parser.virt_read_struct::(nt_hdr_addr.into())?; // Let's verify the signature.. if nt_hdr.signature != IMAGE_NT_SIGNATURE { - bail!("wrong PE signature for {base:#x}"); + return Err(anyhow!("wrong PE signature for {base:#x}").into()); } // ..and let's ignore non x64 PEs. if nt_hdr.file_hdr.machine != IMAGE_FILE_MACHINE_AMD64 { - bail!("wrong architecture for {base:#x}"); + return Err(anyhow!("wrong architecture for {base:#x}").into()); } // Now locate the optional header, and check that it looks big enough. let opt_hdr_addr = nt_hdr_addr .checked_add(mem::size_of_val(&nt_hdr).try_into().unwrap()) - .ok_or_else(|| anyhow!("overflow with nt_hdr"))?; + .ok_or(anyhow!("overflow w/ nt_hdr"))?; let opt_hdr_size = nt_hdr.file_hdr.size_of_optional_header as usize; debug!("parsing optional hdr @ {:#x}", opt_hdr_addr); // If it's not big enough, let's bail. if opt_hdr_size < mem::size_of::() { - bail!("optional header's size is too small"); + return Err(anyhow!("optional header's size is too small").into()); } // Read the IMAGE_OPTIONAL_HEADER64. @@ -536,16 +537,8 @@ impl Pe { // Read the EXPORT table if there's any. let exports = match Self::try_parse_export_dir(parser, base, &opt_hdr) { Ok(o) => o, - Err(e) => { - let Some(kdmp) = e.downcast_ref::() else { - return Err(e); - }; - - match kdmp { - KdmpParserError::AddrTranslation(..) => None, - _ => return Err(e), - } - } + Err(E::DumpParserError(KdmpParserError::AddrTranslation(_))) => None, + Err(e) => return Err(e), } .unwrap_or_default(); diff --git a/src/stats.rs b/crates/symbolizer/src/stats.rs similarity index 100% rename from src/stats.rs rename to crates/symbolizer/src/stats.rs diff --git a/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs similarity index 96% rename from src/symbolizer.rs rename to crates/symbolizer/src/symbolizer.rs index f7f9a35..52473b9 100644 --- a/src/symbolizer.rs +++ b/crates/symbolizer/src/symbolizer.rs @@ -10,7 +10,7 @@ use std::ops::Range; use std::path::{Path, PathBuf}; use std::rc::Rc; -use anyhow::{bail, Context, Result}; +use anyhow::Context; use kdmp_parser::KernelDumpParser; use log::{debug, trace, warn}; @@ -19,6 +19,7 @@ use crate::modules::{Module, Modules}; use crate::pdbcache::{PdbCache, PdbCacheBuilder}; use crate::pe::{PdbId, Pe}; use crate::stats::{Stats, StatsBuilder}; +use crate::{Error as E, Result}; /// Format a path to find a PDB in a symbol cache. /// @@ -88,7 +89,12 @@ pub fn try_download_from_guid( continue; } // If we received any other errors, well that's not expected so let's bail. - Err(e) => bail!("failed to download pdb {pdb_url}: {e}"), + Err(e) => { + return Err(E::DownloadPdb { + pdb_url, + e: e.into(), + }) + } }; // If the server knows about this file, it is time to create the directory @@ -152,8 +158,7 @@ fn get_pdb( } // The last resort is to try to download it... - let downloaded_path = try_download_from_guid(symsrvs, sympath, pdb_id) - .with_context(|| format!("failed to download PDB for {pdb_id}"))?; + let downloaded_path = try_download_from_guid(symsrvs, sympath, pdb_id)?; Ok(downloaded_path.map(|p| (p, PdbKind::Download))) } @@ -326,7 +331,7 @@ impl Symbolizer { // .. symbolize `addr`.. let line = pdbcache .symbolize(module.rva(addr)) - .with_context(|| format!("failed to symbolize {addr:#x}"))?; + .map_err(|e| E::Misc(format!("failed to symbolize {addr:#x}: {e:?}").into()))?; // .. and store the sym cache to be used for next time we need to symbolize an // address from this module. @@ -383,7 +388,9 @@ impl Symbolizer { output .write_all(&[b'\n']) - .context("failed to write line feed modoff addr") + .context("failed to write line feed modoff addr")?; + + Ok(()) } /// Symbolize `addr` in the `module!function+offset` style and write the @@ -394,9 +401,12 @@ impl Symbolizer { output .write_all(sym.as_bytes()) .context("failed to write symbolized value to output")?; + output .write_all(&[b'\n']) - .context("failed to write line feed") + .context("failed to write line feed")?; + + Ok(()) } None => self.modoff(output, addr), } diff --git a/src/hex_addrs_iter.rs b/src/hex_addrs_iter.rs index c946925..1f027a5 100644 --- a/src/hex_addrs_iter.rs +++ b/src/hex_addrs_iter.rs @@ -6,7 +6,7 @@ use std::io::Read; use std::ops::RangeTo; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::{Result, Context, bail}; /// Fill a `buffer` starting at the offset `append_idx` and return the slice of /// data that was read. Also, return if EOF was hit or not. @@ -232,7 +232,7 @@ where .with_context(|| anyhow!("failed to turn {addr_str:?} into an integer")) { Ok(o) => o, - Err(e) => return Some(Err(e)), + Err(e) => return Some(Err(E::Misc(""))), }; // If we hit the EOF, let's record the last range of data we'll consume. @@ -265,9 +265,7 @@ where mod tests { use std::io::BufReader; - use anyhow::Result; - - use super::HexAddressesIterator; + use super::{HexAddressesIterator, Result}; #[test] fn t1() { diff --git a/src/main.rs b/src/main.rs index e4ff84b..0ebdf6c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,11 @@ use std::{env, fs, io}; use anyhow::{anyhow, bail, Context, Result}; use clap::{ArgAction, Parser, ValueEnum}; use kdmp_parser::KernelDumpParser; -use symbolizer_rs::{HexAddressesIterator, Symbolizer}; +use symbolizer::Symbolizer; + +mod hex_addrs_iter; + +use hex_addrs_iter::HexAddressesIterator; /// Parse the `_NT_SYMBOL_PATH` environment variable to try the path of a symbol /// cache. @@ -178,7 +182,7 @@ fn symbolize_file( line_number += 1; } - // symbolizer.stats.done_file(lines_symbolized.try_into()?); + symbolizer.stats.done_file(lines_symbolized.try_into()?); Ok(lines_symbolized) } From 6614f76c55011f60507cfc1d6b5c2d9e078253d9 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Wed, 29 May 2024 20:40:40 -0700 Subject: [PATCH 04/28] get things to compile \o/ --- .github/workflows/symbolizer-rs.yml | 4 +- crates/symbolizer/src/error.rs | 2 - crates/symbolizer/src/lib.rs | 2 +- crates/symbolizer/src/misc.rs | 10 --- crates/symbolizer/src/stats.rs | 70 +++---------------- crates/symbolizer/src/symbolizer.rs | 12 +--- src/hex_addrs_iter.rs | 4 +- {crates/symbolizer/src => src}/human.rs | 0 src/main.rs | 93 +++++++++++++++++++++++-- 9 files changed, 104 insertions(+), 93 deletions(-) rename {crates/symbolizer/src => src}/human.rs (100%) diff --git a/.github/workflows/symbolizer-rs.yml b/.github/workflows/symbolizer-rs.yml index 1e2ee74..a512916 100644 --- a/.github/workflows/symbolizer-rs.yml +++ b/.github/workflows/symbolizer-rs.yml @@ -65,10 +65,10 @@ jobs: run: rustup default stable - name: cargo test - run: cargo test + run: cargo test --workspace - name: cargo test release - run: cargo test --release + run: cargo test --release --workspace - name: cargo check run: cargo check diff --git a/crates/symbolizer/src/error.rs b/crates/symbolizer/src/error.rs index dfcd3be..414f760 100644 --- a/crates/symbolizer/src/error.rs +++ b/crates/symbolizer/src/error.rs @@ -29,8 +29,6 @@ pub enum Error { pdb_url: String, e: Box, }, - #[error("misc: {0}")] - Misc(Box), #[error("the module path is either 0 or larger than reasonable")] CodeViewInvalidPath, #[error("kdmp parser")] diff --git a/crates/symbolizer/src/lib.rs b/crates/symbolizer/src/lib.rs index 8e69e4f..120250c 100644 --- a/crates/symbolizer/src/lib.rs +++ b/crates/symbolizer/src/lib.rs @@ -1,7 +1,6 @@ // Axel '0vercl0k' Souchet - May 26th 2024 mod error; mod guid; -mod human; mod misc; mod modules; mod pdbcache; @@ -10,4 +9,5 @@ mod stats; mod symbolizer; pub use error::{Error, Result}; +pub use stats::Stats; pub use symbolizer::Symbolizer; diff --git a/crates/symbolizer/src/misc.rs b/crates/symbolizer/src/misc.rs index 4af5a0d..8df4116 100644 --- a/crates/symbolizer/src/misc.rs +++ b/crates/symbolizer/src/misc.rs @@ -5,16 +5,6 @@ /// A relative address. pub type Rva = u32; -/// Calculate a percentage value. -pub fn percentage(how_many: u64, how_many_total: u64) -> u32 { - assert!( - how_many_total > 0, - "{how_many_total} needs to be bigger than 0" - ); - - ((how_many * 1_00) / how_many_total) as u32 -} - /// Convert an `u64` into an hex string. /// /// Highly inspired by 'Fast unsigned integer to hex string' by Johnny Lee: diff --git a/crates/symbolizer/src/stats.rs b/crates/symbolizer/src/stats.rs index c23b6dc..ca78f5a 100644 --- a/crates/symbolizer/src/stats.rs +++ b/crates/symbolizer/src/stats.rs @@ -2,54 +2,24 @@ //! This module contains the [`Stats`] type that is used to keep track of //! various statistics when symbolizing. use std::cell::RefCell; -use std::fmt::{Debug, Display}; -use std::time::Instant; +use std::fmt::Debug; -use crate::human::ToHuman; -use crate::misc::percentage; - -#[derive(Debug)] +#[derive(Debug, Default)] pub struct StatsBuilder { - start: RefCell, inner: RefCell, } -impl Default for StatsBuilder { - fn default() -> Self { - Self { - start: RefCell::new(Instant::now()), - inner: Default::default(), - } - } -} - #[derive(Default, Clone, Copy, Debug)] pub struct Stats { - time: u64, - n_files: u64, - n_lines: u64, - n_downloads: u64, - size_downloaded: u64, - cache_hit: u64, + pub n_lines: u64, + pub n_downloads: u64, + pub size_downloaded: u64, + pub cache_hit: u64, } impl StatsBuilder { - pub fn start(&self) { - self.start.replace_with(|_| Instant::now()); - } - - pub fn stop(&self) -> Stats { - let elapsed = self.start.borrow().elapsed(); - let mut stats = *self.inner.borrow(); - stats.time = elapsed.as_secs(); - - stats - } - - pub fn done_file(&self, n: u64) { - let mut inner = self.inner.borrow_mut(); - inner.n_files += 1; - inner.n_lines += n; + pub fn build(&self) -> Stats { + *self.inner.borrow() } pub fn downloaded_file(&self, size: u64) { @@ -62,27 +32,3 @@ impl StatsBuilder { self.inner.borrow_mut().cache_hit += 1; } } - -impl Display for Stats { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "✓ Successfully symbolized {} lines across {} files in {} ({}% cache hits", - self.n_lines.human_number(), - self.n_files.human_number(), - self.time.human_time(), - percentage(self.cache_hit, self.n_lines) - )?; - - if self.size_downloaded > 0 { - writeln!( - f, - ", downloaded {} / {} PDBs)", - self.size_downloaded.human_bytes(), - self.n_downloads.human_number() - ) - } else { - writeln!(f, ")") - } - } -} diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs index 52473b9..0a46355 100644 --- a/crates/symbolizer/src/symbolizer.rs +++ b/crates/symbolizer/src/symbolizer.rs @@ -256,14 +256,8 @@ impl Symbolizer { }) } - /// Start the stopwatch. - pub fn start_stopwatch(&self) { - self.stats.start() - } - - /// Stop the stopwatch and get a copy of the [`Stats`]. - pub fn stop_stopwatch(self) -> Stats { - self.stats.stop() + pub fn stats(self) -> Stats { + self.stats.build() } /// Get the [`PdbCache`] for a specified `addr`. @@ -331,7 +325,7 @@ impl Symbolizer { // .. symbolize `addr`.. let line = pdbcache .symbolize(module.rva(addr)) - .map_err(|e| E::Misc(format!("failed to symbolize {addr:#x}: {e:?}").into()))?; + .with_context(|| format!("failed to symbolize {addr:#x}"))?; // .. and store the sym cache to be used for next time we need to symbolize an // address from this module. diff --git a/src/hex_addrs_iter.rs b/src/hex_addrs_iter.rs index 1f027a5..5df6047 100644 --- a/src/hex_addrs_iter.rs +++ b/src/hex_addrs_iter.rs @@ -6,7 +6,7 @@ use std::io::Read; use std::ops::RangeTo; -use anyhow::{Result, Context, bail}; +use anyhow::{anyhow, bail, Context, Result}; /// Fill a `buffer` starting at the offset `append_idx` and return the slice of /// data that was read. Also, return if EOF was hit or not. @@ -232,7 +232,7 @@ where .with_context(|| anyhow!("failed to turn {addr_str:?} into an integer")) { Ok(o) => o, - Err(e) => return Some(Err(E::Misc(""))), + e => return Some(e), }; // If we hit the EOF, let's record the last range of data we'll consume. diff --git a/crates/symbolizer/src/human.rs b/src/human.rs similarity index 100% rename from crates/symbolizer/src/human.rs rename to src/human.rs diff --git a/src/main.rs b/src/main.rs index 0ebdf6c..64aacfa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,10 @@ // Axel '0vercl0k' Souchet - February 19 2024 #![doc = include_str!("../README.md")] +use std::fmt::Display; use std::fs::File; use std::io::{stdout, BufReader, BufWriter, Write}; use std::path::{Path, PathBuf}; +use std::time::Instant; use std::{env, fs, io}; use anyhow::{anyhow, bail, Context, Result}; @@ -11,8 +13,88 @@ use kdmp_parser::KernelDumpParser; use symbolizer::Symbolizer; mod hex_addrs_iter; +mod human; use hex_addrs_iter::HexAddressesIterator; +use human::ToHuman; + +#[derive(Debug)] +struct StatsBuilder { + start: Instant, + n_files: u64, + n_lines: u64, +} + +impl Default for StatsBuilder { + fn default() -> Self { + Self { + start: Instant::now(), + n_files: 0, + n_lines: 0, + } + } +} + +impl StatsBuilder { + pub fn start(&mut self) { + self.start = Instant::now(); + } + + pub fn done_file(&mut self, n: u64) { + self.n_files += 1; + self.n_lines += n; + } + + pub fn stop(self, symbolizer: Symbolizer) -> Stats { + Stats { + time: self.start.elapsed().as_secs(), + n_files: self.n_files, + n_lines: self.n_lines, + symbolizer_stats: symbolizer.stats(), + } + } +} + +struct Stats { + time: u64, + n_files: u64, + n_lines: u64, + symbolizer_stats: symbolizer::Stats, +} + +impl Display for Stats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "✓ Successfully symbolized {} lines across {} files in {} ({}% cache hits", + self.n_lines.human_number(), + self.n_files.human_number(), + self.time.human_time(), + percentage(self.symbolizer_stats.cache_hit, self.n_lines) + )?; + + if self.symbolizer_stats.size_downloaded > 0 { + writeln!( + f, + ", downloaded {} / {} PDBs)", + self.symbolizer_stats.size_downloaded.human_bytes(), + self.symbolizer_stats.n_downloads.human_number() + ) + } else { + writeln!(f, ")") + } + } +} + +/// Calculate a percentage value. +pub fn percentage(how_many: u64, how_many_total: u64) -> u32 { + assert!( + how_many_total > 0, + "{how_many_total} needs to be bigger than 0" + ); + + ((how_many * 1_00) / how_many_total) as u32 +} /// Parse the `_NT_SYMBOL_PATH` environment variable to try the path of a symbol /// cache. @@ -182,8 +264,6 @@ fn symbolize_file( line_number += 1; } - symbolizer.stats.done_file(lines_symbolized.try_into()?); - Ok(lines_symbolized) } @@ -231,7 +311,9 @@ fn main() -> Result<()> { // All right, ready to create the symbolizer. let mut symbolizer = Symbolizer::new(symcache, parser, args.symsrv.clone())?; - symbolizer.start_stopwatch(); + let mut stats_builder = StatsBuilder::default(); + stats_builder.start(); + let paths = if args.trace.is_dir() { // If we received a path to a directory as input, then we will try to symbolize // every file inside that directory.. @@ -249,13 +331,14 @@ fn main() -> Result<()> { let total = paths.len(); for (idx, path) in paths.into_iter().enumerate() { print!("\x1B[2K\r"); - symbolize_file(&mut symbolizer, &path, &args)?; + let n = symbolize_file(&mut symbolizer, &path, &args)?; + stats_builder.done_file(n.try_into()?); print!("[{}/{total}] {} done", idx + 1, path.display()); io::stdout().flush()?; } // Grab a few stats before exiting! - let stats = symbolizer.stop_stopwatch(); + let stats = stats_builder.stop(symbolizer); println!("\x1B[2K\r{stats}"); Ok(()) From 0f0e06042c6e57799345e62be3cf9e98c24542ab Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Wed, 29 May 2024 20:41:18 -0700 Subject: [PATCH 05/28] tweak ci for workspace --- .github/workflows/symbolizer-rs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/symbolizer-rs.yml b/.github/workflows/symbolizer-rs.yml index a512916..817f61c 100644 --- a/.github/workflows/symbolizer-rs.yml +++ b/.github/workflows/symbolizer-rs.yml @@ -71,10 +71,10 @@ jobs: run: cargo test --release --workspace - name: cargo check - run: cargo check + run: cargo check --workspace - name: cargo build - run: cargo build --release + run: cargo build --release --workspace - name: Upload artifacts uses: actions/upload-artifact@v4 From 8580f3f692062980b03d2f3d017ffae277488764 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Tue, 4 Jun 2024 07:21:33 -0700 Subject: [PATCH 06/28] ok, move out kdmp-parser out of symbolizer; expose a trait users can implement to create adaptors, also start to add tests. also make it ok to be offline --- Cargo.toml | 2 +- crates/symbolizer/src/address_space.rs | 68 ++++++++++ crates/symbolizer/src/lib.rs | 3 + crates/symbolizer/src/modules.rs | 4 +- crates/symbolizer/src/pe.rs | 171 +++++++++++++------------ crates/symbolizer/src/symbolizer.rs | 64 ++++----- crates/symbolizer/tests/basics.rs | 54 ++++++++ src/main.rs | 65 ++++++++-- 8 files changed, 307 insertions(+), 124 deletions(-) create mode 100644 crates/symbolizer/src/address_space.rs create mode 100644 crates/symbolizer/tests/basics.rs diff --git a/Cargo.toml b/Cargo.toml index 314d652..7472dfd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,9 +15,9 @@ edition.workspace = true anyhow = "1.0" clap = { version = "4.5", features = ["derive"] } symbolizer = { path = "crates/symbolizer" } -kdmp-parser = "0.2" env_logger = "0.11" itoa = "1.0" +kdmp-parser = { version = "0.2", path = "../kdmp-parser-rs" } [profile.release] debug = true diff --git a/crates/symbolizer/src/address_space.rs b/crates/symbolizer/src/address_space.rs new file mode 100644 index 0000000..c242c9c --- /dev/null +++ b/crates/symbolizer/src/address_space.rs @@ -0,0 +1,68 @@ +// Axel '0vercl0k' Souchet - May 30 2024 +use core::slice; +use std::{ + io, + mem::{self, MaybeUninit}, +}; + +pub trait AddressSpace { + fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result; + + fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result>; + + fn read_exact_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result<()> { + let size = self.read_at(addr, buf)?; + + if size != buf.len() { + Err(io::Error::new( + io::ErrorKind::Other, + format!("could read only {size} bytes instead of {}", buf.len()), + )) + } else { + Ok(()) + } + } + + fn try_read_exact_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { + let Some(size) = self.try_read_at(addr, buf)? else { + return Ok(None); + }; + + if size != buf.len() { + Err(io::Error::new( + io::ErrorKind::Other, + format!("could read only {size} bytes instead of {}", buf.len()), + )) + } else { + Ok(Some(())) + } + } + + fn read_struct_at(&mut self, addr: u64) -> io::Result + where + S: Copy, + { + let mut t = MaybeUninit::uninit(); + let size_of_t = mem::size_of_val(&t); + let slice_over_t = + unsafe { slice::from_raw_parts_mut(t.as_mut_ptr() as *mut u8, size_of_t) }; + + self.read_exact_at(addr, slice_over_t)?; + + Ok(unsafe { t.assume_init() }) + } + + fn try_read_struct_at(&mut self, addr: u64) -> io::Result> + where + S: Copy, + { + let mut t: MaybeUninit = MaybeUninit::uninit(); + let size_of_t = mem::size_of_val(&t); + let slice_over_t = + unsafe { slice::from_raw_parts_mut(t.as_mut_ptr() as *mut u8, size_of_t) }; + + Ok(self + .try_read_exact_at(addr, slice_over_t)? + .map(|_| unsafe { t.assume_init() })) + } +} diff --git a/crates/symbolizer/src/lib.rs b/crates/symbolizer/src/lib.rs index 120250c..a378bc3 100644 --- a/crates/symbolizer/src/lib.rs +++ b/crates/symbolizer/src/lib.rs @@ -5,9 +5,12 @@ mod misc; mod modules; mod pdbcache; mod pe; +mod address_space; mod stats; mod symbolizer; pub use error::{Error, Result}; +pub use modules::{Module, Modules}; pub use stats::Stats; pub use symbolizer::Symbolizer; +pub use address_space::AddressSpace; diff --git a/crates/symbolizer/src/modules.rs b/crates/symbolizer/src/modules.rs index e91ce37..bebf129 100644 --- a/crates/symbolizer/src/modules.rs +++ b/crates/symbolizer/src/modules.rs @@ -16,9 +16,9 @@ pub struct Module { impl Module { /// Create a [`Module`]. - pub fn new(name: String, start: u64, end: u64) -> Self { + pub fn new(name: impl Into, start: u64, end: u64) -> Self { Module { - name, + name: name.into(), at: start..end, } } diff --git a/crates/symbolizer/src/pe.rs b/crates/symbolizer/src/pe.rs index 850cb0b..7a34feb 100644 --- a/crates/symbolizer/src/pe.rs +++ b/crates/symbolizer/src/pe.rs @@ -6,15 +6,15 @@ use std::ops::Range; use std::path::PathBuf; use anyhow::{anyhow, Context}; -use kdmp_parser::{Gva, KdmpParserError, KernelDumpParser}; use log::debug; use crate::guid::Guid; use crate::misc::Rva; +use crate::address_space::AddressSpace; use crate::{Error as E, Result}; /// The IMAGE_DOS_HEADER. -#[derive(Default, Debug)] +#[derive(Default, Debug, Clone, Copy)] #[repr(C, packed(2))] pub struct ImageDosHeader { pub e_magic: u16, @@ -39,7 +39,7 @@ pub struct ImageDosHeader { } /// The IMAGE_NT_HEADERS. -#[derive(Default, Debug)] +#[derive(Default, Debug, Clone, Copy)] #[repr(C)] struct NtHeaders { signature: u32, @@ -47,7 +47,7 @@ struct NtHeaders { } /// The IMAGE_FILE_HEADER. -#[derive(Default, Debug)] +#[derive(Default, Debug, Clone, Copy)] #[repr(C)] pub struct ImageFileHeader { pub machine: u16, @@ -105,7 +105,7 @@ pub struct ImageOptionalHeader32 { } /// The IMAGE_OPTIONAL_HEADER64. -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy)] #[repr(C, packed(4))] pub struct ImageOptionalHeader64 { pub magic: u16, @@ -141,7 +141,7 @@ pub struct ImageOptionalHeader64 { } /// The IMAGE_DEBUG_DIRECTORY. -#[derive(Default, Debug)] +#[derive(Default, Debug, Clone, Copy)] #[repr(C)] pub struct ImageDebugDirectory { pub characteristics: u32, @@ -155,7 +155,7 @@ pub struct ImageDebugDirectory { } /// The IMAGE_EXPORT_DIRECTORY. -#[derive(Default, Debug)] +#[derive(Default, Debug, Clone, Copy)] #[repr(C)] pub struct ImageExportDirectory { pub characteristics: u32, @@ -172,7 +172,7 @@ pub struct ImageExportDirectory { } /// The code view information. -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy)] #[repr(C)] pub struct Codeview { pub signature: u32, @@ -233,15 +233,17 @@ pub fn array_offset(base: u64, rva_array: u32, idx: u32, entry_size: usize) -> O } /// Read a NULL terminated string from the dump file at a specific address. -pub fn read_string(parser: &KernelDumpParser, addr: u64, max: usize) -> Result { +pub fn read_string(addr_space: &mut impl AddressSpace, mut addr: u64, max: usize) -> Result> { let mut s = String::new(); let mut terminated = false; - let mut gva = Gva::new(addr); for _ in 0..max { let mut buf = [0]; - parser - .virt_read_exact(gva, &mut buf) - .map_err(|_| anyhow!("failed reading null terminated string"))?; + let Some(()) = addr_space + .try_read_exact_at(addr, &mut buf) + .context("failed reading null terminated string")? + else { + return Ok(None); + }; let c = buf[0]; if c == 0 { @@ -250,14 +252,14 @@ pub fn read_string(parser: &KernelDumpParser, addr: u64, max: usize) -> Result Result { + // All right let's parse the PE. + debug!("parsing PE @ {:#x}", base); + + // Read the DOS/NT headers. + let dos_hdr = addr_space + .read_struct_at::(base) + .context("failed to read ImageDosHeader")?; + let nt_hdr_addr = base + .checked_add(dos_hdr.e_lfanew.try_into().unwrap()) + .ok_or(anyhow!("overflow w/ e_lfanew"))?; + let nt_hdr = addr_space + .read_struct_at::(nt_hdr_addr) + .context("failed to read Ntheaders")?; + + // Let's verify the signature.. + if nt_hdr.signature != IMAGE_NT_SIGNATURE { + return Err(anyhow!("wrong PE signature for {base:#x}").into()); + } + + // ..and let's ignore non x64 PEs. + if nt_hdr.file_hdr.machine != IMAGE_FILE_MACHINE_AMD64 { + return Err(anyhow!("wrong architecture for {base:#x}").into()); + } + + // Now locate the optional header, and check that it looks big enough. + let opt_hdr_addr = nt_hdr_addr + .checked_add(mem::size_of_val(&nt_hdr).try_into().unwrap()) + .ok_or(anyhow!("overflow w/ nt_hdr"))?; + let opt_hdr_size = nt_hdr.file_hdr.size_of_optional_header as usize; + debug!("parsing optional hdr @ {:#x}", opt_hdr_addr); + + // If it's not big enough, let's bail. + if opt_hdr_size < mem::size_of::() { + return Err(anyhow!("optional header's size is too small").into()); + } + + // Read the IMAGE_OPTIONAL_HEADER64. + let opt_hdr = addr_space + .read_struct_at::(opt_hdr_addr) + .with_context(|| "failed to read ImageOptionalHeader64")?; + + // Read the PDB information if there's any. + let pdb_id = Self::try_parse_debug_dir(addr_space, base, &opt_hdr)?; + + // Read the EXPORT table if there's any. + let exports = match Self::try_parse_export_dir(addr_space, base, &opt_hdr) { + Ok(o) => o, + // Err(E::DumpParserError(KdmpParserError::AddrTranslation(_))) => None, + Err(e) => return Err(e), + } + .unwrap_or_default(); + + Ok(Self { pdb_id, exports }) + } + fn try_parse_debug_dir( - parser: &KernelDumpParser, + addr_space: &mut impl AddressSpace, base: u64, opt_hdr: &ImageOptionalHeader64, ) -> Result> { @@ -287,7 +345,7 @@ impl Pe { .checked_add(debug_data_dir.virtual_address.into()) .ok_or(anyhow!("overflow w/ debug_data_dir"))?; let Some(debug_dir) = - parser.try_virt_read_struct::(debug_dir_addr.into())? + addr_space.try_read_struct_at::(debug_dir_addr)? else { debug!( "failed to read ImageDebugDirectory {debug_dir_addr:#x} because of mem translation" @@ -311,7 +369,7 @@ impl Pe { let codeview_addr = base .checked_add(debug_dir.address_of_raw_data.into()) .ok_or(anyhow!("overflow w/ debug_dir"))?; - let Some(codeview) = parser.try_virt_read_struct::(codeview_addr.into())? else { + let Some(codeview) = addr_space.try_read_struct_at::(codeview_addr)? else { debug!("failed to read codeview {codeview_addr:#x} because of mem translation"); return Ok(None); }; @@ -334,7 +392,7 @@ impl Pe { ) .ok_or(anyhow!("overflow w/ debug_dir filename"))?; - let Some(amount) = parser.try_virt_read(file_name_addr.into(), &mut file_name)? else { + let Some(amount) = addr_space.try_read_at(file_name_addr, &mut file_name)? else { return Ok(None); }; @@ -354,7 +412,7 @@ impl Pe { } fn try_parse_export_dir( - parser: &KernelDumpParser, + addr_space: &mut impl AddressSpace, base: u64, opt_hdr: &ImageOptionalHeader64, ) -> Result>> { @@ -371,7 +429,7 @@ impl Pe { .checked_add(u64::from(export_data_dir.virtual_address)) .ok_or(anyhow!("export_data_dir"))?; let Some(export_dir) = - parser.try_virt_read_struct::(export_dir_addr.into())? + addr_space.try_read_struct_at::(export_dir_addr)? else { debug!("failed to read ImageExportDirectory {export_dir_addr:#x} because of mem translation"); return Ok(None); @@ -399,8 +457,8 @@ impl Pe { // Read the name RVA's.. let name_rva_addr = array_offset(base, addr_of_names, name_idx, mem::size_of::()) .ok_or(anyhow!("name_rva_addr"))?; - let Some(name_rva) = parser - .try_virt_read_struct::(name_rva_addr.into()) + let Some(name_rva) = addr_space + .try_read_struct_at::(name_rva_addr) .with_context(|| "failed to read EAT's name array".to_string())? else { debug!( @@ -413,14 +471,17 @@ impl Pe { .checked_add(name_rva.into()) .ok_or(anyhow!("overflow w/ name_addr"))?; // ..then read the string in memory. - let name = read_string(parser, name_addr, 64)?; + let Some(name) = read_string(addr_space, name_addr, 64)? else { + debug!("failed to read export's name #{name_idx}"); + return Ok(None); + }; names.push(name); // Read the ordinal. let ord_addr = array_offset(base, addr_of_ords, name_idx, mem::size_of::()) .ok_or(anyhow!("ord_addr"))?; - let Some(ord) = parser - .try_virt_read_struct::(ord_addr.into()) + let Some(ord) = addr_space + .try_read_struct_at::(ord_addr) .context("failed to read EAT's ord array")? else { debug!("failed to read EAT's ord array {ord_addr:#x} because of mem translation"); @@ -447,8 +508,8 @@ impl Pe { array_offset(base, addr_of_functs, addr_idx, mem::size_of::()) .ok_or(anyhow!("overflow w/ address_rva_addr"))?; - let Some(address_rva) = parser - .try_virt_read_struct::(address_rva_addr.into()) + let Some(address_rva) = addr_space + .try_read_struct_at::(address_rva_addr) .with_context(|| "failed to read EAT's address array".to_string())? else { debug!("failed to read EAT's address array {address_rva_addr:#x} because of mem translation"); @@ -490,58 +551,4 @@ impl Pe { Ok(Some(exports)) } - - pub fn new(parser: &KernelDumpParser, base: u64) -> Result { - // All right let's parse the PE. - debug!("parsing PE @ {:#x}", base); - - // Read the DOS/NT headers. - let dos_hdr = parser - .virt_read_struct::(base.into()) - .with_context(|| "failed to read ImageDosHeader")?; - let nt_hdr_addr = base - .checked_add(dos_hdr.e_lfanew.try_into().unwrap()) - .ok_or(anyhow!("overflow w/ e_lfanew"))?; - let nt_hdr = parser.virt_read_struct::(nt_hdr_addr.into())?; - - // Let's verify the signature.. - if nt_hdr.signature != IMAGE_NT_SIGNATURE { - return Err(anyhow!("wrong PE signature for {base:#x}").into()); - } - - // ..and let's ignore non x64 PEs. - if nt_hdr.file_hdr.machine != IMAGE_FILE_MACHINE_AMD64 { - return Err(anyhow!("wrong architecture for {base:#x}").into()); - } - - // Now locate the optional header, and check that it looks big enough. - let opt_hdr_addr = nt_hdr_addr - .checked_add(mem::size_of_val(&nt_hdr).try_into().unwrap()) - .ok_or(anyhow!("overflow w/ nt_hdr"))?; - let opt_hdr_size = nt_hdr.file_hdr.size_of_optional_header as usize; - debug!("parsing optional hdr @ {:#x}", opt_hdr_addr); - - // If it's not big enough, let's bail. - if opt_hdr_size < mem::size_of::() { - return Err(anyhow!("optional header's size is too small").into()); - } - - // Read the IMAGE_OPTIONAL_HEADER64. - let opt_hdr = parser - .virt_read_struct::(opt_hdr_addr.into()) - .with_context(|| "failed to read ImageOptionalHeader64")?; - - // Read the PDB information if there's any. - let pdb_id = Self::try_parse_debug_dir(parser, base, &opt_hdr)?; - - // Read the EXPORT table if there's any. - let exports = match Self::try_parse_export_dir(parser, base, &opt_hdr) { - Ok(o) => o, - Err(E::DumpParserError(KdmpParserError::AddrTranslation(_))) => None, - Err(e) => return Err(e), - } - .unwrap_or_default(); - - Ok(Self { pdb_id, exports }) - } } diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs index 0a46355..2dd70a9 100644 --- a/crates/symbolizer/src/symbolizer.rs +++ b/crates/symbolizer/src/symbolizer.rs @@ -11,13 +11,13 @@ use std::path::{Path, PathBuf}; use std::rc::Rc; use anyhow::Context; -use kdmp_parser::KernelDumpParser; use log::{debug, trace, warn}; use crate::misc::{fast_hex32, fast_hex64}; use crate::modules::{Module, Modules}; use crate::pdbcache::{PdbCache, PdbCacheBuilder}; use crate::pe::{PdbId, Pe}; +use crate::address_space::AddressSpace; use crate::stats::{Stats, StatsBuilder}; use crate::{Error as E, Result}; @@ -93,7 +93,7 @@ pub fn try_download_from_guid( return Err(E::DownloadPdb { pdb_url, e: e.into(), - }) + }); } }; @@ -143,6 +143,7 @@ fn get_pdb( sympath: &Path, symsrvs: &Vec, pdb_id: &PdbId, + offline: bool, ) -> Result> { // Let's see if the path exists locally.. if pdb_id.path.is_file() { @@ -157,7 +158,12 @@ fn get_pdb( return Ok(Some((local_path, PdbKind::LocalCache))); } - // The last resort is to try to download it... + // If we're offline, let's just skip the downloading part. + if offline { + return Ok(None); + } + + // We didn't find a PDB on disk, so last resort is to try to download it. let downloaded_path = try_download_from_guid(symsrvs, sympath, pdb_id)?; Ok(downloaded_path.map(|p| (p, PdbKind::Download))) @@ -196,7 +202,10 @@ impl BuildHasher for IdentityHasher { /// The [`Symbolizer`] is the main object that glues all the logic. /// /// It downloads, parses PDB information, and symbolizes. -pub struct Symbolizer { +pub struct Symbolizer +where + AS: AddressSpace, +{ /// Keep track of some statistics regarding the number of lines symbolized, /// PDB downloaded, etc. stats: StatsBuilder, @@ -209,7 +218,7 @@ pub struct Symbolizer { /// The kernel dump parser. We need this to be able to read PDB identifiers /// out of the PE headers, as well as reading the export tables of those /// modules. - parser: KernelDumpParser, + addr_space: RefCell, /// List of symbol servers to try to download PDBs from when needed. symsrvs: Vec, /// Caches addresses to symbols. This allows us to not have to symbolize an @@ -218,42 +227,39 @@ pub struct Symbolizer { /// Each parsed module is stored in this cache. We parse PDBs, etc. only /// once and then the [`PdbCache`] is used to query. pdb_caches: RefCell, Rc>>, + offline: bool, } -impl Symbolizer { +impl Symbolizer +where + AS: AddressSpace, +{ /// Create a symbolizer. - /// - /// The `symcache` is used both for reading existing PDBs as well as writing - /// the newly downloaded ones, the `parser` is used to enumerate the kernel - /// / user modules loaded at the crash-dump time as well as reading PDB - /// identifiers off the modules' PE headers, and the HTTP symbol servers are - /// a list of servers that will get contacted to try to find one that knows - /// about a specific PDB file. pub fn new( symcache: impl AsRef, - parser: KernelDumpParser, symsrvs: Vec, - ) -> Result { - // Read both the user & kernel modules from the dump file. - let mut modules = Vec::new(); - for (at, name) in parser.user_modules().chain(parser.kernel_modules()) { - let (_, filename) = name.rsplit_once('\\').unwrap_or((name, name)); - modules.push(Module::new( - filename.to_string(), - at.start.into(), - at.end.into(), - )); + modules: Vec, + addr_space: AS, + ) -> Self { + let offline = match ureq::get("https://www.google.com/").call() { + Ok(_) => false, + Err(_) => true, + }; + + if offline { + println!("Turning on 'offline' mode as you seem to not have internet access.."); } - Ok(Self { + Self { stats: Default::default(), symcache: symcache.as_ref().to_path_buf(), modules: Modules::new(modules), - parser, + addr_space: RefCell::new(addr_space), symsrvs, addr_cache: Default::default(), pdb_caches: Default::default(), - }) + offline, + } } pub fn stats(self) -> Stats { @@ -297,7 +303,7 @@ impl Symbolizer { // Let's start by parsing the PE to get its exports, and PDB information if // there's any. - let pe = Pe::new(&self.parser, module.at.start)?; + let pe = Pe::new(&mut *self.addr_space.borrow_mut(), module.at.start)?; // Ingest the EAT. builder.ingest(pe.exports.into_iter()); @@ -307,7 +313,7 @@ impl Symbolizer { if let Some(pdb_id) = pe.pdb_id { // Try to get a PDB.. - let pdb_path = get_pdb(&self.symcache, &self.symsrvs, &pdb_id)?; + let pdb_path = get_pdb(&self.symcache, &self.symsrvs, &pdb_id, self.offline)?; // .. and ingest it if we have one. if let Some((pdb_path, pdb_kind)) = pdb_path { diff --git a/crates/symbolizer/tests/basics.rs b/crates/symbolizer/tests/basics.rs new file mode 100644 index 0000000..18beb33 --- /dev/null +++ b/crates/symbolizer/tests/basics.rs @@ -0,0 +1,54 @@ +// Axel '0vercl0k' Souchet - May 30 2024 +use std::io::{self, Read, Seek}; +use std::path::Path; +use std::{env::temp_dir, fs::File}; + +use symbolizer::{AddressSpace, Module, Symbolizer}; + +#[derive(Debug)] +struct RawAddressSpace { + raw: File, + len: u64, +} + +impl RawAddressSpace { + fn new(path: &impl AsRef) -> io::Result { + let raw = File::open(path)?; + let metadata = raw.metadata()?; + let len = metadata.len(); + + Ok(Self { raw, len }) + } + + fn len(&self) -> u64 { + self.len + } +} + +impl AddressSpace for RawAddressSpace { + fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> std::io::Result { + self.raw.seek(io::SeekFrom::Start(addr))?; + + self.raw.read(buf) + } + + fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> std::io::Result> { + self.read_at(addr, buf).map(Some) + } +} + +#[test] +fn foo() { + let symcache = temp_dir().join("basics"); + let raw = RawAddressSpace::new(&r"c:\work\mrt100.raw").unwrap(); + let modules = vec![Module::new("mrt100", 0x0, raw.len())]; + + let mut symb = Symbolizer::new(symcache, vec![], modules, raw); + + let mut buf = Vec::new(); + symb.full(&mut buf, 0x19_50).unwrap(); + assert_eq!( + String::from_utf8(buf).unwrap().trim_end(), + "mrt100!GetManagedRuntimeService+0x0" + ); +} diff --git a/src/main.rs b/src/main.rs index 64aacfa..c330111 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,7 @@ use std::{env, fs, io}; use anyhow::{anyhow, bail, Context, Result}; use clap::{ArgAction, Parser, ValueEnum}; use kdmp_parser::KernelDumpParser; -use symbolizer::Symbolizer; +use symbolizer::{AddressSpace, Module, Symbolizer}; mod hex_addrs_iter; mod human; @@ -45,7 +45,7 @@ impl StatsBuilder { self.n_lines += n; } - pub fn stop(self, symbolizer: Symbolizer) -> Stats { + pub fn stop(self, symbolizer: Symbolizer) -> Stats { Stats { time: self.start.elapsed().as_secs(), n_files: self.n_files, @@ -74,14 +74,14 @@ impl Display for Stats { )?; if self.symbolizer_stats.size_downloaded > 0 { - writeln!( + write!( f, ", downloaded {} / {} PDBs)", self.symbolizer_stats.size_downloaded.human_bytes(), self.symbolizer_stats.n_downloads.human_number() ) } else { - writeln!(f, ")") + write!(f, ")") } } } @@ -145,7 +145,7 @@ struct CliArgs { skip: usize, /// The maximum amount of lines to process per file. #[arg(short, long, default_value = "20000000")] - max: Option, + limit: Option, /// The symbolization style (mod+offset or mod!f+offset). #[arg(long, default_value = "full")] style: SymbolStyle, @@ -169,6 +169,9 @@ struct CliArgs { /// The size in bytes of the buffer used to read data from the input files. #[arg(long, default_value_t = 1024 * 1024)] in_buffer_size: usize, + /// Don't try to download PDBs off the network. + #[arg(long, default_value_t = false)] + offline: bool, } /// Create the output file from an input. @@ -207,7 +210,7 @@ fn get_output_file(args: &CliArgs, input: &Path, output: &Path) -> Result /// Process an input file and symbolize every line. fn symbolize_file( - symbolizer: &mut Symbolizer, + symbolizer: &mut Symbolizer, trace_path: impl AsRef, args: &CliArgs, ) -> Result { @@ -223,7 +226,7 @@ fn symbolize_file( let mut output = BufWriter::with_capacity(args.out_buffer_size, writer); let mut line_number = 1 + args.skip; let mut lines_symbolized = 1; - let max_line = args.max.unwrap_or(usize::MAX); + let limit = args.limit.unwrap_or(usize::MAX); let reader = BufReader::with_capacity(args.in_buffer_size, input); for addr in HexAddressesIterator::new(reader).skip(args.skip) { let addr = addr.with_context(|| { @@ -251,10 +254,10 @@ fn symbolize_file( ) })?; - if lines_symbolized >= max_line { + if lines_symbolized >= limit { println!( "Hit maximum line limit {} for {}", - max_line, + limit, trace_path.display() ); break; @@ -267,6 +270,33 @@ fn symbolize_file( Ok(lines_symbolized) } +#[derive(Debug)] +struct ParserWrapper { + parser: KernelDumpParser, +} + +impl ParserWrapper { + fn new(parser: KernelDumpParser) -> Self { + Self { + parser, + } + } +} + +impl AddressSpace for ParserWrapper { + fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result { + self.parser + .virt_read(addr.into(), buf) + .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) + } + + fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { + self.parser + .try_virt_read(addr.into(), buf) + .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) + } +} + fn main() -> Result<()> { #[cfg(debug_assertions)] env_logger::init(); @@ -308,8 +338,23 @@ fn main() -> Result<()> { bail!("no sympath"); }; + let mut modules = Vec::new(); + for (at, name) in parser.user_modules().chain(parser.kernel_modules()) { + let (_, filename) = name.rsplit_once('\\').unwrap_or((name, name)); + modules.push(Module::new( + filename.to_string(), + at.start.into(), + at.end.into(), + )); + } + // All right, ready to create the symbolizer. - let mut symbolizer = Symbolizer::new(symcache, parser, args.symsrv.clone())?; + let mut symbolizer = Symbolizer::new( + symcache, + args.symsrv.clone(), + modules, + ParserWrapper::new(parser), + ); let mut stats_builder = StatsBuilder::default(); stats_builder.start(); From d991f647e7ac95e5176942af2f9d1ec0afefff77 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Tue, 4 Jun 2024 08:13:27 -0700 Subject: [PATCH 07/28] actually remove kdmp-parser, have symbolizer not add a new line, fmt + nightly --- Cargo.toml | 2 +- crates/symbolizer/Cargo.toml | 4 +++- crates/symbolizer/src/address_space.rs | 6 ++---- crates/symbolizer/src/error.rs | 3 --- crates/symbolizer/src/lib.rs | 4 ++-- crates/symbolizer/src/pe.rs | 8 ++++++-- crates/symbolizer/src/symbolizer.rs | 16 ++-------------- crates/symbolizer/tests/basics.rs | 5 +++-- src/main.rs | 6 +++--- 9 files changed, 22 insertions(+), 32 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7472dfd..dc2d6a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ clap = { version = "4.5", features = ["derive"] } symbolizer = { path = "crates/symbolizer" } env_logger = "0.11" itoa = "1.0" -kdmp-parser = { version = "0.2", path = "../kdmp-parser-rs" } +kdmp-parser = "0.2" [profile.release] debug = true diff --git a/crates/symbolizer/Cargo.toml b/crates/symbolizer/Cargo.toml index 3a57b41..8be91a1 100644 --- a/crates/symbolizer/Cargo.toml +++ b/crates/symbolizer/Cargo.toml @@ -16,6 +16,8 @@ pdb = "0.8" log = "0.4" msvc-demangler = "0.10" ureq = { version = "2.9", default-features = false, features = ["tls", "gzip"] } -kdmp-parser = "0.2" thiserror = "1.0.61" anyhow = "1.0.86" + +[build-dependencies] +kdmp-parser = "0.2" diff --git a/crates/symbolizer/src/address_space.rs b/crates/symbolizer/src/address_space.rs index c242c9c..ec5612d 100644 --- a/crates/symbolizer/src/address_space.rs +++ b/crates/symbolizer/src/address_space.rs @@ -1,9 +1,7 @@ // Axel '0vercl0k' Souchet - May 30 2024 use core::slice; -use std::{ - io, - mem::{self, MaybeUninit}, -}; +use std::io; +use std::mem::{self, MaybeUninit}; pub trait AddressSpace { fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result; diff --git a/crates/symbolizer/src/error.rs b/crates/symbolizer/src/error.rs index 414f760..31f84d1 100644 --- a/crates/symbolizer/src/error.rs +++ b/crates/symbolizer/src/error.rs @@ -4,7 +4,6 @@ use std::num::TryFromIntError; use std::path::PathBuf; use std::string::FromUtf8Error; -use kdmp_parser::KdmpParserError; use pdb::PdbInternalSectionOffset; use thiserror::Error; @@ -31,8 +30,6 @@ pub enum Error { }, #[error("the module path is either 0 or larger than reasonable")] CodeViewInvalidPath, - #[error("kdmp parser")] - DumpParserError(#[from] KdmpParserError), #[error("{0}")] Anyhow(#[from] anyhow::Error), } diff --git a/crates/symbolizer/src/lib.rs b/crates/symbolizer/src/lib.rs index a378bc3..b7339cb 100644 --- a/crates/symbolizer/src/lib.rs +++ b/crates/symbolizer/src/lib.rs @@ -1,16 +1,16 @@ // Axel '0vercl0k' Souchet - May 26th 2024 +mod address_space; mod error; mod guid; mod misc; mod modules; mod pdbcache; mod pe; -mod address_space; mod stats; mod symbolizer; +pub use address_space::AddressSpace; pub use error::{Error, Result}; pub use modules::{Module, Modules}; pub use stats::Stats; pub use symbolizer::Symbolizer; -pub use address_space::AddressSpace; diff --git a/crates/symbolizer/src/pe.rs b/crates/symbolizer/src/pe.rs index 7a34feb..9e7f9f7 100644 --- a/crates/symbolizer/src/pe.rs +++ b/crates/symbolizer/src/pe.rs @@ -8,9 +8,9 @@ use std::path::PathBuf; use anyhow::{anyhow, Context}; use log::debug; +use crate::address_space::AddressSpace; use crate::guid::Guid; use crate::misc::Rva; -use crate::address_space::AddressSpace; use crate::{Error as E, Result}; /// The IMAGE_DOS_HEADER. @@ -233,7 +233,11 @@ pub fn array_offset(base: u64, rva_array: u32, idx: u32, entry_size: usize) -> O } /// Read a NULL terminated string from the dump file at a specific address. -pub fn read_string(addr_space: &mut impl AddressSpace, mut addr: u64, max: usize) -> Result> { +pub fn read_string( + addr_space: &mut impl AddressSpace, + mut addr: u64, + max: usize, +) -> Result> { let mut s = String::new(); let mut terminated = false; for _ in 0..max { diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs index 2dd70a9..42bceac 100644 --- a/crates/symbolizer/src/symbolizer.rs +++ b/crates/symbolizer/src/symbolizer.rs @@ -13,11 +13,11 @@ use std::rc::Rc; use anyhow::Context; use log::{debug, trace, warn}; +use crate::address_space::AddressSpace; use crate::misc::{fast_hex32, fast_hex64}; use crate::modules::{Module, Modules}; use crate::pdbcache::{PdbCache, PdbCacheBuilder}; use crate::pe::{PdbId, Pe}; -use crate::address_space::AddressSpace; use crate::stats::{Stats, StatsBuilder}; use crate::{Error as E, Result}; @@ -241,11 +241,7 @@ where modules: Vec, addr_space: AS, ) -> Self { - let offline = match ureq::get("https://www.google.com/").call() { - Ok(_) => false, - Err(_) => true, - }; - + let offline = ureq::get("https://www.google.com/").call().is_err(); if offline { println!("Turning on 'offline' mode as you seem to not have internet access.."); } @@ -386,10 +382,6 @@ where } .context("failed to write symbolized value to output")?; - output - .write_all(&[b'\n']) - .context("failed to write line feed modoff addr")?; - Ok(()) } @@ -402,10 +394,6 @@ where .write_all(sym.as_bytes()) .context("failed to write symbolized value to output")?; - output - .write_all(&[b'\n']) - .context("failed to write line feed")?; - Ok(()) } None => self.modoff(output, addr), diff --git a/crates/symbolizer/tests/basics.rs b/crates/symbolizer/tests/basics.rs index 18beb33..f17ffb6 100644 --- a/crates/symbolizer/tests/basics.rs +++ b/crates/symbolizer/tests/basics.rs @@ -1,7 +1,8 @@ // Axel '0vercl0k' Souchet - May 30 2024 +use std::env::temp_dir; +use std::fs::File; use std::io::{self, Read, Seek}; use std::path::Path; -use std::{env::temp_dir, fs::File}; use symbolizer::{AddressSpace, Module, Symbolizer}; @@ -48,7 +49,7 @@ fn foo() { let mut buf = Vec::new(); symb.full(&mut buf, 0x19_50).unwrap(); assert_eq!( - String::from_utf8(buf).unwrap().trim_end(), + String::from_utf8(buf).unwrap(), "mrt100!GetManagedRuntimeService+0x0" ); } diff --git a/src/main.rs b/src/main.rs index c330111..3aa3c17 100644 --- a/src/main.rs +++ b/src/main.rs @@ -254,6 +254,8 @@ fn symbolize_file( ) })?; + output.write_all(&[b'\n'])?; + if lines_symbolized >= limit { println!( "Hit maximum line limit {} for {}", @@ -277,9 +279,7 @@ struct ParserWrapper { impl ParserWrapper { fn new(parser: KernelDumpParser) -> Self { - Self { - parser, - } + Self { parser } } } From aaef2ecd853a67cc44c6b11f537a6bdf73e5586f Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Wed, 5 Jun 2024 11:35:25 -0700 Subject: [PATCH 08/28] revert arguments --- crates/symbolizer/src/symbolizer.rs | 6 +++--- src/main.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs index 42bceac..119baa6 100644 --- a/crates/symbolizer/src/symbolizer.rs +++ b/crates/symbolizer/src/symbolizer.rs @@ -365,7 +365,7 @@ where /// Symbolize `addr` in the `module+offset` style and write the result into /// `output`. - pub fn modoff(&mut self, output: &mut impl Write, addr: u64) -> Result<()> { + pub fn modoff(&mut self, addr: u64, output: &mut impl Write) -> Result<()> { let mut buffer = [0; 16]; if let Some(module) = self.modules.find(addr) { output.write_all(module.name.as_bytes())?; @@ -387,7 +387,7 @@ where /// Symbolize `addr` in the `module!function+offset` style and write the /// result into `output`. - pub fn full(&mut self, output: &mut impl Write, addr: u64) -> Result<()> { + pub fn full(&mut self, addr: u64, output: &mut impl Write) -> Result<()> { match self.try_symbolize_addr(addr)? { Some(sym) => { output @@ -396,7 +396,7 @@ where Ok(()) } - None => self.modoff(output, addr), + None => self.modoff(addr, output), } } } diff --git a/src/main.rs b/src/main.rs index 3aa3c17..7c5c54f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -244,8 +244,8 @@ fn symbolize_file( } match args.style { - SymbolStyle::Modoff => symbolizer.modoff(&mut output, addr), - SymbolStyle::Full => symbolizer.full(&mut output, addr), + SymbolStyle::Modoff => symbolizer.modoff(addr, &mut output), + SymbolStyle::Full => symbolizer.full(addr, &mut output), } .with_context(|| { format!( From 4fbc99f385c18a60130735acd6133682336b3fb3 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Wed, 5 Jun 2024 11:36:33 -0700 Subject: [PATCH 09/28] some tests --- crates/symbolizer/tests/basics.rs | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/crates/symbolizer/tests/basics.rs b/crates/symbolizer/tests/basics.rs index f17ffb6..595eb6a 100644 --- a/crates/symbolizer/tests/basics.rs +++ b/crates/symbolizer/tests/basics.rs @@ -39,17 +39,31 @@ impl AddressSpace for RawAddressSpace { } #[test] -fn foo() { +fn raw() { let symcache = temp_dir().join("basics"); let raw = RawAddressSpace::new(&r"c:\work\mrt100.raw").unwrap(); - let modules = vec![Module::new("mrt100", 0x0, raw.len())]; + let raw_len = raw.len(); + let modules = vec![Module::new("mrt100", 0x0, raw_len)]; let mut symb = Symbolizer::new(symcache, vec![], modules, raw); - let mut buf = Vec::new(); - symb.full(&mut buf, 0x19_50).unwrap(); - assert_eq!( - String::from_utf8(buf).unwrap(), - "mrt100!GetManagedRuntimeService+0x0" - ); + let expected = [ + ( + 0x19_50, + "mrt100!GetManagedRuntimeService+0x0", + "mrt100+0x00001950", + ), + (raw_len, "0x0000000000009000", "0x0000000000009000"), + (0xdeadbeef, "0x00000000deadbeef", "0x00000000deadbeef"), + ]; + + for (addr, expected_full, expected_modoff) in expected { + let mut full = Vec::new(); + symb.full(addr, &mut full).unwrap(); + assert_eq!(String::from_utf8(full).unwrap(), expected_full); + + let mut modoff = Vec::new(); + symb.modoff(addr, &mut modoff).unwrap(); + assert_eq!(String::from_utf8(modoff).unwrap(), expected_modoff); + } } From 5a6602d0b8794f43ddbe7bf54a788b0c67e7e596 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Wed, 5 Jun 2024 17:58:01 -0700 Subject: [PATCH 10/28] add test w/ gimli/object --- .github/workflows/symbolizer-rs.yml | 2 +- crates/symbolizer/Cargo.toml | 4 +- crates/symbolizer/fixtures/mrt100.dll | Bin 0 -> 36864 bytes crates/symbolizer/fixtures/mrt100.raw | Bin 0 -> 36864 bytes .../src/{address_space.rs => addr_space.rs} | 2 +- crates/symbolizer/src/lib.rs | 4 +- crates/symbolizer/src/pe.rs | 10 +- crates/symbolizer/src/symbolizer.rs | 10 +- crates/symbolizer/tests/basics.rs | 143 +++++++++++++++--- src/main.rs | 6 +- 10 files changed, 139 insertions(+), 42 deletions(-) create mode 100644 crates/symbolizer/fixtures/mrt100.dll create mode 100644 crates/symbolizer/fixtures/mrt100.raw rename crates/symbolizer/src/{address_space.rs => addr_space.rs} (98%) diff --git a/.github/workflows/symbolizer-rs.yml b/.github/workflows/symbolizer-rs.yml index 817f61c..f00b910 100644 --- a/.github/workflows/symbolizer-rs.yml +++ b/.github/workflows/symbolizer-rs.yml @@ -32,7 +32,7 @@ jobs: - name: cargo clippy env: RUSTFLAGS: "-Dwarnings" - run: cargo clippy + run: cargo clippy --workspace doc: name: doc diff --git a/crates/symbolizer/Cargo.toml b/crates/symbolizer/Cargo.toml index 8be91a1..004249a 100644 --- a/crates/symbolizer/Cargo.toml +++ b/crates/symbolizer/Cargo.toml @@ -19,5 +19,7 @@ ureq = { version = "2.9", default-features = false, features = ["tls", "gzip"] } thiserror = "1.0.61" anyhow = "1.0.86" -[build-dependencies] +[dev-dependencies] kdmp-parser = "0.2" +udmp-parser = "0.2" +object = {version = "0.36.0", default-features = false, features = ["read", "read_core", "pe", "std"]} diff --git a/crates/symbolizer/fixtures/mrt100.dll b/crates/symbolizer/fixtures/mrt100.dll new file mode 100644 index 0000000000000000000000000000000000000000..a986193308ecb7810ab1561901b6821a4ffd1ff0 GIT binary patch literal 36864 zcmeHwe|%KcweOxJlMMOcjF4ae#Q_G4TH=_Y8bEZ$Oz^}`AcgRwiY6hMB-19D$;=rE zT(pTpD#>x~rB(c3FYOxiJ`B-Dg?Bh_g&}5 zE>L>x;_sjN(J^+# zwMVXCKbiLWye^&V^?Bv>o`Bis_tyEV8qL*JO-)|jyxDE`2b;{ECbM((T63ef#+{#) zm2Qz$cR%VGYX5DTpoB+vKQ25crc;GqkE982>`v|sBmKtil+N#nG^tY%XN^tDAF}4w@J)reefse5=>Xb^xjwOLA zPb=~fk9<`WQMg~AC#~p1(NHPNRKS=)t!GM!eBRx{gJ+scKL}=-f+&v}*5vzZs`x6# z{4>Et9ikjj)niB1`H~nLPDMr;0_h~GdhCov^L>(_*v3j^k{nUhV<&z7fWI0%5fdLG zBu7;B*hM*agBOB?5o<*tIijk^&e)x*q!!f>&=AlN&=AlN&=AlN&=AlN&=AlN_?tkW zUw*jI*}TvtT`((_p266TXfVYV9o2Wy36cvP%j;3(6xuv=dVS!a;}4<)a+_bi{}$7= zFCIyxpZo>Cb?)%5I3dX;^l{7eGZr!y%;JPQ4M<#p1YdwW#82Z@KqBK8Nbs|yi~-7+ zE|=)1gub-HFnYa|`xY_g$+>S(BAW|!8%i#m;^suT1>Geb%ogqGFA14hSCXBzPH92& zkM1k{x0l}jk$o*%ZnpK62t$r8W;QctxFm&(=j@ornDJUCTHle(wMTh^V-ff%3Sa1L zj|S7a^iVyTWjxfK#aLQvd!KPtH}EpwJEgk*=F^)Bmfm50hj1o|DXvGJL9+mF<72aRmh-Tkg z>j(eG^S^-pLR%jfc3QahL9WATL2XV;eW@_u5?q$bQenuryPNptQsHQc@D>c{P%47~ zG(_IVD-?0)d}-x{lSfE%_nGWDw>zJg+gBo-gr@7JXny1`(kzaJ=A$P~zrX6!W1OJ> z23~e>9Z57BCZ6WPw>G_flm1h2{QDr4j2BOw?HDz!k_)>m=HqB1dXI6!X_!*3m?HFE zI8{6+>RjNoSezZnD$qx+`Sa~23^`kW?5mhAm?yr(JZZkTG1vOYzYHn!QL1&W(%>nx z@S(q;s>!pkxp3%qpHbII{mX@FEP1R{=mj-1mvGcJh%S%k#1yUS`7lFa{x!V`f0(h> z_sv5)4pDzj%y-c{#`5%Nyb8DjSO0n7ruyLKIYamS{H7P5wy#~kfonT!)lo<3Z(jyN zow&nd2B$vrcc%eCfq~F5y+BBq#L5bAApiFAU8& zbK$MF56N*tBNRQU)GG9F!lR<$G(@Ij9pc)KQOhdsPmB{~1UDzWH}{8&^v{U%C7!cF z(P8Urj~XBE7RU>$onrIoTibqi=)q4({xDRM|J$gvK;0P&6y}%qw_3GmQuEL+9=h%4 zx-m`w`Wp>gN4<&b@a1wHEoR4hRBCr~LdrrbDz=@s#Y=>dVga&TrAzpFZb|5~M%S_8utNu>O^Q+u!vnp~ucuOZCvmDCQBJ7G*~ur0uP5FY`4^-KkbhD6E3WI>%Xrlz2)3_s4M5py>vISf z9m1ICv+8Hb(1|Ym!Wq$nJB8zO`a~}mdOu0{C5Ni)R11d+tQA~*;iX*0luM%|5r?1?z>*-q<8~Nk;cwEwzs(fmH?m8~~qJ@1PkHZr%axQ{?391)8LHii`9o`fppe&i0 z?9zmc=PXyiL6>?jAQPjbm3Par+~B>OP>kW54ARAQ6d6FHpeE3hpt+#Epk~lLpct~n z2SBZ$;0I;1Ig;wo7c+W3qUMKBxt9Ni;&<36V_)b`n3?xtYysCU;5D*mkjrcQ;_a zO0I8oUfXAmAYV_}?k3(t+0~9xC$b5rQEU}3w_U4i#nJZu7}YpzbY}EI#$hb#M)f@Q_m1w->5lFXXLK1v z6KsRVgMIOChtPB3oHH2%s~5wI>(DP=${5y;4*gdU88z7t>PBba2Ut)?8A^oLVg*M0 znW^=>&ffVdCzOuUQdNg-JPjWBES`MUb_@JEFZnEaEH_)%+<(_~`nPEPiD8Ms?#GZ4 zn=eHGxO%zb!un_M=eoooMEVEnG&zk)`cA-!LSJrdP>SHNqShzMFRQRFc*(w>>c<$jx1$X1jWev@AZ=Dku!3Gf z*R>A@jZ1U#P{p^?Y|)V`&^6&ZS)-LK^U=V{M_az((f+IeDPPa|ULh?jf8f ztmM6Vkq^T*#e^SyY;f{oR$|iEDkaDW21BLZ?N!n2}r|53`*>jg6L>k>|G=d zU=>tVPG#ZnnlT;V@ODeO)p|3~7<#=11=C}HgaRbeVxJ;1diFB9VrP&O`{frZlKpLi z1@o4-zr^ro?}J-azeRfBTnU5y2SC< zKrW0Iw?P<$*{E0U3tG1x6P5BNbk>ruh&`A$@;Z32Rggn%`q;BbT@H_V5Fq|YZbD;! zzjW3&9@Jk4W$k^hkP9(7dv@xtLOSNh0u*}}`ed}SV<)A=G~hdc=;%kCJp~ZGTfP~47afR^kk@`DXo2P1kQ=)RB z)h^YCPh-NiaMf8+M6g&rNF@2Swxx6rDOk~!0moju)R7x3cini@c*v2>g}SaUI!~lH~(@u5fpzYk9Xn9q&|=yIsqpeghXenm76$_Q7!6G{Uk&+brxs zilt+Gr-IVnJ9I2JvA2*g>pT@Z1mkUkz$s(LnP3`@Ih{9%T{Z!)++VFLwjhUfp5jw> z3X#BLv(*mzEe za-ionIx!R?H#{unib5`&UU7J^y5Pyf;TUBgnMXP)f_-B=XPKzFy_YiaUaXAkXs2q( z5RvKV+C#O5qHy!=JIT%@GE0z3(CaihdSR)wgO<}6kLUbPTp7bEO0OBA4?7_)Ts&rc zyocf(Rk|AjRGU4X^L<1yR5Q^nhA@{LJBWpW&Pyk$mbPPr*@PRN*k72r@HJOWCoL;X zAfs1f|3WyJ;0(Ca>z}y&TzfnxpV}7c!7KUq{O?H~fEFLQhs6F7JqvW5p=Bbx;%$)2 z>-r$$%XBrrc6#=~tGb>%s+t_q*yDs#)Z(Y%7tv?&oEH#@=hP4YLhq4jDU1fJ`ChCH zmEb8eOVLN&gkgWSP#!g#M6~#{!?fXh{d$`3dFYZKQy1YyZXsrF)%A!2dAtXtKGUCr z09MJniQS=pB2+iYIM<8FkuK z@7GAclvq;mlEceZ+Zm@Y=x7_GbEKo~V>(tkg%iRUj#l9!U2(=q<32~*IXZQA zq*#hG2B0TJxJbRzQEtf$mT{r1Lkm&s!m&Hz#ud?29pm#>RHh~|e%=Znk=gnK7eB|| z(!9mO;SC-Ahqf7??ha%-1^o^%j^huGj`J+VB@FO09D?0YT3wcc_eRMBTy?K&!9YoM&)q_{i4%%U z*b;&m&O3Fw9JU*=ED{)o+!lM{W65qa# zc4?gOQD>n6#MTG%8); zteInUP%8Eb>);JaD`RhbD(SN(Rtl6A%h~$c3Na?7cjm|iw%YSIgg~^6wUR5W{JKm{ z61B^;Os4EgiSLu?uuRLQOZ)>eHO-LHyJR{nQ4GVC)K0Pg z-{o|=EI&3S|3x{yN~RxAsi)R1)##V$-^=s`nZ6;@vocMR?a!2Hu1uYBK9iqpIc=E2 zx61O0oG(ur*9x^SmeQ3|s3QNTsP7u7TvVoiN^UCu)_LHZ!v+iwd=WFi5ivt0{lFD5 z1N;y(zzHD(ryB#@4>Q2`FasP9GUx{`2TA&Yvq4!L!3=OS_`zRc3~(?gi=(C?nu{O! z4uQ#S!rJ&9OXv~7pyHpD5JRrkT5Y8fx!hiJx>{(K4 zQtKYHX$j)}pk`1Hdd3R;-=B{w0|-|wVrR+jB ziReXqljX#+Et7aT#uCMks@nj#?KDJln)p}|I zn73#xt;fnYW32$Nf%3&~$SeW0Gl6HN{%MiEIa9v5%Xdp@`u2`q(8zsN02A2R6^#6d!m|;N z-H!T5kJP7VHPUv*zKNZ_%%hP5ZNtE=9*u z?!@sJNAlEyB&(R-%-F0Pqz99%`!S~~p=H)JdX}ME#8Q08l}T3gJ@{r+pqrJ>X7O2U zR!t_GRi43SEkW9!GpsYFGGom&W?W)m#x)tfS&Q)Jqb2)UjxNGd%cSySa74QF6qXK| z^fjrzS=XntIls@upW5H?(yV_-S_G?r7723@Hvr{K!rRmelv zuvXk0`yB$!eR?)wLso~Nq~-RNU8$;%%EAQpEaYBA=#b0qMf_I?`{eX*5dT<}PwC&Q z@~6#CWwVzU*=+yJ;cVZGO43W%-)v@#*0rBP4^h#EHA!U>Hfg{H7!_prAS%l1vQk*q zl4&ffAhm27^~-$56kT;#8fkePhc2SJBppivjyk-u&#plzPh;88mtA1;>2e6`Bfz># z#9G2Qm8BP?`gGHDY#RDkXGmcNzto$OJ+Nhoo*Ac4*psk}>atQwdX)9akNHFGR6;${ z4G2DjGWhZbK@;A#rlEn|<>pJPnyTvDHEV*nhtTL=>-In3sdhtZ_zselp80l+cg(z< zD&8^&d_8#MD&8jW4492(%unTMaALk;hU`@Fo&awT%^DT23q11G2UNTv@S@ zHw+%9;#rE}o58DB*|G|}7T8y);`zbjz-v*JeFD6lq)#RHucQxhCY9W;!81e7tddJz ziMBy*qe{*SUOm~O;+28d0^a2wZND#}^0&Ui75rA88;WYZ6*cYv@Aq!4Xz&Dh z_LwM9?QZe#EF#6dO|_o7px<55ROR=3w^UU5>mFc7#XNV5yE@3bE4)oqpn|X3+~8*H zHXYSMMtZ6mJlhfl-Axa8{NAQU++<=arpioG@r@+X5OCL5)Os4w(h4%3g_FUp^}F3{ zh+>UZ4fFwdCab6@stEYp)t*{UbwzzuQ%!^0k2k!m(cKtu^K2?EKELOIDt;0!Q;-85 zo~AlAa4d_hb@Q&O0ACCYpxwsSd;EN`s$nJBQ^Jb88>&3~O0T~(*uZ;y4Q@`fBf!{V zMbaUfi#;Hm=?1@tcWBD`*@GHNvhJ%(pVA1cz3D0(d*v|?cSij+P%ul6F^lY$=E!0SA%zRRf8n6 zmapQ2fnsco8T)$B?cZAF_Sbs-ja5z6?jkS75dh61tBQhtKf15X@2z$R0wpy#;Z1NQ z>%e3)iMw`dfOj{RV+c9|D?JTvia~!#VqC0sVZ?FNpd_}2Zz!tr@fc=B*rU?FaF9Vf zm)HBPpl@ZcshZ3#ZsPr0v74YA8UA%m81AT1&q@ML_vT<-o!ejL2cQ^ZSF&|Y(r}o- z2!>E&n34MPPi7ZDo7_gPe(J@jl;__aDHz1ESmMZtV zQhp(oml6u72^q4Ww94nhIN!k7bcXnPk9*7NTFRI-V=%-PyJ^siP)?l}>X}25m~$9& z>ZvnTqT1W2&i+DOY7IAwUTdlbpEUY-aWVwd%BD(Q#%@XF7itY`4UjWbEBk^B>a7Jj zUn=9FxdONJEqBeA%K7<*K+xy&!cVLG^Hlq5Xhf?8E@upv$I9ak_2$v>c~Pw{ba6cW<{Nm?2MBaDRy)>cr)_hY|9K4?2uM`sqsU-MEZP z@^mxz#|R`(7j$2kiF-2?2YnBL_;g)&8SaEqoNn#DjEkdXBm+7JcZ=#N4!UPH?z$iz z0nNp~v#|?tGiVL&bM8f)uJeWwNDtlWeOHdt=5I2R(p5W$EVg_i*=?;XcT`rhMq8c4?1>fQEpEfQEpEfQEpEfQEpEfQEpEfQG ze~bm6{Ct6re;9{x5TD@ZRY;fMcO~NV{4C{n zf214eN#bpT*}OvU5OWj%7d<&85f|#yK_Acj%)^>k9khAyg(2OXZ(_B~3$Z*=#)@xC zt!yd668x{<&<^IpZE_tef=naxu`0-Kg*8=3(QR^^^P#C4dc7=wP>VXuqO>2&ZT;ziHf>F^ab$m#}RV z?Y#|OpiHf6YOi0u&*?krD^OM%+te!R*#NYzM2o9{3XR_t7-4Skt6&{If?-Ys9cE}1 z@)T4iW)Xcqj){(ON>(>P8);IH?P)0oNI-iu1T+LR1T+LR1T+LR1T+LR1T+NxJ0pO@ z@Fub^Vu+X`xe;?DFJg@pM(hzTQWn`5sf^S|e36z&Yh-6+S7c8l5_v9iAkr0yMtUQ8 zPuZX1o+^84=&9IKV^57g#rEayH}B8eZ{1(GzjA;5e)|8tXUe+&J42w^rXipqpdp|k bpdp|kpdp|kpdp|kpdp|kpds-8I|Ba)O&%Y; literal 0 HcmV?d00001 diff --git a/crates/symbolizer/fixtures/mrt100.raw b/crates/symbolizer/fixtures/mrt100.raw new file mode 100644 index 0000000000000000000000000000000000000000..d4ae68d16779422a864ba96809424477d92f123b GIT binary patch literal 36864 zcmeHw4SZBZmTz^EP8ve&77`4ic!36^4zW!$8bGw0bnwRMKnvkx6iq_XA+1Te(|ucl zgBolyl3aV28O4XQ&TNagqT~4D?kq%{+365g5{U}vi~+_me!QKIVcx_q20-@bkOdwVaxx~J;YsZ*!^RduTF?Yg1#*6l2nF_wmqOfuF1N|!^rf6dLn*aesE zzkvOG!mBeo4DMHFmeu+~Tp;M*5Ui}{swx{A`~tV$%LT&?oUeg%Enmsi`>VbASy>aU zit5gXe0?o{$Pm@=(2hsMJyJSd{9qtMd~HW+TNLTncBHj^U!o~(qC_*=wo268)-2I; zqMRO9s8Pya;j5}8-G8Yio?^zTzn{*Q&J13l$_%ktTxQAzGZ+U>h0Lv(32LFZ_wUc% zuf&m_h^QJUmu*J?KU8D@t`$xhS(!)Kijoncavaw)wibzgaYItegdxoMlyblgfLCcI7%6l zl@h&_FqXEO2CtBF-yp_JCHFN-Wb=_uQ^}d*{PZ|KyR)Q~+2dWkB@vEwq&QgfxE3`2 z^!CDkeg4g#I##0PoV}++>~nT7j$^K9NgAKb**cRk^JOlyzBQF^i3tiOWFQ%&2);C*a{+0FR_O0%|wsYIq z`Z}0HYS|5{QVo*(FZ*j6)|mPBjLhpk{WsDN@PJt}WAC-ZySUW8jU| z$Nv7AUqgSPy@!vwtbEHczSU(#Z7yqVsd&^ax~&zZVxM_OC-J#b@j!|A1`KFbD}wJq3vt3rJt2I>v`oN`M-)_Q?xA!K#jp>4U5*V2$jc3>9+8+GpK6O6Iwa!o*JZ=`= z_YJBVI|~~N`)&xB4Q#991NLL+@?=gz)vBEjQ&i?()0^;znJWW# z?%TSL`g3%?OWrY&XGG&Qz_nlU^}vny!Hv`VZvEBO&pqi_xq1!1<&@1p9c8>>AqaKi zRx1ZiiTDxx8ZVyXBj=Lgi7%~ziyC-T>eJWJ-XXQ#$Z#@z35_?EVcyrb<*?+?;^t+2 zgEf4#%*yfN>xpQpv^bFPx#^14MW`hIw^nU|wlij{%&+Wkt!l}n#=c+Qcm1yn zL%ay|*P8g&S_|JA$mLs`IOl3q>hSVn+8i4ywx6~qOT>X<5wd)RTf8N=ByxZ&!CJV$ z!wK`?b_DnwRfyyY>snqE;4s9^)>`zZ!+FPu3v1l;+ZCnPezb>gU1tT_g4MDHDBJ8kPVuZ$ z9Flxi?KA~C$%UUeDS2?0cxZZ$<3Q{(L%_ly#{4BR(=<8hC4j zJyLcF#_&xA>EK(7OrUX43+NHhT+nV%4s<6dhHP;= zs10-&v=9`lP7$yzM*kG?pqLRwo_51vNKuEYL8})UVFx0IYAyb+-n%RlInLW;9TKZ9deJzbziNC;8~A{+ajo z+j|@Zr=j^uRWlq?%VF}*$()z*tBGV>WcVI)%ag=}d1iT#$!X z*EcwG%jeFpP)pg~2Ej+!RnAfuvI(a_Y!xuK-Qrut@F#Ql=<;l*cr>!JkmuaC3a*LqqsICl=FTLwJfa@_CB+fp%yqicWG2JdH?XIK|xZ6GZGi9k@4} zcP++mLe1s}9+Y`f0ycNL#1qtlgvHs}KT$H&yi4p8yE_L>E~tOs+&PeK-g{K+N_`{p zx!GKFATn$T7Kdj$#ZI()QfDH~x#isILlYot=Y|ot^Jb=`cwq z*pHd__9Q!p>EC(^~mG#hW6wB!Vh8!!!P{aT|BBYRI`GCFxHhVU^cNZbyz+WNM+tPaOMLuB!#Xc{xOypH=JV-^xLL=!)U z(AK!HJaN?oq<#k?(VYD`M6&)4B*9@DiN~-j5+$-Yhx~iw{JSXs(@CVX;~K_j zU^@zuZHE&R;cpyUli`xYI?3~30W4%fK4w*7xn!m=3p<#+<4wi+hR3jjk$gu9);uxQ zEoOa@jR&^m(WG$-l*+S-dJmls&nH;n_BKmP35T`nMd)XVskm=fVbb_BN84G^_$qb; zE%E!aWEBp;3g?sTZZ-ZK83{X@Eg{847M}(qC45~Xgq9}WLvs?hV<<=F{0@AEX4zr8 z{g|We^~5T~PGgr9dEbz@7t&(a;4aw|5jop6wn)B>bGJHjTRm`ZylM3sd9zZ274#gs zuH{(RydWnJRotIpj}Kgcu8A&5Vy16g4)4bZX}~`bYYUA9HaG*wDFZE;b94&o?!)th zjl5Sk@`0xK7NAVr{ikGdH0x*Dtk56TtVq%jUIO(upqTv_6z=-dmL&(qP zj#jOTBy*O*1k77wB$*n%d8GDXtdwf)f*m@plXZkAMAsUyp4znSP)lLoGSZ&Gbo)t? zqYdjiYZH16Ta)t9%?Zs5fm_?zCjSEQJeDp^*IN$#aswvnJk?BPr0O*^x^!zsKyru$Vcr@5XN1G-%`( zbqm?{_z-V?A!`qmhyOP=BUBpuk~2yrQu(Ll@)?N_f$QM3gord;#4(wjRuCV|NW6_i zJ61tWs`gQw1;D{&GziQjywBE{d< zSut+~YcB1OFu2ar#Krg;NF?BOFn74f=Z5g~aZ`gAU**L^$s8N{mlw;dYw=W$rAr!r zb>zYXX&Zz=n2maszMysMVM(cAL1)eXmehlJ1FwRYSOz)NW=uSd)cNq32?62{d9BhCT(Y?8Ff{F#-4Jtm(G!@kO?*{W91$#5_Yz5cWy=COhZHXF$b@6BS>VhKm$|PxVBY7<=A*ad@~KA< zP3NPNn%&V!ukfwe){;d>g%&5Ji8hqWCS z(zYH8Pf9EWyjXv31W|!d$9vTHhpRRFn8+L_l^m&C8pG1Fi{o>^u;o-7Hi*Yxq-{nr z=Z9FkVNRNO_|+?3q-WQ}FW_O&wrJ^)v*QRYgO5R;lF6JXgn9AD35fC09ev2eZXcW2 zWKM)~pyzseVkkszv|q}Vgxq*~#lwTm4No48CMW~RJkm)K>>HCg3nkSp-IR&!_BvBBRf;bEKx2&uhZz!3rnQ~w2a1hGUp#~WDKh)y=H_y?1cDe z@sRnEE{gM1=?(}`ZH{Elj}XOBorG>NMfudkUMvjsymW+W*>aFD8+D_j`wNbbUV71F z(z4hBGI$C0FNA{y&VWC$`q3LcYDwngQ`;h4cqRW%@EzF$(Bh-?kkmhtXMwJhv`j=7 zzX@`FUGHUnm9EBDPF%eAqK?N7XeNg|_IU9)wfIT+Mf6!R=UGIOIn@M!*u8&38lwTr z-G-H+0z7qQsrsm!FziniDx+qdgqD!;B5n9yy@KX@9=hab)J1raTZoxkc?IG?9`6CE z&x{{I0ITH9#BMddbh#|)K>Sz8Ib_@c4w-St__v5r2Ocs$ro`xRg7J67p)l(QRPs=o zfodM*i&@Xi1;5pu#;Hua7Rh>CWujmcrcUCI)UA<&>T6-?y`J=tc|4i(KP7DbX&8#_ zNsMZGhOfY$A(`_O<)Bv=sH5ABrw+JuJb$Po8?WH#oeYg@J6?QXYmm&TL9yhaNHS&f z3#ikfdA~*imXRd|FFEWz^g!8JY(MD|k2$vt(Q~A8%g6Ls=@Ji%LwK}`78!~&kC=Bk zw|qoTovmrs;>@GalO~>}-f1nfP6~VYNY=hNsCCZJjY;$3c)EcJd5bI3Qhm5mf2x)hv` zG)>N%9@+$nOF|*2Hyq(R-{bhqBRpEg(TL(^buH`)uMv&Ev0_eln+_TO6%q1Rt;R0I zoTs}up)k_T9Wvq_4ZYn!#UFOF2TT*=Sp&pNfSQ>yj1>9P5co}e7h$8 zkS4xO6W^+dH*4Zr{+f{~hfg@KYa=T$9u`zk2L_C^+ z2a=z>0LaL$&#|&t1d1n|k@tu3gQN2_OLL1yg(*(aVJfZiq~X0$>QTO`+dcbeNmbX) zVzz}Bi!9g@f|yRb45?-z)4Ufn(m&)*?RMKyRnKh$(N9M#RXq>U(fxR`U&~kHn*2Ej zFqmY%V-@Yvc=5xwQU`43ZXdXBctpJ>lS|Zqzg-gP>K)0Di~Mx--Jq0%$IcIV(I8^d zVusVGbVswM4$*^BaX?%JZ%|s1cPtV&=Yt#MK52SR(CGSYI{qV;U?Jv0bS*;u^ zE*+B|Dv{;YdWIH{O-;g67B!dof2(#$+SY@TRd_){y${u;AJ}WJ(;Td9DX@nv2!dxp0;$1{T0_Q zm(sC}nrFJ={{e#^8-##T#PDwldO-}clek!*= zRhH{dk!i(!dnA6lQcl%7bc>wtSRqrDAHU!kNnT9{^5k)?QpY?wT``WT@_&g2E|ts0 z75bOt#`CY63C>5@fZ=jr2AMo`tDqah2EY`cTR+`Ac)BsQ)6Iexbf)ce>!%yTTRqcu zx((5-0*_#(xOBs>Fs328dGM%dis#~H!_Q$R4{j3!6x&X>cDe=do3$xUw;{UGcR;29 zo(oKIx=HqFZ}b*SdmZ!c$d0dj$oa^<{N~$U@OMs9)5S(j-UOhtnGd0QAga> z*7A!c$1lGKWc=Nkj{7U0`_9!bE{rXWuD|TY;pXk2mZZR#IHFrlG4F(lqZAEn?qu=ZPMFLc%6J>PE7 z6)RmUzq31QS9Iv!f4nO8!@u3++I+78{Q979jm?HHk=8SIQ&E|lj$_%-+0r*r*7^pB zFdK9boMI)r(P*3x>celyk#0P84U`i9JU zx4%xPuSS~2avGkP6#hN;+*7?Cu&^PFM+ujpP-QJ~O12D-;oU0P!WL@e%49jy7@jO= z0nbML(FXD1%J0rim{xlhq*q#caiiJiW} zqoE>w?1+k_i~lYfpm<@tq%b)Tov;7e(HB|^o<)>zy{yHS#8%K$|}I4 zU(XeuWG4sy__2NIx|wYc8@*d^uV4K-`3W@bYcE5Zfu$|M<1rq|(+g5;QhGgO({hkL zmSWqDIaL8I(=Ii#Ov7B37D%l~v7zt5=TL!R+C(-@$YRr~C$VW|nQYp8q=PyA26H+y zS5IK(`6gywkr|ja7e61(-_3FiF_!L;%MZa3878K&iIADNB0VteiivFc2U%>oFo{jC z&ScY}YkDx-Lv zS{{d?i>M*Rz*2yt0k7<{s}agFST^)!7gz#@9Kw1(uUu8!U0 z6-p}`DmQqmSA=m6q29aF8@$_B<%LxLjU+2yvm6-jn0ebYyoFBqdhmudymjE2FdI3{ zPxY#EVZLF8Y}4=_1#c(K8V#=lJo44;8eSiGaqy04c;~?D2anhAti|xn;MHnuSq5Gc z>?_ppg5dGsHEGH|3f?x-r;+;?(g!(I&!@R1XFc07I(@dRLcg?(O=GpRT8dtD#e|dR% zNT@CsHU+#;RO2tN_J)L@e^Ys#FC?&sC5b9;lTTnVIqq+$@ofkPz2yy+!JvO*d1Y|J z-Ryvr=WX&TGy8)Jb^i60b+XJ#p;8Ekim@?f?3S=MxXI%U*7$?z+io(Gly3Z5zS9wFBl4?BRjdEq{z+_X2yK+-V@Ya`M2s%SceRW=nL4V1}xLE1N zh~ue2DQtyMS5z4gFwBauN2R}TkV!nZ-+xy)ur%CIMP?T_2*FL*O;C;k|EdNIchqQP zB_Wr0eR#tLZ_pD2pcrEpvQ-W8a2Ul1hEQXenfmiDW*0!4(nf!9DX@fHs@6%G7ArDr z6)VGZ^fdrks%98D+2+H^EHf3W{9u$c{eVv zp^Pyz219I~mj=BA<@kA_ojD|lIfpT)ojT(ss{HlZ>@U@&)o_jEwZ>}*$fHk?CPPT8 zY`o-o?3Pu2sn*b@5IIAwvMD{xD9d8@xtE-2K6!hwJvep=(7$J<{` zBU&qPKAUJJj=|#msEs#$d|mQP8B>|M48UbbdArKxuf^T58DU$NANqr zH>9L5WwiL$>eqIx~3%0-cS+Pn+ykgj~eweC;a;#P0`PI9ZOTz4VP zgi@SN?Y@A6qaKn0osP3bwG;>4c`?qqARYtF#oyW3jyMNejq{w3BTmP8qX?vjPW8U6 z#OZLaV}{JQ@bOdw+bOdw+bOdw+bOdw+ zbOdw+bOdw+{z?Q4(zk?oq;4KtQ2t{q_~hqHeEeY?#zUCCNjsG~wiKmKrOwBFHBKd$ z<6g{GAXdVbvt>w^;Jy@bx_+7V$3N5QffVW2{)+{b-YDgc{4aV+N+xd9XMjF|1(^@0 z6gEJc4__G4$@vCW!~77-lVoi8w$#QJAk4>q{f2fhH%^lqSP^9ES%6hSeiN*zM2b$6 z<2fIis-V};LI^degOj9#koJRL362jjIvY{VW z+DZCul;?m`7`iDfTXY}0T2ser$OKV~Y5`*fEMEx=@cl-C6t_Xodgvic$o}TZCwbsEH0A^shNXg%2{_cx7O4x@MzatYgd$=>Vn1z5*q)HqP6GO+BcLOo zBcLOoBcLOoBcLOoBcLPjUl{>B46h^mVy2iSmK)<@c`;k8Fy@HyF;8r5tRhw$3&fgY z&9QB voBqG|Ns8|O$`I(b=?Lfu=m_Wt=m_Wt=m_Wt=m_Wt=m_Wt=m`A(j=;YI@bw io::Result; fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result>; diff --git a/crates/symbolizer/src/lib.rs b/crates/symbolizer/src/lib.rs index b7339cb..73c616a 100644 --- a/crates/symbolizer/src/lib.rs +++ b/crates/symbolizer/src/lib.rs @@ -1,5 +1,5 @@ // Axel '0vercl0k' Souchet - May 26th 2024 -mod address_space; +mod addr_space; mod error; mod guid; mod misc; @@ -9,7 +9,7 @@ mod pe; mod stats; mod symbolizer; -pub use address_space::AddressSpace; +pub use addr_space::AddrSpace; pub use error::{Error, Result}; pub use modules::{Module, Modules}; pub use stats::Stats; diff --git a/crates/symbolizer/src/pe.rs b/crates/symbolizer/src/pe.rs index 9e7f9f7..20d4ed1 100644 --- a/crates/symbolizer/src/pe.rs +++ b/crates/symbolizer/src/pe.rs @@ -8,7 +8,7 @@ use std::path::PathBuf; use anyhow::{anyhow, Context}; use log::debug; -use crate::address_space::AddressSpace; +use crate::addr_space::AddrSpace; use crate::guid::Guid; use crate::misc::Rva; use crate::{Error as E, Result}; @@ -234,7 +234,7 @@ pub fn array_offset(base: u64, rva_array: u32, idx: u32, entry_size: usize) -> O /// Read a NULL terminated string from the dump file at a specific address. pub fn read_string( - addr_space: &mut impl AddressSpace, + addr_space: &mut impl AddrSpace, mut addr: u64, max: usize, ) -> Result> { @@ -276,7 +276,7 @@ pub struct Pe { } impl Pe { - pub fn new(addr_space: &mut impl AddressSpace, base: u64) -> Result { + pub fn new(addr_space: &mut impl AddrSpace, base: u64) -> Result { // All right let's parse the PE. debug!("parsing PE @ {:#x}", base); @@ -333,7 +333,7 @@ impl Pe { } fn try_parse_debug_dir( - addr_space: &mut impl AddressSpace, + addr_space: &mut impl AddrSpace, base: u64, opt_hdr: &ImageOptionalHeader64, ) -> Result> { @@ -416,7 +416,7 @@ impl Pe { } fn try_parse_export_dir( - addr_space: &mut impl AddressSpace, + addr_space: &mut impl AddrSpace, base: u64, opt_hdr: &ImageOptionalHeader64, ) -> Result>> { diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs index 119baa6..23b2206 100644 --- a/crates/symbolizer/src/symbolizer.rs +++ b/crates/symbolizer/src/symbolizer.rs @@ -13,7 +13,7 @@ use std::rc::Rc; use anyhow::Context; use log::{debug, trace, warn}; -use crate::address_space::AddressSpace; +use crate::addr_space::AddrSpace; use crate::misc::{fast_hex32, fast_hex64}; use crate::modules::{Module, Modules}; use crate::pdbcache::{PdbCache, PdbCacheBuilder}; @@ -204,7 +204,7 @@ impl BuildHasher for IdentityHasher { /// It downloads, parses PDB information, and symbolizes. pub struct Symbolizer where - AS: AddressSpace, + AS: AddrSpace, { /// Keep track of some statistics regarding the number of lines symbolized, /// PDB downloaded, etc. @@ -232,18 +232,18 @@ where impl Symbolizer where - AS: AddressSpace, + AS: AddrSpace, { /// Create a symbolizer. pub fn new( - symcache: impl AsRef, + symcache: &impl AsRef, symsrvs: Vec, modules: Vec, addr_space: AS, ) -> Self { let offline = ureq::get("https://www.google.com/").call().is_err(); if offline { - println!("Turning on 'offline' mode as you seem to not have internet access.."); + debug!("Turning on 'offline' mode as you seem to not have internet access.."); } Self { diff --git a/crates/symbolizer/tests/basics.rs b/crates/symbolizer/tests/basics.rs index 595eb6a..04f441a 100644 --- a/crates/symbolizer/tests/basics.rs +++ b/crates/symbolizer/tests/basics.rs @@ -1,10 +1,18 @@ // Axel '0vercl0k' Souchet - May 30 2024 use std::env::temp_dir; use std::fs::File; -use std::io::{self, Read, Seek}; -use std::path::Path; +use std::io::{self, Read, Seek, Write}; +use std::path::{Path, PathBuf}; -use symbolizer::{AddressSpace, Module, Symbolizer}; +use object::read::pe::PeFile64; +use object::{NativeEndian, ReadCache, ReadRef}; +use symbolizer::{AddrSpace, Module, Symbolizer}; + +fn fixture(name: &str) -> PathBuf { + PathBuf::from(&env!("CARGO_MANIFEST_DIR")) + .join("fixtures") + .join(name) +} #[derive(Debug)] struct RawAddressSpace { @@ -26,11 +34,11 @@ impl RawAddressSpace { } } -impl AddressSpace for RawAddressSpace { +impl AddrSpace for RawAddressSpace { fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> std::io::Result { - self.raw.seek(io::SeekFrom::Start(addr))?; + Seek::seek(&mut self.raw, io::SeekFrom::Start(addr))?; - self.raw.read(buf) + Read::read(&mut self.raw, buf) } fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> std::io::Result> { @@ -38,26 +46,113 @@ impl AddressSpace for RawAddressSpace { } } +#[derive(Debug)] +struct FileAddressSpace<'data> { + pe: PeFile64<'data, &'data ReadCache>, + virt_len: u64, +} + +impl<'data> FileAddressSpace<'data> { + fn new(cache: &'data ReadCache) -> io::Result { + let pe = + PeFile64::parse(cache).map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e))?; + + let virt_len = pe + .nt_headers() + .optional_header + .size_of_image + .get(NativeEndian) + .into(); + + Ok(Self { pe, virt_len }) + } + + fn len(&self) -> u64 { + self.virt_len + } +} + +impl<'data> AddrSpace for FileAddressSpace<'data> { + fn read_at(&mut self, addr: u64, mut buf: &mut [u8]) -> std::io::Result { + if addr >= self.virt_len { + return Err(io::Error::new( + io::ErrorKind::Unsupported, + format!("{addr:#x} vs {:#x} is oob", self.virt_len), + )); + } + + let data = match self + .pe + .section_table() + .pe_data_at(self.pe.data(), addr.try_into().unwrap()) + { + Some(data) => data, + None => self + .pe + .data() + .read_slice_at(addr, buf.len()) + .map_err(|_| io::Error::new(io::ErrorKind::Unsupported, "read_slice_at"))?, + }; + + buf.write(data) + } + + fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> std::io::Result> { + self.read_at(addr, buf).map(Some) + } +} + +const EXPECTED_LEN: u64 = 0x90_00; +const EXPECTED_RAW: [(u64, &str, &str); 3] = [ + ( + 0x19_50, + "mrt100!GetManagedRuntimeService+0x0", + "mrt100+0x00001950", + ), + (EXPECTED_LEN, "0x0000000000009000", "0x0000000000009000"), + (0xdeadbeef, "0x00000000deadbeef", "0x00000000deadbeef"), +]; + #[test] -fn raw() { +fn raw_virt() { let symcache = temp_dir().join("basics"); - let raw = RawAddressSpace::new(&r"c:\work\mrt100.raw").unwrap(); - let raw_len = raw.len(); - let modules = vec![Module::new("mrt100", 0x0, raw_len)]; - - let mut symb = Symbolizer::new(symcache, vec![], modules, raw); - - let expected = [ - ( - 0x19_50, - "mrt100!GetManagedRuntimeService+0x0", - "mrt100+0x00001950", - ), - (raw_len, "0x0000000000009000", "0x0000000000009000"), - (0xdeadbeef, "0x00000000deadbeef", "0x00000000deadbeef"), - ]; - - for (addr, expected_full, expected_modoff) in expected { + let raw_addr_space = RawAddressSpace::new(&fixture("mrt100.raw")).unwrap(); + let len = raw_addr_space.len(); + + let mut symb = Symbolizer::new( + &symcache, + vec![], + vec![Module::new("mrt100", 0x0, len)], + raw_addr_space, + ); + + for (addr, expected_full, expected_modoff) in EXPECTED_RAW { + let mut full = Vec::new(); + symb.full(addr, &mut full).unwrap(); + assert_eq!(String::from_utf8(full).unwrap(), expected_full); + + let mut modoff = Vec::new(); + symb.modoff(addr, &mut modoff).unwrap(); + assert_eq!(String::from_utf8(modoff).unwrap(), expected_modoff); + } +} + +#[test] +fn raw_file() { + let symcache = temp_dir().join("basics"); + let file = File::open(fixture("mrt100.dll")).unwrap(); + let cache = ReadCache::new(file); + let file_addr_space = FileAddressSpace::new(&cache).unwrap(); + let len = file_addr_space.len(); + + let mut symb = Symbolizer::new( + &symcache, + vec![], + vec![Module::new("mrt100", 0x0, len)], + file_addr_space, + ); + + for (addr, expected_full, expected_modoff) in EXPECTED_RAW { let mut full = Vec::new(); symb.full(addr, &mut full).unwrap(); assert_eq!(String::from_utf8(full).unwrap(), expected_full); diff --git a/src/main.rs b/src/main.rs index 7c5c54f..c9136f1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,7 @@ use std::{env, fs, io}; use anyhow::{anyhow, bail, Context, Result}; use clap::{ArgAction, Parser, ValueEnum}; use kdmp_parser::KernelDumpParser; -use symbolizer::{AddressSpace, Module, Symbolizer}; +use symbolizer::{AddrSpace, Module, Symbolizer}; mod hex_addrs_iter; mod human; @@ -283,7 +283,7 @@ impl ParserWrapper { } } -impl AddressSpace for ParserWrapper { +impl AddrSpace for ParserWrapper { fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result { self.parser .virt_read(addr.into(), buf) @@ -350,7 +350,7 @@ fn main() -> Result<()> { // All right, ready to create the symbolizer. let mut symbolizer = Symbolizer::new( - symcache, + &symcache, args.symsrv.clone(), modules, ParserWrapper::new(parser), From d0ffb5609aebe34a972c84c3013a5033315bfd07 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Wed, 5 Jun 2024 18:40:31 -0700 Subject: [PATCH 11/28] track addrs --- crates/symbolizer/src/stats.rs | 11 ++++++++++- crates/symbolizer/src/symbolizer.rs | 8 ++++++-- src/main.rs | 23 ++++++----------------- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/crates/symbolizer/src/stats.rs b/crates/symbolizer/src/stats.rs index ca78f5a..9f23fe4 100644 --- a/crates/symbolizer/src/stats.rs +++ b/crates/symbolizer/src/stats.rs @@ -9,11 +9,16 @@ pub struct StatsBuilder { inner: RefCell, } +/// Various statistics that the symbolizer keeps track of. #[derive(Default, Clone, Copy, Debug)] pub struct Stats { - pub n_lines: u64, + /// The number of addresses symbolized. + pub n_addrs: u64, + /// The number of downloaded PDB files. pub n_downloads: u64, + /// The total size in bytes of downloads. pub size_downloaded: u64, + /// The number of time the address cache was a hit. pub cache_hit: u64, } @@ -28,6 +33,10 @@ impl StatsBuilder { inner.size_downloaded += size; } + pub fn addr_symbolized(&self) { + self.inner.borrow_mut().n_addrs += 1; + } + pub fn cache_hit(&self) { self.inner.borrow_mut().cache_hit += 1; } diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs index 23b2206..bf0dae5 100644 --- a/crates/symbolizer/src/symbolizer.rs +++ b/crates/symbolizer/src/symbolizer.rs @@ -206,7 +206,7 @@ pub struct Symbolizer where AS: AddrSpace, { - /// Keep track of some statistics regarding the number of lines symbolized, + /// Keep track of some statistics such as the number of lines symbolized, /// PDB downloaded, etc. stats: StatsBuilder, /// This is a path to the local PDB symbol cache where PDBs will be @@ -234,7 +234,7 @@ impl Symbolizer where AS: AddrSpace, { - /// Create a symbolizer. + /// Create a [`Symbolizer`]. pub fn new( symcache: &impl AsRef, symsrvs: Vec, @@ -258,6 +258,7 @@ where } } + /// Get [`Stats`]. pub fn stats(self) -> Stats { self.stats.build() } @@ -382,6 +383,8 @@ where } .context("failed to write symbolized value to output")?; + self.stats.addr_symbolized(); + Ok(()) } @@ -394,6 +397,7 @@ where .write_all(sym.as_bytes()) .context("failed to write symbolized value to output")?; + self.stats.addr_symbolized(); Ok(()) } None => self.modoff(addr, output), diff --git a/src/main.rs b/src/main.rs index c9136f1..dbed745 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,7 +22,6 @@ use human::ToHuman; struct StatsBuilder { start: Instant, n_files: u64, - n_lines: u64, } impl Default for StatsBuilder { @@ -30,26 +29,19 @@ impl Default for StatsBuilder { Self { start: Instant::now(), n_files: 0, - n_lines: 0, } } } impl StatsBuilder { - pub fn start(&mut self) { - self.start = Instant::now(); - } - - pub fn done_file(&mut self, n: u64) { + pub fn done_file(&mut self) { self.n_files += 1; - self.n_lines += n; } pub fn stop(self, symbolizer: Symbolizer) -> Stats { Stats { time: self.start.elapsed().as_secs(), n_files: self.n_files, - n_lines: self.n_lines, symbolizer_stats: symbolizer.stats(), } } @@ -58,7 +50,6 @@ impl StatsBuilder { struct Stats { time: u64, n_files: u64, - n_lines: u64, symbolizer_stats: symbolizer::Stats, } @@ -67,10 +58,10 @@ impl Display for Stats { write!( f, "✓ Successfully symbolized {} lines across {} files in {} ({}% cache hits", - self.n_lines.human_number(), + self.symbolizer_stats.n_addrs.human_number(), self.n_files.human_number(), self.time.human_time(), - percentage(self.symbolizer_stats.cache_hit, self.n_lines) + percentage(self.symbolizer_stats.cache_hit, self.symbolizer_stats.n_addrs) )?; if self.symbolizer_stats.size_downloaded > 0 { @@ -356,9 +347,6 @@ fn main() -> Result<()> { ParserWrapper::new(parser), ); - let mut stats_builder = StatsBuilder::default(); - stats_builder.start(); - let paths = if args.trace.is_dir() { // If we received a path to a directory as input, then we will try to symbolize // every file inside that directory.. @@ -373,11 +361,12 @@ fn main() -> Result<()> { vec![args.trace.clone()] }; + let mut stats_builder = StatsBuilder::default(); let total = paths.len(); for (idx, path) in paths.into_iter().enumerate() { print!("\x1B[2K\r"); - let n = symbolize_file(&mut symbolizer, &path, &args)?; - stats_builder.done_file(n.try_into()?); + symbolize_file(&mut symbolizer, &path, &args)?; + stats_builder.done_file(); print!("[{}/{total}] {} done", idx + 1, path.display()); io::stdout().flush()?; } From 318742a19edd4b4bb33ad9fa9b695bf637337dbc Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Wed, 5 Jun 2024 21:49:40 -0700 Subject: [PATCH 12/28] fmt --- src/main.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index dbed745..0c4215e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -61,7 +61,10 @@ impl Display for Stats { self.symbolizer_stats.n_addrs.human_number(), self.n_files.human_number(), self.time.human_time(), - percentage(self.symbolizer_stats.cache_hit, self.symbolizer_stats.n_addrs) + percentage( + self.symbolizer_stats.cache_hit, + self.symbolizer_stats.n_addrs + ) )?; if self.symbolizer_stats.size_downloaded > 0 { From f13e3fca16e77ec5f66d7d1c73cfd7e4553d7353 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Wed, 5 Jun 2024 21:59:19 -0700 Subject: [PATCH 13/28] doc --- crates/symbolizer/src/modules.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/symbolizer/src/modules.rs b/crates/symbolizer/src/modules.rs index bebf129..79bf064 100644 --- a/crates/symbolizer/src/modules.rs +++ b/crates/symbolizer/src/modules.rs @@ -23,7 +23,7 @@ impl Module { } } - /// Calculate an [`Rva`] from an `addr` contained in this module. + /// Calculate an rva from an `addr` contained in this module. pub fn rva(&self, addr: u64) -> Rva { debug_assert!(self.at.contains(&addr)); From 8d14f35322b08726d6a1dc9c8d2c7e9bbd61b1b9 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Sat, 8 Jun 2024 10:27:27 -0700 Subject: [PATCH 14/28] add builder, etc --- crates/symbolizer/src/builder.rs | 93 +++++++++++++++++++++++ crates/symbolizer/src/lib.rs | 2 + crates/symbolizer/src/symbolizer.rs | 75 ++++++++++++++----- crates/symbolizer/tests/basics.rs | 69 ++++++++++------- src/main.rs | 111 ++++++++++++++-------------- 5 files changed, 251 insertions(+), 99 deletions(-) create mode 100644 crates/symbolizer/src/builder.rs diff --git a/crates/symbolizer/src/builder.rs b/crates/symbolizer/src/builder.rs new file mode 100644 index 0000000..e3717d6 --- /dev/null +++ b/crates/symbolizer/src/builder.rs @@ -0,0 +1,93 @@ +// Axel '0vercl0k' Souchet - June 7 2024 +use std::path::{Path, PathBuf}; + +use crate::symbolizer::{Config, PdbLookupMode}; +use crate::{AddrSpace, Module, Result, Symbolizer}; + +#[derive(Default)] +pub struct NoSymcache; + +pub struct Symcache(PathBuf); + +#[derive(Default, Debug)] +pub struct Builder { + symcache: SC, + modules: Vec, + mode: M, +} + +#[derive(Default)] +pub struct Offline; +pub struct Online(Vec); + +impl Builder { + pub fn online(self, symsrvs: impl Iterator>) -> Builder { + let Self { + symcache, modules, .. + } = self; + + Builder { + symcache, + modules, + mode: Online(symsrvs.map(Into::into).collect()), + } + } +} + +impl Builder { + pub fn symcache(self, cache: &impl AsRef) -> Builder { + let Self { modules, mode, .. } = self; + + Builder { + symcache: Symcache(cache.as_ref().to_path_buf()), + modules, + mode, + } + } +} + +impl Builder { + pub fn modules(mut self, modules: impl Iterator) -> Self { + self.modules = modules.collect(); + + self + } +} + +impl Builder { + pub fn build(self, addr_space: AS) -> Result> + where + AS: AddrSpace, + { + let Self { + symcache, modules, .. + } = self; + let config = Config { + symcache: symcache.0, + modules, + mode: PdbLookupMode::Offline, + }; + + Symbolizer::new(addr_space, config) + } +} + +impl Builder { + pub fn build(self, addr_space: AS) -> Result> + where + AS: AddrSpace, + { + let Self { + symcache, + modules, + mode, + } = self; + let config = Config { + symcache: symcache.0, + modules, + mode: PdbLookupMode::Online { symcache: mode.0 }, + }; + + Symbolizer::new(addr_space, config) + } +} diff --git a/crates/symbolizer/src/lib.rs b/crates/symbolizer/src/lib.rs index 73c616a..f6aa2e0 100644 --- a/crates/symbolizer/src/lib.rs +++ b/crates/symbolizer/src/lib.rs @@ -1,5 +1,6 @@ // Axel '0vercl0k' Souchet - May 26th 2024 mod addr_space; +mod builder; mod error; mod guid; mod misc; @@ -10,6 +11,7 @@ mod stats; mod symbolizer; pub use addr_space::AddrSpace; +pub use builder::Builder; pub use error::{Error, Result}; pub use modules::{Module, Modules}; pub use stats::Stats; diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs index bf0dae5..394e6a6 100644 --- a/crates/symbolizer/src/symbolizer.rs +++ b/crates/symbolizer/src/symbolizer.rs @@ -10,10 +10,11 @@ use std::ops::Range; use std::path::{Path, PathBuf}; use std::rc::Rc; -use anyhow::Context; +use anyhow::{anyhow, Context}; use log::{debug, trace, warn}; use crate::addr_space::AddrSpace; +use crate::builder::{Builder, NoSymcache, Offline}; use crate::misc::{fast_hex32, fast_hex64}; use crate::modules::{Module, Modules}; use crate::pdbcache::{PdbCache, PdbCacheBuilder}; @@ -199,6 +200,29 @@ impl BuildHasher for IdentityHasher { } } +#[derive(Debug, Default)] +pub enum PdbLookupMode { + #[default] + Offline, + Online { + /// List of symbol servers to try to download PDBs from when needed. + symcache: Vec, + }, +} + +/// Configuration for the [`Symbolizer`]. +#[derive(Debug)] +pub struct Config { + /// Path to the local PDB symbol cache where PDBs will be + /// downloaded into, or where we'll look for cached PDBs. + pub symcache: PathBuf, + /// This is the list of kernel / user modules read from the kernel crash + /// dump. + pub modules: Vec, + /// Which mode are we using for PDB lookups? Online or Offline? + pub mode: PdbLookupMode, +} + /// The [`Symbolizer`] is the main object that glues all the logic. /// /// It downloads, parses PDB information, and symbolizes. @@ -234,32 +258,49 @@ impl Symbolizer where AS: AddrSpace, { + pub fn builder() -> Builder { + Builder::default() + } + /// Create a [`Symbolizer`]. - pub fn new( - symcache: &impl AsRef, - symsrvs: Vec, - modules: Vec, - addr_space: AS, - ) -> Self { - let offline = ureq::get("https://www.google.com/").call().is_err(); - if offline { - debug!("Turning on 'offline' mode as you seem to not have internet access.."); + pub fn new(addr_space: AS, config: Config) -> Result { + let (offline, symsrvs) = match config.mode { + PdbLookupMode::Offline => + // If the user wants offline, then let's do that.. + { + (true, vec![]) + } + PdbLookupMode::Online { symcache } => { + // ..otherwise, we'll try to resolve a DNS and see what happens. If we can't do + // that, then we'll assume we're offline and turn the offline mode. + // Otherwise, we'll assume we have online access and attempt to download PDBs. + let offline = ureq::get("https://www.google.com/").call().is_err(); + if offline { + debug!("Turning on 'offline' mode as you seem to not have internet access.."); + } + + (offline, symcache) + } + }; + + if !config.symcache.is_dir() { + return Err(anyhow!("{:?} directory does not exist", config.symcache))?; } - Self { + Ok(Self { stats: Default::default(), - symcache: symcache.as_ref().to_path_buf(), - modules: Modules::new(modules), + symcache: config.symcache, + modules: Modules::new(config.modules), addr_space: RefCell::new(addr_space), symsrvs, addr_cache: Default::default(), pdb_caches: Default::default(), offline, - } + }) } /// Get [`Stats`]. - pub fn stats(self) -> Stats { + pub fn stats(&self) -> Stats { self.stats.build() } @@ -306,9 +347,9 @@ where builder.ingest(pe.exports.into_iter()); // .. and see if it has PDB information. - trace!("Get PDB information for {module:?}.."); - if let Some(pdb_id) = pe.pdb_id { + trace!("Get PDB information for {module:?}/{pdb_id}.."); + // Try to get a PDB.. let pdb_path = get_pdb(&self.symcache, &self.symsrvs, &pdb_id, self.offline)?; diff --git a/crates/symbolizer/tests/basics.rs b/crates/symbolizer/tests/basics.rs index 04f441a..495a990 100644 --- a/crates/symbolizer/tests/basics.rs +++ b/crates/symbolizer/tests/basics.rs @@ -1,12 +1,28 @@ // Axel '0vercl0k' Souchet - May 30 2024 use std::env::temp_dir; -use std::fs::File; +use std::fs::{self, File}; use std::io::{self, Read, Seek, Write}; use std::path::{Path, PathBuf}; use object::read::pe::PeFile64; use object::{NativeEndian, ReadCache, ReadRef}; -use symbolizer::{AddrSpace, Module, Symbolizer}; +use symbolizer::{AddrSpace, Builder, Module}; + +const EXPECTED_LEN: u64 = 0x90_00; +const EXPECTED_RAW: [(u64, &str, &str); 4] = [ + ( + 0x19_50, + "mrt100!GetManagedRuntimeService+0x0", + "mrt100+0x00001950", + ), + ( + 0x19_30, + "mrt100!ManagedRuntimeServices::SetWerDataBuffer+0x0", + "mrt100+0x00001930", + ), + (EXPECTED_LEN, "0x0000000000009000", "0x0000000000009000"), + (0xdeadbeef, "0x00000000deadbeef", "0x00000000deadbeef"), +]; fn fixture(name: &str) -> PathBuf { PathBuf::from(&env!("CARGO_MANIFEST_DIR")) @@ -14,6 +30,14 @@ fn fixture(name: &str) -> PathBuf { .join(name) } +fn symcache(name: &str) -> PathBuf { + let cache = temp_dir().join(name); + let _ = fs::remove_dir_all(&cache); + let _ = fs::create_dir(&cache); + + cache +} + #[derive(Debug)] struct RawAddressSpace { raw: File, @@ -102,29 +126,17 @@ impl<'data> AddrSpace for FileAddressSpace<'data> { } } -const EXPECTED_LEN: u64 = 0x90_00; -const EXPECTED_RAW: [(u64, &str, &str); 3] = [ - ( - 0x19_50, - "mrt100!GetManagedRuntimeService+0x0", - "mrt100+0x00001950", - ), - (EXPECTED_LEN, "0x0000000000009000", "0x0000000000009000"), - (0xdeadbeef, "0x00000000deadbeef", "0x00000000deadbeef"), -]; - #[test] fn raw_virt() { - let symcache = temp_dir().join("basics"); let raw_addr_space = RawAddressSpace::new(&fixture("mrt100.raw")).unwrap(); let len = raw_addr_space.len(); - let mut symb = Symbolizer::new( - &symcache, - vec![], - vec![Module::new("mrt100", 0x0, len)], - raw_addr_space, - ); + let mut symb = Builder::default() + .modules(vec![Module::new("mrt100", 0x0, len)].into_iter()) + .online(vec!["https://msdl.microsoft.com/download/symbols/"].into_iter()) + .symcache(&symcache("basics")) + .build(raw_addr_space) + .unwrap(); for (addr, expected_full, expected_modoff) in EXPECTED_RAW { let mut full = Vec::new(); @@ -135,22 +147,23 @@ fn raw_virt() { symb.modoff(addr, &mut modoff).unwrap(); assert_eq!(String::from_utf8(modoff).unwrap(), expected_modoff); } + + assert_eq!(symb.stats().n_downloads, 1); } #[test] fn raw_file() { - let symcache = temp_dir().join("basics"); let file = File::open(fixture("mrt100.dll")).unwrap(); let cache = ReadCache::new(file); let file_addr_space = FileAddressSpace::new(&cache).unwrap(); let len = file_addr_space.len(); - let mut symb = Symbolizer::new( - &symcache, - vec![], - vec![Module::new("mrt100", 0x0, len)], - file_addr_space, - ); + let mut symb = Builder::default() + .modules(vec![Module::new("mrt100", 0x0, len)].into_iter()) + .online(vec!["https://msdl.microsoft.com/download/symbols/"].into_iter()) + .symcache(&symcache("basics")) + .build(file_addr_space) + .unwrap(); for (addr, expected_full, expected_modoff) in EXPECTED_RAW { let mut full = Vec::new(); @@ -161,4 +174,6 @@ fn raw_file() { symb.modoff(addr, &mut modoff).unwrap(); assert_eq!(String::from_utf8(modoff).unwrap(), expected_modoff); } + + assert_eq!(symb.stats().n_downloads, 1); } diff --git a/src/main.rs b/src/main.rs index 0c4215e..0ec8359 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,7 @@ use std::{env, fs, io}; use anyhow::{anyhow, bail, Context, Result}; use clap::{ArgAction, Parser, ValueEnum}; use kdmp_parser::KernelDumpParser; -use symbolizer::{AddrSpace, Module, Symbolizer}; +use symbolizer::{AddrSpace, Builder as SymbolizerBuilder, Module, Symbolizer}; mod hex_addrs_iter; mod human; @@ -38,7 +38,7 @@ impl StatsBuilder { self.n_files += 1; } - pub fn stop(self, symbolizer: Symbolizer) -> Stats { + pub fn stop(self, symbolizer: KernelDumpSymbolizer) -> Stats { Stats { time: self.start.elapsed().as_secs(), n_files: self.n_files, @@ -80,35 +80,33 @@ impl Display for Stats { } } -/// Calculate a percentage value. -pub fn percentage(how_many: u64, how_many_total: u64) -> u32 { - assert!( - how_many_total > 0, - "{how_many_total} needs to be bigger than 0" - ); - - ((how_many * 1_00) / how_many_total) as u32 +#[derive(Debug)] +struct ParserWrapper { + parser: KernelDumpParser, } -/// Parse the `_NT_SYMBOL_PATH` environment variable to try the path of a symbol -/// cache. -fn sympath() -> Option { - let env = env::var("_NT_SYMBOL_PATH").ok()?; - - if !env.starts_with("srv*") { - return None; +impl ParserWrapper { + fn new(parser: KernelDumpParser) -> Self { + Self { parser } } +} - let sympath = env.strip_prefix("srv*").unwrap(); - let sympath = PathBuf::from(sympath.split('*').next().unwrap()); +impl AddrSpace for ParserWrapper { + fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result { + self.parser + .virt_read(addr.into(), buf) + .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) + } - if sympath.is_dir() { - Some(sympath) - } else { - None + fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { + self.parser + .try_virt_read(addr.into(), buf) + .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) } } +type KernelDumpSymbolizer = Symbolizer; + /// The style of the symbols. #[derive(Default, Debug, Clone, ValueEnum)] enum SymbolStyle { @@ -168,6 +166,35 @@ struct CliArgs { offline: bool, } +/// Calculate a percentage value. +pub fn percentage(how_many: u64, how_many_total: u64) -> u32 { + assert!( + how_many_total > 0, + "{how_many_total} needs to be bigger than 0" + ); + + ((how_many * 1_00) / how_many_total) as u32 +} + +/// Parse the `_NT_SYMBOL_PATH` environment variable to try the path of a symbol +/// cache. +fn sympath() -> Option { + let env = env::var("_NT_SYMBOL_PATH").ok()?; + + if !env.starts_with("srv*") { + return None; + } + + let sympath = env.strip_prefix("srv*").unwrap(); + let sympath = PathBuf::from(sympath.split('*').next().unwrap()); + + if sympath.is_dir() { + Some(sympath) + } else { + None + } +} + /// Create the output file from an input. /// /// This logic was moved into a function to be able to handle the `--overwrite` @@ -204,7 +231,7 @@ fn get_output_file(args: &CliArgs, input: &Path, output: &Path) -> Result /// Process an input file and symbolize every line. fn symbolize_file( - symbolizer: &mut Symbolizer, + symbolizer: &mut KernelDumpSymbolizer, trace_path: impl AsRef, args: &CliArgs, ) -> Result { @@ -266,31 +293,6 @@ fn symbolize_file( Ok(lines_symbolized) } -#[derive(Debug)] -struct ParserWrapper { - parser: KernelDumpParser, -} - -impl ParserWrapper { - fn new(parser: KernelDumpParser) -> Self { - Self { parser } - } -} - -impl AddrSpace for ParserWrapper { - fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result { - self.parser - .virt_read(addr.into(), buf) - .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) - } - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { - self.parser - .try_virt_read(addr.into(), buf) - .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) - } -} - fn main() -> Result<()> { #[cfg(debug_assertions)] env_logger::init(); @@ -343,12 +345,11 @@ fn main() -> Result<()> { } // All right, ready to create the symbolizer. - let mut symbolizer = Symbolizer::new( - &symcache, - args.symsrv.clone(), - modules, - ParserWrapper::new(parser), - ); + let mut symbolizer = SymbolizerBuilder::default() + .online(args.symsrv.iter()) + .modules(modules.into_iter()) + .symcache(&symcache) + .build(ParserWrapper::new(parser))?; let paths = if args.trace.is_dir() { // If we received a path to a directory as input, then we will try to symbolize From 5ec62e3905e719cde675c2d2cafe188d6b32a2e9 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Sat, 8 Jun 2024 11:49:10 -0700 Subject: [PATCH 15/28] rename --- crates/symbolizer/src/builder.rs | 1 + src/main.rs | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/symbolizer/src/builder.rs b/crates/symbolizer/src/builder.rs index e3717d6..1ade93c 100644 --- a/crates/symbolizer/src/builder.rs +++ b/crates/symbolizer/src/builder.rs @@ -9,6 +9,7 @@ pub struct NoSymcache; pub struct Symcache(PathBuf); +/// Builder for [`Symbolizer`]. #[derive(Default, Debug)] pub struct Builder { symcache: SC, diff --git a/src/main.rs b/src/main.rs index 0ec8359..0a2c170 100644 --- a/src/main.rs +++ b/src/main.rs @@ -81,17 +81,17 @@ impl Display for Stats { } #[derive(Debug)] -struct ParserWrapper { +struct AddrSpaceWrapper { parser: KernelDumpParser, } -impl ParserWrapper { +impl AddrSpaceWrapper { fn new(parser: KernelDumpParser) -> Self { Self { parser } } } -impl AddrSpace for ParserWrapper { +impl AddrSpace for AddrSpaceWrapper { fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result { self.parser .virt_read(addr.into(), buf) @@ -105,7 +105,7 @@ impl AddrSpace for ParserWrapper { } } -type KernelDumpSymbolizer = Symbolizer; +type KernelDumpSymbolizer = Symbolizer; /// The style of the symbols. #[derive(Default, Debug, Clone, ValueEnum)] @@ -349,7 +349,7 @@ fn main() -> Result<()> { .online(args.symsrv.iter()) .modules(modules.into_iter()) .symcache(&symcache) - .build(ParserWrapper::new(parser))?; + .build(AddrSpaceWrapper::new(parser))?; let paths = if args.trace.is_dir() { // If we received a path to a directory as input, then we will try to symbolize From 61f4c6337191253aa932ed090f91148a3bf74671 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Mon, 10 Jun 2024 20:56:13 -0700 Subject: [PATCH 16/28] buncha stuff --- crates/symbolizer/Cargo.toml | 7 +- crates/symbolizer/src/builder.rs | 62 +++++----- crates/symbolizer/src/error.rs | 11 +- crates/symbolizer/src/guid.rs | 103 ++++++++++++++++- crates/symbolizer/src/lib.rs | 2 + crates/symbolizer/src/modules.rs | 2 +- crates/symbolizer/src/pe.rs | 5 +- crates/symbolizer/src/stats.rs | 44 +++++-- crates/symbolizer/src/symbolizer.rs | 23 ++-- crates/symbolizer/tests/basics.rs | 170 ++++++++++++++++++++++++---- src/human.rs | 10 +- src/main.rs | 14 ++- 12 files changed, 355 insertions(+), 98 deletions(-) diff --git a/crates/symbolizer/Cargo.toml b/crates/symbolizer/Cargo.toml index 004249a..b2af2b7 100644 --- a/crates/symbolizer/Cargo.toml +++ b/crates/symbolizer/Cargo.toml @@ -22,4 +22,9 @@ anyhow = "1.0.86" [dev-dependencies] kdmp-parser = "0.2" udmp-parser = "0.2" -object = {version = "0.36.0", default-features = false, features = ["read", "read_core", "pe", "std"]} +object = { version = "0.36.0", default-features = false, features = [ + "read", + "read_core", + "pe", + "std", +] } diff --git a/crates/symbolizer/src/builder.rs b/crates/symbolizer/src/builder.rs index 1ade93c..98aa1b1 100644 --- a/crates/symbolizer/src/builder.rs +++ b/crates/symbolizer/src/builder.rs @@ -11,18 +11,28 @@ pub struct Symcache(PathBuf); /// Builder for [`Symbolizer`]. #[derive(Default, Debug)] -pub struct Builder { +pub struct Builder { symcache: SC, modules: Vec, - mode: M, + mode: PdbLookupMode, } -#[derive(Default)] -pub struct Offline; -pub struct Online(Vec); +impl Builder { + pub fn msft_symsrv(self) -> Builder { + let Self { + symcache, modules, .. + } = self; -impl Builder { - pub fn online(self, symsrvs: impl Iterator>) -> Builder { + Builder { + symcache, + modules, + mode: PdbLookupMode::Online { + symsrvs: vec!["https://msdl.microsoft.com/download/symbols/".into()], + }, + } + } + + pub fn online(self, symsrvs: impl Iterator>) -> Builder { let Self { symcache, modules, .. } = self; @@ -30,13 +40,15 @@ impl Builder { Builder { symcache, modules, - mode: Online(symsrvs.map(Into::into).collect()), + mode: PdbLookupMode::Online { + symsrvs: symsrvs.map(Into::into).collect(), + }, } } } -impl Builder { - pub fn symcache(self, cache: &impl AsRef) -> Builder { +impl Builder { + pub fn symcache(self, cache: &impl AsRef) -> Builder { let Self { modules, mode, .. } = self; Builder { @@ -47,34 +59,16 @@ impl Builder { } } -impl Builder { - pub fn modules(mut self, modules: impl Iterator) -> Self { - self.modules = modules.collect(); +impl Builder { + pub fn modules<'a>(mut self, modules: impl IntoIterator) -> Self { + self.modules = modules.into_iter().cloned().collect(); self } } -impl Builder { - pub fn build(self, addr_space: AS) -> Result> - where - AS: AddrSpace, - { - let Self { - symcache, modules, .. - } = self; - let config = Config { - symcache: symcache.0, - modules, - mode: PdbLookupMode::Offline, - }; - - Symbolizer::new(addr_space, config) - } -} - -impl Builder { - pub fn build(self, addr_space: AS) -> Result> +impl Builder { + pub fn build(self, addr_space: &mut AS) -> Result> where AS: AddrSpace, { @@ -86,7 +80,7 @@ impl Builder { let config = Config { symcache: symcache.0, modules, - mode: PdbLookupMode::Online { symcache: mode.0 }, + mode, }; Symbolizer::new(addr_space, config) diff --git a/crates/symbolizer/src/error.rs b/crates/symbolizer/src/error.rs index 31f84d1..a1f3ebe 100644 --- a/crates/symbolizer/src/error.rs +++ b/crates/symbolizer/src/error.rs @@ -1,7 +1,8 @@ // Axel '0vercl0k' Souchet - May 27 2024 use std::io; -use std::num::TryFromIntError; +use std::num::{ParseIntError, TryFromIntError}; use std::path::PathBuf; +use std::str::Utf8Error; use std::string::FromUtf8Error; use pdb::PdbInternalSectionOffset; @@ -16,9 +17,13 @@ pub enum Error { #[error("pdb error: {0}")] Pdb(#[from] pdb::Error), #[error("from int error: {0}")] - FromIntError(#[from] TryFromIntError), + FromInt(#[from] TryFromIntError), + #[error("parse int error: {0}")] + ParseInt(#[from] ParseIntError), #[error("utf8: {0}")] - Utf8(#[from] FromUtf8Error), + Utf8(#[from] Utf8Error), + #[error("from utf8: {0}")] + FromUtf8(#[from] FromUtf8Error), #[error("pdb path {0:?} does not have a filename")] PdbPathNoName(PathBuf), #[error("failed to perform an i/o: {0}")] diff --git a/crates/symbolizer/src/guid.rs b/crates/symbolizer/src/guid.rs index 25da4d6..1f12cda 100644 --- a/crates/symbolizer/src/guid.rs +++ b/crates/symbolizer/src/guid.rs @@ -2,8 +2,12 @@ //! This module contains the implementation of the [`Guid`] type. use std::fmt::Display; +use anyhow::anyhow; + +use crate::Error; + /// A GUID. -#[derive(Default, Debug)] +#[derive(Default, Debug, PartialEq, Eq, Hash, Clone, Copy)] pub struct Guid { d0: u32, d1: u16, @@ -11,6 +15,29 @@ pub struct Guid { d3: [u8; 8], } +impl TryFrom<&str> for Guid { + type Error = Error; + + fn try_from(value: &str) -> Result { + if value.len() != 32 { + return Err(anyhow!("the guid str ({value:?}) should be 32 bytes long").into()); + } + + let mut bytes = [0; 16]; + for (n, chunk) in value.as_bytes().chunks_exact(2).enumerate() { + let s = std::str::from_utf8(chunk)?; + bytes[n] = u8::from_str_radix(s, 16)?; + } + + let d0 = u32::from_be_bytes(bytes[0..4].try_into().unwrap()); + let d1 = u16::from_be_bytes(bytes[4..6].try_into().unwrap()); + let d2 = u16::from_be_bytes(bytes[6..8].try_into().unwrap()); + let d3 = bytes[8..].try_into().unwrap(); + + Ok(Self { d0, d1, d2, d3 }) + } +} + impl From<[u8; 16]> for Guid { fn from(value: [u8; 16]) -> Self { let d0 = u32::from_le_bytes(value[0..4].try_into().unwrap()); @@ -40,3 +67,77 @@ impl Display for Guid { )) } } + +#[cfg(test)] +mod tests { + use crate::Guid; + + const NTDLL_GUID: Guid = Guid { + d0: 0x8d5d5ed5, + d1: 0xd5b8, + d2: 0xaa60, + d3: [0x9a, 0x82, 0x60, 0x0c, 0x14, 0xe3, 0x00, 0x4d], + }; + + #[test] + fn malformed_guids() { + assert!(Guid::try_from("8D5D5ED5D5B8AA609A82600C14E3004D1").is_err()); + + assert!(Guid::try_from("8D5D5ED5D5B8AA609A82600C14E3004").is_err()); + } + + #[test] + fn non_hex_guids() { + assert!(Guid::try_from("8D5D5ED5D5B8AA609A82600C14E3004Z").is_err()); + } + + #[test] + fn str() { + // 0:000> lmvm ntdll + // Browse full module list + // start end module name + // 00007ff9`aa450000 00007ff9`aa667000 ntdll (pdb symbols) + // c:\dbg\sym\ntdll.pdb\8D5D5ED5D5B8AA609A82600C14E3004D1\ntdll.pdb + assert_eq!( + Guid::try_from("8D5D5ED5D5B8AA609A82600C14E3004D").unwrap(), + NTDLL_GUID + ) + } + + #[test] + fn from() { + // 0:000> !dh ntdll + // ... + // SECTION HEADER #5 + // .rdata name + // 4D210 virtual size + // 132000 virtual address + // 4E000 size of raw data + // 132000 file pointer to raw data + // 0 file pointer to relocation table + // 0 file pointer to line numbers + // 0 number of relocations + // 0 number of line numbers + // 40000040 flags + // Initialized Data + // (no align specified) + // Read Only + // ... + // Debug Directories(4) + // Type Size Address Pointer + // cv 22 15b880 15b880 Format: RSDS, guid, 1, ntdll.pdb + // + // 0:000> db ntdll+15b880 + // 00007ff9`aa5ab880 52 53 44 53 d5 5e 5d 8d-b8 d5 60 aa 9a 82 60 0c + // RSDS.^]...`...`. 00007ff9`aa5ab890 14 e3 00 4d 01 00 00 00-6e 74 64 + // 6c 6c 2e 70 64 ...M....ntdll.pd + + assert_eq!( + Guid::from([ + 0xd5, 0x5e, 0x5d, 0x8d, 0xb8, 0xd5, 0x60, 0xaa, 0x9a, 0x82, 0x60, 0x0c, 0x14, 0xe3, + 0x00, 0x4d + ]), + NTDLL_GUID + ) + } +} diff --git a/crates/symbolizer/src/lib.rs b/crates/symbolizer/src/lib.rs index f6aa2e0..f94e1bf 100644 --- a/crates/symbolizer/src/lib.rs +++ b/crates/symbolizer/src/lib.rs @@ -13,6 +13,8 @@ mod symbolizer; pub use addr_space::AddrSpace; pub use builder::Builder; pub use error::{Error, Result}; +pub use guid::Guid; pub use modules::{Module, Modules}; +pub use pe::PdbId; pub use stats::Stats; pub use symbolizer::Symbolizer; diff --git a/crates/symbolizer/src/modules.rs b/crates/symbolizer/src/modules.rs index 79bf064..7411cb0 100644 --- a/crates/symbolizer/src/modules.rs +++ b/crates/symbolizer/src/modules.rs @@ -6,7 +6,7 @@ use std::ops::Range; use crate::misc::Rva; /// A user or kernel module. -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub struct Module { /// Where the module is loaded into virtual memory. pub at: Range, diff --git a/crates/symbolizer/src/pe.rs b/crates/symbolizer/src/pe.rs index 20d4ed1..bd1774a 100644 --- a/crates/symbolizer/src/pe.rs +++ b/crates/symbolizer/src/pe.rs @@ -192,7 +192,7 @@ pub const IMAGE_DEBUG_TYPE_CODEVIEW: u32 = 2; /// /// To download a PDB off Microsoft's Symbol Server, we need three pieces of /// information: the pdb name, a guid and its age. -#[derive(Debug, Default)] +#[derive(Debug, Default, PartialEq, Eq, Hash, Clone)] pub struct PdbId { pub path: PathBuf, pub guid: Guid, @@ -206,7 +206,8 @@ impl Display for PdbId { } impl PdbId { - pub fn new(path: PathBuf, guid: Guid, age: u32) -> Result { + pub fn new(path: impl Into, guid: Guid, age: u32) -> Result { + let path = path.into(); if path.file_name().is_none() { return Err(E::PdbPathNoName(path)); } diff --git a/crates/symbolizer/src/stats.rs b/crates/symbolizer/src/stats.rs index 9f23fe4..e22cd4c 100644 --- a/crates/symbolizer/src/stats.rs +++ b/crates/symbolizer/src/stats.rs @@ -2,35 +2,59 @@ //! This module contains the [`Stats`] type that is used to keep track of //! various statistics when symbolizing. use std::cell::RefCell; +use std::collections::HashMap; use std::fmt::Debug; +use crate::pe::PdbId; + #[derive(Debug, Default)] pub struct StatsBuilder { inner: RefCell, } /// Various statistics that the symbolizer keeps track of. -#[derive(Default, Clone, Copy, Debug)] +#[derive(Default, Clone, Debug)] pub struct Stats { /// The number of addresses symbolized. pub n_addrs: u64, - /// The number of downloaded PDB files. - pub n_downloads: u64, - /// The total size in bytes of downloads. - pub size_downloaded: u64, + /// The PDB identifiers that have been downloaded & the associated file size + /// in bytes. + pub downloaded: HashMap, /// The number of time the address cache was a hit. pub cache_hit: u64, } +impl Stats { + pub fn did_download(&self, pdb_id: PdbId) -> bool { + self.downloaded.contains_key(&pdb_id) + } + + pub fn amount_downloaded(&self) -> u64 { + let mut total = 0u64; + for value in self.downloaded.values() { + total = total.saturating_add(*value); + } + + total + } + + pub fn amount_pdb_downloaded(&self) -> usize { + self.downloaded.len() + } +} + impl StatsBuilder { pub fn build(&self) -> Stats { - *self.inner.borrow() + self.inner.borrow().clone() } - pub fn downloaded_file(&self, size: u64) { - let mut inner = self.inner.borrow_mut(); - inner.n_downloads += 1; - inner.size_downloaded += size; + pub fn downloaded_file(&self, pdb_id: PdbId, size: u64) { + assert!(self + .inner + .borrow_mut() + .downloaded + .insert(pdb_id, size) + .is_none()); } pub fn addr_symbolized(&self) { diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs index 394e6a6..a3bc85f 100644 --- a/crates/symbolizer/src/symbolizer.rs +++ b/crates/symbolizer/src/symbolizer.rs @@ -14,7 +14,7 @@ use anyhow::{anyhow, Context}; use log::{debug, trace, warn}; use crate::addr_space::AddrSpace; -use crate::builder::{Builder, NoSymcache, Offline}; +use crate::builder::{Builder, NoSymcache}; use crate::misc::{fast_hex32, fast_hex64}; use crate::modules::{Module, Modules}; use crate::pdbcache::{PdbCache, PdbCacheBuilder}; @@ -206,7 +206,7 @@ pub enum PdbLookupMode { Offline, Online { /// List of symbol servers to try to download PDBs from when needed. - symcache: Vec, + symsrvs: Vec, }, } @@ -226,7 +226,7 @@ pub struct Config { /// The [`Symbolizer`] is the main object that glues all the logic. /// /// It downloads, parses PDB information, and symbolizes. -pub struct Symbolizer +pub struct Symbolizer<'a, AS> where AS: AddrSpace, { @@ -242,7 +242,7 @@ where /// The kernel dump parser. We need this to be able to read PDB identifiers /// out of the PE headers, as well as reading the export tables of those /// modules. - addr_space: RefCell, + addr_space: RefCell<&'a mut AS>, /// List of symbol servers to try to download PDBs from when needed. symsrvs: Vec, /// Caches addresses to symbols. This allows us to not have to symbolize an @@ -254,23 +254,23 @@ where offline: bool, } -impl Symbolizer +impl<'a, AS> Symbolizer<'a, AS> where AS: AddrSpace, { - pub fn builder() -> Builder { + pub fn builder() -> Builder { Builder::default() } /// Create a [`Symbolizer`]. - pub fn new(addr_space: AS, config: Config) -> Result { + pub fn new(addr_space: &'a mut AS, config: Config) -> Result { let (offline, symsrvs) = match config.mode { PdbLookupMode::Offline => // If the user wants offline, then let's do that.. { (true, vec![]) } - PdbLookupMode::Online { symcache } => { + PdbLookupMode::Online { symsrvs } => { // ..otherwise, we'll try to resolve a DNS and see what happens. If we can't do // that, then we'll assume we're offline and turn the offline mode. // Otherwise, we'll assume we have online access and attempt to download PDBs. @@ -279,7 +279,7 @@ where debug!("Turning on 'offline' mode as you seem to not have internet access.."); } - (offline, symcache) + (offline, symsrvs) } }; @@ -341,7 +341,7 @@ where // Let's start by parsing the PE to get its exports, and PDB information if // there's any. - let pe = Pe::new(&mut *self.addr_space.borrow_mut(), module.at.start)?; + let pe = Pe::new(*self.addr_space.borrow_mut(), module.at.start)?; // Ingest the EAT. builder.ingest(pe.exports.into_iter()); @@ -356,7 +356,8 @@ where // .. and ingest it if we have one. if let Some((pdb_path, pdb_kind)) = pdb_path { if matches!(pdb_kind, PdbKind::Download) { - self.stats.downloaded_file(pdb_path.metadata()?.len()) + self.stats + .downloaded_file(pdb_id, pdb_path.metadata()?.len()) } builder.ingest_pdb(pdb_path)?; diff --git a/crates/symbolizer/tests/basics.rs b/crates/symbolizer/tests/basics.rs index 495a990..9e17ded 100644 --- a/crates/symbolizer/tests/basics.rs +++ b/crates/symbolizer/tests/basics.rs @@ -1,4 +1,5 @@ // Axel '0vercl0k' Souchet - May 30 2024 +use std::cmp::min; use std::env::temp_dir; use std::fs::{self, File}; use std::io::{self, Read, Seek, Write}; @@ -6,7 +7,8 @@ use std::path::{Path, PathBuf}; use object::read::pe::PeFile64; use object::{NativeEndian, ReadCache, ReadRef}; -use symbolizer::{AddrSpace, Builder, Module}; +use symbolizer::{AddrSpace, Builder, Guid, Module, PdbId}; +use udmp_parser::UserDumpParser; const EXPECTED_LEN: u64 = 0x90_00; const EXPECTED_RAW: [(u64, &str, &str); 4] = [ @@ -70,6 +72,40 @@ impl AddrSpace for RawAddressSpace { } } +#[test] +fn raw_virt() { + let mut raw_addr_space = RawAddressSpace::new(&fixture("mrt100.raw")).unwrap(); + let len = raw_addr_space.len(); + + let mut symb = Builder::default() + .modules(&vec![Module::new("mrt100", 0x0, len)]) + .msft_symsrv() + .symcache(&symcache("basics")) + .build(&mut raw_addr_space) + .unwrap(); + + for (addr, expected_full, expected_modoff) in EXPECTED_RAW { + let mut full = Vec::new(); + symb.full(addr, &mut full).unwrap(); + assert_eq!(String::from_utf8(full).unwrap(), expected_full); + + let mut modoff = Vec::new(); + symb.modoff(addr, &mut modoff).unwrap(); + assert_eq!(String::from_utf8(modoff).unwrap(), expected_modoff); + } + + let stats = symb.stats(); + assert_eq!(stats.amount_pdb_downloaded(), 1); + assert!(stats.did_download( + PdbId::new( + "mrt100.pdb", + "A20DA44BF08DB27D2BA0928F79447C7D".try_into().unwrap(), + 1 + ) + .unwrap() + )); +} + #[derive(Debug)] struct FileAddressSpace<'data> { pe: PeFile64<'data, &'data ReadCache>, @@ -127,15 +163,17 @@ impl<'data> AddrSpace for FileAddressSpace<'data> { } #[test] -fn raw_virt() { - let raw_addr_space = RawAddressSpace::new(&fixture("mrt100.raw")).unwrap(); - let len = raw_addr_space.len(); +fn raw_file() { + let file = File::open(fixture("mrt100.dll")).unwrap(); + let cache = ReadCache::new(file); + let mut file_addr_space = FileAddressSpace::new(&cache).unwrap(); + let len = file_addr_space.len(); let mut symb = Builder::default() - .modules(vec![Module::new("mrt100", 0x0, len)].into_iter()) + .modules(&vec![Module::new("mrt100", 0x0, len)]) .online(vec!["https://msdl.microsoft.com/download/symbols/"].into_iter()) .symcache(&symcache("basics")) - .build(raw_addr_space) + .build(&mut file_addr_space) .unwrap(); for (addr, expected_full, expected_modoff) in EXPECTED_RAW { @@ -148,32 +186,116 @@ fn raw_virt() { assert_eq!(String::from_utf8(modoff).unwrap(), expected_modoff); } - assert_eq!(symb.stats().n_downloads, 1); + let stats = symb.stats(); + assert_eq!(stats.amount_pdb_downloaded(), 1); + assert!(stats.did_download( + PdbId::new( + "mrt100.pdb", + "A20DA44BF08DB27D2BA0928F79447C7D".try_into().unwrap(), + 1 + ) + .unwrap() + )); +} + +#[derive(Debug)] +struct UserDumpAddrSpace<'a>(UserDumpParser<'a>); + +impl<'a> AddrSpace for UserDumpAddrSpace<'a> { + fn read_at(&mut self, addr: u64, mut buf: &mut [u8]) -> io::Result { + let mut cur_addr = addr; + let mut read_len = 0; + while read_len < buf.len() { + let Some(block) = self.0.get_mem_block(addr) else { + return Err(io::Error::new( + io::ErrorKind::Unsupported, + format!("no mem block found for {addr:#x}"), + )); + }; + + let Some(data) = block.data_from(cur_addr) else { + panic!(); + }; + + let left = buf.len() - read_len; + let len = min(data.len(), left); + buf.write(&data[..len]).unwrap(); + cur_addr += u64::try_from(len).unwrap(); + read_len += len; + } + + Ok(read_len) + } + + fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { + match self.read_at(addr, buf) { + Ok(sz) => Ok(Some(sz)), + Err(_) => Ok(None), + } + } } #[test] -fn raw_file() { - let file = File::open(fixture("mrt100.dll")).unwrap(); - let cache = ReadCache::new(file); - let file_addr_space = FileAddressSpace::new(&cache).unwrap(); - let len = file_addr_space.len(); +fn user_dump() { + let dump = UserDumpParser::new(fixture("udmp.dmp")).unwrap(); + let modules = dump + .modules() + .values() + .map(|module| { + Module::new( + module.path.file_name().unwrap().to_string_lossy(), + module.start_addr(), + module.end_addr(), + ) + }) + .collect::>(); + let mut udmp_addr_space = UserDumpAddrSpace(dump); let mut symb = Builder::default() - .modules(vec![Module::new("mrt100", 0x0, len)].into_iter()) - .online(vec!["https://msdl.microsoft.com/download/symbols/"].into_iter()) + .modules(&modules) + .msft_symsrv() .symcache(&symcache("basics")) - .build(file_addr_space) + .build(&mut udmp_addr_space) .unwrap(); - for (addr, expected_full, expected_modoff) in EXPECTED_RAW { - let mut full = Vec::new(); - symb.full(addr, &mut full).unwrap(); - assert_eq!(String::from_utf8(full).unwrap(), expected_full); + // 0:000> u 00007ff9`aa4f8eb2 + // ntdll!EvtIntReportEventWorker$fin$0+0x2: + // 00007ff9`aa4f8eb2 4883ec50 sub rsp,50h + let mut output = Vec::new(); + symb.full(0x7ff9aa4f8eb2, &mut output).unwrap(); + assert_eq!( + String::from_utf8(output).unwrap(), + "ntdll.dll!EvtIntReportEventWorker$fin$0+0x2" + ); - let mut modoff = Vec::new(); - symb.modoff(addr, &mut modoff).unwrap(); - assert_eq!(String::from_utf8(modoff).unwrap(), expected_modoff); - } + let stats = symb.stats(); + assert_eq!(stats.amount_pdb_downloaded(), 1); + assert!(stats.did_download( + PdbId::new( + "ntdll.pdb", + "8D5D5ED5D5B8AA609A82600C14E3004D".try_into().unwrap(), + 1 + ) + .unwrap() + )); + + drop(symb); + let mut symb_offline = Builder::default() + .symcache(&symcache("basics")) + .modules(&modules) + .build(&mut udmp_addr_space) + .unwrap(); + + // 0:000> u 00007ff9`aa4f8eb2 + // ntdll!EvtIntReportEventWorker$fin$0+0x2: + // 00007ff9`aa4f8eb2 4883ec50 sub rsp,50h + let mut output = Vec::new(); + symb_offline.full(0x7ff9aa4f8eb2, &mut output).unwrap(); + assert_ne!( + String::from_utf8(output).unwrap(), + "ntdll.dll!EvtIntReportEventWorker$fin$0+0x2" + ); - assert_eq!(symb.stats().n_downloads, 1); + let stats = symb_offline.stats(); + assert_eq!(stats.amount_pdb_downloaded(), 0); } diff --git a/src/human.rs b/src/human.rs index 9eb1892..1865b51 100644 --- a/src/human.rs +++ b/src/human.rs @@ -24,7 +24,7 @@ pub trait ToHuman: Sized + Copy { /// Blanket implementation for all the `T` that have what we need. impl ToHuman for T where - T: Into, + T: TryInto, T: Copy, { } @@ -34,12 +34,12 @@ pub struct HumanTime(T); impl Display for HumanTime where - T: Into, + T: TryInto, T: Copy, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut unit = "s"; - let mut time = self.0.into() as f64; + let mut time = self.0.try_into().map_err(|_| std::fmt::Error)? as f64; let m = 60f64; let h = m * m; let d = h * 24.0; @@ -92,12 +92,12 @@ pub struct HumanNumber(T); impl Display for HumanNumber where - T: Into, + T: TryInto, T: Copy, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut unit = ""; - let mut size = self.0.into() as f64; + let mut size = self.0.try_into().map_err(|_| std::fmt::Error)? as f64; let k = 1_000f64; let m = k * k; let b = m * k; diff --git a/src/main.rs b/src/main.rs index 0a2c170..f981941 100644 --- a/src/main.rs +++ b/src/main.rs @@ -67,12 +67,13 @@ impl Display for Stats { ) )?; - if self.symbolizer_stats.size_downloaded > 0 { + let size_downloaded = self.symbolizer_stats.amount_downloaded(); + if size_downloaded > 0 { write!( f, ", downloaded {} / {} PDBs)", - self.symbolizer_stats.size_downloaded.human_bytes(), - self.symbolizer_stats.n_downloads.human_number() + size_downloaded.human_bytes(), + self.symbolizer_stats.amount_pdb_downloaded().human_number() ) } else { write!(f, ")") @@ -105,7 +106,7 @@ impl AddrSpace for AddrSpaceWrapper { } } -type KernelDumpSymbolizer = Symbolizer; +type KernelDumpSymbolizer<'a> = Symbolizer<'a, AddrSpaceWrapper>; /// The style of the symbols. #[derive(Default, Debug, Clone, ValueEnum)] @@ -345,11 +346,12 @@ fn main() -> Result<()> { } // All right, ready to create the symbolizer. + let mut wrapper = AddrSpaceWrapper::new(parser); let mut symbolizer = SymbolizerBuilder::default() .online(args.symsrv.iter()) - .modules(modules.into_iter()) + .modules(&modules) .symcache(&symcache) - .build(AddrSpaceWrapper::new(parser))?; + .build(&mut wrapper)?; let paths = if args.trace.is_dir() { // If we received a path to a directory as input, then we will try to symbolize From 8cf4418142703dc261fbe8119a3483abd1ce3152 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Mon, 10 Jun 2024 21:04:29 -0700 Subject: [PATCH 17/28] oops --- .github/workflows/symbolizer-rs.yml | 2 +- crates/symbolizer/tests/basics.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/symbolizer-rs.yml b/.github/workflows/symbolizer-rs.yml index f00b910..dd26431 100644 --- a/.github/workflows/symbolizer-rs.yml +++ b/.github/workflows/symbolizer-rs.yml @@ -32,7 +32,7 @@ jobs: - name: cargo clippy env: RUSTFLAGS: "-Dwarnings" - run: cargo clippy --workspace + run: cargo clippy --workspace --tests doc: name: doc diff --git a/crates/symbolizer/tests/basics.rs b/crates/symbolizer/tests/basics.rs index 9e17ded..969e436 100644 --- a/crates/symbolizer/tests/basics.rs +++ b/crates/symbolizer/tests/basics.rs @@ -7,7 +7,7 @@ use std::path::{Path, PathBuf}; use object::read::pe::PeFile64; use object::{NativeEndian, ReadCache, ReadRef}; -use symbolizer::{AddrSpace, Builder, Guid, Module, PdbId}; +use symbolizer::{AddrSpace, Builder, Module, PdbId}; use udmp_parser::UserDumpParser; const EXPECTED_LEN: u64 = 0x90_00; @@ -219,7 +219,7 @@ impl<'a> AddrSpace for UserDumpAddrSpace<'a> { let left = buf.len() - read_len; let len = min(data.len(), left); - buf.write(&data[..len]).unwrap(); + buf.write_all(&data[..len]).unwrap(); cur_addr += u64::try_from(len).unwrap(); read_len += len; } From 7a9aba8de5fa0b68209ea19cf71be75b5596d42b Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Wed, 12 Jun 2024 07:42:24 -0700 Subject: [PATCH 18/28] example, fromstr --- .github/workflows/symbolizer-rs.yml | 2 +- Cargo.toml | 2 +- crates/symbolizer/Cargo.toml | 3 +- crates/symbolizer/examples/symbolize-dump.rs | 160 +++++++++++++++++++ crates/symbolizer/src/guid.rs | 24 +-- crates/symbolizer/tests/basics.rs | 6 +- 6 files changed, 180 insertions(+), 17 deletions(-) create mode 100644 crates/symbolizer/examples/symbolize-dump.rs diff --git a/.github/workflows/symbolizer-rs.yml b/.github/workflows/symbolizer-rs.yml index dd26431..9e9ddb9 100644 --- a/.github/workflows/symbolizer-rs.yml +++ b/.github/workflows/symbolizer-rs.yml @@ -32,7 +32,7 @@ jobs: - name: cargo clippy env: RUSTFLAGS: "-Dwarnings" - run: cargo clippy --workspace --tests + run: cargo clippy --workspace --tests --examples doc: name: doc diff --git a/Cargo.toml b/Cargo.toml index dc2d6a8..124748a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ clap = { version = "4.5", features = ["derive"] } symbolizer = { path = "crates/symbolizer" } env_logger = "0.11" itoa = "1.0" -kdmp-parser = "0.2" +kdmp-parser = "0.3" [profile.release] debug = true diff --git a/crates/symbolizer/Cargo.toml b/crates/symbolizer/Cargo.toml index b2af2b7..4ca1d36 100644 --- a/crates/symbolizer/Cargo.toml +++ b/crates/symbolizer/Cargo.toml @@ -20,7 +20,7 @@ thiserror = "1.0.61" anyhow = "1.0.86" [dev-dependencies] -kdmp-parser = "0.2" +kdmp-parser = "0.3" udmp-parser = "0.2" object = { version = "0.36.0", default-features = false, features = [ "read", @@ -28,3 +28,4 @@ object = { version = "0.36.0", default-features = false, features = [ "pe", "std", ] } +clap = { version = "4.5", features = ["derive"] } diff --git a/crates/symbolizer/examples/symbolize-dump.rs b/crates/symbolizer/examples/symbolize-dump.rs new file mode 100644 index 0000000..031e60f --- /dev/null +++ b/crates/symbolizer/examples/symbolize-dump.rs @@ -0,0 +1,160 @@ +// Axel '0vercl0k' Souchet +use std::cmp::min; +use std::env; +use std::io::{self, Write}; +use std::path::PathBuf; + +use anyhow::Result; +use clap::{Parser, Subcommand}; +use kdmp_parser::KernelDumpParser; +use symbolizer::{AddrSpace, Builder, Module}; +use udmp_parser::UserDumpParser; + +/// The command line arguments. +#[derive(Debug, Parser)] +#[command(about = "Symbolize an address from a user or kernel dump file.")] +enum CliArgs { + User { dump: PathBuf, addr: String }, + Kernel { dump: PathBuf, addr: String }, +} + +/// Parse the `_NT_SYMBOL_PATH` environment variable to try the path of a symbol +/// cache. +fn sympath() -> Option { + let env = env::var("_NT_SYMBOL_PATH").ok()?; + + if !env.starts_with("srv*") { + return None; + } + + let sympath = env.strip_prefix("srv*").unwrap(); + let sympath = PathBuf::from(sympath.split('*').next().unwrap()); + + if sympath.is_dir() { + Some(sympath) + } else { + None + } +} + +fn user(dmp: UserDumpParser, addr: u64) -> Result<()> { + #[derive(Debug)] + struct UserDumpAddrSpace<'a>(UserDumpParser<'a>); + impl<'a> AddrSpace for UserDumpAddrSpace<'a> { + fn read_at(&mut self, addr: u64, mut buf: &mut [u8]) -> io::Result { + let mut cur_addr = addr; + let mut read_len = 0; + while read_len < buf.len() { + let Some(block) = self.0.get_mem_block(addr) else { + return Err(io::Error::new( + io::ErrorKind::Unsupported, + format!("no mem block found for {addr:#x}"), + )); + }; + + let Some(data) = block.data_from(cur_addr) else { + panic!(); + }; + + let left = buf.len() - read_len; + let len = min(data.len(), left); + buf.write_all(&data[..len]).unwrap(); + cur_addr += u64::try_from(len).unwrap(); + read_len += len; + } + + Ok(read_len) + } + + fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { + match self.read_at(addr, buf) { + Ok(sz) => Ok(Some(sz)), + Err(_) => Ok(None), + } + } + } + + let modules = dmp + .modules() + .values() + .map(|module| { + Module::new( + module.path.file_name().unwrap().to_string_lossy(), + module.start_addr(), + module.end_addr(), + ) + }) + .collect::>(); + + let mut wrapper = UserDumpAddrSpace(dmp); + let mut symb = Builder::default() + .modules(&modules) + .msft_symsrv() + .symcache(&sympath().expect("define a _NT_SYMBOL_PATH")) + .build(&mut wrapper)?; + + let mut s = Vec::new(); + symb.full(addr, &mut s)?; + println!("{addr:#x}: {}", String::from_utf8(s)?); + + Ok(()) +} + +fn kernel(dmp: KernelDumpParser, addr: u64) -> Result<()> { + #[derive(Debug)] + struct KernelDumpAdrSpace<'a>(&'a KernelDumpParser); + impl<'a> AddrSpace for KernelDumpAdrSpace<'a> { + fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result { + self.0 + .virt_read(addr.into(), buf) + .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) + } + + fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { + self.0 + .try_virt_read(addr.into(), buf) + .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) + } + } + + let mut modules = Vec::new(); + for (at, name) in dmp.user_modules().chain(dmp.kernel_modules()) { + let (_, filename) = name.rsplit_once('\\').unwrap_or((name, name)); + modules.push(Module::new( + filename.to_string(), + at.start.into(), + at.end.into(), + )); + } + + let mut wrapper = KernelDumpAdrSpace(&dmp); + let mut symb = Builder::default() + .modules(&modules) + .msft_symsrv() + .symcache(&sympath().expect("define a _NT_SYMBOL_PATH")) + .build(&mut wrapper)?; + + let mut s = Vec::new(); + symb.full(addr, &mut s)?; + println!("{addr:#x}: {}", String::from_utf8(s)?); + + Ok(()) +} + +fn hex(x: &str) -> Result { + let no_backtick = x.replace('`', ""); + let no_prefix = no_backtick.strip_prefix("0x").unwrap_or(x); + + Ok(u64::from_str_radix(no_prefix, 16)?) +} + +fn main() -> Result<()> { + // Parse the CLI arguments. + let args = CliArgs::parse(); + match args { + CliArgs::User { dump, addr } => user(UserDumpParser::new(dump)?, hex(&addr)?), + CliArgs::Kernel { dump, addr } => kernel(KernelDumpParser::new(dump)?, hex(&addr)?), + }?; + + Ok(()) +} diff --git a/crates/symbolizer/src/guid.rs b/crates/symbolizer/src/guid.rs index 1f12cda..5bd682d 100644 --- a/crates/symbolizer/src/guid.rs +++ b/crates/symbolizer/src/guid.rs @@ -1,6 +1,7 @@ // Axel '0vercl0k' Souchet - February 20 2024 //! This module contains the implementation of the [`Guid`] type. use std::fmt::Display; +use std::str::FromStr; use anyhow::anyhow; @@ -15,16 +16,16 @@ pub struct Guid { d3: [u8; 8], } -impl TryFrom<&str> for Guid { - type Error = Error; +impl FromStr for Guid { + type Err = Error; - fn try_from(value: &str) -> Result { - if value.len() != 32 { - return Err(anyhow!("the guid str ({value:?}) should be 32 bytes long").into()); + fn from_str(s: &str) -> Result { + if s.len() != 32 { + return Err(anyhow!("the guid str ({s:?}) should be 32 bytes long").into()); } let mut bytes = [0; 16]; - for (n, chunk) in value.as_bytes().chunks_exact(2).enumerate() { + for (n, chunk) in s.as_bytes().chunks_exact(2).enumerate() { let s = std::str::from_utf8(chunk)?; bytes[n] = u8::from_str_radix(s, 16)?; } @@ -70,6 +71,8 @@ impl Display for Guid { #[cfg(test)] mod tests { + use std::str::FromStr; + use crate::Guid; const NTDLL_GUID: Guid = Guid { @@ -81,14 +84,13 @@ mod tests { #[test] fn malformed_guids() { - assert!(Guid::try_from("8D5D5ED5D5B8AA609A82600C14E3004D1").is_err()); - - assert!(Guid::try_from("8D5D5ED5D5B8AA609A82600C14E3004").is_err()); + assert!(Guid::from_str("8D5D5ED5D5B8AA609A82600C14E3004D1").is_err()); + assert!(Guid::from_str("8D5D5ED5D5B8AA609A82600C14E3004").is_err()); } #[test] fn non_hex_guids() { - assert!(Guid::try_from("8D5D5ED5D5B8AA609A82600C14E3004Z").is_err()); + assert!(Guid::from_str("8D5D5ED5D5B8AA609A82600C14E3004Z").is_err()); } #[test] @@ -99,7 +101,7 @@ mod tests { // 00007ff9`aa450000 00007ff9`aa667000 ntdll (pdb symbols) // c:\dbg\sym\ntdll.pdb\8D5D5ED5D5B8AA609A82600C14E3004D1\ntdll.pdb assert_eq!( - Guid::try_from("8D5D5ED5D5B8AA609A82600C14E3004D").unwrap(), + "8D5D5ED5D5B8AA609A82600C14E3004D".parse::().unwrap(), NTDLL_GUID ) } diff --git a/crates/symbolizer/tests/basics.rs b/crates/symbolizer/tests/basics.rs index 969e436..fbedf6d 100644 --- a/crates/symbolizer/tests/basics.rs +++ b/crates/symbolizer/tests/basics.rs @@ -99,7 +99,7 @@ fn raw_virt() { assert!(stats.did_download( PdbId::new( "mrt100.pdb", - "A20DA44BF08DB27D2BA0928F79447C7D".try_into().unwrap(), + "A20DA44BF08DB27D2BA0928F79447C7D".parse().unwrap(), 1 ) .unwrap() @@ -191,7 +191,7 @@ fn raw_file() { assert!(stats.did_download( PdbId::new( "mrt100.pdb", - "A20DA44BF08DB27D2BA0928F79447C7D".try_into().unwrap(), + "A20DA44BF08DB27D2BA0928F79447C7D".parse().unwrap(), 1 ) .unwrap() @@ -273,7 +273,7 @@ fn user_dump() { assert!(stats.did_download( PdbId::new( "ntdll.pdb", - "8D5D5ED5D5B8AA609A82600C14E3004D".try_into().unwrap(), + "8D5D5ED5D5B8AA609A82600C14E3004D".parse().unwrap(), 1 ) .unwrap() From cf0fc2bd47e6d2666742795915abff707d462433 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Wed, 12 Jun 2024 07:47:01 -0700 Subject: [PATCH 19/28] clippy --- crates/symbolizer/examples/symbolize-dump.rs | 2 +- src/main.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/symbolizer/examples/symbolize-dump.rs b/crates/symbolizer/examples/symbolize-dump.rs index 031e60f..104d1b1 100644 --- a/crates/symbolizer/examples/symbolize-dump.rs +++ b/crates/symbolizer/examples/symbolize-dump.rs @@ -5,7 +5,7 @@ use std::io::{self, Write}; use std::path::PathBuf; use anyhow::Result; -use clap::{Parser, Subcommand}; +use clap::Parser; use kdmp_parser::KernelDumpParser; use symbolizer::{AddrSpace, Builder, Module}; use udmp_parser::UserDumpParser; diff --git a/src/main.rs b/src/main.rs index f981941..b237502 100644 --- a/src/main.rs +++ b/src/main.rs @@ -323,7 +323,7 @@ fn main() -> Result<()> { // We need to parse the crash-dump to figure out where drivers / user-modules // are loaded at, and to read enough information out of the PE to download PDB // files ourselves. - let parser = KernelDumpParser::new(&crash_dump_path).context("failed to create dump parser")?; + let parser = KernelDumpParser::new(crash_dump_path).context("failed to create dump parser")?; // Figure out what is the symbol path we should be using. We will use the one // specified by the user, or will try to find one in the `_NT_SYMBOL_PATH` From 4ad6548b88e33cd49ca0b61b43fbffc0df7c1c2f Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Thu, 13 Jun 2024 16:35:02 -0700 Subject: [PATCH 20/28] rework api a bit --- .github/workflows/symbolizer-rs.yml | 4 +- crates/symbolizer/examples/symbolize-dump.rs | 16 ++++---- crates/symbolizer/src/builder.rs | 37 ++++++++---------- crates/symbolizer/src/symbolizer.rs | 27 +++++-------- .../{fixtures => testdatas}/mrt100.dll | Bin .../{fixtures => testdatas}/mrt100.raw | Bin crates/symbolizer/tests/basics.rs | 34 ++++++++-------- src/main.rs | 15 ++++--- 8 files changed, 59 insertions(+), 74 deletions(-) rename crates/symbolizer/{fixtures => testdatas}/mrt100.dll (100%) rename crates/symbolizer/{fixtures => testdatas}/mrt100.raw (100%) diff --git a/.github/workflows/symbolizer-rs.yml b/.github/workflows/symbolizer-rs.yml index 9e9ddb9..6986384 100644 --- a/.github/workflows/symbolizer-rs.yml +++ b/.github/workflows/symbolizer-rs.yml @@ -71,10 +71,10 @@ jobs: run: cargo test --release --workspace - name: cargo check - run: cargo check --workspace + run: cargo check --workspace --examples --tests - name: cargo build - run: cargo build --release --workspace + run: cargo build --release --workspace --examples --tests - name: Upload artifacts uses: actions/upload-artifact@v4 diff --git a/crates/symbolizer/examples/symbolize-dump.rs b/crates/symbolizer/examples/symbolize-dump.rs index 104d1b1..7b99d36 100644 --- a/crates/symbolizer/examples/symbolize-dump.rs +++ b/crates/symbolizer/examples/symbolize-dump.rs @@ -88,13 +88,13 @@ fn user(dmp: UserDumpParser, addr: u64) -> Result<()> { let mut wrapper = UserDumpAddrSpace(dmp); let mut symb = Builder::default() - .modules(&modules) + .modules(modules) .msft_symsrv() - .symcache(&sympath().expect("define a _NT_SYMBOL_PATH")) - .build(&mut wrapper)?; + .symcache(sympath().expect("define a _NT_SYMBOL_PATH")) + .build()?; let mut s = Vec::new(); - symb.full(addr, &mut s)?; + symb.full(&mut wrapper, addr, &mut s)?; println!("{addr:#x}: {}", String::from_utf8(s)?); Ok(()) @@ -129,13 +129,13 @@ fn kernel(dmp: KernelDumpParser, addr: u64) -> Result<()> { let mut wrapper = KernelDumpAdrSpace(&dmp); let mut symb = Builder::default() - .modules(&modules) + .modules(modules) .msft_symsrv() - .symcache(&sympath().expect("define a _NT_SYMBOL_PATH")) - .build(&mut wrapper)?; + .symcache(sympath().expect("define a _NT_SYMBOL_PATH")) + .build()?; let mut s = Vec::new(); - symb.full(addr, &mut s)?; + symb.full(&mut wrapper, addr, &mut s)?; println!("{addr:#x}: {}", String::from_utf8(s)?); Ok(()) diff --git a/crates/symbolizer/src/builder.rs b/crates/symbolizer/src/builder.rs index 98aa1b1..51b7fc4 100644 --- a/crates/symbolizer/src/builder.rs +++ b/crates/symbolizer/src/builder.rs @@ -1,8 +1,10 @@ // Axel '0vercl0k' Souchet - June 7 2024 use std::path::{Path, PathBuf}; +use anyhow::anyhow; + use crate::symbolizer::{Config, PdbLookupMode}; -use crate::{AddrSpace, Module, Result, Symbolizer}; +use crate::{Module, Result, Symbolizer}; #[derive(Default)] pub struct NoSymcache; @@ -19,20 +21,10 @@ pub struct Builder { impl Builder { pub fn msft_symsrv(self) -> Builder { - let Self { - symcache, modules, .. - } = self; - - Builder { - symcache, - modules, - mode: PdbLookupMode::Online { - symsrvs: vec!["https://msdl.microsoft.com/download/symbols/".into()], - }, - } + self.online(vec!["https://msdl.microsoft.com/download/symbols/"]) } - pub fn online(self, symsrvs: impl Iterator>) -> Builder { + pub fn online(self, symsrvs: impl IntoIterator>) -> Builder { let Self { symcache, modules, .. } = self; @@ -41,14 +33,14 @@ impl Builder { symcache, modules, mode: PdbLookupMode::Online { - symsrvs: symsrvs.map(Into::into).collect(), + symsrvs: symsrvs.into_iter().map(Into::into).collect(), }, } } } impl Builder { - pub fn symcache(self, cache: &impl AsRef) -> Builder { + pub fn symcache(self, cache: impl AsRef) -> Builder { let Self { modules, mode, .. } = self; Builder { @@ -60,29 +52,32 @@ impl Builder { } impl Builder { - pub fn modules<'a>(mut self, modules: impl IntoIterator) -> Self { - self.modules = modules.into_iter().cloned().collect(); + pub fn modules(mut self, modules: impl IntoIterator) -> Self { + self.modules = modules.into_iter().collect(); self } } impl Builder { - pub fn build(self, addr_space: &mut AS) -> Result> - where - AS: AddrSpace, + pub fn build(self) -> Result { let Self { symcache, modules, mode, } = self; + + if !symcache.0.exists() { + return Err(anyhow!("symcache {:?} does not exist", symcache.0).into()); + } + let config = Config { symcache: symcache.0, modules, mode, }; - Symbolizer::new(addr_space, config) + Symbolizer::new(config) } } diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs index a3bc85f..ce818d2 100644 --- a/crates/symbolizer/src/symbolizer.rs +++ b/crates/symbolizer/src/symbolizer.rs @@ -226,9 +226,7 @@ pub struct Config { /// The [`Symbolizer`] is the main object that glues all the logic. /// /// It downloads, parses PDB information, and symbolizes. -pub struct Symbolizer<'a, AS> -where - AS: AddrSpace, +pub struct Symbolizer { /// Keep track of some statistics such as the number of lines symbolized, /// PDB downloaded, etc. @@ -239,10 +237,6 @@ where /// This is the list of kernel / user modules read from the kernel crash /// dump. modules: Modules, - /// The kernel dump parser. We need this to be able to read PDB identifiers - /// out of the PE headers, as well as reading the export tables of those - /// modules. - addr_space: RefCell<&'a mut AS>, /// List of symbol servers to try to download PDBs from when needed. symsrvs: Vec, /// Caches addresses to symbols. This allows us to not have to symbolize an @@ -254,16 +248,14 @@ where offline: bool, } -impl<'a, AS> Symbolizer<'a, AS> -where - AS: AddrSpace, +impl Symbolizer { pub fn builder() -> Builder { Builder::default() } /// Create a [`Symbolizer`]. - pub fn new(addr_space: &'a mut AS, config: Config) -> Result { + pub fn new(config: Config) -> Result { let (offline, symsrvs) = match config.mode { PdbLookupMode::Offline => // If the user wants offline, then let's do that.. @@ -291,7 +283,6 @@ where stats: Default::default(), symcache: config.symcache, modules: Modules::new(config.modules), - addr_space: RefCell::new(addr_space), symsrvs, addr_cache: Default::default(), pdb_caches: Default::default(), @@ -322,7 +313,7 @@ where /// or remotely) and extract every bit of relevant information for us. /// Finally, the result will be kept around to symbolize addresses in that /// module faster in the future. - fn try_symbolize_addr_from_pdbs(&self, addr: u64) -> Result>> { + fn try_symbolize_addr_from_pdbs(&self, addr_space: &mut impl AddrSpace, addr: u64) -> Result>> { trace!("symbolizing address {addr:#x}.."); let Some(module) = self.modules.find(addr) else { trace!("address {addr:#x} doesn't belong to any module"); @@ -341,7 +332,7 @@ where // Let's start by parsing the PE to get its exports, and PDB information if // there's any. - let pe = Pe::new(*self.addr_space.borrow_mut(), module.at.start)?; + let pe = Pe::new(addr_space, module.at.start)?; // Ingest the EAT. builder.ingest(pe.exports.into_iter()); @@ -388,14 +379,14 @@ where /// If the address has been symbolized before, it will be in the /// `addr_cache` already. If not, we need to take the slow path and ask the /// right [`PdbCache`] which might require to create one in the first place. - fn try_symbolize_addr(&self, addr: u64) -> Result>> { + fn try_symbolize_addr(&self, addr_space: &mut impl AddrSpace, addr: u64) -> Result>> { match self.addr_cache.borrow_mut().entry(addr) { hash_map::Entry::Occupied(o) => { self.stats.cache_hit(); return Ok(Some(o.get().clone())); } hash_map::Entry::Vacant(v) => { - let Some(symbol) = self.try_symbolize_addr_from_pdbs(addr)? else { + let Some(symbol) = self.try_symbolize_addr_from_pdbs(addr_space, addr)? else { return Ok(None); }; @@ -432,8 +423,8 @@ where /// Symbolize `addr` in the `module!function+offset` style and write the /// result into `output`. - pub fn full(&mut self, addr: u64, output: &mut impl Write) -> Result<()> { - match self.try_symbolize_addr(addr)? { + pub fn full(&mut self, addr_space: &mut impl AddrSpace, addr: u64, output: &mut impl Write) -> Result<()> { + match self.try_symbolize_addr(addr_space, addr)? { Some(sym) => { output .write_all(sym.as_bytes()) diff --git a/crates/symbolizer/fixtures/mrt100.dll b/crates/symbolizer/testdatas/mrt100.dll similarity index 100% rename from crates/symbolizer/fixtures/mrt100.dll rename to crates/symbolizer/testdatas/mrt100.dll diff --git a/crates/symbolizer/fixtures/mrt100.raw b/crates/symbolizer/testdatas/mrt100.raw similarity index 100% rename from crates/symbolizer/fixtures/mrt100.raw rename to crates/symbolizer/testdatas/mrt100.raw diff --git a/crates/symbolizer/tests/basics.rs b/crates/symbolizer/tests/basics.rs index fbedf6d..b0b788d 100644 --- a/crates/symbolizer/tests/basics.rs +++ b/crates/symbolizer/tests/basics.rs @@ -26,9 +26,9 @@ const EXPECTED_RAW: [(u64, &str, &str); 4] = [ (0xdeadbeef, "0x00000000deadbeef", "0x00000000deadbeef"), ]; -fn fixture(name: &str) -> PathBuf { +fn testdata(name: &str) -> PathBuf { PathBuf::from(&env!("CARGO_MANIFEST_DIR")) - .join("fixtures") + .join("testdatas") .join(name) } @@ -74,19 +74,19 @@ impl AddrSpace for RawAddressSpace { #[test] fn raw_virt() { - let mut raw_addr_space = RawAddressSpace::new(&fixture("mrt100.raw")).unwrap(); + let mut raw_addr_space = RawAddressSpace::new(&testdata("mrt100.raw")).unwrap(); let len = raw_addr_space.len(); let mut symb = Builder::default() - .modules(&vec![Module::new("mrt100", 0x0, len)]) + .modules(vec![Module::new("mrt100", 0x0, len)]) .msft_symsrv() .symcache(&symcache("basics")) - .build(&mut raw_addr_space) + .build() .unwrap(); for (addr, expected_full, expected_modoff) in EXPECTED_RAW { let mut full = Vec::new(); - symb.full(addr, &mut full).unwrap(); + symb.full(&mut raw_addr_space, addr, &mut full).unwrap(); assert_eq!(String::from_utf8(full).unwrap(), expected_full); let mut modoff = Vec::new(); @@ -164,21 +164,21 @@ impl<'data> AddrSpace for FileAddressSpace<'data> { #[test] fn raw_file() { - let file = File::open(fixture("mrt100.dll")).unwrap(); + let file = File::open(testdata("mrt100.dll")).unwrap(); let cache = ReadCache::new(file); let mut file_addr_space = FileAddressSpace::new(&cache).unwrap(); let len = file_addr_space.len(); let mut symb = Builder::default() - .modules(&vec![Module::new("mrt100", 0x0, len)]) + .modules(vec![Module::new("mrt100", 0x0, len)]) .online(vec!["https://msdl.microsoft.com/download/symbols/"].into_iter()) .symcache(&symcache("basics")) - .build(&mut file_addr_space) + .build() .unwrap(); for (addr, expected_full, expected_modoff) in EXPECTED_RAW { let mut full = Vec::new(); - symb.full(addr, &mut full).unwrap(); + symb.full(&mut file_addr_space, addr, &mut full).unwrap(); assert_eq!(String::from_utf8(full).unwrap(), expected_full); let mut modoff = Vec::new(); @@ -237,7 +237,7 @@ impl<'a> AddrSpace for UserDumpAddrSpace<'a> { #[test] fn user_dump() { - let dump = UserDumpParser::new(fixture("udmp.dmp")).unwrap(); + let dump = UserDumpParser::new(testdata("udmp.dmp")).unwrap(); let modules = dump .modules() .values() @@ -252,17 +252,17 @@ fn user_dump() { let mut udmp_addr_space = UserDumpAddrSpace(dump); let mut symb = Builder::default() - .modules(&modules) + .modules(modules.clone()) .msft_symsrv() .symcache(&symcache("basics")) - .build(&mut udmp_addr_space) + .build() .unwrap(); // 0:000> u 00007ff9`aa4f8eb2 // ntdll!EvtIntReportEventWorker$fin$0+0x2: // 00007ff9`aa4f8eb2 4883ec50 sub rsp,50h let mut output = Vec::new(); - symb.full(0x7ff9aa4f8eb2, &mut output).unwrap(); + symb.full(&mut udmp_addr_space, 0x7ff9aa4f8eb2, &mut output).unwrap(); assert_eq!( String::from_utf8(output).unwrap(), "ntdll.dll!EvtIntReportEventWorker$fin$0+0x2" @@ -282,15 +282,15 @@ fn user_dump() { drop(symb); let mut symb_offline = Builder::default() .symcache(&symcache("basics")) - .modules(&modules) - .build(&mut udmp_addr_space) + .modules(modules) + .build() .unwrap(); // 0:000> u 00007ff9`aa4f8eb2 // ntdll!EvtIntReportEventWorker$fin$0+0x2: // 00007ff9`aa4f8eb2 4883ec50 sub rsp,50h let mut output = Vec::new(); - symb_offline.full(0x7ff9aa4f8eb2, &mut output).unwrap(); + symb_offline.full(&mut udmp_addr_space, 0x7ff9aa4f8eb2, &mut output).unwrap(); assert_ne!( String::from_utf8(output).unwrap(), "ntdll.dll!EvtIntReportEventWorker$fin$0+0x2" diff --git a/src/main.rs b/src/main.rs index b237502..9f7106f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -38,7 +38,7 @@ impl StatsBuilder { self.n_files += 1; } - pub fn stop(self, symbolizer: KernelDumpSymbolizer) -> Stats { + pub fn stop(self, symbolizer: Symbolizer) -> Stats { Stats { time: self.start.elapsed().as_secs(), n_files: self.n_files, @@ -106,8 +106,6 @@ impl AddrSpace for AddrSpaceWrapper { } } -type KernelDumpSymbolizer<'a> = Symbolizer<'a, AddrSpaceWrapper>; - /// The style of the symbols. #[derive(Default, Debug, Clone, ValueEnum)] enum SymbolStyle { @@ -232,7 +230,8 @@ fn get_output_file(args: &CliArgs, input: &Path, output: &Path) -> Result /// Process an input file and symbolize every line. fn symbolize_file( - symbolizer: &mut KernelDumpSymbolizer, + symbolizer: &mut Symbolizer, + addr_space: &mut impl AddrSpace, trace_path: impl AsRef, args: &CliArgs, ) -> Result { @@ -267,7 +266,7 @@ fn symbolize_file( match args.style { SymbolStyle::Modoff => symbolizer.modoff(addr, &mut output), - SymbolStyle::Full => symbolizer.full(addr, &mut output), + SymbolStyle::Full => symbolizer.full(addr_space, addr, &mut output), } .with_context(|| { format!( @@ -349,9 +348,9 @@ fn main() -> Result<()> { let mut wrapper = AddrSpaceWrapper::new(parser); let mut symbolizer = SymbolizerBuilder::default() .online(args.symsrv.iter()) - .modules(&modules) + .modules(modules) .symcache(&symcache) - .build(&mut wrapper)?; + .build()?; let paths = if args.trace.is_dir() { // If we received a path to a directory as input, then we will try to symbolize @@ -371,7 +370,7 @@ fn main() -> Result<()> { let total = paths.len(); for (idx, path) in paths.into_iter().enumerate() { print!("\x1B[2K\r"); - symbolize_file(&mut symbolizer, &path, &args)?; + symbolize_file(&mut symbolizer, &mut wrapper, &path, &args)?; stats_builder.done_file(); print!("[{}/{total}] {} done", idx + 1, path.display()); io::stdout().flush()?; From 961fdac74eb84271581825860434ced369557260 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Thu, 13 Jun 2024 16:41:40 -0700 Subject: [PATCH 21/28] fmt, clippy --- crates/symbolizer/src/builder.rs | 3 +-- crates/symbolizer/src/symbolizer.rs | 25 ++++++++++++++++++------- crates/symbolizer/tests/basics.rs | 17 ++++++++++------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/crates/symbolizer/src/builder.rs b/crates/symbolizer/src/builder.rs index 51b7fc4..c30bcc7 100644 --- a/crates/symbolizer/src/builder.rs +++ b/crates/symbolizer/src/builder.rs @@ -60,8 +60,7 @@ impl Builder { } impl Builder { - pub fn build(self) -> Result - { + pub fn build(self) -> Result { let Self { symcache, modules, diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs index ce818d2..f1c8a16 100644 --- a/crates/symbolizer/src/symbolizer.rs +++ b/crates/symbolizer/src/symbolizer.rs @@ -226,8 +226,7 @@ pub struct Config { /// The [`Symbolizer`] is the main object that glues all the logic. /// /// It downloads, parses PDB information, and symbolizes. -pub struct Symbolizer -{ +pub struct Symbolizer { /// Keep track of some statistics such as the number of lines symbolized, /// PDB downloaded, etc. stats: StatsBuilder, @@ -248,8 +247,7 @@ pub struct Symbolizer offline: bool, } -impl Symbolizer -{ +impl Symbolizer { pub fn builder() -> Builder { Builder::default() } @@ -313,7 +311,11 @@ impl Symbolizer /// or remotely) and extract every bit of relevant information for us. /// Finally, the result will be kept around to symbolize addresses in that /// module faster in the future. - fn try_symbolize_addr_from_pdbs(&self, addr_space: &mut impl AddrSpace, addr: u64) -> Result>> { + fn try_symbolize_addr_from_pdbs( + &self, + addr_space: &mut impl AddrSpace, + addr: u64, + ) -> Result>> { trace!("symbolizing address {addr:#x}.."); let Some(module) = self.modules.find(addr) else { trace!("address {addr:#x} doesn't belong to any module"); @@ -379,7 +381,11 @@ impl Symbolizer /// If the address has been symbolized before, it will be in the /// `addr_cache` already. If not, we need to take the slow path and ask the /// right [`PdbCache`] which might require to create one in the first place. - fn try_symbolize_addr(&self, addr_space: &mut impl AddrSpace, addr: u64) -> Result>> { + fn try_symbolize_addr( + &self, + addr_space: &mut impl AddrSpace, + addr: u64, + ) -> Result>> { match self.addr_cache.borrow_mut().entry(addr) { hash_map::Entry::Occupied(o) => { self.stats.cache_hit(); @@ -423,7 +429,12 @@ impl Symbolizer /// Symbolize `addr` in the `module!function+offset` style and write the /// result into `output`. - pub fn full(&mut self, addr_space: &mut impl AddrSpace, addr: u64, output: &mut impl Write) -> Result<()> { + pub fn full( + &mut self, + addr_space: &mut impl AddrSpace, + addr: u64, + output: &mut impl Write, + ) -> Result<()> { match self.try_symbolize_addr(addr_space, addr)? { Some(sym) => { output diff --git a/crates/symbolizer/tests/basics.rs b/crates/symbolizer/tests/basics.rs index b0b788d..7be510d 100644 --- a/crates/symbolizer/tests/basics.rs +++ b/crates/symbolizer/tests/basics.rs @@ -80,7 +80,7 @@ fn raw_virt() { let mut symb = Builder::default() .modules(vec![Module::new("mrt100", 0x0, len)]) .msft_symsrv() - .symcache(&symcache("basics")) + .symcache(symcache("basics")) .build() .unwrap(); @@ -171,8 +171,8 @@ fn raw_file() { let mut symb = Builder::default() .modules(vec![Module::new("mrt100", 0x0, len)]) - .online(vec!["https://msdl.microsoft.com/download/symbols/"].into_iter()) - .symcache(&symcache("basics")) + .online(vec!["https://msdl.microsoft.com/download/symbols/"]) + .symcache(symcache("basics")) .build() .unwrap(); @@ -254,7 +254,7 @@ fn user_dump() { let mut symb = Builder::default() .modules(modules.clone()) .msft_symsrv() - .symcache(&symcache("basics")) + .symcache(symcache("basics")) .build() .unwrap(); @@ -262,7 +262,8 @@ fn user_dump() { // ntdll!EvtIntReportEventWorker$fin$0+0x2: // 00007ff9`aa4f8eb2 4883ec50 sub rsp,50h let mut output = Vec::new(); - symb.full(&mut udmp_addr_space, 0x7ff9aa4f8eb2, &mut output).unwrap(); + symb.full(&mut udmp_addr_space, 0x7ff9aa4f8eb2, &mut output) + .unwrap(); assert_eq!( String::from_utf8(output).unwrap(), "ntdll.dll!EvtIntReportEventWorker$fin$0+0x2" @@ -281,7 +282,7 @@ fn user_dump() { drop(symb); let mut symb_offline = Builder::default() - .symcache(&symcache("basics")) + .symcache(symcache("basics")) .modules(modules) .build() .unwrap(); @@ -290,7 +291,9 @@ fn user_dump() { // ntdll!EvtIntReportEventWorker$fin$0+0x2: // 00007ff9`aa4f8eb2 4883ec50 sub rsp,50h let mut output = Vec::new(); - symb_offline.full(&mut udmp_addr_space, 0x7ff9aa4f8eb2, &mut output).unwrap(); + symb_offline + .full(&mut udmp_addr_space, 0x7ff9aa4f8eb2, &mut output) + .unwrap(); assert_ne!( String::from_utf8(output).unwrap(), "ntdll.dll!EvtIntReportEventWorker$fin$0+0x2" From 22496d3c8ad1c638a1ffe7091896d16cfc9472ee Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Thu, 13 Jun 2024 16:43:02 -0700 Subject: [PATCH 22/28] clippy --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 9f7106f..5ee5694 100644 --- a/src/main.rs +++ b/src/main.rs @@ -349,7 +349,7 @@ fn main() -> Result<()> { let mut symbolizer = SymbolizerBuilder::default() .online(args.symsrv.iter()) .modules(modules) - .symcache(&symcache) + .symcache(symcache) .build()?; let paths = if args.trace.is_dir() { From 4b15b89f0cef21f6dbbadbf4fefc422e9bee2c0b Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Tue, 18 Jun 2024 16:26:37 -0700 Subject: [PATCH 23/28] move it off in a different repo --- Cargo.toml | 23 +- crates/symbolizer/Cargo.toml | 31 - crates/symbolizer/README.md | 0 crates/symbolizer/examples/symbolize-dump.rs | 160 ------ crates/symbolizer/src/addr_space.rs | 66 --- crates/symbolizer/src/builder.rs | 82 --- crates/symbolizer/src/error.rs | 40 -- crates/symbolizer/src/guid.rs | 145 ----- crates/symbolizer/src/lib.rs | 20 - crates/symbolizer/src/misc.rs | 130 ----- crates/symbolizer/src/modules.rs | 98 ---- crates/symbolizer/src/pdbcache.rs | 551 ------------------ crates/symbolizer/src/pe.rs | 559 ------------------- crates/symbolizer/src/stats.rs | 67 --- crates/symbolizer/src/symbolizer.rs | 450 --------------- crates/symbolizer/testdatas/mrt100.dll | Bin 36864 -> 0 bytes crates/symbolizer/testdatas/mrt100.raw | Bin 36864 -> 0 bytes crates/symbolizer/tests/basics.rs | 304 ---------- 18 files changed, 7 insertions(+), 2719 deletions(-) delete mode 100644 crates/symbolizer/Cargo.toml delete mode 100644 crates/symbolizer/README.md delete mode 100644 crates/symbolizer/examples/symbolize-dump.rs delete mode 100644 crates/symbolizer/src/addr_space.rs delete mode 100644 crates/symbolizer/src/builder.rs delete mode 100644 crates/symbolizer/src/error.rs delete mode 100644 crates/symbolizer/src/guid.rs delete mode 100644 crates/symbolizer/src/lib.rs delete mode 100644 crates/symbolizer/src/misc.rs delete mode 100644 crates/symbolizer/src/modules.rs delete mode 100644 crates/symbolizer/src/pdbcache.rs delete mode 100644 crates/symbolizer/src/pe.rs delete mode 100644 crates/symbolizer/src/stats.rs delete mode 100644 crates/symbolizer/src/symbolizer.rs delete mode 100644 crates/symbolizer/testdatas/mrt100.dll delete mode 100644 crates/symbolizer/testdatas/mrt100.raw delete mode 100644 crates/symbolizer/tests/basics.rs diff --git a/Cargo.toml b/Cargo.toml index 124748a..75d57be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,17 +4,17 @@ categories = ["command-line-utilities", "development-tools::debugging"] description = "A fast execution trace symbolizer for Windows that runs on all major platforms and doesn't depend on any Microsoft libraries." include = ["/Cargo.toml", "/LICENSE", "/src/**", "README.md"] version = "0.1.0" -authors.workspace = true -license.workspace = true -rust-version.workspace = true -repository.workspace = true -keywords.workspace = true -edition.workspace = true +authors = ["Axel '0vercl0k' Souchet"] +license = "MIT" +rust-version = "1.70" +repository = "https://github.com/0vercl0k/symbolizer-rs" +keywords = ["windows", "kernel", "crash-dump", "symbols", "pdb"] +edition = "2021" [dependencies] anyhow = "1.0" clap = { version = "4.5", features = ["derive"] } -symbolizer = { path = "crates/symbolizer" } +addr-symbolizer = { path = "../addr-symbolizer" } env_logger = "0.11" itoa = "1.0" kdmp-parser = "0.3" @@ -22,12 +22,3 @@ kdmp-parser = "0.3" [profile.release] debug = true panic = "abort" - -[workspace] -members = ["crates/*"] -package.authors = ["Axel '0vercl0k' Souchet"] -package.license = "MIT" -package.rust-version = "1.70" -package.repository = "https://github.com/0vercl0k/symbolizer-rs" -package.keywords = ["windows", "kernel", "crash-dump", "symbols", "pdb"] -package.edition = "2021" diff --git a/crates/symbolizer/Cargo.toml b/crates/symbolizer/Cargo.toml deleted file mode 100644 index 4ca1d36..0000000 --- a/crates/symbolizer/Cargo.toml +++ /dev/null @@ -1,31 +0,0 @@ -[package] -name = "symbolizer" -version = "0.1.0" -# categories = ["command-line-utilities", "development-tools::debugging"] -# description = "A fast execution trace symbolizer for Windows that runs on all major platforms and doesn't depend on any Microsoft libraries." -include = ["/Cargo.toml", "/LICENSE", "/src/**", "README.md"] -authors.workspace = true -license.workspace = true -rust-version.workspace = true -repository.workspace = true -keywords.workspace = true -edition.workspace = true - -[dependencies] -pdb = "0.8" -log = "0.4" -msvc-demangler = "0.10" -ureq = { version = "2.9", default-features = false, features = ["tls", "gzip"] } -thiserror = "1.0.61" -anyhow = "1.0.86" - -[dev-dependencies] -kdmp-parser = "0.3" -udmp-parser = "0.2" -object = { version = "0.36.0", default-features = false, features = [ - "read", - "read_core", - "pe", - "std", -] } -clap = { version = "4.5", features = ["derive"] } diff --git a/crates/symbolizer/README.md b/crates/symbolizer/README.md deleted file mode 100644 index e69de29..0000000 diff --git a/crates/symbolizer/examples/symbolize-dump.rs b/crates/symbolizer/examples/symbolize-dump.rs deleted file mode 100644 index 7b99d36..0000000 --- a/crates/symbolizer/examples/symbolize-dump.rs +++ /dev/null @@ -1,160 +0,0 @@ -// Axel '0vercl0k' Souchet -use std::cmp::min; -use std::env; -use std::io::{self, Write}; -use std::path::PathBuf; - -use anyhow::Result; -use clap::Parser; -use kdmp_parser::KernelDumpParser; -use symbolizer::{AddrSpace, Builder, Module}; -use udmp_parser::UserDumpParser; - -/// The command line arguments. -#[derive(Debug, Parser)] -#[command(about = "Symbolize an address from a user or kernel dump file.")] -enum CliArgs { - User { dump: PathBuf, addr: String }, - Kernel { dump: PathBuf, addr: String }, -} - -/// Parse the `_NT_SYMBOL_PATH` environment variable to try the path of a symbol -/// cache. -fn sympath() -> Option { - let env = env::var("_NT_SYMBOL_PATH").ok()?; - - if !env.starts_with("srv*") { - return None; - } - - let sympath = env.strip_prefix("srv*").unwrap(); - let sympath = PathBuf::from(sympath.split('*').next().unwrap()); - - if sympath.is_dir() { - Some(sympath) - } else { - None - } -} - -fn user(dmp: UserDumpParser, addr: u64) -> Result<()> { - #[derive(Debug)] - struct UserDumpAddrSpace<'a>(UserDumpParser<'a>); - impl<'a> AddrSpace for UserDumpAddrSpace<'a> { - fn read_at(&mut self, addr: u64, mut buf: &mut [u8]) -> io::Result { - let mut cur_addr = addr; - let mut read_len = 0; - while read_len < buf.len() { - let Some(block) = self.0.get_mem_block(addr) else { - return Err(io::Error::new( - io::ErrorKind::Unsupported, - format!("no mem block found for {addr:#x}"), - )); - }; - - let Some(data) = block.data_from(cur_addr) else { - panic!(); - }; - - let left = buf.len() - read_len; - let len = min(data.len(), left); - buf.write_all(&data[..len]).unwrap(); - cur_addr += u64::try_from(len).unwrap(); - read_len += len; - } - - Ok(read_len) - } - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { - match self.read_at(addr, buf) { - Ok(sz) => Ok(Some(sz)), - Err(_) => Ok(None), - } - } - } - - let modules = dmp - .modules() - .values() - .map(|module| { - Module::new( - module.path.file_name().unwrap().to_string_lossy(), - module.start_addr(), - module.end_addr(), - ) - }) - .collect::>(); - - let mut wrapper = UserDumpAddrSpace(dmp); - let mut symb = Builder::default() - .modules(modules) - .msft_symsrv() - .symcache(sympath().expect("define a _NT_SYMBOL_PATH")) - .build()?; - - let mut s = Vec::new(); - symb.full(&mut wrapper, addr, &mut s)?; - println!("{addr:#x}: {}", String::from_utf8(s)?); - - Ok(()) -} - -fn kernel(dmp: KernelDumpParser, addr: u64) -> Result<()> { - #[derive(Debug)] - struct KernelDumpAdrSpace<'a>(&'a KernelDumpParser); - impl<'a> AddrSpace for KernelDumpAdrSpace<'a> { - fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result { - self.0 - .virt_read(addr.into(), buf) - .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) - } - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { - self.0 - .try_virt_read(addr.into(), buf) - .map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e)) - } - } - - let mut modules = Vec::new(); - for (at, name) in dmp.user_modules().chain(dmp.kernel_modules()) { - let (_, filename) = name.rsplit_once('\\').unwrap_or((name, name)); - modules.push(Module::new( - filename.to_string(), - at.start.into(), - at.end.into(), - )); - } - - let mut wrapper = KernelDumpAdrSpace(&dmp); - let mut symb = Builder::default() - .modules(modules) - .msft_symsrv() - .symcache(sympath().expect("define a _NT_SYMBOL_PATH")) - .build()?; - - let mut s = Vec::new(); - symb.full(&mut wrapper, addr, &mut s)?; - println!("{addr:#x}: {}", String::from_utf8(s)?); - - Ok(()) -} - -fn hex(x: &str) -> Result { - let no_backtick = x.replace('`', ""); - let no_prefix = no_backtick.strip_prefix("0x").unwrap_or(x); - - Ok(u64::from_str_radix(no_prefix, 16)?) -} - -fn main() -> Result<()> { - // Parse the CLI arguments. - let args = CliArgs::parse(); - match args { - CliArgs::User { dump, addr } => user(UserDumpParser::new(dump)?, hex(&addr)?), - CliArgs::Kernel { dump, addr } => kernel(KernelDumpParser::new(dump)?, hex(&addr)?), - }?; - - Ok(()) -} diff --git a/crates/symbolizer/src/addr_space.rs b/crates/symbolizer/src/addr_space.rs deleted file mode 100644 index 765ff15..0000000 --- a/crates/symbolizer/src/addr_space.rs +++ /dev/null @@ -1,66 +0,0 @@ -// Axel '0vercl0k' Souchet - May 30 2024 -use core::slice; -use std::io; -use std::mem::{self, MaybeUninit}; - -pub trait AddrSpace { - fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result; - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result>; - - fn read_exact_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result<()> { - let size = self.read_at(addr, buf)?; - - if size != buf.len() { - Err(io::Error::new( - io::ErrorKind::Other, - format!("could read only {size} bytes instead of {}", buf.len()), - )) - } else { - Ok(()) - } - } - - fn try_read_exact_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { - let Some(size) = self.try_read_at(addr, buf)? else { - return Ok(None); - }; - - if size != buf.len() { - Err(io::Error::new( - io::ErrorKind::Other, - format!("could read only {size} bytes instead of {}", buf.len()), - )) - } else { - Ok(Some(())) - } - } - - fn read_struct_at(&mut self, addr: u64) -> io::Result - where - S: Copy, - { - let mut t = MaybeUninit::uninit(); - let size_of_t = mem::size_of_val(&t); - let slice_over_t = - unsafe { slice::from_raw_parts_mut(t.as_mut_ptr() as *mut u8, size_of_t) }; - - self.read_exact_at(addr, slice_over_t)?; - - Ok(unsafe { t.assume_init() }) - } - - fn try_read_struct_at(&mut self, addr: u64) -> io::Result> - where - S: Copy, - { - let mut t: MaybeUninit = MaybeUninit::uninit(); - let size_of_t = mem::size_of_val(&t); - let slice_over_t = - unsafe { slice::from_raw_parts_mut(t.as_mut_ptr() as *mut u8, size_of_t) }; - - Ok(self - .try_read_exact_at(addr, slice_over_t)? - .map(|_| unsafe { t.assume_init() })) - } -} diff --git a/crates/symbolizer/src/builder.rs b/crates/symbolizer/src/builder.rs deleted file mode 100644 index c30bcc7..0000000 --- a/crates/symbolizer/src/builder.rs +++ /dev/null @@ -1,82 +0,0 @@ -// Axel '0vercl0k' Souchet - June 7 2024 -use std::path::{Path, PathBuf}; - -use anyhow::anyhow; - -use crate::symbolizer::{Config, PdbLookupMode}; -use crate::{Module, Result, Symbolizer}; - -#[derive(Default)] -pub struct NoSymcache; - -pub struct Symcache(PathBuf); - -/// Builder for [`Symbolizer`]. -#[derive(Default, Debug)] -pub struct Builder { - symcache: SC, - modules: Vec, - mode: PdbLookupMode, -} - -impl Builder { - pub fn msft_symsrv(self) -> Builder { - self.online(vec!["https://msdl.microsoft.com/download/symbols/"]) - } - - pub fn online(self, symsrvs: impl IntoIterator>) -> Builder { - let Self { - symcache, modules, .. - } = self; - - Builder { - symcache, - modules, - mode: PdbLookupMode::Online { - symsrvs: symsrvs.into_iter().map(Into::into).collect(), - }, - } - } -} - -impl Builder { - pub fn symcache(self, cache: impl AsRef) -> Builder { - let Self { modules, mode, .. } = self; - - Builder { - symcache: Symcache(cache.as_ref().to_path_buf()), - modules, - mode, - } - } -} - -impl Builder { - pub fn modules(mut self, modules: impl IntoIterator) -> Self { - self.modules = modules.into_iter().collect(); - - self - } -} - -impl Builder { - pub fn build(self) -> Result { - let Self { - symcache, - modules, - mode, - } = self; - - if !symcache.0.exists() { - return Err(anyhow!("symcache {:?} does not exist", symcache.0).into()); - } - - let config = Config { - symcache: symcache.0, - modules, - mode, - }; - - Symbolizer::new(config) - } -} diff --git a/crates/symbolizer/src/error.rs b/crates/symbolizer/src/error.rs deleted file mode 100644 index a1f3ebe..0000000 --- a/crates/symbolizer/src/error.rs +++ /dev/null @@ -1,40 +0,0 @@ -// Axel '0vercl0k' Souchet - May 27 2024 -use std::io; -use std::num::{ParseIntError, TryFromIntError}; -use std::path::PathBuf; -use std::str::Utf8Error; -use std::string::FromUtf8Error; - -use pdb::PdbInternalSectionOffset; -use thiserror::Error; - -pub type Result = std::result::Result; - -#[derive(Error, Debug)] -pub enum Error { - #[error("failed to get rva from symbol {0} / {1:?}")] - SymbolRva(String, PdbInternalSectionOffset), - #[error("pdb error: {0}")] - Pdb(#[from] pdb::Error), - #[error("from int error: {0}")] - FromInt(#[from] TryFromIntError), - #[error("parse int error: {0}")] - ParseInt(#[from] ParseIntError), - #[error("utf8: {0}")] - Utf8(#[from] Utf8Error), - #[error("from utf8: {0}")] - FromUtf8(#[from] FromUtf8Error), - #[error("pdb path {0:?} does not have a filename")] - PdbPathNoName(PathBuf), - #[error("failed to perform an i/o: {0}")] - Io(#[from] io::Error), - #[error("failed to download pdb {pdb_url}: {e}")] - DownloadPdb { - pdb_url: String, - e: Box, - }, - #[error("the module path is either 0 or larger than reasonable")] - CodeViewInvalidPath, - #[error("{0}")] - Anyhow(#[from] anyhow::Error), -} diff --git a/crates/symbolizer/src/guid.rs b/crates/symbolizer/src/guid.rs deleted file mode 100644 index 5bd682d..0000000 --- a/crates/symbolizer/src/guid.rs +++ /dev/null @@ -1,145 +0,0 @@ -// Axel '0vercl0k' Souchet - February 20 2024 -//! This module contains the implementation of the [`Guid`] type. -use std::fmt::Display; -use std::str::FromStr; - -use anyhow::anyhow; - -use crate::Error; - -/// A GUID. -#[derive(Default, Debug, PartialEq, Eq, Hash, Clone, Copy)] -pub struct Guid { - d0: u32, - d1: u16, - d2: u16, - d3: [u8; 8], -} - -impl FromStr for Guid { - type Err = Error; - - fn from_str(s: &str) -> Result { - if s.len() != 32 { - return Err(anyhow!("the guid str ({s:?}) should be 32 bytes long").into()); - } - - let mut bytes = [0; 16]; - for (n, chunk) in s.as_bytes().chunks_exact(2).enumerate() { - let s = std::str::from_utf8(chunk)?; - bytes[n] = u8::from_str_radix(s, 16)?; - } - - let d0 = u32::from_be_bytes(bytes[0..4].try_into().unwrap()); - let d1 = u16::from_be_bytes(bytes[4..6].try_into().unwrap()); - let d2 = u16::from_be_bytes(bytes[6..8].try_into().unwrap()); - let d3 = bytes[8..].try_into().unwrap(); - - Ok(Self { d0, d1, d2, d3 }) - } -} - -impl From<[u8; 16]> for Guid { - fn from(value: [u8; 16]) -> Self { - let d0 = u32::from_le_bytes(value[0..4].try_into().unwrap()); - let d1 = u16::from_le_bytes(value[4..6].try_into().unwrap()); - let d2 = u16::from_le_bytes(value[6..8].try_into().unwrap()); - let d3 = value[8..].try_into().unwrap(); - - Self { d0, d1, d2, d3 } - } -} - -impl Display for Guid { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!( - "{:08X}{:04X}{:04X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}", - self.d0, - self.d1, - self.d2, - self.d3[0], - self.d3[1], - self.d3[2], - self.d3[3], - self.d3[4], - self.d3[5], - self.d3[6], - self.d3[7] - )) - } -} - -#[cfg(test)] -mod tests { - use std::str::FromStr; - - use crate::Guid; - - const NTDLL_GUID: Guid = Guid { - d0: 0x8d5d5ed5, - d1: 0xd5b8, - d2: 0xaa60, - d3: [0x9a, 0x82, 0x60, 0x0c, 0x14, 0xe3, 0x00, 0x4d], - }; - - #[test] - fn malformed_guids() { - assert!(Guid::from_str("8D5D5ED5D5B8AA609A82600C14E3004D1").is_err()); - assert!(Guid::from_str("8D5D5ED5D5B8AA609A82600C14E3004").is_err()); - } - - #[test] - fn non_hex_guids() { - assert!(Guid::from_str("8D5D5ED5D5B8AA609A82600C14E3004Z").is_err()); - } - - #[test] - fn str() { - // 0:000> lmvm ntdll - // Browse full module list - // start end module name - // 00007ff9`aa450000 00007ff9`aa667000 ntdll (pdb symbols) - // c:\dbg\sym\ntdll.pdb\8D5D5ED5D5B8AA609A82600C14E3004D1\ntdll.pdb - assert_eq!( - "8D5D5ED5D5B8AA609A82600C14E3004D".parse::().unwrap(), - NTDLL_GUID - ) - } - - #[test] - fn from() { - // 0:000> !dh ntdll - // ... - // SECTION HEADER #5 - // .rdata name - // 4D210 virtual size - // 132000 virtual address - // 4E000 size of raw data - // 132000 file pointer to raw data - // 0 file pointer to relocation table - // 0 file pointer to line numbers - // 0 number of relocations - // 0 number of line numbers - // 40000040 flags - // Initialized Data - // (no align specified) - // Read Only - // ... - // Debug Directories(4) - // Type Size Address Pointer - // cv 22 15b880 15b880 Format: RSDS, guid, 1, ntdll.pdb - // - // 0:000> db ntdll+15b880 - // 00007ff9`aa5ab880 52 53 44 53 d5 5e 5d 8d-b8 d5 60 aa 9a 82 60 0c - // RSDS.^]...`...`. 00007ff9`aa5ab890 14 e3 00 4d 01 00 00 00-6e 74 64 - // 6c 6c 2e 70 64 ...M....ntdll.pd - - assert_eq!( - Guid::from([ - 0xd5, 0x5e, 0x5d, 0x8d, 0xb8, 0xd5, 0x60, 0xaa, 0x9a, 0x82, 0x60, 0x0c, 0x14, 0xe3, - 0x00, 0x4d - ]), - NTDLL_GUID - ) - } -} diff --git a/crates/symbolizer/src/lib.rs b/crates/symbolizer/src/lib.rs deleted file mode 100644 index f94e1bf..0000000 --- a/crates/symbolizer/src/lib.rs +++ /dev/null @@ -1,20 +0,0 @@ -// Axel '0vercl0k' Souchet - May 26th 2024 -mod addr_space; -mod builder; -mod error; -mod guid; -mod misc; -mod modules; -mod pdbcache; -mod pe; -mod stats; -mod symbolizer; - -pub use addr_space::AddrSpace; -pub use builder::Builder; -pub use error::{Error, Result}; -pub use guid::Guid; -pub use modules::{Module, Modules}; -pub use pe::PdbId; -pub use stats::Stats; -pub use symbolizer::Symbolizer; diff --git a/crates/symbolizer/src/misc.rs b/crates/symbolizer/src/misc.rs deleted file mode 100644 index 8df4116..0000000 --- a/crates/symbolizer/src/misc.rs +++ /dev/null @@ -1,130 +0,0 @@ -// Axel '0vercl0k' Souchet - February 23 2024 -//! This module contains the implementation of a bunch of misc utility functions -//! that didn't really fit anywhere else. - -/// A relative address. -pub type Rva = u32; - -/// Convert an `u64` into an hex string. -/// -/// Highly inspired by 'Fast unsigned integer to hex string' by Johnny Lee: -/// - -pub fn fast_hex64(buffer: &mut [u8; 16], u: u64) -> &[u8] { - let mut x = u as u128; - - // Arrange each digit into their own byte. Each byte will become the ascii - // character representing its digit. For example, we want to arrange: - // - `0x00000000_00000000_DEADBEEF_BAADC0DE` into - // - `0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E`. - // - // Here's a step by step using `0xDEADBEEF_BAADC0DE`: - // 1. `x = 0x00000000_DEADBEEF_00000000_BAADC0DE` - // 2. `x = 0xDEAD0000_BEEF0000_BAAD0000_C0DE0000` - // 3. `x = 0x00DE00AD_00BE00EF_00BA00AD_00C000DE` - // 4. `x = 0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E` - // - // Let's start the dance.. - x = (x & 0xFFFFFFFF_00000000) << 32 | x; - x = ((x & 0xFFFF0000_00000000_FFFF0000) << 32) | ((x & 0xFFFF_00000000_0000FFFF) << 16); - x = ((x & 0xFF0000_00FF0000_00FF0000_00FF0000) >> 16) - | ((x & 0xFF000000_FF000000_FF000000_FF000000) >> 8); - x = ((x & 0xF000F0_00F000F0_00F000F0_00F000F0) << 4) | (x & 0xF000F_000F000F_000F000F_000F000F); - - // This creates a mask where there'll be a 0x01 byte for each digit that is - // alpha. For example, for `0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E` we want: - // `0x01010101_01010101_01010101_01000101`. The trick is to add 0x06 to each - // byte; if the digit is 0x0A..0x0F, adding 0x06 will give 0x10..0x15 (notice - // the leading '1'). Note that we need to ADD, not an OR :). At this point, - // right shifting by 4 bits means to position that leading '1' in the lower - // nibble which is then 'grabbed' via the masking with 0x01.. - let mask = - ((x + 0x06060606_06060606_06060606_06060606) >> 4) & 0x01010101_01010101_01010101_01010101; - - // Turn each digit into their ASCII equivalent by setting the high nibble of - // each byte to 0x3. `0x0D0E0A0D_0B0E0E0F_0B0A0A0D_0C000D0E` becomes - // `0x3D3E3A3D_3B3E3E3F_3B3A3A3D_3C303D3E`. - x |= 0x30303030_30303030_30303030_30303030; - - // The last step is to adjust the ASCII byte for every digit that was in - // 0xA..0xF. We basically add to each of those bytes `0x27` to make them lower - // case alpha ASCII. - // For example: - // - `0x01010101_01010101_01010101_01000101 * 0x27 = - // 0x27272727_27272727_27272727_27002727` - // - `0x3D3E3A3D_3B3E3E3F_3B3A3A3D_3C303D3E + - // 0x27272727_27272727_27272727_27002727` = - // `0x64656164_62656566_62616164_63306465` - // - // Why `0x27`? Well, if we have the digit 'a', we end up with `0x3a`. ASCII - // character for 'a' is `0x61`, so `0x61 - 0x3a = 0x27`. - x += 0x27 * mask; - - // Transform the integer into a slice of bytes. - buffer.copy_from_slice(&x.to_be_bytes()); - - // We're done! - buffer -} - -/// Convert an `u32` into an hex string. -/// -/// Highly inspired by 'Fast unsigned integer to hex string' by Johnny Lee: -/// - -/// -/// Adapted to not bother shuffling the bytes in little endian; we simply read -/// the final integer as big endian. -pub fn fast_hex32(buffer: &mut [u8; 8], u: u32) -> &[u8] { - let mut x = u as u64; - - // Here's a step by step using `0xDEADBEEF`: - // 1. `x = 0x0000DEAD_0000BEEF` - // 2. `x = 0xDE00AD00_BE00EF00` - // 3. `x = 0x0D0E0A0D_0B0E0E0F` - x = (x & 0xFFFF0000) << 16 | x; - x = ((x & 0x0000FF00_0000FF00) << 16) | ((x & 0x000000FF_000000FF) << 8); - x = ((x & 0xF000F000_F000F000) >> 4) | ((x & 0x0F000F00_0F000F00) >> 8); - - let mask = ((x + 0x06060606_06060606) >> 4) & 0x01010101_01010101; - x |= 0x30303030_30303030; - x += 0x27 * mask; - - buffer.copy_from_slice(&x.to_be_bytes()); - - buffer -} - -#[cfg(test)] -mod tests { - use super::{fast_hex32, fast_hex64}; - - #[test] - fn hex32() { - let mut buffer = [0; 8]; - let out = fast_hex32(&mut buffer, 0xdeadbeef); - assert_eq!(out, &[b'd', b'e', b'a', b'd', b'b', b'e', b'e', b'f']); - let out = fast_hex32(&mut buffer, 0xdead); - assert_eq!(out, &[b'0', b'0', b'0', b'0', b'd', b'e', b'a', b'd']); - let out = fast_hex32(&mut buffer, 0x0); - assert_eq!(out, &[b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0']); - } - - #[test] - fn hex64() { - let mut buffer = [0; 16]; - let out = fast_hex64(&mut buffer, 0xdeadbeef_baadc0de); - assert_eq!(out, &[ - b'd', b'e', b'a', b'd', b'b', b'e', b'e', b'f', b'b', b'a', b'a', b'd', b'c', b'0', - b'd', b'e' - ]); - let out = fast_hex64(&mut buffer, 0xdeadbeef); - assert_eq!(out, &[ - b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'd', b'e', b'a', b'd', b'b', b'e', - b'e', b'f' - ]); - let out = fast_hex64(&mut buffer, 0x0); - assert_eq!(out, &[ - b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0', - b'0', b'0' - ]); - } -} diff --git a/crates/symbolizer/src/modules.rs b/crates/symbolizer/src/modules.rs deleted file mode 100644 index 7411cb0..0000000 --- a/crates/symbolizer/src/modules.rs +++ /dev/null @@ -1,98 +0,0 @@ -// Axel '0vercl0k' Souchet - February 23 2024 -//! This module contains the implementation of the [`Module`] type which is used -//! across the codebase. -use std::ops::Range; - -use crate::misc::Rva; - -/// A user or kernel module. -#[derive(Debug, Default, Clone)] -pub struct Module { - /// Where the module is loaded into virtual memory. - pub at: Range, - /// The name of the module. - pub name: String, -} - -impl Module { - /// Create a [`Module`]. - pub fn new(name: impl Into, start: u64, end: u64) -> Self { - Module { - name: name.into(), - at: start..end, - } - } - - /// Calculate an rva from an `addr` contained in this module. - pub fn rva(&self, addr: u64) -> Rva { - debug_assert!(self.at.contains(&addr)); - - let offset = addr - self.at.start; - assert!(offset <= u32::MAX.into()); - - offset as Rva - } -} - -/// A list of modules. -#[derive(Debug, Default)] -pub struct Modules(Vec); - -impl Modules { - /// Create a [`Modules`]. - pub fn new(mut modules: Vec) -> Self { - // Order the modules by their end addresses. - modules.sort_unstable_by_key(|e| e.at.end); - - Self(modules) - } - - /// Find the module that contains this address. - pub fn find(&self, addr: u64) -> Option<&Module> { - // Find the index of the first module that might contain `addr`. - let idx = self.0.partition_point(|m| m.at.end <= addr); - - // At this point there's several cases to handle. - // - // `partition_point` returns the len of the vector if it couldn't - // partition in two. This means that `addr` cannot possibly be contained by any - // of the modules we have, so we're done. - if idx == self.0.len() { - return None; - } - - // We found the first module that has an end address larger than `addr`. This - // doesn't mean the module contains the address though. Imagine `addr` = - // `0xdeadbeef`, and `module.at` = `[0xefefefef, 0xefefefef+1]`. - let module = &self.0[idx]; - - // For this reason, we'll make sure the `addr` is in fact included, otherwise - // it's not a match. - if module.at.contains(&addr) { - Some(module) - } else { - None - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn basics() { - let modules = Modules::new(vec![ - Module::new("foo".to_string(), 0x1_000, 0x2_000), - Module::new("foobar".to_string(), 0x2_000, 0x3_000), - Module::new("bar".to_string(), 0x4_000, 0x5_000), - ]); - - assert!(modules.find(1).is_none()); - assert_eq!(modules.find(0x1_000).unwrap().name, "foo"); - assert_eq!(modules.find(0x2_000).unwrap().name, "foobar"); - assert!(modules.find(0x3_000).is_none()); - assert_eq!(modules.find(0x4_fff).unwrap().name, "bar"); - assert!(modules.find(0x6_000).is_none()); - } -} diff --git a/crates/symbolizer/src/pdbcache.rs b/crates/symbolizer/src/pdbcache.rs deleted file mode 100644 index 591f050..0000000 --- a/crates/symbolizer/src/pdbcache.rs +++ /dev/null @@ -1,551 +0,0 @@ -// Axel '0vercl0k' Souchet - February 23 2024 -//! This module contains the implementation of the [`PdbCache`] which is the -//! object that keeps track of all the information needed to symbolize an -//! address. It extracts it out of a PDB file and doesn't require it to be -//! around. -use std::borrow::Cow; -use std::collections::BTreeMap; -use std::fmt::Debug; -use std::fs::File; -use std::ops::Range; -use std::path::Path; - -use anyhow::{anyhow, Context}; -use log::{trace, warn}; -use pdb::{ - AddressMap, FallibleIterator, LineProgram, PdbInternalSectionOffset, ProcedureSymbol, - StringTable, Symbol, -}; - -use crate::error::Result; -use crate::modules::Module; - -/// A PDB opened via file access. -type Pdb<'p> = pdb::PDB<'p, File>; -/// A relative virtual address. -type Rva = u32; -/// A vector of lines. -type Lines = Vec; - -/// A line of source code. -/// -/// It maps an offset in the function (like offset -/// `0x1122`) to a line number in a file (like `foo.c:1336`). -#[derive(Default, Debug)] -struct Line { - /// Offset from the start of the function it's part of. - offset: u32, - /// The line number. - number: Rva, - /// Most lines in a function are part of the same file which is stored in - /// the [`SourceInfo`] which contains the lines info. But in case, this line - /// is stored in a different file, this is its path. - override_path: Option, -} - -impl Line { - /// Build a [`Line`]. - fn new(offset: Rva, number: u32, override_path: Option) -> Self { - Self { - offset, - number, - override_path, - } - } -} - -/// Information related to source code. -/// -/// It contains the path to the source code file as well as a mapping between -/// offsets to line number. -#[derive(Debug, Default)] -struct SourceInfo { - path: String, - lines: Lines, -} - -impl SourceInfo { - /// Build a [`SourceInfo`]. - fn new(path: String, lines: Lines) -> Self { - // We assume we have at least one entry in the vector. - assert!(!lines.is_empty()); - - Self { path, lines } - } - - /// Find the line number associated to a raw offset from inside a function. - pub fn line(&self, offset: Rva) -> &Line { - self.lines - .iter() - .find(|&line| offset < line.offset) - .unwrap_or(self.lines.last().unwrap()) - } -} - -/// A function. -/// -/// It has a name and if available, information related to the file where the -/// function is implemented as well as the line of code. -#[derive(Default, Debug)] -struct FuncSymbol { - pub name: String, - pub source_info: Option, -} - -impl FuncSymbol { - fn new(name: String, source_info: Option) -> Self { - Self { name, source_info } - } -} - -impl From for FuncSymbol { - fn from(value: BuilderEntry) -> Self { - FuncSymbol::new(value.name, value.source_info) - } -} - -/// A PDB cache. -/// -/// It basically is a data-structure that stores all the information about the -/// functions defined in a module. It extracts everything it can off a PDB and -/// then toss it as a PDB file is larger than a [`PdbCache`] (as we don't care -/// about types, variables, etc.). -pub struct PdbCache { - module_name: String, - addrs: Vec>, - symbols: Vec, -} - -impl Debug for PdbCache { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PdbCache") - .field("module_name", &self.module_name) - .finish_non_exhaustive() - } -} - -impl PdbCache { - fn new(module_name: String, mut symbols: Vec<(Range, FuncSymbol)>) -> Self { - symbols.sort_unstable_by_key(|(range, _)| range.end); - let (addrs, symbols) = symbols.into_iter().unzip(); - - Self { - module_name, - addrs, - symbols, - } - } - - /// Find a symbol that contains `rva`. - fn find_sym(&self, rva: Rva) -> Option<(Rva, &FuncSymbol)> { - let idx = self.addrs.partition_point(|probe| probe.end <= rva); - if idx == self.addrs.len() { - return None; - } - - let range = &self.addrs[idx]; - let func = &self.symbols[idx]; - - if range.contains(&rva) { - Some((range.start, func)) - } else { - None - } - } - - /// Symbolize a raw address. - /// - /// This pulls as much information as possible and use any private symbols - /// if there were any. - pub fn symbolize(&self, rva: Rva) -> Result { - // Find the function in which this `rva` is in. - let Some((func_rva, func_symbol)) = self.find_sym(rva) else { - // If we can't find one, we'll just return `module.dll+rva`. - return Ok(format!("{}+{:#x}", self.module_name, rva)); - }; - - debug_assert!( - rva >= func_rva, - "The function RVA should always be smaller or equal to the instruction RVA" - ); - - // Calculate the instruction offset. - let instr_offset = rva - func_rva; - - // Generate the symbolized version. - let symbolized = if let Some(source_info) = &func_symbol.source_info { - // If we have knowledge about in which source file this is implemented and at - // what line number, then let's use it.. - let line = source_info.line(instr_offset); - let path = line.override_path.as_ref().unwrap_or(&source_info.path); - - format!( - "{}!{}+{instr_offset:#x} [{path} @ {}]", - self.module_name, func_symbol.name, line.number - ) - } else { - // ..or do without if it's not present. - format!( - "{}!{}+{instr_offset:#x}", - self.module_name, func_symbol.name - ) - }; - - Ok(symbolized) - } -} - -#[derive(Debug)] -struct BuilderEntry { - name: String, - len: Option, - source_info: Option, -} - -impl BuilderEntry { - fn new(name: String, len: Option, source_info: Option) -> Self { - Self { - name, - len, - source_info, - } - } - - fn with_name(name: String) -> Self { - Self::new(name, None, None) - } - - fn len(&self) -> Option { - self.len - } -} - -/// A [`PdbCache`] builder. -/// -/// Ultimately, we try to get as much information possible on modules with what -/// we have. Sometimes, we have public symbols, something we have private -/// symbols and.. sometimes we have nothing (just its PE). If we're dealing with -/// just information extracted from the PE or the public symbols, we have no -/// available information regarding function sizes. -/// -/// To work around this issue, what we do is we aggregate all the information in -/// a data structure ordered by the function address. Once we're done, we walk -/// this data structure and we calculate the size of the current function by -/// 'filling the hole' up to the next function. This is innacurate but is the -/// only heuristic I had in store. -/// -/// Once we have a list of functions with assigned sizes, we can finally build -/// the [`PdbCache`] structure. -#[derive(Debug)] -pub struct PdbCacheBuilder<'module> { - /// The module for which this symbol cache is for. - module: &'module Module, - /// Basically all the information we've extracted so far. - /// - /// The key is the [`Rva`] of where the module starts, and the value is a - /// [`BuilderEntry`] which describes the symbol with more details. - symbols: BTreeMap, -} - -impl<'module> PdbCacheBuilder<'module> { - pub fn new(module: &'module Module) -> Self { - Self { - module, - symbols: BTreeMap::new(), - } - } - - /// Ingest a bunch of symbols. - /// - /// The key is the start [`Rva`] of the symbol, and the value is its name. - /// This is used to ingest for example a list of functions acquired from the - /// EAT of a module. - pub fn ingest(&mut self, symbols: impl Iterator) { - for (start, name) in symbols { - self.symbols.insert(start, BuilderEntry::with_name(name)); - } - } - - /// Parse a [`ProcedureSymbol`]. - fn parse_procedure_symbol( - &mut self, - proc: &ProcedureSymbol, - address_map: &AddressMap, - string_table: &StringTable, - line_program: &LineProgram, - ) -> Result<()> { - let proc_name = proc.name.to_string(); - let Some(pdb::Rva(proc_rva)) = proc.offset.to_rva(address_map) else { - warn!( - "failed to get rva for procedure symbol {} / {:?}, skipping", - proc_name, proc.offset - ); - - return Ok(()); - }; - - let mut lines_it = line_program.lines_for_symbol(proc.offset); - let mut main_path = None; - let mut lines = Lines::new(); - while let Some(line) = lines_it.next()? { - let Some(pdb::Rva(line_rva)) = line.offset.to_rva(address_map) else { - warn!( - "failed to get rva for procedure symbol {} / {:?}, skipping", - proc_name, proc.offset - ); - continue; - }; - - let file_info = line_program.get_file_info(line.file_index)?; - let override_path = if main_path.is_none() { - main_path = Some(file_info.name.to_string_lossy(string_table)?.into_owned()); - - None - } else { - let new_path = file_info.name.to_string_lossy(string_table)?; - if main_path.as_ref().unwrap() != &new_path { - Some(new_path.into_owned()) - } else { - None - } - }; - - if line_rva < proc_rva { - warn!( - "symbol {} has confusing line information, skipping", - proc_name - ); - return Ok(()); - } - - let line_offset = line_rva - proc_rva; - lines.push(Line::new(line_offset, line.line_start, override_path)); - } - - self.ingest_symbol( - address_map, - proc_name, - proc.offset, - Some(proc.len), - main_path.map(|p| SourceInfo::new(p, lines)), - ) - } - - /// Ingest a symbol with a name. - fn ingest_symbol_with_name( - &mut self, - address_map: &AddressMap, - name: Cow, - offset: PdbInternalSectionOffset, - ) -> Result<()> { - self.ingest_symbol(address_map, name, offset, None, None) - } - - /// Ingest a symbol with a name and a length. - fn ingest_symbol_with_len( - &mut self, - address_map: &AddressMap, - name: Cow, - offset: PdbInternalSectionOffset, - len: u32, - ) -> Result<()> { - self.ingest_symbol(address_map, name, offset, Some(len), None) - } - - /// Ingest a symbol. - /// - /// Some symbols have a length, some don't, some have source information, - /// some don't. - fn ingest_symbol( - &mut self, - address_map: &AddressMap, - name: Cow, - offset: PdbInternalSectionOffset, - len: Option, - source_info: Option, - ) -> Result<()> { - use msvc_demangler::DemangleFlags as DF; - let undecorated_name = if name.as_bytes().starts_with(b"?") { - // Demangle the name if it starts by a '?'. - match msvc_demangler::demangle(&name, DF::NAME_ONLY) { - Ok(o) => o, - Err(e) => { - // Let's log the failures as warning because we might care one day? - warn!("failed to demangle {name}: {e}"); - - // But if it failed, returning the mangled name is better than nothing. - name.into_owned() - } - } - } else { - // If it isn't a mangled name, then do.. nothing! - name.into() - }; - - // Get the RVA.. - let pdb::Rva(rva) = offset.to_rva(address_map).ok_or_else(|| { - anyhow!( - "failed to get rva from symbol {undecorated_name} / {:?}, skipping", - offset - ) - })?; - - //.. and build an entry for this function. - if let Some(prev) = self - .symbols - .insert(rva, BuilderEntry::new(undecorated_name, len, source_info)) - { - warn!("symbol {prev:?} in dbi has a duplicate at {rva:#x}, skipping"); - } - - Ok(()) - } - - /// Parse a [`Symbol`]. - fn parse_symbol( - &mut self, - address_map: &AddressMap, - symbol: &Symbol, - extra: Option<(&StringTable, &LineProgram)>, - ) -> Result<()> { - use pdb::SymbolData as SD; - match symbol.parse()? { - SD::Procedure(procedure) => { - let (string_table, line_program) = extra.unwrap(); - self.parse_procedure_symbol(&procedure, address_map, string_table, line_program)?; - } - SD::Public(public) => { - self.ingest_symbol_with_name(address_map, public.name.to_string(), public.offset)?; - } - SD::Thunk(thunk) => { - self.ingest_symbol_with_len( - address_map, - thunk.name.to_string(), - thunk.offset, - thunk.len.into(), - )?; - } - _ => {} - }; - - Ok(()) - } - - /// Parse the debug information stream which is where private symbols are - /// stored in. - fn parse_dbi(&mut self, pdb: &mut Pdb, address_map: &AddressMap) -> Result<()> { - // If we don't have a string table, there is no point in parsing the debug - // information stream. - let Ok(string_table) = pdb.string_table() else { - return Ok(()); - }; - - // Grab the debug information stream.. - let dbi = pdb.debug_information().context("failed to get dbi")?; - // ..and grab / walk through the 'modules'. - let mut module_it = dbi.modules()?; - while let Some(module) = module_it.next()? { - // Get information about the module; such as its path, its symbols, etc. - let Some(info) = pdb.module_info(&module)? else { - warn!("no module info: {:?}", &module); - continue; - }; - - let program = info.line_program()?; - let mut sym_it = info.symbols()?; - while let Some(symbol) = sym_it.next()? { - if let Err(e) = - self.parse_symbol(address_map, &symbol, Some((&string_table, &program))) - { - warn!("parsing {symbol:?} failed with {e:?}, ignoring"); - } - } - } - - Ok(()) - } - - /// Parse the global symbols stream where public symbols are stored at. - fn parse_global_symbols_table( - &mut self, - pdb: &mut Pdb, - address_map: &AddressMap, - ) -> Result<()> { - let global_symbols = pdb.global_symbols()?; - let mut symbol_it = global_symbols.iter(); - while let Some(symbol) = symbol_it.next()? { - if let Err(e) = self.parse_symbol(address_map, &symbol, None) { - warn!("parsing {symbol:?} failed with {e:?}, ignoring"); - } - } - - Ok(()) - } - - /// Ingest a PDB file stored on the file system. - pub fn ingest_pdb(&mut self, pdb_path: impl AsRef) -> Result<()> { - // Open the PDB file. - let pdb_path = pdb_path.as_ref(); - let pdb_file = - File::open(pdb_path).with_context(|| format!("failed to open pdb {pdb_path:?}"))?; - let mut pdb = - Pdb::open(pdb_file).with_context(|| format!("failed to parse pdb {pdb_path:?}"))?; - - trace!("ingesting {pdb_path:?}.."); - - let address_map = pdb.address_map()?; - // Parse and extract all the bits we need from the private symbols first. We do - // this first, because procedures have a length field which isn't the case for - // global symbols. And if there's duplicates, then we'd rather have the entry - // that gives us the exact procedure length instead of us guessing. - self.parse_dbi(&mut pdb, &address_map) - .map_err(|e| anyhow!("failed to parse private symbols: {e:?}"))?; - - // Parse and extract all the bits we need from the global symbols.. - self.parse_global_symbols_table(&mut pdb, &address_map) - .map_err(|e| anyhow!("failed to parse public symbols: {e:?}"))?; - - Ok(()) - } - - /// Build a [`PdbCache`]. - pub fn build(mut self) -> Result { - // Walk the map of ordered RVA with their associated names and assign lengths to - // each of the functions. Some function have a length and some don't. If a - // length is specified, then we'll use it; otherwise we'll assign one ourselves. - let mut functions = Vec::with_capacity(self.symbols.len()); - while let Some((start, entry)) = self.symbols.pop_first() { - let end = if let Some(len) = entry.len() { - // If we have a length, then use it! - start - .checked_add(len) - .ok_or(anyhow!("overflow w/ symbol range"))? - } else { - // If we don't have one, the length of the current function is basically up to - // the next entry. - // - // For example imagine the below: - // - RVA: 0, Name: foo - // - RVA: 5, Name: bar - // - // In that case, we consider the first function to be spanning [0..4], and - // [5..module size] for the second one. - - // If we didn't pop the last value, then just check the one that follows. - if let Some((&end, _)) = self.symbols.first_key_value() { - end - } else { - debug_assert!(self.module.at.end > self.module.at.start); - - // If we popped the last value, just use the module end as the end of the range. - u32::try_from(self.module.at.end - self.module.at.start) - .context("failed to make the module's end into a rva")? - } - }; - - functions.push((Range { start, end }, entry.into())); - } - - Ok(PdbCache::new(self.module.name.clone(), functions)) - } -} diff --git a/crates/symbolizer/src/pe.rs b/crates/symbolizer/src/pe.rs deleted file mode 100644 index bd1774a..0000000 --- a/crates/symbolizer/src/pe.rs +++ /dev/null @@ -1,559 +0,0 @@ -// Axel '0vercl0k' Souchet - February 19 2024 -//! This module contains the implementation of the PE parsing we do. -use std::fmt::Display; -use std::mem; -use std::ops::Range; -use std::path::PathBuf; - -use anyhow::{anyhow, Context}; -use log::debug; - -use crate::addr_space::AddrSpace; -use crate::guid::Guid; -use crate::misc::Rva; -use crate::{Error as E, Result}; - -/// The IMAGE_DOS_HEADER. -#[derive(Default, Debug, Clone, Copy)] -#[repr(C, packed(2))] -pub struct ImageDosHeader { - pub e_magic: u16, - pub e_cblp: u16, - pub e_cp: u16, - pub e_crlc: u16, - pub e_cparhdr: u16, - pub e_minalloc: u16, - pub e_maxalloc: u16, - pub e_ss: u16, - pub e_sp: u16, - pub e_csum: u16, - pub e_ip: u16, - pub e_cs: u16, - pub e_lfarlc: u16, - pub e_ovno: u16, - pub e_res: [u16; 4], - pub e_oemid: u16, - pub e_oeminfo: u16, - pub e_res2: [u16; 10], - pub e_lfanew: i32, -} - -/// The IMAGE_NT_HEADERS. -#[derive(Default, Debug, Clone, Copy)] -#[repr(C)] -struct NtHeaders { - signature: u32, - file_hdr: ImageFileHeader, -} - -/// The IMAGE_FILE_HEADER. -#[derive(Default, Debug, Clone, Copy)] -#[repr(C)] -pub struct ImageFileHeader { - pub machine: u16, - pub number_of_sections: u16, - pub time_date_stamp: u32, - pub pointer_to_symbol_table: u32, - pub number_of_symbols: u32, - pub size_of_optional_header: u16, - pub characteristics: u16, -} - -/// The IMAGE_DATA_DIRECTORY. -#[derive(Debug, Default, Clone, Copy)] -#[repr(C)] -pub struct ImageDataDirectory { - pub virtual_address: u32, - pub size: u32, -} - -/// The IMAGE_OPTIONAL_HEADER32. -#[derive(Debug, Default)] -#[repr(C)] -pub struct ImageOptionalHeader32 { - pub magic: u16, - pub major_linker_version: u8, - pub minor_linker_version: u8, - pub size_of_code: u32, - pub size_of_initialized_data: u32, - pub size_of_uninitialized_data: u32, - pub address_of_entry_point: u32, - pub base_of_code: u32, - pub base_of_data: u32, - pub image_base: u32, - pub section_alignment: u32, - pub file_alignment: u32, - pub major_operating_system_version: u16, - pub minor_operating_system_version: u16, - pub major_image_version: u16, - pub minor_image_version: u16, - pub major_subsystem_version: u16, - pub minor_subsystem_version: u16, - pub win32_version_value: u32, - pub size_of_image: u32, - pub size_of_headers: u32, - pub check_sum: u32, - pub subsystem: u16, - pub dll_characteristics: u16, - pub size_of_stack_reserve: u32, - pub size_of_stack_commit: u32, - pub size_of_heap_reserve: u32, - pub size_of_heap_commit: u32, - pub loader_flags: u32, - pub number_of_rva_and_sizes: u32, - pub data_directory: [ImageDataDirectory; 16], -} - -/// The IMAGE_OPTIONAL_HEADER64. -#[derive(Debug, Default, Clone, Copy)] -#[repr(C, packed(4))] -pub struct ImageOptionalHeader64 { - pub magic: u16, - pub major_linker_version: u8, - pub minor_linker_version: u8, - pub size_of_code: u32, - pub size_of_initialized_data: u32, - pub size_of_uninitialized_data: u32, - pub address_of_entry_point: u32, - pub base_of_code: u32, - pub image_base: u64, - pub section_alignment: u32, - pub file_alignment: u32, - pub major_operating_system_version: u16, - pub minor_operating_system_version: u16, - pub major_image_version: u16, - pub minor_image_version: u16, - pub major_subsystem_version: u16, - pub minor_subsystem_version: u16, - pub win32_version_value: u32, - pub size_of_image: u32, - pub size_of_headers: u32, - pub check_sum: u32, - pub subsystem: u16, - pub dll_characteristics: u16, - pub size_of_stack_reserve: u64, - pub size_of_stack_commit: u64, - pub size_of_heap_reserve: u64, - pub size_of_heap_commit: u64, - pub loader_flags: u32, - pub number_of_rva_and_sizes: u32, - pub data_directory: [ImageDataDirectory; 16], -} - -/// The IMAGE_DEBUG_DIRECTORY. -#[derive(Default, Debug, Clone, Copy)] -#[repr(C)] -pub struct ImageDebugDirectory { - pub characteristics: u32, - pub time_date_stamp: u32, - pub major_version: u16, - pub minor_version: u16, - pub type_: u32, - pub size_of_data: u32, - pub address_of_raw_data: u32, - pub pointer_to_raw_data: u32, -} - -/// The IMAGE_EXPORT_DIRECTORY. -#[derive(Default, Debug, Clone, Copy)] -#[repr(C)] -pub struct ImageExportDirectory { - pub characteristics: u32, - pub time_date_stamp: u32, - pub major_version: u16, - pub minor_version: u16, - pub name: u32, - pub base: u32, - pub number_of_functions: u32, - pub number_of_names: u32, - pub address_of_functions: u32, - pub address_of_names: u32, - pub address_of_name_ordinals: u32, -} - -/// The code view information. -#[derive(Debug, Default, Clone, Copy)] -#[repr(C)] -pub struct Codeview { - pub signature: u32, - pub guid: [u8; 16], - pub age: u32, - // name follows -} - -pub const IMAGE_NT_SIGNATURE: u32 = 17744; -pub const IMAGE_FILE_MACHINE_AMD64: u16 = 34404; -pub const IMAGE_DIRECTORY_ENTRY_EXPORT: usize = 0; -pub const IMAGE_DIRECTORY_ENTRY_DEBUG: usize = 6; - -pub const IMAGE_DEBUG_TYPE_CODEVIEW: u32 = 2; - -/// A PDB identifier. -/// -/// To download a PDB off Microsoft's Symbol Server, we need three pieces of -/// information: the pdb name, a guid and its age. -#[derive(Debug, Default, PartialEq, Eq, Hash, Clone)] -pub struct PdbId { - pub path: PathBuf, - pub guid: Guid, - pub age: u32, -} - -impl Display for PdbId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("{:?}:{}:{:x}", self.path, self.guid, self.age)) - } -} - -impl PdbId { - pub fn new(path: impl Into, guid: Guid, age: u32) -> Result { - let path = path.into(); - if path.file_name().is_none() { - return Err(E::PdbPathNoName(path)); - } - - Ok(Self { path, guid, age }) - } - - pub fn name(&self) -> String { - self.path - .file_name() - .unwrap() - .to_string_lossy() - .into_owned() - } -} - -/// Calculate the absolute address of an array entry based on a base address, -/// the RVA of the array, the entry index and the size of an entry. -pub fn array_offset(base: u64, rva_array: u32, idx: u32, entry_size: usize) -> Option { - let offset = idx.checked_mul(entry_size.try_into().ok()?)?; - let rva = rva_array.checked_add(offset)?; - - base.checked_add(rva.into()) -} - -/// Read a NULL terminated string from the dump file at a specific address. -pub fn read_string( - addr_space: &mut impl AddrSpace, - mut addr: u64, - max: usize, -) -> Result> { - let mut s = String::new(); - let mut terminated = false; - for _ in 0..max { - let mut buf = [0]; - let Some(()) = addr_space - .try_read_exact_at(addr, &mut buf) - .context("failed reading null terminated string")? - else { - return Ok(None); - }; - - let c = buf[0]; - if c == 0 { - terminated = true; - break; - } - - s.push(c.into()); - addr += 1; - } - - if !terminated && s.len() == max { - s.push_str("..."); - } - - Ok(Some(s)) -} - -/// A parsed PE headers. -/// -/// We are only interested in the PDB identifier and the Export Address Table. -#[derive(Debug, Default)] -pub struct Pe { - pub pdb_id: Option, - pub exports: Vec<(Rva, String)>, -} - -impl Pe { - pub fn new(addr_space: &mut impl AddrSpace, base: u64) -> Result { - // All right let's parse the PE. - debug!("parsing PE @ {:#x}", base); - - // Read the DOS/NT headers. - let dos_hdr = addr_space - .read_struct_at::(base) - .context("failed to read ImageDosHeader")?; - let nt_hdr_addr = base - .checked_add(dos_hdr.e_lfanew.try_into().unwrap()) - .ok_or(anyhow!("overflow w/ e_lfanew"))?; - let nt_hdr = addr_space - .read_struct_at::(nt_hdr_addr) - .context("failed to read Ntheaders")?; - - // Let's verify the signature.. - if nt_hdr.signature != IMAGE_NT_SIGNATURE { - return Err(anyhow!("wrong PE signature for {base:#x}").into()); - } - - // ..and let's ignore non x64 PEs. - if nt_hdr.file_hdr.machine != IMAGE_FILE_MACHINE_AMD64 { - return Err(anyhow!("wrong architecture for {base:#x}").into()); - } - - // Now locate the optional header, and check that it looks big enough. - let opt_hdr_addr = nt_hdr_addr - .checked_add(mem::size_of_val(&nt_hdr).try_into().unwrap()) - .ok_or(anyhow!("overflow w/ nt_hdr"))?; - let opt_hdr_size = nt_hdr.file_hdr.size_of_optional_header as usize; - debug!("parsing optional hdr @ {:#x}", opt_hdr_addr); - - // If it's not big enough, let's bail. - if opt_hdr_size < mem::size_of::() { - return Err(anyhow!("optional header's size is too small").into()); - } - - // Read the IMAGE_OPTIONAL_HEADER64. - let opt_hdr = addr_space - .read_struct_at::(opt_hdr_addr) - .with_context(|| "failed to read ImageOptionalHeader64")?; - - // Read the PDB information if there's any. - let pdb_id = Self::try_parse_debug_dir(addr_space, base, &opt_hdr)?; - - // Read the EXPORT table if there's any. - let exports = match Self::try_parse_export_dir(addr_space, base, &opt_hdr) { - Ok(o) => o, - // Err(E::DumpParserError(KdmpParserError::AddrTranslation(_))) => None, - Err(e) => return Err(e), - } - .unwrap_or_default(); - - Ok(Self { pdb_id, exports }) - } - - fn try_parse_debug_dir( - addr_space: &mut impl AddrSpace, - base: u64, - opt_hdr: &ImageOptionalHeader64, - ) -> Result> { - // Let's check if there's an ImageDebugDirectory. - let debug_data_dir = opt_hdr.data_directory[IMAGE_DIRECTORY_ENTRY_DEBUG]; - if usize::try_from(debug_data_dir.size).unwrap() < mem::size_of::() { - debug!("debug dir is too small"); - return Ok(None); - } - - // Read it. - let debug_dir_addr = base - .checked_add(debug_data_dir.virtual_address.into()) - .ok_or(anyhow!("overflow w/ debug_data_dir"))?; - let Some(debug_dir) = - addr_space.try_read_struct_at::(debug_dir_addr)? - else { - debug!( - "failed to read ImageDebugDirectory {debug_dir_addr:#x} because of mem translation" - ); - return Ok(None); - }; - - // If it's not a codeview type.. I don't know what to do, so let's bail. - if debug_dir.type_ != IMAGE_DEBUG_TYPE_CODEVIEW { - debug!("debug dir is not a codeview"); - return Ok(None); - } - - // Let's make sure it's big enough to back a codeview structure. - if usize::try_from(debug_dir.size_of_data).unwrap() < mem::size_of::() { - debug!("codeview too small"); - return Ok(None); - } - - // Let's read it. - let codeview_addr = base - .checked_add(debug_dir.address_of_raw_data.into()) - .ok_or(anyhow!("overflow w/ debug_dir"))?; - let Some(codeview) = addr_space.try_read_struct_at::(codeview_addr)? else { - debug!("failed to read codeview {codeview_addr:#x} because of mem translation"); - return Ok(None); - }; - - // The codeview structure is followed by a NULL terminated string which is the - // module name. - let leftover = - usize::try_from(debug_dir.size_of_data).unwrap() - mem::size_of::(); - if leftover == 0 || leftover > 256 { - return Err(E::CodeViewInvalidPath); - } - - // Allocate space for it, and read it. - let mut file_name = vec![0; leftover]; - let file_name_addr = array_offset( - base, - debug_dir.address_of_raw_data, - 1, - mem::size_of::(), - ) - .ok_or(anyhow!("overflow w/ debug_dir filename"))?; - - let Some(amount) = addr_space.try_read_at(file_name_addr, &mut file_name)? else { - return Ok(None); - }; - - // The last character is supposed to be a NULL byte, bail if it's not there. - if *file_name.last().unwrap() != 0 { - return Err(anyhow!("the module path doesn't end with a NULL byte").into()); - } - - file_name.resize(amount - 1, 0); - - // All right, at this point we have everything we need: the PDB name / GUID / - // age. Those are the three piece of information we need to download a PDB - // off Microsoft's symbol server. - let path = PathBuf::from(String::from_utf8(file_name)?); - - Ok(Some(PdbId::new(path, codeview.guid.into(), codeview.age)?)) - } - - fn try_parse_export_dir( - addr_space: &mut impl AddrSpace, - base: u64, - opt_hdr: &ImageOptionalHeader64, - ) -> Result>> { - // Let's check if there's an EAT. - debug!("parsing EAT.."); - let export_data_dir = opt_hdr.data_directory[IMAGE_DIRECTORY_ENTRY_EXPORT]; - if usize::try_from(export_data_dir.size)? < mem::size_of::() { - debug!("export dir is too small"); - return Ok(None); - } - - // Read it. - let export_dir_addr = base - .checked_add(u64::from(export_data_dir.virtual_address)) - .ok_or(anyhow!("export_data_dir"))?; - let Some(export_dir) = - addr_space.try_read_struct_at::(export_dir_addr)? - else { - debug!("failed to read ImageExportDirectory {export_dir_addr:#x} because of mem translation"); - return Ok(None); - }; - - // Read the ordinal / name arrays. - // """ - // The export name pointer table is an array of addresses (RVAs) into the export - // name table. The pointers are 32 bits each and are relative to the image base. - // The pointers are ordered lexically to allow binary searches. - // An export name is defined only if the export name pointer table contains a - // pointer to it. """ - let n_names = export_dir.number_of_names; - let addr_of_names = export_dir.address_of_names; - // """ - // The export ordinal table is an array of 16-bit unbiased indexes into the - // export address table. Ordinals are biased by the Ordinal Base field of the - // export directory table. In other words, the ordinal base must be subtracted - // from the ordinals to obtain true indexes into the export address table. - // """ - let addr_of_ords = export_dir.address_of_name_ordinals; - let mut names = Vec::with_capacity(n_names.try_into()?); - let mut ords = Vec::with_capacity(names.len()); - for name_idx in 0..n_names { - // Read the name RVA's.. - let name_rva_addr = array_offset(base, addr_of_names, name_idx, mem::size_of::()) - .ok_or(anyhow!("name_rva_addr"))?; - let Some(name_rva) = addr_space - .try_read_struct_at::(name_rva_addr) - .with_context(|| "failed to read EAT's name array".to_string())? - else { - debug!( - "failed to read EAT's name array {name_rva_addr:#x} because of mem translation" - ); - return Ok(None); - }; - - let name_addr = base - .checked_add(name_rva.into()) - .ok_or(anyhow!("overflow w/ name_addr"))?; - // ..then read the string in memory. - let Some(name) = read_string(addr_space, name_addr, 64)? else { - debug!("failed to read export's name #{name_idx}"); - return Ok(None); - }; - names.push(name); - - // Read the ordinal. - let ord_addr = array_offset(base, addr_of_ords, name_idx, mem::size_of::()) - .ok_or(anyhow!("ord_addr"))?; - let Some(ord) = addr_space - .try_read_struct_at::(ord_addr) - .context("failed to read EAT's ord array")? - else { - debug!("failed to read EAT's ord array {ord_addr:#x} because of mem translation"); - return Ok(None); - }; - ords.push(ord); - } - - debug!("read {n_names} names"); - - // Read the address array. - // - // """ - // The export address table contains the address of exported entry points and - // exported data and absolutes. An ordinal number is used as an index into the - // export address table. - // """ - let addr_of_functs = export_dir.address_of_functions; - let n_functs = export_dir.number_of_functions; - let mut address_rvas = Vec::with_capacity(n_functs.try_into()?); - for addr_idx in 0..n_functs { - // Read the RVA. - let address_rva_addr = - array_offset(base, addr_of_functs, addr_idx, mem::size_of::()) - .ok_or(anyhow!("overflow w/ address_rva_addr"))?; - - let Some(address_rva) = addr_space - .try_read_struct_at::(address_rva_addr) - .with_context(|| "failed to read EAT's address array".to_string())? - else { - debug!("failed to read EAT's address array {address_rva_addr:#x} because of mem translation"); - return Ok(None); - }; - - address_rvas.push(address_rva); - } - - debug!("read {n_functs} addresses"); - - // Time to build the EAT. - let eat_range = Range { - start: export_data_dir.virtual_address, - end: export_data_dir - .virtual_address - .checked_add(export_data_dir.size) - .ok_or(anyhow!("overflow w/ export data dir size"))?, - }; - - let mut exports = Vec::with_capacity(address_rvas.len()); - for (unbiased_ordinal, addr_rva) in address_rvas.drain(..).enumerate() { - let ordinal = unbiased_ordinal - .checked_add(export_dir.base.try_into()?) - .ok_or(anyhow!("overflow w/ biased_ordinal"))?; - let name = ords - .iter() - .position(|&o| usize::from(o) == unbiased_ordinal) - .map(|name_idx| names[name_idx].clone()) - .unwrap_or_else(|| format!("ORD#{ordinal}")); - - let forwarder = eat_range.contains(&addr_rva); - if !forwarder { - exports.push((addr_rva, name.clone())); - } - } - - debug!("built table w/ {} entries", exports.len()); - - Ok(Some(exports)) - } -} diff --git a/crates/symbolizer/src/stats.rs b/crates/symbolizer/src/stats.rs deleted file mode 100644 index e22cd4c..0000000 --- a/crates/symbolizer/src/stats.rs +++ /dev/null @@ -1,67 +0,0 @@ -// Axel '0vercl0k' Souchet - April 21 2024 -//! This module contains the [`Stats`] type that is used to keep track of -//! various statistics when symbolizing. -use std::cell::RefCell; -use std::collections::HashMap; -use std::fmt::Debug; - -use crate::pe::PdbId; - -#[derive(Debug, Default)] -pub struct StatsBuilder { - inner: RefCell, -} - -/// Various statistics that the symbolizer keeps track of. -#[derive(Default, Clone, Debug)] -pub struct Stats { - /// The number of addresses symbolized. - pub n_addrs: u64, - /// The PDB identifiers that have been downloaded & the associated file size - /// in bytes. - pub downloaded: HashMap, - /// The number of time the address cache was a hit. - pub cache_hit: u64, -} - -impl Stats { - pub fn did_download(&self, pdb_id: PdbId) -> bool { - self.downloaded.contains_key(&pdb_id) - } - - pub fn amount_downloaded(&self) -> u64 { - let mut total = 0u64; - for value in self.downloaded.values() { - total = total.saturating_add(*value); - } - - total - } - - pub fn amount_pdb_downloaded(&self) -> usize { - self.downloaded.len() - } -} - -impl StatsBuilder { - pub fn build(&self) -> Stats { - self.inner.borrow().clone() - } - - pub fn downloaded_file(&self, pdb_id: PdbId, size: u64) { - assert!(self - .inner - .borrow_mut() - .downloaded - .insert(pdb_id, size) - .is_none()); - } - - pub fn addr_symbolized(&self) { - self.inner.borrow_mut().n_addrs += 1; - } - - pub fn cache_hit(&self) { - self.inner.borrow_mut().cache_hit += 1; - } -} diff --git a/crates/symbolizer/src/symbolizer.rs b/crates/symbolizer/src/symbolizer.rs deleted file mode 100644 index f1c8a16..0000000 --- a/crates/symbolizer/src/symbolizer.rs +++ /dev/null @@ -1,450 +0,0 @@ -// Axel '0vercl0k' Souchet - February 20 2024 -//! This module contains the implementation of the [`Symbolizer`] which is the -//! object that is able to symbolize files using PDB information if available. -use std::cell::RefCell; -use std::collections::{hash_map, HashMap}; -use std::fs::{self, File}; -use std::hash::{BuildHasher, Hasher}; -use std::io::{self, BufWriter, Write}; -use std::ops::Range; -use std::path::{Path, PathBuf}; -use std::rc::Rc; - -use anyhow::{anyhow, Context}; -use log::{debug, trace, warn}; - -use crate::addr_space::AddrSpace; -use crate::builder::{Builder, NoSymcache}; -use crate::misc::{fast_hex32, fast_hex64}; -use crate::modules::{Module, Modules}; -use crate::pdbcache::{PdbCache, PdbCacheBuilder}; -use crate::pe::{PdbId, Pe}; -use crate::stats::{Stats, StatsBuilder}; -use crate::{Error as E, Result}; - -/// Format a path to find a PDB in a symbol cache. -/// -/// Here is an example: -/// ```text -/// C:\work\dbg\sym\ntfs.pdb\64D20DCBA29FFC0CD355FFE7440EC5F81\ntfs.pdb -/// ^^^^^^^^^^^^^^^ ^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^ -/// cache path PDB name PDB GUID & PDB Age PDB name -/// ``` -pub fn format_pdb_path(symsrv_cache: &Path, pdb_id: &PdbId) -> PathBuf { - let pdb_name = pdb_id.name(); - symsrv_cache - .join(&pdb_name) - .join(format!("{}{:x}", pdb_id.guid, pdb_id.age,)) - .join(&pdb_name) -} - -/// Format a URL to find a PDB on an HTTP symbol server. -pub fn format_pdb_url(symsrv: &str, pdb_id: &PdbId) -> String { - // It seems that Chrome's symsrv server only accepts the GUID/age part as - // uppercase hex, so let's use that. - format!( - "{symsrv}/{}/{}{:x}/{}", - pdb_id.name(), - pdb_id.guid, - pdb_id.age, - pdb_id.name() - ) -} - -/// Download a PDB file from a candidate symbol servers. -/// -/// The code iterates through every symbol servers, and stops as soon as it was -/// able to download a matching file. -pub fn try_download_from_guid( - symsrvs: &Vec, - sympath_dir: impl AsRef, - pdb_id: &PdbId, -) -> Result> { - // Give a try to each of the symbol servers. - for symsrv in symsrvs { - debug!( - "trying to download pdb for {} from {}..", - pdb_id.name(), - symsrv - ); - - // The way a symbol path is structured is that there is a directory per module.. - let sympath_dir = sympath_dir.as_ref(); - let pdb_root_dir = sympath_dir.join(pdb_id.name()); - - // ..and inside, there is a directory per version of the PDB.. - let pdb_dir = pdb_root_dir.join(format!("{}{:x}", pdb_id.guid, pdb_id.age)); - - // ..and finally the PDB file itself. - let pdb_path = pdb_dir.join(pdb_id.name()); - - // The file doesn't exist on the file system, so let's try to download it from a - // symbol server. - let pdb_url = format_pdb_url(symsrv, pdb_id); - let resp = match ureq::get(&pdb_url).call() { - Ok(o) => o, - // If we get a 404, it means that the server doesn't know about this file. So we'll skip - // to the next symbol server. - Err(ureq::Error::Status(404, ..)) => { - warn!("got a 404 for {pdb_url}"); - continue; - } - // If we received any other errors, well that's not expected so let's bail. - Err(e) => { - return Err(E::DownloadPdb { - pdb_url, - e: e.into(), - }); - } - }; - - // If the server knows about this file, it is time to create the directory - // structure in which we'll download the file into. - if !(pdb_root_dir.try_exists()?) { - debug!("creating {pdb_root_dir:?}.."); - fs::create_dir(&pdb_root_dir) - .with_context(|| format!("failed to create base pdb dir {pdb_root_dir:?}"))?; - } - - if !pdb_dir.try_exists()? { - debug!("creating {pdb_dir:?}.."); - fs::create_dir(&pdb_dir) - .with_context(|| format!("failed to create pdb dir {pdb_dir:?}"))?; - } - - // Finally, we can download and save the file. - let file = - File::create(&pdb_path).with_context(|| format!("failed to create {pdb_path:?}"))?; - - io::copy(&mut resp.into_reader(), &mut BufWriter::new(file))?; - - debug!("downloaded to {pdb_path:?}"); - return Ok(Some(pdb_path)); - } - - Ok(None) -} - -/// Where did we find this PDB? On the file-system somewhere, in a local symbol -/// cache or downloaded on a symbol server. -/// -/// This is used mainly to account for statistics; how many files were -/// downloaded, etc. -enum PdbKind { - /// The PDB file was found on the file system but no in a symbol cache. - Local, - /// The PDB file was found on the file system in a local symbol cache. - LocalCache, - /// The PDB file was downloaded on a remote symbol server. - Download, -} - -/// Try to find a PDB file online or locally from a [`PdbId`]. -fn get_pdb( - sympath: &Path, - symsrvs: &Vec, - pdb_id: &PdbId, - offline: bool, -) -> Result> { - // Let's see if the path exists locally.. - if pdb_id.path.is_file() { - // .. if it does, this is a 'Local' PDB. - return Ok(Some((pdb_id.path.clone(), PdbKind::Local))); - } - - // Now, let's see if it's in the local cache.. - let local_path = format_pdb_path(sympath, pdb_id); - if local_path.is_file() { - // .. if it does, this is a 'LocalCache' PDB. - return Ok(Some((local_path, PdbKind::LocalCache))); - } - - // If we're offline, let's just skip the downloading part. - if offline { - return Ok(None); - } - - // We didn't find a PDB on disk, so last resort is to try to download it. - let downloaded_path = try_download_from_guid(symsrvs, sympath, pdb_id)?; - - Ok(downloaded_path.map(|p| (p, PdbKind::Download))) -} - -/// A simple 'hasher' that uses the input bytes as a hash. -/// -/// This is used for the cache HashMap used in the [`Symbolizer`]. We are -/// caching symbol addresses and so we know those addresses are unique and do -/// not need to be hashed. -#[derive(Default)] -struct IdentityHasher { - h: u64, -} - -impl Hasher for IdentityHasher { - fn finish(&self) -> u64 { - self.h - } - - fn write(&mut self, bytes: &[u8]) { - debug_assert_eq!(bytes.len(), 8); - - self.h = u64::from_le_bytes(bytes.try_into().unwrap()); - } -} - -impl BuildHasher for IdentityHasher { - type Hasher = Self; - - fn build_hasher(&self) -> Self::Hasher { - Self::default() - } -} - -#[derive(Debug, Default)] -pub enum PdbLookupMode { - #[default] - Offline, - Online { - /// List of symbol servers to try to download PDBs from when needed. - symsrvs: Vec, - }, -} - -/// Configuration for the [`Symbolizer`]. -#[derive(Debug)] -pub struct Config { - /// Path to the local PDB symbol cache where PDBs will be - /// downloaded into, or where we'll look for cached PDBs. - pub symcache: PathBuf, - /// This is the list of kernel / user modules read from the kernel crash - /// dump. - pub modules: Vec, - /// Which mode are we using for PDB lookups? Online or Offline? - pub mode: PdbLookupMode, -} - -/// The [`Symbolizer`] is the main object that glues all the logic. -/// -/// It downloads, parses PDB information, and symbolizes. -pub struct Symbolizer { - /// Keep track of some statistics such as the number of lines symbolized, - /// PDB downloaded, etc. - stats: StatsBuilder, - /// This is a path to the local PDB symbol cache where PDBs will be - /// downloaded into / where some are available. - symcache: PathBuf, - /// This is the list of kernel / user modules read from the kernel crash - /// dump. - modules: Modules, - /// List of symbol servers to try to download PDBs from when needed. - symsrvs: Vec, - /// Caches addresses to symbols. This allows us to not have to symbolize an - /// address again. - addr_cache: RefCell, IdentityHasher>>, - /// Each parsed module is stored in this cache. We parse PDBs, etc. only - /// once and then the [`PdbCache`] is used to query. - pdb_caches: RefCell, Rc>>, - offline: bool, -} - -impl Symbolizer { - pub fn builder() -> Builder { - Builder::default() - } - - /// Create a [`Symbolizer`]. - pub fn new(config: Config) -> Result { - let (offline, symsrvs) = match config.mode { - PdbLookupMode::Offline => - // If the user wants offline, then let's do that.. - { - (true, vec![]) - } - PdbLookupMode::Online { symsrvs } => { - // ..otherwise, we'll try to resolve a DNS and see what happens. If we can't do - // that, then we'll assume we're offline and turn the offline mode. - // Otherwise, we'll assume we have online access and attempt to download PDBs. - let offline = ureq::get("https://www.google.com/").call().is_err(); - if offline { - debug!("Turning on 'offline' mode as you seem to not have internet access.."); - } - - (offline, symsrvs) - } - }; - - if !config.symcache.is_dir() { - return Err(anyhow!("{:?} directory does not exist", config.symcache))?; - } - - Ok(Self { - stats: Default::default(), - symcache: config.symcache, - modules: Modules::new(config.modules), - symsrvs, - addr_cache: Default::default(), - pdb_caches: Default::default(), - offline, - }) - } - - /// Get [`Stats`]. - pub fn stats(&self) -> Stats { - self.stats.build() - } - - /// Get the [`PdbCache`] for a specified `addr`. - fn module_pdbcache(&self, addr: u64) -> Option> { - self.pdb_caches.borrow().iter().find_map(|(k, v)| { - if k.contains(&addr) { - Some(v.clone()) - } else { - None - } - }) - } - - /// Try to symbolize an address. - /// - /// If there's a [`PdbCache`] already created, then ask it to symbolize. - /// Otherwise, this will create a [`PdbCache`], try to find a PDB (locally - /// or remotely) and extract every bit of relevant information for us. - /// Finally, the result will be kept around to symbolize addresses in that - /// module faster in the future. - fn try_symbolize_addr_from_pdbs( - &self, - addr_space: &mut impl AddrSpace, - addr: u64, - ) -> Result>> { - trace!("symbolizing address {addr:#x}.."); - let Some(module) = self.modules.find(addr) else { - trace!("address {addr:#x} doesn't belong to any module"); - return Ok(None); - }; - - trace!("address {addr:#x} found in {}", module.name); - - // Do we have a cache already ready to go? - if let Some(pdbcache) = self.module_pdbcache(addr) { - return Ok(Some(Rc::new(pdbcache.symbolize(module.rva(addr))?))); - } - - // Otherwise, let's make one. - let mut builder = PdbCacheBuilder::new(module); - - // Let's start by parsing the PE to get its exports, and PDB information if - // there's any. - let pe = Pe::new(addr_space, module.at.start)?; - - // Ingest the EAT. - builder.ingest(pe.exports.into_iter()); - - // .. and see if it has PDB information. - if let Some(pdb_id) = pe.pdb_id { - trace!("Get PDB information for {module:?}/{pdb_id}.."); - - // Try to get a PDB.. - let pdb_path = get_pdb(&self.symcache, &self.symsrvs, &pdb_id, self.offline)?; - - // .. and ingest it if we have one. - if let Some((pdb_path, pdb_kind)) = pdb_path { - if matches!(pdb_kind, PdbKind::Download) { - self.stats - .downloaded_file(pdb_id, pdb_path.metadata()?.len()) - } - - builder.ingest_pdb(pdb_path)?; - } - } - - // Build the cache.. - let pdbcache = builder.build()?; - - // .. symbolize `addr`.. - let line = pdbcache - .symbolize(module.rva(addr)) - .with_context(|| format!("failed to symbolize {addr:#x}"))?; - - // .. and store the sym cache to be used for next time we need to symbolize an - // address from this module. - assert!(self - .pdb_caches - .borrow_mut() - .insert(module.at.clone(), Rc::new(pdbcache)) - .is_none()); - - Ok(Some(Rc::new(line))) - } - - /// Try to symbolize an address. - /// - /// If the address has been symbolized before, it will be in the - /// `addr_cache` already. If not, we need to take the slow path and ask the - /// right [`PdbCache`] which might require to create one in the first place. - fn try_symbolize_addr( - &self, - addr_space: &mut impl AddrSpace, - addr: u64, - ) -> Result>> { - match self.addr_cache.borrow_mut().entry(addr) { - hash_map::Entry::Occupied(o) => { - self.stats.cache_hit(); - return Ok(Some(o.get().clone())); - } - hash_map::Entry::Vacant(v) => { - let Some(symbol) = self.try_symbolize_addr_from_pdbs(addr_space, addr)? else { - return Ok(None); - }; - - v.insert(symbol); - } - }; - - Ok(self.addr_cache.borrow().get(&addr).cloned()) - } - - /// Symbolize `addr` in the `module+offset` style and write the result into - /// `output`. - pub fn modoff(&mut self, addr: u64, output: &mut impl Write) -> Result<()> { - let mut buffer = [0; 16]; - if let Some(module) = self.modules.find(addr) { - output.write_all(module.name.as_bytes())?; - output.write_all(&[b'+', b'0', b'x'])?; - - output.write_all(fast_hex32( - &mut buffer[0..8].try_into().unwrap(), - module.rva(addr), - )) - } else { - output.write_all(&[b'0', b'x'])?; - - output.write_all(fast_hex64(&mut buffer, addr)) - } - .context("failed to write symbolized value to output")?; - - self.stats.addr_symbolized(); - - Ok(()) - } - - /// Symbolize `addr` in the `module!function+offset` style and write the - /// result into `output`. - pub fn full( - &mut self, - addr_space: &mut impl AddrSpace, - addr: u64, - output: &mut impl Write, - ) -> Result<()> { - match self.try_symbolize_addr(addr_space, addr)? { - Some(sym) => { - output - .write_all(sym.as_bytes()) - .context("failed to write symbolized value to output")?; - - self.stats.addr_symbolized(); - Ok(()) - } - None => self.modoff(addr, output), - } - } -} diff --git a/crates/symbolizer/testdatas/mrt100.dll b/crates/symbolizer/testdatas/mrt100.dll deleted file mode 100644 index a986193308ecb7810ab1561901b6821a4ffd1ff0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 36864 zcmeHwe|%KcweOxJlMMOcjF4ae#Q_G4TH=_Y8bEZ$Oz^}`AcgRwiY6hMB-19D$;=rE zT(pTpD#>x~rB(c3FYOxiJ`B-Dg?Bh_g&}5 zE>L>x;_sjN(J^+# zwMVXCKbiLWye^&V^?Bv>o`Bis_tyEV8qL*JO-)|jyxDE`2b;{ECbM((T63ef#+{#) zm2Qz$cR%VGYX5DTpoB+vKQ25crc;GqkE982>`v|sBmKtil+N#nG^tY%XN^tDAF}4w@J)reefse5=>Xb^xjwOLA zPb=~fk9<`WQMg~AC#~p1(NHPNRKS=)t!GM!eBRx{gJ+scKL}=-f+&v}*5vzZs`x6# z{4>Et9ikjj)niB1`H~nLPDMr;0_h~GdhCov^L>(_*v3j^k{nUhV<&z7fWI0%5fdLG zBu7;B*hM*agBOB?5o<*tIijk^&e)x*q!!f>&=AlN&=AlN&=AlN&=AlN&=AlN_?tkW zUw*jI*}TvtT`((_p266TXfVYV9o2Wy36cvP%j;3(6xuv=dVS!a;}4<)a+_bi{}$7= zFCIyxpZo>Cb?)%5I3dX;^l{7eGZr!y%;JPQ4M<#p1YdwW#82Z@KqBK8Nbs|yi~-7+ zE|=)1gub-HFnYa|`xY_g$+>S(BAW|!8%i#m;^suT1>Geb%ogqGFA14hSCXBzPH92& zkM1k{x0l}jk$o*%ZnpK62t$r8W;QctxFm&(=j@ornDJUCTHle(wMTh^V-ff%3Sa1L zj|S7a^iVyTWjxfK#aLQvd!KPtH}EpwJEgk*=F^)Bmfm50hj1o|DXvGJL9+mF<72aRmh-Tkg z>j(eG^S^-pLR%jfc3QahL9WATL2XV;eW@_u5?q$bQenuryPNptQsHQc@D>c{P%47~ zG(_IVD-?0)d}-x{lSfE%_nGWDw>zJg+gBo-gr@7JXny1`(kzaJ=A$P~zrX6!W1OJ> z23~e>9Z57BCZ6WPw>G_flm1h2{QDr4j2BOw?HDz!k_)>m=HqB1dXI6!X_!*3m?HFE zI8{6+>RjNoSezZnD$qx+`Sa~23^`kW?5mhAm?yr(JZZkTG1vOYzYHn!QL1&W(%>nx z@S(q;s>!pkxp3%qpHbII{mX@FEP1R{=mj-1mvGcJh%S%k#1yUS`7lFa{x!V`f0(h> z_sv5)4pDzj%y-c{#`5%Nyb8DjSO0n7ruyLKIYamS{H7P5wy#~kfonT!)lo<3Z(jyN zow&nd2B$vrcc%eCfq~F5y+BBq#L5bAApiFAU8& zbK$MF56N*tBNRQU)GG9F!lR<$G(@Ij9pc)KQOhdsPmB{~1UDzWH}{8&^v{U%C7!cF z(P8Urj~XBE7RU>$onrIoTibqi=)q4({xDRM|J$gvK;0P&6y}%qw_3GmQuEL+9=h%4 zx-m`w`Wp>gN4<&b@a1wHEoR4hRBCr~LdrrbDz=@s#Y=>dVga&TrAzpFZb|5~M%S_8utNu>O^Q+u!vnp~ucuOZCvmDCQBJ7G*~ur0uP5FY`4^-KkbhD6E3WI>%Xrlz2)3_s4M5py>vISf z9m1ICv+8Hb(1|Ym!Wq$nJB8zO`a~}mdOu0{C5Ni)R11d+tQA~*;iX*0luM%|5r?1?z>*-q<8~Nk;cwEwzs(fmH?m8~~qJ@1PkHZr%axQ{?391)8LHii`9o`fppe&i0 z?9zmc=PXyiL6>?jAQPjbm3Par+~B>OP>kW54ARAQ6d6FHpeE3hpt+#Epk~lLpct~n z2SBZ$;0I;1Ig;wo7c+W3qUMKBxt9Ni;&<36V_)b`n3?xtYysCU;5D*mkjrcQ;_a zO0I8oUfXAmAYV_}?k3(t+0~9xC$b5rQEU}3w_U4i#nJZu7}YpzbY}EI#$hb#M)f@Q_m1w->5lFXXLK1v z6KsRVgMIOChtPB3oHH2%s~5wI>(DP=${5y;4*gdU88z7t>PBba2Ut)?8A^oLVg*M0 znW^=>&ffVdCzOuUQdNg-JPjWBES`MUb_@JEFZnEaEH_)%+<(_~`nPEPiD8Ms?#GZ4 zn=eHGxO%zb!un_M=eoooMEVEnG&zk)`cA-!LSJrdP>SHNqShzMFRQRFc*(w>>c<$jx1$X1jWev@AZ=Dku!3Gf z*R>A@jZ1U#P{p^?Y|)V`&^6&ZS)-LK^U=V{M_az((f+IeDPPa|ULh?jf8f ztmM6Vkq^T*#e^SyY;f{oR$|iEDkaDW21BLZ?N!n2}r|53`*>jg6L>k>|G=d zU=>tVPG#ZnnlT;V@ODeO)p|3~7<#=11=C}HgaRbeVxJ;1diFB9VrP&O`{frZlKpLi z1@o4-zr^ro?}J-azeRfBTnU5y2SC< zKrW0Iw?P<$*{E0U3tG1x6P5BNbk>ruh&`A$@;Z32Rggn%`q;BbT@H_V5Fq|YZbD;! zzjW3&9@Jk4W$k^hkP9(7dv@xtLOSNh0u*}}`ed}SV<)A=G~hdc=;%kCJp~ZGTfP~47afR^kk@`DXo2P1kQ=)RB z)h^YCPh-NiaMf8+M6g&rNF@2Swxx6rDOk~!0moju)R7x3cini@c*v2>g}SaUI!~lH~(@u5fpzYk9Xn9q&|=yIsqpeghXenm76$_Q7!6G{Uk&+brxs zilt+Gr-IVnJ9I2JvA2*g>pT@Z1mkUkz$s(LnP3`@Ih{9%T{Z!)++VFLwjhUfp5jw> z3X#BLv(*mzEe za-ionIx!R?H#{unib5`&UU7J^y5Pyf;TUBgnMXP)f_-B=XPKzFy_YiaUaXAkXs2q( z5RvKV+C#O5qHy!=JIT%@GE0z3(CaihdSR)wgO<}6kLUbPTp7bEO0OBA4?7_)Ts&rc zyocf(Rk|AjRGU4X^L<1yR5Q^nhA@{LJBWpW&Pyk$mbPPr*@PRN*k72r@HJOWCoL;X zAfs1f|3WyJ;0(Ca>z}y&TzfnxpV}7c!7KUq{O?H~fEFLQhs6F7JqvW5p=Bbx;%$)2 z>-r$$%XBrrc6#=~tGb>%s+t_q*yDs#)Z(Y%7tv?&oEH#@=hP4YLhq4jDU1fJ`ChCH zmEb8eOVLN&gkgWSP#!g#M6~#{!?fXh{d$`3dFYZKQy1YyZXsrF)%A!2dAtXtKGUCr z09MJniQS=pB2+iYIM<8FkuK z@7GAclvq;mlEceZ+Zm@Y=x7_GbEKo~V>(tkg%iRUj#l9!U2(=q<32~*IXZQA zq*#hG2B0TJxJbRzQEtf$mT{r1Lkm&s!m&Hz#ud?29pm#>RHh~|e%=Znk=gnK7eB|| z(!9mO;SC-Ahqf7??ha%-1^o^%j^huGj`J+VB@FO09D?0YT3wcc_eRMBTy?K&!9YoM&)q_{i4%%U z*b;&m&O3Fw9JU*=ED{)o+!lM{W65qa# zc4?gOQD>n6#MTG%8); zteInUP%8Eb>);JaD`RhbD(SN(Rtl6A%h~$c3Na?7cjm|iw%YSIgg~^6wUR5W{JKm{ z61B^;Os4EgiSLu?uuRLQOZ)>eHO-LHyJR{nQ4GVC)K0Pg z-{o|=EI&3S|3x{yN~RxAsi)R1)##V$-^=s`nZ6;@vocMR?a!2Hu1uYBK9iqpIc=E2 zx61O0oG(ur*9x^SmeQ3|s3QNTsP7u7TvVoiN^UCu)_LHZ!v+iwd=WFi5ivt0{lFD5 z1N;y(zzHD(ryB#@4>Q2`FasP9GUx{`2TA&Yvq4!L!3=OS_`zRc3~(?gi=(C?nu{O! z4uQ#S!rJ&9OXv~7pyHpD5JRrkT5Y8fx!hiJx>{(K4 zQtKYHX$j)}pk`1Hdd3R;-=B{w0|-|wVrR+jB ziReXqljX#+Et7aT#uCMks@nj#?KDJln)p}|I zn73#xt;fnYW32$Nf%3&~$SeW0Gl6HN{%MiEIa9v5%Xdp@`u2`q(8zsN02A2R6^#6d!m|;N z-H!T5kJP7VHPUv*zKNZ_%%hP5ZNtE=9*u z?!@sJNAlEyB&(R-%-F0Pqz99%`!S~~p=H)JdX}ME#8Q08l}T3gJ@{r+pqrJ>X7O2U zR!t_GRi43SEkW9!GpsYFGGom&W?W)m#x)tfS&Q)Jqb2)UjxNGd%cSySa74QF6qXK| z^fjrzS=XntIls@upW5H?(yV_-S_G?r7723@Hvr{K!rRmelv zuvXk0`yB$!eR?)wLso~Nq~-RNU8$;%%EAQpEaYBA=#b0qMf_I?`{eX*5dT<}PwC&Q z@~6#CWwVzU*=+yJ;cVZGO43W%-)v@#*0rBP4^h#EHA!U>Hfg{H7!_prAS%l1vQk*q zl4&ffAhm27^~-$56kT;#8fkePhc2SJBppivjyk-u&#plzPh;88mtA1;>2e6`Bfz># z#9G2Qm8BP?`gGHDY#RDkXGmcNzto$OJ+Nhoo*Ac4*psk}>atQwdX)9akNHFGR6;${ z4G2DjGWhZbK@;A#rlEn|<>pJPnyTvDHEV*nhtTL=>-In3sdhtZ_zselp80l+cg(z< zD&8^&d_8#MD&8jW4492(%unTMaALk;hU`@Fo&awT%^DT23q11G2UNTv@S@ zHw+%9;#rE}o58DB*|G|}7T8y);`zbjz-v*JeFD6lq)#RHucQxhCY9W;!81e7tddJz ziMBy*qe{*SUOm~O;+28d0^a2wZND#}^0&Ui75rA88;WYZ6*cYv@Aq!4Xz&Dh z_LwM9?QZe#EF#6dO|_o7px<55ROR=3w^UU5>mFc7#XNV5yE@3bE4)oqpn|X3+~8*H zHXYSMMtZ6mJlhfl-Axa8{NAQU++<=arpioG@r@+X5OCL5)Os4w(h4%3g_FUp^}F3{ zh+>UZ4fFwdCab6@stEYp)t*{UbwzzuQ%!^0k2k!m(cKtu^K2?EKELOIDt;0!Q;-85 zo~AlAa4d_hb@Q&O0ACCYpxwsSd;EN`s$nJBQ^Jb88>&3~O0T~(*uZ;y4Q@`fBf!{V zMbaUfi#;Hm=?1@tcWBD`*@GHNvhJ%(pVA1cz3D0(d*v|?cSij+P%ul6F^lY$=E!0SA%zRRf8n6 zmapQ2fnsco8T)$B?cZAF_Sbs-ja5z6?jkS75dh61tBQhtKf15X@2z$R0wpy#;Z1NQ z>%e3)iMw`dfOj{RV+c9|D?JTvia~!#VqC0sVZ?FNpd_}2Zz!tr@fc=B*rU?FaF9Vf zm)HBPpl@ZcshZ3#ZsPr0v74YA8UA%m81AT1&q@ML_vT<-o!ejL2cQ^ZSF&|Y(r}o- z2!>E&n34MPPi7ZDo7_gPe(J@jl;__aDHz1ESmMZtV zQhp(oml6u72^q4Ww94nhIN!k7bcXnPk9*7NTFRI-V=%-PyJ^siP)?l}>X}25m~$9& z>ZvnTqT1W2&i+DOY7IAwUTdlbpEUY-aWVwd%BD(Q#%@XF7itY`4UjWbEBk^B>a7Jj zUn=9FxdONJEqBeA%K7<*K+xy&!cVLG^Hlq5Xhf?8E@upv$I9ak_2$v>c~Pw{ba6cW<{Nm?2MBaDRy)>cr)_hY|9K4?2uM`sqsU-MEZP z@^mxz#|R`(7j$2kiF-2?2YnBL_;g)&8SaEqoNn#DjEkdXBm+7JcZ=#N4!UPH?z$iz z0nNp~v#|?tGiVL&bM8f)uJeWwNDtlWeOHdt=5I2R(p5W$EVg_i*=?;XcT`rhMq8c4?1>fQEpEfQEpEfQEpEfQEpEfQEpEfQG ze~bm6{Ct6re;9{x5TD@ZRY;fMcO~NV{4C{n zf214eN#bpT*}OvU5OWj%7d<&85f|#yK_Acj%)^>k9khAyg(2OXZ(_B~3$Z*=#)@xC zt!yd668x{<&<^IpZE_tef=naxu`0-Kg*8=3(QR^^^P#C4dc7=wP>VXuqO>2&ZT;ziHf>F^ab$m#}RV z?Y#|OpiHf6YOi0u&*?krD^OM%+te!R*#NYzM2o9{3XR_t7-4Skt6&{If?-Ys9cE}1 z@)T4iW)Xcqj){(ON>(>P8);IH?P)0oNI-iu1T+LR1T+LR1T+LR1T+LR1T+NxJ0pO@ z@Fub^Vu+X`xe;?DFJg@pM(hzTQWn`5sf^S|e36z&Yh-6+S7c8l5_v9iAkr0yMtUQ8 zPuZX1o+^84=&9IKV^57g#rEayH}B8eZ{1(GzjA;5e)|8tXUe+&J42w^rXipqpdp|k bpdp|kpdp|kpdp|kpdp|kpds-8I|Ba)O&%Y; diff --git a/crates/symbolizer/testdatas/mrt100.raw b/crates/symbolizer/testdatas/mrt100.raw deleted file mode 100644 index d4ae68d16779422a864ba96809424477d92f123b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 36864 zcmeHw4SZBZmTz^EP8ve&77`4ic!36^4zW!$8bGw0bnwRMKnvkx6iq_XA+1Te(|ucl zgBolyl3aV28O4XQ&TNagqT~4D?kq%{+365g5{U}vi~+_me!QKIVcx_q20-@bkOdwVaxx~J;YsZ*!^RduTF?Yg1#*6l2nF_wmqOfuF1N|!^rf6dLn*aesE zzkvOG!mBeo4DMHFmeu+~Tp;M*5Ui}{swx{A`~tV$%LT&?oUeg%Enmsi`>VbASy>aU zit5gXe0?o{$Pm@=(2hsMJyJSd{9qtMd~HW+TNLTncBHj^U!o~(qC_*=wo268)-2I; zqMRO9s8Pya;j5}8-G8Yio?^zTzn{*Q&J13l$_%ktTxQAzGZ+U>h0Lv(32LFZ_wUc% zuf&m_h^QJUmu*J?KU8D@t`$xhS(!)Kijoncavaw)wibzgaYItegdxoMlyblgfLCcI7%6l zl@h&_FqXEO2CtBF-yp_JCHFN-Wb=_uQ^}d*{PZ|KyR)Q~+2dWkB@vEwq&QgfxE3`2 z^!CDkeg4g#I##0PoV}++>~nT7j$^K9NgAKb**cRk^JOlyzBQF^i3tiOWFQ%&2);C*a{+0FR_O0%|wsYIq z`Z}0HYS|5{QVo*(FZ*j6)|mPBjLhpk{WsDN@PJt}WAC-ZySUW8jU| z$Nv7AUqgSPy@!vwtbEHczSU(#Z7yqVsd&^ax~&zZVxM_OC-J#b@j!|A1`KFbD}wJq3vt3rJt2I>v`oN`M-)_Q?xA!K#jp>4U5*V2$jc3>9+8+GpK6O6Iwa!o*JZ=`= z_YJBVI|~~N`)&xB4Q#991NLL+@?=gz)vBEjQ&i?()0^;znJWW# z?%TSL`g3%?OWrY&XGG&Qz_nlU^}vny!Hv`VZvEBO&pqi_xq1!1<&@1p9c8>>AqaKi zRx1ZiiTDxx8ZVyXBj=Lgi7%~ziyC-T>eJWJ-XXQ#$Z#@z35_?EVcyrb<*?+?;^t+2 zgEf4#%*yfN>xpQpv^bFPx#^14MW`hIw^nU|wlij{%&+Wkt!l}n#=c+Qcm1yn zL%ay|*P8g&S_|JA$mLs`IOl3q>hSVn+8i4ywx6~qOT>X<5wd)RTf8N=ByxZ&!CJV$ z!wK`?b_DnwRfyyY>snqE;4s9^)>`zZ!+FPu3v1l;+ZCnPezb>gU1tT_g4MDHDBJ8kPVuZ$ z9Flxi?KA~C$%UUeDS2?0cxZZ$<3Q{(L%_ly#{4BR(=<8hC4j zJyLcF#_&xA>EK(7OrUX43+NHhT+nV%4s<6dhHP;= zs10-&v=9`lP7$yzM*kG?pqLRwo_51vNKuEYL8})UVFx0IYAyb+-n%RlInLW;9TKZ9deJzbziNC;8~A{+ajo z+j|@Zr=j^uRWlq?%VF}*$()z*tBGV>WcVI)%ag=}d1iT#$!X z*EcwG%jeFpP)pg~2Ej+!RnAfuvI(a_Y!xuK-Qrut@F#Ql=<;l*cr>!JkmuaC3a*LqqsICl=FTLwJfa@_CB+fp%yqicWG2JdH?XIK|xZ6GZGi9k@4} zcP++mLe1s}9+Y`f0ycNL#1qtlgvHs}KT$H&yi4p8yE_L>E~tOs+&PeK-g{K+N_`{p zx!GKFATn$T7Kdj$#ZI()QfDH~x#isILlYot=Y|ot^Jb=`cwq z*pHd__9Q!p>EC(^~mG#hW6wB!Vh8!!!P{aT|BBYRI`GCFxHhVU^cNZbyz+WNM+tPaOMLuB!#Xc{xOypH=JV-^xLL=!)U z(AK!HJaN?oq<#k?(VYD`M6&)4B*9@DiN~-j5+$-Yhx~iw{JSXs(@CVX;~K_j zU^@zuZHE&R;cpyUli`xYI?3~30W4%fK4w*7xn!m=3p<#+<4wi+hR3jjk$gu9);uxQ zEoOa@jR&^m(WG$-l*+S-dJmls&nH;n_BKmP35T`nMd)XVskm=fVbb_BN84G^_$qb; zE%E!aWEBp;3g?sTZZ-ZK83{X@Eg{847M}(qC45~Xgq9}WLvs?hV<<=F{0@AEX4zr8 z{g|We^~5T~PGgr9dEbz@7t&(a;4aw|5jop6wn)B>bGJHjTRm`ZylM3sd9zZ274#gs zuH{(RydWnJRotIpj}Kgcu8A&5Vy16g4)4bZX}~`bYYUA9HaG*wDFZE;b94&o?!)th zjl5Sk@`0xK7NAVr{ikGdH0x*Dtk56TtVq%jUIO(upqTv_6z=-dmL&(qP zj#jOTBy*O*1k77wB$*n%d8GDXtdwf)f*m@plXZkAMAsUyp4znSP)lLoGSZ&Gbo)t? zqYdjiYZH16Ta)t9%?Zs5fm_?zCjSEQJeDp^*IN$#aswvnJk?BPr0O*^x^!zsKyru$Vcr@5XN1G-%`( zbqm?{_z-V?A!`qmhyOP=BUBpuk~2yrQu(Ll@)?N_f$QM3gord;#4(wjRuCV|NW6_i zJ61tWs`gQw1;D{&GziQjywBE{d< zSut+~YcB1OFu2ar#Krg;NF?BOFn74f=Z5g~aZ`gAU**L^$s8N{mlw;dYw=W$rAr!r zb>zYXX&Zz=n2maszMysMVM(cAL1)eXmehlJ1FwRYSOz)NW=uSd)cNq32?62{d9BhCT(Y?8Ff{F#-4Jtm(G!@kO?*{W91$#5_Yz5cWy=COhZHXF$b@6BS>VhKm$|PxVBY7<=A*ad@~KA< zP3NPNn%&V!ukfwe){;d>g%&5Ji8hqWCS z(zYH8Pf9EWyjXv31W|!d$9vTHhpRRFn8+L_l^m&C8pG1Fi{o>^u;o-7Hi*Yxq-{nr z=Z9FkVNRNO_|+?3q-WQ}FW_O&wrJ^)v*QRYgO5R;lF6JXgn9AD35fC09ev2eZXcW2 zWKM)~pyzseVkkszv|q}Vgxq*~#lwTm4No48CMW~RJkm)K>>HCg3nkSp-IR&!_BvBBRf;bEKx2&uhZz!3rnQ~w2a1hGUp#~WDKh)y=H_y?1cDe z@sRnEE{gM1=?(}`ZH{Elj}XOBorG>NMfudkUMvjsymW+W*>aFD8+D_j`wNbbUV71F z(z4hBGI$C0FNA{y&VWC$`q3LcYDwngQ`;h4cqRW%@EzF$(Bh-?kkmhtXMwJhv`j=7 zzX@`FUGHUnm9EBDPF%eAqK?N7XeNg|_IU9)wfIT+Mf6!R=UGIOIn@M!*u8&38lwTr z-G-H+0z7qQsrsm!FziniDx+qdgqD!;B5n9yy@KX@9=hab)J1raTZoxkc?IG?9`6CE z&x{{I0ITH9#BMddbh#|)K>Sz8Ib_@c4w-St__v5r2Ocs$ro`xRg7J67p)l(QRPs=o zfodM*i&@Xi1;5pu#;Hua7Rh>CWujmcrcUCI)UA<&>T6-?y`J=tc|4i(KP7DbX&8#_ zNsMZGhOfY$A(`_O<)Bv=sH5ABrw+JuJb$Po8?WH#oeYg@J6?QXYmm&TL9yhaNHS&f z3#ikfdA~*imXRd|FFEWz^g!8JY(MD|k2$vt(Q~A8%g6Ls=@Ji%LwK}`78!~&kC=Bk zw|qoTovmrs;>@GalO~>}-f1nfP6~VYNY=hNsCCZJjY;$3c)EcJd5bI3Qhm5mf2x)hv` zG)>N%9@+$nOF|*2Hyq(R-{bhqBRpEg(TL(^buH`)uMv&Ev0_eln+_TO6%q1Rt;R0I zoTs}up)k_T9Wvq_4ZYn!#UFOF2TT*=Sp&pNfSQ>yj1>9P5co}e7h$8 zkS4xO6W^+dH*4Zr{+f{~hfg@KYa=T$9u`zk2L_C^+ z2a=z>0LaL$&#|&t1d1n|k@tu3gQN2_OLL1yg(*(aVJfZiq~X0$>QTO`+dcbeNmbX) zVzz}Bi!9g@f|yRb45?-z)4Ufn(m&)*?RMKyRnKh$(N9M#RXq>U(fxR`U&~kHn*2Ej zFqmY%V-@Yvc=5xwQU`43ZXdXBctpJ>lS|Zqzg-gP>K)0Di~Mx--Jq0%$IcIV(I8^d zVusVGbVswM4$*^BaX?%JZ%|s1cPtV&=Yt#MK52SR(CGSYI{qV;U?Jv0bS*;u^ zE*+B|Dv{;YdWIH{O-;g67B!dof2(#$+SY@TRd_){y${u;AJ}WJ(;Td9DX@nv2!dxp0;$1{T0_Q zm(sC}nrFJ={{e#^8-##T#PDwldO-}clek!*= zRhH{dk!i(!dnA6lQcl%7bc>wtSRqrDAHU!kNnT9{^5k)?QpY?wT``WT@_&g2E|ts0 z75bOt#`CY63C>5@fZ=jr2AMo`tDqah2EY`cTR+`Ac)BsQ)6Iexbf)ce>!%yTTRqcu zx((5-0*_#(xOBs>Fs328dGM%dis#~H!_Q$R4{j3!6x&X>cDe=do3$xUw;{UGcR;29 zo(oKIx=HqFZ}b*SdmZ!c$d0dj$oa^<{N~$U@OMs9)5S(j-UOhtnGd0QAga> z*7A!c$1lGKWc=Nkj{7U0`_9!bE{rXWuD|TY;pXk2mZZR#IHFrlG4F(lqZAEn?qu=ZPMFLc%6J>PE7 z6)RmUzq31QS9Iv!f4nO8!@u3++I+78{Q979jm?HHk=8SIQ&E|lj$_%-+0r*r*7^pB zFdK9boMI)r(P*3x>celyk#0P84U`i9JU zx4%xPuSS~2avGkP6#hN;+*7?Cu&^PFM+ujpP-QJ~O12D-;oU0P!WL@e%49jy7@jO= z0nbML(FXD1%J0rim{xlhq*q#caiiJiW} zqoE>w?1+k_i~lYfpm<@tq%b)Tov;7e(HB|^o<)>zy{yHS#8%K$|}I4 zU(XeuWG4sy__2NIx|wYc8@*d^uV4K-`3W@bYcE5Zfu$|M<1rq|(+g5;QhGgO({hkL zmSWqDIaL8I(=Ii#Ov7B37D%l~v7zt5=TL!R+C(-@$YRr~C$VW|nQYp8q=PyA26H+y zS5IK(`6gywkr|ja7e61(-_3FiF_!L;%MZa3878K&iIADNB0VteiivFc2U%>oFo{jC z&ScY}YkDx-Lv zS{{d?i>M*Rz*2yt0k7<{s}agFST^)!7gz#@9Kw1(uUu8!U0 z6-p}`DmQqmSA=m6q29aF8@$_B<%LxLjU+2yvm6-jn0ebYyoFBqdhmudymjE2FdI3{ zPxY#EVZLF8Y}4=_1#c(K8V#=lJo44;8eSiGaqy04c;~?D2anhAti|xn;MHnuSq5Gc z>?_ppg5dGsHEGH|3f?x-r;+;?(g!(I&!@R1XFc07I(@dRLcg?(O=GpRT8dtD#e|dR% zNT@CsHU+#;RO2tN_J)L@e^Ys#FC?&sC5b9;lTTnVIqq+$@ofkPz2yy+!JvO*d1Y|J z-Ryvr=WX&TGy8)Jb^i60b+XJ#p;8Ekim@?f?3S=MxXI%U*7$?z+io(Gly3Z5zS9wFBl4?BRjdEq{z+_X2yK+-V@Ya`M2s%SceRW=nL4V1}xLE1N zh~ue2DQtyMS5z4gFwBauN2R}TkV!nZ-+xy)ur%CIMP?T_2*FL*O;C;k|EdNIchqQP zB_Wr0eR#tLZ_pD2pcrEpvQ-W8a2Ul1hEQXenfmiDW*0!4(nf!9DX@fHs@6%G7ArDr z6)VGZ^fdrks%98D+2+H^EHf3W{9u$c{eVv zp^Pyz219I~mj=BA<@kA_ojD|lIfpT)ojT(ss{HlZ>@U@&)o_jEwZ>}*$fHk?CPPT8 zY`o-o?3Pu2sn*b@5IIAwvMD{xD9d8@xtE-2K6!hwJvep=(7$J<{` zBU&qPKAUJJj=|#msEs#$d|mQP8B>|M48UbbdArKxuf^T58DU$NANqr zH>9L5WwiL$>eqIx~3%0-cS+Pn+ykgj~eweC;a;#P0`PI9ZOTz4VP zgi@SN?Y@A6qaKn0osP3bwG;>4c`?qqARYtF#oyW3jyMNejq{w3BTmP8qX?vjPW8U6 z#OZLaV}{JQ@bOdw+bOdw+bOdw+bOdw+ zbOdw+bOdw+{z?Q4(zk?oq;4KtQ2t{q_~hqHeEeY?#zUCCNjsG~wiKmKrOwBFHBKd$ z<6g{GAXdVbvt>w^;Jy@bx_+7V$3N5QffVW2{)+{b-YDgc{4aV+N+xd9XMjF|1(^@0 z6gEJc4__G4$@vCW!~77-lVoi8w$#QJAk4>q{f2fhH%^lqSP^9ES%6hSeiN*zM2b$6 z<2fIis-V};LI^degOj9#koJRL362jjIvY{VW z+DZCul;?m`7`iDfTXY}0T2ser$OKV~Y5`*fEMEx=@cl-C6t_Xodgvic$o}TZCwbsEH0A^shNXg%2{_cx7O4x@MzatYgd$=>Vn1z5*q)HqP6GO+BcLOo zBcLOoBcLOoBcLOoBcLPjUl{>B46h^mVy2iSmK)<@c`;k8Fy@HyF;8r5tRhw$3&fgY z&9QB voBqG|Ns8|O$`I(b=?Lfu=m_Wt=m_Wt=m_Wt=m_Wt=m_Wt=m`A(j=;YI@bw PathBuf { - PathBuf::from(&env!("CARGO_MANIFEST_DIR")) - .join("testdatas") - .join(name) -} - -fn symcache(name: &str) -> PathBuf { - let cache = temp_dir().join(name); - let _ = fs::remove_dir_all(&cache); - let _ = fs::create_dir(&cache); - - cache -} - -#[derive(Debug)] -struct RawAddressSpace { - raw: File, - len: u64, -} - -impl RawAddressSpace { - fn new(path: &impl AsRef) -> io::Result { - let raw = File::open(path)?; - let metadata = raw.metadata()?; - let len = metadata.len(); - - Ok(Self { raw, len }) - } - - fn len(&self) -> u64 { - self.len - } -} - -impl AddrSpace for RawAddressSpace { - fn read_at(&mut self, addr: u64, buf: &mut [u8]) -> std::io::Result { - Seek::seek(&mut self.raw, io::SeekFrom::Start(addr))?; - - Read::read(&mut self.raw, buf) - } - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> std::io::Result> { - self.read_at(addr, buf).map(Some) - } -} - -#[test] -fn raw_virt() { - let mut raw_addr_space = RawAddressSpace::new(&testdata("mrt100.raw")).unwrap(); - let len = raw_addr_space.len(); - - let mut symb = Builder::default() - .modules(vec![Module::new("mrt100", 0x0, len)]) - .msft_symsrv() - .symcache(symcache("basics")) - .build() - .unwrap(); - - for (addr, expected_full, expected_modoff) in EXPECTED_RAW { - let mut full = Vec::new(); - symb.full(&mut raw_addr_space, addr, &mut full).unwrap(); - assert_eq!(String::from_utf8(full).unwrap(), expected_full); - - let mut modoff = Vec::new(); - symb.modoff(addr, &mut modoff).unwrap(); - assert_eq!(String::from_utf8(modoff).unwrap(), expected_modoff); - } - - let stats = symb.stats(); - assert_eq!(stats.amount_pdb_downloaded(), 1); - assert!(stats.did_download( - PdbId::new( - "mrt100.pdb", - "A20DA44BF08DB27D2BA0928F79447C7D".parse().unwrap(), - 1 - ) - .unwrap() - )); -} - -#[derive(Debug)] -struct FileAddressSpace<'data> { - pe: PeFile64<'data, &'data ReadCache>, - virt_len: u64, -} - -impl<'data> FileAddressSpace<'data> { - fn new(cache: &'data ReadCache) -> io::Result { - let pe = - PeFile64::parse(cache).map_err(|e| io::Error::new(io::ErrorKind::Unsupported, e))?; - - let virt_len = pe - .nt_headers() - .optional_header - .size_of_image - .get(NativeEndian) - .into(); - - Ok(Self { pe, virt_len }) - } - - fn len(&self) -> u64 { - self.virt_len - } -} - -impl<'data> AddrSpace for FileAddressSpace<'data> { - fn read_at(&mut self, addr: u64, mut buf: &mut [u8]) -> std::io::Result { - if addr >= self.virt_len { - return Err(io::Error::new( - io::ErrorKind::Unsupported, - format!("{addr:#x} vs {:#x} is oob", self.virt_len), - )); - } - - let data = match self - .pe - .section_table() - .pe_data_at(self.pe.data(), addr.try_into().unwrap()) - { - Some(data) => data, - None => self - .pe - .data() - .read_slice_at(addr, buf.len()) - .map_err(|_| io::Error::new(io::ErrorKind::Unsupported, "read_slice_at"))?, - }; - - buf.write(data) - } - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> std::io::Result> { - self.read_at(addr, buf).map(Some) - } -} - -#[test] -fn raw_file() { - let file = File::open(testdata("mrt100.dll")).unwrap(); - let cache = ReadCache::new(file); - let mut file_addr_space = FileAddressSpace::new(&cache).unwrap(); - let len = file_addr_space.len(); - - let mut symb = Builder::default() - .modules(vec![Module::new("mrt100", 0x0, len)]) - .online(vec!["https://msdl.microsoft.com/download/symbols/"]) - .symcache(symcache("basics")) - .build() - .unwrap(); - - for (addr, expected_full, expected_modoff) in EXPECTED_RAW { - let mut full = Vec::new(); - symb.full(&mut file_addr_space, addr, &mut full).unwrap(); - assert_eq!(String::from_utf8(full).unwrap(), expected_full); - - let mut modoff = Vec::new(); - symb.modoff(addr, &mut modoff).unwrap(); - assert_eq!(String::from_utf8(modoff).unwrap(), expected_modoff); - } - - let stats = symb.stats(); - assert_eq!(stats.amount_pdb_downloaded(), 1); - assert!(stats.did_download( - PdbId::new( - "mrt100.pdb", - "A20DA44BF08DB27D2BA0928F79447C7D".parse().unwrap(), - 1 - ) - .unwrap() - )); -} - -#[derive(Debug)] -struct UserDumpAddrSpace<'a>(UserDumpParser<'a>); - -impl<'a> AddrSpace for UserDumpAddrSpace<'a> { - fn read_at(&mut self, addr: u64, mut buf: &mut [u8]) -> io::Result { - let mut cur_addr = addr; - let mut read_len = 0; - while read_len < buf.len() { - let Some(block) = self.0.get_mem_block(addr) else { - return Err(io::Error::new( - io::ErrorKind::Unsupported, - format!("no mem block found for {addr:#x}"), - )); - }; - - let Some(data) = block.data_from(cur_addr) else { - panic!(); - }; - - let left = buf.len() - read_len; - let len = min(data.len(), left); - buf.write_all(&data[..len]).unwrap(); - cur_addr += u64::try_from(len).unwrap(); - read_len += len; - } - - Ok(read_len) - } - - fn try_read_at(&mut self, addr: u64, buf: &mut [u8]) -> io::Result> { - match self.read_at(addr, buf) { - Ok(sz) => Ok(Some(sz)), - Err(_) => Ok(None), - } - } -} - -#[test] -fn user_dump() { - let dump = UserDumpParser::new(testdata("udmp.dmp")).unwrap(); - let modules = dump - .modules() - .values() - .map(|module| { - Module::new( - module.path.file_name().unwrap().to_string_lossy(), - module.start_addr(), - module.end_addr(), - ) - }) - .collect::>(); - - let mut udmp_addr_space = UserDumpAddrSpace(dump); - let mut symb = Builder::default() - .modules(modules.clone()) - .msft_symsrv() - .symcache(symcache("basics")) - .build() - .unwrap(); - - // 0:000> u 00007ff9`aa4f8eb2 - // ntdll!EvtIntReportEventWorker$fin$0+0x2: - // 00007ff9`aa4f8eb2 4883ec50 sub rsp,50h - let mut output = Vec::new(); - symb.full(&mut udmp_addr_space, 0x7ff9aa4f8eb2, &mut output) - .unwrap(); - assert_eq!( - String::from_utf8(output).unwrap(), - "ntdll.dll!EvtIntReportEventWorker$fin$0+0x2" - ); - - let stats = symb.stats(); - assert_eq!(stats.amount_pdb_downloaded(), 1); - assert!(stats.did_download( - PdbId::new( - "ntdll.pdb", - "8D5D5ED5D5B8AA609A82600C14E3004D".parse().unwrap(), - 1 - ) - .unwrap() - )); - - drop(symb); - let mut symb_offline = Builder::default() - .symcache(symcache("basics")) - .modules(modules) - .build() - .unwrap(); - - // 0:000> u 00007ff9`aa4f8eb2 - // ntdll!EvtIntReportEventWorker$fin$0+0x2: - // 00007ff9`aa4f8eb2 4883ec50 sub rsp,50h - let mut output = Vec::new(); - symb_offline - .full(&mut udmp_addr_space, 0x7ff9aa4f8eb2, &mut output) - .unwrap(); - assert_ne!( - String::from_utf8(output).unwrap(), - "ntdll.dll!EvtIntReportEventWorker$fin$0+0x2" - ); - - let stats = symb_offline.stats(); - assert_eq!(stats.amount_pdb_downloaded(), 0); -} From fe937480db612a8a850300f830fc2bee5f530fc4 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Tue, 25 Jun 2024 20:55:34 -0700 Subject: [PATCH 24/28] addr-symbolizer --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 75d57be..e623c92 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ edition = "2021" [dependencies] anyhow = "1.0" clap = { version = "4.5", features = ["derive"] } -addr-symbolizer = { path = "../addr-symbolizer" } +addr-symbolizer = { git = "https://github.com/0vercl0k/addr-symbolizer-rs", branch = "fbl_v0.1.0"} env_logger = "0.11" itoa = "1.0" kdmp-parser = "0.3" From ec426ca76806c74347c0a131af599de0606adf3a Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Tue, 25 Jun 2024 20:56:21 -0700 Subject: [PATCH 25/28] nits --- src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 5ee5694..3839a87 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,10 +7,10 @@ use std::path::{Path, PathBuf}; use std::time::Instant; use std::{env, fs, io}; +use addr_symbolizer::{AddrSpace, Builder as SymbolizerBuilder, Module, Symbolizer}; use anyhow::{anyhow, bail, Context, Result}; use clap::{ArgAction, Parser, ValueEnum}; use kdmp_parser::KernelDumpParser; -use symbolizer::{AddrSpace, Builder as SymbolizerBuilder, Module, Symbolizer}; mod hex_addrs_iter; mod human; @@ -50,7 +50,7 @@ impl StatsBuilder { struct Stats { time: u64, n_files: u64, - symbolizer_stats: symbolizer::Stats, + symbolizer_stats: addr_symbolizer::Stats, } impl Display for Stats { From b9e3cd8eb1485fca347efd5dc943c53fc31031ab Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Sun, 11 Aug 2024 20:28:06 -0700 Subject: [PATCH 26/28] Add `--import-pdbs` support (fix https://github.com/0vercl0k/symbolizer-rs/issues/6) & new clippys --- src/hex_addrs_iter.rs | 6 +++--- src/main.rs | 25 ++++++++++++++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/hex_addrs_iter.rs b/src/hex_addrs_iter.rs index 5df6047..0bc6e0f 100644 --- a/src/hex_addrs_iter.rs +++ b/src/hex_addrs_iter.rs @@ -75,7 +75,7 @@ fn fast_hex_str_to_u32(hex: [u8; 8]) -> u32 { /// Convert the `slice` of an hexadecimal string into an integer. fn hex_slice(slice: &[u8]) -> Result { - let slice = slice.strip_prefix(&[b'0', b'x']).unwrap_or(slice); + let slice = slice.strip_prefix(b"0x").unwrap_or(slice); if slice.len() > 16 { bail!("{slice:?} has more digits than supported (16)"); } @@ -165,7 +165,7 @@ where if let Some(last_range) = self.last_range { let last_slice = &self.buf[last_range]; // Be nice, and ignore a potential trailing end of line.. - let last_slice = last_slice.strip_suffix(&[b'\n']).unwrap_or(last_slice); + let last_slice = last_slice.strip_suffix(b"\n").unwrap_or(last_slice); // ..and if there's a carriage return right before, let's ignore this one as // well. let last_slice = last_slice @@ -214,7 +214,7 @@ where // what we return where the next slice starts at Some(idx) => { let without_lf = &parse_slice[..idx]; - let without_cr = without_lf.strip_suffix(&[b'\r']); + let without_cr = without_lf.strip_suffix(b"\r"); (without_cr.unwrap_or(without_lf), idx + 1) } diff --git a/src/main.rs b/src/main.rs index 3839a87..3c4307a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -153,6 +153,9 @@ struct CliArgs { /// parsed if present. #[arg(long)] symcache: Option, + /// Import PDBs found in the specified directories into the symbol cache. + #[arg(long)] + import_pdbs: Option>, /// The size in bytes of the buffer used to write data into the output /// files. #[arg(long, default_value_t = 3 * 1024 * 1024)] @@ -259,9 +262,9 @@ fn symbolize_file( if args.line_numbers { let mut buffer = itoa::Buffer::new(); - output.write_all(&[b'l'])?; + output.write_all(b"l")?; output.write_all(buffer.format(line_number).as_bytes())?; - output.write_all(&[b':', b' '])?; + output.write_all(b": ")?; } match args.style { @@ -275,7 +278,7 @@ fn symbolize_file( ) })?; - output.write_all(&[b'\n'])?; + output.write_all(b"\n")?; if lines_symbolized >= limit { println!( @@ -346,11 +349,19 @@ fn main() -> Result<()> { // All right, ready to create the symbolizer. let mut wrapper = AddrSpaceWrapper::new(parser); - let mut symbolizer = SymbolizerBuilder::default() - .online(args.symsrv.iter()) + let mut builder = SymbolizerBuilder::default() .modules(modules) - .symcache(symcache) - .build()?; + .symcache(symcache)?; + + if let Some(import_pdbs) = &args.import_pdbs { + builder = builder.import_pdbs(import_pdbs.iter())?; + } + + if !args.offline { + builder = builder.online(args.symsrv.iter()); + } + + let mut symbolizer = builder.build()?; let paths = if args.trace.is_dir() { // If we received a path to a directory as input, then we will try to symbolize From ece169f417976177f282c04128053424d40d861b Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Mon, 12 Aug 2024 20:49:23 -0700 Subject: [PATCH 27/28] bump kdmp parser --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e623c92..32b6b14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ clap = { version = "4.5", features = ["derive"] } addr-symbolizer = { git = "https://github.com/0vercl0k/addr-symbolizer-rs", branch = "fbl_v0.1.0"} env_logger = "0.11" itoa = "1.0" -kdmp-parser = "0.3" +kdmp-parser = "0.5" [profile.release] debug = true From 9dd8928f63909c5499683d325e540a52b6e2acf7 Mon Sep 17 00:00:00 2001 From: 0vercl0k <1476421+0vercl0k@users.noreply.github.com> Date: Thu, 17 Oct 2024 05:26:03 -0700 Subject: [PATCH 28/28] addr-symbolizer v0.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 32b6b14..8223c6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ edition = "2021" [dependencies] anyhow = "1.0" clap = { version = "4.5", features = ["derive"] } -addr-symbolizer = { git = "https://github.com/0vercl0k/addr-symbolizer-rs", branch = "fbl_v0.1.0"} +addr-symbolizer = { version = "0.1" } env_logger = "0.11" itoa = "1.0" kdmp-parser = "0.5"