Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

factors: Fix tests / CI #2754

Merged
merged 4 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 63 additions & 66 deletions crates/componentize/src/bugs.rs
Original file line number Diff line number Diff line change
@@ -1,66 +1,63 @@
use anyhow::bail;
use wasm_metadata::Producers;
use wasmparser::{Encoding, ExternalKind, Parser, Payload};
use crate::module_info::ModuleInfo;

/// Represents the detected likelihood of the allocation bug fixed in
/// https://github.com/WebAssembly/wasi-libc/pull/377 being present in a Wasm
/// module.
pub const EARLIEST_PROBABLY_SAFE_CLANG_VERSION: &str = "15.0.7";

/// This error represents the likely presence of the allocation bug fixed in
/// https://github.com/WebAssembly/wasi-libc/pull/377 in a Wasm module.
#[derive(Debug, PartialEq)]
pub enum WasiLibc377Bug {
ProbablySafe,
ProbablyUnsafe,
Unknown,
pub struct WasiLibc377Bug {
clang_version: Option<String>,
}

impl WasiLibc377Bug {
pub fn detect(module: &[u8]) -> anyhow::Result<Self> {
for payload in Parser::new(0).parse_all(module) {
match payload? {
Payload::Version { encoding, .. } if encoding != Encoding::Module => {
bail!("detection only applicable to modules");
}
Payload::ExportSection(reader) => {
for export in reader {
let export = export?;
if export.kind == ExternalKind::Func && export.name == "cabi_realloc" {
// `cabi_realloc` is a good signal that this module
// uses wit-bindgen, making it probably-safe.
tracing::debug!("Found cabi_realloc export");
return Ok(Self::ProbablySafe);
}
}
}
Payload::CustomSection(c) if c.name() == "producers" => {
let producers = Producers::from_bytes(c.data(), c.data_offset())?;
if let Some(clang_version) =
producers.get("processed-by").and_then(|f| f.get("clang"))
{
tracing::debug!(clang_version, "Parsed producers.processed-by.clang");

// Clang/LLVM version is a good proxy for wasi-sdk
// version; the allocation bug was fixed in wasi-sdk-18
// and LLVM was updated to 15.0.7 in wasi-sdk-19.
if let Some((major, minor, patch)) = parse_clang_version(clang_version) {
return if (major, minor, patch) >= (15, 0, 7) {
Ok(Self::ProbablySafe)
} else {
Ok(Self::ProbablyUnsafe)
};
} else {
tracing::warn!(
clang_version,
"Unexpected producers.processed-by.clang version"
);
}
}
}
_ => (),
/// Detects the likely presence of this bug.
pub fn check(module_info: &ModuleInfo) -> Result<(), Self> {
if module_info.probably_uses_wit_bindgen() {
// Modules built with wit-bindgen are probably safe.
return Ok(());
}
if let Some(clang_version) = &module_info.clang_version {
// Clang/LLVM version is a good proxy for wasi-sdk
// version; the allocation bug was fixed in wasi-sdk-18
// and LLVM was updated to 15.0.7 in wasi-sdk-19.
if let Some((major, minor, patch)) = parse_clang_version(clang_version) {
let earliest_safe =
parse_clang_version(EARLIEST_PROBABLY_SAFE_CLANG_VERSION).unwrap();
if (major, minor, patch) >= earliest_safe {
return Ok(());
} else {
return Err(Self {
clang_version: Some(clang_version.clone()),
});
};
} else {
tracing::warn!(
clang_version,
"Unexpected producers.processed-by.clang version"
);
}
}
Ok(Self::Unknown)
// If we can't assert that the module uses wit-bindgen OR was compiled
// with a new-enough wasi-sdk, conservatively assume it may be buggy.
Err(Self {
clang_version: None,
})
}
}

impl std::fmt::Display for WasiLibc377Bug {
    /// Writes the fixed, user-facing warning for this bug.
    ///
    /// The text is the same for every value; no field of `self` is
    /// interpolated into it.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const MESSAGE: &str =
            "This Wasm module may have been compiled with wasi-sdk version <19 which \
            contains a critical memory safety bug. For more information, see: \
            https://github.com/fermyon/spin/issues/2552";
        formatter.write_str(MESSAGE)
    }
}

// Marks `WasiLibc377Bug` as a standard error type. No trait methods are
// overridden, so the defaults provided by `std::error::Error` apply.
impl std::error::Error for WasiLibc377Bug {}

fn parse_clang_version(ver: &str) -> Option<(u16, u16, u16)> {
// Strip optional trailing detail after space
let ver = ver.split(' ').next().unwrap();
Expand All @@ -77,42 +74,42 @@ mod tests {

#[test]
fn wasi_libc_377_detect() {
use WasiLibc377Bug::*;
for (wasm, expected) in [
(r#"(module)"#, Unknown),
for (wasm, safe) in [
(r#"(module)"#, false),
(
r#"(module (func (export "cabi_realloc") (unreachable)))"#,
ProbablySafe,
true,
),
(
r#"(module (func (export "some_other_function") (unreachable)))"#,
Unknown,
false,
),
(
r#"(module (@producers (processed-by "clang" "16.0.0 extra-stuff")))"#,
ProbablySafe,
true,
),
(
r#"(module (@producers (processed-by "clang" "15.0.7")))"#,
ProbablySafe,
true,
),
(
r#"(module (@producers (processed-by "clang" "15.0.6")))"#,
ProbablyUnsafe,
false,
),
(
r#"(module (@producers (processed-by "clang" "14.0.0")))"#,
ProbablyUnsafe,
r#"(module (@producers (processed-by "clang" "14.0.0 extra-stuff")))"#,
false,
),
(
r#"(module (@producers (processed-by "clang" "a.b.c")))"#,
Unknown,
false,
),
] {
eprintln!("WAT: {wasm}");
let module = wat::parse_str(wasm).unwrap();
let detected = WasiLibc377Bug::detect(&module).unwrap();
assert_eq!(detected, expected);
let module_info = ModuleInfo::from_module(&module).unwrap();
let detected = WasiLibc377Bug::check(&module_info);
assert!(detected.is_ok() == safe, "{wasm} -> {detected:?}");
}
}
}
69 changes: 40 additions & 29 deletions crates/componentize/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
use {
anyhow::{anyhow, Context, Result},
convert::{IntoEntityType, IntoExportKind},
module_info::ModuleInfo,
std::{borrow::Cow, collections::HashSet},
wasm_encoder::{CustomSection, ExportSection, ImportSection, Module, RawSection},
wasmparser::{Encoding, Parser, Payload},
Expand All @@ -14,6 +15,7 @@ pub mod bugs;
#[cfg(test)]
mod abi_conformance;
mod convert;
mod module_info;

const SPIN_ADAPTER: &[u8] = include_bytes!(concat!(
env!("OUT_DIR"),
Expand Down Expand Up @@ -51,8 +53,9 @@ pub fn componentize_if_necessary(module_or_component: &[u8]) -> Result<Cow<[u8]>
}

pub fn componentize(module: &[u8]) -> Result<Vec<u8>> {
match WitBindgenVersion::from_module(module)? {
WitBindgenVersion::V0_2 => componentize_old_bindgen(module),
let module_info = ModuleInfo::from_module(module)?;
match WitBindgenVersion::detect(&module_info)? {
WitBindgenVersion::V0_2OrNone => componentize_old_module(module, &module_info),
WitBindgenVersion::GreaterThanV0_4 => componentize_new_bindgen(module),
WitBindgenVersion::Other(other) => Err(anyhow::anyhow!(
"cannot adapt modules created with wit-bindgen version {other}"
Expand All @@ -65,40 +68,36 @@ pub fn componentize(module: &[u8]) -> Result<Vec<u8>> {
#[derive(Debug)]
enum WitBindgenVersion {
GreaterThanV0_4,
V0_2,
V0_2OrNone,
Other(String),
}

impl WitBindgenVersion {
fn from_module(module: &[u8]) -> Result<Self> {
let (_, bindgen) = metadata::decode(module)?;
if let Some(producers) = bindgen.producers {
if let Some(processors) = producers.get("processed-by") {
let bindgen_version = processors.iter().find_map(|(key, value)| {
key.starts_with("wit-bindgen").then_some(value.as_str())
});
if let Some(v) = bindgen_version {
let mut parts = v.split('.');
let Some(major) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
let Some(minor) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
if (major == 0 && minor < 5) || major >= 1 {
return Ok(Self::Other(v.to_owned()));
}
// Either there should be no patch version or nothing after patch
if parts.next().is_none() || parts.next().is_none() {
return Ok(Self::GreaterThanV0_4);
} else {
return Ok(Self::Other(v.to_owned()));
}
fn detect(module_info: &ModuleInfo) -> Result<Self> {
if let Some(processors) = module_info.bindgen_processors() {
let bindgen_version = processors
.iter()
.find_map(|(key, value)| key.starts_with("wit-bindgen").then_some(value.as_str()));
if let Some(v) = bindgen_version {
let mut parts = v.split('.');
let Some(major) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
let Some(minor) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
return Ok(Self::Other(v.to_owned()));
};
if (major == 0 && minor < 5) || major >= 1 {
return Ok(Self::Other(v.to_owned()));
}
// Either there should be no patch version or nothing after patch
if parts.next().is_none() || parts.next().is_none() {
return Ok(Self::GreaterThanV0_4);
} else {
return Ok(Self::Other(v.to_owned()));
}
}
}

Ok(Self::V0_2)
Ok(Self::V0_2OrNone)
}
}

Expand All @@ -111,6 +110,18 @@ pub fn componentize_new_bindgen(module: &[u8]) -> Result<Vec<u8>> {
.encode()
}

/// Componentizes a module that was *not* produced with wit-bindgen >= 0.5,
/// i.e. one built with an old wit-bindgen or with no wit-bindgen at all.
///
/// # Errors
///
/// Returns an error if the module is treated as a command module and the
/// `WasiLibc377Bug` check fails, or if the selected componentization step
/// fails.
pub fn componentize_old_module(module: &[u8], module_info: &ModuleInfo) -> Result<Vec<u8>> {
    // A `_start` export on a module that doesn't obviously use wit-bindgen
    // most likely means an old p1 command module.
    let looks_like_command =
        module_info.has_start_export && !module_info.probably_uses_wit_bindgen();
    if !looks_like_command {
        return componentize_old_bindgen(module);
    }

    bugs::WasiLibc377Bug::check(module_info)?;
    componentize_command(module)
}

/// Modules produced with wit-bindgen 0.2 need more extensive adaption
pub fn componentize_old_bindgen(module: &[u8]) -> Result<Vec<u8>> {
let (module, exports) = retarget_imports_and_get_exports(ADAPTER_NAME, module)?;
Expand Down
111 changes: 111 additions & 0 deletions crates/componentize/src/module_info.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
use wasm_metadata::Producers;
use wasmparser::{Encoding, ExternalKind, Parser, Payload};
use wit_component::metadata::Bindgen;

// Function export names treated as the canonical ABI realloc export.
// wit-bindgen has used both of these historically.
const CANONICAL_ABI_REALLOC_EXPORTS: &[&str] = &["cabi_realloc", "canonical_abi_realloc"];

/// Stores various bits of info parsed from a Wasm module that are relevant to
/// componentization.
///
/// All fields start out empty/`false` (via `Default`) and are filled in by
/// [`ModuleInfo::from_module`] as the matching sections are encountered.
#[derive(Default)]
pub struct ModuleInfo {
/// Bindgen metadata decoded from a custom section whose name starts with
/// `component-type`. If several such sections decode successfully, the
/// last one wins; sections that fail to decode are skipped.
pub bindgen: Option<Bindgen>,
/// Value of the `clang` entry in the `processed-by` field of the
/// `producers` custom section, if present.
pub clang_version: Option<String>,
/// Name of the exported function that matched one of
/// `CANONICAL_ABI_REALLOC_EXPORTS`, if any.
pub realloc_export: Option<String>,
/// Whether the module exports a function named `_start`.
pub has_start_export: bool,
}

impl ModuleInfo {
/// Parses info from the given binary module bytes.
pub fn from_module(module: &[u8]) -> anyhow::Result<Self> {
let mut info = Self::default();
for payload in Parser::new(0).parse_all(module) {
match payload? {
Payload::Version { encoding, .. } => {
anyhow::ensure!(
encoding == Encoding::Module,
"ModuleInfo::from_module is only applicable to Modules; got a {encoding:?}"
);
}
Payload::ExportSection(reader) => {
for export in reader {
let export = export?;
if export.kind == ExternalKind::Func {
if CANONICAL_ABI_REALLOC_EXPORTS.contains(&export.name) {
tracing::debug!(
"Found canonical ABI realloc export {:?}",
export.name
);
info.realloc_export = Some(export.name.to_string());
} else if export.name == "_start" {
tracing::debug!("Found _start export");
info.has_start_export = true;
}
}
}
}
Payload::CustomSection(c) => {
let section_name = c.name();
if section_name == "producers" {
let producers = Producers::from_bytes(c.data(), c.data_offset())?;
if let Some(clang_version) =
producers.get("processed-by").and_then(|f| f.get("clang"))
{
tracing::debug!(clang_version, "Parsed producers.processed-by.clang");
info.clang_version = Some(clang_version.to_string());
}
} else if section_name.starts_with("component-type") {
match decode_bindgen_custom_section(section_name, c.data()) {
Ok(bindgen) => {
tracing::debug!("Parsed bindgen section {section_name:?}");
info.bindgen = Some(bindgen);
}
Err(err) => tracing::warn!(
"Error parsing bindgen section {section_name:?}: {err}"
),
}
}
}
_ => (),
}
}
Ok(info)
}

/// Returns true if the given module was heuristically probably compiled
/// with wit-bindgen.
pub fn probably_uses_wit_bindgen(&self) -> bool {
if self.bindgen.is_some() {
// Presence of bindgen metadata is a strong signal
true
} else if self.realloc_export.is_some() {
// A canonical ABI realloc export is a decent signal
true
} else {
false
}
}

/// Returns the wit-bindgen metadata producers processed-by field, if
/// present.
pub fn bindgen_processors(&self) -> Option<wasm_metadata::ProducersField> {
self.bindgen
.as_ref()?
.producers
.as_ref()?
.get("processed-by")
}
}

/// Decodes bindgen metadata from a single custom section, given its name and
/// raw contents.
///
/// This works around the limited public interface available in
/// [`wit_component::metadata`]: the section is wrapped in a throwaway module
/// holding nothing else, and that module is handed to the module-level
/// decoder.
// TODO: Make Bindgen::decode_custom_section public?
fn decode_bindgen_custom_section(name: &str, data: &[u8]) -> anyhow::Result<Bindgen> {
    let section = wasm_encoder::CustomSection {
        name: name.into(),
        data: data.into(),
    };
    let mut wrapper = wasm_encoder::Module::new();
    wrapper.section(&section);

    // The first tuple element is not needed here; only the metadata is kept.
    let decoded = wit_component::metadata::decode(wrapper.as_slice())?;
    Ok(decoded.1)
}
Loading
Loading