From c3f686f08184683d01131e5dd2de373df785cf05 Mon Sep 17 00:00:00 2001
From: DaniPopes <57450786+DaniPopes@users.noreply.github.com>
Date: Thu, 18 Jul 2024 04:55:38 +0200
Subject: [PATCH] feat: re-implement EOF using a single IR function

---
 crates/revmc-backend/src/traits.rs     |   1 +
 crates/revmc-builtins/src/ir.rs        |   7 +
 crates/revmc-builtins/src/lib.rs       |  27 +-
 crates/revmc-cli/src/main.rs           |   2 +-
 crates/revmc-context/src/lib.rs        |  11 +-
 crates/revmc-cranelift/src/lib.rs      |   4 +
 crates/revmc-llvm/src/lib.rs           |   4 +
 crates/revmc/src/bytecode/mod.rs       | 351 +++++++++++------------
 crates/revmc/src/bytecode/sections.rs  |   8 +-
 crates/revmc/src/compiler/mod.rs       | 154 ++++++----
 crates/revmc/src/compiler/translate.rs | 370 ++++++++++++++++---------
 crates/revmc/src/lib.rs                |   2 +-
 crates/revmc/src/linker.rs             |   2 +-
 crates/revmc/src/tests/resume.rs       |  17 +-
 examples/compiler/src/main.rs          |   2 +-
 15 files changed, 568 insertions(+), 394 deletions(-)

diff --git a/crates/revmc-backend/src/traits.rs b/crates/revmc-backend/src/traits.rs
index d6cb8dc..8df629f 100644
--- a/crates/revmc-backend/src/traits.rs
+++ b/crates/revmc-backend/src/traits.rs
@@ -261,6 +261,7 @@ pub trait Builder: BackendTypes + TypeMethods {
         self.str_const(value.to_str().unwrap())
     }
     fn str_const(&mut self, value: &str) -> Self::Value;
+    fn nullptr(&mut self) -> Self::Value;
 
     fn new_stack_slot(&mut self, ty: Self::Type, name: &str) -> Pointer {
         Pointer::new_stack_slot(self, ty, name)
diff --git a/crates/revmc-builtins/src/ir.rs b/crates/revmc-builtins/src/ir.rs
index eb971db..1dc250a 100644
--- a/crates/revmc-builtins/src/ir.rs
+++ b/crates/revmc-builtins/src/ir.rs
@@ -154,6 +154,9 @@ macro_rules! builtins {
             const LOG: u8 = LOG0;
             const DORETURN: u8 = RETURN;
             const RESIZEMEMORY: u8 = 0;
+            const FUNCSTACKPUSH: u8 = 0;
+            const FUNCSTACKPOP: u8 = 0;
+            const FUNCSTACKGROW: u8 = 0;
 
             match self {
                 $(Self::$ident => [<$ident:upper>]),*
@@ -251,5 +254,9 @@ builtins!
{ DoReturn = __revmc_builtin_do_return(@[ecx] ptr, @[sp] ptr, u8) Some(u8), SelfDestruct = __revmc_builtin_selfdestruct(@[ecx] ptr, @[sp] ptr, u8) Some(u8), + FuncStackPush = __revmc_builtin_func_stack_push(@[ecx] ptr, ptr, usize) Some(u8), + FuncStackPop = __revmc_builtin_func_stack_pop(@[ecx] ptr) Some(ptr), + FuncStackGrow = __revmc_builtin_func_stack_grow(@[ecx] ptr) None, + ResizeMemory = __revmc_builtin_resize_memory(@[ecx] ptr, usize) Some(u8), } diff --git a/crates/revmc-builtins/src/lib.rs b/crates/revmc-builtins/src/lib.rs index 542e485..352f981 100644 --- a/crates/revmc-builtins/src/lib.rs +++ b/crates/revmc-builtins/src/lib.rs @@ -13,8 +13,8 @@ extern crate tracing; use alloc::{boxed::Box, vec::Vec}; use revm_interpreter::{ as_u64_saturated, as_usize_saturated, CallInputs, CallScheme, CallValue, CreateInputs, - EOFCreateInputs, InstructionResult, InterpreterAction, InterpreterResult, LoadAccountResult, - SStoreResult, + EOFCreateInputs, FunctionStack, InstructionResult, InterpreterAction, InterpreterResult, + LoadAccountResult, SStoreResult, }; use revm_primitives::{ eof::EofHeader, Address, Bytes, CreateScheme, Eof, Log, LogData, SpecId, KECCAK_EMPTY, @@ -867,6 +867,29 @@ pub unsafe extern "C" fn __revmc_builtin_selfdestruct( InstructionResult::Continue } +#[no_mangle] +pub unsafe extern "C" fn __revmc_builtin_func_stack_push( + ecx: &mut EvmContext<'_>, + pc: usize, + new_idx: usize, +) -> InstructionResult { + if ecx.func_stack.return_stack_len() >= 1024 { + return InstructionResult::EOFFunctionStackOverflow; + } + ecx.func_stack.push(pc, new_idx); + InstructionResult::Continue +} + +#[no_mangle] +pub unsafe extern "C" fn __revmc_builtin_func_stack_pop(ecx: &mut EvmContext<'_>) -> usize { + ecx.func_stack.pop().expect("RETF with empty return stack").pc +} + +#[no_mangle] +pub unsafe extern "C" fn __revmc_builtin_func_stack_grow(func_stack: &mut FunctionStack) { + func_stack.return_stack.reserve(1); +} + #[no_mangle] pub unsafe extern "C" fn __revmc_builtin_resize_memory( ecx: &mut EvmContext<'_>, diff --git a/crates/revmc-cli/src/main.rs b/crates/revmc-cli/src/main.rs index d8effef..a05e32e 100644 --- a/crates/revmc-cli/src/main.rs +++ b/crates/revmc-cli/src/main.rs @@ -153,7 +153,7 @@ fn main() -> Result<()> { } if cli.parse_only { - let _ = compiler.parse(bytecode, spec_id)?; + let _ = compiler.parse(bytecode.into(), spec_id)?; return Ok(()); } diff --git a/crates/revmc-context/src/lib.rs b/crates/revmc-context/src/lib.rs index 7806e9f..9a6b0ca 100644 --- a/crates/revmc-context/src/lib.rs +++ b/crates/revmc-context/src/lib.rs @@ -33,8 +33,8 @@ pub struct EvmContext<'a> { pub next_action: &'a mut InterpreterAction, /// The return data. pub return_data: &'a [u8], - /// The length of the return stack. - pub return_stack_len: usize, + /// The function stack. + pub func_stack: &'a mut FunctionStack, /// Whether the context is static. pub is_static: bool, /// Whether the context is EOF init. 
@@ -76,7 +76,7 @@ impl<'a> EvmContext<'a> { host, next_action: &mut interpreter.next_action, return_data: &interpreter.return_data_buffer, - return_stack_len: 0, + func_stack: &mut interpreter.function_stack, is_static: interpreter.is_static, is_eof_init: interpreter.is_eof_init, resume_at, @@ -91,7 +91,10 @@ impl<'a> EvmContext<'a> { is_eof: self.contract.bytecode.is_eof(), instruction_pointer: bytecode.as_ptr(), bytecode, - function_stack: FunctionStack::new(), + function_stack: FunctionStack { + return_stack: self.func_stack.return_stack.clone(), + current_code_idx: self.func_stack.current_code_idx, + }, is_eof_init: self.is_eof_init, contract: self.contract.clone(), instruction_result: InstructionResult::Continue, diff --git a/crates/revmc-cranelift/src/lib.rs b/crates/revmc-cranelift/src/lib.rs index f2786b8..872d126 100644 --- a/crates/revmc-cranelift/src/lib.rs +++ b/crates/revmc-cranelift/src/lib.rs @@ -403,6 +403,10 @@ impl<'a> Builder for EvmCraneliftBuilder<'a> { self.bcx.ins().global_value(self.ptr_type, local_msg_id) } + fn nullptr(&mut self) -> Self::Value { + self.iconst(self.ptr_type, 0) + } + fn new_stack_slot_raw(&mut self, ty: Self::Type, name: &str) -> Self::StackSlot { // https://github.com/rust-lang/rustc_codegen_cranelift/blob/1122338eb88648ec36a2eb2b1c27031fa897964d/src/common.rs#L388 diff --git a/crates/revmc-llvm/src/lib.rs b/crates/revmc-llvm/src/lib.rs index 6ba5725..87bcfc5 100644 --- a/crates/revmc-llvm/src/lib.rs +++ b/crates/revmc-llvm/src/lib.rs @@ -671,6 +671,10 @@ impl<'a, 'ctx> Builder for EvmLlvmBuilder<'a, 'ctx> { self.bcx.build_global_string_ptr(value, "").unwrap().as_pointer_value().into() } + fn nullptr(&mut self) -> Self::Value { + self.ty_ptr.const_null().into() + } + fn new_stack_slot_raw(&mut self, ty: Self::Type, name: &str) -> Self::StackSlot { // let ty = self.ty_i8.array_type(size); // let ptr = self.bcx.build_alloca(ty, name).unwrap(); diff --git a/crates/revmc/src/bytecode/mod.rs b/crates/revmc/src/bytecode/mod.rs index c2c7f2c..fe50064 100644 --- a/crates/revmc/src/bytecode/mod.rs +++ b/crates/revmc/src/bytecode/mod.rs @@ -2,8 +2,8 @@ use bitvec::vec::BitVec; use revm_interpreter::opcode as op; -use revm_primitives::{hex, Eof, SpecId, EOF_MAGIC_BYTES}; -use revmc_backend::Result; +use revm_primitives::{hex, Eof, SpecId}; +use revmc_backend::{eyre::ensure, Result}; use rustc_hash::FxHashMap; use std::{borrow::Cow, fmt}; @@ -28,68 +28,13 @@ pub(crate) const TEST_SUSPEND: u8 = 0x25; /// Also known as `ic`, or instruction counter; not to be confused with SSA `inst`s. pub(crate) type Inst = usize; -#[doc(hidden)] // Not public API. 
-pub struct Bytecode<'a>(pub(crate) BytecodeInner<'a>);
-
-#[derive(Debug)]
-pub(crate) enum BytecodeInner<'a> {
-    Legacy(LegacyBytecode<'a>),
-    Eof(EofBytecode<'a>),
-}
-
-impl<'a> Bytecode<'a> {
-    pub(crate) fn new(code: &'a [u8], spec_id: SpecId) -> Result<Self> {
-        if spec_id.is_enabled_in(SpecId::PRAGUE_EOF) && code.starts_with(&EOF_MAGIC_BYTES) {
-            Ok(Self(BytecodeInner::Eof(EofBytecode::decode(code, spec_id)?)))
-        } else {
-            Ok(Self(BytecodeInner::Legacy(LegacyBytecode::new(code, spec_id, None))))
-        }
-    }
-
-    pub(crate) fn analyze(&mut self) -> Result<()> {
-        match &mut self.0 {
-            BytecodeInner::Legacy(bytecode) => bytecode.analyze(),
-            BytecodeInner::Eof(bytecode) => bytecode.analyze(),
-        }
-    }
-
-    pub(crate) fn as_legacy_slice(&self) -> &[LegacyBytecode<'a>] {
-        match &self.0 {
-            BytecodeInner::Legacy(bytecode) => std::slice::from_ref(bytecode),
-            BytecodeInner::Eof(eof) => &eof.sections,
-        }
-    }
-
-    pub(crate) fn as_eof(&self) -> Option<&EofBytecode<'a>> {
-        match &self.0 {
-            BytecodeInner::Legacy(_) => None,
-            BytecodeInner::Eof(eof) => Some(eof),
-        }
-    }
-}
-
-impl fmt::Debug for Bytecode<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match &self.0 {
-            BytecodeInner::Legacy(bytecode) => bytecode.fmt(f),
-            BytecodeInner::Eof(bytecode) => bytecode.fmt(f),
-        }
-    }
-}
-
-impl fmt::Display for Bytecode<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match &self.0 {
-            BytecodeInner::Legacy(bytecode) => bytecode.fmt(f),
-            BytecodeInner::Eof(bytecode) => bytecode.fmt(f),
-        }
-    }
-}
-
 /// EVM bytecode.
-pub(crate) struct LegacyBytecode<'a> {
+#[doc(hidden)] // Not public API.
+pub struct Bytecode<'a> {
     /// The original bytecode slice.
     pub(crate) code: &'a [u8],
+    /// The parsed EOF container, if any.
+    eof: Option<Cow<'a, Eof>>,
     /// The instructions.
     insts: Vec<InstData>,
     /// `JUMPDEST` opcode map. `jumpdests[pc]` is `true` if `code[pc] == op::JUMPDEST`.
@@ -100,37 +45,44 @@ pub(crate) struct LegacyBytecode<'a> {
     has_dynamic_jumps: bool,
     /// Whether the bytecode may suspend execution.
     may_suspend: bool,
-    /// The number of resumes in the bytecode.
-    n_resumes: usize,
     /// Mapping from program counter to instruction.
     pc_to_inst: FxHashMap<u32, u32>,
-    /// The EOF section index, if any.
-    pub(crate) eof_section: Option<usize>,
+    /// Mapping from EOF code section index to the list of instructions that call it.
+    eof_called_by: Vec<Vec<Inst>>,
 }
 
-impl<'a> LegacyBytecode<'a> {
+impl<'a> Bytecode<'a> {
     #[instrument(name = "new_bytecode", level = "debug", skip_all)]
-    pub(crate) fn new(code: &'a [u8], spec_id: SpecId, eof_section: Option<usize>) -> Self {
-        let is_eof = eof_section.is_some();
+    pub(crate) fn new(mut code: &'a [u8], eof: Option<Cow<'a, Eof>>, spec_id: SpecId) -> Self {
+        if let Some(eof) = &eof {
+            code = unsafe {
+                std::slice::from_raw_parts(
+                    eof.body.code_section.first().unwrap().as_ptr(),
+                    eof.header.sum_code_sizes,
+                )
+            };
+        }
+
+        let is_eof = eof.is_some();
         let mut insts = Vec::with_capacity(code.len() + 8);
         // JUMPDEST analysis is not done in EOF.
         let mut jumpdests = if is_eof { BitVec::new() } else { BitVec::repeat(false, code.len()) };
         let mut pc_to_inst = FxHashMap::with_capacity_and_hasher(code.len(), Default::default());
         let op_infos = op_info_map(spec_id);
-        for (inst, (pc, Opcode { opcode, immediate })) in
+        for (inst, (absolute_pc, Opcode { opcode, immediate })) in
             OpcodesIter::new(code).with_pc().enumerate()
         {
-            pc_to_inst.insert(pc as u32, inst as u32);
+            pc_to_inst.insert(absolute_pc as u32, inst as u32);
 
-            if opcode == op::JUMPDEST && !is_eof {
-                jumpdests.set(pc, true)
+            if !is_eof && opcode == op::JUMPDEST {
+                jumpdests.set(absolute_pc, true)
             }
 
             let mut data = 0;
             if let Some(imm) = immediate {
-                // `pc` is at `opcode` right now, add 1 for the data.
-                data = Immediate::pack(pc + 1, imm.len());
+                // `absolute_pc` is at `opcode` right now, add 1 for the data.
+                data = Immediate::pack(absolute_pc + 1, imm.len());
             }
 
             let mut flags = InstFlags::empty();
@@ -145,22 +97,23 @@ impl<'a> LegacyBytecode<'a> {
 
             let section = Section::default();
 
-            insts.push(InstData { opcode, flags, base_gas, data, pc: pc as u32, section });
+            insts.push(InstData { opcode, flags, base_gas, data, pc: absolute_pc as u32, section });
         }
 
         let mut bytecode = Self {
             code,
+            eof,
             insts,
             jumpdests,
             spec_id,
            has_dynamic_jumps: false,
             may_suspend: false,
-            n_resumes: 0,
             pc_to_inst,
-            eof_section,
+            eof_called_by: vec![],
         };
 
         // Pad code to ensure there is at least one diverging instruction.
+        // EOF enforces this, so there is no need to pad it ourselves.
         if !is_eof && bytecode.insts.last().map_or(true, |last| !last.is_diverging(false)) {
             bytecode.insts.push(InstData::new(op::STOP));
         }
@@ -243,6 +196,10 @@ impl<'a> LegacyBytecode<'a> {
         self.calc_may_suspend();
         self.construct_sections();
 
+        if self.is_eof() {
+            self.calc_eof_called_by()?;
+        }
+
         Ok(())
     }
 
@@ -349,17 +306,6 @@ impl<'a> LegacyBytecode<'a> {
         self.may_suspend = may_suspend;
     }
 
-    /// Calculates the total number of resumes in the bytecode.
-    #[instrument(name = "resumes", level = "debug", skip_all)]
-    pub(crate) fn calc_total_resumes(&mut self) {
-        debug_assert!(self.is_eof());
-        let mut total = 0;
-        for (_, op) in self.iter_insts() {
-            total += op.may_suspend(true) as usize;
-        }
-        self.n_resumes = total;
-    }
-
     /// Constructs the sections in the bytecode.
     #[instrument(name = "sections", level = "debug", skip_all)]
     fn construct_sections(&mut self) {
@@ -384,6 +330,69 @@ impl<'a> LegacyBytecode<'a> {
         }
     }
 
+    /// Calculates the list of instructions that call each EOF section.
+    ///
+    /// This is done to compute the `indirectbr` destinations of `RETF` instructions.
+    #[instrument(name = "eof_called_by", level = "debug", skip_all)]
+    fn calc_eof_called_by(&mut self) -> Result<()> {
+        let code_sections_len = self.expect_eof().body.code_section.len();
+        if code_sections_len <= 1 {
+            return Ok(());
+        }
+
+        // First, collect all `CALLF` targets.
+        let mut eof_called_by = vec![Vec::new(); code_sections_len];
+        for (inst, data) in self.iter_all_insts() {
+            if data.opcode == op::CALLF {
+                let imm = self.get_imm(data.data);
+                let target_section = u16::from_be_bytes(imm.try_into().unwrap()) as usize;
+                eof_called_by[target_section].push(inst);
+            }
+        }
+
+        // Then, propagate `JUMPF` calls.
+        const MAX_ITERATIONS: usize = 32;
+        let mut any_progress = true;
+        let mut i = 0usize;
+        let first_section_inst = self.eof_section_inst(1);
+        while any_progress && i < MAX_ITERATIONS {
+            any_progress = false;
+
+            for (_inst, data) in self.iter_all_insts().skip(first_section_inst) {
+                if data.opcode == op::JUMPF {
+                    let source_section = self.pc_to_eof_section(data.pc as usize);
+                    debug_assert!(source_section != 0);
+
+                    let imm = self.get_imm(data.data);
+                    let target_section = u16::from_be_bytes(imm.try_into().unwrap()) as usize;
+
+                    let (source_section, target_section) =
+                        get_two_mut(&mut eof_called_by, source_section, target_section);
+
+                    for &source_call in &*source_section {
+                        if !target_section.contains(&source_call) {
+                            any_progress = true;
+                            target_section.push(source_call);
+                        }
+                    }
+                }
+            }
+
+            i += 1;
+        }
+        // TODO: Is this actually reachable?
+        // If so, we should remove this error and handle this case properly by making all `CALLF`
+        // reachable.
+        ensure!(i < MAX_ITERATIONS, "`calc_eof_called_by` did not converge");
+        self.eof_called_by = eof_called_by;
+        Ok(())
+    }
+
+    /// Returns the list of instructions that call the given EOF section.
+    pub(crate) fn eof_section_called_by(&self, section: usize) -> &[Inst] {
+        &self.eof_called_by[section]
+    }
+
     /// Returns the immediate value of the given instruction data, if any.
     pub(crate) fn get_imm_of(&self, instr_data: &InstData) -> Option<&'a [u8]> {
         (instr_data.imm_len() > 0).then(|| self.get_imm(instr_data.data))
@@ -394,11 +403,6 @@ impl<'a> LegacyBytecode<'a> {
         &self.code[offset..offset + len]
     }
 
-    /// Returns `true` if this bytecode is not EOF or is the main (first) EOF section:
-    pub(crate) fn is_main_section(&self) -> bool {
-        self.eof_section.map_or(true, |section| section == 0)
-    }
-
     /// Returns `true` if the given program counter is a valid jump destination.
     fn is_valid_jump(&self, pc: usize) -> bool {
         self.jumpdests.get(pc).as_deref().copied() == Some(true)
@@ -414,14 +418,9 @@ impl<'a> LegacyBytecode<'a> {
         self.may_suspend
     }
 
-    /// Returns the total number of resumes in the bytecode.
-    pub(crate) fn n_resumes(&self) -> usize {
-        self.n_resumes
-    }
-
     /// Returns `true` if the bytecode is EOF.
     pub(crate) fn is_eof(&self) -> bool {
-        self.eof_section.is_some()
+        self.eof.is_some()
     }
 
     /// Returns `true` if the bytecode is small.
@@ -448,9 +447,71 @@ impl<'a> LegacyBytecode<'a> {
         self.insts[inst].pc as usize
     }
     */
+
+    /// Returns the program counter of the given EOF section index.
+    pub(crate) fn eof_section_pc(&self, section: usize) -> Inst {
+        let code = &self.expect_eof().body.code_section;
+        let first = code.first().unwrap().as_ptr();
+        let section_ptr = code[section].as_ptr();
+        section_ptr as usize - first as usize
+    }
+
+    /// Returns the first instruction of the given EOF section index.
+    pub(crate) fn eof_section_inst(&self, section: usize) -> Inst {
+        self.pc_to_inst(self.eof_section_pc(section))
+    }
+
+    /// Asserts that the given jump target is in bounds.
+    pub(crate) fn eof_assert_jump_in_bounds(&self, from: usize, to: usize) {
+        assert_eq!(
+            self.pc_to_eof_section(from),
+            self.pc_to_eof_section(to),
+            "RJUMP* target out of bounds: {from} -> {to}"
+        );
+    }
+
+    pub(crate) fn pc_to_eof_section(&self, pc: usize) -> usize {
+        (0..self.expect_eof().body.code_section.len())
+            .rev()
+            .find(|&section| pc >= self.eof_section_pc(section))
+            .unwrap()
+    }
+
+    /// Returns the `Eof` container, panicking if it is not set.
+ #[track_caller] + #[inline] + pub(crate) fn expect_eof(&self) -> &Eof { + self.eof.as_deref().expect("EOF container not set") + } + + /// Returns the name for a basic block. + pub(crate) fn op_block_name(&self, mut inst: usize, name: &str) -> String { + use std::fmt::Write; + + if inst == usize::MAX { + return format!("entry.{name}"); + } + let mut section = None; + let data = self.inst(inst); + if self.is_eof() { + let section_index = self.pc_to_eof_section(data.pc as usize); + section = Some(section_index); + inst -= self.eof_section_inst(section_index); + } + + let mut s = String::new(); + if let Some(section) = section { + let _ = write!(s, "S{section}."); + } + let _ = write!(s, "OP{inst}.{}", data.to_op()); + if !name.is_empty() { + let _ = write!(s, ".{name}"); + } + s + } } -impl fmt::Display for LegacyBytecode<'_> { +impl fmt::Display for Bytecode<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let header = format!("{:^6} | {:^6} | {:^80} | {}", "ic", "pc", "opcode", "instruction"); writeln!(f, "{header}")?; @@ -464,7 +525,7 @@ impl fmt::Display for LegacyBytecode<'_> { } } -impl fmt::Debug for LegacyBytecode<'_> { +impl fmt::Debug for Bytecode<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Bytecode") .field("code", &hex::encode(self.code)) @@ -477,86 +538,6 @@ impl fmt::Debug for LegacyBytecode<'_> { } } -#[derive(Debug)] -pub(crate) struct EofBytecode<'a> { - pub(crate) container: Cow<'a, Eof>, - pub(crate) sections: Vec>, - pub(crate) any_may_suspend: bool, - pub(crate) total_resumes: usize, -} - -impl<'a> EofBytecode<'a> { - // TODO: Accept revm Bytecode in the compiler - #[allow(dead_code)] - fn new(container: &'a Eof, spec_id: SpecId) -> Self { - Self::new_inner(Cow::Borrowed(container), spec_id) - } - - fn decode(code: &'a [u8], spec_id: SpecId) -> Result { - let container = Eof::decode(code.to_vec().into())?; - Ok(Self::new_inner(Cow::Owned(container), spec_id)) - } - - #[instrument(name = "new_eof", level = "debug", skip_all)] - fn new_inner(container: Cow<'a, Eof>, spec_id: SpecId) -> Self { - Self { container, sections: vec![], any_may_suspend: false, total_resumes: 0 } - .make_sections(spec_id) - } - - fn make_sections(mut self, spec_id: SpecId) -> Self { - self.sections = self - .container - .body - .code_section - .iter() - .enumerate() - .map(|(section, code)| { - // SAFETY: Code section `Bytes` outlives `self`. 
-                let code = unsafe { std::mem::transmute::<&[u8], &[u8]>(&code[..]) };
-                LegacyBytecode::new(code, spec_id, Some(section))
-            })
-            .collect();
-        self
-    }
-
-    #[instrument(name = "analyze_eof", level = "debug", skip_all)]
-    fn analyze(&mut self) -> Result<()> {
-        for section in &mut self.sections {
-            section.analyze()?;
-        }
-        self.calc_any_may_suspend();
-        if self.any_may_suspend {
-            self.calc_total_resumes();
-        }
-        Ok(())
-    }
-
-    #[instrument(name = "any_suspend", level = "debug", skip_all)]
-    fn calc_any_may_suspend(&mut self) {
-        self.any_may_suspend = self.sections.iter().any(|section| section.may_suspend());
-    }
-
-    #[instrument(name = "total_resumes", level = "debug", skip_all)]
-    fn calc_total_resumes(&mut self) {
-        let mut total = 0;
-        for section in &mut self.sections {
-            section.calc_total_resumes();
-            total += section.n_resumes;
-        }
-        self.total_resumes = total;
-    }
-}
-
-impl fmt::Display for EofBytecode<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        for (i, section) in self.sections.iter().enumerate() {
-            writeln!(f, "# Section {i}")?;
-            writeln!(f, "{section}")?;
-        }
-        Ok(())
-    }
-}
-
 /// A single instruction in the bytecode.
 #[derive(Clone, Default)]
 pub(crate) struct InstData {
@@ -641,7 +622,7 @@ impl InstData {
     /// Converts this instruction to a raw opcode in the given bytecode.
     #[inline]
     #[allow(dead_code)]
-    pub(crate) fn to_op_in<'a>(&self, bytecode: &LegacyBytecode<'a>) -> Opcode<'a> {
+    pub(crate) fn to_op_in<'a>(&self, bytecode: &Bytecode<'a>) -> Opcode<'a> {
         Opcode { opcode: self.opcode, immediate: bytecode.get_imm_of(self) }
     }
 
@@ -806,6 +787,12 @@ fn slice_as_bytes<T>(a: &[T]) -> &[u8] {
     unsafe { std::slice::from_raw_parts(a.as_ptr().cast(), std::mem::size_of_val(a)) }
 }
 
+fn get_two_mut<T>(sl: &mut [T], idx_1: usize, idx_2: usize) -> (&mut T, &mut T) {
+    assert!(idx_1 != idx_2 && idx_1 < sl.len() && idx_2 < sl.len());
+    let ptr = sl.as_mut_ptr();
+    unsafe { (&mut *ptr.add(idx_1), &mut *ptr.add(idx_2)) }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/revmc/src/bytecode/sections.rs b/crates/revmc/src/bytecode/sections.rs
index c046572..d6534f9 100644
--- a/crates/revmc/src/bytecode/sections.rs
+++ b/crates/revmc/src/bytecode/sections.rs
@@ -1,4 +1,4 @@
-use super::LegacyBytecode;
+use super::Bytecode;
 use core::fmt;
 
 // TODO: Separate gas sections from stack length sections.
@@ -55,7 +55,7 @@ pub(crate) struct SectionAnalysis {
 
 impl SectionAnalysis {
     /// Process a single instruction.
-    pub(crate) fn process(&mut self, bytecode: &mut LegacyBytecode<'_>, inst: usize) {
+    pub(crate) fn process(&mut self, bytecode: &mut Bytecode<'_>, inst: usize) {
         // JUMPDEST starts a section.
         if bytecode.inst(inst).is_reachable_jumpdest(bytecode.has_dynamic_jumps()) {
             self.save_to(bytecode, inst);
@@ -85,7 +85,7 @@ impl SectionAnalysis {
     }
 
     /// Finishes the analysis.
-    pub(crate) fn finish(self, bytecode: &mut LegacyBytecode<'_>) {
+    pub(crate) fn finish(self, bytecode: &mut Bytecode<'_>) {
         self.save_to(bytecode, bytecode.insts.len() - 1);
         if enabled!(tracing::Level::DEBUG) {
             let mut max_len = 0;
@@ -105,7 +105,7 @@ impl SectionAnalysis {
     }
 
     /// Saves the current section to the bytecode.
-    fn save_to(&self, bytecode: &mut LegacyBytecode<'_>, next_section_inst: usize) {
+    fn save_to(&self, bytecode: &mut Bytecode<'_>, next_section_inst: usize) {
         if self.start_inst >= bytecode.insts.len() {
             return;
         }
diff --git a/crates/revmc/src/compiler/mod.rs b/crates/revmc/src/compiler/mod.rs
index 3172d48..fe95fe6 100644
--- a/crates/revmc/src/compiler/mod.rs
+++ b/crates/revmc/src/compiler/mod.rs
@@ -2,9 +2,10 @@
 use crate::{Backend, Builder, Bytecode, EvmCompilerFn, EvmContext, EvmStack, Result};
 use revm_interpreter::{Contract, Gas};
-use revm_primitives::{Env, SpecId};
+use revm_primitives::{Bytes, Env, Eof, SpecId, EOF_MAGIC_BYTES};
 use revmc_backend::{
-    eyre::ensure, Attribute, FunctionAttributeLocation, Linkage, OptimizationLevel,
+    eyre::{ensure, eyre},
+    Attribute, FunctionAttributeLocation, Linkage, OptimizationLevel,
 };
 use revmc_builtins::Builtins;
 use revmc_context::RawEvmCompilerFn;
 
@@ -157,6 +158,15 @@ impl<B: Backend> EvmCompiler<B> {
         self.config.frame_pointers = yes;
     }
 
+    /// Sets whether to validate input EOF containers.
+    ///
+    /// **An invalid EOF container will likely result in a panic.**
+    ///
+    /// Defaults to `true`.
+    pub fn validate_eof(&mut self, yes: bool) {
+        self.config.validate_eof = yes;
+    }
+
     /// Sets whether to allocate the stack locally.
     ///
     /// If this is set to `true`, the stack pointer argument will be ignored and the stack will be
@@ -222,10 +232,15 @@ impl<B: Backend> EvmCompiler<B> {
     ///
     /// NOTE: `name` must be unique for each function, as it is used as the name of the final
     /// symbol.
-    pub fn translate(&mut self, name: &str, bytecode: &[u8], spec_id: SpecId) -> Result<B::FuncId> {
+    pub fn translate<'a>(
+        &mut self,
+        name: &str,
+        input: impl Into<EvmCompilerInput<'a>>,
+        spec_id: SpecId,
+    ) -> Result<B::FuncId> {
         ensure!(cfg!(target_endian = "little"), "only little-endian is supported");
         ensure!(!self.finalized, "cannot compile more functions after finalizing the module");
-        let bytecode = self.parse(bytecode, spec_id)?;
+        let bytecode = self.parse(input.into(), spec_id)?;
         self.translate_inner(name, &bytecode)
     }
 
@@ -237,13 +252,13 @@ impl<B: Backend> EvmCompiler<B> {
     ///
     /// The returned function pointer is owned by the module, and must not be called after the
     /// module is cleared or the function is freed.
-    pub unsafe fn jit(
+    pub unsafe fn jit<'a>(
        &mut self,
         name: &str,
-        bytecode: &[u8],
+        bytecode: impl Into<EvmCompilerInput<'a>>,
         spec_id: SpecId,
     ) -> Result<EvmCompilerFn> {
-        let id = self.translate(name, bytecode, spec_id)?;
+        let id = self.translate(name, bytecode.into(), spec_id)?;
         unsafe { self.jit_function(id) }
     }
 
@@ -308,9 +323,33 @@ impl<B: Backend> EvmCompiler<B> {
     }
 
     /// Parses the given EVM bytecode. Not public API.
-    #[doc(hidden)]
-    pub fn parse<'a>(&mut self, bytecode: &'a [u8], spec_id: SpecId) -> Result<Bytecode<'a>> {
-        let mut bytecode = Bytecode::new(bytecode, spec_id)?;
+    #[doc(hidden)] // Not public API.
+    pub fn parse<'a>(
+        &mut self,
+        input: EvmCompilerInput<'a>,
+        spec_id: SpecId,
+    ) -> Result<Bytecode<'a>> {
+        let bytecode;
+        let eof;
+        match input {
+            EvmCompilerInput::Code(code) => {
+                bytecode = code;
+                if spec_id.is_enabled_in(SpecId::PRAGUE_EOF) && code.starts_with(&EOF_MAGIC_BYTES) {
+                    eof = Some(Cow::Owned(Eof::decode(Bytes::copy_from_slice(code))?));
+                } else {
+                    eof = None;
+                }
+            }
+            EvmCompilerInput::Eof(e) => {
+                bytecode = &e.raw[..];
+                eof = Some(Cow::Borrowed(e));
+            }
+        }
+        if let Some(eof) = &eof {
+            self.do_validate_eof(eof)?;
+        }
+
+        let mut bytecode = Bytecode::new(bytecode, eof, spec_id);
         bytecode.analyze()?;
         if let Some(dump_dir) = &self.dump_dir() {
             Self::dump_bytecode(dump_dir, &bytecode)?;
@@ -318,47 +357,25 @@ impl<B: Backend> EvmCompiler<B> {
         Ok(bytecode)
     }
 
-    #[instrument(name = "translate", level = "debug", skip_all)]
-    fn translate_inner(&mut self, main_name: &str, bytecode: &Bytecode<'_>) -> Result<B::FuncId> {
-        let bytecodes = bytecode.as_legacy_slice();
-        assert!(!bytecodes.is_empty());
-        let eof = bytecode.as_eof();
-
-        ensure!(
-            self.backend.function_name_is_unique(main_name),
-            "function name `{main_name}` is not unique"
-        );
-
-        if let [bytecode] = bytecodes {
-            let linkage = Linkage::Public;
-            let (bcx, id) =
-                Self::make_builder(&mut self.backend, &self.config, main_name, linkage)?;
-            FunctionCx::translate(bcx, self.config, &mut self.builtins, bytecode, eof, main_name)?;
-            return Ok(id);
+    fn do_validate_eof(&self, eof: &Eof) -> Result<()> {
+        if !self.config.validate_eof {
+            return Ok(());
         }
-
-        let make_name = |i: usize| section_mangled_name(main_name, i);
-
-        // First declare all functions.
-        let mut id = None;
-        for i in 0..bytecodes.len() {
-            let linkage = if i == 0 { Linkage::Public } else { Linkage::Private };
-            let (_, local_id) =
-                Self::make_builder(&mut self.backend, &self.config, &make_name(i), linkage)?;
-            if i == 0 {
-                id = Some(local_id);
+        revm_interpreter::analysis::validate_eof(eof).map_err(|e| match e {
+            revm_interpreter::analysis::EofError::Decode(e) => e.into(),
+            revm_interpreter::analysis::EofError::Validation(e) => {
+                eyre!("validation error: {e:?}")
             }
-        }
-
-        // Then translate them.
-        for (i, bytecode) in bytecodes.iter().enumerate() {
-            let linkage = if i == 0 { Linkage::Public } else { Linkage::Private };
-            let (bcx, _) =
-                Self::make_builder(&mut self.backend, &self.config, &make_name(i), linkage)?;
-            FunctionCx::translate(bcx, self.config, &mut self.builtins, bytecode, eof, main_name)?;
-        }
+        })
+    }
 
-        Ok(id.unwrap())
+    #[instrument(name = "translate", level = "debug", skip_all)]
+    fn translate_inner(&mut self, name: &str, bytecode: &Bytecode<'_>) -> Result<B::FuncId> {
+        ensure!(self.backend.function_name_is_unique(name), "function name `{name}` is not unique");
+        let linkage = Linkage::Public;
+        let (bcx, id) = Self::make_builder(&mut self.backend, &self.config, name, linkage)?;
+        FunctionCx::translate(bcx, self.config, &mut self.builtins, bytecode)?;
+        Ok(id)
     }
 
     #[instrument(level = "debug", skip_all)]
@@ -510,6 +527,39 @@ impl<B: Backend> EvmCompiler<B> {
     }
 }
 
+/// [`EvmCompiler`] input.
+#[allow(missing_debug_implementations)]
+pub enum EvmCompilerInput<'a> {
+    /// EVM bytecode. Can also be raw EOF code, which will be parsed.
+    Code(&'a [u8]),
+    /// Already-parsed EOF container.
+    Eof(&'a Eof),
+}
+
+impl<'a> From<&'a [u8]> for EvmCompilerInput<'a> {
+    fn from(code: &'a [u8]) -> Self {
+        EvmCompilerInput::Code(code)
+    }
+}
+
+impl<'a> From<&'a Vec<u8>> for EvmCompilerInput<'a> {
+    fn from(code: &'a Vec<u8>) -> Self {
+        EvmCompilerInput::Code(code)
+    }
+}
+
+impl<'a> From<&'a Bytes> for EvmCompilerInput<'a> {
+    fn from(code: &'a Bytes) -> Self {
+        EvmCompilerInput::Code(code)
+    }
+}
+
+impl<'a> From<&'a Eof> for EvmCompilerInput<'a> {
+    fn from(eof: &'a Eof) -> Self {
+        EvmCompilerInput::Eof(eof)
+    }
+}
+
 #[allow(dead_code)]
 mod default_attrs {
     use revmc_backend::Attribute;
@@ -557,11 +607,3 @@ mod default_attrs {
         (std::mem::size_of::<T>(), std::mem::align_of::<T>())
     }
 }
-
-fn section_mangled_name(main_name: &str, i: usize) -> Cow<'_, str> {
-    if i == 0 {
-        Cow::Borrowed(main_name)
-    } else {
-        Cow::Owned(format!("{main_name}_section_{i}"))
-    }
-}
diff --git a/crates/revmc/src/compiler/translate.rs b/crates/revmc/src/compiler/translate.rs
index 5be9e77..f52ac9f 100644
--- a/crates/revmc/src/compiler/translate.rs
+++ b/crates/revmc/src/compiler/translate.rs
@@ -1,15 +1,16 @@
 //! EVM to IR translation.
 
-use super::{default_attrs, section_mangled_name};
+use super::default_attrs;
 use crate::{
-    Backend, Builder, EofBytecode, EvmContext, Inst, InstData, InstFlags, IntCC, LegacyBytecode,
-    Result, I256_MIN,
+    Backend, Builder, Bytecode, EvmContext, Inst, InstData, InstFlags, IntCC, Result, I256_MIN,
+};
+use revm_interpreter::{
+    opcode as op, Contract, FunctionReturnFrame, FunctionStack, InstructionResult,
+    OPCODE_INFO_JUMPTABLE,
 };
-use revm_interpreter::{opcode as op, Contract, InstructionResult, OPCODE_INFO_JUMPTABLE};
 use revm_primitives::{BlockEnv, CfgEnv, Env, Eof, TxEnv, U256};
 use revmc_backend::{
-    eyre::ensure, Attribute, BackendTypes, FunctionAttributeLocation, Pointer, TailCallKind,
-    TypeMethods,
+    eyre::ensure, Attribute, BackendTypes, FunctionAttributeLocation, Pointer, TypeMethods,
 };
 use revmc_builtins::{Builtin, Builtins, CallKind, CreateKind, ExtCallKind, EXTCALL_LIGHT_FAILURE};
 use std::{fmt::Write, mem, sync::atomic::AtomicPtr};
@@ -22,6 +23,7 @@ pub(super) struct FcxConfig {
     pub(super) comments: bool,
     pub(super) debug_assertions: bool,
     pub(super) frame_pointers: bool,
+    pub(super) validate_eof: bool,
 
     pub(super) local_stack: bool,
     pub(super) inspect_stack_length: bool,
@@ -35,6 +37,7 @@ impl Default for FcxConfig {
             debug_assertions: cfg!(debug_assertions),
             comments: false,
             frame_pointers: cfg!(debug_assertions),
+            validate_eof: true,
             local_stack: false,
             inspect_stack_length: false,
             stack_bound_checks: true,
@@ -64,6 +67,7 @@ pub(super) struct FunctionCx<'a, B: Backend> {
     bcx: B::Builder<'a>,
 
     // Common types.
+    ptr_type: B::Type,
     isize_type: B::Type,
     word_type: B::Type,
     address_type: B::Type,
@@ -88,12 +92,8 @@ pub(super) struct FunctionCx<'a, B: Backend> {
     /// Stack length offset for the current instruction, used for push/pop.
     len_offset: i8,
 
-    /// The name of the main function / first code section.
-    main_name: &'a str,
     /// The bytecode being translated.
-    bytecode: &'a LegacyBytecode<'a>,
-    /// The full EOF bytecode, if any.
-    eof: Option<&'a EofBytecode<'a>>,
+    bytecode: &'a Bytecode<'a>,
     /// All entry blocks for each instruction.
     inst_entries: Vec<B::BasicBlock>,
     /// The current instruction being translated.
@@ -187,13 +187,12 @@ impl<'a, B: Backend> FunctionCx<'a, B> { mut bcx: B::Builder<'a>, config: FcxConfig, builtins: &'a mut Builtins, - bytecode: &'a LegacyBytecode<'a>, - eof: Option<&'a EofBytecode<'a>>, - main_name: &'a str, + bytecode: &'a Bytecode<'a>, ) -> Result<()> { let entry_block = bcx.current_block().unwrap(); // Get common types. + let ptr_type = bcx.type_ptr(); let isize_type = bcx.type_ptr_sized_int(); let i8_type = bcx.type_int(8); let i64_type = bcx.type_int(64); @@ -217,11 +216,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { let stack_len_arg = bcx.fn_param(2); // This is initialized later in `post_entry_block`. - let stack_len = if !bytecode.is_main_section() { - Pointer::new_address(isize_type, stack_len_arg) - } else { - bcx.new_stack_slot(isize_type, "len.addr") - }; + let stack_len = bcx.new_stack_slot(isize_type, "len.addr"); let env = bcx.fn_param(3); let contract = bcx.fn_param(4); @@ -235,7 +230,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { if data.is_dead_code() { unreachable_block } else { - bcx.create_block(&op_block_name_with(i, data, "")) + bcx.create_block(&bytecode.op_block_name(i, "")) } }) .collect(); @@ -249,6 +244,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { let mut fx = FunctionCx { config, + ptr_type, isize_type, address_type, word_type, @@ -263,9 +259,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { len_offset: 0, bcx, - main_name, bytecode, - eof, inst_entries, current_inst: usize::MAX, @@ -286,13 +280,10 @@ impl<'a, B: Backend> FunctionCx<'a, B> { }; // We store the stack length if requested or necessary due to the bytecode. - let stack_length_observable = config.inspect_stack_length - || bytecode.may_suspend() - || (bytecode.is_eof() - && (!bytecode.is_main_section() || fx.expect_full_eof().any_may_suspend)); + let stack_length_observable = config.inspect_stack_length || bytecode.may_suspend(); // Add debug assertions for the parameters. - if config.debug_assertions && bytecode.is_main_section() { + if config.debug_assertions { fx.pointer_panic_with_bool( config.gas_metering, gas_ptr, @@ -371,10 +362,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { fx.stack_len.store_imm(&mut fx.bcx, 0); } }; - let generate_resume = bytecode.may_suspend() - || (bytecode.is_eof() - && bytecode.eof_section == Some(0) - && fx.expect_full_eof().any_may_suspend); + let generate_resume = bytecode.may_suspend(); if generate_resume { let get_ecx_resume_at_ptr = |fx: &mut Self| { fx.get_field( @@ -386,31 +374,10 @@ impl<'a, B: Backend> FunctionCx<'a, B> { let kind = fx.resume_kind; let resume_ty = match kind { - ResumeKind::Blocks => fx.bcx.type_ptr(), + ResumeKind::Blocks => fx.ptr_type, ResumeKind::Indexes => fx.isize_type, }; - // Dispatch to the relevant sections. - // TODO: Doesn't work - if cfg!(any()) - && bytecode.eof_section == Some(0) - && bytecode.is_eof() - && fx.expect_full_eof().any_may_suspend - { - let eof = fx.eof.take().unwrap(); - for (i, bytecode) in eof.sections.iter().enumerate().skip(1) { - let name = format!("resume.dispatch_to_section_{i}"); - let block = fx.bcx.create_block_after(resume_block, &name); - fx.bcx.switch_to_block(block); - fx.call_eof_section(i, true); - for _ in 0..bytecode.n_resumes() { - fx.add_resume_at(block); - } - } - debug_assert_eq!(fx.resume_blocks.len(), eof.total_resumes); - fx.eof = Some(eof); - } - // Resume block: load the `resume_at` value and switch to the corresponding block. // Invalid values are treated as unreachable. 
         {
@@ -1072,6 +1039,9 @@ impl<'a, B: Backend> FunctionCx<'a, B> {
                 let offset = i16::from_be_bytes(imm.try_into().unwrap());
                 let base_pc = data.pc + 3;
                 let target_pc = base_pc.wrapping_add(offset as u16 as u32);
+                if cfg!(debug_assertions) {
+                    self.bytecode.eof_assert_jump_in_bounds(base_pc as usize, target_pc as usize);
+                }
                 let target_inst = self.bytecode.pc_to_inst(target_pc as usize);
                 let target = self.inst_entries[target_inst];
                 if opcode == op::RJUMP {
@@ -1098,6 +1068,10 @@ impl<'a, B: Backend> FunctionCx<'a, B> {
                         assert_eq!(chunk.len(), 2);
                         let offset = i16::from_be_bytes(chunk.try_into().unwrap());
                         let target_pc = base_pc.wrapping_add(offset as u16 as u32);
+                        if cfg!(debug_assertions) {
+                            self.bytecode
+                                .eof_assert_jump_in_bounds(base_pc as usize, target_pc as usize);
+                        }
                        let target_inst = self.bytecode.pc_to_inst(target_pc as usize);
                         (i as u64, self.inst_entries[target_inst])
                     })
@@ -1108,17 +1082,19 @@ impl<'a, B: Backend> FunctionCx<'a, B> {
             op::CALLF => {
                 let imm = self.bytecode.get_imm_of(data).unwrap();
                 self.callf_common(imm, false);
+                goto_return!(no_branch);
             }
             op::RETF => {
-                let ptr = self.return_stack_len_ptr();
-                let len = self.bcx.load(self.isize_type, ptr, "return_stack.len");
-                if self.config.debug_assertions {
-                    let cond = self.bcx.icmp_imm(IntCC::Equal, len, 0);
-                    self.build_assertion(cond, "RETF with return_stack.len == 0");
-                }
-                let decremented = self.bcx.isub_imm(len, 1);
-                self.bcx.store(decremented, ptr);
-                goto_return!(build InstructionResult::Continue);
+                let address = self.call_func_stack_pop();
+                let section = self.bytecode.pc_to_eof_section(data.pc as usize);
+                let destinations = self
+                    .bytecode
+                    .eof_section_called_by(section)
+                    .iter()
+                    .map(|inst| self.inst_entries[*inst + 1])
+                    .collect::<Vec<_>>();
+                self.bcx.br_indirect(address, &destinations);
+                goto_return!(no_branch);
             }
             op::JUMPF => {
                 let imm = self.bytecode.get_imm_of(data).unwrap();
@@ -1361,16 +1337,6 @@ impl<'a, B: Backend> FunctionCx<'a, B> {
     fn callf_common(&mut self, imm: &[u8], is_jumpf: bool) {
         let op_name = if is_jumpf { "JUMPF" } else { "CALLF" };
 
-        // Check return stack overflow. We only store the length.
-        if !is_jumpf {
-            let ptr = self.return_stack_len_ptr();
-            let len = self.bcx.load(self.isize_type, ptr, "return_stack.len");
-            let cond = self.bcx.icmp_imm(IntCC::UnsignedGreaterThanOrEqual, len, STACK_CAP as i64);
-            self.build_check(cond, InstructionResult::EOFFunctionStackOverflow);
-            let incremented = self.bcx.iadd_imm(len, 1);
-            self.bcx.store(incremented, ptr);
-        }
-
         let idx = u16::from_be_bytes(imm.try_into().unwrap()) as usize;
 
         // Check stack max height.
@@ -1388,33 +1354,41 @@ impl<'a, B: Backend> FunctionCx<'a, B> {
         let cond = self.bcx.icmp_imm(IntCC::UnsignedGreaterThan, max_len, STACK_CAP as i64);
         self.build_check(cond, InstructionResult::StackOverflow);
 
-        // Call the section function.
-        self.call_eof_section(idx, is_jumpf);
-    }
-
-    /// Calls the section `idx` function.
-    /// `tail_call` forces a tail call.
- pub(crate) fn call_eof_section(&mut self, idx: usize, tail_call: bool) { - let name = section_mangled_name(self.main_name, idx); - let function = self - .bcx - .get_function(&name) - .unwrap_or_else(|| panic!("section {idx}: function not found")); - let mut args = - (0..self.bcx.num_fn_params()).map(|i| self.bcx.fn_param(i)).collect::>(); - if tail_call { - self.save_stack_len(); - } else { - args[2] = self.stack_len.addr(&mut self.bcx); - } - let tail = if tail_call { TailCallKind::MustTail } else { TailCallKind::None }; - let ret = self.bcx.tail_call(function, &args, tail).unwrap(); - if tail_call { - // `musttail` must precede `ret`. - self.bcx.ret(&[ret]); + // Push the return address to the function stack. + let next_block = self.inst_entries[self.current_inst + 1]; + if is_jumpf { + self.func_stack_set(idx); } else { - self.build_check_instruction_result(ret); + let value = match self.bcx.block_addr(next_block) { + Some(addr) => addr, + None => todo!(), + }; + self.call_func_stack_push(value, idx); } + + let inst = self.bytecode.eof_section_inst(idx); + self.bcx.br(self.inst_entries[inst]); + } + + fn func_stack_set(&mut self, idx: usize) { + let func_stack = self.func_stack(self.ecx); + let idx_ptr = self.get_field( + func_stack, + mem::offset_of!(FunctionStack, current_code_idx), + "ecx.func_stack.current_code_idx", + ); + let value = self.bcx.iconst(self.isize_type, idx as i64); + self.bcx.store(value, idx_ptr); + } + + /// Loads `ecx.func_stack`. + fn func_stack(&mut self, ecx: B::Value) -> B::Value { + let ptr = self.get_field( + ecx, + mem::offset_of!(EvmContext<'_>, func_stack), + "ecx.func_stack.addr.addr", + ); + self.bcx.load(self.ptr_type, ptr, "ecx.func_stack.addr") } /// Suspend execution, storing the resume point in the context. @@ -1453,13 +1427,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { /// Returns the `Eof` container, panicking if it is not set. #[track_caller] fn expect_eof(&self) -> &Eof { - &self.expect_full_eof().container - } - - /// Returns the full `EofBytecode`, panicking if it is not set. - #[track_caller] - fn expect_full_eof(&self) -> &EofBytecode<'a> { - self.eof.expect("EOF container not set") + self.bytecode.expect_eof() } /// Gets the stack length before the current instruction. @@ -1477,15 +1445,6 @@ impl<'a, B: Backend> FunctionCx<'a, B> { get_field(&mut self.bcx, ptr, offset, name) } - /// Returns the return stack length pointer. - fn return_stack_len_ptr(&mut self) -> B::Value { - self.get_field( - self.ecx, - mem::offset_of!(EvmContext<'_>, return_stack_len), - "return_stack.len.addr", - ) - } - /// Loads the gas used. fn load_gas_remaining(&mut self) -> B::Value { self.gas_remaining.load(&mut self.bcx, "gas_remaining") @@ -1667,6 +1626,10 @@ impl<'a, B: Backend> FunctionCx<'a, B> { } } + fn const_continue(&mut self) -> B::Value { + self.bcx.iconst(self.i8_type, InstructionResult::Continue as i64) + } + fn add_invalid_jump(&mut self) { self.incoming_returns.push(( self.bcx.iconst(self.i8_type, InstructionResult::InvalidJump as i64), @@ -1783,10 +1746,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { /// Returns the block name for the current opcode with the given suffix. 
fn op_block_name(&self, name: &str) -> String { - if self.current_inst == usize::MAX { - return format!("entry.{name}"); - } - op_block_name_with(self.current_inst, self.bytecode.inst(self.current_inst), name) + self.bytecode.op_block_name(self.current_inst, name) } } @@ -1875,7 +1835,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { self.call_ir_builtin( "calldataload", &[index, self.contract], - &[self.word_type, self.bcx.type_ptr()], + &[self.word_type, self.ptr_type], Some(self.word_type), Self::build_calldataload, ) @@ -1897,7 +1857,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { input_offset + mem::offset_of!(pf::Bytes, ptr), "contract.input.ptr.addr", ); - let ptr = self.bcx.load(self.bcx.type_ptr(), ptr_ptr, "contract.input.ptr"); + let ptr = self.bcx.load(self.ptr_type, ptr_ptr, "contract.input.ptr"); let len_ptr = self.get_field( contract, @@ -1962,7 +1922,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { MemOpKind::Store8 => "mstore8", }; let value_ty = match kind { - MemOpKind::Load => self.bcx.type_ptr(), + MemOpKind::Load => self.ptr_type, MemOpKind::Store => self.word_type, MemOpKind::Store8 => self.i8_type, }; @@ -1970,7 +1930,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { .call_ir_builtin( name, &[offset, value, self.ecx], - &[self.word_type, value_ty, self.bcx.type_ptr()], + &[self.word_type, value_ty, self.ptr_type], Some(self.i8_type), |this| this.build_mem_op(kind), ) @@ -2005,7 +1965,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { let memory_ptr = { let memory_ptr_ptr = self.get_field(ecx, mem::offset_of!(EvmContext<'_>, memory), "ecx.memory.addr"); - self.bcx.load(self.bcx.type_ptr(), memory_ptr_ptr, "ecx.memory") + self.bcx.load(self.ptr_type, memory_ptr_ptr, "ecx.memory") }; let memory_buffer_offset = mem::offset_of!(pf::SharedMemory, buffer); @@ -2062,7 +2022,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { memory_buffer_offset + mem::offset_of!(pf::Vec, ptr), "ecx.memory.buffer.ptr.shared.addr", ); - self.bcx.load(self.bcx.type_ptr(), ptr, "ecx.memory.buffer.ptr.shared") + self.bcx.load(self.ptr_type, ptr, "ecx.memory.buffer.ptr.shared") }; let buffer_ptr = self.bcx.gep( self.i8_type, @@ -2088,10 +2048,163 @@ impl<'a, B: Backend> FunctionCx<'a, B> { } } - let cont = self.bcx.iconst(self.i8_type, InstructionResult::Continue as i64); + let cont = self.const_continue(); + self.bcx.ret(&[cont]); + } + + fn call_func_stack_push(&mut self, pc: B::Value, new_idx: usize) { + let new_idx = self.bcx.iconst(self.isize_type, new_idx as i64); + self.call_fallible_builtin(Builtin::FuncStackPush, &[self.ecx, pc, new_idx]); + /* + let ret = self + .call_ir_builtin( + "func_stack_push", + &[self.ecx, pc, new_idx], + &[self.ptr_type, self.ptr_type, self.isize_type], + Some(self.i8_type), + Self::build_func_stack_push, + ) + .unwrap(); + self.build_check_instruction_result(ret); + */ + } + + #[allow(dead_code)] + fn build_func_stack_push(&mut self) { + let ecx = self.bcx.fn_param(0); + let value = self.bcx.fn_param(1); + let new_idx = self.bcx.fn_param(2); + + let func_stack = self.func_stack(ecx); + let return_stack_offset = mem::offset_of!(FunctionStack, return_stack); + + // Increment the length. 
+ let len_ptr = self.get_field( + func_stack, + return_stack_offset + mem::offset_of!(pf::Vec, len), + "ecx.func_stack.return_stack.len.addr", + ); + let old_len = self.bcx.load(self.isize_type, len_ptr, "ecx.func_stack.return_stack.len"); + let len = self.bcx.iadd_imm(old_len, 1); + let cond = self.bcx.icmp_imm(IntCC::UnsignedGreaterThan, len, STACK_CAP as i64); + self.build_check(cond, InstructionResult::StackOverflow); + + // Grow the capacity if needed. + let cap = { + let cap_ptr = self.get_field( + func_stack, + return_stack_offset + mem::offset_of!(pf::Vec, cap), + "ecx.func_stack.return_stack.cap.addr", + ); + self.bcx.load(self.isize_type, cap_ptr, "ecx.func_stack.return_stack.capacity") + }; + let cond = self.bcx.icmp(IntCC::Equal, len, cap); + let grow = self.create_block_after_current("grow"); + let cont = self.create_block_after_current("contd"); + self.bcx.brif_cold(cond, grow, cont, true); + + self.bcx.switch_to_block(grow); + let _ = self.call_builtin(Builtin::FuncStackGrow, &[func_stack]); + self.bcx.br(cont); + + self.bcx.switch_to_block(cont); + + // Store the length. + self.bcx.store(len, len_ptr); + + // Store the element. + let ptr = { + let ptr_ptr = self.get_field( + func_stack, + return_stack_offset + mem::offset_of!(pf::Vec, ptr), + "ecx.func_stack.return_stack.ptr.addr", + ); + self.bcx.load(self.ptr_type, ptr_ptr, "ecx.func_stack.return_stack.ptr") + }; + let frame_ty = self.bcx.type_array(self.ptr_type, 2); + let frame = self.bcx.gep(frame_ty, ptr, &[old_len], "frame.addr"); + + // Store the return address into the frame. + let frame_pc = { + let idx = &[self.bcx.iconst(self.isize_type, 0), self.bcx.iconst(self.isize_type, 1)]; + self.bcx.gep(frame_ty, frame, idx, "frame.pc") + }; + self.bcx.store(value, frame_pc); + + // Store the current index into the frame. + let current_idx_ptr = self.get_field( + func_stack, + mem::offset_of!(FunctionStack, current_code_idx), + "ecx.func_stack.current_code_idx", + ); + let current_idx = + self.bcx.load(self.isize_type, current_idx_ptr, "ecx.func_stack.current_code_idx"); + let frame_idx = { + let idx = &[self.bcx.iconst(self.isize_type, 0), self.bcx.iconst(self.isize_type, 0)]; + self.bcx.gep(frame_ty, frame, idx, "frame.idx") + }; + self.bcx.store(current_idx, frame_idx); + + // Store the new index. + self.bcx.store(new_idx, current_idx_ptr); + + let cont = self.const_continue(); self.bcx.ret(&[cont]); } + fn call_func_stack_pop(&mut self) -> B::Value { + self.call_builtin(Builtin::FuncStackPop, &[self.ecx]).unwrap() + /* + self.call_ir_builtin( + "func_stack_pop", + &[self.ecx], + &[self.ptr_type], + Some(self.ptr_type), + Self::build_func_stack_pop, + ) + .unwrap() + */ + } + + #[allow(dead_code)] + fn build_func_stack_pop(&mut self) { + let ecx = self.bcx.fn_param(0); + + let func_stack = self.func_stack(ecx); + let return_stack_offset = mem::offset_of!(FunctionStack, return_stack); + + // Decrement the length. + // This is a debug assertion because EOF validation should have caught this. + let len_ptr = self.get_field( + func_stack, + return_stack_offset + mem::offset_of!(pf::Vec, len), + "ecx.func_stack.return_stack.len", + ); + let len = self.bcx.load(self.isize_type, len_ptr, "ecx.func_stack.return_stack.len"); + if self.config.debug_assertions { + let cond = self.bcx.icmp_imm(IntCC::Equal, len, 0); + self.build_assertion(cond, "RETF with empty function stack"); + } + let len = self.bcx.isub_imm(len, 1); + self.bcx.store(len, len_ptr); + + // Get the address from the frame. 
+ let ptr = { + let ptr_ptr = self.get_field( + func_stack, + return_stack_offset + mem::offset_of!(pf::Vec, ptr), + "ecx.func_stack.return_stack.ptr.addr", + ); + self.bcx.load(self.ptr_type, ptr_ptr, "ecx.func_stack.return_stack.ptr") + }; + let pc = { + let frame_type = self.bcx.type_array(self.ptr_type, 2); + let idx = self.bcx.iconst(self.isize_type, 1); + self.bcx.gep(frame_type, ptr, &[len, idx], "frame.pc") + }; + self.bcx.ret(&[pc]); + } + fn call_ir_binop_builtin( &mut self, name: &str, @@ -2237,15 +2350,6 @@ mod pf { } } -fn op_block_name_with(op: Inst, data: &InstData, with: &str) -> String { - let data = data.to_op(); - if with.is_empty() { - format!("op.{op}.{data}") - } else { - format!("op.{op}.{data}.{with}") - } -} - fn get_field(bcx: &mut B, ptr: B::Value, offset: usize, name: &str) -> B::Value { let offset = bcx.iconst(bcx.type_ptr_sized_int(), offset as i64); bcx.gep(bcx.type_int(8), ptr, &[offset], name) diff --git a/crates/revmc/src/lib.rs b/crates/revmc/src/lib.rs index da518b3..0f43270 100644 --- a/crates/revmc/src/lib.rs +++ b/crates/revmc/src/lib.rs @@ -13,7 +13,7 @@ mod bytecode; pub use bytecode::*; mod compiler; -pub use compiler::EvmCompiler; +pub use compiler::{EvmCompiler, EvmCompilerInput}; mod linker; pub use linker::Linker; diff --git a/crates/revmc/src/linker.rs b/crates/revmc/src/linker.rs index d75e3e0..3846004 100644 --- a/crates/revmc/src/linker.rs +++ b/crates/revmc/src/linker.rs @@ -102,7 +102,7 @@ mod tests { let opt_level = revmc_backend::OptimizationLevel::Aggressive; let backend = crate::EvmLlvmBackend::new(&cx, true, opt_level).unwrap(); let mut compiler = crate::EvmCompiler::new(backend); - if let Err(e) = compiler.translate("link_test_basic", &[], SpecId::CANCUN) { + if let Err(e) = compiler.translate("link_test_basic", &[][..], SpecId::CANCUN) { panic!("failed to compile: {e}"); } diff --git a/crates/revmc/src/tests/resume.rs b/crates/revmc/src/tests/resume.rs index 0106bb9..c17f2f6 100644 --- a/crates/revmc/src/tests/resume.rs +++ b/crates/revmc/src/tests/resume.rs @@ -1,18 +1,17 @@ -use super::{eof, with_evm_context, DEF_SPEC}; +use super::{eof, eof_sections_unchecked, with_evm_context, DEF_SPEC}; use crate::{Backend, EvmCompiler, TEST_SUSPEND}; use revm_interpreter::{opcode as op, InstructionResult}; use revm_primitives::{SpecId, U256}; matrix_tests!(legacy = |compiler| run(compiler, TEST, DEF_SPEC)); matrix_tests!(eof_one_section = |compiler| run(compiler, &eof(TEST), SpecId::PRAGUE_EOF)); -// TODO -// matrix_tests!( -// eof_two_sections = |compiler| run( -// compiler, -// &eof_sections_unchecked(&[&[op::JUMPF, 0x00, 0x01], TEST]).raw, -// SpecId::PRAGUE_EOF -// ) -// ); +matrix_tests!( + eof_two_sections = |compiler| run( + compiler, + &eof_sections_unchecked(&[&[op::JUMPF, 0x00, 0x01], TEST]).raw, + SpecId::PRAGUE_EOF + ) +); #[rustfmt::skip] const TEST: &[u8] = &[ diff --git a/examples/compiler/src/main.rs b/examples/compiler/src/main.rs index 25257cb..78ad48a 100644 --- a/examples/compiler/src/main.rs +++ b/examples/compiler/src/main.rs @@ -35,7 +35,7 @@ fn main() -> eyre::Result<()> { let context = revmc::llvm::inkwell::context::Context::create(); let backend = EvmLlvmBackend::new(&context, false, OptimizationLevel::Aggressive)?; let mut compiler = EvmCompiler::new(backend); - let f = unsafe { compiler.jit("test", &bytecode, SpecId::CANCUN) } + let f = unsafe { compiler.jit("test", &bytecode[..], SpecId::CANCUN) } .wrap_err("Failed to JIT-compile code")?; // Set up runtime context and run the function.
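
A minimal usage sketch of the reworked compiler input path above. It assumes `EvmCompiler`, `EvmCompilerInput`, and the `Backend` trait are all reachable from the `revmc` crate root and that the caller uses an `eyre`-compatible error type, as in examples/compiler; the helper `compile_both` and the function names passed to `translate` are illustrative only.

    use revm_primitives::{Eof, SpecId};
    use revmc::{Backend, EvmCompiler};

    // Compile from either raw bytes or an already-decoded EOF container; both
    // convert into `EvmCompilerInput` through the `From` impls added in this patch.
    fn compile_both<B: Backend>(
        compiler: &mut EvmCompiler<B>,
        raw: &[u8],
        eof: &Eof,
    ) -> eyre::Result<()> {
        // Raw code: EOF is auto-detected and decoded when PRAGUE_EOF is enabled.
        let _raw_id = compiler.translate("raw_fn", raw, SpecId::CANCUN)?;

        // Pre-decoded container: borrowed as-is; `do_validate_eof` runs first
        // unless validation was explicitly disabled.
        compiler.validate_eof(true);
        let _eof_id = compiler.translate("eof_fn", eof, SpecId::PRAGUE_EOF)?;
        Ok(())
    }

Passing an `&Eof` directly avoids re-decoding a container the caller already has, while raw byte slices keep the old single-buffer workflow.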