diff --git a/CHANGELOG.md b/CHANGELOG.md index 4fc69969..6b8571e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.2.0] - 2023-12-08 +### Crate +- `awint` 0.15 + +### Changes +- Dramatically improved performance by a variety of changes + ## [0.1.0] - 2023-12-05 ### Crate - `awint` 0.14 diff --git a/starlight/Cargo.toml b/starlight/Cargo.toml index 0605a77f..c865db9f 100644 --- a/starlight/Cargo.toml +++ b/starlight/Cargo.toml @@ -1,19 +1,19 @@ [package] name = "starlight" -version = "0.1.1" +version = "0.2.0" edition = "2021" authors = ["Aaron Kutch "] license = "MIT OR Apache-2.0" readme = "README.md" repository = "https://github.com/AaronKutch/starlight" documentation = "https://docs.rs/starlight" -description = "reservation" +description = "experimental HDL and optimizer for DAGs of lookup tables" keywords = ["dag", "rtl", "hdl"] -categories = [] +categories = ["algorithms"] [dependencies] #awint = { path = "../../awint/awint", default-features = false, features = ["rand_support", "dag"] } -awint = { version = "0.14", default-features = false, features = ["rand_support", "dag"] } +awint = { version = "0.15", default-features = false, features = ["rand_support", "dag"] } rand_xoshiro = { version = "0.6", default-features = false } [features] diff --git a/starlight/src/awi_structs.rs b/starlight/src/awi_structs.rs index 24d30d47..ce1de563 100644 --- a/starlight/src/awi_structs.rs +++ b/starlight/src/awi_structs.rs @@ -6,4 +6,4 @@ mod temporal; pub use epoch::{Assertions, Epoch}; pub use eval_awi::EvalAwi; pub use lazy_awi::{LazyAwi, LazyInlAwi}; -pub use temporal::{Loop, LoopHandle, Net}; +pub use temporal::{Loop, Net}; diff --git a/starlight/src/awi_structs/epoch.rs b/starlight/src/awi_structs/epoch.rs index 3739ccb5..28bad472 100644 --- a/starlight/src/awi_structs/epoch.rs +++ b/starlight/src/awi_structs/epoch.rs @@ -8,7 +8,7 @@ use std::{cell::RefCell, mem, num::NonZeroUsize, rc::Rc, thread::panicking}; use awint::{ awint_dag::{ 
epoch::{EpochCallback, EpochKey}, - triple_arena::{ptr_struct, Arena}, + triple_arena::{ptr_struct, Advancer, Arena}, EvalError, Lineage, Location, Op, PState, }, bw, dag, @@ -58,13 +58,25 @@ pub struct EpochData { pub epoch_key: EpochKey, pub ensemble: Ensemble, pub responsible_for: Arena, - pub keep_flag: bool, } impl Drop for EpochData { fn drop(&mut self) { // prevent invoking recursive panics and a buffer overrun if !panicking() { + // if `responsible_for` is not empty, then this `EpochData` is probably being + // dropped in a special case like a panic (I have `panicking` guards on all the + // impls, but it seems that in some cases that for some reason a panic on unwrap + // can start dropping `EpochData`s before the `Epoch`s, and there are + // arbitrarily bad interactions so we always need to forget any `EvalAwi`s here) + // in which the `Epoch` is not going to be useful anyway. We need to + // `mem::forget` just the `EvalAwi`s of the assertions + for (_, mut shared) in self.responsible_for.drain() { + for eval_awi in shared.assertions.bits.drain(..) { + // avoid the `EvalAwi` drop code trying to access recursively + mem::forget(eval_awi); + } + } self.epoch_key.pop_off_epoch_stack(); } } @@ -85,7 +97,6 @@ impl EpochShared { epoch_key: _callback().push_on_epoch_stack(), ensemble: Ensemble::new(), responsible_for: Arena::new(), - keep_flag: true, }; let p_self = epoch_data.responsible_for.insert(PerEpochShared::new()); Self { @@ -125,13 +136,99 @@ impl EpochShared { drop(epoch_data); let mut cloned = vec![]; for p_state in states { - if let Some(eval) = EvalAwi::from_state(p_state) { - cloned.push(eval) - } + cloned.push(EvalAwi::from_state(p_state)) } Assertions { bits: cloned } } + /// Using `EpochShared::assertions` creates all new `Assertions`. This + /// eliminates assertions that evaluate to a constant true. 
+ pub fn assert_assertions(&self, strict: bool) -> Result<(), EvalError> { + let p_self = self.p_self; + let epoch_data = self.epoch_data.borrow(); + let mut len = epoch_data + .responsible_for + .get(p_self) + .unwrap() + .assertions + .bits + .len(); + drop(epoch_data); + let mut unknown = None; + let mut i = 0; + loop { + if i >= len { + break + } + let epoch_data = self.epoch_data.borrow(); + let eval_awi = &epoch_data + .responsible_for + .get(p_self) + .unwrap() + .assertions + .bits[i]; + let p_state = eval_awi.state(); + let p_note = eval_awi.p_note(); + drop(epoch_data); + let val = Ensemble::calculate_thread_local_note_value(p_note, 0)?; + if let Some(val) = val.known_value() { + if !val { + let epoch_data = self.epoch_data.borrow(); + let s = epoch_data.ensemble.get_state_debug(p_state); + if let Some(s) = s { + return Err(EvalError::OtherString(format!( + "an assertion bit evaluated to false, failed on {p_note} {:?}", + s + ))) + } else { + return Err(EvalError::OtherString(format!( + "an assertion bit evaluated to false, failed on {p_note} {p_state}" + ))) + } + } + } else if unknown.is_none() { + // get the earliest failure to evaluate, should be closest to the root cause. 
+ // Wait for all bits to be checked for falsity + unknown = Some((p_note, p_state)); + } + if val.is_const() { + // remove the assertion + let mut epoch_data = self.epoch_data.borrow_mut(); + let eval_awi = epoch_data + .responsible_for + .get_mut(p_self) + .unwrap() + .assertions + .bits + .swap_remove(i); + drop(epoch_data); + drop(eval_awi); + len -= 1; + } else { + i += 1; + } + } + if strict { + if let Some((p_note, p_state)) = unknown { + let epoch_data = self.epoch_data.borrow(); + let s = epoch_data.ensemble.get_state_debug(p_state); + if let Some(s) = s { + return Err(EvalError::OtherString(format!( + "an assertion bit could not be evaluated to a known value, failed on \ + {p_note} {:?}", + s + ))) + } else { + return Err(EvalError::OtherString(format!( + "an assertion bit could not be evaluated to a known value, failed on \ + {p_note} {p_state}" + ))) + } + } + } + Ok(()) + } + /// Returns a clone of the ensemble pub fn ensemble(&self) -> Ensemble { self.epoch_data.borrow().ensemble.clone() @@ -152,11 +249,13 @@ impl EpochShared { /// Removes associated states and assertions pub fn remove_associated(&self) { let mut epoch_data = self.epoch_data.borrow_mut(); - let mut ours = epoch_data.responsible_for.remove(self.p_self).unwrap(); - for p_state in ours.states_inserted { - let _ = epoch_data.ensemble.remove_state(p_state); + let ours = epoch_data.responsible_for.remove(self.p_self).unwrap(); + for p_state in &ours.states_inserted { + let _ = epoch_data.ensemble.remove_state(*p_state); } - ours.assertions.bits.clear(); + drop(epoch_data); + // drop the `EvalAwi`s of the assertions after unlocking + drop(ours); } pub fn set_as_current(&self) { @@ -209,6 +308,58 @@ impl EpochShared { } }); } + + fn internal_drive_loops_with_lower_capability(&self) -> Result<(), EvalError> { + // `Loop`s register states to lower so that the below loops can find them + Ensemble::handle_requests_with_lower_capability(self)?; + // first evaluate all loop drivers + let lock = 
self.epoch_data.borrow(); + let mut adv = lock.ensemble.lnodes.advancer(); + drop(lock); + loop { + let lock = self.epoch_data.borrow(); + if let Some(p_lnode) = adv.advance(&lock.ensemble.lnodes) { + let lnode = lock.ensemble.lnodes.get(p_lnode).unwrap(); + let p_driver = lnode.p_driver; + drop(lock); + Ensemble::calculate_value_with_lower_capability(self, p_driver)?; + } else { + break + } + } + // second do all loopback changes + let mut lock = self.epoch_data.borrow_mut(); + let mut adv = lock.ensemble.lnodes.advancer(); + while let Some(p_lnode) = adv.advance(&lock.ensemble.lnodes) { + let lnode = lock.ensemble.lnodes.get(p_lnode).unwrap(); + let val = lock.ensemble.backrefs.get_val(lnode.p_driver).unwrap().val; + let p_self = lnode.p_self; + lock.ensemble.change_value(p_self, val).unwrap(); + } + Ok(()) + } + + fn internal_drive_loops(&self) -> Result<(), EvalError> { + // first evaluate all loop drivers + let mut lock = self.epoch_data.borrow_mut(); + let ensemble = &mut lock.ensemble; + + let mut adv = ensemble.lnodes.advancer(); + while let Some(p_lnode) = adv.advance(&ensemble.lnodes) { + let lnode = ensemble.lnodes.get(p_lnode).unwrap(); + let p_driver = lnode.p_driver; + ensemble.calculate_value(p_driver)?; + } + // second do all loopback changes + let mut adv = ensemble.lnodes.advancer(); + while let Some(p_lnode) = adv.advance(&ensemble.lnodes) { + let lnode = ensemble.lnodes.get(p_lnode).unwrap(); + let val = ensemble.backrefs.get_val(lnode.p_driver).unwrap().val; + let p_self = lnode.p_self; + ensemble.change_value(p_self, val).unwrap(); + } + Ok(()) + } } thread_local!( @@ -257,10 +408,7 @@ pub fn _callback() -> EpochCallback { fn new_pstate(nzbw: NonZeroUsize, op: Op, location: Option) -> PState { no_recursive_current_epoch_mut(|current| { let mut epoch_data = current.epoch_data.borrow_mut(); - let keep = epoch_data.keep_flag; - let p_state = epoch_data - .ensemble - .make_state(nzbw, op.clone(), location, keep); + let p_state = 
epoch_data.ensemble.make_state(nzbw, op.clone(), location); epoch_data .responsible_for .get_mut(current.p_self) @@ -273,21 +421,22 @@ pub fn _callback() -> EpochCallback { fn register_assertion_bit(bit: dag::bool, location: Location) { // need a new bit to attach new location data to let new_bit = new_pstate(bw(1), Op::Assert([bit.state()]), Some(location)); - no_recursive_current_epoch_mut(|current| { - let mut epoch_data = current.epoch_data.borrow_mut(); - // need to manually construct to get around closure issues - let p_note = epoch_data.ensemble.note_pstate(new_bit).unwrap(); - let eval_awi = EvalAwi { - p_state: new_bit, - p_note, - }; - epoch_data - .responsible_for - .get_mut(current.p_self) - .unwrap() - .assertions - .bits - .push(eval_awi); + let eval_awi = EvalAwi::from_state(new_bit); + // manual to get around closure issue + CURRENT_EPOCH.with(|top| { + let mut top = top.borrow_mut(); + if let Some(current) = top.as_mut() { + let mut epoch_data = current.epoch_data.borrow_mut(); + epoch_data + .responsible_for + .get_mut(current.p_self) + .unwrap() + .assertions + .bits + .push(eval_awi); + } else { + panic!("There needs to be an `Epoch` in scope for this to work"); + } }) } fn get_nzbw(p_state: PState) -> NonZeroUsize { @@ -380,8 +529,9 @@ impl Epoch { } /// Intended primarily for developer use - pub fn internal_epoch_shared(&self) -> &EpochShared { - &self.shared + #[doc(hidden)] + pub fn internal_epoch_shared(this: &Epoch) -> &EpochShared { + &this.shared } /// Gets the assertions associated with this Epoch (not including assertions @@ -391,83 +541,43 @@ impl Epoch { self.shared.assertions() } - // TODO fix the EvalError enum situation - /// If any assertion bit evaluates to false, this returns an error. 
pub fn assert_assertions(&self) -> Result<(), EvalError> { - let bits = self.shared.assertions().bits; - for eval_awi in bits { - let val = eval_awi.eval_bit()?; - if let Some(val) = val.known_value() { - if !val { - return Err(EvalError::OtherString(format!( - "an assertion bit evaluated to false, failed on {}", - self.shared - .epoch_data - .borrow() - .ensemble - .get_state_debug(eval_awi.state()) - .unwrap() - ))) - } - } - } - Ok(()) + self.shared.assert_assertions(false) } /// If any assertion bit evaluates to false, this returns an error. If there /// were no known false assertions but some are `Value::Unknown`, this /// returns a specific error for it. pub fn assert_assertions_strict(&self) -> Result<(), EvalError> { - let bits = self.shared.assertions().bits; - let mut unknown = None; - for eval_awi in bits { - let val = eval_awi.eval_bit()?; - if let Some(val) = val.known_value() { - if !val { - return Err(EvalError::OtherString(format!( - "assertion bits are not all true, failed on {}", - self.shared - .epoch_data - .borrow() - .ensemble - .get_state_debug(eval_awi.state()) - .unwrap() - ))) - } - } else if unknown.is_none() { - // get the earliest failure to evaluate wait for all bits to be checked for - // falsity - unknown = Some(eval_awi.p_state); - } - } - if let Some(p_state) = unknown { - Err(EvalError::OtherString(format!( - "an assertion bit could not be evaluated to a known value, failed on {}", - self.shared - .epoch_data - .borrow() - .ensemble - .get_state_debug(p_state) - .unwrap() - ))) - } else { - Ok(()) - } + self.shared.assert_assertions(true) } pub fn ensemble(&self) -> Ensemble { self.shared.ensemble() } - /// Removes all non-noted states + /// Used for testing + pub fn prune_ignore_assertions(&self) -> Result<(), EvalError> { + let epoch_shared = get_current_epoch().unwrap(); + if !Rc::ptr_eq(&epoch_shared.epoch_data, &self.shared.epoch_data) { + return Err(EvalError::OtherStr("epoch is not the current epoch")) + } + // do not 
assert assertions because that can trigger lowering + let mut lock = epoch_shared.epoch_data.borrow_mut(); + lock.ensemble.prune_states() + } + + /// For users, this removes all states that do not lead to a live `EvalAwi` pub fn prune(&self) -> Result<(), EvalError> { let epoch_shared = get_current_epoch().unwrap(); if !Rc::ptr_eq(&epoch_shared.epoch_data, &self.shared.epoch_data) { return Err(EvalError::OtherStr("epoch is not the current epoch")) } + // get rid of constant assertions + let _ = epoch_shared.assert_assertions(false); let mut lock = epoch_shared.epoch_data.borrow_mut(); - lock.ensemble.prune_unnoted_states() + lock.ensemble.prune_states() } /// Lowers all states. This is not needed in most circumstances, `EvalAwi` @@ -477,9 +587,12 @@ impl Epoch { if !Rc::ptr_eq(&epoch_shared.epoch_data, &self.shared.epoch_data) { return Err(EvalError::OtherStr("epoch is not the current epoch")) } - Ensemble::lower_all(&epoch_shared) + Ensemble::lower_all(&epoch_shared)?; + let _ = epoch_shared.assert_assertions(false); + Ok(()) } + /// Runs optimization including lowering then pruning all states. 
pub fn optimize(&self) -> Result<(), EvalError> { let epoch_shared = get_current_epoch().unwrap(); if !Rc::ptr_eq(&epoch_shared.epoch_data, &self.shared.epoch_data) { @@ -488,9 +601,28 @@ impl Epoch { Ensemble::lower_all(&epoch_shared)?; let mut lock = epoch_shared.epoch_data.borrow_mut(); lock.ensemble.optimize_all(); + drop(lock); + let _ = epoch_shared.assert_assertions(false); Ok(()) } - // TODO - //pub fn prune_nonnoted + /// This evaluates all loop drivers, and then registers loopback changes + pub fn drive_loops(&self) -> Result<(), EvalError> { + let epoch_shared = get_current_epoch().unwrap(); + if !Rc::ptr_eq(&epoch_shared.epoch_data, &self.shared.epoch_data) { + return Err(EvalError::OtherStr("epoch is not the current epoch")) + } + if epoch_shared + .epoch_data + .borrow() + .ensemble + .stator + .states + .is_empty() + { + epoch_shared.internal_drive_loops() + } else { + epoch_shared.internal_drive_loops_with_lower_capability() + } + } } diff --git a/starlight/src/awi_structs/eval_awi.rs b/starlight/src/awi_structs/eval_awi.rs index c8015f7e..cb082352 100644 --- a/starlight/src/awi_structs/eval_awi.rs +++ b/starlight/src/awi_structs/eval_awi.rs @@ -1,16 +1,21 @@ -use std::{fmt, num::NonZeroUsize}; +use std::{fmt, num::NonZeroUsize, thread::panicking}; use awint::{ - awint_dag::{dag, epoch, EvalError, Lineage, PState}, + awint_dag::{dag, EvalError, Lineage, PState}, awint_internals::forward_debug_fmt, }; use crate::{ awi, - ensemble::{Evaluator, PNote, Value}, + ensemble::{Ensemble, PNote}, epoch::get_current_epoch, }; +// Note: `mem::forget` can be used on `EvalAwi`s, but in this crate it should +// only be done in special cases like if a `EpochShared` is being force dropped +// by a panic or something that would necessitate giving up on `Epoch` +// invariants anyway + /// When created from a type implementing `AsRef`, it can later be /// used to evaluate its dynamic value. 
/// @@ -18,13 +23,33 @@ use crate::{ /// /// # Custom Drop /// -/// TODO +/// Upon being dropped, this will remove special references being kept by the +/// current `Epoch`. pub struct EvalAwi { - pub(crate) p_state: PState, - pub(crate) p_note: PNote, + p_state: PState, + p_note: PNote, } -// TODO impl drop to remove note +impl Drop for EvalAwi { + fn drop(&mut self) { + // prevent invoking recursive panics and a buffer overrun + if !panicking() { + if let Some(epoch) = get_current_epoch() { + let mut lock = epoch.epoch_data.borrow_mut(); + let res = lock.ensemble.remove_note(self.p_note); + if res.is_err() { + panic!( + "most likely, an `EvalAwi` created in one `Epoch` was dropped in another" + ) + } + if let Some(state) = lock.ensemble.stator.states.get_mut(self.p_state) { + state.dec_extern_rc(); + } + } + // else the epoch has been dropped + } + } +} impl Lineage for EvalAwi { fn state(&self) -> PState { @@ -34,24 +59,42 @@ impl Lineage for EvalAwi { impl Clone for EvalAwi { /// This makes another note to the same state that `self` pointed to. + #[track_caller] fn clone(&self) -> Self { - let p_note = get_current_epoch() - .unwrap() - .epoch_data - .borrow_mut() - .ensemble - .note_pstate(self.p_state) - .unwrap(); - Self { - p_state: self.p_state, - p_note, - } + Self::from_state(self.p_state) + } +} + +macro_rules! 
evalawi_from_impl { + ($($fn:ident $t:ident);*;) => { + $( + #[track_caller] + pub fn $fn(x: dag::$t) -> Self { + Self::from_state(x.state()) + } + )* } } impl EvalAwi { + evalawi_from_impl!( + from_bool bool; + from_u8 u8; + from_i8 i8; + from_u16 u16; + from_i16 i16; + from_u32 u32; + from_i32 i32; + from_u64 u64; + from_i64 i64; + from_u128 u128; + from_i128 i128; + from_usize usize; + from_isize isize; + ); + pub fn nzbw(&self) -> NonZeroUsize { - epoch::get_nzbw_from_current_epoch(self.p_state) + Ensemble::get_thread_local_note_nzbw(self.p_note).unwrap() } pub fn bw(&self) -> usize { @@ -62,27 +105,48 @@ impl EvalAwi { self.p_note } - pub(crate) fn from_state(p_state: PState) -> Option { - let p_note = get_current_epoch() - .unwrap() - .epoch_data - .borrow_mut() - .ensemble - .note_pstate(p_state)?; - Some(Self { p_state, p_note }) + /// Used internally to create `EvalAwi`s + /// + /// # Panics + /// + /// If an `Epoch` does not exist or the `PState` was pruned + #[track_caller] + pub fn from_state(p_state: PState) -> Self { + if let Some(epoch) = get_current_epoch() { + let mut lock = epoch.epoch_data.borrow_mut(); + match lock.ensemble.note_pstate(p_state) { + Some(p_note) => { + lock.ensemble + .stator + .states + .get_mut(p_state) + .unwrap() + .inc_extern_rc(); + Self { p_state, p_note } + } + None => { + panic!( + "could not create an `EvalAwi` from the given mimicking state, probably \ + because the state was pruned or came from a different `Epoch`" + ) + } + } + } else { + panic!("attempted to create an `EvalAwi` when no live `Epoch` exists") + } } - /// Can return `None` if the state has been pruned - pub fn from_bits(bits: &dag::Bits) -> Option { + /// Can panic if the state has been pruned + #[track_caller] + pub fn from_bits(bits: &dag::Bits) -> Self { Self::from_state(bits.state()) } pub fn eval(&self) -> Result { let nzbw = self.nzbw(); - let p_self = self.state(); let mut res = awi::Awi::zero(nzbw); for bit_i in 0..res.bw() { - let val = 
Evaluator::calculate_thread_local_state_value(p_self, bit_i)?; + let val = Ensemble::calculate_thread_local_note_value(self.p_note, bit_i)?; if let Some(val) = val.known_value() { res.set(bit_i, val).unwrap(); } else { @@ -93,7 +157,7 @@ impl EvalAwi { .epoch_data .borrow() .ensemble - .get_state_debug(p_self) + .get_state_debug(self.p_state) .unwrap() ))) } @@ -101,36 +165,39 @@ impl EvalAwi { Ok(res) } - /// Assumes `self` is a single bit - pub(crate) fn eval_bit(&self) -> Result { - let p_self = self.state(); - assert_eq!(self.bw(), 1); - Evaluator::calculate_thread_local_state_value(p_self, 0) - } - pub fn zero(w: NonZeroUsize) -> Self { - Self::from_bits(&dag::Awi::zero(w)).unwrap() + Self::from_bits(&dag::Awi::zero(w)) } pub fn umax(w: NonZeroUsize) -> Self { - Self::from_bits(&dag::Awi::umax(w)).unwrap() + Self::from_bits(&dag::Awi::umax(w)) } pub fn imax(w: NonZeroUsize) -> Self { - Self::from_bits(&dag::Awi::imax(w)).unwrap() + Self::from_bits(&dag::Awi::imax(w)) } pub fn imin(w: NonZeroUsize) -> Self { - Self::from_bits(&dag::Awi::imin(w)).unwrap() + Self::from_bits(&dag::Awi::imin(w)) } pub fn uone(w: NonZeroUsize) -> Self { - Self::from_bits(&dag::Awi::uone(w)).unwrap() + Self::from_bits(&dag::Awi::uone(w)) } } impl fmt::Debug for EvalAwi { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(epoch) = get_current_epoch() { + if let Some(s) = epoch + .epoch_data + .borrow() + .ensemble + .get_state_debug(self.state()) + { + return write!(f, "EvalAwi({s})"); + } + } write!(f, "EvalAwi({:?})", self.state()) } } @@ -140,6 +207,6 @@ forward_debug_fmt!(EvalAwi); impl> From for EvalAwi { #[track_caller] fn from(b: B) -> Self { - Self::from_bits(b.as_ref()).unwrap() + Self::from_bits(b.as_ref()) } } diff --git a/starlight/src/awi_structs/lazy_awi.rs b/starlight/src/awi_structs/lazy_awi.rs index 83506b42..45457182 100644 --- a/starlight/src/awi_structs/lazy_awi.rs +++ b/starlight/src/awi_structs/lazy_awi.rs @@ -3,6 +3,7 @@ use std::{ fmt, 
num::NonZeroUsize, ops::{Deref, Index, RangeFull}, + thread::panicking, }; use awint::{ @@ -12,28 +13,49 @@ use awint::{ use crate::{ awi, - ensemble::{Evaluator, PNote}, + ensemble::{Ensemble, PNote}, epoch::get_current_epoch, }; // do not implement `Clone` for this, we would need a separate `LazyCellAwi` // type -// TODO I have attached a note to `LazyAwi` because without debug assertions, -// states could get clobbered. I suspect that it naturally requires `Note`s to -// get involved because of the `nzbw` problem. +// Note: `mem::forget` can be used on `LazyAwi`s, but in this crate it should +// only be done in special cases like if a `EpochShared` is being force dropped +// by a panic or something that would necessitate giving up on `Epoch` +// invariants anyway /// When other mimicking types are created from a reference of this, `retro_` /// can later be called to retroactively change the input values of the DAG. +/// +/// # Custom Drop +/// +/// Upon being dropped, this will remove special references being kept by the +/// current `Epoch` pub struct LazyAwi { - // this must remain the same opaque and noted in order for `retro_` to work opaque: dag::Awi, - // needs to be kept in case the `LazyAwi` is optimized away, but we still need bitwidth - // comparisons - nzbw: NonZeroUsize, p_note: PNote, } +impl Drop for LazyAwi { + fn drop(&mut self) { + // prevent invoking recursive panics and a buffer overrun + if !panicking() { + if let Some(epoch) = get_current_epoch() { + let res = epoch + .epoch_data + .borrow_mut() + .ensemble + .remove_note(self.p_note); + if res.is_err() { + panic!("most likely, a `LazyAwi` created in one `Epoch` was dropped in another") + } + } + // else the epoch has been dropped + } + } +} + impl Lineage for LazyAwi { fn state(&self) -> PState { self.opaque.state() @@ -46,15 +68,11 @@ impl LazyAwi { } pub fn nzbw(&self) -> NonZeroUsize { - self.nzbw + Ensemble::get_thread_local_note_nzbw(self.p_note).unwrap() } pub fn bw(&self) -> usize { 
- self.nzbw.get() - } - - pub fn p_note(&self) -> PNote { - self.p_note + self.nzbw().get() } pub fn opaque(w: NonZeroUsize) -> Self { @@ -66,11 +84,7 @@ impl LazyAwi { .ensemble .note_pstate(opaque.state()) .unwrap(); - Self { - opaque, - nzbw: w, - p_note, - } + Self { opaque, p_note } } // TODO it probably does need to be an extra `Awi` in the `Opaque` variant, @@ -107,14 +121,7 @@ impl LazyAwi { /// if this is being called after the corresponding Epoch is dropped and /// states have been pruned. pub fn retro_(&self, rhs: &awi::Bits) -> Result<(), EvalError> { - if self.nzbw != rhs.nzbw() { - // `change_thread_local_state_value` will return without error if it does not - // find the state, but we need to return an error if there is a bitwidth - // mismatch - return Err(EvalError::WrongBitwidth) - } - let p_lhs = self.state(); - Evaluator::change_thread_local_state_value(p_lhs, rhs) + Ensemble::change_thread_local_note_value(self.p_note, rhs) } } @@ -215,14 +222,7 @@ impl LazyInlAwi { /// if this is being called after the corresponding Epoch is dropped and /// states have been pruned. 
pub fn retro_(&self, rhs: &awi::Bits) -> Result<(), EvalError> { - if BW != rhs.bw() { - // `change_thread_local_state_value` will return without error if it does not - // find the state, but we need to return an error if there is a bitwidth - // mismatch - return Err(EvalError::WrongBitwidth) - } - let p_lhs = self.state(); - Evaluator::change_thread_local_state_value(p_lhs, rhs) + Ensemble::change_thread_local_note_value(self.p_note, rhs) } } diff --git a/starlight/src/awi_structs/temporal.rs b/starlight/src/awi_structs/temporal.rs index 8da3d6a7..8a1d295a 100644 --- a/starlight/src/awi_structs/temporal.rs +++ b/starlight/src/awi_structs/temporal.rs @@ -1,130 +1,160 @@ -use std::{borrow::Borrow, num::NonZeroUsize, ops::Deref}; +use std::{borrow::Borrow, cmp::min, num::NonZeroUsize, ops::Deref}; use awint::{ - awint_dag::{Lineage, PState}, - dag::{self, Awi, Bits, InlAwi}, + awint_dag::{smallvec::smallvec, Lineage, Op}, + dag::{self, awi, Awi, Bits, InlAwi}, }; -/// Returned from `Loop::drive` and other structures like `Net::drive` that use -/// `Loop`s internally, implements [awint::awint_dag::Lineage] so that the whole -/// DAG can be captured. -#[derive(Debug, Clone)] // TODO make Copy -pub struct LoopHandle { - // just use this for now to have the non-sendability - awi: Awi, -} - -impl Lineage for LoopHandle { - fn state(&self) -> PState { - self.awi.state() - } -} +use crate::{epoch::get_current_epoch, lower::meta::selector}; /// Provides a way to temporally wrap around a combinatorial circuit. /// -/// Get a `&Bits` reference from a `Loop` via the `Deref`, `Borrow`, or -/// `AsRef` impls, then consume the `Loop` with [Loop::drive]. +/// Get a `&Bits` temporal value from a `Loop` via one of the traits like +/// `Deref` or `AsRef`, then drive the `Loop` with +/// [Loop::drive]. When [crate::Epoch::drive_loops] is run, it will evaluate the +/// value of the driver and use that to retroactively change the temporal value +/// of the loop. 
/// -/// The fundamental reason for temporal asymmetry is that there needs to be a -/// well defined root evaluation state and value. +/// ``` +/// use starlight::{awi, dag::*, Epoch, EvalAwi, Loop}; +/// let epoch = Epoch::new(); +/// +/// let looper = Loop::zero(bw(4)); +/// // evaluate the value of `looper` at this point later +/// let val = EvalAwi::from(&looper); +/// let mut tmp = awi!(looper); +/// tmp.inc_(true); +/// // drive the `Loop` with itself incremented +/// looper.drive(&tmp).unwrap(); +/// +/// { +/// use awi::*; +/// for i in 0..16 { +/// // check that the evaluated value is equal to +/// // this loop iteration number +/// awi::assert_eq!(i, val.eval().unwrap().to_usize()); +/// // every time `drive_loops` is called, +/// // the evaluated value increases by one +/// epoch.drive_loops().unwrap(); +/// } +/// } +/// drop(epoch); +/// ``` +// The fundamental reason for temporal asymmetry is that there needs to be a +// well defined root evaluation state and value. #[derive(Debug)] // do not implement `Clone`, but maybe implement a `duplicate` function that // explicitly duplicates drivers and loopbacks? pub struct Loop { - awi: Awi, + source: Awi, } impl Loop { /// Creates a `Loop` with an intial temporal value of zero and bitwidth `w` pub fn zero(w: NonZeroUsize) -> Self { - // TODO add flag on opaque for initial value, and a way to notify if the - // `LoopHandle` is not included in the graph - Self { - awi: Awi::opaque(w), - } + let source = Awi::new(w, Op::Opaque(smallvec![], Some("LoopSource"))); + Self { source } } - // TODO pub fn opaque() umax(), etc + // TODO pub fn opaque(), umax(), From<&Bits>, etc. 
What we could do is have an + // extra input to "LoopSource" that designates the initial value, but there are + // many questions to be resolved /// Returns the bitwidth of `self` as a `NonZeroUsize` #[must_use] pub fn nzbw(&self) -> NonZeroUsize { - self.awi.nzbw() + self.source.nzbw() } /// Returns the bitwidth of `self` as a `usize` #[must_use] pub fn bw(&self) -> usize { - self.awi.bw() - } - - /// Get the loop value. This can conveniently be obtained by the `Deref`, - /// `Borrow`, and `AsRef` impls on `Loop`. - #[must_use] - pub fn get(&self) -> &Bits { - &self.awi + self.source.bw() } /// Consumes `self`, looping back with the value of `driver` to change the - /// `Loop`s temporal value in a iterative temporal evaluation. Returns a - /// `LoopHandle`. Returns `None` if `self.bw() != driver.bw()`. + /// `Loop`s temporal value in a iterative temporal evaluation. Returns + /// `None` if `self.bw() != driver.bw()`. #[must_use] - pub fn drive(mut self, driver: &Bits) -> Option { - // TODO use id from `awi`, for now since there are only `Loops` we denote a loop - // with a double input `Opaque` - if self.awi.bw() != driver.bw() { + pub fn drive(self, driver: &Bits) -> Option<()> { + if self.source.bw() != driver.bw() { None } else { - self.awi.opaque_with_(&[driver], Some("LoopHandle")); - Some(LoopHandle { awi: self.awi }) + let epoch = get_current_epoch().unwrap(); + let mut lock = epoch.epoch_data.borrow_mut(); + lock.ensemble + .stator + .states + .get_mut(self.source.state()) + .unwrap() + .op = Op::Opaque(smallvec![driver.state()], Some("LoopSource")); + lock.ensemble + .stator + .states + .get_mut(driver.state()) + .unwrap() + .inc_rc(); + // in order for loop driving to always work we need to do this (otherwise + // `drive_loops` would have to search all states) + lock.ensemble + .stator + .states_to_lower + .push(self.source.state()); + Some(()) } } } -// TODO From<&Bits> and other constructions - impl Deref for Loop { type Target = Bits; fn deref(&self) 
-> &Self::Target { - self.get() + &self.source } } impl Borrow for Loop { fn borrow(&self) -> &Bits { - self.get() + &self.source } } impl AsRef for Loop { fn as_ref(&self) -> &Bits { - self.get() + &self.source } } /// A reconfigurable `Net` that is a `Vec`-like vector of "ports" that are -/// multiplexed to drive an internal `Loop`. First, [Net::get] or the trait -/// impls can be used to get the temporal value. Second, `Net::push_*` and -/// [Net::get_mut] can be write values to each of the ports. Third, [Net::drive] -/// takes a possibly dynamic index that multiplexes one of the values of the -/// ports to drive the temporal value. +/// multiplexed to drive an internal `Loop`. First, use a trait like +/// `Deref` or `AsRef` to get the temporal value. Second, +/// [Net::push] and [Net::get_mut] can be used to write values to each of the +/// ports. Third, [Net::drive] takes a possibly dynamic index that multiplexes +/// one of the values of the ports to drive the temporal value across +/// [crate::Epoch::drive_loops] calls. #[derive(Debug)] pub struct Net { - driver: Loop, - initial: Awi, + source: Loop, ports: Vec, } impl Net { - /// Create a `Net` with an initial value of zero and bitwidth `w` + /// Create a `Net` with an initial temporal value of zero and bitwidth `w` pub fn zero(w: NonZeroUsize) -> Self { Self { - driver: Loop::zero(w), - initial: Awi::zero(w), + source: Loop::zero(w), ports: vec![], } } + /// Creates a `Net` with [Net::zero] and pushes on `num_ports` ports + /// initialized to zero. 
+ pub fn zero_with_ports(w: NonZeroUsize, num_ports: usize) -> Self { + Self { + source: Loop::zero(w), + ports: vec![Awi::zero(w); num_ports], + } + } + /// Returns the current number of ports #[must_use] pub fn len(&self) -> usize { @@ -140,34 +170,29 @@ impl Net { /// Returns the bitwidth of `self` as a `NonZeroUsize` #[must_use] pub fn nzbw(&self) -> NonZeroUsize { - self.driver.nzbw() + self.source.nzbw() } /// Returns the bitwidth of `self` as a `usize` #[must_use] pub fn bw(&self) -> usize { - self.driver.bw() - } - - /// Pushes on a new port that is initially set to the initial value this - /// `Net` was constructed with (and not the temporal value). If nothing is - /// done to the port, and this port is selected as the driver, then the - /// loop value will be the initial value this `Net` was originally - /// constructed with. Returns a mutable reference to the port for - /// immediate use (or the port can be accessed later by `get_mut`). - pub fn push(&mut self) -> &mut Bits { - self.ports.push(self.initial.clone()); - self.ports.last_mut().unwrap() + self.source.bw() } - /// Get the temporal value. This can conveniently be obtained by the - /// `Deref`, `Borrow`, and `AsRef` impls on `Net`. + /// Pushes on a new port. Returns `None` if the bitwidth mismatches the + /// width that this `Net` was created with #[must_use] - pub fn get(&self) -> &Bits { - &self.driver + pub fn push(&mut self, port: &Bits) -> Option<()> { + if port.bw() != self.bw() { + None + } else { + self.ports.push(Awi::from(port)); + Some(()) + } } - /// Gets the port at index `i`. Returns `None` if `i >= self.len()`. + /// Gets a mutable reference to the port at index `i`. Returns `None` if `i + /// >= self.len()`. 
#[must_use] pub fn get_mut(&mut self, i: usize) -> Option<&mut Bits> { self.ports.get_mut(i).map(|x| x.as_mut()) @@ -180,8 +205,8 @@ impl Net { if self.bw() != rhs.bw() { None } else { - self.ports.push(Awi::from(rhs.get())); - rhs.ports.push(Awi::from(self.get())); + self.ports.push(Awi::from(rhs.as_ref())); + rhs.ports.push(Awi::from(self.as_ref())); Some(()) } } @@ -189,35 +214,55 @@ impl Net { /// Drives with the value of the `inx`th port. Note that `inx` can be from /// a dynamic `dag::usize`. /// - /// If `inx` is out of range, the initial value is driven (and _not_ the - /// current temporal value). If `self.is_empty()`, the `LoopHandle` points - /// to a loop being driven with the initial value. + /// If `inx` is out of range, the return value is a runtime or dynamic + /// `None`. The source value if the `inx` is out of range is not specified, + /// and it may result in an undriven `Loop` in some cases, so the return + /// `Option` should probably be `unwrap`ed. #[must_use] - pub fn drive(mut self, inx: impl Into) -> LoopHandle { - let last = InlAwi::from_usize(self.len()); - // this elegantly handles the `self.is_empty()` case in addition to the out of - // range case - self.push(); - - // set the index to `last` if it is out of range - let mut inx = InlAwi::from_usize(inx); - let gt = inx.ugt(&last).unwrap(); - inx.mux_(&last, gt).unwrap(); - - // TODO need an optimized onehot selector from `awint_dag` - let mut selector = Awi::uone(NonZeroUsize::new(self.len()).unwrap()); - selector.shl_(inx.to_usize()).unwrap(); + pub fn drive(self, inx: &Bits) -> dag::Option<()> { + if self.is_empty() { + return dag::Option::None; + } + if self.len() == 1 { + self.source.drive(&self.ports[0]).unwrap(); + return dag::Option::some_at_dagtime((), inx.is_zero()); + } + let max_inx = self.len() - 1; + let max_inx_bits = self.len().next_power_of_two().trailing_zeros() as usize; + // we detect overflow by seeing if any of these bits are nonzero or if the rest + // of the 
index is greater than the expected max bits (only needed if the + // self.len() is not a power of two) + let should_stay_zero = if max_inx_bits < inx.bw() { + awi!(inx[max_inx_bits..]).unwrap() + } else { + awi!(0) + }; + let mut in_range = should_stay_zero.is_zero(); + let inx = if max_inx_bits < inx.bw() { + awi!(inx[..max_inx_bits]).unwrap() + } else { + Awi::from(inx) + }; + let signals = selector(&inx, None); + if (!self.len().is_power_of_two()) && (inx.bw() == max_inx_bits) { + // dance to avoid stuff that can get lowered into a full `BITS` sized comparison + let mut max = Awi::zero(inx.nzbw()); + max.usize_(max_inx); + let le = inx.ule(&max).unwrap(); + in_range &= le; + } + let mut tmp = Awi::zero(self.nzbw()); - for i in 0..self.len() { - tmp.mux_(self.get_mut(i).unwrap(), selector.get(i).unwrap()) - .unwrap(); + for i in 0..min(self.len(), signals.len()) { + tmp.mux_(&self.ports[i], signals[i].to_bool()).unwrap(); } - self.driver.drive(&tmp).unwrap() + self.source.drive(&tmp).unwrap(); + dag::Option::some_at_dagtime((), in_range) } // TODO we can do this // Drives with a one-hot vector of selectors. 
- //pub fn drive_priority(mut self, inx: impl Into) -> LoopHandle { + //pub fn drive_priority(mut self, inx: impl Into) { //pub fn drive_onehot(mut self, onehot) } @@ -225,19 +270,19 @@ impl Deref for Net { type Target = Bits; fn deref(&self) -> &Self::Target { - self.get() + &self.source } } impl Borrow for Net { fn borrow(&self) -> &Bits { - self.get() + &self.source } } impl AsRef for Net { fn as_ref(&self) -> &Bits { - self.get() + &self.source } } diff --git a/starlight/src/ensemble.rs b/starlight/src/ensemble.rs index 999afb37..e806a012 100644 --- a/starlight/src/ensemble.rs +++ b/starlight/src/ensemble.rs @@ -1,5 +1,6 @@ #[cfg(feature = "debug")] mod debug; +mod lnode; mod note; mod optimize; mod state; @@ -7,6 +8,7 @@ mod tnode; mod together; mod value; +pub use lnode::{LNode, PLNode}; pub use note::{Note, PNote}; pub use optimize::Optimizer; pub use state::{State, Stator}; diff --git a/starlight/src/ensemble/debug.rs b/starlight/src/ensemble/debug.rs index 793d6c0a..f5dbe3bc 100644 --- a/starlight/src/ensemble/debug.rs +++ b/starlight/src/ensemble/debug.rs @@ -6,7 +6,7 @@ use awint::{ }; use crate::{ - ensemble::{Ensemble, Equiv, PBack, PNote, Referent, State, TNode}, + ensemble::{Ensemble, Equiv, PBack, PLNode, PNote, Referent, State, TNode}, triple_arena::{Advancer, ChainArena}, triple_arena_render::{render_to_svg_file, DebugNode, DebugNodeTrait}, Epoch, @@ -18,7 +18,11 @@ impl DebugNodeTrait for State { sources: { let mut v = vec![]; for i in 0..this.op.operands_len() { - v.push((this.op.operands()[i], this.op.operand_names()[i].to_owned())) + if let Some(name) = this.op.operand_names().get(i) { + v.push((this.op.operands()[i], (*name).to_owned())); + } else { + v.push((this.op.operands()[i], "".to_owned())); + } } v }, @@ -31,9 +35,6 @@ impl DebugNodeTrait for State { Op::StaticGet(_, inx) => { v.push(format!("{} get({})", this.nzbw, inx)); } - Op::StaticSet(_, inx) => { - v.push(format!("{} set({})", this.nzbw, inx)); - } Op::StaticLut(_, ref lut) => 
{ v.push(format!("{} lut({})", this.nzbw, lut)); } @@ -51,7 +52,7 @@ impl DebugNodeTrait for State { v.push(format!( "{} {} {} {}", this.rc, - short(this.keep), + this.extern_rc, short(this.lowered_to_elementary), short(this.lowered_to_tnodes) )); @@ -70,16 +71,33 @@ impl DebugNodeTrait for State { #[derive(Debug, Clone)] pub struct StateBit { + p_equiv: Option, p_state: PState, i: usize, } +#[derive(Debug, Clone)] +pub struct LNodeTmp { + p_self: PBack, + p_driver: PBack, + p_lnode: PLNode, +} + +#[derive(Debug, Clone)] +pub struct NoteTmp { + p_self: PBack, + p_equiv: PBack, + p_note: PNote, + i: u64, +} + #[derive(Debug, Clone)] pub enum NodeKind { StateBit(StateBit), TNode(TNode), + LNode(LNodeTmp), Equiv(Equiv, Vec), - Note(PBack, PNote, u64), + Note(NoteTmp), Remove, } @@ -90,10 +108,16 @@ impl DebugNodeTrait for NodeKind { sources: vec![], center: { let mut v = vec![format!("{:?}", p_this)]; - v.push(format!("{} {}", state_bit.p_state, state_bit.i)); + v.push(format!("{} [{}]", state_bit.p_state, state_bit.i)); v }, - sinks: vec![], + sinks: { + if let Some(p_equiv) = state_bit.p_equiv { + vec![(p_equiv, "".to_string())] + } else { + vec![] + } + }, }, NodeKind::TNode(tnode) => DebugNode { sources: tnode @@ -107,9 +131,6 @@ impl DebugNodeTrait for NodeKind { if let Some(ref lut) = tnode.lut { v.push(format!("{:?} ", lut)); } - if let Some(driver) = tnode.loop_driver { - v.push(format!("driver: {:?}", driver)); - } if let Some(lowered_from) = tnode.lowered_from { v.push(format!("{:?}", lowered_from)); } @@ -117,6 +138,18 @@ impl DebugNodeTrait for NodeKind { }, sinks: vec![], }, + NodeKind::LNode(lnode) => DebugNode { + sources: vec![ + (lnode.p_self, "self".to_owned()), + (lnode.p_driver, "driver".to_owned()), + ], + center: { + let mut v = vec![format!("{:?}", p_this)]; + v.push(format!("{:?}", lnode.p_lnode)); + v + }, + sinks: vec![], + }, NodeKind::Equiv(equiv, p_tnodes) => DebugNode { sources: p_tnodes.iter().map(|p| (*p, String::new())).collect(), 
center: { @@ -127,9 +160,14 @@ impl DebugNodeTrait for NodeKind { }, sinks: vec![], }, - NodeKind::Note(p_back, p_note, inx) => DebugNode { - sources: vec![(*p_back, String::new())], - center: { vec![format!("{p_note} [{inx}]")] }, + NodeKind::Note(note) => DebugNode { + sources: vec![(note.p_equiv, String::new())], + center: { + vec![ + format!("{}", note.p_self), + format!("{} [{}]", note.p_note, note.i), + ] + }, sinks: vec![], }, NodeKind::Remove => panic!("should have been removed"), @@ -161,8 +199,22 @@ impl Ensemble { } NodeKind::Equiv(self.backrefs.get_val(p_self).unwrap().clone(), v) } - Referent::ThisStateBit(p, i) => { - NodeKind::StateBit(StateBit { p_state: *p, i: *i }) + Referent::ThisStateBit(p_state, i) => { + let state = self.stator.states.get(*p_state).unwrap().clone(); + if let Some(p_bit) = state.p_self_bits[*i] { + let p_equiv = self.backrefs.get_val(p_bit).unwrap().p_self_equiv; + NodeKind::StateBit(StateBit { + p_equiv: Some(p_equiv), + p_state: *p_state, + i: *i, + }) + } else { + NodeKind::StateBit(StateBit { + p_equiv: None, + p_state: *p_state, + i: *i, + }) + } } Referent::ThisTNode(p_tnode) => { let mut tnode = self.tnodes.get(*p_tnode).unwrap().clone(); @@ -173,17 +225,19 @@ impl Ensemble { *inp = p_input; } } - if let Some(loop_driver) = tnode.loop_driver.as_mut() { - if let Referent::LoopDriver(_) = - self.backrefs.get_key(*loop_driver).unwrap() - { - let p_driver = - self.backrefs.get_val(*loop_driver).unwrap().p_self_equiv; - *loop_driver = p_driver; - } - } NodeKind::TNode(tnode) } + Referent::ThisLNode(p_lnode) => { + let lnode = self.lnodes.get(*p_lnode).unwrap(); + // forward to the `PBack`s + let p_self = self.backrefs.get_val(lnode.p_self).unwrap().p_self_equiv; + let p_driver = self.backrefs.get_val(lnode.p_driver).unwrap().p_self_equiv; + NodeKind::LNode(LNodeTmp { + p_self, + p_driver, + p_lnode: *p_lnode, + }) + } Referent::Note(p_note) => { let note = self.notes.get(*p_note).unwrap(); let mut inx = u64::MAX; @@ -193,7 
+247,12 @@ impl Ensemble { } } let equiv = self.backrefs.get_val(p_self).unwrap(); - NodeKind::Note(equiv.p_self_equiv, *p_note, inx) + NodeKind::Note(NoteTmp { + p_self, + p_equiv: equiv.p_self_equiv, + p_note: *p_note, + i: inx, + }) } _ => NodeKind::Remove, } diff --git a/starlight/src/ensemble/lnode.rs b/starlight/src/ensemble/lnode.rs new file mode 100644 index 00000000..5a6555f1 --- /dev/null +++ b/starlight/src/ensemble/lnode.rs @@ -0,0 +1,19 @@ +use awint::awint_dag::triple_arena::ptr_struct; + +use crate::ensemble::PBack; + +// We use this because our algorithms depend on generation counters +ptr_struct!(PLNode); + +/// A temporal loopback node +#[derive(Debug, Clone)] +pub struct LNode { + pub p_self: PBack, + pub p_driver: PBack, +} + +impl LNode { + pub fn new(p_self: PBack, p_driver: PBack) -> Self { + Self { p_self, p_driver } + } +} diff --git a/starlight/src/ensemble/note.rs b/starlight/src/ensemble/note.rs index 9e7a6bbd..17d64e56 100644 --- a/starlight/src/ensemble/note.rs +++ b/starlight/src/ensemble/note.rs @@ -1,6 +1,12 @@ -use awint::awint_dag::{triple_arena::ptr_struct, PState}; +use std::num::NonZeroUsize; -use crate::ensemble::{Ensemble, PBack, Referent}; +use awint::awint_dag::{triple_arena::ptr_struct, EvalError, PState}; + +use crate::{ + awi, + ensemble::{Ensemble, PBack, Referent, Value}, + epoch::get_current_epoch, +}; ptr_struct!(PNote); @@ -43,6 +49,88 @@ impl Ensemble { } Some(p_note) } + + pub fn remove_note(&mut self, p_note: PNote) -> Result<(), EvalError> { + if let Some(note) = self.notes.remove(p_note) { + for p_back in note.bits { + if let Some(p_back) = p_back { + let referent = self.backrefs.remove_key(p_back).unwrap().0; + assert!(matches!(referent, Referent::Note(_))); + } + } + Ok(()) + } else { + Err(EvalError::InvalidPtr) + } + } + + pub fn get_thread_local_note_nzbw(p_note: PNote) -> Result { + let epoch_shared = get_current_epoch().unwrap(); + let mut lock = epoch_shared.epoch_data.borrow_mut(); + let ensemble = 
&mut lock.ensemble; + if let Some(note) = ensemble.notes.get(p_note) { + Ok(NonZeroUsize::new(note.bits.len()).unwrap()) + } else { + Err(EvalError::OtherStr("could not find thread local `Note`")) + } + } + + pub fn change_thread_local_note_value( + p_note: PNote, + bits: &awi::Bits, + ) -> Result<(), EvalError> { + let epoch_shared = get_current_epoch().unwrap(); + let mut lock = epoch_shared.epoch_data.borrow_mut(); + let ensemble = &mut lock.ensemble; + if let Some(note) = ensemble.notes.get(p_note) { + if note.bits.len() != bits.bw() { + return Err(EvalError::WrongBitwidth); + } + } else { + return Err(EvalError::OtherStr("could not find thread local `Note`")) + } + for bit_i in 0..bits.bw() { + let p_back = ensemble.notes[p_note].bits[bit_i]; + if let Some(p_back) = p_back { + ensemble + .change_value(p_back, Value::Dynam(bits.get(bit_i).unwrap())) + .unwrap(); + } + } + Ok(()) + } + + pub fn calculate_thread_local_note_value( + p_note: PNote, + bit_i: usize, + ) -> Result { + let epoch_shared = get_current_epoch().unwrap(); + let mut lock = epoch_shared.epoch_data.borrow_mut(); + let ensemble = &mut lock.ensemble; + let p_back = if let Some(note) = ensemble.notes.get(p_note) { + if bit_i >= note.bits.len() { + return Err(EvalError::OtherStr( + "something went wrong with note bitwidth", + )); + } + if let Some(p_back) = note.bits[bit_i] { + p_back + } else { + return Err(EvalError::OtherStr( + "something went wrong, found `Note` for evaluator but a bit was denoted", + )) + } + } else { + return Err(EvalError::OtherStr("could not find thread local `Note`")) + }; + if ensemble.stator.states.is_empty() { + // optimization after total pruning from `optimization` + ensemble.calculate_value(p_back) + } else { + drop(lock); + Ensemble::calculate_value_with_lower_capability(&epoch_shared, p_back) + } + } } impl Default for Note { diff --git a/starlight/src/ensemble/optimize.rs b/starlight/src/ensemble/optimize.rs index 90b37bb2..53a7286c 100644 --- 
a/starlight/src/ensemble/optimize.rs +++ b/starlight/src/ensemble/optimize.rs @@ -10,7 +10,7 @@ use awint::{ }; use crate::{ - ensemble::{Ensemble, PBack, PTNode, Referent, TNode, Value}, + ensemble::{Ensemble, PBack, PLNode, PTNode, Referent, TNode, Value}, triple_arena::{ptr_struct, OrdArena}, SmallMap, }; @@ -48,6 +48,8 @@ pub enum Optimization { InvestigateUsed(PBack), /// If an input was constified InvestigateConst(PTNode), + /// If a driver was constified + InvestigateLoopDriverConst(PLNode), /// The optimization state that equivalences are set to after the /// preinvestigation finds nothing InvestigateEquiv0(PBack), @@ -80,7 +82,8 @@ impl Optimizer { impl Ensemble { /// Removes all `Const` inputs and assigns `Const` result if possible. - /// Returns if a `Const` result was assigned. + /// Returns if a `Const` result was assigned (`Optimization::ConstifyEquiv` + /// needs to be run by the caller). pub fn const_eval_tnode(&mut self, p_tnode: PTNode) -> bool { let tnode = self.tnodes.get_mut(p_tnode).unwrap(); if let Some(original_lut) = &tnode.lut { @@ -157,8 +160,6 @@ impl Ensemble { if lut.bw() == 1 { let equiv = self.backrefs.get_val_mut(tnode.p_self).unwrap(); equiv.val = Value::Const(lut.to_bool()); - self.optimizer - .insert(Optimization::ConstifyEquiv(equiv.p_self_equiv)); // fix the `lut` to its new state, do this even if we are doing the constant // optimization tnode.lut = Some(lut); @@ -189,7 +190,21 @@ impl Ensemble { false } } else { - // TODO loopbacks + false + } + } + + /// Assigns `Const` result if possible. + /// Returns if a `Const` result was assigned. 
+ pub fn const_eval_lnode(&mut self, p_lnode: PLNode) -> bool { + let lnode = self.lnodes.get(p_lnode).unwrap(); + let p_self = lnode.p_self; + let p_driver = lnode.p_driver; + let equiv = self.backrefs.get_val(p_driver).unwrap(); + if let Value::Const(val) = equiv.val { + self.backrefs.get_val_mut(p_self).unwrap().val = Value::Const(val); + true + } else { false } } @@ -207,6 +222,12 @@ impl Ensemble { let referent = *self.backrefs.get_key(p_back).unwrap(); match referent { Referent::ThisEquiv => (), + Referent::ThisLNode(p_lnode) => { + // avoid checking more if it was already determined to be constant + if !is_const && self.const_eval_lnode(p_lnode) { + is_const = true; + } + } Referent::ThisTNode(p_tnode) => { // avoid checking more if it was already determined to be constant if !is_const && self.const_eval_tnode(p_tnode) { @@ -215,7 +236,9 @@ impl Ensemble { } Referent::ThisStateBit(p_state, _) => { let state = &self.stator.states[p_state]; - if state.keep { + // the state bits can always be disregarded on a per-tnode basis unless they are + // being used externally + if state.extern_rc != 0 { non_self_rc += 1; } } @@ -224,7 +247,7 @@ impl Ensemble { // the way `LoopDriver` networks with no real dependencies will work, is // that const propogation and other simplifications will eventually result // in a single node equivalence that drives itself, which we can remove - let p_back_driver = self.tnodes.get(p_driver).unwrap().p_self; + let p_back_driver = self.lnodes.get(p_driver).unwrap().p_self; if !self.backrefs.in_same_set(p_back, p_back_driver).unwrap() { non_self_rc += 1; } @@ -269,12 +292,6 @@ impl Ensemble { /// `Advancer`s. 
pub fn remove_tnode_not_p_self(&mut self, p_tnode: PTNode) { let tnode = self.tnodes.remove(p_tnode).unwrap(); - if let Some(p_driver) = tnode.loop_driver { - let p_equiv = self.backrefs.get_val(p_driver).unwrap().p_self_equiv; - self.optimizer - .insert(Optimization::InvestigateUsed(p_equiv)); - self.backrefs.remove_key(p_driver).unwrap(); - } for inp in tnode.inp { let p_equiv = self.backrefs.get_val(inp).unwrap().p_self_equiv; self.optimizer @@ -283,7 +300,20 @@ impl Ensemble { } } + /// Does not perform the final step + /// `ensemble.backrefs.remove(lnode.p_self).unwrap()` which is important for + /// `Advancer`s. + pub fn remove_lnode_not_p_self(&mut self, p_lnode: PLNode) { + let lnode = self.lnodes.remove(p_lnode).unwrap(); + let p_equiv = self.backrefs.get_val(lnode.p_driver).unwrap().p_self_equiv; + self.optimizer + .insert(Optimization::InvestigateUsed(p_equiv)); + self.backrefs.remove_key(lnode.p_driver).unwrap(); + } + + /// Also removes all states pub fn optimize_all(&mut self) { + self.force_remove_all_states().unwrap(); // need to preinvestigate everything before starting a priority loop let mut adv = self.backrefs.advancer(); while let Some(p_back) = adv.advance(&self.backrefs) { @@ -324,8 +354,9 @@ impl Ensemble { Referent::ThisTNode(p_tnode) => { self.remove_tnode_not_p_self(*p_tnode); } - // TODO check self reference case - Referent::LoopDriver(_) => todo!(), + Referent::ThisLNode(p_lnode) => { + self.remove_lnode_not_p_self(*p_lnode); + } _ => unreachable!(), } } @@ -355,6 +386,9 @@ impl Ensemble { Referent::ThisTNode(p_tnode) => { self.remove_tnode_not_p_self(p_tnode); } + Referent::ThisLNode(p_lnode) => { + self.remove_lnode_not_p_self(p_lnode); + } Referent::ThisStateBit(p_state, i_bit) => { let p_bit = self.stator.states[p_state].p_self_bits[i_bit] .as_mut() @@ -382,13 +416,13 @@ impl Ensemble { assert!(found); } Referent::LoopDriver(p_driver) => { - let tnode = self.tnodes.get_mut(p_driver).unwrap(); - assert_eq!(tnode.loop_driver, 
Some(p_back)); + let lnode = self.lnodes.get_mut(p_driver).unwrap(); + assert_eq!(lnode.p_driver, p_back); let p_back_new = self .backrefs .insert_key(p_source, Referent::LoopDriver(p_driver)) .unwrap(); - tnode.loop_driver = Some(p_back_new); + lnode.p_driver = p_back_new; } Referent::Note(p_note) => { // here we see a major advantage of the backref system @@ -430,6 +464,10 @@ impl Ensemble { self.remove_tnode_not_p_self(*p_tnode); remove.push(p_back); } + Referent::ThisLNode(p_lnode) => { + self.remove_lnode_not_p_self(*p_lnode); + remove.push(p_back); + } Referent::ThisStateBit(..) => (), Referent::Input(p_inp) => { self.optimizer @@ -437,7 +475,7 @@ impl Ensemble { } Referent::LoopDriver(p_driver) => { self.optimizer - .insert(Optimization::InvestigateConst(*p_driver)); + .insert(Optimization::InvestigateLoopDriverConst(*p_driver)); } Referent::Note(_) => (), } @@ -463,9 +501,12 @@ impl Ensemble { match referent { Referent::ThisEquiv => (), Referent::ThisTNode(_) => (), + Referent::ThisLNode(_) => (), Referent::ThisStateBit(p_state, _) => { let state = &self.stator.states[p_state]; - if state.keep { + // the state bits can always be disregarded on a per-tnode basis unless + // they are being used externally + if state.extern_rc != 0 { found_use = true; } } @@ -474,7 +515,7 @@ impl Ensemble { break } Referent::LoopDriver(p_driver) => { - let p_back_driver = self.tnodes.get(p_driver).unwrap().p_self; + let p_back_driver = self.lnodes.get(p_driver).unwrap().p_self; if !self.backrefs.in_same_set(p_back, p_back_driver).unwrap() { found_use = true; break @@ -500,10 +541,22 @@ impl Ensemble { )); } } + Optimization::InvestigateLoopDriverConst(p_lnode) => { + if !self.lnodes.contains(p_lnode) { + return + }; + if self.const_eval_lnode(p_lnode) { + self.optimizer.insert(Optimization::ConstifyEquiv( + self.lnodes.get(p_lnode).unwrap().p_self, + )); + } + } Optimization::InvestigateEquiv0(_p_back) => { /*if !self.backrefs.contains(p_back) { return };*/ + // TODO eliminate 
equal TNodes, combine equal equivalences etc. + // TODO compare TNodes // TODO compress inverters by inverting inx table // TODO fusion of structures like diff --git a/starlight/src/ensemble/state.rs b/starlight/src/ensemble/state.rs index ab8767f9..ff776336 100644 --- a/starlight/src/ensemble/state.rs +++ b/starlight/src/ensemble/state.rs @@ -12,10 +12,9 @@ use crate::{ awi, ensemble::{ value::{Change, Eval}, - Ensemble, PBack, Referent, Value, + Ensemble, PBack, Value, }, epoch::EpochShared, - lower::{lower_state, LowerManagement}, }; /// Represents a single state that `awint_dag::mimick::Bits` is in at one point @@ -36,7 +35,7 @@ pub struct State { /// The number of other `State`s, and only other `State`s, that reference /// this one through the `Op`s pub rc: usize, - pub keep: bool, + pub extern_rc: usize, /// If the `State` has been lowered to elementary `State`s (`Static-` /// operations and roots). Note that a DFS might set this before actually /// being lowered. @@ -46,6 +45,30 @@ pub struct State { pub lowered_to_tnodes: bool, } +impl State { + /// Returns if pruning this state is allowed + pub fn pruning_allowed(&self) -> bool { + (self.rc == 0) && (self.extern_rc == 0) && !matches!(self.op, Opaque(_, Some(_))) + } + + pub fn inc_rc(&mut self) { + self.rc = self.rc.checked_add(1).unwrap() + } + + pub fn dec_rc(&mut self) -> Option<()> { + self.rc = self.rc.checked_sub(1)?; + Some(()) + } + + pub fn inc_extern_rc(&mut self) { + self.extern_rc = self.extern_rc.checked_add(1).unwrap() + } + + pub fn dec_extern_rc(&mut self) { + self.extern_rc = self.extern_rc.checked_sub(1).unwrap() + } +} + #[derive(Debug, Clone)] pub struct Stator { pub states: Arena, @@ -76,7 +99,7 @@ impl Ensemble { } else { return Err(EvalError::OtherStr("tried to subtract a 0 reference count")) }; - if (state.rc == 0) && (!state.keep) { + if state.pruning_allowed() { self.remove_state(p_state)?; } Ok(()) @@ -85,27 +108,12 @@ impl Ensemble { } } - // TODO need to slightly rethink the 
PState/PNode system. - // For now, we just prune states if any of their bits shares a surject with a - // note. - pub fn prune_unnoted_states(&mut self) -> Result<(), EvalError> { + /// Prunes all states with `pruning_allowed()` + pub fn prune_states(&mut self) -> Result<(), EvalError> { let mut adv = self.stator.states.advancer(); while let Some(p_state) = adv.advance(&self.stator.states) { let state = &self.stator.states[p_state]; - let mut remove = true; - 'outer: for p_bit in &state.p_self_bits { - if let Some(p_bit) = p_bit { - let mut equiv_adv = self.backrefs.advancer_surject(*p_bit); - while let Some(p_back) = equiv_adv.advance(&self.backrefs) { - if let Referent::Note(_) = self.backrefs.get_key(p_back).unwrap() { - remove = false; - break 'outer - } - } - } - } - if remove { - self.stator.states.get_mut(p_state).unwrap().keep = false; + if state.pruning_allowed() { self.remove_state(p_state).unwrap(); } } @@ -172,10 +180,7 @@ impl Ensemble { // anything let state = self.stator.states.get_mut(p_state).unwrap(); assert_eq!(state.rc, 0); - // FIXME we definitely need to go through Notes for assertions, - // doc example fails otherwise on release - //state.keep = false; - //self.remove_state(p_state).unwrap(); + self.remove_state(p_state).unwrap(); Ok(()) } else { unreachable!() @@ -200,359 +205,6 @@ impl Ensemble { } } - /// Used for forbidden meta psuedo-DSL techniques in which a single state is - /// replaced by more basic states. 
- pub fn graft(&mut self, p_state: PState, operands: &[PState]) -> Result<(), EvalError> { - #[cfg(debug_assertions)] - { - if (self.stator.states[p_state].op.operands_len() + 1) != operands.len() { - return Err(EvalError::WrongNumberOfOperands) - } - for (i, op) in self.stator.states[p_state].op.operands().iter().enumerate() { - let current_nzbw = self.stator.states[operands[i + 1]].nzbw; - let current_is_opaque = self.stator.states[operands[i + 1]].op.is_opaque(); - if self.stator.states[op].nzbw != current_nzbw { - return Err(EvalError::OtherString(format!( - "operand {}: a bitwidth of {:?} is trying to be grafted to a bitwidth of \ - {:?}", - i, current_nzbw, self.stator.states[op].nzbw - ))) - } - if !current_is_opaque { - return Err(EvalError::ExpectedOpaque) - } - } - if self.stator.states[p_state].nzbw != self.stator.states[operands[0]].nzbw { - return Err(EvalError::WrongBitwidth) - } - } - - // TODO what do we do when we make multi-output things - // graft input - for i in 1..operands.len() { - let grafted = operands[i]; - let graftee = self.stator.states.get(p_state).unwrap().op.operands()[i - 1]; - if let Some(grafted) = self.stator.states.get_mut(grafted) { - // change the grafted `Opaque` into a `Copy` that routes to the graftee instead - // of needing to change all the operands of potentially many nodes - grafted.op = Copy([graftee]); - } else { - // else the operand is not used because it was optimized away, this is removing - // a tree outside of the grafted part - self.dec_rc(graftee).unwrap(); - } - } - - // graft output - let grafted = operands[0]; - self.stator.states.get_mut(p_state).unwrap().op = Copy([grafted]); - self.stator.states[grafted].rc = self.stator.states[grafted].rc.checked_add(1).unwrap(); - - Ok(()) - } - - pub fn lower_state(epoch_shared: &EpochShared, p_state: PState) -> Result { - // TODO optimization to remove unused nodes early - //let epoch = StateEpoch::new(); - struct Tmp<'a> { - ptr: PState, - epoch_shared: &'a 
EpochShared, - } - impl<'a> LowerManagement for Tmp<'a> { - fn graft(&mut self, operands: &[PState]) { - self.epoch_shared - .epoch_data - .borrow_mut() - .ensemble - .graft(self.ptr, operands) - .unwrap(); - } - - fn get_nzbw(&self, p: PState) -> NonZeroUsize { - self.epoch_shared - .epoch_data - .borrow() - .ensemble - .stator - .states - .get(p) - .unwrap() - .nzbw - } - - fn is_literal(&self, p: PState) -> bool { - self.epoch_shared - .epoch_data - .borrow() - .ensemble - .stator - .states - .get(p) - .unwrap() - .op - .is_literal() - } - - fn usize(&self, p: PState) -> usize { - if let Literal(ref lit) = self - .epoch_shared - .epoch_data - .borrow() - .ensemble - .stator - .states - .get(p) - .unwrap() - .op - { - if lit.bw() != 64 { - panic!() - } - lit.to_usize() - } else { - panic!() - } - } - - fn bool(&self, p: PState) -> bool { - if let Literal(ref lit) = self - .epoch_shared - .epoch_data - .borrow() - .ensemble - .stator - .states - .get(p) - .unwrap() - .op - { - if lit.bw() != 1 { - panic!() - } - lit.to_bool() - } else { - panic!() - } - } - - fn dec_rc(&mut self, p: PState) { - self.epoch_shared - .epoch_data - .borrow_mut() - .ensemble - .dec_rc(p) - .unwrap() - } - } - let lock = epoch_shared.epoch_data.borrow(); - let state = lock.ensemble.stator.states.get(p_state).unwrap(); - let start_op = state.op.clone(); - let out_w = state.nzbw; - drop(lock); - lower_state(start_op, out_w, Tmp { - ptr: p_state, - epoch_shared, - }) - } - - /// Lowers the rootward tree from `p_state` down to the elementary `Op`s - pub fn dfs_lower_states_to_elementary( - epoch_shared: &EpochShared, - p_state: PState, - ) -> Result<(), EvalError> { - let mut unimplemented = false; - let mut lock = epoch_shared.epoch_data.borrow_mut(); - if let Some(state) = lock.ensemble.stator.states.get(p_state) { - if state.lowered_to_elementary { - return Ok(()) - } - } else { - return Err(EvalError::InvalidPtr) - } - lock.ensemble.stator.states[p_state].lowered_to_elementary = true; - 
- // NOTE be sure to reset this before returning from the function - lock.keep_flag = false; - drop(lock); - let mut path: Vec<(usize, PState)> = vec![(0, p_state)]; - loop { - let (i, p_state) = path[path.len() - 1]; - let mut lock = epoch_shared.epoch_data.borrow_mut(); - let state = &lock.ensemble.stator.states[p_state]; - let ops = state.op.operands(); - if ops.is_empty() { - // reached a root - path.pop().unwrap(); - if path.is_empty() { - break - } - path.last_mut().unwrap().0 += 1; - } else if i >= ops.len() { - // checked all sources, attempt evaluation first, this is crucial in preventing - // wasted work in multiple layer lowerings - match lock.ensemble.eval_state(p_state) { - Ok(()) => { - path.pop().unwrap(); - if path.is_empty() { - break - } else { - continue - } - } - // Continue on to lowering - Err(EvalError::Unevaluatable) => (), - Err(e) => { - lock.ensemble.stator.states[p_state].err = Some(e.clone()); - return Err(e) - } - } - let needs_lower = match lock.ensemble.stator.states[p_state].op { - Opaque(..) | Literal(_) | Assert(_) | Copy(_) | StaticGet(..) - | StaticSet(..) | StaticLut(..) => false, - Lut([lut, inx]) => { - if let Literal(ref lit) = lock.ensemble.stator.states[lut].op { - let lit = lit.clone(); - let out_w = lock.ensemble.stator.states[p_state].nzbw.get(); - let inx_w = lock.ensemble.stator.states[inx].nzbw.get(); - let no_op = if let Ok(inx_w) = u32::try_from(inx_w) { - if let Some(num_entries) = 1usize.checked_shl(inx_w) { - (out_w * num_entries) != lit.bw() - } else { - true - } - } else { - true - }; - if no_op { - // TODO should I add the extra arg to `Lut` to fix this edge case? 
- lock.ensemble.stator.states[p_state].op = Opaque(smallvec![], None); - lock.ensemble.dec_rc(inx).unwrap(); - } else { - lock.ensemble.stator.states[p_state].op = StaticLut([inx], lit); - } - lock.ensemble.dec_rc(lut).unwrap(); - false - } else { - true - } - } - Get([bits, inx]) => { - if let Literal(ref lit) = lock.ensemble.stator.states[inx].op { - let lit = lit.clone(); - let lit_u = lit.to_usize(); - if lit_u >= lock.ensemble.stator.states[bits].nzbw.get() { - // TODO I realize now that no-op `get` specifically is fundamentally - // ill-defined to some extend because it returns `Option`, it - // must be asserted against, this - // provides the next best thing - lock.ensemble.stator.states[p_state].op = Opaque(smallvec![], None); - lock.ensemble.dec_rc(bits).unwrap(); - } else { - lock.ensemble.stator.states[p_state].op = StaticGet([bits], lit_u); - } - lock.ensemble.dec_rc(inx).unwrap(); - false - } else { - true - } - } - Set([bits, inx, bit]) => { - if let Literal(ref lit) = lock.ensemble.stator.states[inx].op { - let lit = lit.clone(); - let lit_u = lit.to_usize(); - if lit_u >= lock.ensemble.stator.states[bits].nzbw.get() { - // no-op - lock.ensemble.stator.states[p_state].op = Copy([bits]); - lock.ensemble.dec_rc(bit).unwrap(); - } else { - lock.ensemble.stator.states[p_state].op = - StaticSet([bits, bit], lit.to_usize()); - } - lock.ensemble.dec_rc(inx).unwrap(); - false - } else { - true - } - } - _ => true, - }; - drop(lock); - let lowering_done = if needs_lower { - // this is used to be able to remove ultimately unused temporaries - let mut temporary = EpochShared::shared_with(epoch_shared); - temporary.set_as_current(); - let lowering_done = match Ensemble::lower_state(&temporary, p_state) { - Ok(lowering_done) => lowering_done, - Err(EvalError::Unimplemented) => { - // finish lowering as much as possible - unimplemented = true; - true - } - Err(e) => { - temporary.remove_as_current(); - let mut lock = epoch_shared.epoch_data.borrow_mut(); - 
lock.ensemble.stator.states[p_state].err = Some(e.clone()); - lock.keep_flag = true; - return Err(e) - } - }; - // shouldn't be adding additional assertions - // TODO after migrating the old lowering tests to a starlight-like system, make - // sure there are none using assertions assert!(temporary. - // assertions_empty()); - let states = temporary.take_states_added(); - temporary.remove_as_current(); - let mut lock = epoch_shared.epoch_data.borrow_mut(); - for p_state in states { - let state = &lock.ensemble.stator.states[p_state]; - if (!state.keep) && (state.rc == 0) { - lock.ensemble.remove_state(p_state).unwrap(); - } - } - lowering_done - } else { - true - }; - if lowering_done { - path.pop().unwrap(); - if path.is_empty() { - break - } - } else { - // else do not call `path.pop`, restart the DFS here - path.last_mut().unwrap().0 = 0; - } - } else { - let mut p_next = ops[i]; - if lock.ensemble.stator.states[p_next].lowered_to_elementary { - // do not visit - path.last_mut().unwrap().0 += 1; - } else { - while let Copy([a]) = lock.ensemble.stator.states[p_next].op { - // special optimization case: forward Copies - lock.ensemble.stator.states[p_state].op.operands_mut()[i] = a; - let rc = &mut lock.ensemble.stator.states[a].rc; - *rc = (*rc).checked_add(1).unwrap(); - lock.ensemble.dec_rc(p_next).unwrap(); - p_next = a; - } - lock.ensemble.stator.states[p_next].lowered_to_elementary = true; - path.push((0, p_next)); - } - drop(lock); - } - } - - let mut lock = epoch_shared.epoch_data.borrow_mut(); - lock.keep_flag = true; - - if unimplemented { - Err(EvalError::Unimplemented) - } else { - Ok(()) - } - } - /// Assuming that the rootward tree from `p_state` is lowered down to the /// elementary `Op`s, this will create the `TNode` network pub fn dfs_lower_elementary_to_tnodes(&mut self, p_state: PState) -> Result<(), EvalError> { @@ -579,6 +231,12 @@ impl Ensemble { } Opaque(_, name) => { if let Some(name) = name { + if name == "LoopSource" { + return 
Err(EvalError::OtherStr( + "cannot lower LoopSource opaque with no driver, most likely \ + some `Loop` or `Net` has been left undriven", + )) + } return Err(EvalError::OtherString(format!( "cannot lower root opaque with name {name}" ))) @@ -594,126 +252,7 @@ impl Ensemble { path.last_mut().unwrap().0 += 1; } else if i >= ops.len() { // checked all sources - match self.stator.states[p_state].op { - Assert([x]) => { - // this is the only foolproof way of doing this, at least without more - // branches - self.initialize_state_bits_if_needed(p_state).unwrap(); - let len = self.stator.states[p_state].p_self_bits.len(); - assert_eq!(len, self.stator.states[x].p_self_bits.len()); - for i in 0..len { - let p_equiv0 = self.stator.states[p_state].p_self_bits[i].unwrap(); - let p_equiv1 = self.stator.states[x].p_self_bits[i].unwrap(); - self.union_equiv(p_equiv0, p_equiv1).unwrap(); - } - } - Copy([x]) => { - // this is the only foolproof way of doing this, at least without more - // branches - self.initialize_state_bits_if_needed(p_state).unwrap(); - let len = self.stator.states[p_state].p_self_bits.len(); - assert_eq!(len, self.stator.states[x].p_self_bits.len()); - for i in 0..len { - let p_equiv0 = self.stator.states[p_state].p_self_bits[i].unwrap(); - let p_equiv1 = self.stator.states[x].p_self_bits[i].unwrap(); - self.union_equiv(p_equiv0, p_equiv1).unwrap(); - } - } - StaticGet([bits], inx) => { - self.initialize_state_bits_if_needed(p_state).unwrap(); - let len = self.stator.states[bits].p_self_bits.len(); - assert!(inx < len); - let p_self_bits = &self.stator.states[p_state].p_self_bits; - assert_eq!(p_self_bits.len(), 1); - let p_equiv0 = p_self_bits[0].unwrap(); - let p_equiv1 = self.stator.states[bits].p_self_bits[inx].unwrap(); - self.union_equiv(p_equiv0, p_equiv1).unwrap(); - } - StaticSet([bits, bit], inx) => { - self.initialize_state_bits_if_needed(p_state).unwrap(); - let len = self.stator.states[p_state].p_self_bits.len(); - assert_eq!(len, 
self.stator.states[bits].p_self_bits.len()); - // this must be handled upstream - assert!(inx < len); - for i in 0..len { - let p_equiv0 = self.stator.states[p_state].p_self_bits[i].unwrap(); - if i == inx { - let p_bit = &self.stator.states[bit].p_self_bits; - assert_eq!(p_bit.len(), 1); - let p_equiv1 = p_bit[0].unwrap(); - self.union_equiv(p_equiv0, p_equiv1).unwrap(); - } else { - let p_equiv1 = self.stator.states[bits].p_self_bits[i].unwrap(); - self.union_equiv(p_equiv0, p_equiv1).unwrap(); - }; - } - } - StaticLut([inx], ref table) => { - let table = table.clone(); - self.initialize_state_bits_if_needed(p_state).unwrap(); - let inx_bits = self.stator.states[inx].p_self_bits.clone(); - let inx_len = inx_bits.len(); - let out_bw = self.stator.states[p_state].p_self_bits.len(); - let num_entries = - 1usize.checked_shl(u32::try_from(inx_len).unwrap()).unwrap(); - // this must be handled upstream - assert_eq!(out_bw * num_entries, table.bw()); - // convert from multiple out to single out bit lut - for bit_i in 0..out_bw { - let single_bit_table = if out_bw == 1 { - table.clone() - } else { - let mut val = - awi::Awi::zero(NonZeroUsize::new(num_entries).unwrap()); - for i in 0..num_entries { - val.set(i, table.get((i * out_bw) + bit_i).unwrap()) - .unwrap(); - } - val - }; - let p_equiv0 = self - .make_lut(&inx_bits, &single_bit_table, Some(p_state)) - .unwrap(); - let p_equiv1 = self.stator.states[p_state].p_self_bits[bit_i].unwrap(); - self.union_equiv(p_equiv0, p_equiv1).unwrap(); - } - } - Opaque(ref v, name) => { - if name == Some("LoopHandle") { - if v.len() != 2 { - return Err(EvalError::OtherStr( - "LoopHandle `Opaque` does not have 2 arguments", - )) - } - let v0 = v[0]; - let v1 = v[1]; - let w = self.stator.states[v0].p_self_bits.len(); - if w != self.stator.states[v1].p_self_bits.len() { - return Err(EvalError::OtherStr( - "LoopHandle `Opaque` has a bitwidth mismatch of looper and \ - driver", - )) - } - // Loops work by an initial `Opaque` that gets 
registered earlier - // and is used by things that use the loop value. A second - // LoopHandle Opaque references the first with `p_looper` and - // supplies a driver. - for i in 0..w { - let p_looper = self.stator.states[v0].p_self_bits[i].unwrap(); - let p_driver = self.stator.states[v1].p_self_bits[i].unwrap(); - self.make_loop(p_looper, p_driver, Value::Dynam(false)) - .unwrap(); - } - } else if let Some(name) = name { - return Err(EvalError::OtherString(format!( - "cannot lower opaque with name {name}" - ))) - } else { - return Err(EvalError::OtherStr("cannot lower opaque with no name")) - } - } - ref op => return Err(EvalError::OtherString(format!("cannot lower {op:?}"))), - } + lower_elementary_to_tnodes_intermediate(self, p_state)?; path.pop().unwrap(); if path.is_empty() { break @@ -721,6 +260,10 @@ impl Ensemble { } else { let p_next = ops[i]; if self.stator.states[p_next].lowered_to_tnodes { + // in the case of circular cases with `Loop`s, if the DFS goes around and does + // not encounter a root, the argument needs to be initialized or else any branch + // of `lower_elementary_to_tnodes_intermediate` could fail + self.initialize_state_bits_if_needed(p_next).unwrap(); // do not visit path.last_mut().unwrap().0 += 1; } else { @@ -768,6 +311,168 @@ impl Ensemble { } } +fn lower_elementary_to_tnodes_intermediate( + this: &mut Ensemble, + p_state: PState, +) -> Result<(), EvalError> { + this.initialize_state_bits_if_needed(p_state).unwrap(); + match this.stator.states[p_state].op { + Assert([x]) => { + // this is the only foolproof way of doing this, at least without more + // branches + let len = this.stator.states[p_state].p_self_bits.len(); + assert_eq!(len, this.stator.states[x].p_self_bits.len()); + for i in 0..len { + let p_equiv0 = this.stator.states[p_state].p_self_bits[i].unwrap(); + let p_equiv1 = this.stator.states[x].p_self_bits[i].unwrap(); + this.union_equiv(p_equiv0, p_equiv1).unwrap(); + } + } + Copy([x]) => { + // this is the only foolproof 
way of doing this, at least without more + // branches + let len = this.stator.states[p_state].p_self_bits.len(); + assert_eq!(len, this.stator.states[x].p_self_bits.len()); + for i in 0..len { + let p_equiv0 = this.stator.states[p_state].p_self_bits[i].unwrap(); + let p_equiv1 = this.stator.states[x].p_self_bits[i].unwrap(); + this.union_equiv(p_equiv0, p_equiv1).unwrap(); + } + } + StaticGet([bits], inx) => { + let len = this.stator.states[bits].p_self_bits.len(); + assert!(inx < len); + let p_self_bits = &this.stator.states[p_state].p_self_bits; + assert_eq!(p_self_bits.len(), 1); + let p_equiv0 = p_self_bits[0].unwrap(); + let p_equiv1 = this.stator.states[bits].p_self_bits[inx].unwrap(); + this.union_equiv(p_equiv0, p_equiv1).unwrap(); + } + Concat(ref concat) => { + let concat_len = concat.len(); + let total_len = this.stator.states[p_state].p_self_bits.len(); + let mut to = 0; + for c_i in 0..concat_len { + let c = if let Concat(ref concat) = this.stator.states[p_state].op { + concat.as_slice()[c_i] + } else { + unreachable!() + }; + let len = this.stator.states[c].p_self_bits.len(); + for i in 0..len { + let p_equiv0 = this.stator.states[p_state].p_self_bits[to + i].unwrap(); + let p_equiv1 = this.stator.states[c].p_self_bits[i].unwrap(); + this.union_equiv(p_equiv0, p_equiv1).unwrap(); + } + to += len; + } + assert_eq!(total_len, to); + } + ConcatFields(ref concat) => { + let concat_len = concat.len(); + let total_len = this.stator.states[p_state].p_self_bits.len(); + let mut to = 0; + for c_i in 0..concat_len { + let (c, (from, width)) = + if let ConcatFields(ref concat) = this.stator.states[p_state].op { + (concat.t_as_slice()[c_i], concat.field_as_slice()[c_i]) + } else { + unreachable!() + }; + let len = width.get(); + for i in 0..len { + let p_equiv0 = this.stator.states[p_state].p_self_bits[to + i].unwrap(); + let p_equiv1 = this.stator.states[c].p_self_bits[from + i].unwrap(); + this.union_equiv(p_equiv0, p_equiv1).unwrap(); + } + to += len; + } + 
assert_eq!(total_len, to); + } + Repeat([x]) => { + let len = this.stator.states[p_state].p_self_bits.len(); + let x_w = this.stator.states[x].p_self_bits.len(); + assert!((len % x_w) == 0); + let mut from = 0; + for to in 0..len { + if from >= x_w { + from = 0; + } + let p_equiv0 = this.stator.states[p_state].p_self_bits[to].unwrap(); + let p_equiv1 = this.stator.states[x].p_self_bits[from].unwrap(); + this.union_equiv(p_equiv0, p_equiv1).unwrap(); + from += 1; + } + } + StaticLut(ref concat, ref table) => { + let table = table.clone(); + let concat_len = concat.len(); + let mut inx_bits: SmallVec<[Option; 8]> = smallvec![]; + for c_i in 0..concat_len { + let c = if let StaticLut(ref concat, _) = this.stator.states[p_state].op { + concat.as_slice()[c_i] + } else { + unreachable!() + }; + let bits = &this.stator.states[c].p_self_bits; + inx_bits.extend(bits.iter().cloned()); + } + + let inx_len = inx_bits.len(); + let out_bw = this.stator.states[p_state].p_self_bits.len(); + let num_entries = 1usize.checked_shl(u32::try_from(inx_len).unwrap()).unwrap(); + // this must be handled upstream + assert_eq!(out_bw * num_entries, table.bw()); + // convert from multiple out to single out bit lut + for bit_i in 0..out_bw { + let single_bit_table = if out_bw == 1 { + table.clone() + } else { + let mut val = awi::Awi::zero(NonZeroUsize::new(num_entries).unwrap()); + for i in 0..num_entries { + val.set(i, table.get((i * out_bw) + bit_i).unwrap()) + .unwrap(); + } + val + }; + let p_equiv0 = this + .make_lut(&inx_bits, &single_bit_table, Some(p_state)) + .unwrap(); + let p_equiv1 = this.stator.states[p_state].p_self_bits[bit_i].unwrap(); + this.union_equiv(p_equiv0, p_equiv1).unwrap(); + } + } + Opaque(ref v, name) => { + if name == Some("LoopSource") { + if v.len() != 1 { + return Err(EvalError::OtherStr("cannot lower an undriven `Loop`")) + } + let p_driver_state = v[0]; + let w = this.stator.states[p_state].p_self_bits.len(); + if w != 
this.stator.states[p_driver_state].p_self_bits.len() { + return Err(EvalError::OtherStr( + "`Loop` has a bitwidth mismatch of looper and driver", + )) + } + for i in 0..w { + let p_looper = this.stator.states[p_state].p_self_bits[i].unwrap(); + let p_driver = this.stator.states[p_driver_state].p_self_bits[i].unwrap(); + this.make_loop(p_looper, p_driver, Value::Dynam(false)) + .unwrap(); + } + } else if let Some(name) = name { + return Err(EvalError::OtherString(format!( + "cannot lower opaque with name \"{name}\"" + ))) + } else { + return Err(EvalError::OtherStr("cannot lower opaque with no name")) + } + } + ref op => return Err(EvalError::OtherString(format!("cannot lower {op:?}"))), + } + Ok(()) +} + impl Default for Stator { fn default() -> Self { Self::new() diff --git a/starlight/src/ensemble/tnode.rs b/starlight/src/ensemble/tnode.rs index b614b248..e679ab18 100644 --- a/starlight/src/ensemble/tnode.rs +++ b/starlight/src/ensemble/tnode.rs @@ -11,7 +11,7 @@ use crate::{ensemble::PBack, triple_arena::ptr_struct}; // We use this because our algorithms depend on generation counters ptr_struct!(PTNode); -/// A "table" node meant to evoke some kind of one-way table in a DAG. +/// A lookup table node #[derive(Debug, Clone)] pub struct TNode { pub p_self: PBack, @@ -19,11 +19,6 @@ pub struct TNode { pub inp: SmallVec<[PBack; 4]>, /// Lookup Table that outputs one bit pub lut: Option, - // If the value cannot be temporally changed with respect to what the - // simplification algorithms can assume. 
- //pub is_permanent: bool, - /// If the value is temporally driven by a `Loop` - pub loop_driver: Option, pub lowered_from: Option, } @@ -33,7 +28,6 @@ impl TNode { p_self, inp: SmallVec::new(), lut: None, - loop_driver: None, lowered_from, } } diff --git a/starlight/src/ensemble/together.rs b/starlight/src/ensemble/together.rs index f8293908..e5c61080 100644 --- a/starlight/src/ensemble/together.rs +++ b/starlight/src/ensemble/together.rs @@ -5,12 +5,14 @@ use awint::{ smallvec::{smallvec, SmallVec}, EvalError, Location, Op, PState, }, - awint_macro_internals::triple_arena::Advancer, Awi, Bits, }; use crate::{ - ensemble::{value::Evaluator, Note, Optimizer, PNote, PTNode, State, Stator, TNode, Value}, + ensemble::{ + value::Evaluator, LNode, Note, Optimizer, PLNode, PNote, PTNode, State, Stator, TNode, + Value, + }, triple_arena::{ptr_struct, Arena, SurjectArena}, }; @@ -44,11 +46,14 @@ pub enum Referent { ThisEquiv, /// Self referent, used by all the `Tnode`s of an equivalence class ThisTNode(PTNode), + /// Self referent for an `LNode` + ThisLNode(PLNode), /// Self referent to a particular bit of a `State` ThisStateBit(PState, usize), /// Referent is using this for registering an input dependency Input(PTNode), - LoopDriver(PTNode), + /// Referent is using this for a loop driver + LoopDriver(PLNode), /// Referent is a note Note(PNote), } @@ -59,8 +64,10 @@ pub struct Ensemble { pub notes: Arena, pub stator: Stator, pub tnodes: Arena, + pub lnodes: Arena, pub evaluator: Evaluator, pub optimizer: Optimizer, + pub debug_counter: u64, } impl Ensemble { @@ -70,8 +77,10 @@ impl Ensemble { notes: Arena::new(), stator: Stator::new(), tnodes: Arena::new(), + lnodes: Arena::new(), evaluator: Evaluator::new(), optimizer: Optimizer::new(), + debug_counter: 0, } } @@ -151,15 +160,29 @@ impl Ensemble { ))) } } + for (p_lnode, lnode) in &self.lnodes { + if let Some(Referent::ThisLNode(p_self)) = self.backrefs.get_key(lnode.p_self) { + if p_lnode != *p_self { + return 
Err(EvalError::OtherString(format!( + "{lnode:?}.p_self roundtrip fail" + ))) + } + } else { + return Err(EvalError::OtherString(format!( + "{lnode:?}.p_self is invalid" + ))) + } + } // check other referent validities for referent in self.backrefs.keys() { let invalid = match referent { // already checked Referent::ThisEquiv => false, Referent::ThisTNode(_) => false, + Referent::ThisLNode(_) => false, Referent::ThisStateBit(..) => false, Referent::Input(p_input) => !self.tnodes.contains(*p_input), - Referent::LoopDriver(p_driver) => !self.tnodes.contains(*p_driver), + Referent::LoopDriver(p_driver) => !self.lnodes.contains(*p_driver), Referent::Note(p_note) => !self.notes.contains(*p_note), }; if invalid { @@ -189,24 +212,26 @@ impl Ensemble { ))) } } - if let Some(loop_driver) = tnode.loop_driver { - if let Some(referent) = self.backrefs.get_key(loop_driver) { - if let Referent::LoopDriver(p_driver) = referent { - if !self.tnodes.contains(*p_driver) { - return Err(EvalError::OtherString(format!( - "{p_tnode}: {tnode:?} loop driver referrent {p_driver} is invalid" - ))) - } - } else { + } + for p_lnode in self.lnodes.ptrs() { + let lnode = self.lnodes.get(p_lnode).unwrap(); + if let Some(referent) = self.backrefs.get_key(lnode.p_driver) { + if let Referent::LoopDriver(p_driver) = referent { + if !self.lnodes.contains(*p_driver) { return Err(EvalError::OtherString(format!( - "{p_tnode}: {tnode:?} loop driver has incorrect referrent" + "{p_lnode}: {lnode:?} loop driver referrent {p_driver} is invalid" ))) } } else { return Err(EvalError::OtherString(format!( - "{p_tnode}: {tnode:?} loop driver {loop_driver} is invalid" + "{p_lnode}: {lnode:?} loop driver has incorrect referrent" ))) } + } else { + return Err(EvalError::OtherString(format!( + "{p_lnode}: {lnode:?} loop driver {} is invalid", + lnode.p_driver + ))) } } for note in self.notes.vals() { @@ -240,6 +265,10 @@ impl Ensemble { let tnode = self.tnodes.get(*p_tnode).unwrap(); p_back != tnode.p_self } + 
Referent::ThisLNode(p_lnode) => { + let lnode = self.lnodes.get(*p_lnode).unwrap(); + p_back != lnode.p_self + } Referent::ThisStateBit(p_state, inx) => { let state = self.stator.states.get(*p_state).unwrap(); let p_bit = state.p_self_bits.get(*inx).unwrap(); @@ -250,9 +279,9 @@ impl Ensemble { } } Referent::Input(p_input) => { - let tnode1 = self.tnodes.get(*p_input).unwrap(); + let tnode = self.tnodes.get(*p_input).unwrap(); let mut found = false; - for p_back1 in &tnode1.inp { + for p_back1 in &tnode.inp { if *p_back1 == p_back { found = true; break @@ -260,9 +289,9 @@ impl Ensemble { } !found } - Referent::LoopDriver(p_loop) => { - let tnode1 = self.tnodes.get(*p_loop).unwrap(); - tnode1.loop_driver != Some(p_back) + Referent::LoopDriver(p_lnode) => { + let lnode = self.lnodes.get(*p_lnode).unwrap(); + lnode.p_driver != p_back } Referent::Note(p_note) => { let note = self.notes.get(*p_note).unwrap(); @@ -320,7 +349,6 @@ impl Ensemble { } } - // TODO verify DAGness Ok(()) } @@ -329,7 +357,6 @@ impl Ensemble { nzbw: NonZeroUsize, op: Op, location: Option, - keep: bool, ) -> PState { for operand in op.operands() { let state = self.stator.states.get_mut(*operand).unwrap(); @@ -342,7 +369,7 @@ impl Ensemble { location, err: None, rc: 0, - keep, + extern_rc: 0, lowered_to_elementary: false, lowered_to_tnodes: false, }) @@ -437,37 +464,26 @@ impl Ensemble { /// Sets up a loop from the loop source `p_looper` and driver `p_driver` #[must_use] - pub fn make_loop(&mut self, p_looper: PBack, p_driver: PBack, init_val: Value) -> Option<()> { - let looper_equiv = self.backrefs.get_val_mut(p_looper)?; - match looper_equiv.val { - Value::Unknown => (), - // shouldn't fail unless the special Opaque loopback structure is broken - _ => panic!("looper is already set to a known value"), - } - looper_equiv.val = init_val; - - let referent = self.backrefs.get_key(p_looper)?; - let p_looper_tnode = match referent { - Referent::ThisEquiv => { - // need to create the TNode - 
self.tnodes.insert_with(|p_tnode| { - let p_back_self = self - .backrefs - .insert_key(p_looper, Referent::ThisTNode(p_tnode)) - .unwrap(); - TNode::new(p_back_self, None) - }) - } - // we might want to support more cases in the future - _ => panic!("bad referent {referent:?}"), - }; - let p_back_driver = self - .backrefs - .insert_key(p_driver, Referent::LoopDriver(p_looper_tnode)) - .unwrap(); - let tnode = self.tnodes.get_mut(p_looper_tnode).unwrap(); - tnode.loop_driver = Some(p_back_driver); - Some(()) + pub fn make_loop( + &mut self, + p_looper: PBack, + p_driver: PBack, + init_val: Value, + ) -> Option { + let p_lnode = self.lnodes.insert_with(|p_lnode| { + let p_driver = self + .backrefs + .insert_key(p_driver, Referent::LoopDriver(p_lnode)) + .unwrap(); + let p_self = self + .backrefs + .insert_key(p_looper, Referent::ThisLNode(p_lnode)) + .unwrap(); + LNode::new(p_self, p_driver) + }); + // in order for the value to register correctly + self.change_value(p_looper, init_val).unwrap(); + Some(p_lnode) } pub fn union_equiv(&mut self, p_equiv0: PBack, p_equiv1: PBack) -> Result<(), EvalError> { @@ -510,27 +526,26 @@ impl Ensemble { Ok(()) } - /// Removes the state (it does not necessarily need to still be contained) - /// and removes its source tree of states with resulting zero reference - /// count and `!state.keep` + /// Triggers a cascade of state removals if `pruning_allowed()` and + /// their reference counts are zero pub fn remove_state(&mut self, p_state: PState) -> Result<(), EvalError> { + if !self.stator.states.contains(p_state) { + return Err(EvalError::InvalidPtr); + } let mut pstate_stack = vec![p_state]; while let Some(p) = pstate_stack.pop() { let mut delete = false; if let Some(state) = self.stator.states.get(p) { - if (state.rc == 0) && !state.keep { + if state.pruning_allowed() { delete = true; } } if delete { for i in 0..self.stator.states[p].op.operands_len() { let op = self.stator.states[p].op.operands()[i]; - self.stator.states[op].rc = 
- if let Some(x) = self.stator.states[op].rc.checked_sub(1) { - x - } else { - return Err(EvalError::OtherStr("tried to subtract a 0 reference count")) - }; + if self.stator.states[op].dec_rc().is_none() { + return Err(EvalError::OtherStr("tried to subtract a 0 reference count")) + }; pstate_stack.push(op); } let mut state = self.stator.states.remove(p).unwrap(); @@ -544,17 +559,19 @@ impl Ensemble { Ok(()) } - pub fn drive_loops(&mut self) { - let mut adv = self.tnodes.advancer(); - while let Some(p_tnode) = adv.advance(&self.tnodes) { - let tnode = self.tnodes.get(p_tnode).unwrap(); - if let Some(p_driver) = tnode.loop_driver { - let driver_equiv = self.backrefs.get_val(p_driver).unwrap(); - let val = driver_equiv.val; - let looper_equiv = self.backrefs.get_val_mut(tnode.p_self).unwrap(); - looper_equiv.val = val; + pub fn force_remove_all_states(&mut self) -> Result<(), EvalError> { + for (_, mut state) in self.stator.states.drain() { + for p_self_state in state.p_self_bits.drain(..) { + if let Some(p_self_state) = p_self_state { + self.backrefs.remove_key(p_self_state).unwrap(); + } } } + Ok(()) + } + + pub fn inc_debug_counter(&mut self) { + self.debug_counter = self.debug_counter.checked_add(1).unwrap() } } diff --git a/starlight/src/ensemble/value.rs b/starlight/src/ensemble/value.rs index ef3a32fc..1056c6fd 100644 --- a/starlight/src/ensemble/value.rs +++ b/starlight/src/ensemble/value.rs @@ -3,15 +3,14 @@ use std::num::{NonZeroU64, NonZeroUsize}; use awint::{ awint_dag::{ triple_arena::{ptr_struct, Advancer, OrdArena}, - EvalError, PState, + EvalError, }, Awi, }; use crate::{ - awi, - ensemble::{Ensemble, PBack, PTNode, Referent, TNode}, - epoch::{get_current_epoch, EpochShared}, + ensemble::{Ensemble, PBack, PLNode, PTNode, Referent, TNode}, + epoch::EpochShared, }; #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] @@ -92,6 +91,13 @@ pub struct RequestTNode { pub p_back_tnode: PBack, } +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, 
PartialOrd, Ord)] +pub struct RequestLNode { + pub depth: i64, + pub number_a: u8, + pub p_back_lnode: PBack, +} + #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct Change { pub depth: i64, @@ -103,8 +109,10 @@ pub struct Change { pub enum Eval { Investigate0(i64, PBack), ChangeTNode(PTNode), + ChangeLNode(PLNode), Change(Change), RequestTNode(RequestTNode), + RequestLNode(RequestLNode), /// When we have run out of normal things this will activate lowering Investigate1(PBack), } @@ -150,83 +158,6 @@ impl Evaluator { pub fn insert(&mut self, eval_step: Eval) { let _ = self.evaluations.insert(eval_step, ()); } - - /// This will return no error if `p_state` is not contained - pub fn change_thread_local_state_value( - p_state: PState, - bits: &awi::Bits, - ) -> Result<(), EvalError> { - let epoch_shared = get_current_epoch().unwrap(); - let mut lock = epoch_shared.epoch_data.borrow_mut(); - let ensemble = &mut lock.ensemble; - if let Some(state) = ensemble.stator.states.get(p_state) { - if state.nzbw != bits.nzbw() { - return Err(EvalError::WrongBitwidth); - } - // switch to change phase - if ensemble.evaluator.phase != EvalPhase::Change { - ensemble.evaluator.phase = EvalPhase::Change; - ensemble.evaluator.next_change_visit_gen(); - } - ensemble.initialize_state_bits_if_needed(p_state).unwrap(); - for bit_i in 0..bits.bw() { - let p_bit = ensemble.stator.states.get(p_state).unwrap().p_self_bits[bit_i]; - if let Some(p_bit) = p_bit { - let _ = ensemble.change_value(p_bit, Value::Dynam(bits.get(bit_i).unwrap())); - } - } - } - Ok(()) - } - - // stepping loops should request their drivers, evaluating everything requests - // everything - pub fn calculate_thread_local_state_value( - p_state: PState, - bit_i: usize, - ) -> Result { - let epoch_shared = get_current_epoch().unwrap(); - let mut lock = epoch_shared.epoch_data.borrow_mut(); - let ensemble = &mut lock.ensemble; - ensemble.initialize_state_bits_if_needed(p_state).unwrap(); - let state 
= ensemble.stator.states.get(p_state).unwrap(); - let p_back = *state.p_self_bits.get(bit_i).unwrap(); - let p_back = if let Some(p) = p_back { - p - } else { - return Err(EvalError::OtherString(format!( - "state {p_state} bit {bit_i} has been removed, something was not noted correctly" - ))); - }; - if let Some(equiv) = ensemble.backrefs.get_val_mut(p_back) { - // switch to request phase - if ensemble.evaluator.phase != EvalPhase::Request { - ensemble.evaluator.phase = EvalPhase::Request; - ensemble.evaluator.next_request_visit_gen(); - } - let visit = ensemble.evaluator.request_visit_gen(); - if equiv.request_visit != visit { - equiv.request_visit = visit; - ensemble - .evaluator - .insert(Eval::Investigate0(0, equiv.p_self_equiv)); - drop(lock); - Ensemble::handle_requests(&epoch_shared)?; - } else { - drop(lock); - } - Ok(epoch_shared - .epoch_data - .borrow() - .ensemble - .backrefs - .get_val(p_back) - .unwrap() - .val) - } else { - Err(EvalError::InvalidPtr) - } - } } impl Ensemble { @@ -245,7 +176,7 @@ impl Ensemble { // corresponding bits are set if the input is either a const value or is // already evaluated let mut fixed = inp.clone(); - // corresponding bits ar set if the input is `Value::Unknown` + // corresponding bits are set if the input is `Value::Unknown` let mut unknown = inp.clone(); for i in 0..len { let p_inp = tnode.inp[i]; @@ -351,25 +282,122 @@ impl Ensemble { res } - /// Returns `None` only if `p_back` does not exist or was removed + /// If the returned vector is empty, evaluation was successful, otherwise + /// what is needed for evaluation is returned + pub fn try_eval_lnode(&mut self, p_lnode: PLNode, depth: i64) -> Option { + // read current inputs + let lnode = self.lnodes.get(p_lnode).unwrap(); + let p_equiv = self.backrefs.get_val(lnode.p_self).unwrap().p_self_equiv; + let p_driver = lnode.p_driver; + let equiv = self.backrefs.get_val(p_driver).unwrap(); + if let Value::Const(val) = equiv.val { + 
self.evaluator.insert(Eval::Change(Change { + depth, + p_equiv, + value: Value::Const(val), + })); + None + } else if equiv.change_visit == self.evaluator.change_visit_gen() { + // fixed + self.evaluator.insert(Eval::Change(Change { + depth, + p_equiv, + value: equiv.val, + })); + None + } else { + Some(RequestLNode { + depth: depth - 1, + number_a: 0, + p_back_lnode: p_driver, + }) + } + } + pub fn change_value(&mut self, p_back: PBack, value: Value) -> Option<()> { if let Some(equiv) = self.backrefs.get_val_mut(p_back) { + if equiv.val.is_const() && (equiv.val != value) { + // not allowed + panic!(); + } + // switch to change phase if not already if self.evaluator.phase != EvalPhase::Change { self.evaluator.phase = EvalPhase::Change; self.evaluator.next_change_visit_gen(); } - if equiv.val.is_const() { - // not allowed - panic!(); - } equiv.val = value; + equiv.change_visit = self.evaluator.change_visit_gen(); Some(()) } else { None } } - fn handle_requests(epoch_shared: &EpochShared) -> Result<(), EvalError> { + pub fn calculate_value_with_lower_capability( + epoch_shared: &EpochShared, + p_back: PBack, + ) -> Result { + let mut lock = epoch_shared.epoch_data.borrow_mut(); + let ensemble = &mut lock.ensemble; + if let Some(equiv) = ensemble.backrefs.get_val_mut(p_back) { + if equiv.val.is_const() { + return Ok(equiv.val) + } + // switch to request phase if not already + if ensemble.evaluator.phase != EvalPhase::Request { + ensemble.evaluator.phase = EvalPhase::Request; + ensemble.evaluator.next_request_visit_gen(); + } + let visit = ensemble.evaluator.request_visit_gen(); + if equiv.request_visit != visit { + equiv.request_visit = visit; + ensemble + .evaluator + .insert(Eval::Investigate0(0, equiv.p_self_equiv)); + drop(lock); + Ensemble::handle_requests_with_lower_capability(epoch_shared)?; + } else { + drop(lock); + } + Ok(epoch_shared + .epoch_data + .borrow() + .ensemble + .backrefs + .get_val(p_back) + .unwrap() + .val) + } else { + 
Err(EvalError::InvalidPtr) + } + } + + pub fn calculate_value(&mut self, p_back: PBack) -> Result { + if let Some(equiv) = self.backrefs.get_val_mut(p_back) { + if equiv.val.is_const() { + return Ok(equiv.val) + } + // switch to request phase if not already + if self.evaluator.phase != EvalPhase::Request { + self.evaluator.phase = EvalPhase::Request; + self.evaluator.next_request_visit_gen(); + } + let visit = self.evaluator.request_visit_gen(); + if equiv.request_visit != visit { + equiv.request_visit = visit; + self.evaluator + .insert(Eval::Investigate0(0, equiv.p_self_equiv)); + self.handle_requests()?; + } + Ok(self.backrefs.get_val(p_back).unwrap().val) + } else { + Err(EvalError::InvalidPtr) + } + } + + pub(crate) fn handle_requests_with_lower_capability( + epoch_shared: &EpochShared, + ) -> Result<(), EvalError> { // TODO currently, the only way of avoiding N^2 worst case scenarios where // different change cascades lead to large groups of nodes being evaluated // repeatedly, is to use the front strategy. 
Only a powers of two reduction tree @@ -381,21 +409,22 @@ impl Ensemble { loop { let mut lock = epoch_shared.epoch_data.borrow_mut(); if let Some(p_state) = lock.ensemble.stator.states_to_lower.pop() { - let state = &lock.ensemble.stator.states[p_state]; - // first check that it has not already been lowered - if !state.lowered_to_tnodes { - drop(lock); - Ensemble::dfs_lower(epoch_shared, p_state)?; - let mut lock = epoch_shared.epoch_data.borrow_mut(); - // reinvestigate - let len = lock.ensemble.stator.states[p_state].p_self_bits.len(); - for i in 0..len { - let p_bit = lock.ensemble.stator.states[p_state].p_self_bits[i]; - if let Some(p_bit) = p_bit { - lock.ensemble.evaluator.insert(Eval::Investigate0(0, p_bit)); + if let Some(state) = lock.ensemble.stator.states.get(p_state) { + // first check that it has not already been lowered + if !state.lowered_to_tnodes { + drop(lock); + Ensemble::dfs_lower(epoch_shared, p_state)?; + let mut lock = epoch_shared.epoch_data.borrow_mut(); + // reinvestigate + let len = lock.ensemble.stator.states[p_state].p_self_bits.len(); + for i in 0..len { + let p_bit = lock.ensemble.stator.states[p_state].p_self_bits[i]; + if let Some(p_bit) = p_bit { + lock.ensemble.evaluator.insert(Eval::Investigate0(0, p_bit)); + } } + drop(lock); } - drop(lock); } } else { break @@ -417,6 +446,13 @@ impl Ensemble { Ok(()) } + pub(crate) fn handle_requests(&mut self) -> Result<(), EvalError> { + while let Some(p_eval) = self.evaluator.evaluations.min() { + self.evaluate(p_eval); + } + Ok(()) + } + fn evaluate(&mut self, p_eval: PEval) { let evaluation = self.evaluator.evaluations.remove(p_eval).unwrap().0; match evaluation { @@ -426,6 +462,9 @@ impl Ensemble { // TODO get priorities right let _ = self.try_eval_tnode(p_tnode, 0); } + Eval::ChangeLNode(p_lnode) => { + let _ = self.try_eval_lnode(p_lnode, 0); + } Eval::Change(change) => { let equiv = self.backrefs.get_val_mut(change.p_equiv).unwrap(); equiv.change_visit = 
self.evaluator.change_visit_gen(); @@ -440,9 +479,10 @@ impl Ensemble { while let Some(p_back) = adv.advance(&self.backrefs) { let referent = *self.backrefs.get_key(p_back).unwrap(); match referent { - Referent::ThisEquiv => (), - Referent::ThisTNode(_) => (), - Referent::ThisStateBit(..) => (), + Referent::ThisEquiv + | Referent::ThisTNode(_) + | Referent::ThisLNode(_) + | Referent::ThisStateBit(..) => (), Referent::Input(p_tnode) => { let tnode = self.tnodes.get(p_tnode).unwrap(); let p_self = tnode.p_self; @@ -455,7 +495,18 @@ impl Ensemble { self.evaluator.insert(Eval::ChangeTNode(p_tnode)); } } - Referent::LoopDriver(_) => (), + Referent::LoopDriver(p_lnode) => { + let lnode = self.lnodes.get(p_lnode).unwrap(); + let p_self = lnode.p_self; + let equiv = self.backrefs.get_val(p_self).unwrap(); + if (equiv.request_visit == self.evaluator.request_visit_gen()) + && (equiv.change_visit != self.evaluator.change_visit_gen()) + { + // only go leafward to the given input if it was in the request + // front and it hasn't been updated by some other route + self.evaluator.insert(Eval::ChangeLNode(p_lnode)); + } + } Referent::Note(_) => (), } } @@ -463,9 +514,20 @@ impl Ensemble { Eval::RequestTNode(request) => { if let Referent::Input(_) = self.backrefs.get_key(request.p_back_tnode).unwrap() { let equiv = self.backrefs.get_val(request.p_back_tnode).unwrap(); - if (equiv.change_visit != self.evaluator.change_visit_gen()) - || (equiv.request_visit != self.evaluator.request_visit_gen()) - { + if equiv.request_visit != self.evaluator.request_visit_gen() { + self.evaluator + .insert(Eval::Investigate0(request.depth, equiv.p_self_equiv)); + } + } else { + unreachable!() + } + } + Eval::RequestLNode(request) => { + if let Referent::LoopDriver(_) = + self.backrefs.get_key(request.p_back_lnode).unwrap() + { + let equiv = self.backrefs.get_val(request.p_back_lnode).unwrap(); + if equiv.request_visit != self.evaluator.request_visit_gen() { self.evaluator 
.insert(Eval::Investigate0(request.depth, equiv.p_self_equiv)); } @@ -489,7 +551,7 @@ impl Ensemble { // eval but is only inserted if nothing like the TNode evaluation is able to // prove early value setting let mut insert_if_no_early_exit = vec![]; - let mut saw_tnode = false; + let mut saw_node = false; let mut saw_state = None; let mut adv = self.backrefs.advancer_surject(p_equiv); while let Some(p_back) = adv.advance(&self.backrefs) { @@ -502,20 +564,29 @@ impl Ensemble { // early exit because evaluation was successful return } - for eval in v { - insert_if_no_early_exit.push(Eval::RequestTNode(eval)); + for request in v { + insert_if_no_early_exit.push(Eval::RequestTNode(request)); + } + saw_node = true; + } + Referent::ThisLNode(p_lnode) => { + if let Some(request) = self.try_eval_lnode(p_lnode, depth) { + insert_if_no_early_exit.push(Eval::RequestLNode(request)); + } else { + // early exit because evaluation was successful + return } - saw_tnode = true; + saw_node = true; } Referent::ThisStateBit(p_state, _) => { saw_state = Some(p_state); } Referent::Input(_) => (), - Referent::LoopDriver(_) => {} + Referent::LoopDriver(_) => (), Referent::Note(_) => (), } } - if !saw_tnode { + if !saw_node { let mut will_lower = false; if let Some(p_state) = saw_state { if !self.stator.states[p_state].lowered_to_tnodes { diff --git a/starlight/src/lib.rs b/starlight/src/lib.rs index eb647a9d..20504b80 100644 --- a/starlight/src/lib.rs +++ b/starlight/src/lib.rs @@ -2,9 +2,7 @@ //! typical DSL (Domain Specific Language) approach, this allows RTL //! descriptions in ordinary Rust code with all the features that Rust provides. //! -//! This crate is still a WIP, but it currently can describe most combinational -//! logic. The temporal structs (`Loop` and `Net`) need more development before -//! they will work properly. Many optimizations are planned in the near future. +//! This crate still has a considerable amount of WIP stuff //! //! 
See the documentation of `awint`/`awint_dag` which is used as the backend //! for this. @@ -163,11 +161,9 @@ mod awi_structs; /// Internals used by this crate to deal with states and TNode DAGs pub mod ensemble; -pub(crate) mod lower; +pub mod lower; mod misc; -pub use awi_structs::{ - epoch, Assertions, Epoch, EvalAwi, LazyAwi, LazyInlAwi, Loop, LoopHandle, Net, -}; +pub use awi_structs::{epoch, Assertions, Epoch, EvalAwi, LazyAwi, LazyInlAwi, Loop, Net}; #[cfg(feature = "debug")] pub use awint::awint_dag::triple_arena_render; pub use awint::{self, awint_dag, awint_dag::triple_arena}; @@ -191,9 +187,11 @@ pub mod dag { *, }; - pub use crate::{Loop, LoopHandle, Net}; + pub use crate::{Loop, Net}; } +// TODO fix the EvalError enum situation + // TODO use modified Lagrangians that appear different to nets with different // requirements on critical path, plus small differencing values to prevent // alternating constraint problems diff --git a/starlight/src/lower.rs b/starlight/src/lower.rs index 4c0f0e6d..22a7915a 100644 --- a/starlight/src/lower.rs +++ b/starlight/src/lower.rs @@ -1,4 +1,5 @@ +mod lower_op; mod lower_state; -mod meta; +pub mod meta; -pub use lower_state::{lower_state, LowerManagement}; +pub use lower_op::{lower_op, LowerManagement}; diff --git a/starlight/src/lower/lower_op.rs b/starlight/src/lower/lower_op.rs new file mode 100644 index 00000000..bcc9e095 --- /dev/null +++ b/starlight/src/lower/lower_op.rs @@ -0,0 +1,715 @@ +//! 
Lowers everything into LUT form + +// TODO https://github.com/rust-lang/rust-clippy/issues/10577 +#![allow(clippy::redundant_clone)] + +use std::{cmp::min, num::NonZeroUsize}; + +use awint::{ + awint_dag::{ + triple_arena::Ptr, + DummyDefault, EvalError, Lineage, + Op::{self, *}, + PState, + }, + bw, + dag::{awi, inlawi, Awi, Bits, InlAwi}, +}; + +use super::meta::*; +use crate::awi; + +pub trait LowerManagement<P: Ptr + DummyDefault> { + fn graft(&mut self, output_and_operands: &[PState]); + fn get_nzbw(&self, p: P) -> NonZeroUsize; + fn is_literal(&self, p: P) -> bool; + fn usize(&self, p: P) -> usize; + fn bool(&self, p: P) -> bool; + fn dec_rc(&mut self, p: P); +} + +/// Returns if the lowering is done +pub fn lower_op<P: Ptr + DummyDefault>( + start_op: Op<P>, + out_w: NonZeroUsize, + mut m: impl LowerManagement<P>, +) -> Result<bool, EvalError> { + match start_op { + Invalid => return Err(EvalError::OtherStr("encountered `Invalid` in lowering")), + Opaque(..) | Literal(_) | Assert(_) | Copy(_) | StaticGet(..) | Concat(_) + | ConcatFields(_) | Repeat(_) | StaticLut(..) => return Ok(true), + Lut([lut, inx]) => { + if m.is_literal(lut) { + return Err(EvalError::OtherStr( + "this needs to be handled before this function", + )); + } else { + let mut out = Awi::zero(out_w); + let lut = Awi::opaque(m.get_nzbw(lut)); + let inx = Awi::opaque(m.get_nzbw(inx)); + dynamic_to_static_lut(&mut out, &lut, &inx); + m.graft(&[out.state(), lut.state(), inx.state()]); + } + } + Get([bits, inx]) => { + if m.is_literal(inx) { + return Err(EvalError::OtherStr( + "this needs to be handled before this function", + )); + } else { + let bits = Awi::opaque(m.get_nzbw(bits)); + let inx = Awi::opaque(m.get_nzbw(inx)); + let out = dynamic_to_static_get(&bits, &inx); + m.graft(&[out.state(), bits.state(), inx.state()]); + } + } + Set([bits, inx, bit]) => { + if m.is_literal(inx) { + return Err(EvalError::OtherStr( + "this needs to be handled before this function", + )); + } else { + let bits = Awi::opaque(m.get_nzbw(bits)); + let inx = Awi::opaque(m.get_nzbw(inx)); + let bit = Awi::opaque(m.get_nzbw(bit)); + let out = dynamic_to_static_set(&bits, &inx, &bit); + m.graft(&[out.state(), bits.state(), inx.state(), bit.state()]); + } + } + FieldBit([lhs, to, rhs, from]) => { + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let from = Awi::opaque(m.get_nzbw(from)); + let bit = rhs.get(from.to_usize()).unwrap(); + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let to = Awi::opaque(m.get_nzbw(to)); + // keep `lhs` the same, `out` has the set bit + let mut out = lhs.clone(); + out.set(to.to_usize(), bit).unwrap(); + m.graft(&[ + out.state(), + lhs.state(), + to.state(), + rhs.state(), + from.state(), + ]); + } + ZeroResize([x]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let out = resize(&x, out_w, false); + m.graft(&[out.state(), 
x.state()]); + } + SignResize([x]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let out = resize(&x, out_w, true); + m.graft(&[out.state(), x.state()]); + } + Resize([x, b]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let b = Awi::opaque(m.get_nzbw(b)); + let out = resize_cond(&x, out_w, &b); + m.graft(&[out.state(), x.state(), b.state()]); + } + Lsb([x]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let out = x.get(0).unwrap(); + m.graft(&[out.state(), x.state()]); + } + Msb([x]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let out = x.get(x.bw() - 1).unwrap(); + m.graft(&[out.state(), x.state()]); + } + FieldWidth([lhs, rhs, width]) => { + let lhs_w = m.get_nzbw(lhs); + let rhs_w = m.get_nzbw(rhs); + let width_w = m.get_nzbw(width); + if m.is_literal(width) { + let width_u = m.usize(width); + let lhs = Awi::opaque(lhs_w); + let rhs = Awi::opaque(rhs_w); + // If `width_u` is out of bounds `out` is created as a no-op of `lhs` as + // expected + let out = static_field(&lhs, 0, &rhs, 0, width_u).0; + m.graft(&[ + out.state(), + lhs.state(), + rhs.state(), + Awi::opaque(width_w).state(), + ]); + } else { + let lhs = Awi::opaque(lhs_w); + let rhs = Awi::opaque(rhs_w); + let width = Awi::opaque(width_w); + let fail = width.ugt(&InlAwi::from_usize(lhs_w.get())).unwrap() + | width.ugt(&InlAwi::from_usize(rhs_w.get())).unwrap(); + let mut tmp_width = width.clone(); + tmp_width.mux_(&InlAwi::from_usize(0), fail).unwrap(); + let out = field_width(&lhs, &rhs, &tmp_width); + m.graft(&[out.state(), lhs.state(), rhs.state(), width.state()]); + } + } + Funnel([x, s]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let s = Awi::opaque(m.get_nzbw(s)); + let out = funnel_(&x, &s); + m.graft(&[out.state(), x.state(), s.state()]); + } + FieldFrom([lhs, rhs, from, width]) => { + let lhs_w = m.get_nzbw(lhs); + let rhs_w = m.get_nzbw(rhs); + let width_w = m.get_nzbw(width); + if m.is_literal(from) { + let lhs = Awi::opaque(lhs_w); + let rhs = Awi::opaque(rhs_w); + let width = 
Awi::opaque(m.get_nzbw(width)); + let from_u = m.usize(from); + let out = if rhs.bw() <= from_u { + lhs.clone() + } else { + // since `from_u` is known the less significant part of `rhs` can be disregarded + let sub_rhs_w = rhs.bw() - from_u; + if let Some(w) = NonZeroUsize::new(sub_rhs_w) { + let tmp0 = Awi::zero(w); + let (tmp1, o) = static_field(&tmp0, 0, &rhs, from_u, sub_rhs_w); + let mut out = lhs.clone(); + if o { + out + } else { + out.field_width(&tmp1, width.to_usize()).unwrap(); + out + } + } else { + lhs.clone() + } + }; + m.graft(&[ + out.state(), + lhs.state(), + rhs.state(), + Awi::opaque(m.get_nzbw(from)).state(), + width.state(), + ]); + } else { + let lhs = Awi::opaque(lhs_w); + let rhs = Awi::opaque(rhs_w); + let from = Awi::opaque(m.get_nzbw(from)); + let width = Awi::opaque(width_w); + let mut tmp = InlAwi::from_usize(rhs_w.get()); + tmp.sub_(&width).unwrap(); + // the other two fail conditions are in `field_width` + let fail = from.ugt(&tmp).unwrap(); + let mut tmp_width = width.clone(); + tmp_width.mux_(&InlAwi::from_usize(0), fail).unwrap(); + // the optimizations on `width` are done later on an inner `field_width` call + let out = field_from(&lhs, &rhs, &from, &tmp_width); + m.graft(&[ + out.state(), + lhs.state(), + rhs.state(), + from.state(), + width.state(), + ]); + } + } + Shl([x, s]) => { + if m.is_literal(s) { + let x = Awi::opaque(m.get_nzbw(x)); + let s_u = m.usize(s); + let out = if (s_u == 0) || (x.bw() <= s_u) { + x.clone() + } else { + let tmp = Awi::zero(x.nzbw()); + static_field(&tmp, s_u, &x, 0, x.bw() - s_u).0 + }; + m.graft(&[out.state(), x.state(), Awi::opaque(m.get_nzbw(s)).state()]); + } else { + let x = Awi::opaque(m.get_nzbw(x)); + let s = Awi::opaque(m.get_nzbw(s)); + let out = shl(&x, &s); + m.graft(&[out.state(), x.state(), s.state()]); + } + } + Lshr([x, s]) => { + if m.is_literal(s) { + let x = Awi::opaque(m.get_nzbw(x)); + let s_u = m.usize(s); + let out = if (s_u == 0) || (x.bw() <= s_u) { + x.clone() + } else 
{ + let tmp = Awi::zero(x.nzbw()); + static_field(&tmp, 0, &x, s_u, x.bw() - s_u).0 + }; + m.graft(&[out.state(), x.state(), Awi::opaque(m.get_nzbw(s)).state()]); + } else { + let x = Awi::opaque(m.get_nzbw(x)); + let s = Awi::opaque(m.get_nzbw(s)); + let out = lshr(&x, &s); + m.graft(&[out.state(), x.state(), s.state()]); + } + } + Ashr([x, s]) => { + if m.is_literal(s) { + let x = Awi::opaque(m.get_nzbw(x)); + let s_u = m.usize(s); + let out = if (s_u == 0) || (x.bw() <= s_u) { + x.clone() + } else { + let mut tmp = Awi::zero(x.nzbw()); + for i in 0..x.bw() { + tmp.set(i, x.msb()).unwrap(); + } + static_field(&tmp, 0, &x, s_u, x.bw() - s_u).0 + }; + m.graft(&[out.state(), x.state(), Awi::opaque(m.get_nzbw(s)).state()]); + } else { + let x = Awi::opaque(m.get_nzbw(x)); + let s = Awi::opaque(m.get_nzbw(s)); + let out = ashr(&x, &s); + m.graft(&[out.state(), x.state(), s.state()]); + } + } + Rotl([x, s]) => { + if m.is_literal(s) { + let x = Awi::opaque(m.get_nzbw(x)); + let s_u = m.usize(s); + let out = if (s_u == 0) || (x.bw() <= s_u) { + x.clone() + } else { + let tmp = static_field(&Awi::zero(x.nzbw()), s_u, &x, 0, x.bw() - s_u).0; + static_field(&tmp, 0, &x, x.bw() - s_u, s_u).0 + }; + m.graft(&[out.state(), x.state(), Awi::opaque(m.get_nzbw(s)).state()]); + } else { + let x = Awi::opaque(m.get_nzbw(x)); + let s = Awi::opaque(m.get_nzbw(s)); + let out = rotl(&x, &s); + m.graft(&[out.state(), x.state(), s.state()]); + } + } + Rotr([x, s]) => { + if m.is_literal(s) { + let x = Awi::opaque(m.get_nzbw(x)); + let s_u = m.usize(s); + let out = if (s_u == 0) || (x.bw() <= s_u) { + x.clone() + } else { + let tmp = static_field(&Awi::zero(x.nzbw()), 0, &x, s_u, x.bw() - s_u).0; + static_field(&tmp, x.bw() - s_u, &x, 0, s_u).0 + }; + m.graft(&[out.state(), x.state(), Awi::opaque(m.get_nzbw(s)).state()]); + } else { + let x = Awi::opaque(m.get_nzbw(x)); + let s = Awi::opaque(m.get_nzbw(s)); + let out = rotr(&x, &s); + m.graft(&[out.state(), x.state(), s.state()]); + } + } 
+ Not([x]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let out = bitwise_not(&x); + m.graft(&[out.state(), x.state()]); + } + Or([lhs, rhs]) => { + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let out = bitwise(&lhs, &rhs, { + use awi::*; + awi!(1110) + }); + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + And([lhs, rhs]) => { + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let out = bitwise(&lhs, &rhs, { + use awi::*; + awi!(1000) + }); + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + Xor([lhs, rhs]) => { + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let out = bitwise(&lhs, &rhs, { + use awi::*; + awi!(0110) + }); + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + Inc([x, cin]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let cin = Awi::opaque(m.get_nzbw(cin)); + let out = incrementer(&x, &cin, false).0; + m.graft(&[out.state(), x.state(), cin.state()]); + } + IncCout([x, cin]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let cin = Awi::opaque(m.get_nzbw(cin)); + let out = incrementer(&x, &cin, false).1; + m.graft(&[out.state(), x.state(), cin.state()]); + } + Dec([x, cin]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let cin = Awi::opaque(m.get_nzbw(cin)); + let out = incrementer(&x, &cin, true).0; + m.graft(&[out.state(), x.state(), cin.state()]); + } + DecCout([x, cin]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let cin = Awi::opaque(m.get_nzbw(cin)); + let out = incrementer(&x, &cin, true).1; + m.graft(&[out.state(), x.state(), cin.state()]); + } + CinSum([cin, lhs, rhs]) => { + let cin = Awi::opaque(m.get_nzbw(cin)); + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let out = cin_sum(&cin, &lhs, &rhs).0; + m.graft(&[out.state(), cin.state(), lhs.state(), rhs.state()]); + } + UnsignedOverflow([cin, lhs, rhs]) => { + let cin = Awi::opaque(m.get_nzbw(cin)); + let lhs = 
Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let out = cin_sum(&cin, &lhs, &rhs).1; + m.graft(&[out.state(), cin.state(), lhs.state(), rhs.state()]); + } + SignedOverflow([cin, lhs, rhs]) => { + let cin = Awi::opaque(m.get_nzbw(cin)); + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let out = cin_sum(&cin, &lhs, &rhs).2; + m.graft(&[out.state(), cin.state(), lhs.state(), rhs.state()]); + } + Neg([x, neg]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let neg = Awi::opaque(m.get_nzbw(neg)); + assert_eq!(neg.bw(), 1); + let out = negator(&x, &neg); + m.graft(&[out.state(), x.state(), neg.state()]); + } + Abs([x]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let mut out = x.clone(); + out.neg_(x.msb()); + m.graft(&[out.state(), x.state()]); + } + Add([lhs, rhs]) => { + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let out = cin_sum(&inlawi!(0), &lhs, &rhs).0; + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + Sub([lhs, rhs]) => { + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let mut rhs_tmp = rhs.clone(); + rhs_tmp.neg_(true); + let mut out = lhs.clone(); + out.add_(&rhs_tmp).unwrap(); + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + Rsb([lhs, rhs]) => { + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let mut out = lhs.clone(); + out.neg_(true); + out.add_(&rhs).unwrap(); + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + FieldTo([lhs, to, rhs, width]) => { + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let width = Awi::opaque(m.get_nzbw(width)); + if m.is_literal(to) { + let to_u = m.usize(to); + + let out = if lhs.bw() < to_u { + lhs.clone() + } else if let Some(w) = NonZeroUsize::new(lhs.bw() - to_u) { + let (mut lhs_hi, o) = static_field(&Awi::zero(w), 0, &lhs, to_u, w.get()); + lhs_hi.field_width(&rhs, 
width.to_usize()).unwrap(); + if o { + lhs.clone() + } else { + static_field(&lhs, to_u, &lhs_hi, 0, w.get()).0 + } + } else { + lhs.clone() + }; + m.graft(&[ + out.state(), + lhs.state(), + Awi::opaque(m.get_nzbw(to)).state(), + rhs.state(), + width.state(), + ]); + } else { + let to = Awi::opaque(m.get_nzbw(to)); + let out = field_to(&lhs, &to, &rhs, &width); + m.graft(&[ + out.state(), + lhs.state(), + to.state(), + rhs.state(), + width.state(), + ]); + } + } + Field([lhs, to, rhs, from, width]) => { + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let width = Awi::opaque(m.get_nzbw(width)); + if m.is_literal(to) || m.is_literal(from) { + let to = Awi::opaque(m.get_nzbw(to)); + let from = Awi::opaque(m.get_nzbw(from)); + let min_w = min(lhs.bw(), rhs.bw()); + let mut tmp = Awi::zero(NonZeroUsize::new(min_w).unwrap()); + tmp.field_from(&rhs, from.to_usize(), width.to_usize()) + .unwrap(); + let mut out = lhs.clone(); + out.field_to(to.to_usize(), &tmp, width.to_usize()).unwrap(); + + m.graft(&[ + out.state(), + lhs.state(), + to.state(), + rhs.state(), + from.state(), + width.state(), + ]); + } else { + let to = Awi::opaque(m.get_nzbw(to)); + let from = Awi::opaque(m.get_nzbw(from)); + let out = field(&lhs, &to, &rhs, &from, &width); + m.graft(&[ + out.state(), + lhs.state(), + to.state(), + rhs.state(), + from.state(), + width.state(), + ]); + } + } + Rev([x]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let out = reverse(&x); + m.graft(&[out.state(), x.state()]); + } + Eq([lhs, rhs]) => { + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let out = equal(&lhs, &rhs); + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + Ne([lhs, rhs]) => { + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let mut out = equal(&lhs, &rhs); + out.not_(); + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + Ult([lhs, rhs]) => { + let w = m.get_nzbw(lhs); + let lhs 
= Awi::opaque(w); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let mut not_lhs = lhs.clone(); + not_lhs.not_(); + let mut tmp = Awi::zero(w); + // TODO should probably use some short termination circuit like what + // `tsmear_inx` uses + let (out, _) = tmp.cin_sum_(false, &not_lhs, &rhs).unwrap(); + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + Ule([lhs, rhs]) => { + let w = m.get_nzbw(lhs); + let lhs = Awi::opaque(w); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let mut not_lhs = lhs.clone(); + not_lhs.not_(); + let mut tmp = Awi::zero(w); + let (out, _) = tmp.cin_sum_(true, &not_lhs, &rhs).unwrap(); + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + Ilt([lhs, rhs]) => { + let w = m.get_nzbw(lhs); + let lhs = Awi::opaque(w); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let mut out = inlawi!(0); + if w.get() == 1 { + let mut tmp = inlawi!(00); + tmp.set(0, lhs.msb()).unwrap(); + tmp.set(1, rhs.msb()).unwrap(); + out.lut_(&inlawi!(0010), &tmp).unwrap(); + } else { + let lhs_lo = awi!(lhs[..(lhs.bw() - 1)]).unwrap(); + let rhs_lo = awi!(rhs[..(rhs.bw() - 1)]).unwrap(); + let lo_lt = lhs_lo.ult(&rhs_lo).unwrap(); + let mut tmp = inlawi!(000); + tmp.set(0, lo_lt).unwrap(); + tmp.set(1, lhs.msb()).unwrap(); + tmp.set(2, rhs.msb()).unwrap(); + // if `lhs.msb() != rhs.msb()` then `lhs.msb()` determines signed-less-than, + // otherwise `lo_lt` determines + out.lut_(&inlawi!(10001110), &tmp).unwrap(); + } + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + Ile([lhs, rhs]) => { + let w = m.get_nzbw(lhs); + let lhs = Awi::opaque(w); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let mut out = inlawi!(0); + if w.get() == 1 { + let mut tmp = inlawi!(00); + tmp.set(0, lhs.msb()).unwrap(); + tmp.set(1, rhs.msb()).unwrap(); + out.lut_(&inlawi!(1011), &tmp).unwrap(); + } else { + let lhs_lo = awi!(lhs[..(lhs.bw() - 1)]).unwrap(); + let rhs_lo = awi!(rhs[..(rhs.bw() - 1)]).unwrap(); + let lo_lt = lhs_lo.ule(&rhs_lo).unwrap(); + let mut tmp = inlawi!(000); + tmp.set(0, 
lo_lt).unwrap(); + tmp.set(1, lhs.msb()).unwrap(); + tmp.set(2, rhs.msb()).unwrap(); + out.lut_(&inlawi!(10001110), &tmp).unwrap(); + } + m.graft(&[out.state(), lhs.state(), rhs.state()]); + } + op @ (IsZero(_) | IsUmax(_) | IsImax(_) | IsImin(_) | IsUone(_)) => { + let x = Awi::opaque(m.get_nzbw(op.operands()[0])); + let w = x.bw(); + let out = InlAwi::from(match op { + IsZero(_) => x.const_eq(&awi!(zero: ..w).unwrap()).unwrap(), + IsUmax(_) => x.const_eq(&awi!(umax: ..w).unwrap()).unwrap(), + IsImax(_) => x.const_eq(&awi!(imax: ..w).unwrap()).unwrap(), + IsImin(_) => x.const_eq(&awi!(imin: ..w).unwrap()).unwrap(), + IsUone(_) => x.const_eq(&awi!(uone: ..w).unwrap()).unwrap(), + _ => unreachable!(), + }); + m.graft(&[out.state(), x.state()]); + } + CountOnes([x]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let out = count_ones(&x).to_usize(); + m.graft(&[out.state(), x.state()]); + } + Lz([x]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let out = leading_zeros(&x).to_usize(); + m.graft(&[out.state(), x.state()]); + } + Tz([x]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let out = trailing_zeros(&x).to_usize(); + m.graft(&[out.state(), x.state()]); + } + Sig([x]) => { + let x = Awi::opaque(m.get_nzbw(x)); + let out = significant_bits(&x).to_usize(); + m.graft(&[out.state(), x.state()]); + } + LutSet([table, entry, inx]) => { + let table = Awi::opaque(m.get_nzbw(table)); + let entry = Awi::opaque(m.get_nzbw(entry)); + let inx = Awi::opaque(m.get_nzbw(inx)); + let out = lut_set(&table, &entry, &inx); + m.graft(&[out.state(), table.state(), entry.state(), inx.state()]); + } + ZeroResizeOverflow([x], w) => { + let x = Awi::opaque(m.get_nzbw(x)); + let mut out = Awi::zero(bw(1)); + let w = w.get(); + if w < x.bw() { + out.bool_(!awi!(x[w..]).unwrap().is_zero()); + } + m.graft(&[out.state(), x.state()]); + } + SignResizeOverflow([x], w) => { + let x = Awi::opaque(m.get_nzbw(x)); + let mut out = Awi::zero(bw(1)); + let w = w.get(); + if w < x.bw() { + // the new msb 
and the bits above it should equal the old msb + let critical = awi!(x[(w - 1)..]).unwrap(); + let mut tmp = inlawi!(00); + tmp.set(0, critical.is_zero()).unwrap(); + tmp.set(1, critical.is_umax()).unwrap(); + out.lut_(&inlawi!(1001), &tmp).unwrap(); + } + m.graft(&[out.state(), x.state()]); + } + ArbMulAdd([add, lhs, rhs]) => { + let w = m.get_nzbw(add); + let add = Awi::opaque(w); + let lhs = Awi::opaque(m.get_nzbw(lhs)); + let rhs = Awi::opaque(m.get_nzbw(rhs)); + let out = mul_add(w, Some(&add), &lhs, &rhs); + m.graft(&[out.state(), add.state(), lhs.state(), rhs.state()]); + } + Mux([x0, x1, inx]) => { + let x0 = Awi::opaque(m.get_nzbw(x0)); + let x1 = Awi::opaque(m.get_nzbw(x1)); + let inx_tmp = Awi::opaque(m.get_nzbw(inx)); + let out = if m.is_literal(inx) { + let b = m.bool(inx); + if b { + x1.clone() + } else { + x0.clone() + } + } else { + mux_(&x0, &x1, &inx_tmp) + }; + m.graft(&[out.state(), x0.state(), x1.state(), inx_tmp.state()]); + } + // TODO in the divisions especially and in other operations, we need to look at the + // operand tree and combine multiple ops together in a single lowering operation + UQuo([duo, div]) => { + let duo = Awi::opaque(m.get_nzbw(duo)); + let div = Awi::opaque(m.get_nzbw(div)); + let quo = division(&duo, &div).0; + m.graft(&[quo.state(), duo.state(), div.state()]); + } + URem([duo, div]) => { + let duo = Awi::opaque(m.get_nzbw(duo)); + let div = Awi::opaque(m.get_nzbw(div)); + let rem = division(&duo, &div).1; + m.graft(&[rem.state(), duo.state(), div.state()]); + } + IQuo([duo, div]) => { + let duo = Awi::opaque(m.get_nzbw(duo)); + let div = Awi::opaque(m.get_nzbw(div)); + let duo_msb = duo.msb(); + let div_msb = div.msb(); + // keeping arguments opaque + let mut tmp_duo = duo.clone(); + let mut tmp_div = div.clone(); + tmp_duo.neg_(duo_msb); + tmp_div.neg_(div_msb); + let mut quo = division(&tmp_duo, &tmp_div).0; + let mut tmp0 = InlAwi::from(duo_msb); + let tmp1 = InlAwi::from(div_msb); + tmp0.xor_(&tmp1).unwrap(); + 
quo.neg_(tmp0.to_bool()); + m.graft(&[quo.state(), duo.state(), div.state()]); + } + IRem([duo, div]) => { + let duo = Awi::opaque(m.get_nzbw(duo)); + let div = Awi::opaque(m.get_nzbw(div)); + let duo_msb = duo.msb(); + let div_msb = div.msb(); + // keeping arguments opaque + let mut tmp_duo = duo.clone(); + let mut tmp_div = div.clone(); + tmp_duo.neg_(duo_msb); + tmp_div.neg_(div_msb); + let mut rem = division(&tmp_duo, &tmp_div).1; + rem.neg_(duo_msb); + m.graft(&[rem.state(), duo.state(), div.state()]); + } + } + Ok(false) +} diff --git a/starlight/src/lower/lower_state.rs b/starlight/src/lower/lower_state.rs index 0ac3349d..8036febf 100644 --- a/starlight/src/lower/lower_state.rs +++ b/starlight/src/lower/lower_state.rs @@ -1,708 +1,385 @@ -//! Lowers everything into LUT form - -// TODO https://github.com/rust-lang/rust-clippy/issues/10577 -#![allow(clippy::redundant_clone)] - -use std::{cmp::min, num::NonZeroUsize}; +use std::num::NonZeroUsize; use awint::{ - awint_dag::{ - triple_arena::Ptr, - DummyDefault, EvalError, Lineage, - Op::{self, *}, - PState, - }, + awint_dag::{smallvec::smallvec, ConcatFieldsType, ConcatType, EvalError, Op::*, PState}, bw, - dag::{awi, inlawi, Awi, Bits, InlAwi}, }; -use super::meta::*; - -pub trait LowerManagement { - fn graft(&mut self, output_and_operands: &[PState]); - fn get_nzbw(&self, p: P) -> NonZeroUsize; - fn is_literal(&self, p: P) -> bool; - fn usize(&self, p: P) -> usize; - fn bool(&self, p: P) -> bool; - fn dec_rc(&mut self, p: P); -} +use crate::{ + ensemble::Ensemble, + epoch::EpochShared, + lower::{lower_op, LowerManagement}, +}; -/// Returns if the lowering is done -pub fn lower_state( - start_op: Op

<P>, - out_w: NonZeroUsize, - mut m: impl LowerManagement<P>

, -) -> Result { - match start_op { - Invalid => return Err(EvalError::OtherStr("encountered `Invalid` in lowering")), - Opaque(..) | Literal(_) | Assert(_) | Copy(_) | StaticLut(..) | StaticGet(..) - | StaticSet(..) => return Ok(true), - Lut([lut, inx]) => { - if m.is_literal(lut) { - return Err(EvalError::OtherStr( - "this needs to be handled before this function", - )); - } else { - let mut out = Awi::zero(out_w); - let lut = Awi::opaque(m.get_nzbw(lut)); - let inx = Awi::opaque(m.get_nzbw(inx)); - dynamic_to_static_lut(&mut out, &lut, &inx); - m.graft(&[out.state(), lut.state(), inx.state()]); +impl Ensemble { + /// Used for forbidden meta psuedo-DSL techniques in which a single state is + /// replaced by more basic states. + pub fn graft(&mut self, p_state: PState, operands: &[PState]) -> Result<(), EvalError> { + #[cfg(debug_assertions)] + { + if (self.stator.states[p_state].op.operands_len() + 1) != operands.len() { + return Err(EvalError::WrongNumberOfOperands) } - } - Get([bits, inx]) => { - if m.is_literal(inx) { - return Err(EvalError::OtherStr( - "this needs to be handled before this function", - )); - } else { - let bits = Awi::opaque(m.get_nzbw(bits)); - let inx = Awi::opaque(m.get_nzbw(inx)); - let out = dynamic_to_static_get(&bits, &inx); - m.graft(&[out.state(), bits.state(), inx.state()]); + for (i, op) in self.stator.states[p_state].op.operands().iter().enumerate() { + let current_nzbw = self.stator.states[operands[i + 1]].nzbw; + let current_is_opaque = self.stator.states[operands[i + 1]].op.is_opaque(); + if self.stator.states[op].nzbw != current_nzbw { + return Err(EvalError::OtherString(format!( + "operand {}: a bitwidth of {:?} is trying to be grafted to a bitwidth of \ + {:?}", + i, current_nzbw, self.stator.states[op].nzbw + ))) + } + if !current_is_opaque { + return Err(EvalError::ExpectedOpaque) + } } - } - Set([bits, inx, bit]) => { - if m.is_literal(inx) { - return Err(EvalError::OtherStr( - "this needs to be handled before this 
function", - )); - } else { - let bits = Awi::opaque(m.get_nzbw(bits)); - let inx = Awi::opaque(m.get_nzbw(inx)); - let bit = Awi::opaque(m.get_nzbw(bit)); - let out = dynamic_to_static_set(&bits, &inx, &bit); - m.graft(&[out.state(), bits.state(), inx.state(), bit.state()]); + if self.stator.states[p_state].nzbw != self.stator.states[operands[0]].nzbw { + return Err(EvalError::WrongBitwidth) } } - FieldBit([lhs, to, rhs, from]) => { - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let from = Awi::opaque(m.get_nzbw(from)); - let bit = rhs.get(from.to_usize()).unwrap(); - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let to = Awi::opaque(m.get_nzbw(to)); - // keep `lhs` the same, `out` has the set bit - let mut out = lhs.clone(); - out.set(to.to_usize(), bit).unwrap(); - m.graft(&[ - out.state(), - lhs.state(), - to.state(), - rhs.state(), - from.state(), - ]); - } - ZeroResize([x]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let out = resize(&x, out_w, false); - m.graft(&[out.state(), x.state()]); - } - SignResize([x]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let out = resize(&x, out_w, true); - m.graft(&[out.state(), x.state()]); - } - Resize([x, b]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let b = Awi::opaque(m.get_nzbw(b)); - let out = resize_cond(&x, out_w, &b); - m.graft(&[out.state(), x.state(), b.state()]); - } - Lsb([x]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let out = x.get(0).unwrap(); - m.graft(&[out.state(), x.state()]); - } - Msb([x]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let out = x.get(x.bw() - 1).unwrap(); - m.graft(&[out.state(), x.state()]); - } - FieldWidth([lhs, rhs, width]) => { - let lhs_w = m.get_nzbw(lhs); - let rhs_w = m.get_nzbw(rhs); - let width_w = m.get_nzbw(width); - if m.is_literal(width) { - let width_u = m.usize(width); - let lhs = Awi::opaque(lhs_w); - let rhs = Awi::opaque(rhs_w); - // If `width_u` is out of bounds `out` is created as a no-op of `lhs` as - // expected - let out = static_field(&lhs, 0, &rhs, 0, 
width_u).0; - m.graft(&[ - out.state(), - lhs.state(), - rhs.state(), - Awi::opaque(width_w).state(), - ]); + + // graft input + for i in 1..operands.len() { + let grafted = operands[i]; + let graftee = self.stator.states.get(p_state).unwrap().op.operands()[i - 1]; + if let Some(grafted) = self.stator.states.get_mut(grafted) { + // change the grafted `Opaque` into a `Copy` that routes to the graftee instead + // of needing to change all the operands of potentially many nodes + grafted.op = Copy([graftee]); } else { - let lhs = Awi::opaque(lhs_w); - let rhs = Awi::opaque(rhs_w); - let width = Awi::opaque(width_w); - let fail = width.ugt(&InlAwi::from_usize(lhs_w.get())).unwrap() - | width.ugt(&InlAwi::from_usize(rhs_w.get())).unwrap(); - let mut tmp_width = width.clone(); - tmp_width.mux_(&InlAwi::from_usize(0), fail).unwrap(); - let out = field_width(&lhs, &rhs, &tmp_width); - m.graft(&[out.state(), lhs.state(), rhs.state(), width.state()]); + // else the operand is not used because it was optimized away, this is removing + // a tree outside of the grafted part + self.dec_rc(graftee).unwrap(); } } - Funnel([x, s]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let s = Awi::opaque(m.get_nzbw(s)); - let out = funnel_(&x, &s); - m.graft(&[out.state(), x.state(), s.state()]); - } - FieldFrom([lhs, rhs, from, width]) => { - let lhs_w = m.get_nzbw(lhs); - let rhs_w = m.get_nzbw(rhs); - let width_w = m.get_nzbw(width); - if m.is_literal(from) { - let lhs = Awi::opaque(lhs_w); - let rhs = Awi::opaque(rhs_w); - let width = Awi::opaque(m.get_nzbw(width)); - let from_u = m.usize(from); - let out = if rhs.bw() <= from_u { - lhs.clone() - } else { - // since `from_u` is known the less significant part of `rhs` can be disregarded - let sub_rhs_w = rhs.bw() - from_u; - if let Some(w) = NonZeroUsize::new(sub_rhs_w) { - let tmp0 = Awi::zero(w); - let (tmp1, o) = static_field(&tmp0, 0, &rhs, from_u, sub_rhs_w); - let mut out = lhs.clone(); - if o { - out - } else { - 
out.field_width(&tmp1, width.to_usize()).unwrap(); - out - } - } else { - lhs.clone() - } - }; - m.graft(&[ - out.state(), - lhs.state(), - rhs.state(), - Awi::opaque(m.get_nzbw(from)).state(), - width.state(), - ]); - } else { - let lhs = Awi::opaque(lhs_w); - let rhs = Awi::opaque(rhs_w); - let from = Awi::opaque(m.get_nzbw(from)); - let width = Awi::opaque(width_w); - let mut tmp = InlAwi::from_usize(rhs_w.get()); - tmp.sub_(&width).unwrap(); - // the other two fail conditions are in `field_width` - let fail = from.ugt(&tmp).unwrap(); - let mut tmp_width = width.clone(); - tmp_width.mux_(&InlAwi::from_usize(0), fail).unwrap(); - // the optimizations on `width` are done later on an inner `field_width` call - let out = field_from(&lhs, &rhs, &from, &tmp_width); - m.graft(&[ - out.state(), - lhs.state(), - rhs.state(), - from.state(), - width.state(), - ]); + + // graft output + let grafted = operands[0]; + self.stator.states.get_mut(p_state).unwrap().op = Copy([grafted]); + self.stator.states[grafted].inc_rc(); + + Ok(()) + } + + pub fn lower_op(epoch_shared: &EpochShared, p_state: PState) -> Result { + struct Tmp<'a> { + ptr: PState, + epoch_shared: &'a EpochShared, + } + impl<'a> LowerManagement for Tmp<'a> { + fn graft(&mut self, operands: &[PState]) { + self.epoch_shared + .epoch_data + .borrow_mut() + .ensemble + .graft(self.ptr, operands) + .unwrap(); } - } - Shl([x, s]) => { - if m.is_literal(s) { - let x = Awi::opaque(m.get_nzbw(x)); - let s_u = m.usize(s); - let out = if (s_u == 0) || (x.bw() <= s_u) { - x.clone() - } else { - let tmp = Awi::zero(x.nzbw()); - static_field(&tmp, s_u, &x, 0, x.bw() - s_u).0 - }; - m.graft(&[out.state(), x.state(), Awi::opaque(m.get_nzbw(s)).state()]); - } else { - let x = Awi::opaque(m.get_nzbw(x)); - let s = Awi::opaque(m.get_nzbw(s)); - let out = shl(&x, &s); - m.graft(&[out.state(), x.state(), s.state()]); + + fn get_nzbw(&self, p: PState) -> NonZeroUsize { + self.epoch_shared + .epoch_data + .borrow() + .ensemble + 
.stator + .states + .get(p) + .unwrap() + .nzbw } - } - Lshr([x, s]) => { - if m.is_literal(s) { - let x = Awi::opaque(m.get_nzbw(x)); - let s_u = m.usize(s); - let out = if (s_u == 0) || (x.bw() <= s_u) { - x.clone() - } else { - let tmp = Awi::zero(x.nzbw()); - static_field(&tmp, 0, &x, s_u, x.bw() - s_u).0 - }; - m.graft(&[out.state(), x.state(), Awi::opaque(m.get_nzbw(s)).state()]); - } else { - let x = Awi::opaque(m.get_nzbw(x)); - let s = Awi::opaque(m.get_nzbw(s)); - let out = lshr(&x, &s); - m.graft(&[out.state(), x.state(), s.state()]); + + fn is_literal(&self, p: PState) -> bool { + self.epoch_shared + .epoch_data + .borrow() + .ensemble + .stator + .states + .get(p) + .unwrap() + .op + .is_literal() } - } - Ashr([x, s]) => { - if m.is_literal(s) { - let x = Awi::opaque(m.get_nzbw(x)); - let s_u = m.usize(s); - let out = if (s_u == 0) || (x.bw() <= s_u) { - x.clone() - } else { - let mut tmp = Awi::zero(x.nzbw()); - for i in 0..x.bw() { - tmp.set(i, x.msb()).unwrap(); + + fn usize(&self, p: PState) -> usize { + if let Literal(ref lit) = self + .epoch_shared + .epoch_data + .borrow() + .ensemble + .stator + .states + .get(p) + .unwrap() + .op + { + if lit.bw() != 64 { + panic!() } - static_field(&tmp, 0, &x, s_u, x.bw() - s_u).0 - }; - m.graft(&[out.state(), x.state(), Awi::opaque(m.get_nzbw(s)).state()]); - } else { - let x = Awi::opaque(m.get_nzbw(x)); - let s = Awi::opaque(m.get_nzbw(s)); - let out = ashr(&x, &s); - m.graft(&[out.state(), x.state(), s.state()]); - } - } - Rotl([x, s]) => { - if m.is_literal(s) { - let x = Awi::opaque(m.get_nzbw(x)); - let s_u = m.usize(s); - let out = if (s_u == 0) || (x.bw() <= s_u) { - x.clone() + lit.to_usize() } else { - let tmp = static_field(&Awi::zero(x.nzbw()), s_u, &x, 0, x.bw() - s_u).0; - static_field(&tmp, 0, &x, x.bw() - s_u, s_u).0 - }; - m.graft(&[out.state(), x.state(), Awi::opaque(m.get_nzbw(s)).state()]); - } else { - let x = Awi::opaque(m.get_nzbw(x)); - let s = Awi::opaque(m.get_nzbw(s)); - let out = 
rotl(&x, &s); - m.graft(&[out.state(), x.state(), s.state()]); - } - } - Rotr([x, s]) => { - if m.is_literal(s) { - let x = Awi::opaque(m.get_nzbw(x)); - let s_u = m.usize(s); - let out = if (s_u == 0) || (x.bw() <= s_u) { - x.clone() - } else { - let tmp = static_field(&Awi::zero(x.nzbw()), 0, &x, s_u, x.bw() - s_u).0; - static_field(&tmp, x.bw() - s_u, &x, 0, s_u).0 - }; - m.graft(&[out.state(), x.state(), Awi::opaque(m.get_nzbw(s)).state()]); - } else { - let x = Awi::opaque(m.get_nzbw(x)); - let s = Awi::opaque(m.get_nzbw(s)); - let out = rotr(&x, &s); - m.graft(&[out.state(), x.state(), s.state()]); + panic!() + } } - } - Not([x]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let out = bitwise_not(&x); - m.graft(&[out.state(), x.state()]); - } - Or([lhs, rhs]) => { - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let out = bitwise(&lhs, &rhs, inlawi!(1110)); - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - And([lhs, rhs]) => { - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let out = bitwise(&lhs, &rhs, inlawi!(1000)); - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - Xor([lhs, rhs]) => { - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let out = bitwise(&lhs, &rhs, inlawi!(0110)); - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - Inc([x, cin]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let cin = Awi::opaque(m.get_nzbw(cin)); - let out = incrementer(&x, &cin, false).0; - m.graft(&[out.state(), x.state(), cin.state()]); - } - IncCout([x, cin]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let cin = Awi::opaque(m.get_nzbw(cin)); - let out = incrementer(&x, &cin, false).1; - m.graft(&[out.state(), x.state(), cin.state()]); - } - Dec([x, cin]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let cin = Awi::opaque(m.get_nzbw(cin)); - let out = incrementer(&x, &cin, true).0; - m.graft(&[out.state(), x.state(), cin.state()]); - } - 
DecCout([x, cin]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let cin = Awi::opaque(m.get_nzbw(cin)); - let out = incrementer(&x, &cin, true).1; - m.graft(&[out.state(), x.state(), cin.state()]); - } - CinSum([cin, lhs, rhs]) => { - let cin = Awi::opaque(m.get_nzbw(cin)); - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let out = cin_sum(&cin, &lhs, &rhs).0; - m.graft(&[out.state(), cin.state(), lhs.state(), rhs.state()]); - } - UnsignedOverflow([cin, lhs, rhs]) => { - let cin = Awi::opaque(m.get_nzbw(cin)); - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let out = cin_sum(&cin, &lhs, &rhs).1; - m.graft(&[out.state(), cin.state(), lhs.state(), rhs.state()]); - } - SignedOverflow([cin, lhs, rhs]) => { - let cin = Awi::opaque(m.get_nzbw(cin)); - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let out = cin_sum(&cin, &lhs, &rhs).2; - m.graft(&[out.state(), cin.state(), lhs.state(), rhs.state()]); - } - Neg([x, neg]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let neg = Awi::opaque(m.get_nzbw(neg)); - assert_eq!(neg.bw(), 1); - let out = negator(&x, &neg); - m.graft(&[out.state(), x.state(), neg.state()]); - } - Abs([x]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let mut out = x.clone(); - out.neg_(x.msb()); - m.graft(&[out.state(), x.state()]); - } - Add([lhs, rhs]) => { - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let out = cin_sum(&inlawi!(0), &lhs, &rhs).0; - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - Sub([lhs, rhs]) => { - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let mut rhs_tmp = rhs.clone(); - rhs_tmp.neg_(true); - let mut out = lhs.clone(); - out.add_(&rhs_tmp).unwrap(); - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - Rsb([lhs, rhs]) => { - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let mut out = 
lhs.clone(); - out.neg_(true); - out.add_(&rhs).unwrap(); - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - FieldTo([lhs, to, rhs, width]) => { - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let width = Awi::opaque(m.get_nzbw(width)); - if m.is_literal(to) { - let to_u = m.usize(to); - let out = if lhs.bw() < to_u { - lhs.clone() - } else if let Some(w) = NonZeroUsize::new(lhs.bw() - to_u) { - let (mut lhs_hi, o) = static_field(&Awi::zero(w), 0, &lhs, to_u, w.get()); - lhs_hi.field_width(&rhs, width.to_usize()).unwrap(); - if o { - lhs.clone() - } else { - static_field(&lhs, to_u, &lhs_hi, 0, w.get()).0 + fn bool(&self, p: PState) -> bool { + if let Literal(ref lit) = self + .epoch_shared + .epoch_data + .borrow() + .ensemble + .stator + .states + .get(p) + .unwrap() + .op + { + if lit.bw() != 1 { + panic!() } + lit.to_bool() } else { - lhs.clone() - }; - m.graft(&[ - out.state(), - lhs.state(), - Awi::opaque(m.get_nzbw(to)).state(), - rhs.state(), - width.state(), - ]); - } else { - let to = Awi::opaque(m.get_nzbw(to)); - let out = field_to(&lhs, &to, &rhs, &width); - m.graft(&[ - out.state(), - lhs.state(), - to.state(), - rhs.state(), - width.state(), - ]); + panic!() + } } - } - Field([lhs, to, rhs, from, width]) => { - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let width = Awi::opaque(m.get_nzbw(width)); - if m.is_literal(to) || m.is_literal(from) { - let to = Awi::opaque(m.get_nzbw(to)); - let from = Awi::opaque(m.get_nzbw(from)); - let min_w = min(lhs.bw(), rhs.bw()); - let mut tmp = Awi::zero(NonZeroUsize::new(min_w).unwrap()); - tmp.field_from(&rhs, from.to_usize(), width.to_usize()) - .unwrap(); - let mut out = lhs.clone(); - out.field_to(to.to_usize(), &tmp, width.to_usize()).unwrap(); - m.graft(&[ - out.state(), - lhs.state(), - to.state(), - rhs.state(), - from.state(), - width.state(), - ]); - } else { - let to = Awi::opaque(m.get_nzbw(to)); - let from = 
Awi::opaque(m.get_nzbw(from)); - let out = field(&lhs, &to, &rhs, &from, &width); - m.graft(&[ - out.state(), - lhs.state(), - to.state(), - rhs.state(), - from.state(), - width.state(), - ]); - } - } - Rev([x]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let mut out = Awi::zero(x.nzbw()); - for i in 0..x.bw() { - out.set(i, x.get(x.bw() - 1 - i).unwrap()).unwrap() - } - m.graft(&[out.state(), x.state()]); - } - Eq([lhs, rhs]) => { - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let out = equal(&lhs, &rhs); - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - Ne([lhs, rhs]) => { - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let mut out = equal(&lhs, &rhs); - out.not_(); - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - Ult([lhs, rhs]) => { - let w = m.get_nzbw(lhs); - let lhs = Awi::opaque(w); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let mut not_lhs = lhs.clone(); - not_lhs.not_(); - let mut tmp = Awi::zero(w); - // TODO should probably use some short termination circuit like what - // `tsmear_inx` uses - let (out, _) = tmp.cin_sum_(false, &not_lhs, &rhs).unwrap(); - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - Ule([lhs, rhs]) => { - let w = m.get_nzbw(lhs); - let lhs = Awi::opaque(w); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let mut not_lhs = lhs.clone(); - not_lhs.not_(); - let mut tmp = Awi::zero(w); - let (out, _) = tmp.cin_sum_(true, &not_lhs, &rhs).unwrap(); - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - Ilt([lhs, rhs]) => { - let w = m.get_nzbw(lhs); - let lhs = Awi::opaque(w); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let mut out = inlawi!(0); - if w.get() == 1 { - let mut tmp = inlawi!(00); - tmp.set(0, lhs.msb()).unwrap(); - tmp.set(1, rhs.msb()).unwrap(); - out.lut_(&inlawi!(0010), &tmp).unwrap(); - } else { - let lhs_lo = awi!(lhs[..(lhs.bw() - 1)]).unwrap(); - let rhs_lo = awi!(rhs[..(rhs.bw() - 1)]).unwrap(); - let lo_lt = 
lhs_lo.ult(&rhs_lo).unwrap(); - let mut tmp = inlawi!(000); - tmp.set(0, lo_lt).unwrap(); - tmp.set(1, lhs.msb()).unwrap(); - tmp.set(2, rhs.msb()).unwrap(); - // if `lhs.msb() != rhs.msb()` then `lhs.msb()` determines signed-less-than, - // otherwise `lo_lt` determines - out.lut_(&inlawi!(10001110), &tmp).unwrap(); + fn dec_rc(&mut self, p: PState) { + self.epoch_shared + .epoch_data + .borrow_mut() + .ensemble + .dec_rc(p) + .unwrap() } - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - Ile([lhs, rhs]) => { - let w = m.get_nzbw(lhs); - let lhs = Awi::opaque(w); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let mut out = inlawi!(0); - if w.get() == 1 { - let mut tmp = inlawi!(00); - tmp.set(0, lhs.msb()).unwrap(); - tmp.set(1, rhs.msb()).unwrap(); - out.lut_(&inlawi!(1011), &tmp).unwrap(); - } else { - let lhs_lo = awi!(lhs[..(lhs.bw() - 1)]).unwrap(); - let rhs_lo = awi!(rhs[..(rhs.bw() - 1)]).unwrap(); - let lo_lt = lhs_lo.ule(&rhs_lo).unwrap(); - let mut tmp = inlawi!(000); - tmp.set(0, lo_lt).unwrap(); - tmp.set(1, lhs.msb()).unwrap(); - tmp.set(2, rhs.msb()).unwrap(); - out.lut_(&inlawi!(10001110), &tmp).unwrap(); - } - m.graft(&[out.state(), lhs.state(), rhs.state()]); - } - op @ (IsZero(_) | IsUmax(_) | IsImax(_) | IsImin(_) | IsUone(_)) => { - let x = Awi::opaque(m.get_nzbw(op.operands()[0])); - let w = x.bw(); - let out = InlAwi::from(match op { - IsZero(_) => x.const_eq(&awi!(zero: ..w).unwrap()).unwrap(), - IsUmax(_) => x.const_eq(&awi!(umax: ..w).unwrap()).unwrap(), - IsImax(_) => x.const_eq(&awi!(imax: ..w).unwrap()).unwrap(), - IsImin(_) => x.const_eq(&awi!(imin: ..w).unwrap()).unwrap(), - IsUone(_) => x.const_eq(&awi!(uone: ..w).unwrap()).unwrap(), - _ => unreachable!(), - }); - m.graft(&[out.state(), x.state()]); - } - CountOnes([x]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let out = count_ones(&x).to_usize(); - m.graft(&[out.state(), x.state()]); } - Lz([x]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let out = 
leading_zeros(&x).to_usize(); - m.graft(&[out.state(), x.state()]); - } - Tz([x]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let out = trailing_zeros(&x).to_usize(); - m.graft(&[out.state(), x.state()]); - } - Sig([x]) => { - let x = Awi::opaque(m.get_nzbw(x)); - let out = significant_bits(&x).to_usize(); - m.graft(&[out.state(), x.state()]); - } - LutSet([table, entry, inx]) => { - let table = Awi::opaque(m.get_nzbw(table)); - let entry = Awi::opaque(m.get_nzbw(entry)); - let inx = Awi::opaque(m.get_nzbw(inx)); - let out = lut_set(&table, &entry, &inx); - m.graft(&[out.state(), table.state(), entry.state(), inx.state()]); - } - ZeroResizeOverflow([x], w) => { - let x = Awi::opaque(m.get_nzbw(x)); - let mut out = Awi::zero(bw(1)); - let w = w.get(); - if w < x.bw() { - out.bool_(!awi!(x[w..]).unwrap().is_zero()); - } - m.graft(&[out.state(), x.state()]); - } - SignResizeOverflow([x], w) => { - let x = Awi::opaque(m.get_nzbw(x)); - let mut out = Awi::zero(bw(1)); - let w = w.get(); - if w < x.bw() { - // the new msb and the bits above it should equal the old msb - let critical = awi!(x[(w - 1)..]).unwrap(); - let mut tmp = inlawi!(00); - tmp.set(0, critical.is_zero()).unwrap(); - tmp.set(1, critical.is_umax()).unwrap(); - out.lut_(&inlawi!(1001), &tmp).unwrap(); + let lock = epoch_shared.epoch_data.borrow(); + let state = lock.ensemble.stator.states.get(p_state).unwrap(); + let start_op = state.op.clone(); + let out_w = state.nzbw; + drop(lock); + lower_op(start_op, out_w, Tmp { + ptr: p_state, + epoch_shared, + }) + } + + /// Lowers the rootward tree from `p_state` down to the elementary `Op`s + pub fn dfs_lower_states_to_elementary( + epoch_shared: &EpochShared, + p_state: PState, + ) -> Result<(), EvalError> { + let mut unimplemented = false; + let mut lock = epoch_shared.epoch_data.borrow_mut(); + if let Some(state) = lock.ensemble.stator.states.get(p_state) { + if state.lowered_to_elementary { + return Ok(()) } - m.graft(&[out.state(), x.state()]); + } else { 
+ return Err(EvalError::InvalidPtr) } - ArbMulAdd([add, lhs, rhs]) => { - let w = m.get_nzbw(add); - let add = Awi::opaque(w); - let lhs = Awi::opaque(m.get_nzbw(lhs)); - let rhs = Awi::opaque(m.get_nzbw(rhs)); - let out = mul_add(w, Some(&add), &lhs, &rhs); - m.graft(&[out.state(), add.state(), lhs.state(), rhs.state()]); - } - Mux([x0, x1, inx]) => { - let x0 = Awi::opaque(m.get_nzbw(x0)); - let x1 = Awi::opaque(m.get_nzbw(x1)); - let inx_tmp = Awi::opaque(m.get_nzbw(inx)); - let out = if m.is_literal(inx) { - let b = m.bool(inx); - if b { - x1.clone() + lock.ensemble.stator.states[p_state].lowered_to_elementary = true; + + drop(lock); + let mut path: Vec<(usize, PState)> = vec![(0, p_state)]; + loop { + let (i, p_state) = path[path.len() - 1]; + let mut lock = epoch_shared.epoch_data.borrow_mut(); + let state = &lock.ensemble.stator.states[p_state]; + let ops = state.op.operands(); + if ops.is_empty() { + // reached a root + path.pop().unwrap(); + if path.is_empty() { + break + } + path.last_mut().unwrap().0 += 1; + } else if i >= ops.len() { + // checked all sources, attempt evaluation first, this is crucial in preventing + // wasted work in multiple layer lowerings + match lock.ensemble.eval_state(p_state) { + Ok(()) => { + path.pop().unwrap(); + if path.is_empty() { + break + } else { + continue + } + } + // Continue on to lowering + Err(EvalError::Unevaluatable) => (), + Err(e) => { + lock.ensemble.stator.states[p_state].err = Some(e.clone()); + return Err(e) + } + } + let needs_lower = match lock.ensemble.stator.states[p_state].op { + Opaque(..) | Literal(_) | Assert(_) | Copy(_) | StaticGet(..) | Repeat(_) + | StaticLut(..) 
=> false, + Lut([lut, inx]) => { + if let Literal(ref lit) = lock.ensemble.stator.states[lut].op { + let lit = lit.clone(); + let out_w = lock.ensemble.stator.states[p_state].nzbw.get(); + let inx_w = lock.ensemble.stator.states[inx].nzbw.get(); + let no_op = if let Ok(inx_w) = u32::try_from(inx_w) { + if let Some(num_entries) = 1usize.checked_shl(inx_w) { + (out_w * num_entries) != lit.bw() + } else { + true + } + } else { + true + }; + if no_op { + // TODO should I add the extra arg to `Lut` to fix this edge case? + lock.ensemble.stator.states[p_state].op = Opaque(smallvec![], None); + lock.ensemble.dec_rc(inx).unwrap(); + } else { + lock.ensemble.stator.states[p_state].op = + StaticLut(ConcatType::from_iter([inx]), lit); + } + lock.ensemble.dec_rc(lut).unwrap(); + false + } else { + true + } + } + Get([bits, inx]) => { + if let Literal(ref lit) = lock.ensemble.stator.states[inx].op { + let lit = lit.clone(); + let lit_u = lit.to_usize(); + if lit_u >= lock.ensemble.stator.states[bits].nzbw.get() { + // TODO I realize now that no-op `get` specifically is fundamentally + // ill-defined to some extent because it returns `Option`, it + // must be asserted against, this + // provides the next best thing + lock.ensemble.stator.states[p_state].op = Opaque(smallvec![], None); + lock.ensemble.dec_rc(bits).unwrap(); + } else { + lock.ensemble.stator.states[p_state].op = ConcatFields( + ConcatFieldsType::from_iter([(bits, lit_u, bw(1))]), + ); + } + lock.ensemble.dec_rc(inx).unwrap(); + false + } else { + true + } + } + Set([bits, inx, bit]) => { + if let Literal(ref lit) = lock.ensemble.stator.states[inx].op { + let lit = lit.clone(); + let lit_u = lit.to_usize(); + let bits_w = lock.ensemble.stator.states[bits].nzbw.get(); + if lit_u >= bits_w { + // no-op + lock.ensemble.stator.states[p_state].op = Copy([bits]); + lock.ensemble.dec_rc(bit).unwrap(); + } else if let Some(lo_rem) = NonZeroUsize::new(lit_u) { + if let Some(hi_rem) = NonZeroUsize::new(bits_w - 1 - lit_u) { 
+ lock.ensemble.stator.states[p_state].op = + ConcatFields(ConcatFieldsType::from_iter([ + (bits, 0, lo_rem), + (bit, 0, bw(1)), + (bits, lit_u + 1, hi_rem), + ])); + } else { + // setting the last bit + lock.ensemble.stator.states[p_state].op = + ConcatFields(ConcatFieldsType::from_iter([ + (bits, 0, lo_rem), + (bit, 0, bw(1)), + ])); + } + } else if let Some(rem) = NonZeroUsize::new(bits_w - 1) { + // setting the first bit + lock.ensemble.stator.states[p_state].op = + ConcatFields(ConcatFieldsType::from_iter([ + (bit, 0, bw(1)), + (bits, 1, rem), + ])); + } else { + // setting a single bit + lock.ensemble.stator.states[p_state].op = Copy([bit]); + lock.ensemble.dec_rc(bits).unwrap(); + } + lock.ensemble.dec_rc(inx).unwrap(); + false + } else { + true + } + } + _ => true, + }; + drop(lock); + let lowering_done = if needs_lower { + // this is used to be able to remove ultimately unused temporaries + let mut temporary = EpochShared::shared_with(epoch_shared); + temporary.set_as_current(); + let lowering_done = match Ensemble::lower_op(&temporary, p_state) { + Ok(lowering_done) => lowering_done, + Err(EvalError::Unimplemented) => { + // finish lowering as much as possible + unimplemented = true; + true + } + Err(e) => { + temporary.remove_as_current(); + let mut lock = epoch_shared.epoch_data.borrow_mut(); + lock.ensemble.stator.states[p_state].err = Some(e.clone()); + return Err(e) + } + }; + // shouldn't be adding additional assertions + // TODO after migrating the old lowering tests to a starlight-like system, make + // sure there are none using assertions assert!(temporary. 
+ // assertions_empty()); + let states = temporary.take_states_added(); + temporary.remove_as_current(); + let mut lock = epoch_shared.epoch_data.borrow_mut(); + for p_state in states { + let state = &lock.ensemble.stator.states[p_state]; + if state.pruning_allowed() { + lock.ensemble.remove_state(p_state).unwrap(); + } + } + lowering_done + } else { + true + }; + if lowering_done { + path.pop().unwrap(); + if path.is_empty() { + break + } } else { - x0.clone() + // else do not call `path.pop`, restart the DFS here + path.last_mut().unwrap().0 = 0; } } else { - mux_(&x0, &x1, &inx_tmp) - }; - m.graft(&[out.state(), x0.state(), x1.state(), inx_tmp.state()]); - } - // TODO in the divisions especially and in other operations, we need to look at the - // operand tree and combine multiple ops together in a single lowering operation - UQuo([duo, div]) => { - let duo = Awi::opaque(m.get_nzbw(duo)); - let div = Awi::opaque(m.get_nzbw(div)); - let quo = division(&duo, &div).0; - m.graft(&[quo.state(), duo.state(), div.state()]); - } - URem([duo, div]) => { - let duo = Awi::opaque(m.get_nzbw(duo)); - let div = Awi::opaque(m.get_nzbw(div)); - let rem = division(&duo, &div).1; - m.graft(&[rem.state(), duo.state(), div.state()]); - } - IQuo([duo, div]) => { - let duo = Awi::opaque(m.get_nzbw(duo)); - let div = Awi::opaque(m.get_nzbw(div)); - let duo_msb = duo.msb(); - let div_msb = div.msb(); - // keeping arguments opaque - let mut tmp_duo = duo.clone(); - let mut tmp_div = div.clone(); - tmp_duo.neg_(duo_msb); - tmp_div.neg_(div_msb); - let mut quo = division(&tmp_duo, &tmp_div).0; - let mut tmp0 = InlAwi::from(duo_msb); - let tmp1 = InlAwi::from(div_msb); - tmp0.xor_(&tmp1).unwrap(); - quo.neg_(tmp0.to_bool()); - m.graft(&[quo.state(), duo.state(), div.state()]); + let mut p_next = ops[i]; + if lock.ensemble.stator.states[p_next].lowered_to_elementary { + // do not visit + path.last_mut().unwrap().0 += 1; + } else { + while let Copy([a]) = 
lock.ensemble.stator.states[p_next].op { + // special optimization case: forward Copies + lock.ensemble.stator.states[p_state].op.operands_mut()[i] = a; + lock.ensemble.stator.states[a].inc_rc(); + lock.ensemble.dec_rc(p_next).unwrap(); + p_next = a; + } + lock.ensemble.stator.states[p_next].lowered_to_elementary = true; + path.push((0, p_next)); + } + drop(lock); + } } - IRem([duo, div]) => { - let duo = Awi::opaque(m.get_nzbw(duo)); - let div = Awi::opaque(m.get_nzbw(div)); - let duo_msb = duo.msb(); - let div_msb = div.msb(); - // keeping arguments opaque - let mut tmp_duo = duo.clone(); - let mut tmp_div = div.clone(); - tmp_duo.neg_(duo_msb); - tmp_div.neg_(div_msb); - let mut rem = division(&tmp_duo, &tmp_div).1; - rem.neg_(duo_msb); - m.graft(&[rem.state(), duo.state(), div.state()]); + + if unimplemented { + Err(EvalError::Unimplemented) + } else { + Ok(()) } } - Ok(false) } diff --git a/starlight/src/lower/meta.rs b/starlight/src/lower/meta.rs index 0eee0c83..211e90b2 100644 --- a/starlight/src/lower/meta.rs +++ b/starlight/src/lower/meta.rs @@ -2,16 +2,57 @@ use std::{cmp::min, mem, num::NonZeroUsize}; +use awint::{ + awint_dag::{ + smallvec::{smallvec, SmallVec}, + ConcatFieldsType, + }, + bw, +}; + use crate::{ awi, + awint_dag::{ConcatType, Lineage, Op}, dag::{awi, inlawi, inlawi_ty, Awi, Bits, InlAwi}, }; const USIZE_BITS: usize = usize::BITS as usize; // This code here is especially messy because we do not want to get into -// infinite lowering loops. These first few functions need to use manual `get` -// and `set` and only literal macros within loop blocks. +// infinite lowering loops. These first few functions need to use manual +// concatenation and only literal macros within loop blocks. + +// Everything used to be done through `get` and `set`, but going straight to +// `StaticLut` or `Concat` or `ConcatFields` is a massive performance boost. 
+ +// TODO In the future if we want something more, we should have some kind of +// caching for known optimization results. + +// note that the $inx arguments are in order from least to most significant +macro_rules! static_lut { + ($lhs:ident; $lut:expr; $($inx:expr),*) => {{ + let nzbw = $lhs.state_nzbw(); + let op = Op::StaticLut( + ConcatType::from_iter([$( + $inx.state(), + )*]), + {use awi::*; awi!($lut)} + ); + $lhs.update_state( + nzbw, + op, + ).unwrap_at_runtime() + }}; +} + +pub fn reverse(x: &Bits) -> Awi { + let nzbw = x.nzbw(); + let mut out = SmallVec::with_capacity(nzbw.get()); + for i in 0..x.bw() { + out.push(x.get(x.bw() - 1 - i).unwrap().state()) + } + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(out))) +} /// Given `inx.bw()` bits, this returns `2^inx.bw()` signals for every possible /// state of `inx`. The `i`th signal is true only if `inx.to_usize() == i`. @@ -28,19 +69,14 @@ pub fn selector(inx: &Bits, cap: Option) -> Vec { } let lb_num = num.next_power_of_two().trailing_zeros() as usize; let mut signals = vec![]; - let lut0 = inlawi!(0100); - let lut1 = inlawi!(1000); for i in 0..num { let mut signal = inlawi!(1); for j in 0..lb_num { - let mut tmp = inlawi!(00); - tmp.set(0, inx.get(j).unwrap()).unwrap(); - tmp.set(1, signal.to_bool()).unwrap(); // depending on the `j`th bit of `i`, keep the signal line true if (i & (1 << j)) == 0 { - signal.lut_(&lut0, &tmp).unwrap(); + static_lut!(signal; 0100; inx.get(j).unwrap(), signal); } else { - signal.lut_(&lut1, &tmp).unwrap(); + static_lut!(signal; 1000; inx.get(j).unwrap(), signal); } } signals.push(signal); @@ -58,25 +94,21 @@ pub fn selector_awi(inx: &Bits, cap: Option) -> Awi { return awi!(1) } let lb_num = num.next_power_of_two().trailing_zeros() as usize; - let mut signals = Awi::zero(NonZeroUsize::new(num).unwrap()); - let lut0 = inlawi!(0100); - let lut1 = inlawi!(1000); + let nzbw = NonZeroUsize::new(num).unwrap(); + let mut signals = SmallVec::with_capacity(num); for i in 0..num 
{ let mut signal = inlawi!(1); for j in 0..lb_num { - let mut tmp = inlawi!(00); - tmp.set(0, inx.get(j).unwrap()).unwrap(); - tmp.set(1, signal.to_bool()).unwrap(); // depending on the `j`th bit of `i`, keep the signal line true if (i & (1 << j)) == 0 { - signal.lut_(&lut0, &tmp).unwrap(); + static_lut!(signal; 0100; inx.get(j).unwrap(), signal); } else { - signal.lut_(&lut1, &tmp).unwrap(); + static_lut!(signal; 1000; inx.get(j).unwrap(), signal); } } - signals.set(i, signal.to_bool()).unwrap(); + signals.push(signal.state()); } - signals + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(signals))) } /// Trailing smear, given the value of `inx` it will set all bits in the vector @@ -92,9 +124,6 @@ pub fn tsmear_inx(inx: &Bits, num_signals: usize) -> Vec { lb_num += 1; } let mut signals = vec![]; - let lut_s0 = inlawi!(10010000); - let lut_and = inlawi!(1000); - let lut_or = inlawi!(1110); for i in 0..num_signals { // if `inx < i` let mut signal = inlawi!(0); @@ -105,25 +134,15 @@ pub fn tsmear_inx(inx: &Bits, num_signals: usize) -> Vec { if (i & (1 << j)) == 0 { // update equality, and if the prefix is true and the `j` bit of `inx` is set // then the signal is set - let mut tmp0 = inlawi!(00); - tmp0.set(0, inx.get(j).unwrap()).unwrap(); - tmp0.set(1, prefix_equal.to_bool()).unwrap(); - let mut tmp1 = inlawi!(00); - tmp1.lut_(&lut_s0, &tmp0).unwrap(); - prefix_equal.set(0, tmp1.get(0).unwrap()).unwrap(); - - // or into `signal` - let mut tmp = inlawi!(00); - tmp.set(0, tmp1.get(1).unwrap()).unwrap(); - tmp.set(1, signal.to_bool()).unwrap(); - signal.lut_(&lut_or, &tmp).unwrap(); + + let inx_j = inx.get(j).unwrap(); + static_lut!(signal; 11111000; inx_j, prefix_equal, signal); + + static_lut!(prefix_equal; 0100; inx_j, prefix_equal); } else { // just update equality, the `j`th bit of `i` is 1 and cannot be less than // whatever the `inx` bit is - let mut tmp = inlawi!(00); - tmp.set(0, inx.get(j).unwrap()).unwrap(); - tmp.set(1, 
prefix_equal.to_bool()).unwrap(); - prefix_equal.lut_(&lut_and, &tmp).unwrap(); + static_lut!(prefix_equal; 1000; inx.get(j).unwrap(), prefix_equal); } } signals.push(signal); @@ -138,10 +157,8 @@ pub fn tsmear_awi(inx: &Bits, num_signals: usize) -> Awi { // need extra bit to get all `n + 1` lb_num += 1; } - let mut signals = Awi::zero(NonZeroUsize::new(num_signals).unwrap()); - let lut_s0 = inlawi!(10010000); - let lut_and = inlawi!(1000); - let lut_or = inlawi!(1110); + let nzbw = NonZeroUsize::new(num_signals).unwrap(); + let mut signals = SmallVec::with_capacity(num_signals); for i in 0..num_signals { // if `inx < i` let mut signal = inlawi!(0); @@ -152,47 +169,33 @@ pub fn tsmear_awi(inx: &Bits, num_signals: usize) -> Awi { if (i & (1 << j)) == 0 { // update equality, and if the prefix is true and the `j` bit of `inx` is set // then the signal is set - let mut tmp0 = inlawi!(00); - tmp0.set(0, inx.get(j).unwrap()).unwrap(); - tmp0.set(1, prefix_equal.to_bool()).unwrap(); - let mut tmp1 = inlawi!(00); - tmp1.lut_(&lut_s0, &tmp0).unwrap(); - prefix_equal.set(0, tmp1.get(0).unwrap()).unwrap(); - - // or into `signal` - let mut tmp = inlawi!(00); - tmp.set(0, tmp1.get(1).unwrap()).unwrap(); - tmp.set(1, signal.to_bool()).unwrap(); - signal.lut_(&lut_or, &tmp).unwrap(); + + let inx_j = inx.get(j).unwrap(); + static_lut!(signal; 11111000; inx_j, prefix_equal, signal); + + static_lut!(prefix_equal; 0100; inx_j, prefix_equal); } else { // just update equality, the `j`th bit of `i` is 1 and cannot be less than // whatever the `inx` bit is - let mut tmp = inlawi!(00); - tmp.set(0, inx.get(j).unwrap()).unwrap(); - tmp.set(1, prefix_equal.to_bool()).unwrap(); - prefix_equal.lut_(&lut_and, &tmp).unwrap(); + static_lut!(prefix_equal; 1000; inx.get(j).unwrap(), prefix_equal); } } - signals.set(i, signal.to_bool()).unwrap(); + signals.push(signal.state()); } - signals + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(signals))) } pub fn mux_(x0: &Bits, x1: &Bits, inx: 
&Bits) -> Awi { assert_eq!(x0.bw(), x1.bw()); assert_eq!(inx.bw(), 1); - let mut out = Awi::zero(x0.nzbw()); - let lut = inlawi!(1100_1010); + let nzbw = x0.nzbw(); + let mut signals = SmallVec::with_capacity(nzbw.get()); for i in 0..x0.bw() { - let mut tmp0 = inlawi!(000); - tmp0.set(0, x0.get(i).unwrap()).unwrap(); - tmp0.set(1, x1.get(i).unwrap()).unwrap(); - tmp0.set(2, inx.to_bool()).unwrap(); - let mut tmp1 = inlawi!(0); - tmp1.lut_(&lut, &tmp0).unwrap(); - out.set(i, tmp1.to_bool()).unwrap(); + let mut tmp = inlawi!(0); + static_lut!(tmp; 1100_1010; x0.get(i).unwrap(), x1.get(i).unwrap(), inx); + signals.push(tmp.state()); } - out + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(signals))) } /* @@ -218,19 +221,17 @@ pub fn dynamic_to_static_lut(out: &mut Bits, table: &Bits, inx: &Bits) { // if this is broken it breaks a lot of stuff assert!(table.bw() == (out.bw().checked_mul(1 << inx.bw()).unwrap())); let signals = selector(inx, None); - let lut = inlawi!(1111_1000); + let nzbw = out.nzbw(); + let mut tmp_output = SmallVec::with_capacity(nzbw.get()); for j in 0..out.bw() { let mut column = inlawi!(0); for (i, signal) in signals.iter().enumerate() { - let mut tmp = inlawi!(000); - tmp.set(0, signal.to_bool()).unwrap(); - tmp.set(1, table.get((i * out.bw()) + j).unwrap()).unwrap(); - tmp.set(2, column.to_bool()).unwrap(); - // if the column is set or both the cell and signal are set - column.lut_(&lut, &tmp).unwrap(); + static_lut!(column; 1111_1000; signal, table.get((i * out.bw()) + j).unwrap(), column); } - out.set(j, column.to_bool()).unwrap(); + tmp_output.push(column.state()); } + out.update_state(nzbw, Op::Concat(ConcatType::from_smallvec(tmp_output))) + .unwrap_at_runtime(); } pub fn dynamic_to_static_get(bits: &Bits, inx: &Bits) -> inlawi_ty!(1) { @@ -238,15 +239,9 @@ pub fn dynamic_to_static_get(bits: &Bits, inx: &Bits) -> inlawi_ty!(1) { return InlAwi::from(bits.to_bool()) } let signals = selector(inx, Some(bits.bw())); - let lut = 
inlawi!(1111_1000); let mut out = inlawi!(0); for (i, signal) in signals.iter().enumerate() { - let mut tmp = inlawi!(000); - tmp.set(0, signal.to_bool()).unwrap(); - tmp.set(1, bits.get(i).unwrap()).unwrap(); - tmp.set(2, out.to_bool()).unwrap(); - // horizontally OR the product of the signals and `bits` - out.lut_(&lut, &tmp).unwrap(); + static_lut!(out; 1111_1000; signal, bits.get(i).unwrap(), out); } out } @@ -256,97 +251,146 @@ pub fn dynamic_to_static_set(bits: &Bits, inx: &Bits, bit: &Bits) -> Awi { return Awi::from(bit) } let signals = selector(inx, Some(bits.bw())); - let mut out = Awi::zero(bits.nzbw()); - let lut = inlawi!(1101_1000); + let nzbw = bits.nzbw(); + let mut out = SmallVec::with_capacity(nzbw.get()); for (i, signal) in signals.iter().enumerate() { - let mut tmp0 = inlawi!(000); - tmp0.set(0, signal.to_bool()).unwrap(); - tmp0.set(1, bit.to_bool()).unwrap(); - tmp0.set(2, bits.get(i).unwrap()).unwrap(); - let mut tmp1 = inlawi!(0); // multiplex between using `bits` or the `bit` depending on the signal - tmp1.lut_(&lut, &tmp0).unwrap(); - out.set(i, tmp1.to_bool()).unwrap(); + let mut tmp = inlawi!(0); + static_lut!(tmp; 1101_1000; signal, bit, bits.get(i).unwrap()); + out.push(tmp.state()); } - out + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(out))) } pub fn resize(x: &Bits, w: NonZeroUsize, signed: bool) -> Awi { - let mut out = Awi::zero(w); - if out.nzbw() == x.nzbw() { - out.copy_(x).unwrap(); - } else if out.nzbw() < x.nzbw() { - for i in 0..out.bw() { - out.set(i, x.get(i).unwrap()).unwrap(); - } + if w == x.nzbw() { + Awi::from_bits(x) + } else if w < x.nzbw() { + Awi::new( + w, + Op::ConcatFields(ConcatFieldsType::from_iter([(x.state(), 0usize, w)])), + ) + } else if signed { + let extension = Awi::new( + NonZeroUsize::new(w.get() - x.bw()).unwrap(), + Op::Repeat([x.msb().state()]), + ); + Awi::new( + w, + Op::Concat(ConcatType::from_smallvec(smallvec![ + x.state(), + extension.state() + ])), + ) } else { - for i in 0..x.bw() 
{ - out.set(i, x.get(i).unwrap()).unwrap(); - } - if signed { - for i in x.bw()..out.bw() { - out.set(i, x.get(x.bw() - 1).unwrap()).unwrap(); - } - } // else the bits in `out` are automatically zero + let zero = Awi::zero(NonZeroUsize::new(w.get() - x.bw()).unwrap()); + Awi::new( + w, + Op::Concat(ConcatType::from_smallvec(smallvec![ + x.state(), + zero.state() + ])), + ) } - out } pub fn resize_cond(x: &Bits, w: NonZeroUsize, signed: &Bits) -> Awi { assert_eq!(signed.bw(), 1); - let mut out = Awi::zero(w); - if out.nzbw() == x.nzbw() { - out.copy_(x).unwrap(); - } else if out.nzbw() < x.nzbw() { - for i in 0..out.bw() { - out.set(i, x.get(i).unwrap()).unwrap(); - } + if w == x.nzbw() { + Awi::from_bits(x) + } else if w < x.nzbw() { + Awi::new( + w, + Op::ConcatFields(ConcatFieldsType::from_iter([(x.state(), 0usize, w)])), + ) } else { - for i in 0..x.bw() { - out.set(i, x.get(i).unwrap()).unwrap(); - } - let signed = signed.to_bool(); - for i in x.bw()..out.bw() { - out.set(i, signed).unwrap(); - } + let extension = Awi::new( + NonZeroUsize::new(w.get() - x.bw()).unwrap(), + Op::Repeat([signed.state()]), + ); + Awi::new( + w, + Op::Concat(ConcatType::from_smallvec(smallvec![ + x.state(), + extension.state() + ])), + ) } - out } /// Returns (`lhs`, true) if there are invalid values pub fn static_field(lhs: &Bits, to: usize, rhs: &Bits, from: usize, width: usize) -> (Awi, bool) { - let mut out = Awi::from_bits(lhs); if (width > lhs.bw()) || (width > rhs.bw()) || (to > (lhs.bw() - width)) || (from > (rhs.bw() - width)) { - (out, true) - } else { - for i in 0..width { - out.set(i + to, rhs.get(i + from).unwrap()).unwrap(); - } - (out, false) + return (Awi::from_bits(lhs), true); } + let res = if let Some(width) = NonZeroUsize::new(width) { + if let Some(lhs_rem_lo) = NonZeroUsize::new(to) { + if let Some(lhs_rem_hi) = NonZeroUsize::new(from) { + Awi::new( + lhs.nzbw(), + Op::ConcatFields(ConcatFieldsType::from_iter([ + (lhs.state(), 0usize, lhs_rem_lo), + 
(rhs.state(), from, width), + (lhs.state(), to + width.get(), lhs_rem_hi), + ])), + ) + } else { + Awi::new( + lhs.nzbw(), + Op::ConcatFields(ConcatFieldsType::from_iter([ + (lhs.state(), 0usize, lhs_rem_lo), + (rhs.state(), from, width), + ])), + ) + } + } else if let Some(lhs_rem_hi) = NonZeroUsize::new(lhs.bw() - width.get()) { + Awi::new( + lhs.nzbw(), + Op::ConcatFields(ConcatFieldsType::from_iter([ + (rhs.state(), from, width), + (lhs.state(), width.get(), lhs_rem_hi), + ])), + ) + } else { + Awi::new( + lhs.nzbw(), + Op::ConcatFields(ConcatFieldsType::from_iter([(rhs.state(), from, width)])), + ) + } + } else { + Awi::from_bits(lhs) + }; + (res, false) } /// This does not handle invalid arguments; set `width` to zero to cause no-ops pub fn field_width(lhs: &Bits, rhs: &Bits, width: &Bits) -> Awi { - let mut out = Awi::from_bits(lhs); let min_w = min(lhs.bw(), rhs.bw()); let signals = tsmear_inx(width, min_w); - let lut = inlawi!(1100_1010); + let nzbw = NonZeroUsize::new(signals.len()).unwrap(); + let mut mux_part = SmallVec::with_capacity(nzbw.get()); for (i, signal) in signals.into_iter().enumerate() { // mux_ between `lhs` or `rhs` based on the signal - let mut tmp0 = inlawi!(000); - tmp0.set(0, lhs.get(i).unwrap()).unwrap(); - tmp0.set(1, rhs.get(i).unwrap()).unwrap(); - tmp0.set(2, signal.to_bool()).unwrap(); - let mut tmp1 = inlawi!(0); - tmp1.lut_(&lut, &tmp0).unwrap(); - out.set(i, tmp1.to_bool()).unwrap(); + let mut tmp = inlawi!(0); + static_lut!(tmp; 1100_1010; lhs.get(i).unwrap(), rhs.get(i).unwrap(), signal); + mux_part.push(tmp.state()); + } + let mux_part = Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(mux_part))); + if let Some(lhs_rem_hi) = NonZeroUsize::new(lhs.bw() - nzbw.get()) { + Awi::new( + lhs.nzbw(), + Op::ConcatFields(ConcatFieldsType::from_iter([ + (mux_part.state(), 0usize, nzbw), + (lhs.state(), nzbw.get(), lhs_rem_hi), + ])), + ) + } else { + mux_part } - out } /// Given the diagonal control lines and input of a crossbar 
with output width @@ -362,22 +406,27 @@ pub fn crossbar( ) { assert!(signal_range.0 < signal_range.1); assert_eq!(signal_range.1 - signal_range.0, signals.len()); + + let nzbw = output.nzbw(); + let mut tmp_output = SmallVec::with_capacity(nzbw.get()); for j in 0..output.bw() { // output bar for ORing let mut out_bar = inlawi!(0); for i in 0..input.bw() { let signal_inx = output.bw() - 1 + i - j; if (signal_inx >= signal_range.0) && (signal_inx < signal_range.1) { - let mut inx = inlawi!(000); - inx.set(0, input.get(i).unwrap()).unwrap(); - inx.set(1, signals[signal_inx - signal_range.0].to_bool()) - .unwrap(); - inx.set(2, out_bar.to_bool()).unwrap(); - out_bar.lut_(&inlawi!(1111_1000), &inx).unwrap(); + static_lut!(out_bar; 1111_1000; + input.get(i).unwrap(), + signals[signal_inx - signal_range.0], + out_bar + ); } } - output.set(j, out_bar.to_bool()).unwrap(); + tmp_output.push(out_bar.state()); } + output + .update_state(nzbw, Op::Concat(ConcatType::from_smallvec(tmp_output))) + .unwrap_at_runtime(); } pub fn funnel_(x: &Bits, s: &Bits) -> Awi { @@ -436,6 +485,7 @@ pub fn ashr(x: &Bits, s: &Bits) -> Awi { // Not sure if there is a better way to do this. If we try to use the crossbar // signals in some way, we are guaranteed some kind of > O(1) time thing. + let msb = x.msb(); // get the `lb_num` that `tsmear_inx` uses, it can be `x.bw() - 1` because of // the `s < x.bw()` requirement, this single bit of difference is important // for powers of two because of the `lb_num += 1` condition it avoids. 
@@ -448,26 +498,18 @@ pub fn ashr(x: &Bits, s: &Bits) -> Awi { } if let Some(w) = NonZeroUsize::new(lb_num) { let mut gated_s = Awi::zero(w); - let lut_and = inlawi!(1000); // `gated_s` will be zero if `x.msb()` is zero, in which case `tsmear_inx` // produces all zeros to be ORed for i in 0..gated_s.bw() { - let mut tmp0 = inlawi!(00); - tmp0.set(0, s.get(i).unwrap()).unwrap(); - tmp0.set(1, x.msb()).unwrap(); let mut tmp1 = inlawi!(0); - tmp1.lut_(&lut_and, &tmp0).unwrap(); + static_lut!(tmp1; 1000; s.get(i).unwrap(), msb); gated_s.set(i, tmp1.to_bool()).unwrap(); } let or_mask = tsmear_awi(&gated_s, num); - let lut_or = inlawi!(1110); for i in 0..or_mask.bw() { let out_i = out.bw() - 1 - i; - let mut tmp0 = inlawi!(00); - tmp0.set(0, out.get(out_i).unwrap()).unwrap(); - tmp0.set(1, or_mask.get(i).unwrap()).unwrap(); let mut tmp1 = inlawi!(0); - tmp1.lut_(&lut_or, &tmp0).unwrap(); + static_lut!(tmp1; 1110; out.get(out_i).unwrap(), or_mask.get(i).unwrap()); out.set(out_i, tmp1.to_bool()).unwrap(); } } @@ -509,50 +551,64 @@ pub fn rotr(x: &Bits, s: &Bits) -> Awi { } pub fn bitwise_not(x: &Bits) -> Awi { - let mut out = Awi::zero(x.nzbw()); + let nzbw = x.nzbw(); + let mut out = SmallVec::with_capacity(nzbw.get()); for i in 0..x.bw() { let mut tmp = inlawi!(0); - let inx = InlAwi::from(x.get(i).unwrap()); - tmp.lut_(&inlawi!(01), &inx).unwrap(); - out.set(i, tmp.to_bool()).unwrap(); + static_lut!(tmp; 01; x.get(i).unwrap()); + out.push(tmp.state()); } - out + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(out))) } -pub fn bitwise(lhs: &Bits, rhs: &Bits, lut: inlawi_ty!(4)) -> Awi { +pub fn bitwise(lhs: &Bits, rhs: &Bits, lut: awi::Awi) -> Awi { assert_eq!(lhs.bw(), rhs.bw()); - let mut out = Awi::zero(lhs.nzbw()); + assert_eq!(lut.bw(), 4); + let nzbw = lhs.nzbw(); + let mut out = SmallVec::with_capacity(nzbw.get()); for i in 0..lhs.bw() { let mut tmp = inlawi!(0); - let mut inx = inlawi!(00); - inx.set(0, lhs.get(i).unwrap()).unwrap(); - inx.set(1, 
rhs.get(i).unwrap()).unwrap(); - tmp.lut_(&lut, &inx).unwrap(); - out.set(i, tmp.to_bool()).unwrap(); + tmp.update_state( + bw(1), + Op::StaticLut( + ConcatType::from_iter([lhs.get(i).unwrap().state(), rhs.get(i).unwrap().state()]), + lut.clone(), + ), + ) + .unwrap_at_runtime(); + out.push(tmp.state()); } - out + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(out))) } pub fn incrementer(x: &Bits, cin: &Bits, dec: bool) -> (Awi, inlawi_ty!(1)) { assert_eq!(cin.bw(), 1); - // half adder or subtractor - let lut = if dec { - inlawi!(1110_1001) - } else { - inlawi!(1001_0100) - }; - let mut out = Awi::zero(x.nzbw()); + let nzbw = x.nzbw(); + let mut out = SmallVec::with_capacity(nzbw.get()); let mut carry = InlAwi::from(cin.to_bool()); - for i in 0..x.bw() { - let mut carry_sum = inlawi!(00); - let mut inx = inlawi!(00); - inx.set(0, carry.to_bool()).unwrap(); - inx.set(1, x.get(i).unwrap()).unwrap(); - carry_sum.lut_(&lut, &inx).unwrap(); - out.set(i, carry_sum.get(0).unwrap()).unwrap(); - carry.bool_(carry_sum.get(1).unwrap()); + if dec { + for i in 0..x.bw() { + let mut tmp = inlawi!(0); + let b = x.get(i).unwrap(); + // half subtractor + static_lut!(tmp; 1001; carry, b); + out.push(tmp.state()); + static_lut!(carry; 1110; carry, b); + } + } else { + for i in 0..x.bw() { + let mut tmp = inlawi!(0); + let b = x.get(i).unwrap(); + // half adder + static_lut!(tmp; 0110; carry, b); + out.push(tmp.state()); + static_lut!(carry; 1000; carry, b); + } } - (out, carry) + ( + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(out))), + carry, + ) } // TODO select carry adder @@ -577,46 +633,52 @@ pub fn cin_sum(cin: &Bits, lhs: &Bits, rhs: &Bits) -> (Awi, inlawi_ty!(1), inlaw assert_eq!(cin.bw(), 1); assert_eq!(lhs.bw(), rhs.bw()); let w = lhs.bw(); - // full adder - let lut = inlawi!(1110_1001_1001_0100); - let mut out = Awi::zero(lhs.nzbw()); + let nzbw = lhs.nzbw(); + let mut out = SmallVec::with_capacity(nzbw.get()); let mut carry = InlAwi::from(cin.to_bool()); for 
i in 0..w { let mut carry_sum = inlawi!(00); - let mut inx = inlawi!(000); - inx.set(0, carry.to_bool()).unwrap(); - inx.set(1, lhs.get(i).unwrap()).unwrap(); - inx.set(2, rhs.get(i).unwrap()).unwrap(); - carry_sum.lut_(&lut, &inx).unwrap(); - out.set(i, carry_sum.get(0).unwrap()).unwrap(); + static_lut!(carry_sum; 1110_1001_1001_0100; + carry, + lhs.get(i).unwrap(), + rhs.get(i).unwrap() + ); + out.push(carry_sum.get(0).unwrap().state()); carry.bool_(carry_sum.get(1).unwrap()); } let mut signed_overflow = inlawi!(0); - let mut inx = inlawi!(000); - inx.set(0, lhs.get(w - 1).unwrap()).unwrap(); - inx.set(1, rhs.get(w - 1).unwrap()).unwrap(); - inx.set(2, out.get(w - 1).unwrap()).unwrap(); - signed_overflow.lut_(&inlawi!(0001_1000), &inx).unwrap(); - (out, carry, signed_overflow) + let a = lhs.get(w - 1).unwrap().state(); + let b = rhs.get(w - 1).unwrap().state(); + let c = *out.get(w - 1).unwrap(); + signed_overflow + .update_state( + bw(1), + Op::StaticLut(ConcatType::from_iter([a, b, c]), { + use awi::*; + awi!(0001_1000) + }), + ) + .unwrap_at_runtime(); + ( + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(out))), + carry, + signed_overflow, + ) } pub fn negator(x: &Bits, neg: &Bits) -> Awi { assert_eq!(neg.bw(), 1); - // half adder with input inversion control - let lut = inlawi!(0100_1001_1001_0100); - let mut out = Awi::zero(x.nzbw()); + let nzbw = x.nzbw(); + let mut out = SmallVec::with_capacity(nzbw.get()); let mut carry = InlAwi::from(neg.to_bool()); for i in 0..x.bw() { let mut carry_sum = inlawi!(00); - let mut inx = inlawi!(000); - inx.set(0, carry.to_bool()).unwrap(); - inx.set(1, x.get(i).unwrap()).unwrap(); - inx.set(2, neg.to_bool()).unwrap(); - carry_sum.lut_(&lut, &inx).unwrap(); - out.set(i, carry_sum.get(0).unwrap()).unwrap(); + // half adder with input inversion control + static_lut!(carry_sum; 0100_1001_1001_0100; carry, x.get(i).unwrap(), neg); + out.push(carry_sum.get(0).unwrap().state()); carry.bool_(carry_sum.get(1).unwrap()); } - 
out + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(out))) } /// Setting `width` to 0 guarantees that nothing happens even with other @@ -653,19 +715,20 @@ pub fn field_to(lhs: &Bits, to: &Bits, rhs: &Bits, width: &Bits) -> Awi { let mut lmask = tsmear_inx(&tmp, lhs.bw()); lmask.reverse(); - let mut out = Awi::from_bits(lhs); - let lut = inlawi!(1011_1111_1000_0000); + let nzbw = lhs.nzbw(); + let mut out = SmallVec::with_capacity(nzbw.get()); + // when `tmask` and `lmask` are both set, mux_ in `rhs` for i in 0..lhs.bw() { - let mut tmp = inlawi!(0000); - tmp.set(0, rhs_to_lhs.get(i).unwrap()).unwrap(); - tmp.set(1, tmask[i].to_bool()).unwrap(); - tmp.set(2, lmask[i].to_bool()).unwrap(); - tmp.set(3, lhs.get(i).unwrap()).unwrap(); let mut lut_out = inlawi!(0); - lut_out.lut_(&lut, &tmp).unwrap(); - out.set(i, lut_out.to_bool()).unwrap(); + static_lut!(lut_out; 1011_1111_1000_0000; + rhs_to_lhs.get(i).unwrap(), + tmask[i], + lmask[i], + lhs.get(i).unwrap() + ); + out.push(lut_out.state()); } - out + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(out))) } else { let lut = inlawi!(rhs[0], lhs[0]).unwrap(); let mut out = awi!(0); @@ -720,20 +783,20 @@ pub fn field(lhs: &Bits, to: &Bits, rhs: &Bits, from: &Bits, width: &Bits) -> Aw let mut lmask = tsmear_inx(&tmp, lhs.bw()); lmask.reverse(); - let mut out = Awi::from_bits(lhs); + let nzbw = lhs.nzbw(); + let mut out = SmallVec::with_capacity(nzbw.get()); // when `tmask` and `lmask` are both set, mux_ in `rhs` - let lut = inlawi!(1011_1111_1000_0000); for i in 0..lhs.bw() { - let mut tmp = inlawi!(0000); - tmp.set(0, rhs_to_lhs.get(i).unwrap()).unwrap(); - tmp.set(1, tmask[i].to_bool()).unwrap(); - tmp.set(2, lmask[i].to_bool()).unwrap(); - tmp.set(3, lhs.get(i).unwrap()).unwrap(); let mut lut_out = inlawi!(0); - lut_out.lut_(&lut, &tmp).unwrap(); - out.set(i, lut_out.to_bool()).unwrap(); + static_lut!(lut_out; 1011_1111_1000_0000; + rhs_to_lhs.get(i).unwrap(), + tmask[i], + lmask[i], + lhs.get(i).unwrap() + 
); + out.push(lut_out.state()); } - out + Awi::new(nzbw, Op::Concat(ConcatType::from_smallvec(out))) } else { // `lhs.bw() == 1`, `rhs.bw() == 1`, `width` is the only thing that matters let lut = inlawi!(rhs[0], lhs[0]).unwrap(); @@ -745,17 +808,12 @@ pub fn field(lhs: &Bits, to: &Bits, rhs: &Bits, from: &Bits, width: &Bits) -> Aw pub fn equal(lhs: &Bits, rhs: &Bits) -> inlawi_ty!(1) { let mut ranks = vec![vec![]]; - let lut_xnor = inlawi!(1001); for i in 0..lhs.bw() { - let mut tmp0 = inlawi!(00); - tmp0.set(0, lhs.get(i).unwrap()).unwrap(); - tmp0.set(1, rhs.get(i).unwrap()).unwrap(); let mut tmp1 = inlawi!(0); - tmp1.lut_(&lut_xnor, &tmp0).unwrap(); + static_lut!(tmp1; 1001; lhs.get(i).unwrap(), rhs.get(i).unwrap()); ranks[0].push(tmp1); } // binary tree reduce - let lut_and = inlawi!(1000); loop { let prev_rank = ranks.last().unwrap(); let rank_len = prev_rank.len(); @@ -764,11 +822,8 @@ pub fn equal(lhs: &Bits, rhs: &Bits) -> inlawi_ty!(1) { } let mut next_rank = vec![]; for i in 0..(rank_len / 2) { - let mut tmp0 = inlawi!(00); - tmp0.set(0, prev_rank[2 * i].to_bool()).unwrap(); - tmp0.set(1, prev_rank[2 * i + 1].to_bool()).unwrap(); let mut tmp1 = inlawi!(0); - tmp1.lut_(&lut_and, &tmp0).unwrap(); + static_lut!(tmp1; 1000; prev_rank[2 * i], prev_rank[2 * i + 1]); next_rank.push(tmp1); } if (rank_len & 1) != 0 { @@ -877,17 +932,16 @@ pub fn lut_set(table: &Bits, entry: &Bits, inx: &Bits) -> Awi { assert_eq!(table.bw(), entry.bw() * num_entries); let signals = selector(inx, Some(num_entries)); let mut out = Awi::from_bits(table); - let lut_mux = inlawi!(1100_1010); for (j, signal) in signals.into_iter().enumerate() { for i in 0..entry.bw() { let lut_inx = i + (j * entry.bw()); // mux_ between `lhs` or `entry` based on the signal - let mut tmp0 = inlawi!(000); - tmp0.set(0, table.get(lut_inx).unwrap()).unwrap(); - tmp0.set(1, entry.get(i).unwrap()).unwrap(); - tmp0.set(2, signal.to_bool()).unwrap(); let mut tmp1 = inlawi!(0); - tmp1.lut_(&lut_mux, 
&tmp0).unwrap(); + static_lut!(tmp1; 1100_1010; + table.get(lut_inx).unwrap(), + entry.get(i).unwrap(), + signal + ); out.set(lut_inx, tmp1.to_bool()).unwrap(); } } @@ -902,7 +956,6 @@ pub fn mul_add(out_w: NonZeroUsize, add: Option<&Bits>, lhs: &Bits, rhs: &Bits) (lhs, rhs) }; - let and = inlawi!(1000); let place_map0: &mut Vec> = &mut vec![]; let place_map1: &mut Vec> = &mut vec![]; for _ in 0..out_w.get() { @@ -910,13 +963,11 @@ pub fn mul_add(out_w: NonZeroUsize, add: Option<&Bits>, lhs: &Bits, rhs: &Bits) place_map1.push(vec![]); } for j in 0..rhs.bw() { + let rhs_j = rhs.get(j).unwrap(); for i in 0..lhs.bw() { if let Some(place) = place_map0.get_mut(i + j) { - let mut tmp = inlawi!(00); - tmp.set(0, rhs.get(j).unwrap()).unwrap(); - tmp.set(1, lhs.get(i).unwrap()).unwrap(); let mut ji = inlawi!(0); - ji.lut_(&and, &tmp).unwrap(); + static_lut!(ji; 1000; rhs_j, lhs.get(i).unwrap()); place.push(ji); } } diff --git a/testcrate/benches/bench.rs b/testcrate/benches/bench.rs index 0d8f90ee..01a20f60 100644 --- a/testcrate/benches/bench.rs +++ b/testcrate/benches/bench.rs @@ -1,7 +1,7 @@ #![feature(test)] extern crate test; -use starlight::{dag::*, Epoch, EvalAwi, LazyAwi}; +use starlight::{awi, dag::*, Epoch, EvalAwi, LazyAwi}; use test::Bencher; #[bench] @@ -14,12 +14,9 @@ fn lower_funnel(bencher: &mut Bencher) { let mut out = inlawi!(0u32); out.funnel_(&rhs, &s).unwrap(); let _eval = EvalAwi::from(&out); + epoch0.prune().unwrap(); epoch0.lower().unwrap(); epoch0.assert_assertions().unwrap(); - // FIXME - //awi::assert_eq!(epoch0.ensemble().stator.states.len(), 7045); - //awi::assert_eq!(epoch0.ensemble().backrefs.len_keys(), 26250); - //awi::assert_eq!(epoch0.ensemble().backrefs.len_vals(), 4773); }) } @@ -36,9 +33,40 @@ fn optimize_funnel(bencher: &mut Bencher) { epoch0.prune().unwrap(); epoch0.optimize().unwrap(); epoch0.assert_assertions().unwrap(); - // FIXME - //awi::assert_eq!(epoch0.ensemble().stator.states.len(), 7044); - 
//awi::assert_eq!(epoch0.ensemble().backrefs.len_keys(), 15304); - //awi::assert_eq!(epoch0.ensemble().backrefs.len_vals(), 1236); }) } + +#[bench] +fn loop_net(bencher: &mut Bencher) { + let epoch0 = Epoch::new(); + + let num_ports = 16; + let mut net = Net::zero(bw(5)); + for i in 0..num_ports { + let mut port = awi!(0u5); + port.usize_(i); + net.push(&port).unwrap(); + } + let w = bw(4); + let lazy = LazyAwi::opaque(w); + let eval_net = EvalAwi::from(&net); + let res = net.drive(&lazy); + let eval_res = EvalAwi::from_bool(res.is_none()); + { + use awi::*; + epoch0.optimize().unwrap(); + bencher.iter(|| { + for i in 0..(1 << w.get()) { + let mut inx = Awi::zero(w); + inx.usize_(i); + lazy.retro_(&inx).unwrap(); + epoch0.drive_loops().unwrap(); + awi::assert_eq!(eval_res.eval().unwrap().to_bool(), i >= num_ports); + if i < num_ports { + awi::assert_eq!(eval_net.eval().unwrap().to_usize(), i); + } + } + }); + drop(epoch0); + } +} diff --git a/testcrate/tests/fuzz_elementary.rs b/testcrate/tests/fuzz_elementary.rs index 8b53e519..2d7571e1 100644 --- a/testcrate/tests/fuzz_elementary.rs +++ b/testcrate/tests/fuzz_elementary.rs @@ -103,13 +103,14 @@ impl Mem { epoch.prune().unwrap(); } - pub fn verify_equivalence(&mut self, _epoch: &Epoch) -> Result<(), EvalError> { + pub fn verify_equivalence(&mut self, epoch: &Epoch) -> Result<(), EvalError> { // set all lazy roots for (lazy, lit) in &mut self.roots { lazy.retro_(lit).unwrap(); } // evaluate all + epoch.assert_assertions().unwrap(); for pair in self.a.vals() { assert_eq!(pair.eval.as_ref().unwrap().eval().unwrap(), pair.awi); } diff --git a/testcrate/tests/fuzz_lower.rs b/testcrate/tests/fuzz_lower.rs index 0bd49f82..60e7862e 100644 --- a/testcrate/tests/fuzz_lower.rs +++ b/testcrate/tests/fuzz_lower.rs @@ -148,10 +148,11 @@ impl Mem { &mut self.a[inx].dag } - pub fn finish(&mut self, _epoch: &Epoch) { + pub fn finish(&mut self, epoch: &Epoch) { for pair in self.a.vals_mut() { pair.eval = 
Some(EvalAwi::from(&pair.dag)) } + epoch.prune().unwrap(); } pub fn eval_and_verify_equal(&mut self, epoch: &Epoch) -> Result<(), EvalError> { @@ -165,6 +166,7 @@ impl Mem { // lower epoch.lower().unwrap(); + epoch.assert_assertions().unwrap(); // set remaining lazy roots for (lazy, lit) in self.roots.drain(..) { @@ -172,6 +174,7 @@ impl Mem { } // evaluate all + epoch.assert_assertions_strict().unwrap(); for pair in self.a.vals() { assert_eq!(pair.eval.as_ref().unwrap().eval().unwrap(), pair.awi); } @@ -766,12 +769,12 @@ fn fuzz_lower() { for _ in 0..N.1 { let epoch = Epoch::new(); - m.clear(); for _ in 0..N.0 { num_dag_duo(&mut rng, &mut m) } m.finish(&epoch); m.eval_and_verify_equal(&epoch).unwrap(); + m.clear(); drop(epoch); } } diff --git a/testcrate/tests/loop.rs b/testcrate/tests/loop.rs index aea5f03a..3a1f2155 100644 --- a/testcrate/tests/loop.rs +++ b/testcrate/tests/loop.rs @@ -1,8 +1,9 @@ -// TODO should loop be a capability of LazyAwi or something? Have an enum on the -// inside? 
-/* +use std::num::NonZeroUsize; + +use starlight::{awi, dag::*, Epoch, EvalAwi, LazyAwi, Loop}; + #[test] -fn invert_in_loop() { +fn loop_invert() { let epoch0 = Epoch::new(); let looper = Loop::zero(bw(1)); let mut x = awi!(looper); @@ -16,48 +17,142 @@ fn invert_in_loop() { { use awi::{assert_eq, *}; - t_dag.eval_all().unwrap(); - assert_eq!(t_dag.get_noted_as_extawi(p_x).unwrap(), awi!(1)); - t_dag.drive_loops(); - t_dag.eval_all().unwrap(); - assert_eq!(t_dag.get_noted_as_extawi(p_x).unwrap(), awi!(0)); - t_dag.drive_loops(); - t_dag.eval_all().unwrap(); - assert_eq!(t_dag.get_noted_as_extawi(p_x).unwrap(), awi!(1)); + let eval_x = EvalAwi::from(&x); + assert_eq!(eval_x.eval().unwrap(), awi!(1)); + epoch0.drive_loops().unwrap(); + assert_eq!(eval_x.eval().unwrap(), awi!(0)); + epoch0.drive_loops().unwrap(); + assert_eq!(eval_x.eval().unwrap(), awi!(1)); } + drop(epoch0); } // tests an incrementing counter #[test] -fn incrementer() { - let epoch0 = StateEpoch::new(); +fn loop_incrementer() { + let epoch0 = Epoch::new(); let looper = Loop::zero(bw(4)); - let val = Awi::from(looper.as_ref()); - let mut tmp = Awi::from(looper.as_ref()); + let val = EvalAwi::from(&looper); + let mut tmp = awi!(looper); tmp.inc_(true); looper.drive(&tmp).unwrap(); - let (mut op_dag, res) = OpDag::from_epoch(&epoch0); - res.unwrap(); + { + for i in 0..16 { + awi::assert_eq!(i, val.eval().unwrap().to_usize()); + epoch0.drive_loops().unwrap(); + } + } + drop(epoch0); +} - let p_val = op_dag.note_pstate(&epoch0, val.state()).unwrap(); +#[test] +fn loop_net4() { + let epoch0 = Epoch::new(); + let mut net = Net::zero(bw(4)); + net.push(&awi!(0xa_u4)).unwrap(); + net.push(&awi!(0xb_u4)).unwrap(); + net.push(&awi!(0xc_u4)).unwrap(); + net.push(&awi!(0xd_u4)).unwrap(); + let val = EvalAwi::from(&net); + let inx = LazyAwi::opaque(bw(2)); + net.drive(&inx).unwrap(); - op_dag.lower_all().unwrap(); + { + use awi::{assert_eq, *}; + inx.retro_(&awi!(0_u2)).unwrap(); + 
epoch0.drive_loops().unwrap(); + assert_eq!(val.eval().unwrap(), awi!(0xa_u4)); - let (mut t_dag, res) = TDag::from_op_dag(&mut op_dag); - res.unwrap(); + inx.retro_(&awi!(2_u2)).unwrap(); + epoch0.drive_loops().unwrap(); + assert_eq!(val.eval().unwrap(), awi!(0xc_u4)); - t_dag.verify_integrity().unwrap(); + inx.retro_(&awi!(1_u2)).unwrap(); + epoch0.drive_loops().unwrap(); + assert_eq!(val.eval().unwrap(), awi!(0xb_u4)); - t_dag.eval_all().unwrap(); + inx.retro_(&awi!(3_u2)).unwrap(); + epoch0.drive_loops().unwrap(); + assert_eq!(val.eval().unwrap(), awi!(0xd_u4)); + } + drop(epoch0); +} - t_dag.optimize_basic(); +fn exhaustive_net_test(epoch0: &Epoch, num_ports: awi::usize, diff: awi::isize) { + let mut net = Net::zero(bw(5)); + for i in 0..num_ports { + let mut port = awi!(0u5); + port.usize_(i); + net.push(&port).unwrap(); + } + let min_w = num_ports.next_power_of_two().trailing_zeros() as awi::usize; + let w = NonZeroUsize::new((min_w as awi::isize + diff) as awi::usize).unwrap(); + let lazy = LazyAwi::opaque(w); + let eval_net = EvalAwi::from(&net); + let res = net.drive(&lazy); + let eval_res = EvalAwi::from_bool(res.is_none()); + { + use awi::*; + epoch0.optimize().unwrap(); + for i in 0..(1 << w.get()) { + let mut inx = Awi::zero(w); + inx.usize_(i); + lazy.retro_(&inx).unwrap(); + epoch0.drive_loops().unwrap(); + awi::assert_eq!(eval_res.eval().unwrap().to_bool(), i >= num_ports); + if i < num_ports { + awi::assert_eq!(eval_net.eval().unwrap().to_usize(), i); + } + } + } +} - for i in 0..16 { - std::assert_eq!(i, t_dag.get_noted_as_extawi(p_val).unwrap().to_usize()); +#[test] +fn loop_net_no_ports() { + let epoch0 = Epoch::new(); + // done separately because it results in an undriven `Loop` + { + let net = Net::zero(bw(5)); + let res = net.drive(&awi!(0)); + { + use awi::assert; + // always none + assert!(res.is_none_at_runtime()); + } + } + drop(epoch0); +} - t_dag.drive_loops(); - t_dag.eval_all().unwrap(); +#[test] +fn loop_net() { + let epoch0 = 
Epoch::new(); + // one port + { + let mut net = Net::zero(bw(5)); + net.push(&awi!(0xa_u5)).unwrap(); + let lazy = LazyAwi::opaque(bw(1)); + let eval_net = EvalAwi::from(&net); + let res = net.drive(&lazy); + let eval_res = EvalAwi::from_bool(res.is_none()); + { + use awi::{assert_eq, *}; + lazy.retro_(&awi!(0)).unwrap(); + epoch0.drive_loops().unwrap(); + assert_eq!(eval_res.eval().unwrap(), awi!(0)); + assert_eq!(eval_net.eval().unwrap(), awi!(0xa_u5)); + // any nonzero index always returns a `None` from the function + lazy.retro_(&awi!(1)).unwrap(); + epoch0.drive_loops().unwrap(); + assert_eq!(eval_res.eval().unwrap(), awi!(1)); + } + } + for num_ports in 3..17 { + // test with index size one less than needed to index all ports + exhaustive_net_test(&epoch0, num_ports, -1); + exhaustive_net_test(&epoch0, num_ports, 0); + exhaustive_net_test(&epoch0, num_ports, 1); } + + drop(epoch0); } -*/ diff --git a/testcrate/tests/stats.rs b/testcrate/tests/stats.rs new file mode 100644 index 00000000..f8abba32 --- /dev/null +++ b/testcrate/tests/stats.rs @@ -0,0 +1,27 @@ +use starlight::{awi, dag::*, Epoch, EvalAwi, LazyAwi}; + +// this is done separately from the benchmarks because getting the `ensemble` is +// expensive +#[test] +fn stats_optimize_funnel() { + let epoch0 = Epoch::new(); + + let rhs = LazyAwi::opaque(bw(64)); + let s = LazyAwi::opaque(bw(5)); + let mut out = inlawi!(0u32); + out.funnel_(&rhs, &s).unwrap(); + let _eval = EvalAwi::from(&out); + epoch0.prune().unwrap(); + epoch0.lower().unwrap(); + epoch0.assert_assertions().unwrap(); + let ensemble = epoch0.ensemble(); + awi::assert_eq!(ensemble.stator.states.len(), 2436); + awi::assert_eq!(ensemble.backrefs.len_keys(), 8559); + awi::assert_eq!(ensemble.backrefs.len_vals(), 1317); + epoch0.optimize().unwrap(); + epoch0.assert_assertions().unwrap(); + let ensemble = epoch0.ensemble(); + awi::assert_eq!(ensemble.stator.states.len(), 0); + awi::assert_eq!(ensemble.backrefs.len_keys(), 5818); + 
awi::assert_eq!(ensemble.backrefs.len_vals(), 1237); +}