From 890d10cc4fd131ee27059e3c309ea38679293ab2 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Sun, 16 Oct 2022 21:52:48 -0300 Subject: Finish step_in and step_out (VM core) --- src/pattern.rs | 7 ++- src/vm/de.rs | 187 ++++++++++++++++++++++++++++++++++++--------------------- src/vm/mod.rs | 66 +++++++++++++++----- 3 files changed, 175 insertions(+), 85 deletions(-) (limited to 'src') diff --git a/src/pattern.rs b/src/pattern.rs index 6795481..2f7166a 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -50,9 +50,14 @@ impl Pattern { &mut frames, //&mut output, ); - let (pack, obj) = vm::Packer::new(interp, MAX_CALLS).deserialize(der)?; + let (mut packs, obj) = vm::Packer::new( + interp, + MAX_CALLS, + ).deserialize(der)?; // this should always be None debug_assert!(obj.is_none()); + debug_assert!(packs.len() == 1); + let pack = packs.pop().unwrap(); let de = De::deserialize(vm::Unpacker::new(pack, MAX_CALLS)); todo!() } diff --git a/src/vm/de.rs b/src/vm/de.rs index 3290ab0..9583962 100644 --- a/src/vm/de.rs +++ b/src/vm/de.rs @@ -52,47 +52,9 @@ impl<'packer, 'pat> FramesMut<'packer, 'pat> { fn iter_active_mut<'a>(&'a mut self) -> impl Iterator> where 'packer: 'a { self.iter_mut().filter(|frame| { - frame.matches() + frame.active() }) } - - /// Steps the VM into the next operation. - fn step_in(&mut self) { - self.iter_mut().for_each(|frame| { - if let Some(ref mut overstep) = frame.overstep { - *overstep += 1; - } else { - if !frame.next() { - frame.overstep = Some(1); - } else if matches!( - frame.op(), - PatternElement::ValueSubtree { .. }, - ) { - todo!() - } - } - }); - } - - fn step_out<'de>(&mut self, pack: Pack<'pat, 'de>) -> Pack<'pat, 'de> { - self.iter_mut().for_each(|frame| { - if let Some(ref mut overstep) = frame.overstep { - if *overstep > 0 { - *overstep -= 1; - } - } else { - if let Some(0) = frame.overstep { - frame.overstep = None; - } - if frame.overstep.is_none() { - if frame.prev() { - todo!(); - } - } - } - }); - pack - } } impl<'packer, 'pat> Frames<'packer, 'pat> { @@ -102,7 +64,7 @@ impl<'packer, 'pat> Frames<'packer, 'pat> { fn iter_active<'a>(&'a self) -> impl Iterator> where 'packer: 'a { self.iter().filter(|frame| { - frame.matches() + frame.active() }) } } @@ -131,33 +93,118 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { frames: &*self.interp.frames, } } -} -// what steps do we have to take? -// -// 1. figure out what type we need to deserialize (and ask the deserializer -// for it). -// 2. visit value. figure out whether we need to store it or not? -// 3. if we need to store it how do we figure out *where* to store it? -// 4. if we *don't* need to store it, what do we do? -// 5. how do we tell if we do or don't need to store it? how do we propagate -// those requirements deeper into the Deserialize's and how do we bring -// the values back out (recursively?) to parent Deserialize's, without -// wasting time storing things we don't actually care about? -// 5.a. just have a flag in the DeserializeSeed for whether to capture the -// values. propagation is more or less trivial from there. -// 6. how do you handle value subtrees? -// 6.a. you don't. for now. -// 7. how do you handle errors? -// 7.a. put them into a "state" and raise a D::Error::custom. then -// override it in the relevant Pattern call. + /// Steps the VM into the next operation. + fn step_in(&mut self) { + // iterate up to the *live* length (i.e. the loop is allowed to modify + // the length). + // NOTE: we need to use while-let so as to not borrow anything in an + // iterator. filtering happens on the *result* of the iterator. + let mut index_iter = 0..; + while let Some(index) = index_iter.next().filter(|&i| { + i < self.interp.frames.len() + }) { + let frame = &mut self.interp.frames[index]; + if frame.overstep > 0 || !frame.matches { + // overstepped and non-matching frames + frame.overstep += 1; + } else { + if !frame.next() { + // empty/end-of frames + // 1 layer of step-in. + // step-out will undo this. + // this is correct because this branch implies overstep = 0 + frame.overstep = 1; + } else if matches!( + frame.op(), + PatternElement::SubtreeMarker, + ) { + // subtrees! + // these are tricky, because the current frame can be moved + // in memory. so we have to use indexing every time. + // tho first we set it as overstep because it has special + // handling. + frame.overstep = 1; + let mut at = index + 1; + while self.interp.frames[index].next() { + let op = self.interp.frames[index].op(); + if let PatternElement::ValueSubtree { + index: subtree, .. + } = op { + let new_frame = Frame { + ops: &self.interp.pat.protos[subtree][..], + iar: None, + overstep: 0, + matches: true, + }; + // we want the "newest" frame last, so it is + // easier to unwind back. + self.interp.frames.insert(at, new_frame); + at += 1; + } else { + unreachable!() + } + } + } + } + } + } + + /// Steps the VM back into the previous operation. + fn step_out( + &mut self, + mut packs: Vec>, + ) -> Vec> { + let mut index_iter = 0..; + while let Some(index) = index_iter.next().filter(|&i| { + i < self.interp.frames.len() + }) { + // iterate backwards + let index = self.interp.frames.len() - index - 1; + let frame = &mut self.interp.frames[index]; + if frame.overstep > 0 { + // handle overstep + frame.overstep -= 1; + } else { + // unwind frame + if frame.prev() { + // successfully unwound. do nothing. + } else { + // find parent frame. + let mut count = 1; + let mut target = index; + while count > 0 && target > 0 { + target -= 1; + match self.interp.frames[target].num_subtrees() { + Some(num) if num <= count => { + count -= num; + }, + Some(num) => { + count = 0; + }, + None => { + count += 1; + }, + } + } + if count == 0 { + // has parent frame + let pack = packs.remove(index); + todo!("merge packs") + } + } + } + } + packs + } +} impl<'pat, 'state, 'de, O> serde::de::DeserializeSeed<'de> for &mut Packer<'pat, 'state, O> where O: Serialize, { - type Value = (Pack<'pat, 'de>, Option>); + type Value = (Vec>, Option>); fn deserialize( mut self, deserializer: D, @@ -165,7 +212,7 @@ where where D: serde::Deserializer<'de> { - self.frames_mut().step_in(); + self.step_in(); let pat = self.interp.pat; let target_type = self.frames().iter_active().fold( Type::IgnoredAny, @@ -248,7 +295,7 @@ where Type::Enum { name, variants } => { deserializer.deserialize_enum(name, variants, &mut *self) }, - }.map(|(pack, obj)| (self.frames_mut().step_out(pack), obj)) + }.map(|(packs, obj)| (self.step_out(packs), obj)) } } @@ -265,16 +312,18 @@ macro_rules! vs { if self.collecting { obj = Some(SerdeObject::$obj(v)); } - let mut pack = Pack::default(); + let mut packs = Vec::new(); self.frames_mut().iter_active_mut().try_for_each(|frame| { + let mut pack = Pack::default(); let mut map = IndexMap::new(); if let Some(name) = frame.get_name(pat) { map.insert(name, (Pack::default(), SerdeObject::$obj(v))); } pack.subpacks.push(map); + packs.push(pack); Ok(()) })?; - Ok((pack, obj)) + Ok((packs, obj)) } } } @@ -284,7 +333,7 @@ for &mut Packer<'pat, 'state, O> where O: Serialize, { - type Value = (Pack<'pat, 'de>, Option>); + type Value = (Vec>, Option>); fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "unsure") } @@ -390,6 +439,7 @@ where { // FIXME subtrees let mut obj = None; + let mut packs = Vec::new(); let mut pack = Pack::default(); if self.collecting { obj = Some(SerdeObject::Unit); @@ -399,7 +449,8 @@ where // map.insert(name, (Default::default(), SerdeObject::Unit)); //} pack.subpacks.push(map); - Ok((pack, obj)) + packs.push(pack); + Ok((packs, obj)) } fn visit_newtype_struct( self, @@ -633,9 +684,9 @@ mod tests { let mut frames = Default::default(); let interp = Interpreter::new(&consts, &mut err, &mut frames); let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); - let (pack, obj) = packed.unwrap(); + let (packs, obj) = packed.unwrap(); assert!(obj.is_none()); - assert_eq!(pack.subpacks[0]["hello"].1, SerdeObject::U64(3)); + assert_eq!(packs[0].subpacks[0]["hello"].1, SerdeObject::U64(3)); } } diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 8122778..dca15a9 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -95,6 +95,8 @@ pub(crate) enum PatternElement { /// expected value of this entry. name_and_value: These, }, + /// Marks the end of pattern iteration and the start of subtrees (if any). + SubtreeMarker, /// A value subtree is a subtree for values. /// /// It is applied *after* tags, and thus any value subtrees come last in @@ -102,10 +104,10 @@ pub(crate) enum PatternElement { ValueSubtree { /// The proto index of the subtree. index: usize, - /// Whether to allow this value subtree to produce no results. + /// Whether to allow this value subtree to match nothing. /// /// By default, a datafu pattern only matches a tree if every branch of - /// the tree produces results. This enables opting out of that. + /// the tree matches something. This enables opting out of that. optional: bool, }, } @@ -292,15 +294,16 @@ pub(crate) enum SerdeObject<'de> { } impl<'de> SerdeObject<'de> { - fn check(self, ty: Option) -> Result { + /// Checks the type of this object. + fn check(&self, ty: Option) -> Result<(), E> { let ty = match ty { - None => return Ok(self), + None => return Ok(()), Some(ty) => ty, }; match (ty, self) { | (Type::Any, v) | (Type::IgnoredAny, v) - => Ok(v), + => Ok(()), | (Type::Bool, v @ SerdeObject::Bool(_)) | (Type::I8, v @ SerdeObject::I8(_)) | (Type::I16, v @ SerdeObject::I16(_)) @@ -324,7 +327,7 @@ impl<'de> SerdeObject<'de> { | (Type::Unit, v @ SerdeObject::Unit) | (Type::Seq, v @ SerdeObject::Seq(_)) | (Type::Map, v @ SerdeObject::Map(_)) - => Ok(v), + => Ok(()), _ => todo!(), } } @@ -358,8 +361,6 @@ pub(crate) struct Interpreter<'pat, 'state, O: Serialize> { error: &'state mut Option, /// The current interpreter frames. frames: &'state mut Vec>, - ///// The final output. - //output: &'state Cell>, } pub(crate) struct Frame<'pat> { @@ -367,10 +368,12 @@ pub(crate) struct Frame<'pat> { ops: &'pat [PatternElement], /// The instruction index being processed. iar: Option, - /// How many steps this frame has failed to match. - overstep: Option, - ///// Elements collected while processing this frame? - //path: Pack<'pat, 'de>, + /// How many steps this frame has not actually advanced for. + /// + /// This is used at end of frame and on match failure. + overstep: usize, + /// Whether this frame matches the data so far. + matches: bool, } impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> { @@ -384,7 +387,8 @@ impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> { frames.push(Frame { ops: &pat.protos[0], iar: None, - overstep: None, + overstep: 0, + matches: true, //path: Default::default(), }); Self { @@ -461,12 +465,42 @@ impl<'pat> Frame<'pat> { /// /// Panics if called on a non-matching frame or if iteration hasn't begun. fn op(&self) -> PatternElement { - assert!(self.matches(), "op() called on non-matching frame"); + assert!(self.active(), "op() called on inactive frame"); + self.ops[self.iar.expect("ops[iar]")] + } + + /// Counts the number of *active* subtrees, if any. + /// + /// # Panics + /// + /// Panics if iteration hasn't begun. + fn num_subtrees(&self) -> Option { + let iar = self.iar.expect("iar"); + // check if there are any subtrees + matches!( + self.ops[iar], + | PatternElement::ValueSubtree { .. } + | PatternElement::SubtreeMarker + ).then(|| { + // count the number of subtrees + self.ops[0..=iar].iter().rev().take_while(|x| { + matches!(x, PatternElement::ValueSubtree { .. }) + }).count() + }) + } + + /// Returns the raw instruction. + /// + /// # Panics + /// + /// Panics if iteration hasn't begun. + fn raw_op(&self) -> PatternElement { self.ops[self.iar.expect("ops[iar]")] } - fn matches(&self) -> bool { - self.overstep.is_none() + /// Returns whether this frame is active (not overstepped). + fn active(&self) -> bool { + self.overstep == 0 } /// Rewinds the instruction address register. -- cgit 1.4.1