summary refs log blame commit diff stats
path: root/src/vm.rs
blob: 62fb074b354e678aed74bf0f0a9a3fa4c58ae7a6 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

















                                                                              




                               
                  
                  



                              

                                        
                                                           








                                                                                                               
                                   
                                                  


                                                                              
                                 

 











                                                       






















                                                                                        
                                    











































                                                        
                                  
             
                                  
                 
                          
                       
                          
                                                            

                                                             








                                                                              
                                                            








                                                                         



                                 



                                                                             

                                                           

                                                          
                                                     


                     
















                                                                            




                                                                            

                                                                               


                                        
                                                
                                                                  
                                                                                                        


                                                 































                                                             
         












                                                                            








                                                             
                                         
                                        








                                             





















































                                                                
                                                  
                                                                                                                                                









                                                                            

                                                             
                        

                                                           
                                                                         
                                








                                                         










































                                                                            





                                                             
                                      

                                                                            
                                                   

                                                                              




                                                                   

























                                                                          
              




















                                                                                 
                                                    








                                                                              
             












                                                                     
                                                    











                                                                     
                                                                     






























                                                             
/*
 * This file is part of Datafu
 * Copyright (C) 2021  Soni L.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

use std::collections::BTreeMap;
use std::marker::PhantomData;

use regex::Regex;

use crate::KVPair;
use crate::RefOwn;
use crate::PatternTypes;
use crate::Predicate;
use crate::errors::MatchError;

pub(crate) const MAX_CALLS: usize = 250;

type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>;

// TODO: use a builder for this?
/// The constant pool for a pattern.
pub(crate) struct PatternConstants<T: PatternTypes> {
    // last proto is implicitly the whole pattern.
    pub(crate) protos: Vec<Vec<PatternElement<T>>>,
    // Note that we can borrow these when creating the output map.
    // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f
    pub(crate) strings: Vec<String>,
    pub(crate) regices: Vec<Regex>,
    pub(crate) predicates: Vec<Box<Predicate<T>>>,
    // NOTE these are part of the constant pool and so have lifetime analogous
    // to 'a (consistently used to indicate constant pool lifetime) when used
    // elsewhere. In particular, they can't be yielded by the iterator.
    pub(crate) defs: Vec<T::Own>,
}

impl<T: PatternTypes> Default for PatternConstants<T> {
    fn default() -> Self {
        Self {
            protos: Default::default(),
            strings: Default::default(),
            regices: Default::default(),
            predicates: Default::default(),
            defs: Default::default(),
        }
    }
}

/// A pattern element.
pub(crate) enum PatternElement<T: PatternTypes> {
    Arrow,

    Identifier(usize),

    StringKey(usize, bool),
    RegexKey(usize, bool),
    ParameterKey(usize, bool),
    KeySubtree(usize, bool),
    ValueSubtree(usize, bool),
    ApplyPredicate(usize, bool, PhantomData<fn(&PatternConstants<T>) -> &Predicate<T>>),

    End
}

impl<T: PatternTypes> Copy for PatternElement<T> {}

impl<T: PatternTypes> Clone for PatternElement<T> {
    fn clone(&self) -> Self { *self }
}

struct Frame<'a, 'b, T: PatternTypes> {
    obj: RefOwn<'b, T::Ref, T::Own>,
    ops: &'a Vec<PatternElement<T>>,
    iar: Option<usize>,
    depth: usize,
    path: Vec<Holder<'a, 'b, T>>,
    in_key: bool,
}

impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> {
    /// Advances the instruction address register.
    ///
    /// # Returns
    ///
    /// `true` if successful, `false` otherwise.
    fn next(&mut self) -> bool {
        let new = self.iar.map_or(0, |v| v + 1);
        new < self.ops.len() && {
            self.iar = Some(new);
            true
        }
    }

    /// Returns the current instruction.
    fn op(&self) -> PatternElement<T> {
        self.ops[self.iar.expect("ops[iar]")]
    }

    /// Rewinds the instruction address register.
    ///
    /// # Returns
    ///
    /// `true` if successful, `false` otherwise.
    fn prev(&mut self) -> bool {
        let new = self.iar.expect("iar").checked_sub(1);
        new.is_some() && {
            self.iar = new;
            true
        }
    }
}

/// Stores a single match.
///
/// See also Holder.
enum HolderState<'a, 'b, T: PatternTypes> {
    /// Empty holder, for KV pair.
    EmptyKey,
    /// Empty holder, for subtree.
    EmptySubtree,
    /// Non-empty KV pair.
    Key(KVPair<'b, T>),
    /// Non-empty subtree.
    Subtree(Matches<'a, 'b, T>, RefOwn<'b, T::Ref, T::Own>),
    /// Preloaded value. Usually the first holder in a frame.
    Value(RefOwn<'b, T::Ref, T::Own>),
}

impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> {
    fn clone(&self) -> Self {
        match self {
            HolderState::EmptyKey => HolderState::EmptyKey,
            HolderState::EmptySubtree => HolderState::EmptySubtree,
            HolderState::Key(v) => HolderState::Key(*v),
            HolderState::Subtree(m, v) => HolderState::Subtree(m.clone(), *v),
            HolderState::Value(v) => HolderState::Value(*v),
        }
    }
}

impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> {
    fn is_empty(&self) -> bool {
        matches!(self, HolderState::EmptyKey | HolderState::EmptySubtree)
    }

    fn has_value(&self) -> bool {
        !self.is_empty()
    }

    fn is_subtree(&self) -> bool {
        matches!(self, HolderState::EmptySubtree | HolderState::Subtree {..})
    }

    fn value(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> {
        match *self {
            HolderState::Key((_, value)) => Some(value),
            HolderState::Subtree(_, value) => Some(value),
            HolderState::Value(value) => Some(value),
            _ => None
        }
    }

    fn key(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> {
        match *self {
            HolderState::Key((key, _)) => Some(key),
            _ => None
        }
    }

    fn clear(&mut self) {
        match *self {
            HolderState::Key(_) => *self = HolderState::EmptyKey,
            HolderState::Subtree(_, _) => *self = HolderState::EmptySubtree,
            HolderState::Value(_) => unreachable!(),
            _ => {},
        };
        assert!(self.is_empty());
    }
}

/// Stores a single match and associated metadata.
///
/// A single match is generally a key-value pair, but may be a collection of
/// named pairs in the case of subtree matches, or just a value for the initial
/// holder.
struct Holder<'a, 'b, T: PatternTypes> {
     name: Option<&'a str>,
     value: HolderState<'a, 'b, T>,
     parent: Option<RefOwn<'b, T::Ref, T::Own>>,
     iterator: Option<Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>>,
     filters: Vec<Box<dyn (for<'c> Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>,
}

impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> {
    fn next(&mut self) -> Result<bool, MatchError> {
        self.ensure_iterator()?;
        match self {
            Self {
                value: ref mut v,
                iterator: Some(ref mut it),
                ref filters,
                ..
            } => {
                let is_subtree = v.is_subtree();
                let mut next_v;
                loop {
                    next_v = match it.next() {
                        Some(pair) => HolderState::Key(pair),
                        None => return Ok(false)
                    };
                    for filter in filters {
                        filter(&mut next_v)?;
                        if next_v.is_empty() {
                            break;
                        }
                    }
                    if next_v.has_value() {
                        break;
                    }
                }
                assert!(next_v.has_value());
                assert!(next_v.is_subtree() == is_subtree);
                *v = next_v;
                Ok(true)
            },
            _ => unreachable!()
        }
    }

    /// Ensure `self.iterator.is_some()`, creating an iterator if necessary.
    fn ensure_iterator(&mut self) -> Result<(), MatchError> {
        if self.iterator.is_none() {
            let iter = T::pairs(self.parent.unwrap());
            if iter.is_none() {
                return Err(MatchError::UnsupportedOperation);
            }
            self.iterator = iter;
        }
        assert!(self.iterator.is_some());
        Ok(())
    }
}

impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> {
    fn default() -> Self {
        Self {
            name: Default::default(),
            value: HolderState::EmptyKey,
            parent: Default::default(),
            iterator: Default::default(),
            filters: Default::default(),
        }
    }
}

pub struct Matcher<'a, 'b, T: PatternTypes> {
    defs: &'a PatternConstants<T>,
    frame: Frame<'a, 'b, T>,
}

// TODO:
//
// [x] Arrow
// [x] StringKey
// [x] RegexKey
// [ ] KeySubtree
// [ ] ValueSubtree
// [x] Ident
// [ ] Param
// [x] ApplyPredicate
// [x] End

/// Helper for `PatternElement::StringKey`.
fn on_string_key<'a, 'b, T: PatternTypes>(
    matcher: &mut Matcher<'a, 'b, T>,
    id: usize,
    skippable: bool,
) -> Result<bool, MatchError> {
    let path = matcher.frame.path.last_mut().unwrap();
    assert!(path.iterator.is_none());
    let key = &matcher.defs.strings[id];
    let iter = T::get(path.parent.unwrap(), RefOwn::Str(key));
    match iter {
        None => Err(MatchError::UnsupportedOperation),
        Some(opt) => {
            path.iterator = Some(Box::new(opt.into_iter()));
            Ok(true)
        }
    }
}

/// Helper for `PatternElement::RegexKey`.
fn on_regex_key<'a, 'b, T: PatternTypes>(
    matcher: &mut Matcher<'a, 'b, T>,
    id: usize,
    skippable: bool,
) -> Result<bool, MatchError> {
    matcher.frame.path.last_mut().unwrap().ensure_iterator()?;
    let re = &matcher.defs.regices[id];
    let path = matcher.frame.path.last_mut().unwrap();
    path.filters.push(Box::new(move |value| {
        let s = T::as_str(value.key().unwrap());
        match (s.map_or(false, |s| re.is_match(s)), skippable) {
            (true, _) => Ok(()),
            (false, true) => {
                value.clear();
                Ok(())
            },
            (false, false) => Err(MatchError::ValidationError),
        }
    }));
    Ok(true)
}

impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
    pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> {
        let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?;
        let ops: &Vec<_> = &defs.protos[proto];
        Ok(Self {
            defs: defs,
            frame: Frame {
                obj: obj,
                ops: ops,
                iar: None,
                depth: depth,
                path: if ops.is_empty() {
                    // TODO decide on empty matcher behaviour
                    todo!()
                } else {
                    let mut holder = Holder::default();
                    holder.value = HolderState::Value(obj);
                    holder.iterator = Some(Box::new(std::iter::empty()));
                    vec![holder]
                },
                in_key: false,
            },
        })
    }

    fn on_in_key(&mut self) -> Result<bool, MatchError> {
        match self.frame.op() {
            PatternElement::End => {
                let path = self.frame.path.last_mut().unwrap();
                if path.next()? {
                    Ok(false)
                } else {
                    drop(path);
                    self.frame.path.pop().unwrap();
                    // stop at previous End, or start of frame
                    while self.frame.prev() {
                        if matches!(self.frame.op(), PatternElement::End) {
                            break;
                        }
                    }
                    // is start of frame?
                    if !self.frame.prev() {
                        self.frame.path.clear();
                    }
                    Ok(true)
                }
            },
            PatternElement::ApplyPredicate(id, skippable, _) => {
                // failing on T::get() is already handled, but we may need a
                // T::pairs(). construct it here.
                self.frame.path.last_mut().unwrap().ensure_iterator()?;
                let pred = &self.defs.predicates[id];
                let path = self.frame.path.last_mut().unwrap();
                path.filters.push(Box::new(move |value| {
                    match (pred(value.value().unwrap()), skippable) {
                        (true, _) => Ok(()),
                        (false, true) => {
                            value.clear();
                            Ok(())
                        },
                        (false, false) => Err(MatchError::ValidationError),
                    }
                }));
                Ok(true)
            },
            PatternElement::StringKey(id, skippable) => {
                on_string_key(self, id, skippable)
            },
            PatternElement::RegexKey(id, skippable) => {
                on_regex_key(self, id, skippable)
            },
            _ => unreachable!("on_in_key")
        }
    }

    fn on_not_in_key(&mut self) -> Result<bool, MatchError> {
        match self.frame.op() {
            PatternElement::Arrow => {
                // this *should* always pass.
                assert!(self.frame.path.last().unwrap().iterator.is_some());
                let mut holder = Holder::default();
                holder.parent = self.frame.path.last().unwrap().value.value();
                assert!(holder.parent.is_some());
                self.frame.path.push(holder);
                Ok(false)
            },
            PatternElement::Identifier(id) => {
                let name = self.defs.strings.get(id).map(|s| &**s);
                let path = self.frame.path.last_mut().unwrap();
                path.name = name;
                assert!(path.iterator.is_none());
                // we don't actually create the iterator here,
                // as we may still wanna use T::get() instead.
                Ok(true)
            },
            PatternElement::ApplyPredicate(id, skippable, _) => {
                assert!(self.frame.path.len() == 1);
                let pred = &self.defs.predicates[id];
                let value = self.frame.path.last().unwrap().value.value();
                match (pred(value.unwrap()), skippable) {
                    (true, _) => Ok(false),
                    (false, true) => {
                        self.frame.path.clear();
                        // any Ok(_) will do
                        Ok(false)
                    },
                    (false, false) => Err(MatchError::ValidationError),
                }
            },
            PatternElement::StringKey(id, skippable) => {
                on_string_key(self, id, skippable)
            },
            PatternElement::RegexKey(id, skippable) => {
                on_regex_key(self, id, skippable)
            },
            _ => unreachable!("on_not_in_key")
        }
    }

    fn on_end(&mut self) -> Result<(bool, Matches<'a, 'b, T>), MatchError> {
        match self.frame.op() {
            PatternElement::End => {
                assert!(!self.frame.path.last().expect("path").value.is_empty());
                // TODO this for loop is duplicated with ApplyPredicate
                // TODO figure out how to deduplicate them
                let mut res: Matches<'a, 'b, T> = Default::default();
                for holder in &self.frame.path {
                    match holder.value {
                        HolderState::Subtree(ref matches, _) => {
                            res.extend(matches);
                        },
                        HolderState::Key(pair) => {
                            if let Some(name) = holder.name {
                                res.insert(name, pair);
                            }
                        },
                        HolderState::Value(_) => (),
                        _ => unreachable!("on_end (End)"),
                    }
                }
                if !self.frame.prev() {
                    // frame.prev() should always be called, even if this gets
                    // replaced with debug_assert!().
                    assert!(false, "frame.prev()");
                }
                Ok((true, res))
            }
            PatternElement::ApplyPredicate {..} => {
                assert!(!self.frame.in_key);
                let mut res: Matches<'a, 'b, T> = Default::default();
                for holder in &self.frame.path {
                    match holder.value {
                        HolderState::Subtree(ref matches, _) => {
                            res.extend(matches);
                        },
                        HolderState::Key(pair) => {
                            if let Some(name) = holder.name {
                                res.insert(name, pair);
                            }
                        },
                        HolderState::Value(_) => (),
                        _ => unreachable!("on_end (ApplyPredicate)"),
                    }
                }
                self.frame.path.clear();
                Ok((false, res))
            }
            _ => unreachable!("on_end")
        }
    }
}

impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> {
    type Item = Result<BTreeMap<&'a str, KVPair<'b, T>>, MatchError>;

    fn next(&mut self) -> Option<Self::Item> {
        while !self.frame.path.is_empty() {
            if !self.frame.next() {
                let (in_key, res) = match self.on_end() {
                    Ok(v) => v,
                    Err(e) => {
                        self.frame.path.clear();
                        return Some(Err(e))
                    },
                };
                self.frame.in_key = in_key;
                return Some(Ok(res));
            } else {
                let in_key = if self.frame.in_key {
                    self.on_in_key()
                } else {
                    self.on_not_in_key()
                };
                match in_key {
                    Ok(in_key) => self.frame.in_key = in_key,
                    Err(e) => {
                        self.frame.path.clear();
                        return Some(Err(e))
                    },
                }
            }
        }
        None
    }
}