summary refs log blame commit diff stats
path: root/src/vm.rs
blob: 44675f34327d90328dc1177362908e1d4ce3f4da (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

















                                                                              
                  
                  








                                        
                                                           










                                                                                                               


                                                                              
                                 
























                                                                                        
                                    













































                                                        
                       
                                                            





















                                                                              

                                                           













                                                                            
                                                
                                                          
                                                                            





                                                                           

                                                     















                                                                         


                                                                 









                                                  
                                                                                                                                                





















































































                                                                                 
                                                                     






























                                                             
/*
 * This file is part of Datafu
 * Copyright (C) 2021  Soni L.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

use crate::KVPair;
use crate::RefOwn;
use crate::PatternTypes;
use crate::Predicate;
use crate::errors::MatchError;

use std::collections::BTreeMap;
use std::marker::PhantomData;

pub(crate) const MAX_CALLS: usize = 250;

type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>;

// TODO: use a builder for this?
/// The constant pool for a pattern.
pub(crate) struct PatternConstants<T: PatternTypes> {
    // last proto is implicitly the whole pattern.
    pub(crate) protos: Vec<Vec<PatternElement<T>>>,
    // Note that we can borrow these when creating the output map.
    // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f
    pub(crate) strings: Vec<String>,
    pub(crate) regices: Vec<()/* TODO */>,
    pub(crate) predicates: Vec<Box<Predicate<T>>>,
    // NOTE these are part of the constant pool and so have lifetime analogous
    // to 'a (consistently used to indicate constant pool lifetime) when used
    // elsewhere. In particular, they can't be yielded by the iterator.
    pub(crate) defs: Vec<T::Own>,
}

/// A pattern element.
pub(crate) enum PatternElement<T: PatternTypes> {
    Arrow,

    Identifier(usize),

    StringKey(usize, bool),
    RegexKey(usize, bool),
    ParameterKey(usize, bool),
    KeySubtree(usize, bool),
    ValueSubtree(usize, bool),
    ApplyPredicate(usize, bool, PhantomData<fn(&PatternConstants<T>) -> &Predicate<T>>),

    End
}

impl<T: PatternTypes> Copy for PatternElement<T> {}

impl<T: PatternTypes> Clone for PatternElement<T> {
    fn clone(&self) -> Self { *self }
}

struct Frame<'a, 'b, T: PatternTypes> {
    obj: RefOwn<'b, T::Ref, T::Own>,
    ops: &'a Vec<PatternElement<T>>,
    iar: Option<usize>,
    depth: usize,
    path: Vec<Holder<'a, 'b, T>>,
    in_key: bool,
}

impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> {
    /// Advances the instruction address register.
    ///
    /// # Returns
    ///
    /// `true` if successful, `false` otherwise.
    fn next(&mut self) -> bool {
        let new = self.iar.map_or(0, |v| v + 1);
        new < self.ops.len() && {
            self.iar = Some(new);
            true
        }
    }

    /// Returns the current instruction.
    fn op(&self) -> PatternElement<T> {
        self.ops[self.iar.expect("ops[iar]")]
    }

    /// Rewinds the instruction address register.
    ///
    /// # Returns
    ///
    /// `true` if successful, `false` otherwise.
    fn prev(&mut self) -> bool {
        let new = self.iar.expect("iar").checked_sub(1);
        new.is_some() && {
            self.iar = new;
            true
        }
    }
}

/// Stores a single match.
///
/// See also Holder.
enum HolderState<'a, 'b, T: PatternTypes> {
    EmptyKey,
    EmptySubtree,
    Key(KVPair<'b, T>),
    Subtree(Matches<'a, 'b, T>, RefOwn<'b, T::Ref, T::Own>),
}

impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> {
    fn clone(&self) -> Self {
        match self {
            HolderState::EmptyKey => HolderState::EmptyKey,
            HolderState::EmptySubtree => HolderState::EmptySubtree,
            HolderState::Key(v) => HolderState::Key(*v),
            HolderState::Subtree(m, v) => HolderState::Subtree(m.clone(), *v),
        }
    }
}

impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> {
    fn is_empty(&self) -> bool {
        matches!(self, HolderState::EmptyKey | HolderState::EmptySubtree)
    }

    fn is_subtree(&self) -> bool {
        matches!(self, HolderState::EmptySubtree | HolderState::Subtree {..})
    }

    fn value(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> {
        match *self {
            HolderState::Key((_, value)) => Some(value),
            HolderState::Subtree(_, value) => Some(value),
            _ => None
        }
    }
}

/// Stores a single match and associated metadata.
///
/// A single match is generally a key-value pair, but may be a collection of
/// named pairs in the case of subtree matches.
struct Holder<'a, 'b, T: PatternTypes> {
     name: Option<&'a str>,
     value: HolderState<'a, 'b, T>,
     parent: Option<RefOwn<'b, T::Ref, T::Own>>,
     iterator: Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>,
     filters: Vec<Box<dyn for<'c> Fn(&'c mut HolderState<'a, 'b, T>) + 'a>>,
}

impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> {
    fn next(&mut self) -> Option<Result<(), MatchError>> {
        if let Self { value: ref mut v, iterator: ref mut it, .. } = self {
            let is_subtree = v.is_subtree();
            *v = match it.next() {
                Some(pair) => HolderState::Key(pair),
                None => return None
            };
            // just try to make sure the type doesn't change.
            // (and that if we get to this point the result isn't empty.)
            assert!(!v.is_empty() && v.is_subtree() == is_subtree);
        }
        Some(Ok(()))
    }
}

impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> {
    fn default() -> Self {
        Self {
            name: Default::default(),
            value: HolderState::EmptyKey,
            parent: Default::default(),
            // TODO https://github.com/rust-lang/rfcs/issues/3059
            iterator: Box::new(std::iter::empty()),
            filters: Default::default(),
        }
    }
}

pub struct Matcher<'a, 'b, T: PatternTypes> {
    defs: &'a PatternConstants<T>,
    frame: Frame<'a, 'b, T>,
}

impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
    pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> {
        let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?;
        let ops: &Vec<_> = &defs.protos[proto];
        Ok(Self {
            defs: defs,
            frame: Frame {
                obj: obj,
                ops: ops,
                iar: None,
                depth: depth,
                path: if ops.is_empty() {
                    Default::default()
                } else {
                    vec![Holder::default()]
                },
                in_key: false,
            },
        })
    }

    fn on_in_key(&mut self) -> Result<bool, MatchError> {
        match self.frame.op() {
            PatternElement::End => {
                unimplemented!()
            }
            _ => unreachable!("on_in_key")
        }
    }

    fn on_not_in_key(&mut self) -> Result<bool, MatchError> {
        match self.frame.op() {
            _ => unreachable!("on_not_in_key")
        }
    }

    fn on_end(&mut self) -> Result<(bool, Matches<'a, 'b, T>), MatchError> {
        match self.frame.op() {
            PatternElement::End => {
                assert!(!self.frame.path.last().expect("path").value.is_empty());
                // TODO this for loop is duplicated with ApplyPredicate
                // TODO figure out how to deduplicate them
                let mut res: Matches<'a, 'b, T> = Default::default();
                for holder in &self.frame.path {
                    match holder.value {
                        HolderState::Subtree(ref matches, _) => {
                            res.extend(matches);
                        },
                        HolderState::Key(pair) => {
                            if let Some(name) = holder.name {
                                res.insert(name, pair);
                            }
                        },
                        _ => unreachable!("on_end (End)"),
                    }
                }
                if !self.frame.prev() {
                    // frame.prev() should always be called, even if this gets
                    // replaced with debug_assert!().
                    assert!(false, "frame.prev()");
                }
                Ok((true, res))
            },
            PatternElement::ApplyPredicate {..} => {
                assert!(!self.frame.in_key);
                let mut res: Matches<'a, 'b, T> = Default::default();
                for holder in &self.frame.path {
                    match holder.value {
                        HolderState::Subtree(ref matches, _) => {
                            res.extend(matches);
                        },
                        HolderState::Key(pair) => {
                            if let Some(name) = holder.name {
                                res.insert(name, pair);
                            }
                        },
                        _ => unreachable!("on_end (ApplyPredicate)"),
                    }
                }
                self.frame.path.clear();
                Ok((false, res))
            }
            _ => unreachable!("on_end")
        }
    }
}

impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> {
    type Item = Result<BTreeMap<&'a str, KVPair<'b, T>>, MatchError>;

    fn next(&mut self) -> Option<Self::Item> {
        while !self.frame.path.is_empty() {
            if !self.frame.next() {
                let (in_key, res) = match self.on_end() {
                    Ok(v) => v,
                    Err(e) => {
                        self.frame.path.clear();
                        return Some(Err(e))
                    },
                };
                self.frame.in_key = in_key;
                return Some(Ok(res));
            } else {
                let in_key = if self.frame.in_key {
                    self.on_in_key()
                } else {
                    self.on_not_in_key()
                };
                match in_key {
                    Ok(in_key) => self.frame.in_key = in_key,
                    Err(e) => {
                        self.frame.path.clear();
                        return Some(Err(e))
                    },
                }
            }
        }
        None
    }
}