path: root/src/parser.rs



// Copyright (C) 2021-2022 Soni L.
// SPDX-License-Identifier: MIT OR Apache-2.0

//! The recursive-descent datafu language parser.

use std::borrow::Borrow;
use std::collections::BTreeMap;
use std::mem::ManuallyDrop;

use impl_trait::impl_trait;
use regex::Regex;
use serde::Serialize;

use crate::Predicate;
use crate::errors::PatternError;
use crate::vm;
use crate::vm::PatternConstants;
use crate::vm::PatternElement;
use crate::vm::PatternToken;
use crate::vm::Type;

/// try! with bools. (the b comes from bool.)
macro_rules! bry {
    ($l:lifetime $e:expr) => {
        if !$e {
            break $l false;
        }
    };
    ($e:expr) => {
        if !$e {
            return false;
        }
    };
}

macro_rules! lblock {
    // unfortunately ($l:lifetime : $b:block) => { $l: $b } didn't work.
    ($($t:tt)+) => {
        $($t)+
    }
}

/// Attempts to shift `s` forward by removing `prefix`.
///
/// Returns whether `s` has had `prefix` removed.
// can't use Pattern here :(
fn strip_prefix(s: &mut &str, prefix: &str) -> bool {
    s.strip_prefix(prefix).map(|ns| *s = ns).is_some()
}

/// Returns the position (index) of `sub` within `base`, in bytes.
///
/// Returns bogus results if `base` and `sub` are unrelated.
fn pos_of<'a>(base: &'a str, sub: &'a str) -> Option<usize> {
    // FIXME is there any non-UB way to check if `sub` is in `base`?
    Some((sub.as_ptr() as usize) - (base.as_ptr() as usize))
}

/// Collects value-ish PatternTokens into a PatternElement::Value with the
/// given already-collected name.
fn collect_value(
    name: Option<usize>,
    tokens: &[PatternToken],
) -> PatternElement {
    let value = match tokens {
        &[PatternToken::String(index, skippable)] => {
            vm::Value::String { index, skippable }
        },
        &[PatternToken::Regex(index, skippable)] => {
            vm::Value::Regex { index, skippable }
        },
        &[PatternToken::Type(ty, skippable)] => {
            vm::Value::Type { ty, skippable }
        },
        other => {
            unreachable!("{other:?}")
        },
    };
    PatternElement::Value {
        name,
        value: Some(value),
    }
}

/// Collects a slice of PatternToken into a PatternElement::Value.
fn collect_name_and_value(tokens: &[PatternToken]) -> PatternElement {
    match tokens {
        &[PatternToken::Identifier(name)] => {
            PatternElement::Value {
                name: Some(name),
                value: None,
            }
        },
        &[PatternToken::Identifier(name), ref value @ ..] => {
            collect_value(Some(name), value)
        },
        value => collect_value(None, value),
    }
}

/// Helper to collect "subtree" sections of the pattern.
///
/// This is a RAII-like guard which handles cleaning up the parsed pattern when
/// dropped.
struct SubtreeHelper<'r, 's, PKey, OKey, O>
where
    Self: 'r,
    PKey: Borrow<str> + Ord,
    OKey: Borrow<str> + Ord,
    O: Serialize,
{
    root: &'r mut Parser<'s, PKey, OKey, O>,
}

impl_trait! {
    impl<'r, 's, PKey, OKey, O> SubtreeHelper<'r, 's, PKey, OKey, O>
    where
        Self: 'r,
        PKey: Borrow<str> + Ord,
        OKey: Borrow<str> + Ord,
        O: Serialize,
    {
        fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self {
            value.consts.protos.push(Default::default());
            Self {
                root: value,
            }
        }

        fn is_empty(&self) -> bool {
            self.root.consts.protos.last().unwrap().is_empty()
        }

        fn commit(self) -> usize {
            let mut self_ = ManuallyDrop::new(self);
            let proto = self_.root.consts.protos.pop().unwrap();
            let id = self_.root.closed_subtrees.next().unwrap();
            self_.root.consts.protos.insert(id, proto);
            id
        }

        impl trait std::ops::Deref {
            type Target = Parser<'s, PKey, OKey, O>;

            fn deref(&self) -> &Self::Target {
                &*self.root
            }
        }

        impl trait std::ops::DerefMut {
            fn deref_mut(&mut self) -> &mut Self::Target {
                self.root
            }
        }

        impl trait Drop {
            fn drop(&mut self) {
                // remove "partial" proto
                self.root.consts.protos.pop().expect("SubtreeHelper");
            }
        }
    }
}

/// Helper to collect "tag" sections of the pattern.
///
/// This is a RAII-like guard which handles cleaning up the parsed pattern when
/// dropped.
struct TagHelper<'r, 's, PKey, OKey, O>
where
    Self: 'r,
    PKey: Borrow<str> + Ord,
    OKey: Borrow<str> + Ord,
    O: Serialize,
{
    root: &'r mut Parser<'s, PKey, OKey, O>,
    len: usize,
}

impl_trait! {
    impl<'r, 's, PKey, OKey, O> TagHelper<'r, 's, PKey, OKey, O>
    where
        Self: 'r,
        PKey: Borrow<str> + Ord,
        OKey: Borrow<str> + Ord,
        O: Serialize,
    {
        fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self {
            let len = value.tokens.len();
            Self {
                root: value,
                len : len,
            }
        }

        fn commit(self) {
            let self_ = &mut *std::mem::ManuallyDrop::new(self);
            // we could write a proper parser for the token stream.
            //
            // we could also just do this instead.
            match self_.root.tokens.drain(self_.len..).as_slice() {
                &[
                    PatternToken::Arrow,
                    PatternToken::KeySubtree(index, optional),
                    ref name_value @ ..,
                    PatternToken::End,
                ] => {
                    let tag = PatternElement::Tag {
                        key_subtree: index,
                        optional,
                    };
                    self_.root.consts.protos.last_mut().unwrap().push(tag);
                    let value = collect_name_and_value(name_value);
                    self_.root.consts.protos.last_mut().unwrap().push(value);
                },
                &[
                    PatternToken::Arrow,
                    ref name_value @ ..,
                    PatternToken::End,
                ] => {
                    let tag = PatternElement::EmptyTag;
                    self_.root.consts.protos.last_mut().unwrap().push(tag);
                    let value = collect_name_and_value(name_value);
                    self_.root.consts.protos.last_mut().unwrap().push(value);
                },
                other => {
                    unreachable!("{other:?}");
                },
            };
        }

        impl trait std::ops::Deref {
            type Target = Parser<'s, PKey, OKey, O>;

            fn deref(&self) -> &Self::Target {
                &*self.root
            }
        }

        impl trait std::ops::DerefMut {
            fn deref_mut(&mut self) -> &mut Self::Target {
                self.root
            }
        }

        impl trait Drop {
            fn drop(&mut self) {
                assert!(self.root.tokens.len() >= self.len);
                self.root.tokens.drain(self.len..);
            }
        }
    }
}

struct Parser<'s, PKey, OKey, O>
where
    PKey: Borrow<str> + Ord,
    OKey: Borrow<str> + Ord,
    O: Serialize,
{
    base: &'s str,
    preds: Option<BTreeMap<PKey, Box<Predicate>>>,
    objs: Option<BTreeMap<OKey, O>>,
    pred_ids: BTreeMap<PKey, usize>,
    obj_ids: BTreeMap<OKey, usize>,
    consts: PatternConstants<O>,
    tokens: Vec<PatternToken>,
    closed_subtrees: std::ops::RangeFrom<usize>,
}

// These are documented using LPeg.re syntax
// http://www.inf.puc-rio.br/~roberto/lpeg/re.html
#[rustfmt::skip]
impl<'s, PKey, OKey, O> Parser<'s, PKey, OKey, O>
where
    PKey: Borrow<str> + Ord,
    OKey: Borrow<str> + Ord,
    O: Serialize,
{
    /// Creates a new `Parser`.
    fn new(
        base: &'s str,
        preds: Option<BTreeMap<PKey, Box<Predicate>>>,
        objs: Option<BTreeMap<OKey, O>>,
    ) -> Self {
        Self {
            base: base,
            preds: preds,
            objs: objs,
            tokens: Default::default(),
            pred_ids: Default::default(),
            obj_ids: Default::default(),
            consts: Default::default(),
            closed_subtrees: (0..),
        }
    }

    /// str_literal <- sp ( ( "'" str_char*  ( "'" / ( !. -> ErrorStrEnd ) ) ( '?' -> MarkSkippable ) ) -> String ) sp
    /// str_char <- ( str_escape / [^%'] )
    /// str_escape <- '%' ( '%' / "'" ) / ( ( '%' .? ) -> ErrorStrEscape )
    fn str_literal(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        const STRING_TOKEN: &str = "'";
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, STRING_TOKEN));
            let mut string = String::new();
            loop {
                let base = cursor;
                if strip_prefix(&mut cursor, "%") {
                    if strip_prefix(&mut cursor, STRING_TOKEN) {
                        string.push_str(STRING_TOKEN);
                    } else if strip_prefix(&mut cursor, "%") {
                        string.push_str("%");
                    } else {
                        return Err(PatternError::StringEscape(pos_of(self.base, base).unwrap(), cursor));
                    }
                } else {
                    cursor = cursor.trim_start_matches(|c: char| {
                        !matches!(c, '%' | '\'')
                    });
                    if base == cursor { // no match?
                        break;
                    }
                    string.push_str(&base[..pos_of(base, cursor).unwrap()]);
                }
            }
            if !strip_prefix(&mut cursor, STRING_TOKEN) {
                match cursor {
                    "" => return Err(PatternError::StringEnd(self.base.len(), cursor)),
                    _ => unreachable!(),
                }
            }
            let skippable = strip_prefix(&mut cursor, "?");
            self.sp(&mut cursor);
            let id = self.consts.strings.iter().position(|c| c == &string);
            let self_ = &mut *self;
            let id = id.unwrap_or_else(move || {
                string.shrink_to_fit();
                self_.consts.strings.insert(string);
                self_.consts.strings.len() - 1
            });
            self.tokens.push(PatternToken::String(id, skippable));
            *s = cursor;
            true
        }))
    }

    /// re_literal <- sp ( ( "/" re_char*  ( "/" / ( !. -> ErrorReEnd ) ) ( '?' -> MarkSkippable ) ) -> Regex ) sp
    /// re_char <- ( re_escape / [^%/] )
    /// re_escape <- '%' ( '%' / "/" ) / ( ( '%' .? ) -> ErrorReEscape )
    #[allow(unused_variables)]
    #[allow(unreachable_code)]
    fn re_literal(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        const RE_TOKEN: &str = "/";
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            let orig = cursor;
            bry!('matches strip_prefix(&mut cursor, RE_TOKEN));
            let mut string = String::new();
            loop {
                let base = cursor;
                if strip_prefix(&mut cursor, "%") {
                    if strip_prefix(&mut cursor, RE_TOKEN) {
                        string.push_str(RE_TOKEN);
                    } else if strip_prefix(&mut cursor, "%") {
                        string.push_str("%");
                    } else {
                        return Err(PatternError::RegexEscape(pos_of(self.base, base).unwrap(), cursor));
                    }
                } else {
                    cursor = cursor.trim_start_matches(|c: char| {
                        !matches!(c, '%' | '/')
                    });
                    if base == cursor { // no match?
                        break;
                    }
                    string.push_str(&base[..pos_of(base, cursor).unwrap()]);
                }
            }
            if !strip_prefix(&mut cursor, RE_TOKEN) {
                match cursor {
                    "" => return Err(PatternError::RegexEnd(self.base.len(), cursor)),
                    _ => unreachable!(),
                }
            }
            let end = cursor;
            let skippable = strip_prefix(&mut cursor, "?");
            self.sp(&mut cursor);
            let re = Regex::new(&string).map_err(|e| {
                PatternError::Regex(pos_of(self.base, orig).unwrap(), &orig[..pos_of(orig, end).unwrap()], e)
            })?;
            let id = self.consts.regices.iter().position(|c| c.as_str() == re.as_str());
            let self_ = &mut *self;
            let id = id.unwrap_or_else(move || {
                self_.consts.regices.push(re);
                self_.consts.regices.len() - 1
            });
            self.tokens.push(PatternToken::Regex(id, skippable));
            *s = cursor;
            true
        }))
    }

    /// matcher <- sp ( parameter / str_literal / re_literal / predicate / ty ) sp
    fn matcher(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            match () {
                _ if self.parameter(&mut cursor)? => {},
                _ if self.str_literal(&mut cursor)? => {},
                _ if self.re_literal(&mut cursor)? => {},
                _ if self.predicate(&mut cursor)? => {},
                _ if self.ty(&mut cursor)? => {},
                _ => bry!('matches false),
            }
            self.sp(&mut cursor);
            *s = cursor;
            true
        }))
    }

    /// tag <- sp ( '->' -> Arrow ) sp key_subtree? sp ( matcher / name sp matcher? ) ( '' -> End ) sp
    fn tag(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, "->"));
            let mut self_ = TagHelper::start(&mut *self);
            {
                self_.tokens.push(PatternToken::Arrow);
            }
            self_.sp(&mut cursor);
            let _ = self_.key_subtree(&mut cursor)?;
            self_.sp(&mut cursor);
            if !self_.matcher(&mut cursor)? {
                bry!('matches self_.name(&mut cursor)?);
                self_.sp(&mut cursor);
                let _ = self_.matcher(&mut cursor)?;
            }
            self_.sp(&mut cursor);
            {
                self_.tokens.push(PatternToken::End);
            }
            self_.commit();
            *s = cursor;
            true
        }))
    }

    /// sp <- spaces*
    ///
    /// (always succeeds, so doesn't return anything)
    fn sp(&mut self, s: &mut &'s str) {
        *s = s.trim_start_matches(|c: char| {
            matches!(c, ' ' | '\n' | '\r' | '\t')
        });
    }

    /// identifier <- [A-Za-z_][A-Za-z0-9_]*
    fn identifier(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        if let Some('A'..='Z') | Some('a'..='z') | Some('_') = s.chars().next() {
            *s = s.trim_start_matches(|c: char| {
                matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '_')
            });
            Ok(true)
        } else {
            Ok(false)
        }
    }

    /// name <- sp ( identifier -> Identifier ) sp
    fn name(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            let start = cursor;
            bry!('matches self.identifier(&mut cursor)?);
            let name = &start[..pos_of(start, cursor).unwrap_or(start.len())];
            // no processing of `name` is required for this.
            let id = self.consts.strings.iter().position(|c| c == name);
            let id = id.unwrap_or_else(|| {
                self.consts.strings.insert(name.into());
                self.consts.strings.len() - 1
            });
            self.tokens.push(PatternToken::Identifier(id));
            self.sp(&mut cursor);
            *s = cursor;
            true
        }))
    }

    /// parameter <- sp '$' ( '?'? -> MarkSkippable ) ( identifier -> Parameter ) sp
    fn parameter(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, "$"));
            let skippable = strip_prefix(&mut cursor, "?");
            let start = cursor;
            bry!('matches self.identifier(&mut cursor)?);
            let name = &start[..pos_of(start, cursor).unwrap_or(start.len())];
            // no processing of `name` is required for this.
            let id = self.obj_ids.get(name).copied().map_or_else(
                || {
                    let (key, obj) = self.objs.as_mut().and_then(|map| {
                        map.remove_entry(name)
                    }).ok_or_else(|| {
                        let pos = pos_of(self.base, start).unwrap();
                        PatternError::UnknownParameter(pos, name)
                    })?;
                    let id = self.consts.defs.len();
                    self.consts.defs.push(obj);
                    self.obj_ids.insert(key, id);
                    Ok(id)
                },
                Ok,
            )?;
            self.tokens.push(PatternToken::Parameter(id, skippable));
            self.sp(&mut cursor);
            *s = cursor;
            true
        }))
    }

    /// ty <- sp ':' ( '?'? -> MarkSkippable ) ( identifier -> Type ) sp
    fn ty(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, ":"));
            let custom = strip_prefix(&mut cursor, "$");
            let skippable = strip_prefix(&mut cursor, "?");
            let start = cursor;
            bry!('matches self.identifier(&mut cursor)?);
            let name = &start[..pos_of(start, cursor).unwrap_or(start.len())];
            self.tokens.push(PatternToken::Type(match name {
                "any" => Type::Any,
                "ignored_any" => Type::IgnoredAny,
                "bool" => Type::Bool,
                "i8" => Type::I8,
                "i16" => Type::I16,
                "i32" => Type::I32,
                "i64" => Type::I64,
                "i128" => Type::I128,
                "u8" => Type::U8,
                "u16" => Type::U16,
                "u32" => Type::U32,
                "u64" => Type::U64,
                "u128" => Type::U128,
                "f32" => Type::F32,
                "f64" => Type::F64,
                "char" => Type::Char,
                "str" => Type::Str,
                "string" => Type::String,
                "bytes" => Type::Bytes,
                "bytebuf" => Type::ByteBuf,
                "option" => Type::Option,
                "unit" => Type::Unit,
                "seq" => Type::Seq,
                "map" => Type::Map,
                //"tuple" => Type::Tuple(usize),
                _ => {
                    let pos = pos_of(self.base, start).unwrap();
                    return Err(PatternError::UnknownPredicate(pos, name))
                }
            }, skippable));
            self.sp(&mut cursor);
            *s = cursor;
            true
        }))
    }

    /// predicate <- sp ':' '$' ( '?'? -> MarkSkippable ) ( identifier -> Predicate ) sp
    fn predicate(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, ":"));
            bry!('matches strip_prefix(&mut cursor, "$"));
            let skippable = strip_prefix(&mut cursor, "?");
            let start = cursor;
            bry!('matches self.identifier(&mut cursor)?);
            let name = &start[..pos_of(start, cursor).unwrap_or(start.len())];
            // no processing of `name` is required for this.
            let id = self.pred_ids.get(name).copied().map_or_else(
                || {
                    let (key, pred) = self.preds.as_mut().and_then(|map| {
                        map.remove_entry(name)
                    }).ok_or_else(|| {
                        let pos = pos_of(self.base, start).unwrap();
                        PatternError::UnknownPredicate(pos, name)
                    })?;
                    let id = self.consts.predicates.len();
                    self.consts.predicates.push(pred);
                    self.pred_ids.insert(key, id);
                    Ok(id)
                },
                Ok,
            )?;
            self.tokens.push(PatternToken::ApplyPredicate(id, skippable));
            self.sp(&mut cursor);
            *s = cursor;
            let pos = pos_of(self.base, start).unwrap();
            return Err(PatternError::Unimplemented(pos, cursor));
            //true
        }))
    }

    /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) ( '?'? -> MarkSkippable ) sp
    // ( '?'? -> MarkSkippable )
    fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, "["));
            self.sp(&mut cursor);
            let mut subtree = SubtreeHelper::start(&mut *self);
            // FIXME we may want to clean up tokens if these fail
            let marker = subtree.tokens.len();
            if !subtree.matcher(&mut cursor)? {
                bry!('matches subtree.name(&mut cursor)?);
                subtree.sp(&mut cursor);
                let _ = subtree.matcher(&mut cursor)?;
            }
            let value = match subtree.tokens.drain(marker..).as_slice() {
                name_value => collect_name_and_value(name_value),
            };
            subtree.consts.protos.last_mut().unwrap().push(value);
            subtree.sp(&mut cursor);
            bry!('matches subtree.subtree(&mut cursor)?);
            subtree.sp(&mut cursor);
            bry!('matches
                strip_prefix(&mut cursor, "]")
                ||
                subtree.unexpected_token(&mut cursor)?
                ||
                subtree.unexpected_end(&mut cursor)?
            );
            let optional = strip_prefix(&mut cursor, "?");
            subtree.sp(&mut cursor);
            *s = cursor;
            let id = subtree.commit();
            self.tokens.push(PatternToken::KeySubtree(id, optional));
            true
        }))
    }

    /// value_subtree <- sp '(' sp subtree sp ( ')' / unexpected_token / unexpected_end ) ( '?'? -> MarkSkippable ) sp
    fn value_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, "("));
            self.sp(&mut cursor);
            let mut subtree = SubtreeHelper::start(&mut *self);
            bry!('matches subtree.subtree(&mut cursor)?);
            subtree.sp(&mut cursor);
            bry!('matches
                strip_prefix(&mut cursor, ")")
                ||
                subtree.unexpected_token(&mut cursor)?
                ||
                subtree.unexpected_end(&mut cursor)?
            );
            let optional = strip_prefix(&mut cursor, "?");
            subtree.sp(&mut cursor);
            *s = cursor;
            if !subtree.is_empty() {
                let id = subtree.commit();
                self.tokens.push(PatternToken::ValueSubtree(id, optional));
            }
            true
        }))
    }

    /// unexpected_end <- ( !. ) -> ErrorToken
    fn unexpected_end(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        match *s {
            "" => Err(PatternError::Token(self.base.len(), s)),
            _ => Ok(false),
        }
    }

    /// unexpected_token <- . -> ErrorToken
    fn unexpected_token(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        match *s {
            "" => Ok(false),
            _ => Err(PatternError::Token(pos_of(self.base, s).unwrap(), s)),
        }
    }

    /// subtree <- sp tag* sp ( value_subtree '' -> End )* sp
    fn subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        self.sp(&mut cursor);
        while self.tag(&mut cursor)? {
        }
        self.sp(&mut cursor);
        let mut has_subtrees = false;
        let marker = self.tokens.len();
        self.consts.protos.last_mut().unwrap().push({
            PatternElement::SubtreeMarker
        });
        while self.value_subtree(&mut cursor)? {
            let subtree = match self.tokens.drain(marker..).as_slice() {
                &[] => continue,
                &[PatternToken::ValueSubtree(index, optional)] => {
                    PatternElement::ValueSubtree { index, optional }
                },
                other => {
                    unreachable!("{other:?}")
                }
            };
            self.consts.protos.last_mut().unwrap().push(subtree);
        }
        let proto = self.consts.protos.last_mut().unwrap();
        if let Some(PatternElement::SubtreeMarker) = proto.last() {
            proto.pop();
        }
        self.sp(&mut cursor);
        *s = cursor;
        // always matches (may match empty)
        Ok(true)
    }

    /// pattern <- ( matcher? sp subtree / unexpected_token ) ( !. / unexpected_token )
    fn pattern(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            let mut subtree = SubtreeHelper::start(&mut *self);
            // FIXME we may want to clean up tokens if subtree.matcher errors
            let marker = subtree.tokens.len();
            let _ = subtree.matcher(&mut cursor)?;
            let value = match subtree.tokens.drain(marker..).as_slice() {
                &[] => {
                    PatternElement::Value {
                        name: None,
                        value: None,
                    }
                },
                value => collect_value(None, value),
            };
            subtree.consts.protos.last_mut().unwrap().push(value);
            bry!('matches
                subtree.subtree(&mut cursor)?
                ||
                subtree.unexpected_token(&mut cursor)?
            );
            bry!('matches
                cursor == ""
                ||
                subtree.unexpected_token(&mut cursor)?
            );
            subtree.commit();
            *s = cursor;
            true
        }))
    }
}

/// Parses a DFU expression.
///
/// The given 
pub(crate) fn parse<'s, PKey, OKey, O>(
    input: &'s str,
    preds: Option<BTreeMap<PKey, Box<Predicate>>>,
    objs: Option<BTreeMap<OKey, O>>
) -> Result<PatternConstants<O>, PatternError<'s>>
where
    PKey: Borrow<str> + Ord,
    OKey: Borrow<str> + Ord,
    O: Serialize,
{
    let mut parser = Parser::new(input, preds, objs);

    let mut parsed = input;
    let matched = parser.pattern(&mut parsed)?;

    assert!(matched);
    assert_eq!(parsed, "");
    assert_eq!(
        parser.closed_subtrees.next().unwrap(),
        parser.consts.protos.len(),
    );
    assert!(parser.consts.protos.iter().all(|proto| !proto.is_empty()));

    Ok(parser.consts)
}

#[cfg(test)]
mod tests {
    use crate::errors::PatternError;
    use super::Parser;

    use proptest::prelude::*;

    fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, ()> {
        let mut parser = Parser::<
            's, &'static str, &'static str, ()
        >::new(s, None, None);
        parser
    }

    #[test]
    fn test_identifier() {
        fn identifier_input<'s>(s: &mut &'s str) -> Result<bool, PatternError<'s>> {
            let mut parser = Parser::<
                's, &'static str, &'static str, ()
            >::new(s, None, None);
            parser.identifier(s)
        }
        for mut identifier in vec!["test", "Test", "_test", "_Test", "_123",] {
            println!("{}", identifier);
            assert!(identifier_input(&mut identifier).unwrap());
            assert_eq!(identifier, "");
        }
        for mut identifier in vec!["123", "1_", " ",] {
            println!("{}", identifier);
            assert!(!identifier_input(&mut identifier).unwrap());
            assert_ne!(identifier, "");
        }
    }

    proptest! {
        #[test]
        fn test_no_crash(s in ".{0,4096}") {
            let _ = prep_parser(&s).str_literal(&mut &*s);
            let _ = prep_parser(&s).re_literal(&mut &*s);
            let _ = prep_parser(&s).matcher(&mut &*s);
            let _ = prep_parser(&s).tag(&mut &*s);
            let _ = prep_parser(&s).sp(&mut &*s);
            let _ = prep_parser(&s).identifier(&mut &*s);
            let _ = prep_parser(&s).name(&mut &*s);
            let _ = prep_parser(&s).parameter(&mut &*s);
            let _ = prep_parser(&s).predicate(&mut &*s);
            let _ = prep_parser(&s).ty(&mut &*s);
            let _ = prep_parser(&s).key_subtree(&mut &*s);
            let _ = prep_parser(&s).value_subtree(&mut &*s);
            let _ = prep_parser(&s).unexpected_end(&mut &*s);
            let _ = prep_parser(&s).unexpected_token(&mut &*s);
            // NOTE: never call subtree directly!!!
            //let _ = prep_parser(&s).subtree(&mut &*s);
            let _ = prep_parser(&s).pattern(&mut &*s);
        }
    }

    #[test]
    fn test_no_crash_some_patterns() {
        fn run_pattern(mut s: &str) {
            let _ = prep_parser(s).pattern(&mut s);
        }
        run_pattern("hello");
        run_pattern("/test/");
        run_pattern("'this'");
        run_pattern(":map");
        run_pattern(":?map");
        run_pattern(":map->[:str]:str");
        run_pattern(":map(->[:str]:str)()");
    }

    #[test]
    fn test_documented_edge_cases() {
        // tests that these patterns parse successfully
        fn run_pattern(mut s: &str) {
            let _ = prep_parser(s).pattern(&mut s).unwrap();
        }
        // Empty pattern.
        run_pattern("");
        // Empty subvalue
        run_pattern("()");
    }
}
// Copyright (C) 2021-2022 Soni L.
// SPDX-License-Identifier: MIT OR Apache-2.0

//! The recursive-descent datafu language parser.

use std::borrow::Borrow;
use std::collections::BTreeMap;
use std::mem::ManuallyDrop;

use impl_trait::impl_trait;
use regex::Regex;
use serde::Serialize;

use crate::Predicate;
use crate::errors::PatternError;
use crate::vm;
use crate::vm::PatternConstants;
use crate::vm::PatternElement;
use crate::vm::PatternToken;
use crate::vm::Type;

/// try! with bools. (the b comes from bool.)
macro_rules! bry {
    ($l:lifetime $e:expr) => {
        if !$e {
            break $l false;
        }
    };
    ($e:expr) => {
        if !$e {
            return false;
        }
    };
}

macro_rules! lblock {
    // unfortunately ($l:lifetime : $b:block) => { $l: $b } didn't work.
    ($($t:tt)+) => {
        $($t)+
    }
}

/// Attempts to shift `s` forward by removing `prefix`.
///
/// Returns whether `s` has had `prefix` removed.
// can't use Pattern here :(
fn strip_prefix(s: &mut &str, prefix: &str) -> bool {
    s.strip_prefix(prefix).map(|ns| *s = ns).is_some()
}

/// Returns the position (index) of `sub` within `base`, in bytes.
///
/// Returns bogus results if `base` and `sub` are unrelated.
fn pos_of<'a>(base: &'a str, sub: &'a str) -> Option<usize> {
    // FIXME is there any non-UB way to check if `sub` is in `base`?
    Some((sub.as_ptr() as usize) - (base.as_ptr() as usize))
}

/// Collects value-ish PatternTokens into a PatternElement::Value with the
/// given already-collected name.
fn collect_value(
    name: Option<usize>,
    tokens: &[PatternToken],
) -> PatternElement {
    let value = match tokens {
        &[PatternToken::String(index, skippable)] => {
            vm::Value::String { index, skippable }
        },
        &[PatternToken::Regex(index, skippable)] => {
            vm::Value::Regex { index, skippable }
        },
        &[PatternToken::Type(ty, skippable)] => {
            vm::Value::Type { ty, skippable }
        },
        other => {
            unreachable!("{other:?}")
        },
    };
    PatternElement::Value {
        name,
        value: Some(value),
    }
}

/// Collects a slice of PatternToken into a PatternElement::Value.
fn collect_name_and_value(tokens: &[PatternToken]) -> PatternElement {
    match tokens {
        &[PatternToken::Identifier(name)] => {
            PatternElement::Value {
                name: Some(name),
                value: None,
            }
        },
        &[PatternToken::Identifier(name), ref value @ ..] => {
            collect_value(Some(name), value)
        },
        value => collect_value(None, value),
    }
}

/// Helper to collect "subtree" sections of the pattern.
///
/// This is a RAII-like guard which handles cleaning up the parsed pattern when
/// dropped.
struct SubtreeHelper<'r, 's, PKey, OKey, O>
where
    Self: 'r,
    PKey: Borrow<str> + Ord,
    OKey: Borrow<str> + Ord,
    O: Serialize,
{
    root: &'r mut Parser<'s, PKey, OKey, O>,
}

impl_trait! {
    impl<'r, 's, PKey, OKey, O> SubtreeHelper<'r, 's, PKey, OKey, O>
    where
        Self: 'r,
        PKey: Borrow<str> + Ord,
        OKey: Borrow<str> + Ord,
        O: Serialize,
    {
        fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self {
            value.consts.protos.push(Default::default());
            Self {
                root: value,
            }
        }

        fn is_empty(&self) -> bool {
            self.root.consts.protos.last().unwrap().is_empty()
        }

        fn commit(self) -> usize {
            let mut self_ = ManuallyDrop::new(self);
            let proto = self_.root.consts.protos.pop().unwrap();
            let id = self_.root.closed_subtrees.next().unwrap();
            self_.root.consts.protos.insert(id, proto);
            id
        }

        impl trait std::ops::Deref {
            type Target = Parser<'s, PKey, OKey, O>;

            fn deref(&self) -> &Self::Target {
                &*self.root
            }
        }

        impl trait std::ops::DerefMut {
            fn deref_mut(&mut self) -> &mut Self::Target {
                self.root
            }
        }

        impl trait Drop {
            fn drop(&mut self) {
                // remove "partial" proto
                self.root.consts.protos.pop().expect("SubtreeHelper");
            }
        }
    }
}

/// Helper to collect "tag" sections of the pattern.
///
/// This is a RAII-like guard which handles cleaning up the parsed pattern when
/// dropped.
struct TagHelper<'r, 's, PKey, OKey, O>
where
    Self: 'r,
    PKey: Borrow<str> + Ord,
    OKey: Borrow<str> + Ord,
    O: Serialize,
{
    root: &'r mut Parser<'s, PKey, OKey, O>,
    len: usize,
}

impl_trait! {
    impl<'r, 's, PKey, OKey, O> TagHelper<'r, 's, PKey, OKey, O>
    where
        Self: 'r,
        PKey: Borrow<str> + Ord,
        OKey: Borrow<str> + Ord,
        O: Serialize,
    {
        fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self {
            let len = value.tokens.len();
            Self {
                root: value,
                len : len,
            }
        }

        fn commit(self) {
            let self_ = &mut *std::mem::ManuallyDrop::new(self);
            // we could write a proper parser for the token stream.
            //
            // we could also just do this instead.
            match self_.root.tokens.drain(self_.len..).as_slice() {
                &[
                    PatternToken::Arrow,
                    PatternToken::KeySubtree(index, optional),
                    ref name_value @ ..,
                    PatternToken::End,
                ] => {
                    let tag = PatternElement::Tag {
                        key_subtree: index,
                        optional,
                    };
                    self_.root.consts.protos.last_mut().unwrap().push(tag);
                    let value = collect_name_and_value(name_value);
                    self_.root.consts.protos.last_mut().unwrap().push(value);
                },
                &[
                    PatternToken::Arrow,
                    ref name_value @ ..,
                    PatternToken::End,
                ] => {
                    let tag = PatternElement::EmptyTag;
                    self_.root.consts.protos.last_mut().unwrap().push(tag);
                    let value = collect_name_and_value(name_value);
                    self_.root.consts.protos.last_mut().unwrap().push(value);
                },
                other => {
                    unreachable!("{other:?}");
                },
            };
        }

        impl trait std::ops::Deref {
            type Target = Parser<'s, PKey, OKey, O>;

            fn deref(&self) -> &Self::Target {
                &*self.root
            }
        }

        impl trait std::ops::DerefMut {
            fn deref_mut(&mut self) -> &mut Self::Target {
                self.root
            }
        }

        impl trait Drop {
            fn drop(&mut self) {
                assert!(self.root.tokens.len() >= self.len);
                self.root.tokens.drain(self.len..);
            }
        }
    }
}

struct Parser<'s, PKey, OKey, O>
where
    PKey: Borrow<str> + Ord,
    OKey: Borrow<str> + Ord,
    O: Serialize,
{
    base: &'s str,
    preds: Option<BTreeMap<PKey, Box<Predicate>>>,
    objs: Option<BTreeMap<OKey, O>>,
    pred_ids: BTreeMap<PKey, usize>,
    obj_ids: BTreeMap<OKey, usize>,
    consts: PatternConstants<O>,
    tokens: Vec<PatternToken>,
    closed_subtrees: std::ops::RangeFrom<usize>,
}

// These are documented using LPeg.re syntax
// http://www.inf.puc-rio.br/~roberto/lpeg/re.html
#[rustfmt::skip]
impl<'s, PKey, OKey, O> Parser<'s, PKey, OKey, O>
where
    PKey: Borrow<str> + Ord,
    OKey: Borrow<str> + Ord,
    O: Serialize,
{
    /// Creates a new `Parser`.
    fn new(
        base: &'s str,
        preds: Option<BTreeMap<PKey, Box<Predicate>>>,
        objs: Option<BTreeMap<OKey, O>>,
    ) -> Self {
        Self {
            base: base,
            preds: preds,
            objs: objs,
            tokens: Default::default(),
            pred_ids: Default::default(),
            obj_ids: Default::default(),
            consts: Default::default(),
            closed_subtrees: (0..),
        }
    }

    /// str_literal <- sp ( ( "'" str_char*  ( "'" / ( !. -> ErrorStrEnd ) ) ( '?' -> MarkSkippable ) ) -> String ) sp
    /// str_char <- ( str_escape / [^%'] )
    /// str_escape <- '%' ( '%' / "'" ) / ( ( '%' .? ) -> ErrorStrEscape )
    fn str_literal(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        const STRING_TOKEN: &str = "'";
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, STRING_TOKEN));
            let mut string = String::new();
            loop {
                let base = cursor;
                if strip_prefix(&mut cursor, "%") {
                    if strip_prefix(&mut cursor, STRING_TOKEN) {
                        string.push_str(STRING_TOKEN);
                    } else if strip_prefix(&mut cursor, "%") {
                        string.push_str("%");
                    } else {
                        return Err(PatternError::StringEscape(pos_of(self.base, base).unwrap(), cursor));
                    }
                } else {
                    cursor = cursor.trim_start_matches(|c: char| {
                        !matches!(c, '%' | '\'')
                    });
                    if base == cursor { // no match?
                        break;
                    }
                    string.push_str(&base[..pos_of(base, cursor).unwrap()]);
                }
            }
            if !strip_prefix(&mut cursor, STRING_TOKEN) {
                match cursor {
                    "" => return Err(PatternError::StringEnd(self.base.len(), cursor)),
                    _ => unreachable!(),
                }
            }
            let skippable = strip_prefix(&mut cursor, "?");
            self.sp(&mut cursor);
            let id = self.consts.strings.iter().position(|c| c == &string);
            let self_ = &mut *self;
            let id = id.unwrap_or_else(move || {
                string.shrink_to_fit();
                self_.consts.strings.insert(string);
                self_.consts.strings.len() - 1
            });
            self.tokens.push(PatternToken::String(id, skippable));
            *s = cursor;
            true
        }))
    }

    /// re_literal <- sp ( ( "/" re_char*  ( "/" / ( !. -> ErrorReEnd ) ) ( '?' -> MarkSkippable ) ) -> Regex ) sp
    /// re_char <- ( re_escape / [^%/] )
    /// re_escape <- '%' ( '%' / "/" ) / ( ( '%' .? ) -> ErrorReEscape )
    #[allow(unused_variables)]
    #[allow(unreachable_code)]
    fn re_literal(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        const RE_TOKEN: &str = "/";
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            let orig = cursor;
            bry!('matches strip_prefix(&mut cursor, RE_TOKEN));
            let mut string = String::new();
            loop {
                let base = cursor;
                if strip_prefix(&mut cursor, "%") {
                    if strip_prefix(&mut cursor, RE_TOKEN) {
                        string.push_str(RE_TOKEN);
                    } else if strip_prefix(&mut cursor, "%") {
                        string.push_str("%");
                    } else {
                        return Err(PatternError::RegexEscape(pos_of(self.base, base).unwrap(), cursor));
                    }
                } else {
                    cursor = cursor.trim_start_matches(|c: char| {
                        !matches!(c, '%' | '/')
                    });
                    if base == cursor { // no match?
                        break;
                    }
                    string.push_str(&base[..pos_of(base, cursor).unwrap()]);
                }
            }
            if !strip_prefix(&mut cursor, RE_TOKEN) {
                match cursor {
                    "" => return Err(PatternError::RegexEnd(self.base.len(), cursor)),
                    _ => unreachable!(),
                }
            }
            let end = cursor;
            let skippable = strip_prefix(&mut cursor, "?");
            self.sp(&mut cursor);
            let re = Regex::new(&string).map_err(|e| {
                PatternError::Regex(pos_of(self.base, orig).unwrap(), &orig[..pos_of(orig, end).unwrap()], e)
            })?;
            let id = self.consts.regices.iter().position(|c| c.as_str() == re.as_str());
            let self_ = &mut *self;
            let id = id.unwrap_or_else(move || {
                self_.consts.regices.push(re);
                self_.consts.regices.len() - 1
            });
            self.tokens.push(PatternToken::Regex(id, skippable));
            *s = cursor;
            true
        }))
    }

    /// matcher <- sp ( parameter / str_literal / re_literal / predicate / ty ) sp
    fn matcher(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            match () {
                _ if self.parameter(&mut cursor)? => {},
                _ if self.str_literal(&mut cursor)? => {},
                _ if self.re_literal(&mut cursor)? => {},
                _ if self.predicate(&mut cursor)? => {},
                _ if self.ty(&mut cursor)? => {},
                _ => bry!('matches false),
            }
            self.sp(&mut cursor);
            *s = cursor;
            true
        }))
    }

    /// tag <- sp ( '->' -> Arrow ) sp key_subtree? sp ( matcher / name sp matcher? ) ( '' -> End ) sp
    fn tag(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, "->"));
            let mut self_ = TagHelper::start(&mut *self);
            {
                self_.tokens.push(PatternToken::Arrow);
            }
            self_.sp(&mut cursor);
            let _ = self_.key_subtree(&mut cursor)?;
            self_.sp(&mut cursor);
            if !self_.matcher(&mut cursor)? {
                bry!('matches self_.name(&mut cursor)?);
                self_.sp(&mut cursor);
                let _ = self_.matcher(&mut cursor)?;
            }
            self_.sp(&mut cursor);
            {
                self_.tokens.push(PatternToken::End);
            }
            self_.commit();
            *s = cursor;
            true
        }))
    }

    /// sp <- spaces*
    ///
    /// (always succeeds, so doesn't return anything)
    fn sp(&mut self, s: &mut &'s str) {
        *s = s.trim_start_matches(|c: char| {
            matches!(c, ' ' | '\n' | '\r' | '\t')
        });
    }

    /// identifier <- [A-Za-z_][A-Za-z0-9_]*
    fn identifier(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        if let Some('A'..='Z') | Some('a'..='z') | Some('_') = s.chars().next() {
            *s = s.trim_start_matches(|c: char| {
                matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '_')
            });
            Ok(true)
        } else {
            Ok(false)
        }
    }

    /// name <- sp ( identifier -> Identifier ) sp
    fn name(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            let start = cursor;
            bry!('matches self.identifier(&mut cursor)?);
            let name = &start[..pos_of(start, cursor).unwrap_or(start.len())];
            // no processing of `name` is required for this.
            let id = self.consts.strings.iter().position(|c| c == name);
            let id = id.unwrap_or_else(|| {
                self.consts.strings.insert(name.into());
                self.consts.strings.len() - 1
            });
            self.tokens.push(PatternToken::Identifier(id));
            self.sp(&mut cursor);
            *s = cursor;
            true
        }))
    }

    /// parameter <- sp '$' ( '?'? -> MarkSkippable ) ( identifier -> Parameter ) sp
    fn parameter(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, "$"));
            let skippable = strip_prefix(&mut cursor, "?");
            let start = cursor;
            bry!('matches self.identifier(&mut cursor)?);
            let name = &start[..pos_of(start, cursor).unwrap_or(start.len())];
            // no processing of `name` is required for this.
            let id = self.obj_ids.get(name).copied().map_or_else(
                || {
                    let (key, obj) = self.objs.as_mut().and_then(|map| {
                        map.remove_entry(name)
                    }).ok_or_else(|| {
                        let pos = pos_of(self.base, start).unwrap();
                        PatternError::UnknownParameter(pos, name)
                    })?;
                    let id = self.consts.defs.len();
                    self.consts.defs.push(obj);
                    self.obj_ids.insert(key, id);
                    Ok(id)
                },
                Ok,
            )?;
            self.tokens.push(PatternToken::Parameter(id, skippable));
            self.sp(&mut cursor);
            *s = cursor;
            true
        }))
    }

    /// ty <- sp ':' ( '?'? -> MarkSkippable ) ( identifier -> Type ) sp
    fn ty(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, ":"));
            let custom = strip_prefix(&mut cursor, "$");
            let skippable = strip_prefix(&mut cursor, "?");
            let start = cursor;
            bry!('matches self.identifier(&mut cursor)?);
            let name = &start[..pos_of(start, cursor).unwrap_or(start.len())];
            self.tokens.push(PatternToken::Type(match name {
                "any" => Type::Any,
                "ignored_any" => Type::IgnoredAny,
                "bool" => Type::Bool,
                "i8" => Type::I8,
                "i16" => Type::I16,
                "i32" => Type::I32,
                "i64" => Type::I64,
                "i128" => Type::I128,
                "u8" => Type::U8,
                "u16" => Type::U16,
                "u32" => Type::U32,
                "u64" => Type::U64,
                "u128" => Type::U128,
                "f32" => Type::F32,
                "f64" => Type::F64,
                "char" => Type::Char,
                "str" => Type::Str,
                "string" => Type::String,
                "bytes" => Type::Bytes,
                "bytebuf" => Type::ByteBuf,
                "option" => Type::Option,
                "unit" => Type::Unit,
                "seq" => Type::Seq,
                "map" => Type::Map,
                //"tuple" => Type::Tuple(usize),
                _ => {
                    let pos = pos_of(self.base, start).unwrap();
                    return Err(PatternError::UnknownPredicate(pos, name))
                }
            }, skippable));
            self.sp(&mut cursor);
            *s = cursor;
            true
        }))
    }

    /// predicate <- sp ':' '$' ( '?'? -> MarkSkippable ) ( identifier -> Predicate ) sp
    fn predicate(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, ":"));
            bry!('matches strip_prefix(&mut cursor, "$"));
            let skippable = strip_prefix(&mut cursor, "?");
            let start = cursor;
            bry!('matches self.identifier(&mut cursor)?);
            let name = &start[..pos_of(start, cursor).unwrap_or(start.len())];
            // no processing of `name` is required for this.
            let id = self.pred_ids.get(name).copied().map_or_else(
                || {
                    let (key, pred) = self.preds.as_mut().and_then(|map| {
                        map.remove_entry(name)
                    }).ok_or_else(|| {
                        let pos = pos_of(self.base, start).unwrap();
                        PatternError::UnknownPredicate(pos, name)
                    })?;
                    let id = self.consts.predicates.len();
                    self.consts.predicates.push(pred);
                    self.pred_ids.insert(key, id);
                    Ok(id)
                },
                Ok,
            )?;
            self.tokens.push(PatternToken::ApplyPredicate(id, skippable));
            self.sp(&mut cursor);
            *s = cursor;
            let pos = pos_of(self.base, start).unwrap();
            return Err(PatternError::Unimplemented(pos, cursor));
            //true
        }))
    }

    /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) ( '?'? -> MarkSkippable ) sp
    // ( '?'? -> MarkSkippable )
    fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, "["));
            self.sp(&mut cursor);
            let mut subtree = SubtreeHelper::start(&mut *self);
            // FIXME we may want to clean up tokens if these fail
            let marker = subtree.tokens.len();
            if !subtree.matcher(&mut cursor)? {
                bry!('matches subtree.name(&mut cursor)?);
                subtree.sp(&mut cursor);
                let _ = subtree.matcher(&mut cursor)?;
            }
            let value = match subtree.tokens.drain(marker..).as_slice() {
                name_value => collect_name_and_value(name_value),
            };
            subtree.consts.protos.last_mut().unwrap().push(value);
            subtree.sp(&mut cursor);
            bry!('matches subtree.subtree(&mut cursor)?);
            subtree.sp(&mut cursor);
            bry!('matches
                strip_prefix(&mut cursor, "]")
                ||
                subtree.unexpected_token(&mut cursor)?
                ||
                subtree.unexpected_end(&mut cursor)?
            );
            let optional = strip_prefix(&mut cursor, "?");
            subtree.sp(&mut cursor);
            *s = cursor;
            let id = subtree.commit();
            self.tokens.push(PatternToken::KeySubtree(id, optional));
            true
        }))
    }

    /// value_subtree <- sp '(' sp subtree sp ( ')' / unexpected_token / unexpected_end ) ( '?'? -> MarkSkippable ) sp
    fn value_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            self.sp(&mut cursor);
            bry!('matches strip_prefix(&mut cursor, "("));
            self.sp(&mut cursor);
            let mut subtree = SubtreeHelper::start(&mut *self);
            bry!('matches subtree.subtree(&mut cursor)?);
            subtree.sp(&mut cursor);
            bry!('matches
                strip_prefix(&mut cursor, ")")
                ||
                subtree.unexpected_token(&mut cursor)?
                ||
                subtree.unexpected_end(&mut cursor)?
            );
            let optional = strip_prefix(&mut cursor, "?");
            subtree.sp(&mut cursor);
            *s = cursor;
            if !subtree.is_empty() {
                let id = subtree.commit();
                self.tokens.push(PatternToken::ValueSubtree(id, optional));
            }
            true
        }))
    }

    /// unexpected_end <- ( !. ) -> ErrorToken
    fn unexpected_end(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        match *s {
            "" => Err(PatternError::Token(self.base.len(), s)),
            _ => Ok(false),
        }
    }

    /// unexpected_token <- . -> ErrorToken
    fn unexpected_token(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        match *s {
            "" => Ok(false),
            _ => Err(PatternError::Token(pos_of(self.base, s).unwrap(), s)),
        }
    }

    /// subtree <- sp tag* sp ( value_subtree '' -> End )* sp
    fn subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        self.sp(&mut cursor);
        while self.tag(&mut cursor)? {
        }
        self.sp(&mut cursor);
        let mut has_subtrees = false;
        let marker = self.tokens.len();
        self.consts.protos.last_mut().unwrap().push({
            PatternElement::SubtreeMarker
        });
        while self.value_subtree(&mut cursor)? {
            let subtree = match self.tokens.drain(marker..).as_slice() {
                &[] => continue,
                &[PatternToken::ValueSubtree(index, optional)] => {
                    PatternElement::ValueSubtree { index, optional }
                },
                other => {
                    unreachable!("{other:?}")
                }
            };
            self.consts.protos.last_mut().unwrap().push(subtree);
        }
        let proto = self.consts.protos.last_mut().unwrap();
        if let Some(PatternElement::SubtreeMarker) = proto.last() {
            proto.pop();
        }
        self.sp(&mut cursor);
        *s = cursor;
        // always matches (may match empty)
        Ok(true)
    }

    /// pattern <- ( matcher? sp subtree / unexpected_token ) ( !. / unexpected_token )
    fn pattern(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
        let mut cursor = *s;
        Ok(lblock!('matches: {
            let mut subtree = SubtreeHelper::start(&mut *self);
            // FIXME we may want to clean up tokens if subtree.matcher errors
            let marker = subtree.tokens.len();
            let _ = subtree.matcher(&mut cursor)?;
            let value = match subtree.tokens.drain(marker..).as_slice() {
                &[] => {
                    PatternElement::Value {
                        name: None,
                        value: None,
                    }
                },
                value => collect_value(None, value),
            };
            subtree.consts.protos.last_mut().unwrap().push(value);
            bry!('matches
                subtree.subtree(&mut cursor)?
                ||
                subtree.unexpected_token(&mut cursor)?
            );
            bry!('matches
                cursor == ""
                ||
                subtree.unexpected_token(&mut cursor)?
            );
            subtree.commit();
            *s = cursor;
            true
        }))
    }
}

/// Parses a DFU expression.
///
/// The given 
pub(crate) fn parse<'s, PKey, OKey, O>(
    input: &'s str,
    preds: Option<BTreeMap<PKey, Box<Predicate>>>,
    objs: Option<BTreeMap<OKey, O>>
) -> Result<PatternConstants<O>, PatternError<'s>>
where
    PKey: Borrow<str> + Ord,
    OKey: Borrow<str> + Ord,
    O: Serialize,
{
    let mut parser = Parser::new(input, preds, objs);

    let mut parsed = input;
    let matched = parser.pattern(&mut parsed)?;

    assert!(matched);
    assert_eq!(parsed, "");
    assert_eq!(
        parser.closed_subtrees.next().unwrap(),
        parser.consts.protos.len(),
    );
    assert!(parser.consts.protos.iter().all(|proto| !proto.is_empty()));

    Ok(parser.consts)
}

#[cfg(test)]
mod tests {
    use crate::errors::PatternError;
    use super::Parser;

    use proptest::prelude::*;

    fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, ()> {
        let mut parser = Parser::<
            's, &'static str, &'static str, ()
        >::new(s, None, None);
        parser
    }

    #[test]
    fn test_identifier() {
        fn identifier_input<'s>(s: &mut &'s str) -> Result<bool, PatternError<'s>> {
            let mut parser = Parser::<
                's, &'static str, &'static str, ()
            >::new(s, None, None);
            parser.identifier(s)
        }
        for mut identifier in vec!["test", "Test", "_test", "_Test", "_123",] {
            println!("{}", identifier);
            assert!(identifier_input(&mut identifier).unwrap());
            assert_eq!(identifier, "");
        }
        for mut identifier in vec!["123", "1_", " ",] {
            println!("{}", identifier);
            assert!(!identifier_input(&mut identifier).unwrap());
            assert_ne!(identifier, "");
        }
    }

    proptest! {
        #[test]
        fn test_no_crash(s in ".{0,4096}") {
            let _ = prep_parser(&s).str_literal(&mut &*s);
            let _ = prep_parser(&s).re_literal(&mut &*s);
            let _ = prep_parser(&s).matcher(&mut &*s);
            let _ = prep_parser(&s).tag(&mut &*s);
            let _ = prep_parser(&s).sp(&mut &*s);
            let _ = prep_parser(&s).identifier(&mut &*s);
            let _ = prep_parser(&s).name(&mut &*s);
            let _ = prep_parser(&s).parameter(&mut &*s);
            let _ = prep_parser(&s).predicate(&mut &*s);
            let _ = prep_parser(&s).ty(&mut &*s);
            let _ = prep_parser(&s).key_subtree(&mut &*s);
            let _ = prep_parser(&s).value_subtree(&mut &*s);
            let _ = prep_parser(&s).unexpected_end(&mut &*s);
            let _ = prep_parser(&s).unexpected_token(&mut &*s);
            // NOTE: never call subtree directly!!!
            //let _ = prep_parser(&s).subtree(&mut &*s);
            let _ = prep_parser(&s).pattern(&mut &*s);
        }
    }

    #[test]
    fn test_no_crash_some_patterns() {
        fn run_pattern(mut s: &str) {
            let _ = prep_parser(s).pattern(&mut s);
        }
        run_pattern("hello");
        run_pattern("/test/");
        run_pattern("'this'");
        run_pattern(":map");
        run_pattern(":?map");
        run_pattern(":map->[:str]:str");
        run_pattern(":map(->[:str]:str)()");
    }

    #[test]
    fn test_documented_edge_cases() {
        // tests that these patterns parse successfully
        fn run_pattern(mut s: &str) {
            let _ = prep_parser(s).pattern(&mut s).unwrap();
        }
        // Empty pattern.
        run_pattern("");
        // Empty subvalue
        run_pattern("()");
    }
}