summary refs log blame commit diff stats
path: root/abdl/_parser.py
blob: 3c38ab2f09c28ff24e6f12f6fd033bc51b08739a (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16















                                                                             
                                     
 






                                                                       

                    














                                                                        
                    
                                     





                                         
                                      


                            













                                                                          
 



                                                                      
 




                                                                        

                         
                                          

                                                                  
                                               

                                                        
                                              

                                                                    
                                                   

                                 



                                                                       

                                                              



                                                                             

                             
                                                               


                                                                         

                                  


                                                                   



                                                                       
# This file is part of A Boneless Datastructure Language
# Copyright (C) 2020  Soni L.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""[Internal] pyparsing-based Parser.

Provides `BUILT_SYNTAX`.
"""

from pyparsing import Suppress, Literal, Forward, CharsNotIn, StringEnd
from pyparsing import Combine, Optional, Group, Word, srange, Empty

from abdl import exceptions as exc
from abdl import _vm

def _err_str_esc(match_str, pos, toks):
    """Parse action for an invalid escape sequence inside a string literal.

    Never returns: always raises `exc.PatternError` carrying `pos` and
    `toks`.  `match_str` is unused.
    """
    error = exc.PatternError("Error in string escape", pos=pos, toks=toks)
    raise error

def _err_str_end(match_str, pos, toks):
    """Parse action for a string literal that is never closed.

    Never returns: always raises `exc.PatternError` carrying `pos` and
    `toks`.  `match_str` is unused.
    """
    error = exc.PatternError("Unfinished string", pos=pos, toks=toks)
    raise error

def _err_re_esc(match_str, pos, toks):
    """Parse action for an invalid escape sequence inside a regex literal.

    Never returns: always raises `exc.PatternError` carrying `pos` and
    `toks`.  `match_str` is unused.
    """
    error = exc.PatternError("Error in regex escape", pos=pos, toks=toks)
    raise error

def _err_re_end(match_str, pos, toks):
    """Parse action for a regex literal that is never closed.

    Never returns: always raises `exc.PatternError` carrying `pos` and
    `toks`.  `match_str` is unused.
    """
    error = exc.PatternError("Unfinished regex", pos=pos, toks=toks)
    raise error

def _err_tok(match_str, pos, toks):
    """Parse action for input the grammar did not expect at this point.

    Attached both to stray characters and to a premature end of input.
    Never returns: always raises `exc.PatternError` carrying `pos` and
    `toks`.  `match_str` is unused.
    """
    error = exc.PatternError("Unexpected token", pos=pos, toks=toks)
    raise error

def _build_syntax():
    """Construct the pyparsing grammar for patterns.

    A pattern is a possibly empty chain of ``->`` arrows, each followed by a
    key matcher (identifier, ``$parameter``, ``'string'``, ``/regex/`` or
    ``[key subtree]``) and any number of ``:$predicate`` applications,
    followed by any number of ``(value subtree)`` groups.  ``%`` is the
    escape character inside string and regex literals.  Parse actions from
    `_vm` are attached to the grammar elements; malformed input is reported
    by the ``_err_*`` parse actions, which raise `exc.PatternError`.

    Returns:
        The top-level parser element, which must match up to the end of the
        input and has ``parseWithTabs()`` applied.
    """
    # pylint: disable=too-many-locals

    # Forward declaration: key and value subtrees nest the whole pattern.
    pattern = Forward()

    # Optional "?" marker; contributes "" when absent so the token slot is
    # always filled for the parse actions.
    skip_marker = Optional("?", default="")

    percent = Literal("%")
    # Excluding no characters at all means "exactly one arbitrary character".
    one_char = CharsNotIn("", exact=1)

    bad_token = one_char.copy().setParseAction(_err_tok)
    bad_end = StringEnd().setParseAction(_err_tok)

    def quoted(delim, plain_chars, on_bad_escape, on_unterminated):
        """Return the expression for a literal enclosed in `delim`.

        `plain_chars` matches the unescaped characters of the body;
        `on_bad_escape` and `on_unterminated` are the error parse actions.
        """
        bad_escape = one_char.copy().setParseAction(on_bad_escape)
        # "%" + delimiter and "%%" are the valid escapes; "%" followed by
        # any other character runs the error action.
        escape = Suppress(percent) + (delim | percent)
        escape |= percent + bad_escape
        body_char = (escape | plain_chars)
        # Reaching end of input before the closing delimiter is an error.
        closing = (Suppress(delim)
                   | StringEnd().setParseAction(on_unterminated))
        return (Combine(Suppress(delim) + body_char[...] + closing)
                + skip_marker)

    string_key = quoted(Literal("'"), CharsNotIn("%'"),
                        _err_str_esc, _err_str_end)
    string_key.setParseAction(_vm.StringKey.action)

    regex_key = quoted(Literal("/"), CharsNotIn("%/"),
                       _err_re_esc, _err_re_end)
    regex_key.setParseAction(_vm.RegexKey.action)

    arrow = Literal("->")
    arrow.setParseAction(_vm.Arrow.action)

    name = Word(srange("[A-Za-z_]"), srange("[A-Za-z0-9_]"))
    name.setParseAction(_vm.Ident.action)

    param = (Suppress("$") + skip_marker + name)
    param.setParseAction(_vm.Param.action)

    predicate = (Suppress(":") + skip_marker + Suppress("$") + name)
    predicate.setParseAction(_vm.ApplyPredicate.action)

    # Objects-as-keys: a nested pattern in square brackets.
    key_close = (Suppress("]") | bad_token | bad_end)
    key_subtree = (Suppress("[") + Group(predicate[...] + pattern)
                   + key_close
                   + skip_marker)
    key_subtree.setParseAction(_vm.KeySubtree.action)

    # Key matching - switches from "key" to "value".  An identifier may be
    # followed by one further matcher; otherwise a single matcher stands
    # alone.
    named_key = name + Optional(param | string_key | regex_key | key_subtree)
    key = named_key | param | string_key | regex_key | key_subtree
    tag = (key + predicate[...]
           + Empty().setParseAction(_vm.End.action))

    # Multiple value matching: a nested pattern in parentheses.
    value_close = (Suppress(")") | bad_token | bad_end)
    value_subtree = (Suppress("(") + Group(predicate[...] + pattern)
                     + value_close
                     + Optional("?", default=""))
    value_subtree.setParseAction(_vm.ValueSubtree.action)

    # Arrow-and-tag steps first, then value subtrees.
    steps = (arrow + tag)[...]
    branches = (value_subtree
                + Empty().setParseAction(_vm.End.action))[...]
    pattern <<= (steps + branches)

    top_level = (pattern | bad_token) + StringEnd()
    return top_level.parseWithTabs()

# Build the grammar once, at import time; this is the object the module
# docstring advertises.
BUILT_SYNTAX = _build_syntax()