summary refs log tree commit diff stats
path: root/abdl/_parser.py
blob: 074b3516e4446d94da2c582b34fecfc59abcd114 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# This file is part of A Boneless Datastructure Language
# Copyright (C) 2020  Soni L.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""[Internal] pyparsing-based Parser.

Provides `BUILT_SYNTAX`.
"""

from pyparsing import Suppress, Literal, Forward, CharsNotIn, StringEnd
from pyparsing import Combine, Optional, Group, Word, srange, Empty

from abdl import exceptions as exc
from abdl import _vm

def _err_str_esc(match_str, pos, toks):
    """Parse action: reject an invalid escape inside a string literal.

    Takes the three positional arguments of a pyparsing parse action;
    `match_str` is unused. Never returns.

    Raises:
        exc.PatternError: always, carrying `pos` and `toks`.
    """
    error = exc.PatternError("Error in string escape", pos=pos, toks=toks)
    raise error

def _err_str_end(match_str, pos, toks):
    """Parse action: reject a string literal that is never closed.

    Takes the three positional arguments of a pyparsing parse action;
    `match_str` is unused. Never returns.

    Raises:
        exc.PatternError: always, carrying `pos` and `toks`.
    """
    error = exc.PatternError("Unfinished string", pos=pos, toks=toks)
    raise error

def _err_re_esc(match_str, pos, toks):
    """Parse action: reject an invalid escape inside a regex literal.

    Takes the three positional arguments of a pyparsing parse action;
    `match_str` is unused. Never returns.

    Raises:
        exc.PatternError: always, carrying `pos` and `toks`.
    """
    error = exc.PatternError("Error in regex escape", pos=pos, toks=toks)
    raise error

def _err_re_end(match_str, pos, toks):
    """Parse action: reject a regex literal that is never closed.

    Takes the three positional arguments of a pyparsing parse action;
    `match_str` is unused. Never returns.

    Raises:
        exc.PatternError: always, carrying `pos` and `toks`.
    """
    error = exc.PatternError("Unfinished regex", pos=pos, toks=toks)
    raise error

def _err_tok(match_str, pos, toks):
    """Parse action: reject a token (or end of input) the grammar did not expect.

    Takes the three positional arguments of a pyparsing parse action;
    `match_str` is unused. Never returns.

    Raises:
        exc.PatternError: always, carrying `pos` and `toks`.
    """
    error = exc.PatternError("Unexpected token", pos=pos, toks=toks)
    raise error

def _build_syntax():
    """Build the pyparsing grammar for the pattern language.

    Each grammar rule is wired to the matching `_vm` parse action, and
    malformed input is routed to the module's `_err_*` parse actions, which
    raise `exc.PatternError`.

    Returns:
        The top-level parser element, after `parseWithTabs()` has been
        called on it.
    """
    # pylint: disable=too-many-locals

    # Forward declaration: subtrees nest inside key and value subtrees.
    subtree = Forward()

    # Optional "?" marker; yields "" when absent so the token is always there.
    skippable = Optional("?", default="")

    escape_char = Literal("%")
    # Excluding no characters and taking exactly one matches any single char.
    any_char = CharsNotIn("", exact=1)
    str_token = Literal("'")
    re_token = Literal("/")

    # Error sinks: matching either of these raises "Unexpected token".
    unexpected_token = any_char.copy().setParseAction(_err_tok)
    unexpected_end = StringEnd().setParseAction(_err_tok)

    # TODO reformat these
    # String escapes: "%'" and "%%" are valid (the leading "%" is dropped);
    # "%" followed by any other character raises.
    unexpected_str_escape = any_char.copy().setParseAction(_err_str_esc)
    str_escape = Suppress(escape_char) + (str_token | escape_char)
    str_escape |= escape_char + unexpected_str_escape
    str_char = (str_escape | CharsNotIn("%'"))

    # 'text' with an optional "?" suffix; the characters are combined into a
    # single token, and reaching end of input before the closing quote raises.
    str_literal = (Combine(Suppress(str_token) + str_char[...]
                           + (Suppress(str_token)
                              | StringEnd().setParseAction(_err_str_end)))
                   + skippable)
    str_literal.setParseAction(_vm.StringKey.action)

    # Regex escapes mirror string escapes, with "/" as the delimiter.
    unexpected_re_escape = any_char.copy().setParseAction(_err_re_esc)
    re_escape = Suppress(escape_char) + (re_token | escape_char)
    re_escape |= escape_char + unexpected_re_escape
    re_char = (re_escape | CharsNotIn("%/"))

    # /regex/ with an optional "?" suffix; same shape as str_literal.
    re_literal = (Combine(Suppress(re_token) + re_char[...]
                          + (Suppress(re_token)
                             | StringEnd().setParseAction(_err_re_end)))
                  + skippable)
    re_literal.setParseAction(_vm.RegexKey.action)

    arrow = Literal("->")
    arrow.setParseAction(_vm.Arrow.action)

    identifier = Word(srange("[A-Za-z_]"), srange("[A-Za-z0-9_]"))
    identifier.setParseAction(_vm.Ident.action)

    # "$" + optional "?" + identifier
    parameter = (Suppress("$") + skippable + identifier)
    parameter.setParseAction(_vm.Param.action)

    # ":" + optional "?" + "$" + identifier
    type_ = (Suppress(":") + skippable + Suppress("$") + identifier)
    type_.setParseAction(_vm.ApplyPredicate.action)

    # support for objects-as-keys
    # "[" types subtree "]" + optional "?"; anything else where "]" is
    # expected (including end of input) raises "Unexpected token".
    keysubtree = (Suppress("[") + Group(type_[...] + subtree)
                  + (Suppress("]") | unexpected_token | unexpected_end)
                  + skippable)
    keysubtree.setParseAction(_vm.KeySubtree.action)

    # represents key matching - switches from "key" to "value"
    # Either an identifier optionally followed by a matcher, or a matcher on
    # its own; then any number of types, then an End marker.
    tag = ((identifier
            + Optional(parameter | str_literal | re_literal | keysubtree)
            | parameter | str_literal | re_literal | keysubtree) + type_[...]
           + Empty().setParseAction(_vm.End.action))

    # multiple value matching
    # "(" types subtree ")" + optional "?". Reuses the shared `skippable`
    # element, like every other rule that accepts the "?" marker.
    valuesubtree = (Suppress("(") + Group(type_[...] + subtree)
                    + (Suppress(")") | unexpected_token | unexpected_end)
                    + skippable)
    valuesubtree.setParseAction(_vm.ValueSubtree.action)

    # arrow and tag, value subtree
    subtree <<= ((arrow + tag)[...]
                 + (valuesubtree
                    + Empty().setParseAction(_vm.End.action))[...])

    # The whole input must be consumed; leftovers raise "Unexpected token".
    pattern = (subtree | unexpected_token) + (StringEnd() | unexpected_token)
    # parseWithTabs() turns off pyparsing's default tab expansion and returns
    # the same element.
    return pattern.parseWithTabs()

# The grammar this module provides; built once, when the module is imported.
BUILT_SYNTAX = _build_syntax()