Source code for probably.util.lark_expr_parser

r"""
----------------------
Lark Expression Parser
----------------------

Generate Lark grammars for expressions from operator tables.
The algorithm is pretty simple, but easy to mess up when doing it by hand.

.. doctest::

    >>> table = [
    ...    [infixl("plus", "+"), infixl("minus", "-")],
    ...    [prefix("neg", "-"), atom("parens", '"(" test ")"'), atom("int", "/[0-9]+/")]
    ... ]
    >>> print(build_expr_parser(table, "test"))
    ?test: test_0
    ?test_0: test_1
        | test_0 "+" test_1 -> plus
        | test_0 "-" test_1 -> minus
    ?test_1: "-" test_1 -> neg
        | "(" test ")" -> parens
        | /[0-9]+/ -> int
"""

from abc import ABC, abstractmethod
from enum import Enum, auto
from typing import List, Union

import attr


[docs] class Assoc(Enum): LEFT = auto() RIGHT = auto()
RuleName = str @attr.s class _OperatorClass(ABC): name: str = attr.ib() op: str = attr.ib() @abstractmethod def grammar(self, name: RuleName, next_name: RuleName) -> str: pass @attr.s class _InfixOperator(_OperatorClass): assoc: Assoc = attr.ib() def grammar(self, name: RuleName, next_name: RuleName) -> str: (lhs, rhs) = (name, next_name) if self.assoc == Assoc.LEFT else (next_name, name) return f'{lhs} "{self.op}" {rhs} -> {self.name}' @attr.s class _PrefixOperator(_OperatorClass): def grammar(self, name: RuleName, next_name: RuleName) -> str: return f'"{self.op}" {name} -> {self.name}' @attr.s class _PostfixOperator(_OperatorClass): def grammar(self, name: RuleName, next_name: RuleName) -> str: return f'{name} "{self.op}" -> {self.name}' @attr.s class _AtomOperator(_OperatorClass): def grammar(self, name: RuleName, next_name: RuleName) -> str: return f'{self.op} -> {self.name}' Operator = Union[_InfixOperator, _PrefixOperator, _PostfixOperator, _AtomOperator]
[docs] def infixl(name: RuleName, op: str) -> Operator: return _InfixOperator(name, op, Assoc.LEFT)
[docs] def infixr(name: RuleName, op: str) -> Operator: return _InfixOperator(name, op, Assoc.RIGHT)
[docs] def prefix(name: RuleName, op: str) -> Operator: return _PrefixOperator(name, op)
[docs] def postfix(name: RuleName, op: str) -> Operator: return _PostfixOperator(name, op)
[docs] def atom(name: RuleName, rule_str: str) -> Operator: return _AtomOperator(name, rule_str)
OperatorTable = List[List[Operator]] """ An OperatorTable contains a list of precedence levels in ascending order. """
[docs] def build_expr_parser(table: OperatorTable, rule_name: RuleName) -> str: """Generate Lark grammar from the operator table.""" # I've found the following example very instructive for the implementation: # https://github.com/lark-parser/lark/blob/84a876724958ac582c40737581d938cded37fe02/examples/calc.py def level_name(level: int) -> RuleName: return f'{rule_name}_{level}' grammar = [f'?{rule_name}: {level_name(0)}'] for i, level in enumerate(table): name = level_name(i) next_name = level_name((i + 1) % len(table)) # Either match an operator from this level... op_grammars = [operator.grammar(name, next_name) for operator in level] # ... or match one from the next (higher) level. if i < len(table) - 1: op_grammars.insert(0, next_name) rule = "\n | ".join(op_grammars) grammar.append(f'?{name}: {rule}') return "\n".join(grammar)