sane-date-math: convert to LR parser
This commit is contained in:
parent
51a96525d9
commit
16fa1e0eda
|
@ -2,8 +2,8 @@
|
||||||
|
|
||||||
# i just went overboard playing around with parsers, is all.
|
# i just went overboard playing around with parsers, is all.
|
||||||
# use this like `./sane-date-math 'today - 5d'`
|
# use this like `./sane-date-math 'today - 5d'`
|
||||||
# of course, it handles parenthesizes and operator precedence, so you can do sillier things like
|
# of course, it handles parentheses and operator precedence/associativity, so you can do sillier things like
|
||||||
# `./sane-date-math ' today - (3*4+1 - ((0)) ) *7d '`
|
# `./sane-date-math ' today - (1+3 *4 - ((0)) ) *7d '`
|
||||||
|
|
||||||
|
|
||||||
import abc
|
import abc
|
||||||
|
@ -15,8 +15,6 @@ class Token:
|
||||||
self.c = c
|
self.c = c
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
if self == EOF:
|
|
||||||
return "<EOF>"
|
|
||||||
return f"{self.c!r}"
|
return f"{self.c!r}"
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
|
@ -25,7 +23,6 @@ class Token:
|
||||||
def __eq__(self, other: 'Token') -> bool:
|
def __eq__(self, other: 'Token') -> bool:
|
||||||
return self.c == other.c
|
return self.c == other.c
|
||||||
|
|
||||||
EOF = Token('\x05')
|
|
||||||
PLUS = Token('+')
|
PLUS = Token('+')
|
||||||
MINUS = Token('-')
|
MINUS = Token('-')
|
||||||
ASTERISK = Token('*')
|
ASTERISK = Token('*')
|
||||||
|
@ -40,349 +37,235 @@ ALPHA = ALPHA_LOWER + ALPHA_UPPER
|
||||||
ALPHA_UNDER = ALPHA + [UNDERSCORE]
|
ALPHA_UNDER = ALPHA + [UNDERSCORE]
|
||||||
ALPHA_NUM_UNDER = ALPHA_UNDER + DIGITS
|
ALPHA_NUM_UNDER = ALPHA_UNDER + DIGITS
|
||||||
|
|
||||||
|
class ParserContext:
|
||||||
|
def feed(self, token: Token) -> 'ParserContext':
|
||||||
|
return None # can't ingest the token
|
||||||
|
|
||||||
|
def upgrade(self) -> 'ParserContext':
|
||||||
|
return None # no upgrade path
|
||||||
|
|
||||||
# TODO: should be enum
|
class Parser:
|
||||||
class ParseCode:
|
|
||||||
# return if the parser cannot parse the provided token
|
|
||||||
HALT = 0
|
|
||||||
# return if the parser is already "complete" and the token should be yielded to the outer context instead
|
|
||||||
YIELD = 1
|
|
||||||
# return if the parser successfully consumed the provided token and parsing should continue
|
|
||||||
CONTINUE = 2
|
|
||||||
|
|
||||||
class ParserContext(metaclass=abc.ABCMeta):
|
|
||||||
@abc.abstractmethod
|
|
||||||
def feed(self, token: Token) -> ParseCode:
|
|
||||||
"""
|
|
||||||
possibly ingests this token, modifying internal state,
|
|
||||||
and providing instruction to the outer parser layer on
|
|
||||||
how to proceed.
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def context(self) -> 'ParserContext':
|
|
||||||
"""
|
|
||||||
hack to make type-level "Productions" compatible with instance-level "ParserContext"s.
|
|
||||||
"""
|
|
||||||
return self
|
|
||||||
|
|
||||||
def destructure(self) -> object:
|
|
||||||
"""
|
|
||||||
destructure the outer layer of this ParserContext to obtain access to whatever state it captured.
|
|
||||||
e.g. Then([A, Choice([B, C])]) destructures first to [A, Choice([B, C])].
|
|
||||||
it's not recursive; the inner layers must be manually destructured.
|
|
||||||
"""
|
|
||||||
return self
|
|
||||||
|
|
||||||
class Then(ParserContext):
|
|
||||||
"""
|
"""
|
||||||
primitive combinator: given a sequence of parser constructs, parse the input
|
LR parser.
|
||||||
using the first parser until that parser yields, then parse using the second
|
keeps exactly one root item, and for each input token
|
||||||
parser, and so on.
|
feeds it to the root, possibly "upgrading" the root N times
|
||||||
|
before it's able to be fed.
|
||||||
"""
|
"""
|
||||||
def __init__(self, items: list):
|
def __init__(self, root: ParserContext):
|
||||||
self.items = [i.context() for i in items]
|
self.root = root
|
||||||
|
|
||||||
|
def feed(self, token: Token) -> bool:
|
||||||
|
new_root = self.root.feed(token)
|
||||||
|
if new_root is not None:
|
||||||
|
self.root = new_root
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
# root can't directly accept this item.
|
||||||
|
# "upgrade" it and try again.
|
||||||
|
new_root = self.root.upgrade()
|
||||||
|
if new_root is None: return False
|
||||||
|
self.root = new_root
|
||||||
|
return self.feed(token)
|
||||||
|
|
||||||
|
def complete(self) -> ParserContext:
|
||||||
|
# upgrade the root as far as possible before returning
|
||||||
|
root = None
|
||||||
|
new_root = self.root
|
||||||
|
while new_root is not None:
|
||||||
|
root = new_root
|
||||||
|
new_root = root.upgrade()
|
||||||
|
|
||||||
|
return root
|
||||||
|
|
||||||
|
class ReprParserContext(ParserContext):
|
||||||
|
""" helper that gives a good default repr to most contexts """
|
||||||
|
def __init__(self, items: list = None):
|
||||||
|
self.items = items if items is not None else []
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"Then({self.items!r})"
|
return f'{self.__class__.__name__}({self.items!r})'
|
||||||
|
|
||||||
def __str__(self) -> str:
|
|
||||||
return str(self.items)
|
|
||||||
|
|
||||||
def feed(self, token: Token) -> ParseCode:
|
class BaseContext(ReprParserContext):
|
||||||
# we expect parser contexts to be fused: once they YIELD,
|
""" empty context; initial state of the parser """
|
||||||
# they should yield on all future calls as well
|
def feed(self, token: Token) -> ParserContext:
|
||||||
for i in self.items:
|
if token == SPACE:
|
||||||
result = i.feed(token)
|
return self
|
||||||
if result != ParseCode.YIELD: return result
|
if token == OPEN_PAREN:
|
||||||
else:
|
return ParenContext(BaseContext())
|
||||||
# all items are done parsing; so are we!
|
if token in DIGITS:
|
||||||
return ParseCode.YIELD
|
return IntegerContext([token])
|
||||||
|
if token in ALPHA_UNDER:
|
||||||
|
return IdentifierContext([token])
|
||||||
|
|
||||||
def destructure(self) -> list:
|
class IdentifierContext(ReprParserContext):
|
||||||
return self.items
|
""" context is an identifier like `today` """
|
||||||
|
def __init__(self, tokens: list):
|
||||||
|
super().__init__(tokens)
|
||||||
|
self.tokens = tokens
|
||||||
|
|
||||||
class Choice(ParserContext):
|
def feed(self, token: Token) -> ParserContext:
|
||||||
|
if token in ALPHA_NUM_UNDER:
|
||||||
|
return IdentifierContext(self.tokens + [token])
|
||||||
|
|
||||||
|
def upgrade(self) -> ParserContext:
|
||||||
|
return StrongValueContext(self)
|
||||||
|
|
||||||
|
class IntegerContext(ReprParserContext):
|
||||||
|
""" context is an integer like `45` """
|
||||||
|
def __init__(self, tokens: list):
|
||||||
|
super().__init__(tokens)
|
||||||
|
self.tokens = tokens
|
||||||
|
|
||||||
|
def feed(self, token: Token) -> ParserContext:
|
||||||
|
if token in DIGITS:
|
||||||
|
return IntegerContext(self.tokens + [token])
|
||||||
|
if token == Token('d'):
|
||||||
|
return DurationContext(self)
|
||||||
|
|
||||||
|
def upgrade(self) -> ParserContext:
|
||||||
|
# can't continue the integer; it becomes a value
|
||||||
|
return StrongValueContext(self)
|
||||||
|
|
||||||
|
class DurationContext(ReprParserContext):
|
||||||
|
""" context is a duration like `14d` """
|
||||||
|
def __init__(self, value: IntegerContext):
|
||||||
|
super().__init__([value])
|
||||||
|
self.value = value
|
||||||
|
|
||||||
|
def upgrade(self) -> ParserContext:
|
||||||
|
return StrongValueContext(self)
|
||||||
|
|
||||||
|
class BaseValueContext(ReprParserContext):
|
||||||
|
""" abstract base for types that can be used in compound expressions """
|
||||||
|
def __init__(self, value: ParserContext):
|
||||||
|
super().__init__([value])
|
||||||
|
self.value = value
|
||||||
|
|
||||||
|
def feed(self, token: Token) -> ParserContext:
|
||||||
|
if token == SPACE:
|
||||||
|
return self
|
||||||
|
|
||||||
|
class StrongValueContext(BaseValueContext):
|
||||||
"""
|
"""
|
||||||
primitive combinator: try each parser in order and use the first match.
|
in the context of operators, a strong value is something which prefers
|
||||||
NB: there's no lookahead. whichever parser is able to parse the first token
|
to not be grabbed by a lhs value.
|
||||||
is used for the entire stream.
|
|
||||||
|
so for example, strong values have the opportunity to initiate a multiply operation before the lhs closes an addition operation that this strong value is a part of
|
||||||
"""
|
"""
|
||||||
def __init__(self, choices: list):
|
def feed(self, token: Token) -> ParserContext:
|
||||||
self.choices = choices
|
if token == ASTERISK:
|
||||||
self.active = None
|
return BinaryOpContext(self, token, BaseContext())
|
||||||
|
return super().feed(token)
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def upgrade(self) -> ParserContext:
|
||||||
return f"Choice({self.choices!r})"
|
return WeakValueContext(self.value)
|
||||||
|
|
||||||
def __str__(self) -> str:
|
class WeakValueContext(BaseValueContext):
|
||||||
if self.active is not None:
|
def feed(self, token: Token) -> ParserContext:
|
||||||
return str(self.active)
|
if token == PLUS:
|
||||||
else:
|
return BinaryOpContext(self, token, BaseContext())
|
||||||
return repr(self)
|
if token == MINUS:
|
||||||
|
return BinaryOpContext(self, token, BaseContext())
|
||||||
|
|
||||||
def feed(self, token: Token) -> ParseCode:
|
return super().feed(token)
|
||||||
if self.active is not None:
|
|
||||||
return self.active.feed(token)
|
|
||||||
|
|
||||||
for choice in self.choices:
|
class BinaryOpContext(ReprParserContext):
|
||||||
item = choice.context()
|
""" context for a binary operation. the LHS and operator are parsed, but the rhs may not yet contain a value """
|
||||||
result = item.feed(token)
|
def __init__(self, lhs: BaseValueContext, oper: Token, rhs: ParserContext):
|
||||||
if result is not ParseCode.HALT:
|
super().__init__([lhs, oper, rhs])
|
||||||
self.active = item
|
self.lhs = lhs
|
||||||
return result
|
self.oper = oper
|
||||||
|
self.rhs = rhs
|
||||||
|
|
||||||
return ParseCode.HALT # no matches
|
@property
|
||||||
|
def precedence_class(self) -> type:
|
||||||
|
if self.oper in [PLUS, MINUS]:
|
||||||
|
return WeakValueContext
|
||||||
|
if self.oper == ASTERISK:
|
||||||
|
return StrongValueContext
|
||||||
|
|
||||||
def destructure(self) -> ParserContext:
|
def feed(self, token: Token) -> ParserContext:
|
||||||
return self.active
|
new_rhs = self.rhs.feed(token)
|
||||||
|
if new_rhs is not None:
|
||||||
|
return BinaryOpContext(self.lhs, self.oper, new_rhs)
|
||||||
|
|
||||||
class WantToken(ParserContext):
|
def upgrade(self) -> ParserContext:
|
||||||
"""
|
new_rhs = self.rhs.upgrade()
|
||||||
match a single token out of a list of valid tokens
|
if new_rhs is None: return None
|
||||||
"""
|
|
||||||
def __init__(self, want: list):
|
|
||||||
self.has = None
|
|
||||||
self.want = [want] if isinstance(want, Token) else want
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
# upgrade self once the rhs has reached the required precedence compatible with this operator
|
||||||
return f"WantToken({self.want!r})"
|
new_self = BinaryOpContext(self.lhs, self.oper, new_rhs)
|
||||||
|
if isinstance(new_rhs, self.precedence_class):
|
||||||
|
return StrongValueContext(self) # close the operation
|
||||||
|
|
||||||
def feed(self, token: Token) -> ParseCode:
|
return new_self
|
||||||
if self.has is not None: return ParseCode.YIELD
|
|
||||||
if token in self.want:
|
|
||||||
self.has = token
|
|
||||||
return ParseCode.CONTINUE
|
|
||||||
return ParseCode.HALT
|
|
||||||
|
|
||||||
def destructure(self) -> Token:
|
class ParenContext(ReprParserContext):
|
||||||
return self.has
|
""" context for a value contained within parentheses """
|
||||||
|
def __init__(self, inner: ParserContext):
|
||||||
|
super().__init__([inner])
|
||||||
|
self.inner = inner
|
||||||
|
|
||||||
class Empty(ParserContext):
|
def feed(self, token: Token) -> ParserContext:
|
||||||
"""
|
new_inner = self.inner.feed(token)
|
||||||
used as a terminal to allow for constructs like `optional`
|
if new_inner is not None:
|
||||||
"""
|
return ParenContext(new_inner)
|
||||||
def feed(self, token: Token) -> ParseCode:
|
|
||||||
return ParseCode.YIELD
|
|
||||||
|
|
||||||
def destructure(self) -> None:
|
if token == CLOSE_PAREN and isinstance(self.inner, WeakValueContext):
|
||||||
return None
|
return StrongValueContext(self)
|
||||||
|
|
||||||
def optional(context: ParserContext) -> ParserContext:
|
def upgrade(self) -> ParserContext:
|
||||||
return Choice([context, Empty()])
|
new_inner = self.inner.upgrade()
|
||||||
|
if new_inner is not None:
|
||||||
|
return ParenContext(new_inner)
|
||||||
|
|
||||||
|
|
||||||
## "Productions" sit on top of these base ParserContexts in order to give names to
|
## AstItems are produced from a ParserContext input
|
||||||
## large token sequences and to "reduce" them into AST types more intelligently.
|
## ParserContext parse outputs are translated into `AstItem`s before evaluation
|
||||||
|
## so that we can operate on a higher-level tree that directly encodes native values like integers
|
||||||
class ProductionContext(ParserContext):
|
|
||||||
"""
|
|
||||||
this adapts from the Production system of specification to the ParserContext system.
|
|
||||||
this is instantiated for high-level productions where we specify a grammar
|
|
||||||
and then parse "all in one go", sealing away incomplete state, and converting
|
|
||||||
the parsed tokens into actually useful abstractions (like signed numbers).
|
|
||||||
"""
|
|
||||||
def __init__(self, production: 'Production', context: ParserContext = None):
|
|
||||||
self.production = production
|
|
||||||
self.context = context if context is not None else production.grammar()
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
|
||||||
return f"ProductionContext({self.production!r}, {self.context!r})"
|
|
||||||
|
|
||||||
def __str__(self) -> str:
|
|
||||||
return str(self.context)
|
|
||||||
|
|
||||||
def feed(self, token: Token) -> ParseCode:
|
|
||||||
return self.context.feed(token)
|
|
||||||
|
|
||||||
def reduce_inner(self, inner: ParserContext):
|
|
||||||
if isinstance(inner, ProductionContext):
|
|
||||||
return inner.reduce() # easy
|
|
||||||
elif isinstance(inner, ParserContext):
|
|
||||||
return self.reduce_inner(inner.destructure())
|
|
||||||
elif isinstance(inner, list): # happens via unpacking of Then objects
|
|
||||||
return [self.reduce_inner(i) for i in inner]
|
|
||||||
else:
|
|
||||||
return inner
|
|
||||||
|
|
||||||
def reduce(self) -> object:
|
|
||||||
# XXX this ends up being a leaf -> root reduction,
|
|
||||||
# which generally makes it harder to achieve detailed control when nesting.
|
|
||||||
return self.production.reduce(self.reduce_inner(self.context))
|
|
||||||
|
|
||||||
class Production:
|
|
||||||
"""
|
|
||||||
non-generic, likely multi-token productions,
|
|
||||||
specified in terms of other Productions and the above primitives
|
|
||||||
"""
|
|
||||||
def grammar(self) -> ParserContext:
|
|
||||||
raise NotImplementedError()
|
|
||||||
|
|
||||||
def context(self) -> ParserContext:
|
|
||||||
return ProductionContext(self)
|
|
||||||
|
|
||||||
def reduce(self, inner: object) -> object:
|
|
||||||
"""
|
|
||||||
use to construct the outer types out of already-converted inner types.
|
|
||||||
e.g. Number = Then([optional(Minus), Digits, optional(Suffix)])
|
|
||||||
gets called with reduce([a, b, c]), where a is the already reduced `optional(Minus)`,
|
|
||||||
i.e. `None` or whatever type corresponds to the Minus token.
|
|
||||||
"""
|
|
||||||
return inner
|
|
||||||
|
|
||||||
class DigitProduction(Production):
|
|
||||||
""" one digit token """
|
|
||||||
def grammar(self) -> ParserContext:
|
|
||||||
return WantToken(DIGITS)
|
|
||||||
|
|
||||||
def reduce(self, inner: Token) -> int:
|
|
||||||
return int(inner.c)
|
|
||||||
|
|
||||||
class IntProduction(Production):
|
|
||||||
""" multi-digit integer """
|
|
||||||
def grammar(self) -> ParserContext:
|
|
||||||
return Then([
|
|
||||||
DigitProduction(),
|
|
||||||
optional(IntProduction()),
|
|
||||||
])
|
|
||||||
|
|
||||||
def reduce(self, inner: list) -> int:
|
|
||||||
# TODO: wrong associativity
|
|
||||||
leading, trailing = inner
|
|
||||||
if trailing is None:
|
|
||||||
return leading
|
|
||||||
else:
|
|
||||||
return leading*10 + trailing
|
|
||||||
|
|
||||||
class DurationOrIntProduction(Production):
|
|
||||||
# due to a lack of lookahead, we combine duration and int parsing into one production
|
|
||||||
# because a duration shares a complete int as prefix
|
|
||||||
def grammar(self) -> ParserContext:
|
|
||||||
return Then([
|
|
||||||
IntProduction(),
|
|
||||||
optional(WantToken(Token('d'))),
|
|
||||||
])
|
|
||||||
|
|
||||||
def reduce(self, inner: list) -> 'Literal':
|
|
||||||
value, suffix = inner
|
|
||||||
if suffix is None:
|
|
||||||
return Literal(value)
|
|
||||||
else:
|
|
||||||
return Literal(timedelta(value))
|
|
||||||
|
|
||||||
class Whitespace(Production):
|
|
||||||
def grammar(self) -> ParserContext:
|
|
||||||
return Then([
|
|
||||||
WantToken(SPACE),
|
|
||||||
optional(Whitespace()),
|
|
||||||
])
|
|
||||||
|
|
||||||
class ParenthesizedExpr(Production):
|
|
||||||
def grammar(self) -> ParserContext:
|
|
||||||
return Then([
|
|
||||||
WantToken(OPEN_PAREN),
|
|
||||||
Expr(),
|
|
||||||
WantToken(CLOSE_PAREN),
|
|
||||||
])
|
|
||||||
|
|
||||||
def reduce(self, inner: list) -> 'AstItem':
|
|
||||||
open, expr, close = inner
|
|
||||||
return expr
|
|
||||||
|
|
||||||
class IdentifierTail(Production):
|
|
||||||
def grammar(self) -> ParserContext:
|
|
||||||
return Then([
|
|
||||||
WantToken(ALPHA_NUM_UNDER),
|
|
||||||
optional(IdentifierTail()),
|
|
||||||
])
|
|
||||||
|
|
||||||
|
|
||||||
class Identifier(Production):
|
|
||||||
""" variable-style identifier, e.g. 'TODAY' """
|
|
||||||
def grammar(self) -> ParserContext:
|
|
||||||
return Then([
|
|
||||||
WantToken(ALPHA_UNDER),
|
|
||||||
optional(IdentifierTail()),
|
|
||||||
])
|
|
||||||
|
|
||||||
def reduce(self, inner: list) -> 'Literal':
|
|
||||||
# fold the tokens into a string
|
|
||||||
first, rest = inner
|
|
||||||
head = first.c
|
|
||||||
while rest is not None:
|
|
||||||
next, rest = rest
|
|
||||||
head += next.c
|
|
||||||
return Variable(head)
|
|
||||||
|
|
||||||
class UnaryExpr(Production):
|
|
||||||
""" some expression which does not invoke any operators at the outermost level """
|
|
||||||
def grammar(self) -> ParserContext:
|
|
||||||
return Then([
|
|
||||||
optional(Whitespace()),
|
|
||||||
Choice([
|
|
||||||
DurationOrIntProduction(),
|
|
||||||
Identifier(),
|
|
||||||
ParenthesizedExpr(),
|
|
||||||
]),
|
|
||||||
optional(Whitespace()),
|
|
||||||
])
|
|
||||||
|
|
||||||
def reduce(self, inner: list):
|
|
||||||
# drop the whitespace
|
|
||||||
leading, primary, trailing = inner
|
|
||||||
return primary
|
|
||||||
|
|
||||||
class ExprRHS(Production):
|
|
||||||
""" right hand side of a binary operation """
|
|
||||||
def grammar(self) -> ParserContext:
|
|
||||||
return Then([
|
|
||||||
Choice([WantToken(ASTERISK), WantToken(PLUS), WantToken(MINUS)]),
|
|
||||||
# remaining, is just another `Expr`, but we need to keep the fields expanded here to control precedence.
|
|
||||||
UnaryExpr(),
|
|
||||||
Choice([ExprRHS(), Empty()]),
|
|
||||||
])
|
|
||||||
|
|
||||||
class Expr(Production):
|
|
||||||
""" this is the top-level production """
|
|
||||||
def grammar(self) -> ParserContext:
|
|
||||||
return Then([
|
|
||||||
UnaryExpr(),
|
|
||||||
Choice([ExprRHS(), Empty()])
|
|
||||||
])
|
|
||||||
|
|
||||||
def reduce(self, inner: list):
|
|
||||||
lhs, rhs = inner
|
|
||||||
if rhs is None: return lhs
|
|
||||||
|
|
||||||
# convert the whole right-hand-side of the tree, iteratively.
|
|
||||||
oper, rhs, rhs_next = rhs
|
|
||||||
if oper == ASTERISK:
|
|
||||||
# multiplication has high precedence and we grab the adjacent token ASAP
|
|
||||||
lhs = MulOp(lhs, rhs)
|
|
||||||
if rhs_next is not None:
|
|
||||||
lhs = self.reduce([lhs, rhs_next])
|
|
||||||
else:
|
|
||||||
# reduce the rhs and *then* apply this operator
|
|
||||||
if rhs_next is not None:
|
|
||||||
rhs = self.reduce([rhs, rhs_next])
|
|
||||||
|
|
||||||
if oper == PLUS:
|
|
||||||
lhs = AddOp(lhs, rhs)
|
|
||||||
elif oper == MINUS:
|
|
||||||
lhs = SubOp(lhs, rhs)
|
|
||||||
|
|
||||||
return lhs
|
|
||||||
|
|
||||||
|
|
||||||
## parsed productions are `reduce`d to more useful `AstItem` items which we use
|
|
||||||
## for the actual evaluation/computation
|
|
||||||
|
|
||||||
class AstItem(metaclass=abc.ABCMeta):
|
class AstItem(metaclass=abc.ABCMeta):
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def eval(self, context: dict):
|
def eval(self, context: dict):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def decode_item(p: ParserContext) -> 'AstItem':
|
||||||
|
if isinstance(p, IntegerContext):
|
||||||
|
return Literal(AstItem.decode_integer(p))
|
||||||
|
if isinstance(p, DurationContext):
|
||||||
|
return Literal(timedelta(AstItem.decode_integer(p.value)))
|
||||||
|
if isinstance(p, IdentifierContext):
|
||||||
|
return Variable(AstItem.decode_identifier(p))
|
||||||
|
if isinstance(p, BaseValueContext):
|
||||||
|
return AstItem.decode_item(p.value)
|
||||||
|
if isinstance(p, BinaryOpContext):
|
||||||
|
return AstItem.decode_bin_op(
|
||||||
|
p.oper.c,
|
||||||
|
AstItem.decode_item(p.lhs),
|
||||||
|
AstItem.decode_item(p.rhs)
|
||||||
|
)
|
||||||
|
if isinstance(p, ParenContext):
|
||||||
|
return AstItem.decode_item(p.inner)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def decode_integer(p: IntegerContext) -> int:
|
||||||
|
return int(''.join(t.c for t in p.tokens))
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def decode_identifier(p: IdentifierContext) -> str:
|
||||||
|
return ''.join(t.c for t in p.tokens)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def decode_bin_op(ty: str, lhs: 'AstItem', rhs: 'AstItem') -> 'BinaryOp':
|
||||||
|
if ty == '+':
|
||||||
|
return AddOp(lhs, rhs)
|
||||||
|
if ty == '-':
|
||||||
|
return SubOp(lhs, rhs)
|
||||||
|
if ty == '*':
|
||||||
|
return MulOp(lhs, rhs)
|
||||||
|
|
||||||
class Literal(AstItem):
|
class Literal(AstItem):
|
||||||
def __init__(self, v):
|
def __init__(self, v):
|
||||||
self.v = v
|
self.v = v
|
||||||
|
@ -429,32 +312,33 @@ class MulOp(BinaryOp):
|
||||||
def eval(self, context: dict):
|
def eval(self, context: dict):
|
||||||
return self.lhs.eval(context) * self.rhs.eval(context)
|
return self.lhs.eval(context) * self.rhs.eval(context)
|
||||||
|
|
||||||
|
|
||||||
|
## toplevel routine. tokenize -> parse -> decode to AST -> evaluate
|
||||||
|
|
||||||
def tokenize(stream: str) -> list:
|
def tokenize(stream: str) -> list:
|
||||||
return [Token(char) for char in stream]
|
return [Token(char) for char in stream]
|
||||||
|
|
||||||
def parse(ty: Production, tokens: list) -> AstItem:
|
def parse(tokens: list) -> ParserContext:
|
||||||
ctx = Then([ty, Empty()])
|
parser = Parser(BaseContext())
|
||||||
for i, t in enumerate(tokens):
|
for i, t in enumerate(tokens):
|
||||||
result = ctx.feed(t)
|
result = parser.feed(t)
|
||||||
# print(f"i={i}; t={t}; state: {ctx!r}")
|
# print(f"i={i}; t={t}; state: {ctx!r}")
|
||||||
assert result == ParseCode.CONTINUE, f"unexpected token '{t}' at {i}; state: {ctx!r}"
|
assert result, f"unexpected token '{t}' at {i}; state: {parser.complete()!r}"
|
||||||
|
|
||||||
# feed a trailing EOF which no production should consume.
|
return parser.complete()
|
||||||
# this either drives the context to a HALT state, if it's expecting
|
|
||||||
# some specific other token, or YIELD if it's happy for the stream to be closed.
|
|
||||||
assert ctx.feed(EOF) == ParseCode.YIELD, f"incomplete expression: {ctx!r}"
|
|
||||||
|
|
||||||
return ctx.destructure()[0].reduce()
|
|
||||||
|
|
||||||
|
|
||||||
def evaluate(expr: str) -> object:
|
def evaluate(expr: str) -> object:
|
||||||
tok = tokenize(expr)
|
tok = tokenize(expr)
|
||||||
expr = parse(Expr(), tok)
|
parse_tree = parse(tok)
|
||||||
print(expr)
|
print(parse_tree)
|
||||||
|
ast = AstItem.decode_item(parse_tree)
|
||||||
|
print(ast)
|
||||||
|
|
||||||
env = dict(
|
env = dict(
|
||||||
today=datetime.now()
|
today=datetime.now()
|
||||||
)
|
)
|
||||||
return expr.eval(env)
|
return ast.eval(env)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in New Issue
Block a user