some kind of sane-date-math date/time util thing. idk, parsers are fun ig
This commit is contained in:
parent
f9aa36a620
commit
7b01822ee7
|
@ -95,6 +95,7 @@ resholve.mkDerivation {
|
||||||
# remove python scripts
|
# remove python scripts
|
||||||
# TODO: figure out how to make resholve process only shell scripts
|
# TODO: figure out how to make resholve process only shell scripts
|
||||||
rm sane-reclaim-boot-space
|
rm sane-reclaim-boot-space
|
||||||
|
rm sane-date-math
|
||||||
'';
|
'';
|
||||||
|
|
||||||
installPhase = ''
|
installPhase = ''
|
||||||
|
|
480
pkgs/sane-scripts/src/sane-date-math
Executable file
480
pkgs/sane-scripts/src/sane-date-math
Executable file
|
@ -0,0 +1,480 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# i just went overboard playing around with parsers, is all.
|
||||||
|
# use this like `./sane-date-math 'today - 5d'`
|
||||||
|
# of course, it handles parenthesizes and operator precedence, so you can do sillier things like
|
||||||
|
# `./sane-date-math ' today - (3*4+1 - ((0)) ) *7d '`
|
||||||
|
|
||||||
|
|
||||||
|
import abc
import sys
from datetime import datetime, timedelta
from enum import IntEnum
|
||||||
|
|
||||||
|
class Token:
    """A single input character, wrapped so character classes compare by value."""

    def __init__(self, c: str):
        self.c = c  # the underlying one-character string

    def __repr__(self) -> str:
        if self == EOF:
            return "<EOF>"
        return f"{self.c!r}"

    def __str__(self) -> str:
        return self.c

    def __eq__(self, other: 'Token') -> bool:
        # fix: comparing against a non-Token used to raise AttributeError on
        # `other.c`; return NotImplemented so Python falls back gracefully.
        if not isinstance(other, Token):
            return NotImplemented
        return self.c == other.c

    def __hash__(self) -> int:
        # fix: defining __eq__ without __hash__ made Token unhashable
        # (Python sets __hash__ to None); delegate to the wrapped char.
        return hash(self.c)
|
||||||
|
|
||||||
|
# sentinel fed after the input stream ends; '\x05' is ASCII ENQ, which
# should never appear in a legitimate expression
EOF = Token('\x05')
# single-character operator / punctuation tokens
PLUS = Token('+')
MINUS = Token('-')
ASTERISK = Token('*')
SPACE = Token(' ')
OPEN_PAREN = Token('(')
CLOSE_PAREN = Token(')')
UNDERSCORE = Token('_')
# character classes, as lists of tokens, for use with WantToken
DIGITS = [Token(c) for c in '0123456789']
ALPHA_LOWER = [Token(c) for c in 'abcdefghijklmnopqrstuvwxyz']
ALPHA_UPPER = [Token(t.c.upper()) for t in ALPHA_LOWER]
ALPHA = ALPHA_LOWER + ALPHA_UPPER
# identifier character classes: leading char (no digits) vs. the rest
ALPHA_UNDER = ALPHA + [UNDERSCORE]
ALPHA_NUM_UNDER = ALPHA_UNDER + DIGITS
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ParseCode(IntEnum):
    """Instruction returned by ParserContext.feed() to the outer parser.

    Converted from a bare int-attribute class to IntEnum (resolving the old
    'should be enum' TODO): members still compare equal to 0/1/2, but are now
    true singletons, so the `is` comparisons used elsewhere no longer depend
    on CPython's small-int caching.
    """
    # the parser cannot parse the provided token
    HALT = 0
    # the parser is already "complete" and the token should be yielded to the outer context
    YIELD = 1
    # the parser successfully consumed the provided token; parsing should continue
    CONTINUE = 2
|
||||||
|
|
||||||
|
class ParserContext(metaclass=abc.ABCMeta):
    """Stateful incremental parser: consumes one Token at a time."""

    @abc.abstractmethod
    def feed(self, token: Token) -> ParseCode:
        """Consume `token` (possibly mutating internal state) and tell the
        outer parser layer how to proceed (HALT / YIELD / CONTINUE)."""

    def context(self) -> 'ParserContext':
        """Adapter so instance-level contexts are interchangeable with
        type-level Productions (which build a fresh context per use)."""
        return self

    def destructure(self) -> object:
        """Peel off this context's outermost layer, exposing whatever state
        it captured. e.g. Then([A, Choice([B, C])]) destructures to
        [A, Choice([B, C])]; it's not recursive -- inner layers must be
        destructured by the caller."""
        return self
|
||||||
|
|
||||||
|
class Then(ParserContext):
    """
    primitive combinator: run a fixed sequence of parsers, switching to the
    next one as soon as the current parser yields.
    """
    def __init__(self, items: list):
        # normalize type-level Productions into ParserContext instances
        self.items = [item.context() for item in items]

    def __repr__(self) -> str:
        return f"Then({self.items!r})"

    def __str__(self) -> str:
        return str(self.items)

    def feed(self, token: Token) -> ParseCode:
        # contexts are fused: once one YIELDs it YIELDs forever, so scanning
        # from the front always lands on the first still-active parser.
        for item in self.items:
            code = item.feed(token)
            if code != ParseCode.YIELD:
                return code
        # every child has yielded: the sequence as a whole yields
        return ParseCode.YIELD

    def destructure(self) -> list:
        return self.items
|
||||||
|
|
||||||
|
class Choice(ParserContext):
    """
    primitive combinator: try each parser in order and use the first match.
    NB: there's no lookahead. whichever parser is able to parse the first token
    is used for the entire stream.
    """
    def __init__(self, choices: list):
        self.choices = choices
        self.active = None  # the committed branch, once one accepts a token

    def __repr__(self) -> str:
        return f"Choice({self.choices!r})"

    def __str__(self) -> str:
        return repr(self) if self.active is None else str(self.active)

    def feed(self, token: Token) -> ParseCode:
        # already committed to a branch: delegate everything to it
        if self.active is not None:
            return self.active.feed(token)

        # first token: probe candidates front-to-back and commit to the
        # first one that doesn't halt
        for candidate in self.choices:
            ctx = candidate.context()
            code = ctx.feed(token)
            if code is not ParseCode.HALT:
                self.active = ctx
                return code

        return ParseCode.HALT  # no matches

    def destructure(self) -> ParserContext:
        return self.active
|
||||||
|
|
||||||
|
class WantToken(ParserContext):
    """
    match a single token out of a list of valid tokens
    """
    def __init__(self, want: list):
        self.has = None
        # accept a bare Token as shorthand for a one-element list
        self.want = [want] if isinstance(want, Token) else want

    def __repr__(self) -> str:
        return f"WantToken({self.want!r})"

    def feed(self, token: Token) -> ParseCode:
        # fused: once matched, every further token belongs to the outer context
        if self.has is not None:
            return ParseCode.YIELD
        if token not in self.want:
            return ParseCode.HALT
        self.has = token
        return ParseCode.CONTINUE

    def destructure(self) -> Token:
        return self.has
|
||||||
|
|
||||||
|
class Empty(ParserContext):
    """
    used as a terminal to allow for constructs like `optional`
    """
    def feed(self, token: Token) -> ParseCode:
        # matches nothing: immediately hand every token back to the caller
        return ParseCode.YIELD

    def destructure(self) -> None:
        return None
|
||||||
|
|
||||||
|
def optional(context: ParserContext) -> ParserContext:
    """Make `context` optional: match it if possible, else match nothing."""
    return Choice([context, Empty()])
|
||||||
|
|
||||||
|
|
||||||
|
## "Productions" sit on top of these base ParserContexts in order to give names to
|
||||||
|
## large token sequences and to "reduce" them into AST types more intelligently.
|
||||||
|
|
||||||
|
class ProductionContext(ParserContext):
    """
    this adapts from the Production system of specification to the ParserContext system.
    this is instantiated for high-level productions where we specify a grammar
    and then parse "all in one go", sealing away incomplete state, and converting
    the parsed tokens into actually useful abstractions (like signed numbers).
    """
    def __init__(self, production_cls: type, grammar: ParserContext):
        # the Production subclass whose `reduce` turns parsed state into a value
        self.production_cls = production_cls
        # NOTE(review): this attribute shadows the inherited `context()` method;
        # harmless today because `.context()` is only ever called on Production
        # classes or plain ParserContext instances -- worth confirming.
        self.context = grammar

    def __repr__(self) -> str:
        return f"ProductionContext({self.production_cls.__name__!r}, {self.context!r})"

    def __str__(self) -> str:
        return str(self.context)

    def feed(self, token: Token) -> ParseCode:
        # parsing is delegated wholesale to the wrapped grammar
        return self.context.feed(token)

    def reduce_inner(self, inner: ParserContext) -> object:
        # recursively convert parsed state into reduced values.
        # order matters: ProductionContext must be checked before its base
        # class ParserContext, since it defines its own reduction entry point.
        if isinstance(inner, ProductionContext):
            return inner.reduce() # easy
        elif isinstance(inner, ParserContext):
            # peel one structural layer and recurse on whatever it captured
            return self.reduce_inner(inner.destructure())
        elif isinstance(inner, list): # happens via unpacking of Then objects
            return [self.reduce_inner(i) for i in inner]
        else:
            # already a plain value (Token, None, ...): nothing left to reduce
            return inner

    def reduce(self) -> object:
        """Reduce the fully-parsed grammar into this production's value."""
        # XXX this ends up being a leaf -> root reduction,
        # which generally makes it harder to achieve detailed control when nesting.
        return self.production_cls.reduce(self.reduce_inner(self.context))
|
||||||
|
|
||||||
|
class Production:
    """
    non-generic, likely multi-token productions,
    specified in terms of other Productions and the above primitives
    """
    @staticmethod
    def grammar() -> ParserContext:
        # subclasses must describe their token grammar
        raise NotImplementedError()

    @classmethod
    def context(cls) -> ParserContext:
        """Instantiate a fresh, stateful parser for this production."""
        return ProductionContext(cls, cls.grammar())

    @classmethod
    def reduce(cls, inner: object) -> object:
        """Construct this production's value from already-converted inner values.

        e.g. for Number = Then([optional(Minus), Digits, optional(Suffix)]),
        this is called as reduce([a, b, c]) where `a` is the reduced
        `optional(Minus)` -- i.e. None, or whatever type corresponds to the
        Minus token. The default is the identity transform.
        """
        return inner
|
||||||
|
|
||||||
|
class DigitProduction(Production):
    """ one digit token """
    @staticmethod
    def grammar() -> ParserContext:
        return WantToken(DIGITS)

    @staticmethod
    def reduce(inner: Token) -> int:
        # the matched token wraps a single character in '0'..'9'
        return int(inner.c)
|
||||||
|
|
||||||
|
class IntProduction(Production):
    """ multi-digit integer """

    class _Value(int):
        """int subclass that remembers how many digits produced it, so nested
        reductions can scale the leading digit correctly (this also keeps
        leading zeros honest, e.g. '105' -> 1 over the 2-wide tail '05')."""
        width = 1  # overridden per instance for multi-digit values

    @staticmethod
    def grammar() -> ParserContext:
        # one digit, then optionally more digits (right recursion)
        return Then([
            DigitProduction,
            optional(IntProduction),
        ])

    @staticmethod
    def reduce(inner: list) -> int:
        """Combine the leading digit with the already-reduced tail.

        Bug fix: the old code computed `leading*10 + trailing`, which is wrong
        for numbers of 3+ digits ('123' reduced to 1*10 + 23 == 33). The
        leading digit must be scaled by 10**(number of trailing digits).
        """
        leading, trailing = inner
        if trailing is None:
            return IntProduction._Value(leading)
        # trailing comes from a nested IntProduction.reduce, so it carries
        # its width; the getattr fallback is purely defensive.
        width = getattr(trailing, 'width', 1)
        value = IntProduction._Value(leading * 10 ** width + trailing)
        value.width = width + 1
        return value
|
||||||
|
|
||||||
|
class DurationOrIntProduction(Production):
    """An integer, optionally suffixed with 'd' to mean that many days."""
    # due to a lack of lookahead, we combine duration and int parsing into one production
    # because a duration shares a complete int as prefix
    @staticmethod
    def grammar() -> ParserContext:
        return Then([
            IntProduction,
            optional(WantToken(Token('d'))),
        ])

    @staticmethod
    def reduce(inner: list) -> 'Literal':
        # fix: decorated with @staticmethod for consistency with every other
        # Production.reduce override (it previously worked only because it
        # was always invoked through the class, never through an instance).
        value, suffix = inner
        if suffix is None:
            return Literal(value)
        else:
            # timedelta's first positional argument is days
            return Literal(timedelta(value))
|
||||||
|
|
||||||
|
class Whitespace(Production):
    """One or more space tokens (the reduced value is discarded by callers)."""
    @staticmethod
    def grammar() -> ParserContext:
        # one space, then optionally more whitespace (right recursion)
        return Then([
            WantToken(SPACE),
            optional(Whitespace),
        ])
|
||||||
|
|
||||||
|
class ParenthesizedExpr(Production):
    """'(' Expr ')' -- reduces to the inner expression."""
    @staticmethod
    def grammar() -> ParserContext:
        return Then([
            WantToken(OPEN_PAREN),
            Expr,
            WantToken(CLOSE_PAREN),
        ])

    @staticmethod
    def reduce(inner: list) -> object:
        # fixes: @staticmethod added for consistency with sibling reductions;
        # the first local renamed so it no longer shadows the `open` builtin.
        open_paren, expr, close_paren = inner
        return expr
|
||||||
|
|
||||||
|
class IdentifierTail(Production):
    """Continuation of an identifier: one [A-Za-z0-9_] token, optionally more."""
    @staticmethod
    def grammar() -> ParserContext:
        return Then([
            WantToken(ALPHA_NUM_UNDER),
            optional(IdentifierTail),
        ])
|
||||||
|
|
||||||
|
|
||||||
|
class Identifier(Production):
    """ variable-style identifier, e.g. 'TODAY' """
    @staticmethod
    def grammar() -> ParserContext:
        # the leading character may not be a digit; subsequent ones may
        return Then([
            WantToken(ALPHA_UNDER),
            optional(IdentifierTail),
        ])

    @staticmethod
    def reduce(inner: list) -> 'Variable':
        # fixes: return annotation corrected ('Literal' -> 'Variable', which
        # is what this actually constructs); loop variable renamed so it no
        # longer shadows the `next` builtin.
        # fold the nested [token, rest] pairs into a flat string
        first, rest = inner
        name = first.c
        while rest is not None:
            tok, rest = rest
            name += tok.c
        return Variable(name)
|
||||||
|
|
||||||
|
class UnaryExpr(Production):
    """ some expression which does not invoke any operators at the outermost level """
    @staticmethod
    def grammar() -> ParserContext:
        return Then([
            optional(Whitespace),
            Choice([
                DurationOrIntProduction,
                Identifier,
                ParenthesizedExpr,
            ]),
            optional(Whitespace),
        ])

    @staticmethod
    def reduce(inner: list):
        # keep only the primary expression; surrounding whitespace is discarded
        _ws_before, primary, _ws_after = inner
        return primary
|
||||||
|
|
||||||
|
class ExprRHS(Production):
    """ right hand side of a binary operation """
    @staticmethod
    def grammar() -> ParserContext:
        # an operator, its operand, then possibly yet another RHS.
        # the remainder is just the tail of an `Expr`, but the fields are kept
        # expanded here (rather than nesting Expr) to control precedence.
        return Then([
            Choice([WantToken(ASTERISK), WantToken(PLUS), WantToken(MINUS)]),
            UnaryExpr,
            optional(ExprRHS),
        ])
|
||||||
|
|
||||||
|
class Expr(Production):
    """ this is the top-level production """
    @staticmethod
    def grammar() -> ParserContext:
        return Then([
            UnaryExpr,
            Choice([ExprRHS, Empty()])
        ])

    @staticmethod
    def reduce(inner: list):
        """Fold [lhs, (op, operand, tail)] into an AST with conventional
        arithmetic semantics: '*' binds tighter than '+'/'-', and the
        additive operators associate left-to-right.

        Bug fix: the previous implementation reduced the entire right-hand
        tail before applying '+'/'-', which made those operators
        right-associative -- 'a - b + c' computed a - (b + c). Subtraction
        must associate to the left.
        """
        lhs, tail = inner
        if tail is None:
            return lhs

        # flatten the linked (op, operand, next) chain into (op, operand) pairs
        pairs = []
        while tail is not None:
            oper, operand, tail = tail
            pairs.append((oper, operand))

        # pass 1: '*' has high precedence -- bind it to its neighbors ASAP
        terms = [lhs]   # additive operands
        addops = []     # the '+'/'-' tokens between successive terms
        for oper, operand in pairs:
            if oper == ASTERISK:
                terms[-1] = MulOp(terms[-1], operand)
            else:
                addops.append(oper)
                terms.append(operand)

        # pass 2: fold '+'/'-' left-to-right
        result = terms[0]
        for oper, term in zip(addops, terms[1:]):
            result = AddOp(result, term) if oper == PLUS else SubOp(result, term)
        return result
|
||||||
|
|
||||||
|
|
||||||
|
## parsed productions are `reduce`d to more useful `AstItem` items which we use
|
||||||
|
## for the actual evaluation/computation
|
||||||
|
|
||||||
|
class AstItem(metaclass=abc.ABCMeta):
    """A node in the evaluated expression tree."""
    @abc.abstractmethod
    def eval(self, context: dict):
        """Compute this node's value, resolving variables via `context`."""
|
||||||
|
|
||||||
|
class Literal(AstItem):
    """A constant value (here: an int or a timedelta)."""
    def __init__(self, v):
        self.v = v

    def __str__(self) -> str:
        return str(self.v)

    def eval(self, context: dict):
        # constants ignore the environment entirely
        return self.v
|
||||||
|
|
||||||
|
class Variable(AstItem):
    """A named value looked up in the evaluation environment."""
    def __init__(self, name: str):
        self.name = name

    def __str__(self) -> str:
        return self.name

    def eval(self, context: dict):
        # raises KeyError for unknown names, surfacing typos to the user
        return context[self.name]
|
||||||
|
|
||||||
|
class BinaryOp(AstItem):
    """Common storage for an operator applied to two operands."""
    def __init__(self, lhs, rhs):
        self.lhs, self.rhs = lhs, rhs
|
||||||
|
|
||||||
|
class AddOp(BinaryOp):
    """lhs + rhs"""
    def __str__(self):
        return f"({self.lhs} + {self.rhs})"

    def eval(self, context: dict):
        left = self.lhs.eval(context)
        right = self.rhs.eval(context)
        return left + right
|
||||||
|
|
||||||
|
class SubOp(BinaryOp):
    """lhs - rhs"""
    def __str__(self):
        return f"({self.lhs} - {self.rhs})"

    def eval(self, context: dict):
        left = self.lhs.eval(context)
        right = self.rhs.eval(context)
        return left - right
|
||||||
|
|
||||||
|
class MulOp(BinaryOp):
    """lhs * rhs"""
    def __str__(self):
        return f"({self.lhs} * {self.rhs})"

    def eval(self, context: dict):
        left = self.lhs.eval(context)
        right = self.rhs.eval(context)
        return left * right
|
||||||
|
|
||||||
|
def tokenize(stream: str) -> list:
    """Split the raw input string into single-character Tokens."""
    return list(map(Token, stream))
|
||||||
|
|
||||||
|
def parse(ty: type, tokens: list) -> AstItem:
    """Parse `tokens` as an instance of production `ty` and reduce it to an AST.

    Raises ValueError on an unexpected token or a truncated expression.
    (fix: these checks used `assert`, which silently disappears under
    `python -O`; user-input validation must raise explicitly.)
    """
    # trailing Empty lets the top production yield cleanly at end of input
    ctx = Then([ty, Empty()])
    for i, t in enumerate(tokens):
        result = ctx.feed(t)
        # print(f"i={i}; t={t}; state: {ctx!r}")
        if result != ParseCode.CONTINUE:
            raise ValueError(f"unexpected token '{t}' at {i}; state: {ctx!r}")

    # feed a trailing EOF which no production should consume.
    # this either drives the context to a HALT state, if it's expecting
    # some specific other token, or YIELD if it's happy for the stream to be closed.
    if ctx.feed(EOF) != ParseCode.YIELD:
        raise ValueError(f"incomplete expression: {ctx!r}")

    return ctx.destructure()[0].reduce()
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate(expr: str) -> object:
    """Tokenize, parse, pretty-print, and evaluate a date-math expression."""
    # (the parsed tree gets its own name rather than shadowing the parameter)
    tree = parse(Expr, tokenize(expr))
    print(tree)  # show the fully-parenthesized parse for transparency
    env = dict(
        today=datetime.now()
    )
    return tree.eval(env)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # join all CLI args so the expression may be quoted or bare
    source = " ".join(sys.argv[1:])
    print(evaluate(source))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user