Untitled

mail@pastecode.io avatar
unknown
plain_text
2 years ago
9.7 kB
1
Indexable
Never
"""
This module contains the parser for the calc language.
This is a recursive descent parser for an LL(1) Grammar
"""
import sys
from lexer import Token, Lexer
from enum import Enum, auto

# 1.) Create the generic parser.
#     - lexer reference
#     - 1 token 
#     - utility functions 
#        has(tok) - true if tok is the current token
#        must_be(tok) - like has but prints an error if it does not match
#                       It will also kill the program
#        consume() - Advance the parser to the next token
# 2.) Create parse_ functions from the BNF rules

# To build a parse tree
# 1.) Create an enumeration for all of our operations.
# 2.) Create an object to represent parse trees.
# 3.) Make each parse_* function return a parse tree.
# 4.) parse function returns the parse_program


class Op(Enum):
    PROGRAM = auto()
    ASSIGN = auto()
    ADD = auto()
    SUB = auto()
    MUL = auto()
    DIV = auto()
    POW = auto()
    NEG = auto()
    INPUT = auto()
    VAL = auto()

class ParseTree:
    def __init__(self, op=None, children=[], token=None):
        self.op = op
        self.children = list(children)
        self.token = token

    def print(self, level=0):
        n = int(len(self.children)/2)

        # print the right half
        for child in self.children[n:]:
            child.print(level = level+1)

        # print the node
        print(' '*level, end='')

        if self.op == Op.VAL:
            print(self.token.lexeme)
        else:
            print(self.op.name)


        # print the left half
        for child in self.children[0:n]:
            child.print(level = level+1)


    def insert_binary_left_leaf(self, t):
        """
        Assume that all operations we will encounter are binary.
          (Use only with binary opeartors)
        """
        if len(self.children)<2:
            self.children.insert(0, t)
        else:
            self.children[0].insert_binary_left_leaf(t)



class Parser:
    def __init__(self, lexer):
        # parser state
        self.lexer = lexer

        # grab that first token
        self.consume()


    def consume(self):
        """
        Advance the parser to the next token.
        """
        self.token = self.lexer.next()


    def has(self, t):
        """
        Returns true if the parser has token t in look-ahead.
        False otherwise.
        """
        return t == self.token.token


    def must_be(self, t):
        """
        Returns true if self.has(t). Otherwise, print an error
        message and abort the program.
        """
        if self.has(t):
            return True

        # report error and exit
        print(f"Error at line {self.token.line} and column {self.token.col} unexpected token: {self.token.token.name} {self.token.lexeme}")
        sys.exit(-1)


    def parse(self):
        """
        Attempt to parse.
        """
        return self.parse_program()

    ########## parser rules follow ##################
    def parse_program(self):
        """
        < Program > ::= < Program > < Statement >
                         | < Statement >         
        """
        
        tree = ParseTree(Op.PROGRAM, token=self.token)

        while not self.has(Token.EOF):
            tree.children.append(self.parse_statement())
        return tree


    def parse_statement(self):
        """
        < Statement > ::= REF < Assignment-Or-Expression > 
                          | < Input >
                          | < Expression >
        """
        if self.has(Token.REF):
            left = ParseTree(Op.VAL, token=self.token)
            self.consume()
            result = self.parse_assignment_or_expression()
            if result:
                result.children.insert(0, left)
            else:
                result = left
            return result
        elif self.has(Token.INPUT):
            return self.parse_input()
        else:
            return self.parse_expression()


    def parse_assignment_or_expression(self):
        """
        < Assignment-Or-Expression > ::= EQUAL < Expression > 
                                         | < Expression' >
        """
        if self.has(Token.EQUAL):
            # assignment
            result = ParseTree(Op.ASSIGN, token=self.token)
            self.consume()
            result.children.append(self.parse_expression())
            return result
        else:
            return self.parse_expression2()


    def parse_expression(self):
        """
        < Expression > ::= < Term > < Expression' >
        """
        t=self.parse_term()
        e = self.parse_expression2()
        if e:
            e.insert_binary_left_leaf(t)
            result = e
        else:
            result = t
        return result


    def parse_expression2(self):
        """
        < Expression' > ::=  PLUS < Term > < Expression' >
                           | MINUS < Term > < Expression' >
                           | ""
        """
        if self.has(Token.PLUS):
            # addition
            result = ParseTree(Op.ADD, token=self.token)
            self.consume()
            t = self.parse_term()
            e = self.parse_expression2()
        elif self.has(Token.MINUS):
            # subtraction 
            result = ParseTree(Op.SUB, token=self.token)
            self.consume()
            t = self.parse_term()
            e = self.parse_expression2()
        else:
            # epsilon
            return False

        # t is always the right child of result
        result.children.append(t)

        # check to see if we are really a child
        if e:
            e.insert_binary_left_leaf(result)
            result = e

        return result

    def parse_term(self):
        """
        < Term > ::= < Factor > < Term' >
        """
        f = self.parse_factor()
        t = self.parse_term2()
        if t:
            t.insert_binary_left_leaf(f)
            return t
        else:
            return f


    def parse_term2(self):
        """
        < Term' > ::=  TIMES < Factor > < Term' >
                     | DIVIDE < Factor > < Term' >
                     | ""
        """
        if self.has(Token.TIMES):
            # multiplication
            result = ParseTree(Op.MUL, token=self.token)
            self.consume()
            f = self.parse_factor()
            t = self.parse_term2()
        elif self.has(Token.DIVIDE):
            # division
            result = ParseTree(Op.DIV, token=self.token)
            self.consume()
            f=self.parse_factor()
            t=self.parse_term2()
        else:
            return False

        result.children.append(f)
        if t:
            t.insert_binary_left_leaf(result)
            result = t
        return result


    def parse_factor(self):
        """
        < Factor > ::= < Exp > < Factor' >
        """
        e = self.parse_exp()
        f = self.parse_factor2()

        if f:
            f.insert_binary_left_leaf(e)
            return f
        else:
            return e


    def parse_factor2(self):
        """
        < Factor' > ::=  EXPONENT < Exp > < Factor' >
                         | ""
        """
        if self.has(Token.EXPONENT):
            result = ParseTree(Op.POW, token=self.token)
            self.consume()

            e = self.parse_exp()
            f = self.parse_factor2()
            result.children.append(e)
            if f:
                f.insert_binary_left_leaf(result)
                return f
            else:
                return result
        else:
            return False


    def parse_exp(self):
        """
        < Exp > ::= MINUS < Val >
                    | < Val >
        """
        if self.has(Token.MINUS):
            # negated
            result = ParseTree(Op.NEG, token=self.token) 
            self.consume()
            result.children.append(self.parse_val())
            return result
        else:
            # non-negated
            return self.parse_val()

    def parse_input(self):
        """
        < Input > ::= INPUT REF
        """
        self.must_be(Token.INPUT)
        result = ParseTree(Op.INPUT, token=self.token)
        self.consume()
        self.must_be(Token.REF)

        # build the input tree
        result.children.append(ParseTree(Op.VAL, token=self.token))

        # consume reference
        self.consume()
        return result

    def parse_val(self):
        """
        < Val > ::= < Number >
                    | REF
                    | LPAREN < Expression > RPAREN 

        < Number > ::= INTLIT
                       | FLOATLIT
        """
        if self.has(Token.INTLIT):
            #integer
            result = ParseTree(Op.VAL, token = self.token)
            self.consume()
        elif self.has(Token.FLOATLIT):
            #float
            result = ParseTree(Op.VAL, token = self.token)
            self.consume()
        elif self.has(Token.REF):
            #reference
            result = ParseTree(Op.VAL, token = self.token)
            self.consume()
        elif self.must_be(Token.LPAREN):
            #consume LPAREN
            self.consume()
            result = self.parse_expression()
            self.must_be(Token.RPAREN)

            #consume RPAREN
            self.consume()
        return result



if __name__ == "__main__":
    p = Parser(Lexer())
    t = p.parse()
    t.print()