# Token types
#
# EOF (end-of-file) token is used to indicate that
# there is no more input left for lexical analysis
INTEGER, PLUS, MINUS, MUL, DIV, LPAREN, RPAREN, EOF = (
    'INTEGER', 'PLUS', 'MINUS', 'MUL', 'DIV', 'LPAREN', 'RPAREN', 'EOF'
)


class Token:
    def __init__(self, type, value):
        self.type = type
        self.value = value

    def __str__(self):
        """String representation of the class instance.

        Examples:
            Token(INTEGER, 3)
            Token(PLUS, '+')
            Token(MUL, '*')
        """
        return f'Token({self.type}, {repr(self.value)})'

    def __repr__(self):
        return self.__str__()


class Lexer:
    def __init__(self, text):
        # client string input, e.g. "4 + 2 * 3 - 6 / 2"
        self.text = text
        # self.pos is an index into self.text
        self.pos = 0
        self.current_char = self.text[self.pos]

    def error(self):
        raise Exception('Lexer error')

    def advance(self):
        """Advance the `pos` pointer and set the `current_char` variable."""
        self.pos += 1
        if self.pos >= len(self.text):
            self.current_char = None  # Indicates end of input
        else:
            self.current_char = self.text[self.pos]

    def skip_whitespace(self):
        while self.current_char is not None and self.current_char.isspace():
            self.advance()

    def integer(self):
        """Return a (multidigit) integer consumed from the input."""
        value = ''
        while self.current_char is not None and self.current_char.isdigit():
            value += self.current_char
            self.advance()
        return int(value)

    def get_next_token(self):
        """Lexical analyzer (also known as scanner or tokenizer)

        This method is responsible for breaking a sentence
        apart into tokens. One token at a time.
        """
        while self.current_char is not None:
            if self.current_char.isspace():
                self.skip_whitespace()
                continue
            elif self.current_char.isdigit():
                return Token(INTEGER, self.integer())
            elif self.current_char == '+':
                self.advance()
                return Token(PLUS, '+')
            elif self.current_char == '-':
                self.advance()
                return Token(MINUS, '-')
            elif self.current_char == '*':
                self.advance()
                return Token(MUL, '*')
            elif self.current_char == '/':
                self.advance()
                return Token(DIV, '/')
            elif self.current_char == '(':
                self.advance()
                return Token(LPAREN, '(')
            elif self.current_char == ')':
                self.advance()
                return Token(RPAREN, ')')
            else:
                self.error()

        return Token(EOF, None)


class Interpreter:
    def __init__(self, lexer):
        self.lexer = lexer
        # set current token to the first token taken from the input
        self.current_token = self.lexer.get_next_token()

    def error(self):
        raise Exception('Interpreter error')
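
    # The methods below form a small recursive-descent parser/interpreter:
    # each grammar rule (expr, term, factor) maps to one method, and each
    # method both recognizes its rule and computes its value on the fly.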

    def eat(self, token_type):
        # compare the current token type with the passed token
        # type and if they match then "eat" the current token
        # and assign the next token to the self.current_token,
        # otherwise raise an exception.
        if self.current_token.type == token_type:
            self.current_token = self.lexer.get_next_token()
        else:
            self.error()

    def factor(self):
        """factor : INTEGER | LPAREN expr RPAREN"""
        token = self.current_token
        if token.type == INTEGER:
            self.eat(INTEGER)
            return token.value
        elif token.type == LPAREN:
            self.eat(LPAREN)
            value = self.expr()
            self.eat(RPAREN)
            return value
        else:
            # neither an integer nor a parenthesized sub-expression:
            # report a syntax error instead of silently returning None
            self.error()

    def term(self):
        """term : factor ((MUL | DIV) factor)*"""
        value = self.factor()
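        # Keep consuming (MUL | DIV) factor pairs while the lookahead matches.
        # Handling * and / here in term(), which expr() calls for its operands,
        # is what gives them higher precedence than + and -.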
        while self.current_token.type in (MUL, DIV):
            token_type = self.current_token.type
            if token_type == MUL:
                self.eat(MUL)
                value *= self.factor()
            elif token_type == DIV:
                self.eat(DIV)
                value /= self.factor()

        return value

    def expr(self):
        """Arithmetic expression parser / interpreter.

        calc> 7 + 3 * (10 / (12 / (3 + 1) - 1))
        22.0

        expr   : term ((PLUS | MINUS) term)*
        term   : factor ((MUL | DIV) factor)*
        factor : INTEGER | LPAREN expr RPAREN
        """
        value = self.term()

        while self.current_token.type in (PLUS, MINUS):
            token_type = self.current_token.type
            if token_type == PLUS:
                self.eat(PLUS)
                value += self.term()
            elif token_type == MINUS:
                self.eat(MINUS)
                value -= self.term()

        return value

    def parse(self):
        return self.expr()


def main():
    while True:
        try:
            text = input('calc> ')
        except EOFError:
            print()
            break
        if not text.strip():
            continue
        interpreter = Interpreter(Lexer(text))
        print(interpreter.parse())


if __name__ == '__main__':
    main()
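

# Quick sanity checks (a hypothetical session; the file name calc.py is an
# assumption, and DIV results are floats because of Python 3's true division):
#
#   $ python calc.py
#   calc> (2 + 3) * 4
#   20
#   calc> 7 + 3 * (10 / (12 / (3 + 1) - 1))
#   22.0
#
# The interpreter can also be driven programmatically, without the REPL:
#
#   print(Interpreter(Lexer('4 + 2 * 3 - 6 / 2')).parse())   # -> 7.0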