From ceb2cb5d1195d84eb54f87aa6509a266eb372ab1 Mon Sep 17 00:00:00 2001 From: Chris <Christopher.Happe@uni-duesseldorf.de> Date: Thu, 12 Nov 2020 09:37:45 +0100 Subject: [PATCH] Lexer Iterativ gestaltet --- info4/kapitel-8/Interpreter/interpreter.py | 314 +++++++++++---------- info4/kapitel-8/Interpreter/lexer.py | 52 ++-- 2 files changed, 185 insertions(+), 181 deletions(-) diff --git a/info4/kapitel-8/Interpreter/interpreter.py b/info4/kapitel-8/Interpreter/interpreter.py index c40f09c..92379e5 100644 --- a/info4/kapitel-8/Interpreter/interpreter.py +++ b/info4/kapitel-8/Interpreter/interpreter.py @@ -1,55 +1,58 @@ import lexer import sys +import operator +import re + +regex_to_token = [(re.compile(r'\d+'), 'NUMBER'), + (re.compile(r'x\d+'), 'IDENTIFIER'), + (re.compile(r'\+'), 'PLUS'), + (re.compile(r'-'), 'MINUS'), + (re.compile(r':=|≔'), 'EQUALS'), + (re.compile(r'LOOP'), 'LOOP'), + (re.compile(r'DO'), 'DO'), + (re.compile(r'END'), 'END'), + (re.compile(r';'), 'SEMICOLON'), + (re.compile(r'\n', re.MULTILINE), 'LINEBREAK'), + (re.compile(r'\s+'), 'WHITESPACE'), + (re.compile(r'[^\n]*'), 'UNKNOWN')] + +global error_handler, lex, values class ErrorHandler: - def __init__(self, program, tokens): + def __init__(self, program): sys.tracebacklimit = 0 self.program = program - position_to_line = {} - position = 0 - line = 0 - for token in tokens: - if token.k == 'LINEBREAK': - line += 1 - else: - position_to_line[position] = line - position = position + 1 - self.position_to_line = position_to_line - - def handle_error(self, position, message): - line_number = self.position_to_line[position] - msg = ["Fehler in Zeile " + str(line_number + 1), self.program.split("\n")[line_number], message] - raise SyntaxError("\n".join(msg)) from None + self.line_number = 0 + def handle_error(self, message): + msg = ["Fehler in Zeile " + str(self.line_number + 1), self.program.split("\n")[self.line_number], message] + raise SyntaxError("\n".join(msg)) from None -global error_handler + def increase_line(self): + self.line_number += 1 -def process_assignment(token_queue, position, value_list, forbidden_identifiers): - identifier_1 = token_queue[position].v +def process_assignment(value_list, forbidden_identifiers, identifier_token_1): + identifier_1 = identifier_token_1.v if identifier_1 in forbidden_identifiers: - error_handler.handle_error(position, "Identifier " + identifier_1 + - "ist bereits in Loop vorhanden und darf nicht verwendet werden.") - if not token_queue[position + 1].k == 'EQUALS': - error_handler.handle_error(position + 1, ":= in Zuweisung erwartet.") + error_handler.handle_error("Identifier " + identifier_1 + + " ist bereits in Loop vorhanden und darf nicht verwendet werden.") - if identifier_1 in value_list: - value_1 = value_list.get(identifier_1) - else: - value_1 = 0 + if next_nonempty_token("Zuweisung", ":=") == 'EQUALS': + error_handler.handle_error(":= in Zuweisung erwartet.") - if token_queue[position + 2].k == 'NUMBER': - value_1 = int(token_queue[position + 2].v) - value_list.update({identifier_1: value_1}) - return position + 3, value_list + identifier_token_2 = next_nonempty_token("Zuweisung", "IDENTIFIER (x0, x1, ...) oder NUMBER") + if identifier_token_2.k == 'NUMBER': + value_1 = int(identifier_token_2.v) + value_list.update({identifier_token_1.v: value_1}) + return next_token(), value_list - if not token_queue[position + 2].k == 'IDENTIFIER': - error_handler.handle_error(position + 2, "IDENTIFIER in Zuweisung erwartet.") - identifier_2 = token_queue[position + 2].v + if not identifier_token_2.k == 'IDENTIFIER': + error_handler.handle_error("IDENTIFIER in Zuweisung erwartet.") + identifier_2 = identifier_token_2.v if identifier_2 in forbidden_identifiers: - error_handler.handle_error(position + 2, - "Identifier " + identifier_2 + + error_handler.handle_error("Identifier " + identifier_2 + " ist bereits in Loop vorhanden und darf nicht verwendet werden.") if identifier_2 in value_list: @@ -57,176 +60,185 @@ def process_assignment(token_queue, position, value_list, forbidden_identifiers) else: value_2 = 0 - if not token_queue[position + 4].k == 'NUMBER': - error_handler.handle_error(position + 4, "NUMBER in Zuweisung erwartet.") - if token_queue[position + 3].k == 'PLUS': - value_1 = value_2 + int(token_queue[position + 4].v) - elif token_queue[position + 3].k == 'MINUS': - value_1 = max(0, value_2 + token_queue[position + 4].v) + operator_token = next_nonempty_token("Zuweisung", "+ oder -") + op = None + if operator_token.k == 'PLUS': + op = operator.__add__ + elif operator_token.k == 'MINUS': + op = operator.__sub__ else: - error_handler.handle_error(position + 3, "PLUS oder MINUS in Zuweisung erwartet.") + error_handler.handle_error("+ oder - in Zuweisung erwartet.") + + number_token = next_nonempty_token("Zuweisung", "NUMBER") + + if not number_token.k == 'NUMBER': + error_handler.handle_error("NUMBER in Zuweisung erwartet.") + + value_1 = max(0, op(value_2, int(number_token.v))) value_list.update({identifier_1: value_1}) - return position + 5, value_list + return next_token(), value_list -def verify_assignment(token_queue, position, forbidden_identifiers): - identifier_1 = token_queue[position].v +def verify_assignment(forbidden_identifiers, identifier_token_1): + identifier_1 = identifier_token_1.v if identifier_1 in forbidden_identifiers: - error_handler.handle_error(position, - "Identifier " + identifier_1 + - "ist bereits in Loop vorhanden und darf nicht verwendet werden.") - if not token_queue[position + 1].k == 'EQUALS': - error_handler.handle_error(position + 1, ":= in Zuweisung erwartet.") - - if token_queue[position + 2].k == 'NUMBER': - return position + 3 - - if not token_queue[position + 2].k == 'IDENTIFIER': - error_handler.handle_error(position + 2, "IDENTIFIER in Zuweisung erwartet.") - identifier_2 = token_queue[position + 2].v + error_handler.handle_error("Identifier " + identifier_1 + + " ist bereits in Loop vorhanden und darf nicht verwendet werden.") + if not next_nonempty_token("Zuweisung", ":=").k == 'EQUALS': + error_handler.handle_error(":= in Zuweisung erwartet.") + + identifier_token_2 = next_nonempty_token("Zuweisung", "IDENTIFIER (x0, x1, ...) oder NUMBER") + if identifier_token_2.k == 'NUMBER': + return next_token() + + if not identifier_token_2.k == 'IDENTIFIER': + error_handler.handle_error("IDENTIFIER in Zuweisung erwartet.") + identifier_2 = identifier_token_2.v if identifier_2 in forbidden_identifiers: - error_handler.handle_error(position + 2, - "Identifier " + identifier_2 + + error_handler.handle_error("Identifier " + identifier_2 + " ist bereits in Loop vorhanden und darf nicht verwendet werden.") - if not token_queue[position + 4].k == 'NUMBER': - error_handler.handle_error(position + 4, "NUMBER in Zuweisung erwartet.") - if not token_queue[position + 3].k in ['PLUS', 'MINUS']: - error_handler.handle_error(position + 3, "PLUS oder MINUS in Zuweisung erwartet.") + if next_nonempty_token("Zuweisung", "+ oder -").k not in ['PLUS', 'MINUS']: + error_handler.handle_error("+ oder - in Zuweisung erwartet.") + if not next_nonempty_token("Zuweisung", "NUMBER").k == 'NUMBER': + error_handler.handle_error("NUMBER in Zuweisung erwartet.") - return position + 5 + return next_token() -def process_loop(token_queue, position, value_list, forbidden_identifiers): - identifier_token = token_queue[position + 1] +def process_loop(value_list, forbidden_identifiers, loop_token): + identifier_token = next_nonempty_token('LOOP', 'IDENTIFIER (x0, x1, ...)') if not identifier_token.k == 'IDENTIFIER': - error_handler.handle_error(position + 1, 'IDENTIFIER in LOOP erwartet.') + error_handler.handle_error('IDENTIFIER in LOOP erwartet.') if identifier_token.v in forbidden_identifiers: - error_handler.handle_error(position + 1, - "Identifier " + identifier_token.v + - "ist bereits in Loop vorhanden und darf nicht verwendet werden.") - if not token_queue[position + 2].k == 'DO': - error_handler.handle_error(position + 2, 'DO in LOOP erwartet.') + error_handler.handle_error('Identifier ' + identifier_token.v + + ' ist bereits in Loop vorhanden und darf nicht verwendet werden.') + if not next_nonempty_token("LOOP", "DO").k == 'DO': + error_handler.handle_error('DO in LOOP erwartet.') if identifier_token.v in value_list: number_of_loops = int(value_list.get(identifier_token.v)) else: number_of_loops = 0 - saved_position = position + 3 + + saved_position = lex.current_position + saved_line = error_handler.line_number forbidden_identifiers.append(identifier_token.v) if number_of_loops == 0: end_found = False - position = saved_position + while not end_found: - position = verify_program(token_queue, position, forbidden_identifiers) - if token_queue[position].k == 'SEMICOLON': - position = position + 1 + token = verify_program(forbidden_identifiers, next_token()) + if token is None or token.k not in ['SEMICOLON', 'END']: + error_handler("SEMICOLON oder END in LOOP erwartet.") + elif token.k == 'SEMICOLON': continue - elif token_queue[position].k == 'END': + elif token.k == 'END': end_found = True - else: - error_handler.handle_error(position, "SEMICOLON oder END erwartet.") for index in range(number_of_loops): - position = saved_position + lex.current_position = saved_position + error_handler.line_number = saved_line end_found = False while not end_found: - position, value_list = process_program(token_queue, position, value_list, forbidden_identifiers) - if token_queue[position].k == 'SEMICOLON': - position = position + 1 + token, value_list = process_program(value_list, forbidden_identifiers, next_token()) + if token is None or token.k not in ['SEMICOLON', 'END']: + error_handler("SEMICOLON oder END in LOOP erwartet.") + elif token.k == 'SEMICOLON': continue - elif token_queue[position].k == 'END': + elif token.k == 'END': end_found = True - else: - error_handler.handle_error(position, "SEMICOLON oder END erwartet.") forbidden_identifiers.remove(identifier_token.v) - return position + 1, value_list + return next_token(), value_list -def verify_loop(token_queue, position, forbidden_identifiers): - identifier_token = token_queue[position + 1] +def verify_loop(forbidden_identifiers, loop_token): + identifier_token = next_nonempty_token("LOOP", "IDENTIFIER") if not identifier_token.k == 'IDENTIFIER': - error_handler.handle_error(position + 1, 'IDENTIFIER in LOOP erwartet.') + error_handler.handle_error('IDENTIFIER in LOOP erwartet.') if identifier_token.v in forbidden_identifiers: - error_handler.handle_error(position + 1, - "Identifier " + identifier_token.v + - "ist bereits in Loop vorhanden und darf nicht verwendet werden.") - if not token_queue[position + 2].k == 'DO': - error_handler.handle_error(position + 2, 'DO in LOOP erwartet.') + error_handler.handle_error("Identifier " + identifier_token.v + + " ist bereits in Loop vorhanden und darf nicht verwendet werden.") + if not next_nonempty_token("LOOP", "DO").k == 'DO': + error_handler.handle_error('DO in LOOP erwartet.') forbidden_identifiers.append(identifier_token.v) end_found = False while not end_found: - position = verify_program(token_queue, position, forbidden_identifiers) - if token_queue[position].k == 'SEMICOLON': - position = position + 1 + token = verify_program(forbidden_identifiers, next_token()) + if token is None or token.k not in ['SEMICOLON', 'END']: + error_handler("SEMICOLON oder END in LOOP erwartet.") + elif token.k == 'SEMICOLON': continue - elif token_queue[position].k == 'END': + elif token.k == 'END': end_found = True - else: - error_handler(position, "SEMICOLON oder END in LOOP erwartet.") forbidden_identifiers.remove(identifier_token.v) - return position + 1 + return next_token() -def process_program(token_queue, position, value_list, forbidden_identifiers): - current_position = position +def process_program(value_list, forbidden_identifiers, current_token): values = value_list - current_key = token_queue[position].k - if current_key == 'IDENTIFIER': - try: - current_position, values = process_assignment(token_queue, position, value_list, forbidden_identifiers) - except IndexError: - error_handler.handle_error(current_position, "Frühzeitiges Ende einer Zuweisung.") - elif current_key == 'LOOP': - try: - current_position, values = process_loop(token_queue, position, value_list, forbidden_identifiers) - except IndexError: - error_handler.handle_error(current_position, "Frühzeitiges Ende eines LOOPs") - else: - error_handler.handle_error(current_position, "Keine passende Anweisung gefunden") - return current_position, values - - -def verify_program(token_queue, position, forbidden_identifiers): - current_key = token_queue[position].k - current_position = position - if current_key == 'IDENTIFIER': - try: - current_position = verify_assignment(token_queue, position, forbidden_identifiers) - except IndexError: - error_handler.handle_error(current_position, "Frühzeitiges Ende einer Zuweisung.") - elif current_key == 'LOOP': - try: - current_position = verify_loop(token_queue, position, forbidden_identifiers) - except IndexError: - error_handler.handle_error(current_position, "Frühzeitiges Ende eines LOOPs") + if current_token is None or current_token.k not in ['IDENTIFIER', 'LOOP']: + error_handler.handle_error("Keine passende Anweisung gefunden\n" + + "Erwartet: IDENTIFIER (x0, x1, ...) oder LOOP") + elif current_token.k == 'IDENTIFIER': + current_token, values = process_assignment(value_list, forbidden_identifiers, current_token) + elif current_token.k == 'LOOP': + current_token, values = process_loop(value_list, forbidden_identifiers, current_token) + return current_token, values + + +def verify_program(forbidden_identifiers, current_token): + if current_token is None or current_token.k not in ['IDENTIFIER', 'LOOP']: + error_handler.handle_error("Keine passende Anweisung gefunden\n" + + "Erwartet: IDENTIFIER (x0, x1, ...) oder LOOP") + elif current_token.k == 'IDENTIFIER': + current_token = verify_assignment(forbidden_identifiers, current_token) + elif current_token.k == 'LOOP': + current_token = verify_loop(forbidden_identifiers, current_token) + return current_token + + +def next_token(): + new_token = lex.next() + if new_token is None: + return None + elif new_token.k == 'LINEBREAK': + error_handler.increase_line() + return next_token() + elif new_token.k == 'WHITESPACE': + return next_token() else: - error_handler.handle_error(current_position, "Keine passende Anweisung gefunden") - return current_position + return new_token + + +def next_nonempty_token(current_function, expected_token): + token = next_token() + if token is None: + error_handler.handle_error("Frühzeitiges Ende von " + current_function + "\n" + "Erwartet: " + expected_token) + return token def interpret(program): - tokens = lexer.tokenize(program) - global error_handler - error_handler = ErrorHandler(program, tokens) - tokens = [token for token in tokens if not token.k == 'LINEBREAK'] - current_position = 0 + global error_handler, lex + lex = lexer.Lexer(regex_to_token, program) + error_handler = ErrorHandler(program) values = {} forbidden_identifiers = [] - while current_position < len(tokens): - current_position, values = process_program(tokens, current_position, values, forbidden_identifiers) - if current_position < len(tokens) and not tokens[current_position].k == 'SEMICOLON': - error_handler.handle_error(current_position, "Semicolon erwartet") - else: - if current_position == len(tokens) - 1: - error_handler.handle_error(current_position, "Semikolons werden nur zur Trennung und nicht zum " + - "Abschluss von Programmen verwendet") - current_position = current_position + 1 + current_token = next_token() + while current_token is not None: + current_token, values = process_program(values, forbidden_identifiers, current_token) + if current_token is not None: + if not current_token.k == 'SEMICOLON': + error_handler.handle_error("Semicolon erwartet") + current_token = next_token() + if current_token is None: + error_handler.handle_error("Semikolons werden nur zur Trennung und nicht zum " + + "Abschluss von Programmen verwendet") if "x0" in values: return values.get("x0") return 0 diff --git a/info4/kapitel-8/Interpreter/lexer.py b/info4/kapitel-8/Interpreter/lexer.py index 1cf0024..a45d608 100644 --- a/info4/kapitel-8/Interpreter/lexer.py +++ b/info4/kapitel-8/Interpreter/lexer.py @@ -7,33 +7,25 @@ class Token: self.v = value -def tokenize(program): - token_queue = [] - regex_to_token = [(re.compile(r'\d+'), 'NUMBER'), - (re.compile(r'x\d+'), 'IDENTIFIER'), - (re.compile(r'\+'), 'PLUS'), - (re.compile(r'-'), 'MINUS'), - (re.compile(r':='), 'EQUALS'), - (re.compile(r'LOOP'), 'LOOP'), - (re.compile(r'DO'), 'DO'), - (re.compile(r'END'), 'END'), - (re.compile(r';'), 'SEMICOLON'), - (re.compile(r'\n', re.MULTILINE), 'LINEBREAK'), - (re.compile(r'\s+'), 'WHITESPACE')] - current_position = 0 - new_position = 0 - while current_position < len(program): - for pattern, value in regex_to_token: - match = pattern.match(program, current_position) - if match: - if not value == 'WHITESPACE': - token_queue.append(Token(value, match.group())) - new_position = match.span()[1] - break - if current_position == new_position: - msg = ['Fehler in Zeile : ' + str(program.count("\n", 0, current_position) + 1), - 'Erwartet: xi, :=, NUMBER, LOOP, DO, END, ;', - 'Bekommen :' + re.compile(r'[^\n]*').match(program, current_position).group()] - raise SyntaxError("\n".join(msg)) - current_position = new_position - return token_queue +class Lexer: + def __init__(self, regex_to_token, program): + self.regex_to_token = regex_to_token + self.program = program + self.current_position = 0 + + def next(self): + new_position = 0 + next_token = None + if self.current_position < len(self.program): + for pattern, value in self.regex_to_token: + match = pattern.match(self.program, self.current_position) + if match: + next_token = Token(value, match.group()) + new_position = match.span()[1] + break + if self.current_position == new_position: + msg = ['Fehler in Zeile ' + str(self.program.count("\n", 0, self.current_position) + 1) + ':', + 'Unbekannter String: ' + re.compile(r'[^\n]*').match(self.program, self.current_position).group()] + raise SyntaxError("\n".join(msg)) + self.current_position = new_position + return next_token -- GitLab