Skip to content
Snippets Groups Projects
Commit ceb2cb5d authored by Chris's avatar Chris
Browse files

Lexer Iterativ gestaltet

parent 40b68335
Branches
No related tags found
1 merge request!1Master
import lexer import lexer
import sys import sys
import operator
import re
regex_to_token = [(re.compile(r'\d+'), 'NUMBER'),
(re.compile(r'x\d+'), 'IDENTIFIER'),
(re.compile(r'\+'), 'PLUS'),
(re.compile(r'-'), 'MINUS'),
(re.compile(r':=|≔'), 'EQUALS'),
(re.compile(r'LOOP'), 'LOOP'),
(re.compile(r'DO'), 'DO'),
(re.compile(r'END'), 'END'),
(re.compile(r';'), 'SEMICOLON'),
(re.compile(r'\n', re.MULTILINE), 'LINEBREAK'),
(re.compile(r'\s+'), 'WHITESPACE'),
(re.compile(r'[^\n]*'), 'UNKNOWN')]
global error_handler, lex, values
class ErrorHandler: class ErrorHandler:
def __init__(self, program, tokens): def __init__(self, program):
sys.tracebacklimit = 0 sys.tracebacklimit = 0
self.program = program self.program = program
position_to_line = {} self.line_number = 0
position = 0
line = 0
for token in tokens:
if token.k == 'LINEBREAK':
line += 1
else:
position_to_line[position] = line
position = position + 1
self.position_to_line = position_to_line
def handle_error(self, position, message): def handle_error(self, message):
line_number = self.position_to_line[position] msg = ["Fehler in Zeile " + str(self.line_number + 1), self.program.split("\n")[self.line_number], message]
msg = ["Fehler in Zeile " + str(line_number + 1), self.program.split("\n")[line_number], message]
raise SyntaxError("\n".join(msg)) from None raise SyntaxError("\n".join(msg)) from None
def increase_line(self):
self.line_number += 1
global error_handler
def process_assignment(value_list, forbidden_identifiers, identifier_token_1):
def process_assignment(token_queue, position, value_list, forbidden_identifiers): identifier_1 = identifier_token_1.v
identifier_1 = token_queue[position].v
if identifier_1 in forbidden_identifiers: if identifier_1 in forbidden_identifiers:
error_handler.handle_error(position, "Identifier " + identifier_1 + error_handler.handle_error("Identifier " + identifier_1 +
" ist bereits in Loop vorhanden und darf nicht verwendet werden.") " ist bereits in Loop vorhanden und darf nicht verwendet werden.")
if not token_queue[position + 1].k == 'EQUALS':
error_handler.handle_error(position + 1, ":= in Zuweisung erwartet.")
if identifier_1 in value_list: if next_nonempty_token("Zuweisung", ":=") == 'EQUALS':
value_1 = value_list.get(identifier_1) error_handler.handle_error(":= in Zuweisung erwartet.")
else:
value_1 = 0
if token_queue[position + 2].k == 'NUMBER': identifier_token_2 = next_nonempty_token("Zuweisung", "IDENTIFIER (x0, x1, ...) oder NUMBER")
value_1 = int(token_queue[position + 2].v) if identifier_token_2.k == 'NUMBER':
value_list.update({identifier_1: value_1}) value_1 = int(identifier_token_2.v)
return position + 3, value_list value_list.update({identifier_token_1.v: value_1})
return next_token(), value_list
if not token_queue[position + 2].k == 'IDENTIFIER': if not identifier_token_2.k == 'IDENTIFIER':
error_handler.handle_error(position + 2, "IDENTIFIER in Zuweisung erwartet.") error_handler.handle_error("IDENTIFIER in Zuweisung erwartet.")
identifier_2 = token_queue[position + 2].v identifier_2 = identifier_token_2.v
if identifier_2 in forbidden_identifiers: if identifier_2 in forbidden_identifiers:
error_handler.handle_error(position + 2, error_handler.handle_error("Identifier " + identifier_2 +
"Identifier " + identifier_2 +
" ist bereits in Loop vorhanden und darf nicht verwendet werden.") " ist bereits in Loop vorhanden und darf nicht verwendet werden.")
if identifier_2 in value_list: if identifier_2 in value_list:
...@@ -57,176 +60,185 @@ def process_assignment(token_queue, position, value_list, forbidden_identifiers) ...@@ -57,176 +60,185 @@ def process_assignment(token_queue, position, value_list, forbidden_identifiers)
else: else:
value_2 = 0 value_2 = 0
if not token_queue[position + 4].k == 'NUMBER': operator_token = next_nonempty_token("Zuweisung", "+ oder -")
error_handler.handle_error(position + 4, "NUMBER in Zuweisung erwartet.") op = None
if token_queue[position + 3].k == 'PLUS': if operator_token.k == 'PLUS':
value_1 = value_2 + int(token_queue[position + 4].v) op = operator.__add__
elif token_queue[position + 3].k == 'MINUS': elif operator_token.k == 'MINUS':
value_1 = max(0, value_2 + token_queue[position + 4].v) op = operator.__sub__
else: else:
error_handler.handle_error(position + 3, "PLUS oder MINUS in Zuweisung erwartet.") error_handler.handle_error("+ oder - in Zuweisung erwartet.")
number_token = next_nonempty_token("Zuweisung", "NUMBER")
if not number_token.k == 'NUMBER':
error_handler.handle_error("NUMBER in Zuweisung erwartet.")
value_1 = max(0, op(value_2, int(number_token.v)))
value_list.update({identifier_1: value_1}) value_list.update({identifier_1: value_1})
return position + 5, value_list return next_token(), value_list
def verify_assignment(token_queue, position, forbidden_identifiers): def verify_assignment(forbidden_identifiers, identifier_token_1):
identifier_1 = token_queue[position].v identifier_1 = identifier_token_1.v
if identifier_1 in forbidden_identifiers: if identifier_1 in forbidden_identifiers:
error_handler.handle_error(position, error_handler.handle_error("Identifier " + identifier_1 +
"Identifier " + identifier_1 +
" ist bereits in Loop vorhanden und darf nicht verwendet werden.") " ist bereits in Loop vorhanden und darf nicht verwendet werden.")
if not token_queue[position + 1].k == 'EQUALS': if not next_nonempty_token("Zuweisung", ":=").k == 'EQUALS':
error_handler.handle_error(position + 1, ":= in Zuweisung erwartet.") error_handler.handle_error(":= in Zuweisung erwartet.")
if token_queue[position + 2].k == 'NUMBER': identifier_token_2 = next_nonempty_token("Zuweisung", "IDENTIFIER (x0, x1, ...) oder NUMBER")
return position + 3 if identifier_token_2.k == 'NUMBER':
return next_token()
if not token_queue[position + 2].k == 'IDENTIFIER': if not identifier_token_2.k == 'IDENTIFIER':
error_handler.handle_error(position + 2, "IDENTIFIER in Zuweisung erwartet.") error_handler.handle_error("IDENTIFIER in Zuweisung erwartet.")
identifier_2 = token_queue[position + 2].v identifier_2 = identifier_token_2.v
if identifier_2 in forbidden_identifiers: if identifier_2 in forbidden_identifiers:
error_handler.handle_error(position + 2, error_handler.handle_error("Identifier " + identifier_2 +
"Identifier " + identifier_2 +
" ist bereits in Loop vorhanden und darf nicht verwendet werden.") " ist bereits in Loop vorhanden und darf nicht verwendet werden.")
if not token_queue[position + 4].k == 'NUMBER': if next_nonempty_token("Zuweisung", "+ oder -").k not in ['PLUS', 'MINUS']:
error_handler.handle_error(position + 4, "NUMBER in Zuweisung erwartet.") error_handler.handle_error("+ oder - in Zuweisung erwartet.")
if not token_queue[position + 3].k in ['PLUS', 'MINUS']: if not next_nonempty_token("Zuweisung", "NUMBER").k == 'NUMBER':
error_handler.handle_error(position + 3, "PLUS oder MINUS in Zuweisung erwartet.") error_handler.handle_error("NUMBER in Zuweisung erwartet.")
return position + 5 return next_token()
def process_loop(token_queue, position, value_list, forbidden_identifiers): def process_loop(value_list, forbidden_identifiers, loop_token):
identifier_token = token_queue[position + 1] identifier_token = next_nonempty_token('LOOP', 'IDENTIFIER (x0, x1, ...)')
if not identifier_token.k == 'IDENTIFIER': if not identifier_token.k == 'IDENTIFIER':
error_handler.handle_error(position + 1, 'IDENTIFIER in LOOP erwartet.') error_handler.handle_error('IDENTIFIER in LOOP erwartet.')
if identifier_token.v in forbidden_identifiers: if identifier_token.v in forbidden_identifiers:
error_handler.handle_error(position + 1, error_handler.handle_error('Identifier ' + identifier_token.v +
"Identifier " + identifier_token.v + ' ist bereits in Loop vorhanden und darf nicht verwendet werden.')
"ist bereits in Loop vorhanden und darf nicht verwendet werden.") if not next_nonempty_token("LOOP", "DO").k == 'DO':
if not token_queue[position + 2].k == 'DO': error_handler.handle_error('DO in LOOP erwartet.')
error_handler.handle_error(position + 2, 'DO in LOOP erwartet.')
if identifier_token.v in value_list: if identifier_token.v in value_list:
number_of_loops = int(value_list.get(identifier_token.v)) number_of_loops = int(value_list.get(identifier_token.v))
else: else:
number_of_loops = 0 number_of_loops = 0
saved_position = position + 3
saved_position = lex.current_position
saved_line = error_handler.line_number
forbidden_identifiers.append(identifier_token.v) forbidden_identifiers.append(identifier_token.v)
if number_of_loops == 0: if number_of_loops == 0:
end_found = False end_found = False
position = saved_position
while not end_found: while not end_found:
position = verify_program(token_queue, position, forbidden_identifiers) token = verify_program(forbidden_identifiers, next_token())
if token_queue[position].k == 'SEMICOLON': if token is None or token.k not in ['SEMICOLON', 'END']:
position = position + 1 error_handler("SEMICOLON oder END in LOOP erwartet.")
elif token.k == 'SEMICOLON':
continue continue
elif token_queue[position].k == 'END': elif token.k == 'END':
end_found = True end_found = True
else:
error_handler.handle_error(position, "SEMICOLON oder END erwartet.")
for index in range(number_of_loops): for index in range(number_of_loops):
position = saved_position lex.current_position = saved_position
error_handler.line_number = saved_line
end_found = False end_found = False
while not end_found: while not end_found:
position, value_list = process_program(token_queue, position, value_list, forbidden_identifiers) token, value_list = process_program(value_list, forbidden_identifiers, next_token())
if token_queue[position].k == 'SEMICOLON': if token is None or token.k not in ['SEMICOLON', 'END']:
position = position + 1 error_handler("SEMICOLON oder END in LOOP erwartet.")
elif token.k == 'SEMICOLON':
continue continue
elif token_queue[position].k == 'END': elif token.k == 'END':
end_found = True end_found = True
else:
error_handler.handle_error(position, "SEMICOLON oder END erwartet.")
forbidden_identifiers.remove(identifier_token.v) forbidden_identifiers.remove(identifier_token.v)
return position + 1, value_list return next_token(), value_list
def verify_loop(token_queue, position, forbidden_identifiers): def verify_loop(forbidden_identifiers, loop_token):
identifier_token = token_queue[position + 1] identifier_token = next_nonempty_token("LOOP", "IDENTIFIER")
if not identifier_token.k == 'IDENTIFIER': if not identifier_token.k == 'IDENTIFIER':
error_handler.handle_error(position + 1, 'IDENTIFIER in LOOP erwartet.') error_handler.handle_error('IDENTIFIER in LOOP erwartet.')
if identifier_token.v in forbidden_identifiers: if identifier_token.v in forbidden_identifiers:
error_handler.handle_error(position + 1, error_handler.handle_error("Identifier " + identifier_token.v +
"Identifier " + identifier_token.v +
" ist bereits in Loop vorhanden und darf nicht verwendet werden.") " ist bereits in Loop vorhanden und darf nicht verwendet werden.")
if not token_queue[position + 2].k == 'DO': if not next_nonempty_token("LOOP", "DO").k == 'DO':
error_handler.handle_error(position + 2, 'DO in LOOP erwartet.') error_handler.handle_error('DO in LOOP erwartet.')
forbidden_identifiers.append(identifier_token.v) forbidden_identifiers.append(identifier_token.v)
end_found = False end_found = False
while not end_found: while not end_found:
position = verify_program(token_queue, position, forbidden_identifiers) token = verify_program(forbidden_identifiers, next_token())
if token_queue[position].k == 'SEMICOLON': if token is None or token.k not in ['SEMICOLON', 'END']:
position = position + 1 error_handler("SEMICOLON oder END in LOOP erwartet.")
elif token.k == 'SEMICOLON':
continue continue
elif token_queue[position].k == 'END': elif token.k == 'END':
end_found = True end_found = True
else:
error_handler(position, "SEMICOLON oder END in LOOP erwartet.")
forbidden_identifiers.remove(identifier_token.v) forbidden_identifiers.remove(identifier_token.v)
return position + 1 return next_token()
def process_program(token_queue, position, value_list, forbidden_identifiers): def process_program(value_list, forbidden_identifiers, current_token):
current_position = position
values = value_list values = value_list
current_key = token_queue[position].k if current_token is None or current_token.k not in ['IDENTIFIER', 'LOOP']:
if current_key == 'IDENTIFIER': error_handler.handle_error("Keine passende Anweisung gefunden\n" +
try: "Erwartet: IDENTIFIER (x0, x1, ...) oder LOOP")
current_position, values = process_assignment(token_queue, position, value_list, forbidden_identifiers) elif current_token.k == 'IDENTIFIER':
except IndexError: current_token, values = process_assignment(value_list, forbidden_identifiers, current_token)
error_handler.handle_error(current_position, "Frühzeitiges Ende einer Zuweisung.") elif current_token.k == 'LOOP':
elif current_key == 'LOOP': current_token, values = process_loop(value_list, forbidden_identifiers, current_token)
try: return current_token, values
current_position, values = process_loop(token_queue, position, value_list, forbidden_identifiers)
except IndexError:
error_handler.handle_error(current_position, "Frühzeitiges Ende eines LOOPs") def verify_program(forbidden_identifiers, current_token):
if current_token is None or current_token.k not in ['IDENTIFIER', 'LOOP']:
error_handler.handle_error("Keine passende Anweisung gefunden\n" +
"Erwartet: IDENTIFIER (x0, x1, ...) oder LOOP")
elif current_token.k == 'IDENTIFIER':
current_token = verify_assignment(forbidden_identifiers, current_token)
elif current_token.k == 'LOOP':
current_token = verify_loop(forbidden_identifiers, current_token)
return current_token
def next_token():
new_token = lex.next()
if new_token is None:
return None
elif new_token.k == 'LINEBREAK':
error_handler.increase_line()
return next_token()
elif new_token.k == 'WHITESPACE':
return next_token()
else: else:
error_handler.handle_error(current_position, "Keine passende Anweisung gefunden") return new_token
return current_position, values
def next_nonempty_token(current_function, expected_token):
def verify_program(token_queue, position, forbidden_identifiers): token = next_token()
current_key = token_queue[position].k if token is None:
current_position = position error_handler.handle_error("Frühzeitiges Ende von " + current_function + "\n" + "Erwartet: " + expected_token)
if current_key == 'IDENTIFIER': return token
try:
current_position = verify_assignment(token_queue, position, forbidden_identifiers)
except IndexError:
error_handler.handle_error(current_position, "Frühzeitiges Ende einer Zuweisung.")
elif current_key == 'LOOP':
try:
current_position = verify_loop(token_queue, position, forbidden_identifiers)
except IndexError:
error_handler.handle_error(current_position, "Frühzeitiges Ende eines LOOPs")
else:
error_handler.handle_error(current_position, "Keine passende Anweisung gefunden")
return current_position
def interpret(program): def interpret(program):
tokens = lexer.tokenize(program) global error_handler, lex
global error_handler lex = lexer.Lexer(regex_to_token, program)
error_handler = ErrorHandler(program, tokens) error_handler = ErrorHandler(program)
tokens = [token for token in tokens if not token.k == 'LINEBREAK']
current_position = 0
values = {} values = {}
forbidden_identifiers = [] forbidden_identifiers = []
while current_position < len(tokens): current_token = next_token()
current_position, values = process_program(tokens, current_position, values, forbidden_identifiers) while current_token is not None:
if current_position < len(tokens) and not tokens[current_position].k == 'SEMICOLON': current_token, values = process_program(values, forbidden_identifiers, current_token)
error_handler.handle_error(current_position, "Semicolon erwartet") if current_token is not None:
else: if not current_token.k == 'SEMICOLON':
if current_position == len(tokens) - 1: error_handler.handle_error("Semicolon erwartet")
error_handler.handle_error(current_position, "Semikolons werden nur zur Trennung und nicht zum " + current_token = next_token()
if current_token is None:
error_handler.handle_error("Semikolons werden nur zur Trennung und nicht zum " +
"Abschluss von Programmen verwendet") "Abschluss von Programmen verwendet")
current_position = current_position + 1
if "x0" in values: if "x0" in values:
return values.get("x0") return values.get("x0")
return 0 return 0
...@@ -7,33 +7,25 @@ class Token: ...@@ -7,33 +7,25 @@ class Token:
self.v = value self.v = value
def tokenize(program): class Lexer:
token_queue = [] def __init__(self, regex_to_token, program):
regex_to_token = [(re.compile(r'\d+'), 'NUMBER'), self.regex_to_token = regex_to_token
(re.compile(r'x\d+'), 'IDENTIFIER'), self.program = program
(re.compile(r'\+'), 'PLUS'), self.current_position = 0
(re.compile(r'-'), 'MINUS'),
(re.compile(r':='), 'EQUALS'), def next(self):
(re.compile(r'LOOP'), 'LOOP'),
(re.compile(r'DO'), 'DO'),
(re.compile(r'END'), 'END'),
(re.compile(r';'), 'SEMICOLON'),
(re.compile(r'\n', re.MULTILINE), 'LINEBREAK'),
(re.compile(r'\s+'), 'WHITESPACE')]
current_position = 0
new_position = 0 new_position = 0
while current_position < len(program): next_token = None
for pattern, value in regex_to_token: if self.current_position < len(self.program):
match = pattern.match(program, current_position) for pattern, value in self.regex_to_token:
match = pattern.match(self.program, self.current_position)
if match: if match:
if not value == 'WHITESPACE': next_token = Token(value, match.group())
token_queue.append(Token(value, match.group()))
new_position = match.span()[1] new_position = match.span()[1]
break break
if current_position == new_position: if self.current_position == new_position:
msg = ['Fehler in Zeile : ' + str(program.count("\n", 0, current_position) + 1), msg = ['Fehler in Zeile ' + str(self.program.count("\n", 0, self.current_position) + 1) + ':',
'Erwartet: xi, :=, NUMBER, LOOP, DO, END, ;', 'Unbekannter String: ' + re.compile(r'[^\n]*').match(self.program, self.current_position).group()]
'Bekommen :' + re.compile(r'[^\n]*').match(program, current_position).group()]
raise SyntaxError("\n".join(msg)) raise SyntaxError("\n".join(msg))
current_position = new_position self.current_position = new_position
return token_queue return next_token
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment