Skip to content
Snippets Groups Projects
Commit ceb2cb5d authored by Chris's avatar Chris
Browse files

Lexer Iterativ gestaltet

parent 40b68335
No related branches found
No related tags found
1 merge request!1Master
import lexer
import sys
import operator
import re
# Token table for the LOOP-language lexer: (compiled pattern, token kind).
# Patterns are tried in order, so more specific entries must come first.
regex_to_token = [(re.compile(r'\d+'), 'NUMBER'),
(re.compile(r'x\d+'), 'IDENTIFIER'),
(re.compile(r'\+'), 'PLUS'),
(re.compile(r'-'), 'MINUS'),
# Accepts both the ASCII ':=' and the Unicode '≔' assignment sign.
(re.compile(r':=|≔'), 'EQUALS'),
(re.compile(r'LOOP'), 'LOOP'),
(re.compile(r'DO'), 'DO'),
(re.compile(r'END'), 'END'),
(re.compile(r';'), 'SEMICOLON'),
(re.compile(r'\n', re.MULTILINE), 'LINEBREAK'),
(re.compile(r'\s+'), 'WHITESPACE'),
# Catch-all so the lexer always matches something on the current line.
(re.compile(r'[^\n]*'), 'UNKNOWN')]
# NOTE(review): 'global' at module level is a no-op; these names are bound
# inside interpret() anyway — this line could be removed.
global error_handler, lex, values
class ErrorHandler:
    """Tracks the current source line and raises SyntaxError with context.

    The line counter is advanced externally (via increase_line) each time the
    token stream yields a LINEBREAK, so error messages can quote the
    offending source line.
    """

    def __init__(self, program):
        # Suppress the traceback so the user only sees the error message.
        sys.tracebacklimit = 0
        self.program = program
        # 0-based index of the line currently being processed.
        self.line_number = 0

    def handle_error(self, message):
        """Raise SyntaxError citing the current line (1-based) and its text."""
        msg = ["Fehler in Zeile " + str(self.line_number + 1), self.program.split("\n")[self.line_number], message]
        raise SyntaxError("\n".join(msg)) from None

    def increase_line(self):
        """Advance the line counter past a consumed LINEBREAK token."""
        self.line_number += 1
global error_handler
def process_assignment(value_list, forbidden_identifiers, identifier_token_1):
    """Execute one assignment statement that starts at *identifier_token_1*.

    Handles both forms of the LOOP-language assignment:
    ``xi := NUMBER`` and ``xi := xj (+|-) NUMBER``.

    Returns a tuple (token following the assignment, updated value dict).
    Raises SyntaxError via the global error_handler on malformed input.
    """
    identifier_1 = identifier_token_1.v
    if identifier_1 in forbidden_identifiers:
        error_handler.handle_error("Identifier " + identifier_1 +
                                   " ist bereits in Loop vorhanden und darf nicht verwendet werden.")
    # BUG FIX: the original compared the Token object itself (not its kind
    # attribute .k) against 'EQUALS' and lacked the negation, so every
    # well-formed assignment was rejected here. Mirrors verify_assignment.
    if not next_nonempty_token("Zuweisung", ":=").k == 'EQUALS':
        error_handler.handle_error(":= in Zuweisung erwartet.")
    identifier_token_2 = next_nonempty_token("Zuweisung", "IDENTIFIER (x0, x1, ...) oder NUMBER")
    if identifier_token_2.k == 'NUMBER':
        # Simple form: xi := NUMBER
        value_list.update({identifier_token_1.v: int(identifier_token_2.v)})
        return next_token(), value_list
    if not identifier_token_2.k == 'IDENTIFIER':
        error_handler.handle_error("IDENTIFIER in Zuweisung erwartet.")
    identifier_2 = identifier_token_2.v
    if identifier_2 in forbidden_identifiers:
        error_handler.handle_error("Identifier " + identifier_2 +
                                   " ist bereits in Loop vorhanden und darf nicht verwendet werden.")
    # Identifiers that were never assigned evaluate to 0.
    if identifier_2 in value_list:
        value_2 = value_list.get(identifier_2)
    else:
        value_2 = 0
    operator_token = next_nonempty_token("Zuweisung", "+ oder -")
    op = None
    if operator_token.k == 'PLUS':
        op = operator.__add__
    elif operator_token.k == 'MINUS':
        op = operator.__sub__
    else:
        error_handler.handle_error("+ oder - in Zuweisung erwartet.")
    number_token = next_nonempty_token("Zuweisung", "NUMBER")
    if not number_token.k == 'NUMBER':
        error_handler.handle_error("NUMBER in Zuweisung erwartet.")
    # LOOP-language arithmetic is clamped at zero (no negative values).
    value_1 = max(0, op(value_2, int(number_token.v)))
    value_list.update({identifier_1: value_1})
    return next_token(), value_list
def verify_assignment(forbidden_identifiers, identifier_token_1):
    """Syntax-check one assignment statement without executing it.

    Consumes the same tokens as process_assignment but performs no value
    updates; used to validate loop bodies that run zero times.
    Returns the token following the assignment.
    """
    identifier_1 = identifier_token_1.v
    if identifier_1 in forbidden_identifiers:
        error_handler.handle_error("Identifier " + identifier_1 +
                                   " ist bereits in Loop vorhanden und darf nicht verwendet werden.")
    if not next_nonempty_token("Zuweisung", ":=").k == 'EQUALS':
        error_handler.handle_error(":= in Zuweisung erwartet.")
    identifier_token_2 = next_nonempty_token("Zuweisung", "IDENTIFIER (x0, x1, ...) oder NUMBER")
    if identifier_token_2.k == 'NUMBER':
        # Simple form xi := NUMBER: nothing further to check.
        return next_token()
    if not identifier_token_2.k == 'IDENTIFIER':
        error_handler.handle_error("IDENTIFIER in Zuweisung erwartet.")
    identifier_2 = identifier_token_2.v
    if identifier_2 in forbidden_identifiers:
        error_handler.handle_error("Identifier " + identifier_2 +
                                   " ist bereits in Loop vorhanden und darf nicht verwendet werden.")
    if next_nonempty_token("Zuweisung", "+ oder -").k not in ['PLUS', 'MINUS']:
        error_handler.handle_error("+ oder - in Zuweisung erwartet.")
    if not next_nonempty_token("Zuweisung", "NUMBER").k == 'NUMBER':
        error_handler.handle_error("NUMBER in Zuweisung erwartet.")
    return next_token()
def process_loop(value_list, forbidden_identifiers, loop_token):
    """Execute a ``LOOP xi DO ... END`` construct starting after *loop_token*.

    The body runs as many times as the current value of xi (0 if unset); a
    zero-iteration body is still syntax-checked via verify_program. The loop
    variable is added to forbidden_identifiers so the body cannot modify it.
    Returns (token following END, updated value dict).
    """
    identifier_token = next_nonempty_token('LOOP', 'IDENTIFIER (x0, x1, ...)')
    if not identifier_token.k == 'IDENTIFIER':
        error_handler.handle_error('IDENTIFIER in LOOP erwartet.')
    if identifier_token.v in forbidden_identifiers:
        error_handler.handle_error('Identifier ' + identifier_token.v +
                                   ' ist bereits in Loop vorhanden und darf nicht verwendet werden.')
    if not next_nonempty_token("LOOP", "DO").k == 'DO':
        error_handler.handle_error('DO in LOOP erwartet.')
    if identifier_token.v in value_list:
        number_of_loops = int(value_list.get(identifier_token.v))
    else:
        number_of_loops = 0
    # Remember where the body starts so each iteration can rewind the lexer.
    saved_position = lex.current_position
    saved_line = error_handler.line_number
    forbidden_identifiers.append(identifier_token.v)
    if number_of_loops == 0:
        # Body never executes; still verify its syntax once.
        end_found = False
        while not end_found:
            token = verify_program(forbidden_identifiers, next_token())
            # BUG FIX: original called error_handler(...) directly instead of
            # error_handler.handle_error(...), raising TypeError on this path.
            if token is None or token.k not in ['SEMICOLON', 'END']:
                error_handler.handle_error("SEMICOLON oder END in LOOP erwartet.")
            elif token.k == 'SEMICOLON':
                continue
            elif token.k == 'END':
                end_found = True
    for index in range(number_of_loops):
        # Rewind the lexer (and the line counter) to the start of the body.
        lex.current_position = saved_position
        error_handler.line_number = saved_line
        end_found = False
        while not end_found:
            token, value_list = process_program(value_list, forbidden_identifiers, next_token())
            # BUG FIX: same error_handler(...) misuse as above.
            if token is None or token.k not in ['SEMICOLON', 'END']:
                error_handler.handle_error("SEMICOLON oder END in LOOP erwartet.")
            elif token.k == 'SEMICOLON':
                continue
            elif token.k == 'END':
                end_found = True
    forbidden_identifiers.remove(identifier_token.v)
    return next_token(), value_list
def verify_loop(forbidden_identifiers, loop_token):
    """Syntax-check a ``LOOP xi DO ... END`` construct without executing it.

    Consumes the loop header and body tokens, enforcing that the loop
    variable is not reused by an enclosing loop. Returns the token
    following END.
    """
    identifier_token = next_nonempty_token("LOOP", "IDENTIFIER")
    if not identifier_token.k == 'IDENTIFIER':
        error_handler.handle_error('IDENTIFIER in LOOP erwartet.')
    if identifier_token.v in forbidden_identifiers:
        error_handler.handle_error("Identifier " + identifier_token.v +
                                   " ist bereits in Loop vorhanden und darf nicht verwendet werden.")
    if not next_nonempty_token("LOOP", "DO").k == 'DO':
        error_handler.handle_error('DO in LOOP erwartet.')
    forbidden_identifiers.append(identifier_token.v)
    end_found = False
    while not end_found:
        token = verify_program(forbidden_identifiers, next_token())
        # BUG FIX: original called error_handler(...) directly instead of
        # error_handler.handle_error(...), raising TypeError on this path.
        if token is None or token.k not in ['SEMICOLON', 'END']:
            error_handler.handle_error("SEMICOLON oder END in LOOP erwartet.")
        elif token.k == 'SEMICOLON':
            continue
        elif token.k == 'END':
            end_found = True
    forbidden_identifiers.remove(identifier_token.v)
    return next_token()
def process_program(value_list, forbidden_identifiers, current_token):
    """Execute a single statement (assignment or LOOP) starting at *current_token*.

    Dispatches on the token kind; anything other than IDENTIFIER or LOOP
    (including end of input) is a syntax error.
    Returns (token following the statement, updated value dict).
    """
    values = value_list
    if current_token is None or current_token.k not in ['IDENTIFIER', 'LOOP']:
        error_handler.handle_error("Keine passende Anweisung gefunden\n" +
                                   "Erwartet: IDENTIFIER (x0, x1, ...) oder LOOP")
    elif current_token.k == 'IDENTIFIER':
        current_token, values = process_assignment(value_list, forbidden_identifiers, current_token)
    elif current_token.k == 'LOOP':
        current_token, values = process_loop(value_list, forbidden_identifiers, current_token)
    return current_token, values
def verify_program(forbidden_identifiers, current_token):
    """Syntax-check a single statement without executing it.

    Mirrors process_program but delegates to the verify_* variants.
    Returns the token following the statement.
    """
    if current_token is None or current_token.k not in ['IDENTIFIER', 'LOOP']:
        error_handler.handle_error("Keine passende Anweisung gefunden\n" +
                                   "Erwartet: IDENTIFIER (x0, x1, ...) oder LOOP")
    elif current_token.k == 'IDENTIFIER':
        current_token = verify_assignment(forbidden_identifiers, current_token)
    elif current_token.k == 'LOOP':
        current_token = verify_loop(forbidden_identifiers, current_token)
    return current_token
def next_token():
    """Return the next significant token from the global lexer *lex*.

    LINEBREAK tokens advance the error handler's line counter and WHITESPACE
    tokens are skipped; both are consumed recursively. Returns None at end
    of input.
    """
    new_token = lex.next()
    if new_token is None:
        return None
    if new_token.k == 'LINEBREAK':
        error_handler.increase_line()
        return next_token()
    if new_token.k == 'WHITESPACE':
        return next_token()
    return new_token
def next_nonempty_token(current_function, expected_token):
    """Fetch the next significant token, erroring out at end of input.

    *current_function* and *expected_token* are only used to build the
    "premature end" error message.
    """
    token = next_token()
    if token is None:
        error_handler.handle_error("Frühzeitiges Ende von " + current_function + "\n" + "Erwartet: " + expected_token)
    return token
def interpret(program):
    """Interpret a LOOP-language *program* string.

    Sets up the global lexer and error handler, then executes statements
    separated by semicolons until the input is exhausted.
    Returns the final value of x0, or 0 if x0 was never assigned.
    """
    global error_handler, lex
    lex = lexer.Lexer(regex_to_token, program)
    error_handler = ErrorHandler(program)
    values = {}
    forbidden_identifiers = []
    current_token = next_token()
    while current_token is not None:
        current_token, values = process_program(values, forbidden_identifiers, current_token)
        if current_token is not None:
            # Statements are separated — not terminated — by semicolons.
            if not current_token.k == 'SEMICOLON':
                error_handler.handle_error("Semicolon erwartet")
            current_token = next_token()
            if current_token is None:
                error_handler.handle_error("Semikolons werden nur zur Trennung und nicht zum " +
                                           "Abschluss von Programmen verwendet")
    if "x0" in values:
        return values.get("x0")
    return 0
......@@ -7,33 +7,25 @@ class Token:
self.v = value
class Lexer:
    """Iterative lexer: each call to next() yields one Token from *program*."""

    def __init__(self, regex_to_token, program):
        # List of (compiled regex, token kind) pairs, tried in order.
        self.regex_to_token = regex_to_token
        self.program = program
        # Offset into *program* where the next match attempt starts.
        self.current_position = 0

    def next(self):
        """Return the next Token, or None once the input is exhausted.

        Raises SyntaxError when no pattern consumes any input at the
        current position.
        """
        next_token = None
        new_position = 0
        if self.current_position < len(self.program):
            for pattern, kind in self.regex_to_token:
                match = pattern.match(self.program, self.current_position)
                if match:
                    # BUG FIX: the original suppressed WHITESPACE here and
                    # returned None mid-input; the parser's next_token()
                    # treats None as end of input, so any whitespace aborted
                    # parsing. The parser already skips WHITESPACE tokens
                    # itself, so return them like any other token.
                    next_token = Token(kind, match.group())
                    new_position = match.span()[1]
                    break
            if self.current_position == new_position:
                # Nothing was consumed: report the unrecognized rest of the line.
                msg = ['Fehler in Zeile ' + str(self.program.count("\n", 0, self.current_position) + 1) + ':',
                       'Unbekannter String: ' + re.compile(r'[^\n]*').match(self.program, self.current_position).group()]
                raise SyntaxError("\n".join(msg))
            self.current_position = new_position
        return next_token
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment