Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
general
stups
prob-teaching-notebooks
Commits
ceb2cb5d
Commit
ceb2cb5d
authored
Nov 12, 2020
by
Chris
Browse files
Lexer Iterativ gestaltet
parent
40b68335
Changes
2
Hide whitespace changes
Inline
Side-by-side
info4/kapitel-8/Interpreter/interpreter.py
View file @
ceb2cb5d
import
lexer
import
sys
import
operator
import
re
# Ordered lexer table of (compiled pattern, token kind) pairs. The lexer tries
# the patterns in this order and takes the first one that matches, so the
# UNKNOWN catch-all has to stay last.
regex_to_token = [
    (re.compile(r'\d+'), 'NUMBER'),
    (re.compile(r'x\d+'), 'IDENTIFIER'),
    (re.compile(r'\+'), 'PLUS'),
    (re.compile(r'-'), 'MINUS'),
    (re.compile(r':=|≔'), 'EQUALS'),
    (re.compile(r'LOOP'), 'LOOP'),
    (re.compile(r'DO'), 'DO'),
    (re.compile(r'END'), 'END'),
    (re.compile(r';'), 'SEMICOLON'),
    (re.compile(r'\n'), 'LINEBREAK'),
    # Whitespace deliberately excludes '\n': a run such as " \n" must still
    # yield a LINEBREAK token, otherwise line numbers in error messages drift.
    (re.compile(r'[^\S\n]+'), 'WHITESPACE'),
    # Catch-all: the rest of the current line.
    (re.compile(r'[^\n]*'), 'UNKNOWN'),
]
global
error_handler
,
lex
,
values
class
ErrorHandler
:
def
__init__
(
self
,
program
,
tokens
):
def
__init__
(
self
,
program
):
sys
.
tracebacklimit
=
0
self
.
program
=
program
position_to_line
=
{}
position
=
0
line
=
0
for
token
in
tokens
:
if
token
.
k
==
'LINEBREAK'
:
line
+=
1
else
:
position_to_line
[
position
]
=
line
position
=
position
+
1
self
.
position_to_line
=
position_to_line
def
handle_error
(
self
,
position
,
message
):
line_number
=
self
.
position_to_line
[
position
]
msg
=
[
"Fehler in Zeile "
+
str
(
line_number
+
1
),
self
.
program
.
split
(
"
\n
"
)[
line_number
],
message
]
raise
SyntaxError
(
"
\n
"
.
join
(
msg
))
from
None
self
.
line_number
=
0
def
handle_error
(
self
,
message
):
msg
=
[
"Fehler in Zeile "
+
str
(
self
.
line_number
+
1
),
self
.
program
.
split
(
"
\n
"
)[
self
.
line_number
],
message
]
raise
SyntaxError
(
"
\n
"
.
join
(
msg
))
from
None
global
error_handler
def
increase_line
(
self
):
self
.
line_number
+=
1
def
process_assignment
(
token_queue
,
position
,
value_list
,
forbidden_identifiers
):
identifier_1
=
token_queue
[
position
]
.
v
def
process_assignment
(
value_list
,
forbidden_identifiers
,
identifier_token_1
):
identifier_1
=
identifier_token_1
.
v
if
identifier_1
in
forbidden_identifiers
:
error_handler
.
handle_error
(
position
,
"Identifier "
+
identifier_1
+
"ist bereits in Loop vorhanden und darf nicht verwendet werden."
)
if
not
token_queue
[
position
+
1
].
k
==
'EQUALS'
:
error_handler
.
handle_error
(
position
+
1
,
":= in Zuweisung erwartet."
)
error_handler
.
handle_error
(
"Identifier "
+
identifier_1
+
" ist bereits in Loop vorhanden und darf nicht verwendet werden."
)
if
identifier_1
in
value_list
:
value_1
=
value_list
.
get
(
identifier_1
)
else
:
value_1
=
0
if
next_nonempty_token
(
"Zuweisung"
,
":="
)
==
'EQUALS'
:
error_handler
.
handle_error
(
":= in Zuweisung erwartet."
)
if
token_queue
[
position
+
2
].
k
==
'NUMBER'
:
value_1
=
int
(
token_queue
[
position
+
2
].
v
)
value_list
.
update
({
identifier_1
:
value_1
})
return
position
+
3
,
value_list
identifier_token_2
=
next_nonempty_token
(
"Zuweisung"
,
"IDENTIFIER (x0, x1, ...) oder NUMBER"
)
if
identifier_token_2
.
k
==
'NUMBER'
:
value_1
=
int
(
identifier_token_2
.
v
)
value_list
.
update
({
identifier_token_1
.
v
:
value_1
})
return
next_token
(),
value_list
if
not
token_queue
[
position
+
2
]
.
k
==
'IDENTIFIER'
:
error_handler
.
handle_error
(
position
+
2
,
"IDENTIFIER in Zuweisung erwartet."
)
identifier_2
=
token_queue
[
position
+
2
]
.
v
if
not
identifier_token_2
.
k
==
'IDENTIFIER'
:
error_handler
.
handle_error
(
"IDENTIFIER in Zuweisung erwartet."
)
identifier_2
=
identifier_token_2
.
v
if
identifier_2
in
forbidden_identifiers
:
error_handler
.
handle_error
(
position
+
2
,
"Identifier "
+
identifier_2
+
error_handler
.
handle_error
(
"Identifier "
+
identifier_2
+
" ist bereits in Loop vorhanden und darf nicht verwendet werden."
)
if
identifier_2
in
value_list
:
...
...
@@ -57,176 +60,185 @@ def process_assignment(token_queue, position, value_list, forbidden_identifiers)
else
:
value_2
=
0
if
not
token_queue
[
position
+
4
].
k
==
'NUMBER'
:
error_handler
.
handle_error
(
position
+
4
,
"NUMBER in Zuweisung erwartet."
)
if
token_queue
[
position
+
3
]
.
k
==
'PLUS'
:
value_1
=
value_2
+
int
(
token_queue
[
position
+
4
].
v
)
elif
token_queue
[
position
+
3
]
.
k
==
'MINUS'
:
value_1
=
max
(
0
,
value_2
+
token_queue
[
position
+
4
].
v
)
operator_token
=
next_nonempty_token
(
"Zuweisung"
,
"+ oder -"
)
op
=
None
if
operator_token
.
k
==
'PLUS'
:
op
=
operator
.
__add__
elif
operator_token
.
k
==
'MINUS'
:
op
=
operator
.
__sub__
else
:
error_handler
.
handle_error
(
position
+
3
,
"PLUS oder MINUS in Zuweisung erwartet."
)
error_handler
.
handle_error
(
"+ oder - in Zuweisung erwartet."
)
number_token
=
next_nonempty_token
(
"Zuweisung"
,
"NUMBER"
)
if
not
number_token
.
k
==
'NUMBER'
:
error_handler
.
handle_error
(
"NUMBER in Zuweisung erwartet."
)
value_1
=
max
(
0
,
op
(
value_2
,
int
(
number_token
.
v
)))
value_list
.
update
({
identifier_1
:
value_1
})
return
position
+
5
,
value_list
return
next_token
()
,
value_list
def
verify_assignment
(
token_queue
,
position
,
forbidden_identifiers
):
identifier_1
=
token_queue
[
position
]
.
v
def
verify_assignment
(
forbidden_identifiers
,
identifier_token_1
):
identifier_1
=
identifier_token_1
.
v
if
identifier_1
in
forbidden_identifiers
:
error_handler
.
handle_error
(
position
,
"
Identifier "
+
identifier_1
+
"ist bereits in Loop vorhanden und darf nicht verwendet werden."
)
if
not
token_queue
[
position
+
1
].
k
==
'EQUALS'
:
error_handler
.
handle_error
(
position
+
1
,
":= in Zuweisung erwartet."
)
if
token_queue
[
position
+
2
]
.
k
==
'NUMBER'
:
return
position
+
3
if
not
token_queue
[
position
+
2
]
.
k
==
'IDENTIFIER'
:
error_handler
.
handle_error
(
position
+
2
,
"IDENTIFIER in Zuweisung erwartet."
)
identifier_2
=
token_queue
[
position
+
2
]
.
v
error_handler
.
handle_error
(
"Identifier "
+
identifier_1
+
"
ist bereits in Loop vorhanden und darf nicht verwendet werden."
)
if
not
next_nonempty_token
(
"Zuweisung"
,
":="
).
k
==
'EQUALS'
:
error_handler
.
handle_error
(
":= in Zuweisung erwartet."
)
identifier_token_2
=
next_nonempty_token
(
"Zuweisung"
,
"IDENTIFIER (x0, x1, ...) oder NUMBER"
)
if
identifier_token_2
.
k
==
'NUMBER'
:
return
next_token
()
if
not
identifier_token_2
.
k
==
'IDENTIFIER'
:
error_handler
.
handle_error
(
"IDENTIFIER in Zuweisung erwartet."
)
identifier_2
=
identifier_token_2
.
v
if
identifier_2
in
forbidden_identifiers
:
error_handler
.
handle_error
(
position
+
2
,
"Identifier "
+
identifier_2
+
error_handler
.
handle_error
(
"Identifier "
+
identifier_2
+
" ist bereits in Loop vorhanden und darf nicht verwendet werden."
)
if
n
ot
token_queue
[
position
+
4
].
k
==
'NUMBER'
:
error_handler
.
handle_error
(
position
+
4
,
"NUMBER
in Zuweisung erwartet."
)
if
not
token_queue
[
position
+
3
].
k
in
[
'PLUS'
,
'MINUS'
]
:
error_handler
.
handle_error
(
position
+
3
,
"PLUS oder MINUS
in Zuweisung erwartet."
)
if
n
ext_nonempty_token
(
"Zuweisung"
,
"+ oder -"
).
k
not
in
[
'PLUS'
,
'MINUS'
]
:
error_handler
.
handle_error
(
"+ oder -
in Zuweisung erwartet."
)
if
not
next_nonempty_token
(
"Zuweisung"
,
"NUMBER"
).
k
==
'NUMBER'
:
error_handler
.
handle_error
(
"NUMBER
in Zuweisung erwartet."
)
return
position
+
5
return
next_token
()
def
process_loop
(
token_queue
,
position
,
value_list
,
forbidden_identifiers
):
identifier_token
=
token_queue
[
position
+
1
]
def
process_loop
(
value_list
,
forbidden_identifiers
,
loop_token
):
identifier_token
=
next_nonempty_token
(
'LOOP'
,
'IDENTIFIER (x0, x1, ...)'
)
if
not
identifier_token
.
k
==
'IDENTIFIER'
:
error_handler
.
handle_error
(
position
+
1
,
'IDENTIFIER in LOOP erwartet.'
)
error_handler
.
handle_error
(
'IDENTIFIER in LOOP erwartet.'
)
if
identifier_token
.
v
in
forbidden_identifiers
:
error_handler
.
handle_error
(
position
+
1
,
"Identifier "
+
identifier_token
.
v
+
"ist bereits in Loop vorhanden und darf nicht verwendet werden."
)
if
not
token_queue
[
position
+
2
].
k
==
'DO'
:
error_handler
.
handle_error
(
position
+
2
,
'DO in LOOP erwartet.'
)
error_handler
.
handle_error
(
'Identifier '
+
identifier_token
.
v
+
' ist bereits in Loop vorhanden und darf nicht verwendet werden.'
)
if
not
next_nonempty_token
(
"LOOP"
,
"DO"
).
k
==
'DO'
:
error_handler
.
handle_error
(
'DO in LOOP erwartet.'
)
if
identifier_token
.
v
in
value_list
:
number_of_loops
=
int
(
value_list
.
get
(
identifier_token
.
v
))
else
:
number_of_loops
=
0
saved_position
=
position
+
3
saved_position
=
lex
.
current_position
saved_line
=
error_handler
.
line_number
forbidden_identifiers
.
append
(
identifier_token
.
v
)
if
number_of_loops
==
0
:
end_found
=
False
position
=
saved_position
while
not
end_found
:
position
=
verify_program
(
token_queue
,
position
,
forbidden_identifiers
)
if
token_queue
[
position
].
k
==
'SEMICOLON'
:
position
=
position
+
1
token
=
verify_program
(
forbidden_identifiers
,
next_token
())
if
token
is
None
or
token
.
k
not
in
[
'SEMICOLON'
,
'END'
]:
error_handler
(
"SEMICOLON oder END in LOOP erwartet."
)
elif
token
.
k
==
'SEMICOLON'
:
continue
elif
token
_queue
[
position
]
.
k
==
'END'
:
elif
token
.
k
==
'END'
:
end_found
=
True
else
:
error_handler
.
handle_error
(
position
,
"SEMICOLON oder END erwartet."
)
for
index
in
range
(
number_of_loops
):
position
=
saved_position
lex
.
current_position
=
saved_position
error_handler
.
line_number
=
saved_line
end_found
=
False
while
not
end_found
:
position
,
value_list
=
process_program
(
token_queue
,
position
,
value_list
,
forbidden_identifiers
)
if
token_queue
[
position
].
k
==
'SEMICOLON'
:
position
=
position
+
1
token
,
value_list
=
process_program
(
value_list
,
forbidden_identifiers
,
next_token
())
if
token
is
None
or
token
.
k
not
in
[
'SEMICOLON'
,
'END'
]:
error_handler
(
"SEMICOLON oder END in LOOP erwartet."
)
elif
token
.
k
==
'SEMICOLON'
:
continue
elif
token
_queue
[
position
]
.
k
==
'END'
:
elif
token
.
k
==
'END'
:
end_found
=
True
else
:
error_handler
.
handle_error
(
position
,
"SEMICOLON oder END erwartet."
)
forbidden_identifiers
.
remove
(
identifier_token
.
v
)
return
position
+
1
,
value_list
return
next_token
()
,
value_list
def
verify_loop
(
token_queue
,
position
,
forbidden_identifiers
):
identifier_token
=
token_queue
[
position
+
1
]
def
verify_loop
(
forbidden_identifiers
,
loop_token
):
identifier_token
=
next_nonempty_token
(
"LOOP"
,
"IDENTIFIER"
)
if
not
identifier_token
.
k
==
'IDENTIFIER'
:
error_handler
.
handle_error
(
position
+
1
,
'IDENTIFIER in LOOP erwartet.'
)
error_handler
.
handle_error
(
'IDENTIFIER in LOOP erwartet.'
)
if
identifier_token
.
v
in
forbidden_identifiers
:
error_handler
.
handle_error
(
position
+
1
,
"Identifier "
+
identifier_token
.
v
+
"ist bereits in Loop vorhanden und darf nicht verwendet werden."
)
if
not
token_queue
[
position
+
2
].
k
==
'DO'
:
error_handler
.
handle_error
(
position
+
2
,
'DO in LOOP erwartet.'
)
error_handler
.
handle_error
(
"Identifier "
+
identifier_token
.
v
+
" ist bereits in Loop vorhanden und darf nicht verwendet werden."
)
if
not
next_nonempty_token
(
"LOOP"
,
"DO"
).
k
==
'DO'
:
error_handler
.
handle_error
(
'DO in LOOP erwartet.'
)
forbidden_identifiers
.
append
(
identifier_token
.
v
)
end_found
=
False
while
not
end_found
:
position
=
verify_program
(
token_queue
,
position
,
forbidden_identifiers
)
if
token_queue
[
position
].
k
==
'SEMICOLON'
:
position
=
position
+
1
token
=
verify_program
(
forbidden_identifiers
,
next_token
())
if
token
is
None
or
token
.
k
not
in
[
'SEMICOLON'
,
'END'
]:
error_handler
(
"SEMICOLON oder END in LOOP erwartet."
)
elif
token
.
k
==
'SEMICOLON'
:
continue
elif
token
_queue
[
position
]
.
k
==
'END'
:
elif
token
.
k
==
'END'
:
end_found
=
True
else
:
error_handler
(
position
,
"SEMICOLON oder END in LOOP erwartet."
)
forbidden_identifiers
.
remove
(
identifier_token
.
v
)
return
position
+
1
return
next_token
()
def
process_program
(
token_queue
,
position
,
value_list
,
forbidden_identifiers
):
current_position
=
position
def
process_program
(
value_list
,
forbidden_identifiers
,
current_token
):
values
=
value_list
current_key
=
token_queue
[
position
].
k
if
current_key
==
'IDENTIFIER'
:
try
:
current_position
,
values
=
process_assignment
(
token_queue
,
position
,
value_list
,
forbidden_identifiers
)
except
IndexError
:
error_handler
.
handle_error
(
current_position
,
"Frühzeitiges Ende einer Zuweisung."
)
elif
current_key
==
'LOOP'
:
try
:
current_position
,
values
=
process_loop
(
token_queue
,
position
,
value_list
,
forbidden_identifiers
)
except
IndexError
:
error_handler
.
handle_error
(
current_position
,
"Frühzeitiges Ende eines LOOPs"
)
else
:
error_handler
.
handle_error
(
current_position
,
"Keine passende Anweisung gefunden"
)
return
current_position
,
values
def
verify_program
(
token_queue
,
position
,
forbidden_identifiers
):
current_key
=
token_queue
[
position
].
k
current_position
=
position
if
current_key
==
'IDENTIFIER'
:
try
:
current_position
=
verify_assignment
(
token_queue
,
position
,
forbidden_identifiers
)
except
IndexError
:
error_handler
.
handle_error
(
current_position
,
"Frühzeitiges Ende einer Zuweisung."
)
elif
current_key
==
'LOOP'
:
try
:
current_position
=
verify_loop
(
token_queue
,
position
,
forbidden_identifiers
)
except
IndexError
:
error_handler
.
handle_error
(
current_position
,
"Frühzeitiges Ende eines LOOPs"
)
if
current_token
is
None
or
current_token
.
k
not
in
[
'IDENTIFIER'
,
'LOOP'
]:
error_handler
.
handle_error
(
"Keine passende Anweisung gefunden
\n
"
+
"Erwartet: IDENTIFIER (x0, x1, ...) oder LOOP"
)
elif
current_token
.
k
==
'IDENTIFIER'
:
current_token
,
values
=
process_assignment
(
value_list
,
forbidden_identifiers
,
current_token
)
elif
current_token
.
k
==
'LOOP'
:
current_token
,
values
=
process_loop
(
value_list
,
forbidden_identifiers
,
current_token
)
return
current_token
,
values
def verify_program(forbidden_identifiers, current_token):
    """Check one statement without executing it.

    Dispatches on the kind of *current_token*: an IDENTIFIER is handed to
    verify_assignment, a LOOP to verify_loop, and the result of that call is
    returned (presumably the token following the statement -- confirm against
    the two verifiers). Any other token, or None, is reported through the
    module-level error handler.
    """
    kind = None if current_token is None else current_token.k
    if kind == 'IDENTIFIER':
        return verify_assignment(forbidden_identifiers, current_token)
    if kind == 'LOOP':
        return verify_loop(forbidden_identifiers, current_token)
    error_handler.handle_error("Keine passende Anweisung gefunden\n"
                               + "Erwartet: IDENTIFIER (x0, x1, ...) oder LOOP")
    return current_token
def next_token():
    """Return the next significant token from the module-level lexer.

    WHITESPACE tokens are discarded. LINEBREAK tokens are discarded as well,
    but each one first advances the error handler's line counter. Returns
    None once the lexer has no more input.
    """
    # Loop instead of recursing once per skipped token, so a long run of
    # blank lines cannot exhaust the recursion limit.
    while True:
        new_token = lex.next()
        if new_token is None:
            return None
        if new_token.k == 'LINEBREAK':
            error_handler.increase_line()
        elif new_token.k != 'WHITESPACE':
            return new_token
def next_nonempty_token(current_function, expected_token):
    """Fetch the next token and report an error if the input has ended.

    current_function names the construct being parsed and expected_token
    describes what should have come next; both are only used to build the
    error message passed to the module-level error handler.
    """
    upcoming = next_token()
    if upcoming is not None:
        return upcoming
    problem = ("Frühzeitiges Ende von " + current_function + "\n"
               + "Erwartet: " + expected_token)
    error_handler.handle_error(problem)
    return upcoming
def
interpret
(
program
):
tokens
=
lexer
.
tokenize
(
program
)
global
error_handler
error_handler
=
ErrorHandler
(
program
,
tokens
)
tokens
=
[
token
for
token
in
tokens
if
not
token
.
k
==
'LINEBREAK'
]
current_position
=
0
global
error_handler
,
lex
lex
=
lexer
.
Lexer
(
regex_to_token
,
program
)
error_handler
=
ErrorHandler
(
program
)
values
=
{}
forbidden_identifiers
=
[]
while
current_position
<
len
(
tokens
):
current_position
,
values
=
process_program
(
tokens
,
current_position
,
values
,
forbidden_identifiers
)
if
current_position
<
len
(
tokens
)
and
not
tokens
[
current_position
].
k
==
'SEMICOLON'
:
error_handler
.
handle_error
(
current_position
,
"Semicolon erwartet"
)
else
:
if
current_position
==
len
(
tokens
)
-
1
:
error_handler
.
handle_error
(
current_position
,
"Semikolons werden nur zur Trennung und nicht zum "
+
"Abschluss von Programmen verwendet"
)
current_position
=
current_position
+
1
current_token
=
next_token
()
while
current_token
is
not
None
:
current_token
,
values
=
process_program
(
values
,
forbidden_identifiers
,
current_token
)
if
current_token
is
not
None
:
if
not
current_token
.
k
==
'SEMICOLON'
:
error_handler
.
handle_error
(
"Semicolon erwartet"
)
current_token
=
next_token
()
if
current_token
is
None
:
error_handler
.
handle_error
(
"Semikolons werden nur zur Trennung und nicht zum "
+
"Abschluss von Programmen verwendet"
)
if
"x0"
in
values
:
return
values
.
get
(
"x0"
)
return
0
info4/kapitel-8/Interpreter/lexer.py
View file @
ceb2cb5d
...
...
@@ -7,33 +7,25 @@ class Token:
self
.
v
=
value
def tokenize(program):
    """Split *program* into a list of Token objects.

    Patterns are tried in table order at the current position and the first
    match wins. WHITESPACE matches are dropped; every other match, including
    LINEBREAK, is appended as Token(kind, matched_text).

    Raises:
        SyntaxError: if no pattern matches at some position. The message
            gives the 1-based line number and the rest of that line.
    """
    regex_to_token = [
        (re.compile(r'\d+'), 'NUMBER'),
        (re.compile(r'x\d+'), 'IDENTIFIER'),
        (re.compile(r'\+'), 'PLUS'),
        (re.compile(r'-'), 'MINUS'),
        (re.compile(r':='), 'EQUALS'),
        (re.compile(r'LOOP'), 'LOOP'),
        (re.compile(r'DO'), 'DO'),
        (re.compile(r'END'), 'END'),
        (re.compile(r';'), 'SEMICOLON'),
        (re.compile(r'\n'), 'LINEBREAK'),
        # Exclude '\n' so that " \n" still produces a LINEBREAK token.
        (re.compile(r'[^\S\n]+'), 'WHITESPACE'),
    ]
    token_queue = []
    current_position = 0
    while current_position < len(program):
        for pattern, value in regex_to_token:
            match = pattern.match(program, current_position)
            if match:
                break
        else:
            # Nothing matched here: report the line and the offending text.
            line_number = program.count("\n", 0, current_position) + 1
            rest_of_line = re.compile(r'[^\n]*').match(program, current_position).group()
            msg = ['Fehler in Zeile : ' + str(line_number),
                   'Erwartet: xi, :=, NUMBER, LOOP, DO, END, ;',
                   'Bekommen :' + rest_of_line]
            raise SyntaxError("\n".join(msg))
        if value != 'WHITESPACE':
            token_queue.append(Token(value, match.group()))
        # Every pattern consumes at least one character, so this advances.
        current_position = match.end()
    return token_queue
class Lexer:
    """Incremental tokenizer that hands out one token per call to next()."""

    def __init__(self, regex_to_token, program):
        """Store the ordered (pattern, kind) table and the program text."""
        self.regex_to_token = regex_to_token
        self.program = program
        # Index into self.program where the next token starts.
        self.current_position = 0

    def next(self):
        """Return the token at the current position and advance past it.

        Patterns are tried in table order and the first match is taken.
        Returns None once the whole program has been consumed.

        Raises:
            SyntaxError: if no pattern matches at the current position, or
                the first matching pattern consumes no characters. The
                message gives the 1-based line number and the rest of that
                line. The position is left unchanged in that case.
        """
        if self.current_position >= len(self.program):
            return None
        match = None
        value = None
        for pattern, value in self.regex_to_token:
            match = pattern.match(self.program, self.current_position)
            if match:
                break
        # Check the match itself rather than comparing positions against a
        # zero default: that comparison only detected a failure at position 0
        # and otherwise rewound the lexer to the start of the program.
        if not match or match.end() == self.current_position:
            line_number = self.program.count("\n", 0, self.current_position) + 1
            rest_of_line = re.compile(r'[^\n]*').match(
                self.program, self.current_position).group()
            msg = ['Fehler in Zeile ' + str(line_number) + ':',
                   'Unbekannter String: ' + rest_of_line]
            raise SyntaxError("\n".join(msg))
        self.current_position = match.end()
        return Token(value, match.group())
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment