Skip to content
Snippets Groups Projects
Commit 8eeba10a authored by fabian's avatar fabian
Browse files

Umlaute beachten

parent 2e6282f0
No related branches found
No related tags found
Loading
......@@ -5,9 +5,8 @@ import model as htr_model
import dataloader_iam as htr_data_loader
import preprocessor as htr_preprocessor
import numpy as np
from word_beam_search import WordBeamSearch
import base64
import tensorflow as tf
import time
app = Flask(__name__)
......@@ -41,26 +40,13 @@ def predictNach():
batch = htr_data_loader.Batch([processed_image], None, 1)
#change corpus for name
model_name.corpus = open('../data/Nachname.txt').read().split()
#model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),word_chars.encode('utf8'))
recognized, probability = htr_model.Model.infer_batch(model_name, batch)
""" #convert corpus to list, split at space
corpus = model_name.corpus
result_list=[]
print(recognized)
for name in recognized:
indecies = []
for i in range(len(corpus)):
if name == corpus[i]:
indecies.append(i)
if len(indecies) == 0:
indecies.append(-1)
else:
result_list.append((name, indecies))
if len(result_list) == 0:
result_list.append((-1, -1)) """
model_name.corpus = open('../data/Nachname.txt').read().split()[:200]
#check time of infer_batch
start = time.time()
recognized, probability = htr_model.Model.infer_batch(model_name, batch)
end = time.time()
print("infer_batch time: ", end-start)
processed_image = processed_image + 0.5
......@@ -101,31 +87,10 @@ def predictVor():
#change corpus for name
model_name.corpus = open('../data/Vorname.txt').read().split()
#model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),word_chars.encode('utf8'))
model_name.corpus = open('../data/Vorname.txt').read().split()[:200]
recognized, probability = htr_model.Model.infer_batch(model_name, batch)
""" corpus = model_name.corpus
result_list=[]
for name in recognized:
indecies = []
for i in range(len(corpus)):
if name == corpus[i]:
indecies.append(i)
if len(indecies) == 0:
indecies.append(-1)
else:
result_list.append((name, indecies))
if len(result_list) == 0:
result_list.append(('KeinName', -1)) """
processed_image = processed_image + 0.5
processed_image = processed_image * 255
......@@ -142,14 +107,36 @@ def predictVor():
image_base64 = base64.b64encode(array_bytes).decode('utf-8')
result = {
'recognized': recognized,
'image': image_base64
}
return jsonify(result)
# Replacements that Unicode decomposition cannot produce: 'ß' has no
# canonical decomposition, and spaces are turned into hyphens.
_ASCII_FALLBACKS = str.maketrans({'ß': 's', ' ': '-'})


def replace_umlauts(text):
    """Return *text* with diacritics removed and spaces replaced by hyphens.

    Accented letters are folded to their base letter ('ä' -> 'a',
    'É' -> 'E', 'ç' -> 'c'), 'ß' becomes 's' and every space becomes '-'.
    Both precomposed characters and decomposed sequences (base letter
    followed by a combining mark) are handled, and upper-case accented
    letters are folded just like their lower-case counterparts.

    Characters that have no canonical decomposition (for example 'ø')
    are returned unchanged.
    """
    import unicodedata

    # Split every accented character into base letter + combining mark(s),
    # then drop the marks so that only the base letters remain.
    decomposed = unicodedata.normalize('NFD', text)
    base_only = ''.join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )
    # Recompose what is left so that text in scripts that NFD splits into
    # non-mark pieces (e.g. Hangul syllables) is not left decomposed.
    recomposed = unicodedata.normalize('NFC', base_only)
    return recomposed.translate(_ASCII_FALLBACKS)
def split_Student_Names():
#csv looks like: Matrikelnummer,Nachname,Vorname (comma-separated; Vorname is the last column)
......@@ -165,7 +152,9 @@ def split_Student_Names():
for line in lines[1:]:
line = line.split(',')
vorname = line[2][:-1]
vorname = replace_umlauts(vorname)
nachname = line[1]
nachname = replace_umlauts(nachname)
matrikelnummer = line[0]
print(vorname, nachname, matrikelnummer)
vorname_file.write(vorname + " ")
......@@ -180,4 +169,4 @@ if __name__ == '__main__':
#split csv file into Vorname, Nachname and Matrikelnummer
matrikel_numbers = split_Student_Names()
app.run(debug=True,port=8000)
app.run(debug=False,port=8000)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment