Skip to content
Snippets Groups Projects
Commit 8eeba10a authored by fabian's avatar fabian
Browse files

Umlaute beachten

parent 2e6282f0
No related branches found
No related tags found
No related merge requests found
...@@ -5,9 +5,8 @@ import model as htr_model ...@@ -5,9 +5,8 @@ import model as htr_model
import dataloader_iam as htr_data_loader import dataloader_iam as htr_data_loader
import preprocessor as htr_preprocessor import preprocessor as htr_preprocessor
import numpy as np import numpy as np
from word_beam_search import WordBeamSearch
import base64 import base64
import tensorflow as tf import time
app = Flask(__name__) app = Flask(__name__)
...@@ -41,26 +40,13 @@ def predictNach(): ...@@ -41,26 +40,13 @@ def predictNach():
batch = htr_data_loader.Batch([processed_image], None, 1) batch = htr_data_loader.Batch([processed_image], None, 1)
#change corpus for name #change corpus for name
model_name.corpus = open('../data/Nachname.txt').read().split() model_name.corpus = open('../data/Nachname.txt').read().split()[:200]
#model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),word_chars.encode('utf8'))
recognized, probability = htr_model.Model.infer_batch(model_name, batch)
""" #convert corpus to list, split at space
corpus = model_name.corpus
result_list=[]
print(recognized)
for name in recognized:
indecies = []
for i in range(len(corpus)):
if name == corpus[i]:
indecies.append(i)
if len(indecies) == 0:
indecies.append(-1)
else:
result_list.append((name, indecies))
if len(result_list) == 0:
result_list.append((-1, -1)) """
#check time of infer_batch
start = time.time()
recognized, probability = htr_model.Model.infer_batch(model_name, batch)
end = time.time()
print("infer_batch time: ", end-start)
processed_image = processed_image + 0.5 processed_image = processed_image + 0.5
...@@ -101,31 +87,10 @@ def predictVor(): ...@@ -101,31 +87,10 @@ def predictVor():
#change corpus for name #change corpus for name
model_name.corpus = open('../data/Vorname.txt').read().split() model_name.corpus = open('../data/Vorname.txt').read().split()[:200]
#model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),word_chars.encode('utf8'))
recognized, probability = htr_model.Model.infer_batch(model_name, batch) recognized, probability = htr_model.Model.infer_batch(model_name, batch)
""" corpus = model_name.corpus
result_list=[]
for name in recognized:
indecies = []
for i in range(len(corpus)):
if name == corpus[i]:
indecies.append(i)
if len(indecies) == 0:
indecies.append(-1)
else:
result_list.append((name, indecies))
if len(result_list) == 0:
result_list.append(('KeinName', -1)) """
processed_image = processed_image + 0.5 processed_image = processed_image + 0.5
processed_image = processed_image * 255 processed_image = processed_image * 255
...@@ -142,14 +107,36 @@ def predictVor(): ...@@ -142,14 +107,36 @@ def predictVor():
image_base64 = base64.b64encode(array_bytes).decode('utf-8') image_base64 = base64.b64encode(array_bytes).decode('utf-8')
result = { result = {
'recognized': recognized, 'recognized': recognized,
'image': image_base64 'image': image_base64
} }
return jsonify(result) return jsonify(result)
# Single-character transliteration table: German umlauts, "ß", a set of
# accented (mostly French) lowercase letters, and the space character.
# Built once at import time so each call is one pass over the text instead
# of twenty chained ``str.replace`` scans.
_UMLAUT_TRANSLATION = str.maketrans({
    'ä': 'a',
    'ö': 'o',
    'ü': 'u',
    'Ä': 'A',
    'Ö': 'O',
    'Ü': 'U',
    'ß': 's',  # NOTE: maps to a single 's' (not 'ss'), as before
    'é': 'e',
    'è': 'e',
    'ê': 'e',
    'à': 'a',
    'â': 'a',
    'á': 'a',
    'ô': 'o',
    'û': 'u',
    'ç': 'c',
    'î': 'i',
    'ï': 'i',
    'ë': 'e',
    'ù': 'u',
    ' ': '-',
})


def replace_umlauts(text):
    """Return *text* with umlauts/accents replaced by plain ASCII letters.

    Every character listed in ``_UMLAUT_TRANSLATION`` is replaced by its
    single-character substitute; all other characters (including uppercase
    accented letters that are not listed, such as 'É') are left untouched.
    Spaces are replaced by '-'.

    NOTE(review): the space -> '-' rule presumably keeps multi-part names as
    one token, since names are written space-separated and read back with
    ``str.split()`` -- confirm against the callers.

    Args:
        text: The string to transliterate.

    Returns:
        A new string of the same length with the substitutions applied.
    """
    return text.translate(_UMLAUT_TRANSLATION)
def split_Student_Names(): def split_Student_Names():
#csv looks like: Vorname;Nachname;Matrikelnummer #csv looks like: Vorname;Nachname;Matrikelnummer
...@@ -165,7 +152,9 @@ def split_Student_Names(): ...@@ -165,7 +152,9 @@ def split_Student_Names():
for line in lines[1:]: for line in lines[1:]:
line = line.split(',') line = line.split(',')
vorname = line[2][:-1] vorname = line[2][:-1]
vorname = replace_umlauts(vorname)
nachname = line[1] nachname = line[1]
nachname = replace_umlauts(nachname)
matrikelnummer = line[0] matrikelnummer = line[0]
print(vorname, nachname, matrikelnummer) print(vorname, nachname, matrikelnummer)
vorname_file.write(vorname + " ") vorname_file.write(vorname + " ")
...@@ -180,4 +169,4 @@ if __name__ == '__main__': ...@@ -180,4 +169,4 @@ if __name__ == '__main__':
#split csv file into Vorname, Nachname and Matrikelnummer #split csv file into Vorname, Nachname and Matrikelnummer
matrikel_numbers = split_Student_Names() matrikel_numbers = split_Student_Names()
app.run(debug=True,port=8000) app.run(debug=False,port=8000)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment