diff --git a/src/webserver.py b/src/webserver.py index 629f56f4016389ae0c4157fdd32025b24ea9eac6..8c61bea258bf7c8d9d873aec35db22edac57bae8 100644 --- a/src/webserver.py +++ b/src/webserver.py @@ -5,9 +5,8 @@ import model as htr_model import dataloader_iam as htr_data_loader import preprocessor as htr_preprocessor import numpy as np -from word_beam_search import WordBeamSearch import base64 -import tensorflow as tf +import time app = Flask(__name__) @@ -41,26 +40,13 @@ def predictNach(): batch = htr_data_loader.Batch([processed_image], None, 1) #change corpus for name - model_name.corpus = open('../data/Nachname.txt').read().split() - #model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),word_chars.encode('utf8')) - recognized, probability = htr_model.Model.infer_batch(model_name, batch) - - """ #convert corpus to list, split at space - corpus = model_name.corpus - result_list=[] - print(recognized) - for name in recognized: - indecies = [] - for i in range(len(corpus)): - if name == corpus[i]: - indecies.append(i) - if len(indecies) == 0: - indecies.append(-1) - else: - result_list.append((name, indecies)) - if len(result_list) == 0: - result_list.append((-1, -1)) """ + model_name.corpus = open('../data/Nachname.txt').read().split()[:200] + #check time of infer_batch + start = time.time() + recognized, probability = htr_model.Model.infer_batch(model_name, batch) + end = time.time() + print("infer_batch time: ", end-start) processed_image = processed_image + 0.5 @@ -101,31 +87,10 @@ def predictVor(): #change corpus for name - model_name.corpus = open('../data/Vorname.txt').read().split() - - - - - #model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),word_chars.encode('utf8')) + model_name.corpus = open('../data/Vorname.txt').read().split()[:200] recognized, probability = htr_model.Model.infer_batch(model_name, batch) - """ corpus = model_name.corpus - result_list=[] - for name 
# Single-character substitutions applied by replace_umlauts().
# Each key is a precomposed (single code point) character and each value is
# exactly one ASCII character, so the mapping is order-independent and can be
# applied in one pass with str.translate.
_NAME_CHAR_TABLE = str.maketrans({
    # German umlauts and sharp s (note: 'ß' becomes a single 's', not 'ss')
    'ä': 'a', 'ö': 'o', 'ü': 'u',
    'Ä': 'A', 'Ö': 'O', 'Ü': 'U',
    'ß': 's',
    # Other accented Latin letters
    'é': 'e', 'è': 'e', 'ê': 'e', 'ë': 'e',
    'à': 'a', 'â': 'a', 'á': 'a',
    'ô': 'o',
    'û': 'u', 'ù': 'u',
    'ç': 'c',
    'î': 'i', 'ï': 'i',
    # Spaces become hyphens so the result contains no spaces.
    # NOTE(review): presumably needed because names are later stored
    # space-separated and re-read with str.split() -- confirm with callers.
    ' ': '-',
})


def replace_umlauts(text: str) -> str:
    """Return *text* with umlauts/accents folded to ASCII and spaces hyphenated.

    Despite its name, this function does more than replace German umlauts:
    it also maps several accented French/Spanish letters to their base
    letter, maps 'ß' to a single 's', and turns every space into '-'.

    Characters without an entry in the table are returned unchanged. Only
    the precomposed forms listed in the table are handled; a base letter
    followed by a combining mark is left untouched.

    Args:
        text: The string (e.g. a first or last name) to normalise.

    Returns:
        A new string of the same length with the substitutions applied.
    """
    # One C-level pass instead of one full scan-and-copy per character.
    return text.translate(_NAME_CHAR_TABLE)