Umlaute beachten

8eeba10a · fabian · 2e6282f0 · 8eeba10a
Commit 8eeba10a authored 1 year ago by fabian
--- a/src/webserver.py
+++ b/src/webserver.py
@@ -5,9 +5,8 @@ import model as htr_model
 import dataloader_iam as htr_data_loader
 import preprocessor as htr_preprocessor
 import numpy as np
-from word_beam_search import WordBeamSearch
 import base64
-import tensorflow as tf
+import time

 app = Flask(__name__)

@@ -41,26 +40,13 @@ def predictNach():
    batch = htr_data_loader.Batch([processed_image], None, 1)

    #change corpus for name
-    model_name.corpus = open('../data/Nachname.txt').read().split()
-    #model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),word_chars.encode('utf8'))
-    recognized, probability = htr_model.Model.infer_batch(model_name, batch)
-
-    """     #convert corpus to list, split at space
-    corpus = model_name.corpus
-    result_list=[]
-    print(recognized)
-    for name in recognized:
-        indecies = []
-        for i in range(len(corpus)):
-            if name == corpus[i]:
-                indecies.append(i)
-        if len(indecies) == 0:
-            indecies.append(-1)
-        else:
-            result_list.append((name, indecies))
-    if len(result_list) == 0:
-        result_list.append((-1, -1)) """
+    model_name.corpus = open('../data/Nachname.txt').read().split()[:200]

+    #check time of infer_batch
+    start = time.time()
+    recognized, probability = htr_model.Model.infer_batch(model_name, batch)
+    end = time.time()
+    print("infer_batch time: ", end-start)


    processed_image = processed_image + 0.5
@@ -101,31 +87,10 @@ def predictVor():


    #change corpus for name
-    model_name.corpus = open('../data/Vorname.txt').read().split()
-
-
-    
-
-    #model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),word_chars.encode('utf8'))
+    model_name.corpus = open('../data/Vorname.txt').read().split()[:200]
    recognized, probability = htr_model.Model.infer_batch(model_name, batch)


-    """     corpus = model_name.corpus
-    result_list=[]
-    for name in recognized:
-        indecies = []
-        for i in range(len(corpus)):
-            if name == corpus[i]:
-                indecies.append(i)
-        if len(indecies) == 0:
-            indecies.append(-1)
-        else:
-            result_list.append((name, indecies))
-    
-    if len(result_list) == 0:
-        result_list.append(('KeinName', -1)) """
-
-

    processed_image = processed_image + 0.5
    processed_image = processed_image * 255
@@ -142,14 +107,36 @@ def predictVor():
    image_base64 = base64.b64encode(array_bytes).decode('utf-8')


-
-
    result = {
        'recognized': recognized,
        'image': image_base64
    }
    return jsonify(result)

+def replace_umlauts(text):
+    """Return text with accented letters mapped to ASCII and spaces to '-'.
+
+    Each listed character becomes one ASCII character (note that 'ß'
+    becomes a single 's'); all other characters pass through unchanged.
+    """
+    # Characters grouped by the ASCII character that replaces them.
+    groups = {
+        'a': 'äàâá',
+        'o': 'öô',
+        'u': 'üûù',
+        'e': 'éèêë',
+        'i': 'îï',
+        'A': 'Ä',
+        'O': 'Ö',
+        'U': 'Ü',
+        's': 'ß',
+        'c': 'ç',
+        '-': ' ',
+    }
+    table = {ord(ch): repl for repl, chars in groups.items() for ch in chars}
+    # Apply every substitution in a single pass over the string.
+    return text.translate(table)
+

 def split_Student_Names():
    #csv looks like: Vorname;Nachname;Matrikelnummer
@@ -165,7 +152,9 @@ def split_Student_Names():
        for line in lines[1:]:
            line = line.split(',')
            vorname = line[2][:-1]
+            vorname = replace_umlauts(vorname)
            nachname = line[1]
+            nachname = replace_umlauts(nachname)
            matrikelnummer = line[0]
            print(vorname, nachname, matrikelnummer)
            vorname_file.write(vorname + " ")
@@ -180,4 +169,4 @@ if __name__ == '__main__':
    #split csv file into Vorname, Nachname and Matrikelnummer
    matrikel_numbers = split_Student_Names()

-    app.run(debug=True,port=8000)
+    app.run(debug=False,port=8000)