From 85721ca041121572ca40f6b828292873b0e13409 Mon Sep 17 00:00:00 2001
From: merschie <famer101@hhu.de>
Date: Wed, 12 Jul 2023 13:22:01 +0200
Subject: [PATCH] Split corpus into Vorname (first name) and Nachname (last name)

---
 Studenten.csv    | 122 +++++++++++++++++++++++++++++++++++++++++++++++
 src/webserver.py |  63 ++++++++++++++++++++++--
 2 files changed, 181 insertions(+), 4 deletions(-)
 create mode 100644 Studenten.csv

diff --git a/Studenten.csv b/Studenten.csv
new file mode 100644
index 0000000..bff69bd
--- /dev/null
+++ b/Studenten.csv
@@ -0,0 +1,122 @@
+Vorname;Nachname;Matrikelnummer
+Fabian;Mersch;2618131
+Elena;Müller;5396159
+Eleni;Schmidt;4195500
+Jack;Schneider;2477916
+Faris;Fischer;1451975
+Alea;Weber;1527582
+Carina;Meyer;1902738
+Lili;Wagner;1021949
+Tamara;Becker;3396397
+Alexis;Schulz;5654451
+Evelina;Hoffmann;2344214
+Rania;Schäfer;1165119
+Wolfram;Bauer;4673022
+Yannic;Koch;1231797
+Ferdinand;Richter;847962
+Vincent;Klein;3185249
+Jonas;Wolf;3612026
+Lorenzo;Schröder;3561847
+Elsa;Neumann;9826007
+Leander;Schwarz;2182311
+Benedikt;Braun;4361295
+Ariana;Hofmann;2880477
+Leonie;Zimmermann;8649586
+Mirac;Schmitt;4733755
+Martha;Hartmann;2894201
+Fatma;Krüger;6395638
+Dian;Schmid;7236398
+Tim;Werner;8746104
+Tarja;Lange;7684872
+Bryan;Schmitz;2938895
+Bilal;Meier;5181005
+Marit;Krause;6171182
+Fatima;Maier;8651985
+Valentin;Lehmann;5875557
+Darius;Huber;7681752
+Enya;Mayer;6336668
+Ashley;Herrmann;4187880
+Hagen;Köhler;7260199
+Nelson;Walter;1167683
+Miran;König;9091573
+Smilla;Schulze;2918857
+Efe;Fuchs;9216329
+Elsa;Kaiser;4164654
+Ardian;Lang;8594589
+Joshua;Weiß;3159466
+Ali;Peters;7604001
+Maxim;Scholz;3421741
+Polan;Jung;7254079
+Lena;Möller;1270444
+Eleanor;Hahn;5645253
+Juri;Keller;980528
+Marius;Vogel;708661
+Annabell;Schubert;8328098
+Firat;Roth;8221551
+Burak;Frank;3148516
+Andre;Friedrich;1800021
+Mats;Beck;5571110
+Luisa;Günther;2855211
+Angelina;Berger;7828272
+Romeo;Winkler;6925189
+Merlin;Lorenz;7942598
+Friederike;Baumann;624266
+Karina;Schuster;3187881
+Cassandra;Kraus;3256903
+Sandro;Böhm;8196660
+Anton;Simon;6132367
+Vivienne;Franke;9453755
+Lenia;Albrecht;6611707
+Conner;Winter;9613058
+Jasmina;Ludwig;9930906
+Leonora;Martin;6200765
+Jonte;Krämer;6558831
+Karla;Schumacher;4527000
+Liana;Vogt;2344683
+Alice;Jäger;8898890
+Björn;Stein;9120266
+Lennart;Otto;1878736
+Giuliano;Groß;4611990
+Ruby;Sommer;31286
+Owen;Haas;7712537
+Franz;Graf;5559006
+Alica;Heinrich;4883443
+Tino;Seidel;6825506
+Gianluca;Schreiber;2099459
+Marvin;Ziegler;7323188
+Alessandro;Brandt;4974718
+Titus;Kuhn;1906938
+Pedro;Schulte;4541044
+Claire;Dietrich;9647493
+Danilo;Kühn;1634307
+Jayson;Engel;9728419
+Catrin;Pohl;5011573
+Delia;Horn;2060543
+Colin;Sauer;3382411
+Joseline;Arnold;6996707
+Fernando;Thomas;1444539
+Lilly;Bergmann;3305362
+Mirac;Busch;2574220
+Matti;Pfeiffer;4196813
+Liv;Voigt;3169919
+Marah;Götz;515304
+Clemens;Seifert;8923309
+Maxim;Lindner;7560572
+Mio;Ernst;660534
+Mohammed;Hübner;948191
+Jakub;Kramer;434391
+Nelly;Franz;5246351
+Carla;Beyer;4696889
+Jamal;Wolff;5771826
+Jeremy;Peter;2645612
+Jake;Jansen;1035896
+Victor;Kern;2163194
+Mirja;Barth;4594761
+Christopher;Wenzel;2139089
+Florin;Hermann;6826880
+Kassandra;Ott;8984885
+Georg;Paul;8653828
+Helen;Riedel;3789292
+Elsa;Wilhelm;6079828
+Hayley;Hansen;5337008
+Eduard;Nagel;4980605
diff --git a/src/webserver.py b/src/webserver.py
index b8c43b4..d1d60bd 100644
--- a/src/webserver.py
+++ b/src/webserver.py
@@ -5,7 +5,7 @@ import model as htr_model
 import dataloader_iam as htr_data_loader
 import preprocessor as htr_preprocessor
 import numpy as np
-
+from word_beam_search import WordBeamSearch
 
 app = Flask(__name__)
 
@@ -13,21 +13,53 @@ image_size = 32
 
 model_name = htr_model.Model(htr.char_list_from_file(), htr_model.DecoderType.WordBeamSearch, must_restore=True)
 
+csv_path = '../Studenten.csv'  # student list with header Vorname;Nachname;Matrikelnummer
+char_list = htr.char_list_from_file()
+chars = ''.join(char_list)  # character list joined into one string for WordBeamSearch
+with open('../model/wordCharList.txt') as word_chars_file:  # close the handle instead of leaking it
+    word_chars = word_chars_file.read().splitlines()[0]  # only the first line is used
 
-@app.route('/predict', methods=['POST'])
-def predict():
+@app.route('/predictNachname', methods=['POST'])
+def predictNach():
+    """Decode the posted uint64 image (height and width appended) against the last-name corpus."""
     image_array = np.frombuffer(request.data, dtype=np.uint64)
     h=image_array[-2]
     w=image_array[-1]
     image_array = image_array[:-2]
     image_array = image_array.reshape((h, w))
+    preprocessor = htr_preprocessor.Preprocessor(htr.get_img_size(), dynamic_width=True, padding=16)
+    processed_image = preprocessor.process_img(image_array)
+    batch = htr_data_loader.Batch([processed_image], None, 1)
+
+    # Swap in a decoder built from the last-name corpus. Read it as UTF-8 explicitly
+    # (it is re-encoded as UTF-8 below) and close the file instead of leaking the handle.
+    with open('../data/Nachname.txt', encoding='utf8') as corpus_file:
+        corpus = corpus_file.read()
+    model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),
+                                    word_chars.encode('utf8'))
+    recognized, probability = htr_model.Model.infer_batch(model_name, batch)
+
+    return jsonify({'recognized': recognized[0]})
 
 
 
-    print(image_array)
+@app.route('/predictVorname', methods=['POST'])
+def predictVor():
+    image_array = np.frombuffer(request.data, dtype=np.uint64)
+    h=image_array[-2]
+    w=image_array[-1]
+    image_array = image_array[:-2]
+    image_array = image_array.reshape((h, w))
     preprocessor = htr_preprocessor.Preprocessor(htr.get_img_size(), dynamic_width=True, padding=16)
     processed_image = preprocessor.process_img(image_array)
     batch = htr_data_loader.Batch([processed_image], None, 1)
+
+
+    #change corpus for name
+    corpus = open('../data/Vorname.txt').read()
+    model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),
+                                    word_chars.encode('utf8'))
+    
     recognized, probability = htr_model.Model.infer_batch(model_name, batch)
 
     result = {
@@ -35,6 +67,29 @@ def predict():
     }
     return jsonify(result)
 
+def split_Student_Names():
+    """Split the student CSV into a first-name and a last-name corpus file.
+
+    Reads ``csv_path`` (header: Vorname;Nachname;Matrikelnummer) and writes the names
+    space-separated to ../data/Vorname.txt and ../data/Nachname.txt.
+    """
+    # Explicit UTF-8: the names contain umlauts and the corpus is later encoded as UTF-8.
+    # The with-block closes (and flushes) all three files, even if a row fails.
+    with open(csv_path, 'r', encoding='utf-8') as csv_file, \
+            open('../data/Vorname.txt', 'w', encoding='utf-8') as vorname_file, \
+            open('../data/Nachname.txt', 'w', encoding='utf-8') as nachname_file:
+        next(csv_file, None)  # skip the header row
+        for line in csv_file:
+            fields = line.strip().split(';')
+            if len(fields) < 2:
+                continue  # blank or malformed row: nothing to add to either corpus
+            vorname_file.write(fields[0] + " ")
+            nachname_file.write(fields[1] + " ")
+
+
 
 if __name__ == '__main__':
+    # Regenerate the first-name and last-name corpus files from the student CSV before serving.
+    split_Student_Names()
+
     app.run(debug=True,port=8000)
-- 
GitLab