From 85721ca041121572ca40f6b828292873b0e13409 Mon Sep 17 00:00:00 2001 From: merschie <famer101@hhu.de> Date: Wed, 12 Jul 2023 13:22:01 +0200 Subject: [PATCH] split corpus into Vor und Nachname --- Studenten.csv | 122 +++++++++++++++++++++++++++++++++++++++++++++++ src/webserver.py | 63 ++++++++++++++++++++++-- 2 files changed, 181 insertions(+), 4 deletions(-) create mode 100644 Studenten.csv diff --git a/Studenten.csv b/Studenten.csv new file mode 100644 index 0000000..bff69bd --- /dev/null +++ b/Studenten.csv @@ -0,0 +1,122 @@ +Vorname;Nachname;Matrikelnummer +Fabian;Mersch;2618131 +Elena;Müller;5396159 +Eleni;Schmidt;4195500 +Jack;Schneider;2477916 +Faris;Fischer;1451975 +Alea;Weber;1527582 +Carina;Meyer;1902738 +Lili;Wagner;1021949 +Tamara;Becker;3396397 +Alexis;Schulz;5654451 +Evelina;Hoffmann;2344214 +Rania;Schäfer;1165119 +Wolfram;Bauer;4673022 +Yannic;Koch;1231797 +Ferdinand;Richter;847962 +Vincent;Klein;3185249 +Jonas;Wolf;3612026 +Lorenzo;Schröder;3561847 +Elsa;Neumann;9826007 +Leander;Schwarz;2182311 +Benedikt;Braun;4361295 +Ariana;Hofmann;2880477 +Leonie;Zimmermann;8649586 +Mirac;Schmitt;4733755 +Martha;Hartmann;2894201 +Fatma;Krüger;6395638 +Dian;Schmid;7236398 +Tim;Werner;8746104 +Tarja;Lange;7684872 +Bryan;Schmitz;2938895 +Bilal;Meier;5181005 +Marit;Krause;6171182 +Fatima;Maier;8651985 +Valentin;Lehmann;5875557 +Darius;Huber;7681752 +Enya;Mayer;6336668 +Ashley;Herrmann;4187880 +Hagen;Köhler;7260199 +Nelson;Walter;1167683 +Miran;König;9091573 +Smilla;Schulze;2918857 +Efe;Fuchs;9216329 +Elsa;Kaiser;4164654 +Ardian;Lang;8594589 +Joshua;Weiß;3159466 +Ali;Peters;7604001 +Maxim;Scholz;3421741 +Polan;Jung;7254079 +Lena;Möller;1270444 +Eleanor;Hahn;5645253 +Juri;Keller;980528 +Marius;Vogel;708661 +Annabell;Schubert;8328098 +Firat;Roth;8221551 +Burak;Frank;3148516 +Andre;Friedrich;1800021 +Mats;Beck;5571110 +Luisa;Günther;2855211 +Angelina;Berger;7828272 +Romeo;Winkler;6925189 +Merlin;Lorenz;7942598 +Friederike;Baumann;624266 +Karina;Schuster;3187881 
+Cassandra;Kraus;3256903 +Sandro;Böhm;8196660 +Anton;Simon;6132367 +Vivienne;Franke;9453755 +Lenia;Albrecht;6611707 +Conner;Winter;9613058 +Jasmina;Ludwig;9930906 +Leonora;Martin;6200765 +Jonte;Krämer;6558831 +Karla;Schumacher;4527000 +Liana;Vogt;2344683 +Alice;Jäger;8898890 +Björn;Stein;9120266 +Lennart;Otto;1878736 +Giuliano;Groß;4611990 +Ruby;Sommer;31286 +Owen;Haas;7712537 +Franz;Graf;5559006 +Alica;Heinrich;4883443 +Tino;Seidel;6825506 +Gianluca;Schreiber;2099459 +Marvin;Ziegler;7323188 +Alessandro;Brandt;4974718 +Titus;Kuhn;1906938 +Pedro;Schulte;4541044 +Claire;Dietrich;9647493 +Danilo;Kühn;1634307 +Jayson;Engel;9728419 +Catrin;Pohl;5011573 +Delia;Horn;2060543 +Colin;Sauer;3382411 +Joseline;Arnold;6996707 +Fernando;Thomas;1444539 +Lilly;Bergmann;3305362 +Mirac;Busch;2574220 +Matti;Pfeiffer;4196813 +Liv;Voigt;3169919 +Marah;Götz;515304 +Clemens;Seifert;8923309 +Maxim;Lindner;7560572 +Mio;Ernst;660534 +Mohammed;Hübner;948191 +Jakub;Kramer;434391 +Nelly;Franz;5246351 +Carla;Beyer;4696889 +Jamal;Wolff;5771826 +Jeremy;Peter;2645612 +Jake;Jansen;1035896 +Victor;Kern;2163194 +Mirja;Barth;4594761 +Christopher;Wenzel;2139089 +Florin;Hermann;6826880 +Kassandra;Ott;8984885 +Georg;Paul;8653828 +Helen;Riedel;3789292 +Elsa;Wilhelm;6079828 +Hayley;Hansen;5337008 +Eduard;Nagel;4980605 diff --git a/src/webserver.py b/src/webserver.py index b8c43b4..d1d60bd 100644 --- a/src/webserver.py +++ b/src/webserver.py @@ -5,7 +5,7 @@ import model as htr_model import dataloader_iam as htr_data_loader import preprocessor as htr_preprocessor import numpy as np - +from word_beam_search import WordBeamSearch app = Flask(__name__) @@ -13,21 +13,53 @@ image_size = 32 model_name = htr_model.Model(htr.char_list_from_file(), htr_model.DecoderType.WordBeamSearch, must_restore=True) +csv_path = '../Studenten.csv' + +char_list = htr.char_list_from_file() +chars = ''.join(char_list) +word_chars = open('../model/wordCharList.txt').read().splitlines()[0] -@app.route('/predict', methods=['POST']) -def 
# Corpus files written by split_Student_Names() and read by the endpoints below.
_VORNAME_CORPUS_PATH = '../data/Vorname.txt'
_NACHNAME_CORPUS_PATH = '../data/Nachname.txt'


def _decode_image(payload):
    """Rebuild the 2-D image array from a raw request body.

    The body is interpreted as a flat sequence of uint64 values whose last
    two entries are the image height and width; everything before them is
    the pixel data.

    Args:
        payload: raw bytes of the request body.

    Returns:
        A numpy array of shape (height, width) and dtype uint64.

    Raises:
        ValueError: if the body is not a whole number of uint64 values, is
            too short to contain the two size entries, or its pixel count
            does not equal height * width.
    """
    values = np.frombuffer(payload, dtype=np.uint64)
    if values.size < 2:
        raise ValueError('payload must end with the image height and width')

    height = int(values[-2])
    width = int(values[-1])
    pixels = values[:-2]
    if height * width != pixels.size:
        raise ValueError(
            'payload holds %d pixel values but declares a %d x %d image'
            % (pixels.size, height, width)
        )
    return pixels.reshape((height, width))


def _recognize_with_corpus(corpus_path):
    """Recognize the posted image using a word-beam-search decoder built from one corpus.

    Args:
        corpus_path: path of the text file whose content is handed to the
            decoder as its word corpus.

    Returns:
        A JSON response ``{'recognized': <text>}`` on success, or a
        ``({'error': <message>}, 400)`` response when the request body
        cannot be decoded into an image.
    """
    try:
        image = _decode_image(request.data)
    except ValueError as exc:
        # A malformed body is a client error, not a server crash.
        return jsonify({'error': str(exc)}), 400

    preprocessor = htr_preprocessor.Preprocessor(
        htr.get_img_size(), dynamic_width=True, padding=16
    )
    processed_image = preprocessor.process_img(image)
    batch = htr_data_loader.Batch([processed_image], None, 1)

    # The corpus is encoded as UTF-8 for the decoder below, so read it as UTF-8.
    with open(corpus_path, encoding='utf-8') as corpus_file:
        corpus = corpus_file.read()

    # NOTE(review): model_name is module-level state shared by both endpoints;
    # if the server handles requests concurrently, swapping the decoder here
    # can interleave with another request's inference -- confirm the deployment.
    model_name.decoder = WordBeamSearch(
        50, 'Words', 0.0,
        corpus.encode('utf8'), chars.encode('utf8'), word_chars.encode('utf8'),
    )
    recognized, _ = htr_model.Model.infer_batch(model_name, batch)

    return jsonify({'recognized': recognized[0]})


@app.route('/predictNachname', methods=['POST'])
def predictNach():
    """Recognize a handwritten last name, decoding against the last-name corpus."""
    return _recognize_with_corpus(_NACHNAME_CORPUS_PATH)


@app.route('/predictVorname', methods=['POST'])
def predictVor():
    """Recognize a handwritten first name, decoding against the first-name corpus."""
    # NOTE(review): the response shape is assumed to match predictNach
    # ({'recognized': ...}); confirm against the pre-refactoring handler.
    return _recognize_with_corpus(_VORNAME_CORPUS_PATH)


def split_Student_Names():
    """Split the student CSV into a first-name corpus and a last-name corpus.

    The CSV at ``csv_path`` has a header row followed by rows of the form
    ``Vorname;Nachname;Matrikelnummer``. Every first name is written to
    ``../data/Vorname.txt`` and every last name to ``../data/Nachname.txt``,
    each followed by a single space. Rows with fewer than two fields
    (for example blank lines) are skipped.
    """
    import csv

    # Read the CSV completely before touching the output files, so a missing
    # or unreadable CSV does not leave truncated corpora behind.
    # NOTE(review): assumes the CSV is stored as UTF-8 -- confirm.
    with open(csv_path, newline='', encoding='utf-8') as csv_file:
        rows = csv.reader(csv_file, delimiter=';')
        next(rows, None)  # skip the header row
        names = [(row[0], row[1]) for row in rows if len(row) >= 2]

    with open(_VORNAME_CORPUS_PATH, 'w', encoding='utf-8') as vorname_file, \
            open(_NACHNAME_CORPUS_PATH, 'w', encoding='utf-8') as nachname_file:
        vorname_file.write(''.join(vorname + ' ' for vorname, _ in names))
        nachname_file.write(''.join(nachname + ' ' for _, nachname in names))


if __name__ == '__main__':
    # Regenerate the Vorname/Nachname corpora from the CSV before serving.
    split_Student_Names()

    # NOTE(review): debug=True enables the interactive debugger and reloader;
    # keep this entry point for local development only.
    app.run(debug=True, port=8000)