Commit 2e6282f0 authored by fabian

added lexicon search for Name recognition

parent abf00eb0
@@ -6,3 +6,4 @@ notes/
*.pyc
.idea/
dump/
word-data/
@@ -10,8 +10,7 @@ from dataloader_iam import DataLoaderIAM, Batch
from model import Model, DecoderType
from preprocessor import Preprocessor
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
class FilePaths:
    """Filenames and paths to data."""
......
import tensorflow as tf

tf.config.list_physical_devices(device_type=None)
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices))
import os
import sys
from typing import List, Tuple
import numpy as np
import tensorflow as tf
from dataloader_iam import Batch
from ctc_decoder import lexicon_search, BKTree
# Disable eager mode
tf.compat.v1.disable_eager_execution()
class DecoderType:
    """CTC decoder types."""
    BestPath = 0
    BeamSearch = 1
    WordBeamSearch = 2
    LexiconSearch = 3
class Model:
@@ -33,6 +48,8 @@ class Model:
        self.must_restore = must_restore
        self.snap_ID = 0
        self.corpus = []
        # Whether to use normalization over a batch or a population
        self.is_train = tf.compat.v1.placeholder(tf.bool, name='is_train')
@@ -129,7 +146,9 @@ class Model:
            self.decoder = tf.nn.ctc_greedy_decoder(inputs=self.ctc_in_3d_tbc, sequence_length=self.seq_len)
        elif self.decoder_type == DecoderType.BeamSearch:
            self.decoder = tf.nn.ctc_beam_search_decoder(inputs=self.ctc_in_3d_tbc, sequence_length=self.seq_len,
-                                                         beam_width=50)
+                                                         beam_width=50, )
        # word beam search decoding (see https://github.com/githubharald/CTCWordBeamSearch)
        elif self.decoder_type == DecoderType.WordBeamSearch:
            # prepare information about language (dictionary, characters in dataset, characters forming words)
@@ -144,6 +163,8 @@ class Model:
            # the input to the decoder must have softmax already applied
            self.wbs_input = tf.nn.softmax(self.ctc_in_3d_tbc, axis=2)
        else:
            # lexicon search also works on the softmax output of the network
            self.wbs_input = tf.nn.softmax(self.ctc_in_3d_tbc, axis=2)
    def setup_tf(self) -> Tuple[tf.compat.v1.Session, tf.compat.v1.train.Saver]:
        """Initialize TF."""
@@ -257,7 +278,7 @@ class Model:
        # put tensors to be evaluated into list
        eval_list = []
-        if self.decoder_type == DecoderType.WordBeamSearch:
+        if self.decoder_type >= DecoderType.WordBeamSearch:
            eval_list.append(self.wbs_input)
        else:
            eval_list.append(self.decoder)
@@ -276,11 +297,18 @@ class Model:
        eval_res = self.sess.run(eval_list, feed_dict)
        # TF decoders: decoding already done in TF graph
-        if self.decoder_type != DecoderType.WordBeamSearch:
+        if self.decoder_type < DecoderType.WordBeamSearch:
            decoded = eval_res[0]
        # word beam search decoder: decoding is done in C++ function compute()
-        else:
+        elif self.decoder_type == DecoderType.WordBeamSearch:
            decoded = self.decoder.compute(eval_res[0])
        # lexicon search decoder: correct the softmax output with a BK-tree built from the corpus
        else:
            bk_tree = BKTree(self.corpus)
            mat = np.array(eval_res[0])
            mat = mat[:, 0, :]  # keep only the first batch element -> shape (T, C)
            decoded = lexicon_search(mat, self.char_list, bk_tree, 50)
            return decoded, 1  # return the corrected text with a dummy probability of 1
        # map labels (numbers) to character string
        texts = self.decoder_output_to_text(decoded, num_batch_elements)
......
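A minimal sketch of what the new lexicon-search branch does, assuming the ctc_decoder package imported above, where the matrix is the TxC softmax output and the CTC blank is the last column; the characters, probabilities and corpus below are invented for illustration:

import numpy as np
from ctc_decoder import BKTree, lexicon_search

# toy softmax output of shape (T, C); columns correspond to ['a', 'n', blank]
chars = 'an'
mat = np.array([
    [0.90, 0.05, 0.05],  # 'a'
    [0.05, 0.90, 0.05],  # 'n'
    [0.05, 0.90, 0.05],  # 'n' again -> plain best-path decoding collapses this to one 'n'
    [0.90, 0.05, 0.05],  # 'a'
    [0.05, 0.05, 0.90],  # blank
])

# the corpus (here: a hand-picked word list) is organised as a BK-tree,
# which allows fast lookup of all words within a given edit distance
bk_tree = BKTree(['anna', 'nana', 'ann'])

# best-path decoding alone would yield 'ana'; lexicon_search queries the BK-tree
# for nearby corpus words and returns the best-scoring candidate, here 'anna'
print(lexicon_search(mat, chars, bk_tree, 1))

The commit passes a tolerance of 50 instead of 1, so with the name lists used as corpus a candidate is practically always found.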
@@ -140,6 +140,15 @@ class Preprocessor:
        else:
            if self.dynamic_width:
                ht = self.img_size[1]
                # cut off leading rows that are (almost) completely white
                img = 255 - img
                try:
                    while np.sum(img[0, :]) < 4096:
                        img = img[1:, :]
                except IndexError:
                    # the whole image was white: fall back to a square placeholder
                    return np.ones((ht, ht))
                img = 255 - img
                h, w = img.shape
                f = ht / h
                wt = int(f * w + self.padding)
......
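The try/while loop above trims leading all-white rows from the (inverted) image and bails out with a placeholder if nothing is left. The same idea as a standalone, self-contained sketch; the helper name and the toy array are made up:

import numpy as np

def trim_leading_white_rows(img: np.ndarray, ink_threshold: int = 4096) -> np.ndarray:
    """Drop leading rows of a grayscale image (white = 255) that contain almost no ink."""
    inv = 255 - img  # invert so that white rows sum to ~0
    row = 0
    while row < inv.shape[0] and np.sum(inv[row, :]) < ink_threshold:
        row += 1
    return img[row:, :]

# two empty rows, then a dark stroke in the third row
demo = np.full((4, 64), 255, dtype=np.uint8)
demo[2, 10:40] = 0
print(trim_leading_white_rows(demo).shape)  # (2, 64)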
@@ -6,13 +6,15 @@ import dataloader_iam as htr_data_loader
import preprocessor as htr_preprocessor
import numpy as np
from word_beam_search import WordBeamSearch
import base64
import tensorflow as tf

app = Flask(__name__)
image_size = 32
-model_name = htr_model.Model(htr.char_list_from_file(), htr_model.DecoderType.WordBeamSearch, must_restore=True)
+model_name = htr_model.Model(htr.char_list_from_file(), htr_model.DecoderType.LexiconSearch, must_restore=True)
model_name.setup_ctc
csv_path = '../tns.csv'
char_list = htr.char_list_from_file()
@@ -39,13 +41,14 @@ def predictNach():
    batch = htr_data_loader.Batch([processed_image], None, 1)
    # change corpus for name
-    corpus = open('../data/Nachname.txt').read()
-    model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),
-                                        word_chars.encode('utf8'))
+    model_name.corpus = open('../data/Nachname.txt').read().split()
+    # model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'), word_chars.encode('utf8'))
    recognized, probability = htr_model.Model.infer_batch(model_name, batch)
-    # convert corpus to list, split at space
-    corpus = corpus.split()
    """ #convert corpus to list, split at space
    corpus = model_name.corpus
    result_list = []
    print(recognized)
    for name in recognized:
        indecies = []
        for i in range(len(corpus)):
@@ -53,12 +56,33 @@ def predictNach():
                indecies.append(i)
        if len(indecies) == 0:
            indecies.append(-1)
        else:
            result_list.append((name, indecies))
    if len(result_list) == 0:
        result_list.append((-1, -1)) """
    # map the normalized image back to the 0..255 range
    processed_image = processed_image + 0.5
    processed_image = processed_image * 255
    # rotate the image 90 degrees clockwise
    processed_image = np.rot90(processed_image, 3)
    # mirror image
    processed_image = np.fliplr(processed_image)
    # flatten the image and append its height and width so the client can restore the shape
    height, width = processed_image.shape
    image = np.reshape(processed_image, (height * width))
    image = np.append(image, height)
    image = np.append(image, width)
    image = image.astype(np.uint64)
    array_bytes = image.tobytes()
    image_base64 = base64.b64encode(array_bytes).decode('utf-8')
    result = {
-        'recognized': result_list,
+        'recognized': recognized,
        'image': image_base64
    }
    return jsonify(result)
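The handler now ships the preprocessed image back to the caller as a base64 string of uint64 pixel values, with the height and width appended as the last two elements (predictVor below uses the same encoding). A minimal sketch of how a client could undo that encoding; the helper is hypothetical and simply mirrors the handler code above:

import base64
import numpy as np

def decode_image_payload(image_base64: str) -> np.ndarray:
    """Rebuild the 2D image from the base64 payload produced by the Flask handlers."""
    flat = np.frombuffer(base64.b64decode(image_base64), dtype=np.uint64)
    height, width = int(flat[-2]), int(flat[-1])  # shape was appended after the pixels
    return flat[:-2].reshape(height, width).astype(np.uint8)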
@@ -72,18 +96,21 @@ def predictVor():
    image_array = image_array[:-2]
    image_array = image_array.reshape((h, w))
    preprocessor = htr_preprocessor.Preprocessor(htr.get_img_size(), dynamic_width=True, padding=16)
    print(image_array.shape)
    processed_image = preprocessor.process_img(image_array)
    batch = htr_data_loader.Batch([processed_image], None, 1)
    # change corpus for name
-    corpus = open('../data/Vorname.txt').read()
-    model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),
-                                        word_chars.encode('utf8'))
+    model_name.corpus = open('../data/Vorname.txt').read().split()
+    # model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'), word_chars.encode('utf8'))
    recognized, probability = htr_model.Model.infer_batch(model_name, batch)
-    # convert corpus to list, split at space
-    corpus = corpus.split()
    """ corpus = model_name.corpus
    result_list = []
    for name in recognized:
        indecies = []
@@ -92,13 +119,34 @@ def predictVor():
                indecies.append(i)
        if len(indecies) == 0:
            indecies.append(-1)
        else:
            result_list.append((name, indecies))
    if len(result_list) == 0:
        result_list.append(('KeinName', -1)) """
    # map the normalized image back to the 0..255 range
    processed_image = processed_image + 0.5
    processed_image = processed_image * 255
    # rotate the image 90 degrees clockwise
    processed_image = np.rot90(processed_image, 3)
    # mirror image
    processed_image = np.fliplr(processed_image)
    # flatten the image and append its height and width so the client can restore the shape
    height, width = processed_image.shape
    image = np.reshape(processed_image, (height * width))
    image = np.append(image, height)
    image = np.append(image, width)
    image = image.astype(np.uint64)
    array_bytes = image.tobytes()
    image_base64 = base64.b64encode(array_bytes).decode('utf-8')
    result = {
-        'recognized': result_list,
+        'recognized': recognized,
        'image': image_base64
    }
    return jsonify(result)
......