Commit 2e6282f0 authored by fabian

Added lexicon search for name recognition

parent abf00eb0
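The gist of the change: instead of constraining decoding with WordBeamSearch, the server now lets the model decode freely and snaps the raw CTC output onto the closest word of a name corpus via a BK-tree. A minimal sketch of that decoding step, assuming the API of the ctc_decoder package (https://github.com/githubharald/CTCDecoder) that model.py imports below; the character set, corpus, and matrix here are made up:

# Minimal lexicon-search sketch; chars, corpus and mat are illustrative.
import numpy as np
from ctc_decoder import BKTree, lexicon_search

chars = 'ab'                  # recognizable characters; the CTC blank is the last matrix column
corpus = ['aa', 'ab', 'ba']   # hypothetical lexicon of valid words
bk_tree = BKTree(corpus)      # BK-tree allows fast edit-distance lookups in the lexicon

# mat: T x C matrix of per-timestep character probabilities (softmax already applied)
mat = np.array([[0.7, 0.2, 0.1],
                [0.1, 0.2, 0.7],
                [0.2, 0.7, 0.1]])

# best-path decode, then pick the closest corpus word within edit distance 1
print(lexicon_search(mat, chars, bk_tree, 1))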
.gitignore
@@ -6,3 +6,4 @@ notes/
 *.pyc
 .idea/
 dump/
+word-data/
main.py
@@ -10,8 +10,7 @@ from dataloader_iam import DataLoaderIAM, Batch
 from model import Model, DecoderType
 from preprocessor import Preprocessor
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '-1'


 class FilePaths:
     """Filenames and paths to data."""
model.py
+import tensorflow as tf
+physical_devices = tf.config.list_physical_devices('GPU')
+print("Num GPUs:", len(physical_devices))
 import os
 import sys
 from typing import List, Tuple

 import numpy as np
-import tensorflow as tf

 from dataloader_iam import Batch
+from ctc_decoder import lexicon_search, BKTree

 # Disable eager mode
 tf.compat.v1.disable_eager_execution()


 class DecoderType:
     """CTC decoder types."""
     BestPath = 0
     BeamSearch = 1
     WordBeamSearch = 2
+    LexiconSearch = 3


 class Model:
@@ -33,6 +48,8 @@ class Model:
         self.must_restore = must_restore
         self.snap_ID = 0
+        self.corpus = []

         # Whether to use normalization over a batch or a population
         self.is_train = tf.compat.v1.placeholder(tf.bool, name='is_train')
@@ -129,7 +146,9 @@ class Model:
             self.decoder = tf.nn.ctc_greedy_decoder(inputs=self.ctc_in_3d_tbc, sequence_length=self.seq_len)
         elif self.decoder_type == DecoderType.BeamSearch:
             self.decoder = tf.nn.ctc_beam_search_decoder(inputs=self.ctc_in_3d_tbc, sequence_length=self.seq_len,
                                                          beam_width=50)

         # word beam search decoding (see https://github.com/githubharald/CTCWordBeamSearch)
         elif self.decoder_type == DecoderType.WordBeamSearch:
             # prepare information about language (dictionary, characters in dataset, characters forming words)
@@ -144,6 +163,8 @@ class Model:
             # the input to the decoder must have softmax already applied
             self.wbs_input = tf.nn.softmax(self.ctc_in_3d_tbc, axis=2)
+        else:
+            # lexicon search also consumes the softmax output; decoding happens in Python, outside the graph
+            self.wbs_input = tf.nn.softmax(self.ctc_in_3d_tbc, axis=2)

     def setup_tf(self) -> Tuple[tf.compat.v1.Session, tf.compat.v1.train.Saver]:
         """Initialize TF."""
@@ -257,7 +278,7 @@ class Model:
         # put tensors to be evaluated into list
         eval_list = []
-        if self.decoder_type == DecoderType.WordBeamSearch:
+        if self.decoder_type >= DecoderType.WordBeamSearch:
             eval_list.append(self.wbs_input)
         else:
             eval_list.append(self.decoder)
@@ -276,11 +297,18 @@ class Model:
         eval_res = self.sess.run(eval_list, feed_dict)

         # TF decoders: decoding already done in TF graph
-        if self.decoder_type != DecoderType.WordBeamSearch:
+        if self.decoder_type < DecoderType.WordBeamSearch:
             decoded = eval_res[0]
         # word beam search decoder: decoding is done in C++ function compute()
-        else:
+        elif self.decoder_type == DecoderType.WordBeamSearch:
             decoded = self.decoder.compute(eval_res[0])
+        # lexicon search: decode in Python with a BK-tree built over the corpus words
+        else:
+            bk_tree = BKTree(self.corpus)
+            mat = np.array(eval_res[0])
+            mat = mat[:, 0, :]  # only the first batch element is decoded
+            decoded = lexicon_search(mat, self.char_list, bk_tree, 50)
+            return decoded, 1  # lexicon_search returns the text itself; probability is not computed

         # map labels (numbers) to character string
         texts = self.decoder_output_to_text(decoded, num_batch_elements)
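Note that the new lexicon branch decodes only the first batch element (mat[:, 0, :]) and reports a constant probability of 1, while the other branches decode the whole batch. A sketch of a batch-aware variant under the same assumptions as the diff (lexicon_decode_batch is a hypothetical helper, not part of the commit):

# Hypothetical batch-aware variant of the lexicon branch above.
import numpy as np
from ctc_decoder import BKTree, lexicon_search

def lexicon_decode_batch(eval_output, char_list, corpus, tolerance=50):
    bk_tree = BKTree(corpus)
    mat = np.array(eval_output)  # shape (T, B, C): time x batch x characters(+blank)
    # run the lexicon search once per batch element instead of only element 0
    return [lexicon_search(mat[:, b, :], char_list, bk_tree, tolerance)
            for b in range(mat.shape[1])]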
preprocessor.py
@@ -140,6 +140,15 @@ class Preprocessor:
         else:
             if self.dynamic_width:
                 ht = self.img_size[1]
+                # trim leading rows that are almost entirely white
+                img = 255 - img  # invert: white rows now sum to nearly 0
+                try:
+                    while np.sum(img[0, :]) < 4096:
+                        img = img[1:, :]
+                except IndexError:
+                    # every row was white: fall back to a blank square image
+                    return np.ones((ht, ht))
+                img = 255 - img  # undo the inversion
                 h, w = img.shape
                 f = ht / h
                 wt = int(f * w + self.padding)
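The row trimming added above is easier to reason about (and test) in isolation. A sketch as a standalone helper under the diff's assumptions (grayscale uint8 image with white background; the helper name is hypothetical, the 4096 threshold is taken from the commit):

import numpy as np

def trim_leading_white_rows(img, threshold=4096):
    # invert like the diff does, so white rows sum to nearly 0
    inv = 255 - img.astype(np.int32)
    row = 0
    while row < inv.shape[0] and np.sum(inv[row, :]) < threshold:
        row += 1
    # None signals an all-white image (the diff returns np.ones((ht, ht)) there)
    return img[row:, :] if row < inv.shape[0] else None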
Flask server (file path not preserved in this extract)
@@ -6,13 +6,15 @@ import dataloader_iam as htr_data_loader
 import preprocessor as htr_preprocessor
 import numpy as np
 from word_beam_search import WordBeamSearch
+import base64
+import tensorflow as tf

 app = Flask(__name__)
 image_size = 32
-model_name = htr_model.Model(htr.char_list_from_file(), htr_model.DecoderType.WordBeamSearch, must_restore=True)
+model_name = htr_model.Model(htr.char_list_from_file(), htr_model.DecoderType.LexiconSearch, must_restore=True)
+model_name.setup_ctc()  # note: Model.__init__ already runs the CTC setup, so this call is likely redundant

 csv_path = '../tns.csv'
 char_list = htr.char_list_from_file()
@@ -39,13 +41,14 @@ def predictNach():
     batch = htr_data_loader.Batch([processed_image], None, 1)

     # change corpus to the surname (Nachname) list
-    corpus = open('../data/Nachname.txt').read()
-    model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),
-                                        word_chars.encode('utf8'))
+    model_name.corpus = open('../data/Nachname.txt').read().split()
+    #model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'), word_chars.encode('utf8'))

     recognized, probability = htr_model.Model.infer_batch(model_name, batch)

-    #convert corpus to list, split at space
-    corpus = corpus.split()
+    """ #convert corpus to list, split at space
+    corpus = model_name.corpus
     result_list=[]
+    print(recognized)
     for name in recognized:
         indecies = []
         for i in range(len(corpus)):
@@ -53,12 +56,33 @@ def predictNach():
             indecies.append(i)
         if len(indecies) == 0:
             indecies.append(-1)
+        else:
             result_list.append((name, indecies))
-    if len(result_list) == 0:
-        result_list.append((-1, -1))
+    """
+    processed_image = processed_image + 0.5
+    processed_image = processed_image * 255
+    # rotate the image 90 degrees clockwise (three 90-degree CCW rotations)
+    processed_image = np.rot90(processed_image, 3)
+    # mirror the image horizontally
+    processed_image = np.fliplr(processed_image)
+    height, width = processed_image.shape
+    image = np.reshape(processed_image, (height * width))
+    # append height and width so the client can restore the shape
+    image = np.append(image, height)
+    image = np.append(image, width)
+    image = image.astype(np.uint64)
+    array_bytes = image.tobytes()
+    image_base64 = base64.b64encode(array_bytes).decode('utf-8')
     result = {
-        'recognized': result_list,
+        'recognized': recognized,
+        'image': image_base64
     }
     return jsonify(result)
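The JSON response now carries the preprocessed image as base64-encoded uint64 values, with height and width appended as the last two entries, the same layout predictVor() already expects on its input side. A client-side decoding sketch matching that layout (decode_image_field is a hypothetical helper):

import base64
import numpy as np

def decode_image_field(image_base64):
    # base64 -> flat uint64 array; the last two values are height and width
    raw = np.frombuffer(base64.b64decode(image_base64), dtype=np.uint64)
    height, width = int(raw[-2]), int(raw[-1])
    return raw[:-2].reshape((height, width)).astype(np.uint8)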
@@ -72,18 +96,21 @@ def predictVor():
     image_array = image_array[:-2]
     image_array = image_array.reshape((h, w))
     preprocessor = htr_preprocessor.Preprocessor(htr.get_img_size(), dynamic_width=True, padding=16)
+    print(image_array.shape)
     processed_image = preprocessor.process_img(image_array)
     batch = htr_data_loader.Batch([processed_image], None, 1)

     # change corpus to the first-name (Vorname) list
-    corpus = open('../data/Vorname.txt').read()
-    model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'),
-                                        word_chars.encode('utf8'))
+    model_name.corpus = open('../data/Vorname.txt').read().split()
+    #model_name.decoder = WordBeamSearch(50, 'Words', 0.0, corpus.encode('utf8'), chars.encode('utf8'), word_chars.encode('utf8'))

     recognized, probability = htr_model.Model.infer_batch(model_name, batch)

-    #convert corpus to list, split at space
-    corpus = corpus.split()
+    """ corpus = model_name.corpus
     result_list=[]
     for name in recognized:
         indecies = []
@@ -92,13 +119,34 @@ def predictVor():
             indecies.append(i)
         if len(indecies) == 0:
             indecies.append(-1)
+        else:
             result_list.append((name, indecies))
-    if len(result_list) == 0:
-        result_list.append(('KeinName', -1))
+    """
+    processed_image = processed_image + 0.5
+    processed_image = processed_image * 255
+    # rotate the image 90 degrees clockwise (the original comment says -90 degrees)
+    processed_image = np.rot90(processed_image, 3)
+    # mirror the image horizontally
+    processed_image = np.fliplr(processed_image)
+    height, width = processed_image.shape
+    image = np.reshape(processed_image, (height * width))
+    # append height and width so the client can restore the shape
+    image = np.append(image, height)
+    image = np.append(image, width)
+    image = image.astype(np.uint64)
+    array_bytes = image.tobytes()
+    image_base64 = base64.b64encode(array_bytes).decode('utf-8')
     result = {
-        'recognized': result_list,
+        'recognized': recognized,
+        'image': image_base64
     }
     return jsonify(result)
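predictVor() repeats the rotate/mirror/encode block from predictNach() verbatim; a sketch of a shared helper both routes could call (encode_processed_image is a hypothetical name, the steps are lifted from the diff):

import base64
import numpy as np

def encode_processed_image(processed_image):
    img = (processed_image + 0.5) * 255       # undo the model's input normalization
    img = np.fliplr(np.rot90(img, 3))         # rotate 90 degrees clockwise, then mirror
    height, width = img.shape
    flat = np.append(img.reshape(height * width), [height, width]).astype(np.uint64)
    return base64.b64encode(flat.tobytes()).decode('utf-8')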