diff --git a/.gitignore b/.gitignore index e58673f6545c14befec20c231c84f9f8daada26c..61969afaf401bed5fa1a554a03ae522df6da4eb5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,6 @@ data/words* data/words.txt src/__pycache__/ -model/checkpoint -model/snapshot-* notes/ *.so *.pyc diff --git a/README.md b/README.md index a22c6ccf143e26033c13eadd3aedeff761b1fa2f..c919f348aa685b2fd1a2f8c847802b0b89c84238 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,6 @@ The input image and the expected output is shown below. ``` > python main.py -Validation character error rate of saved model: 11.118344571029994% Init with stored values from ../model/snapshot-76 Recognized: "Hello" Probability: 0.8462573289871216 diff --git a/data/pixelRelevance.npy b/data/pixelRelevance.npy deleted file mode 100644 index 5d1a525240ba5ef3690f42e0d45ee7c032654662..0000000000000000000000000000000000000000 Binary files a/data/pixelRelevance.npy and /dev/null differ diff --git a/data/translationInvariance.npy b/data/translationInvariance.npy deleted file mode 100644 index 72c21ed61205d0d076c863f03026f6933e5376ad..0000000000000000000000000000000000000000 Binary files a/data/translationInvariance.npy and /dev/null differ diff --git a/data/translationInvarianceTexts.pickle b/data/translationInvarianceTexts.pickle deleted file mode 100644 index 05bf9cf0a5989e7b38f4e34bb761e27019d3766c..0000000000000000000000000000000000000000 Binary files a/data/translationInvarianceTexts.pickle and /dev/null differ diff --git a/model/.gitignore b/model/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..86d0cb2726c6c7c179b99520c452dd1b68e7a813 --- /dev/null +++ b/model/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore \ No newline at end of file diff --git a/model/accuracy.txt b/model/accuracy.txt deleted file mode 100644 index 24c9310a87b90eb930fae9bc0c4e5ec5d1899210..0000000000000000000000000000000000000000 --- a/model/accuracy.txt +++ /dev/null @@ -1 +0,0 @@ -Validation character error rate of saved model: 10.89210233592881% \ No newline at end of file diff --git a/src/SamplePreprocessor.py b/src/SamplePreprocessor.py index c82ff0a4c929f85ea2a9bedbcf611209f2072306..b58a02c2a7aa17fa0dc749c63d7afec02dc3a10c 100644 --- a/src/SamplePreprocessor.py +++ b/src/SamplePreprocessor.py @@ -23,9 +23,9 @@ def preprocess(img, imgSize, dataAugmentation=False): if random.random() < 0.25: img = cv2.erode(img, np.ones((3, 3))) if random.random() < 0.5: - img = img * (0.5 + random.random() * 0.5) + img = img * (0.1 + random.random() * 0.9) if random.random() < 0.25: - img = np.clip(img + (np.random.random(img.shape) - 0.5) * random.randint(1, 50), 0, 255) + img = np.clip(img + (np.random.random(img.shape) - 0.5) * random.randint(1, 25), 0, 255) if random.random() < 0.1: img = 255 - img @@ -33,20 +33,20 @@ def preprocess(img, imgSize, dataAugmentation=False): wt, ht = imgSize h, w = img.shape f = min(wt / w, ht / h) - fx = f * np.random.uniform(0.75, 1.25) - fy = f * np.random.uniform(0.75, 1.25) + fx = f * np.random.uniform(0.75, 1.5) + fy = f * np.random.uniform(0.75, 1.5) # random position around center txc = (wt - w * fx) / 2 tyc = (ht - h * fy) / 2 - freedom_x = wt // 10 - freedom_y = ht // 10 - tx = txc + np.random.randint(-freedom_x, freedom_x) - ty = tyc + np.random.randint(-freedom_y, freedom_y) + freedom_x = wt / 5 + freedom_y = ht / 5 + tx = txc + np.random.uniform(-freedom_x, freedom_x) + ty = tyc + np.random.uniform(-freedom_y, freedom_y) # map image into target image M = np.float32([[fx, 0, tx], [0, fy, ty]]) - target = np.ones(imgSize[::-1]) * 255 / 2 + target = np.ones(imgSize[::-1]) * np.random.uniform(0, 255) img = cv2.warpAffine(img, M, dsize=imgSize, dst=target, borderMode=cv2.BORDER_TRANSPARENT) # no data augmentation diff --git a/src/main.py b/src/main.py index 72a6be074ef714573f4e18420c552441dfae30de..d28dacca3971688089c02472b8139bce51baac2f 100644 --- a/src/main.py +++ b/src/main.py @@ -1,4 +1,5 @@ import argparse +import json import cv2 import editdistance @@ -12,14 +13,20 @@ from SamplePreprocessor import preprocess class FilePaths: "filenames and paths to data" fnCharList = '../model/charList.txt' - fnAccuracy = '../model/accuracy.txt' + fnSummary = '../model/summary.json' fnInfer = '../data/test.png' fnCorpus = '../data/corpus.txt' +def write_summary(charErrorRates): + with open(FilePaths.fnSummary, 'w') as f: + json.dump(charErrorRates, f) + + def train(model, loader): "train NN" epoch = 0 # number of training epochs since start + summaryCharErrorRates = [] bestCharErrorRate = float('inf') # best valdiation character error rate noImprovementSince = 0 # number of epochs no improvement of character error rate occured earlyStopping = 25 # stop training after this number of epochs without improvement @@ -39,14 +46,16 @@ def train(model, loader): # validate charErrorRate = validate(model, loader) + # write summary + summaryCharErrorRates.append(charErrorRate) + write_summary(summaryCharErrorRates) + # if best validation accuracy so far, save model parameters if charErrorRate < bestCharErrorRate: print('Character error rate improved, save model') bestCharErrorRate = charErrorRate noImprovementSince = 0 model.save() - open(FilePaths.fnAccuracy, 'w').write( - f'Validation character error rate of saved model: {charErrorRate * 100.0}%') else: print(f'Character error rate not improved, best so far: {charErrorRate * 100.0}%') noImprovementSince += 1 @@ -140,7 +149,6 @@ def main(): # infer text on test image else: - print(open(FilePaths.fnAccuracy).read()) model = Model(open(FilePaths.fnCharList).read(), decoderType, mustRestore=True, dump=args.dump) infer(model, FilePaths.fnInfer)