Skip to content
Snippets Groups Projects
Commit 3968a52b authored by Harald Scheidl's avatar Harald Scheidl
Browse files

analyze HTR system

parent 4119ac83
No related branches found
No related tags found
No related merge requests found
data/analyze.png

4.27 KiB

File added
File added
doc/analyze.png

53.1 KiB

...@@ -211,7 +211,7 @@ class Model: ...@@ -211,7 +211,7 @@ class Model:
return lossVal return lossVal
def inferBatch(self, batch, calcProbability=False): def inferBatch(self, batch, calcProbability=False, probabilityOfGT=False):
"feed a batch into the NN to recngnize the texts" "feed a batch into the NN to recngnize the texts"
# decode, optionally save RNN output # decode, optionally save RNN output
...@@ -225,7 +225,7 @@ class Model: ...@@ -225,7 +225,7 @@ class Model:
# feed RNN output and recognized text into CTC loss to compute labeling probability # feed RNN output and recognized text into CTC loss to compute labeling probability
probs = None probs = None
if calcProbability: if calcProbability:
sparse = self.toSparse(texts) sparse = self.toSparse(batch.gtTexts) if probabilityOfGT else self.toSparse(texts)
ctcInput = evalRes[1] ctcInput = evalRes[1]
evalList = self.lossPerElement evalList = self.lossPerElement
feedDict = {self.savedCtcInput : ctcInput, self.gtTexts : sparse, self.seqLen : [Model.maxTextLen] * numBatchElements} feedDict = {self.savedCtcInput : ctcInput, self.gtTexts : sparse, self.seqLen : [Model.maxTextLen] * numBatchElements}
......
from __future__ import division
from __future__ import print_function
import sys
import math
import copy
import numpy as np
import cv2
import matplotlib.pyplot as plt
from DataLoader import Batch
from Model import Model, DecoderType
from SamplePreprocessor import preprocess
class FilePaths:
    """Locations of the files read and written by the analysis script."""

    # inputs: character list passed to the model and the image to analyze
    fnCharList = '../model/charList.txt'
    fnAnalyze = '../data/analyze.png'

    # outputs: numpy files written by the two analyses and read by showResults
    fnPixelRelevance = '../data/pixelRelevance.npy'
    fnTranslationInvariance = '../data/translationInvariance.npy'
def odds(val):
    """Return the odds ``val / (1 - val)`` of the probability ``val``.

    ``val`` must be smaller than 1, otherwise the division has a zero
    denominator.
    """
    return val / (1 - val)


def weightOfEvidence(origProb, margProb, eps=1e-12):
    """Return the weight of evidence (in bits) of origProb relative to margProb.

    The result is ``log2(odds(origProb)) - log2(odds(margProb))``: positive
    if origProb is larger than margProb, negative if it is smaller and zero
    if both are equal.

    Both probabilities are clamped to ``[eps, 1 - eps]`` first. A probability
    of exactly 0 would require log2(0) and a probability of exactly 1 would
    divide by zero inside odds(); clamping keeps the result finite in these
    saturated cases. Values already inside the interval are passed through
    unchanged.

    Args:
        origProb: probability computed for the original input.
        margProb: probability computed with the examined feature marginalized.
        eps: distance kept from the bounds 0 and 1 when clamping.
    """
    def clamp(prob):
        return min(max(prob, eps), 1 - eps)

    return math.log2(odds(clamp(origProb))) - math.log2(odds(clamp(margProb)))
def analyzePixelRelevance():
    """Compute how relevant each pixel of the analyzed image is for the text 'are'.

    Simplified implementation of paper: Zintgraf et al - Visualizing Deep
    Neural Network Decisions: Prediction Difference Analysis.

    For every pixel the gray value is replaced by a few fixed values, the
    probability of the ground-truth text is computed for each variant and the
    mean of these probabilities is compared with the probability obtained for
    the unmodified image by means of weightOfEvidence(). The resulting array
    (same shape as the image, float32) is saved to FilePaths.fnPixelRelevance.
    One line of progress information is printed per pixel.

    Raises:
        IOError: if the image FilePaths.fnAnalyze cannot be read.
    """
    # setup model; the context manager closes the char list file after reading
    with open(FilePaths.fnCharList) as charListFile:
        charList = charListFile.read()
    model = Model(charList, DecoderType.BestPath, mustRestore=True)

    # read image and specify ground-truth text
    img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError('could not read image: ' + FilePaths.fnAnalyze)
    # the shape of the 2D array is (number of rows, number of columns)
    (numRows, numCols) = img.shape
    assert Model.imgSize[1] == numRows
    gt = 'are'

    # compute probability of gt text in original image
    batch = Batch([gt], [preprocess(img, Model.imgSize)])
    (_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
    origProb = probs[0]

    # subset of possible gray values tried for each pixel
    grayValues = [0, 63, 127, 191, 255]

    # iterate over all pixels in image
    pixelRelevance = np.zeros(img.shape, np.float32)
    for x in range(numRows):
        for y in range(numCols):

            # one modified copy of the image per gray value of pixel (x,y)
            imgsMarginalized = []
            for g in grayValues:
                imgChanged = img.copy()
                imgChanged[x, y] = g
                imgsMarginalized.append(preprocess(imgChanged, Model.imgSize))

            # put them all into one batch
            batch = Batch([gt] * len(imgsMarginalized), imgsMarginalized)

            # compute probabilities of the gt text
            (_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)

            # marginalize over pixel value (assume uniform distribution)
            margProb = sum(probs) / len(probs)
            pixelRelevance[x, y] = weightOfEvidence(origProb, margProb)

            print(x, y, pixelRelevance[x, y], origProb, margProb)

    np.save(FilePaths.fnPixelRelevance, pixelRelevance)
def analyzeTranslationInvariance():
    """Compute the probability of the text 'are' for shifted copies of the image.

    The analyzed image is pasted at every possible column offset into a white
    canvas of Model.imgSize[1] rows and Model.imgSize[0] columns. All shifted
    images are put into one batch, the probability of the ground-truth text
    is computed for each of them and the probabilities are saved to
    FilePaths.fnTranslationInvariance (one entry per offset).

    Raises:
        IOError: if the image FilePaths.fnAnalyze cannot be read.
    """
    # setup model; the context manager closes the char list file after reading
    with open(FilePaths.fnCharList) as charListFile:
        charList = charListFile.read()
    model = Model(charList, DecoderType.BestPath, mustRestore=True)

    # read image and specify ground-truth text
    img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise IOError('could not read image: ' + FilePaths.fnAnalyze)
    # the shape of the 2D array is (number of rows, number of columns)
    (numRows, numCols) = img.shape
    assert Model.imgSize[1] == numRows
    gt = 'are'

    # create one white canvas per column offset and paste the image into it
    imgList = []
    for shift in range(Model.imgSize[0] - numCols + 1):
        targetImg = np.ones((Model.imgSize[1], Model.imgSize[0])) * 255
        targetImg[:, shift:numCols + shift] = img
        imgList.append(preprocess(targetImg, Model.imgSize))

    # put images and gt texts into batch
    batch = Batch([gt] * len(imgList), imgList)

    # compute probabilities of the gt text
    (_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
    np.save(FilePaths.fnTranslationInvariance, probs)
def showResults():
    """Display the saved results of both analyses in two matplotlib figures."""

    def plotPixelRelevance():
        # relevance values as heatmap, color range limited to [-0.5, 0.5]
        relevanceMap = np.load(FilePaths.fnPixelRelevance)
        plt.figure('Pixel relevance')
        plt.imshow(relevanceMap, cmap=plt.cm.jet, vmin=-0.5, vmax=0.5)
        plt.colorbar()

        # draw the analyzed image semi-transparently on top of the heatmap
        analyzedImg = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
        plt.imshow(analyzedImg, cmap=plt.cm.gray, alpha=.4)

    def plotTranslationInvariance():
        # saved text probabilities plotted over their index
        textProbs = np.load(FilePaths.fnTranslationInvariance)
        plt.figure('Translation invariance')
        plt.plot(textProbs, 'o-')
        plt.xlabel('horizontal translation')
        plt.ylabel('text probability')

    plotPixelRelevance()
    plotTranslationInvariance()

    # show both plots
    plt.show()
if __name__ == '__main__':
    # the first command line argument selects the analysis to run,
    # without any argument the saved results are shown
    option = sys.argv[1] if len(sys.argv) > 1 else None

    if option is None:
        print('Show results')
        showResults()
    elif option == '--relevance':
        print('Analyze pixel relevance')
        analyzePixelRelevance()
    elif option == '--invariance':
        print('Analyze translation invariance')
        analyzeTranslationInvariance()
    else:
        # report a mistyped option instead of silently doing nothing
        sys.exit('Unknown option: ' + option + ' (expected --relevance or --invariance)')
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment