diff --git a/data/analyze.png b/data/analyze.png new file mode 100644 index 0000000000000000000000000000000000000000..ae1c88b34174e82736122fdd824fd6cf06e4dd11 Binary files /dev/null and b/data/analyze.png differ diff --git a/data/pixelRelevance.npy b/data/pixelRelevance.npy new file mode 100644 index 0000000000000000000000000000000000000000..28a681aeb1dab442db4ecd2753dc75989cd0823f Binary files /dev/null and b/data/pixelRelevance.npy differ diff --git a/data/translationInvariance.npy b/data/translationInvariance.npy new file mode 100644 index 0000000000000000000000000000000000000000..39addf8d885e5e7f8061027c8dc125cd412f8b67 Binary files /dev/null and b/data/translationInvariance.npy differ diff --git a/doc/analyze.png b/doc/analyze.png new file mode 100644 index 0000000000000000000000000000000000000000..da513ae4361106fac98423f53dc9ace0a9fc1c01 Binary files /dev/null and b/doc/analyze.png differ diff --git a/src/Model.py b/src/Model.py index 0211c6fb4ddb51c9ee65e048724f8f06c9536600..0a40caf358fa1c540c05a385d1b57d7ba1125872 100644 --- a/src/Model.py +++ b/src/Model.py @@ -211,7 +211,7 @@ class Model: return lossVal - def inferBatch(self, batch, calcProbability=False): + def inferBatch(self, batch, calcProbability=False, probabilityOfGT=False): "feed a batch into the NN to recngnize the texts" # decode, optionally save RNN output @@ -225,7 +225,7 @@ class Model: # feed RNN output and recognized text into CTC loss to compute labeling probability probs = None if calcProbability: - sparse = self.toSparse(texts) + sparse = self.toSparse(batch.gtTexts) if probabilityOfGT else self.toSparse(texts) ctcInput = evalRes[1] evalList = self.lossPerElement feedDict = {self.savedCtcInput : ctcInput, self.gtTexts : sparse, self.seqLen : [Model.maxTextLen] * numBatchElements} diff --git a/src/analyze.py b/src/analyze.py new file mode 100644 index 0000000000000000000000000000000000000000..ca942975580b624b1e4a50f126e4bdb778652668 --- /dev/null +++ b/src/analyze.py @@ -0,0 +1,136 @@ +from __future__ import division +from __future__ import print_function + +import sys +import math +import copy +import numpy as np +import cv2 +import matplotlib.pyplot as plt +from DataLoader import Batch +from Model import Model, DecoderType +from SamplePreprocessor import preprocess + + +class FilePaths: + "filenames and paths to data" + fnCharList = '../model/charList.txt' + fnAnalyze = '../data/analyze.png' + fnPixelRelevance = '../data/pixelRelevance.npy' + fnTranslationInvariance = '../data/translationInvariance.npy' + + +def odds(val): + return val / (1 - val) + + +def weightOfEvidence(origProb, margProb): + return math.log2(odds(origProb)) - math.log2(odds(margProb)) + + +def analyzePixelRelevance(): + "simplified implementation of paper: Zintgraf et al - Visualizing Deep Neural Network Decisions: Prediction Difference Analysis" + + # setup model + model = Model(open(FilePaths.fnCharList).read(), DecoderType.BestPath, mustRestore=True) + + # read image and specify ground-truth text + img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE) + (w, h) = img.shape + assert Model.imgSize[1] == w + gt = 'are' + + # compute probability of gt text in original image + batch = Batch([gt], [preprocess(img, Model.imgSize)]) + (_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True) + origProb = probs[0] + + # iterate over all pixels in image + pixelRelevance = np.zeros(img.shape, np.float32) + for x in range(w): + for y in range(h): + + # try a subset of possible grayvalues of pixel (x,y) + imgsMarginalized = [] + for g in [0, 63, 127, 191, 255]: + imgChanged = copy.deepcopy(img) + imgChanged[x, y] = g + imgsMarginalized.append(preprocess(imgChanged, Model.imgSize)) + + # put them all into one batch + batch = Batch([gt]*len(imgsMarginalized), imgsMarginalized) + + # compute probabilities + (_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True) + + # marginalize over pixel value (assume uniform distribution) + margProb = sum(probs)/len(probs) + + pixelRelevance[x, y] = weightOfEvidence(origProb, margProb) + + print(x, y, pixelRelevance[x, y], origProb, margProb) + + np.save(FilePaths.fnPixelRelevance, pixelRelevance) + + + +def analyzeTranslationInvariance(): + # setup model + model = Model(open(FilePaths.fnCharList).read(), DecoderType.BestPath, mustRestore=True) + + # read image and specify ground-truth text + img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE) + (w, h) = img.shape + assert Model.imgSize[1] == w + gt = 'are' + + imgList = [] + for dy in range(Model.imgSize[0]-h+1): + targetImg = np.ones((Model.imgSize[1], Model.imgSize[0])) * 255 + targetImg[:,dy:h+dy] = img + imgList.append(preprocess(targetImg, Model.imgSize)) + + # put images and gt texts into batch + batch = Batch([gt]*len(imgList), imgList) + + # compute probabilities + (_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True) + np.save(FilePaths.fnTranslationInvariance, probs) + + +def showResults(): + # 1. pixel relevance + pixelRelevance = np.load(FilePaths.fnPixelRelevance) + plt.figure('Pixel relevance') + + plt.imshow(pixelRelevance, cmap=plt.cm.jet, vmin=-0.5, vmax=0.5) + plt.colorbar() + + img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE) + plt.imshow(img, cmap=plt.cm.gray, alpha=.4) + + + # 2. translation invariance + probs = np.load(FilePaths.fnTranslationInvariance) + plt.figure('Translation invariance') + + plt.plot(probs, 'o-') + plt.xlabel('horizontal translation') + plt.ylabel('text probability') + + # show both plots + plt.show() + + +if __name__ == '__main__': + if len(sys.argv)>1: + if sys.argv[1]=='--relevance': + print('Analyze pixel relevance') + analyzePixelRelevance() + elif sys.argv[1]=='--invariance': + print('Analyze translation invariance') + analyzeTranslationInvariance() + else: + print('Show results') + showResults() + \ No newline at end of file