diff --git a/data/pixelRelevance.npy b/data/pixelRelevance.npy
index 28a681aeb1dab442db4ecd2753dc75989cd0823f..d7ce72613b0217b0590b679842519e20daad520a 100644
Binary files a/data/pixelRelevance.npy and b/data/pixelRelevance.npy differ
diff --git a/data/translationInvarianceTexts.pickle b/data/translationInvarianceTexts.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..55ac8fc3635f0050334b16b7fda9d7ea4e1c2ab5
Binary files /dev/null and b/data/translationInvarianceTexts.pickle differ
diff --git a/src/analyze.py b/src/analyze.py
index ca942975580b624b1e4a50f126e4bdb778652668..425e092259509fd8ee34970dae8ea6ef47abbfcd 100644
--- a/src/analyze.py
+++ b/src/analyze.py
@@ -3,6 +3,7 @@ from __future__ import print_function

 import sys
 import math
+import pickle
 import copy
 import numpy as np
 import cv2
@@ -12,12 +13,16 @@ from Model import Model, DecoderType
 from SamplePreprocessor import preprocess


-class FilePaths:
+# constants like filepaths
+class Constants:
 	"filenames and paths to data"
 	fnCharList = '../model/charList.txt'
 	fnAnalyze = '../data/analyze.png'
 	fnPixelRelevance = '../data/pixelRelevance.npy'
 	fnTranslationInvariance = '../data/translationInvariance.npy'
+	fnTranslationInvarianceTexts = '../data/translationInvarianceTexts.pickle'
+	gtText = 'are'
+	distribution = 'histogram' # 'histogram' or 'uniform'


 def odds(val):
@@ -32,19 +37,28 @@ def analyzePixelRelevance():
 	"simplified implementation of paper: Zintgraf et al - Visualizing Deep Neural Network Decisions: Prediction Difference Analysis"

 	# setup model
-	model = Model(open(FilePaths.fnCharList).read(), DecoderType.BestPath, mustRestore=True)
+	model = Model(open(Constants.fnCharList).read(), DecoderType.BestPath, mustRestore=True)

 	# read image and specify ground-truth text
-	img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	img = cv2.imread(Constants.fnAnalyze, cv2.IMREAD_GRAYSCALE)
 	(w, h) = img.shape
 	assert Model.imgSize[1] == w
-	gt = 'are'

 	# compute probability of gt text in original image
-	batch = Batch([gt], [preprocess(img, Model.imgSize)])
+	batch = Batch([Constants.gtText], [preprocess(img, Model.imgSize)])
 	(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
 	origProb = probs[0]

+	grayValues = [0, 63, 127, 191, 255]
+	if Constants.distribution == 'histogram':
+		bins = [0, 31, 95, 159, 223, 255]
+		(hist, _) = np.histogram(img, bins=bins)
+		pixelProb = hist / sum(hist)
+	elif Constants.distribution == 'uniform':
+		pixelProb = [1.0 / len(grayValues) for _ in grayValues]
+	else:
+		raise Exception('unknown value for Constants.distribution')
+
 	# iterate over all pixels in image
 	pixelRelevance = np.zeros(img.shape, np.float32)
 	for x in range(w):
@@ -52,37 +66,35 @@ def analyzePixelRelevance():

 			# try a subset of possible grayvalues of pixel (x,y)
 			imgsMarginalized = []
-			for g in [0, 63, 127, 191, 255]:
+			for g in grayValues:
 				imgChanged = copy.deepcopy(img)
 				imgChanged[x, y] = g
 				imgsMarginalized.append(preprocess(imgChanged, Model.imgSize))

 			# put them all into one batch
-			batch = Batch([gt]*len(imgsMarginalized), imgsMarginalized)
+			batch = Batch([Constants.gtText]*len(imgsMarginalized), imgsMarginalized)

 			# compute probabilities
 			(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)

 			# marginalize over pixel value (assume uniform distribution)
-			margProb = sum(probs)/len(probs)
+			margProb = sum([probs[i] * pixelProb[i] for i in range(len(grayValues))])

 			pixelRelevance[x, y] = weightOfEvidence(origProb, margProb)

 			print(x, y, pixelRelevance[x, y], origProb, margProb)

-	np.save(FilePaths.fnPixelRelevance, pixelRelevance)
-
+	np.save(Constants.fnPixelRelevance, pixelRelevance)


 def analyzeTranslationInvariance():
 	# setup model
-	model = Model(open(FilePaths.fnCharList).read(), DecoderType.BestPath, mustRestore=True)
+	model = Model(open(Constants.fnCharList).read(), DecoderType.BestPath, mustRestore=True)

 	# read image and specify ground-truth text
-	img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	img = cv2.imread(Constants.fnAnalyze, cv2.IMREAD_GRAYSCALE)
 	(w, h) = img.shape
 	assert Model.imgSize[1] == w
-	gt = 'are'

 	imgList = []
 	for dy in range(Model.imgSize[0]-h+1):
@@ -91,32 +103,43 @@ def analyzeTranslationInvariance():
 		imgList.append(preprocess(targetImg, Model.imgSize))

 	# put images and gt texts into batch
-	batch = Batch([gt]*len(imgList), imgList)
+	batch = Batch([Constants.gtText]*len(imgList), imgList)

 	# compute probabilities
-	(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
-	np.save(FilePaths.fnTranslationInvariance, probs)
+	(texts, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
+
+	# save results to file
+	f = open(Constants.fnTranslationInvarianceTexts, 'wb')
+	pickle.dump(texts, f)
+	f.close()
+	np.save(Constants.fnTranslationInvariance, probs)


 def showResults():
 	# 1. pixel relevance
-	pixelRelevance = np.load(FilePaths.fnPixelRelevance)
+	pixelRelevance = np.load(Constants.fnPixelRelevance)
 	plt.figure('Pixel relevance')
 	plt.imshow(pixelRelevance, cmap=plt.cm.jet, vmin=-0.5, vmax=0.5)
 	plt.colorbar()
-	img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	img = cv2.imread(Constants.fnAnalyze, cv2.IMREAD_GRAYSCALE)
 	plt.imshow(img, cmap=plt.cm.gray, alpha=.4)

 	# 2. translation invariance
-	probs = np.load(FilePaths.fnTranslationInvariance)
+	probs = np.load(Constants.fnTranslationInvariance)
+	f = open(Constants.fnTranslationInvarianceTexts, 'rb')
+	texts = pickle.load(f)
+	texts = ['%d:'%i + texts[i] for i in range(len(texts))]
+	f.close()
+
 	plt.figure('Translation invariance')
 	plt.plot(probs, 'o-')
-	plt.xlabel('horizontal translation')
-	plt.ylabel('text probability')
+	plt.xticks(np.arange(len(texts)), texts, rotation='vertical')
+	plt.xlabel('horizontal translation and best path')
+	plt.ylabel('text probability of "%s"'%Constants.gtText)

 	# show both plots
 	plt.show()
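
For reference, here is a minimal standalone sketch (not part of the patch) of the marginalization that the changed lines in analyzePixelRelevance() perform: the pixel-value distribution is estimated from the image histogram, the ground-truth probability is averaged with those weights instead of uniformly, and the result is compared against the original probability via weight of evidence. The odds/weightOfEvidence helpers mirror the ones already defined in analyze.py; the image and the per-gray-value probabilities are made-up placeholders for what model.inferBatch would return.

import math
import numpy as np

def odds(val):
    # odds of a probability value; guard against division by zero at val == 1
    return val / (1 - val) if val != 1 else np.inf

def weightOfEvidence(origProb, margProb):
    # log2 odds ratio between original and marginalized probability
    return math.log2(odds(origProb)) - math.log2(odds(margProb))

# the five gray values the patch marginalizes over, and the histogram bins from the patch
grayValues = [0, 63, 127, 191, 255]
bins = [0, 31, 95, 159, 223, 255]

# dummy grayscale image standing in for ../data/analyze.png (placeholder data)
img = np.random.randint(0, 256, size=(32, 128), dtype=np.uint8)

# 'histogram' mode: weight each gray value by how often its bin occurs in the image
(hist, _) = np.histogram(img, bins=bins)
pixelProb = hist / sum(hist)  # one weight per entry of grayValues, sums to 1

# placeholder per-gray-value probabilities of the ground-truth text, i.e. what
# model.inferBatch would return for the five images with pixel (x, y) modified
probs = np.array([0.61, 0.60, 0.55, 0.40, 0.20])
origProb = 0.63  # probability of the ground-truth text on the unmodified image

# marginalize: expected probability when pixel (x, y) is drawn from pixelProb
margProb = sum(probs[i] * pixelProb[i] for i in range(len(grayValues)))

# positive relevance means the original pixel value supports the ground-truth text
print(weightOfEvidence(origProb, margProb))

With the 'histogram' setting, gray values that rarely occur in the image contribute little to margProb, so the relevance map is driven by realistic pixel replacements rather than the uniform mix used before this change.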