Analyze pixel relevance using a normalized histogram as the p(x_i) distribution

b2ac69ac · Harald Scheidl · a78fbdf1 · b2ac69ac · b2ac69ac · b2ac69ac
Commit b2ac69ac authored 6 years ago by Harald Scheidl
--- a/data/pixelRelevance.npy
+++ b/data/pixelRelevance.npy
--- a/data/translationInvarianceTexts.pickle
+++ b/data/translationInvarianceTexts.pickle
--- a/src/analyze.py
+++ b/src/analyze.py
@@ -3,6 +3,7 @@ from __future__ import print_function

 import sys
 import math
+import pickle
 import copy
 import numpy as np
 import cv2
@@ -12,12 +13,16 @@ from Model import Model, DecoderType
 from SamplePreprocessor import preprocess


-class FilePaths:
+# constants like filepaths
+class Constants:
 	"filenames and paths to data"
 	fnCharList = '../model/charList.txt'
 	fnAnalyze = '../data/analyze.png'
 	fnPixelRelevance = '../data/pixelRelevance.npy'
 	fnTranslationInvariance = '../data/translationInvariance.npy'
+	fnTranslationInvarianceTexts = '../data/translationInvarianceTexts.pickle'
+	gtText = 'are'
+	distribution = 'histogram' # 'histogram' or 'uniform'


 def odds(val):
@@ -32,19 +37,28 @@ def analyzePixelRelevance():
 	"simplified implementation of paper: Zintgraf et al - Visualizing Deep Neural Network Decisions: Prediction Difference Analysis"
 	
 	# setup model
-	model = Model(open(FilePaths.fnCharList).read(), DecoderType.BestPath, mustRestore=True)
+	model = Model(open(Constants.fnCharList).read(), DecoderType.BestPath, mustRestore=True)
 	
 	# read image and specify ground-truth text
-	img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	img = cv2.imread(Constants.fnAnalyze, cv2.IMREAD_GRAYSCALE)
 	(w, h) = img.shape
 	assert Model.imgSize[1] == w
-	gt = 'are'
 	
 	# compute probability of gt text in original image
-	batch = Batch([gt], [preprocess(img, Model.imgSize)])
+	batch = Batch([Constants.gtText], [preprocess(img, Model.imgSize)])
 	(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
 	origProb = probs[0]
 	
+	grayValues = [0, 63, 127, 191, 255]
+	if Constants.distribution == 'histogram':
+		bins = [0, 31, 95, 159, 223, 255]
+		(hist, _) = np.histogram(img, bins=bins)
+		pixelProb = hist / sum(hist)
+	elif Constants.distribution == 'uniform':
+		pixelProb = [1.0 / len(grayValues) for _ in grayValues]
+	else:
+		raise Exception('unknown value for Constants.distribution')
+	
 	# iterate over all pixels in image
 	pixelRelevance = np.zeros(img.shape, np.float32)
 	for x in range(w):
@@ -52,37 +66,35 @@ def analyzePixelRelevance():
 			
 			# try a subset of possible grayvalues of pixel (x,y)
 			imgsMarginalized = []
-			for g in [0, 63, 127, 191, 255]:
+			for g in grayValues:
 				imgChanged = copy.deepcopy(img)
 				imgChanged[x, y] = g
 				imgsMarginalized.append(preprocess(imgChanged, Model.imgSize))

 			# put them all into one batch
-			batch = Batch([gt]*len(imgsMarginalized), imgsMarginalized)
+			batch = Batch([Constants.gtText]*len(imgsMarginalized), imgsMarginalized)
 			
 			# compute probabilities
 			(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
 			
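-			# marginalize over pixel value (assume uniform distribution)
+			# marginalize over pixel value (weight each gray value's probability by pixelProb, i.e. p(x_i))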
-			margProb = sum(probs)/len(probs)
+			margProb = sum([probs[i] * pixelProb[i] for i in range(len(grayValues))])
 			
 			pixelRelevance[x, y] = weightOfEvidence(origProb, margProb)
 			
 			print(x, y, pixelRelevance[x, y], origProb, margProb)
 			
-	np.save(FilePaths.fnPixelRelevance, pixelRelevance)
-
+	np.save(Constants.fnPixelRelevance, pixelRelevance)


 def analyzeTranslationInvariance():
 	# setup model
-	model = Model(open(FilePaths.fnCharList).read(), DecoderType.BestPath, mustRestore=True)
+	model = Model(open(Constants.fnCharList).read(), DecoderType.BestPath, mustRestore=True)
 	
 	# read image and specify ground-truth text
-	img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	img = cv2.imread(Constants.fnAnalyze, cv2.IMREAD_GRAYSCALE)
 	(w, h) = img.shape
 	assert Model.imgSize[1] == w
-	gt = 'are'
 	
 	imgList = []
 	for dy in range(Model.imgSize[0]-h+1):
@@ -91,32 +103,43 @@ def analyzeTranslationInvariance():
 		imgList.append(preprocess(targetImg, Model.imgSize))
 	
 	# put images and gt texts into batch
-	batch = Batch([gt]*len(imgList), imgList)
+	batch = Batch([Constants.gtText]*len(imgList), imgList)
 	
 	# compute probabilities
-	(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
-	np.save(FilePaths.fnTranslationInvariance, probs)
+	(texts, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
+	
+	# save results to file
+	f = open(Constants.fnTranslationInvarianceTexts, 'wb')
+	pickle.dump(texts, f)
+	f.close()
+	np.save(Constants.fnTranslationInvariance, probs)


 def showResults():
 	# 1. pixel relevance
-	pixelRelevance = np.load(FilePaths.fnPixelRelevance)
+	pixelRelevance = np.load(Constants.fnPixelRelevance)
 	plt.figure('Pixel relevance')
 	
 	plt.imshow(pixelRelevance, cmap=plt.cm.jet, vmin=-0.5, vmax=0.5)
 	plt.colorbar()
 	
-	img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	img = cv2.imread(Constants.fnAnalyze, cv2.IMREAD_GRAYSCALE)
 	plt.imshow(img, cmap=plt.cm.gray, alpha=.4)
 	

 	# 2. translation invariance
-	probs = np.load(FilePaths.fnTranslationInvariance)
+	probs = np.load(Constants.fnTranslationInvariance)
+	f = open(Constants.fnTranslationInvarianceTexts, 'rb')
+	texts = pickle.load(f)
+	texts = ['%d:'%i + texts[i] for i in range(len(texts))]
+	f.close()
+	
 	plt.figure('Translation invariance')
 	
 	plt.plot(probs, 'o-')
-	plt.xlabel('horizontal translation')
-	plt.ylabel('text probability')
+	plt.xticks(np.arange(len(texts)), texts, rotation='vertical')
+	plt.xlabel('horizontal translation and best path')
+	plt.ylabel('text probability of "%s"'%Constants.gtText)
 	
 	# show both plots
 	plt.show()