analyze HTR system

3968a52b · Harald Scheidl · 4119ac83 · 3968a52b · 3968a52b · 3968a52b
Commit 3968a52b authored 6 years ago by Harald Scheidl
--- a/data/analyze.png
+++ b/data/analyze.png
--- a/data/pixelRelevance.npy
+++ b/data/pixelRelevance.npy
--- a/data/translationInvariance.npy
+++ b/data/translationInvariance.npy
--- a/doc/analyze.png
+++ b/doc/analyze.png
--- a/src/Model.py
+++ b/src/Model.py
@@ -211,7 +211,7 @@ class Model:
 		return lossVal
-	def inferBatch(self, batch, calcProbability=False):
+	def inferBatch(self, batch, calcProbability=False, probabilityOfGT=False):
 		"feed a batch into the NN to recngnize the texts"
 		# decode, optionally save RNN output
@@ -225,7 +225,7 @@ class Model:
 		# feed RNN output and recognized text into CTC loss to compute labeling probability
 		probs = None
 		if calcProbability:
-			sparse = self.toSparse(texts)
+			sparse = self.toSparse(batch.gtTexts) if probabilityOfGT else self.toSparse(texts)
 			ctcInput = evalRes[1]
 			evalList = self.lossPerElement
 			feedDict = {self.savedCtcInput : ctcInput, self.gtTexts : sparse, self.seqLen : [Model.maxTextLen] * numBatchElements}

--- a/src/analyze.py
+++ b/src/analyze.py
+from __future__ import division
+from __future__ import print_function
+import sys
+import math
+import copy
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+from DataLoader import Batch
+from Model import Model, DecoderType
+from SamplePreprocessor import preprocess
+class FilePaths:
+	"filenames and paths used by the analysis: inputs first, then the stored results"
+	# image that is read by both analysis functions and drawn as overlay in showResults
+	fnAnalyze = "../data/analyze.png"
+	# text file whose content is handed to the Model constructor
+	fnCharList = "../model/charList.txt"
+	# numpy file written by analyzeTranslationInvariance and loaded by showResults
+	fnTranslationInvariance = "../data/translationInvariance.npy"
+	# numpy file written by analyzePixelRelevance and loaded by showResults
+	fnPixelRelevance = "../data/pixelRelevance.npy"
+def odds(val, eps=1e-12):
+	"""convert a probability into odds: p / (1 - p)
+
+	val: probability, expected to lie in [0, 1]
+	eps: val is clipped to [eps, 1 - eps] before dividing; values already inside that
+		range give exactly the same result as the plain formula, eps=0 disables clipping
+	returns: odds of the clipped probability
+	"""
+	# a probability of exactly 1 would divide by zero, and a probability of exactly 0
+	# would give odds of 0 whose logarithm is undefined -> clip both ends
+	clipped = min(max(val, eps), 1 - eps)
+	return clipped / (1 - clipped)
+def weightOfEvidence(origProb, margProb):
+	"difference of the base-2 log-odds of origProb and margProb (positive if origProb has the higher odds)"
+	logOddsOrig = math.log2(odds(origProb))
+	logOddsMarg = math.log2(odds(margProb))
+	return logOddsOrig - logOddsMarg
+def analyzePixelRelevance():
+	"""simplified implementation of paper: Zintgraf et al - Visualizing Deep Neural Network Decisions: Prediction Difference Analysis
+
+	For every pixel of the image FilePaths.fnAnalyze the gray value is replaced by a few
+	sample values, the probability of the ground-truth text is averaged over these samples
+	and compared (weight of evidence) with the probability for the unmodified image.
+	The resulting map has the shape of the image and is saved to FilePaths.fnPixelRelevance.
+	Nothing is returned. Raises IOError if the image cannot be read.
+	"""
+	# setup model; the with-block closes the char list file right after reading it
+	with open(FilePaths.fnCharList) as f:
+		charList = f.read()
+	model = Model(charList, DecoderType.BestPath, mustRestore=True)
+	# read image and specify ground-truth text
+	img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	if img is None:
+		# cv2.imread reports a missing or unreadable file by returning None instead of raising
+		raise IOError('could not read image: ' + FilePaths.fnAnalyze)
+	# the shape of a 2D numpy image is (rows, columns)
+	(numRows, numCols) = img.shape
+	assert Model.imgSize[1] == numRows
+	gt = 'are'
+	# compute probability of gt text in original image
+	batch = Batch([gt], [preprocess(img, Model.imgSize)])
+	(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
+	origProb = probs[0]
+	# subset of possible grayvalues tried for each pixel
+	grayValues = [0, 63, 127, 191, 255]
+	# iterate over all pixels in image
+	pixelRelevance = np.zeros(img.shape, np.float32)
+	for row in range(numRows):
+		for col in range(numCols):
+			# one modified copy of the image per tried grayvalue of pixel (row, col)
+			imgsMarginalized = []
+			for g in grayValues:
+				imgChanged = img.copy()
+				imgChanged[row, col] = g
+				imgsMarginalized.append(preprocess(imgChanged, Model.imgSize))
+			# put them all into one batch
+			batch = Batch([gt]*len(imgsMarginalized), imgsMarginalized)
+			# compute probabilities
+			(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
+			# marginalize over pixel value (assume uniform distribution)
+			margProb = sum(probs)/len(probs)
+			pixelRelevance[row, col] = weightOfEvidence(origProb, margProb)
+			# progress output: pixel position, relevance and both probabilities
+			print(row, col, pixelRelevance[row, col], origProb, margProb)
+	np.save(FilePaths.fnPixelRelevance, pixelRelevance)
+def analyzeTranslationInvariance():
+	"""compute the probability of the ground-truth text for horizontally shifted copies of the image
+
+	The image FilePaths.fnAnalyze is pasted at every possible column offset into a canvas of
+	shape (Model.imgSize[1], Model.imgSize[0]) filled with 255. All shifted images are fed
+	to the model as one batch and the resulting probabilities are saved to
+	FilePaths.fnTranslationInvariance. Nothing is returned.
+	Raises IOError if the image cannot be read.
+	"""
+	# setup model; the with-block closes the char list file right after reading it
+	with open(FilePaths.fnCharList) as f:
+		charList = f.read()
+	model = Model(charList, DecoderType.BestPath, mustRestore=True)
+	# read image and specify ground-truth text
+	img = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	if img is None:
+		# cv2.imread reports a missing or unreadable file by returning None instead of raising
+		raise IOError('could not read image: ' + FilePaths.fnAnalyze)
+	# the shape of a 2D numpy image is (rows, columns)
+	(numRows, numCols) = img.shape
+	assert Model.imgSize[1] == numRows
+	gt = 'are'
+	imgList = []
+	# the offset runs along the column axis, i.e. it is a horizontal shift
+	for dx in range(Model.imgSize[0]-numCols+1):
+		targetImg = np.ones((Model.imgSize[1], Model.imgSize[0])) * 255
+		targetImg[:,dx:numCols+dx] = img
+		imgList.append(preprocess(targetImg, Model.imgSize))
+	# put images and gt texts into batch
+	batch = Batch([gt]*len(imgList), imgList)
+	# compute probabilities
+	(_, probs) = model.inferBatch(batch, calcProbability=True, probabilityOfGT=True)
+	np.save(FilePaths.fnTranslationInvariance, probs)
+def showResults():
+	"plot the stored pixel relevance map (with the analyzed image on top) and the stored translation invariance curve"
+	# figure 1: relevance map with colorbar, analyzed image drawn semi-transparently over it
+	relevanceMap = np.load(FilePaths.fnPixelRelevance)
+	plt.figure('Pixel relevance')
+	heatmap = plt.imshow(relevanceMap, cmap=plt.cm.jet, vmin=-0.5, vmax=0.5)
+	# the colorbar has to be created for the relevance map, before the overlay is drawn
+	plt.colorbar(heatmap)
+	analyzedImg = cv2.imread(FilePaths.fnAnalyze, cv2.IMREAD_GRAYSCALE)
+	plt.imshow(analyzedImg, cmap=plt.cm.gray, alpha=0.4)
+	# figure 2: stored text probabilities plotted over their index (the translation)
+	gtProbs = np.load(FilePaths.fnTranslationInvariance)
+	plt.figure('Translation invariance')
+	plt.plot(gtProbs, 'o-')
+	plt.xlabel('horizontal translation')
+	plt.ylabel('text probability')
+	# display both figures
+	plt.show()
+if __name__ == '__main__':
+	# no argument: show the stored results; otherwise run the analysis selected by the first argument
+	if len(sys.argv) <= 1:
+		print('Show results')
+		showResults()
+	elif sys.argv[1] == '--relevance':
+		print('Analyze pixel relevance')
+		analyzePixelRelevance()
+	elif sys.argv[1] == '--invariance':
+		print('Analyze translation invariance')
+		analyzeTranslationInvariance()
+	else:
+		# report an unrecognized option instead of silently doing nothing
+		print('Unknown option: ' + sys.argv[1])
+		print('Usage: analyze.py [--relevance | --invariance]')
+		sys.exit(1)
\ No newline at end of file