Commit 36f233b4 authored by Laura Christine Kühle

Improved classification handling and plotting.

parent b2e66543
@@ -11,7 +11,8 @@ TODO: Improve maximum selection runtime
 TODO: Discuss if we want training accuracy/ROC in addition to CFV
 TODO: Discuss whether to change output to binary
 TODO: Adapt TCD file to new classification
-TODO: Improve classification stat handling
+TODO: Improve classification stat handling -> Done
+TODO: Discuss automatic comparison between (non-)normalized data

 """
 import numpy as np
@@ -129,7 +130,8 @@ class ModelTrainer(object):
         # print(roc)
         # plt.plot(fpr, tpr, label="AUC="+str(auroc))
-        return [precision[0], recall[0], accuracy, f_score[0], auroc]
+        return {'Precision': precision[0], 'Recall': recall[0], 'Accuracy': accuracy,
+                'F-Score': f_score[0], 'AUROC': auroc}

     def save_model(self):
         # Saving Model
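Note on the hunk above: test_model() now returns its scores keyed by measure name instead of by position. A minimal sketch of how a caller reads the new result (the trainer and data set variables are assumed to exist and are illustrative, not part of this commit):

    # Shape of the value returned by test_model() after this change (numbers illustrative):
    # {'Precision': 0.93, 'Recall': 0.88, 'Accuracy': 0.91, 'F-Score': 0.90, 'AUROC': 0.95}
    result = trainer.test_model(training_set, test_set)  # trainer and the sets: assumed to exist
    auroc = result['AUROC']                               # scores are looked up by name, not index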
@@ -143,8 +145,8 @@ class ModelTrainer(object):
         torch.save(self._model.state_dict(), model_dir + '/model__' + name + '.pt')
         torch.save(self._validation_loss, model_dir + '/loss__' + name + '.pt')

-    def _classify(self):
-        pass
+    # def _classify(self):
+    #     pass


 def read_training_data(directory):
@@ -154,10 +156,11 @@ def read_training_data(directory):
     return TensorDataset(*map(torch.tensor, (np.load(input_file), np.load(output_file))))


-def evaluate_models(models, directory, num_iterations=100):
+def evaluate_models(models, directory, num_iterations=100, measures=None):
+    if measures is None:
+        measures = ['Accuracy', 'Precision', 'Recall', 'F-Score', 'AUROC']
     dataset = read_training_data(directory)
-    stats = ['Precision', 'Recall', 'Accuracy', 'F-Score', 'AUROC']
-    classification_stats = {model: {name: [] for name in stats} for model in models}
+    classification_stats = {measure: {model: [] for model in models} for measure in measures}
     for iteration in range(num_iterations):
         for train_index, test_index in KFold(n_splits=5, shuffle=True).split(dataset):
             # print("TRAIN:", train_index, "TEST:", test_index)
@@ -166,28 +169,13 @@ def evaluate_models(models, directory, num_iterations=100):
             for model in models:
                 result = models[model].test_model(training_set, test_set)
-                count = 0
-                for stat in stats:
-                    classification_stats[model][stat].append(result[count])
-                    count += 1
+                for measure in measures:
+                    classification_stats[measure][model].append(result[measure])

-    # print(classification_stats)
-    # print(np.array(classification_stats).mean(axis=0))
-    # print(np.array(classification_stats['Adam']['Precision']).shape)
-    # print(np.array([np.array(classification_stats[model]) for model in models]).transpose().shape)
-    # print(np.array([np.array(classification_stats[model]).transpose() for model in models]).shape)
-    # print(np.array([[classification_stats[model][stat] for model in models] for stat in stats]).shape)
-    # print(np.array([[np.array(classification_stats[model][stat]).mean(axis=0) for model in models] for stat in stats]).shape)
-    # print(np.array([*(np.array([[classification_stats[model][stat]
-    #                              for model in models] for stat in stats]))]).shape)
-    # print(*(np.array([[classification_stats[model][stat]
-    #                    for model in models] for stat in stats]))[0].shape)
-    plot_boxplot(models.keys(), *(np.array([[classification_stats[model][stat]
-                                             for model in models] for stat in stats])))
-    classification_stats = [[np.array(classification_stats[model][stat]).mean(axis=0) for model in models] for stat in stats]
-    # print(*classification_stats)
-    plot_classification_accuracy(models.keys(), *classification_stats)
+    plot_boxplot(models.keys(), classification_stats)
+    classification_stats = {measure: {model: np.array(classification_stats[measure][model]).mean()
+                                      for model in models} for measure in measures}
+    plot_classification_accuracy(models.keys(), classification_stats)

     # Set paths for plot files if not existing already
     plot_dir = directory + '/model evaluation'
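With the reworked signature, evaluate_models() accepts an optional list of measure names and falls back to all five when none is given. A hedged usage sketch (the 'SGD' model name, directory, and iteration count are illustrative; 'Adam' appears elsewhere in the module):

    # models maps display names to ModelTrainer instances (construction omitted here)
    models = {'Adam': adam_trainer, 'SGD': sgd_trainer}
    evaluate_models(models, directory='data', num_iterations=10, measures=['Accuracy', 'AUROC'])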
@@ -3,7 +3,8 @@
 @author: Laura C. Kühle

 TODO: Give option to select plotting color
-TODO: Improve classification plotting
+TODO: Improve classification plotting -> Done
+TODO: Add documentation to plot_boxplot()

 """
 import numpy as np
@@ -236,7 +237,7 @@ def calculate_exact_solution(mesh, cell_len, wave_speed, final_time, interval_le
     return grid, exact


-def plot_classification_accuracy(xlabels, precision, recall, accuracy, fscore, auroc):
+def plot_classification_accuracy(model_names, evaluation_dict):
     """Plots classification accuracy.

     Plots the accuracy, precision, and recall in a bar plot.
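The refactored plot_classification_accuracy() expects the nested dict of per-measure means built at the end of evaluate_models(): outer keys are measure names, inner keys are model names. A sketch of the assumed structure (the scores and the 'SGD' model are illustrative):

    evaluation_dict = {'Accuracy': {'Adam': 0.91, 'SGD': 0.87},
                       'AUROC': {'Adam': 0.95, 'SGD': 0.90}}
    plot_classification_accuracy(evaluation_dict['Accuracy'].keys(), evaluation_dict)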
@@ -253,52 +254,50 @@ def plot_classification_accuracy(xlabels, precision, recall, accuracy, fscore, a
         List of strings for x-axis labels.

     """
-    pos = np.arange(len(xlabels))
-    width = 1/(3*len(xlabels))
+    pos = np.arange(len(model_names))
+    width = 1/(3*len(model_names))
     fig = plt.figure('classification_accuracy')
     ax = fig.add_axes([0.15, 0.1, 0.75, 0.8])
-    ax.bar(pos - 2*width, fscore, width, label='F-Score')
-    ax.bar(pos - width, precision, width, label='Precision')
-    ax.bar(pos, recall, width, label='Recall')
-    ax.bar(pos + width, accuracy, width, label='Accuracy')
-    ax.bar(pos + 2*width, auroc, width, label='AUROC')
+    step_len = 1
+    adjustment = -(len(model_names)//2)*step_len
+    for measure in evaluation_dict:
+        model_eval = [evaluation_dict[measure][model] for model in evaluation_dict[measure]]
+        ax.bar(pos + adjustment*width, model_eval, width, label=measure)
+        adjustment += step_len
     ax.set_xticks(pos)
-    ax.set_xticklabels(xlabels)
+    ax.set_xticklabels(model_names)
     ax.set_ylabel('Classification (%)')
     ax.set_ylim(bottom=-0.02)
     ax.set_ylim(top=1.02)
-    ax.set_title('Non-Normalized Test Data')
+    ax.set_title('Classification Evaluation (Barplot)')
     ax.legend(loc='upper right')
     # fig.tight_layout()


-def plot_boxplot(xlabels, precision, recall, accuracy, fscore, auroc):
+def plot_boxplot(model_names, evaluation_dict):
     fig = plt.figure('boxplot_accuracy')
-    pos = np.arange(len(xlabels))
-    width = 1/(5*len(xlabels))
     ax = fig.add_axes([0.15, 0.1, 0.75, 0.8])
+    step_len = 1.5
     boxplots = []
-    boxplots.append(ax.boxplot(fscore.transpose(), positions=pos - 3*width, widths=width, meanline=True,
-                               showmeans=True, patch_artist=True))
-    boxplots.append(ax.boxplot(precision.transpose(), positions=pos - 1.5*width, widths=width, meanline=True,
-                               showmeans=True, patch_artist=True))
-    boxplots.append(ax.boxplot(recall.transpose(), positions=pos, widths=width, meanline=True, showmeans=True,
-                               patch_artist=True))
-    boxplots.append(ax.boxplot(accuracy.transpose(), positions=pos + 1.5*width, widths=width, meanline=True,
-                               showmeans=True, patch_artist=True))
-    boxplots.append(ax.boxplot(auroc.transpose(), positions=pos + 3*width, widths=width, meanline=True,
-                               showmeans=True, patch_artist=True))
+    adjustment = -(len(model_names)//2)*step_len
+    pos = np.arange(len(model_names))
+    width = 1/(5*len(model_names))

-    count = 0
     colors = ['red', 'yellow', 'blue', 'tan', 'green']
-    for bp in boxplots:
-        for patch in bp['boxes']:
+    count = 0
+    for measure in evaluation_dict:
+        model_eval = [evaluation_dict[measure][model] for model in evaluation_dict[measure]]
+        boxplot = ax.boxplot(model_eval, positions=pos + adjustment*width, widths=width,
+                             meanline=True, showmeans=True, patch_artist=True)
+        for patch in boxplot['boxes']:
             patch.set(facecolor=colors[count])
+        boxplots.append(boxplot)
         count += 1
+        adjustment += step_len

     ax.set_xticks(pos)
-    ax.set_xticklabels(xlabels)
+    ax.set_xticklabels(model_names)
     ax.set_ylim(bottom=-0.02)
     ax.set_ylim(top=1.02)
     ax.set_ylabel('Classification (%)')
-    ax.set_title('Non-Normalized Test Data')
-    ax.legend([bp["boxes"][0] for bp in boxplots],
-              ['F-Score', 'Precision', 'Recall', 'Accuracy', 'AUROC'], loc='upper right')
+    ax.set_title('Classification Evaluation (Boxplot)')
+    ax.legend([bp["boxes"][0] for bp in boxplots], evaluation_dict.keys(), loc='upper right')
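plot_boxplot() takes the same nesting, but before the means are taken: each model maps to the list of per-fold scores collected in evaluate_models(), so one box is drawn per model and measure. A sketch of the assumed input (the values and the 'SGD' model are illustrative):

    evaluation_dict = {'Accuracy': {'Adam': [0.90, 0.92, 0.89], 'SGD': [0.85, 0.88, 0.86]},
                       'AUROC': {'Adam': [0.94, 0.96, 0.95], 'SGD': [0.89, 0.91, 0.90]}}
    plot_boxplot(evaluation_dict['Accuracy'].keys(), evaluation_dict)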