diff --git a/ANN_Training.py b/ANN_Training.py
index 53ee0e27459921eab3397824e3c172901060c975..417c6f937dbf86ba45582bc78809d79763ba050e 100644
--- a/ANN_Training.py
+++ b/ANN_Training.py
@@ -11,7 +11,8 @@ TODO: Improve maximum selection runtime
 TODO: Discuss if we want training accuracy/ROC in addition to CFV
 TODO: Discuss whether to change output to binary
 TODO: Adapt TCD file to new classification
-TODO: Improve classification stat handling
+TODO: Improve classification stat handling -> Done
+TODO: Discuss automatic comparison between (non-)normalized data
 
 """
 import numpy as np
@@ -129,7 +130,8 @@ class ModelTrainer(object):
 
         # print(roc)
         # plt.plot(fpr, tpr, label="AUC="+str(auroc))
-        return [precision[0], recall[0], accuracy, f_score[0], auroc]
+        return {'Precision': precision[0], 'Recall': recall[0], 'Accuracy': accuracy,
+                'F-Score': f_score[0], 'AUROC': auroc}
 
     def save_model(self):
         # Saving Model
@@ -143,8 +145,8 @@ class ModelTrainer(object):
         torch.save(self._model.state_dict(), model_dir + '/model__' + name + '.pt')
         torch.save(self._validation_loss, model_dir + '/loss__' + name + '.pt')
 
-    def _classify(self):
-        pass
+    # def _classify(self):
+    #     pass
 
 
 def read_training_data(directory):
@@ -154,10 +156,11 @@ def read_training_data(directory):
     return TensorDataset(*map(torch.tensor, (np.load(input_file), np.load(output_file))))
 
 
-def evaluate_models(models, directory, num_iterations=100):
+def evaluate_models(models, directory, num_iterations=100, measures=None):
+    if measures is None:
+        measures = ['Accuracy', 'Precision', 'Recall', 'F-Score', 'AUROC']
     dataset = read_training_data(directory)
-    stats = ['Precision', 'Recall', 'Accuracy', 'F-Score', 'AUROC']
-    classification_stats = {model: {name: [] for name in stats} for model in models}
+    classification_stats = {measure: {model: [] for model in models} for measure in measures}
     for iteration in range(num_iterations):
         for train_index, test_index in KFold(n_splits=5, shuffle=True).split(dataset):
             # print("TRAIN:", train_index, "TEST:", test_index)
@@ -166,28 +169,13 @@ def evaluate_models(models, directory, num_iterations=100):
 
             for model in models:
                 result = models[model].test_model(training_set, test_set)
-                count = 0
-                for stat in stats:
-                    classification_stats[model][stat].append(result[count])
-                    count += 1
-
-    # print(classification_stats)
-    # print(np.array(classification_stats).mean(axis=0))
-    # print(np.array(classification_stats['Adam']['Precision']).shape)
-    # print(np.array([np.array(classification_stats[model]) for model in models]).transpose().shape)
-    # print(np.array([np.array(classification_stats[model]).transpose() for model in models]).shape)
-    # print(np.array([[classification_stats[model][stat] for model in models] for stat in stats]).shape)
-    # print(np.array([[np.array(classification_stats[model][stat]).mean(axis=0) for model in models] for stat in stats]).shape)
-    # print(np.array([*(np.array([[classification_stats[model][stat]
-    #                              for model in models] for stat in stats]))]).shape)
-    # print(*(np.array([[classification_stats[model][stat]
-    #                    for model in models] for stat in stats]))[0].shape)
-    plot_boxplot(models.keys(), *(np.array([[classification_stats[model][stat]
-                                             for model in models] for stat in stats])))
-    classification_stats = [[np.array(classification_stats[model][stat]).mean(axis=0) for model in models] for stat in stats]
-    # print(*classification_stats)
-
-    plot_classification_accuracy(models.keys(), *classification_stats)
+                for measure in measures:
+                    classification_stats[measure][model].append(result[measure])
+
+    plot_boxplot(models.keys(), classification_stats)
+    classification_stats = {measure: {model: np.array(classification_stats[measure][model]).mean()
+                                      for model in models} for measure in measures}
+    plot_classification_accuracy(models.keys(), classification_stats)
 
     # Set paths for plot files if not existing already
     plot_dir = directory + '/model evaluation'
diff --git a/Plotting.py b/Plotting.py
index 078381c7845b55386040a71b9db33bd7a07d313d..c9e1c74fa2b62ce226d7f3ce6b46ec0c8911f7b2 100644
--- a/Plotting.py
+++ b/Plotting.py
@@ -3,7 +3,8 @@
 @author: Laura C. Kühle
 
 TODO: Give option to select plotting color
-TODO: Improve classification plotting
+TODO: Improve classification plotting -> Done
+TODO: Add documentation to plot_boxplot()
 
 """
 import numpy as np
@@ -236,7 +237,7 @@ def calculate_exact_solution(mesh, cell_len, wave_speed, final_time, interval_le
     return grid, exact
 
 
-def plot_classification_accuracy(xlabels, precision, recall, accuracy, fscore, auroc):
+def plot_classification_accuracy(model_names, evaluation_dict):
     """Plots classification accuracy.
 
     Plots the accuracy, precision, and recall in a bar plot.
@@ -253,52 +254,50 @@ def plot_classification_accuracy(xlabels, precision, recall, accuracy, fscore, a
         List of strings for x-axis labels.
 
     """
-    pos = np.arange(len(xlabels))
-    width = 1/(3*len(xlabels))
+    pos = np.arange(len(model_names))
+    width = 1/(3*len(model_names))
     fig = plt.figure('classification_accuracy')
     ax = fig.add_axes([0.15, 0.1, 0.75, 0.8])
-    ax.bar(pos - 2*width, fscore, width, label='F-Score')
-    ax.bar(pos - width, precision, width, label='Precision')
-    ax.bar(pos, recall, width, label='Recall')
-    ax.bar(pos + width, accuracy, width, label='Accuracy')
-    ax.bar(pos + 2*width, auroc, width, label='AUROC')
+    step_len = 1
+    adjustment = -(len(model_names)//2)*step_len
+    for measure in evaluation_dict:
+        model_eval = [evaluation_dict[measure][model] for model in evaluation_dict[measure]]
+        ax.bar(pos + adjustment*width, model_eval, width, label=measure)
+        adjustment += step_len
     ax.set_xticks(pos)
-    ax.set_xticklabels(xlabels)
+    ax.set_xticklabels(model_names)
     ax.set_ylabel('Classification (%)')
     ax.set_ylim(bottom=-0.02)
     ax.set_ylim(top=1.02)
-    ax.set_title('Non-Normalized Test Data')
+    ax.set_title('Classification Evaluation (Barplot)')
     ax.legend(loc='upper right')
     # fig.tight_layout()
 
 
-def plot_boxplot(xlabels, precision, recall, accuracy, fscore, auroc):
+def plot_boxplot(model_names, evaluation_dict):
     fig = plt.figure('boxplot_accuracy')
-    pos = np.arange(len(xlabels))
-    width = 1/(5*len(xlabels))
     ax = fig.add_axes([0.15, 0.1, 0.75, 0.8])
+    step_len = 1.5
     boxplots = []
-    boxplots.append(ax.boxplot(fscore.transpose(), positions=pos - 3*width, widths=width, meanline=True,
-                               showmeans=True, patch_artist=True))
-    boxplots.append(ax.boxplot(precision.transpose(), positions=pos - 1.5*width, widths=width, meanline=True,
-                               showmeans=True, patch_artist=True))
-    boxplots.append(ax.boxplot(recall.transpose(), positions=pos, widths=width, meanline=True, showmeans=True,
-                               patch_artist=True))
-    boxplots.append(ax.boxplot(accuracy.transpose(), positions=pos + 1.5*width, widths=width, meanline=True,
-                               showmeans=True, patch_artist=True))
-    boxplots.append(ax.boxplot(auroc.transpose(), positions=pos + 3*width, widths=width, meanline=True,
-                               showmeans=True, patch_artist=True))
-    count = 0
+    adjustment = -(len(model_names)//2)*step_len
+    pos = np.arange(len(model_names))
+    width = 1/(5*len(model_names))
     colors = ['red', 'yellow', 'blue', 'tan', 'green']
-    for bp in boxplots:
-        for patch in bp['boxes']:
+    count = 0
+    for measure in evaluation_dict:
+        model_eval = [evaluation_dict[measure][model] for model in evaluation_dict[measure]]
+        boxplot = ax.boxplot(model_eval, positions=pos + adjustment*width, widths=width,
+                             meanline=True, showmeans=True, patch_artist=True)
+        for patch in boxplot['boxes']:
             patch.set(facecolor=colors[count])
-        count +=1
+        boxplots.append(boxplot)
+        count += 1
+        adjustment += step_len
+
     ax.set_xticks(pos)
-    ax.set_xticklabels(xlabels)
+    ax.set_xticklabels(model_names)
     ax.set_ylim(bottom=-0.02)
     ax.set_ylim(top=1.02)
    ax.set_ylabel('Classification (%)')
-    ax.set_title('Non-Normalized Test Data')
-    ax.legend([bp["boxes"][0] for bp in boxplots],
-              ['F-Score', 'Precision', 'Recall', 'Accuracy', 'AUROC'], loc='upper right')
+    ax.set_title('Classification Evaluation (Boxplot)')
+    ax.legend([bp["boxes"][0] for bp in boxplots], evaluation_dict.keys(), loc='upper right')
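
Note on the reworked interface: ModelTrainer.test_model() now returns its scores as a dict keyed by measure name, evaluate_models() collects them in a nested {measure: {model: [per-fold scores]}} dict, and both plotting helpers consume that structure directly instead of five positional arrays. The snippet below is a minimal usage sketch, not part of the commit; the model names and the random fold scores are invented for illustration, and only the measure names, module names, and function signatures are taken from the diff.

# Minimal usage sketch (not part of the commit). The model identifiers and
# scores below are made up; in the repository, evaluate_models() fills this
# dict from ModelTrainer.test_model(), which returns one score per measure.
import numpy as np
import matplotlib.pyplot as plt

from Plotting import plot_boxplot, plot_classification_accuracy

measures = ['Accuracy', 'Precision', 'Recall', 'F-Score', 'AUROC']
models = ['Adam', 'SGD']  # hypothetical model names

# New layout: one list of per-fold scores for every (measure, model) pair.
rng = np.random.default_rng(0)
evaluation_dict = {measure: {model: list(rng.uniform(0.8, 1.0, size=25))
                             for model in models}
                   for measure in measures}

# The box plot consumes the raw per-fold scores ...
plot_boxplot(models, evaluation_dict)

# ... while the bar plot expects a single mean value per (measure, model) pair,
# mirroring the reduction done at the end of evaluate_models().
mean_dict = {measure: {model: np.mean(evaluation_dict[measure][model])
                       for model in models}
             for measure in measures}
plot_classification_accuracy(models, mean_dict)
plt.show()

Keeping the per-fold lists until plotting lets the box plot show the spread over the cross-validation folds, while the bar plot only needs the per-model means computed at the end of evaluate_models().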