From 2c73fc5e47e07dd43e1528a7ebd7691b4b2d91ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BChle=2C=20Laura=20Christine=20=28lakue103=29?= <laura.kuehle@uni-duesseldorf.de> Date: Tue, 25 Jan 2022 18:54:39 +0100 Subject: [PATCH] Improved log output for ANN training. --- ANN_Data_Generator.py | 4 ++-- ANN_Training.py | 41 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/ANN_Data_Generator.py b/ANN_Data_Generator.py index 64b0c7f..d821b03 100644 --- a/ANN_Data_Generator.py +++ b/ANN_Data_Generator.py @@ -96,7 +96,7 @@ class TrainingDataGenerator(object): self._save_data(data_dict) toc = time.perf_counter() - print('Total runtime:', toc-tic) + print(f'Total runtime: {toc - tic:0.4f}s') return data_dict def _calculate_data_set(self, num_samples): @@ -200,7 +200,7 @@ class TrainingDataGenerator(object): toc = time.perf_counter() print('Finished calculating data ' + troubled_indicator + ' troubled cells!') - print('Calculation time:', toc-tic, '\n') + print(f'Calculation time: {toc - tic:0.4f}s\n') # Set output data output_data = np.zeros((num_samples, 2)) diff --git a/ANN_Training.py b/ANN_Training.py index 7420fb7..395cf26 100644 --- a/ANN_Training.py +++ b/ANN_Training.py @@ -8,11 +8,13 @@ Docstring-Style: D200, D400 TODO: Test new ANN set-up with Soraya TODO: Remove object set-up (for more flexibility) TODO: Add documentation -TODO: Improve log output +TODO: Improve log output (timer, bit of text) -> Done TODO: Throw exception for error due to missing classes +TODO: Allow multiple approximations in one config """ import numpy as np +import time import matplotlib from matplotlib import pyplot as plt import os @@ -64,13 +66,16 @@ class ModelTrainer(object): self._model.parameters(), **self._optimizer_config) self._validation_loss = torch.zeros(self._num_epochs//10) - def epoch_training(self, dataset=None, num_epochs=None): + def epoch_training(self, dataset=None, num_epochs=None, verbose=True): + tic = 
time.perf_counter() # Split data into training and validation set if dataset is None: dataset = self._training_data if num_epochs is None: num_epochs = self._num_epochs num_samples = len(dataset) + if verbose: + print('Splitting data randomly into training and validation set.') train_ds, valid_ds = random_split(dataset, [round(num_samples*0.8), round(num_samples*0.2)]) # Load sets @@ -78,6 +83,10 @@ class ModelTrainer(object): valid_dl = DataLoader(valid_ds, batch_size=self._batch_size * 2) # Training with Validation + if verbose: + print('\nTraining model...') + print('Number of epochs:', num_epochs) + tic_train = time.perf_counter() for epoch in range(num_epochs): self._model.train() for x_batch, y_batch in train_dl: @@ -97,13 +106,21 @@ class ModelTrainer(object): if (epoch+1) % 100 == 0: self._validation_loss[int((epoch+1) / 100)-1] = valid_loss / len(valid_dl) - print(epoch+1, valid_loss / len(valid_dl)) + if verbose: + print(epoch+1, 'epochs completed. Loss:', valid_loss / len(valid_dl)) if valid_loss / len(valid_dl) < self._threshold: break + toc_train = time.perf_counter() + if verbose: + print('Finished training model!') + print(f'Training time: {toc_train-tic_train:0.4f}s\n') + toc = time.perf_counter() + if verbose: + print(f'Total runtime: {toc-tic:0.4f}s\n') def test_model(self, training_set, test_set): - self.epoch_training(training_set, num_epochs=100) + self.epoch_training(training_set, num_epochs=100, verbose=False) self._model.eval() x_test, y_test = test_set @@ -143,17 +160,24 @@ def read_training_data(directory, normalized=True): def evaluate_models(models, directory, num_iterations=100, colors=None, compare_normalization=False): + tic = time.perf_counter() if colors is None: colors = {'Accuracy': 'magenta', 'Precision_Smooth': 'red', 'Precision_Troubled': '#8B0000', 'Recall_Smooth': 'blue', 'Recall_Troubled': '#00008B', 'F-Score_Smooth': 'green', 'F-Score_Troubled': '#006400', 'AUROC': 'yellow'} + print('Read normalized training data.') 
datasets = {'normalized': read_training_data(directory)} if compare_normalization: + print('Read raw, non-normalized training data.') datasets['raw'] = read_training_data(directory, False) classification_stats = {measure: {model + ' (' + dataset + ')': [] for model in models for dataset in datasets} for measure in colors} + + print('\nTraining models with 5-fold cross validation...') + print('Number of iterations:', num_iterations) + tic_train = time.perf_counter() for iteration in range(num_iterations): for train_index, test_index in KFold( n_splits=5, shuffle=True).split(datasets['normalized']): @@ -166,7 +190,13 @@ def evaluate_models(models, directory, num_iterations=100, colors=None, for measure in colors: classification_stats[measure][model + ' (' + dataset + ')'].append( result[measure]) + if (iteration+1) % max(10, 10*(num_iterations//100)) == 0: + print(iteration+1, 'iterations completed.') + toc_train = time.perf_counter() + print('Finished training models with 5-fold cross validation!') + print(f'Training time: {toc_train - tic_train:0.4f}s\n') + print('Plotting evaluation of trained models.') plot_boxplot(classification_stats, colors) classification_stats = {measure: {model + ' (' + dataset + ')': np.array( classification_stats[measure][model + ' (' + dataset + ')']).mean() for model in models @@ -179,6 +209,7 @@ def evaluate_models(models, directory, num_iterations=100, colors=None, os.makedirs(plot_dir) # Save plots + print('Saving plots.') for identifier in plt.get_figlabels(): # Set path for figure directory if not existing already if not os.path.exists(plot_dir + '/' + identifier): @@ -186,3 +217,5 @@ def evaluate_models(models, directory, num_iterations=100, colors=None, plt.figure(identifier) plt.savefig(plot_dir + '/' + identifier + '/' + '_'.join(models.keys()) + '.pdf') + toc = time.perf_counter() + print(f'Total runtime: {toc - tic:0.4f}s') -- GitLab