Improved log output for ANN training.

2c73fc5e · Laura Christine Kühle · fdbab2c1 · 2c73fc5e · 2c73fc5e
Commit 2c73fc5e authored Jan 25, 2022 by Laura Christine Kühle
--- a/ANN_Data_Generator.py
+++ b/ANN_Data_Generator.py
@@ -96,7 +96,7 @@ class TrainingDataGenerator(object):

        self._save_data(data_dict)
        toc = time.perf_counter()
-        print('Total runtime:', toc-tic)
+        print(f'Total runtime: {toc - tic:0.4f}s')
        return data_dict

    def _calculate_data_set(self, num_samples):
@@ -200,7 +200,7 @@ class TrainingDataGenerator(object):

        toc = time.perf_counter()
        print('Finished calculating data ' + troubled_indicator + ' troubled cells!')
-        print('Calculation time:', toc-tic, '\n')
+        print(f'Calculation time: {toc - tic:0.4f}s\n')

        # Set output data
        output_data = np.zeros((num_samples, 2))

--- a/ANN_Training.py
+++ b/ANN_Training.py
@@ -8,11 +8,13 @@ Docstring-Style: D200, D400
 TODO: Test new ANN set-up with Soraya
 TODO: Remove object set-up (for more flexibility)
 TODO: Add documentation
-TODO: Improve log output
+TODO: Improve log output (timer, bit of text) -> Done
 TODO: Throw exception for error due to missing classes
+TODO: Allow multiple approximations in one config

 """
 import numpy as np
+import time
 import matplotlib
 from matplotlib import pyplot as plt
 import os
@@ -64,13 +66,16 @@ class ModelTrainer(object):
            self._model.parameters(), **self._optimizer_config)
        self._validation_loss = torch.zeros(self._num_epochs//10)

-    def epoch_training(self, dataset=None, num_epochs=None):
+    def epoch_training(self, dataset=None, num_epochs=None, verbose=True):
+        tic = time.perf_counter()
        # Split data into training and validation set
        if dataset is None:
            dataset = self._training_data
        if num_epochs is None:
            num_epochs = self._num_epochs
        num_samples = len(dataset)
+        if verbose:
+            print('Splitting data randomly into training and validation set.')
        train_ds, valid_ds = random_split(dataset, [round(num_samples*0.8), round(num_samples*0.2)])

        # Load sets
@@ -78,6 +83,10 @@ class ModelTrainer(object):
        valid_dl = DataLoader(valid_ds, batch_size=self._batch_size * 2)

        # Training with Validation
+        if verbose:
+            print('\nTraining model...')
+            print('Number of epochs:', num_epochs)
+        tic_train = time.perf_counter()
        for epoch in range(num_epochs):
            self._model.train()
            for x_batch, y_batch in train_dl:
@@ -97,13 +106,21 @@ class ModelTrainer(object):

                if (epoch+1) % 100 == 0:
                    self._validation_loss[int((epoch+1) / 100)-1] = valid_loss / len(valid_dl)
-                    print(epoch+1, valid_loss / len(valid_dl))
+                    if verbose:
+                        print(epoch+1, 'epochs completed. Loss:', valid_loss / len(valid_dl))

                if valid_loss / len(valid_dl) < self._threshold:
                    break
+        toc_train = time.perf_counter()
+        if verbose:
+            print('Finished training model!')
+            print(f'Training time: {toc_train-tic_train:0.4f}s\n')
+        toc = time.perf_counter()
+        if verbose:
+            print(f'Total runtime: {toc-tic:0.4f}s\n')

    def test_model(self, training_set, test_set):
-        self.epoch_training(training_set, num_epochs=100)
+        self.epoch_training(training_set, num_epochs=100, verbose=False)
        self._model.eval()

        x_test, y_test = test_set
@@ -143,17 +160,24 @@ def read_training_data(directory, normalized=True):

 def evaluate_models(models, directory, num_iterations=100, colors=None,
                    compare_normalization=False):
+    tic = time.perf_counter()
    if colors is None:
        colors = {'Accuracy': 'magenta', 'Precision_Smooth': 'red',
                  'Precision_Troubled': '#8B0000', 'Recall_Smooth': 'blue',
                  'Recall_Troubled': '#00008B', 'F-Score_Smooth': 'green',
                  'F-Score_Troubled': '#006400', 'AUROC': 'yellow'}

+    print('Read normalized training data.')
    datasets = {'normalized': read_training_data(directory)}
    if compare_normalization:
+        print('Read raw, non-normalized training data.')
        datasets['raw'] = read_training_data(directory, False)
    classification_stats = {measure: {model + ' (' + dataset + ')': [] for model in models
                                      for dataset in datasets} for measure in colors}
+
+    print('\nTraining models with 5-fold cross validation...')
+    print('Number of iterations:', num_iterations)
+    tic_train = time.perf_counter()
    for iteration in range(num_iterations):
        for train_index, test_index in KFold(
                n_splits=5, shuffle=True).split(datasets['normalized']):
@@ -166,7 +190,13 @@ def evaluate_models(models, directory, num_iterations=100, colors=None,
                    for measure in colors:
                        classification_stats[measure][model + ' (' + dataset + ')'].append(
                            result[measure])
+        if (iteration+1) % max(10, 10*(num_iterations//100)) == 0:  # periodic progress only
+            print(iteration+1, 'iterations completed.')
+    toc_train = time.perf_counter()
+    print('Finished training models with 5-fold cross validation!')
+    print(f'Training time: {toc_train - tic_train:0.4f}s\n')

+    print('Plotting evaluation of trained models.')
    plot_boxplot(classification_stats, colors)
    classification_stats = {measure: {model + ' (' + dataset + ')': np.array(
        classification_stats[measure][model + ' (' + dataset + ')']).mean() for model in models
@@ -179,6 +209,7 @@ def evaluate_models(models, directory, num_iterations=100, colors=None,
        os.makedirs(plot_dir)

    # Save plots
+    print('Saving plots.')
    for identifier in plt.get_figlabels():
        # Set path for figure directory if not existing already
        if not os.path.exists(plot_dir + '/' + identifier):
@@ -186,3 +217,5 @@ def evaluate_models(models, directory, num_iterations=100, colors=None,

        plt.figure(identifier)
        plt.savefig(plot_dir + '/' + identifier + '/' + '_'.join(models.keys()) + '.pdf')
+    toc = time.perf_counter()
+    print(f'Total runtime: {toc - tic:0.4f}s')