Skip to content
Snippets Groups Projects
Commit 2c73fc5e authored by Laura Christine Kühle
Browse files

Improved log output for ANN training.

parent fdbab2c1
Branches
No related tags found
No related merge requests found
......@@ -96,7 +96,7 @@ class TrainingDataGenerator(object):
self._save_data(data_dict)
toc = time.perf_counter()
print('Total runtime:', toc-tic)
print(f'Total runtime: {toc - tic:0.4f}s')
return data_dict
def _calculate_data_set(self, num_samples):
......@@ -200,7 +200,7 @@ class TrainingDataGenerator(object):
toc = time.perf_counter()
print('Finished calculating data ' + troubled_indicator + ' troubled cells!')
print('Calculation time:', toc-tic, '\n')
print(f'Calculation time: {toc - tic:0.4f}s\n')
# Set output data
output_data = np.zeros((num_samples, 2))
......
......@@ -8,11 +8,13 @@ Docstring-Style: D200, D400
TODO: Test new ANN set-up with Soraya
TODO: Remove object set-up (for more flexibility)
TODO: Add documentation
TODO: Improve log output
TODO: Improve log output (timer, bit of text) -> Done
TODO: Throw exception for error due to missing classes
TODO: Allow multiple approximations in one config
"""
import numpy as np
import time
import matplotlib
from matplotlib import pyplot as plt
import os
......@@ -64,13 +66,16 @@ class ModelTrainer(object):
self._model.parameters(), **self._optimizer_config)
self._validation_loss = torch.zeros(self._num_epochs//10)
def epoch_training(self, dataset=None, num_epochs=None):
def epoch_training(self, dataset=None, num_epochs=None, verbose=True):
tic = time.perf_counter()
# Split data into training and validation set
if dataset is None:
dataset = self._training_data
if num_epochs is None:
num_epochs = self._num_epochs
num_samples = len(dataset)
if verbose:
print('Splitting data randomly into training and validation set.')
train_ds, valid_ds = random_split(dataset, [round(num_samples*0.8), round(num_samples*0.2)])
# Load sets
......@@ -78,6 +83,10 @@ class ModelTrainer(object):
valid_dl = DataLoader(valid_ds, batch_size=self._batch_size * 2)
# Training with Validation
if verbose:
print('\nTraining model...')
print('Number of epochs:', num_epochs)
tic_train = time.perf_counter()
for epoch in range(num_epochs):
self._model.train()
for x_batch, y_batch in train_dl:
......@@ -97,13 +106,21 @@ class ModelTrainer(object):
if (epoch+1) % 100 == 0:
self._validation_loss[int((epoch+1) / 100)-1] = valid_loss / len(valid_dl)
print(epoch+1, valid_loss / len(valid_dl))
if verbose:
print(epoch+1, 'epochs completed. Loss:', valid_loss / len(valid_dl))
if valid_loss / len(valid_dl) < self._threshold:
break
toc_train = time.perf_counter()
if verbose:
print('Finished training model!')
print(f'Training time: {toc_train-tic_train:0.4f}s\n')
toc = time.perf_counter()
if verbose:
print(f'Total runtime: {toc-tic:0.4f}s\n')
def test_model(self, training_set, test_set):
self.epoch_training(training_set, num_epochs=100)
self.epoch_training(training_set, num_epochs=100, verbose=False)
self._model.eval()
x_test, y_test = test_set
......@@ -143,17 +160,24 @@ def read_training_data(directory, normalized=True):
def evaluate_models(models, directory, num_iterations=100, colors=None,
compare_normalization=False):
tic = time.perf_counter()
if colors is None:
colors = {'Accuracy': 'magenta', 'Precision_Smooth': 'red',
'Precision_Troubled': '#8B0000', 'Recall_Smooth': 'blue',
'Recall_Troubled': '#00008B', 'F-Score_Smooth': 'green',
'F-Score_Troubled': '#006400', 'AUROC': 'yellow'}
print('Read normalized training data.')
datasets = {'normalized': read_training_data(directory)}
if compare_normalization:
print('Read raw, non-normalized training data.')
datasets['raw'] = read_training_data(directory, False)
classification_stats = {measure: {model + ' (' + dataset + ')': [] for model in models
for dataset in datasets} for measure in colors}
print('\nTraining models with 5-fold cross validation...')
print('Number of iterations:', num_iterations)
tic_train = time.perf_counter()
for iteration in range(num_iterations):
for train_index, test_index in KFold(
n_splits=5, shuffle=True).split(datasets['normalized']):
......@@ -166,7 +190,13 @@ def evaluate_models(models, directory, num_iterations=100, colors=None,
for measure in colors:
classification_stats[measure][model + ' (' + dataset + ')'].append(
result[measure])
# Report progress only every N-th iteration, where N is at least 10 and
# grows with the total iteration count. The parentheses and the explicit
# `== 0` are required: `%` binds tighter than `+`, so the unparenthesised
# form evaluated to `iteration + 1` (always truthy) and logged every pass.
if (iteration + 1) % max(10, 10 * (num_iterations // 100)) == 0:
    print(iteration+1, 'iterations completed.')
toc_train = time.perf_counter()
print('Finished training models with 5-fold cross validation!')
print(f'Training time: {toc_train - tic_train:0.4f}s\n')
print('Plotting evaluation of trained models.')
plot_boxplot(classification_stats, colors)
classification_stats = {measure: {model + ' (' + dataset + ')': np.array(
classification_stats[measure][model + ' (' + dataset + ')']).mean() for model in models
......@@ -179,6 +209,7 @@ def evaluate_models(models, directory, num_iterations=100, colors=None,
os.makedirs(plot_dir)
# Save plots
print('Saving plots.')
for identifier in plt.get_figlabels():
# Set path for figure directory if not existing already
if not os.path.exists(plot_dir + '/' + identifier):
......@@ -186,3 +217,5 @@ def evaluate_models(models, directory, num_iterations=100, colors=None,
plt.figure(identifier)
plt.savefig(plot_dir + '/' + identifier + '/' + '_'.join(models.keys()) + '.pdf')
toc = time.perf_counter()
print(f'Total runtime: {toc - tic:0.4f}s')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment