Skip to content
Snippets Groups Projects
Commit 7deb16ed authored by Laura Christine Kühle's avatar Laura Christine Kühle
Browse files

Added comments.

parent 440409e2
No related branches found
No related tags found
No related merge requests found
...@@ -14,6 +14,7 @@ TODO: Improve legend layout -> Done ...@@ -14,6 +14,7 @@ TODO: Improve legend layout -> Done
TODO: Put plotting into separate function -> Done TODO: Put plotting into separate function -> Done
TODO: Reduce number of testing epochs to 50 -> Done TODO: Reduce number of testing epochs to 50 -> Done
TODO: Rename 'data_directory' to 'data_dir' -> Done TODO: Rename 'data_directory' to 'data_dir' -> Done
TODO: Add comments -> Done
""" """
import numpy as np import numpy as np
...@@ -108,8 +109,6 @@ class ModelTrainer: ...@@ -108,8 +109,6 @@ class ModelTrainer:
self._optimizer = getattr(torch.optim, optimizer)( self._optimizer = getattr(torch.optim, optimizer)(
self._model.parameters(), **optimizer_config) self._model.parameters(), **optimizer_config)
self._validation_loss = torch.zeros(self._num_epochs//10) self._validation_loss = torch.zeros(self._num_epochs//10)
# print(type(self._model), type(self._loss_function),
# type(self._optimizer), type(self._validation_loss))
def epoch_training(self, dataset: torch.utils.data.dataset.TensorDataset, def epoch_training(self, dataset: torch.utils.data.dataset.TensorDataset,
num_epochs: int = None, verbose: bool = True) -> None: num_epochs: int = None, verbose: bool = True) -> None:
...@@ -129,7 +128,6 @@ class ModelTrainer: ...@@ -129,7 +128,6 @@ class ModelTrainer:
Flag whether commentary in console is wanted. Default: True. Flag whether commentary in console is wanted. Default: True.
""" """
# print(type(dataset))
tic = time.perf_counter() tic = time.perf_counter()
if num_epochs is None: if num_epochs is None:
num_epochs = self._num_epochs num_epochs = self._num_epochs
...@@ -146,7 +144,7 @@ class ModelTrainer: ...@@ -146,7 +144,7 @@ class ModelTrainer:
shuffle=True) shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=self._batch_size * 2) valid_dl = DataLoader(valid_ds, batch_size=self._batch_size * 2)
# Training with Validation # Train with validation
if verbose: if verbose:
print('\nTraining model...') print('\nTraining model...')
print('Number of epochs:', num_epochs) print('Number of epochs:', num_epochs)
...@@ -163,6 +161,7 @@ class ModelTrainer: ...@@ -163,6 +161,7 @@ class ModelTrainer:
self._optimizer.step() self._optimizer.step()
self._optimizer.zero_grad() self._optimizer.zero_grad()
# Determine validation loss
self._model.eval() self._model.eval()
with torch.no_grad(): with torch.no_grad():
valid_loss = sum( valid_loss = sum(
...@@ -170,6 +169,7 @@ class ModelTrainer: ...@@ -170,6 +169,7 @@ class ModelTrainer:
y_batch_valid.float()) y_batch_valid.float())
for x_batch_valid, y_batch_valid in valid_dl) for x_batch_valid, y_batch_valid in valid_dl)
# Report validation loss
if (epoch+1) % 100 == 0: if (epoch+1) % 100 == 0:
self._validation_loss[int((epoch+1) / 100)-1] \ self._validation_loss[int((epoch+1) / 100)-1] \
= valid_loss / len(valid_dl) = valid_loss / len(valid_dl)
...@@ -177,6 +177,7 @@ class ModelTrainer: ...@@ -177,6 +177,7 @@ class ModelTrainer:
print(epoch+1, 'epochs completed. Loss:', print(epoch+1, 'epochs completed. Loss:',
valid_loss / len(valid_dl)) valid_loss / len(valid_dl))
# Interrupt if threshold is reached
if valid_loss / len(valid_dl) < self._threshold: if valid_loss / len(valid_dl) < self._threshold:
break break
toc_train = time.perf_counter() toc_train = time.perf_counter()
...@@ -208,13 +209,16 @@ class ModelTrainer: ...@@ -208,13 +209,16 @@ class ModelTrainer:
Dictionary containing classification evaluation data. Dictionary containing classification evaluation data.
""" """
# Train model
self.epoch_training(training_set, num_epochs=50, verbose=False) self.epoch_training(training_set, num_epochs=50, verbose=False)
self._model.eval() self._model.eval()
# Classify data
x_test, y_test = test_set x_test, y_test = test_set
model_score = self._model(x_test.float()) model_score = self._model(x_test.float())
model_output = torch.argmax(model_score, dim=1) model_output = torch.argmax(model_score, dim=1)
# Evaluate classification
y_true = y_test.detach().numpy()[:, 1] y_true = y_test.detach().numpy()[:, 1]
y_pred = model_output.detach().numpy() y_pred = model_output.detach().numpy()
accuracy = accuracy_score(y_true, y_pred) accuracy = accuracy_score(y_true, y_pred)
...@@ -284,7 +288,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100, ...@@ -284,7 +288,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
compare_normalization: bool = False) -> None: compare_normalization: bool = False) -> None:
"""Evaluates the classification of a given set of models. """Evaluates the classification of a given set of models.
Evaluates the classification and saves the results in a json file. Evaluates the classification and saves the results in a JSON file.
Parameters Parameters
---------- ----------
...@@ -300,23 +304,28 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100, ...@@ -300,23 +304,28 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
""" """
tic = time.perf_counter() tic = time.perf_counter()
# Read training data
print('Read normalized training data.') print('Read normalized training data.')
datasets = {'normalized': read_training_data(directory)} datasets = {'normalized': read_training_data(directory)}
if compare_normalization: if compare_normalization:
print('Read raw, non-normalized training data.') print('Read raw, non-normalized training data.')
datasets['raw'] = read_training_data(directory, False) datasets['raw'] = read_training_data(directory, False)
# Train models for evaluation
print('\nTraining models with 5-fold cross validation...') print('\nTraining models with 5-fold cross validation...')
print('Number of iterations:', num_iterations) print('Number of iterations:', num_iterations)
tic_train = time.perf_counter() tic_train = time.perf_counter()
classification_stats = {} classification_stats = {}
for iteration in range(num_iterations): for iteration in range(num_iterations):
# Split data for cross validation
for train_index, test_index in KFold( for train_index, test_index in KFold(
n_splits=5, shuffle=True).split(datasets['normalized']): n_splits=5, shuffle=True).split(datasets['normalized']):
for dataset in datasets.keys(): for dataset in datasets.keys():
training_set = TensorDataset(*datasets[dataset][train_index]) training_set = TensorDataset(*datasets[dataset][train_index])
test_set = datasets[dataset][test_index] test_set = datasets[dataset][test_index]
# Save results for each model on split dataset
for model in models: for model in models:
result = models[model].test_model(training_set, test_set) result = models[model].test_model(training_set, test_set)
for measure in result.keys(): for measure in result.keys():
...@@ -328,6 +337,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100, ...@@ -328,6 +337,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
classification_stats[measure][model + ' (' + dataset + classification_stats[measure][model + ' (' + dataset +
')'].append( ')'].append(
result[measure]) result[measure])
# Report status
if iteration+1 % max(10, 10*(num_iterations//100)): if iteration+1 % max(10, 10*(num_iterations//100)):
print(iteration+1, 'iterations completed.') print(iteration+1, 'iterations completed.')
toc_train = time.perf_counter() toc_train = time.perf_counter()
...@@ -339,7 +349,8 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100, ...@@ -339,7 +349,8 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
if not os.path.exists(plot_dir): if not os.path.exists(plot_dir):
os.makedirs(plot_dir) os.makedirs(plot_dir)
print('Saving evaluation results in json format.') # Save evaluation results in JSON format
print('Saving evaluation results in JSON format.')
with open(plot_dir + '/' + '_'.join(models.keys()) + '.json', 'w')\ with open(plot_dir + '/' + '_'.join(models.keys()) + '.json', 'w')\
as json_file: as json_file:
json_file.write(json.dumps(classification_stats)) json_file.write(json.dumps(classification_stats))
...@@ -367,16 +378,20 @@ def plot_evaluation_results(evaluation_file: str, directory: str, ...@@ -367,16 +378,20 @@ def plot_evaluation_results(evaluation_file: str, directory: str,
""" """
tic = time.perf_counter() tic = time.perf_counter()
# Set colors if not given
if colors is None: if colors is None:
colors = {'Accuracy': 'magenta', 'Precision_Smooth': 'red', colors = {'Accuracy': 'magenta', 'Precision_Smooth': 'red',
'Precision_Troubled': '#8B0000', 'Recall_Smooth': 'blue', 'Precision_Troubled': '#8B0000', 'Recall_Smooth': 'blue',
'Recall_Troubled': '#00008B', 'F-Score_Smooth': 'green', 'Recall_Troubled': '#00008B', 'F-Score_Smooth': 'green',
'F-Score_Troubled': '#006400', 'AUROC': 'yellow'} 'F-Score_Troubled': '#006400', 'AUROC': 'yellow'}
# Read evaluation results
print('Reading evaluation results.') print('Reading evaluation results.')
with open(evaluation_file) as json_file: with open(evaluation_file) as json_file:
classification_stats = json.load(json_file) classification_stats = json.load(json_file)
# Plot data
print('\nPlotting evaluation of trained models...') print('\nPlotting evaluation of trained models...')
print('Plotting data in boxplot.') print('Plotting data in boxplot.')
models = classification_stats[list(colors.keys())[0]].keys() models = classification_stats[list(colors.keys())[0]].keys()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment