diff --git a/ANN_Training.py b/ANN_Training.py
index 687e5981b41fc9ca03a46063d135594ebcde42ab..2a124ba8edb5acb47362dcec372ea29845e0188a 100644
--- a/ANN_Training.py
+++ b/ANN_Training.py
@@ -14,6 +14,7 @@ TODO: Improve legend layout -> Done
 TODO: Put plotting into separate function -> Done
 TODO: Reduce number of testing epochs to 50 -> Done
 TODO: Rename 'data_directory' to 'data_dir' -> Done
+TODO: Add comments -> Done
 """
 
 import numpy as np
@@ -108,8 +109,6 @@ class ModelTrainer:
         self._optimizer = getattr(torch.optim, optimizer)(
             self._model.parameters(), **optimizer_config)
         self._validation_loss = torch.zeros(self._num_epochs//10)
-        # print(type(self._model), type(self._loss_function),
-        #       type(self._optimizer), type(self._validation_loss))
 
     def epoch_training(self, dataset: torch.utils.data.dataset.TensorDataset,
                        num_epochs: int = None, verbose: bool = True) -> None:
@@ -129,7 +128,6 @@
-            Flag whether commentary in console is wanted. Default: False.
+            Flag whether commentary in console is wanted. Default: True.
 
         """
-        # print(type(dataset))
         tic = time.perf_counter()
         if num_epochs is None:
             num_epochs = self._num_epochs
@@ -146,7 +144,7 @@
                               shuffle=True)
         valid_dl = DataLoader(valid_ds, batch_size=self._batch_size * 2)
 
-        # Training with Validation
+        # Train with validation
         if verbose:
             print('\nTraining model...')
             print('Number of epochs:', num_epochs)
@@ -163,6 +161,7 @@
                 self._optimizer.step()
                 self._optimizer.zero_grad()
 
+            # Determine validation loss
             self._model.eval()
             with torch.no_grad():
                 valid_loss = sum(
@@ -170,6 +169,7 @@
                         y_batch_valid.float())
                     for x_batch_valid, y_batch_valid in valid_dl)
 
+            # Report validation loss
             if (epoch+1) % 100 == 0:
                 self._validation_loss[int((epoch+1) / 100)-1] \
                     = valid_loss / len(valid_dl)
@@ -177,6 +177,7 @@
                 print(epoch+1, 'epochs completed. Loss:',
                       valid_loss / len(valid_dl))
 
+            # Interrupt if threshold is reached
             if valid_loss / len(valid_dl) < self._threshold:
                 break
         toc_train = time.perf_counter()
@@ -208,13 +209,16 @@
             Dictionary containing classification evaluation data.
 
         """
+        # Train model
         self.epoch_training(training_set, num_epochs=50, verbose=False)
         self._model.eval()
 
+        # Classify data
         x_test, y_test = test_set
         model_score = self._model(x_test.float())
         model_output = torch.argmax(model_score, dim=1)
 
+        # Evaluate classification
         y_true = y_test.detach().numpy()[:, 1]
         y_pred = model_output.detach().numpy()
         accuracy = accuracy_score(y_true, y_pred)
@@ -284,7 +288,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
                     compare_normalization: bool = False) -> None:
     """Evaluates the classification of a given set of models.
 
-    Evaluates the classification and saves the results in a json file.
+    Evaluates the classification and saves the results in a JSON file.
 
     Parameters
     ----------
@@ -300,23 +304,28 @@
 
     """
     tic = time.perf_counter()
+
+    # Read training data
     print('Read normalized training data.')
     datasets = {'normalized': read_training_data(directory)}
     if compare_normalization:
         print('Read raw, non-normalized training data.')
         datasets['raw'] = read_training_data(directory, False)
 
+    # Train models for evaluation
     print('\nTraining models with 5-fold cross validation...')
     print('Number of iterations:', num_iterations)
     tic_train = time.perf_counter()
     classification_stats = {}
     for iteration in range(num_iterations):
+        # Split data for cross validation
         for train_index, test_index in KFold(
                 n_splits=5, shuffle=True).split(datasets['normalized']):
             for dataset in datasets.keys():
                 training_set = TensorDataset(*datasets[dataset][train_index])
                 test_set = datasets[dataset][test_index]
 
+                # Save results for each model on split dataset
                 for model in models:
                     result = models[model].test_model(training_set, test_set)
                     for measure in result.keys():
@@ -328,6 +337,7 @@
                         classification_stats[measure][model + ' ('
                             + dataset + ')'].append(
                             result[measure])
-        if iteration+1 % max(10, 10*(num_iterations//100)):
+        # Report status
+        if (iteration+1) % max(10, 10*(num_iterations//100)) == 0:
             print(iteration+1, 'iterations completed.')
     toc_train = time.perf_counter()
@@ -339,7 +349,8 @@
     if not os.path.exists(plot_dir):
         os.makedirs(plot_dir)
 
-    print('Saving evaluation results in json format.')
+    # Save evaluation results in JSON format
+    print('Saving evaluation results in JSON format.')
     with open(plot_dir + '/' + '_'.join(models.keys()) + '.json', 'w')\
             as json_file:
         json_file.write(json.dumps(classification_stats))
@@ -367,16 +378,20 @@
 
     """
     tic = time.perf_counter()
+
+    # Set colors if not given
     if colors is None:
         colors = {'Accuracy': 'magenta', 'Precision_Smooth': 'red',
                   'Precision_Troubled': '#8B0000', 'Recall_Smooth': 'blue',
                   'Recall_Troubled': '#00008B', 'F-Score_Smooth': 'green',
                   'F-Score_Troubled': '#006400', 'AUROC': 'yellow'}
 
+    # Read evaluation results
     print('Reading evaluation results.')
     with open(evaluation_file) as json_file:
         classification_stats = json.load(json_file)
 
+    # Plot data
     print('\nPlotting evaluation of trained models...')
     print('Plotting data in boxplot.')
     models = classification_stats[list(colors.keys())[0]].keys()
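
Note: for reference, the train-with-validation pattern that the new comments in epoch_training mark out (mini-batch training, validation loss under torch.no_grad(), a report every 100 epochs, an interrupt once the loss drops below the threshold) boils down to the sketch below. The toy data, linear model, split sizes, and threshold are illustrative assumptions, not the repository's configuration.

import torch
from torch.utils.data import DataLoader, TensorDataset, random_split

# Illustrative stand-ins: random data, a linear model, an 80/20 split
torch.manual_seed(0)
data = TensorDataset(torch.randn(200, 5), torch.randn(200, 2))
train_ds, valid_ds = random_split(data, [160, 40])
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=64)

model = torch.nn.Linear(5, 2)
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())
threshold = 1e-5  # assumed value; the real one comes from the trainer config

for epoch in range(1000):
    # Train on mini-batches
    model.train()
    for x_batch, y_batch in train_dl:
        loss = loss_function(model(x_batch), y_batch)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # Determine validation loss without tracking gradients
    model.eval()
    with torch.no_grad():
        valid_loss = sum(loss_function(model(x), y)
                         for x, y in valid_dl) / len(valid_dl)

    # Report validation loss periodically
    if (epoch+1) % 100 == 0:
        print(epoch+1, 'epochs completed. Loss:', valid_loss.item())

    # Interrupt if threshold is reached
    if valid_loss < threshold:
        break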
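
Likewise, a minimal sketch of the cross-validation bookkeeping in evaluate_models, with the corrected status report: results are collected per measure and per model over repeated 5-fold splits. The dummy data and the stand-in test_model below are assumptions for illustration only; the real method trains and scores the ANN on each split.

import numpy as np
from sklearn.model_selection import KFold

rng = np.random.default_rng(0)
features = rng.normal(size=(100, 5))  # dummy data in place of the training set

def test_model(train_index, test_index):
    # Placeholder: the real code trains the model on the training split and
    # returns measures such as accuracy, precision, recall, and AUROC
    return {'Accuracy': rng.uniform(), 'AUROC': rng.uniform()}

models = {'Adam': test_model}
num_iterations = 20
classification_stats = {}
for iteration in range(num_iterations):
    # Split data anew for each cross-validation iteration
    for train_index, test_index in KFold(
            n_splits=5, shuffle=True).split(features):
        # Save results for each model on the split dataset
        for model in models:
            result = models[model](train_index, test_index)
            for measure in result:
                classification_stats.setdefault(measure, {}).setdefault(
                    model, []).append(result[measure])

    # Report status every max(10, ...) iterations (note the parentheses)
    if (iteration+1) % max(10, 10*(num_iterations//100)) == 0:
        print(iteration+1, 'iterations completed.')

# 20 iterations x 5 folds = 100 entries per measure and model
print({measure: len(stats['Adam'])
       for measure, stats in classification_stats.items()})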