Added comments.

7deb16ed · Laura Christine Kühle · 440409e2 · 7deb16ed
Commit 7deb16ed authored 3 years ago by Laura Christine Kühle
--- a/ANN_Training.py
+++ b/ANN_Training.py
@@ -14,6 +14,7 @@ TODO: Improve legend layout -> Done
 TODO: Put plotting into separate function -> Done
 TODO: Reduce number of testing epochs to 50 -> Done
 TODO: Rename 'data_directory' to 'data_dir' -> Done
+TODO: Add comments -> Done
 """
 import numpy as np
@@ -108,8 +109,6 @@ class ModelTrainer:
        self._optimizer = getattr(torch.optim, optimizer)(
            self._model.parameters(), **optimizer_config)
        self._validation_loss = torch.zeros(self._num_epochs//10)
-        # print(type(self._model), type(self._loss_function),
-        #       type(self._optimizer), type(self._validation_loss))
    def epoch_training(self, dataset: torch.utils.data.dataset.TensorDataset,
                       num_epochs: int = None, verbose: bool = True) -> None:
@@ -129,7 +128,6 @@ class ModelTrainer:
            Flag whether commentary in console is wanted. Default: True.
        """
-        # print(type(dataset))
        tic = time.perf_counter()
        if num_epochs is None:
            num_epochs = self._num_epochs
@@ -146,7 +144,7 @@ class ModelTrainer:
                              shuffle=True)
        valid_dl = DataLoader(valid_ds, batch_size=self._batch_size * 2)
-        # Training with Validation
+        # Train with validation
        if verbose:
            print('\nTraining model...')
            print('Number of epochs:', num_epochs)
@@ -163,6 +161,7 @@ class ModelTrainer:
                self._optimizer.step()
                self._optimizer.zero_grad()
+            # Determine validation loss
            self._model.eval()
            with torch.no_grad():
                valid_loss = sum(
@@ -170,6 +169,7 @@ class ModelTrainer:
                                        y_batch_valid.float())
                    for x_batch_valid, y_batch_valid in valid_dl)
+                # Report validation loss
                if (epoch+1) % 100 == 0:
                    self._validation_loss[int((epoch+1) / 100)-1] \
                        = valid_loss / len(valid_dl)
@@ -177,6 +177,7 @@ class ModelTrainer:
                        print(epoch+1, 'epochs completed. Loss:',
                              valid_loss / len(valid_dl))
+                # Interrupt if threshold is reached
                if valid_loss / len(valid_dl) < self._threshold:
                    break
        toc_train = time.perf_counter()
@@ -208,13 +209,16 @@ class ModelTrainer:
            Dictionary containing classification evaluation data.
        """
+        # Train model
        self.epoch_training(training_set, num_epochs=50, verbose=False)
        self._model.eval()
+        # Classify data
        x_test, y_test = test_set
        model_score = self._model(x_test.float())
        model_output = torch.argmax(model_score, dim=1)
+        # Evaluate classification
        y_true = y_test.detach().numpy()[:, 1]
        y_pred = model_output.detach().numpy()
        accuracy = accuracy_score(y_true, y_pred)
@@ -284,7 +288,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
                    compare_normalization: bool = False) -> None:
    """Evaluates the classification of a given set of models.
-    Evaluates the classification and saves the results in a json file.
+    Evaluates the classification and saves the results in a JSON file.
    Parameters
    ----------
@@ -300,23 +304,28 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
    """
    tic = time.perf_counter()
+    # Read training data
    print('Read normalized training data.')
    datasets = {'normalized': read_training_data(directory)}
    if compare_normalization:
        print('Read raw, non-normalized training data.')
        datasets['raw'] = read_training_data(directory, False)
+    # Train models for evaluation
    print('\nTraining models with 5-fold cross validation...')
    print('Number of iterations:', num_iterations)
    tic_train = time.perf_counter()
    classification_stats = {}
    for iteration in range(num_iterations):
+        # Split data for cross validation
        for train_index, test_index in KFold(
                n_splits=5, shuffle=True).split(datasets['normalized']):
            for dataset in datasets.keys():
                training_set = TensorDataset(*datasets[dataset][train_index])
                test_set = datasets[dataset][test_index]
+                # Save results for each model on split dataset
                for model in models:
                    result = models[model].test_model(training_set, test_set)
                    for measure in result.keys():
@@ -328,6 +337,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
                        classification_stats[measure][model + ' (' + dataset +
                                                      ')'].append(
                            result[measure])
+        # Report status
        if iteration+1 % max(10, 10*(num_iterations//100)):
            print(iteration+1, 'iterations completed.')
    toc_train = time.perf_counter()
@@ -339,7 +349,8 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
-    print('Saving evaluation results in json format.')
+    # Save evaluation results in JSON format
+    print('Saving evaluation results in JSON format.')
    with open(plot_dir + '/' + '_'.join(models.keys()) + '.json', 'w')\
            as json_file:
        json_file.write(json.dumps(classification_stats))
@@ -367,16 +378,20 @@ def plot_evaluation_results(evaluation_file: str, directory: str,
    """
    tic = time.perf_counter()
+    # Set colors if not given
    if colors is None:
        colors = {'Accuracy': 'magenta', 'Precision_Smooth': 'red',
                  'Precision_Troubled': '#8B0000', 'Recall_Smooth': 'blue',
                  'Recall_Troubled': '#00008B', 'F-Score_Smooth': 'green',
                  'F-Score_Troubled': '#006400', 'AUROC': 'yellow'}
+    # Read evaluation results
    print('Reading evaluation results.')
    with open(evaluation_file) as json_file:
        classification_stats = json.load(json_file)
+    # Plot data
    print('\nPlotting evaluation of trained models...')
    print('Plotting data in boxplot.')
    models = classification_stats[list(colors.keys())[0]].keys()