Commit fd7cc789 authored by Laura Christine Kühle

Improved directory structure and naming for output of ANN training.

parent 14a2a500
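Taken together, the change moves all generated files under one base directory with fixed, descriptive names instead of encoding parameters into the file names. A rough sketch of the resulting layout, pieced together from the paths touched below (<dir> and <model_name> stand for the configured values):

    <dir>/input_data.npy
    <dir>/output_data.npy
    <dir>/model evaluation/<figure label>/<plot name>.pdf
    <dir>/trained models/model__<model_name>.pt
    <dir>/trained models/loss__<model_name>.pt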
@@ -53,14 +53,9 @@ class TrainingDataGenerator(object):
         return data

     def _save_data(self, data, num_samples, normalize):
-        normalize_string = 'non-' if not normalize else ''
-        name = '__smooth_' + str((num_samples*self._balance)/1000) + 'k__troubled_' \
-            + str((num_samples*(1-self._balance))/1000)\
-            + 'k__' + normalize_string + 'normalized.npy'
-        input_name = self._data_dir + '/training_input.npy'  # + name
+        input_name = self._data_dir + '/input_data.npy'
         np.save(input_name, data[0])
-        output_name = self._data_dir + '/training_output.npy'  # + name
+        output_name = self._data_dir + '/output_data.npy'
         np.save(output_name, data[1])

     def _calculate_data_set(self, num_samples, normalize):
@@ -9,8 +9,9 @@ TODO: Fix difference between accuracies (stems from rounding; choose higher valu
 TODO: Add more evaluation measures (AUROC, ROC, F1, training accuracy, etc.)
 TODO: Decide on k-fold cross-validation (Use? Which model do we keep?)
 TODO: Rework model testing
-TODO: Clean up directories/naming
+TODO: Clean up directories/naming -> Done
 TODO: Add log to pipeline
+TODO: Remove object set-up
 """
 import numpy as np
@@ -29,11 +30,9 @@ class ModelTrainer(object):
         self._reset(config)

     def _reset(self, config):
-        data_dir = config.pop('data_dir', 'test_data')
-        self._model_dir = config.pop('model_dir', 'test_data')
-        self._plot_dir = config.pop('plot_dir', 'new_fig')
-        self._data_file = config.pop('training_data', 'smooth_0.05k__troubled_0.05k__normalized.npy')
-        self._read_training_data(data_dir)
+        self._dir = config.pop('dir', 'test_data')
+        self._model_name = config.pop('model_name', '0')
+        self._read_training_data()
         self._batch_size = config.pop('batch_size', min(len(self._training_data)//2, 500))
         self._num_epochs = config.pop('num_epochs', 1000)
@@ -63,10 +62,10 @@ class ModelTrainer(object):
             self._model.parameters(), **self._optimizer_config)
         self._validation_loss = torch.zeros(self._num_epochs//100)

-    def _read_training_data(self, directory):
+    def _read_training_data(self):
         # Get training dataset from saved file and map to Torch tensor and dataset
-        input_file = directory + '/training_input.npy'  # + self._data_file
-        output_file = directory + '/training_output.npy'  # + self._data_file
+        input_file = self._dir + '/input_data.npy'
+        output_file = self._dir + '/output_data.npy'
         self._training_data = TensorDataset(*map(torch.tensor, (np.load(input_file),
                                                                  np.load(output_file))))
@@ -133,17 +132,18 @@ class ModelTrainer(object):
             + test_name

         # Set paths for plot files if not existing already
-        if not os.path.exists(self._plot_dir):
-            os.makedirs(self._plot_dir)
+        plot_dir = self._dir + '/model evaluation'
+        if not os.path.exists(plot_dir):
+            os.makedirs(plot_dir)

         # Save plots
         for identifier in plt.get_figlabels():
             # Set path for figure directory if not existing already
-            if not os.path.exists(self._plot_dir + '/' + identifier):
-                os.makedirs(self._plot_dir + '/' + identifier)
+            if not os.path.exists(plot_dir + '/' + identifier):
+                os.makedirs(plot_dir + '/' + identifier)
             plt.figure(identifier)
-            plt.savefig(self._plot_dir + '/' + identifier + '/' + name + '.pdf')
+            plt.savefig(plot_dir + '/' + identifier + '/' + name + '.pdf')

     @staticmethod
     def _evaluate_classification(model_output, true_output):
@@ -184,16 +184,15 @@ class ModelTrainer(object):
     def save_model(self):
         # Saving Model
-        # data_name = self._data_file.split('.npy')[0]
-        # path = self._model.get_name() + '__' + self._optimizer.__class__.__name__ + '_' \
-        #     + str(self._learning_rate) + '__' + self._loss_function.__class__.__name__ + '.pt'
+        name = self._model_name

         # Set paths for plot files if not existing already
-        if not os.path.exists(self._model_dir):
-            os.makedirs(self._model_dir)
+        model_dir = self._dir + '/trained models'
+        if not os.path.exists(model_dir):
+            os.makedirs(model_dir)

-        torch.save(self._model.state_dict(), self._model_dir + '/model.pt')  # __' + path)
-        torch.save(self._validation_loss, self._model_dir + '/loss.pt')  # __' + path)
+        torch.save(self._model.state_dict(), model_dir + '/model__' + name + '.pt')
+        torch.save(self._validation_loss, model_dir + '/loss__' + name + '.pt')

     def _classify(self):
         pass
@@ -9,12 +9,12 @@ DIR = config['data_directory']

 rule all:
     input:
-        DIR+'/model.pt'
+        DIR+'/trained models/model__' + config['model_name'] + '.pt'

 rule generate_data:
     output:
-        DIR+'/training_input.npy',
-        DIR+'/training_output.npy'
+        DIR+'/input_data.npy',
+        DIR+'/output_data.npy'
     params:
         left_bound = config['left_boundary'],
         right_bound = config['right_boundary'],
@@ -40,9 +40,10 @@ rule generate_data:

 rule train_model:
     input:
-        DIR+'/training_input.npy',
-        DIR+'/training_output.npy'
+        DIR+'/input_data.npy',
+        DIR+'/output_data.npy'
     params:
+        model_name = config['model_name'],
         num_epochs = config['num_epochs'],
         threshold = config['threshold'],
         batch_size = config['batch_size'],
@@ -53,10 +54,11 @@ rule train_model:
     log:
         DIR+'/log/train_model.log'
     output:
-        DIR+'/model.pt',
-        DIR+'/loss.pt'
+        DIR+'/trained models/model__' + config['model_name'] + '.pt',
+        DIR+'/trained models/loss__' + config['model_name'] + '.pt'
     run:
-        trainer= ANN_Training.ModelTrainer({'num_epochs': params.num_epochs, 'data_dir': DIR,
+        trainer= ANN_Training.ModelTrainer({'model_name': params.model_name,
+                                            'num_epochs': params.num_epochs, 'dir': DIR,
                                             'model_dir': DIR, 'threshold': params.threshold,
                                             'batch_size': params.batch_size, 'model': params.model,
                                             'model_config': params.model_config,
@@ -23,6 +23,7 @@ functions:
   adjustment: 0

 # Parameter for Model Training
+model_name: Test_Name
 num_epochs: 1000
 threshold: 1.0e-5
 batch_size: 500
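As a usage note, a minimal sketch of constructing the trainer directly with the new config keys (values here are illustrative; keys not passed are assumed to fall back to their defaults, and the call that runs the actual training loop is not part of this diff, so it is omitted):

import ANN_Training

# 'test_data' is assumed to already contain input_data.npy and output_data.npy,
# e.g. produced by the generate_data rule above.
trainer = ANN_Training.ModelTrainer({'dir': 'test_data', 'model_name': 'Test_Name',
                                     'num_epochs': 1000, 'batch_size': 500})
# ... training ...
trainer.save_model()  # writes 'test_data/trained models/model__Test_Name.pt' and '.../loss__Test_Name.pt'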