From fd7cc789d44324d0d0660a426190759884e1fd72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?K=C3=BChle=2C=20Laura=20Christine=20=28lakue103=29?=
 <laura.kuehle@uni-duesseldorf.de>
Date: Tue, 16 Nov 2021 13:47:55 +0100
Subject: [PATCH] Improved directory structure and naming for output of ANN
 training.

---
 ANN_Data_Generator.py |  9 ++-------
 ANN_Training.py       | 41 ++++++++++++++++++++---------------------
 Snakefile             | 18 ++++++++++--------
 config.yaml           |  1 +
 4 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/ANN_Data_Generator.py b/ANN_Data_Generator.py
index 8aff005..d394521 100644
--- a/ANN_Data_Generator.py
+++ b/ANN_Data_Generator.py
@@ -53,14 +53,9 @@ class TrainingDataGenerator(object):
         return data
 
     def _save_data(self, data, num_samples, normalize):
-        normalize_string = 'non-' if not normalize else ''
-        name = '__smooth_' + str((num_samples*self._balance)/1000) + 'k__troubled_' \
-               + str((num_samples*(1-self._balance))/1000)\
-               + 'k__' + normalize_string + 'normalized.npy'
-
-        input_name = self._data_dir + '/training_input.npy'  # + name
+        input_name = self._data_dir + '/input_data.npy'
         np.save(input_name, data[0])
-        output_name = self._data_dir + '/training_output.npy'  # + name
+        output_name = self._data_dir + '/output_data.npy'
         np.save(output_name, data[1])
 
     def _calculate_data_set(self, num_samples, normalize):
diff --git a/ANN_Training.py b/ANN_Training.py
index dccd8b1..58b320d 100644
--- a/ANN_Training.py
+++ b/ANN_Training.py
@@ -9,8 +9,9 @@ TODO: Fix difference between accuracies (stems from rounding; choose higher valu
 TODO: Add more evaluation measures (AUROC, ROC, F1, training accuracy, etc.)
 TODO: Decide on k-fold cross-validation (Use? Which model do we keep?)
 TODO: Rework model testing
-TODO: Clean up directories/naming
+TODO: Clean up directories/naming -> Done
 TODO: Add log to pipeline
+TODO: Remove object set-up
 
 """
 import numpy as np
@@ -29,11 +30,9 @@ class ModelTrainer(object):
         self._reset(config)
 
     def _reset(self, config):
-        data_dir = config.pop('data_dir', 'test_data')
-        self._model_dir = config.pop('model_dir', 'test_data')
-        self._plot_dir = config.pop('plot_dir', 'new_fig')
-        self._data_file = config.pop('training_data', 'smooth_0.05k__troubled_0.05k__normalized.npy')
-        self._read_training_data(data_dir)
+        self._dir = config.pop('dir', 'test_data')
+        self._model_name = config.pop('model_name', '0')
+        self._read_training_data()
 
         self._batch_size = config.pop('batch_size', min(len(self._training_data)//2, 500))
         self._num_epochs = config.pop('num_epochs', 1000)
@@ -63,10 +62,10 @@ class ModelTrainer(object):
             self._model.parameters(), **self._optimizer_config)
         self._validation_loss = torch.zeros(self._num_epochs//100)
 
-    def _read_training_data(self, directory):
+    def _read_training_data(self):
         # Get training dataset from saved file and map to Torch tensor and dataset
-        input_file = directory + '/training_input.npy'  # + self._data_file
-        output_file = directory + '/training_output.npy'  # + self._data_file
+        input_file = self._dir + '/input_data.npy'
+        output_file = self._dir + '/output_data.npy'
         self._training_data = TensorDataset(*map(torch.tensor, (np.load(input_file),
                                                                 np.load(output_file))))
 
@@ -133,17 +132,18 @@ class ModelTrainer(object):
             + test_name
 
         # Set paths for plot files if not existing already
-        if not os.path.exists(self._plot_dir):
-            os.makedirs(self._plot_dir)
+        plot_dir = self._dir + '/model evaluation'
+        if not os.path.exists(plot_dir):
+            os.makedirs(plot_dir)
 
         # Save plots
         for identifier in plt.get_figlabels():
             # Set path for figure directory if not existing already
-            if not os.path.exists(self._plot_dir + '/' + identifier):
-                os.makedirs(self._plot_dir + '/' + identifier)
+            if not os.path.exists(plot_dir + '/' + identifier):
+                os.makedirs(plot_dir + '/' + identifier)
 
             plt.figure(identifier)
-            plt.savefig(self._plot_dir + '/' + identifier + '/' + name + '.pdf')
+            plt.savefig(plot_dir + '/' + identifier + '/' + name + '.pdf')
 
     @staticmethod
     def _evaluate_classification(model_output, true_output):
@@ -184,16 +184,15 @@ class ModelTrainer(object):
 
     def save_model(self):
         # Saving Model
-        # data_name = self._data_file.split('.npy')[0]
-        # path = self._model.get_name() + '__' + self._optimizer.__class__.__name__ + '_' \
-        #     + str(self._learning_rate) + '__' + self._loss_function.__class__.__name__ + '.pt'
+        name = self._model_name
 
         # Set paths for plot files if not existing already
-        if not os.path.exists(self._model_dir):
-            os.makedirs(self._model_dir)
+        model_dir = self._dir + '/trained models'
+        if not os.path.exists(model_dir):
+            os.makedirs(model_dir)
 
-        torch.save(self._model.state_dict(), self._model_dir + '/model.pt')  # __' + path)
-        torch.save(self._validation_loss, self._model_dir + '/loss.pt')  # __' + path)
+        torch.save(self._model.state_dict(), model_dir + '/model__' + name + '.pt')
+        torch.save(self._validation_loss, model_dir + '/loss__' + name + '.pt')
 
     def _classify(self):
         pass
diff --git a/Snakefile b/Snakefile
index e459754..a236723 100644
--- a/Snakefile
+++ b/Snakefile
@@ -9,12 +9,12 @@ DIR = config['data_directory']
 
 rule all:
     input:
-        DIR+'/model.pt'
+        DIR+'/trained models/model__' + config['model_name'] + '.pt'
 
 rule generate_data:
     output:
-        DIR+'/training_input.npy',
-        DIR+'/training_output.npy'
+        DIR+'/input_data.npy',
+        DIR+'/output_data.npy'
     params:
         left_bound = config['left_boundary'],
         right_bound = config['right_boundary'],
@@ -40,9 +40,10 @@ rule generate_data:
 
 rule train_model:
     input:
-        DIR+'/training_input.npy',
-        DIR+'/training_output.npy'
+        DIR+'/input_data.npy',
+        DIR+'/output_data.npy'
     params:
+        model_name = config['model_name'],
         num_epochs = config['num_epochs'],
         threshold = config['threshold'],
         batch_size = config['batch_size'],
@@ -53,10 +54,11 @@ rule train_model:
     log:
         DIR+'/log/train_model.log'
     output:
-        DIR+'/model.pt',
-        DIR+'/loss.pt'
+        DIR+'/trained models/model__' + config['model_name'] + '.pt',
+        DIR+'/trained models/loss__' + config['model_name'] + '.pt'
     run:
-        trainer= ANN_Training.ModelTrainer({'num_epochs': params.num_epochs, 'data_dir': DIR,
+        trainer= ANN_Training.ModelTrainer({'model_name': params.model_name,
+                                            'num_epochs': params.num_epochs, 'dir': DIR,
                                             'model_dir': DIR, 'threshold': params.threshold,
                                             'batch_size': params.batch_size, 'model': params.model,
                                             'model_config': params.model_config,
diff --git a/config.yaml b/config.yaml
index 616088d..ffd7420 100644
--- a/config.yaml
+++ b/config.yaml
@@ -23,6 +23,7 @@ functions:
     adjustment: 0
 
 # Parameter for Model Training
+model_name: Test_Name
 num_epochs: 1000
 threshold: 1.0e-5
 batch_size: 500
-- 
GitLab