From fd7cc789d44324d0d0660a426190759884e1fd72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?K=C3=BChle=2C=20Laura=20Christine=20=28lakue103=29?= <laura.kuehle@uni-duesseldorf.de>
Date: Tue, 16 Nov 2021 13:47:55 +0100
Subject: [PATCH] Improved directory structure and naming for output of ANN training.

---
 ANN_Data_Generator.py |  9 ++-------
 ANN_Training.py       | 41 ++++++++++++++++++++---------------------
 Snakefile             | 18 ++++++++++--------
 config.yaml           |  1 +
 4 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/ANN_Data_Generator.py b/ANN_Data_Generator.py
index 8aff005..d394521 100644
--- a/ANN_Data_Generator.py
+++ b/ANN_Data_Generator.py
@@ -53,14 +53,9 @@ class TrainingDataGenerator(object):
         return data
 
     def _save_data(self, data, num_samples, normalize):
-        normalize_string = 'non-' if not normalize else ''
-        name = '__smooth_' + str((num_samples*self._balance)/1000) + 'k__troubled_' \
-            + str((num_samples*(1-self._balance))/1000)\
-            + 'k__' + normalize_string + 'normalized.npy'
-
-        input_name = self._data_dir + '/training_input.npy'  # + name
+        input_name = self._data_dir + '/input_data.npy'
         np.save(input_name, data[0])
-        output_name = self._data_dir + '/training_output.npy'  # + name
+        output_name = self._data_dir + '/output_data.npy'
         np.save(output_name, data[1])
 
     def _calculate_data_set(self, num_samples, normalize):
diff --git a/ANN_Training.py b/ANN_Training.py
index dccd8b1..58b320d 100644
--- a/ANN_Training.py
+++ b/ANN_Training.py
@@ -9,8 +9,9 @@ TODO: Fix difference between accuracies (stems from rounding; choose higher valu
 TODO: Add more evaluation measures (AUROC, ROC, F1, training accuracy, etc.)
 TODO: Decide on k-fold cross-validation (Use? Which model do we keep?)
 TODO: Rework model testing
-TODO: Clean up directories/naming
+TODO: Clean up directories/naming -> Done
 TODO: Add log to pipeline
+TODO: Remove object set-up
 
 """
 import numpy as np
@@ -29,11 +30,9 @@ class ModelTrainer(object):
         self._reset(config)
 
     def _reset(self, config):
-        data_dir = config.pop('data_dir', 'test_data')
-        self._model_dir = config.pop('model_dir', 'test_data')
-        self._plot_dir = config.pop('plot_dir', 'new_fig')
-        self._data_file = config.pop('training_data', 'smooth_0.05k__troubled_0.05k__normalized.npy')
-        self._read_training_data(data_dir)
+        self._dir = config.pop('dir', 'test_data')
+        self._model_name = config.pop('model_name', '0')
+        self._read_training_data()
         self._batch_size = config.pop('batch_size', min(len(self._training_data)//2, 500))
         self._num_epochs = config.pop('num_epochs', 1000)
 
@@ -63,10 +62,10 @@ class ModelTrainer(object):
             self._model.parameters(), **self._optimizer_config)
         self._validation_loss = torch.zeros(self._num_epochs//100)
 
-    def _read_training_data(self, directory):
+    def _read_training_data(self):
         # Get training dataset from saved file and map to Torch tensor and dataset
-        input_file = directory + '/training_input.npy'  # + self._data_file
-        output_file = directory + '/training_output.npy'  # + self._data_file
+        input_file = self._dir + '/input_data.npy'
+        output_file = self._dir + '/output_data.npy'
         self._training_data = TensorDataset(*map(torch.tensor, (np.load(input_file),
                                                                  np.load(output_file))))
 
@@ -133,17 +132,18 @@ class ModelTrainer(object):
             + test_name
 
         # Set paths for plot files if not existing already
-        if not os.path.exists(self._plot_dir):
-            os.makedirs(self._plot_dir)
+        plot_dir = self._dir + '/model evaluation'
+        if not os.path.exists(plot_dir):
+            os.makedirs(plot_dir)
 
         # Save plots
        for identifier in plt.get_figlabels():
            # Set path for figure directory if not existing already
-            if not os.path.exists(self._plot_dir + '/' + identifier):
-                os.makedirs(self._plot_dir + '/' + identifier)
+            if not os.path.exists(plot_dir + '/' + identifier):
+                os.makedirs(plot_dir + '/' + identifier)
 
             plt.figure(identifier)
-            plt.savefig(self._plot_dir + '/' + identifier + '/' + name + '.pdf')
+            plt.savefig(plot_dir + '/' + identifier + '/' + name + '.pdf')
 
     @staticmethod
     def _evaluate_classification(model_output, true_output):
@@ -184,16 +184,15 @@ class ModelTrainer(object):
 
     def save_model(self):
         # Saving Model
-        # data_name = self._data_file.split('.npy')[0]
-        # path = self._model.get_name() + '__' + self._optimizer.__class__.__name__ + '_' \
-        #     + str(self._learning_rate) + '__' + self._loss_function.__class__.__name__ + '.pt'
+        name = self._model_name
 
         # Set paths for plot files if not existing already
-        if not os.path.exists(self._model_dir):
-            os.makedirs(self._model_dir)
+        model_dir = self._dir + '/trained models'
+        if not os.path.exists(model_dir):
+            os.makedirs(model_dir)
 
-        torch.save(self._model.state_dict(), self._model_dir + '/model.pt')  # __' + path)
-        torch.save(self._validation_loss, self._model_dir + '/loss.pt')  # __' + path)
+        torch.save(self._model.state_dict(), model_dir + '/model__' + name + '.pt')
+        torch.save(self._validation_loss, model_dir + '/loss__' + name + '.pt')
 
     def _classify(self):
         pass
diff --git a/Snakefile b/Snakefile
index e459754..a236723 100644
--- a/Snakefile
+++ b/Snakefile
@@ -9,12 +9,12 @@ DIR = config['data_directory']
 
 rule all:
     input:
-        DIR+'/model.pt'
+        DIR+'/trained models/model__' + config['model_name'] + '.pt'
 
 rule generate_data:
     output:
-        DIR+'/training_input.npy',
-        DIR+'/training_output.npy'
+        DIR+'/input_data.npy',
+        DIR+'/output_data.npy'
     params:
         left_bound = config['left_boundary'],
         right_bound = config['right_boundary'],
@@ -40,9 +40,10 @@ rule generate_data:
 
 rule train_model:
     input:
-        DIR+'/training_input.npy',
-        DIR+'/training_output.npy'
+        DIR+'/input_data.npy',
+        DIR+'/output_data.npy'
     params:
+        model_name = config['model_name'],
         num_epochs = config['num_epochs'],
         threshold = config['threshold'],
         batch_size = config['batch_size'],
@@ -53,10 +54,11 @@ rule train_model:
     log:
        DIR+'/log/train_model.log'
    output:
-        DIR+'/model.pt',
-        DIR+'/loss.pt'
+        DIR+'/trained models/model__' + config['model_name'] + '.pt',
+        DIR+'/trained models/loss__' + config['model_name'] + '.pt'
    run:
-        trainer= ANN_Training.ModelTrainer({'num_epochs': params.num_epochs, 'data_dir': DIR,
+        trainer= ANN_Training.ModelTrainer({'model_name': params.model_name,
+            'num_epochs': params.num_epochs, 'dir': DIR,
             'model_dir': DIR, 'threshold': params.threshold,
             'batch_size': params.batch_size, 'model': params.model,
             'model_config': params.model_config,
diff --git a/config.yaml b/config.yaml
index 616088d..ffd7420 100644
--- a/config.yaml
+++ b/config.yaml
@@ -23,6 +23,7 @@ functions:
     adjustment: 0
 
 # Parameter for Model Training
+model_name: Test_Name
 num_epochs: 1000
 threshold: 1.0e-5
 batch_size: 500
-- 
GitLab
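
Usage sketch (illustrative only, not part of the patch): after this change the trainer is keyed by 'dir' and 'model_name' instead of the old 'data_dir'/'model_dir'/'plot_dir'/'training_data' entries, and everything it writes lands below that one directory. Assuming 'test_data/' already contains the input_data.npy/output_data.npy files produced by the generate_data rule, and that the config keys not shown in this diff keep their defaults from _reset, a stand-alone call could look like:

    import ANN_Training

    # Hypothetical stand-alone call; the Snakemake rule above passes the full
    # parameter set ('threshold', 'model', 'model_config', ...), which is
    # assumed here to fall back to defaults.
    trainer = ANN_Training.ModelTrainer({'dir': 'test_data',
                                         'model_name': 'Test_Name',
                                         'num_epochs': 1000,
                                         'batch_size': 500})
    trainer.save_model()  # -> 'test_data/trained models/model__Test_Name.pt'
                          #    and 'test_data/trained models/loss__Test_Name.pt'

Keeping a single 'dir' plus a 'model_name' means every artefact of a training run (the 'trained models' and 'model evaluation' subdirectories as well as the Snakemake log) sits under the same data directory and is tagged with the same name.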