Skip to content
Snippets Groups Projects
Commit fd7cc789 authored by Laura Christine Kühle
Browse files

Improved directory structure and naming for output of ANN training.

parent 14a2a500
No related branches found
No related tags found
No related merge requests found
...@@ -53,14 +53,9 @@ class TrainingDataGenerator(object): ...@@ -53,14 +53,9 @@ class TrainingDataGenerator(object):
return data return data
def _save_data(self, data, num_samples, normalize): def _save_data(self, data, num_samples, normalize):
normalize_string = 'non-' if not normalize else '' input_name = self._data_dir + '/input_data.npy'
name = '__smooth_' + str((num_samples*self._balance)/1000) + 'k__troubled_' \
+ str((num_samples*(1-self._balance))/1000)\
+ 'k__' + normalize_string + 'normalized.npy'
input_name = self._data_dir + '/training_input.npy' # + name
np.save(input_name, data[0]) np.save(input_name, data[0])
output_name = self._data_dir + '/training_output.npy' # + name output_name = self._data_dir + '/output_data.npy'
np.save(output_name, data[1]) np.save(output_name, data[1])
def _calculate_data_set(self, num_samples, normalize): def _calculate_data_set(self, num_samples, normalize):
......
...@@ -9,8 +9,9 @@ TODO: Fix difference between accuracies (stems from rounding; choose higher valu ...@@ -9,8 +9,9 @@ TODO: Fix difference between accuracies (stems from rounding; choose higher valu
TODO: Add more evaluation measures (AUROC, ROC, F1, training accuracy, etc.) TODO: Add more evaluation measures (AUROC, ROC, F1, training accuracy, etc.)
TODO: Decide on k-fold cross-validation (Use? Which model do we keep?) TODO: Decide on k-fold cross-validation (Use? Which model do we keep?)
TODO: Rework model testing TODO: Rework model testing
TODO: Clean up directories/naming TODO: Clean up directories/naming -> Done
TODO: Add log to pipeline TODO: Add log to pipeline
TODO: Remove object set-up
""" """
import numpy as np import numpy as np
...@@ -29,11 +30,9 @@ class ModelTrainer(object): ...@@ -29,11 +30,9 @@ class ModelTrainer(object):
self._reset(config) self._reset(config)
def _reset(self, config): def _reset(self, config):
data_dir = config.pop('data_dir', 'test_data') self._dir = config.pop('dir', 'test_data')
self._model_dir = config.pop('model_dir', 'test_data') self._model_name = config.pop('model_name', '0')
self._plot_dir = config.pop('plot_dir', 'new_fig') self._read_training_data()
self._data_file = config.pop('training_data', 'smooth_0.05k__troubled_0.05k__normalized.npy')
self._read_training_data(data_dir)
self._batch_size = config.pop('batch_size', min(len(self._training_data)//2, 500)) self._batch_size = config.pop('batch_size', min(len(self._training_data)//2, 500))
self._num_epochs = config.pop('num_epochs', 1000) self._num_epochs = config.pop('num_epochs', 1000)
...@@ -63,10 +62,10 @@ class ModelTrainer(object): ...@@ -63,10 +62,10 @@ class ModelTrainer(object):
self._model.parameters(), **self._optimizer_config) self._model.parameters(), **self._optimizer_config)
self._validation_loss = torch.zeros(self._num_epochs//100) self._validation_loss = torch.zeros(self._num_epochs//100)
def _read_training_data(self, directory): def _read_training_data(self):
# Get training dataset from saved file and map to Torch tensor and dataset # Get training dataset from saved file and map to Torch tensor and dataset
input_file = directory + '/training_input.npy' # + self._data_file input_file = self._dir + '/input_data.npy'
output_file = directory + '/training_output.npy' # + self._data_file output_file = self._dir + '/output_data.npy'
self._training_data = TensorDataset(*map(torch.tensor, (np.load(input_file), self._training_data = TensorDataset(*map(torch.tensor, (np.load(input_file),
np.load(output_file)))) np.load(output_file))))
...@@ -133,17 +132,18 @@ class ModelTrainer(object): ...@@ -133,17 +132,18 @@ class ModelTrainer(object):
+ test_name + test_name
# Set paths for plot files if not existing already # Set paths for plot files if not existing already
if not os.path.exists(self._plot_dir): plot_dir = self._dir + '/model evaluation'
os.makedirs(self._plot_dir) if not os.path.exists(plot_dir):
os.makedirs(plot_dir)
# Save plots # Save plots
for identifier in plt.get_figlabels(): for identifier in plt.get_figlabels():
# Set path for figure directory if not existing already # Set path for figure directory if not existing already
if not os.path.exists(self._plot_dir + '/' + identifier): if not os.path.exists(plot_dir + '/' + identifier):
os.makedirs(self._plot_dir + '/' + identifier) os.makedirs(plot_dir + '/' + identifier)
plt.figure(identifier) plt.figure(identifier)
plt.savefig(self._plot_dir + '/' + identifier + '/' + name + '.pdf') plt.savefig(plot_dir + '/' + identifier + '/' + name + '.pdf')
@staticmethod @staticmethod
def _evaluate_classification(model_output, true_output): def _evaluate_classification(model_output, true_output):
...@@ -184,16 +184,15 @@ class ModelTrainer(object): ...@@ -184,16 +184,15 @@ class ModelTrainer(object):
def save_model(self): def save_model(self):
# Saving Model # Saving Model
# data_name = self._data_file.split('.npy')[0] name = self._model_name
# path = self._model.get_name() + '__' + self._optimizer.__class__.__name__ + '_' \
# + str(self._learning_rate) + '__' + self._loss_function.__class__.__name__ + '.pt'
# Set paths for plot files if not existing already # Set paths for plot files if not existing already
if not os.path.exists(self._model_dir): model_dir = self._dir + '/trained models'
os.makedirs(self._model_dir) if not os.path.exists(model_dir):
os.makedirs(model_dir)
torch.save(self._model.state_dict(), self._model_dir + '/model.pt') # __' + path) torch.save(self._model.state_dict(), model_dir + '/model__' + name + '.pt')
torch.save(self._validation_loss, self._model_dir + '/loss.pt') # __' + path) torch.save(self._validation_loss, model_dir + '/loss__' + name + '.pt')
def _classify(self): def _classify(self):
pass pass
......
...@@ -9,12 +9,12 @@ DIR = config['data_directory'] ...@@ -9,12 +9,12 @@ DIR = config['data_directory']
rule all: rule all:
input: input:
DIR+'/model.pt' DIR+'/trained models/model__' + config['model_name'] + '.pt'
rule generate_data: rule generate_data:
output: output:
DIR+'/training_input.npy', DIR+'/input_data.npy',
DIR+'/training_output.npy' DIR+'/output_data.npy'
params: params:
left_bound = config['left_boundary'], left_bound = config['left_boundary'],
right_bound = config['right_boundary'], right_bound = config['right_boundary'],
...@@ -40,9 +40,10 @@ rule generate_data: ...@@ -40,9 +40,10 @@ rule generate_data:
rule train_model: rule train_model:
input: input:
DIR+'/training_input.npy', DIR+'/input_data.npy',
DIR+'/training_output.npy' DIR+'/output_data.npy'
params: params:
model_name = config['model_name'],
num_epochs = config['num_epochs'], num_epochs = config['num_epochs'],
threshold = config['threshold'], threshold = config['threshold'],
batch_size = config['batch_size'], batch_size = config['batch_size'],
...@@ -53,10 +54,11 @@ rule train_model: ...@@ -53,10 +54,11 @@ rule train_model:
log: log:
DIR+'/log/train_model.log' DIR+'/log/train_model.log'
output: output:
DIR+'/model.pt', DIR+'/trained models/model__' + config['model_name'] + '.pt',
DIR+'/loss.pt' DIR+'/trained models/loss__' + config['model_name'] + '.pt'
run: run:
trainer= ANN_Training.ModelTrainer({'num_epochs': params.num_epochs, 'data_dir': DIR, trainer= ANN_Training.ModelTrainer({'model_name': params.model_name,
'num_epochs': params.num_epochs, 'dir': DIR,
'model_dir': DIR, 'threshold': params.threshold, 'model_dir': DIR, 'threshold': params.threshold,
'batch_size': params.batch_size, 'model': params.model, 'batch_size': params.batch_size, 'model': params.model,
'model_config': params.model_config, 'model_config': params.model_config,
......
...@@ -23,6 +23,7 @@ functions: ...@@ -23,6 +23,7 @@ functions:
adjustment: 0 adjustment: 0
# Parameter for Model Training # Parameter for Model Training
model_name: Test_Name
num_epochs: 1000 num_epochs: 1000
threshold: 1.0e-5 threshold: 1.0e-5
batch_size: 500 batch_size: 500
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment