Commit fd7cc789 authored by Laura Christine Kühle

Improved directory structure and naming for output of ANN training.

parent 14a2a500
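Taken together, the change moves all generated files under one base directory with fixed, descriptive names instead of encoding parameters into the file names. A rough sketch of the resulting layout, pieced together from the paths touched below (<dir> and <model_name> stand for the configured values):

    <dir>/input_data.npy
    <dir>/output_data.npy
    <dir>/model evaluation/<figure label>/<plot name>.pdf
    <dir>/trained models/model__<model_name>.pt
    <dir>/trained models/loss__<model_name>.pt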
@@ -53,14 +53,9 @@ class TrainingDataGenerator(object):
         return data

     def _save_data(self, data, num_samples, normalize):
-        normalize_string = 'non-' if not normalize else ''
-        name = '__smooth_' + str((num_samples*self._balance)/1000) + 'k__troubled_' \
-            + str((num_samples*(1-self._balance))/1000)\
-            + 'k__' + normalize_string + 'normalized.npy'
-        input_name = self._data_dir + '/training_input.npy'  # + name
+        input_name = self._data_dir + '/input_data.npy'
         np.save(input_name, data[0])
-        output_name = self._data_dir + '/training_output.npy'  # + name
+        output_name = self._data_dir + '/output_data.npy'
         np.save(output_name, data[1])

     def _calculate_data_set(self, num_samples, normalize):
@@ -9,8 +9,9 @@ TODO: Fix difference between accuracies (stems from rounding; choose higher valu
 TODO: Add more evaluation measures (AUROC, ROC, F1, training accuracy, etc.)
 TODO: Decide on k-fold cross-validation (Use? Which model do we keep?)
 TODO: Rework model testing
-TODO: Clean up directories/naming
+TODO: Clean up directories/naming -> Done
 TODO: Add log to pipeline
+TODO: Remove object set-up
 """
 import numpy as np
@@ -29,11 +30,9 @@ class ModelTrainer(object):
         self._reset(config)

     def _reset(self, config):
-        data_dir = config.pop('data_dir', 'test_data')
-        self._model_dir = config.pop('model_dir', 'test_data')
-        self._plot_dir = config.pop('plot_dir', 'new_fig')
-        self._data_file = config.pop('training_data', 'smooth_0.05k__troubled_0.05k__normalized.npy')
-        self._read_training_data(data_dir)
+        self._dir = config.pop('dir', 'test_data')
+        self._model_name = config.pop('model_name', '0')
+        self._read_training_data()
         self._batch_size = config.pop('batch_size', min(len(self._training_data)//2, 500))
         self._num_epochs = config.pop('num_epochs', 1000)
@@ -63,10 +62,10 @@ class ModelTrainer(object):
             self._model.parameters(), **self._optimizer_config)
         self._validation_loss = torch.zeros(self._num_epochs//100)

-    def _read_training_data(self, directory):
+    def _read_training_data(self):
         # Get training dataset from saved file and map to Torch tensor and dataset
-        input_file = directory + '/training_input.npy'  # + self._data_file
-        output_file = directory + '/training_output.npy'  # + self._data_file
+        input_file = self._dir + '/input_data.npy'
+        output_file = self._dir + '/output_data.npy'
         self._training_data = TensorDataset(*map(torch.tensor, (np.load(input_file),
                                                                  np.load(output_file))))
@@ -133,17 +132,18 @@ class ModelTrainer(object):
             + test_name

         # Set paths for plot files if not existing already
-        if not os.path.exists(self._plot_dir):
-            os.makedirs(self._plot_dir)
+        plot_dir = self._dir + '/model evaluation'
+        if not os.path.exists(plot_dir):
+            os.makedirs(plot_dir)

         # Save plots
         for identifier in plt.get_figlabels():
             # Set path for figure directory if not existing already
-            if not os.path.exists(self._plot_dir + '/' + identifier):
-                os.makedirs(self._plot_dir + '/' + identifier)
+            if not os.path.exists(plot_dir + '/' + identifier):
+                os.makedirs(plot_dir + '/' + identifier)
             plt.figure(identifier)
-            plt.savefig(self._plot_dir + '/' + identifier + '/' + name + '.pdf')
+            plt.savefig(plot_dir + '/' + identifier + '/' + name + '.pdf')

     @staticmethod
     def _evaluate_classification(model_output, true_output):
@@ -184,16 +184,15 @@ class ModelTrainer(object):
     def save_model(self):
         # Saving Model
-        # data_name = self._data_file.split('.npy')[0]
-        # path = self._model.get_name() + '__' + self._optimizer.__class__.__name__ + '_' \
-        #     + str(self._learning_rate) + '__' + self._loss_function.__class__.__name__ + '.pt'
+        name = self._model_name

         # Set paths for plot files if not existing already
-        if not os.path.exists(self._model_dir):
-            os.makedirs(self._model_dir)
+        model_dir = self._dir + '/trained models'
+        if not os.path.exists(model_dir):
+            os.makedirs(model_dir)

-        torch.save(self._model.state_dict(), self._model_dir + '/model.pt')  # __' + path)
-        torch.save(self._validation_loss, self._model_dir + '/loss.pt')  # __' + path)
+        torch.save(self._model.state_dict(), model_dir + '/model__' + name + '.pt')
+        torch.save(self._validation_loss, model_dir + '/loss__' + name + '.pt')

     def _classify(self):
         pass
@@ -9,12 +9,12 @@ DIR = config['data_directory']

 rule all:
     input:
-        DIR+'/model.pt'
+        DIR+'/trained models/model__' + config['model_name'] + '.pt'

 rule generate_data:
     output:
-        DIR+'/training_input.npy',
-        DIR+'/training_output.npy'
+        DIR+'/input_data.npy',
+        DIR+'/output_data.npy'
     params:
         left_bound = config['left_boundary'],
         right_bound = config['right_boundary'],
@@ -40,9 +40,10 @@ rule generate_data:

 rule train_model:
     input:
-        DIR+'/training_input.npy',
-        DIR+'/training_output.npy'
+        DIR+'/input_data.npy',
+        DIR+'/output_data.npy'
     params:
+        model_name = config['model_name'],
         num_epochs = config['num_epochs'],
         threshold = config['threshold'],
         batch_size = config['batch_size'],
@@ -53,10 +54,11 @@ rule train_model:
     log:
         DIR+'/log/train_model.log'
     output:
-        DIR+'/model.pt',
-        DIR+'/loss.pt'
+        DIR+'/trained models/model__' + config['model_name'] + '.pt',
+        DIR+'/trained models/loss__' + config['model_name'] + '.pt'
     run:
-        trainer= ANN_Training.ModelTrainer({'num_epochs': params.num_epochs, 'data_dir': DIR,
+        trainer= ANN_Training.ModelTrainer({'model_name': params.model_name,
+                                            'num_epochs': params.num_epochs, 'dir': DIR,
                                             'model_dir': DIR, 'threshold': params.threshold,
                                             'batch_size': params.batch_size, 'model': params.model,
                                             'model_config': params.model_config,
@@ -23,6 +23,7 @@ functions:
   adjustment: 0

 # Parameter for Model Training
+model_name: Test_Name
 num_epochs: 1000
 threshold: 1.0e-5
 batch_size: 500
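As a usage note, a minimal sketch of constructing the trainer directly with the new config keys (values here are illustrative; keys not passed are assumed to fall back to their defaults, and the call that runs the actual training loop is not part of this diff, so it is omitted):

import ANN_Training

# 'test_data' is assumed to already contain input_data.npy and output_data.npy,
# e.g. produced by the generate_data rule above.
trainer = ANN_Training.ModelTrainer({'dir': 'test_data', 'model_name': 'Test_Name',
                                     'num_epochs': 1000, 'batch_size': 500})
# ... training ...
trainer.save_model()  # writes 'test_data/trained models/model__Test_Name.pt' and '.../loss__Test_Name.pt'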