Skip to content
Snippets Groups Projects
Commit b2e66543 authored by Laura Christine Kühle
Browse files

Added option to compare evaluation of multiple models.

parent 9adbf4a6
Branches
No related tags found
No related merge requests found
......@@ -2,7 +2,7 @@
"""
@author: Laura C. Kühle, Soraya Terrab (sorayaterrab)
TODO: Give option to compare multiple models
TODO: Give option to compare multiple models -> Done
TODO: Add more evaluation measures (AUROC, ROC, F1, training accuracy, boxplot over CVF, etc.) -> Done
TODO: Add log to pipeline
TODO: Remove object set-up
......@@ -11,6 +11,7 @@ TODO: Improve maximum selection runtime
TODO: Discuss if we want training accuracy/ROC in addition to CFV
TODO: Discuss whether to change output to binary
TODO: Adapt TCD file to new classification
TODO: Improve classification stat handling
"""
import numpy as np
......@@ -33,7 +34,7 @@ class ModelTrainer(object):
def _reset(self, config):
self._dir = config.pop('dir', 'test_data')
self._model_name = config.pop('model_name', '0')
self._read_training_data()
self._training_data = read_training_data(self._dir)
self._batch_size = config.pop('batch_size', min(len(self._training_data)//2, 500))
self._num_epochs = config.pop('num_epochs', 1000)
......@@ -63,13 +64,6 @@ class ModelTrainer(object):
self._model.parameters(), **self._optimizer_config)
self._validation_loss = torch.zeros(self._num_epochs//10)
def _read_training_data(self):
# Get training dataset from saved file and map to Torch tensor and dataset
input_file = self._dir + '/input_data.npy'
output_file = self._dir + '/output_data.npy'
self._training_data = TensorDataset(*map(torch.tensor, (np.load(input_file),
np.load(output_file))))
def epoch_training(self, dataset=None, num_epochs=None):
# Split data into training and validation set
if dataset is None:
......@@ -108,39 +102,7 @@ class ModelTrainer(object):
if valid_loss / len(valid_dl) < self._threshold:
break
def test_model(self, num_iterations=100):
    """Evaluate the model by repeated 5-fold cross validation and save plots.

    For each of ``num_iterations`` rounds the training data is split into
    five shuffled folds; every fold is passed to ``self._test_fold`` and
    the returned statistics are collected. The per-fold statistics are
    drawn as a boxplot, their mean as a classification-accuracy plot, and
    every open, labelled figure is written to
    ``<dir>/model evaluation/<figure label>/<model name>.pdf``.

    Parameters
    ----------
    num_iterations : int, optional
        Number of times the 5-fold split is repeated. Default: 100.
    """
    fold_stats = []
    for _ in range(num_iterations):
        dataset = self._training_data
        splitter = KFold(n_splits=5, shuffle=True)
        for train_index, test_index in splitter.split(dataset):
            training_set = TensorDataset(*dataset[train_index])
            test_set = dataset[test_index]
            fold_stats.append(self._test_fold(training_set, test_set))

    # One row per fold; presumably one column per statistic expected by
    # the plot functions -- verify against _test_fold.
    stats_per_fold = np.array(fold_stats)
    plot_boxplot([self._model_name], *stats_per_fold.transpose())
    plot_classification_accuracy([self._model_name], *stats_per_fold.mean(axis=0))

    # Set paths for plot files if not existing already
    plot_dir = self._dir + '/model evaluation'
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    # Save plots
    for identifier in plt.get_figlabels():
        # Set path for figure directory if not existing already
        figure_dir = plot_dir + '/' + identifier
        if not os.path.exists(figure_dir):
            os.makedirs(figure_dir)

        plt.figure(identifier)
        plt.savefig(figure_dir + '/' + self._model_name + '.pdf')
def _test_fold(self, training_set, test_set):
def test_model(self, training_set, test_set):
self.epoch_training(training_set, num_epochs=100)
self._model.eval()
......@@ -185,6 +147,63 @@ class ModelTrainer(object):
pass
def read_training_data(directory):
    """Load the saved training data from ``directory`` as a TensorDataset.

    Parameters
    ----------
    directory : str
        Path of the directory containing ``input_data.npy`` and
        ``output_data.npy``.

    Returns
    -------
    TensorDataset
        Dataset pairing the input tensor with the output tensor.
    """
    # Map each saved NumPy array to a Torch tensor (input first, then output).
    input_tensor = torch.tensor(np.load(directory + '/input_data.npy'))
    output_tensor = torch.tensor(np.load(directory + '/output_data.npy'))
    return TensorDataset(input_tensor, output_tensor)
def evaluate_models(models, directory, num_iterations=100):
    """Compare several models by repeated 5-fold cross validation.

    The training data in ``directory`` is split into five shuffled folds,
    ``num_iterations`` times over. Every model is tested on the very same
    train/test split, and its results are collected per statistic. The
    collected values are drawn as boxplots, their means as a
    classification-accuracy plot, and every open, labelled figure is saved
    to ``<directory>/model evaluation/<figure label>/<names>.pdf`` where
    ``<names>`` is the model names joined by ``_``.

    Parameters
    ----------
    models : dict
        Maps a model name to an object offering
        ``test_model(training_set, test_set)``. The result is indexed
        positionally in the order Precision, Recall, Accuracy, F-Score,
        AUROC.
    directory : str
        Directory holding the training data; plots are written below it.
    num_iterations : int, optional
        Number of times the 5-fold split is repeated. Default: 100.
    """
    dataset = read_training_data(directory)
    stats = ['Precision', 'Recall', 'Accuracy', 'F-Score', 'AUROC']
    classification_stats = {model: {name: [] for name in stats} for model in models}

    # NOTE(review): test_model is called on the same trainer object for every
    # fold; whether its model is re-initialised in between is not visible
    # here -- confirm that no weights carry over between folds.
    for _ in range(num_iterations):
        for train_index, test_index in KFold(n_splits=5, shuffle=True).split(dataset):
            training_set = TensorDataset(*dataset[train_index])
            test_set = dataset[test_index]
            for model, trainer in models.items():
                result = trainer.test_model(training_set, test_set)
                # Index explicitly so a too-short result still raises.
                for index, stat in enumerate(stats):
                    classification_stats[model][stat].append(result[index])

    # One (models x samples) array per statistic, in the order of `stats`.
    stats_per_model = np.array([[classification_stats[model][stat]
                                 for model in models] for stat in stats])
    plot_boxplot(models.keys(), *stats_per_model)

    mean_stats = [[np.array(classification_stats[model][stat]).mean(axis=0)
                   for model in models] for stat in stats]
    plot_classification_accuracy(models.keys(), *mean_stats)

    # Create the plot directory; exist_ok avoids the check-then-create race.
    plot_dir = directory + '/model evaluation'
    os.makedirs(plot_dir, exist_ok=True)

    # Save every labelled figure into its own sub-directory
    file_name = '_'.join(models.keys()) + '.pdf'
    for identifier in plt.get_figlabels():
        figure_dir = plot_dir + '/' + identifier
        os.makedirs(figure_dir, exist_ok=True)

        plt.figure(identifier)
        plt.savefig(figure_dir + '/' + file_name)
# Loss Functions: BCELoss, BCEWithLogitsLoss,
# CrossEntropyLoss (not working), MSELoss (with reduction='sum')
# Optimizer: Adam, SGD
......
......@@ -253,11 +253,6 @@ def plot_classification_accuracy(xlabels, precision, recall, accuracy, fscore, a
List of strings for x-axis labels.
"""
precision = [precision]
recall = [recall]
accuracy = [accuracy]
fscore = [fscore]
auroc = [auroc]
pos = np.arange(len(xlabels))
width = 1/(3*len(xlabels))
fig = plt.figure('classification_accuracy')
......@@ -278,25 +273,20 @@ def plot_classification_accuracy(xlabels, precision, recall, accuracy, fscore, a
def plot_boxplot(xlabels, precision, recall, accuracy, fscore, auroc):
precision = [precision]
recall = [recall]
accuracy = [accuracy]
fscore = [fscore]
auroc = [auroc]
fig = plt.figure('boxplot_accuracy')
pos = np.arange(len(xlabels))
width = 1/(5*len(xlabels))
ax = fig.add_axes([0.15, 0.1, 0.75, 0.8])
boxplots = []
boxplots.append(ax.boxplot(fscore, positions=pos - 3*width, widths=width, meanline=True,
boxplots.append(ax.boxplot(fscore.transpose(), positions=pos - 3*width, widths=width, meanline=True,
showmeans=True, patch_artist=True))
boxplots.append(ax.boxplot(precision, positions=pos - 1.5*width, widths=width, meanline=True,
boxplots.append(ax.boxplot(precision.transpose(), positions=pos - 1.5*width, widths=width, meanline=True,
showmeans=True, patch_artist=True))
boxplots.append(ax.boxplot(recall, positions=pos, widths=width, meanline=True, showmeans=True,
boxplots.append(ax.boxplot(recall.transpose(), positions=pos, widths=width, meanline=True, showmeans=True,
patch_artist=True))
boxplots.append(ax.boxplot(accuracy, positions=pos + 1.5*width, widths=width, meanline=True,
boxplots.append(ax.boxplot(accuracy.transpose(), positions=pos + 1.5*width, widths=width, meanline=True,
showmeans=True, patch_artist=True))
boxplots.append(ax.boxplot(auroc, positions=pos + 3*width, widths=width, meanline=True,
boxplots.append(ax.boxplot(auroc.transpose(), positions=pos + 3*width, widths=width, meanline=True,
showmeans=True, patch_artist=True))
count = 0
colors = ['red', 'yellow', 'blue', 'tan', 'green']
......
configfile: 'config.yaml'
import ANN_Data_Generator, Initial_Condition, ANN_Training
from ANN_Training import evaluate_models
import numpy as np
def replace_none(list):
    """Return an empty dict if the argument is None, else the argument itself.

    Used for config entries that may be left empty.
    """
    if list is None:
        return {}
    return list
DIR = config['data_directory']
MODELS = config['models']
if config['random_seed'] is not None:
np.random.seed(config['random_seed'])
rule all:
input:
DIR+'/trained models/model__' + config['model_name'] + '.pt',
DIR+'/model evaluation/classification_accuracy/' + config['model_name'] + '.pdf'
expand(DIR+'/trained models/model__{model}.pt', model=MODELS),
DIR+'/model evaluation/classification_accuracy/' + '_'.join(MODELS.keys()) + '.pdf'
rule test_model:
input:
DIR+'/input_data.npy',
DIR+'/output_data.npy'
params:
model_name = config['model_name'],
num_epochs = config['num_epochs'],
threshold = config['threshold'],
batch_size = config['batch_size'],
model = config['model'],
model_config = replace_none(config['model_config']),
loss_function = config['loss_function'],
optimizer = config['optimizer']
log:
DIR+'/log/test_model.log'
output:
DIR+'/model evaluation/classification_accuracy/' + config['model_name'] + '.pdf'
DIR+'/model evaluation/classification_accuracy/' + '_'.join(MODELS.keys()) + '.pdf'
run:
trainer= ANN_Training.ModelTrainer({'model_name': params.model_name,
'num_epochs': params.num_epochs, 'dir': DIR,
'model_dir': DIR, 'threshold': params.threshold,
'batch_size': params.batch_size, 'model': params.model,
'model_config': params.model_config,
'loss_function': params.loss_function,
'optimizer': params.optimizer})
trainer.test_model()
models = {}
for model in MODELS:
trainer= ANN_Training.ModelTrainer({'model_name': model, 'dir': DIR,
'model_dir': DIR, **MODELS[model]})
models[model] = trainer
evaluate_models(models, DIR, 2)
rule generate_data:
output:
......@@ -74,26 +65,16 @@ rule train_model:
DIR+'/input_data.npy',
DIR+'/output_data.npy'
params:
model_name = config['model_name'],
num_epochs = config['num_epochs'],
threshold = config['threshold'],
batch_size = config['batch_size'],
model = config['model'],
model_config = replace_none(config['model_config']),
loss_function = config['loss_function'],
optimizer = config['optimizer']
models = MODELS
log:
DIR+'/log/train_model.log'
output:
DIR+'/trained models/model__' + config['model_name'] + '.pt',
DIR+'/trained models/loss__' + config['model_name'] + '.pt'
expand(DIR+'/trained models/model__{model}.pt', model=MODELS),
expand(DIR+'/trained models/loss__{model}.pt', model=MODELS)
run:
trainer= ANN_Training.ModelTrainer({'model_name': params.model_name,
'num_epochs': params.num_epochs, 'dir': DIR,
'model_dir': DIR, 'threshold': params.threshold,
'batch_size': params.batch_size, 'model': params.model,
'model_config': params.model_config,
'loss_function': params.loss_function,
'optimizer': params.optimizer})
for model in params.models:
print(model)
trainer= ANN_Training.ModelTrainer({'model_name': model, 'dir': DIR,
'model_dir': DIR, **params.models[model]})
trainer.epoch_training()
trainer.save_model()
\ No newline at end of file
......@@ -24,12 +24,21 @@ functions:
adjustment: 0
# Parameter for Model Training
model_name: Test_Name
models:
Adam:
num_epochs: 1000
threshold: 1.0e-5
batch_size: 500
model: ThreeLayerReLu
model_config:
model_config: {}
loss_function: BCELoss
optimizer: Adam
SGD:
num_epochs: 1000
threshold: 1.0e-5
batch_size: 500
model: ThreeLayerReLu
model_config: {}
loss_function: BCELoss
optimizer: SGD
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment