From 630e788b5e1a1b4f68f1bfaeda12617b5efe996c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BChle=2C=20Laura=20Christine=20=28lakue103=29?= <laura.kuehle@uni-duesseldorf.de> Date: Tue, 1 Mar 2022 17:46:28 +0100 Subject: [PATCH] Improved file structure and naming for ANN. --- ANN_Data_Generator.py | 6 +++--- ANN_Training.py | 39 ++++++++++++++++++++------------------ Plotting.py | 8 ++++---- config.yaml | 2 +- workflows/ANN_data.smk | 4 ++-- workflows/ANN_training.smk | 26 ++++++++++++------------- 6 files changed, 44 insertions(+), 41 deletions(-) diff --git a/ANN_Data_Generator.py b/ANN_Data_Generator.py index 76bd497..f56e026 100644 --- a/ANN_Data_Generator.py +++ b/ANN_Data_Generator.py @@ -142,8 +142,8 @@ class TrainingDataGenerator(object): # Create normalized input data norm_input_matrix = self._normalize_data(input_matrix) - return {'input': input_matrix, 'output': output_matrix, - 'normalized_input': norm_input_matrix} + return {'input_data.raw': input_matrix, 'output_data': output_matrix, + 'input_data.normalized': norm_input_matrix} def _generate_cell_data(self, num_samples, initial_conditions, is_smooth): """Generates random training input and output. @@ -280,5 +280,5 @@ class TrainingDataGenerator(object): """Saves data.""" print('Saving training data.') for key in data.keys(): - name = self._data_dir + '/' + key + '_data.npy' + name = self._data_dir + '/' + key + '.npy' np.save(name, data[key]) diff --git a/ANN_Training.py b/ANN_Training.py index 27f86b6..f937c44 100644 --- a/ANN_Training.py +++ b/ANN_Training.py @@ -7,8 +7,10 @@ Docstring-Style: D200, D400 TODO: Add README for ANN training TODO: Fix random seed -TODO: Write-protect all data and models -TODO: Put legend outside plot (bbox_to_anchor) +TODO: Improve file structure and naming (e.g. use '.' instead of '__') -> Done +TODO: Write-protect all data and models -> Done +TODO: Put legend outside plot (bbox_to_anchor) -> Done +TODO: Improve legend layout -> Done TODO: Put plotting into separate function -> Done TODO: Reduce number of testing epochs to 50 -> Done TODO: Rename 'data_directory' to 'data_dir' -> Done @@ -27,7 +29,7 @@ from sklearn.metrics import accuracy_score, precision_recall_fscore_support, \ roc_auc_score import ANN_Model -from Plotting import plot_classification_accuracy, plot_boxplot +from Plotting import plot_classification_barplot, plot_classification_boxplot matplotlib.use('Agg') @@ -247,10 +249,10 @@ class ModelTrainer(object): os.makedirs(model_dir) # Save model and loss - torch.save(self._model.state_dict(), model_dir + '/model__' + - model_name + '.pt') - torch.save(self._validation_loss, model_dir + '/loss__' + - model_name + '.pt') + torch.save(self._model.state_dict(), model_dir + '/' + + model_name + '.model.pt') + torch.save(self._validation_loss, model_dir + '/' + + model_name + '.loss.pt') def read_training_data(directory: str, normalized: bool = True) -> \ @@ -271,8 +273,8 @@ def read_training_data(directory: str, normalized: bool = True) -> \ """ # Get training dataset from saved file and map to Torch tensor and dataset - input_file = directory + ('/normalized_input_data.npy' - if normalized else '/input_data.npy') + input_file = directory + ('/input_data.normalized.npy' + if normalized else '/input_data.raw.npy') output_file = directory + '/output_data.npy' return TensorDataset(*map(torch.tensor, (np.load(input_file), np.load(output_file)))) @@ -332,8 +334,13 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100, print('Finished training models with 5-fold cross validation!') print(f'Training time: {toc_train - tic_train:0.4f}s\n') + # Set paths for plot files if not existing already + plot_dir = directory + '/model evaluation' + if not os.path.exists(plot_dir): + os.makedirs(plot_dir) + print('Saving evaluation results in json format.') - with open(directory + '/' + '_'.join(models.keys()) + '.json', 'w')\ + with open(plot_dir + '/' + '_'.join(models.keys()) + '.json', 'w')\ as json_file: json_file.write(json.dumps(classification_stats)) toc = time.perf_counter() @@ -341,7 +348,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100, def plot_evaluation_results(evaluation_file: str, directory: str, - colors: dict = None) -> None: + colors: dict = None) -> None: """Plots given evaluation results of model classifications. Plots evaluation results for all measures for which a color is given. If @@ -373,13 +380,13 @@ def plot_evaluation_results(evaluation_file: str, directory: str, print('\nPlotting evaluation of trained models...') print('Plotting data in boxplot.') models = classification_stats[list(colors.keys())[0]].keys() - plot_boxplot(classification_stats, colors) + plot_classification_boxplot(classification_stats, colors) print('Plotting averaged data in barplot.') classification_stats = {measure: {model: np.array( classification_stats[measure][model]).mean() for model in models} for measure in colors} - plot_classification_accuracy(classification_stats, colors) + plot_classification_barplot(classification_stats, colors) print('Finished plotting evaluation of trained models!\n') # Set paths for plot files if not existing already @@ -391,11 +398,7 @@ def plot_evaluation_results(evaluation_file: str, directory: str, print('Saving plots.') file_name = evaluation_file.split('/')[-1].rstrip('.json') for identifier in plt.get_figlabels(): - # Set path for figure directory if not existing already - if not os.path.exists(plot_dir + '/' + identifier): - os.makedirs(plot_dir + '/' + identifier) - plt.figure(identifier) - plt.savefig(plot_dir + '/' + identifier + '/' + file_name + '.pdf') + plt.savefig(plot_dir + '/' + file_name + '.' + identifier + '.pdf') toc = time.perf_counter() print(f'Total runtime: {toc - tic:0.4f}s') diff --git a/Plotting.py b/Plotting.py index 7afc200..0db5713 100644 --- a/Plotting.py +++ b/Plotting.py @@ -261,7 +261,7 @@ def calculate_exact_solution( return grid, exact -def plot_classification_accuracy(evaluation_dict: dict, colors: dict) -> None: +def plot_classification_barplot(evaluation_dict: dict, colors: dict) -> None: """Plots classification accuracy. Plots given evaluation measures in a bar plot for each model. @@ -278,7 +278,7 @@ def plot_classification_accuracy(evaluation_dict: dict, colors: dict) -> None: font_size = 16 - (len(max(model_names, key=len))//3) pos = np.arange(len(model_names)) width = 1/(3*len(model_names)) - fig = plt.figure('classification_accuracy') + fig = plt.figure('barplot') ax = fig.add_axes([0.15, 0.3, 0.6, 0.6]) step_len = 1 adjustment = -(len(model_names)//2)*step_len @@ -299,7 +299,7 @@ def plot_classification_accuracy(evaluation_dict: dict, colors: dict) -> None: ncol=1, fancybox=True, fontsize=8) -def plot_boxplot(evaluation_dict: dict, colors: dict) -> None: +def plot_classification_boxplot(evaluation_dict: dict, colors: dict) -> None: """Plots classification accuracy. Plots given evaluation measures in a boxplot for each model. @@ -314,7 +314,7 @@ def plot_boxplot(evaluation_dict: dict, colors: dict) -> None: """ model_names = evaluation_dict[list(colors.keys())[0]].keys() font_size = 16 - (len(max(model_names, key=len))//3) - fig = plt.figure('boxplot_accuracy') + fig = plt.figure('boxplot') ax = fig.add_axes([0.15, 0.3, 0.6, 0.6]) step_len = 1.5 boxplots = [] diff --git a/config.yaml b/config.yaml index 979aca1..edeea1a 100644 --- a/config.yaml +++ b/config.yaml @@ -21,7 +21,7 @@ Approximation: detector_config: fold_len: 16 whisker_len: 3 - model_state: 'model__Adam.pt' + model_state: 'Adam.model.pt' init_cond: 'Sine' init_config: diff --git a/workflows/ANN_data.smk b/workflows/ANN_data.smk index ab8890f..ef0bc3b 100644 --- a/workflows/ANN_data.smk +++ b/workflows/ANN_data.smk @@ -11,8 +11,8 @@ DIR = config['data_dir'] rule generate_data: output: - protected(DIR+'/input_data.npy'), - protected(DIR+'/normalized_input_data.npy'), + protected(DIR+'/input_data.raw.npy'), + protected(DIR+'/input_data.normalized.npy'), protected(DIR+'/output_data.npy') default_target: True params: diff --git a/workflows/ANN_training.smk b/workflows/ANN_training.smk index a80af1f..5842c6a 100644 --- a/workflows/ANN_training.smk +++ b/workflows/ANN_training.smk @@ -12,17 +12,17 @@ MODELS = config['models'] rule all: input: - expand(DIR+'/trained models/model__{model}.pt', model=MODELS), - DIR+'/model evaluation/classification_accuracy/' - + '_'.join(MODELS.keys()) + '.pdf' + expand(DIR+'/trained models/{model}.model.pt', model=MODELS), + DIR+'/model evaluation/'+'_'.join(MODELS.keys()) + +'.barplot.pdf' default_target: True rule plot_test_results: input: - json_file=DIR+'/'+ '_'.join(MODELS.keys()) + '.json' + json_file=DIR+'/model evaluation/'+ '_'.join(MODELS.keys()) + '.json' output: - DIR+'/model evaluation/classification_accuracy/' - + '_'.join(MODELS.keys())+'.pdf' + DIR+'/model evaluation/'+'_'.join(MODELS.keys()) + +'.barplot.pdf' params: colors = config['classification_colors'] log: @@ -43,11 +43,11 @@ rule plot_test_results: rule test_model: input: - DIR+'/input_data.npy', - DIR+'/normalized_input_data.npy', + DIR+'/input_data.raw.npy', + DIR+'/input_data.normalized.npy', DIR+'/output_data.npy' output: - protected(DIR+'/'+'_'.join(MODELS.keys())+'.json') + protected(DIR+'/model evaluation/'+'_'.join(MODELS.keys())+'.json') params: num_iterations = config['num_iterations'], compare_normalization = config['compare_normalization'] @@ -69,12 +69,12 @@ rule test_model: rule train_model: input: - DIR+'/input_data.npy', - DIR+'/normalized_input_data.npy', + DIR+'/input_data.raw.npy', + DIR+'/input_data.normalized.npy', DIR+'/output_data.npy' output: - protected(DIR+'/trained models/model__{model}.pt'), - protected(DIR+'/trained models/loss__{model}.pt') + protected(DIR+'/trained models/{model}.model.pt'), + protected(DIR+'/trained models/{model}.loss.pt') log: DIR+'/log/train_model_{model}.log' run: -- GitLab