Troubled Cell Detection · Commit c4702196
Authored 3 years ago by Laura Christine Kühle
Parent: 7456c491

Moved plotting of model evaluation in separate function.

Showing 2 changed files with 83 additions and 32 deletions:
  ANN_Training.py             +57 −28
  workflows/ANN_training.smk  +26 −4
ANN_Training.py  +57 −28
@@ -9,8 +9,9 @@ TODO: Add README for ANN training
 TODO: Fix random seed
 TODO: Write-protect all data and models
 TODO: Put legend outside plot (bbox_to_anchor)
-TODO: Put plotting into separate function
+TODO: Put plotting into separate function -> Done
 TODO: Reduce number of testing epochs to 50 -> Done
+TODO: Rename 'data_directory' to 'data_dir' -> Done
 
 """
 import numpy as np
@@ -105,8 +106,8 @@ class ModelTrainer(object):
         self._optimizer = getattr(torch.optim, optimizer)(
             self._model.parameters(), **optimizer_config)
         self._validation_loss = torch.zeros(self._num_epochs//10)
-        print(type(self._model), type(self._loss_function),
-              type(self._optimizer), type(self._validation_loss))
+        # print(type(self._model), type(self._loss_function),
+        #       type(self._optimizer), type(self._validation_loss))
 
     def epoch_training(self, dataset: torch.utils.data.dataset.TensorDataset,
                        num_epochs: int = None, verbose: bool = True) -> None:
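
Note: the type-checking prints above are only commented out, not removed. If that information is still occasionally useful, a possible alternative (not part of this commit) is to route it through Python's logging module so it can be silenced per run; the logger name and helper below are purely illustrative.

import logging

# Hypothetical debug logging in place of the commented-out print statements;
# logger name and helper are illustrative, not part of this commit.
logger = logging.getLogger('ANN_Training')

def log_trainer_types(trainer) -> None:
    # Logs the same type information the silenced print statements showed.
    logger.debug('model=%s, loss=%s, optimizer=%s, validation_loss=%s',
                 type(trainer._model), type(trainer._loss_function),
                 type(trainer._optimizer), type(trainer._validation_loss))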
@@ -126,7 +127,7 @@ class ModelTrainer(object):
             Flag whether commentary in console is wanted. Default: False.
 
         """
-        print(type(dataset))
+        # print(type(dataset))
         tic = time.perf_counter()
         if num_epochs is None:
             num_epochs = self._num_epochs
@@ -278,10 +279,11 @@ def read_training_data(directory: str, normalized: bool = True) -> \
 def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
-                    colors: dict = None,
                     compare_normalization: bool = False) -> None:
     """Evaluates the classification of a given set of models.
 
+    Evaluates the classification and saves the results in a json file.
+
     Parameters
     ----------
     models : dict
@@ -290,34 +292,22 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
         Path to directory for saving resulting plots.
     num_iterations : int, optional
         Number of iterations for evaluation. Default: 100.
-    colors : dict, optional
-        Dictionary containing plotting colors. If None, set to default colors.
-        Default: None.
     compare_normalization : bool, optional
         Flag whether both normalized and raw data should be evaluated.
         Default: False.
 
     """
     tic = time.perf_counter()
-    if colors is None:
-        colors = {'Accuracy': 'magenta', 'Precision_Smooth': 'red',
-                  'Precision_Troubled': '#8B0000', 'Recall_Smooth': 'blue',
-                  'Recall_Troubled': '#00008B', 'F-Score_Smooth': 'green',
-                  'F-Score_Troubled': '#006400', 'AUROC': 'yellow'}
     print('Read normalized training data.')
     datasets = {'normalized': read_training_data(directory)}
     if compare_normalization:
         print('Read raw, non-normalized training data.')
         datasets['raw'] = read_training_data(directory, False)
-    classification_stats = {measure: {model + ' (' + dataset + ')': []
-                                      for model in models
-                                      for dataset in datasets}
-                            for measure in colors}
     print('\nTraining models with 5-fold cross validation...')
     print('Number of iterations:', num_iterations)
     tic_train = time.perf_counter()
+    classification_stats = {}
     for iteration in range(num_iterations):
         for train_index, test_index in KFold(
                 n_splits=5, shuffle=True).split(datasets['normalized']):
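
For reference, the loop structure above runs a fresh shuffled 5-fold split in every iteration, so each model is tested on 5 * num_iterations train/test pairs. A minimal sketch, assuming the dataset behaves like an indexable sequence of samples (data stands in for datasets['normalized']):

import numpy as np
from sklearn.model_selection import KFold

num_iterations = 2
data = np.arange(20)  # stand-in for datasets['normalized']

for iteration in range(num_iterations):
    # Every iteration reshuffles before splitting into 5 folds.
    for train_index, test_index in KFold(n_splits=5,
                                         shuffle=True).split(data):
        training_set, test_set = data[train_index], data[test_index]
        # each model would then be tested on this train/test pair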
@@ -327,7 +317,12 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
             for model in models:
                 result = models[model].test_model(training_set, test_set)
-                for measure in colors:
+                for measure in result.keys():
+                    if measure not in classification_stats.keys():
+                        classification_stats[measure] = \
+                            {model + ' (' + dataset + ')': []
+                             for model in models for dataset in datasets}
                     classification_stats[measure][model + ' (' + dataset +
                                                   ')'].append(result[measure])
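
The new branch builds classification_stats lazily from whatever measures test_model() returns instead of relying on the removed colors argument. A self-contained sketch of that accumulation pattern, with made-up model, dataset and measure names:

# Lazy accumulation pattern used above; names and values are illustrative.
classification_stats = {}
models = {'Adam': None, 'SGD': None}
datasets = {'normalized': None}

result = {'Accuracy': 0.93, 'AUROC': 0.97}  # hypothetical test_model() output
model, dataset = 'Adam', 'normalized'
for measure in result.keys():
    if measure not in classification_stats.keys():
        # First time this measure is seen: one empty list per
        # model/dataset combination.
        classification_stats[measure] = {m + ' (' + d + ')': []
                                         for m in models for d in datasets}
    classification_stats[measure][model + ' (' + dataset + ')'].append(
        result[measure])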
@@ -337,21 +332,55 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
     print('Finished training models with 5-fold cross validation!')
     print(f'Training time: {toc_train-tic_train:0.4f}s\n')
 
+    print('Saving evaluation results in json format.')
     with open(directory + '/' + '_'.join(models.keys()) + '.json', 'w') \
             as json_file:
         json_file.write(json.dumps(classification_stats))
-
-    with open(directory + '/' + '_'.join(models.keys()) + '.json') \
-            as json_file:
-        classification_stats = json.load(json_file)
-
-    print('Plotting evaluation of trained models.')
+    toc = time.perf_counter()
+    print(f'Total runtime: {toc-tic:0.4f}s')
+
+
+def plot_evaluation_results(evaluation_file: str, directory: str,
+                            colors: dict = None) -> None:
+    """Plots given evaluation results of model classifications.
+
+    Plots evaluation results for all measures for which a color is given. If
+    colors is set to None, all measures are plotted with a default color
+    scheme.
+
+    Parameters
+    ----------
+    evaluation_file: str
+        Path to file containing evaluation results.
+    directory : str
+        Path to directory for saving resulting plots.
+    colors : dict, optional
+        Dictionary containing plotting colors. If None, set to default colors.
+        Default: None.
+
+    """
+    tic = time.perf_counter()
+    if colors is None:
+        colors = {'Accuracy': 'magenta', 'Precision_Smooth': 'red',
+                  'Precision_Troubled': '#8B0000', 'Recall_Smooth': 'blue',
+                  'Recall_Troubled': '#00008B', 'F-Score_Smooth': 'green',
+                  'F-Score_Troubled': '#006400', 'AUROC': 'yellow'}
+
+    print('Reading evaluation results.')
+    with open(evaluation_file) as json_file:
+        classification_stats = json.load(json_file)
+
+    print('\nPlotting evaluation of trained models...')
+    print('Plotting data in boxplot.')
+    models = classification_stats[list(colors.keys())[0]].keys()
     plot_boxplot(classification_stats, colors)
-    classification_stats = {measure: {model + ' (' + dataset + ')': np.array(
-        classification_stats[measure][model + ' (' + dataset + ')']).mean()
-                                      for model in models for dataset in datasets}
-                            for measure in colors}
+    print('Plotting averaged data in barplot.')
+    classification_stats = {measure: {model: np.array(
+        classification_stats[measure][model]).mean() for model in models}
+                            for measure in colors}
     plot_classification_accuracy(classification_stats, colors)
+    print('Finished plotting evaluation of trained models!\n')
 
     # Set paths for plot files if not existing already
     plot_dir = directory + '/model evaluation'
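
With this split, evaluate_models() only trains, tests and writes the json file, while the new plot_evaluation_results() reads that file back and produces the plots. A hypothetical call sequence; the directory name and trainer configuration below are illustrative only:

import ANN_Training
from ANN_Training import evaluate_models, plot_evaluation_results

# Illustrative setup; the real trainer config comes from the workflow config.
directory = 'trained models'
models = {'Adam': ANN_Training.ModelTrainer(
    {'model_name': 'Adam', 'dir': directory, 'model_dir': directory})}

evaluate_models(models=models, directory=directory, num_iterations=100,
                compare_normalization=True)
plot_evaluation_results(
    evaluation_file=directory + '/' + '_'.join(models.keys()) + '.json',
    directory=directory)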
@@ -360,13 +389,13 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
     # Save plots
     print('Saving plots.')
+    file_name = evaluation_file.split('/')[-1].rstrip('.json')
     for identifier in plt.get_figlabels():
         # Set path for figure directory if not existing already
         if not os.path.exists(plot_dir + '/' + identifier):
             os.makedirs(plot_dir + '/' + identifier)
 
         plt.figure(identifier)
-        plt.savefig(plot_dir + '/' + identifier + '/' +
-                    '_'.join(models.keys()) + '.pdf')
+        plt.savefig(plot_dir + '/' + identifier + '/' + file_name + '.pdf')
     toc = time.perf_counter()
     print(f'Total runtime: {toc-tic:0.4f}s')
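
One caveat with the new file_name line: str.rstrip('.json') strips any trailing run of the characters '.', 'j', 's', 'o', 'n', not the literal suffix. The model-name joins used here may never trigger this, but for illustration (os.path.splitext is a suffix-safe alternative, not part of this commit):

import os

print('adams_on.json'.rstrip('.json'))                              # adams_
print(os.path.splitext(os.path.basename('dir/adams_on.json'))[0])   # adams_on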
workflows/ANN_training.smk  +26 −4
@@ -17,17 +17,39 @@ rule all:
             + '_'.join(MODELS.keys()) + '.pdf'
     default_target: True
 
+rule plot_test_results:
+    input:
+        json_file=DIR+'/'+ '_'.join(MODELS.keys()) + '.json'
+    output:
+        DIR+'/model evaluation/classification_accuracy/'
+            + '_'.join(MODELS.keys())+'.pdf'
+    params:
+        colors = config['classification_colors']
+    log:
+        DIR+'/log/plot_test_results.log'
+    run:
+        models = {}
+        with open(str(log), 'w') as logfile:
+            sys.stdout = logfile
+            sys.stderr = logfile
+            for model in MODELS:
+                trainer= ANN_Training.ModelTrainer(
+                    {'model_name': model, 'dir': DIR, 'model_dir': DIR,
+                     **MODELS[model]})
+                models[model] = trainer
+            plot_evaluation_results(evaluation_file=input.json_file,
+                                    directory=DIR, colors=params.colors)
+
 rule test_model:
     input:
         DIR+'/input_data.npy',
         DIR+'/normalized_input_data.npy',
         DIR+'/output_data.npy'
     output:
-        DIR+'/model evaluation/classification_accuracy/'
-            + '_'.join(MODELS.keys()) + '.pdf'
+        DIR+'/'+'_'.join(MODELS.keys())+'.json'
     params:
         num_iterations = config['num_iterations'],
-        colors = config['classification_colors'],
         compare_normalization = config['compare_normalization']
     log:
         DIR+'/log/test_model.log'
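
The run block of the new rule is ordinary Python. A plain-Python sketch of its core, with DIR and MODELS standing in for the Snakemake globals; the ModelTrainer construction loop that the rule still performs is omitted here:

import sys
from ANN_Training import plot_evaluation_results

# Placeholders standing in for the Snakemake globals and params.
DIR = 'trained models'
MODELS = {'Adam': {}}

with open(DIR + '/log/plot_test_results.log', 'w') as logfile:
    sys.stdout = logfile   # redirect console output into the rule's log file
    sys.stderr = logfile
    plot_evaluation_results(
        evaluation_file=DIR + '/' + '_'.join(MODELS.keys()) + '.json',
        directory=DIR, colors=None)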
@@ -42,7 +64,7 @@ rule test_model:
                     **MODELS[model]})
                 models[model] = trainer
             evaluate_models(models=models, directory=DIR,
                             num_iterations=params.num_iterations,
-                            colors=params.colors,
                             compare_normalization=params.compare_normalization)
 
 rule train_model: