Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
Troubled Cell Detection
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Laura Christine Kühle
Troubled Cell Detection
Commits
7deb16ed
Commit
7deb16ed
authored
3 years ago
by
Laura Christine Kühle
Browse files
Options
Downloads
Patches
Plain Diff
Added comments.
parent
440409e2
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
ANN_Training.py
+21
-6
21 additions, 6 deletions
ANN_Training.py
with
21 additions
and
6 deletions
ANN_Training.py
+
21
−
6
View file @
7deb16ed
...
@@ -14,6 +14,7 @@ TODO: Improve legend layout -> Done
...
@@ -14,6 +14,7 @@ TODO: Improve legend layout -> Done
TODO: Put plotting into separate function -> Done
TODO: Put plotting into separate function -> Done
TODO: Reduce number of testing epochs to 50 -> Done
TODO: Reduce number of testing epochs to 50 -> Done
TODO: Rename
'
data_directory
'
to
'
data_dir
'
-> Done
TODO: Rename
'
data_directory
'
to
'
data_dir
'
-> Done
TODO: Add comments -> Done
"""
"""
import
numpy
as
np
import
numpy
as
np
...
@@ -108,8 +109,6 @@ class ModelTrainer:
...
@@ -108,8 +109,6 @@ class ModelTrainer:
self
.
_optimizer
=
getattr
(
torch
.
optim
,
optimizer
)(
self
.
_optimizer
=
getattr
(
torch
.
optim
,
optimizer
)(
self
.
_model
.
parameters
(),
**
optimizer_config
)
self
.
_model
.
parameters
(),
**
optimizer_config
)
self
.
_validation_loss
=
torch
.
zeros
(
self
.
_num_epochs
//
10
)
self
.
_validation_loss
=
torch
.
zeros
(
self
.
_num_epochs
//
10
)
# print(type(self._model), type(self._loss_function),
# type(self._optimizer), type(self._validation_loss))
def
epoch_training
(
self
,
dataset
:
torch
.
utils
.
data
.
dataset
.
TensorDataset
,
def
epoch_training
(
self
,
dataset
:
torch
.
utils
.
data
.
dataset
.
TensorDataset
,
num_epochs
:
int
=
None
,
verbose
:
bool
=
True
)
->
None
:
num_epochs
:
int
=
None
,
verbose
:
bool
=
True
)
->
None
:
...
@@ -129,7 +128,6 @@ class ModelTrainer:
...
@@ -129,7 +128,6 @@ class ModelTrainer:
Flag whether commentary in console is wanted. Default: True.
Flag whether commentary in console is wanted. Default: True.
"""
"""
# print(type(dataset))
tic
=
time
.
perf_counter
()
tic
=
time
.
perf_counter
()
if
num_epochs
is
None
:
if
num_epochs
is
None
:
num_epochs
=
self
.
_num_epochs
num_epochs
=
self
.
_num_epochs
...
@@ -146,7 +144,7 @@ class ModelTrainer:
...
@@ -146,7 +144,7 @@ class ModelTrainer:
shuffle
=
True
)
shuffle
=
True
)
valid_dl
=
DataLoader
(
valid_ds
,
batch_size
=
self
.
_batch_size
*
2
)
valid_dl
=
DataLoader
(
valid_ds
,
batch_size
=
self
.
_batch_size
*
2
)
# Train
ing
with
V
alidation
# Train with
v
alidation
if
verbose
:
if
verbose
:
print
(
'
\n
Training model...
'
)
print
(
'
\n
Training model...
'
)
print
(
'
Number of epochs:
'
,
num_epochs
)
print
(
'
Number of epochs:
'
,
num_epochs
)
...
@@ -163,6 +161,7 @@ class ModelTrainer:
...
@@ -163,6 +161,7 @@ class ModelTrainer:
self
.
_optimizer
.
step
()
self
.
_optimizer
.
step
()
self
.
_optimizer
.
zero_grad
()
self
.
_optimizer
.
zero_grad
()
# Determine validation loss
self
.
_model
.
eval
()
self
.
_model
.
eval
()
with
torch
.
no_grad
():
with
torch
.
no_grad
():
valid_loss
=
sum
(
valid_loss
=
sum
(
...
@@ -170,6 +169,7 @@ class ModelTrainer:
...
@@ -170,6 +169,7 @@ class ModelTrainer:
y_batch_valid
.
float
())
y_batch_valid
.
float
())
for
x_batch_valid
,
y_batch_valid
in
valid_dl
)
for
x_batch_valid
,
y_batch_valid
in
valid_dl
)
# Report validation loss
if
(
epoch
+
1
)
%
100
==
0
:
if
(
epoch
+
1
)
%
100
==
0
:
self
.
_validation_loss
[
int
((
epoch
+
1
)
/
100
)
-
1
]
\
self
.
_validation_loss
[
int
((
epoch
+
1
)
/
100
)
-
1
]
\
=
valid_loss
/
len
(
valid_dl
)
=
valid_loss
/
len
(
valid_dl
)
...
@@ -177,6 +177,7 @@ class ModelTrainer:
...
@@ -177,6 +177,7 @@ class ModelTrainer:
print
(
epoch
+
1
,
'
epochs completed. Loss:
'
,
print
(
epoch
+
1
,
'
epochs completed. Loss:
'
,
valid_loss
/
len
(
valid_dl
))
valid_loss
/
len
(
valid_dl
))
# Interrupt if threshold is reached
if
valid_loss
/
len
(
valid_dl
)
<
self
.
_threshold
:
if
valid_loss
/
len
(
valid_dl
)
<
self
.
_threshold
:
break
break
toc_train
=
time
.
perf_counter
()
toc_train
=
time
.
perf_counter
()
...
@@ -208,13 +209,16 @@ class ModelTrainer:
...
@@ -208,13 +209,16 @@ class ModelTrainer:
Dictionary containing classification evaluation data.
Dictionary containing classification evaluation data.
"""
"""
# Train model
self
.
epoch_training
(
training_set
,
num_epochs
=
50
,
verbose
=
False
)
self
.
epoch_training
(
training_set
,
num_epochs
=
50
,
verbose
=
False
)
self
.
_model
.
eval
()
self
.
_model
.
eval
()
# Classify data
x_test
,
y_test
=
test_set
x_test
,
y_test
=
test_set
model_score
=
self
.
_model
(
x_test
.
float
())
model_score
=
self
.
_model
(
x_test
.
float
())
model_output
=
torch
.
argmax
(
model_score
,
dim
=
1
)
model_output
=
torch
.
argmax
(
model_score
,
dim
=
1
)
# Evaluate classification
y_true
=
y_test
.
detach
().
numpy
()[:,
1
]
y_true
=
y_test
.
detach
().
numpy
()[:,
1
]
y_pred
=
model_output
.
detach
().
numpy
()
y_pred
=
model_output
.
detach
().
numpy
()
accuracy
=
accuracy_score
(
y_true
,
y_pred
)
accuracy
=
accuracy_score
(
y_true
,
y_pred
)
...
@@ -284,7 +288,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
...
@@ -284,7 +288,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
compare_normalization
:
bool
=
False
)
->
None
:
compare_normalization
:
bool
=
False
)
->
None
:
"""
Evaluates the classification of a given set of models.
"""
Evaluates the classification of a given set of models.
Evaluates the classification and saves the results in a
json
file.
Evaluates the classification and saves the results in a
JSON
file.
Parameters
Parameters
----------
----------
...
@@ -300,23 +304,28 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
...
@@ -300,23 +304,28 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
"""
"""
tic
=
time
.
perf_counter
()
tic
=
time
.
perf_counter
()
# Read training data
print
(
'
Read normalized training data.
'
)
print
(
'
Read normalized training data.
'
)
datasets
=
{
'
normalized
'
:
read_training_data
(
directory
)}
datasets
=
{
'
normalized
'
:
read_training_data
(
directory
)}
if
compare_normalization
:
if
compare_normalization
:
print
(
'
Read raw, non-normalized training data.
'
)
print
(
'
Read raw, non-normalized training data.
'
)
datasets
[
'
raw
'
]
=
read_training_data
(
directory
,
False
)
datasets
[
'
raw
'
]
=
read_training_data
(
directory
,
False
)
# Train models for evaluation
print
(
'
\n
Training models with 5-fold cross validation...
'
)
print
(
'
\n
Training models with 5-fold cross validation...
'
)
print
(
'
Number of iterations:
'
,
num_iterations
)
print
(
'
Number of iterations:
'
,
num_iterations
)
tic_train
=
time
.
perf_counter
()
tic_train
=
time
.
perf_counter
()
classification_stats
=
{}
classification_stats
=
{}
for
iteration
in
range
(
num_iterations
):
for
iteration
in
range
(
num_iterations
):
# Split data for cross validation
for
train_index
,
test_index
in
KFold
(
for
train_index
,
test_index
in
KFold
(
n_splits
=
5
,
shuffle
=
True
).
split
(
datasets
[
'
normalized
'
]):
n_splits
=
5
,
shuffle
=
True
).
split
(
datasets
[
'
normalized
'
]):
for
dataset
in
datasets
.
keys
():
for
dataset
in
datasets
.
keys
():
training_set
=
TensorDataset
(
*
datasets
[
dataset
][
train_index
])
training_set
=
TensorDataset
(
*
datasets
[
dataset
][
train_index
])
test_set
=
datasets
[
dataset
][
test_index
]
test_set
=
datasets
[
dataset
][
test_index
]
# Save results for each model on split dataset
for
model
in
models
:
for
model
in
models
:
result
=
models
[
model
].
test_model
(
training_set
,
test_set
)
result
=
models
[
model
].
test_model
(
training_set
,
test_set
)
for
measure
in
result
.
keys
():
for
measure
in
result
.
keys
():
...
@@ -328,6 +337,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
...
@@ -328,6 +337,7 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
classification_stats
[
measure
][
model
+
'
(
'
+
dataset
+
classification_stats
[
measure
][
model
+
'
(
'
+
dataset
+
'
)
'
].
append
(
'
)
'
].
append
(
result
[
measure
])
result
[
measure
])
# Report status
if
iteration
+
1
%
max
(
10
,
10
*
(
num_iterations
//
100
)):
if
iteration
+
1
%
max
(
10
,
10
*
(
num_iterations
//
100
)):
print
(
iteration
+
1
,
'
iterations completed.
'
)
print
(
iteration
+
1
,
'
iterations completed.
'
)
toc_train
=
time
.
perf_counter
()
toc_train
=
time
.
perf_counter
()
...
@@ -339,7 +349,8 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
...
@@ -339,7 +349,8 @@ def evaluate_models(models: dict, directory: str, num_iterations: int = 100,
if
not
os
.
path
.
exists
(
plot_dir
):
if
not
os
.
path
.
exists
(
plot_dir
):
os
.
makedirs
(
plot_dir
)
os
.
makedirs
(
plot_dir
)
print
(
'
Saving evaluation results in json format.
'
)
# Save evaluation results in JSON format
print
(
'
Saving evaluation results in JSON format.
'
)
with
open
(
plot_dir
+
'
/
'
+
'
_
'
.
join
(
models
.
keys
())
+
'
.json
'
,
'
w
'
)
\
with
open
(
plot_dir
+
'
/
'
+
'
_
'
.
join
(
models
.
keys
())
+
'
.json
'
,
'
w
'
)
\
as
json_file
:
as
json_file
:
json_file
.
write
(
json
.
dumps
(
classification_stats
))
json_file
.
write
(
json
.
dumps
(
classification_stats
))
...
@@ -367,16 +378,20 @@ def plot_evaluation_results(evaluation_file: str, directory: str,
...
@@ -367,16 +378,20 @@ def plot_evaluation_results(evaluation_file: str, directory: str,
"""
"""
tic
=
time
.
perf_counter
()
tic
=
time
.
perf_counter
()
# Set colors if not given
if
colors
is
None
:
if
colors
is
None
:
colors
=
{
'
Accuracy
'
:
'
magenta
'
,
'
Precision_Smooth
'
:
'
red
'
,
colors
=
{
'
Accuracy
'
:
'
magenta
'
,
'
Precision_Smooth
'
:
'
red
'
,
'
Precision_Troubled
'
:
'
#8B0000
'
,
'
Recall_Smooth
'
:
'
blue
'
,
'
Precision_Troubled
'
:
'
#8B0000
'
,
'
Recall_Smooth
'
:
'
blue
'
,
'
Recall_Troubled
'
:
'
#00008B
'
,
'
F-Score_Smooth
'
:
'
green
'
,
'
Recall_Troubled
'
:
'
#00008B
'
,
'
F-Score_Smooth
'
:
'
green
'
,
'
F-Score_Troubled
'
:
'
#006400
'
,
'
AUROC
'
:
'
yellow
'
}
'
F-Score_Troubled
'
:
'
#006400
'
,
'
AUROC
'
:
'
yellow
'
}
# Read evaluation results
print
(
'
Reading evaluation results.
'
)
print
(
'
Reading evaluation results.
'
)
with
open
(
evaluation_file
)
as
json_file
:
with
open
(
evaluation_file
)
as
json_file
:
classification_stats
=
json
.
load
(
json_file
)
classification_stats
=
json
.
load
(
json_file
)
# Plot data
print
(
'
\n
Plotting evaluation of trained models...
'
)
print
(
'
\n
Plotting evaluation of trained models...
'
)
print
(
'
Plotting data in boxplot.
'
)
print
(
'
Plotting data in boxplot.
'
)
models
=
classification_stats
[
list
(
colors
.
keys
())[
0
]].
keys
()
models
=
classification_stats
[
list
(
colors
.
keys
())[
0
]].
keys
()
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment