Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
modelpruner
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Peter Schubert
modelpruner
Commits
0a9159d6
Commit
0a9159d6
authored
2 years ago
by
Peter Schubert
Browse files
Options
Downloads
Patches
Plain Diff
start implementing mp
parent
50ebc192
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
modelpruner/core/model_pruner.py
+46
-31
46 additions, 31 deletions
modelpruner/core/model_pruner.py
modelpruner/models/fba_model.py
+74
-8
74 additions, 8 deletions
modelpruner/models/fba_model.py
modelpruner/problems/lp_problem.py
+1
-0
1 addition, 0 deletions
modelpruner/problems/lp_problem.py
with
121 additions
and
39 deletions
modelpruner/core/model_pruner.py
+
46
−
31
View file @
0a9159d6
...
@@ -4,43 +4,32 @@ import sys
...
@@ -4,43 +4,32 @@ import sys
import
os
import
os
import
re
import
re
import
time
import
time
import
copy
import
numpy
as
np
import
numpy
as
np
import
multiprocessing
as
mp
import
pickle
import
pickle
import
sbmlxdf
from
modelpruner.models.fba_model
import
FbaModel
from
modelpruner.models.fba_model
import
FbaModel
from
modelpruner.core.protected_parts
import
ProtectedParts
from
modelpruner.core.protected_parts
import
ProtectedParts
global
_fba_model
class
ModelPruner
:
class
ModelPruner
:
def
__init__
(
self
,
sbml_fname
,
protected_parts
,
reduced_fname
=
None
,
resume
=
False
):
def
__init__
(
self
,
sbml_fname
,
protected_parts
,
reduced_fname
=
None
,
resume
=
False
):
self
.
tolerance
=
1e-8
# later accessible by set_params() ?
self
.
cpus
=
int
(
os
.
cpu_count
()
*
.
8
)
if
reduced_fname
is
None
:
if
reduced_fname
is
None
:
self
.
reduced_fname
=
re
.
sub
(
r
'
.xml$
'
,
'
_red.xml
'
,
sbml_fname
)
self
.
reduced_fname
=
re
.
sub
(
r
'
.xml$
'
,
'
_red.xml
'
,
sbml_fname
)
self
.
_snapshot_sbml
=
re
.
sub
(
r
'
.xml$
'
,
'
_snapshot.xml
'
,
sbml_fname
)
self
.
_snapshot_sbml
=
re
.
sub
(
r
'
.xml$
'
,
'
_snapshot.xml
'
,
sbml_fname
)
self
.
_snapshot_pkl
=
re
.
sub
(
r
'
.xml$
'
,
'
_snapshot.pkl
'
,
sbml_fname
)
self
.
_snapshot_pkl
=
re
.
sub
(
r
'
.xml$
'
,
'
_snapshot.pkl
'
,
sbml_fname
)
if
resume
is
True
:
if
resume
is
True
:
if
os
.
path
.
exists
(
self
.
_snapshot_sbml
):
sbml_fname
=
self
.
_snapshot_sbml
self
.
sbml_model
=
sbmlxdf
.
Model
(
self
.
_snapshot_sbml
)
self
.
fba_model
=
FbaModel
(
sbml_fname
)
print
(
f
'
Resume from snapshot from
{
self
.
_snapshot_sbml
}
'
f
'
(last modified:
{
time
.
ctime
(
os
.
path
.
getmtime
(
self
.
_snapshot_sbml
))
}
)
'
)
else
:
print
(
f
'
{
self
.
_snapshot_sbml
}
not found!
'
)
raise
FileNotFoundError
else
:
if
os
.
path
.
exists
(
sbml_fname
):
self
.
sbml_model
=
sbmlxdf
.
Model
(
sbml_fname
)
print
(
f
'
Full SBML model loaded from
{
sbml_fname
}
'
f
'
(last modified:
{
time
.
ctime
(
os
.
path
.
getmtime
(
sbml_fname
))
}
)
'
)
else
:
print
(
f
'
{
sbml_fname
}
not found!
'
)
raise
FileNotFoundError
self
.
tolerance
=
1e-8
# later accessible by set_params() ?
self
.
fba_model
=
FbaModel
(
self
.
sbml_model
)
self
.
nrp
=
ProtectedParts
(
protected_parts
)
self
.
nrp
=
ProtectedParts
(
protected_parts
)
self
.
protected_sids
=
self
.
nrp
.
initial_protected_sids
self
.
protected_sids
=
self
.
nrp
.
initial_protected_sids
...
@@ -139,7 +128,6 @@ class ModelPruner:
...
@@ -139,7 +128,6 @@ class ModelPruner:
print
(
'
no more reactions to remove
'
)
print
(
'
no more reactions to remove
'
)
# store intermediate results (snapshots)
# store intermediate results (snapshots)
print
(
f
'
snapshot check next:
{
next_snapshot
}
, rids:
{
len
(
self
.
fba_model
.
rids
)
}
'
)
if
(
next_snapshot
is
not
None
)
and
(
len
(
self
.
fba_model
.
rids
)
<
next_snapshot
):
if
(
next_snapshot
is
not
None
)
and
(
len
(
self
.
fba_model
.
rids
)
<
next_snapshot
):
self
.
export_pruned_model
(
self
.
_snapshot_sbml
)
self
.
export_pruned_model
(
self
.
_snapshot_sbml
)
with
open
(
self
.
_snapshot_pkl
,
'
wb
'
)
as
f
:
with
open
(
self
.
_snapshot_pkl
,
'
wb
'
)
as
f
:
...
@@ -160,45 +148,72 @@ class ModelPruner:
...
@@ -160,45 +148,72 @@ class ModelPruner:
if
os
.
path
.
exists
(
fname
):
if
os
.
path
.
exists
(
fname
):
os
.
remove
(
fname
)
os
.
remove
(
fname
)
def
reaction_types_fva
(
self
,
free_rids
):
def
init_worker
(
self
):
global
_fba_model
_fba_model
=
copy
.
deepcopy
(
self
.
fba_model
)
@staticmethod
def
fva_single_rids
(
rid
):
return
_fba_model
.
fva_single_rid
(
rid
)
flux_min_pfs
=
np
.
zeros
((
len
(
self
.
nrp
.
protected_functions
),
len
(
free_rids
)))
def
reaction_types_fva
(
self
,
free_rids
):
flux_max_pfs
=
np
.
zeros
((
len
(
self
.
nrp
.
protected_functions
),
len
(
free_rids
)))
print
(
time
.
strftime
(
"
%H:%M:%S
"
,
time
.
localtime
()))
n_pf
=
len
(
self
.
nrp
.
protected_functions
)
n_pf
=
len
(
self
.
nrp
.
protected_functions
)
flux_min_pfs
=
np
.
zeros
((
n_pf
,
len
(
free_rids
)))
flux_max_pfs
=
np
.
zeros
((
n_pf
,
len
(
free_rids
)))
processes
=
min
(
self
.
cpus
,
len
(
free_rids
))
for
idx
,
pf
in
enumerate
(
self
.
nrp
.
protected_functions
.
values
()):
for
idx
,
pf
in
enumerate
(
self
.
nrp
.
protected_functions
.
values
()):
self
.
set_temp_func_conditions
(
pf
)
self
.
set_temp_func_conditions
(
pf
)
if
processes
>
20
:
# place to split fva for free_rids over several
rid2idx
=
{
rid
:
idx
for
idx
,
rid
in
enumerate
(
free_rids
)}
pool
=
mp
.
Pool
(
processes
,
initializer
=
self
.
init_worker
)
for
rid
,
res
in
pool
.
imap_unordered
(
self
.
fva_single_rids
,
free_rids
):
flux_min_pfs
[
idx
,
rid2idx
[
rid
]]
=
res
[
'
min
'
]
flux_max_pfs
[
idx
,
rid2idx
[
rid
]]
=
res
[
'
max
'
]
pool
.
close
()
pool
.
join
()
else
:
res
=
self
.
fba_model
.
fva_optimize
(
free_rids
,
fract_of_optimum
=
0.0
)
res
=
self
.
fba_model
.
fva_optimize
(
free_rids
,
fract_of_optimum
=
0.0
)
self
.
restore_base_conditions
()
if
res
[
'
success
'
]
is
False
:
print
(
'
FVA unsuccessful
'
)
return
{
'
blocked_rids
'
:
[],
'
essential_rids
'
:
[],
'
candidate_rids_sorted
'
:
[]}
flux_min_pfs
[
idx
]
=
res
[
'
min
'
]
flux_min_pfs
[
idx
]
=
res
[
'
min
'
]
flux_max_pfs
[
idx
]
=
res
[
'
max
'
]
flux_max_pfs
[
idx
]
=
res
[
'
max
'
]
self
.
restore_base_conditions
()
# if res['success'] is False:
# print('FVA unsuccessful')
# return {'blocked_rids': [], 'essential_rids': [], 'candidate_rids_sorted': []}
sys
.
stdout
.
write
(
'
\r
FVA[{}{}] {:3.0f}%
'
.
format
(
sys
.
stdout
.
write
(
'
\r
FVA[{}{}] {:3.0f}%
'
.
format
(
'
=
'
*
int
(
idx
+
1
),
'
'
*
int
(
n_pf
-
idx
-
1
),
(
idx
+
1
)
/
n_pf
*
100
))
'
=
'
*
int
(
idx
+
1
),
'
'
*
int
(
n_pf
-
idx
-
1
),
(
idx
+
1
)
/
n_pf
*
100
))
sys
.
stdout
.
flush
()
sys
.
stdout
.
flush
()
print
()
print
()
# analyze flux ranges per reaction, aggregate fluxes
flux_min_pfs
[
np
.
abs
(
flux_min_pfs
)
<
self
.
tolerance
]
=
0.0
flux_min_pfs
[
np
.
abs
(
flux_min_pfs
)
<
self
.
tolerance
]
=
0.0
flux_max_pfs
[
np
.
abs
(
flux_max_pfs
)
<
self
.
tolerance
]
=
0.0
flux_max_pfs
[
np
.
abs
(
flux_max_pfs
)
<
self
.
tolerance
]
=
0.0
flux_min
=
np
.
min
(
flux_min_pfs
,
axis
=
0
)
flux_min
=
np
.
min
(
flux_min_pfs
,
axis
=
0
)
flux_max
=
np
.
max
(
flux_max_pfs
,
axis
=
0
)
flux_max
=
np
.
max
(
flux_max_pfs
,
axis
=
0
)
# blocked reactions do not carry any flux in all conditions
mask_b
=
np
.
all
(
np
.
vstack
((
flux_min
==
0
,
flux_max
==
0
)),
axis
=
0
)
mask_b
=
np
.
all
(
np
.
vstack
((
flux_min
==
0
,
flux_max
==
0
)),
axis
=
0
)
blocked_rids
=
set
(
np
.
array
(
free_rids
)[
mask_b
])
blocked_rids
=
set
(
np
.
array
(
free_rids
)[
mask_b
])
# essential reactions carry either only forward or reverse flux, exluding zero flux
mask_e1
=
np
.
all
(
np
.
vstack
((
flux_min
<
0
,
flux_max
<
0
)),
axis
=
0
)
mask_e1
=
np
.
all
(
np
.
vstack
((
flux_min
<
0
,
flux_max
<
0
)),
axis
=
0
)
mask_e2
=
np
.
all
(
np
.
vstack
((
flux_min
>
0
,
flux_max
>
0
)),
axis
=
0
)
mask_e2
=
np
.
all
(
np
.
vstack
((
flux_min
>
0
,
flux_max
>
0
)),
axis
=
0
)
mask_e
=
np
.
any
(
np
.
vstack
((
mask_e1
,
mask_e2
)),
axis
=
0
)
mask_e
=
np
.
any
(
np
.
vstack
((
mask_e1
,
mask_e2
)),
axis
=
0
)
essential_rids
=
set
(
np
.
array
(
free_rids
)[
mask_e
])
essential_rids
=
set
(
np
.
array
(
free_rids
)[
mask_e
])
# reactions that support zero flux are considered candiates for removal
# here such reactions are sorted from smallest to largest flux span
mask_c
=
np
.
all
(
np
.
vstack
((
np
.
logical_not
(
mask_b
),
np
.
logical_not
(
mask_e
))),
axis
=
0
)
mask_c
=
np
.
all
(
np
.
vstack
((
np
.
logical_not
(
mask_b
),
np
.
logical_not
(
mask_e
))),
axis
=
0
)
flux_span
=
flux_max
-
flux_min
flux_span
=
flux_max
-
flux_min
ind
=
np
.
argsort
(
flux_span
)
ind
=
np
.
argsort
(
flux_span
)
candidate_rids_sorted
=
list
(
np
.
array
(
free_rids
)[
ind
][
np
.
array
(
mask_c
)[
ind
]])
candidate_rids_sorted
=
list
(
np
.
array
(
free_rids
)[
ind
][
np
.
array
(
mask_c
)[
ind
]])
return
{
'
blocked_rids
'
:
blocked_rids
,
'
essential_rids
'
:
essential_rids
,
return
{
'
blocked_rids
'
:
blocked_rids
,
'
essential_rids
'
:
essential_rids
,
'
candidate_rids_sorted
'
:
candidate_rids_sorted
}
'
candidate_rids_sorted
'
:
candidate_rids_sorted
}
...
...
This diff is collapsed.
Click to expand it.
modelpruner/models/fba_model.py
+
74
−
8
View file @
0a9159d6
import
os
import
time
import
re
import
re
import
math
import
math
import
numpy
as
np
import
numpy
as
np
...
@@ -10,12 +12,21 @@ from modelpruner.problems.lp_problem import LpProblem
...
@@ -10,12 +12,21 @@ from modelpruner.problems.lp_problem import LpProblem
class
FbaModel
:
class
FbaModel
:
def
__init__
(
self
,
sbml_
model
):
def
__init__
(
self
,
sbml_
fname
):
self
.
model_dict
=
sbml_model
.
to_df
()
self
.
sbml_fname
=
sbml_fname
df_reactions
=
self
.
model_dict
[
'
reactions
'
]
if
os
.
path
.
exists
(
sbml_fname
):
df_species
=
self
.
model_dict
[
'
species
'
]
sbml_model
=
sbmlxdf
.
Model
(
sbml_fname
)
df_fbc_objectives
=
self
.
model_dict
[
'
fbcObjectives
'
]
print
(
f
'
SBML model loaded from
{
sbml_fname
}
'
f
'
(last modified:
{
time
.
ctime
(
os
.
path
.
getmtime
(
sbml_fname
))
}
)
'
)
else
:
print
(
f
'
{
sbml_fname
}
not found!
'
)
raise
FileNotFoundError
model_dict
=
sbml_model
.
to_df
()
df_reactions
=
model_dict
[
'
reactions
'
]
df_species
=
model_dict
[
'
species
'
]
df_fbc_objectives
=
model_dict
[
'
fbcObjectives
'
]
self
.
full_model_shape
=
{
'
n_full_rids
'
:
len
(
df_reactions
),
self
.
full_model_shape
=
{
'
n_full_rids
'
:
len
(
df_reactions
),
'
n_full_sids
'
:
len
(
df_species
)}
'
n_full_sids
'
:
len
(
df_species
)}
...
@@ -28,6 +39,11 @@ class FbaModel:
...
@@ -28,6 +39,11 @@ class FbaModel:
df_reactions
[
'
fbcUb
'
].
to_numpy
()))
df_reactions
[
'
fbcUb
'
].
to_numpy
()))
self
.
s_mat
=
sbml_model
.
get_s_matrix
().
to_numpy
()
self
.
s_mat
=
sbml_model
.
get_s_matrix
().
to_numpy
()
# TODO: create LP Problem only once (but what is impact on deepcopy()
# function to assign new LP problem
# TODO: delete rows / cols in LP problem on removal of rids, sids
# TODO: prior to solve, add objective, thereafter immediatly remove objective
self
.
objective
=
{}
self
.
objective
=
{}
for
oid
,
row
in
df_fbc_objectives
.
iterrows
():
for
oid
,
row
in
df_fbc_objectives
.
iterrows
():
if
row
[
'
active
'
]
is
True
:
if
row
[
'
active
'
]
is
True
:
...
@@ -135,6 +151,7 @@ class FbaModel:
...
@@ -135,6 +151,7 @@ class FbaModel:
rids
=
self
.
rids
rids
=
self
.
rids
flux_min
=
np
.
zeros
(
len
(
rids
))
flux_min
=
np
.
zeros
(
len
(
rids
))
flux_max
=
np
.
zeros
(
len
(
rids
))
flux_max
=
np
.
zeros
(
len
(
rids
))
# fix FBA objective as constraint
# fix FBA objective as constraint
fwd_bwd_obj_coefs
=
np
.
hstack
((
self
.
obj_coefs
,
-
self
.
obj_coefs
[
self
.
reversible
]))
fwd_bwd_obj_coefs
=
np
.
hstack
((
self
.
obj_coefs
,
-
self
.
obj_coefs
[
self
.
reversible
]))
if
self
.
obj_dir
==
'
maximize
'
:
if
self
.
obj_dir
==
'
maximize
'
:
...
@@ -164,6 +181,53 @@ class FbaModel:
...
@@ -164,6 +181,53 @@ class FbaModel:
del
lp
del
lp
return
{
'
success
'
:
True
,
'
rids
'
:
np
.
array
(
rids
),
'
min
'
:
flux_min
,
'
max
'
:
flux_max
}
return
{
'
success
'
:
True
,
'
rids
'
:
np
.
array
(
rids
),
'
min
'
:
flux_min
,
'
max
'
:
flux_max
}
def
fva_single_rid
(
self
,
rids
):
"""
FVA - flux variability analysis for all or selected reactions.
:param rids: selected reaction ids to run FVA (alternatively, all)
:type rids: list of strings (default: None - all reactions)
:return: rid, min and max flux values
:rtype: dict
"""
lp
=
self
.
create_fba_lp
()
if
lp
is
None
:
return
{
'
success
'
:
False
,
'
message
'
:
'
not an FBA model
'
}
res
=
lp
.
solve
(
short_result
=
True
)
if
res
[
'
success
'
]
is
False
:
del
lp
return
{
'
success
'
:
False
,
'
message
'
:
res
[
'
generic_status
'
]}
if
rids
is
None
:
rids
=
self
.
rids
flux_min
=
np
.
zeros
(
len
(
rids
))
flux_max
=
np
.
zeros
(
len
(
rids
))
# fix FBA objective as constraint
fwd_bwd_obj_coefs
=
np
.
hstack
((
self
.
obj_coefs
,
-
self
.
obj_coefs
[
self
.
reversible
]))
if
self
.
obj_dir
==
'
maximize
'
:
row_bds
=
np
.
array
([
0.0
,
np
.
nan
])
else
:
row_bds
=
np
.
array
([
np
.
nan
,
1000.0
])
lp
.
add_constraint
(
fwd_bwd_obj_coefs
,
row_bds
)
n_cols
=
len
(
self
.
rids
)
for
idx
,
rid
in
enumerate
(
rids
):
# select reaction maximize/minimize flux
new_obj_coefs
=
np
.
zeros
(
n_cols
)
new_obj_coefs
[
self
.
rid2idx
[
rid
]]
=
1
new_fwd_bwd_obj_coefs
=
np
.
hstack
((
new_obj_coefs
,
-
new_obj_coefs
[
self
.
reversible
]))
lp
.
set_obj_coefs
(
new_fwd_bwd_obj_coefs
)
lp
.
set_obj_dir
(
'
minimize
'
)
res
=
lp
.
solve
(
short_result
=
True
)
flux_min
[
idx
]
=
res
[
'
fun
'
]
if
res
[
'
success
'
]
else
np
.
nan
lp
.
set_obj_dir
(
'
maximize
'
)
res
=
lp
.
solve
(
short_result
=
True
)
flux_max
[
idx
]
=
res
[
'
fun
'
]
if
res
[
'
success
'
]
else
np
.
nan
del
lp
return
{
'
success
'
:
True
,
'
rids
'
:
np
.
array
(
rids
),
'
min
'
:
flux_min
,
'
max
'
:
flux_max
}
def
update_flux_bounds
(
self
,
flux_bounds
):
def
update_flux_bounds
(
self
,
flux_bounds
):
"""
selectivly set lower/upper flux bounds for given reactions.
"""
selectivly set lower/upper flux bounds for given reactions.
...
@@ -269,9 +333,11 @@ class FbaModel:
...
@@ -269,9 +333,11 @@ class FbaModel:
def
export_pruned_model
(
self
,
pruned_sbml
):
def
export_pruned_model
(
self
,
pruned_sbml
):
pruned_mdict
=
self
.
model_dict
.
copy
()
sbml_model
=
sbmlxdf
.
Model
(
self
.
sbml_fname
)
pruned_mdict
[
'
reactions
'
]
=
self
.
model_dict
[
'
reactions
'
].
loc
[
self
.
rids
]
model_dict
=
sbml_model
.
to_df
()
pruned_mdict
[
'
species
'
]
=
self
.
model_dict
[
'
species
'
].
loc
[
self
.
sids
]
pruned_mdict
=
model_dict
.
copy
()
pruned_mdict
[
'
reactions
'
]
=
model_dict
[
'
reactions
'
].
loc
[
self
.
rids
]
pruned_mdict
[
'
species
'
]
=
model_dict
[
'
species
'
].
loc
[
self
.
sids
]
# update model attributes (ids, names and modification date)
# update model attributes (ids, names and modification date)
pruned_mdict
[
'
modelAttrs
'
][
'
id
'
]
+=
'
_pruned
'
pruned_mdict
[
'
modelAttrs
'
][
'
id
'
]
+=
'
_pruned
'
...
...
This diff is collapsed.
Click to expand it.
modelpruner/problems/lp_problem.py
+
1
−
0
View file @
0a9159d6
...
@@ -258,6 +258,7 @@ class LpProblem:
...
@@ -258,6 +258,7 @@ class LpProblem:
glpk
.
glp_scale_prob
(
self
.
lp
,
opt
)
glpk
.
glp_scale_prob
(
self
.
lp
,
opt
)
# sjj = [glpk.glp_get_sjj(self.lp, 1+i) for i in range(glpk.glp_get_num_cols(self.lp))]
# sjj = [glpk.glp_get_sjj(self.lp, 1+i) for i in range(glpk.glp_get_num_cols(self.lp))]
# rii = [glpk.glp_get_rii(self.lp, 1+i) for i in range(glpk.glp_get_num_rows(self.lp))]
# rii = [glpk.glp_get_rii(self.lp, 1+i) for i in range(glpk.glp_get_num_rows(self.lp))]
# TODO: glpk.glp_adv_basis(self.lp, 0)
simplex_result
=
glpk
.
glp_simplex
(
self
.
lp
,
smcp
)
simplex_result
=
glpk
.
glp_simplex
(
self
.
lp
,
smcp
)
return
self
.
results
(
simplex_result
,
short_result
)
return
self
.
results
(
simplex_result
,
short_result
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment