Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
modelpruner
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Peter Schubert
modelpruner
Commits
b58855bb
Commit
b58855bb
authored
2 years ago
by
Peter Schubert
Browse files
Options
Downloads
Patches
Plain Diff
Multiprocessing implemented for FVA (validated on macOS)
parent
9b98c8c4
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
modelpruner/core/model_pruner.py
+136
-56
136 additions, 56 deletions
modelpruner/core/model_pruner.py
modelpruner/models/fba_model.py
+50
-23
50 additions, 23 deletions
modelpruner/models/fba_model.py
modelpruner/problems/lp_problem.py
+9
-12
9 additions, 12 deletions
modelpruner/problems/lp_problem.py
with
195 additions
and
91 deletions
modelpruner/core/model_pruner.py
+
136
−
56
View file @
b58855bb
...
@@ -5,7 +5,6 @@ import os
...
@@ -5,7 +5,6 @@ import os
import
re
import
re
import
time
import
time
import
math
import
math
import
copy
import
numpy
as
np
import
numpy
as
np
import
multiprocessing
as
mp
import
multiprocessing
as
mp
import
json
import
json
...
@@ -14,30 +13,72 @@ from modelpruner.models.fba_model import FbaModel
...
@@ -14,30 +13,72 @@ from modelpruner.models.fba_model import FbaModel
from
modelpruner.core.protected_parts
import
ProtectedParts
from
modelpruner.core.protected_parts
import
ProtectedParts
global
_fba_model
global
_fba_model
global
_pf
global
_pf
s
def
init_worker
(
fba_model
,
pf
):
def
_init_worker
(
fba_model
,
pfs
):
"""
Multiprocessing initializer for workers.
Configure global parameters for the worker process
:param fba_model: Fba model to apply FVA
:type fba_model: Class FbaModel
:param pfs: Protected Functions
:type pfs: dict, key: function id, value: class ProtectedFunction
"""
global
_fba_model
global
_fba_model
global
_pf
global
_pf
s
_fba_model
=
fba_model
.
copy
()
_fba_model
=
fba_model
.
copy
()
_pf
=
pf
_pfs
=
pfs
def
_worker
(
rids
):
"""
Mulitprocessing worker to run FVA for selected reactions over all conditions.
Checked that best performance achieved when worker performs FVA for
all protected functions and a larger range for reactions.
def
worker
(
rid
):
:param rids: reactions for FVA
:type rids: list-like of str
"""
global
_fba_model
global
_fba_model
global
_pf
global
_pf
s
return
_fba_model
.
fva_pf_flux_ranges
(
_pf
,
[
rid
])
flux_min_pfs
=
np
.
zeros
((
len
(
_pfs
),
len
(
rids
)))
flux_max_pfs
=
np
.
zeros
((
len
(
_pfs
),
len
(
rids
)))
for
idx
,
pf
in
enumerate
(
_pfs
.
values
()):
res
=
_fba_model
.
fva_pf_flux_ranges
(
pf
,
rids
)
if
res
[
'
success
'
]
is
True
:
flux_min_pfs
[
idx
]
=
res
[
'
min
'
]
flux_max_pfs
[
idx
]
=
res
[
'
max
'
]
else
:
print
(
f
'
FBA for condition
{
pf
.
obj_id
}
is not feasible
'
)
flux_min_pfs
[
idx
]
=
np
.
nan
flux_max_pfs
[
idx
]
=
np
.
nan
break
# delete linear problem
# del _fba_model.delete_fba_lp()
res
=
{
'
success
'
:
True
,
'
rids
'
:
rids
,
'
min_pfs
'
:
flux_min_pfs
,
'
max_pfs
'
:
flux_max_pfs
}
return
res
class
ModelPruner
:
class
ModelPruner
:
def
__init__
(
self
,
sbml_fname
,
protected_parts
,
resume
=
False
):
def
__init__
(
self
,
sbml_fname
,
protected_parts
,
resume
=
False
):
"""
Init
:param sbml_fname: pathname of original model (ending with
'
.xml
'
)
:type sbml_fname: str
:param protected_parts: pathname of protected parts file or dict
:type protected_parts: str or dict with protected parts information
:param resume: Indicator if to resume pruning from stored snapshot
:type resume: bool, optional (default: False)
"""
self
.
tolerance
=
1e-7
# later accessible by set_params() ?
self
.
tolerance
=
1e-7
# later accessible by set_params() ?
self
.
cpus
=
int
(
os
.
cpu_count
()
*
0.8
)
self
.
cpus
=
int
(
os
.
cpu_count
()
-
0
)
self
.
min_mp_total_flux
=
1000
self
.
_snapshot_sbml
=
re
.
sub
(
r
'
.xml$
'
,
'
_snapshot.xml
'
,
sbml_fname
)
self
.
_snapshot_sbml
=
re
.
sub
(
r
'
.xml$
'
,
'
_snapshot.xml
'
,
sbml_fname
)
self
.
_snapshot_json
=
re
.
sub
(
r
'
.xml$
'
,
'
_snapshot.json
'
,
sbml_fname
)
self
.
_snapshot_json
=
re
.
sub
(
r
'
.xml$
'
,
'
_snapshot.json
'
,
sbml_fname
)
...
@@ -52,7 +93,7 @@ class ModelPruner:
...
@@ -52,7 +93,7 @@ class ModelPruner:
with
open
(
self
.
_snapshot_json
,
'
r
'
)
as
f
:
with
open
(
self
.
_snapshot_json
,
'
r
'
)
as
f
:
snapshot_data
=
json
.
load
(
f
)
snapshot_data
=
json
.
load
(
f
)
self
.
protected_rids
=
set
(
snapshot_data
[
'
protected_rids
'
])
self
.
protected_rids
=
set
(
snapshot_data
[
'
protected_rids
'
])
print
(
f
'
s
napshot restored at
{
len
(
self
.
fba_model
.
rids
)
:
4
d
}
remaining reactions
'
print
(
f
'
S
napshot restored at
{
len
(
self
.
fba_model
.
rids
)
:
4
d
}
remaining reactions
'
f
'
from:
{
self
.
_snapshot_json
}
'
)
f
'
from:
{
self
.
_snapshot_json
}
'
)
else
:
else
:
self
.
protected_rids
=
self
.
nrp
.
protected_rids
self
.
protected_rids
=
self
.
nrp
.
protected_rids
...
@@ -62,19 +103,24 @@ class ModelPruner:
...
@@ -62,19 +103,24 @@ class ModelPruner:
self
.
fba_model
.
update_model_flux_bounds
(
self
.
nrp
.
overwrite_bounds
)
self
.
fba_model
.
update_model_flux_bounds
(
self
.
nrp
.
overwrite_bounds
)
self
.
fba_model
.
update_model_reversible
(
self
.
get_pp_reversible_reactions
())
self
.
fba_model
.
update_model_reversible
(
self
.
get_pp_reversible_reactions
())
print
(
'
d
etermine optimal values for protected functions
'
)
print
(
'
D
etermine optimal values for protected functions
'
)
self
.
set_function_optimum
()
self
.
set_function_optimum
()
print
(
'
Check consistency of protected parts
'
)
print
(
'
Check consistency of protected parts
'
)
self
.
check_protected_parts
()
self
.
check_protected_parts
()
def
get_total_protected_rids
(
self
):
def
get_total_protected_rids
(
self
):
"""
Determine protected reactions, incl. those leading to protected metabolites.
:return: total protected reaction ids
:rtype: set of str
"""
# identify reactions that need protection due to protected metabolites
# identify reactions that need protection due to protected metabolites
# over and above the protected rids provided in protected parts
# over and above the protected rids provided in protected parts
sid_mask
=
np
.
array
([
True
if
sid
in
self
.
protected_sids
else
False
sid_mask
=
np
.
array
([
True
if
sid
in
self
.
protected_sids
else
False
for
sid
in
self
.
fba_model
.
sids
])
for
sid
in
self
.
fba_model
.
sids
])
psf_mask
=
np
.
any
(
self
.
fba_model
.
s_mat
[
sid_mask
]
!=
0
,
axis
=
0
)
psf_mask
=
self
.
fba_model
.
s_mat
_coo
.
tocsr
()
[
sid_mask
]
.
getnnz
(
axis
=
0
)
protected_sid_rids
=
set
(
self
.
fba_model
.
rids
[
psf_mask
])
protected_sid_rids
=
set
(
self
.
fba_model
.
rids
[
psf_mask
])
protected_rids
=
self
.
nrp
.
protected_rids
.
union
(
protected_sid_rids
)
protected_rids
=
self
.
nrp
.
protected_rids
.
union
(
protected_sid_rids
)
return
protected_rids
return
protected_rids
...
@@ -109,7 +155,10 @@ class ModelPruner:
...
@@ -109,7 +155,10 @@ class ModelPruner:
pf
.
set_target_ub
(
pf
.
target_frac
*
pf
.
optimal
)
pf
.
set_target_ub
(
pf
.
target_frac
*
pf
.
optimal
)
def
check_protected_parts
(
self
):
def
check_protected_parts
(
self
):
"""
Report on consisteny issues wrt to protected parts.
E.g. report on reactions and metabolties that do not exist in the model
"""
extra_p_rids
=
self
.
protected_rids
.
difference
(
set
(
self
.
fba_model
.
rids
))
extra_p_rids
=
self
.
protected_rids
.
difference
(
set
(
self
.
fba_model
.
rids
))
extra_p_sids
=
self
.
protected_sids
.
difference
(
set
(
self
.
fba_model
.
sids
))
extra_p_sids
=
self
.
protected_sids
.
difference
(
set
(
self
.
fba_model
.
sids
))
pf_rids
=
set
()
pf_rids
=
set
()
...
@@ -117,27 +166,27 @@ class ModelPruner:
...
@@ -117,27 +166,27 @@ class ModelPruner:
pf_rids
|=
{
rid
for
rid
in
pf
.
objective
}
pf_rids
|=
{
rid
for
rid
in
pf
.
objective
}
pf_rids
|=
{
rid
for
rid
in
pf
.
overwrite_bounds
}
pf_rids
|=
{
rid
for
rid
in
pf
.
overwrite_bounds
}
if
pf
.
fba_success
is
False
:
if
pf
.
fba_success
is
False
:
print
(
f
'
p
rotected function
{
pf
.
obj_id
}
has unsuccessful FBA
'
print
(
f
'
P
rotected function
{
pf
.
obj_id
}
has unsuccessful FBA
'
f
'
with optimal value
{
pf
.
optimal
}
'
)
f
'
with optimal value
{
pf
.
optimal
}
'
)
extra_pf_rids
=
pf_rids
.
difference
(
set
(
self
.
fba_model
.
rids
))
extra_pf_rids
=
pf_rids
.
difference
(
set
(
self
.
fba_model
.
rids
))
pf_unprotected_rids
=
pf_rids
.
difference
(
set
(
self
.
protected_rids
))
pf_unprotected_rids
=
pf_rids
.
difference
(
set
(
self
.
protected_rids
))
if
len
(
extra_p_rids
)
>
0
:
if
len
(
extra_p_rids
)
>
0
:
print
(
'
p
rotected reactions that do not exist in model:
'
,
extra_p_rids
)
print
(
'
P
rotected reactions that do not exist in model:
'
,
extra_p_rids
)
if
len
(
extra_p_sids
)
>
0
:
if
len
(
extra_p_sids
)
>
0
:
print
(
'
p
rotected metabolites that do not exist in model:
'
,
extra_p_sids
)
print
(
'
P
rotected metabolites that do not exist in model:
'
,
extra_p_sids
)
if
len
(
extra_pf_rids
)
>
0
:
if
len
(
extra_pf_rids
)
>
0
:
print
(
'
r
eactions in protected functions that do not exist in model:
'
,
extra_pf_rids
)
print
(
'
R
eactions in protected functions that do not exist in model:
'
,
extra_pf_rids
)
if
len
(
pf_unprotected_rids
)
>
0
:
if
len
(
pf_unprotected_rids
)
>
0
:
print
(
'
r
eactions in protected functions that are not protected:
'
,
pf_unprotected_rids
)
print
(
'
R
eactions in protected functions that are not protected:
'
,
pf_unprotected_rids
)
blocked_rids
=
self
.
reaction_types_fva
(
self
.
fba_model
.
rids
)[
'
blocked_rids
'
]
blocked_rids
=
self
.
reaction_types_fva
(
self
.
fba_model
.
rids
)[
'
blocked_rids
'
]
blocked_p_rids
=
self
.
nrp
.
protected_sids
.
intersection
(
blocked_rids
)
blocked_p_rids
=
self
.
nrp
.
protected_sids
.
intersection
(
blocked_rids
)
blocked_pf_rids
=
pf_rids
.
intersection
(
blocked_rids
)
blocked_pf_rids
=
pf_rids
.
intersection
(
blocked_rids
)
if
len
(
blocked_p_rids
)
>
0
:
if
len
(
blocked_p_rids
)
>
0
:
print
(
'
p
rotected reactions that are blocked (no flux):
'
,
blocked_p_rids
)
print
(
'
P
rotected reactions that are blocked (no flux):
'
,
blocked_p_rids
)
if
len
(
blocked_pf_rids
)
>
0
:
if
len
(
blocked_pf_rids
)
>
0
:
print
(
'
r
eactions in protected functions that are blocked (no flux):
'
,
blocked_pf_rids
)
print
(
'
R
eactions in protected functions that are blocked (no flux):
'
,
blocked_pf_rids
)
def
identify_parallel_reactions
(
self
):
def
identify_parallel_reactions
(
self
):
"""
Identify parallel reactions having same stoichiometry (considering reverse).
"""
Identify parallel reactions having same stoichiometry (considering reverse).
...
@@ -183,17 +232,24 @@ class ModelPruner:
...
@@ -183,17 +232,24 @@ class ModelPruner:
flux_max_pfs
=
np
.
zeros
((
n_pf
,
len
(
free_rids
)))
flux_max_pfs
=
np
.
zeros
((
n_pf
,
len
(
free_rids
)))
res
=
{}
res
=
{}
processes
=
min
(
self
.
cpus
,
len
(
free_rids
))
# setting up multiprocessing takes quite some resources
for
idx
,
pf
in
enumerate
(
self
.
nrp
.
protected_functions
.
values
()):
# there is a tradeoff wrt running on one processor
if
processes
>
100
:
processes
=
min
(
self
.
cpus
,
int
(
len
(
free_rids
)
*
n_pf
/
self
.
min_mp_total_flux
))
# processes = min(self.cpus, len(free_rids))
if
processes
>
1
:
print
(
f
'
Multiprocessing with
{
processes
}
processes
'
)
frid2idx
=
{
rid
:
idx
for
idx
,
rid
in
enumerate
(
free_rids
)}
frid2idx
=
{
rid
:
idx
for
idx
,
rid
in
enumerate
(
free_rids
)}
# place to split fva for free_rids over several
# delete lp problem prior to pool creation (so FbaProblem can be pickled)
with
mp
.
Pool
(
processes
,
initializer
=
init_worker
,
initargs
=
(
self
.
fba_model
,
pf
))
as
pool
:
self
.
fba_model
.
delete_fba_lp
()
for
res
in
pool
.
imap_unordered
(
worker
,
free_rids
):
with
mp
.
Pool
(
processes
,
initializer
=
_init_worker
,
rid
=
res
[
'
rids
'
][
0
]
initargs
=
(
self
.
fba_model
,
self
.
nrp
.
protected_functions
))
as
pool
:
flux_min_pfs
[
idx
,
frid2idx
[
rid
]]
=
res
[
'
min
'
][
0
]
for
res
in
pool
.
imap_unordered
(
_worker
,
np
.
array_split
(
free_rids
,
processes
)):
flux_max_pfs
[
idx
,
frid2idx
[
rid
]]
=
res
[
'
max
'
][
0
]
idxs
=
[
frid2idx
[
rid
]
for
rid
in
res
[
'
rids
'
]]
flux_min_pfs
[:,
idxs
]
=
res
[
'
min_pfs
'
]
flux_max_pfs
[:,
idxs
]
=
res
[
'
max_pfs
'
]
else
:
else
:
for
idx
,
pf
in
enumerate
(
self
.
nrp
.
protected_functions
.
values
()):
# print(f'\nCheck FVA for pf {pf.obj_id} with {len(free_rids)} free variables')
# print(f'\nCheck FVA for pf {pf.obj_id} with {len(free_rids)} free variables')
res
=
self
.
fba_model
.
fva_pf_flux_ranges
(
pf
,
free_rids
)
res
=
self
.
fba_model
.
fva_pf_flux_ranges
(
pf
,
free_rids
)
if
res
[
'
success
'
]
is
True
:
if
res
[
'
success
'
]
is
True
:
...
@@ -262,16 +318,27 @@ class ModelPruner:
...
@@ -262,16 +318,27 @@ class ModelPruner:
return
reaction_types
return
reaction_types
def
init_worker
(
self
):
def
prune
(
self
,
snapshot_freq
=
0
):
global
_fba_model
"""
Prune the metabolic network
_fba_model
=
copy
.
deepcopy
(
self
.
fba_model
)
Once the Metabolic network has been loaded it can be pruned.
With snapshot_freq one can deterimine if (snapshot_freq > 0) and
how often intermediate results shall be store, from which one can resume in future
Step during Pruning
1. identification and removal of parallel reactions
2. check feasibility of the network wrt to all protected functions
3. in a loop till all free reactions have been consumed
3.1 identify types of remaining free reactions using FVA across all protected functions
3.2 blocked reactions (zero flux across all functions) get removed
3.3 essential reactions (strict positive/negative flux across conditions) get protected
3.4 a single candidate reaction (supporting zero flux) gets identify and removed
@staticmethod
:param snapshot_freq: frequency to store intermediate results (every x reactions)
def
fva_single_rids
(
rid
):
:type snapshot_freq: int, optional (default: 0, no snapshots)
return
_fba_model
.
fva_single_rid
(
rid
)
"""
def
prune
(
self
,
snapshot_freq
=
None
):
print
(
f
'
Initial S-matrix:
{
self
.
fba_model
.
s_mat_coo
.
shape
}
;
'
print
(
f
'
initial S-matrix:
{
self
.
fba_model
.
s_mat
.
shape
}
;
'
f
'
protected reactions:
{
len
(
self
.
protected_rids
)
}
:
'
,
self
.
protected_rids
)
f
'
protected reactions:
{
len
(
self
.
protected_rids
)
}
:
'
,
self
.
protected_rids
)
drop_rids
=
self
.
identify_parallel_reactions
().
difference
(
self
.
protected_rids
)
drop_rids
=
self
.
identify_parallel_reactions
().
difference
(
self
.
protected_rids
)
if
len
(
drop_rids
)
>
0
:
if
len
(
drop_rids
)
>
0
:
...
@@ -279,13 +346,13 @@ class ModelPruner:
...
@@ -279,13 +346,13 @@ class ModelPruner:
print
(
f
'
{
len
(
drop_rids
)
}
parallel reaction(s) dropped:
'
,
drop_rids
)
print
(
f
'
{
len
(
drop_rids
)
}
parallel reaction(s) dropped:
'
,
drop_rids
)
feasible
=
np
.
array
([
self
.
fba_model
.
check_pf_feasibility
(
pf
)
for
pf
in
self
.
nrp
.
protected_functions
.
values
()])
feasible
=
np
.
array
([
self
.
fba_model
.
check_pf_feasibility
(
pf
)
for
pf
in
self
.
nrp
.
protected_functions
.
values
()])
print
(
'
p
rotected functions feasibility:
'
,
np
.
all
(
feasible
))
print
(
'
P
rotected functions feasibility:
'
,
np
.
all
(
feasible
))
next_snapshot
=
0
next_snapshot
=
0
if
type
(
snapshot_freq
)
is
int
:
if
snapshot_freq
>
0
:
next_snapshot
=
max
(
0
,
len
(
self
.
fba_model
.
rids
)
-
snapshot_freq
)
next_snapshot
=
max
(
0
,
len
(
self
.
fba_model
.
rids
)
-
snapshot_freq
)
while
len
(
self
.
protected_rids
)
<
self
.
fba_model
.
s_mat
.
shape
[
1
]:
while
len
(
self
.
protected_rids
)
<
self
.
fba_model
.
s_mat
_coo
.
shape
[
1
]:
free_rids
=
list
(
set
(
self
.
fba_model
.
rids
).
difference
(
self
.
protected_rids
))
free_rids
=
list
(
set
(
self
.
fba_model
.
rids
).
difference
(
self
.
protected_rids
))
reaction_types
=
self
.
reaction_types_fva
(
free_rids
)
reaction_types
=
self
.
reaction_types_fva
(
free_rids
)
print
()
print
()
...
@@ -299,11 +366,11 @@ class ModelPruner:
...
@@ -299,11 +366,11 @@ class ModelPruner:
# try to remove one reaction while still satisfying all protected functions
# try to remove one reaction while still satisfying all protected functions
for
rid
in
candidate_rids
:
for
rid
in
candidate_rids
:
# print('
c
heck feasibility of', rid)
# print('
C
heck feasibility of', rid)
feasible
=
True
feasible
=
True
for
pf
in
self
.
nrp
.
protected_functions
.
values
():
for
pf
in
self
.
nrp
.
protected_functions
.
values
():
feasible
=
self
.
fba_model
.
check_pf_feasibility
(
pf
,
zero_flux_rid
=
rid
)
feasible
=
self
.
fba_model
.
check_pf_feasibility
(
pf
,
zero_flux_rid
=
rid
)
# print(f'
f
easibility of {rid} is {feasible}')
# print(f'
F
easibility of {rid} is {feasible}')
if
feasible
is
False
:
if
feasible
is
False
:
essential_rids
.
add
(
rid
)
essential_rids
.
add
(
rid
)
break
break
...
@@ -317,7 +384,7 @@ class ModelPruner:
...
@@ -317,7 +384,7 @@ class ModelPruner:
print
(
f
"
{
len
(
essential_rids
)
}
reaction(s) added to protected reactions:
"
,
essential_rids
)
print
(
f
"
{
len
(
essential_rids
)
}
reaction(s) added to protected reactions:
"
,
essential_rids
)
if
feasible
is
False
:
if
feasible
is
False
:
print
(
'
n
o more reactions to remove
'
)
print
(
'
N
o more reactions to remove
'
)
# store intermediate results (snapshots)
# store intermediate results (snapshots)
if
len
(
self
.
fba_model
.
rids
)
<
next_snapshot
:
if
len
(
self
.
fba_model
.
rids
)
<
next_snapshot
:
...
@@ -328,7 +395,7 @@ class ModelPruner:
...
@@ -328,7 +395,7 @@ class ModelPruner:
next_snapshot
=
max
(
0
,
len
(
self
.
fba_model
.
rids
)
-
snapshot_freq
)
next_snapshot
=
max
(
0
,
len
(
self
.
fba_model
.
rids
)
-
snapshot_freq
)
print
(
'
snapshot data saved.
'
)
print
(
'
snapshot data saved.
'
)
print
(
f
'
S-matrix:
{
self
.
fba_model
.
s_mat
.
shape
}
;
'
print
(
f
'
S-matrix:
{
self
.
fba_model
.
s_mat
_coo
.
shape
}
;
'
f
'
protected reactions:
{
len
(
self
.
protected_rids
)
}
'
)
f
'
protected reactions:
{
len
(
self
.
protected_rids
)
}
'
)
# check if blocked reactions exist in the finally pruned model
# check if blocked reactions exist in the finally pruned model
...
@@ -336,10 +403,12 @@ class ModelPruner:
...
@@ -336,10 +403,12 @@ class ModelPruner:
# if len(blocked_rids) > 0:
# if len(blocked_rids) > 0:
# print(f'{len(blocked_rids)} blocked reaction(s) in pruned model', blocked_rids)
# print(f'{len(blocked_rids)} blocked reaction(s) in pruned model', blocked_rids)
# remove any snapshot files
def
remove_snapshot_files
(
self
):
# for fname in [self._snapshot_sbml, self._snapshot_json]:
"""
Remove any snapshot files still existing
"""
# if os.path.exists(fname):
for
pname
in
[
self
.
_snapshot_sbml
,
self
.
_snapshot_json
]:
# os.remove(fname)
if
os
.
path
.
exists
(
pname
):
os
.
remove
(
pname
)
print
(
f
'
{
pname
}
removed
'
)
def
get_reduction_status
(
self
):
def
get_reduction_status
(
self
):
"""
Return status in model reduction process
"""
Return status in model reduction process
...
@@ -355,6 +424,7 @@ class ModelPruner:
...
@@ -355,6 +424,7 @@ class ModelPruner:
return
status
return
status
def
print_status
(
self
):
def
print_status
(
self
):
"""
Print status of pruning process
"""
status
=
self
.
get_reduction_status
()
status
=
self
.
get_reduction_status
()
print
(
"
reactions removed/remaining/protected:
"
print
(
"
reactions removed/remaining/protected:
"
...
@@ -362,11 +432,21 @@ class ModelPruner:
...
@@ -362,11 +432,21 @@ class ModelPruner:
f
"
/
{
len
(
self
.
protected_rids
)
:
4
d
}
; dof:
{
status
[
'
dof
'
]
:
3
d
}
"
)
f
"
/
{
len
(
self
.
protected_rids
)
:
4
d
}
; dof:
{
status
[
'
dof
'
]
:
3
d
}
"
)
def
export_pruned_model
(
self
,
pruned_sbml
):
def
export_pruned_model
(
self
,
pruned_sbml
):
"""
Export the pruned network (or snapshot) to SBML.
Component information of initial network will be carried over,
e.g. annotation data, groups, gene products
:param pruned_sbml: pathname of pruned network, ending with
'
.xml
'
:type pruned_sbml: str
"""
success
=
self
.
fba_model
.
export_pruned_model
(
pruned_sbml
)
success
=
self
.
fba_model
.
export_pruned_model
(
pruned_sbml
)
if
success
is
True
:
if
success
is
True
:
print
(
'
pruned model exported to
'
,
pruned_sbml
)
print
(
'
Pruned model exported to
'
,
pruned_sbml
)
# self.remove_snapshot_files()
else
:
else
:
print
(
'
p
runed model could not be exported
'
)
print
(
'
P
runed model could not be exported
'
)
def
__del__
(
self
):
def
__del__
(
self
):
"""
Delete corresponding LpProblem
"""
del
self
.
fba_model
del
self
.
fba_model
This diff is collapsed.
Click to expand it.
modelpruner/models/fba_model.py
+
50
−
23
View file @
b58855bb
...
@@ -5,6 +5,7 @@ import time
...
@@ -5,6 +5,7 @@ import time
import
re
import
re
import
math
import
math
import
numpy
as
np
import
numpy
as
np
from
scipy
import
sparse
import
sbmlxdf
import
sbmlxdf
from
modelpruner.problems.lp_problem
import
LpProblem
from
modelpruner.problems.lp_problem
import
LpProblem
...
@@ -41,12 +42,13 @@ class FbaModel:
...
@@ -41,12 +42,13 @@ class FbaModel:
print
(
f
'
reaction
{
idx
}
set to reversible
'
)
print
(
f
'
reaction
{
idx
}
set to reversible
'
)
self
.
rids_rev
=
self
.
rids
[
self
.
reversible
]
self
.
rids_rev
=
self
.
rids
[
self
.
reversible
]
self
.
rid2idx_rev
=
{
rid
:
idx
+
len
(
self
.
rids
)
for
idx
,
rid
in
enumerate
(
self
.
rids_rev
)}
self
.
rid2idx_rev
=
{
rid
:
idx
+
len
(
self
.
rids
)
for
idx
,
rid
in
enumerate
(
self
.
rids_rev
)}
self
.
s_mat
=
sbml_model
.
get_s_matrix
().
to_numpy
()
# using a sparse matrix speeds up significantly pickling (factor > 350 for S-matrix)
# pickling required to support multiprocessing
# also data exchange from sbmlxdf.Model and towards LpProblem improves
self
.
s_mat_coo
=
sparse
.
coo_matrix
(
sbml_model
.
get_s_matrix
(
sparse
=
True
))
# TODO: create LP Problem only once (but what is impact on deepcopy()
# lp problem will only be created once required.
# function to assign new LP problem
# this gives chances to update e.g. flux bounds and to support pickling (multiprocessing)
# lp problem will only be created once required. This gives chances to update e.g. flux bounds
self
.
lp
=
None
self
.
lp
=
None
def
update_reversible_rids
(
self
):
def
update_reversible_rids
(
self
):
...
@@ -87,11 +89,13 @@ class FbaModel:
...
@@ -87,11 +89,13 @@ class FbaModel:
:return: lists of reaction ids that are parallel
:return: lists of reaction ids that are parallel
:rtype: list with lists of str
:rtype: list with lists of str
"""
"""
fwd_rev_s_mat
=
np
.
hstack
((
self
.
s_mat
,
-
self
.
s_mat
[:,
self
.
reversible
]))
fwd_rev_s_mat_coo
=
sparse
.
hstack
([
self
.
s_mat_coo
,
-
self
.
s_mat_coo
.
tocsc
()[:,
self
.
reversible
]])
fwd_rev_rids
=
np
.
hstack
((
self
.
rids
,
self
.
rids_rev
))
fwd_rev_rids
=
np
.
hstack
((
self
.
rids
,
self
.
rids_rev
))
# identify groups of parallel reactions
# identify groups of parallel reactions
sorted_idx
=
np
.
lexsort
(
fwd_rev_s_mat
)
sorted_idx
=
np
.
lexsort
(
fwd_rev_s_mat
_coo
.
todense
())[
0
]
groups
=
[]
groups
=
[]
consumed_idxs
=
set
()
consumed_idxs
=
set
()
for
pos
in
range
(
len
(
sorted_idx
)):
for
pos
in
range
(
len
(
sorted_idx
)):
...
@@ -100,7 +104,7 @@ class FbaModel:
...
@@ -100,7 +104,7 @@ class FbaModel:
group
=
[
idx
]
group
=
[
idx
]
for
adjacent_pos
in
range
(
pos
+
1
,
len
(
sorted_idx
)):
for
adjacent_pos
in
range
(
pos
+
1
,
len
(
sorted_idx
)):
candiate_idx
=
sorted_idx
[
adjacent_pos
]
candiate_idx
=
sorted_idx
[
adjacent_pos
]
if
np
.
all
(
fwd_rev_s_mat
[:,
idx
]
==
fwd_rev_s_mat
[:,
candiate_idx
])
:
if
(
fwd_rev_s_mat
_coo
.
getcol
(
idx
)
!=
fwd_rev_s_mat_coo
.
getcol
(
candiate_idx
)).
getnnz
()
==
0
:
consumed_idxs
.
add
(
candiate_idx
)
consumed_idxs
.
add
(
candiate_idx
)
group
.
append
(
candiate_idx
)
group
.
append
(
candiate_idx
)
else
:
else
:
...
@@ -131,14 +135,24 @@ class FbaModel:
...
@@ -131,14 +135,24 @@ class FbaModel:
:returns: status information in a dictionary
:returns: status information in a dictionary
:rtype: dict
:rtype: dict
"""
"""
status
=
{
'
dof
'
:
self
.
s_mat
.
shape
[
1
]
-
np
.
linalg
.
matrix_rank
(
self
.
s_mat
),
status
=
{
'
dof
'
:
self
.
s_mat
_coo
.
shape
[
1
]
-
np
.
linalg
.
matrix_rank
(
self
.
s_mat
_coo
.
todense
()
),
'
n_rids
'
:
len
(
self
.
rids
),
'
n_rids
'
:
len
(
self
.
rids
),
'
n_sids
'
:
len
(
self
.
sids
)}
'
n_sids
'
:
len
(
self
.
sids
)}
status
.
update
(
self
.
full_model_shape
)
status
.
update
(
self
.
full_model_shape
)
return
status
return
status
def
export_pruned_model
(
self
,
pruned_sbml
):
def
export_pruned_model
(
self
,
pruned_sbml
):
"""
Export the pruned network (or snapshot) to SBML.
Component information of initial network will be carried over,
e.g. annotation data, groups, gene products
History information, if available, will be updated.
Model will also be validated with respect to SBML compliance.
:param pruned_sbml: pathname of pruned network, ending with
'
.xml
'
:type pruned_sbml: str
"""
sbml_model
=
sbmlxdf
.
Model
(
self
.
sbml_fname
)
sbml_model
=
sbmlxdf
.
Model
(
self
.
sbml_fname
)
model_dict
=
sbml_model
.
to_df
()
model_dict
=
sbml_model
.
to_df
()
pruned_mdict
=
model_dict
.
copy
()
pruned_mdict
=
model_dict
.
copy
()
...
@@ -147,8 +161,11 @@ class FbaModel:
...
@@ -147,8 +161,11 @@ class FbaModel:
# update model attributes (ids, names and modification date)
# update model attributes (ids, names and modification date)
pruned_mdict
[
'
modelAttrs
'
][
'
id
'
]
+=
'
_pruned
'
pruned_mdict
[
'
modelAttrs
'
][
'
id
'
]
+=
'
_pruned
'
if
'
name
'
in
pruned_mdict
[
'
modelAttrs
'
]:
pruned_mdict
[
'
modelAttrs
'
][
'
name
'
]
+=
'
_pruned
'
pruned_mdict
[
'
modelAttrs
'
][
'
name
'
]
+=
'
_pruned
'
if
'
metaid
'
in
pruned_mdict
[
'
modelAttrs
'
]:
pruned_mdict
[
'
modelAttrs
'
][
'
metaid
'
]
+=
'
_pruned
'
pruned_mdict
[
'
modelAttrs
'
][
'
metaid
'
]
+=
'
_pruned
'
if
'
modifiedHistory
'
in
pruned_mdict
[
'
modelAttrs
'
]:
pruned_mdict
[
'
modelAttrs
'
][
'
modifiedHistory
'
]
+=
'
; localtime
'
pruned_mdict
[
'
modelAttrs
'
][
'
modifiedHistory
'
]
+=
'
; localtime
'
# clean groups table
# clean groups table
...
@@ -203,13 +220,23 @@ class FbaModel:
...
@@ -203,13 +220,23 @@ class FbaModel:
:return: LpProblem configured for FBA
:return: LpProblem configured for FBA
:rtype: LpProblem
:rtype: LpProblem
"""
"""
fwd_rev_s_mat
=
np
.
hstack
((
self
.
s_mat
[:],
-
self
.
s_mat
[:,
self
.
reversible
]))
fwd_rev_s_mat_coo
=
sparse
.
hstack
([
self
.
s_mat_coo
,
-
self
.
s_mat_coo
.
tocsc
()[:,
self
.
reversible
]])
fwd_rev_col_bnds
=
np
.
hstack
((
np
.
fmax
(
self
.
flux_bounds
[:],
0
),
fwd_rev_col_bnds
=
np
.
hstack
((
np
.
fmax
(
self
.
flux_bounds
[:],
0
),
np
.
fmax
(
np
.
flip
(
-
self
.
flux_bounds
[:,
self
.
reversible
],
axis
=
0
),
0
)))
np
.
fmax
(
np
.
flip
(
-
self
.
flux_bounds
[:,
self
.
reversible
],
axis
=
0
),
0
)))
row_bnds
=
np
.
zeros
((
fwd_rev_s_mat
.
shape
[
0
],
2
))
row_bnds
=
np
.
zeros
((
fwd_rev_s_mat
_coo
.
shape
[
0
],
2
))
lp
=
LpProblem
(
fwd_rev_s_mat
,
row_bnds
,
fwd_rev_col_bnds
)
lp
=
LpProblem
(
fwd_rev_s_mat
_coo
,
row_bnds
,
fwd_rev_col_bnds
)
return
lp
return
lp
def
delete_fba_lp
(
self
):
"""
Delete the corresponding linear problem.
E.g. required for multiprocessing so FbaModel can be pickled
"""
if
self
.
lp
is
not
None
:
del
self
.
lp
self
.
lp
=
None
def
set_lp_objective
(
self
,
objective
,
direction
=
'
maximize
'
):
def
set_lp_objective
(
self
,
objective
,
direction
=
'
maximize
'
):
"""
Set objective coefficients and direction in linear problem
"""
Set objective coefficients and direction in linear problem
...
@@ -241,7 +268,7 @@ class FbaModel:
...
@@ -241,7 +268,7 @@ class FbaModel:
"""
"""
if
self
.
lp
is
None
:
if
self
.
lp
is
None
:
self
.
lp
=
self
.
create_core_fba_lp
()
self
.
lp
=
self
.
create_core_fba_lp
()
print
(
'
Core LP Problem created
'
)
#
print('Core LP Problem created')
for
rid
,
bound_upd
in
flux_bounds
.
items
():
for
rid
,
bound_upd
in
flux_bounds
.
items
():
if
rid
in
self
.
rids
:
if
rid
in
self
.
rids
:
temp_bounds
=
self
.
flux_bounds
[:,
self
.
rid2idx
[
rid
]].
copy
()
temp_bounds
=
self
.
flux_bounds
[:,
self
.
rid2idx
[
rid
]].
copy
()
...
@@ -291,6 +318,9 @@ class FbaModel:
...
@@ -291,6 +318,9 @@ class FbaModel:
:param drop_rids: reaction ids to be removed from the model
:param drop_rids: reaction ids to be removed from the model
:type drop_rids: set of str
:type drop_rids: set of str
"""
"""
if
self
.
lp
is
None
:
self
.
lp
=
self
.
create_core_fba_lp
()
# print('Core LP Problem created')
assert
self
.
lp
is
not
None
assert
self
.
lp
is
not
None
# remove variables (columns) from linear problem
# remove variables (columns) from linear problem
...
@@ -304,16 +334,16 @@ class FbaModel:
...
@@ -304,16 +334,16 @@ class FbaModel:
self
.
reversible
=
self
.
reversible
[
keep_rids_mask
]
self
.
reversible
=
self
.
reversible
[
keep_rids_mask
]
self
.
update_reversible_rids
()
self
.
update_reversible_rids
()
self
.
flux_bounds
=
self
.
flux_bounds
[:,
keep_rids_mask
]
self
.
flux_bounds
=
self
.
flux_bounds
[:,
keep_rids_mask
]
self
.
s_mat
=
self
.
s_mat
[:,
keep_rids_mask
]
self
.
s_mat
_coo
=
self
.
s_mat
_coo
.
tocsc
()
[:,
keep_rids_mask
]
.
tocoo
()
# drop blocked metabolites due to deleted reactions
# drop blocked metabolites due to deleted reactions
keep_sids_mask
=
np
.
any
(
self
.
s_mat
!=
0
,
axis
=
1
)
keep_sids_mask
=
(
self
.
s_mat
_coo
.
getnnz
(
axis
=
1
)
>
0
)
row_idxs0
=
[
idx
for
idx
,
val
in
enumerate
(
keep_sids_mask
)
if
val
==
np
.
bool_
(
False
)]
row_idxs0
=
[
idx
for
idx
,
val
in
enumerate
(
keep_sids_mask
)
if
val
==
np
.
bool_
(
False
)]
if
len
(
row_idxs0
)
>
0
:
if
len
(
row_idxs0
)
>
0
:
self
.
lp
.
remove_rows
(
row_idxs0
)
self
.
lp
.
remove_rows
(
row_idxs0
)
self
.
sids
=
self
.
sids
[
keep_sids_mask
]
self
.
sids
=
self
.
sids
[
keep_sids_mask
]
self
.
sid2idx
=
{
sid
:
idx
for
idx
,
sid
in
enumerate
(
self
.
sids
)}
self
.
sid2idx
=
{
sid
:
idx
for
idx
,
sid
in
enumerate
(
self
.
sids
)}
self
.
s_mat
=
self
.
s_mat
[
keep_sids_mask
,
:]
self
.
s_mat
_coo
=
self
.
s_mat
_coo
.
tocsr
()
[
keep_sids_mask
,
:]
.
tocoo
()
def
fba_pf_optimize
(
self
,
pf
,
zero_flux_rid
=
None
):
def
fba_pf_optimize
(
self
,
pf
,
zero_flux_rid
=
None
):
"""
FBA optimization of current problem
"""
FBA optimization of current problem
...
@@ -329,7 +359,7 @@ class FbaModel:
...
@@ -329,7 +359,7 @@ class FbaModel:
"""
"""
if
self
.
lp
is
None
:
if
self
.
lp
is
None
:
self
.
lp
=
self
.
create_core_fba_lp
()
self
.
lp
=
self
.
create_core_fba_lp
()
print
(
'
Core LP Problem created
'
)
#
print('Core LP Problem created')
assert
self
.
lp
is
not
None
assert
self
.
lp
is
not
None
if
zero_flux_rid
is
not
None
:
if
zero_flux_rid
is
not
None
:
...
@@ -430,9 +460,6 @@ class FbaModel:
...
@@ -430,9 +460,6 @@ class FbaModel:
result
.
__dict__
[
'
lp
'
]
=
None
result
.
__dict__
[
'
lp
'
]
=
None
return
result
return
result
def
__reduce__
(
self
):
return
self
.
__class__
,
(
self
.
sbml_fname
,
)
def
__del__
(
self
):
def
__del__
(
self
):
print
(
'
deletion of fba_model
'
)
print
(
'
deletion of fba_model
'
)
del
self
.
lp
del
self
.
lp
This diff is collapsed.
Click to expand it.
modelpruner/problems/lp_problem.py
+
9
−
12
View file @
b58855bb
...
@@ -6,8 +6,6 @@ Peter Schubert, HHU Duesseldorf, June 2022
...
@@ -6,8 +6,6 @@ Peter Schubert, HHU Duesseldorf, June 2022
"""
"""
import
numpy
as
np
import
numpy
as
np
import
scipy
import
scipy.sparse
import
swiglpk
as
glpk
import
swiglpk
as
glpk
...
@@ -61,13 +59,13 @@ scaling_options = {'GM': glpk.GLP_SF_GM,
...
@@ -61,13 +59,13 @@ scaling_options = {'GM': glpk.GLP_SF_GM,
class
LpProblem
:
class
LpProblem
:
def
__init__
(
self
,
constr_coefs
,
row_bnds
,
col_bnds
):
def
__init__
(
self
,
constr_coefs
_coo
,
row_bnds
,
col_bnds
):
"""
create core linear problem (constraints, variable and row bounds)
"""
create core linear problem (constraints, variable and row bounds)
Problem needs to be properly deleted usin del(LpProblem)
Problem needs to be properly deleted usin del(LpProblem)
Note: glpk related indices start at 1 (not zero)
Note: glpk related indices start at 1 (not zero)
:param constr_coefs: matrix with constraint coefficients
:param constr_coefs
_coo
: matrix with constraint coefficients
:type constr_coefs
: 2D nparray
with shape(nrows, ncols)
:type constr_coefs
_coo: scipy.sparce.coo_matrix
with shape(nrows, ncols)
:param row_bnds: lower and upper bounds of auxhiliary (row) variables
:param row_bnds: lower and upper bounds of auxhiliary (row) variables
:type row_bnds: 2D ndarray of shape(nrows, 2) (1st col: lower bounds, 2nd col: upper bounds)
:type row_bnds: 2D ndarray of shape(nrows, 2) (1st col: lower bounds, 2nd col: upper bounds)
:param col_bnds: lower and upper bounds of structural (col) variables
:param col_bnds: lower and upper bounds of structural (col) variables
...
@@ -78,7 +76,7 @@ class LpProblem:
...
@@ -78,7 +76,7 @@ class LpProblem:
glp_presolve
=
glpk
.
GLP_OFF
# or glpk.GLP_OFF (default) or GLP_ON
glp_presolve
=
glpk
.
GLP_OFF
# or glpk.GLP_OFF (default) or GLP_ON
self
.
res
=
{}
self
.
res
=
{}
self
.
nrows
,
self
.
ncols
=
constr_coefs
.
shape
self
.
nrows
,
self
.
ncols
=
constr_coefs
_coo
.
shape
assert
col_bnds
.
shape
==
(
2
,
self
.
ncols
)
assert
col_bnds
.
shape
==
(
2
,
self
.
ncols
)
assert
row_bnds
.
shape
==
(
self
.
nrows
,
2
)
assert
row_bnds
.
shape
==
(
self
.
nrows
,
2
)
...
@@ -95,15 +93,14 @@ class LpProblem:
...
@@ -95,15 +93,14 @@ class LpProblem:
for
i
,
lb_ub
in
enumerate
(
row_bnds
):
for
i
,
lb_ub
in
enumerate
(
row_bnds
):
var_type
,
lb
,
ub
=
self
.
_get_variable_type
(
lb_ub
)
var_type
,
lb
,
ub
=
self
.
_get_variable_type
(
lb_ub
)
glpk
.
glp_set_row_bnds
(
self
.
lp
,
1
+
i
,
var_type
,
lb
,
ub
)
glpk
.
glp_set_row_bnds
(
self
.
lp
,
1
+
i
,
var_type
,
lb
,
ub
)
constr_coefs_sparse
=
scipy
.
sparse
.
coo_matrix
(
constr_coefs
)
n_coefs
=
len
(
constr_coefs_coo
.
row
)
n_coefs
=
len
(
constr_coefs_sparse
.
row
)
ia
=
glpk
.
intArray
(
1
+
n_coefs
)
ia
=
glpk
.
intArray
(
1
+
n_coefs
)
ja
=
glpk
.
intArray
(
1
+
n_coefs
)
ja
=
glpk
.
intArray
(
1
+
n_coefs
)
ar
=
glpk
.
doubleArray
(
1
+
n_coefs
)
ar
=
glpk
.
doubleArray
(
1
+
n_coefs
)
for
i
in
range
(
n_coefs
):
for
i
in
range
(
n_coefs
):
ia
[
1
+
i
]
=
int
(
1
+
constr_coefs_
sparse
.
row
[
i
])
ia
[
1
+
i
]
=
int
(
1
+
constr_coefs_
coo
.
row
[
i
])
ja
[
1
+
i
]
=
int
(
1
+
constr_coefs_
sparse
.
col
[
i
])
ja
[
1
+
i
]
=
int
(
1
+
constr_coefs_
coo
.
col
[
i
])
ar
[
1
+
i
]
=
constr_coefs_
sparse
.
data
[
i
]
ar
[
1
+
i
]
=
constr_coefs_
coo
.
data
[
i
]
glpk
.
glp_load_matrix
(
self
.
lp
,
n_coefs
,
ia
,
ja
,
ar
)
glpk
.
glp_load_matrix
(
self
.
lp
,
n_coefs
,
ia
,
ja
,
ar
)
# configure options
# configure options
...
@@ -307,7 +304,7 @@ class LpProblem:
...
@@ -307,7 +304,7 @@ class LpProblem:
def
__del__
(
self
):
def
__del__
(
self
):
"""
Explicitly delete the LpProblem. (can we do without?)
"""
Explicitly delete the LpProblem. (can we do without?)
"""
"""
print
(
'
deletion of LpProblem
'
)
#
print('deletion of LpProblem')
assert
self
.
lp
is
not
None
assert
self
.
lp
is
not
None
if
getattr
(
self
,
'
lp
'
):
if
getattr
(
self
,
'
lp
'
):
glpk
.
glp_delete_prob
(
self
.
lp
)
glpk
.
glp_delete_prob
(
self
.
lp
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment