Skip to content
Snippets Groups Projects
Commit a569e10b authored by Peter Schubert's avatar Peter Schubert
Browse files

model validation and check unused parameters/molecules

parent 3cb87afe
No related branches found
No related tags found
No related merge requests found
"""Implementation of RbaDensity class.
"""Implementation of RbaDensities and RbaDensity classes.
Peter Schubert, CCB, HHU Duesseldorf, December 2022
"""
......@@ -10,15 +10,12 @@ import xml.etree.ElementTree
from .rba_target_value import RbaTargetValue
class RbaDensity:
    """Density record identified by ``id`` and carrying one target value."""

    def __init__(self, cid):
        """Create a record without a target value.

        :param cid: identifier stored in ``self.id``
        """
        # both attributes are created here; target_value stays None until assigned
        self.id, self.target_value = cid, None
class RbaDensities:
@staticmethod
def get_xml_items(model_dir):
def __init__(self):
    """Create an empty container; ``self.densities`` starts as an empty dict."""
    # replaced wholesale by get_xml_items() with the result of RbaDensity.get_xml_items(root)
    self.densities = {}
def get_xml_items(self, model_dir):
file_name = os.path.join(model_dir, 'density.xml')
if os.path.exists(file_name) is False:
print(f'{file_name} not found!')
......@@ -28,6 +25,37 @@ class RbaDensity:
root = tree.getroot()
assert root.tag == 'RBADensity'
self.densities = RbaDensity.get_xml_items(root)
def get_df_items(self):
    """Return the loaded densities as a table indexed by compartment.

    One row is built from ``to_dict()`` of every entry in ``self.densities``.

    :return: table whose index is named 'compartment'; when no densities
             are loaded an empty table with that index name is returned
    :rtype: pandas.DataFrame
    """
    records = [item.to_dict() for item in self.densities.values()]
    if not records:
        # pd.DataFrame([]) has no columns, so set_index('compartment') would raise KeyError
        return pd.DataFrame(index=pd.Index([], name='compartment'))
    return pd.DataFrame(records).set_index('compartment')
def validate(self, component_ids):
    """Check that every parameter referenced by the densities is defined.

    :param component_ids: mapping with at least the keys 'functions' and
                          'aggregates', each holding the defined identifiers
    :return: True if nothing is missing, otherwise False (missing ids are printed)
    :rtype: bool
    """
    unresolved = self.ref_parameters().difference(component_ids['functions'],
                                                  component_ids['aggregates'])
    if not unresolved:
        return True
    print('function/aggregates used in densities not defined:', unresolved)
    return False
def ref_parameters(self):
    """Collect the parameter ids referenced by the density target values.

    Each target value string (``target_value.get_str()``) is split on ','
    and the text following the first '=' of every fragment is taken as a
    parameter id.

    :return: referenced parameter ids
    :rtype: set
    """
    refs = set()
    for density in self.densities.values():
        for fragment in density.target_value.get_str().split(','):
            parts = fragment.split('=')
            # an empty string or a fragment without '=' carries no reference;
            # indexing parts[1] unconditionally would raise IndexError
            if len(parts) > 1:
                refs.add(parts[1].strip())
    return refs
class RbaDensity:
    """Single density entry: an identifier plus its target value."""

    def __init__(self, cid):
        """Store the identifier; the target value is not set yet.

        :param cid: identifier kept as ``self.id``
        """
        self.id, self.target_value = cid, None
@staticmethod
def get_xml_items(root):
data = {}
target_densities = root.find('listOfTargetDensities')
for target_density in target_densities.findall('targetDensity'):
......@@ -37,12 +65,6 @@ class RbaDensity:
data[cid] = rba_density
return data
@staticmethod
def get_df_items(items):
    """Build a table indexed by compartment from a dict of density records.

    :param items: mapping whose values provide ``to_dict()``
    :return: table whose index is named 'compartment'; an empty table with
             that index name if ``items`` is empty
    :rtype: pandas.DataFrame
    """
    records = [item.to_dict() for item in items.values()]
    if not records:
        # without rows there is no 'compartment' column and set_index would raise KeyError
        return pd.DataFrame(index=pd.Index([], name='compartment'))
    return pd.DataFrame(records).set_index('compartment')
def to_dict(self):
    """Export this record as a flat dict.

    :return: dict with keys 'compartment' (``self.id``) and 'targetValue'
             (string form of the target value)
    :rtype: dict
    """
    record = {'compartment': self.id}
    record['targetValue'] = self.target_value.get_str()
    return record
"""Implementation of RbaEnzyme class.
"""Implementation of RbaEnzymes and RbaEnzyme classes.
Peter Schubert, CCB, HHU Duesseldorf, December 2022
"""
......@@ -12,19 +12,12 @@ import xml.etree.ElementTree
from rbaxdf.utils.et_utils import get_species_refs
class RbaEnzyme:
class RbaEnzymes:
def __init__(self, eid):
    """Initialise an enzyme record with empty defaults.

    :param eid: identifier stored in ``self.id``
    """
    self.id = eid
    # string fields start empty until they are assigned
    self.reaction = ''
    self.forward_eff = ''
    self.backward_eff = ''
    self.zero_cost = False
    # machinery composition; iterated as dicts elsewhere (e.g. ``.items()`` in to_dict)
    self.mach_reactants = {}
    self.mach_products = {}
def __init__(self):
    """Create an empty container; ``self.enzymes`` starts as an empty dict."""
    # replaced wholesale by get_xml_items() with the result of RbaEnzyme.get_xml_items(root)
    self.enzymes = {}
@staticmethod
def get_xml_items(model_dir):
def get_xml_items(self, model_dir):
file_name = os.path.join(model_dir, 'enzymes.xml')
if os.path.exists(file_name) is False:
......@@ -35,6 +28,57 @@ class RbaEnzyme:
root = tree.getroot()
assert root.tag == 'RBAEnzymes'
self.enzymes = RbaEnzyme.get_xml_items(root)
def get_df_items(self):
    """Return the loaded enzymes as a table indexed by enzyme id.

    One row is built from ``to_dict()`` of every entry in ``self.enzymes``.

    :return: table whose index is named 'enzyme'; when no enzymes are
             loaded an empty table with that index name is returned
    :rtype: pandas.DataFrame
    """
    records = [item.to_dict() for item in self.enzymes.values()]
    if not records:
        # pd.DataFrame([]) has no columns, so set_index('enzyme') would raise KeyError
        return pd.DataFrame(index=pd.Index([], name='enzyme'))
    return pd.DataFrame(records).set_index('enzyme')
def validate(self, component_ids):
    """Check that molecules and parameters referenced by enzymes are defined.

    Both checks always run; each prints the ids it could not resolve.

    :param component_ids: mapping with the keys 'species', 'rnas', 'proteins',
                          'functions' and 'aggregates'
    :return: True only if neither check reported missing ids
    :rtype: bool
    """
    unknown_molecules = self.ref_molecules().difference(
        component_ids['species'], component_ids['rnas'], component_ids['proteins'])
    if unknown_molecules:
        print('species/macromolecules used in enzyme machinery not defined:', unknown_molecules)

    unknown_parameters = self.ref_parameters().difference(
        component_ids['functions'], component_ids['aggregates'])
    if unknown_parameters:
        print('function/aggregates used in enzymes not defined:', unknown_parameters)

    return not (unknown_molecules or unknown_parameters)
def ref_molecules(self):
    """Collect the ids appearing in the machinery of all enzymes.

    :return: union of the keys of ``mach_reactants`` and ``mach_products``
    :rtype: set
    """
    molecule_ids = set()
    for enzyme in self.enzymes.values():
        # iterating the containers yields their ids, exactly as the per-item loops did
        molecule_ids.update(enzyme.mach_reactants, enzyme.mach_products)
    return molecule_ids
def ref_parameters(self):
    """Collect the efficiency parameter ids referenced by all enzymes.

    :return: non-empty ``forward_eff`` / ``backward_eff`` values
    :rtype: set
    """
    refs = set()
    for enzyme in self.enzymes.values():
        # RbaEnzyme.__init__ defaults both efficiencies to ''; an unset one is
        # not a reference and must not be reported as an undefined parameter
        refs.update(eff for eff in (enzyme.forward_eff, enzyme.backward_eff) if eff)
    return refs
class RbaEnzyme:
    """Single enzyme entry with its reaction, efficiencies and machinery."""

    def __init__(self, eid):
        """Create an enzyme record with empty defaults.

        :param eid: identifier kept as ``self.id``
        """
        self.id = eid
        # the three string fields all start out empty
        self.reaction = self.forward_eff = self.backward_eff = ''
        self.zero_cost = False
        # two independent dicts, one per machinery side
        self.mach_reactants, self.mach_products = {}, {}
@staticmethod
def get_xml_items(root):
data = {}
enzymes = root.find('listOfEnzymes')
for enzyme in enzymes.findall('enzyme'):
......@@ -53,12 +97,6 @@ class RbaEnzyme:
data[eid] = rba_enzyme
return data
@staticmethod
def get_df_items(items):
    """Build a table indexed by enzyme id from a dict of enzyme records.

    :param items: mapping whose values provide ``to_dict()``
    :return: table whose index is named 'enzyme'; an empty table with that
             index name if ``items`` is empty
    :rtype: pandas.DataFrame
    """
    records = [item.to_dict() for item in items.values()]
    if not records:
        # without rows there is no 'enzyme' column and set_index would raise KeyError
        return pd.DataFrame(index=pd.Index([], name='enzyme'))
    return pd.DataFrame(records).set_index('enzyme')
def to_dict(self):
mach_reactants = '; '.join([f'species={species}, stoic={stoic}'
for species, stoic in self.mach_reactants.items()])
......
......@@ -47,6 +47,21 @@ class RbaMetabolism:
print(f'wrong metabolism type: {m_type}')
return df
def validate(self, component_ids):
    """Check that every species referenced by a reaction is defined.

    :param component_ids: mapping with at least the key 'species'
    :return: True if nothing is missing, otherwise False (missing ids are printed)
    :rtype: bool
    """
    undefined_species = self.ref_molecules().difference(component_ids['species'])
    if undefined_species:
        print('species used in reactions not defined:', undefined_species)
        return False
    return True
def ref_molecules(self):
    """Collect the ids used as reactants or products in any reaction.

    :return: union of the ids found in ``reactants`` and ``products``
    :rtype: set
    """
    species_ids = set()
    for reaction in self.reactions.values():
        species_ids.update(reaction.reactants, reaction.products)
    return species_ids
class RbaCompartment:
......
......@@ -9,10 +9,10 @@ import pandas as pd
from .rba_macromolecules import RbaMacromolecules
from .rba_metabolism import RbaMetabolism
from .rba_parameters import RbaParameters
from .rba_process import RbaProcesses
from .rba_enzyme import RbaEnzyme
from .rba_density import RbaDensity
from .rba_target_group import RbaTargetGroup
from .rba_processes import RbaProcesses
from .rba_enzymes import RbaEnzymes
from .rba_densities import RbaDensities
from .rba_targets import RbaTargets
class RbaModel:
......@@ -28,9 +28,9 @@ class RbaModel:
self.metabolism = RbaMetabolism()
self.parameters = RbaParameters()
self.processes = RbaProcesses()
self.density = None
self.targets = None
self.enzymes = None
self.densities = RbaDensities()
self.targets = RbaTargets()
self.enzymes = RbaEnzymes()
if os.path.exists(model_dir) is False:
print(f'{model_dir} not found!')
......@@ -43,10 +43,9 @@ class RbaModel:
self.proteins.get_xml_items(self.model_dir)
self.metabolism.get_xml_items(self.model_dir)
self.processes.get_xml_items(self.model_dir)
self.density = RbaDensity.get_xml_items(self.model_dir)
self.targets = RbaTargetGroup.get_xml_items(self.model_dir)
self.enzymes = RbaEnzyme.get_xml_items(self.model_dir)
self.densities.get_xml_items(self.model_dir)
self.enzymes.get_xml_items(self.model_dir)
self.targets.get_xml_items(self.model_dir)
self.is_model = True
......@@ -59,11 +58,9 @@ class RbaModel:
m_dict['rnas'] = self.rnas.get_df_items()
m_dict['dna'] = self.dna.get_df_items()
m_dict['proteins'] = self.proteins.get_df_items()
m_dict['density'] = RbaDensity.get_df_items(self.density)
m_dict['targets'] = RbaTargetGroup.get_df_items(self.targets)
m_dict['enzymes'] = RbaEnzyme.get_df_items(self.enzymes)
m_dict['enzymes'] = self.enzymes.get_df_items()
m_dict['densities'] = self.densities.get_df_items()
m_dict['targets'] = self.targets.get_df_items()
m_dict['compartments'] = self.metabolism.get_df_items('compartments')
m_dict['species'] = self.metabolism.get_df_items('species')
m_dict['reactions'] = self.metabolism.get_df_items('reactions')
......@@ -86,9 +83,9 @@ class RbaModel:
if '=' in row['machineryCapacity']:
first_value = row['machineryCapacity'].split('=')[1]
m_dict['processes'].at[idx, 'capacity_info'] = self.parameters.get_value_info(first_value)
for idx, row in m_dict['density'].iterrows():
for idx, row in m_dict['densities'].iterrows():
first_value = row['targetValue'].split('=')[1]
m_dict['density'].at[idx, 'value_info'] = self.parameters.get_value_info(first_value)
m_dict['densities'].at[idx, 'value_info'] = self.parameters.get_value_info(first_value)
for idx, row in m_dict['targets'].iterrows():
first_value = row['targetValue'].split('=')[1]
m_dict['targets'].at[idx, 'value_info'] = self.parameters.get_value_info(first_value)
......@@ -99,6 +96,54 @@ class RbaModel:
df.to_excel(writer, sheet_name=name, index=keep_index)
print(f'model exported to {xlsx_name}')
def validate(self):
    """Validate cross references between all model components.

    The ids defined in the model are gathered once and handed to the
    ``validate`` method of every component. All components are always
    checked, so a single call reports the problems of every component
    instead of stopping at the first one that fails.

    :return: True only if every component validated successfully
    :rtype: bool
    """
    component_ids = {'species': set(self.metabolism.species),
                     'dna': set(self.dna.macromolecules),
                     'rnas': set(self.rnas.macromolecules),
                     'proteins': set(self.proteins.macromolecules),
                     'functions': set(self.parameters.functions),
                     'aggregates': set(self.parameters.aggregates)}
    components = (self.metabolism, self.processes, self.densities,
                  self.enzymes, self.targets, self.parameters)
    # materialise the list first: chaining with `and` would skip the remaining
    # validators (and their printed diagnostics) after the first failure
    results = [component.validate(component_ids) for component in components]
    return all(results)
def check_unused(self):
    """Print the parameters and molecules that are defined but never referenced.

    Defined ids come from the species, the dna/rna/protein macromolecules
    and the functions/aggregates. Referenced ids are gathered from the
    ``ref_parameters()`` / ``ref_molecules()`` methods of the components.
    Nothing is returned; the findings are only printed.
    """
    defined_molecules = (set(self.metabolism.species) | set(self.dna.macromolecules) |
                         set(self.rnas.macromolecules) | set(self.proteins.macromolecules))
    defined_parameters = set(self.parameters.functions) | set(self.parameters.aggregates)

    used_parameters = set()
    for component in (self.processes, self.densities, self.targets,
                      self.parameters, self.enzymes):
        used_parameters |= component.ref_parameters()

    used_molecules = set()
    for component in (self.metabolism, self.processes, self.enzymes, self.targets):
        used_molecules |= component.ref_molecules()

    unused_parameters = defined_parameters.difference(used_parameters)
    unused_molecules = defined_molecules.difference(used_molecules)

    if unused_parameters:
        print(f'{len(unused_parameters)} unused parameters:', unused_parameters)
    if unused_molecules:
        print(f'{len(unused_molecules)} unused molecules:', unused_molecules)
    if not unused_parameters and not unused_molecules:
        print('no unused parameters/molecules')
def from_df(self):
    """Placeholder: the body is ``pass``, so nothing is done or returned."""
    # NOTE(review): presumably the planned counterpart of the DataFrame export — confirm
    pass
......
......@@ -46,6 +46,20 @@ class RbaParameters:
print(f'wrong parameter type: {p_type}')
return df
def validate(self, component_ids):
    """Check that every function referenced by an aggregate is defined.

    :param component_ids: mapping with at least the key 'functions'
    :return: True if nothing is missing, otherwise False (missing ids are printed)
    :rtype: bool
    """
    undefined_functions = self.ref_parameters().difference(component_ids['functions'])
    if undefined_functions:
        print('functions used in aggregates not defined:', undefined_functions)
    return not undefined_functions
def ref_parameters(self):
    """Collect the function ids referenced by all aggregates.

    :return: union of the ``functions`` of every entry in ``self.aggregates``
    :rtype: set
    """
    return {function_id
            for aggregate in self.aggregates.values()
            for function_id in aggregate.functions}
class RbaFunction:
......
......@@ -55,6 +55,74 @@ class RbaProcesses:
print(f'wrong parameter type: {p_type}')
return df
def validate(self, component_ids):
    """Check that molecules and parameters referenced by processes are defined.

    Four independent checks are run and each prints the ids it could not
    resolve: processing-map species, machinery species/macromolecules,
    production/degradation inputs, and referenced parameters.

    :param component_ids: mapping with the keys 'species', 'dna', 'rnas',
                          'proteins', 'functions' and 'aggregates'
    :return: True only if none of the checks reported missing ids
    :rtype: bool
    """
    valid = True

    missing = self.ref_molecules_pmaps().difference(component_ids['species'])
    if missing:
        print('species used in processingMaps not defined:', missing)
        valid = False

    missing = self.ref_molecules_machinery().difference(
        component_ids['species'], component_ids['rnas'], component_ids['proteins'])
    if missing:
        print('species/macromolecules used in processes not defined:', missing)
        valid = False

    missing = self.ref_molecules_inputs().difference(
        component_ids['dna'], component_ids['rnas'], component_ids['proteins'])
    if missing:
        print('macromolecules used in processes not defined:', missing)
        valid = False

    missing = self.ref_parameters().difference(
        component_ids['functions'], component_ids['aggregates'])
    if missing:
        # the message used to say "used in aggregates" (copy/paste slip);
        # these references come from the processes themselves
        print('function/aggregates used in processes not defined:', missing)
        valid = False

    return valid
def ref_molecules_pmaps(self):
    """Collect the ids referenced by all processing maps.

    Covers the optional 'reactants'/'products' of the constant processing
    and the 'reactants'/'products' of every component processing.

    :return: referenced ids
    :rtype: set
    """
    species_ids = set()
    for processing_map in self.processing_maps.values():
        constant = processing_map.constant_processing
        # constant processing may lack either side, hence the {} fallback
        species_ids.update(constant.get('reactants', {}), constant.get('products', {}))
        for component in processing_map.component_processings.values():
            species_ids.update(component['reactants'], component['products'])
    return species_ids
def ref_molecules_machinery(self):
    """Collect the ids referenced by the machinery of all processes.

    :return: ids found under the optional 'reactants' and 'products'
             entries of each ``machinery`` dict
    :rtype: set
    """
    molecule_ids = set()
    for process in self.processes.values():
        machinery = process.machinery
        molecule_ids.update(machinery.get('reactants', {}), machinery.get('products', {}))
    return molecule_ids
def ref_molecules_inputs(self):
    """Collect the ids listed as production or degradation inputs.

    :return: ids found under the optional 'inputs' entry of each process'
             ``productions`` and ``degradations``
    :rtype: set
    """
    input_ids = set()
    for process in self.processes.values():
        input_ids.update(process.productions.get('inputs', {}),
                         process.degradations.get('inputs', {}))
    return input_ids
def ref_molecules(self):
    """Return all ids referenced by processing maps, machinery and inputs.

    :return: union of ``ref_molecules_pmaps()``, ``ref_molecules_machinery()``
             and ``ref_molecules_inputs()``
    :rtype: set
    """
    # a fresh set is created first, so the sets returned by the helpers are never mutated
    return (set()
            | self.ref_molecules_pmaps()
            | self.ref_molecules_machinery()
            | self.ref_molecules_inputs())
def ref_parameters(self):
    """Collect the capacity values referenced by the process machineries.

    :return: ``machinery['capacity'].value`` of every process whose
             machinery has a 'capacity' entry
    :rtype: set
    """
    return {process.machinery['capacity'].value
            for process in self.processes.values()
            if 'capacity' in process.machinery}
class RbaProcess:
......@@ -134,8 +202,8 @@ class RbaProcess:
return {'process': self.id, 'name': self.name,
'machineryCapacity': mach_capacity, 'machineryReactants': mach_reactants,
'machineryProducts': mach_products,
'productionsProcessingMap': prod_pmap, 'productionsSet': prod_set,
'productionsInputs': prod_inputs,
'productionProcessingMap': prod_pmap, 'productionSet': prod_set,
'productionInputs': prod_inputs,
'degradationProcessingMap': degr_pmap, 'degradationSet': degr_set,
'degradationInputs': degr_inputs}
......
"""Implementation of RbaTargetGroup class.
"""Implementation of RbaTargets and RbaTargetGroup classes.
Peter Schubert, CCB, HHU Duesseldorf, December 2022
"""
......@@ -10,18 +10,12 @@ import xml.etree.ElementTree
from rbaxdf.utils.et_utils import get_target_species, get_target_reactions
class RbaTargetGroup:
    """Group of targets, split into four categories keyed by target id."""

    def __init__(self, tgid):
        """Create a target group with four empty categories.

        :param tgid: identifier kept as ``self.id``
        """
        self.id = tgid
        self.concentrations, self.production_fluxes = {}, {}
        self.degradation_fluxes, self.reaction_fluxes = {}, {}
class RbaTargets:
@staticmethod
def get_xml_items(model_dir):
def __init__(self):
    """Create an empty container; ``self.target_groups`` starts as an empty dict."""
    # replaced wholesale by get_xml_items() with the result of RbaTargetGroup.get_xml_items(root)
    self.target_groups = {}
def get_xml_items(self, model_dir):
file_name = os.path.join(model_dir, 'targets.xml')
if os.path.exists(file_name) is False:
print(f'{file_name} not found!')
......@@ -31,6 +25,66 @@ class RbaTargetGroup:
root = tree.getroot()
assert root.tag == 'RBATargets'
self.target_groups = RbaTargetGroup.get_xml_items(root)
def get_df_items(self):
    """Return all targets as a long-format table indexed by target group.

    Every target of every group becomes one row holding the group id,
    the target type, the target id and the target value string.

    :return: table with columns 'targetType', 'target', 'targetValue'
             and index 'targetGroup'
    :rtype: pandas.DataFrame
    """
    target_types = ('concentrations', 'productionFluxes', 'degradationFluxes', 'reactionFluxes')
    rows = []
    for group_id, group in self.target_groups.items():
        exported = group.to_dict()
        for target_type in target_types:
            rows.extend([group_id, target_type, target, value]
                        for target, value in exported[target_type].items())
    table = pd.DataFrame(rows, columns=['targetGroup', 'targetType', 'target', 'targetValue'])
    return table.set_index('targetGroup')
def validate(self, component_ids):
    """Check that molecules and parameters referenced by targets are defined.

    Both checks always run; each prints the ids it could not resolve.

    :param component_ids: mapping with the keys 'species', 'rnas', 'dna',
                          'proteins', 'functions' and 'aggregates'
    :return: True only if neither check reported missing ids
    :rtype: bool
    """
    unknown_molecules = self.ref_molecules().difference(
        component_ids['species'], component_ids['rnas'],
        component_ids['dna'], component_ids['proteins'])
    if unknown_molecules:
        print('species/macromolecules used in targets not defined:', unknown_molecules)

    unknown_parameters = self.ref_parameters().difference(
        component_ids['functions'], component_ids['aggregates'])
    if unknown_parameters:
        print('function/aggregates used in targets not defined:', unknown_parameters)

    return not (unknown_molecules or unknown_parameters)
def ref_molecules(self):
    """Collect the ids targeted by concentrations and production/degradation fluxes.

    ``reaction_fluxes`` are not included.

    :return: union of the keys of the three molecule-related target dicts
    :rtype: set
    """
    molecule_ids = set()
    for group in self.target_groups.values():
        molecule_ids.update(group.concentrations,
                            group.production_fluxes,
                            group.degradation_fluxes)
    return molecule_ids
def ref_parameters(self):
    """Collect the parameter ids referenced by the target values of all groups.

    Every target value string (``get_str()``) in the four target categories
    is split on ',' and the text following the first '=' of each fragment
    is taken as a parameter id.

    :return: referenced parameter ids
    :rtype: set
    """
    refs = set()
    for tg in self.target_groups.values():
        for target_type in ['concentrations', 'production_fluxes',
                            'degradation_fluxes', 'reaction_fluxes']:
            for target in getattr(tg, target_type).values():
                for fragment in target.get_str().split(','):
                    parts = fragment.split('=')
                    # an empty string or a fragment without '=' carries no
                    # reference; parts[1] would raise IndexError there
                    if len(parts) > 1:
                        refs.add(parts[1].strip())
    return refs
class RbaTargetGroup:
    """Target group holding four kinds of targets, each keyed by target id."""

    def __init__(self, tgid):
        """Create a target group whose four target dicts are all empty.

        :param tgid: identifier kept as ``self.id``
        """
        self.id = tgid
        # one independent dict per target category, created in this order
        for category in ('concentrations', 'production_fluxes',
                         'degradation_fluxes', 'reaction_fluxes'):
            setattr(self, category, {})
@staticmethod
def get_xml_items(root):
data = {}
target_groups = root.find('listOfTargetGroups')
for target_group in target_groups.findall('targetGroup'):
......@@ -43,19 +97,10 @@ class RbaTargetGroup:
data[tgid] = rba_target
return data
@staticmethod
def get_df_items(items):
data = []
for tgid, tg in items.items():
for target, target_value in tg.concentrations.items():
data.append([tgid, 'concentrations', target, target_value.get_str()])
for target, target_value in tg.production_fluxes.items():
data.append([tgid, 'productionFluxes', target, target_value.get_str()])
for target, target_value in tg.degradation_fluxes.items():
data.append([tgid, 'degradationFluxes', target, target_value.get_str()])
for target, target_value in tg.reaction_fluxes.items():
data.append([tgid, 'reactionFluxes', target, target_value.get_str()])
df = pd.DataFrame(data, columns=['targetGroup', 'targetType', 'target', 'targetValue'])
df.set_index('targetGroup', inplace=True)
return df
def to_dict(self):
    """Export this target group with all target values rendered as strings.

    :return: dict with the keys 'targetGroup', 'concentrations',
             'productionFluxes', 'degradationFluxes' and 'reactionFluxes';
             the four target entries map target id to ``get_str()`` output
    :rtype: dict
    """
    def as_strings(targets):
        # render every target value of one category through get_str()
        return {target: value.get_str() for target, value in targets.items()}

    return {'targetGroup': self.id,
            'concentrations': as_strings(self.concentrations),
            'productionFluxes': as_strings(self.production_fluxes),
            'degradationFluxes': as_strings(self.degradation_fluxes),
            'reactionFluxes': as_strings(self.reaction_fluxes)}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment