From 9ed273c20ba88fe8e7b98805b3a9757ce8d9a094 Mon Sep 17 00:00:00 2001 From: Peter Schubert <Peter.Schubert@hhu.de> Date: Thu, 22 Dec 2022 16:22:23 +0100 Subject: [PATCH] Export to xml implemented --- rbaxdf/model/rba_densities.py | 36 ++++++---- rbaxdf/model/rba_enzymes.py | 44 ++++++++++--- rbaxdf/model/rba_macromolecules.py | 54 +++++++++++---- rbaxdf/model/rba_metabolism.py | 66 +++++++++++++++---- rbaxdf/model/rba_model.py | 73 +++++++++++---------- rbaxdf/model/rba_parameters.py | 66 ++++++++++++++----- rbaxdf/model/rba_processes.py | 101 +++++++++++++++++++++++++---- rbaxdf/model/rba_target_value.py | 15 +++-- rbaxdf/model/rba_targets.py | 54 +++++++++++---- 9 files changed, 384 insertions(+), 125 deletions(-) diff --git a/rbaxdf/model/rba_densities.py b/rbaxdf/model/rba_densities.py index 3ea8e1e..3d236ff 100644 --- a/rbaxdf/model/rba_densities.py +++ b/rbaxdf/model/rba_densities.py @@ -5,7 +5,7 @@ Peter Schubert, CCB, HHU Duesseldorf, December 2022 import os import pandas as pd -import xml.etree.ElementTree +from xml.etree.ElementTree import parse, ElementTree, Element, SubElement, indent from .rba_target_value import RbaTargetValue @@ -15,19 +15,31 @@ class RbaDensities: def __init__(self): self.densities = {} - def get_xml_items(self, model_dir): + def import_xml(self, model_dir): file_name = os.path.join(model_dir, 'density.xml') - if os.path.exists(file_name) is False: + if os.path.exists(model_dir) is False: print(f'{file_name} not found!') return {} - tree = xml.etree.ElementTree.parse(file_name) + tree = parse(file_name) root = tree.getroot() assert root.tag == 'RBADensity' + self.densities = RbaDensity.import_xml(root.find('listOfTargetDensities')) - self.densities = RbaDensity.get_xml_items(root) + def export_xml(self, model_dir): - def get_df_items(self): + file_name = os.path.join(model_dir, 'density.xml') + root = Element('RBADensity') + + target_densities = SubElement(root, 'listOfTargetDensities') + for item in self.densities.values(): + target_densities.append(item.export_xml()) + + tree = ElementTree(root) + indent(tree) + tree.write(file_name) + + def to_df(self): df = pd.DataFrame([item.to_dict() for item in self.densities.values()]) df.set_index('compartment', inplace=True) return df @@ -44,7 +56,7 @@ class RbaDensities: def ref_parameters(self): refs = set() for d in self.densities.values(): - refs |= {tv.split('=')[1].strip() for tv in d.target_value.get_str().split(',')} + refs |= set(d.target_value.to_dict().values()) return refs @@ -55,9 +67,8 @@ class RbaDensity: self.target_value = None @staticmethod - def get_xml_items(root): + def import_xml(target_densities): data = {} - target_densities = root.find('listOfTargetDensities') for target_density in target_densities.findall('targetDensity'): cid = target_density.attrib['compartment'] rba_density = RbaDensity(cid) @@ -65,6 +76,9 @@ class RbaDensity: data[cid] = rba_density return data + def export_xml(self): + attribs = self.target_value.to_dict() | {'compartment': self.id} + return Element('targetDensity', attribs) + def to_dict(self): - return {'compartment': self.id, - 'targetValue': self.target_value.get_str()} + return {'compartment': self.id, 'targetValue': self.target_value.to_str()} diff --git a/rbaxdf/model/rba_enzymes.py b/rbaxdf/model/rba_enzymes.py index 2fe00fd..b1f4965 100644 --- a/rbaxdf/model/rba_enzymes.py +++ b/rbaxdf/model/rba_enzymes.py @@ -4,10 +4,9 @@ Peter Schubert, CCB, HHU Duesseldorf, December 2022 """ import os -from xml.etree.ElementTree import Element import pandas as pd -import xml.etree.ElementTree +from xml.etree.ElementTree import parse, ElementTree, Element, SubElement, indent from rbaxdf.utils.et_utils import get_species_refs @@ -17,20 +16,32 @@ class RbaEnzymes: def __init__(self): self.enzymes = {} - def get_xml_items(self, model_dir): + def import_xml(self, model_dir): file_name = os.path.join(model_dir, 'enzymes.xml') if os.path.exists(file_name) is False: print(f'{file_name} not found!') return {} - tree = xml.etree.ElementTree.parse(file_name) + tree = parse(file_name) root = tree.getroot() assert root.tag == 'RBAEnzymes' + self.enzymes = RbaEnzyme.import_xml(root.find('listOfEnzymes')) - self.enzymes = RbaEnzyme.get_xml_items(root) + def export_xml(self, model_dir): - def get_df_items(self): + file_name = os.path.join(model_dir, 'enzymes.xml') + root = Element('RBAEnzymes') + + enzymes = SubElement(root, 'listOfEnzymes') + for item in self.enzymes.values(): + enzymes.append(item.export_xml()) + + tree = ElementTree(root) + indent(tree) + tree.write(file_name) + + def to_df(self): df = pd.DataFrame([item.to_dict() for item in self.enzymes.values()]) df.set_index('enzyme', inplace=True) return df @@ -78,9 +89,8 @@ class RbaEnzyme: self.mach_products = {} @staticmethod - def get_xml_items(root): + def import_xml(enzymes): data = {} - enzymes = root.find('listOfEnzymes') for enzyme in enzymes.findall('enzyme'): eid = enzyme.attrib['id'] rba_enzyme = RbaEnzyme(eid) @@ -97,6 +107,24 @@ class RbaEnzyme: data[eid] = rba_enzyme return data + def export_xml(self): + attribs = {'id': self.id, 'reaction': self.reaction, 'forward_efficiency': self.forward_eff, + 'backward_efficiency': self.backward_eff, 'zeroCost': str(self.zero_cost).lower()} + enzyme = Element('enzyme', attribs) + if len(self.mach_reactants) + len(self.mach_products) > 0: + machinery_composition = SubElement(enzyme, 'machineryComposition') + if len(self.mach_reactants) > 0: + reactants = SubElement(machinery_composition, 'listOfReactants') + for species, stoic in self.mach_reactants.items(): + attribs = {'species': species, 'stoichiometry': str(stoic)} + SubElement(reactants, 'speciesReference', attribs) + if len(self.mach_products) > 0: + products = SubElement(machinery_composition, 'listOfProducts') + for species, stoic in self.mach_products.items(): + attribs = {'species': species, 'stoichiometry': str(stoic)} + SubElement(products, 'speciesReference', attribs) + return enzyme + def to_dict(self): mach_reactants = '; '.join([f'species={species}, stoic={stoic}' for species, stoic in self.mach_reactants.items()]) diff --git a/rbaxdf/model/rba_macromolecules.py b/rbaxdf/model/rba_macromolecules.py index 70b13c7..b4fc92c 100644 --- a/rbaxdf/model/rba_macromolecules.py +++ b/rbaxdf/model/rba_macromolecules.py @@ -6,7 +6,9 @@ Peter Schubert, CCB, HHU Duesseldorf, December 2022 import os import numpy as np import pandas as pd -import xml.etree.ElementTree +from xml.etree.ElementTree import parse, ElementTree, Element, SubElement, indent + +type2tag = {'dna': 'RBADna', 'rnas': 'RBARnas', 'proteins': 'RBAProteins'} class RbaMacromolecules: @@ -16,21 +18,37 @@ class RbaMacromolecules: self.components = {} self.macromolecules = {} - def get_xml_items(self, model_dir): + def import_xml(self, model_dir): file_name = os.path.join(model_dir, self.type + '.xml') if os.path.exists(file_name) is False: print(f'{file_name} not found!') return {} - tree = xml.etree.ElementTree.parse(file_name) + tree = parse(file_name) root = tree.getroot() - assert root.tag in {'RBADna', 'RBAProteins', 'RBARnas'} + assert root.tag == type2tag[self.type] + + self.components = RbaComponent.import_xml(root.find('listOfComponents')) + self.macromolecules = RbaMacromolecule.import_xml(root.find('listOfMacromolecules')) + + def export_xml(self, model_dir): + file_name = os.path.join(model_dir, self.type + '.xml') + root = Element(type2tag[self.type]) - self.components = RbaComponent.get_xml_items(root) - self.macromolecules = RbaMacromolecule.get_xml_items(root) + components = SubElement(root, 'listOfComponents') + for item in self.components.values(): + components.append(item.export_xml()) - def get_df_items(self): + macromolecules = SubElement(root, 'listOfMacromolecules') + for item in self.macromolecules.values(): + macromolecules.append(item.export_xml()) + + tree = ElementTree(root) + indent(tree) + tree.write(file_name) + + def to_df(self): df_comp = pd.DataFrame([item.to_dict() for item in self.components.values()]) df_comp.set_index('component', inplace=True) df_mm = pd.DataFrame([item.to_dict() for item in self.macromolecules.values()]) @@ -50,9 +68,8 @@ class RbaComponent: self.weight = np.nan @staticmethod - def get_xml_items(root): + def import_xml(components): data = {} - components = root.find('listOfComponents') for component in components.findall('component'): cid = component.attrib['id'] rba_component = RbaComponent(cid) @@ -62,6 +79,10 @@ class RbaComponent: data[cid] = rba_component return data + def export_xml(self): + attribs = {'id': self.id, 'name': self.name, 'type': self.type, 'weight': str(self.weight)} + return Element('component', attribs) + def to_dict(self): return {'component': self.id, 'name': self.name, 'type': self.type, 'weight': self.weight} @@ -75,10 +96,9 @@ class RbaMacromolecule: self.composition = {} @staticmethod - def get_xml_items(root): + def import_xml(macromolecules): data = {} - macromolecules = root.find('listOfMacromolecules') for macromolecule in macromolecules.findall('macromolecule'): mmid = macromolecule.attrib['id'] rba_macromolecule = RbaMacromolecule(mmid) @@ -91,7 +111,13 @@ class RbaMacromolecule: data[mmid] = rba_macromolecule return data + def export_xml(self): + attribs = {'id': self.id, 'compartment': self.compartment} + macromolecule = Element('macromolecule', attribs) + composition = SubElement(macromolecule, 'composition') + for component, stoic in self.composition.items(): + SubElement(composition, 'componentReference', {'component': component, 'stoichiometry': str(stoic)}) + return macromolecule + def to_dict(self): - mm_dict = {'id': self.id, 'compartment': self.compartment} - mm_dict |= self.composition - return mm_dict + return {'id': self.id, 'compartment': self.compartment} | self.composition diff --git a/rbaxdf/model/rba_metabolism.py b/rbaxdf/model/rba_metabolism.py index bd80540..4760c43 100644 --- a/rbaxdf/model/rba_metabolism.py +++ b/rbaxdf/model/rba_metabolism.py @@ -5,7 +5,7 @@ Peter Schubert, CCB, HHU Duesseldorf, December 2022 import os import pandas as pd -import xml.etree.ElementTree +from xml.etree.ElementTree import parse, ElementTree, Element, SubElement, indent from rbaxdf.utils.et_utils import get_species_refs @@ -17,32 +17,55 @@ class RbaMetabolism: self.species = {} self.reactions = {} - def get_xml_items(self, model_dir): + def import_xml(self, model_dir): file_name = os.path.join(model_dir, 'metabolism.xml') if os.path.exists(file_name) is False: print(f'{file_name} not found!') return - tree = xml.etree.ElementTree.parse(file_name) + tree = parse(file_name) root = tree.getroot() assert root.tag == 'RBAMetabolism' - self.compartments = RbaCompartment.get_xml_items(root) - self.species = RbaSpecies.get_xml_items(root) - self.reactions = RbaReaction.get_xml_items(root) + self.compartments = RbaCompartment.import_xml(root.find('listOfCompartments')) + self.species = RbaSpecies.import_xml(root.find('listOfSpecies')) + self.reactions = RbaReaction.import_xml(root.find('listOfReactions')) - def get_df_items(self, m_type): + def export_xml(self, model_dir): + file_name = os.path.join(model_dir, 'metabolism.xml') + root = Element('RBAMetabolism') + + compartments = SubElement(root, 'listOfCompartments') + for item in self.compartments.values(): + compartments.append(item.export_xml()) + + species = SubElement(root, 'listOfSpecies') + for item in self.species.values(): + species.append(item.export_xml()) + + reactions = SubElement(root, 'listOfReactions') + for item in self.reactions.values(): + reactions.append(item.export_xml()) + + tree = ElementTree(root) + indent(tree) + tree.write(file_name) + + def to_df(self, m_type): df = None if m_type == 'compartments': df = pd.DataFrame([item.to_dict() for item in self.compartments.values()]) df.index.name = 'index' + elif m_type == 'species': df = pd.DataFrame([item.to_dict() for item in self.species.values()]) df.set_index('species', inplace=True) + elif m_type == 'reactions': df = pd.DataFrame([item.to_dict() for item in self.reactions.values()]) df.set_index('reaction', inplace=True) + else: print(f'wrong metabolism type: {m_type}') return df @@ -69,15 +92,17 @@ class RbaCompartment: self.id = cid @staticmethod - def get_xml_items(root): + def import_xml(compartments): data = {} - compartments = root.find('listOfCompartments') for compartment in compartments.findall('compartment'): cid = compartment.attrib['id'] rba_compartment = RbaCompartment(cid) data[cid] = rba_compartment return data + def export_xml(self): + return Element('compartment', {'id': self.id}) + def to_dict(self): return {'compartment': self.id} @@ -89,10 +114,9 @@ class RbaSpecies: self.boundary_condition = False @staticmethod - def get_xml_items(root): + def import_xml(species): data = {} - species = root.find('listOfSpecies') for sp in species.findall('species'): sid = sp.attrib['id'] rba_species = RbaSpecies(sid) @@ -101,6 +125,10 @@ class RbaSpecies: data[sid] = rba_species return data + def export_xml(self): + attribs = {'id': self.id, 'boundaryCondition': str(self.boundary_condition).lower()} + return Element('species', attribs) + def to_dict(self): return {'species': self.id, 'boundaryCondition': self.boundary_condition} @@ -114,10 +142,9 @@ class RbaReaction: self.products = {} @staticmethod - def get_xml_items(root): + def import_xml(reactions): data = {} - reactions = root.find('listOfReactions') for reaction in reactions.findall('reaction'): rid = reaction.attrib['id'] rba_reaction = RbaReaction(rid) @@ -128,6 +155,19 @@ class RbaReaction: data[rid] = rba_reaction return data + def export_xml(self): + attribs = {'id': self.id, 'reversible': str(self.reversible).lower()} + reaction = Element('reaction', attribs) + + reactants = SubElement(reaction, 'listOfReactants') + for sid, stoic in self.reactants.items(): + SubElement(reactants, 'speciesReference', {'species': sid, 'stoichiometry': str(stoic)}) + + products = SubElement(reaction, 'listOfProducts') + for sid, stoic in self.products.items(): + SubElement(products, 'speciesReference', {'species': sid, 'stoichiometry': str(stoic)}) + return reaction + def to_dict(self): reactants = '; '.join([f'species={species}, stoic={stoic}' for species, stoic in self.reactants.items()]) diff --git a/rbaxdf/model/rba_model.py b/rbaxdf/model/rba_model.py index 966ffe7..205d192 100644 --- a/rbaxdf/model/rba_model.py +++ b/rbaxdf/model/rba_model.py @@ -14,6 +14,9 @@ from .rba_enzymes import RbaEnzymes from .rba_densities import RbaDensities from .rba_targets import RbaTargets +components = {'parameters', 'dna', 'rnas', 'proteins', 'metabolism', + 'processes', 'enzymes', 'densities', 'targets'} + class RbaModel: @@ -36,38 +39,43 @@ class RbaModel: print(f'{model_dir} not found!') raise FileNotFoundError - def import_rba(self): - self.parameters.get_xml_items(self.model_dir) - self.dna.get_xml_items(self.model_dir) - self.rnas.get_xml_items(self.model_dir) - self.proteins.get_xml_items(self.model_dir) - self.metabolism.get_xml_items(self.model_dir) - self.processes.get_xml_items(self.model_dir) - self.densities.get_xml_items(self.model_dir) - self.enzymes.get_xml_items(self.model_dir) - self.targets.get_xml_items(self.model_dir) + def set_model_dir(self, model_dir): + if os.path.exists(model_dir) is False: + os.makedirs(model_dir) + print(f'{model_dir} created') + self.model_dir = model_dir + + def import_xml(self): + + for component in components: + getattr(self, component).import_xml(self.model_dir) + print(f'RBA model imported from: {self.model_dir}') self.is_model = True - def export_rba(self): - pass + def export_xml(self): + + for component in components: + getattr(self, component).export_xml(self.model_dir) + + print(f'RBA model exported to: {self.model_dir}') def to_df(self): m_dict = {} if self.is_model is True: - m_dict['rnas'] = self.rnas.get_df_items() - m_dict['dna'] = self.dna.get_df_items() - m_dict['proteins'] = self.proteins.get_df_items() - m_dict['enzymes'] = self.enzymes.get_df_items() - m_dict['densities'] = self.densities.get_df_items() - m_dict['targets'] = self.targets.get_df_items() - m_dict['compartments'] = self.metabolism.get_df_items('compartments') - m_dict['species'] = self.metabolism.get_df_items('species') - m_dict['reactions'] = self.metabolism.get_df_items('reactions') - m_dict['functions'] = self.parameters.get_df_items('functions') - m_dict['aggregates'] = self.parameters.get_df_items('aggregates') - m_dict['processes'] = self.processes.get_df_items('processes') - m_dict['processing_maps'] = self.processes.get_df_items('processingMaps') + m_dict['rnas'] = self.rnas.to_df() + m_dict['dna'] = self.dna.to_df() + m_dict['proteins'] = self.proteins.to_df() + m_dict['enzymes'] = self.enzymes.to_df() + m_dict['densities'] = self.densities.to_df() + m_dict['targets'] = self.targets.to_df() + m_dict['compartments'] = self.metabolism.to_df('compartments') + m_dict['species'] = self.metabolism.to_df('species') + m_dict['reactions'] = self.metabolism.to_df('reactions') + m_dict['functions'] = self.parameters.to_df('functions') + m_dict['aggregates'] = self.parameters.to_df('aggregates') + m_dict['processes'] = self.processes.to_df('processes') + m_dict['processing_maps'] = self.processes.to_df('processingMaps') return m_dict @@ -105,16 +113,14 @@ class RbaModel: 'aggregates': set(self.parameters.aggregates)} valid = True - valid = valid and self.metabolism.validate(component_ids) - valid = valid and self.processes.validate(component_ids) - valid = valid and self.densities.validate(component_ids) - valid = valid and self.enzymes.validate(component_ids) - valid = valid and self.targets.validate(component_ids) - valid = valid and self.parameters.validate(component_ids) + components = {'parameters', 'metabolism', + 'processes', 'enzymes', 'densities', 'targets'} + for component in components: + valid = valid and getattr(self, component).validate(component_ids) + print(f'model valid status: {valid}') return valid def check_unused(self): - unused = 0 molecules = (set(self.metabolism.species) | set(self.dna.macromolecules) | set(self.rnas.macromolecules) | set(self.proteins.macromolecules)) parameters = set(self.parameters.functions) | set(self.parameters.aggregates) @@ -123,8 +129,8 @@ class RbaModel: ref_parameters |= self.processes.ref_parameters() ref_parameters |= self.densities.ref_parameters() ref_parameters |= self.targets.ref_parameters() - ref_parameters |= self.parameters.ref_parameters() ref_parameters |= self.enzymes.ref_parameters() + ref_parameters |= self.parameters.ref_functions(ref_parameters) ref_molecules = set() ref_molecules |= self.metabolism.ref_molecules() @@ -135,6 +141,7 @@ class RbaModel: unused_parameters = parameters.difference(ref_parameters) unused_molecules = molecules.difference(ref_molecules) + unused = 0 if len(unused_parameters) > 0: print(f'{len(unused_parameters)} unused parameters:', unused_parameters) unused += len(unused_parameters) diff --git a/rbaxdf/model/rba_parameters.py b/rbaxdf/model/rba_parameters.py index 2e6eef8..1a6f35e 100644 --- a/rbaxdf/model/rba_parameters.py +++ b/rbaxdf/model/rba_parameters.py @@ -5,7 +5,7 @@ Peter Schubert, CCB, HHU Duesseldorf, December 2022 import os import pandas as pd -import xml.etree.ElementTree +from xml.etree.ElementTree import parse, ElementTree, Element, SubElement, indent class RbaParameters: @@ -14,18 +14,34 @@ class RbaParameters: self.functions = {} self.aggregates = {} - def get_xml_items(self, model_dir): + def import_xml(self, model_dir): file_name = os.path.join(model_dir, 'parameters.xml') if os.path.exists(file_name) is False: print(f'{file_name} not found!') return - tree = xml.etree.ElementTree.parse(file_name) + tree = parse(file_name) root = tree.getroot() assert root.tag == 'RBAParameters' + self.functions = RbaFunction.import_xml(root.find('listOfFunctions')) + self.aggregates = RbaAggregate.import_xml(root.find('listOfAggregates')) - self.functions = RbaFunction.get_xml_items(root) - self.aggregates = RbaAggregate.get_xml_items(root) + def export_xml(self, model_dir): + + file_name = os.path.join(model_dir, 'parameters.xml') + root = Element('RBAParameters') + + functions = SubElement(root, 'listOfFunctions') + for item in self.functions.values(): + functions.append(item.export_xml()) + + aggregates = SubElement(root, 'listOfAggregates') + for item in self.aggregates.values(): + aggregates.append(item.export_xml()) + + tree = ElementTree(root) + indent(tree) + tree.write(file_name) def get_value_info(self, value): if value in self.functions: @@ -34,7 +50,7 @@ class RbaParameters: value_info = self.aggregates[value].value_info return value_info - def get_df_items(self, p_type): + def to_df(self, p_type): df = None if p_type == 'functions': df = pd.DataFrame([item.to_dict() for item in self.functions.values()]) @@ -48,16 +64,18 @@ class RbaParameters: def validate(self, component_ids): valid = True - missing = self.ref_parameters().difference(component_ids['functions']) + missing = self.ref_functions(self.aggregates).difference(component_ids['functions']) if len(missing) > 0: print('functions used in aggregates not defined:', missing) valid = False return valid - def ref_parameters(self): + def ref_functions(self, ref_parameters): + # add functions used in aggregates refs = set() - for a in self.aggregates.values(): - refs |= set(a.functions) + for param in ref_parameters: + if param in self.aggregates: + refs |= set(self.aggregates[param].functions) return refs @@ -71,10 +89,9 @@ class RbaFunction: self.value_info = '' @staticmethod - def get_xml_items(root): + def import_xml(functions): data = {} - functions = root.find('listOfFunctions') for function in functions.findall('function'): fid = function.attrib['id'] rba_function = RbaFunction(fid) @@ -90,6 +107,16 @@ class RbaFunction: data[fid] = rba_function return data + def export_xml(self): + attribs = {'id': self.id, 'type': self.type, 'variable': self.variable} + function = Element('function', attribs) + + if len(self.parameters) > 0: + parameters = SubElement(function, 'listOfParameters') + for parameter, value in self.parameters.items(): + SubElement(parameters, 'parameter', {'id': parameter, 'value': str(value)}) + return function + def set_value_info(self): value = f'{self.type}: ' if self.type == 'constant': @@ -123,10 +150,9 @@ class RbaAggregate: self.value_info = '' @staticmethod - def get_xml_items(root): + def import_xml(aggregates): data = {} - aggregates = root.find('listOfAggregates') for aggregate in aggregates.findall('aggregate'): aid = aggregate.attrib['id'] rba_aggregate = RbaAggregate(aid) @@ -138,9 +164,17 @@ class RbaAggregate: data[aid] = rba_aggregate return data + def export_xml(self): + attribs = {'id': self.id, 'type': self.type} + aggregate = Element('aggregate', attribs) + + function_refs = SubElement(aggregate, 'listOfFunctionReferences') + for function_ref in self.functions: + SubElement(function_refs, 'functionReference', {'function': function_ref}) + return aggregate + def set_value_info(self): self.value_info = f'aggregate: {self.type}' def to_dict(self): - functions = ', '.join(self.functions) - return {'aggregate': self.id, 'type': self.type, 'functions': functions} + return {'aggregate': self.id, 'type': self.type, 'functions': ', '.join(self.functions)} diff --git a/rbaxdf/model/rba_processes.py b/rbaxdf/model/rba_processes.py index e5d256c..c193228 100644 --- a/rbaxdf/model/rba_processes.py +++ b/rbaxdf/model/rba_processes.py @@ -6,7 +6,7 @@ Peter Schubert, CCB, HHU Duesseldorf, December 2022 import os import numpy as np import pandas as pd -import xml.etree.ElementTree +from xml.etree.ElementTree import parse, ElementTree, Element, SubElement, indent from .rba_target_value import RbaTargetValue from rbaxdf.utils.et_utils import get_species_refs @@ -18,20 +18,37 @@ class RbaProcesses: self.processes = {} self.processing_maps = {} - def get_xml_items(self, model_dir): + def import_xml(self, model_dir): file_name = os.path.join(model_dir, 'processes.xml') if os.path.exists(file_name) is False: print(f'{file_name} not found!') return - tree = xml.etree.ElementTree.parse(file_name) + tree = parse(file_name) root = tree.getroot() assert root.tag == 'RBAProcesses' - self.processes = RbaProcess.get_xml_items(root) - self.processing_maps = RbaProcessingMap.get_xml_items(root) + self.processes = RbaProcess.import_xml(root.find('listOfProcesses')) + self.processing_maps = RbaProcessingMap.import_xml(root.find('listOfProcessingMaps')) - def get_df_items(self, p_type): + def export_xml(self, model_dir): + + file_name = os.path.join(model_dir, 'processes.xml') + root = Element('RBAProcesses') + + processes = SubElement(root, 'listOfProcesses') + for item in self.processes.values(): + processes.append(item.export_xml()) + + pmaps = SubElement(root, 'listOfProcessingMaps') + for item in self.processing_maps.values(): + pmaps.append(item.export_xml()) + + tree = ElementTree(root) + indent(tree) + tree.write(file_name) + + def to_df(self, p_type): df = None if p_type == 'processes': df = pd.DataFrame([item.to_dict() for item in self.processes.values()]) @@ -134,10 +151,9 @@ class RbaProcess: self.degradations = {} @staticmethod - def get_xml_items(root): + def import_xml(processes): data = {} - processes = root.find('listOfProcesses') for process in processes.findall('process'): pid = process.attrib['id'] rba_process = RbaProcess(pid) @@ -150,7 +166,6 @@ class RbaProcess: capacity = RbaTargetValue(machinery.find('capacity')) rba_process.machinery = {'capacity': capacity, 'reactants': reactants, 'products': products} - # TODO: use an utilty function processings = process.find('processings') if processings is not None: productions = processings.find('listOfProductions') @@ -173,12 +188,46 @@ class RbaProcess: data[pid] = rba_process return data + def export_xml(self): + process = Element('process', {'id': self.id, 'name': self.name}) + + if len(self.machinery) > 0: + machinery = SubElement(process, 'machinery') + composition = SubElement(machinery, 'machineryComposition') + if len(self.machinery['reactants']) > 0: + reactants = SubElement(composition, 'listOfReactants') + for sid, stoic in self.machinery['reactants'].items(): + SubElement(reactants, 'speciesReference', {'species': sid, 'stoichiometry': str(stoic)}) + if len(self.machinery['products']) > 0: + products = SubElement(composition, 'listOfProducts') + for sid, stoic in self.machinery['products'].items(): + SubElement(products, 'speciesReference', {'species': sid, 'stoichiometry': str(stoic)}) + SubElement(machinery, 'capacity', self.machinery['capacity'].to_dict()) + if len(self.productions) + len(self.degradations) > 0: + processings = SubElement(process, 'processings') + if len(self.productions) > 0: + productions = SubElement(processings, 'listOfProductions') + attribs = {'processingMap': self.productions['processingMap'], 'set': self.productions['set']} + processing = SubElement(productions, 'processing', attribs) + inputs = SubElement(processing, 'listOfInputs') + for sid in self.productions['inputs']: + SubElement(inputs, 'speciesReference', {'species': sid, 'stoichiometry': '1'}) + if len(self.degradations) > 0: + degradations = SubElement(processings, 'listOfDegradations') + attribs = {'processingMap': self.degradations['processingMap'], 'set': self.degradations['set']} + processing = SubElement(degradations, 'processing', attribs) + inputs = SubElement(processing, 'listOfInputs') + for sid in self.degradations['inputs']: + SubElement(inputs, 'speciesReference', {'species': sid, 'stoichiometry': '1'}) + return process + def to_dict(self): mach_capacity = '' mach_reactants = '' mach_products = '' if len(self.machinery) > 0: - mach_capacity = self.machinery['capacity'].get_str() + mach_capacity = ', '.join([f'{key}={val}' + for key, val in self.machinery['capacity'].to_dict().items()]) mach_reactants = '; '.join([f'species={species}, stoic={stoic}' for species, stoic in self.machinery['reactants'].items()]) mach_products = '; '.join([f'species={species}, stoic={stoic}' @@ -216,10 +265,9 @@ class RbaProcessingMap: self.component_processings = {} @staticmethod - def get_xml_items(root): + def import_xml(processing_maps): data = {} - processing_maps = root.find('listOfProcessingMaps') for processing_map in processing_maps.findall('processingMap'): pmid = processing_map.attrib['id'] rba_pmap = RbaProcessingMap(pmid) @@ -241,6 +289,35 @@ class RbaProcessingMap: data[pmid] = rba_pmap return data + def export_xml(self): + pmap = Element('processingMap', {'id': self.id}) + + if len(self.constant_processing) > 0: + const_proc = SubElement(pmap, 'constantProcessing') + if len(self.constant_processing['reactants']) > 0: + reactants = SubElement(const_proc, 'listOfReactants') + for sid, stoic in self.constant_processing['reactants'].items(): + SubElement(reactants, 'speciesReference', {'species': sid, 'stoichiometry': str(stoic)}) + if len(self.constant_processing['products']) > 0: + products = SubElement(const_proc, 'listOfProducts') + for sid, stoic in self.constant_processing['products'].items(): + SubElement(products, 'speciesReference', {'species': sid, 'stoichiometry': str(stoic)}) + + if len(self.component_processings) > 0: + comp_procs = SubElement(pmap, 'listOfComponentProcessings') + for component, params in self.component_processings.items(): + attribs = {'component': component, 'machineryCost': str(params['cost'])} + comp_proc = SubElement(comp_procs, 'componentProcessing', attribs) + if len(params['reactants']) > 0: + reactants = SubElement(comp_proc, 'listOfReactants') + for sid, stoic in params['reactants'].items(): + SubElement(reactants, 'speciesReference', {'species': sid, 'stoichiometry': str(stoic)}) + if len(params['products']) > 0: + products = SubElement(comp_proc, 'listOfProducts') + for sid, stoic in params['products'].items(): + SubElement(products, 'speciesReference', {'species': sid, 'stoichiometry': str(stoic)}) + return pmap + def to_df(self): pmid = self.id data = [] diff --git a/rbaxdf/model/rba_target_value.py b/rbaxdf/model/rba_target_value.py index 17e6f7e..e5ee386 100644 --- a/rbaxdf/model/rba_target_value.py +++ b/rbaxdf/model/rba_target_value.py @@ -14,12 +14,15 @@ class RbaTargetValue: self.lower_bound = None self.upper_bound = None - def get_str(self): - target_values = [] + def to_dict(self): + target_values = {} if self.lower_bound is not None: - target_values.append(f'lowerBound={self.lower_bound}') + target_values['lowerBound'] = self.lower_bound if self.upper_bound is not None: - target_values.append(f'upperBound={self.upper_bound}') + target_values['upperBound'] = self.upper_bound if self.value is not None: - target_values.append(f'value={self.value}') - return ', '.join(target_values) + target_values['value'] = self.value + return target_values + + def to_str(self): + return ', '.join([f'{key}={val}' for key, val in self.to_dict().items()]) diff --git a/rbaxdf/model/rba_targets.py b/rbaxdf/model/rba_targets.py index 11b2cdc..fadf2e7 100644 --- a/rbaxdf/model/rba_targets.py +++ b/rbaxdf/model/rba_targets.py @@ -5,7 +5,7 @@ Peter Schubert, CCB, HHU Duesseldorf, December 2022 import os import pandas as pd -import xml.etree.ElementTree +from xml.etree.ElementTree import parse, ElementTree, Element, SubElement, indent from rbaxdf.utils.et_utils import get_target_species, get_target_reactions @@ -15,19 +15,29 @@ class RbaTargets: def __init__(self): self.target_groups = {} - def get_xml_items(self, model_dir): + def import_xml(self, model_dir): file_name = os.path.join(model_dir, 'targets.xml') if os.path.exists(file_name) is False: print(f'{file_name} not found!') return {} - tree = xml.etree.ElementTree.parse(file_name) + tree = parse(file_name) root = tree.getroot() assert root.tag == 'RBATargets' - self.target_groups = RbaTargetGroup.get_xml_items(root) + self.target_groups = RbaTargetGroup.import_xml(root.find('listOfTargetGroups')) - def get_df_items(self): + def export_xml(self, model_dir): + file_name = os.path.join(model_dir, 'targets.xml') + root = Element('RBATargets') + target_groups = SubElement(root, 'listOfTargetGroups') + for item in self.target_groups.values(): + target_groups.append(item.export_xml()) + tree = ElementTree(root) + indent(tree) + tree.write(file_name) + + def to_df(self): data = [] for tgid, tg in self.target_groups.items(): tdict = tg.to_dict() @@ -70,7 +80,7 @@ class RbaTargets: for target_type in ['concentrations', 'production_fluxes', 'degradation_fluxes', 'reaction_fluxes']: targets = getattr(tg, target_type) for target in targets.values(): - refs |= {tv.split('=')[1].strip() for tv in target.get_str().split(',')} + refs |= set(target.to_dict().values()) return refs @@ -84,9 +94,8 @@ class RbaTargetGroup: self.reaction_fluxes = {} @staticmethod - def get_xml_items(root): + def import_xml(target_groups): data = {} - target_groups = root.find('listOfTargetGroups') for target_group in target_groups.findall('targetGroup'): tgid = target_group.attrib.get('id', '') rba_target = RbaTargetGroup(tgid) @@ -97,10 +106,31 @@ class RbaTargetGroup: data[tgid] = rba_target return data + def export_xml(self): + target_group = Element('targetGroup', {'id': self.id}) + for target_type, tag in {'concentrations': 'listOfConcentrations', + 'production_fluxes': 'listOfProductionFluxes', + 'degradation_fluxes': 'listOfDegradationFluxes'}.items(): + targets = getattr(self, target_type) + if len(targets) > 0: + lo_targets = SubElement(target_group, tag) + for target, value in targets.items(): + attribs = value.to_dict() | {'species': target} + SubElement(lo_targets, 'targetSpecies', attribs) + + targets = self.reaction_fluxes + if len(targets) > 0: + lo_targets = SubElement(target_group, 'listOfReactionFluxes') + for target, value in targets.items(): + attribs = value.to_dict() | {'reaction': target} + SubElement(lo_targets, 'targetReaction', attribs) + + return target_group + def to_dict(self): - conc = {target: value.get_str() for target, value in self.concentrations.items()} - prod_fluxes = {target: value.get_str() for target, value in self.production_fluxes.items()} - degr_fluxes = {target: value.get_str() for target, value in self.degradation_fluxes.items()} - reac_fluxes = {target: value.get_str() for target, value in self.reaction_fluxes.items()} + conc = {target: value.to_str() for target, value in self.concentrations.items()} + prod_fluxes = {target: value.to_str() for target, value in self.production_fluxes.items()} + degr_fluxes = {target: value.to_str() for target, value in self.degradation_fluxes.items()} + reac_fluxes = {target: value.to_str() for target, value in self.reaction_fluxes.items()} return {'targetGroup': self.id, 'concentrations': conc, 'productionFluxes': prod_fluxes, 'degradationFluxes': degr_fluxes, 'reactionFluxes': reac_fluxes} -- GitLab