Source code for sbmlxdf.misc

"""Implementation of miscellaneous functions.

Peter Schubert, HHU Duesseldorf, October 2020
"""
import re


_map_mathml2numpy = (
    # arithmetic operators
    ('abs', 'NP_NS.absolute'), ('exp', 'NP_NS.exp'), ('sqrt', 'NP_NS.sqrt'),
    ('sqr', 'NP_NS.square'), ('ln', 'NP_NS.log'), ('log10', 'NP_NS.log10'),
    ('floor', 'NP_NS.floor'), ('ceil', 'NP_NS.ceil'),
    ('factorial', 'NP_NS.math.factorial'), ('rem', 'NP_NS.fmod'),
    # relational operators
    ('eq', 'NP_NS.equal'), ('neq', 'NP_NS.not_equal'), ('gt', 'NP_NS.greater'),
    ('lt', 'NP_NS.less'), ('geq', 'NP_NS.greater_equal'),
    ('leq', 'NP_NS.less_equal'),
    # logical operators
    ('and', 'NP_NS.logical_and'), ('or', 'NP_NS.logical_or'),
    ('xor', 'NP_NS.logical_xor'), ('not', 'NP_NS.logical_not'),
    # trigonometric operators
    ('sin', 'NP_NS.sin'), ('cos', 'NP_NS.cos'), ('tan', 'NP_NS.tan'),
    ('sec', '1.0/NP_NS.cos'), ('csc', '1.0/NP_NS.sin'),
    ('cot', '1.0/NP_NS.tan'),
    ('sinh', 'NP_NS.sinh'), ('cosh', 'NP_NS.cosh'), ('tanh', 'NP_NS.tanh'),
    ('sech', '1.0/NP_NS.cosh'), ('csch', ' 1.0/NP_NS.sinh'),
    ('coth', '1.0/NP_NS.tanh'),
    ('asin', 'NP_NS.arcsin'), ('acos', 'NP_NS.arccos'),
    ('atan', 'NP_NS.arctan'), ('arcsinh', 'NP_NS.arcsinh'),
    ('arccosh', 'NP_NS.arccosh'), ('arctanh', 'NP_NS.arctanh'),
)


[docs] def mathml2numpy(mformula, np_ns='np'): """Convert mathml infix notation to a numpy notation. mathml functions and operators are converted to numpy equivalents, where possible. Functions are prefixed with numpy namespace :param mformula: mathml infix notation extracted from SBML :type mformula: str :param np_ns: numpy namespace prefix used in own Python code. Default: 'np' :type np_ns: str :returns: mathml converted to numpy notation :rtype: str """ np_formula = ' ' + mformula np_formula = re.sub(r'\s?dimensionless\s?', ' ', np_formula) np_formula = re.sub(r'\^', '**', np_formula) np_formula = re.sub(r'\s?&&\s?', ' and ', np_formula) np_formula = re.sub(r'\s?\|\|\s?', ' or ', np_formula) for mathml_f, np_f in _map_mathml2numpy: np_formula = re.sub(r'\s+' + mathml_f + r'\(', ' ' + np_f.replace('NP_NS', np_ns) + '(', np_formula) return np_formula.strip()
def get_bool_val(parameter): """Get boolean value from parameter Values imported from spreadsheets are all converted to string objects, while parameters coming from Model.to_df() may contain boolean values. 'True' objects from spreadsheets can be represented as 'True' or as numerical 1, getting converted to string. :param parameter: parameter to retrieve boolean value from :type parameter: bool or str :returns: boolean value of parameter :rtype: bool """ if type(parameter) == bool: return parameter else: return parameter.upper() == str('TRUE') or parameter == '1'
[docs] def record_generator(records_str, sep=';'): """Generator to extract individual records from a string of records. This generator does not yet handle nested records. Example: parsing through species reference records, e.g. df_reaction['rectants'] .. code-block:: python srefs = {} for sref_str in sbmlxdf.record_generator(srefs_str): params = sbmlxdf.extract_params(sref_str) srefs[params['species']] = float(params['stoic']) :param records_str: containing records separated by sep :type records_str: str :param sep: seperator used to separate records :type sep: str (default: ';') :returns: key-values pairs extracted from record :rtype: dict """ if type(records_str) == str: for record in records_str.split(sep): if len(record.strip()) > 0: yield record.strip()
[docs] def extract_params(record): """Extract parameters from a record. A single record consists of comma separated key-value pairs. Example: 'key1=val1, key2=val2, ...' is converted to {key1: val1, key2: val2, ...} :param record: key '=' value pairs separated by "," :type record: str :returns: key-values pairs extracted from record :rtype: dict """ params = {} for kv_pair in record_generator(record, sep=','): if '=' in kv_pair: k, v = kv_pair.split('=') params[k.strip()] = v.strip() return params
def extract_nested_params(s): """Extract parameters from a record. A record consists of comma separated key-value pairs. Values may contain nested records (key=[record_x, record_y, ...]), values can also be functions with several parameters, e.g. math=gamma(shape_Z, scale_Z) Example: 'key1=val1, key2=val2, ...' is converted to {key1: val1, key2: val2, ...} see also: :func:`extract_records` and :func:`extract_lo_records` :param s: key '=' value pairs separated by "," :type s: str :returns: key-values pairs :rtype: dict """ find_key = re.compile(r'\s*(?P<key>\w*)\s*=\s*') params = {} pos = 0 i = 0 while pos < len(s): m = find_key.search(s[pos:]) if m: pos += m.end(0) if pos < len(s): if s[pos] == '[': pos += 1 if pos >= len(s): break brackets = 1 for i in range(pos, len(s)): if s[i] == ']': brackets -= 1 if s[i] == '[': brackets += 1 if brackets == 0: break else: r_brackets = 0 for i in range(pos, len(s)): if s[i] == '(': r_brackets += 1 if s[i] == ')': r_brackets -= 1 if s[i] == ',' and r_brackets == 0: break if i == len(s) - 1: i += 1 params[m['key']] = s[pos:i].strip() pos = i else: break return params def extract_records(s): """Split string of records into individual records. Each record consists of comma separated key-value pairs. E.g. record1: 'key1=val1, key2=val2, ...'. Values may contain nested records (key=[record_x, record_y, ...]). Example: 'record1; record2; ...' is converted to [record1, record2, ...] see also: :func:`extract_params` and :func:`extract_lo_records` :param s: records separated by ";" :type s: str :returns: elements contain individual records :rtype: list of str """ records = [] brackets = 0 pos = 0 i = 0 while pos < len(s): for i in range(pos, len(s)): if s[i] == '[': brackets += 1 if s[i] == ']': brackets -= 1 if s[i] == ';' and brackets == 0: break if s[i] != ';': i += 1 records.append(s[pos:i].strip()) pos = i + 1 return records def extract_lo_records(s): """Split string of groups of records into strings of records per group. Supporting values with containing nested records (key=[record_x, record_y, ...]). Example: '[record1; record2; ...];[record7; record8; ...]' is converted to ['record1; record2; ...', 'record7; record8; ...'] see also: :func:`extract_params` and :func:`extract_records` :param s: string with groups of records enclosed in square brackets, separated by ";" :type s: str :returns: elements contain the string of records for each group :rtype: list of str """ lo_records = [] pos = 0 i = 0 while pos < len(s): m = re.search(r'\[', s[pos:]) if m: pos += m.end(0) brackets = 1 if pos >= len(s): break for i in range(pos, len(s)): if s[i] == '[': brackets += 1 if s[i] == ']': brackets -= 1 if brackets == 0: break if s[i] == ']': lo_records.append(s[pos:i].strip()) pos = i + 1 else: break return lo_records def get_miriam_refs(annotations, database, qualifier=None): """Extract references from MIRIAM annotation for specific database/qualifier. .. code-block:: python chebi_refs = sbmlxdf.misc.get_miriam_refs(miriam_annot, 'chebi', 'bqbiol:is') :param annotations: MIRIAM annotation string produced by sbmlxdf :type annotations: str :param database: specific resource to access, e.g. 'uniprot' :type database: str :param qualifier: specific qualifier for which to extract resouces e.g. 'bqbiol:is', (default: all) :type qualifier: str or None (default) :return: list of resources :rtype: list of str """ refs = [] if type(annotations) is str: for annotation in record_generator(annotations): fields = [item.strip() for item in annotation.split(',')] if qualifier is not None and fields[0] != qualifier: continue for field in fields[1:]: if database in field: refs.append(field.rsplit('/')[-1]) return refs
[docs] def extract_xml_attrs(xml_annots, ns=None, token=None): """Extract XML-attributes from given namespace and/or token. Example of xml_annots: 'ns_uri=http://www.hhu.de/ccb/bgm/ns, prefix=bgm, token=molecule, weight_Da=100' .. code-block:: python XML_SPECIES_NS = 'http://www.hhu.de/ccb/rba/species/ns' xml_attrs = sbmlxdf.misc.extract_xml_attrs(xml_annots, ns=XML_SPECIES_NS) :param xml_annots: XML-annotations separated by ";" :type xml_annots: str :param ns: namespace from which to collect attributes :type ns: str, optional :param token: token from which to collect attributes :type token: str, optional :returns: attribute names corresponding values :rtype: dict """ xml_attrs = {} for xml_str in record_generator(xml_annots): params = extract_params(xml_str) if (((ns is not None) and (params['ns_uri'] != ns)) or ((token is not None) and (params['token'] != token))): continue for k, v in params.items(): if k not in {'ns_uri', 'prefix', 'token'}: xml_attrs[k] = v return xml_attrs
def convert_srefs(srefs_str): """Convert species references from rectants/products. E.g. 'species=M_mal__L_e, stoic=1.0, const=True; species=M_h_e, stoic=2.0, const=True' is converted to '2.0 M_h_e + M_mal__L_e' srefs get sorted according to metabolite id :param srefs_str: ';' - separated string with species references as key/value pairs :type srefs_str: str :returns: stoichiometric string :rtype: string """ d_srefs = {} for sref in record_generator(srefs_str): params = extract_params(sref) d_srefs[params['species']] = params.get('stoic', '1.0') l_srefs = [] for sid in sorted(d_srefs.keys()): if d_srefs[sid] == '1.0': l_srefs.append(sid) else: l_srefs.append(d_srefs[sid] + ' ' + sid) return ' + '.join(l_srefs) def get_srefs_dict(reaction_str): """Generate species references from one side of reaction string. E.g. 'M_adp_c + M_atp_m -> M_adp_m + M_atp_c' is converted to {'M_adp_c': -1.0, 'M_atp_m': -1.0, 'M_adp_m': 1.0, 'M_atp_c': 1.0} :param reaction_str: reactions sting :type reaction_str: str :returns: dict with reactants/products and corresponding stochiometry :rtype: dict """ react_srefs = {} if type(reaction_str) is str: for idx, side in enumerate(re.split(r'[=-]>', reaction_str)): for sref in side.split('+'): l_sref = re.split(r'\s+', sref.strip()) stoic = float(l_sref[0]) if len(l_sref) == 2 else 1.0 sid = l_sref[-1] if sid != '': react_srefs[sid] = -stoic if idx == 0 else stoic return react_srefs def generate_srefs(stoichometric_str): """Generate species references from one side of reaction string. E.g. '2.0 M_h_e + M_mal__L_e' is converted to 'species=M_h_e, stoic=2.0, const=True; species=M_mal__L_e, stoic=1.0, const=True' :param stoichometric_str: stoichiometric string :type stoichometric_str: str :returns: ';'-separated string with species references as key/value pairs :rtype: string """ d_srefs = {} for sref in stoichometric_str.split('+'): l_sref = re.split(r'\s+', sref.strip()) stoic = l_sref[0] if len(l_sref) == 2 else '1.0' sid = l_sref[-1] if sid != '': d_srefs[sid] = stoic l_srefs = [] for sid, stoic in d_srefs.items(): l_srefs.append('species=' + sid + ', stoic=' + stoic + ', const=True') return '; '.join(l_srefs) def translate_reaction_string(df_reactions): """Extracts reactants/products/reversibility from reaction string. To support defining reactants and products with in a more readable format. A simplified version of tellurium/antimony, see: https://tellurium.readthedocs.io/en/latest/antimony.html Used, e.g. when reactants/products not defined in the dataframe e.g. 'M_fum_c + M_h2o_c -> M_mal__L_c' for a reversible reaction e.g. 'M_ac_e => ' for an irreversible reaction with no product :param df_reactions: pandas DataFrames of reaction objects :type df_reactions: dataframe :returns: updated reactions table :rtype: pandas DataFrame """ df_reactions = df_reactions.copy() for rid, reaction_string in df_reactions['reactionString'].items(): if type(reaction_string) is str: if ('->' in reaction_string) or ('=>' in reaction_string): components = re.split(r'[=-]>', reaction_string) else: # actually an error components = ['', ''] df_reactions.at[rid, 'reversible'] = ('->' in reaction_string) df_reactions.at[rid, 'reactants'] = generate_srefs(components[0]) df_reactions.at[rid, 'products'] = generate_srefs(components[1]) return df_reactions