Source code for boltz_data.ccd._from_mmcif

"""Read chemical components from mmCIF format."""

import gemmi

from ._constants import MMCIF_BOND_ORDER_TO_NUMERIC
from ._models import ChemicalComponent, ChemicalComponentAtom, ChemicalComponentBond
from ._utils import clean_string


def read_chemical_component_dictionary_from_mmcif(
    mmcif: gemmi.cif.Document | gemmi.cif.Block, /
) -> dict[str, ChemicalComponent]:
    """
    Read chemical component dictionary from an mmCIF block or document.

    Component metadata is taken from ``_chem_comp``, atoms from
    ``_chem_comp_atom`` and bonds from ``_chem_comp_bond``. Atoms whose
    ``type_symbol`` is exactly ``"H"`` are omitted, together with every bond
    of the same component that references one of them.

    Args:
        mmcif: The mmCIF block containing chemical component definitions, or
            a whole document. For a document every block is read in order and
            the results are merged; a component ID seen again in a later
            block replaces the earlier entry.

    Returns:
        Dictionary mapping component IDs to ChemicalComponent objects. The
        keys are the ``_chem_comp.id`` values exactly as yielded by
        ``mmcif.find`` (they are not passed through ``clean_string``, unlike
        the ``comp_id`` stored on each component).

    Raises:
        KeyError: If an atom or bond row references a ``comp_id`` that has no
            ``_chem_comp`` row in the same block.
        ValueError: If an atom's ``charge`` value is not an integer literal.
    """
    chemical_component_dictionary: dict[str, ChemicalComponent] = {}

    # A document is handled by reading each of its blocks and merging.
    if isinstance(mmcif, gemmi.cif.Document):
        for block in mmcif:
            chemical_component_dictionary.update(
                read_chemical_component_dictionary_from_mmcif(block)
            )
        return chemical_component_dictionary

    # Read component metadata
    for comp_id, comp_type, name in mmcif.find("_chem_comp.", ["id", "type", "name"]):
        # NOTE(review): the key is the raw comp_id while the stored comp_id is
        # cleaned; atom/bond rows below look components up by the raw value
        # too, so the three categories must spell the ID identically.
        chemical_component_dictionary[comp_id] = ChemicalComponent(
            comp_id=clean_string(comp_id),
            type=clean_string(comp_type),
            name=clean_string(name),
            atoms={},
            bonds={},
        )

    # Track omitted hydrogen atoms as (comp_id, atom_id) pairs. Atom names are
    # only unique within one component, so keying on the bare atom name would
    # let a hydrogen in one component (e.g. "HG" in a residue) suppress bonds
    # of an unrelated heavy atom with the same name in another component
    # (e.g. mercury "HG") when both live in the same block.
    omitted_atoms: set[tuple[str, str]] = set()

    # Read atoms
    for comp_id, atom_id, symbol, charge in mmcif.find(
        "_chem_comp_atom.", ["comp_id", "atom_id", "type_symbol", "charge"]
    ):
        # Skip hydrogen atoms
        if symbol == "H":
            omitted_atoms.add((comp_id, atom_id))
            continue

        # Add atom to component
        cleaned_atom_id = clean_string(atom_id)
        chemical_component_dictionary[comp_id].atoms[cleaned_atom_id] = ChemicalComponentAtom(
            atom_id=cleaned_atom_id,
            element=symbol,
            charge=int(charge),
        )

    # Read bonds
    for comp_id, atom_id_1, atom_id_2, value_order in mmcif.find(
        "_chem_comp_bond.", ["comp_id", "atom_id_1", "atom_id_2", "value_order"]
    ):
        # Skip bonds involving an omitted hydrogen of this same component
        if (comp_id, atom_id_1) in omitted_atoms or (comp_id, atom_id_2) in omitted_atoms:
            continue

        # Add bond to component, keyed by the ordered pair of cleaned atom IDs
        cleaned_atom_id_1 = clean_string(atom_id_1)
        cleaned_atom_id_2 = clean_string(atom_id_2)
        chemical_component_dictionary[comp_id].bonds[(cleaned_atom_id_1, cleaned_atom_id_2)] = (
            ChemicalComponentBond(
                atom_id_1=cleaned_atom_id_1,
                atom_id_2=cleaned_atom_id_2,
                # Bond orders missing from the lookup table fall back to 1.
                order=MMCIF_BOND_ORDER_TO_NUMERIC.get(value_order.lower(), 1),
            )
        )

    return chemical_component_dictionary