Source code for boltz_data.ccd._from_mmcif
"""Read chemical components from mmCIF format."""
import gemmi
from ._constants import MMCIF_BOND_ORDER_TO_NUMERIC
from ._models import ChemicalComponent, ChemicalComponentAtom, ChemicalComponentBond
from ._utils import clean_string
[docs]
def read_chemical_component_dictionary_from_mmcif(
    mmcif: gemmi.cif.Document | gemmi.cif.Block, /
) -> dict[str, ChemicalComponent]:
    """
    Read chemical component dictionary from an mmCIF document or block.

    Hydrogen atoms (rows whose ``type_symbol`` is exactly ``"H"``) are left out,
    together with every bond that touches one of them.

    Args:
        mmcif: Either a single mmCIF block containing chemical component
            definitions, or a whole document. For a document every block is
            read in order and the results are merged; a component ID that
            shows up again in a later block replaces the earlier entry.

    Returns:
        Dictionary mapping component IDs (as found in ``_chem_comp.id``) to
        ChemicalComponent objects.

    Raises:
        KeyError: If a retained atom or bond row names a ``comp_id`` that has
            no ``_chem_comp`` row in the same block.
        ValueError: If an atom charge is neither a CIF null (``?`` / ``.``)
            nor an integer literal.
    """
    chemical_component_dictionary: dict[str, ChemicalComponent] = {}

    # A document is handled by reading each of its blocks and merging the results.
    if isinstance(mmcif, gemmi.cif.Document):
        for block in mmcif:
            chemical_component_dictionary.update(read_chemical_component_dictionary_from_mmcif(block))
        return chemical_component_dictionary

    # Read component metadata
    for comp_id, comp_type, name in mmcif.find("_chem_comp.", ["id", "type", "name"]):
        chemical_component_dictionary[comp_id] = ChemicalComponent(
            comp_id=clean_string(comp_id),
            type=clean_string(comp_type),
            name=clean_string(name),
            atoms={},
            bonds={},
        )

    # Track hydrogen atoms to omit them. Atom IDs are only unique within one
    # component (e.g. "HG" is a hydrogen in some components and mercury in
    # another), so the key is the (comp_id, atom_id) pair rather than the bare
    # atom ID; otherwise a block holding several components could lose bonds
    # to heavy atoms.
    omitted_atoms: set[tuple[str, str]] = set()

    # Read atoms
    for comp_id, atom_id, symbol, charge in mmcif.find(
        "_chem_comp_atom.", ["comp_id", "atom_id", "type_symbol", "charge"]
    ):
        # Skip hydrogen atoms
        if symbol == "H":
            omitted_atoms.add((comp_id, atom_id))
            continue

        # The mmCIF dictionary gives _chem_comp_atom.charge a default of 0, so
        # an unknown ("?") or inapplicable (".") value is read as neutral
        # instead of making int() raise.
        formal_charge = 0 if charge in ("?", ".") else int(charge)

        # Add atom to component
        cleaned_atom_id = clean_string(atom_id)
        chemical_component_dictionary[comp_id].atoms[cleaned_atom_id] = ChemicalComponentAtom(
            atom_id=cleaned_atom_id, element=symbol, charge=formal_charge
        )

    # Read bonds
    for comp_id, atom_id_1, atom_id_2, value_order in mmcif.find(
        "_chem_comp_bond.", ["comp_id", "atom_id_1", "atom_id_2", "value_order"]
    ):
        # Skip bonds involving a hydrogen of this same component
        if (comp_id, atom_id_1) in omitted_atoms or (comp_id, atom_id_2) in omitted_atoms:
            continue

        # Add bond to component; an unrecognised bond order falls back to 1.
        first_id = clean_string(atom_id_1)
        second_id = clean_string(atom_id_2)
        chemical_component_dictionary[comp_id].bonds[(first_id, second_id)] = ChemicalComponentBond(
            atom_id_1=first_id,
            atom_id_2=second_id,
            order=MMCIF_BOND_ORDER_TO_NUMERIC.get(value_order.lower(), 1),
        )

    return chemical_component_dictionary