Source code for boltz_data.rdkit._from_definition

from collections.abc import Mapping

from rdkit import Chem

from boltz_data.ccd import ChemicalComponent
from boltz_data.definition import (
    DNADefinition,
    EntityDefinition,
    ProteinDefinition,
    RNADefinition,
)
from boltz_data.sequence import BACKBONE_ATOMS, residue_names_from_sequence

from ._from_ccd import add_chemical_component_to_rdmol, rdmol_from_chemical_component


def rdmol_from_definition(
    definition: EntityDefinition,
    /,
    *,
    chemical_component_dictionary: Mapping[str, ChemicalComponent],
) -> Chem.Mol:
    """Create an RDKit Mol object from an EntityDefinition.

    Dispatches on ``definition.type``:

    * ``"ligand_ccd"``: looks up ``definition.comp_id`` in
      ``chemical_component_dictionary`` and converts that component.
    * ``"ligand_smiles"``: parses ``definition.smiles`` with RDKit.
    * ``"protein"``, ``"dna"``, ``"rna"``: delegates to
      ``rdmol_from_polymer_definition``.

    Args:
        definition: The entity to convert (positional-only).
        chemical_component_dictionary: Mapping from component identifier to
            its chemical component, used for CCD ligands and polymer residues.

    Returns:
        The RDKit molecule built for the entity.

    Raises:
        NotImplementedError: If ``definition.type`` is ``"branched_polymer"``.
        ValueError: If ``definition.type`` is not a recognised entity type, or
            if a ``"ligand_smiles"`` definition carries a SMILES string that
            RDKit cannot parse.
    """
    if definition.type == "ligand_ccd":
        # A missing comp_id surfaces as the mapping's own lookup error.
        return rdmol_from_chemical_component(chemical_component_dictionary[definition.comp_id])

    if definition.type == "ligand_smiles":
        rdmol = Chem.MolFromSmiles(definition.smiles)
        # RDKit signals a parse failure by returning None instead of raising;
        # fail loudly here so callers never receive None in place of a Mol.
        if rdmol is None:
            msg = f"Could not parse SMILES string: {definition.smiles!r}"
            raise ValueError(msg)
        return rdmol

    if definition.type in {"protein", "dna", "rna"}:
        # Type narrowing for mypy - noqa: S101
        assert isinstance(definition, (ProteinDefinition, RNADefinition, DNADefinition))  # noqa: S101
        return rdmol_from_polymer_definition(definition, chemical_component_dictionary=chemical_component_dictionary)

    if definition.type == "branched_polymer":
        msg = "Branched polymer definitions are not yet supported"
        raise NotImplementedError(msg)

    msg = f"Invalid entity definition type: {definition.type}"
    raise ValueError(msg)


def rdmol_from_polymer_definition(
    definition: ProteinDefinition | RNADefinition | DNADefinition,
    /,
    *,
    chemical_component_dictionary: Mapping[str, ChemicalComponent],
) -> Chem.Mol:
    """Build one RDKit Mol for a linear polymer (protein, RNA, or DNA).

    Residue names are derived from ``definition.sequence`` via
    ``residue_names_from_sequence``. Each residue's chemical component is
    appended to a single editable molecule, numbered from 1, and joined to the
    preceding residue with a single bond between the atoms named by
    ``BACKBONE_ATOMS[definition.type]`` (``"previous"`` on the earlier residue,
    ``"next"`` on the later one). The result is sanitized before returning.

    Args:
        definition: The polymer to convert (positional-only).
        chemical_component_dictionary: Mapping from residue name to its
            chemical component.

    Returns:
        The sanitized RDKit molecule containing every residue.

    Raises:
        ValueError: If a residue name is absent from
            ``chemical_component_dictionary``.
    """
    builder = Chem.RWMol()
    # Index (within the growing molecule) of the atom on the previous residue
    # that the next residue must bond to; None until the first residue is added.
    bond_start: int | None = None
    residue_names = residue_names_from_sequence(definition.sequence, polymer_type=definition.type)

    for position, name in enumerate(residue_names):
        if name not in chemical_component_dictionary:
            msg = f"Residue name {name} not found in chemical component dictionary."
            raise ValueError(msg)

        # "OXT" is kept only on the final protein residue; "OP3" is kept only
        # on the first RNA/DNA residue.
        if definition.type == "protein" and position + 1 < len(residue_names):
            skipped_atoms: set[str] = {"OXT"}
        elif definition.type in {"rna", "dna"} and position > 0:
            skipped_atoms = {"OP3"}
        else:
            skipped_atoms = set()

        # Fresh per residue; the helper is expected to fill this with
        # atom name -> atom index entries for the atoms it adds.
        atom_index_by_name: dict[str, int] = {}
        add_chemical_component_to_rdmol(
            rdmol=builder,
            chemical_component=chemical_component_dictionary[name],
            excluded_atom_names=skipped_atoms,
            atom_id_to_atom_idx=atom_index_by_name,
            residue_number=position + 1,
        )

        link_atoms = BACKBONE_ATOMS[definition.type]
        if bond_start is not None:
            # Connect the previous residue to this one.
            builder.AddBond(
                beginAtomIdx=bond_start,
                endAtomIdx=atom_index_by_name[link_atoms["next"]],
                order=Chem.rdchem.BondType.SINGLE,
            )
        # Remember this residue's outgoing attachment point for the next loop.
        bond_start = atom_index_by_name[link_atoms["previous"]]

    molecule = builder.GetMol()
    Chem.SanitizeMol(molecule)
    return molecule