Source code for boltz_data.rdkit._from_definition
from collections.abc import Mapping
from rdkit import Chem
from boltz_data.ccd import ChemicalComponent
from boltz_data.definition import (
DNADefinition,
EntityDefinition,
ProteinDefinition,
RNADefinition,
)
from boltz_data.sequence import BACKBONE_ATOMS, residue_names_from_sequence
from ._from_ccd import add_chemical_component_to_rdmol, rdmol_from_chemical_component
[docs]
def rdmol_from_definition(
    definition: EntityDefinition,
    /,
    *,
    chemical_component_dictionary: Mapping[str, ChemicalComponent],
) -> Chem.Mol:
    """Create an RDKit Mol object from an EntityDefinition.

    The conversion is chosen by ``definition.type``:

    * ``"ligand_ccd"`` - ``definition.comp_id`` is looked up in
      ``chemical_component_dictionary`` and the component is converted with
      ``rdmol_from_chemical_component``.
    * ``"ligand_smiles"`` - ``definition.smiles`` is parsed with
      ``Chem.MolFromSmiles``.
    * ``"protein"``, ``"dna"``, ``"rna"`` - delegated to
      ``rdmol_from_polymer_definition``.

    Args:
        definition: The entity to convert (positional-only).
        chemical_component_dictionary: Mapping from component id to its
            chemical component, used for CCD ligands and polymer residues.

    Returns:
        The molecule built for the entity.

    Raises:
        NotImplementedError: If ``definition.type`` is ``"branched_polymer"``.
        ValueError: If ``definition.type`` is not one of the types above, or
            if a ``"ligand_smiles"`` definition holds a SMILES string that
            RDKit cannot parse.

    A ``comp_id`` missing from ``chemical_component_dictionary`` is not handled
    here; the mapping's own lookup error (``KeyError`` for a plain dict)
    propagates.
    """
    if definition.type == "ligand_ccd":
        return rdmol_from_chemical_component(chemical_component_dictionary[definition.comp_id])

    if definition.type == "ligand_smiles":
        rdmol = Chem.MolFromSmiles(definition.smiles)
        # MolFromSmiles signals a parse failure by returning None rather than
        # raising; surface that here instead of handing None to the caller.
        if rdmol is None:
            msg = f"Could not parse SMILES string: {definition.smiles!r}"
            raise ValueError(msg)
        return rdmol

    if definition.type in {"protein", "dna", "rna"}:
        # Type narrowing for mypy - noqa: S101
        assert isinstance(definition, (ProteinDefinition, RNADefinition, DNADefinition))  # noqa: S101
        return rdmol_from_polymer_definition(definition, chemical_component_dictionary=chemical_component_dictionary)

    if definition.type == "branched_polymer":
        msg = "Branched polymer definitions are not yet supported"
        raise NotImplementedError(msg)

    msg = f"Invalid entity definition type: {definition.type}"
    raise ValueError(msg)
def rdmol_from_polymer_definition(
    definition: ProteinDefinition | RNADefinition | DNADefinition,
    /,
    *,
    chemical_component_dictionary: Mapping[str, ChemicalComponent],
) -> Chem.Mol:
    """Create an RDKit Mol object from a polymer definition (protein, RNA, or DNA).

    The sequence is expanded to residue names with
    ``residue_names_from_sequence``. Each residue's chemical component is
    appended to one editable molecule, numbered from 1, and every consecutive
    pair of residues is joined by a single bond. The bond runs from the atom
    named ``BACKBONE_ATOMS[definition.type]["previous"]`` in the earlier
    residue to the atom named ``BACKBONE_ATOMS[definition.type]["next"]`` in
    the later one.

    Atom names passed to ``add_chemical_component_to_rdmol`` as excluded:

    * ``"OXT"`` for every protein residue except the last;
    * ``"OP3"`` for every RNA/DNA residue except the first.

    Args:
        definition: The polymer to build (positional-only).
        chemical_component_dictionary: Mapping from residue name to its
            chemical component.

    Returns:
        The sanitized molecule for the whole chain.

    Raises:
        ValueError: If a residue name is absent from
            ``chemical_component_dictionary``.
    """
    builder = Chem.RWMol()
    names = residue_names_from_sequence(definition.sequence, polymer_type=definition.type)
    residue_count = len(names)

    # Index (within ``builder``) of the atom the following residue bonds to;
    # None until the first residue has been added.
    link_from: int | None = None

    for number, residue_name in enumerate(names, start=1):
        if residue_name not in chemical_component_dictionary:
            msg = f"Residue name {residue_name} not found in chemical component dictionary."
            raise ValueError(msg)

        skipped: set[str] = set()
        if definition.type == "protein":
            if number < residue_count:
                skipped.add("OXT")
        elif number > 1 and definition.type in {"rna", "dna"}:
            skipped.add("OP3")

        # Starts empty for each residue and is read back below, so the helper
        # is expected to record this residue's atom name -> atom index here.
        index_by_atom_name: dict[str, int] = {}
        add_chemical_component_to_rdmol(
            rdmol=builder,
            chemical_component=chemical_component_dictionary[residue_name],
            excluded_atom_names=skipped,
            atom_id_to_atom_idx=index_by_atom_name,
            residue_number=number,
        )

        if link_from is not None:
            link_to = index_by_atom_name[BACKBONE_ATOMS[definition.type]["next"]]
            builder.AddBond(
                beginAtomIdx=link_from,
                endAtomIdx=link_to,
                order=Chem.rdchem.BondType.SINGLE,
            )

        # Remember this residue's outgoing attachment atom for the next pass.
        link_from = index_by_atom_name[BACKBONE_ATOMS[definition.type]["previous"]]

    polymer = builder.GetMol()
    Chem.SanitizeMol(polymer)
    return polymer