# Source code for boltz_data.mol._from._from_rdmol
"""Convert RDKit molecules to BZMol objects."""
import numpy as np
from rdkit import Chem
from rdkit.Chem import Mol
from boltz_data.mol._mol import BZMol
# Integer bond order assigned to each supported RDKit bond type.
# Dative bonds are given order 1; bond types not listed here have no entry.
RD_BOND_ORDER = {
    Chem.BondType.SINGLE: 1,
    Chem.BondType.DOUBLE: 2,
    Chem.BondType.TRIPLE: 3,
    Chem.BondType.DATIVE: 1,
}
def get_bond_orders_kekule(rdmol: Mol, /) -> list[int]:
    """
    Return the integer bond order of every bond in the Kekulé form of a molecule.

    Kekulization is applied to a copy, so the molecule passed in is not modified.

    Args:
        rdmol: An RDKit Mol object.

    Returns:
        One integer per bond, in the order yielded by ``GetBonds()``.

    Raises:
        KeyError: If a bond's type has no entry in ``RD_BOND_ORDER``.
    """
    # Work on a copy because Chem.Kekulize rewrites bond types in place.
    kekulized = Chem.Mol(rdmol)
    Chem.Kekulize(kekulized)

    orders: list[int] = []
    for bond in kekulized.GetBonds():  # type: ignore[no-untyped-call]
        orders.append(RD_BOND_ORDER[bond.GetBondType()])
    return orders
# [docs]
def bzmol_from_rdmol(rdmol: Mol, /, *, conformer_id: int = -1) -> BZMol:
    """
    Convert an RDKit molecule to a BZMol.

    This function extracts the molecular structure from an RDKit molecule
    to create a BZMol object. The resulting BZMol contains only atoms and bonds,
    without residue or chain information. Each atom is named by its element
    symbol followed by its 1-based position in the molecule (e.g. ``C1``,
    ``C2``, ``O3``).

    Args:
        rdmol: An RDKit Mol object.
        conformer_id: The ID of the conformer to extract coordinates from.
            Only used when the molecule has at least one conformer; the value
            is passed unchanged to ``rdmol.GetConformer``.

    Returns:
        A BZMol object containing atoms and bonds, without residues or chains.
        If ``rdmol`` has at least one conformer, ``atom_coordinates`` holds the
        positions of the selected conformer as float32 and ``atom_resolved``
        is all ``True``; otherwise both are ``None``.

    Example:
        >>> from rdkit import Chem
        >>> rdmol = Chem.MolFromSmiles("CCO")
        >>> mol = bzmol_from_rdmol(rdmol)
        >>> mol.num_atoms  # hydrogens are implicit, so only heavy atoms count
        3
    """
    # Kekulize up front so a molecule that cannot be kekulized fails before
    # any arrays are built.
    bond_orders_list = get_bond_orders_kekule(rdmol)

    # Extract atom information
    atoms = list(rdmol.GetAtoms())  # type: ignore[no-untyped-call]
    # Atom names are generated from the element symbol and the 1-based index.
    atom_names = [f"{atom.GetSymbol()}{i + 1}" for i, atom in enumerate(atoms)]
    atom_elements = np.array([atom.GetAtomicNum() for atom in atoms], dtype=np.uint8)
    atom_charges = np.array([atom.GetFormalCharge() for atom in atoms], dtype=np.int8)

    # Look the conformer up once instead of once per atom.
    has_coordinates = rdmol.GetNumConformers() > 0
    conformer = rdmol.GetConformer(conformer_id) if has_coordinates else None

    # Extract bonds
    bonds = [[bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()] for bond in rdmol.GetBonds()]  # type: ignore[no-untyped-call]
    # An empty list would give shape (0,), so the (0, 2) shape is spelled out.
    bond_atoms = np.array(bonds, dtype=np.uint32) if bonds else np.zeros((0, 2), dtype=np.uint32)
    bond_orders = np.array(bond_orders_list, dtype=np.uint8)

    return BZMol(
        atom_name=np.array(atom_names),
        atom_element=atom_elements,
        atom_charge=atom_charges,
        # GetPositions() returns all atom positions in atom-index order.
        atom_coordinates=(
            np.asarray(conformer.GetPositions(), dtype=np.float32) if conformer is not None else None
        ),
        atom_resolved=np.ones(len(atoms), dtype=bool) if has_coordinates else None,
        bond_atoms=bond_atoms,
        bond_order=bond_orders,
    )