# Source code for boltz_data.mol._from._from_rdmol

"""Convert RDKit molecules to BZMol objects."""

import numpy as np
from rdkit import Chem
from rdkit.Chem import Mol

from boltz_data.mol._mol import BZMol

# Integer bond order for each RDKit bond type this module handles.
# DATIVE bonds are counted as order 1. AROMATIC has no entry here;
# get_bond_orders_kekule kekulizes the molecule before looking bond types up.
RD_BOND_ORDER = {
    Chem.BondType.SINGLE: 1,
    Chem.BondType.DOUBLE: 2,
    Chem.BondType.TRIPLE: 3,
    Chem.BondType.DATIVE: 1,
}


def get_bond_orders_kekule(rdmol: Mol, /) -> list[int]:
    """
    Return the integer order of every bond of ``rdmol`` in its Kekulé form.

    Kekulization is applied to a ``Chem.Mol(rdmol)`` copy rather than to the
    argument itself. Orders are listed in the iteration order of ``GetBonds()``
    and are looked up in ``RD_BOND_ORDER``, so a bond type that is missing from
    that table raises ``KeyError``.

    Args:
        rdmol: An RDKit Mol object.

    Returns:
        One integer bond order per bond.
    """
    kekulized = Chem.Mol(rdmol)
    Chem.Kekulize(kekulized)

    orders: list[int] = []
    for bond in kekulized.GetBonds():  # type: ignore[no-untyped-call]
        orders.append(RD_BOND_ORDER[bond.GetBondType()])
    return orders


def bzmol_from_rdmol(rdmol: Mol, /, *, conformer_id: int = -1) -> BZMol:
    """
    Convert an RDKit molecule to a BZMol.

    This function extracts the molecular structure from an RDKit molecule to
    create a BZMol object. The resulting BZMol contains only atoms and bonds,
    without residue or chain information.

    Args:
        rdmol: An RDKit Mol object.
        conformer_id: The ID of the conformer to extract coordinates from. It is
            only consulted when the molecule has at least one conformer.

    Returns:
        A BZMol object containing atoms and bonds. Atoms are named
        ``<element symbol><1-based index>`` (e.g. ``C1``, ``C2``, ``O3``) and bond
        orders come from the kekulized form of the molecule. If ``rdmol`` has at
        least one conformer, ``atom_coordinates`` is a float32 array with one
        ``(x, y, z)`` row per atom and ``atom_resolved`` is all ``True``;
        otherwise both are ``None``.

    Raises:
        Any error RDKit's ``GetConformer`` raises for an unknown ``conformer_id``
        propagates to the caller, as does the ``KeyError`` raised by
        ``get_bond_orders_kekule`` for a bond type it has no order for.

    Example:
        >>> from rdkit import Chem
        >>> rdmol = Chem.MolFromSmiles("CCO")
        >>> mol = bzmol_from_rdmol(rdmol)
        >>> mol.num_atoms  # Without hydrogens
        3
    """
    bond_orders_list = get_bond_orders_kekule(rdmol)

    # Both the conformer check and the conformer lookup are loop-invariant, so
    # resolve them once instead of once per atom.
    has_coordinates = rdmol.GetNumConformers() > 0
    conformer = rdmol.GetConformer(conformer_id) if has_coordinates else None

    # Extract atom information
    atom_names = []
    atom_elements_list = []
    atom_charges = []
    atom_coordinates = []
    for i, atom in enumerate(rdmol.GetAtoms()):  # type: ignore[no-untyped-call]
        # Generate atom name based on element and index
        atom_names.append(f"{atom.GetSymbol()}{i + 1}")
        atom_elements_list.append(atom.GetAtomicNum())
        atom_charges.append(atom.GetFormalCharge())
        if conformer is not None:
            position = conformer.GetAtomPosition(atom.GetIdx())
            atom_coordinates.append((position.x, position.y, position.z))

    # Extract bonds
    bonds = [[bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()] for bond in rdmol.GetBonds()]  # type: ignore[no-untyped-call]
    # The reshape keeps the (n, 2) layout even when the molecule has no bonds.
    bond_atoms = np.array(bonds, dtype=np.uint32).reshape(-1, 2)
    bond_orders = np.array(bond_orders_list, dtype=np.uint8)

    if has_coordinates:
        # The reshape keeps the (n, 3) layout even when the molecule has no atoms.
        coordinates = np.array(atom_coordinates, dtype=np.float32).reshape(-1, 3)
        resolved = np.ones(rdmol.GetNumAtoms(), dtype=bool)
    else:
        coordinates = None
        resolved = None

    return BZMol(
        # An explicit string dtype stops an empty name list from becoming a float64 array.
        atom_name=np.array(atom_names, dtype=str),
        atom_element=np.array(atom_elements_list, dtype=np.uint8),
        atom_charge=np.array(atom_charges, dtype=np.int8),
        atom_coordinates=coordinates,
        atom_resolved=resolved,
        bond_atoms=bond_atoms,
        bond_order=bond_orders,
    )