from collections.abc import Mapping
import numpy as np
from boltz_data.ccd import ChemicalComponent, get_builtin_chemical_component_dictionary
from boltz_data.definition import EntityDefinition, StructureDefinition
from boltz_data.mol._mol import BZBioMol
from boltz_data.mol._op import concat_bzmols
from boltz_data.sequence import residue_names_from_sequence
from ._from_ccd import bzmol_from_chemical_components, polymer_bzmol_from_chemical_components
[docs]
def bzmol_from_structure(
    structure: StructureDefinition, /, chemical_component_dictionary: Mapping[str, ChemicalComponent] | None = None
) -> BZBioMol:
    """Create a BZMol from a structure definition.

    One BZMol is built per chain with ``bzmol_from_definition`` and the results are
    passed to ``concat_bzmols`` in the iteration order of ``structure.chains``. When the
    structure declares bonds of its own, they are appended after the bonds already
    present on the concatenated molecule.

    Args:
        structure: Structure definition providing ``chains``, ``entities`` and
            (optionally) ``bonds``.
        chemical_component_dictionary: Component lookup forwarded unchanged to
            ``bzmol_from_definition``.

    Returns:
        The concatenated molecule, rebuilt with the extra bonds when any are declared.

    Raises:
        KeyError: If a bond endpoint (chain id, residue index, atom name) does not
            match any atom of the concatenated molecule.
    """
    # Create one BZMol per chain.
    bzmols: list[BZBioMol] = [
        bzmol_from_definition(
            structure.entities[chain.entity_idx],
            chemical_component_dictionary=chemical_component_dictionary,
            chain_id=chain_id,
            residue_numbers=chain.residue_numbers,
        )
        for chain_id, chain in structure.chains.items()
    ]

    # Concatenate all BZMols.
    combined_mol = concat_bzmols(*bzmols)
    if not structure.bonds:
        return combined_mol

    # Per-atom lookup keys: (chain id, residue ordinal, atom name).
    # Assumes the per-atom arrays follow the atom order that ``bond_atoms`` indexes into.
    atom_chain_id = combined_mol.chain_id[combined_mol.residue_chain[combined_mol.atom_residue]]
    atom_residue_ordinal = combined_mol.residue_ordinal[combined_mol.atom_residue]
    atom_keys = zip(atom_chain_id, atom_residue_ordinal, combined_mol.atom_name, strict=True)

    # Use the real atom position as the index. Counting dictionary entries instead
    # would shift every index after a repeated key (a repeated key keeps the last atom).
    atom_mapping: dict[tuple[str, int, str], int] = {key: atom_idx for atom_idx, key in enumerate(atom_keys)}

    def _atom_index(bond_chain_id: str, residue_index: int, atom_name: str) -> int:
        """Return the atom index for a bond endpoint, naming the endpoint on failure."""
        try:
            return atom_mapping[(bond_chain_id, residue_index, atom_name)]
        except KeyError:
            msg = (
                f"Bond references unknown atom: chain {bond_chain_id!r}, "
                f"residue index {residue_index!r}, atom {atom_name!r}."
            )
            raise KeyError(msg) from None

    bond_atoms: list[tuple[int, int]] = []
    bond_orders: list[int] = []
    for bond in structure.bonds:
        start_atom = _atom_index(bond.chain_id_1, bond.residue_index_1, bond.atom_name_1)
        end_atom = _atom_index(bond.chain_id_2, bond.residue_index_2, bond.atom_name_2)
        bond_atoms.append((start_atom, end_atom))
        # A bond without an explicit order is treated as a single bond.
        bond_orders.append(bond.bond_order if bond.bond_order is not None else 1)

    kwargs = {
        **combined_mol.model_dump(),
        "bond_atoms": np.concatenate([combined_mol.bond_atoms, bond_atoms]),
        # The key must match the ``bond_order`` attribute read here so that it replaces
        # the dumped value; otherwise the order array would not grow with ``bond_atoms``.
        "bond_order": np.concatenate([combined_mol.bond_order, bond_orders]),
    }
    return BZBioMol(**kwargs)  # type: ignore[missing-argument]
[docs]
def bzmol_from_definition(
definition: EntityDefinition,
/,
*,
chemical_component_dictionary: Mapping[str, ChemicalComponent] | None = None,
chain_id: str,
residue_numbers: list[int] | None = None,
) -> BZBioMol:
"""Create a BZMol from an entity definition."""
chemical_component_dictionary = chemical_component_dictionary or get_builtin_chemical_component_dictionary()
match definition.type:
case "protein" | "dna" | "rna":
residue_names = residue_names_from_sequence(definition.sequence, polymer_type=definition.type)
return polymer_bzmol_from_chemical_components(
chemical_components=[chemical_component_dictionary[residue_name] for residue_name in residue_names],
polymer_type=definition.type,
chain_id=chain_id,
residue_numbers=residue_numbers,
description=definition.description,
bonds=definition.bonds,
)
case "ligand_ccd":
return bzmol_from_chemical_components(
chemical_components=[chemical_component_dictionary[definition.comp_id]],
chain_id=chain_id,
residue_numbers=residue_numbers,
description=definition.description,
)
case "branched_polymer":
return bzmol_from_chemical_components(
chemical_components=[chemical_component_dictionary[comp_id] for comp_id in definition.comp_ids],
chain_id=chain_id,
bonds=definition.bonds,
residue_numbers=residue_numbers,
description=definition.description,
)
case _:
msg = f"Entity type {definition.type} not supported yet."
raise NotImplementedError(msg)