Source code for boltz_data.mol._to._to_mmcif
from collections.abc import Mapping
import gemmi
import numpy as np
from rdkit.Chem import GetPeriodicTable
from boltz_data.ccd import ChemicalComponent
from boltz_data.cif import mmcif_from_structure
from boltz_data.mol._mol import BZBioMol
from boltz_data.sequence import DNA_COMP_ID_TO_ONE_LETTER, PROTEIN_COMP_ID_TO_ONE_LETTER, RNA_COMP_ID_TO_ONE_LETTER
from ._to_definition import structure_from_bzmol
# Component IDs (the keys of the comp-id -> one-letter tables) for each polymer
# type. Kept as lists so they can be handed straight to ``np.isin``.
STANDARD_PROTEIN_RESIDUE_NAMES = list(PROTEIN_COMP_ID_TO_ONE_LETTER)
STANDARD_DNA_RESIDUE_NAMES = list(DNA_COMP_ID_TO_ONE_LETTER)
STANDARD_RNA_RESIDUE_NAMES = list(RNA_COMP_ID_TO_ONE_LETTER)

# Shared RDKit periodic table, used to turn atomic numbers into element symbols.
PERIODIC_TABLE = GetPeriodicTable()
[docs]
def mmcif_from_bzmol(
    bzmol: BZBioMol,
    /,
    chemical_component_dictionary: Mapping[str, ChemicalComponent] | None = None,
    name: str = "pred",
) -> gemmi.cif.Block:
    """
    Convert a BZMol back to an mmCIF block.

    The block is produced by ``mmcif_from_structure`` from the structure derived
    from ``bzmol``; its ``_atom_site`` category is then set from the per-atom
    arrays of ``bzmol``, with one row per resolved atom. When
    ``bzmol.atom_resolved`` is ``None`` every atom is treated as resolved.

    Every row is written with occupancy 1.0 and B-factor 0.0; alternate location
    and insertion code are left as ``None``. Coordinates are formatted with four
    decimal places, or left as ``None`` when ``bzmol.atom_coordinates`` is
    ``None``.

    Args:
        bzmol: The BZMol to convert.
        chemical_component_dictionary: Dictionary mapping component IDs to chemical components.
        name: The name to give the mmCIF structure.

    Returns:
        An mmCIF block representing the structure in the BZMol.
    """
    structure = structure_from_bzmol(bzmol, chemical_component_dictionary=chemical_component_dictionary)
    mmcif = mmcif_from_structure(structure, name=name)

    # Mask selecting the atoms that get an _atom_site row.
    if bzmol.atom_resolved is not None:
        atom_resolved = bzmol.atom_resolved
    else:
        atom_resolved = np.ones(bzmol.num_atoms, dtype=bool)
    num_resolved = int(atom_resolved.sum())

    # Residue index of every *resolved* atom; all per-residue lookups below go
    # through this so that each column stays aligned with the filtered atoms.
    resolved_atom_residue = bzmol.atom_residue[atom_resolved]

    atom_serial = np.arange(1, num_resolved + 1, dtype=int)
    atom_symbol = [PERIODIC_TABLE.GetElementSymbol(int(el)).upper() for el in bzmol.atom_element[atom_resolved]]
    atom_names = bzmol.atom_name[atom_resolved]
    atom_residue_names = bzmol.residue_name[resolved_atom_residue]
    # NOTE(review): when bzmol.chain_id is None the chain ids are all None and the
    # structure.chains[...] lookup below presumably fails - confirm with callers.
    atom_chain_id = (
        bzmol.chain_id[bzmol.residue_chain[resolved_atom_residue]]
        if bzmol.chain_id is not None
        else np.array([None] * num_resolved)
    )
    # mmCIF entity ids are 1-based, entity_idx is 0-based.
    atom_entity_ids = np.array([structure.chains[chain_id].entity_idx + 1 for chain_id in atom_chain_id])
    # label_seq_id is 1-based, residue_ordinal is 0-based.
    atom_seq_ids = (bzmol.residue_ordinal + 1)[resolved_atom_residue]
    atom_charges = bzmol.atom_charge[atom_resolved]

    # A residue is "standard" when it sits in a protein/DNA/RNA chain and its
    # name is one of the standard component IDs for that polymer type.
    # Assumes the iteration order of structure.chains matches the chain indices
    # stored in bzmol.residue_chain - TODO confirm.
    chain_entity_types = [structure.entities[chain.entity_idx].type for chain in structure.chains.values()]
    residue_is_standard = np.zeros(bzmol.num_residues, dtype=bool)
    for polymer_type, standard_names in (
        ("protein", STANDARD_PROTEIN_RESIDUE_NAMES),
        ("dna", STANDARD_DNA_RESIDUE_NAMES),
        ("rna", STANDARD_RNA_RESIDUE_NAMES),
    ):
        chain_has_type = np.array([entity_type == polymer_type for entity_type in chain_entity_types], dtype=bool)
        residue_has_type = chain_has_type[bzmol.residue_chain]
        residue_is_standard[residue_has_type] = np.isin(bzmol.residue_name[residue_has_type], standard_names)

    # Restrict to resolved atoms. Indexing by all atoms and then reading the
    # first ``num_resolved`` entries would mislabel rows whenever an unresolved
    # atom precedes a resolved one.
    atom_is_standard = residue_is_standard[resolved_atom_residue]

    if bzmol.atom_coordinates is not None:
        resolved_coordinates = bzmol.atom_coordinates[atom_resolved]
        cartn_x, cartn_y, cartn_z = (
            [f"{coord:.4f}" for coord in resolved_coordinates[:, axis]] for axis in range(3)
        )
    else:
        cartn_x = [None] * num_resolved
        cartn_y = [None] * num_resolved
        cartn_z = [None] * num_resolved

    mmcif.set_mmcif_category(
        "_atom_site",
        {
            "group_PDB": ["ATOM" if is_standard else "HETATM" for is_standard in atom_is_standard],
            "id": atom_serial.tolist(),
            "type_symbol": atom_symbol,
            "label_atom_id": atom_names.tolist(),
            "label_alt_id": [None] * num_resolved,
            "label_comp_id": atom_residue_names.tolist(),
            "label_asym_id": atom_chain_id.tolist(),
            "label_entity_id": atom_entity_ids.tolist(),
            "label_seq_id": atom_seq_ids.tolist(),
            "pdbx_PDB_ins_code": [None] * num_resolved,
            "Cartn_x": cartn_x,
            "Cartn_y": cartn_y,
            "Cartn_z": cartn_z,
            "occupancy": np.ones(num_resolved).tolist(),
            "B_iso_or_equiv": np.zeros(num_resolved).tolist(),
            "pdbx_formal_charge": atom_charges.tolist(),
        },
    )
    return mmcif