Source code for boltz_data.mol._to._to_mmcif

from collections.abc import Mapping

import gemmi
import numpy as np
from rdkit.Chem import GetPeriodicTable

from boltz_data.ccd import ChemicalComponent
from boltz_data.cif import mmcif_from_structure
from boltz_data.mol._mol import BZBioMol
from boltz_data.sequence import DNA_COMP_ID_TO_ONE_LETTER, PROTEIN_COMP_ID_TO_ONE_LETTER, RNA_COMP_ID_TO_ONE_LETTER

from ._to_definition import structure_from_bzmol

# Component IDs (the keys of the comp-id -> one-letter tables), one list per polymer type.
# mmcif_from_bzmol tests residue names against these lists to choose ATOM vs. HETATM records.
STANDARD_PROTEIN_RESIDUE_NAMES = list(PROTEIN_COMP_ID_TO_ONE_LETTER)
STANDARD_DNA_RESIDUE_NAMES = list(DNA_COMP_ID_TO_ONE_LETTER)
STANDARD_RNA_RESIDUE_NAMES = list(RNA_COMP_ID_TO_ONE_LETTER)

# Shared RDKit periodic table handle, used to turn atomic numbers into element symbols.
PERIODIC_TABLE = GetPeriodicTable()


def mmcif_from_bzmol(
    bzmol: BZBioMol,
    /,
    chemical_component_dictionary: Mapping[str, ChemicalComponent] | None = None,
    name: str = "pred",
) -> gemmi.cif.Block:
    """
    Convert a BZMol back to an mmCIF block.

    The block is first produced by ``mmcif_from_structure`` from the structure
    derived from ``bzmol``; its ``_atom_site`` category is then overwritten with
    one row per resolved atom. Occupancy is written as 1.0 and the B-factor as
    0.0 for every row, and coordinates are formatted with four decimals.

    Args:
        bzmol: The BZMol to convert.
        chemical_component_dictionary: Dictionary mapping component IDs to chemical components.
        name: The name to give the mmCIF structure.

    Returns:
        An mmCIF block representing the structure in the BZMol. Only atoms
        flagged in ``bzmol.atom_resolved`` are written; when that mask is
        ``None`` every atom is written.
    """
    structure = structure_from_bzmol(bzmol, chemical_component_dictionary=chemical_component_dictionary)
    mmcif = mmcif_from_structure(structure, name=name)

    # A missing mask means "everything is resolved".
    if bzmol.atom_resolved is not None:
        atom_resolved = bzmol.atom_resolved
    else:
        atom_resolved = np.ones(bzmol.num_atoms, dtype=bool)
    # Plain int so that list repetition and range() below never see a NumPy scalar.
    num_resolved = int(atom_resolved.sum())

    # Residue index of every *resolved* atom; all per-atom columns are derived
    # through this so they stay aligned row by row.
    resolved_atom_residue = bzmol.atom_residue[atom_resolved]

    atom_symbol = [PERIODIC_TABLE.GetElementSymbol(int(el)).upper() for el in bzmol.atom_element[atom_resolved]]
    atom_names = bzmol.atom_name[atom_resolved]
    atom_residue_names = bzmol.residue_name[resolved_atom_residue]
    # NOTE(review): when chain_id is None the entity lookup below uses None as
    # the key into structure.chains - confirm structure_from_bzmol supports that.
    atom_chain_id = (
        bzmol.chain_id[bzmol.residue_chain[resolved_atom_residue]]
        if bzmol.chain_id is not None
        else np.array([None] * num_resolved)
    )
    # mmCIF entity ids are 1-based, entity_idx is 0-based.
    atom_entity_ids = np.array([structure.chains[chain_id].entity_idx + 1 for chain_id in atom_chain_id])
    atom_seq_ids = (bzmol.residue_ordinal + 1)[resolved_atom_residue]
    atom_charges = bzmol.atom_charge[atom_resolved]

    # A residue is "standard" when its name is in the standard list for the
    # polymer type of its chain; residues of any other chain type stay False.
    chain_types = [structure.entities[chain.entity_idx].type for chain in structure.chains.values()]
    residue_is_standard = np.zeros(bzmol.num_residues, dtype=bool)
    for polymer_type, standard_names in (
        ("protein", STANDARD_PROTEIN_RESIDUE_NAMES),
        ("dna", STANDARD_DNA_RESIDUE_NAMES),
        ("rna", STANDARD_RNA_RESIDUE_NAMES),
    ):
        chain_has_type = np.array([chain_type == polymer_type for chain_type in chain_types], dtype=bool)
        residue_has_type = chain_has_type[bzmol.residue_chain]
        residue_is_standard[residue_has_type] = np.isin(bzmol.residue_name[residue_has_type], standard_names)

    # Restrict to resolved atoms. Indexing with the unmasked atom_residue would
    # pair the ATOM/HETATM label of atom i with the i-th *resolved* atom, which
    # is wrong as soon as any earlier atom is unresolved.
    atom_is_standard = residue_is_standard[resolved_atom_residue]

    if bzmol.atom_coordinates is not None:
        cartn_x, cartn_y, cartn_z = (
            [f"{coord:.4f}" for coord in bzmol.atom_coordinates[atom_resolved, axis]] for axis in range(3)
        )
    else:
        cartn_x, cartn_y, cartn_z = ([None] * num_resolved for _ in range(3))

    mmcif.set_mmcif_category(
        "_atom_site",
        {
            "group_PDB": ["ATOM" if is_standard else "HETATM" for is_standard in atom_is_standard],
            "id": list(range(1, num_resolved + 1)),
            "type_symbol": atom_symbol,
            "label_atom_id": atom_names.tolist(),
            "label_alt_id": [None] * num_resolved,
            "label_comp_id": atom_residue_names.tolist(),
            "label_asym_id": atom_chain_id.tolist(),
            "label_entity_id": atom_entity_ids.tolist(),
            "label_seq_id": atom_seq_ids.tolist(),
            "pdbx_PDB_ins_code": [None] * num_resolved,
            "Cartn_x": cartn_x,
            "Cartn_y": cartn_y,
            "Cartn_z": cartn_z,
            "occupancy": [1.0] * num_resolved,
            "B_iso_or_equiv": [0.0] * num_resolved,
            "pdbx_formal_charge": atom_charges.tolist(),
        },
    )

    return mmcif