Chemical Component Dictionary (CCD)#
The Chemical Component Dictionary (CCD) is a repository of small molecule definitions used in the Protein Data Bank (PDB). The boltz_data.ccd module provides tools for reading, writing, and working with CCD data.
Examples of entries in the CCD: arginine (ARG) and deoxycytidine monophosphate (DC)#
What is the CCD?#
Each entry describes a single chemical component (a ligand or a monomer) identified by a unique comp_id (1-3 characters). An entry includes the component's atoms, bonds, and metadata.
Chemical components are represented by Pydantic models:
from boltz_data.ccd import (
ChemicalComponent,
ChemicalComponentAtom,
ChemicalComponentBond,
)
# ChemicalComponentAtom represents an atom
atom = ChemicalComponentAtom(
atom_id="C1",
element="C",
charge=0,
)
# ChemicalComponentBond represents a bond between atoms
bond = ChemicalComponentBond(
atom_id_1="C1",
atom_id_2="C2",
order=1, # Single bond
)
# ChemicalComponent represents a complete molecule
component = ChemicalComponent(
comp_id="ATP",
type="NON-POLYMER",
name="ADENOSINE-5'-TRIPHOSPHATE",
atoms={"C1": atom, ...},
bonds={("C1", "C2"): bond, ...},
)
Reading CCD Data#
From RCSB (Remote)#
from boltz_data.ccd import chemical_component_from_rcsb
# Fetch ATP from RCSB
chemical_component_from_rcsb(comp_id="ATP")
ChemicalComponent(comp_id='ATP', type='NON-POLYMER', name="ADENOSINE-5'-TRIPHOSPHATE", atoms={'PG': ChemicalComponentAtom(atom_id='PG', element='P', charge=0), 'O1G': ChemicalComponentAtom(atom_id='O1G', element='O', charge=0), 'O2G': ChemicalComponentAtom(atom_id='O2G', element='O', charge=0), 'O3G': ChemicalComponentAtom(atom_id='O3G', element='O', charge=0), 'PB': ChemicalComponentAtom(atom_id='PB', element='P', charge=0), 'O1B': ChemicalComponentAtom(atom_id='O1B', element='O', charge=0), 'O2B': ChemicalComponentAtom(atom_id='O2B', element='O', charge=0), 'O3B': ChemicalComponentAtom(atom_id='O3B', element='O', charge=0), 'PA': ChemicalComponentAtom(atom_id='PA', element='P', charge=0), 'O1A': ChemicalComponentAtom(atom_id='O1A', element='O', charge=0), 'O2A': ChemicalComponentAtom(atom_id='O2A', element='O', charge=0), 'O3A': ChemicalComponentAtom(atom_id='O3A', element='O', charge=0), "O5'": ChemicalComponentAtom(atom_id="O5'", element='O', charge=0), "C5'": ChemicalComponentAtom(atom_id="C5'", element='C', charge=0), "C4'": ChemicalComponentAtom(atom_id="C4'", element='C', charge=0), "O4'": ChemicalComponentAtom(atom_id="O4'", element='O', charge=0), "C3'": ChemicalComponentAtom(atom_id="C3'", element='C', charge=0), "O3'": ChemicalComponentAtom(atom_id="O3'", element='O', charge=0), "C2'": ChemicalComponentAtom(atom_id="C2'", element='C', charge=0), "O2'": ChemicalComponentAtom(atom_id="O2'", element='O', charge=0), "C1'": ChemicalComponentAtom(atom_id="C1'", element='C', charge=0), 'N9': ChemicalComponentAtom(atom_id='N9', element='N', charge=0), 'C8': ChemicalComponentAtom(atom_id='C8', element='C', charge=0), 'N7': ChemicalComponentAtom(atom_id='N7', element='N', charge=0), 'C5': ChemicalComponentAtom(atom_id='C5', element='C', charge=0), 'C6': ChemicalComponentAtom(atom_id='C6', element='C', charge=0), 'N6': ChemicalComponentAtom(atom_id='N6', element='N', charge=0), 'N1': ChemicalComponentAtom(atom_id='N1', element='N', charge=0), 'C2': 
ChemicalComponentAtom(atom_id='C2', element='C', charge=0), 'N3': ChemicalComponentAtom(atom_id='N3', element='N', charge=0), 'C4': ChemicalComponentAtom(atom_id='C4', element='C', charge=0)}, bonds={('PG', 'O1G'): ChemicalComponentBond(atom_id_1='PG', atom_id_2='O1G', order=2), ('PG', 'O2G'): ChemicalComponentBond(atom_id_1='PG', atom_id_2='O2G', order=1), ('PG', 'O3G'): ChemicalComponentBond(atom_id_1='PG', atom_id_2='O3G', order=1), ('PG', 'O3B'): ChemicalComponentBond(atom_id_1='PG', atom_id_2='O3B', order=1), ('PB', 'O1B'): ChemicalComponentBond(atom_id_1='PB', atom_id_2='O1B', order=2), ('PB', 'O2B'): ChemicalComponentBond(atom_id_1='PB', atom_id_2='O2B', order=1), ('PB', 'O3B'): ChemicalComponentBond(atom_id_1='PB', atom_id_2='O3B', order=1), ('PB', 'O3A'): ChemicalComponentBond(atom_id_1='PB', atom_id_2='O3A', order=1), ('PA', 'O1A'): ChemicalComponentBond(atom_id_1='PA', atom_id_2='O1A', order=2), ('PA', 'O2A'): ChemicalComponentBond(atom_id_1='PA', atom_id_2='O2A', order=1), ('PA', 'O3A'): ChemicalComponentBond(atom_id_1='PA', atom_id_2='O3A', order=1), ('PA', "O5'"): ChemicalComponentBond(atom_id_1='PA', atom_id_2="O5'", order=1), ("O5'", "C5'"): ChemicalComponentBond(atom_id_1="O5'", atom_id_2="C5'", order=1), ("C5'", "C4'"): ChemicalComponentBond(atom_id_1="C5'", atom_id_2="C4'", order=1), ("C4'", "O4'"): ChemicalComponentBond(atom_id_1="C4'", atom_id_2="O4'", order=1), ("C4'", "C3'"): ChemicalComponentBond(atom_id_1="C4'", atom_id_2="C3'", order=1), ("O4'", "C1'"): ChemicalComponentBond(atom_id_1="O4'", atom_id_2="C1'", order=1), ("C3'", "O3'"): ChemicalComponentBond(atom_id_1="C3'", atom_id_2="O3'", order=1), ("C3'", "C2'"): ChemicalComponentBond(atom_id_1="C3'", atom_id_2="C2'", order=1), ("C2'", "O2'"): ChemicalComponentBond(atom_id_1="C2'", atom_id_2="O2'", order=1), ("C2'", "C1'"): ChemicalComponentBond(atom_id_1="C2'", atom_id_2="C1'", order=1), ("C1'", 'N9'): ChemicalComponentBond(atom_id_1="C1'", atom_id_2='N9', order=1), ('N9', 'C8'): 
ChemicalComponentBond(atom_id_1='N9', atom_id_2='C8', order=1), ('N9', 'C4'): ChemicalComponentBond(atom_id_1='N9', atom_id_2='C4', order=1), ('C8', 'N7'): ChemicalComponentBond(atom_id_1='C8', atom_id_2='N7', order=2), ('N7', 'C5'): ChemicalComponentBond(atom_id_1='N7', atom_id_2='C5', order=1), ('C5', 'C6'): ChemicalComponentBond(atom_id_1='C5', atom_id_2='C6', order=1), ('C5', 'C4'): ChemicalComponentBond(atom_id_1='C5', atom_id_2='C4', order=2), ('C6', 'N6'): ChemicalComponentBond(atom_id_1='C6', atom_id_2='N6', order=1), ('C6', 'N1'): ChemicalComponentBond(atom_id_1='C6', atom_id_2='N1', order=2), ('N1', 'C2'): ChemicalComponentBond(atom_id_1='N1', atom_id_2='C2', order=1), ('C2', 'N3'): ChemicalComponentBond(atom_id_1='C2', atom_id_2='N3', order=2), ('N3', 'C4'): ChemicalComponentBond(atom_id_1='N3', atom_id_2='C4', order=1)})
For frequent access, use the cached singleton:
from boltz_data.ccd import get_remote_chemical_component_database
ccd = get_remote_chemical_component_database()
ccd["ATP"]
ChemicalComponent(comp_id='ATP', type='NON-POLYMER', name="ADENOSINE-5'-TRIPHOSPHATE", atoms={'PG': ChemicalComponentAtom(atom_id='PG', element='P', charge=0), 'O1G': ChemicalComponentAtom(atom_id='O1G', element='O', charge=0), 'O2G': ChemicalComponentAtom(atom_id='O2G', element='O', charge=0), 'O3G': ChemicalComponentAtom(atom_id='O3G', element='O', charge=0), 'PB': ChemicalComponentAtom(atom_id='PB', element='P', charge=0), 'O1B': ChemicalComponentAtom(atom_id='O1B', element='O', charge=0), 'O2B': ChemicalComponentAtom(atom_id='O2B', element='O', charge=0), 'O3B': ChemicalComponentAtom(atom_id='O3B', element='O', charge=0), 'PA': ChemicalComponentAtom(atom_id='PA', element='P', charge=0), 'O1A': ChemicalComponentAtom(atom_id='O1A', element='O', charge=0), 'O2A': ChemicalComponentAtom(atom_id='O2A', element='O', charge=0), 'O3A': ChemicalComponentAtom(atom_id='O3A', element='O', charge=0), "O5'": ChemicalComponentAtom(atom_id="O5'", element='O', charge=0), "C5'": ChemicalComponentAtom(atom_id="C5'", element='C', charge=0), "C4'": ChemicalComponentAtom(atom_id="C4'", element='C', charge=0), "O4'": ChemicalComponentAtom(atom_id="O4'", element='O', charge=0), "C3'": ChemicalComponentAtom(atom_id="C3'", element='C', charge=0), "O3'": ChemicalComponentAtom(atom_id="O3'", element='O', charge=0), "C2'": ChemicalComponentAtom(atom_id="C2'", element='C', charge=0), "O2'": ChemicalComponentAtom(atom_id="O2'", element='O', charge=0), "C1'": ChemicalComponentAtom(atom_id="C1'", element='C', charge=0), 'N9': ChemicalComponentAtom(atom_id='N9', element='N', charge=0), 'C8': ChemicalComponentAtom(atom_id='C8', element='C', charge=0), 'N7': ChemicalComponentAtom(atom_id='N7', element='N', charge=0), 'C5': ChemicalComponentAtom(atom_id='C5', element='C', charge=0), 'C6': ChemicalComponentAtom(atom_id='C6', element='C', charge=0), 'N6': ChemicalComponentAtom(atom_id='N6', element='N', charge=0), 'N1': ChemicalComponentAtom(atom_id='N1', element='N', charge=0), 'C2': 
ChemicalComponentAtom(atom_id='C2', element='C', charge=0), 'N3': ChemicalComponentAtom(atom_id='N3', element='N', charge=0), 'C4': ChemicalComponentAtom(atom_id='C4', element='C', charge=0)}, bonds={('PG', 'O1G'): ChemicalComponentBond(atom_id_1='PG', atom_id_2='O1G', order=2), ('PG', 'O2G'): ChemicalComponentBond(atom_id_1='PG', atom_id_2='O2G', order=1), ('PG', 'O3G'): ChemicalComponentBond(atom_id_1='PG', atom_id_2='O3G', order=1), ('PG', 'O3B'): ChemicalComponentBond(atom_id_1='PG', atom_id_2='O3B', order=1), ('PB', 'O1B'): ChemicalComponentBond(atom_id_1='PB', atom_id_2='O1B', order=2), ('PB', 'O2B'): ChemicalComponentBond(atom_id_1='PB', atom_id_2='O2B', order=1), ('PB', 'O3B'): ChemicalComponentBond(atom_id_1='PB', atom_id_2='O3B', order=1), ('PB', 'O3A'): ChemicalComponentBond(atom_id_1='PB', atom_id_2='O3A', order=1), ('PA', 'O1A'): ChemicalComponentBond(atom_id_1='PA', atom_id_2='O1A', order=2), ('PA', 'O2A'): ChemicalComponentBond(atom_id_1='PA', atom_id_2='O2A', order=1), ('PA', 'O3A'): ChemicalComponentBond(atom_id_1='PA', atom_id_2='O3A', order=1), ('PA', "O5'"): ChemicalComponentBond(atom_id_1='PA', atom_id_2="O5'", order=1), ("O5'", "C5'"): ChemicalComponentBond(atom_id_1="O5'", atom_id_2="C5'", order=1), ("C5'", "C4'"): ChemicalComponentBond(atom_id_1="C5'", atom_id_2="C4'", order=1), ("C4'", "O4'"): ChemicalComponentBond(atom_id_1="C4'", atom_id_2="O4'", order=1), ("C4'", "C3'"): ChemicalComponentBond(atom_id_1="C4'", atom_id_2="C3'", order=1), ("O4'", "C1'"): ChemicalComponentBond(atom_id_1="O4'", atom_id_2="C1'", order=1), ("C3'", "O3'"): ChemicalComponentBond(atom_id_1="C3'", atom_id_2="O3'", order=1), ("C3'", "C2'"): ChemicalComponentBond(atom_id_1="C3'", atom_id_2="C2'", order=1), ("C2'", "O2'"): ChemicalComponentBond(atom_id_1="C2'", atom_id_2="O2'", order=1), ("C2'", "C1'"): ChemicalComponentBond(atom_id_1="C2'", atom_id_2="C1'", order=1), ("C1'", 'N9'): ChemicalComponentBond(atom_id_1="C1'", atom_id_2='N9', order=1), ('N9', 'C8'): 
ChemicalComponentBond(atom_id_1='N9', atom_id_2='C8', order=1), ('N9', 'C4'): ChemicalComponentBond(atom_id_1='N9', atom_id_2='C4', order=1), ('C8', 'N7'): ChemicalComponentBond(atom_id_1='C8', atom_id_2='N7', order=2), ('N7', 'C5'): ChemicalComponentBond(atom_id_1='N7', atom_id_2='C5', order=1), ('C5', 'C6'): ChemicalComponentBond(atom_id_1='C5', atom_id_2='C6', order=1), ('C5', 'C4'): ChemicalComponentBond(atom_id_1='C5', atom_id_2='C4', order=2), ('C6', 'N6'): ChemicalComponentBond(atom_id_1='C6', atom_id_2='N6', order=1), ('C6', 'N1'): ChemicalComponentBond(atom_id_1='C6', atom_id_2='N1', order=2), ('N1', 'C2'): ChemicalComponentBond(atom_id_1='N1', atom_id_2='C2', order=1), ('C2', 'N3'): ChemicalComponentBond(atom_id_1='C2', atom_id_2='N3', order=2), ('N3', 'C4'): ChemicalComponentBond(atom_id_1='N3', atom_id_2='C4', order=1)})
From CIF Files#
from boltz_data.ccd import chemical_component_from_path
chemical_component_from_path("ALA.cif")
Compression#
Compressing#
from boltz_data.ccd import (
chemical_component_from_path,
compress_chemical_component,
)
# Read a component
component = chemical_component_from_path("ATP.cif")
compressed_bytes = compress_chemical_component(component)
Decompressing#
from boltz_data.ccd import decompress_chemical_component
component = decompress_chemical_component(compressed_bytes)
Compressed Dictionary#
The full CCD (~500 MB) can be compressed to ~8 MB using CompressedChemicalComponentDictionary:
from boltz_data.ccd import CompressedChemicalComponentDictionary
ccd = CompressedChemicalComponentDictionary.from_file("ccd.pkl.gz")
atp = ccd["ATP"] # Decompressed on access
ccd.to_file("output.pkl.gz")
Working with CCD Data#
Accessing Atoms and Bonds#
component = chemical_component_from_rcsb("ATP")
for atom_id, atom in component.atoms.items():
print(f"{atom_id}: {atom.element}")
for (atom1, atom2), bond in component.bonds.items():
print(f"{atom1}-{atom2}: order {bond.order}")
Converting to BZMol#
from boltz_data.mol import bzmol_from_chemical_component
bzmol = bzmol_from_chemical_component(chemical_component_from_rcsb("ATP"))
API Reference#
For detailed API documentation, see the boltz_data.ccd API reference.