Source code for qp.cluster.spheres_bad

"""Extract active site coordination sphere clusters

**Usage**::

    >>> from qp.cluster import spheres

    >>> spheres.extract_clusters(
    ...     "path/to/PDB.pdb", 
    ...     "path/to/out/dir/", 
    ...     center_residues=["FE", "FE2"], # List of resnames of the residues to use as the cluster center
    ...     sphere_count=2,              # Number of spheres to extract
    ...     ligands=["AKG"]       # PDB IDs of additional ligands
    ... )

Extracting clusters leaves open valences in the outermost sphere. Capping may be
performed by specifying ``capping`` in ``spheres.extract_clusters``:

* 0. No capping. (Default)
* 1. Cap with hydrogens.
* 2. Cap with ACE/NME groups. 
"""

import os
from typing import Set, Literal, Optional
import numpy as np
from Bio.PDB import PDBParser, Polypeptide, PDBIO, Select
from Bio.PDB.Atom import Atom
from Bio.PDB.Residue import Residue
from Bio.PDB.Chain import Chain
from Bio.PDB.Model import Model
from Bio.PDB.NeighborSearch import NeighborSearch
from scipy.spatial import Voronoi
from qp.cluster import struct_to_file
from sklearn.cluster import DBSCAN


RANDOM_SEED = 66265


[docs]def get_grid_coord_idx(coord, coord_min, mean_distance): """ Compute a point's position in a 1D grid Parameters ---------- coord: float One coordinate of a point coord_min: float The minimum coordinate of the grid mean_distance: float The distance between neighbors in the grid Returns ------- idx: int The idx of the point in the grid """ return int((coord - coord_min + mean_distance * 0.5) // mean_distance)
[docs]def get_grid(coords, mean_distance): """ Compute the grid's parameter for a given 1D point list Parameters ---------- coords: numpy.array The coordinates of the 1D point list mean_distance: float The distance between neighbors in the grid Returns ------- coord_min: float The minimum coordinate of the grid coord_max: float The maximum coordinate of the grid grid: numpy.array The 1D grid """ coord_min, coord_max = coords.min() - mean_distance, coords.max() + mean_distance npoints = get_grid_coord_idx(coord_max, coord_min, mean_distance) return coord_min, coord_max, coord_min + np.linspace(0, npoints - 1, npoints) * mean_distance
def _visualize_dummy(dummy, path): # debug function with open(path + "/dummy.xyz", "w") as f: f.write(f"{len(dummy)}\n\n") for coord in dummy: f.write(f"He {coord[0]} {coord[1]} {coord[2]}\n")
[docs]def fill_dummy(points, mean_distance=3, noise_amp=0.2): """ Fill dummy atoms in a point cloud Parameters ---------- points: numpy.array The 3D coordinates of the point cloud mean_distance: float The distance between neighbors in the grid noise_amp: float The amplitude of the noise of dummy atoms' position Returns ------- points: The 3D coordinates of the point cloud filled with dummy atoms """ conf = np.stack(points, axis=0) x_min, _, x_grids = get_grid(conf[:,0], mean_distance) y_min, _, y_grids = get_grid(conf[:,1], mean_distance) z_min, _, z_grids = get_grid(conf[:,2], mean_distance) flags = np.ones((len(x_grids), len(y_grids), len(z_grids)), dtype=bool) for point in points: x, y, z = point flags[ get_grid_coord_idx(x, x_min, mean_distance), get_grid_coord_idx(y, y_min, mean_distance), get_grid_coord_idx(z, z_min, mean_distance) ] = False flags = flags.flatten() np.random.seed(RANDOM_SEED) noise = mean_distance * noise_amp * (np.random.rand(len(x_grids), len(y_grids), len(z_grids), 3) - 0.5) dummy = np.stack(np.meshgrid(x_grids, y_grids, z_grids, indexing="ij"), axis=-1) dummy = (dummy + noise).reshape(-1, 3) dummy = dummy[flags, :] return np.concatenate([points, dummy], axis=0)
[docs]def calc_dist(point_a, point_b): """ Calculate the Euclidean distance between two points Parameters ---------- point_a: numpy.array Point A point_b: numpy.array Point B Returns ------- dist: float The Euclidean distance between Point A and Point B """ return np.linalg.norm(point_a - point_b)
[docs]def voronoi(model, center_residues, ligands, smooth_method, **smooth_params): """ Computes the Voronoi tessellation of a protein structure. Parameters ---------- model: Bio.PDB.Model Protein structure model Returns ------- neighbors: dict Adjacency list corresponding to neighboring Voronoi cells """ atoms = [] points = [] for res in model.get_residues(): for atom in res.get_unpacked_list(): # includes atoms from multiple conformations atoms.append(atom) points.append(atom.get_coord()) # In your voronoi function points = np.array(points) # Convert points to a NumPy array for plotting points_count = len(points) # Store the number of real atoms before adding dummies if smooth_method == "dummy_atom": new_points = fill_dummy(points, **smooth_params) vor = Voronoi(new_points) else: vor = Voronoi(points) # Plot without dummy atoms (using only real atoms) plot_voronoi_2d(points, points_count) # 2D visualization (real atoms only) plot_voronoi_3d(points, points_count) # 3D visualization (real atoms only) neighbors = {} for a, b in vor.ridge_points: if a < points_count and b < points_count: dist = calc_dist(points[a], points[b]) neighbors.setdefault(atoms[a], []).append((atoms[b], dist)) neighbors.setdefault(atoms[b], []).append((atoms[a], dist)) return neighbors
[docs]def merge_centers(cur, search, seen, radius=0.0): if radius == 0.0: return {cur} center = {cur} seen.add(cur) nxt = set() for atom in cur.get_unpacked_list(): if atom.element == "H": continue for res in search.search(atom.get_coord(), radius, "R"): nxt.add(res) for res in nxt: if res not in seen: center |= merge_centers(res, search, seen, radius) return center
[docs]def get_center_residues(model, center_residues, merge_cutoff=0.0): found = set() center_atoms = [] for res in model.get_residues(): # We will assume the ligand is labeled as a heteroatom with res.id[0] != ' ' if res.get_resname() in center_residues and res.id[0] != ' ': found.add(res) center_atoms.extend([atom for atom in res.get_unpacked_list() if atom.element != "H"]) if not len(center_atoms): print("> WARNING: No matching cluster center found. Skipping cluster.") return [] search = NeighborSearch(center_atoms) seen = set() centers = [] for res in found: if res not in seen: centers.append(merge_centers(res, search, seen, merge_cutoff)) return centers
[docs]def box_outlier_thres(data, coeff=1.5): """ Compute the threshold for the boxplot outlier detection method Parameters ---------- data: list The data for the boxplot statistics coeff: float The coefficient for the outlier criterion from quartiles of the data Returns ------- lb: float the lower bound of non-outliers ub: float the upper bound of non-outliers """ Q3 = np.quantile(data, 0.75) Q1 = np.quantile(data, 0.25) IQR = Q3 - Q1 return Q1 - coeff * IQR, Q3 + coeff * IQR
[docs]def check_NC(atom, metal): """ Check if a nitrogen / carbon atom in the first sphere is coordinated. If the nitrogen / carbon atom is in the backbone, or it's the nearest atom to the metal among all atoms in its residue, it's considered coordinated. Parameters ---------- atom: Bio.PDB.Atom The nitrogen / carbon atom to be checked metal: The metal atom (coordination center) Returns ------- flag: bool Whether the atom is coordinated or not """ if atom.get_name() == "N": # TODO: check special coordinated nitrogens in the backbone return True else: ref_dist = calc_dist(atom.get_coord(), metal.get_coord()) res = atom.get_parent() for atom in res.get_unpacked_list(): if calc_dist(atom.get_coord(), metal.get_coord()) < ref_dist: return False return True
[docs]def get_next_neighbors( start, neighbors, sphere_count, ligands, first_sphere_radius=3, smooth_method="boxplot", include_ligands=2, **smooth_params): """ Iteratively determines spheres around a given starting atom Parameters ---------- start: Bio.PDB.Residue Starting metal atom neighbors: dict Adjacency list of neighboring atoms sphere_count: int Number of spheres to extract ligands: list A list of ligands to include smooth_method: ("boxplot" | "dbscan" | "dummy_atom") The method used to smoothen the spheres include_ligands: int the mode of including ligands in the sphere smooth_params: params of the specific smooth method Returns ------- metal_id: str Active site identifier seen: set Set of residues from all spheres spheres: list of sets Sets of residues separated by spheres """ seen = start.copy() spheres = [start] lig_adds = [set()] for i in range(0, sphere_count): # get candidate atoms in the new sphere nxt = set() lig_add = set() if i == 0 and first_sphere_radius > 0: start_atoms = [] for res in start: start_atoms.extend(res.get_unpacked_list()) is_metal_like = (len(start_atoms) == len(start)) search = NeighborSearch([atom for atom in start_atoms[0].get_parent().get_parent().get_parent().get_atoms() if atom.element != "H" and atom not in start_atoms]) first_sphere = [] for center in start_atoms: first_sphere += search.search(center=center.get_coord(), radius=first_sphere_radius, level="A") for atom in first_sphere: if atom.get_parent() not in seen: element = atom.element if ( not is_metal_like or element in "OS" or (element in "NC" and any(check_NC(atom, center) for center in start_atoms)) ): # only consider coordinated atoms res = atom.get_parent() seen.add(res) if Polypeptide.is_aa(res): nxt.add(res) else: if ( include_ligands != 1 or res.get_resname() != "HOH" # mode 1: exclude all waters ): lig_add.add(res) else: candidates = [] frontiers = spheres[-1] if spheres[-1] else spheres[0] for res in frontiers: for atom in res.get_unpacked_list(): for n, dist in neighbors[atom]: par = n.get_parent() candidates.append((n, dist, par)) # screen candidates if smooth_method == "box_plot": dist_data = [dist for n, dist, par in candidates] lb, ub = box_outlier_thres(dist_data, **smooth_params) screened_candidates = [(dist, par) for n, dist, par in candidates if dist < ub] elif smooth_method == "dbscan": optics = DBSCAN(**smooth_params) X = [n.get_coord() for n, dist, par in candidates] for res in spheres[-1]: for atom in res.get_unpacked_list(): X.append(atom.get_coord()) cluster_idx = optics.fit_predict(X) + 1 largest_idx = np.bincount(cluster_idx).argmax() screened_candidates = [(dist, par) for i, (n, dist, par) in enumerate(candidates) if cluster_idx[i] == largest_idx] else: screened_candidates = [(dist, par) for n, dist, par in candidates] screened_candidates = [par for dist, par in screened_candidates if i != 0 or dist < 2.7] # build the new sphere for par in screened_candidates: if par not in seen: seen.add(par) ## Added to include ligands in the first coordination sphere ## Produced unweildy clusters as some ligands are large ## Potentially useful in the future if Polypeptide.is_aa(par): nxt.add(par) else: if ( (include_ligands == 0 and par.get_resname() in ligands) or # mode 0: only include ligands in the first sphere unless specified (include_ligands == 1 and par.get_resname() != "HOH") or # mode 1: exclude all waters include_ligands == 2 # mode 2: include everything ): lig_add.add(par) spheres.append(nxt) lig_adds.append(lig_add) metal_id = [] for res in start: res_id = res.get_full_id() chain_name = res_id[2] metal_index = str(res_id[3][1]) metal_id.append(chain_name + metal_index) for i in range(len(spheres)): spheres[i] = spheres[i] | lig_adds[i] return "_".join(sorted(metal_id)), seen, spheres
[docs]def prune_atoms(center, residues, spheres, max_atom_count, ligands): """ Prune residues from the cluster model to meet the max atom count constraint, while keeping specified ligands and co-factors. Parameters ---------- center: set Set of central residues residues: set Set of residues in the cluster spheres: list of sets List of residue sets, each corresponding to a coordination sphere max_atom_count: int Maximum allowed atom count in the cluster ligands_to_keep: list List of ligand names to keep in the cluster """ atom_cnt = 0 for res in residues: atom_cnt += len(res) if atom_cnt <= max_atom_count: return center_atoms = [] for c in center: center_atoms.extend(c.get_unpacked_list()) def dist(res): return min(atom - x for x in center_atoms for atom in res.get_unpacked_list()) prune = set() for res in sorted(residues, key=dist, reverse=True): # Check if the residue is in the ligands_to_keep list if res.get_resname() not in ligands: prune.add(res) atom_cnt -= len(res) if atom_cnt <= max_atom_count: break residues -= prune for s in spheres: s -= prune while not spheres[-1]: spheres.pop()
[docs]def scale_hydrogen(a, b, scale): """ Replaces an atom with hydrogen, rescaling the original bond length Parameters ---------- a: Bio.PDB.Atom Bonded atom to keep b: Bio.PDB.Atom Bonded atom to replace scale: float Bond length scale Returns ------- pos: array of float Coordinates of new hydrogen atom """ p = a.get_coord() q = b.get_coord() return scale * (q - p) + p
[docs]def get_normalized_vector(atom1: Atom, atom2: Atom) -> np.array: v = atom2.get_coord() - atom1.get_coord() return v / np.linalg.norm(v)
[docs]def build_hydrogen(parent: Residue, template: Optional[Residue], atom: Literal["N", "C"]): """ Cap with hydrogen, building based on the upstream or downstream residue Parameters ---------- chain: Bio.PDB.Chain Chain with desired residue parent: Bio.PDB.Residue Residue to cap template: Bio.PDB.Residue Upstream or downstream residue atom: str Flag for adding to the 'N' or 'C' side of the residue Returns ------- res: Bio.PDB.Residue Residue containing added hydrogen """ if template is not None: if atom == "N": pos = scale_hydrogen(parent["N"], template["C"], 1 / 1.32) else: pos = scale_hydrogen(parent["C"], template["N"], 1.09 / 1.32) else: CA = parent["CA"] if atom == "N": N = parent["N"] H = parent["H"] bis = get_normalized_vector(N, CA) + get_normalized_vector(N, H) bis /= np.linalg.norm(bis) pos = N.get_coord() - bis else: C = parent["C"] O = parent["O"] bis = get_normalized_vector(C, CA) + get_normalized_vector(C, O) bis /= np.linalg.norm(bis) pos = C.get_coord() - bis * 1.09 for name in ["H1", "H2", "H3"]: if name not in parent: break atom = Atom(name, pos, 0, 1, " ", name, None, "H") parent.add(atom) return atom
[docs]def build_heavy(chain, parent, template, atom): """ Cap with ACE/NME, building based on the upstream or downstream residue Parameters ---------- chain: Bio.PDB.Chain Chain with desired residue parent: Bio.PDB.Residue Residue to cap template: Bio.PDB.Residue Upstream or downstream residue atom: str Flag for adding to the "N" (ACE) or "C" (NME) side of the residue Returns ------- res: Bio.PDB.Residue Residue containing added group """ pos = {"CH3": template["CA"].get_coord()} if template.get_resname() == "GLY": pos["HH31"] = template["HA2"].get_coord() pos["HH32"] = template["HA3"].get_coord() else: pos["HH31"] = template["HA"].get_coord() pos["HH32"] = scale_hydrogen(template["CA"], template["CB"], 1.09 / 1.54) if atom == "N": pos["C"] = template["C"].get_coord() pos["O"] = template["O"].get_coord() pos["HH33"] = scale_hydrogen(template["CA"], template["N"], 1.09 / 1.46) else: pos["N"] = template["N"].get_coord() if template.get_resname() == "PRO": pos["H"] = scale_hydrogen(template["N"], template["CD"], 1 / 1.46) else: pos["H"] = template["H"].get_coord() pos["HH33"] = scale_hydrogen(template["CA"], template["C"], 1.09 / 1.51) adj_id = ("H_" + ("NME" if atom == "N" else "ACE"), template.get_id()[1], " ") skip = chain.has_id(adj_id) # skip building methyl if already present in adjacent cap if skip: chain[adj_id].detach_child("HH33") name = "ACE" if atom == "N" else "NME" res_id = ("H_" + name, template.get_id()[1], " ") res = Residue(res_id, name, " ") for k, v in pos.items(): if skip and k in ["CH3", "HH31", "HH32", "HH33"]: continue res.add(Atom(k, v, 0, 1, " ", k, None, k[0])) chain.add(res) return res
[docs]def check_atom_valence(res: Residue, tree: NeighborSearch, atom: Literal["N", "C"], cn: int) -> bool: neighbors = tree.search(res[atom].get_coord(), radius=1.8) if len(neighbors) > cn: return True else: for neighbor in neighbors: if neighbor.get_name() == "C" and atom == "N": return True elif neighbor.get_name() == "N" and atom == "C": return True return False
[docs]def cap_chains(model: Model, residues: Set[Residue], capping: int) -> Set[Residue]: """ Cap chain breaks for a set of extracted residues Parameters ---------- model: Bio.PDB.Model Protein structure model residues: set Set of residues capping: int Flag for capping group, H (1) or ACE/NME (2) Returns ------- cap_residues: set Set of residues containing added groups """ orig_chains = {} for chain in model: orig_chains[chain.get_id()] = chain.get_unpacked_list() cap_residues = set() for res in list(sorted(residues)): if not Polypeptide.is_aa(res) or res.get_id()[0] != " ": continue res_id = res.get_full_id() chain = model[res_id[2]] chain_tree = NeighborSearch(list(chain.get_atoms())) chain_list = orig_chains[chain.get_id()] ind = chain_list.index(res) if ind > 0: pre = chain_list[ind - 1] if ( pre.get_id()[1] == res_id[3][1] - 1 and pre.get_id()[0] == " " and pre not in residues and Polypeptide.is_aa(pre) ): # ignores hetero residues if capping == 1: cap_residues.add(build_hydrogen(res, pre, "N")) else: cap_residues.add(build_heavy(res, pre, "N")) elif not check_atom_valence(res, chain_tree, "N", 3): cap_residues.add(build_hydrogen(res, None, "N")) if ind < len(chain_list) - 1: nxt = chain_list[ind + 1] if ( nxt.get_id()[1] == res_id[3][1] + 1 and nxt.get_id()[0] == " " and nxt not in residues and Polypeptide.is_aa(nxt) ): if capping == 1: cap_residues.add(build_hydrogen(res, nxt, "C")) else: cap_residues.add(build_heavy(res, nxt, "C")) elif not check_atom_valence(res, chain_tree, "C", 3): cap_residues.add(build_hydrogen(res, None, "C")) return cap_residues
[docs]def write_pdbs(io, sphere, out): """ Write coordination sphere to PDB file. Parameters ---------- io: Bio.PDB.PDBIO Bio.PDB writer sphere: list List of coordination sphere residues out: str Path to output PDB file """ class ResSelect(Select): def accept_residue(self, residue): return residue in sphere io.save(out, ResSelect())
[docs]def residue_in_ligands(resname, resid, res_is_aa, ligand_keys): res_key = f"{resname}_{resid[2]}{resid[3][1]}" if res_is_aa: return res_key in ligand_keys else: for ligand_key in ligand_keys: ligand_res_keys = ligand_key.split() if res_key in ligand_res_keys: return True return False
[docs]def compute_charge(spheres, structure, ligand_charge): """ Computes the total charge of coordinating AAs Parameters ---------- spheres: list of sets Sets of residues separated by spheres structure: Bio.PDB.Structure The protein structure ligand_charge: dict Key, value pairs of ligand names and charges Returns ------- charge: list Total charge of AAs in each sphere """ # Identifying N-terminal and C-terminal residues for each chain n_terminals = set() c_terminals = set() # Loop over the residues to get first and last as indices may be different for chain in structure.get_chains(): chain_residues = list(chain.get_residues()) if chain_residues: n_terminals.add(chain_residues[0].get_full_id()) c_terminals.add(chain_residues[-1].get_full_id()) pos = { "ARG": ["HE", "HH11", "HH12", "HH21", "HH22"], "LYS": ["HZ1", "HZ2", "HZ3"], "HIS": ["HD1", "HD2", "HE1", "HE2"], "MLZ": [], "M3L": [] } neg = { "ASP": ["HD2", "HOD1", "HOD2"], "GLU": ["HE2", "HOE1", "HOE2"], "CYS": ["HG"], "TYR": ["HH"], "OCS": [], "CSD": ["HD1", "HD2"], "KCX": ["HQ1", "HQ2", "HOQ1", "HOQ2"], "HIS": ["HD1", "HE2"] } charge = [] for s in spheres[1:]: c = 0 for res in s: res_id = res.get_full_id() resname = res.get_resname() res_is_aa = Polypeptide.is_aa(res) if not residue_in_ligands(resname, res_id, res_is_aa, ligand_charge.keys()): if resname in pos and all(res.has_id(h) for h in pos[resname]): c += 1 elif resname in neg and all(not res.has_id(h) for h in neg[resname]): c -= 1 if res_is_aa and resname != "PRO" and all(not res.has_id(h) for h in ["H", "H2"]): # TODO: termini c -= 1 # Check for charged N-terminus if res_id in n_terminals \ and res.has_id("N"): # exclude sugar chain terminus c += 1 # Check for charged C-terminus if res.has_id("OXT"): c -= 1 charge.append(c) return charge
[docs]def count_residues(spheres): """ Counts the frequency of coordinating residues Parameters ---------- spheres: list of sets Sets of residues separated by spheres Returns ------- count: list of dicts Frequency table by sphere """ count = [] for s in spheres[1:]: c = {} for res in s: c[res.get_resname()] = c.get(res.get_resname(), 0) + 1 count.append(c) return count
[docs]def make_res_key(res): resname = res.get_resname() resid = res.get_id()[1] chainid = res.get_parent().get_id() return f"{resname}_{chainid}{resid}"
[docs]def complete_oligomer(ligand_keys, model, residues, spheres, include_ligands): ligand_res_found = dict() oligomer_found = dict() for ligand_key in ligand_keys: ligand_res_keys = ligand_key.split() if len(ligand_res_keys) == 1: continue oligomer_found[ligand_key] = dict() for ligand_res_key in ligand_res_keys: ligand_res_found[ligand_res_key] = { "sphere": -1, "oligomer": ligand_key } oligomer_found[ligand_key][ligand_res_key] = False if not oligomer_found: return for i, sphere in enumerate(spheres): if include_ligands == 0 and i > 0: break for res in sphere: res_key = make_res_key(res) if res_key in ligand_res_found and not Polypeptide.is_aa(res): ligand_res_found[res_key]["sphere"] = i oligomer = ligand_res_found[res_key]["oligomer"] oligomer_found[oligomer][res_key] = True for chain in model: for res in chain.get_unpacked_list(): res_key = make_res_key(res) if ( res_key in ligand_res_found and not Polypeptide.is_aa(res) ): oligomer = ligand_res_found[res_key]["oligomer"] found_sphere = ligand_res_found[res_key]["sphere"] if found_sphere < 0 and any(oligomer_found[oligomer].values()): if include_ligands == 0: spheres[0].add(res) else: spheres[-1].add(res) residues.add(res) print(f"To avoid unpredictable charge error, {res_key} in {oligomer} is added to spheres")
[docs]def extract_clusters( path, out, center_residues, sphere_count=2, first_sphere_radius=4.0, max_atom_count=None, merge_cutoff=0.0, smooth_method="box_plot", ligands=[], capping=1, charge=True, ligand_charge=dict(), count=True, xyz=True, hetero_pdb=False, include_ligands=2, **smooth_params ): """ Extract active site coordination spheres. Neighboring residues determined by Voronoi tessellation. Parameters ---------- path: str Path to PDB file out: str Path to output directory center_residues: list List of resnames of the residues to use as the cluster center sphere_count: int Number of coordinations spheres to extract first_sphere_radius: float the radius cutoff of the first sphere max_atom_count: int the maximum number of atoms in the whole cluster merge_cutoff: int the distance cutoff when merging two centers of spheres smooth_method: ("boxplot" | "dbscan" | "dummy_atom") The method used to smoothen the spheres ligands: list Other ligand IDs to include, in addition to AAs and waters capping: int Whether to cap chains with nothing (0), H (1), or ACE/NME (2) charge: bool If true, total charge of coordinating AAs will be written to out/charge.csv ligand_charge: dict The dict containing each ligand's name and charge count: bool If true, residue counts will be written to out/count.csv xyz: bool If true, XYZ files will be written according to the output PDBs hetero_pdb: bool If true, keep all the heteroatoms in the cluster PDB output include_ligands: int the mode of including ligands in the sphere smooth_params: params of the specific smooth method """ parser = PDBParser(QUIET=True) structure = parser.get_structure("PDB", path) io = PDBIO() io.set_structure(structure) model = structure[0] neighbors = voronoi(model, center_residues, ligands, smooth_method, **smooth_params) centers = get_center_residues(model, center_residues, merge_cutoff) aa_charge = {} res_count = {} cluster_paths = [] for c in centers: metal_id, residues, spheres = get_next_neighbors( c, neighbors, sphere_count, ligands, first_sphere_radius, smooth_method, include_ligands, **smooth_params ) complete_oligomer(ligand_charge, model, residues, spheres, include_ligands) cluster_path = f"{out}/{metal_id}" cluster_paths.append(cluster_path) os.makedirs(cluster_path, exist_ok=True) if max_atom_count is not None: prune_atoms(c, residues, spheres, max_atom_count, ligands) if charge: aa_charge[metal_id] = compute_charge(spheres, structure, ligand_charge) if count: res_count[metal_id] = count_residues(spheres) if capping: cap_residues = cap_chains(model, residues, capping) if capping == 2: spheres[-1] |= cap_residues sphere_paths = [] for i, s in enumerate(spheres): sphere_path = f"{cluster_path}/{i}.pdb" sphere_paths.append(sphere_path) write_pdbs(io, s, sphere_path) if capping: for cap in cap_residues: cap.get_parent().detach_child(cap.get_id()) if xyz: struct_to_file.to_xyz(f"{cluster_path}/{metal_id}.xyz", *sphere_paths) struct_to_file.combine_pdbs(f"{cluster_path}/{metal_id}.pdb", center_residues, *sphere_paths, hetero_pdb=hetero_pdb) if charge: with open(f"{out}/charge.csv", "w") as f: f.write(f"Name,{','.join(str(i + 1) for i in range(sphere_count))}\n") for k, v in sorted(aa_charge.items()): f.write(k) for s in v: f.write(f",{s}") f.write(f"\n") if count: with open(f"{out}/count.csv", "w") as f: f.write(f"Name,{','.join(str(i + 1) for i in range(sphere_count))}\n") for k, v in sorted(res_count.items()): f.write(k) for sphere in v: s = ", ".join(f"{r} {c}" for r, c in sorted(sphere.items())) f.write(f',"{s}"') f.write("\n") return cluster_paths
import matplotlib.pyplot as plt from mpl_toolkits.mplot3d.art3d import Poly3DCollection from scipy.spatial import Voronoi
[docs]def plot_voronoi_2d(points, points_count): """ Plot a 2D Voronoi diagram from the atomic coordinates. Parameters ---------- points: numpy.array The 3D coordinates of the points (atoms) to be tessellated. Only the xy-plane projection will be plotted. points_count: int The number of real atoms (excluding dummy atoms). """ # Compute the Voronoi tessellation vor = Voronoi(points[:, :2]) # Using only xy coordinates for 2D plot # Plot the Voronoi diagram fig, ax = plt.subplots() # Plot Voronoi vertices ax.plot(vor.vertices[:, 0], vor.vertices[:, 1], 'o', markersize=2, color='red') # Plot ridges (edges) of the Voronoi cells, but only for real atoms for a, b in vor.ridge_points: if a < points_count and b < points_count: # Only plot ridges between real atoms ridge = np.asarray([a, b]) if np.all(ridge >= 0): # Valid ridge ax.plot(vor.vertices[ridge, 0], vor.vertices[ridge, 1], 'k-') # Plot the original points (real atoms) ax.plot(points[:points_count, 0], points[:points_count, 1], 'o', markersize=3, color='blue', label='Atoms') ax.set_xlabel('X Coordinate') ax.set_ylabel('Y Coordinate') ax.set_title('2D Voronoi Diagram (XY projection, real atoms only)') plt.legend() plt.savefig('2D_no_dummies.png', bbox_inches="tight", dpi=600) plt.show()
from mpl_toolkits.mplot3d import Axes3D
[docs]def plot_voronoi_3d(points, points_count): """ Plot a 3D Voronoi diagram from the atomic coordinates, excluding dummy atoms. Parameters ---------- points: numpy.array The 3D coordinates of the points (atoms) to be tessellated. points_count: int The number of real atoms (excluding dummy atoms). """ # Compute the Voronoi tessellation vor = Voronoi(points) # Plot the Voronoi diagram in 3D fig = plt.figure() ax = fig.add_subplot(111, projection='3d') # Plot Voronoi vertices ax.scatter(vor.vertices[:, 0], vor.vertices[:, 1], vor.vertices[:, 2], color='red', s=5, label='Voronoi Vertices') # Plot the original points (real atoms) ax.scatter(points[:points_count, 0], points[:points_count, 1], points[:points_count, 2], color='blue', s=10, label='Atoms') # Plot the Voronoi ridges (edges) between neighboring vertices for real atoms for a, b in vor.ridge_points: if a < points_count and b < points_count: # Only plot ridges between real atoms ridge = np.asarray([a, b]) if np.all(ridge >= 0): # Valid ridge ax.plot(vor.vertices[ridge, 0], vor.vertices[ridge, 1], vor.vertices[ridge, 2], 'k-') ax.set_xlabel('X Coordinate') ax.set_ylabel('Y Coordinate') ax.set_zlabel('Z Coordinate') ax.set_title('3D Voronoi Diagram (real atoms only)') plt.legend() plt.savefig('3D_no_dummies.png', bbox_inches="tight", dpi=600) plt.show()