Source code for qp.structure.convert_nhie_oxo

"""Convert NHIE active sites to reactive OXO/succinate state.

This module provides specialized functions for converting non-heme iron
enzyme (NHIE) active sites from the resting state (with alpha-ketoglutarate
and water/NO ligands) to the reactive ferryl-oxo state (with succinate
and OXO ligands). This is useful for modeling the reactive intermediate
in NHIE catalysis.

Note: This is a specialized module for NHIE enzyme studies and is not
part of the standard QuantumPDB workflow.
"""

import numpy as np
import shutil
from Bio.PDB import PDBParser, PDBIO


[docs]def read_pdb(pdb_path): """Parse a PDB file and extract all ATOM/HETATM records. Parameters ---------- pdb_path : str Path to the PDB file. Returns ------- list of dict Each dict contains PDB atom fields: record, serial, name, altLoc, resName, chainID, resSeq, iCode, x, y, z, occupancy, tempFactor, element, charge. """ atoms = [] with open(pdb_path, 'r') as file: for line in file: if line.startswith("ATOM") or line.startswith("HETATM"): atom = { 'record': line[0:6].strip(), 'serial': int(line[6:11]), 'name': line[12:16].strip(), 'altLoc': line[16:17].strip(), 'resName': line[17:20].strip(), 'chainID': line[21:22].strip(), 'resSeq': int(line[22:26]), 'iCode': line[26:27].strip(), 'x': float(line[30:38]), 'y': float(line[38:46]), 'z': float(line[46:54]), 'occupancy': float(line[54:60]), 'tempFactor': float(line[60:66]), 'element': line[76:78].strip(), 'charge': line[78:80].strip() } atoms.append(atom) return atoms
[docs]def write_pdb(atoms, pdb_path): """Write a list of atom dictionaries to a PDB file. Atoms with ``record == 'DELETE'`` are skipped. Parameters ---------- atoms : list of dict Atom records as returned by :func:`read_pdb`. pdb_path : str Path to the output PDB file. """ with open(pdb_path, 'w') as file: for atom in atoms: if atom['record'] != 'DELETE': file.write(f"{atom['record']:<6}{atom['serial']:>5} {atom['name']:<4}{atom['altLoc']:<1}{atom['resName']:<3} {atom['chainID']:<1}{atom['resSeq']:>4}{atom['iCode']:<1} {atom['x']:>8.3f}{atom['y']:>8.3f}{atom['z']:>8.3f}{atom['occupancy']:>6.2f}{atom['tempFactor']:>6.2f} {atom['element']:>2}{atom['charge']:>2}\n")
[docs]def find_and_convert_ligands_to_oxo(atoms, iron_names, distance_cutoff=2.8): """Convert water or NO ligands near iron to OXO residues. Searches for water (HOH, WAT) or nitric oxide (HOA, NO) ligands within the distance cutoff of any iron atom and converts them to OXO (oxo) ligands with the Fe-O bond set to 1.65 A. Parameters ---------- atoms : list of dict Atom records from :func:`read_pdb`. iron_names : list of str Iron atom names to search for (e.g., ``['FE', 'FE2']``). distance_cutoff : float, optional Maximum Fe-ligand distance in angstroms (default 2.8). Returns ------- set Serial numbers of iron atoms that received an OXO ligand. """ ligands_to_convert = {'HOH': 'O', 'WAT': 'O', 'HOA': 'N', 'NO': 'N'} oxo_placed = set() for iron in atoms: if iron['name'] in iron_names: iron_coord = np.array([iron['x'], iron['y'], iron['z']]) for ligand in atoms: if ligand['resName'] in ligands_to_convert and ligand['name'] == ligands_to_convert[ligand['resName']]: ligand_coord = np.array([ligand['x'], ligand['y'], ligand['z']]) distance = np.linalg.norm(iron_coord - ligand_coord) if distance <= distance_cutoff: ligand['resName'] = 'OXO' ligand['name'] = 'O' ligand['element'] = 'O' ligand['x'], ligand['y'], ligand['z'] = iron_coord + 1.65 * (ligand_coord - iron_coord) / distance for other_atom in atoms: if other_atom['resSeq'] == ligand['resSeq'] and other_atom['chainID'] == ligand['chainID'] and other_atom['serial'] != ligand['serial']: other_atom['record'] = 'DELETE' oxo_placed.add(iron['serial']) print(f' > Converted {ligand["serial"]} {ligand["resName"]}/{ligand["resName"]} to OXO at distance {round(distance, 2)}') break return oxo_placed
[docs]def find_histidines(atoms, iron): """Find histidine NE2 atoms coordinating an iron center. Parameters ---------- atoms : list of dict Atom records from :func:`read_pdb`. iron : dict Iron atom record. Returns ------- list of dict Histidine NE2 atom records within 3.0 A of the iron. """ iron_coord = np.array([iron['x'], iron['y'], iron['z']]) histidines = [] for residue in atoms: if residue['resName'] == 'HIS' and residue['name'] == 'NE2': ne2_coord = np.array([residue['x'], residue['y'], residue['z']]) distance = np.linalg.norm(iron_coord - ne2_coord) coordination_cutoff_for_his = 3.0 if distance <= coordination_cutoff_for_his: histidines.append(residue) print(f" > Found {len(histidines)} His coordinating iron {iron['serial']}") return histidines
[docs]def calculate_dihedral(p1, p2, p3, p4): """Calculate the dihedral angle defined by four points. Parameters ---------- p1, p2, p3, p4 : numpy.ndarray 3D coordinates of the four points. Returns ------- float Dihedral angle in degrees. """ b0 = -1.0 * (p2 - p1) b1 = p3 - p2 b2 = p4 - p3 b1 /= np.linalg.norm(b1) v = b0 - np.dot(b0, b1) * b1 w = b2 - np.dot(b2, b1) * b1 x = np.dot(v, w) y = np.dot(np.cross(b1, v), w) return np.degrees(np.arctan2(y, x))
[docs]def place_oxo_manually(atoms, iron): """Place an OXO ligand opposite the coordinating histidines. For NHIE enzymes with two facial histidines, this function places the OXO ligand at the position opposite to one of the histidines, selecting the position with a dihedral angle closest to 90 degrees relative to the AKG C2-O5-Fe axis and avoiding steric clashes. Parameters ---------- atoms : list of dict Atom records from :func:`read_pdb` (modified in place). iron : dict Iron atom record. Returns ------- bool True if OXO was successfully placed, False otherwise. """ histidines = find_histidines(atoms, iron) if len(histidines) != 2: print(f"> Error: Expected two His coordinating iron {iron['serial']}") return False iron_coord = np.array([iron['x'], iron['y'], iron['z']]) potential_oxos = [] for ne2 in histidines: ne2_coord = np.array([ne2['x'], ne2['y'], ne2['z']]) oxo_coord = iron_coord + 1.65 * (iron_coord - ne2_coord) / np.linalg.norm(iron_coord - ne2_coord) print(f"> Checking potential oxo site opposite His {ne2['serial']}") clash_threshold = 1.2 clash = False for atom in atoms: if atom['serial'] != iron['serial'] and not (atom['resName'] == 'AKG' and atom['name'] in ['C1', 'O1', 'O2']): # Exclude specific AKG atoms from clash detection distance = np.linalg.norm(np.array([atom['x'], atom['y'], atom['z']]) - oxo_coord) if distance < clash_threshold: clash = True print(f"> Clash found with atom {atom['serial']} ({atom['name']} {atom['resName']}) at distance {round(distance, 2)}") break if not clash: potential_oxos.append((oxo_coord, ne2['serial'])) print(f"> Potential oxo site opposite His {ne2['serial']}.") # Find AKG coordinates for dihedral calculation try: akg_c2 = next(atom for atom in atoms if atom['resName'] == 'AKG' and atom['name'] == 'C2') akg_o5 = next(atom for atom in atoms if atom['resName'] == 'AKG' and atom['name'] == 'O5') except StopIteration: print("> Error: Could not find AKG atoms (C2 and O5) for dihedral calculation.") return False c2_coord = np.array([akg_c2['x'], akg_c2['y'], akg_c2['z']]) o5_coord = np.array([akg_o5['x'], akg_o5['y'], akg_o5['z']]) best_site = None best_dihedral = None # Calculate dihedral for all potential sites, including clashing ones for ne2 in histidines: ne2_coord = np.array([ne2['x'], ne2['y'], ne2['z']]) oxo_coord = iron_coord + 1.65 * (iron_coord - ne2_coord) / np.linalg.norm(iron_coord - ne2_coord) dihedral = abs(calculate_dihedral(c2_coord, o5_coord, iron_coord, oxo_coord)) print(f"> Dihedral angle for potential OXO site opposite His {ne2['serial']}: {round(dihedral, 2)}") # Select the best site among the non-clashing ones for oxo_coord, _ in potential_oxos: dihedral = abs(calculate_dihedral(c2_coord, o5_coord, iron_coord, oxo_coord)) print(f"> Dihedral angle for non-clashing site: {round(dihedral, 2)}") if best_dihedral is None or abs(dihedral - 90) < abs(best_dihedral - 90): best_site = oxo_coord best_dihedral = dihedral if best_site is None: print("> Error: Could not determine the best site for OXO") return False oxo = { 'record': 'HETATM', 'serial': max(atom['serial'] for atom in atoms) + 1, 'name': 'O', 'altLoc': '', 'resName': 'OXO', 'chainID': iron['chainID'], 'resSeq': iron['resSeq'], 'iCode': '', 'x': best_site[0], 'y': best_site[1], 'z': best_site[2], 'occupancy': 1.00, 'tempFactor': 0.00, 'element': 'O', 'charge': '' } atoms.append(oxo) rounded_best_site = [round(num, 2) for num in best_site] print(f'> OXO placed at {rounded_best_site} with dihedral {round(best_dihedral, 2)}\n') return True
[docs]def convert_akg_to_sin(atoms): """Convert alpha-ketoglutarate (AKG) to succinate (SIN). Removes the C1 carbonyl oxygens and converts C1 to an oxo group, simulating the decarboxylation that occurs during NHIE catalysis. Atoms are modified in place; deleted atoms have ``record = 'DELETE'``. Parameters ---------- atoms : list of dict Atom records from :func:`read_pdb` (modified in place). """ for atom in atoms: if atom['resName'] == 'AKG': if atom['name'] == 'O1' or atom['name'] == 'O2': atom['record'] = 'DELETE' elif atom['name'] == 'C1': atom['name'] = 'O1' atom['element'] = 'O' c2_atom = next(a for a in atoms if a['name'] == 'C2' and a['resSeq'] == atom['resSeq'] and a['chainID'] == atom['chainID']) c2_coord = np.array([c2_atom['x'], c2_atom['y'], c2_atom['z']]) atom_coord = np.array([atom['x'], atom['y'], atom['z']]) bond_vector = atom_coord - c2_coord bond_length = np.linalg.norm(bond_vector) new_bond_vector = bond_vector * (1.2 / bond_length) atom['x'], atom['y'], atom['z'] = c2_coord + new_bond_vector atom['resName'] = 'SIN' else: atom['resName'] = 'SIN'
[docs]def add_oxo_sin(pdb_path): """Convert an NHIE structure from resting to reactive state. Main orchestration function that: 1. Converts water/NO ligands near iron to OXO 2. Places OXO manually if no suitable ligand found 3. Converts AKG to succinate (SIN) The original file is backed up with ``_akg`` suffix. Parameters ---------- pdb_path : str Path to the PDB file (modified in place). """ # Start by making a copy of the file shutil.copy(pdb_path, f'{pdb_path[:-4]}_akg.pdb') atoms = read_pdb(pdb_path) iron_names = ['FE', 'FE2', 'FE3'] oxo_placed = find_and_convert_ligands_to_oxo(atoms, iron_names) for iron in atoms: if iron['name'] in iron_names and iron['serial'] not in oxo_placed: oxo_placed_manually = place_oxo_manually(atoms, iron) if oxo_placed_manually: oxo_placed.add(iron['serial']) if not oxo_placed: print("> Error: Could not place OXO") convert_akg_to_sin(atoms) atoms = [atom for atom in atoms if atom['record'] != 'DELETE'] write_pdb(atoms, pdb_path)
[docs]def remove_oxo_hydrogens(protoss_pdb): """Remove hydrogen atoms from OXO residues. Protoss may add hydrogens to the OXO oxygen; this function removes them to maintain the correct oxo (O2-) charge state. Parameters ---------- protoss_pdb : str Path to the PDB file (modified in place). """ parser = PDBParser(QUIET=True) structure = parser.get_structure('structure', protoss_pdb) for model in structure: for chain in model: for residue in chain: if residue.resname == 'OXO': to_remove = [] for atom in residue: if atom.element == 'H': to_remove.append(atom) for atom in to_remove: residue.detach_child(atom.id) io = PDBIO() io.set_structure(structure) io.save(protoss_pdb)
[docs]def update_oxo_sdf(pdb_path, sdf_path): """Append OXO entries to the ligand SDF file. Creates SDF entries for OXO residues with -2 formal charge so that the charge embedding correctly accounts for the oxo ligand. Parameters ---------- pdb_path : str Path to the PDB file containing OXO residues. sdf_path : str Path to the SDF file to append to. """ atoms = read_pdb(pdb_path) oxo_atoms = [atom for atom in atoms if atom['resName'] == 'OXO'] with open(sdf_path, 'a') as sdf_file: for oxo in oxo_atoms: sdf_file.write(f"{oxo['resName']}_{oxo['chainID']}_{oxo['resSeq']}\n") sdf_file.write("Generated by script\n") sdf_file.write(" QuantumPDB\n") sdf_file.write(" 1 0 0 0 0 0 999 V2000\n") sdf_file.write(f" {oxo['x']:>10.4f}{oxo['y']:>10.4f}{oxo['z']:>10.4f} O 0 0 0 0 0 0 0 0 0 0 0 0\n") sdf_file.write(f"M CHG 1 1 -2\n") sdf_file.write("M END\n$$$$\n")
[docs]def update_sin_sdf(akg_sdf_path, sin_sdf_path): """Replace AKG entries with SIN entries in the ligand SDF file. Removes all AKG (alpha-ketoglutarate) entries from the original SDF and appends the SIN (succinate) entries from the Protoss-processed temporary file. Parameters ---------- akg_sdf_path : str Path to the original SDF file (modified in place). sin_sdf_path : str Path to the temporary SDF file with SIN entries from Protoss. """ # Read the old SDF file and remove AKG entries with open(akg_sdf_path, 'r') as old_sdf_file: old_sdf_lines = old_sdf_file.readlines() filtered_old_sdf_lines = [] i = 0 while i < len(old_sdf_lines): if old_sdf_lines[i].startswith("AKG"): while i < len(old_sdf_lines) and not old_sdf_lines[i].strip() == "$$$$": i += 1 i += 1 # Skip the "$$$$" else: filtered_old_sdf_lines.append(old_sdf_lines[i]) i += 1 # Read the temporary SIN SDF file with open(sin_sdf_path, 'r') as temp_sin_sdf_file: sin_sdf_lines = temp_sin_sdf_file.readlines() # Filter SIN entries filtered_sin_sdf_lines = [] i = 0 while i < len(sin_sdf_lines): if sin_sdf_lines[i].startswith("SIN"): while i < len(sin_sdf_lines) and not sin_sdf_lines[i].strip() == "$$$$": filtered_sin_sdf_lines.append(sin_sdf_lines[i]) i += 1 filtered_sin_sdf_lines.append(sin_sdf_lines[i]) # Append the "$$$$" i += 1 # Write the updated SDF file with SIN entries with open(akg_sdf_path, 'w') as sdf_file: sdf_file.writelines(filtered_old_sdf_lines) sdf_file.writelines(filtered_sin_sdf_lines)