Source code for qp.protonate.ligand_prop

"""Calculate QM properties of the ligands

**Usage**

>>> protonate.compute_charge("path/to/OUT.sdf")
{"AAG_A331": 0, "SO4_A901": -2, "SO4_A325": -2, 
"SO4_A903": -2, "AKG_A330": -2,  "GOL_A328": 0, 
"GOL_A329": 0, "GOL_A900": 0, "GOL_A904": 0}

"""

[docs]def compute_charge(path_ligand, path_pdb): """Compute the formal charge of each ligand from Protoss SDF output. Parses the ligand SDF file generated by Protoss and extracts formal charges from the ``M CHG`` records. Also handles special cases like metal ions (Na+, K+, Mg2+, etc.) that are not included in Protoss SDF output, and corrects for atoms removed during protonation. Parameters ---------- path_ligand : str Path to the Protoss-generated ligand SDF file. path_pdb : str Path to the corresponding PDB file (used to detect removed atoms and standalone metal ions). Returns ------- dict Mapping of ligand ID strings (e.g., ``'FE_A199'``) to formal charges. Oligomeric ligands use space-separated keys (e.g., ``'GAL_A1 GAL_A2'``). """ with open(path_ligand, "r") as f: sdf = f.read() with open(path_pdb, "r") as f: pdb_lines = f.readlines() ligands = [ [t for t in s.splitlines() if t != ""] for s in sdf.split("$$$$") if s != "\n" and s != "" ] charge = {} for l in ligands: title = l[0].split("_") name_id = [ (res_name, chain_id, res_id) for res_name, chain_id, res_id in zip(title[::3], title[1::3], title[2::3]) ] name = " ".join([f"{res_name}_{chain_id}{res_id}" for res_name, chain_id, res_id in name_id]) c = 0 n_atom = 0 for line in l: if "V2000" in line: n_atom = int(line[:3]) # https://discover.3ds.com/sites/default/files/2020-08/biovia_ctfileformats_2020.pdf if line.startswith("M RGP"): # R# atom is found in the addition between CSO and TAN # It's not a real atom and recorded in the RGP entry n_atom -= int(line.split()[2]) if line.startswith("M CHG"): c += sum([int(x) for x in line.split()[4::2]]) charge[name] = c cnt = 0 for res_name, chain_id, res_id in name_id: if res_name == "NO": cnt = n_atom break # to detect removed atoms # NO is corrected from NH2OH, thus the deleted 3 hydrogen shouldn't affect the charge for line in pdb_lines: if line[17:20].strip() == res_name and line[21] == chain_id and line[22:26].strip() == res_id and line[0:3] != "TER": cnt += 1 charge[name] -= (n_atom - cnt) # check Na+, K+, Mg2+, Ca2+ for line in pdb_lines: if len(line) > 26 and line[0:3] != "TER": res_name = line[17:20].strip() chain_id = line[21] res_id = line[22:26].strip() name = f"{res_name}_{chain_id}{res_id}" if res_name in {"NA", "K", "RB", "CS"} and name not in charge: print(f"> Find extra {res_name} in protein, charge + 1") charge[name] = 1 elif res_name in {"MG", "CA", "ZN", "PB", "CD"} and name not in charge: print(f"> Find extra {res_name} in protein, charge + 2") charge[name] = 2 elif res_name in {"AL"} and name not in charge: print(f"> Find extra {res_name} in protein, charge + 3") charge[name] = 3 return charge
[docs]def compute_spin(path_ligand): """Compute extra spin contributions from radical ligands in an SDF file. NO is treated as a doublet (spin = 1) and O2 (OXY) as a triplet (spin = 2). All other ligands are assumed to have zero extra spin. Parameters ---------- path_ligand : str Path to the Protoss-generated ligand SDF file. Returns ------- dict Keyed by ligand ID string, values are extra spin integers. """ with open(path_ligand, "r") as f: sdf = f.read() ligands = [ [t for t in s.splitlines() if t != ""] for s in sdf.split("$$$$") if s != "\n" and s != "" ] spin = {} for l in ligands: title = l[0].split("_") name_id = [ (res_name, chain_id, res_id) for res_name, chain_id, res_id in zip(title[::3], title[1::3], title[2::3]) ] name = " ".join([f"{res_name}_{chain_id}{res_id}" for res_name, chain_id, res_id in name_id]) for res_name, chain_id, res_id in name_id: if res_name == "NO": spin[name] = 1 elif res_name == "OXY": spin[name] = 2 return spin