"""Calculate QM properties of the ligands
**Usage**
>>> protonate.compute_charge("path/to/OUT.sdf")
{"AAG_A331": 0, "SO4_A901": -2, "SO4_A325": -2,
"SO4_A903": -2, "AKG_A330": -2, "GOL_A328": 0,
"GOL_A329": 0, "GOL_A900": 0, "GOL_A904": 0}
"""
[docs]def compute_charge(path_ligand, path_pdb):
"""Compute the formal charge of each ligand from Protoss SDF output.
Parses the ligand SDF file generated by Protoss and extracts formal
charges from the ``M CHG`` records. Also handles special cases like
metal ions (Na+, K+, Mg2+, etc.) that are not included in Protoss
SDF output, and corrects for atoms removed during protonation.
Parameters
----------
path_ligand : str
Path to the Protoss-generated ligand SDF file.
path_pdb : str
Path to the corresponding PDB file (used to detect removed atoms
and standalone metal ions).
Returns
-------
dict
Mapping of ligand ID strings (e.g., ``'FE_A199'``) to formal charges.
Oligomeric ligands use space-separated keys (e.g., ``'GAL_A1 GAL_A2'``).
"""
with open(path_ligand, "r") as f:
sdf = f.read()
with open(path_pdb, "r") as f:
pdb_lines = f.readlines()
ligands = [
[t for t in s.splitlines() if t != ""]
for s in sdf.split("$$$$")
if s != "\n" and s != ""
]
charge = {}
for l in ligands:
title = l[0].split("_")
name_id = [
(res_name, chain_id, res_id)
for res_name, chain_id, res_id
in zip(title[::3], title[1::3], title[2::3])
]
name = " ".join([f"{res_name}_{chain_id}{res_id}" for res_name, chain_id, res_id in name_id])
c = 0
n_atom = 0
for line in l:
if "V2000" in line:
n_atom = int(line[:3]) # https://discover.3ds.com/sites/default/files/2020-08/biovia_ctfileformats_2020.pdf
if line.startswith("M RGP"):
# R# atom is found in the addition between CSO and TAN
# It's not a real atom and recorded in the RGP entry
n_atom -= int(line.split()[2])
if line.startswith("M CHG"):
c += sum([int(x) for x in line.split()[4::2]])
charge[name] = c
cnt = 0
for res_name, chain_id, res_id in name_id:
if res_name == "NO":
cnt = n_atom
break
# to detect removed atoms
# NO is corrected from NH2OH, thus the deleted 3 hydrogen shouldn't affect the charge
for line in pdb_lines:
if line[17:20].strip() == res_name and line[21] == chain_id and line[22:26].strip() == res_id and line[0:3] != "TER":
cnt += 1
charge[name] -= (n_atom - cnt)
# check Na+, K+, Mg2+, Ca2+
for line in pdb_lines:
if len(line) > 26 and line[0:3] != "TER":
res_name = line[17:20].strip()
chain_id = line[21]
res_id = line[22:26].strip()
name = f"{res_name}_{chain_id}{res_id}"
if res_name in {"NA", "K", "RB", "CS"} and name not in charge:
print(f"> Find extra {res_name} in protein, charge + 1")
charge[name] = 1
elif res_name in {"MG", "CA", "ZN", "PB", "CD"} and name not in charge:
print(f"> Find extra {res_name} in protein, charge + 2")
charge[name] = 2
elif res_name in {"AL"} and name not in charge:
print(f"> Find extra {res_name} in protein, charge + 3")
charge[name] = 3
return charge
[docs]def compute_spin(path_ligand):
"""Compute extra spin contributions from radical ligands in an SDF file.
NO is treated as a doublet (spin = 1) and O2 (OXY) as a triplet
(spin = 2). All other ligands are assumed to have zero extra spin.
Parameters
----------
path_ligand : str
Path to the Protoss-generated ligand SDF file.
Returns
-------
dict
Keyed by ligand ID string, values are extra spin integers.
"""
with open(path_ligand, "r") as f:
sdf = f.read()
ligands = [
[t for t in s.splitlines() if t != ""]
for s in sdf.split("$$$$")
if s != "\n" and s != ""
]
spin = {}
for l in ligands:
title = l[0].split("_")
name_id = [
(res_name, chain_id, res_id)
for res_name, chain_id, res_id
in zip(title[::3], title[1::3], title[2::3])
]
name = " ".join([f"{res_name}_{chain_id}{res_id}" for res_name, chain_id, res_id in name_id])
for res_name, chain_id, res_id in name_id:
if res_name == "NO":
spin[name] = 1
elif res_name == "OXY":
spin[name] = 2
return spin