# Source code for qp.protonate.get_protoss

"""Add hydrogens using Protoss

**Usage**

#. Submitting an existing or custom PDB file::

    >>> from qp.protonate import get_protoss
    >>> pid = get_protoss.upload("path/to/PDB.pdb")
    >>> job = get_protoss.submit(pid)
    >>> get_protoss.download(job, "path/to/OUT.pdb")

#. Submitting a PDB code::

    >>> from qp.protonate import get_protoss
    >>> pdb = "1dry"
    >>> job = get_protoss.submit(pdb)
    >>> get_protoss.download(job, "path/to/OUT.pdb")
    >>> get_protoss.download(job, "path/to/OUT.sdf", "ligands")

Protoss automatically removes alternative conformations and overlapping entries. 
Download the log file (``key="log"`` in ``get_protoss.download``) to see affected atoms. 

Some metal-coordinating residues may be incorrectly protonated. Use 
``get_protoss.adjust_activesites(path, metals)`` with the metal IDs to deprotonate
these residues. 
"""

import os
import json
import time
import requests
from Bio.PDB import PDBParser, PDBIO


def upload(path):
    """Upload a PDB file to the ProteinsPlus web server.

    The file is posted to the ProteinsPlus REST endpoint. The location
    returned by the server is then polled once per second for as long as it
    answers with status ``202``, and the structure ID is read from the final
    response.

    Parameters
    ----------
    path : str
        Path to the PDB file to upload.

    Returns
    -------
    str
        ProteinsPlus ID for the uploaded structure.

    Raises
    ------
    ValueError
        If the server returns a 400 Bad Request error.
    KeyError
        If the upload fails after 5 retry attempts.
    """
    retries = 5
    delay = 60  # seconds

    for _ in range(retries):
        try:
            # Use a context manager so the file handle is closed after every
            # attempt instead of leaking one open descriptor per retry.
            with open(path, "rb") as pdb_file:
                pp = requests.post(
                    "https://proteins.plus/api/pdb_files_rest",
                    files={"pdb_file[pathvar]": pdb_file},
                )
            if pp.status_code == 400:
                # Not caught below: a bad request is not worth retrying.
                raise ValueError("Bad request")
            loc = json.loads(pp.text)["location"]

            # Poll the returned location until the server stops answering 202.
            r = requests.get(loc)
            while r.status_code == 202:
                time.sleep(1)
                r = requests.get(loc)

            return json.loads(r.text)["id"]  # Exit if successful
        except KeyError:
            # The expected key was missing from the JSON response; wait and
            # try the whole upload again.
            print(f"> KeyError encountered. Retrying in {delay} seconds...")
            time.sleep(delay)

    raise KeyError(f"> Failed to upload the file and retrieve 'id' after {retries} attempts.")
def submit(pid):
    """Submit a PDB code or ProteinsPlus ID to the Protoss web API.

    The request is attempted up to five times; an attempt counts as failed
    when the JSON response does not contain a ``"location"`` entry.

    Parameters
    ----------
    pid : str
        Four-character PDB code or ProteinsPlus ID from :func:`upload`.

    Returns
    -------
    str
        URL of the Protoss job location, as reported by the server.

    Raises
    ------
    ValueError
        If the server answers the request with status 400.
    KeyError
        If no job location could be retrieved after 5 attempts.
    """
    max_attempts = 5
    wait_seconds = 60
    endpoint = "https://proteins.plus/api/protoss_rest"
    payload = {"protoss": {"pdbCode": pid}}
    accept_json = {"Accept": "application/json"}

    attempt = 0
    while attempt < max_attempts:
        attempt += 1
        try:
            response = requests.post(endpoint, json=payload, headers=accept_json)
            if response.status_code == 400:
                # Propagates immediately; only KeyError triggers a retry.
                raise ValueError("Invalid PDB code")
            body = json.loads(response.text)
            return body["location"]
        except KeyError:
            print(f"> KeyError encountered. Retrying in {wait_seconds} seconds...")
            time.sleep(wait_seconds)

    raise KeyError(
        f"> Failed to submit the PDB code and retrieve 'location' after {max_attempts} attempts."
    )
def download(job, out, key="protein"):
    """Download a Protoss output file.

    The job URL is polled once per second while it answers with status
    ``202``. The URL stored under ``key`` in the job's JSON result is then
    fetched and its text is written to ``out``.

    Parameters
    ----------
    job : str
        URL of the Protoss job location from :func:`submit`.
    out : str
        Path to the output file; its parent directory is created if needed.
    key : str, optional
        Entry of the job's JSON result to download. Default is ``'protein'``;
        the module usage example also uses ``'ligands'`` and ``'log'``.

    Raises
    ------
    KeyError
        If ``key`` could not be read from the job result after 5 attempts.
    """
    # The Protoss server does not always answer the first query correctly,
    # so a missing key is retried a few times before giving up.
    max_attempts = 5
    wait_seconds = 60

    attempts_left = max_attempts
    while attempts_left > 0:
        attempts_left -= 1
        try:
            status = requests.get(job)
            while status.status_code == 202:
                time.sleep(1)
                status = requests.get(job)

            file_url = json.loads(status.text)[key]
            payload = requests.get(file_url)

            target_dir = os.path.dirname(os.path.abspath(out))
            os.makedirs(target_dir, exist_ok=True)
            with open(out, "w") as handle:
                handle.write(payload.text)
        except KeyError:
            # Requested entry missing from the job result: wait, then retry.
            time.sleep(wait_seconds)
        else:
            return

    raise KeyError(
        f"> Failed to download the file with key '{key}' after {max_attempts} attempts."
    )
def repair_ligands(path, orig):
    """Repair ligands that Protoss renamed to ``MOL``.

    Protoss sometimes replaces unrecognized ligand residues with a generic
    ``MOL`` label. For every such residue, this function removes it from its
    chain, restores the corresponding residues from the pre-Protoss PDB, and
    moves each hydrogen of the ``MOL`` residue onto the residue of the closest
    restored heavy atom.

    Parameters
    ----------
    path : str
        Path to the Protoss output PDB file (modified in place).
    orig : str
        Path to the original (pre-Protoss) PDB file.

    Notes
    -----
    Only the first model of each structure is processed. If a ``MOL`` residue
    carries hydrogens but no original residue is restored for it, there is no
    atom to attach them to and the reassignment raises ``AttributeError``.
    """
    parser = PDBParser(QUIET=True)
    prot_structure = parser.get_structure("Prot", path)
    orig_structure = parser.get_structure("Orig", orig)

    # NOTE(review): the chain is mutated (detach/add) while ``get_residues()``
    # is being iterated. Kept exactly as in the original; confirm intended.
    for res in prot_structure[0].get_residues():
        if res.get_resname() != "MOL":
            continue

        resid = res.get_id()
        chain = res.get_parent()
        chain.detach_child(resid)

        # Starting at the original residue with the same sequence number,
        # copy over consecutive original residues that are absent from the
        # Protoss chain; stop at the first one that is already present.
        missing = []
        found = False
        for r in orig_structure[0][chain.get_id()].get_residues():
            if r.get_id()[1] == resid[1]:
                found = True
            if found:
                if r.get_id() not in chain:
                    chain.add(r)
                    missing.append(r)
                else:
                    break

        # Strip hydrogens that came with the original residues so that only
        # the Protoss-assigned hydrogens remain after reassignment.
        for r in missing:
            for a in r.get_unpacked_list():
                # Fix: detach the hydrogen being inspected (``a``). The
                # original referenced ``atom``, which is undefined here on the
                # first pass and stale on later ones. ``has_id`` avoids a
                # second detach when several unpacked atoms share one ID.
                if a.element == "H" and r.has_id(a.get_id()):
                    r.detach_child(a.get_id())

        # Attach every Protoss hydrogen to the residue of its nearest
        # restored atom (``atom - a`` is the interatomic distance).
        for atom in res.get_unpacked_list():
            if atom.element != "H":
                continue
            closest = None
            for r in missing:
                for a in r.get_unpacked_list():
                    if closest is None or atom - a < atom - closest:
                        closest = a
            # NOTE: ``closest`` is still None when ``missing`` is empty.
            closest.get_parent().add(atom)

    io = PDBIO()
    io.set_structure(prot_structure)
    io.save(path)