Source code for qp.protonate.parse_output

"""Parse Protoss output logs and detect residue type changes."""

from Bio.PDB import PDBParser
from qp.structure.missing import get_chain_order

[docs]def parse_log(log_path, pdb_path, AA): """ Parse the Protoss log file to identify residues with clashes and their chain indices. Parameters ---------- log_path: str Path to the Protoss log file. pdb_path: str Path to the PDB file. AA: dict Dictionary mapping three-letter codes to one-letter codes. Returns ------- residues_with_clashes: set of tuples Set of residues with clashes in the format compatible with `residues`. Each tuple contains ((res_id, ' '), one_letter_code, 'R') and the chain index. """ chain_order = get_chain_order(pdb_path) residues_with_clashes = set() with open(log_path, "r") as f: for line in f: if "ATOM" in line: # Remove everything before "ATOM" atom_line = line[line.index("ATOM"):] # Extract information based on PDB format columns res_name = atom_line[17:20].strip() chain = atom_line[21].strip() res_id = int(atom_line[22:26].strip()) one_letter_code = AA.get(res_name, 'X') # Use 'X' for unknown residues chain_index = chain_order.get(chain, -1) residues_with_clashes.add((res_id, one_letter_code, chain_index, chain, res_name)) return residues_with_clashes
[docs]def record_type(modeller_pdb_path, protoss_pdb_path): ''' Checks if protoss changed HETATM or ATOM classifications. Parameters ---------- modeller_pdb_path: string The path to the original modeller PDB. protoss_pdb_path: string The path to the PDB processed by protoss. Returns ------- changed_residues: list List of residues that have been changed from ATOM to HETATM or vice versa. Note ---- changed_residues = [ ('A', (' ', 123, ' '), 'ATOM', 'HETATM'), ('B', (' ', 45, ' '), 'HETATM', 'ATOM'), ('C', ('H', 67, ' '), 'HETATM', 'ATOM')] ''' # Initialize the parser parser = PDBParser(QUIET=True) # Parse the PDB files modeller_structure = parser.get_structure('modeller', modeller_pdb_path) protoss_structure = parser.get_structure('protoss', protoss_pdb_path) changed_residues = [] # Iterate through all chains and residues in the modeller structure for modeller_chain in modeller_structure.get_chains(): chain_id = modeller_chain.get_id() if chain_id not in protoss_structure[0]: print(f"> WARNING: Chain {chain_id} not found in protoss structure.") continue protoss_chain = protoss_structure[0][chain_id] # Assume the chains are the same for modeller_residue in modeller_chain.get_residues(): res_num = modeller_residue.get_id()[1] resname = modeller_residue.get_resname() # Find the corresponding residue in the protoss chain by residue number and name protoss_residue = None for res in protoss_chain.get_residues(): if res.get_id()[1] == res_num and res.get_resname() == resname: protoss_residue = res break if protoss_residue is None: print(f"> WARNING: Residue {modeller_residue.get_id()} in chain {chain_id} not found in protoss structure.") continue # Compare the residue types (HETATM vs ATOM) modeller_type = 'HETATM' if modeller_residue.id[0].startswith('H_') else 'ATOM' protoss_type = 'HETATM' if protoss_residue.id[0].startswith('H_') else 'ATOM' if modeller_type != protoss_type: changed_residues.append((chain_id, resname, res_num, modeller_type, protoss_type)) # Alert the user if any residues have changed for chain_id, resname, res_num, old_type, new_type in changed_residues: print(f"> WARNING: {resname} {res_num} in chain {chain_id} was changed from {old_type} to {new_type}.") return changed_residues