Source code for qp.cli

"""Command-line interface (CLI) entry point.

**Usage** ``qp run --config path/to/config.yaml``
"""

import os
import sys
import click
import traceback

[docs]def welcome(): """Print the QuantumPDB welcome banner with ASCII art and usage hints. Displays the package logo, links to documentation and GitHub, and basic usage examples. Called automatically when the CLI is invoked. """ click.secho("\n") click.secho(" ╔════════════════════════╗ ", bold=True) click.secho(" ║ __________ ║ ", bold=True) click.secho(" ║ / ____/\____ \ ║ ", bold=True) click.secho(" ║ < <_| || |_> > ║ ", bold=True) click.secho(" ║ \__ || __/ ║ ", bold=True) click.secho(" ║ |__||__| ║ ", bold=True) click.secho(" ║ ║ ", bold=True) click.secho(" ║ QUANTUMPDB ║ ", bold=True) click.secho(" ║ [quantumpdb.rtfd.io] ║ ", bold=True) click.secho(" ╚═══════════╗╔═══════════╝ ", bold=True) click.secho(" ╔═══════╝╚═══════╗ ", bold=True) click.secho(" ║ THE KULIK LAB ║ ", bold=True) click.secho(" ╚═══════╗╔═══════╝ ", bold=True) click.secho(" ╔══════════════════════╝╚══════════════════════╗ ", bold=True) click.secho(" ║ Code: github.com/davidkastner/quantumpdb ║ ", bold=True) click.secho(" ║ Docs: quantumpdb.readthedocs.io ║ ", bold=True) click.secho(" ║ - Clusters: qp run -c config.yaml ║ ", bold=True) click.secho(" ║ - QM calcs: qp submit -c config.yaml ║ ", bold=True) click.secho(" ╚══════════════════════════════════════════════╝\n", bold=True)
# Welcome even if no flags welcome() @click.group() def cli(): """QuantumPDB command-line interface. Use ``qp run``, ``qp submit``, or ``qp analyze`` with a configuration YAML file to execute different stages of the pipeline. """ pass @cli.command() @click.option("--config", "-c", required=True, type=click.Path(exists=True), help="Path to the configuration YAML file") def run(config): """Execute the structure preparation, protonation, and cluster extraction pipeline. This command runs stages 1-3 of the QuantumPDB workflow: fetching PDB structures, modeling missing residues with Modeller, assigning protonation states with Protoss, and extracting QM cluster models using Voronoi tessellation. """ import os from shutil import copy from Bio.PDB.PDBExceptions import PDBIOException from qp.structure import setup config_data = setup.read_config(config) err = {"PDB": [], "Protoss": [], "Coordination sphere": [], "Other": []} # Parse configuration parameters modeller = config_data.get('modeller', False) protoss = config_data.get('protoss', False) coordination = config_data.get('coordination', False) skip = config_data.get('skip', 'all') max_clash_refinement_iter = config_data.get('max_clash_refinement_iter', 5) input = config_data.get('input', []) output = config_data.get('output_dir', '') center_yaml_residues = config_data.get('center_residues', []) pdb_all, center_residues = setup.parse_input(input, output, center_yaml_residues) if modeller: from qp.structure import missing optimize = config_data.get('optimize_select_residues', 1) convert_to_nhie_oxo = config_data.get('convert_to_nhie_oxo', False) if protoss: from qp.protonate import get_protoss, fix if coordination: from qp.cluster import spheres sphere_count = config_data.get('number_of_spheres', 2) first_sphere_radius = config_data.get('radius_of_first_sphere', 4.0) max_atom_count = config_data.get('max_atom_count', None) merge_cutoff = config_data.get('merge_distance_cutoff', 0.0) include_ligands = config_data.get('include_ligands', 2) ligands = config_data.get('additional_ligands', []) capping = config_data.get('capping_method', 1) charge = config_data.get('compute_charges', True) count = config_data.get('count_residues', True) xyz = config_data.get('write_xyz', True) hetero_pdb = config_data.get('write_hetero_pdb', False) smooth_choice = config_data.get('smoothing_method', 2) smooth_options = {0: {}, 1: {"eps": 6, "min_samples": 3}, 2: {"mean_distance": 3}, 3: {}} smooth_method_options = {0: "box_plot", 1: "dbscan", 2: "dummy_atom", 3: False} smooth_params = smooth_options[smooth_choice] smooth_method = smooth_method_options[smooth_choice] if capping or charge: protoss = True for pdb, path in pdb_all: try: click.secho("╔══════╗", bold=True) click.secho(f"║ {pdb.upper()} ║", bold=True) click.secho("╚══════╝", bold=True) # Skips fetching if PDB file exists if not os.path.isfile(path): click.echo(f"> Fetching PDB file") try: setup.fetch_pdb(pdb, path) except ValueError as e: # Catches invalid PDB ID click.secho(f"> Error: {e}\n", italic=True, fg="red") err["PDB"].append(pdb) continue except IOError as e: # Catches network and server issues click.secho(f"> Error: A server or network issue occurred. {e}\n", italic=True, fg="red") err["PDB"].append(pdb) continue # Extract the current center residue from the list of all residues from qp.cluster.spheres import CenterResidue center_residue = CenterResidue(center_residues.pop(0)) click.echo(f"> Using center residue: {center_residue}") residues_with_clashes = [] # Start by assuming no protoss clashes for i in range(max_clash_refinement_iter): mod_path = f"{output}/{pdb}/{pdb}_modeller.pdb" if modeller: if skip in ["modeller", "all"] and os.path.isfile(mod_path) and not residues_with_clashes: click.echo("> Modeller file found") else: click.echo("> Building model") AA = missing.define_residues() residues = missing.get_residues(path, AA) # Remove trailing missing residues from the ends of all chains residues = missing.clean_termini(residues) # Update residues with clashes if any if residues_with_clashes: missing.delete_clashes(path, residues_with_clashes) for residue_with_clashes in residues_with_clashes: res_id = residue_with_clashes[0] one_letter_code = residue_with_clashes[1] chain_index = residue_with_clashes[2] for j, residue in enumerate(residues[chain_index]): if residue[0][0] == res_id: if residue[1] == one_letter_code: residues[chain_index][j] = ((res_id, ' '), one_letter_code, 'R') else: print("> Residue index matched but residue name did not.") exit() ali_path = f"{output}/{pdb}/{pdb}.ali" missing.write_alignment(residues, pdb, path, ali_path) print("> Generated alignment file and starting Modeller run:\n") if not os.path.isfile(f"{output}/{pdb}/{pdb}.pdb"): copy(path, f"{output}/{pdb}/{pdb}.pdb") missing.build_model(residues, pdb, path, ali_path, mod_path, optimize) prot_path = f"{output}/{pdb}/Protoss" protoss_log_file = f"{prot_path}/{pdb}_log.txt" protoss_pdb = f"{prot_path}/{pdb}_protoss.pdb" ligands_sdf = f"{prot_path}/{pdb}_ligands.sdf" if protoss: if skip in ["protoss", "all"] and os.path.isfile(protoss_pdb) and not residues_with_clashes: click.echo("> Protoss file found") else: import qp pdbl = pdb.lower() prepared_flag = False for qp_path in qp.__path__: prepared_prot_path = os.path.join(qp_path, f"resources/prepared/{pdbl}/Protoss") if os.path.exists(prepared_prot_path): prepared_flag = True if prepared_flag: os.makedirs(prot_path, exist_ok=True) copy(os.path.join(prepared_prot_path, f"{pdbl}_protoss.pdb"), protoss_pdb) copy(os.path.join(prepared_prot_path, f"{pdbl}_ligands.sdf"), ligands_sdf) click.echo("> Using pre-prepared protoss file") else: from qp.protonate.fix import clean_occupancy, fix_OXT click.echo("> Running Protoss") if modeller: pdb_path = mod_path else: pdb_path = path fix_OXT(pdb_path) clean_occupancy(pdb_path, center_residue) try: pid = get_protoss.upload(pdb_path) except ValueError: click.secho("> Error: Could not upload PDB file\n", italic=True, fg="red") # Occurs when uploading a PDB file > 4MB # TODO retry with parsed PDB code? Will raise Bio.PDB error if # number of atoms in output exceeds 99999 err["Protoss"].append(pdb) continue job = get_protoss.submit(pid) get_protoss.download(job, protoss_pdb, "protein") get_protoss.download(job, ligands_sdf, "ligands") get_protoss.download(job, protoss_log_file, "log") get_protoss.repair_ligands(protoss_pdb, pdb_path) # Get any residues identified by Protoss as problematic from qp.protonate.parse_output import parse_log AA = missing.define_residues() residues_with_clashes = parse_log(protoss_log_file, protoss_pdb, AA) if residues_with_clashes: print(f"> WARNING: Protoss removed {len(residues_with_clashes)} residues due to clashes") clash_details = ", ".join([f"{res_name}{res_id} in chain {chain}" for res_id, _, _, chain, res_name in sorted(list(residues_with_clashes))]) print(f"> WARNING: Remodeling {clash_details} with Modeller.") else: break # Don't loop again as no clashes were detected if protoss and convert_to_nhie_oxo: click.echo("> Converting AKG to reactive OXO and SIN state") from qp.structure.convert_nhie_oxo import add_oxo_sin add_oxo_sin(protoss_pdb) click.echo("> Removing hydrogens from OXO's") from qp.structure.convert_nhie_oxo import remove_oxo_hydrogens, update_oxo_sdf, update_sin_sdf remove_oxo_hydrogens(protoss_pdb) click.echo("> Adding OXO to ligands.sdf") update_oxo_sdf(protoss_pdb, ligands_sdf) click.echo("> Updating SIN in ligands.sdf") sin_ligands_sdf = f"{prot_path}/{pdb}_ligands_sin.sdf" click.echo("> Uploading new succinate structure to Protoss") pid = get_protoss.upload(protoss_pdb) job = get_protoss.submit(pid) # Send Protoss the new SIN ligands get_protoss.download(job, sin_ligands_sdf, "ligands") # Get the .sdf file for the SIN ligands update_sin_sdf(ligands_sdf, sin_ligands_sdf) # Set the path to the current structure, there may be a better way to do this if modeller: path = mod_path if protoss: # Check if any atoms changed from HETATM to ATOM or vice versa for future troubleshooting purposes from qp.protonate.parse_output import record_type changed_residues = record_type(mod_path, protoss_pdb) # I want to do something with this eventually path = f"{prot_path}/{pdb}_protoss.pdb" old_path = f"{prot_path}/{pdb}_protoss_orig.pdb" if not os.path.exists(old_path): from shutil import copy copy(path, old_path) fix.adjust_activesites(path, center_residue) if coordination: from qp.protonate.ligand_prop import compute_charge, compute_spin click.echo("> Extracting clusters") if charge: ligand_charge = compute_charge(f"{prot_path}/{pdb}_ligands.sdf", path) ligand_spin = compute_spin(f"{prot_path}/{pdb}_ligands.sdf") else: ligand_charge = dict() cluster_paths = spheres.extract_clusters( path, f"{output}/{pdb}", center_residue, sphere_count, first_sphere_radius, max_atom_count, merge_cutoff, smooth_method, ligands, capping, charge, ligand_charge, count, xyz, hetero_pdb, include_ligands, **smooth_params ) if charge: charge_csv_path = f"{output}/{pdb}/charge.csv" with open(charge_csv_path, "a") as f: f.write("\n") for k, v in sorted(ligand_charge.items()): f.write(f"{k},{v}\n") if ligand_spin: with open(f"{output}/{pdb}/spin.csv", "w") as f: f.write("\n") for k, v in sorted(ligand_spin.items()): f.write(f"{k},{v}\n") from qp.cluster.charge_count import check_charge for cluster_path in cluster_paths: check_charge(cluster_path) click.echo("") for k, v in err.items(): if v: click.echo(click.style(k + " errors: ", bold=True, fg="red") + ", ".join(v)) except Exception as e: # Log the exception details to stderr, which is already redirected to log.out click.echo(f"> CRITICAL FAILURE: Error processing {pdb.upper()}", err=True) traceback.print_exc(file=sys.stderr) # if keyboard interrupt, exit the program if isinstance(e, KeyboardInterrupt): click.echo(f"> Keyboard interrupt detected. Exiting program.") exit() @cli.command() @click.option("--config", "-c", required=True, type=click.Path(exists=True), help="Path to the configuration YAML file") def submit(config): """Create QM input files and submit calculations to the job scheduler. This command runs stage 4 of the QuantumPDB workflow: generating TeraChem or ORCA input files and submitting them to SLURM or SGE job schedulers. """ from qp.structure import setup from qp.manager import create from qp.manager import submit # Parse configuration parameters config_data = setup.read_config(config) optimization = config_data.get('optimization', False) method = config_data.get('method', 'wpbeh') basis = config_data.get('basis', 'lacvps_ecp') guess = config_data.get('guess', 'generate') gpus = config_data.get('gpus', 1) memory = config_data.get('memory', '8G') scheduler = config_data.get('scheduler', 'slurm') pcm_radii_file = config_data.get('pcm_radii_file', 'pcm_radii') job_count = config_data.get('job_count', 80) charge_embedding = config_data.get('charge_embedding', False) charge_embedding_cutoff = config_data.get('charge_embedding_cutoff', 20) charge_embedding_charges = config_data.get('charge_embedding_charges', None) dielectric = config_data.get('dielectric', 10) use_implicit_solvent = config_data.get('use_implicit_solvent', not charge_embedding) create_jobs = config_data.get('create_jobs', False) submit_jobs = config_data.get('submit_jobs', False) input = config_data.get('input', []) output = config_data.get('output_dir', '') if not os.path.exists(input): raise FileNotFoundError(f"Could not find input file named {input}.") input = os.path.abspath(input) if create_jobs: click.echo("> Creating job files for QM calculations") create.create_jobs(input, output, optimization, basis, method, guess, charge_embedding, charge_embedding_cutoff, charge_embedding_charges, gpus, memory, scheduler, pcm_radii_file, dielectric, use_implicit_solvent) if submit_jobs: click.echo("\n> Submitting QM calculations") submit.manage_jobs(output, job_count, method, scheduler) @cli.command() @click.option("--config", "-c", required=True, type=click.Path(exists=True), help="Path to the configuration YAML file") def analyze(config): """Check job status and run post-processing analysis with Multiwfn. This command runs stage 5 of the QuantumPDB workflow: classifying job outcomes (done, failed, queued), and optionally computing partial charges and dipole moments using Multiwfn. """ from qp.structure import setup config_data = setup.read_config(config) method = config_data.get('method', 'wpbeh') job_checkup = config_data.get('job_checkup', False) delete_queued = config_data.get('delete_queued', False) calc_charge_schemes = config_data.get('calc_charge_schemes', False) calc_dipole = config_data.get('calc_dipole', False) multiwfn_path = config_data.get('multiwfn_path', 'Multiwfn') charge_scheme = config_data.get('charge_scheme', 'Hirshfeld') input = config_data.get('input', []) output = config_data.get('output_dir', '') center_yaml_residues = config_data.get('center_residues', []) pdb_all, center_residues = setup.parse_input(input, output, center_yaml_residues) if job_checkup: from qp.analyze import checkup click.echo("> Checking to see the status of the jobs...") failure_counts, author_counts = checkup.check_all_jobs(method, output, delete_queued) checkup.plot_failures(failure_counts) checkup.plot_authors(author_counts) checkup.write_author_credit_csv(author_counts) checkup.plot_failure_modes_from_csv(os.path.join("checkup", "failure_modes.csv")) if calc_charge_schemes or calc_dipole: from qp.analyze import multiwfn click.echo("> Activate Multiwfn module (e.g., module load multiwfn/noGUI_3.7)") settings_ini_path = multiwfn.get_settings_ini_path() atmrad_path = multiwfn.get_atmrad_path() if calc_charge_schemes: click.echo("> Calculating charge schemes...") multiwfn.iterate_qm_output(pdb_all, method, output, multiwfn_path, settings_ini_path, atmrad_path, charge_scheme, multiwfn.charge_scheme) if calc_dipole: click.echo("> Calculating dipoles using center of mass as the reference...") multiwfn.iterate_qm_output(pdb_all, method, output, multiwfn_path, settings_ini_path, atmrad_path, charge_scheme, multiwfn.calc_dipole) if __name__ == "__main__": # Run the command-line interface when this script is executed welcome() cli()