Source code for qp.analyze.molden

"""Process Molden files for Multiwfn analysis."""

import numpy as np
import os

ECP_ACTIVE_ELECTRONS = {
    # s-block
    'K': 9, 'CA': 10,
    'RB': 9, 'SR': 10,
    'CS': 9, 'BA': 10,

    # 3d transition metals (LANL, 10e core except Zn)
    'SC': 11, 'TI': 12, 'V': 13, 'CR': 14, 'MN': 15,
    'FE': 16, 'CO': 17, 'NI': 18, 'CU': 19, 'ZN': 12,

    # 4d transition metals (ECP28)
    'Y': 11, 'ZR': 12, 'NB': 13, 'MO': 14, 'TC': 15,
    'RU': 16, 'RH': 17, 'PD': 18, 'AG': 19, 'CD': 12,

    # 5d transition metals
    'LA': 11,
    'HF': 12, 'TA': 13, 'W': 14, 'RE': 15,
    'OS': 16, 'IR': 17, 'PT': 18, 'AU': 19, 'HG': 12,

    # p-block heavier main-group
    'GA': 3, 'GE': 4, 'AS': 5, 'SE': 6, 'BR': 7,
    'IN': 3, 'SN': 4, 'SB': 5, 'TE': 6, 'I': 7,
    'TL': 13, 'PB': 4, 'BI': 5,
}

[docs]def correct_ecp_charges_inplace(molden_file): """Correct nuclear charges in a Molden file for ECP-treated elements. Multiwfn requires the nuclear charge in Molden files to reflect the number of active electrons (not the full atomic number) for elements using effective core potentials (ECPs). This function reads the Molden file, corrects charges for known ECP elements, and overwrites the file. Parameters ---------- molden_file : str Path to the Molden file to correct. Returns ------- bool True if the file was modified, False otherwise. Notes ----- The ECP electron counts are defined in :data:`ECP_ACTIVE_ELECTRONS` and cover common LANL/ECP28 basis sets for transition metals and heavy main-group elements. """ try: with open(molden_file, 'r') as f: lines = f.readlines() except FileNotFoundError: print(f"> ERROR: Molden file not found at {molden_file}") return False modified_lines = [] in_atoms_section = False was_modified = False for line in lines: if line.strip() == "[Atoms] Angs": in_atoms_section = True modified_lines.append(line) continue elif in_atoms_section and line.strip().startswith("["): in_atoms_section = False if in_atoms_section: parts = line.strip().split() if len(parts) >= 6: element_symbol = parts[0].upper() # Check if the element needs correction and if it hasn't been corrected already if element_symbol in ECP_ACTIVE_ELECTRONS and parts[2] != str(ECP_ACTIVE_ELECTRONS[element_symbol]): parts[2] = str(ECP_ACTIVE_ELECTRONS[element_symbol]) line_to_write = ' '.join(parts) + '\n' modified_lines.append(line_to_write) was_modified = True else: modified_lines.append(line) else: modified_lines.append(line) else: modified_lines.append(line) if was_modified: with open(molden_file, 'w') as f: f.writelines(modified_lines) print(f"> Overwrote {os.path.basename(molden_file)} with ECP-corrected nuclear charges.") return was_modified
[docs]def translate_to_origin(coordinates, center_of_mass): """Translate coordinates so a reference point is at the origin. Parameters ---------- coordinates : numpy.ndarray of shape (N, 3) Atomic coordinates to translate. center_of_mass : array-like of shape (3,) Reference point to move to the origin. Returns ------- numpy.ndarray of shape (N, 3) Translated coordinates. """ return coordinates - center_of_mass
[docs]def process_molden(input_file, output_file, com): """Translate atomic coordinates in a Molden file to center on a reference point. Reads the ``[Atoms] Angs`` section of the input Molden file, subtracts the provided center of mass from all coordinates, and writes the modified file. All other sections are copied unchanged. Parameters ---------- input_file : str Path to the input Molden file. output_file : str Path for the output (centered) Molden file. com : array-like of shape (3,) Center of mass coordinates to subtract from all atoms. Raises ------ ValueError If the ``[Atoms] Angs`` section is not found or contains no atoms. """ with open(input_file, 'r') as infile: lines = infile.readlines() # Identify the indices of the sections atoms_start_idx = None gto_start_idx = None for idx, line in enumerate(lines): stripped_line = line.strip() if stripped_line == "[Atoms] Angs": atoms_start_idx = idx elif stripped_line.startswith("[") and stripped_line != "[Atoms] Angs" and atoms_start_idx is not None: # First section after [Atoms] Angs gto_start_idx = idx break if atoms_start_idx is None: raise ValueError("The [Atoms] Angs section was not found in the file.") if gto_start_idx is None: # If [GTO] or another section is not found, assume the rest of the file is tail_lines gto_start_idx = len(lines) # Header lines up to and including [Atoms] Angs header_lines = lines[:atoms_start_idx + 1] # Atom lines between [Atoms] Angs and the next section atom_lines_raw = lines[atoms_start_idx + 1 : gto_start_idx] # Rest of the file from the next section onwards tail_lines = lines[gto_start_idx:] # Process atom lines atoms = [] coordinates = [] atom_lines = [] for line in atom_lines_raw: parts = line.strip().split() if len(parts) >= 6: try: x, y, z = map(float, parts[3:6]) except ValueError as e: print(f"Error parsing coordinates in line: {line.strip()}") raise e atoms.append(parts[0]) coordinates.append([x, y, z]) atom_lines.append(parts) else: print(f"Warning: Line skipped due to unexpected format: {line.strip()}") if not atom_lines: raise ValueError("No atom lines found in the [Atoms] Angs section.") # Convert coordinates to numpy array coordinates = np.array(coordinates) original_center_of_mass = np.array(com) # Translate the coordinates new_coordinates = translate_to_origin(coordinates, original_center_of_mass) # Now write everything to the output file with open(output_file, 'w') as outfile: # Write header lines outfile.writelines(header_lines) # Write updated atom lines for i, parts in enumerate(atom_lines): # Update the coordinates with new values, formatted to match the original precision formatted_x = f"{new_coordinates[i][0]:.5f}" formatted_y = f"{new_coordinates[i][1]:.5f}" formatted_z = f"{new_coordinates[i][2]:.5f}" parts[3], parts[4], parts[5] = formatted_x, formatted_y, formatted_z # Reconstruct the line with original spacing updated_line = ' '.join(parts) + '\n' outfile.write(updated_line) # Write the rest of the file outfile.writelines(tail_lines)
[docs]def center_molden(molden_file, com): """Create a centered copy of a Molden file for dipole calculations. Generates a new Molden file with atomic coordinates translated so the substrate center of mass is at the origin. This is required for meaningful dipole moment calculations. Parameters ---------- molden_file : str Path to the input Molden file. com : array-like of shape (3,) Center of mass coordinates. Returns ------- str Path to the output file (``centered_{original_name}``). """ output_molden = f"centered_{molden_file}" process_molden(molden_file, output_molden, com) print(f"> Centered coordinates written to: {output_molden}") return output_molden