"""Process Molden files for Multiwfn analysis."""
import numpy as np
import os
ECP_ACTIVE_ELECTRONS = {
# s-block
'K': 9, 'CA': 10,
'RB': 9, 'SR': 10,
'CS': 9, 'BA': 10,
# 3d transition metals (LANL, 10e core except Zn)
'SC': 11, 'TI': 12, 'V': 13, 'CR': 14, 'MN': 15,
'FE': 16, 'CO': 17, 'NI': 18, 'CU': 19, 'ZN': 12,
# 4d transition metals (ECP28)
'Y': 11, 'ZR': 12, 'NB': 13, 'MO': 14, 'TC': 15,
'RU': 16, 'RH': 17, 'PD': 18, 'AG': 19, 'CD': 12,
# 5d transition metals
'LA': 11,
'HF': 12, 'TA': 13, 'W': 14, 'RE': 15,
'OS': 16, 'IR': 17, 'PT': 18, 'AU': 19, 'HG': 12,
# p-block heavier main-group
'GA': 3, 'GE': 4, 'AS': 5, 'SE': 6, 'BR': 7,
'IN': 3, 'SN': 4, 'SB': 5, 'TE': 6, 'I': 7,
'TL': 13, 'PB': 4, 'BI': 5,
}
[docs]def correct_ecp_charges_inplace(molden_file):
"""Correct nuclear charges in a Molden file for ECP-treated elements.
Multiwfn requires the nuclear charge in Molden files to reflect the
number of active electrons (not the full atomic number) for elements
using effective core potentials (ECPs). This function reads the Molden
file, corrects charges for known ECP elements, and overwrites the file.
Parameters
----------
molden_file : str
Path to the Molden file to correct.
Returns
-------
bool
True if the file was modified, False otherwise.
Notes
-----
The ECP electron counts are defined in :data:`ECP_ACTIVE_ELECTRONS` and
cover common LANL/ECP28 basis sets for transition metals and heavy
main-group elements.
"""
try:
with open(molden_file, 'r') as f:
lines = f.readlines()
except FileNotFoundError:
print(f"> ERROR: Molden file not found at {molden_file}")
return False
modified_lines = []
in_atoms_section = False
was_modified = False
for line in lines:
if line.strip() == "[Atoms] Angs":
in_atoms_section = True
modified_lines.append(line)
continue
elif in_atoms_section and line.strip().startswith("["):
in_atoms_section = False
if in_atoms_section:
parts = line.strip().split()
if len(parts) >= 6:
element_symbol = parts[0].upper()
# Check if the element needs correction and if it hasn't been corrected already
if element_symbol in ECP_ACTIVE_ELECTRONS and parts[2] != str(ECP_ACTIVE_ELECTRONS[element_symbol]):
parts[2] = str(ECP_ACTIVE_ELECTRONS[element_symbol])
line_to_write = ' '.join(parts) + '\n'
modified_lines.append(line_to_write)
was_modified = True
else:
modified_lines.append(line)
else:
modified_lines.append(line)
else:
modified_lines.append(line)
if was_modified:
with open(molden_file, 'w') as f:
f.writelines(modified_lines)
print(f"> Overwrote {os.path.basename(molden_file)} with ECP-corrected nuclear charges.")
return was_modified
[docs]def translate_to_origin(coordinates, center_of_mass):
"""Translate coordinates so a reference point is at the origin.
Parameters
----------
coordinates : numpy.ndarray of shape (N, 3)
Atomic coordinates to translate.
center_of_mass : array-like of shape (3,)
Reference point to move to the origin.
Returns
-------
numpy.ndarray of shape (N, 3)
Translated coordinates.
"""
return coordinates - center_of_mass
[docs]def process_molden(input_file, output_file, com):
"""Translate atomic coordinates in a Molden file to center on a reference point.
Reads the ``[Atoms] Angs`` section of the input Molden file, subtracts
the provided center of mass from all coordinates, and writes the
modified file. All other sections are copied unchanged.
Parameters
----------
input_file : str
Path to the input Molden file.
output_file : str
Path for the output (centered) Molden file.
com : array-like of shape (3,)
Center of mass coordinates to subtract from all atoms.
Raises
------
ValueError
If the ``[Atoms] Angs`` section is not found or contains no atoms.
"""
with open(input_file, 'r') as infile:
lines = infile.readlines()
# Identify the indices of the sections
atoms_start_idx = None
gto_start_idx = None
for idx, line in enumerate(lines):
stripped_line = line.strip()
if stripped_line == "[Atoms] Angs":
atoms_start_idx = idx
elif stripped_line.startswith("[") and stripped_line != "[Atoms] Angs" and atoms_start_idx is not None:
# First section after [Atoms] Angs
gto_start_idx = idx
break
if atoms_start_idx is None:
raise ValueError("The [Atoms] Angs section was not found in the file.")
if gto_start_idx is None:
# If [GTO] or another section is not found, assume the rest of the file is tail_lines
gto_start_idx = len(lines)
# Header lines up to and including [Atoms] Angs
header_lines = lines[:atoms_start_idx + 1]
# Atom lines between [Atoms] Angs and the next section
atom_lines_raw = lines[atoms_start_idx + 1 : gto_start_idx]
# Rest of the file from the next section onwards
tail_lines = lines[gto_start_idx:]
# Process atom lines
atoms = []
coordinates = []
atom_lines = []
for line in atom_lines_raw:
parts = line.strip().split()
if len(parts) >= 6:
try:
x, y, z = map(float, parts[3:6])
except ValueError as e:
print(f"Error parsing coordinates in line: {line.strip()}")
raise e
atoms.append(parts[0])
coordinates.append([x, y, z])
atom_lines.append(parts)
else:
print(f"Warning: Line skipped due to unexpected format: {line.strip()}")
if not atom_lines:
raise ValueError("No atom lines found in the [Atoms] Angs section.")
# Convert coordinates to numpy array
coordinates = np.array(coordinates)
original_center_of_mass = np.array(com)
# Translate the coordinates
new_coordinates = translate_to_origin(coordinates, original_center_of_mass)
# Now write everything to the output file
with open(output_file, 'w') as outfile:
# Write header lines
outfile.writelines(header_lines)
# Write updated atom lines
for i, parts in enumerate(atom_lines):
# Update the coordinates with new values, formatted to match the original precision
formatted_x = f"{new_coordinates[i][0]:.5f}"
formatted_y = f"{new_coordinates[i][1]:.5f}"
formatted_z = f"{new_coordinates[i][2]:.5f}"
parts[3], parts[4], parts[5] = formatted_x, formatted_y, formatted_z
# Reconstruct the line with original spacing
updated_line = ' '.join(parts) + '\n'
outfile.write(updated_line)
# Write the rest of the file
outfile.writelines(tail_lines)
[docs]def center_molden(molden_file, com):
"""Create a centered copy of a Molden file for dipole calculations.
Generates a new Molden file with atomic coordinates translated so
the substrate center of mass is at the origin. This is required
for meaningful dipole moment calculations.
Parameters
----------
molden_file : str
Path to the input Molden file.
com : array-like of shape (3,)
Center of mass coordinates.
Returns
-------
str
Path to the output file (``centered_{original_name}``).
"""
output_molden = f"centered_{molden_file}"
process_molden(molden_file, output_molden, com)
print(f"> Centered coordinates written to: {output_molden}")
return output_molden