Source code for ceparser

#!/usr/bin/env python
"""
A simple python script for parsing CASTEP (http://www.castep.org/) output files especially for ELNES calculations.

This includes some functions for reading .cell, .castep, .bands, and .eels_mat files, calculating excitation energy, and forming core-loss spectra with Gaussian smearing.

Copyright (c) 2022 kiyou, nmdl-mizo
This software is released under the MIT License, see LICENSE.

Note
-----
This script is tested on input and output files of CASTEP version 8 and may not be incompatible to other versions.

In all papers using the CASTEP code, you should cite:
"First principles methods using CASTEP",
Zeitschrift fuer Kristallographie 220(5-6) pp. 567-570 (2005)
S. J. Clark, M. D. Segall, C. J. Pickard, P. J. Hasnip, M. J. Probert, K. Refson, M. C. Payne

If you use get_energies() for calculating excitation energy, please consider to cite:
Mizoguchi, T.; Tanaka, I.; Gao, S.-P.; Pickard, C. J.
"First-Principles Calculation of Spectral Features, Chemical Shift and Absolute Threshold of ELNES and XANES Using a Plane Wave Pseudopotential Method." \
J. Phys. Condens. Matter 2009, 21 (10), 104204.
"""
import os
import re
import struct
import numpy as np

[docs]def split_castep(filename): """ Split .castep file into each calculation Running CASTEP several times yields a single .castep file with concatenated output. This function splits the outputs into a list of each calculation run. Parameters -------- filename : str path to the .castep file Returns -------- run_list : list list of lines of castep output for each run """ with open(filename, "rt") as f: lines = f.readlines() castep_header = [ ' +-------------------------------------------------+\n', ' | |\n', ' | CCC AA SSS TTTTT EEEEE PPPP |\n', ' | C A A S T E P P |\n', ' | C AAAA SS T EEE PPPP |\n', ' | C A A S T E P |\n', ' | CCC A A SSS T EEEEE P |\n', ' | |\n', ' +-------------------------------------------------+\n' ] header_position_list = list() for l in range(len(lines) - 9): if lines[l: l + 9] == castep_header: header_position_list.append(l) header_position_list.append(-1) run_list = [lines[l_s:l_e] for l_s, l_e in zip(header_position_list[:-1], header_position_list[1:])] return run_list
[docs]def get_final_energy_castep(lines): """ get the final energy of the system from .castep output Parameters -------- lines : list of str Castep output for a calculation run. Can be obtained by split_castep Returns -------- final_energy: float final energy in eV """ for line in lines: if "Final energy" in line: final_energy = float(line.split()[-2]) break else: raise RuntimeError("Could not find Final energy") return final_energy
[docs]def get_ae_energy_castep(lines, element="C", suffix=":ex"): """ get the atomic energy of the all-electron (ae) calculation from .castep output Parameters -------- lines : list of str Castep output for a calculation run. Can be obtained by split_castep element : str, default "C" element name. e.g. "C" suffix : str, default ":ex" suffix for the site name e.g. ":ex" Returns -------- ae_energy: float ae energy in eV """ re_ac = re.compile(f"Atomic calculation performed for {element}{suffix}") re_ae = re.compile("Converged in \d+ iterations to an ae energy of ([\d\.\-]+) eV") for i, line in enumerate(lines): if re_ac.search(line) is not None: break else: raise RuntimeError(f"Could not find atomic calculation for {element}{suffix}") ae_energy = float(re_ae.search(lines[i + 2]).group(1)) return ae_energy
[docs]def get_pa_energy_castep(lines, element="C", suffix=":ex"): """ get the atomic energy of the pseudo atomic (pa) calculation from .castep output Parameters -------- lines : list of str Castep output for a calculation run. Can be obtained by split_castep element : str, default "C" element name. e.g. "C" suffix : str, default ":ex" suffix for the site name e.g. ":ex" Returns -------- pa_energy: float pseudo atomic energy in eV """ re_pac = re.compile(f"Pseudo atomic calculation performed for {element}{suffix}") re_pa = re.compile("Converged in \d+ iterations to a total energy of ([\d\.\-]+) eV") for i, line in enumerate(lines): if re_pac.search(line) is not None: break else: raise RuntimeError(f"Could not find pseudo atomic calculation for {element}{suffix}") pa_energy = float(re_pa.search(lines[i + 2]).group(1)) return pa_energy
[docs]def get_energies(filename_gs, filename_ex, element="C", suffix=":ex", gs_split=-1, ex_split=-1): """ get energies from .castep outputs Parameters -------- filename_gs : str path to the .castep file of the ground state filename_ex : str path to the .castep file of the excited state element : str, default "C" element name. e.g. "C" suffix : str, default ":ex" suffix for the site name e.g. ":ex" gs_split : int, default -1 index of the split used for extracting energies from ground state calculation. -1 to latest. ex_split : int, default -1 index of the split used for extracting energies from excited state calculation. -1 to latest. Returns -------- energy_dict: dict a dictionary wiht str keys of labels and flot values of energies in eV gs_final: ground state final_energy, ex_final: excited state final energy, gs_ae: ground state atomic energy in the all-electron calculation, ex_ae: excited state atomic energy in the all-electron calculation, gs_pa: ground state atomic energy in the pseudo-atomic calculation, ex_pa: excited state atomic energy in the pseudo-atomic calculation, excitation_energy: excitation energy, Note ------- The calculation of excitation energy is based on the following paper: Mizoguchi, T.; Tanaka, I.; Gao, S.-P.; Pickard, C. J. \ "First-Principles Calculation of Spectral Features, Chemical Shift and Absolute Threshold of ELNES and XANES Using a Plane Wave Pseudopotential Method." \ J. Phys. Condens. Matter 2009, 21 (10), 104204. """ lines_gs = split_castep(filename_gs)[gs_split] lines_ex = split_castep(filename_ex)[ex_split] energy_dict = { "gs_final": get_final_energy_castep(lines_gs), "ex_final": get_final_energy_castep(lines_ex), "gs_ae": get_ae_energy_castep(lines_gs, element, suffix), "ex_ae": get_ae_energy_castep(lines_ex, element, suffix), "gs_pa": get_pa_energy_castep(lines_gs, element, suffix), "ex_pa": get_pa_energy_castep(lines_ex, element, suffix), } excitation_energy = (energy_dict["ex_final"] - energy_dict["gs_final"]) + ((energy_dict["ex_ae"] - energy_dict["ex_pa"]) - (energy_dict["gs_ae"] - energy_dict["gs_pa"])) energy_dict["excitation_energy"] = excitation_energy return energy_dict
[docs]def get_coords_cell(filename): """ extract species and coordinates from .cell file Parameters -------- filename : str path to the .cell file Returns -------- coords_list : list a list of two elements - a list of species - a numpy array of coordinations """ with open(filename, "r") as f: while not "%BLOCK POSITIONS_FRAC" in f.readline(): pass lines = list() while True: line = f.readline() if "%ENDBLOCK POSITIONS_FRAC" in line: break lines.append(line) coords_list = [line.split()[0] for line in lines], np.array([line.split()[1:] for line in lines], dtype=np.float) return coords_list
[docs]def read_bin_data(f, dtype, data_byte, header_byte=4, footer_byte=4): """ read data from binary file This function reads a binary chunk from current position. Parameters -------- f : file object a file object opened in read only mode. dtype : str dtype of the binary chunk data_byte : int length of the binary chunk to read in byte header_byte : int, default 4 length of the header before the binary chunk to read in byte footer_byte : int, default 4 length of the footer after the binary chunk to read in byte Returns -------- data : int, str, float, or list a data converted by struct.unpack """ f.seek(header_byte, 1) data = struct.unpack(dtype, f.read(data_byte)) f.seek(footer_byte, 1) return data
[docs]def read_eels_mat(filename): """ extract data from .eels_mat file Parameters -------- filename : str path to the .eels_mat file Returns -------- eels_mat_dict : dict a dictionary of the extracted data - tot_core_projectors - max_eigenvalues - sum_num_kpoints - num_spins - core_orbital_species - core_orbital_ion - core_orbital_n - core_orbital_lm - transition_matrix """ params = [ "tot_core_projectors", "max_eigenvalues", "sum_num_kpoints", "num_spins", ] co = [ "core_orbital_species", "core_orbital_ion", "core_orbital_n", "core_orbital_lm", ] with open(filename, "rb") as f: param_chunk = {p: read_bin_data(f, ">i", 4)[0] for p in params} n_proj = param_chunk["tot_core_projectors"] if n_proj == 1: dt_proj = ">i" else: dt_proj = f">{n_proj}i" co_chunk = {p: read_bin_data(f, dt_proj, n_proj * 4) for p in co} mat = np.array([read_bin_data(f, '>6d', 3 * 8 * 2) for i in range(np.prod([param_chunk[p] for p in params]))]) mat.dtype = complex eels_mat_dict = dict() for p in params: eels_mat_dict[p] = param_chunk[p] for p in co: eels_mat_dict[p] = co_chunk[p] eels_mat_dict["transition_matrix"] = mat.reshape( eels_mat_dict["sum_num_kpoints"], eels_mat_dict["num_spins"], eels_mat_dict["tot_core_projectors"], eels_mat_dict["max_eigenvalues"], 3, ) return eels_mat_dict
[docs]def read_bands(filename, output_eV=True): """ extract data from .bands file Parameters -------- filename : str path to the .bands file output_eV : bool, default True whether output energy in eV (True) or hartree (False) Returns -------- bands_dict : dict a dictionary of the extracted data """ with open(filename, "rt") as f: # start reading header nkpnt = int(f.readline().split()[-1]) nspin = int(f.readline().split()[-1]) nelectrons = list(map(float, f.readline().split()[-nspin:])) nbands = list(map(int, f.readline().split()[-nspin:])) efermi = list(map(float, f.readline().split()[-nspin:])) f.readline() a = list(map(float, f.readline().split())) b = list(map(float, f.readline().split())) c = list(map(float, f.readline().split())) # finish reading header nk = list() kpts = list() wk = list() spin = list() eigenvalues = list() for _ in range(nkpnt): line = f.readline().split() nk.append(int(line[1])) kpts.append(list(map(float, line[2:5]))) wk.append(float(line[5])) spin_k = list() eigenvalues_k = list() for nb in nbands: spin_k.append(int(f.readline().split()[-1])) eigenvalues_k.append(list(map(float, [f.readline() for _ in range(nb)]))) spin.append(spin_k) eigenvalues.append(eigenvalues_k) bands_dict = { "num_kponts": nkpnt, "num_spins": nspin, "num_electrons": nelectrons, "num_eigenvalues": nbands, "efermi": efermi, "lattice_vectors": np.array([a, b, c]), "nk": nk, "kpoint": kpts, "kpoint_weights": wk, "spin": spin, "eigenvalues": np.array(eigenvalues) } if output_eV: hart2eV = 27.211396132 bands_dict["efermi"] = np.array(bands_dict["efermi"]) * hart2eV bands_dict["eigenvalues"] = np.array(bands_dict["eigenvalues"]) * hart2eV return bands_dict
[docs]def get_spectrum(calc_dir=".", seed_name="case_elnes", output_eV=True, atomic_unit=False): """ get primitive spectral data from a .bands file and a .eels_mat file Parameters -------- calc_dir : str, default "." path to the directory containing .bands and .eels_mat seed_name : str, default "case_elnes" seed name of the calculation output_eV : bool, default True whether output energy in eV (True) or hartree (False) atomic_unit : bool, default False whether output dynamical structure factor in the unit of Bohr radius^2 (True) or angstrom^2 (False). Returns -------- spectrum_dict : dict a dictionary of the spectral data """ eels_mat_dict = read_eels_mat(os.path.join(calc_dir, f"{seed_name}.eels_mat")) en = np.square(np.abs(eels_mat_dict["transition_matrix"])) if not atomic_unit: en *= 0.529177210903**2 # Bohr radius^-2 to Angstrom^-2 bands_dict = read_bands(os.path.join(calc_dir, f"{seed_name}.bands"), output_eV) spectrum_dict = { "eigenvalues": bands_dict["eigenvalues"], "efermi": bands_dict["efermi"], "num_electrons": bands_dict["num_electrons"], "kpoint_weights": bands_dict["kpoint_weights"], "num_spins": bands_dict["num_spins"], "tot_core_projectors": eels_mat_dict["tot_core_projectors"], "dsf":en } return spectrum_dict
[docs]def gaussian(x, c, w, s): """ gaussian function for smearing Parameters -------- x : list or numpy array 1d list of energies c : float center position (=mu) w : float height scaling factor, weights s : float standard deviation of gaussian smearing (=sigma) Returns -------- numpy array numpy array of gaussian distribution """ return w / (np.sqrt(2. * np.pi) * s) * np.exp(-((x - c) / s)**2 / 2.)
[docs]def get_smeared_spectrum(energies, sigma=0.3, calc_dir=".", seed_name="case_elnes", e_origin="eigen_value", output_eV=True, atomic_unit=False): """ get gaussian smeared spectra from a .bands file and a .eels_mat file Parameters -------- energies : list or numpy array 1d list of energies sigma : float, default 0.3 standard deviation of gaussian smearing calc_dir : str, default "." path to the directory containing .bands and .eels_mat seed_name : str, default "case_elnes" seed name of the calculation e_origin : str, default "eigen_value" set energy origin output_eV : bool, default True whether output energy in eV (True) or hartree (False) atomic_unit : bool, default False whether output dynamical structure factor in the unit of Bohr radius^2 (True) or angstrom^2 (False). Returns -------- spectrum : numpy array numpy array of smeared spectra Examples -------- >>> !ls . case_elnes.bands case_elnes.eels_mat >>> energies = np.arange(-4.999, 30.002, 0.001) # make an array for energies >>> sp = get_smeared_spectrum(energies, 0.3) # parse and make spectra >>> fig, ax = plt.subplots(1) >>> ax.set_xlabel("Energy (eV)") >>> ax.set_ylabel("Intensity (arb. u.)") >>> ax.plot(energies, sp[0, 0], label="x") # plot a spectrum for x component of the 1st core projection >>> ax.plot(energies, sp[0, 1], label="y") # plot a spectrum for y component of the 1st core projection >>> ax.plot(energies, sp[0, 2], label="z") # plot a spectrum for z component of the 1st core projection >>> ax.plot(energies, np.mean(sp[0], axis=0), label="total") # plot a total spectrum of the 1st core projection """ spectrum = get_spectrum(calc_dir, seed_name, output_eV, atomic_unit) if e_origin == "eigen_value": e_origin_value = np.min( [ [ ev[int(ne // (2 / spectrum["num_spins"]))] for ev, ne in zip(ev_k, spectrum["num_electrons"]) ] for ev_k in spectrum["eigenvalues"] ] ) elif e_origin == "efermi": e_origin_value = spectrum["efermi"] sp = [ [ [ [ kw * np.sum( [ gaussian(energies, en, w, sigma) for en, w in zip(spectrum["eigenvalues"][i_kp, i_spin] - e_origin_value, spectrum["dsf"][i_kp, i_spin, i_proj, :, i_ra]) if en >= 0. ], axis=0 ) for i_kp, kw in enumerate(spectrum["kpoint_weights"]) ] for i_spin in range(spectrum["num_spins"]) ] for i_ra in range(3) ] for i_proj in range(spectrum["tot_core_projectors"]) ] sp = np.sum(np.array(sp), axis=(2, 3)) sp *= 2. / spectrum["num_spins"] # consider spin multiplicity return sp