#!/usr/bin/env python
"""
A simple python script for parsing CASTEP (http://www.castep.org/) output files especially for ELNES calculations.
This includes some functions for reading .cell, .castep, .bands, and .eels_mat files, calculating excitation energy, and forming core-loss spectra with Gaussian smearing.
Copyright (c) 2022 kiyou, nmdl-mizo
This software is released under the MIT License, see LICENSE.
Note
-----
This script is tested on input and output files of CASTEP version 8 and may not be incompatible to other versions.
In all papers using the CASTEP code, you should cite:
"First principles methods using CASTEP",
Zeitschrift fuer Kristallographie 220(5-6) pp. 567-570 (2005)
S. J. Clark, M. D. Segall, C. J. Pickard, P. J. Hasnip, M. J. Probert, K. Refson, M. C. Payne
If you use get_energies() for calculating excitation energy, please consider to cite:
Mizoguchi, T.; Tanaka, I.; Gao, S.-P.; Pickard, C. J.
"First-Principles Calculation of Spectral Features, Chemical Shift and Absolute Threshold of ELNES and XANES Using a Plane Wave Pseudopotential Method." \
J. Phys. Condens. Matter 2009, 21 (10), 104204.
"""
import os
import re
import struct
import numpy as np
[docs]def split_castep(filename):
"""
Split .castep file into each calculation
Running CASTEP several times yields a single .castep file with concatenated output.
This function splits the outputs into a list of each calculation run.
Parameters
--------
filename : str
path to the .castep file
Returns
--------
run_list : list
list of lines of castep output for each run
"""
with open(filename, "rt") as f:
lines = f.readlines()
castep_header = [
' +-------------------------------------------------+\n',
' | |\n',
' | CCC AA SSS TTTTT EEEEE PPPP |\n',
' | C A A S T E P P |\n',
' | C AAAA SS T EEE PPPP |\n',
' | C A A S T E P |\n',
' | CCC A A SSS T EEEEE P |\n',
' | |\n',
' +-------------------------------------------------+\n'
]
header_position_list = list()
for l in range(len(lines) - 9):
if lines[l: l + 9] == castep_header:
header_position_list.append(l)
header_position_list.append(-1)
run_list = [lines[l_s:l_e] for l_s, l_e in zip(header_position_list[:-1], header_position_list[1:])]
return run_list
[docs]def get_final_energy_castep(lines):
"""
get the final energy of the system from .castep output
Parameters
--------
lines : list of str
Castep output for a calculation run.
Can be obtained by split_castep
Returns
--------
final_energy: float
final energy in eV
"""
for line in lines:
if "Final energy" in line:
final_energy = float(line.split()[-2])
break
else:
raise RuntimeError("Could not find Final energy")
return final_energy
[docs]def get_ae_energy_castep(lines, element="C", suffix=":ex"):
"""
get the atomic energy of the all-electron (ae) calculation from .castep output
Parameters
--------
lines : list of str
Castep output for a calculation run.
Can be obtained by split_castep
element : str, default "C"
element name. e.g. "C"
suffix : str, default ":ex"
suffix for the site name e.g. ":ex"
Returns
--------
ae_energy: float
ae energy in eV
"""
re_ac = re.compile(f"Atomic calculation performed for {element}{suffix}")
re_ae = re.compile("Converged in \d+ iterations to an ae energy of ([\d\.\-]+) eV")
for i, line in enumerate(lines):
if re_ac.search(line) is not None:
break
else:
raise RuntimeError(f"Could not find atomic calculation for {element}{suffix}")
ae_energy = float(re_ae.search(lines[i + 2]).group(1))
return ae_energy
[docs]def get_pa_energy_castep(lines, element="C", suffix=":ex"):
"""
get the atomic energy of the pseudo atomic (pa) calculation from .castep output
Parameters
--------
lines : list of str
Castep output for a calculation run.
Can be obtained by split_castep
element : str, default "C"
element name. e.g. "C"
suffix : str, default ":ex"
suffix for the site name e.g. ":ex"
Returns
--------
pa_energy: float
pseudo atomic energy in eV
"""
re_pac = re.compile(f"Pseudo atomic calculation performed for {element}{suffix}")
re_pa = re.compile("Converged in \d+ iterations to a total energy of ([\d\.\-]+) eV")
for i, line in enumerate(lines):
if re_pac.search(line) is not None:
break
else:
raise RuntimeError(f"Could not find pseudo atomic calculation for {element}{suffix}")
pa_energy = float(re_pa.search(lines[i + 2]).group(1))
return pa_energy
[docs]def get_energies(filename_gs, filename_ex, element="C", suffix=":ex", gs_split=-1, ex_split=-1):
"""
get energies from .castep outputs
Parameters
--------
filename_gs : str
path to the .castep file of the ground state
filename_ex : str
path to the .castep file of the excited state
element : str, default "C"
element name. e.g. "C"
suffix : str, default ":ex"
suffix for the site name e.g. ":ex"
gs_split : int, default -1
index of the split used for extracting energies from ground state calculation. -1 to latest.
ex_split : int, default -1
index of the split used for extracting energies from excited state calculation. -1 to latest.
Returns
--------
energy_dict: dict
a dictionary wiht str keys of labels and flot values of energies in eV
gs_final: ground state final_energy,
ex_final: excited state final energy,
gs_ae: ground state atomic energy in the all-electron calculation,
ex_ae: excited state atomic energy in the all-electron calculation,
gs_pa: ground state atomic energy in the pseudo-atomic calculation,
ex_pa: excited state atomic energy in the pseudo-atomic calculation,
excitation_energy: excitation energy,
Note
-------
The calculation of excitation energy is based on the following paper:
Mizoguchi, T.; Tanaka, I.; Gao, S.-P.; Pickard, C. J. \
"First-Principles Calculation of Spectral Features, Chemical Shift and Absolute Threshold of ELNES and XANES Using a Plane Wave Pseudopotential Method." \
J. Phys. Condens. Matter 2009, 21 (10), 104204.
"""
lines_gs = split_castep(filename_gs)[gs_split]
lines_ex = split_castep(filename_ex)[ex_split]
energy_dict = {
"gs_final": get_final_energy_castep(lines_gs),
"ex_final": get_final_energy_castep(lines_ex),
"gs_ae": get_ae_energy_castep(lines_gs, element, suffix),
"ex_ae": get_ae_energy_castep(lines_ex, element, suffix),
"gs_pa": get_pa_energy_castep(lines_gs, element, suffix),
"ex_pa": get_pa_energy_castep(lines_ex, element, suffix),
}
excitation_energy = (energy_dict["ex_final"] - energy_dict["gs_final"]) + ((energy_dict["ex_ae"] - energy_dict["ex_pa"]) - (energy_dict["gs_ae"] - energy_dict["gs_pa"]))
energy_dict["excitation_energy"] = excitation_energy
return energy_dict
[docs]def get_coords_cell(filename):
"""
extract species and coordinates from .cell file
Parameters
--------
filename : str
path to the .cell file
Returns
--------
coords_list : list
a list of two elements
- a list of species
- a numpy array of coordinations
"""
with open(filename, "r") as f:
while not "%BLOCK POSITIONS_FRAC" in f.readline():
pass
lines = list()
while True:
line = f.readline()
if "%ENDBLOCK POSITIONS_FRAC" in line:
break
lines.append(line)
coords_list = [line.split()[0] for line in lines], np.array([line.split()[1:] for line in lines], dtype=np.float)
return coords_list
[docs]def read_bin_data(f, dtype, data_byte, header_byte=4, footer_byte=4):
"""
read data from binary file
This function reads a binary chunk from current position.
Parameters
--------
f : file object
a file object opened in read only mode.
dtype : str
dtype of the binary chunk
data_byte : int
length of the binary chunk to read in byte
header_byte : int, default 4
length of the header before the binary chunk to read in byte
footer_byte : int, default 4
length of the footer after the binary chunk to read in byte
Returns
--------
data : int, str, float, or list
a data converted by struct.unpack
"""
f.seek(header_byte, 1)
data = struct.unpack(dtype, f.read(data_byte))
f.seek(footer_byte, 1)
return data
[docs]def read_eels_mat(filename):
"""
extract data from .eels_mat file
Parameters
--------
filename : str
path to the .eels_mat file
Returns
--------
eels_mat_dict : dict
a dictionary of the extracted data
- tot_core_projectors
- max_eigenvalues
- sum_num_kpoints
- num_spins
- core_orbital_species
- core_orbital_ion
- core_orbital_n
- core_orbital_lm
- transition_matrix
"""
params = [
"tot_core_projectors",
"max_eigenvalues",
"sum_num_kpoints",
"num_spins",
]
co = [
"core_orbital_species",
"core_orbital_ion",
"core_orbital_n",
"core_orbital_lm",
]
with open(filename, "rb") as f:
param_chunk = {p: read_bin_data(f, ">i", 4)[0] for p in params}
n_proj = param_chunk["tot_core_projectors"]
if n_proj == 1:
dt_proj = ">i"
else:
dt_proj = f">{n_proj}i"
co_chunk = {p: read_bin_data(f, dt_proj, n_proj * 4) for p in co}
mat = np.array([read_bin_data(f, '>6d', 3 * 8 * 2) for i in range(np.prod([param_chunk[p] for p in params]))])
mat.dtype = complex
eels_mat_dict = dict()
for p in params:
eels_mat_dict[p] = param_chunk[p]
for p in co:
eels_mat_dict[p] = co_chunk[p]
eels_mat_dict["transition_matrix"] = mat.reshape(
eels_mat_dict["sum_num_kpoints"],
eels_mat_dict["num_spins"],
eels_mat_dict["tot_core_projectors"],
eels_mat_dict["max_eigenvalues"],
3,
)
return eels_mat_dict
[docs]def read_bands(filename, output_eV=True):
"""
extract data from .bands file
Parameters
--------
filename : str
path to the .bands file
output_eV : bool, default True
whether output energy in eV (True) or hartree (False)
Returns
--------
bands_dict : dict
a dictionary of the extracted data
"""
with open(filename, "rt") as f:
# start reading header
nkpnt = int(f.readline().split()[-1])
nspin = int(f.readline().split()[-1])
nelectrons = list(map(float, f.readline().split()[-nspin:]))
nbands = list(map(int, f.readline().split()[-nspin:]))
efermi = list(map(float, f.readline().split()[-nspin:]))
f.readline()
a = list(map(float, f.readline().split()))
b = list(map(float, f.readline().split()))
c = list(map(float, f.readline().split()))
# finish reading header
nk = list()
kpts = list()
wk = list()
spin = list()
eigenvalues = list()
for _ in range(nkpnt):
line = f.readline().split()
nk.append(int(line[1]))
kpts.append(list(map(float, line[2:5])))
wk.append(float(line[5]))
spin_k = list()
eigenvalues_k = list()
for nb in nbands:
spin_k.append(int(f.readline().split()[-1]))
eigenvalues_k.append(list(map(float, [f.readline() for _ in range(nb)])))
spin.append(spin_k)
eigenvalues.append(eigenvalues_k)
bands_dict = {
"num_kponts": nkpnt,
"num_spins": nspin,
"num_electrons": nelectrons,
"num_eigenvalues": nbands,
"efermi": efermi,
"lattice_vectors": np.array([a, b, c]),
"nk": nk,
"kpoint": kpts,
"kpoint_weights": wk,
"spin": spin,
"eigenvalues": np.array(eigenvalues)
}
if output_eV:
hart2eV = 27.211396132
bands_dict["efermi"] = np.array(bands_dict["efermi"]) * hart2eV
bands_dict["eigenvalues"] = np.array(bands_dict["eigenvalues"]) * hart2eV
return bands_dict
[docs]def get_spectrum(calc_dir=".", seed_name="case_elnes", output_eV=True, atomic_unit=False):
"""
get primitive spectral data from a .bands file and a .eels_mat file
Parameters
--------
calc_dir : str, default "."
path to the directory containing .bands and .eels_mat
seed_name : str, default "case_elnes"
seed name of the calculation
output_eV : bool, default True
whether output energy in eV (True) or hartree (False)
atomic_unit : bool, default False
whether output dynamical structure factor in the unit of Bohr radius^2 (True) or angstrom^2 (False).
Returns
--------
spectrum_dict : dict
a dictionary of the spectral data
"""
eels_mat_dict = read_eels_mat(os.path.join(calc_dir, f"{seed_name}.eels_mat"))
en = np.square(np.abs(eels_mat_dict["transition_matrix"]))
if not atomic_unit:
en *= 0.529177210903**2 # Bohr radius^-2 to Angstrom^-2
bands_dict = read_bands(os.path.join(calc_dir, f"{seed_name}.bands"), output_eV)
spectrum_dict = {
"eigenvalues": bands_dict["eigenvalues"],
"efermi": bands_dict["efermi"],
"num_electrons": bands_dict["num_electrons"],
"kpoint_weights": bands_dict["kpoint_weights"],
"num_spins": bands_dict["num_spins"],
"tot_core_projectors": eels_mat_dict["tot_core_projectors"],
"dsf":en
}
return spectrum_dict
[docs]def gaussian(x, c, w, s):
"""
gaussian function for smearing
Parameters
--------
x : list or numpy array
1d list of energies
c : float
center position (=mu)
w : float
height scaling factor, weights
s : float
standard deviation of gaussian smearing (=sigma)
Returns
--------
numpy array
numpy array of gaussian distribution
"""
return w / (np.sqrt(2. * np.pi) * s) * np.exp(-((x - c) / s)**2 / 2.)
[docs]def get_smeared_spectrum(energies, sigma=0.3, calc_dir=".", seed_name="case_elnes", e_origin="eigen_value", output_eV=True, atomic_unit=False):
"""
get gaussian smeared spectra from a .bands file and a .eels_mat file
Parameters
--------
energies : list or numpy array
1d list of energies
sigma : float, default 0.3
standard deviation of gaussian smearing
calc_dir : str, default "."
path to the directory containing .bands and .eels_mat
seed_name : str, default "case_elnes"
seed name of the calculation
e_origin : str, default "eigen_value"
set energy origin
output_eV : bool, default True
whether output energy in eV (True) or hartree (False)
atomic_unit : bool, default False
whether output dynamical structure factor in the unit of Bohr radius^2 (True) or angstrom^2 (False).
Returns
--------
spectrum : numpy array
numpy array of smeared spectra
Examples
--------
>>> !ls .
case_elnes.bands case_elnes.eels_mat
>>> energies = np.arange(-4.999, 30.002, 0.001) # make an array for energies
>>> sp = get_smeared_spectrum(energies, 0.3) # parse and make spectra
>>> fig, ax = plt.subplots(1)
>>> ax.set_xlabel("Energy (eV)")
>>> ax.set_ylabel("Intensity (arb. u.)")
>>> ax.plot(energies, sp[0, 0], label="x") # plot a spectrum for x component of the 1st core projection
>>> ax.plot(energies, sp[0, 1], label="y") # plot a spectrum for y component of the 1st core projection
>>> ax.plot(energies, sp[0, 2], label="z") # plot a spectrum for z component of the 1st core projection
>>> ax.plot(energies, np.mean(sp[0], axis=0), label="total") # plot a total spectrum of the 1st core projection
"""
spectrum = get_spectrum(calc_dir, seed_name, output_eV, atomic_unit)
if e_origin == "eigen_value":
e_origin_value = np.min(
[
[
ev[int(ne // (2 / spectrum["num_spins"]))]
for ev, ne in zip(ev_k, spectrum["num_electrons"])
]
for ev_k in spectrum["eigenvalues"]
]
)
elif e_origin == "efermi":
e_origin_value = spectrum["efermi"]
sp = [
[
[
[
kw * np.sum(
[
gaussian(energies, en, w, sigma)
for en, w in zip(spectrum["eigenvalues"][i_kp, i_spin] - e_origin_value, spectrum["dsf"][i_kp, i_spin, i_proj, :, i_ra])
if en >= 0.
],
axis=0
)
for i_kp, kw in enumerate(spectrum["kpoint_weights"])
]
for i_spin in range(spectrum["num_spins"])
]
for i_ra in range(3)
]
for i_proj in range(spectrum["tot_core_projectors"])
]
sp = np.sum(np.array(sp), axis=(2, 3))
sp *= 2. / spectrum["num_spins"] # consider spin multiplicity
return sp