Coverage for /builds/alexhroom/ase/ase/io/castep/castep_reader.py: 91.04%
335 statements
« prev ^ index » next coverage.py v7.5.3, created at 2024-08-05 14:37 +0000
« prev ^ index » next coverage.py v7.5.3, created at 2024-08-05 14:37 +0000
1import io
2import re
3import warnings
4from collections import defaultdict
5from typing import Any, Dict, List, Optional
7import numpy as np
9from ase import Atoms, units
10from ase.calculators.singlepoint import SinglePointCalculator
11from ase.constraints import FixAtoms, FixCartesian, FixConstraint
12from ase.io import ParseError
13from ase.utils import reader, string2index
16@reader
17def read_castep_castep(fd, index=-1):
18 """Read a .castep file and returns an Atoms object.
20 The calculator information will be stored in the calc attribute.
22 Notes
23 -----
24 This routine will return an atom ordering as found within the castep file.
25 This means that the species will be ordered by ascending atomic numbers.
26 The atoms witin a species are ordered as given in the original cell file.
28 """
29 # look for last result, if several CASTEP run are appended
30 line_start, line_end, end_found = _find_last_record(fd)
31 if not end_found:
32 raise ParseError(f'No regular end found in {fd.name} file.')
34 # These variables are finally assigned to `SinglePointCalculator`
35 # for backward compatibility with the `Castep` calculator.
36 cut_off_energy = None
37 kpoints = None
38 total_time = None
39 peak_memory = None
41 # jump back to the beginning to the last record
42 fd.seek(0)
43 for i, line in enumerate(fd):
44 if i == line_start:
45 break
47 # read header
48 parameters_header = _read_header(fd)
49 if 'cut_off_energy' in parameters_header:
50 cut_off_energy = parameters_header['cut_off_energy']
51 if 'basis_precision' in parameters_header:
52 del parameters_header['cut_off_energy'] # avoid conflict
54 markers_new_iteration = [
55 'BFGS: starting iteration',
56 'BFGS: improving iteration',
57 'Starting MD iteration',
58 ]
60 images = []
62 results = {}
63 species_pot = []
64 castep_warnings = []
65 for i, line in enumerate(fd):
67 if i > line_end:
68 break
70 if 'Number of kpoints used' in line:
71 kpoints = int(line.split('=')[-1].strip())
72 elif 'Unit Cell' in line:
73 lattice_real = _read_unit_cell(fd)
74 elif 'Cell Contents' in line:
75 for line in fd:
76 if 'Total number of ions in cell' in line:
77 n_atoms = int(line.split()[7])
78 if 'Total number of species in cell' in line:
79 int(line.split()[7])
80 fields = line.split()
81 if len(fields) == 0:
82 break
83 elif 'Fractional coordinates of atoms' in line:
84 species, custom_species, positions_frac = \
85 _read_fractional_coordinates(fd, n_atoms)
86 elif 'Files used for pseudopotentials' in line:
87 for line in fd:
88 line = fd.readline()
89 if 'Pseudopotential generated on-the-fly' in line:
90 continue
91 fields = line.split()
92 if len(fields) == 2:
93 species_pot.append(fields)
94 else:
95 break
96 elif 'k-Points For BZ Sampling' in line:
97 # TODO: generalize for non-Monkhorst Pack case
98 # (i.e. kpoint lists) -
99 # kpoints_offset cannot be read this way and
100 # is hence always set to None
101 for line in fd:
102 if not line.strip():
103 break
104 if 'MP grid size for SCF calculation' in line:
105 # kpoints = ' '.join(line.split()[-3:])
106 # self.kpoints_mp_grid = kpoints
107 # self.kpoints_mp_offset = '0. 0. 0.'
108 # not set here anymore because otherwise
109 # two calculator objects go out of sync
110 # after each calculation triggering unnecessary
111 # recalculation
112 break
114 elif 'Final energy' in line:
115 key = 'energy_without_dispersion_correction'
116 results[key] = float(line.split()[-2])
117 elif 'Final free energy' in line:
118 key = 'free_energy_without_dispersion_correction'
119 results[key] = float(line.split()[-2])
120 elif 'NB est. 0K energy' in line:
121 key = 'energy_zero_without_dispersion_correction'
122 results[key] = float(line.split()[-2])
124 # Add support for dispersion correction
125 # filtering due to SEDC is done in get_potential_energy
126 elif 'Dispersion corrected final energy' in line:
127 key = 'energy_with_dispersion_correlation'
128 results[key] = float(line.split()[-2])
129 elif 'Dispersion corrected final free energy' in line:
130 key = 'free_energy_with_dispersion_correlation'
131 results[key] = float(line.split()[-2])
132 elif 'NB dispersion corrected est. 0K energy' in line:
133 key = 'energy_zero_with_dispersion_correlation'
134 results[key] = float(line.split()[-2])
136 # check if we had a finite basis set correction
137 elif 'Total energy corrected for finite basis set' in line:
138 key = 'energy_with_finite_basis_set_correction'
139 results[key] = float(line.split()[-2])
141 # ******************** Forces *********************
142 # ************** Symmetrised Forces ***************
143 # ******************** Constrained Forces ********************
144 # ******************* Unconstrained Forces *******************
145 elif re.search(r'\**.* Forces \**', line):
146 forces, constraints = _read_forces(fd, n_atoms)
147 results['forces'] = np.array(forces)
149 # ***************** Stress Tensor *****************
150 # *********** Symmetrised Stress Tensor ***********
151 elif re.search(r'\**.* Stress Tensor \**', line):
152 results.update(_read_stress(fd))
154 elif any(_ in line for _ in markers_new_iteration):
155 _add_atoms(
156 images,
157 lattice_real,
158 species,
159 custom_species,
160 positions_frac,
161 constraints,
162 results,
163 )
164 # reset for the next step
165 lattice_real = None
166 species = None
167 positions_frac = None
168 results = {}
170 # extract info from the Mulliken analysis
171 elif 'Atomic Populations' in line:
172 results.update(_read_mulliken_charges(fd))
174 # extract detailed Hirshfeld analysis (iprint > 1)
175 elif 'Hirshfeld total electronic charge (e)' in line:
176 results.update(_read_hirshfeld_details(fd, n_atoms))
178 elif 'Hirshfeld Analysis' in line:
179 results.update(_read_hirshfeld_charges(fd))
181 # There is actually no good reason to get out of the loop
182 # already at this point... or do I miss something?
183 # elif 'BFGS: Final Configuration:' in line:
184 # break
185 elif 'warn' in line.lower():
186 castep_warnings.append(line)
188 # fetch some last info
189 elif 'Total time' in line:
190 pattern = r'.*=\s*([\d\.]+) s'
191 total_time = float(re.search(pattern, line).group(1))
193 elif 'Peak Memory Use' in line:
194 pattern = r'.*=\s*([\d]+) kB'
195 peak_memory = int(re.search(pattern, line).group(1))
197 # add the last image
198 _add_atoms(
199 images,
200 lattice_real,
201 species,
202 custom_species,
203 positions_frac,
204 constraints,
205 results,
206 )
208 for atoms in images:
209 # these variables are temporarily assigned to `SinglePointCalculator`
210 # to be assigned to the `Castep` calculator for backward compatibility
211 atoms.calc._cut_off_energy = cut_off_energy
212 atoms.calc._kpoints = kpoints
213 atoms.calc._species_pot = species_pot
214 atoms.calc._total_time = total_time
215 atoms.calc._peak_memory = peak_memory
216 atoms.calc._parameters_header = parameters_header
218 if castep_warnings:
219 warnings.warn('WARNING: .castep file contains warnings')
220 for warning in castep_warnings:
221 warnings.warn(warning)
223 if isinstance(index, str):
224 index = string2index(index)
226 return images[index]
229def _find_last_record(fd):
230 """Find the last record of the .castep file.
232 Returns
233 -------
234 start : int
235 Line number of the first line of the last record.
236 end : int
237 Line number of the last line of the last record.
238 end_found : bool
239 True if the .castep file ends as expected.
241 """
242 start = -1
243 for i, line in enumerate(fd):
244 if (('Welcome' in line or 'Materials Studio' in line)
245 and 'CASTEP' in line):
246 start = i
248 if start < 0:
249 warnings.warn(
250 f'Could not find CASTEP label in result file: {fd.name}.'
251 ' Are you sure this is a .castep file?'
252 )
253 return None
255 # search for regular end of file
256 end_found = False
257 # start to search from record beginning from the back
258 # and see if
259 end = -1
260 fd.seek(0)
261 for i, line in enumerate(fd):
262 if i < start:
263 continue
264 if 'Finalisation time =' in line:
265 end_found = True
266 end = i
267 break
269 return (start, end, end_found)
272def _read_header(out: io.TextIOBase):
273 """Read the header blocks from a .castep file.
275 Returns
276 -------
277 parameters : dict
278 Dictionary storing keys and values of a .param file.
279 """
280 def _parse_on_off(_: str):
281 return {'on': True, 'off': False}[_]
283 read_title = False
284 parameters: Dict[str, Any] = {}
285 for line in out:
286 if len(line) == 0: # end of file
287 break
288 if re.search(r'^\s*\*+$', line) and read_title: # end of header
289 break
291 if re.search(r'\**.* Title \**', line):
292 read_title = True
294 # General Parameters
296 elif 'output verbosity' in line:
297 parameters['iprint'] = int(line.split()[-1][1])
298 elif re.match(r'\stype of calculation\s*:', line):
299 parameters['task'] = {
300 'single point energy': 'SinglePoint',
301 'geometry optimization': 'GeometryOptimization',
302 'band structure': 'BandStructure',
303 'molecular dynamics': 'MolecularDynamics',
304 'optical properties': 'Optics',
305 'phonon calculation': 'Phonon',
306 'E-field calculation': 'Efield',
307 'Phonon followed by E-field': 'Phonon+Efield',
308 'transition state search': 'TransitionStateSearch',
309 'Magnetic Resonance': 'MagRes',
310 'Core level spectra': 'Elnes',
311 'Electronic Spectroscopy': 'ElectronicSpectroscopy',
312 }[line.split(':')[-1].strip()]
313 elif 'stress calculation' in line:
314 parameters['calculate_stress'] = _parse_on_off(line.split()[-1])
315 elif 'calculation limited to maximum' in line:
316 parameters['run_time'] = float(line.split()[-2])
317 elif re.match(r'\soptimization strategy\s*:', line):
318 parameters['opt_strategy'] = {
319 'maximize speed(+++)': 'Speed',
320 'minimize memory(---)': 'Memory',
321 'balance speed and memory': 'Default',
322 }[line.split(':')[-1].strip()]
324 # Exchange-Correlation Parameters
326 elif re.match(r'\susing functional\s*:', line):
327 parameters['xc_functional'] = {
328 'Local Density Approximation': 'LDA',
329 'Perdew Wang (1991)': 'PW91',
330 'Perdew Burke Ernzerhof': 'PBE',
331 'revised Perdew Burke Ernzerhof': 'RPBE',
332 'PBE with Wu-Cohen exchange': 'WC',
333 'PBE for solids (2008)': 'PBESOL',
334 'Hartree-Fock': 'HF',
335 'Hartree-Fock +': 'HF-LDA',
336 'Screened Hartree-Fock': 'sX',
337 'Screened Hartree-Fock + ': 'sX-LDA',
338 'hybrid PBE0': 'PBE0',
339 'hybrid B3LYP': 'B3LYP',
340 'hybrid HSE03': 'HSE03',
341 'hybrid HSE06': 'HSE06',
342 'RSCAN': 'RSCAN',
343 }[line.split(':')[-1].strip()]
344 elif 'DFT+D: Semi-empirical dispersion correction' in line:
345 parameters['sedc_apply'] = _parse_on_off(line.split()[-1])
346 elif 'SEDC with' in line:
347 parameters['sedc_scheme'] = {
348 'OBS correction scheme': 'OBS',
349 'G06 correction scheme': 'G06',
350 'D3 correction scheme': 'D3',
351 'D3(BJ) correction scheme': 'D3-BJ',
352 'D4 correction scheme': 'D4',
353 'JCHS correction scheme': 'JCHS',
354 'TS correction scheme': 'TS',
355 'TSsurf correction scheme': 'TSSURF',
356 'TS+SCS correction scheme': 'TSSCS',
357 'aperiodic TS+SCS correction scheme': 'ATSSCS',
358 'aperiodic MBD@SCS method': 'AMBD',
359 'MBD@SCS method': 'MBD',
360 'aperiodic MBD@rsSCS method': 'AMBD*',
361 'MBD@rsSCS method': 'MBD*',
362 'XDM correction scheme': 'XDM',
363 }[line.split(':')[-1].strip()]
365 # Basis Set Parameters
367 elif 'basis set accuracy' in line:
368 parameters['basis_precision'] = line.split()[-1]
369 elif 'plane wave basis set cut-off' in line:
370 parameters['cut_off_energy'] = float(line.split()[-2])
371 elif re.match(r'\sfinite basis set correction\s*:', line):
372 parameters['finite_basis_corr'] = {
373 'none': 0,
374 'manual': 1,
375 'automatic': 2,
376 }[line.split()[-1]]
378 # Electronic Parameters
380 elif 'treating system as spin-polarized' in line:
381 parameters['spin_polarized'] = True
383 # Electronic Minimization Parameters
385 elif 'Treating system as non-metallic' in line:
386 parameters['fix_occupancy'] = True
387 elif 'total energy / atom convergence tol.' in line:
388 parameters['elec_energy_tol'] = float(line.split()[-2])
389 elif 'convergence tolerance window' in line:
390 parameters['elec_convergence_win'] = int(line.split()[-2])
391 elif 'max. number of SCF cycles:' in line:
392 parameters['max_scf_cycles'] = float(line.split()[-1])
393 elif 'dump wavefunctions every' in line:
394 parameters['num_dump_cycles'] = float(line.split()[-3])
396 # Density Mixing Parameters
398 elif 'density-mixing scheme' in line:
399 parameters['mixing_scheme'] = line.split()[-1]
401 return parameters
404def _read_unit_cell(out: io.TextIOBase):
405 """Read a Unit Cell block from a .castep file."""
406 for line in out:
407 fields = line.split()
408 if len(fields) == 6:
409 break
410 lattice_real = []
411 for _ in range(3):
412 lattice_real.append([float(f) for f in fields[0:3]])
413 line = out.readline()
414 fields = line.split()
415 return np.array(lattice_real)
418def _read_forces(out: io.TextIOBase, n_atoms: int):
419 """Read a block for atomic forces from a .castep file."""
420 constraints: List[FixConstraint] = []
421 forces = []
422 for line in out:
423 fields = line.split()
424 if len(fields) == 7:
425 break
426 for n in range(n_atoms):
427 consd = np.array([0, 0, 0])
428 fxyz = [0.0, 0.0, 0.0]
429 for i, force_component in enumerate(fields[-4:-1]):
430 if force_component.count("(cons'd)") > 0:
431 consd[i] = 1
432 # remove constraint labels in force components
433 fxyz[i] = float(force_component.replace("(cons'd)", ''))
434 if consd.all():
435 constraints.append(FixAtoms(n))
436 elif consd.any():
437 constraints.append(FixCartesian(n, consd))
438 forces.append(fxyz)
439 line = out.readline()
440 fields = line.split()
441 return forces, constraints
444def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int):
445 """Read fractional coordinates from a .castep file."""
446 species: List[str] = []
447 custom_species: Optional[List[str]] = None # A CASTEP special thing
448 positions_frac: List[List[float]] = []
449 for line in out:
450 fields = line.split()
451 if len(fields) == 7:
452 break
453 for _ in range(n_atoms):
454 spec_custom = fields[1].split(':', 1)
455 elem = spec_custom[0]
456 if len(spec_custom) > 1 and custom_species is None:
457 # Add it to the custom info!
458 custom_species = list(species)
459 species.append(elem)
460 if custom_species is not None:
461 custom_species.append(fields[1])
462 positions_frac.append([float(s) for s in fields[3:6]])
463 line = out.readline()
464 fields = line.split()
465 return species, custom_species, positions_frac
468def _read_stress(out: io.TextIOBase):
469 """Read a block for the stress tensor from a .castep file."""
470 for line in out:
471 fields = line.split()
472 if len(fields) == 6:
473 break
474 results = {}
475 stress = []
476 for _ in range(3):
477 stress.append([float(s) for s in fields[2:5]])
478 line = out.readline()
479 fields = line.split()
480 # stress in .castep file is given in GPa
481 results['stress'] = np.array(stress) * units.GPa
482 results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]]
483 line = out.readline()
484 if "Pressure:" in line:
485 results['pressure'] = float(line.split()[-2]) * units.GPa
486 return results
489def _add_atoms(
490 images,
491 lattice_real,
492 species,
493 custom_species,
494 positions_frac,
495 constraints,
496 results,
497):
498 # If all the lattice parameters are fixed,
499 # the `Unit Cell` block in the .castep file is not printed
500 # in every ionic step.
501 # The lattice paramters are therefore taken from the last step.
502 # This happens:
503 # - `GeometryOptimization`: `FIX_ALL_CELL : TRUE`
504 # - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT`
505 if lattice_real is None:
506 lattice_real = images[-1].cell.copy()
508 # for highly symmetric systems (where essentially only the
509 # stress is optimized, but the atomic positions) positions
510 # are only printed once.
511 if species is None:
512 species = images[-1].symbols
513 if positions_frac is None:
514 positions_frac = images[-1].get_scaled_positions()
516 _set_energy_and_free_energy(results)
518 atoms = Atoms(
519 species,
520 cell=lattice_real,
521 constraint=constraints,
522 pbc=True,
523 scaled_positions=positions_frac,
524 )
525 if custom_species is not None:
526 atoms.new_array(
527 'castep_custom_species',
528 np.array(custom_species),
529 )
531 atoms.calc = SinglePointCalculator(atoms)
532 atoms.calc.results = results
534 images.append(atoms)
537def _read_mulliken_charges(out: io.TextIOBase):
538 """Read a block for Mulliken charges from a .castep file."""
539 for i in range(3):
540 line = out.readline()
541 if i == 1:
542 spin_polarized = 'Spin' in line
543 results = defaultdict(list)
544 for line in out:
545 fields = line.split()
546 if len(fields) == 1:
547 break
548 if spin_polarized:
549 if len(fields) != 7: # due to CASTEP 18 outformat changes
550 results['charges'].append(float(fields[-2]))
551 results['magmoms'].append(float(fields[-1]))
552 else:
553 results['charges'].append(float(fields[-1]))
554 return {k: np.array(v) for k, v in results.items()}
557def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int):
558 """Read the Hirshfeld analysis when iprint > 1 from a .castep file."""
559 results = defaultdict(list)
560 for _ in range(n_atoms):
561 for line in out:
562 if line.strip() == '':
563 break # end for each atom
564 if 'Hirshfeld / free atomic volume :' in line:
565 line = out.readline()
566 fields = line.split()
567 results['hirshfeld_volume_ratios'].append(float(fields[0]))
568 return {k: np.array(v) for k, v in results.items()}
571def _read_hirshfeld_charges(out: io.TextIOBase):
572 """Read a block for Hirshfeld charges from a .castep file."""
573 for i in range(3):
574 line = out.readline()
575 if i == 1:
576 spin_polarized = 'Spin' in line
577 results = defaultdict(list)
578 for line in out:
579 fields = line.split()
580 if len(fields) == 1:
581 break
582 if spin_polarized:
583 results['hirshfeld_charges'].append(float(fields[-2]))
584 results['hirshfeld_magmoms'].append(float(fields[-1]))
585 else:
586 results['hirshfeld_charges'].append(float(fields[-1]))
587 return {k: np.array(v) for k, v in results.items()}
590def _set_energy_and_free_energy(results: Dict[str, Any]):
591 """Set values referred to as `energy` and `free_energy`."""
592 if 'energy_with_dispersion_correction' in results:
593 suffix = '_with_dispersion_correction'
594 else:
595 suffix = '_without_dispersion_correction'
597 if 'free_energy' + suffix in results: # metallic
598 keye = 'energy_zero' + suffix
599 keyf = 'free_energy' + suffix
600 else: # non-metallic
601 # The finite basis set correction is applied to the energy at finite T
602 # (not the energy at 0 K). We should hence refer to the corrected value
603 # as `energy` only when the free energy is unavailable, i.e., only when
604 # FIX_OCCUPANCY : TRUE and thus no smearing is applied.
605 if 'energy_with_finite_basis_set_correction' in results:
606 keye = 'energy_with_finite_basis_set_correction'
607 else:
608 keye = 'energy' + suffix
609 keyf = 'energy' + suffix
611 results['energy'] = results[keye]
612 results['free_energy'] = results[keyf]