Coverage for /builds/alexhroom/ase/ase/io/castep/castep_reader.py: 91.04%

335 statements  

« prev     ^ index     » next       coverage.py v7.5.3, created at 2024-08-05 14:37 +0000

1import io 

2import re 

3import warnings 

4from collections import defaultdict 

5from typing import Any, Dict, List, Optional 

6 

7import numpy as np 

8 

9from ase import Atoms, units 

10from ase.calculators.singlepoint import SinglePointCalculator 

11from ase.constraints import FixAtoms, FixCartesian, FixConstraint 

12from ase.io import ParseError 

13from ase.utils import reader, string2index 

14 

15 

16@reader 

17def read_castep_castep(fd, index=-1): 

18 """Read a .castep file and returns an Atoms object. 

19 

20 The calculator information will be stored in the calc attribute. 

21 

22 Notes 

23 ----- 

24 This routine will return an atom ordering as found within the castep file. 

25 This means that the species will be ordered by ascending atomic numbers. 

26 The atoms witin a species are ordered as given in the original cell file. 

27 

28 """ 

29 # look for last result, if several CASTEP run are appended 

30 line_start, line_end, end_found = _find_last_record(fd) 

31 if not end_found: 

32 raise ParseError(f'No regular end found in {fd.name} file.') 

33 

34 # These variables are finally assigned to `SinglePointCalculator` 

35 # for backward compatibility with the `Castep` calculator. 

36 cut_off_energy = None 

37 kpoints = None 

38 total_time = None 

39 peak_memory = None 

40 

41 # jump back to the beginning to the last record 

42 fd.seek(0) 

43 for i, line in enumerate(fd): 

44 if i == line_start: 

45 break 

46 

47 # read header 

48 parameters_header = _read_header(fd) 

49 if 'cut_off_energy' in parameters_header: 

50 cut_off_energy = parameters_header['cut_off_energy'] 

51 if 'basis_precision' in parameters_header: 

52 del parameters_header['cut_off_energy'] # avoid conflict 

53 

54 markers_new_iteration = [ 

55 'BFGS: starting iteration', 

56 'BFGS: improving iteration', 

57 'Starting MD iteration', 

58 ] 

59 

60 images = [] 

61 

62 results = {} 

63 species_pot = [] 

64 castep_warnings = [] 

65 for i, line in enumerate(fd): 

66 

67 if i > line_end: 

68 break 

69 

70 if 'Number of kpoints used' in line: 

71 kpoints = int(line.split('=')[-1].strip()) 

72 elif 'Unit Cell' in line: 

73 lattice_real = _read_unit_cell(fd) 

74 elif 'Cell Contents' in line: 

75 for line in fd: 

76 if 'Total number of ions in cell' in line: 

77 n_atoms = int(line.split()[7]) 

78 if 'Total number of species in cell' in line: 

79 int(line.split()[7]) 

80 fields = line.split() 

81 if len(fields) == 0: 

82 break 

83 elif 'Fractional coordinates of atoms' in line: 

84 species, custom_species, positions_frac = \ 

85 _read_fractional_coordinates(fd, n_atoms) 

86 elif 'Files used for pseudopotentials' in line: 

87 for line in fd: 

88 line = fd.readline() 

89 if 'Pseudopotential generated on-the-fly' in line: 

90 continue 

91 fields = line.split() 

92 if len(fields) == 2: 

93 species_pot.append(fields) 

94 else: 

95 break 

96 elif 'k-Points For BZ Sampling' in line: 

97 # TODO: generalize for non-Monkhorst Pack case 

98 # (i.e. kpoint lists) - 

99 # kpoints_offset cannot be read this way and 

100 # is hence always set to None 

101 for line in fd: 

102 if not line.strip(): 

103 break 

104 if 'MP grid size for SCF calculation' in line: 

105 # kpoints = ' '.join(line.split()[-3:]) 

106 # self.kpoints_mp_grid = kpoints 

107 # self.kpoints_mp_offset = '0. 0. 0.' 

108 # not set here anymore because otherwise 

109 # two calculator objects go out of sync 

110 # after each calculation triggering unnecessary 

111 # recalculation 

112 break 

113 

114 elif 'Final energy' in line: 

115 key = 'energy_without_dispersion_correction' 

116 results[key] = float(line.split()[-2]) 

117 elif 'Final free energy' in line: 

118 key = 'free_energy_without_dispersion_correction' 

119 results[key] = float(line.split()[-2]) 

120 elif 'NB est. 0K energy' in line: 

121 key = 'energy_zero_without_dispersion_correction' 

122 results[key] = float(line.split()[-2]) 

123 

124 # Add support for dispersion correction 

125 # filtering due to SEDC is done in get_potential_energy 

126 elif 'Dispersion corrected final energy' in line: 

127 key = 'energy_with_dispersion_correlation' 

128 results[key] = float(line.split()[-2]) 

129 elif 'Dispersion corrected final free energy' in line: 

130 key = 'free_energy_with_dispersion_correlation' 

131 results[key] = float(line.split()[-2]) 

132 elif 'NB dispersion corrected est. 0K energy' in line: 

133 key = 'energy_zero_with_dispersion_correlation' 

134 results[key] = float(line.split()[-2]) 

135 

136 # check if we had a finite basis set correction 

137 elif 'Total energy corrected for finite basis set' in line: 

138 key = 'energy_with_finite_basis_set_correction' 

139 results[key] = float(line.split()[-2]) 

140 

141 # ******************** Forces ********************* 

142 # ************** Symmetrised Forces *************** 

143 # ******************** Constrained Forces ******************** 

144 # ******************* Unconstrained Forces ******************* 

145 elif re.search(r'\**.* Forces \**', line): 

146 forces, constraints = _read_forces(fd, n_atoms) 

147 results['forces'] = np.array(forces) 

148 

149 # ***************** Stress Tensor ***************** 

150 # *********** Symmetrised Stress Tensor *********** 

151 elif re.search(r'\**.* Stress Tensor \**', line): 

152 results.update(_read_stress(fd)) 

153 

154 elif any(_ in line for _ in markers_new_iteration): 

155 _add_atoms( 

156 images, 

157 lattice_real, 

158 species, 

159 custom_species, 

160 positions_frac, 

161 constraints, 

162 results, 

163 ) 

164 # reset for the next step 

165 lattice_real = None 

166 species = None 

167 positions_frac = None 

168 results = {} 

169 

170 # extract info from the Mulliken analysis 

171 elif 'Atomic Populations' in line: 

172 results.update(_read_mulliken_charges(fd)) 

173 

174 # extract detailed Hirshfeld analysis (iprint > 1) 

175 elif 'Hirshfeld total electronic charge (e)' in line: 

176 results.update(_read_hirshfeld_details(fd, n_atoms)) 

177 

178 elif 'Hirshfeld Analysis' in line: 

179 results.update(_read_hirshfeld_charges(fd)) 

180 

181 # There is actually no good reason to get out of the loop 

182 # already at this point... or do I miss something? 

183 # elif 'BFGS: Final Configuration:' in line: 

184 # break 

185 elif 'warn' in line.lower(): 

186 castep_warnings.append(line) 

187 

188 # fetch some last info 

189 elif 'Total time' in line: 

190 pattern = r'.*=\s*([\d\.]+) s' 

191 total_time = float(re.search(pattern, line).group(1)) 

192 

193 elif 'Peak Memory Use' in line: 

194 pattern = r'.*=\s*([\d]+) kB' 

195 peak_memory = int(re.search(pattern, line).group(1)) 

196 

197 # add the last image 

198 _add_atoms( 

199 images, 

200 lattice_real, 

201 species, 

202 custom_species, 

203 positions_frac, 

204 constraints, 

205 results, 

206 ) 

207 

208 for atoms in images: 

209 # these variables are temporarily assigned to `SinglePointCalculator` 

210 # to be assigned to the `Castep` calculator for backward compatibility 

211 atoms.calc._cut_off_energy = cut_off_energy 

212 atoms.calc._kpoints = kpoints 

213 atoms.calc._species_pot = species_pot 

214 atoms.calc._total_time = total_time 

215 atoms.calc._peak_memory = peak_memory 

216 atoms.calc._parameters_header = parameters_header 

217 

218 if castep_warnings: 

219 warnings.warn('WARNING: .castep file contains warnings') 

220 for warning in castep_warnings: 

221 warnings.warn(warning) 

222 

223 if isinstance(index, str): 

224 index = string2index(index) 

225 

226 return images[index] 

227 

228 

229def _find_last_record(fd): 

230 """Find the last record of the .castep file. 

231 

232 Returns 

233 ------- 

234 start : int 

235 Line number of the first line of the last record. 

236 end : int 

237 Line number of the last line of the last record. 

238 end_found : bool 

239 True if the .castep file ends as expected. 

240 

241 """ 

242 start = -1 

243 for i, line in enumerate(fd): 

244 if (('Welcome' in line or 'Materials Studio' in line) 

245 and 'CASTEP' in line): 

246 start = i 

247 

248 if start < 0: 

249 warnings.warn( 

250 f'Could not find CASTEP label in result file: {fd.name}.' 

251 ' Are you sure this is a .castep file?' 

252 ) 

253 return None 

254 

255 # search for regular end of file 

256 end_found = False 

257 # start to search from record beginning from the back 

258 # and see if 

259 end = -1 

260 fd.seek(0) 

261 for i, line in enumerate(fd): 

262 if i < start: 

263 continue 

264 if 'Finalisation time =' in line: 

265 end_found = True 

266 end = i 

267 break 

268 

269 return (start, end, end_found) 

270 

271 

272def _read_header(out: io.TextIOBase): 

273 """Read the header blocks from a .castep file. 

274 

275 Returns 

276 ------- 

277 parameters : dict 

278 Dictionary storing keys and values of a .param file. 

279 """ 

280 def _parse_on_off(_: str): 

281 return {'on': True, 'off': False}[_] 

282 

283 read_title = False 

284 parameters: Dict[str, Any] = {} 

285 for line in out: 

286 if len(line) == 0: # end of file 

287 break 

288 if re.search(r'^\s*\*+$', line) and read_title: # end of header 

289 break 

290 

291 if re.search(r'\**.* Title \**', line): 

292 read_title = True 

293 

294 # General Parameters 

295 

296 elif 'output verbosity' in line: 

297 parameters['iprint'] = int(line.split()[-1][1]) 

298 elif re.match(r'\stype of calculation\s*:', line): 

299 parameters['task'] = { 

300 'single point energy': 'SinglePoint', 

301 'geometry optimization': 'GeometryOptimization', 

302 'band structure': 'BandStructure', 

303 'molecular dynamics': 'MolecularDynamics', 

304 'optical properties': 'Optics', 

305 'phonon calculation': 'Phonon', 

306 'E-field calculation': 'Efield', 

307 'Phonon followed by E-field': 'Phonon+Efield', 

308 'transition state search': 'TransitionStateSearch', 

309 'Magnetic Resonance': 'MagRes', 

310 'Core level spectra': 'Elnes', 

311 'Electronic Spectroscopy': 'ElectronicSpectroscopy', 

312 }[line.split(':')[-1].strip()] 

313 elif 'stress calculation' in line: 

314 parameters['calculate_stress'] = _parse_on_off(line.split()[-1]) 

315 elif 'calculation limited to maximum' in line: 

316 parameters['run_time'] = float(line.split()[-2]) 

317 elif re.match(r'\soptimization strategy\s*:', line): 

318 parameters['opt_strategy'] = { 

319 'maximize speed(+++)': 'Speed', 

320 'minimize memory(---)': 'Memory', 

321 'balance speed and memory': 'Default', 

322 }[line.split(':')[-1].strip()] 

323 

324 # Exchange-Correlation Parameters 

325 

326 elif re.match(r'\susing functional\s*:', line): 

327 parameters['xc_functional'] = { 

328 'Local Density Approximation': 'LDA', 

329 'Perdew Wang (1991)': 'PW91', 

330 'Perdew Burke Ernzerhof': 'PBE', 

331 'revised Perdew Burke Ernzerhof': 'RPBE', 

332 'PBE with Wu-Cohen exchange': 'WC', 

333 'PBE for solids (2008)': 'PBESOL', 

334 'Hartree-Fock': 'HF', 

335 'Hartree-Fock +': 'HF-LDA', 

336 'Screened Hartree-Fock': 'sX', 

337 'Screened Hartree-Fock + ': 'sX-LDA', 

338 'hybrid PBE0': 'PBE0', 

339 'hybrid B3LYP': 'B3LYP', 

340 'hybrid HSE03': 'HSE03', 

341 'hybrid HSE06': 'HSE06', 

342 'RSCAN': 'RSCAN', 

343 }[line.split(':')[-1].strip()] 

344 elif 'DFT+D: Semi-empirical dispersion correction' in line: 

345 parameters['sedc_apply'] = _parse_on_off(line.split()[-1]) 

346 elif 'SEDC with' in line: 

347 parameters['sedc_scheme'] = { 

348 'OBS correction scheme': 'OBS', 

349 'G06 correction scheme': 'G06', 

350 'D3 correction scheme': 'D3', 

351 'D3(BJ) correction scheme': 'D3-BJ', 

352 'D4 correction scheme': 'D4', 

353 'JCHS correction scheme': 'JCHS', 

354 'TS correction scheme': 'TS', 

355 'TSsurf correction scheme': 'TSSURF', 

356 'TS+SCS correction scheme': 'TSSCS', 

357 'aperiodic TS+SCS correction scheme': 'ATSSCS', 

358 'aperiodic MBD@SCS method': 'AMBD', 

359 'MBD@SCS method': 'MBD', 

360 'aperiodic MBD@rsSCS method': 'AMBD*', 

361 'MBD@rsSCS method': 'MBD*', 

362 'XDM correction scheme': 'XDM', 

363 }[line.split(':')[-1].strip()] 

364 

365 # Basis Set Parameters 

366 

367 elif 'basis set accuracy' in line: 

368 parameters['basis_precision'] = line.split()[-1] 

369 elif 'plane wave basis set cut-off' in line: 

370 parameters['cut_off_energy'] = float(line.split()[-2]) 

371 elif re.match(r'\sfinite basis set correction\s*:', line): 

372 parameters['finite_basis_corr'] = { 

373 'none': 0, 

374 'manual': 1, 

375 'automatic': 2, 

376 }[line.split()[-1]] 

377 

378 # Electronic Parameters 

379 

380 elif 'treating system as spin-polarized' in line: 

381 parameters['spin_polarized'] = True 

382 

383 # Electronic Minimization Parameters 

384 

385 elif 'Treating system as non-metallic' in line: 

386 parameters['fix_occupancy'] = True 

387 elif 'total energy / atom convergence tol.' in line: 

388 parameters['elec_energy_tol'] = float(line.split()[-2]) 

389 elif 'convergence tolerance window' in line: 

390 parameters['elec_convergence_win'] = int(line.split()[-2]) 

391 elif 'max. number of SCF cycles:' in line: 

392 parameters['max_scf_cycles'] = float(line.split()[-1]) 

393 elif 'dump wavefunctions every' in line: 

394 parameters['num_dump_cycles'] = float(line.split()[-3]) 

395 

396 # Density Mixing Parameters 

397 

398 elif 'density-mixing scheme' in line: 

399 parameters['mixing_scheme'] = line.split()[-1] 

400 

401 return parameters 

402 

403 

404def _read_unit_cell(out: io.TextIOBase): 

405 """Read a Unit Cell block from a .castep file.""" 

406 for line in out: 

407 fields = line.split() 

408 if len(fields) == 6: 

409 break 

410 lattice_real = [] 

411 for _ in range(3): 

412 lattice_real.append([float(f) for f in fields[0:3]]) 

413 line = out.readline() 

414 fields = line.split() 

415 return np.array(lattice_real) 

416 

417 

418def _read_forces(out: io.TextIOBase, n_atoms: int): 

419 """Read a block for atomic forces from a .castep file.""" 

420 constraints: List[FixConstraint] = [] 

421 forces = [] 

422 for line in out: 

423 fields = line.split() 

424 if len(fields) == 7: 

425 break 

426 for n in range(n_atoms): 

427 consd = np.array([0, 0, 0]) 

428 fxyz = [0.0, 0.0, 0.0] 

429 for i, force_component in enumerate(fields[-4:-1]): 

430 if force_component.count("(cons'd)") > 0: 

431 consd[i] = 1 

432 # remove constraint labels in force components 

433 fxyz[i] = float(force_component.replace("(cons'd)", '')) 

434 if consd.all(): 

435 constraints.append(FixAtoms(n)) 

436 elif consd.any(): 

437 constraints.append(FixCartesian(n, consd)) 

438 forces.append(fxyz) 

439 line = out.readline() 

440 fields = line.split() 

441 return forces, constraints 

442 

443 

444def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int): 

445 """Read fractional coordinates from a .castep file.""" 

446 species: List[str] = [] 

447 custom_species: Optional[List[str]] = None # A CASTEP special thing 

448 positions_frac: List[List[float]] = [] 

449 for line in out: 

450 fields = line.split() 

451 if len(fields) == 7: 

452 break 

453 for _ in range(n_atoms): 

454 spec_custom = fields[1].split(':', 1) 

455 elem = spec_custom[0] 

456 if len(spec_custom) > 1 and custom_species is None: 

457 # Add it to the custom info! 

458 custom_species = list(species) 

459 species.append(elem) 

460 if custom_species is not None: 

461 custom_species.append(fields[1]) 

462 positions_frac.append([float(s) for s in fields[3:6]]) 

463 line = out.readline() 

464 fields = line.split() 

465 return species, custom_species, positions_frac 

466 

467 

468def _read_stress(out: io.TextIOBase): 

469 """Read a block for the stress tensor from a .castep file.""" 

470 for line in out: 

471 fields = line.split() 

472 if len(fields) == 6: 

473 break 

474 results = {} 

475 stress = [] 

476 for _ in range(3): 

477 stress.append([float(s) for s in fields[2:5]]) 

478 line = out.readline() 

479 fields = line.split() 

480 # stress in .castep file is given in GPa 

481 results['stress'] = np.array(stress) * units.GPa 

482 results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]] 

483 line = out.readline() 

484 if "Pressure:" in line: 

485 results['pressure'] = float(line.split()[-2]) * units.GPa 

486 return results 

487 

488 

489def _add_atoms( 

490 images, 

491 lattice_real, 

492 species, 

493 custom_species, 

494 positions_frac, 

495 constraints, 

496 results, 

497): 

498 # If all the lattice parameters are fixed, 

499 # the `Unit Cell` block in the .castep file is not printed 

500 # in every ionic step. 

501 # The lattice paramters are therefore taken from the last step. 

502 # This happens: 

503 # - `GeometryOptimization`: `FIX_ALL_CELL : TRUE` 

504 # - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT` 

505 if lattice_real is None: 

506 lattice_real = images[-1].cell.copy() 

507 

508 # for highly symmetric systems (where essentially only the 

509 # stress is optimized, but the atomic positions) positions 

510 # are only printed once. 

511 if species is None: 

512 species = images[-1].symbols 

513 if positions_frac is None: 

514 positions_frac = images[-1].get_scaled_positions() 

515 

516 _set_energy_and_free_energy(results) 

517 

518 atoms = Atoms( 

519 species, 

520 cell=lattice_real, 

521 constraint=constraints, 

522 pbc=True, 

523 scaled_positions=positions_frac, 

524 ) 

525 if custom_species is not None: 

526 atoms.new_array( 

527 'castep_custom_species', 

528 np.array(custom_species), 

529 ) 

530 

531 atoms.calc = SinglePointCalculator(atoms) 

532 atoms.calc.results = results 

533 

534 images.append(atoms) 

535 

536 

537def _read_mulliken_charges(out: io.TextIOBase): 

538 """Read a block for Mulliken charges from a .castep file.""" 

539 for i in range(3): 

540 line = out.readline() 

541 if i == 1: 

542 spin_polarized = 'Spin' in line 

543 results = defaultdict(list) 

544 for line in out: 

545 fields = line.split() 

546 if len(fields) == 1: 

547 break 

548 if spin_polarized: 

549 if len(fields) != 7: # due to CASTEP 18 outformat changes 

550 results['charges'].append(float(fields[-2])) 

551 results['magmoms'].append(float(fields[-1])) 

552 else: 

553 results['charges'].append(float(fields[-1])) 

554 return {k: np.array(v) for k, v in results.items()} 

555 

556 

557def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int): 

558 """Read the Hirshfeld analysis when iprint > 1 from a .castep file.""" 

559 results = defaultdict(list) 

560 for _ in range(n_atoms): 

561 for line in out: 

562 if line.strip() == '': 

563 break # end for each atom 

564 if 'Hirshfeld / free atomic volume :' in line: 

565 line = out.readline() 

566 fields = line.split() 

567 results['hirshfeld_volume_ratios'].append(float(fields[0])) 

568 return {k: np.array(v) for k, v in results.items()} 

569 

570 

571def _read_hirshfeld_charges(out: io.TextIOBase): 

572 """Read a block for Hirshfeld charges from a .castep file.""" 

573 for i in range(3): 

574 line = out.readline() 

575 if i == 1: 

576 spin_polarized = 'Spin' in line 

577 results = defaultdict(list) 

578 for line in out: 

579 fields = line.split() 

580 if len(fields) == 1: 

581 break 

582 if spin_polarized: 

583 results['hirshfeld_charges'].append(float(fields[-2])) 

584 results['hirshfeld_magmoms'].append(float(fields[-1])) 

585 else: 

586 results['hirshfeld_charges'].append(float(fields[-1])) 

587 return {k: np.array(v) for k, v in results.items()} 

588 

589 

590def _set_energy_and_free_energy(results: Dict[str, Any]): 

591 """Set values referred to as `energy` and `free_energy`.""" 

592 if 'energy_with_dispersion_correction' in results: 

593 suffix = '_with_dispersion_correction' 

594 else: 

595 suffix = '_without_dispersion_correction' 

596 

597 if 'free_energy' + suffix in results: # metallic 

598 keye = 'energy_zero' + suffix 

599 keyf = 'free_energy' + suffix 

600 else: # non-metallic 

601 # The finite basis set correction is applied to the energy at finite T 

602 # (not the energy at 0 K). We should hence refer to the corrected value 

603 # as `energy` only when the free energy is unavailable, i.e., only when 

604 # FIX_OCCUPANCY : TRUE and thus no smearing is applied. 

605 if 'energy_with_finite_basis_set_correction' in results: 

606 keye = 'energy_with_finite_basis_set_correction' 

607 else: 

608 keye = 'energy' + suffix 

609 keyf = 'energy' + suffix 

610 

611 results['energy'] = results[keye] 

612 results['free_energy'] = results[keyf]