Coverage for /builds/alexhroom/ase/ase/io/castep/castep

1import io

2import re

3import warnings

4from collections import defaultdict

5from typing import Any, Dict, List, Optional

7import numpy as np

9from ase import Atoms, units

10from ase.calculators.singlepoint import SinglePointCalculator

11from ase.constraints import FixAtoms, FixCartesian, FixConstraint

12from ase.io import ParseError

13from ase.utils import reader, string2index

16@reader

17def read_castep_castep(fd, index=-1):

18 """Read a .castep file and returns an Atoms object.

20 The calculator information will be stored in the calc attribute.

22 Notes

23 -----

24 This routine will return an atom ordering as found within the castep file.

25 This means that the species will be ordered by ascending atomic numbers.

26 The atoms witin a species are ordered as given in the original cell file.

28 """

29 # look for last result, if several CASTEP run are appended

30 line_start, line_end, end_found = _find_last_record(fd)

31 if not end_found:

32 raise ParseError(f'No regular end found in {fd.name} file.')

34 # These variables are finally assigned to `SinglePointCalculator`

35 # for backward compatibility with the `Castep` calculator.

36 cut_off_energy = None

37 kpoints = None

38 total_time = None

39 peak_memory = None

41 # jump back to the beginning to the last record

42 fd.seek(0)

43 for i, line in enumerate(fd):

44 if i == line_start:

45 break

47 # read header

48 parameters_header = _read_header(fd)

49 if 'cut_off_energy' in parameters_header:

50 cut_off_energy = parameters_header['cut_off_energy']

51 if 'basis_precision' in parameters_header:

52 del parameters_header['cut_off_energy'] # avoid conflict

54 markers_new_iteration = [

55 'BFGS: starting iteration',

56 'BFGS: improving iteration',

57 'Starting MD iteration',

58 ]

60 images = []

62 results = {}

63 species_pot = []

64 castep_warnings = []

65 for i, line in enumerate(fd):

67 if i > line_end:

68 break

70 if 'Number of kpoints used' in line:

71 kpoints = int(line.split('=')[-1].strip())

72 elif 'Unit Cell' in line:

73 lattice_real = _read_unit_cell(fd)

74 elif 'Cell Contents' in line:

75 for line in fd:

76 if 'Total number of ions in cell' in line:

77 n_atoms = int(line.split()[7])

78 if 'Total number of species in cell' in line:

79 int(line.split()[7])

80 fields = line.split()

81 if len(fields) == 0:

82 break

83 elif 'Fractional coordinates of atoms' in line:

84 species, custom_species, positions_frac = \

85 _read_fractional_coordinates(fd, n_atoms)

86 elif 'Files used for pseudopotentials' in line:

87 for line in fd:

88 line = fd.readline()

89 if 'Pseudopotential generated on-the-fly' in line:

90 continue

91 fields = line.split()

92 if len(fields) == 2:

93 species_pot.append(fields)

94 else:

95 break

96 elif 'k-Points For BZ Sampling' in line:

97 # TODO: generalize for non-Monkhorst Pack case

98 # (i.e. kpoint lists) -

99 # kpoints_offset cannot be read this way and

100 # is hence always set to None

101 for line in fd:

102 if not line.strip():

103 break

104 if 'MP grid size for SCF calculation' in line:

105 # kpoints = ' '.join(line.split()[-3:])

106 # self.kpoints_mp_grid = kpoints

107 # self.kpoints_mp_offset = '0. 0. 0.'

108 # not set here anymore because otherwise

109 # two calculator objects go out of sync

110 # after each calculation triggering unnecessary

111 # recalculation

112 break

113

114 elif 'Final energy' in line:

115 key = 'energy_without_dispersion_correction'

116 results[key] = float(line.split()[-2])

117 elif 'Final free energy' in line:

118 key = 'free_energy_without_dispersion_correction'

119 results[key] = float(line.split()[-2])

120 elif 'NB est. 0K energy' in line:

121 key = 'energy_zero_without_dispersion_correction'

122 results[key] = float(line.split()[-2])

123

124 # Add support for dispersion correction

125 # filtering due to SEDC is done in get_potential_energy

126 elif 'Dispersion corrected final energy' in line:

127 key = 'energy_with_dispersion_correlation'

128 results[key] = float(line.split()[-2])

129 elif 'Dispersion corrected final free energy' in line:

130 key = 'free_energy_with_dispersion_correlation'

131 results[key] = float(line.split()[-2])

132 elif 'NB dispersion corrected est. 0K energy' in line:

133 key = 'energy_zero_with_dispersion_correlation'

134 results[key] = float(line.split()[-2])

135

136 # check if we had a finite basis set correction

137 elif 'Total energy corrected for finite basis set' in line:

138 key = 'energy_with_finite_basis_set_correction'

139 results[key] = float(line.split()[-2])

140

141 # ******************** Forces *********************

142 # ************** Symmetrised Forces ***************

143 # ******************** Constrained Forces ********************

144 # ******************* Unconstrained Forces *******************

145 elif re.search(r'\**.* Forces \**', line):

146 forces, constraints = _read_forces(fd, n_atoms)

147 results['forces'] = np.array(forces)

148

149 # ***************** Stress Tensor *****************

150 # *********** Symmetrised Stress Tensor ***********

151 elif re.search(r'\**.* Stress Tensor \**', line):

152 results.update(_read_stress(fd))

153

154 elif any(_ in line for _ in markers_new_iteration):

155 _add_atoms(

156 images,

157 lattice_real,

158 species,

159 custom_species,

160 positions_frac,

161 constraints,

162 results,

163 )

164 # reset for the next step

165 lattice_real = None

166 species = None

167 positions_frac = None

168 results = {}

169

170 # extract info from the Mulliken analysis

171 elif 'Atomic Populations' in line:

172 results.update(_read_mulliken_charges(fd))

173

174 # extract detailed Hirshfeld analysis (iprint > 1)

175 elif 'Hirshfeld total electronic charge (e)' in line:

176 results.update(_read_hirshfeld_details(fd, n_atoms))

177

178 elif 'Hirshfeld Analysis' in line:

179 results.update(_read_hirshfeld_charges(fd))

180

181 # There is actually no good reason to get out of the loop

182 # already at this point... or do I miss something?

183 # elif 'BFGS: Final Configuration:' in line:

184 # break

185 elif 'warn' in line.lower():

186 castep_warnings.append(line)

187

188 # fetch some last info

189 elif 'Total time' in line:

190 pattern = r'.*=\s*([\d\.]+) s'

191 total_time = float(re.search(pattern, line).group(1))

192

193 elif 'Peak Memory Use' in line:

194 pattern = r'.*=\s*([\d]+) kB'

195 peak_memory = int(re.search(pattern, line).group(1))

196

197 # add the last image

198 _add_atoms(

199 images,

200 lattice_real,

201 species,

202 custom_species,

203 positions_frac,

204 constraints,

205 results,

206 )

207

208 for atoms in images:

209 # these variables are temporarily assigned to `SinglePointCalculator`

210 # to be assigned to the `Castep` calculator for backward compatibility

211 atoms.calc._cut_off_energy = cut_off_energy

212 atoms.calc._kpoints = kpoints

213 atoms.calc._species_pot = species_pot

214 atoms.calc._total_time = total_time

215 atoms.calc._peak_memory = peak_memory

216 atoms.calc._parameters_header = parameters_header

217

218 if castep_warnings:

219 warnings.warn('WARNING: .castep file contains warnings')

220 for warning in castep_warnings:

221 warnings.warn(warning)

222

223 if isinstance(index, str):

224 index = string2index(index)

225

226 return images[index]

227

228

229def _find_last_record(fd):

230 """Find the last record of the .castep file.

231

232 Returns

233 -------

234 start : int

235 Line number of the first line of the last record.

236 end : int

237 Line number of the last line of the last record.

238 end_found : bool

239 True if the .castep file ends as expected.

240

241 """

242 start = -1

243 for i, line in enumerate(fd):

244 if (('Welcome' in line or 'Materials Studio' in line)

245 and 'CASTEP' in line):

246 start = i

247

248 if start < 0:

249 warnings.warn(

250 f'Could not find CASTEP label in result file: {fd.name}.'

251 ' Are you sure this is a .castep file?'

252 )

253 return None

254

255 # search for regular end of file

256 end_found = False

257 # start to search from record beginning from the back

258 # and see if

259 end = -1

260 fd.seek(0)

261 for i, line in enumerate(fd):

262 if i < start:

263 continue

264 if 'Finalisation time =' in line:

265 end_found = True

266 end = i

267 break

268

269 return (start, end, end_found)

270

271

272def _read_header(out: io.TextIOBase):

273 """Read the header blocks from a .castep file.

274

275 Returns

276 -------

277 parameters : dict

278 Dictionary storing keys and values of a .param file.

279 """

280 def _parse_on_off(_: str):

281 return {'on': True, 'off': False}[_]

282

283 read_title = False

284 parameters: Dict[str, Any] = {}

285 for line in out:

286 if len(line) == 0: # end of file

287 break

288 if re.search(r'^\s*\*+$', line) and read_title: # end of header

289 break

290

291 if re.search(r'\**.* Title \**', line):

292 read_title = True

293

294 # General Parameters

295

296 elif 'output verbosity' in line:

297 parameters['iprint'] = int(line.split()[-1][1])

298 elif re.match(r'\stype of calculation\s*:', line):

299 parameters['task'] = {

300 'single point energy': 'SinglePoint',

301 'geometry optimization': 'GeometryOptimization',

302 'band structure': 'BandStructure',

303 'molecular dynamics': 'MolecularDynamics',

304 'optical properties': 'Optics',

305 'phonon calculation': 'Phonon',

306 'E-field calculation': 'Efield',

307 'Phonon followed by E-field': 'Phonon+Efield',

308 'transition state search': 'TransitionStateSearch',

309 'Magnetic Resonance': 'MagRes',

310 'Core level spectra': 'Elnes',

311 'Electronic Spectroscopy': 'ElectronicSpectroscopy',

312 }[line.split(':')[-1].strip()]

313 elif 'stress calculation' in line:

314 parameters['calculate_stress'] = _parse_on_off(line.split()[-1])

315 elif 'calculation limited to maximum' in line:

316 parameters['run_time'] = float(line.split()[-2])

317 elif re.match(r'\soptimization strategy\s*:', line):

318 parameters['opt_strategy'] = {

319 'maximize speed(+++)': 'Speed',

320 'minimize memory(---)': 'Memory',

321 'balance speed and memory': 'Default',

322 }[line.split(':')[-1].strip()]

323

324 # Exchange-Correlation Parameters

325

326 elif re.match(r'\susing functional\s*:', line):

327 parameters['xc_functional'] = {

328 'Local Density Approximation': 'LDA',

329 'Perdew Wang (1991)': 'PW91',

330 'Perdew Burke Ernzerhof': 'PBE',

331 'revised Perdew Burke Ernzerhof': 'RPBE',

332 'PBE with Wu-Cohen exchange': 'WC',

333 'PBE for solids (2008)': 'PBESOL',

334 'Hartree-Fock': 'HF',

335 'Hartree-Fock +': 'HF-LDA',

336 'Screened Hartree-Fock': 'sX',

337 'Screened Hartree-Fock + ': 'sX-LDA',

338 'hybrid PBE0': 'PBE0',

339 'hybrid B3LYP': 'B3LYP',

340 'hybrid HSE03': 'HSE03',

341 'hybrid HSE06': 'HSE06',

342 'RSCAN': 'RSCAN',

343 }[line.split(':')[-1].strip()]

344 elif 'DFT+D: Semi-empirical dispersion correction' in line:

345 parameters['sedc_apply'] = _parse_on_off(line.split()[-1])

346 elif 'SEDC with' in line:

347 parameters['sedc_scheme'] = {

348 'OBS correction scheme': 'OBS',

349 'G06 correction scheme': 'G06',

350 'D3 correction scheme': 'D3',

351 'D3(BJ) correction scheme': 'D3-BJ',

352 'D4 correction scheme': 'D4',

353 'JCHS correction scheme': 'JCHS',

354 'TS correction scheme': 'TS',

355 'TSsurf correction scheme': 'TSSURF',

356 'TS+SCS correction scheme': 'TSSCS',

357 'aperiodic TS+SCS correction scheme': 'ATSSCS',

358 'aperiodic MBD@SCS method': 'AMBD',

359 'MBD@SCS method': 'MBD',

360 'aperiodic MBD@rsSCS method': 'AMBD*',

361 'MBD@rsSCS method': 'MBD*',

362 'XDM correction scheme': 'XDM',

363 }[line.split(':')[-1].strip()]

364

365 # Basis Set Parameters

366

367 elif 'basis set accuracy' in line:

368 parameters['basis_precision'] = line.split()[-1]

369 elif 'plane wave basis set cut-off' in line:

370 parameters['cut_off_energy'] = float(line.split()[-2])

371 elif re.match(r'\sfinite basis set correction\s*:', line):

372 parameters['finite_basis_corr'] = {

373 'none': 0,

374 'manual': 1,

375 'automatic': 2,

376 }[line.split()[-1]]

377

378 # Electronic Parameters

379

380 elif 'treating system as spin-polarized' in line:

381 parameters['spin_polarized'] = True

382

383 # Electronic Minimization Parameters

384

385 elif 'Treating system as non-metallic' in line:

386 parameters['fix_occupancy'] = True

387 elif 'total energy / atom convergence tol.' in line:

388 parameters['elec_energy_tol'] = float(line.split()[-2])

389 elif 'convergence tolerance window' in line:

390 parameters['elec_convergence_win'] = int(line.split()[-2])

391 elif 'max. number of SCF cycles:' in line:

392 parameters['max_scf_cycles'] = float(line.split()[-1])

393 elif 'dump wavefunctions every' in line:

394 parameters['num_dump_cycles'] = float(line.split()[-3])

395

396 # Density Mixing Parameters

397

398 elif 'density-mixing scheme' in line:

399 parameters['mixing_scheme'] = line.split()[-1]

400

401 return parameters

402

403

404def _read_unit_cell(out: io.TextIOBase):

405 """Read a Unit Cell block from a .castep file."""

406 for line in out:

407 fields = line.split()

408 if len(fields) == 6:

409 break

410 lattice_real = []

411 for _ in range(3):

412 lattice_real.append([float(f) for f in fields[0:3]])

413 line = out.readline()

414 fields = line.split()

415 return np.array(lattice_real)

416

417

418def _read_forces(out: io.TextIOBase, n_atoms: int):

419 """Read a block for atomic forces from a .castep file."""

420 constraints: List[FixConstraint] = []

421 forces = []

422 for line in out:

423 fields = line.split()

424 if len(fields) == 7:

425 break

426 for n in range(n_atoms):

427 consd = np.array([0, 0, 0])

428 fxyz = [0.0, 0.0, 0.0]

429 for i, force_component in enumerate(fields[-4:-1]):

430 if force_component.count("(cons'd)") > 0:

431 consd[i] = 1

432 # remove constraint labels in force components

433 fxyz[i] = float(force_component.replace("(cons'd)", ''))

434 if consd.all():

435 constraints.append(FixAtoms(n))

436 elif consd.any():

437 constraints.append(FixCartesian(n, consd))

438 forces.append(fxyz)

439 line = out.readline()

440 fields = line.split()

441 return forces, constraints

442

443

444def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int):

445 """Read fractional coordinates from a .castep file."""

446 species: List[str] = []

447 custom_species: Optional[List[str]] = None # A CASTEP special thing

448 positions_frac: List[List[float]] = []

449 for line in out:

450 fields = line.split()

451 if len(fields) == 7:

452 break

453 for _ in range(n_atoms):

454 spec_custom = fields[1].split(':', 1)

455 elem = spec_custom[0]

456 if len(spec_custom) > 1 and custom_species is None:

457 # Add it to the custom info!

458 custom_species = list(species)

459 species.append(elem)

460 if custom_species is not None:

461 custom_species.append(fields[1])

462 positions_frac.append([float(s) for s in fields[3:6]])

463 line = out.readline()

464 fields = line.split()

465 return species, custom_species, positions_frac

466

467

468def _read_stress(out: io.TextIOBase):

469 """Read a block for the stress tensor from a .castep file."""

470 for line in out:

471 fields = line.split()

472 if len(fields) == 6:

473 break

474 results = {}

475 stress = []

476 for _ in range(3):

477 stress.append([float(s) for s in fields[2:5]])

478 line = out.readline()

479 fields = line.split()

480 # stress in .castep file is given in GPa

481 results['stress'] = np.array(stress) * units.GPa

482 results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]]

483 line = out.readline()

484 if "Pressure:" in line:

485 results['pressure'] = float(line.split()[-2]) * units.GPa

486 return results

487

488

489def _add_atoms(

490 images,

491 lattice_real,

492 species,

493 custom_species,

494 positions_frac,

495 constraints,

496 results,

497):

498 # If all the lattice parameters are fixed,

499 # the `Unit Cell` block in the .castep file is not printed

500 # in every ionic step.

501 # The lattice paramters are therefore taken from the last step.

502 # This happens:

503 # - `GeometryOptimization`: `FIX_ALL_CELL : TRUE`

504 # - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT`

505 if lattice_real is None:

506 lattice_real = images[-1].cell.copy()

507

508 # for highly symmetric systems (where essentially only the

509 # stress is optimized, but the atomic positions) positions

510 # are only printed once.

511 if species is None:

512 species = images[-1].symbols

513 if positions_frac is None:

514 positions_frac = images[-1].get_scaled_positions()

515

516 _set_energy_and_free_energy(results)

517

518 atoms = Atoms(

519 species,

520 cell=lattice_real,

521 constraint=constraints,

522 pbc=True,

523 scaled_positions=positions_frac,

524 )

525 if custom_species is not None:

526 atoms.new_array(

527 'castep_custom_species',

528 np.array(custom_species),

529 )

530

531 atoms.calc = SinglePointCalculator(atoms)

532 atoms.calc.results = results

533

534 images.append(atoms)

535

536

537def _read_mulliken_charges(out: io.TextIOBase):

538 """Read a block for Mulliken charges from a .castep file."""

539 for i in range(3):

540 line = out.readline()

541 if i == 1:

542 spin_polarized = 'Spin' in line

543 results = defaultdict(list)

544 for line in out:

545 fields = line.split()

546 if len(fields) == 1:

547 break

548 if spin_polarized:

549 if len(fields) != 7: # due to CASTEP 18 outformat changes

550 results['charges'].append(float(fields[-2]))

551 results['magmoms'].append(float(fields[-1]))

552 else:

553 results['charges'].append(float(fields[-1]))

554 return {k: np.array(v) for k, v in results.items()}

555

556

557def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int):

558 """Read the Hirshfeld analysis when iprint > 1 from a .castep file."""

559 results = defaultdict(list)

560 for _ in range(n_atoms):

561 for line in out:

562 if line.strip() == '':

563 break # end for each atom

564 if 'Hirshfeld / free atomic volume :' in line:

565 line = out.readline()

566 fields = line.split()

567 results['hirshfeld_volume_ratios'].append(float(fields[0]))

568 return {k: np.array(v) for k, v in results.items()}

569

570

571def _read_hirshfeld_charges(out: io.TextIOBase):

572 """Read a block for Hirshfeld charges from a .castep file."""

573 for i in range(3):

574 line = out.readline()

575 if i == 1:

576 spin_polarized = 'Spin' in line

577 results = defaultdict(list)

578 for line in out:

579 fields = line.split()

580 if len(fields) == 1:

581 break

582 if spin_polarized:

583 results['hirshfeld_charges'].append(float(fields[-2]))

584 results['hirshfeld_magmoms'].append(float(fields[-1]))

585 else:

586 results['hirshfeld_charges'].append(float(fields[-1]))

587 return {k: np.array(v) for k, v in results.items()}

588

589

590def _set_energy_and_free_energy(results: Dict[str, Any]):

591 """Set values referred to as `energy` and `free_energy`."""

592 if 'energy_with_dispersion_correction' in results:

593 suffix = '_with_dispersion_correction'

594 else:

595 suffix = '_without_dispersion_correction'

596

597 if 'free_energy' + suffix in results: # metallic

598 keye = 'energy_zero' + suffix

599 keyf = 'free_energy' + suffix

600 else: # non-metallic

601 # The finite basis set correction is applied to the energy at finite T

602 # (not the energy at 0 K). We should hence refer to the corrected value

603 # as `energy` only when the free energy is unavailable, i.e., only when

604 # FIX_OCCUPANCY : TRUE and thus no smearing is applied.

605 if 'energy_with_finite_basis_set_correction' in results:

606 keye = 'energy_with_finite_basis_set_correction'

607 else:

608 keye = 'energy' + suffix

609 keyf = 'energy' + suffix

610

611 results['energy'] = results[keye]

612 results['free_energy'] = results[keyf]

Coverage for /builds/alexhroom/ase/ase/io/castep/castep_reader.py: 91.04%

335 statements