From 3864bddbba5c867e4bcd7571d6c30fffd0c53787 Mon Sep 17 00:00:00 2001 From: Marnik Bercx Date: Fri, 28 Jan 2022 03:19:41 +0100 Subject: [PATCH] Refactor base parsing --- aiida_quantumespresso/calculations/dos.py | 4 - aiida_quantumespresso/calculations/matdyn.py | 4 - .../calculations/namelists.py | 6 + aiida_quantumespresso/calculations/projwfc.py | 4 - aiida_quantumespresso/calculations/pw2gw.py | 6 - .../calculations/pw2wannier90.py | 4 - aiida_quantumespresso/calculations/q2r.py | 4 - aiida_quantumespresso/parsers/base.py | 125 +++++++++++++- aiida_quantumespresso/parsers/cp.py | 10 +- aiida_quantumespresso/parsers/dos.py | 50 ++---- aiida_quantumespresso/parsers/matdyn.py | 34 ++-- aiida_quantumespresso/parsers/neb.py | 129 +++++++-------- .../parsers/parse_raw/base.py | 105 +----------- .../parsers/parse_raw/neb.py | 110 +----------- aiida_quantumespresso/parsers/parse_raw/ph.py | 56 +------ .../parsers/parse_raw/pw2gw.py | 46 ------ aiida_quantumespresso/parsers/ph.py | 79 ++++----- aiida_quantumespresso/parsers/pp.py | 156 ++++++------------ aiida_quantumespresso/parsers/projwfc.py | 74 ++++----- aiida_quantumespresso/parsers/pw.py | 18 +- aiida_quantumespresso/parsers/pw2gw.py | 76 +++------ aiida_quantumespresso/parsers/pw2wannier90.py | 31 ++-- aiida_quantumespresso/parsers/q2r.py | 32 ++-- tests/parsers/test_dos/test_dos_default.yml | 2 +- .../test_matdyn/test_matdyn_default.yml | 4 +- tests/parsers/test_neb.py | 2 - tests/parsers/test_neb/test_neb_default.yml | 1 + tests/parsers/test_ph/test_ph_default.yml | 3 +- .../parsers/test_ph/test_ph_not_converged.yml | 3 +- .../test_ph/test_ph_out_of_walltime.yml | 3 +- .../test_pw2gw/test_pw2gw_default_data.yml | 3 +- .../test_pw2wannier90_default.yml | 2 +- 32 files changed, 423 insertions(+), 763 deletions(-) delete mode 100644 aiida_quantumespresso/parsers/parse_raw/pw2gw.py diff --git a/aiida_quantumespresso/calculations/dos.py b/aiida_quantumespresso/calculations/dos.py index c97b8e6c1..2e61f8b95 100644 
--- a/aiida_quantumespresso/calculations/dos.py +++ b/aiida_quantumespresso/calculations/dos.py @@ -26,10 +26,6 @@ def define(cls, spec): spec.output('output_parameters', valid_type=orm.Dict) spec.output('output_dos', valid_type=orm.XyData) spec.default_output_node = 'output_parameters' - spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ', - message='The stdout output file could not be read.') - spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE', - message='The stdout output file was incomplete probably because the calculation got interrupted.') spec.exit_code(330, 'ERROR_READING_DOS_FILE', message='The dos file could not be read from the retrieved folder.') # yapf: enable diff --git a/aiida_quantumespresso/calculations/matdyn.py b/aiida_quantumespresso/calculations/matdyn.py index 64b6dd617..b20f8b813 100644 --- a/aiida_quantumespresso/calculations/matdyn.py +++ b/aiida_quantumespresso/calculations/matdyn.py @@ -34,10 +34,6 @@ def define(cls, spec): spec.output('output_parameters', valid_type=orm.Dict) spec.output('output_phonon_bands', valid_type=orm.BandsData) spec.default_output_node = 'output_parameters' - spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ', - message='The stdout output file could not be read.') - spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE', - message='The stdout output file was incomplete probably because the calculation got interrupted.') spec.exit_code(330, 'ERROR_OUTPUT_FREQUENCIES', message='The output frequencies file could not be read from the retrieved folder.') spec.exit_code(410, 'ERROR_OUTPUT_KPOINTS_MISSING', diff --git a/aiida_quantumespresso/calculations/namelists.py b/aiida_quantumespresso/calculations/namelists.py index 7e466fcb3..67fec76af 100644 --- a/aiida_quantumespresso/calculations/namelists.py +++ b/aiida_quantumespresso/calculations/namelists.py @@ -59,6 +59,12 @@ def define(cls, spec): help='Use an additional node for special settings') spec.input('parent_folder', valid_type=(RemoteData, FolderData, SinglefileData), 
required=False, help='Use a local or remote folder as parent folder (for restarts and similar)') + spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING', + message='The retrieved folder did not contain the required stdout output file.') + spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ', + message='The stdout output file could not be read.') + spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE', + message='The stdout output file was incomplete probably because the calculation got interrupted.') # yapf: enable def _get_following_text(self): diff --git a/aiida_quantumespresso/calculations/projwfc.py b/aiida_quantumespresso/calculations/projwfc.py index 5bfaddccb..f65598513 100644 --- a/aiida_quantumespresso/calculations/projwfc.py +++ b/aiida_quantumespresso/calculations/projwfc.py @@ -61,10 +61,6 @@ def define(cls, spec): message='The retrieved temporary folder could not be accessed.') spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING', message='The retrieved folder did not contain the required XML file.') - spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ', - message='The stdout output file could not be read.') - spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE', - message='The stdout output file was incomplete probably because the calculation got interrupted.') spec.exit_code(320, 'ERROR_OUTPUT_XML_READ', message='The XML output file could not be read.') spec.exit_code(321, 'ERROR_OUTPUT_XML_PARSE', diff --git a/aiida_quantumespresso/calculations/pw2gw.py b/aiida_quantumespresso/calculations/pw2gw.py index ba331dd33..d715b22fa 100644 --- a/aiida_quantumespresso/calculations/pw2gw.py +++ b/aiida_quantumespresso/calculations/pw2gw.py @@ -37,16 +37,10 @@ def define(cls, spec): spec.output('eps', valid_type=orm.ArrayData, help='The `eps` output node containing 5 arrays `energy`, `epsX`, `epsY`, `epsZ`, `epsTOT`') - spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING', - message='The retrieved folder did not contain the required stdout output file.') spec.exit_code(305, 
'ERROR_OUTPUT_FILES', message='The eps*.dat output files could not be read or parsed.') - spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ', - message='The stdout output file could not be read.') spec.exit_code(311, 'ERROR_OUTPUT_STDOUT_PARSE', message='The stdout output file could not be parsed.') - spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE', - message='The stdout output file was incomplete probably because the calculation got interrupted.') spec.exit_code(330, 'ERROR_OUTPUT_FILES_INVALID_FORMAT', message='The eps*.dat output files do not have the expected shape (N, 2).') spec.exit_code(331, 'ERROR_OUTPUT_FILES_ENERGY_MISMATCH', diff --git a/aiida_quantumespresso/calculations/pw2wannier90.py b/aiida_quantumespresso/calculations/pw2wannier90.py index cc00f5968..25781e3ad 100644 --- a/aiida_quantumespresso/calculations/pw2wannier90.py +++ b/aiida_quantumespresso/calculations/pw2wannier90.py @@ -31,10 +31,6 @@ def define(cls, spec): help='The output folder of a pw.x calculation') spec.output('output_parameters', valid_type=Dict) spec.default_output_node = 'output_parameters' - spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ', - message='The stdout output file could not be read.') - spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE', - message='The stdout output file was incomplete probably because the calculation got interrupted.') spec.exit_code(340, 'ERROR_GENERIC_QE_ERROR', message='Encountered a generic error message') spec.exit_code(350, 'ERROR_UNEXPECTED_PARSER_EXCEPTION', diff --git a/aiida_quantumespresso/calculations/q2r.py b/aiida_quantumespresso/calculations/q2r.py index 6f585b99a..05684e1b4 100644 --- a/aiida_quantumespresso/calculations/q2r.py +++ b/aiida_quantumespresso/calculations/q2r.py @@ -31,10 +31,6 @@ def define(cls, spec): super().define(spec) spec.input('parent_folder', valid_type=(orm.RemoteData, orm.FolderData), required=True) spec.output('force_constants', valid_type=ForceConstantsData) - spec.exit_code(310, 
'ERROR_OUTPUT_STDOUT_READ', - message='The stdout output file could not be read.') - spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE', - message='The stdout output file was incomplete probably because the calculation got interrupted.') spec.exit_code(330, 'ERROR_READING_FORCE_CONSTANTS_FILE', message='The force constants file could not be read.') # yapf: enable diff --git a/aiida_quantumespresso/parsers/base.py b/aiida_quantumespresso/parsers/base.py index 063985d02..bde64dbab 100644 --- a/aiida_quantumespresso/parsers/base.py +++ b/aiida_quantumespresso/parsers/base.py @@ -3,15 +3,128 @@ All `Parser` implementations in `aiida-quantumespresso` must use this base class, not `aiida.parsers.Parser`. """ -from aiida.parsers import Parser as _BaseParser +import re +import typing + +from aiida.parsers import Parser +from aiida.common import AttributeDict +from aiida.engine import ExitCode +from aiida_quantumespresso.utils.mapping import get_logging_container +from aiida_quantumespresso.parsers.parse_raw.base import convert_qe_time_to_sec + +__all__ = ('BaseParser',) + + +class BaseParser(Parser): # pylint: disable=abstract-method + """Custom ``Parser`` class for ``aiida-quantumespresso`` parser implementations.""" + + class_error_map = {} + class_warning_map = {} + + base_error_map = { + 'Maximum CPU time exceeded': 'ERROR_OUT_OF_WALLTIME', + } + base_warning_map = { + 'Warning:': None, + 'DEPRECATED:': None, + } + + @classmethod + def get_error_map(cls): + """The full error map of the parser class.""" + error_map = cls.base_error_map.copy() + error_map.update(cls.class_error_map) + return error_map + + @classmethod + def get_warning_map(cls): + """The full error map of the parser class.""" + warning_map = cls.base_warning_map.copy() + warning_map.update(cls.class_warning_map) + return warning_map + + def _retrieve_parse_stdout(self, **kwargs) -> typing.Tuple[str, dict, AttributeDict]: + """Retrieve and parse the ``stdout`` content of a Quantum ESPRESSO calculation. 
+ + :returns: size 3 tuple with the stdout content, parsed data and log messages + """ + logs = get_logging_container() + + filename_stdout = self.node.get_option('output_filename') + + if filename_stdout not in self.retrieved.list_object_names(): + logs.error.append('ERROR_OUTPUT_STDOUT_MISSING') + return {}, logs + + try: + with self.retrieved.open(filename_stdout, 'r') as handle: + stdout = handle.read() + except OSError: + logs.error.append('ERROR_OUTPUT_STDOUT_READ') + return {}, logs + + parsed_data, stdout_logs = self.parse_stdout(stdout, **kwargs) + + for log_type, log_items in stdout_logs.items(): + logs[log_type].extend(log_items) + + return parsed_data, logs + + @classmethod + def parse_stdout(cls, stdout: str) -> typing.Tuple[dict, AttributeDict]: + """Parse the ``stdout`` content of a Quantum ESPRESSO calculation. + + This function only checks for basic content like JOB DONE, errors with %%%%% etc. + + :param stdout: the stdout content as a string. + :returns: tuple of two dictionaries, with the parsed data and log messages, respectively. + """ + logs = get_logging_container() + parsed_data = {} + + if not re.search(r'JOB DONE', stdout): + logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE') + + code_match = re.search(r'Program\s(?P[A-Z|\_|\d]+)\sv\.(?P[\d\.|a-z|A-Z]+)\s', stdout) + + if code_match: + + code_name = code_match.groupdict()['code_name'] + parsed_data['code_version'] = code_match.groupdict()['code_version'] + + wall_match = re.search(fr'{code_name}\s+:[\s\S]+\s+(?P[.\d|s|m|d|h]+)\sWALL', stdout) + + if wall_match: + parsed_data['wall_time'] = wall_match.groupdict()['wall_time'] + + try: + parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(wall_match.groupdict()['wall_time']) + except ValueError: + logs.warnings.append('Unable to convert wall time from `stdout` to seconds.') + + # Look for typical Quantum ESPRESSO error messages between %%%%%-lines that are not in our error map + if re.search(r'\%\%\%\%\%', stdout): # Note: using e.g. 
`\%{5}` is significantly slower + for error_message in set(re.split(r'\%\%\%\%\%\n', stdout)[1::2]): + + if not any(error_marker in error_message for error_marker in cls.get_error_map().keys()): + logs.error.append(error_message.rstrip('\n%')) -__all__ = ('Parser',) + # Look for error messages in general + for error_marker, error, in cls.get_error_map().items(): + if re.search(fr'{error_marker}', stdout): + logs.error.append(error) + # Look for lines with warnings from the `warning_map` + for warning_marker, warning in cls.get_warning_map().items(): + for warning_message in set(re.findall(fr'({warning_marker}.+)\n', stdout)): + if warning is not None: + logs.warning.append(warning) + else: + logs.warning.append(warning_message) -class Parser(_BaseParser): # pylint: disable=abstract-method - """Custom `Parser` class for `aiida-quantumespresso` parser implementations.""" + return parsed_data, logs - def emit_logs(self, logging_dictionaries, ignore=None): + def _emit_logs(self, logging_dictionaries: AttributeDict, ignore: list = None) -> None: """Emit the messages in one or multiple "log dictionaries" through the logger of the parser. A log dictionary is expected to have the following structure: each key must correspond to a log level of the @@ -50,7 +163,7 @@ def emit_logs(self, logging_dictionaries, ignore=None): except AttributeError: pass - def exit(self, exit_code): + def _exit(self, exit_code: ExitCode) -> ExitCode: """Log the exit message of the give exit code with level `ERROR` and return the exit code. 
This is a utility function if one wants to return from the parse method and automically add the exit message diff --git a/aiida_quantumespresso/parsers/cp.py b/aiida_quantumespresso/parsers/cp.py index 143030d7b..1318dd7cc 100644 --- a/aiida_quantumespresso/parsers/cp.py +++ b/aiida_quantumespresso/parsers/cp.py @@ -5,11 +5,11 @@ from aiida.orm import Dict, TrajectoryData from qe_tools import CONSTANTS -from .base import Parser +from .base import BaseParser from .parse_raw.cp import parse_cp_raw_output, parse_cp_traj_stanzas -class CpParser(Parser): +class CpParser(BaseParser): """This class is the implementation of the Parser class for Cp.""" def parse(self, **kwargs): @@ -26,14 +26,14 @@ def parse(self, **kwargs): stdout_filename = self.node.get_attribute('output_filename') # at least the stdout should exist if stdout_filename not in list_of_files: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) + return self._exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) # This should match 1 file xml_files = [xml_file for xml_file in self.node.process_class.xml_filenames if xml_file in list_of_files] if not xml_files: - return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE) + return self._exit(self.exit_codes.ERROR_MISSING_XML_FILE) elif len(xml_files) > 1: - return self.exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE) + return self._exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE) # cp.x can produce, depending on the particular version of the code, a file called `print_counter.xml` or # `print_counter`, which is a plain text file with the number of the last timestep written in the trajectory diff --git a/aiida_quantumespresso/parsers/dos.py b/aiida_quantumespresso/parsers/dos.py index 5893a63d7..d015e9e37 100644 --- a/aiida_quantumespresso/parsers/dos.py +++ b/aiida_quantumespresso/parsers/dos.py @@ -2,47 +2,31 @@ import numpy as np from aiida.orm import Dict, XyData +from aiida.common import AttributeDict from aiida_quantumespresso.parsers import 
QEOutputParsingError -from aiida_quantumespresso.parsers.parse_raw.base import parse_output_base -from .base import Parser +from .base import BaseParser -class DosParser(Parser): - """This class is the implementation of the Parser class for Dos.""" +class DosParser(BaseParser): + """``Parser`` implementation for the ``DosCalculation`` calculation job class.""" def parse(self, **kwargs): - """Parses the datafolder, stores results. + """Parse the retrieved files of a ``DosCalculation`` into output nodes.""" + parsed_stdout, logs_stdout = self._retrieve_parse_stdout() - Retrieves dos output, and some basic information from the out_file, such as warnings and wall_time - """ - retrieved = self.retrieved + for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']: + if exit_code in logs_stdout.error: + return self._exit(self.exit_codes.get(exit_code)) - # Read standard out - try: - filename_stdout = self.node.get_option('output_filename') # or get_attribute(), but this is clearer - with retrieved.open(filename_stdout, 'r') as fil: - out_file = fil.readlines() - except OSError: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) - - job_done = False - for i in range(len(out_file)): - line = out_file[-i] - if 'JOB DONE' in line: - job_done = True - break - if not job_done: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE) - - # check that the dos file is present, if it is, read it + self.out('output_parameters', Dict(dict=parsed_stdout)) + + # Parse the DOS try: - with retrieved.open(self.node.process_class._DOS_FILENAME, 'r') as fil: - dos_file = fil.readlines() + with self.retrieved.open(self.node.process_class._DOS_FILENAME, 'r') as handle: + dos_file = handle.readlines() except OSError: - return self.exit(self.exit_codes.ERROR_READING_DOS_FILE) - - # end of initial checks + return self._exit(self.exit_codes.ERROR_READING_DOS_FILE) array_names = [[], []] array_units = [[], []] @@ -79,11 +63,7 
@@ def parse(self, **kwargs): y_units += ['states/eV'] xy_data.set_y(y_arrays, y_names, y_units) - parsed_data, logs = parse_output_base(out_file, 'DOS') - self.emit_logs(logs) - self.out('output_dos', xy_data) - self.out('output_parameters', Dict(dict=parsed_data)) def parse_raw_dos(dos_file, array_names, array_units): diff --git a/aiida_quantumespresso/parsers/matdyn.py b/aiida_quantumespresso/parsers/matdyn.py index c8462e49a..d60680c90 100644 --- a/aiida_quantumespresso/parsers/matdyn.py +++ b/aiida_quantumespresso/parsers/matdyn.py @@ -3,26 +3,24 @@ from qe_tools import CONSTANTS from aiida_quantumespresso.calculations.matdyn import MatdynCalculation -from .base import Parser +from .base import BaseParser -class MatdynParser(Parser): - """Parser implementation for the MatdynCalculation.""" +class MatdynParser(BaseParser): + """``Parser`` implementation for the ``MatDynCalculation`` calculation job class.""" def parse(self, **kwargs): - """Parse the retrieved files from a `MatdynCalculation`.""" - retrieved = self.retrieved - filename_stdout = self.node.get_option('output_filename') - filename_frequencies = MatdynCalculation._PHONON_FREQUENCIES_NAME + """Parse the retrieved files from a ``MatdynCalculation`` into output nodes.""" + parsed_stdout, logs_stdout = self._retrieve_parse_stdout() - if filename_stdout not in retrieved.list_object_names(): - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) + for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']: + if exit_code in logs_stdout.error: + return self._exit(self.exit_codes.get(exit_code)) - if 'JOB DONE' not in retrieved.get_object_content(filename_stdout): - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE) + filename_frequencies = MatdynCalculation._PHONON_FREQUENCIES_NAME - if filename_frequencies not in retrieved.list_object_names(): - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) + if filename_frequencies not 
in self.retrieved.list_object_names(): + return self._exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) # Extract the kpoints from the input data and create the `KpointsData` for the `BandsData` try: @@ -33,15 +31,15 @@ def parse(self, **kwargs): kpoints_for_bands = orm.KpointsData() kpoints_for_bands.set_kpoints(kpoints) - parsed_data = parse_raw_matdyn_phonon_file(retrieved.get_object_content(filename_frequencies)) + parsed_data = parse_raw_matdyn_phonon_file(self.retrieved.get_object_content(filename_frequencies)) try: num_kpoints = parsed_data.pop('num_kpoints') except KeyError: - return self.exit(self.exit_codes.ERROR_OUTPUT_KPOINTS_MISSING) + return self._exit(self.exit_codes.ERROR_OUTPUT_KPOINTS_MISSING) if num_kpoints != kpoints.shape[0]: - return self.exit(self.exit_codes.ERROR_OUTPUT_KPOINTS_INCOMMENSURATE) + return self._exit(self.exit_codes.ERROR_OUTPUT_KPOINTS_INCOMMENSURATE) output_bands = orm.BandsData() output_bands.set_kpointsdata(kpoints_for_bands) @@ -50,11 +48,9 @@ def parse(self, **kwargs): for message in parsed_data['warnings']: self.logger.error(message) - self.out('output_parameters', orm.Dict(dict=parsed_data)) + self.out('output_parameters', orm.Dict(dict=parsed_stdout)) self.out('output_phonon_bands', output_bands) - return - def parse_raw_matdyn_phonon_file(phonon_frequencies): """Parses the phonon frequencies file. 
diff --git a/aiida_quantumespresso/parsers/neb.py b/aiida_quantumespresso/parsers/neb.py index fb41a2490..a893a7bf3 100644 --- a/aiida_quantumespresso/parsers/neb.py +++ b/aiida_quantumespresso/parsers/neb.py @@ -2,8 +2,11 @@ from aiida.common import NotExistent from aiida.orm import Dict -from aiida_quantumespresso.parsers import QEOutputParsingError from aiida_quantumespresso.parsers.parse_raw import convert_qe_to_aiida_structure +from aiida.orm import TrajectoryData, ArrayData +import os +import numpy + from aiida_quantumespresso.parsers.parse_raw.pw import reduce_symmetries from aiida_quantumespresso.parsers.parse_raw.pw import parse_stdout as parse_pw_stdout from aiida_quantumespresso.parsers.parse_xml.pw.parse import parse_xml as parse_pw_xml @@ -11,40 +14,36 @@ from aiida_quantumespresso.parsers.parse_raw.neb import parse_raw_output_neb from aiida_quantumespresso.parsers.pw import PwParser from aiida_quantumespresso.calculations.pw import PwCalculation -from .base import Parser +from .base import BaseParser -class NebParser(Parser): +class NebParser(BaseParser): """`Parser` implementation for the `NebCalculation` calculation job class.""" + # Key that contains the optional parser options in the `settings` input node. + parser_settings_key = 'parser_options' + + class_warning_map = { + 'scf convergence NOT achieved on image': 'SCF did not converge for a given image', + 'Maximum CPU time exceeded': 'Maximum CPU time exceeded', + 'reached the maximum number of steps': 'Maximum number of iterations reached in the image optimization', + } + def parse(self, **kwargs): - """Parse the retrieved files of a completed `NebCalculation` into output nodes. + """Parse the retrieved files of a completed ``NebCalculation`` into output nodes. 
- Two nodes that are expected are the default 'retrieved' `FolderData` node which will store the retrieved files + Two nodes that are expected are the default 'retrieved' ``FolderData`` node which will store the retrieved files permanently in the repository. The second required node is a filepath under the key `retrieved_temporary_files` which should contain the temporary retrieved files. """ - from aiida.orm import TrajectoryData, ArrayData - import os - import numpy - - PREFIX = self.node.process_class._PREFIX - - retrieved = self.retrieved - list_of_files = retrieved.list_object_names() # Note: this includes folders, but not the files they contain. - - # The stdout is required for parsing - filename_stdout = self.node.get_attribute('output_filename') - - if filename_stdout not in list_of_files: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) + prefix = self.node.process_class._PREFIX # Look for optional settings input node and potential 'parser_options' dictionary within it # Note that we look for both NEB and PW parser options under "inputs.settings.parser_options"; # we don't even have a namespace "inputs.pw.settings". try: settings = self.node.inputs.settings.get_dict() - parser_options = settings[self.get_parser_settings_key()] + parser_options = settings[self.parser_settings_key] except (AttributeError, KeyError, NotExistent): settings = {} parser_options = {} @@ -52,80 +51,63 @@ def parse(self, **kwargs): # load the pw input parameters dictionary pw_input_dict = self.node.inputs.pw__parameters.get_dict() - # load the neb input parameters dictionary - neb_input_dict = self.node.inputs.parameters.get_dict() - # First parse the Neb output - try: - stdout = retrieved.get_object_content(filename_stdout) - neb_out_dict, iteration_data, raw_successful = parse_raw_output_neb(stdout, neb_input_dict) - # TODO: why do we ignore raw_successful ? 
- except (OSError, QEOutputParsingError): - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) + parsed_stdout, logs_stdout = self._retrieve_parse_stdout() + self._emit_logs(logs_stdout) + iteration_data = parsed_stdout.pop('iteration_data') - for warn_type in ['warnings', 'parser_warnings']: - for message in neb_out_dict[warn_type]: - self.logger.warning(f'parsing NEB output: {message}') + for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']: + if exit_code in logs_stdout.error: + return self._exit(self.exit_codes.get(exit_code)) - if 'QE neb run did not reach the end of the execution.' in neb_out_dict['parser_warnings']: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE) - - # Retrieve the number of images - try: - num_images = neb_input_dict['num_of_images'] - except KeyError: - try: - num_images = neb_out_dict['num_of_images'] - except KeyError: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_PARSE) - if num_images < 2: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_PARSE) + num_images = parsed_stdout['num_of_images'] # Now parse the information from the individual pw calculations for the different images image_data = {} positions = [] cells = [] - # for each image... 
+ for i in range(num_images): # check if any of the known XML output file names are present, and parse the first that we find - relative_output_folder = os.path.join(f'{PREFIX}_{i + 1}', f'{PREFIX}.save') + relative_output_folder = os.path.join(f'{prefix}_{i + 1}', f'{prefix}.save') retrieved_files = self.retrieved.list_object_names(relative_output_folder) + for xml_filename in PwCalculation.xml_filenames: if xml_filename in retrieved_files: xml_file_path = os.path.join(relative_output_folder, xml_filename) try: - with retrieved.open(xml_file_path) as xml_file: + with self.retrieved.open(xml_file_path) as xml_file: parsed_data_xml, logs_xml = parse_pw_xml(xml_file, None) except IOError: - return self.exit(self.exit_codes.ERROR_OUTPUT_XML_READ) + return self._exit(self.exit_codes.ERROR_OUTPUT_XML_READ) except XMLParseError: - return self.exit(self.exit_codes.ERROR_OUTPUT_XML_PARSE) + return self._exit(self.exit_codes.ERROR_OUTPUT_XML_PARSE) except XMLUnsupportedFormatError: - return self.exit(self.exit_codes.ERROR_OUTPUT_XML_FORMAT) + return self._exit(self.exit_codes.ERROR_OUTPUT_XML_FORMAT) except Exception: import traceback traceback.print_exc() - return self.exit(self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION) + return self._exit(self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION) # this image is dealt with, so break the inner loop and go to the next image break # otherwise, if none of the filenames we tried exists, exit with an error else: - return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE) + return self._exit(self.exit_codes.ERROR_MISSING_XML_FILE) # look for pw output and parse it - pw_out_file = os.path.join(f'{PREFIX}_{i + 1}', 'PW.out') + pw_out_file = os.path.join(f'{prefix}_{i + 1}', 'PW.out') try: - with retrieved.open(pw_out_file, 'r') as f: + with self.retrieved.open(pw_out_file, 'r') as f: pw_out_text = f.read() # Note: read() and not readlines() except IOError: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) + return 
self._exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) try: parsed_data_stdout, logs_stdout = parse_pw_stdout( pw_out_text, pw_input_dict, parser_options, parsed_data_xml ) except Exception: - return self.exit(self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION) + return self._exit(self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION) parsed_structure = parsed_data_stdout.pop('structure', {}) parsed_trajectory = parsed_data_stdout.pop('trajectory', {}) @@ -159,13 +141,13 @@ def parse(self, **kwargs): for log_type in ['warning', 'error']: for message in logs_stdout[log_type]: formatted_message = f'{log_type}: {message}' - if formatted_message not in neb_out_dict['warnings']: - neb_out_dict['warnings'].append(formatted_message) + if formatted_message not in parsed_stdout['warnings']: + parsed_stdout['warnings'].append(formatted_message) # Symbols can be obtained simply from the last image symbols = [str(site.kind_name) for site in structure_data.sites] - output_params = Dict(dict=dict(list(neb_out_dict.items()) + list(image_data.items()))) + output_params = Dict(dict=dict(list(parsed_stdout.items()) + list(image_data.items()))) self.out('output_parameters', output_params) trajectory = TrajectoryData() @@ -186,16 +168,16 @@ def parse(self, **kwargs): # Load the original and interpolated energy profile along the minimum-energy path (mep) try: - filename = PREFIX + '.dat' - with retrieved.open(filename, 'r') as handle: + filename = prefix + '.dat' + with self.retrieved.open(filename, 'r') as handle: mep = numpy.loadtxt(handle) except Exception: self.logger.warning(f'could not open expected output file `{filename}`.') mep = numpy.array([[]]) try: - filename = PREFIX + '.int' - with retrieved.open(filename, 'r') as handle: + filename = prefix + '.int' + with self.retrieved.open(filename, 'r') as handle: interp_mep = numpy.loadtxt(handle) except Exception: self.logger.warning(f'could not open expected output file `{filename}`.') @@ -209,7 +191,18 @@ def parse(self, **kwargs): 
return - @staticmethod - def get_parser_settings_key(): - """Return the key that contains the optional parser options in the `settings` input node.""" - return 'parser_options' + @classmethod + def parse_stdout(cls, stdout: str) -> tuple: + """Parse the ``stdout`` content of a Quantum ESPRESSO ``neb.x`` calculation. + + :param stdout: the stdout content as a string. + :returns: tuple of two dictionaries, with the parsed data and log messages, respectively. + """ + parsed_data, logs = super().parse_stdout(stdout) + + neb_out_dict, iteration_data = parse_raw_output_neb(stdout) + + parsed_data.update(neb_out_dict) + parsed_data['iteration_data'] = iteration_data + + return parsed_data, logs diff --git a/aiida_quantumespresso/parsers/parse_raw/base.py b/aiida_quantumespresso/parsers/parse_raw/base.py index 4ffbf7402..35c1c6180 100644 --- a/aiida_quantumespresso/parsers/parse_raw/base.py +++ b/aiida_quantumespresso/parsers/parse_raw/base.py @@ -2,110 +2,9 @@ """A basic parser for the common format of QE.""" import re -from aiida.orm.nodes.data.structure import Kind, Site +from aiida.orm import StructureData -from aiida.plugins import DataFactory - -StructureData = DataFactory('structure') - -__all__ = ('parse_output_base', 'parse_output_error', 'convert_qe_time_to_sec', 'convert_qe_to_aiida_structure') - - -def parse_output_base(filecontent, codename=None, message_map=None): - """Parses the output file of a QE calculation, just checking for basic content like JOB DONE, errors with %%%% etc. - - :param filecontent: a string with the output file content - :param codename: the string printed both in the header and near the walltime. - If passed, a few more things are parsed (e.g. code version, walltime, ...) 
- :returns: tuple of two dictionaries, with the parsed data and log messages, respectively - """ - from aiida_quantumespresso.utils.mapping import get_logging_container - - keys = ['error', 'warning'] - - if message_map is not None and (not isinstance(message_map, dict) or any(key not in message_map for key in keys)): - raise RuntimeError(f'invalid format `message_map`: should be dictionary with two keys {keys}') - - logs = get_logging_container() - parsed_data = {} - - lines = filecontent if isinstance(filecontent, list) else filecontent.split('\n') - - for line in lines: - if 'JOB DONE' in line: - break - else: - logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE') - - if codename is not None: - - codestring = f'Program {codename}' - - for line_number, line in enumerate(lines): - - if codestring in line and 'starts on' in line: - parsed_data['code_version'] = line.split(codestring)[1].split('starts on')[0].strip() - - # Parse the walltime - if codename in line and 'WALL' in line: - try: - time = line.split('CPU')[1].split('WALL')[0].strip() - parsed_data['wall_time'] = time - except (ValueError, IndexError): - logs.warnings.append('ERROR_PARSING_WALLTIME') - else: - try: - parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(time) - except ValueError: - logs.warnings.append('ERROR_CONVERTING_WALLTIME_TO_SECONDS') - - # Parse an error message with optional mapping of the message - if '%%%%%%%%%%%%%%' in line: - parse_output_error(lines, line_number, logs, message_map) - - return parsed_data, logs - - -def parse_output_error(lines, line_number_start, logs, message_map=None): - """Parse a Quantum ESPRESSO error message which appears between two lines marked by ``%%%%%%%%``) - - :param lines: a list of strings gotten by splitting the standard output content on newlines - :param line_number_start: the line at which we identified some ``%%%%%%%%`` - :param logs: a logging container from `aiida_quantumespresso.utils.mapping.get_logging_container` - """ - - def 
map_message(message, message_map, logs): - - # Match any known error and warning messages - for marker, message in message_map['error'].items(): - if marker in line: - if message is None: - message = line - logs.error.append(message) - - for marker, message in message_map['warning'].items(): - if marker in line: - if message is None: - message = line - logs.warning.append(message) - - # First determine the line that closes the error block which is also marked by ``%%%%%%%`` in the line - for line_number, line in enumerate(lines[line_number_start + 1:]): - if '%%%%%%%%%%%%' in line: - line_number_end = line_number - break - else: - return - - # Get the set of unique lines between the error indicators and pass them through the message map, or if not provided - # simply append the message to the `error` list of the logs container - for message in set(lines[line_number_start:line_number_end]): - if message_map is not None: - map_message(message, message_map, logs) - else: - logs.error(message) - - return +__all__ = ('convert_qe_time_to_sec', 'convert_qe2aiida_structure', 'convert_qe_to_kpoints') def convert_qe_time_to_sec(timestr): diff --git a/aiida_quantumespresso/parsers/parse_raw/neb.py b/aiida_quantumespresso/parsers/parse_raw/neb.py index df040d169..1fdf1bbf7 100644 --- a/aiida_quantumespresso/parsers/parse_raw/neb.py +++ b/aiida_quantumespresso/parsers/parse_raw/neb.py @@ -11,53 +11,20 @@ from aiida_quantumespresso.parsers.parse_raw import convert_qe_time_to_sec -def parse_raw_output_neb(stdout, input_dict, parser_opts=None): +def parse_raw_output_neb(stdout): """Parses the output of a neb calculation Receives in input the paths to the output file. :param stdout: the stdout content as a string - :param input_dict: dictionary with the neb input parameters - :param parser_opts: not used :return parameter_data: a dictionary with parsed parameters :return iteration_data: a dictionary with arrays (for relax & md calcs.) 
- :return job_successful: a boolean that is False in case of failed calculations - - :raises QEOutputParsingError: for errors in the parsing, - - 2 different keys to check in output: parser_warnings and warnings. - On an upper level, these flags MUST be checked. - The first is expected to be empty unless QE failures or unfinished jobs. """ import copy - job_successful = True parser_warnings = [] - if not stdout: # there is an output file, but it's empty -> crash - job_successful = False - - # check if the job has finished (that doesn't mean without errors) - finished_run = False - for line in stdout.split('\n')[::-1]: - if 'JOB DONE' in line: - finished_run = True - break - if not finished_run: # error if the job has not finished - warning = 'QE neb run did not reach the end of the execution.' - parser_warnings.append(warning) - job_successful = False - # parse the text output of the neb calculation - try: - out_data, iteration_data, critical_messages = parse_neb_text_output(stdout, input_dict) - except QEOutputParsingError as exc: - if not finished_run: # I try to parse it as much as possible - parser_warnings.append('Error while parsing the output file') - out_data = {'warnings': []} - iteration_data = {} - critical_messages = [] - else: # if it was finished and I got an error, it's a mistake of the parser - raise QEOutputParsingError(f'Error while parsing NEB text output: {exc}') + out_data, iteration_data = parse_neb_text_output(stdout) # I add in the out_data all the last elements of iteration_data values. # I leave the possibility to skip some large arrays (None for the time being). @@ -68,19 +35,12 @@ def parse_raw_output_neb(stdout, input_dict, parser_opts=None): continue out_data[k] = v[-1] - # if there is a severe error, the calculation is FAILED - if any([x in out_data['warnings'] for x in critical_messages]): - job_successful = False - parameter_data = dict(list(out_data.items()) + [('parser_warnings', parser_warnings)]) - # return various data. 
- # parameter data will be mapped in Dict - # iteration_data in ArrayData - return parameter_data, iteration_data, job_successful + return parameter_data, iteration_data -def parse_neb_text_output(data, input_dict={}): +def parse_neb_text_output(data): """Parses the text output of QE Neb. :param data: a string, the file as read by read() @@ -93,52 +53,16 @@ def parse_neb_text_output(data, input_dict={}): :return critical_messages: a list with critical messages. If any is found in parsed_data['warnings'], the calculation is FAILED! """ - from aiida_quantumespresso.parsers.parse_raw import parse_output_error - from aiida_quantumespresso.utils.mapping import get_logging_container from collections import defaultdict - # TODO: find a more exhaustive list of the common errors of neb - - # critical warnings: if any is found, the calculation status is FAILED - critical_warnings = { - 'scf convergence NOT achieved on image': 'SCF did not converge for a given image', - 'Maximum CPU time exceeded': 'Maximum CPU time exceeded', - 'reached the maximum number of steps': 'Maximum number of iterations reached in the image optimization', - } - - minor_warnings = { - 'Warning:': None, - } - - all_warnings = dict(list(critical_warnings.items()) + list(minor_warnings.items())) - parsed_data = {} parsed_data['warnings'] = [] iteration_data = defaultdict(list) - # parse time, starting from the end - # apparently, the time is written multiple times - for line in reversed(data.split('\n')): - if 'NEB' in line and 'WALL' in line: - try: - time = line.split('CPU')[1].split('WALL')[0].strip() - parsed_data['wall_time'] = time - except Exception: - parsed_data['warnings'].append('Error while parsing wall time.') - - try: - parsed_data['wall_time_seconds'] = \ - convert_qe_time_to_sec(parsed_data['wall_time']) - except ValueError: - raise QEOutputParsingError('Unable to convert wall_time in seconds.') - break - # set by default the calculation as not converged. 
parsed_data['converged'] = [False, 0] - logs = get_logging_container() - lines = data.split('\n') - for count, line in enumerate(lines): + for count, line in enumerate(data.split('\n')): if 'initial path length' in line: initial_path_length = float(line.split('=')[1].split('bohr')[0]) parsed_data['initial_path_length'] = initial_path_length * CONSTANTS.bohr_to_ang @@ -177,26 +101,8 @@ def parse_neb_text_output(data, input_dict={}): parsed_data['climbing_images_manual'] = [int(_) for _ in line.split(':')[1].split(',')[:-1]] elif 'neb: convergence achieved in' in line: parsed_data['converged'] = [True, int(line.split('iteration')[0].split()[-1])] - elif '%%%%%%%%%%%%%%' in line: - parse_output_error(lines, count, logs) - elif any(i in line for i in all_warnings): - message = [all_warnings[i] for i in all_warnings.keys() if i in line][0] - - if message is not None: - parsed_data['warnings'].append(message) - - parsed_data['warnings'].extend(logs.error) - - try: - num_images = parsed_data['num_of_images'] - except KeyError: - try: - num_images = input_dict['PATH']['num_of_images'] - except KeyError: - raise QEOutputParsingError( - 'No information on the number ' - 'of images available (neither in input nor in output' - ) + + num_images = parsed_data['num_of_images'] iteration_lines = data.split('-- iteration')[1:] iteration_lines = [i.split('\n') for i in iteration_lines] @@ -233,4 +139,4 @@ def parse_neb_text_output(data, input_dict={}): image_dist = float(line.split('=')[1].split('bohr')[0]) iteration_data['image_dist'].append(image_dist * CONSTANTS.bohr_to_ang) - return parsed_data, dict(iteration_data), list(critical_warnings.values()) + return parsed_data, dict(iteration_data) diff --git a/aiida_quantumespresso/parsers/parse_raw/ph.py b/aiida_quantumespresso/parsers/parse_raw/ph.py index 5127b7373..46e5ae380 100644 --- a/aiida_quantumespresso/parsers/parse_raw/ph.py +++ b/aiida_quantumespresso/parsers/parse_raw/ph.py @@ -25,13 +25,6 @@ def 
parse_raw_ph_output(stdout, tensors=None, dynamical_matrices=None): logs = get_logging_container() data_lines = stdout.split('\n') - # First check whether the `JOB DONE` message was written, otherwise the job was interrupted - for line in data_lines: - if 'JOB DONE' in line: - break - else: - logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE') - # Parse tensors, if present tensor_data = {} if tensors: @@ -140,61 +133,16 @@ def parse_xml_matrices(tagname, target_tags): def parse_ph_text_output(lines, logs): - """Parses the stdout of Quantum ESPRESSO ph.x. + """Parses the stdout of Quantum ESPRESSO ``ph.x``. :param lines: list of strings, the file as read by readlines() :return: dictionary with parsed values """ - def detect_important_message(logs, line): - - message_map = { - 'error': { - 'Maximum CPU time exceeded': 'ERROR_OUT_OF_WALLTIME', - 'No convergence has been achieved': 'ERROR_CONVERGENCE_NOT_REACHED', - 'problems computing cholesky': 'ERROR_COMPUTING_CHOLESKY', - }, - 'warning': { - 'Warning:': None, - 'DEPRECATED:': None, - } - } - - # Match any known error and warning messages - for marker, message in message_map['error'].items(): - if marker in line: - if message is None: - message = line - logs.error.append(message) - - for marker, message in message_map['warning'].items(): - if marker in line: - if message is None: - message = line - logs.warning.append(message) - parsed_data = {} - # Parse time, starting from the end because the time is written multiple times - for line in reversed(lines): - if 'PHONON' in line and 'WALL' in line: - try: - time = line.split('CPU')[1].split('WALL')[0] - parsed_data['wall_time'] = time - except Exception: - logs.warning.append('Error while parsing wall time.') - - try: - parsed_data['wall_time_seconds'] = \ - convert_qe_time_to_sec(parsed_data['wall_time']) - except ValueError: - raise QEOutputParsingError('Unable to convert wall_time in seconds.') - break - # Parse number of q-points and number of atoms - for count, 
line in enumerate(lines): - - detect_important_message(logs, line) + for line in lines: if 'q-points for this run' in line: try: diff --git a/aiida_quantumespresso/parsers/parse_raw/pw2gw.py b/aiida_quantumespresso/parsers/parse_raw/pw2gw.py deleted file mode 100644 index 22a0035ec..000000000 --- a/aiida_quantumespresso/parsers/parse_raw/pw2gw.py +++ /dev/null @@ -1,46 +0,0 @@ -# -*- coding: utf-8 -*- -"""A collection of function that are used to parse the output of Quantum Espresso pw2gw. - -The function that needs to be called from outside is parse_raw_output(). The functions mostly work without aiida -specific functionalities. The parsing will try to convert whatever it can in some dictionary, which by operative -decision doesn't have much structure encoded, [the values are simple ] -""" -from aiida_quantumespresso.parsers import QEOutputParsingError -from aiida_quantumespresso.parsers.parse_raw import convert_qe_time_to_sec -from aiida_quantumespresso.utils.mapping import get_logging_container - - -def parse_stdout(stdout): - """Parses the stdout content of a Quantum ESPRESSO `pw2gw.x` calculation. 
- - :param stdout: the stdout content as a string - :param input_parameters: dictionary with the input parameters - :param parser_options: the parser options from the settings input parameter node - :returns: tuple of two dictionaries, with the parsed data and log messages, respectively - """ - # Separate the input string into separate lines - data_lines = stdout.split('\n') - - logs = get_logging_container() - - parsed_data = {} - - for line in data_lines: - if 'JOB DONE' in line: - break - else: - logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE') - - for count, line in enumerate(data_lines): - if 'PW2GW' in line and 'WALL' in line: - try: - time = line.split('CPU')[1].split('WALL')[0] - parsed_data['wall_time'] = time - except Exception: - logs.warning.append('Error while parsing wall time.') - try: - parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(time) - except ValueError: - raise QEOutputParsingError('Unable to convert wall_time in seconds.') - - return parsed_data, logs diff --git a/aiida_quantumespresso/parsers/ph.py b/aiida_quantumespresso/parsers/ph.py index 0d19cb72d..1c857eddb 100644 --- a/aiida_quantumespresso/parsers/ph.py +++ b/aiida_quantumespresso/parsers/ph.py @@ -7,67 +7,68 @@ from aiida import orm from aiida_quantumespresso.calculations.ph import PhCalculation -from aiida_quantumespresso.parsers.parse_raw.ph import parse_raw_ph_output as parse_stdout -from .base import Parser +from .base import BaseParser -class PhParser(Parser): - """`Parser` implementation for the `PhCalculation` calculation job class.""" +class PhParser(BaseParser): + """``Parser`` implementation for the ``PhCalculation`` calculation job class.""" - def parse(self, **kwargs): - """Parse the retrieved files from a `PhCalculation`.""" - retrieved = self.retrieved - - # The stdout is required for parsing - filename_stdout = self.node.get_attribute('output_filename') - filename_tensor = PhCalculation._OUTPUT_XML_TENSOR_FILE_NAME + class_error_map = { + 'No convergence 
has been achieved': 'ERROR_CONVERGENCE_NOT_REACHED', + 'problems computing cholesky': 'ERROR_COMPUTING_CHOLESKY', + } - if filename_stdout not in retrieved.list_object_names(): - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_MISSING) - - try: - stdout = retrieved.get_object_content(filename_stdout) - except (IOError, OSError): - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) + def parse(self, **kwargs): + """Parse the retrieved files from a ``PhCalculation`` into output nodes.""" + filename_tensor = self.node.process_class._OUTPUT_XML_TENSOR_FILE_NAME try: - tensor_file = retrieved.get_object_content(filename_tensor) - except (IOError, OSError): + with self.retrieved.open(filename_tensor, 'r') as handle: + tensor_file = handle.read() + except OSError: tensor_file = None # Look for dynamical matrices dynmat_files = [] - dynmat_folder = PhCalculation._FOLDER_DYNAMICAL_MATRIX - dynmat_prefix = os.path.split(PhCalculation._OUTPUT_DYNAMICAL_MATRIX_PREFIX)[1] + dynmat_folder = self.node.process_class._FOLDER_DYNAMICAL_MATRIX + dynmat_prefix = os.path.split(self.node.process_class._OUTPUT_DYNAMICAL_MATRIX_PREFIX)[1] natural_sort = lambda string: [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', string)] - for filename in sorted(retrieved.list_object_names(dynmat_folder), key=natural_sort): + for filename in sorted(self.retrieved.list_object_names(dynmat_folder), key=natural_sort): if not filename.startswith(dynmat_prefix) or filename.endswith('.freq'): continue - dynmat_files.append(retrieved.get_object_content(os.path.join(dynmat_folder, filename))) + dynmat_files.append(self.retrieved.get_object_content(os.path.join(dynmat_folder, filename))) - try: - parsed_data, logs = parse_stdout(stdout, tensor_file, dynmat_files) - except Exception: - self.logger.error(traceback.format_exc()) - return self.exit(self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION) + parsed_stdout, logs_stdout = self._retrieve_parse_stdout(tensor_file=tensor_file, 
dynmat_files=dynmat_files) + self._emit_logs(logs_stdout) + self.out('output_parameters', orm.Dict(dict=parsed_stdout)) - self.emit_logs(logs) - self.out('output_parameters', orm.Dict(dict=parsed_data)) + for exit_code in [ + 'ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUT_OF_WALLTIME', + 'ERROR_CONVERGENCE_NOT_REACHED', 'ERROR_COMPUTING_CHOLESKY', 'ERROR_OUTPUT_STDOUT_INCOMPLETE' + ]: + if exit_code in logs_stdout.error: + return self._exit(self.exit_codes.get(exit_code)) # If the scheduler detected OOW, simply keep that exit code by not returning anything more specific. if self.node.exit_status == PhCalculation.exit_codes.ERROR_SCHEDULER_OUT_OF_WALLTIME: return - if 'ERROR_OUT_OF_WALLTIME' in logs['error']: - return self.exit_codes.ERROR_OUT_OF_WALLTIME + @classmethod + def parse_stdout(cls, stdout: str, tensor_file: str, dynmat_files: list) -> tuple: + """Parse the ``stdout`` content of a Quantum ESPRESSO ``ph.x`` calculation. + + :param stdout: the stdout content as a string. + :returns: tuple of two dictionaries, with the parsed data and log messages, respectively. 
+ """ + from aiida_quantumespresso.parsers.parse_raw.ph import parse_raw_ph_output as parse_stdout - if 'ERROR_CONVERGENCE_NOT_REACHED' in logs['error']: - return self.exit_codes.ERROR_CONVERGENCE_NOT_REACHED + parsed_base, logs_base = super().parse_stdout(stdout) + parsed_data, logs = parse_stdout(stdout, tensor_file, dynmat_files) - if 'ERROR_COMPUTING_CHOLESKY' in logs['error']: - return self.exit_codes.ERROR_COMPUTING_CHOLESKY + parsed_data.update(parsed_base) + for log_type, log_items in logs_base.items(): + logs[log_type].extend(log_items) - if 'ERROR_OUTPUT_STDOUT_INCOMPLETE' in logs['error']: - return self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE + return parsed_data, logs diff --git a/aiida_quantumespresso/parsers/pp.py b/aiida_quantumespresso/parsers/pp.py index 709126fc2..271f37a7e 100644 --- a/aiida_quantumespresso/parsers/pp.py +++ b/aiida_quantumespresso/parsers/pp.py @@ -11,11 +11,15 @@ from aiida_quantumespresso.calculations.pp import PpCalculation from aiida_quantumespresso.utils.mapping import get_logging_container -from .base import Parser +from .base import BaseParser -class PpParser(Parser): - """`Parser` implementation for the `PpCalculation` calculation job class.""" +class PpParser(BaseParser): + """``Parser`` implementation for the ``PpCalculation`` calculation job class.""" + + class_error_map = { + 'xml data file not found': 'ERROR_PARENT_XML_MISSING', + } # Lookup: plot_num --> units units_dict = { @@ -43,28 +47,25 @@ class PpParser(Parser): } def parse(self, **kwargs): - """ - Parse raw files retrieved from remote dir - """ - retrieved = self.retrieved + """Parse the retrieved files of a ``PpCalculation`` into output nodes.""" + parsed_stdout, logs_stdout = self._retrieve_parse_stdout() + self._emit_logs(logs_stdout) + + for exit_code in [ + 'ERROR_PARENT_XML_MISSING', 'ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', + 'ERROR_OUTPUT_STDOUT_INCOMPLETE' + ]: + if exit_code in logs_stdout.error: + return 
self._exit(self.exit_codes.get(exit_code)) + retrieve_temporary_list = self.node.get_attribute('retrieve_temporary_list', None) - filename_stdout = self.node.get_option('output_filename') # If temporary files were specified, check that we have them if retrieve_temporary_list: try: retrieved_temporary_folder = kwargs['retrieved_temporary_folder'] except KeyError: - return self.exit(self.exit_codes.ERROR_NO_RETRIEVED_TEMPORARY_FOLDER) - - # The stdout is required for parsing - if filename_stdout not in retrieved.list_object_names(): - return self.exit_codes.ERROR_OUTPUT_STDOUT_MISSING - - try: - stdout_raw = retrieved.get_object_content(filename_stdout) - except (IOError, OSError): - return self.exit_codes.ERROR_OUTPUT_STDOUT_READ + return self._exit(self.exit_codes.ERROR_NO_RETRIEVED_TEMPORARY_FOLDER) # Currently all plot output files should start with the `filplot` as prefix. If only one file was produced the # prefix is the entire filename, but in the case of multiple files, there will be pairs of two files where the @@ -83,22 +84,8 @@ def parse(self, **kwargs): filenames = os.listdir(retrieved_temporary_folder) file_opener = lambda filename: open(os.path.join(retrieved_temporary_folder, filename)) else: - filenames = retrieved.list_object_names() - file_opener = retrieved.open - - try: - logs, self.output_parameters = self.parse_stdout(stdout_raw) - except Exception: - self.logger.error(traceback.format_exc()) - return self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION - - self.emit_logs(logs) - - # Scan logs for known errors - if 'ERROR_PARENT_XML_MISSING' in logs['error']: - return self.exit_codes.ERROR_PARENT_XML_MISSING - if 'ERROR_OUTPUT_STDOUT_INCOMPLETE' in logs['error']: - return self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE + filenames = self.retrieved.list_object_names() + file_opener = self.retrieved.open # The following check should in principle always succeed since the iflag should in principle be set by the # `PpCalculation` plugin which only ever 
sets 0 - 4, but we check in order for the code not to except. @@ -137,7 +124,7 @@ def get_key_from_filename(filename): # Parse the file try: key = get_key_from_filename(filename) - data_parsed.append((key, parsers[iflag](data_raw))) + data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_stdout['plot_num']]))) del data_raw except Exception: # pylint: disable=broad-except return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename) @@ -154,83 +141,39 @@ def get_key_from_filename(filename): else: self.out('output_data_multiple', dict(data_parsed)) - self.out('output_parameters', orm.Dict(dict=self.output_parameters)) + self.out('output_parameters', orm.Dict(dict=parsed_stdout)) - def parse_stdout(self, stdout_str): - """ - Parses the output written to StdOut to retrieve basic information about the post processing + def parse_stdout(self, stdout): + """Parse the ``stdout`` content of a Quantum ESPRESSO ``pp.x`` calculation. - :param stdout_str: the stdout file read in as a single string + :param stdout: the stdout content as a string. """ - - def detect_important_message(logs, line): - """ - Detect know errors and warnings printed in the stdout - - :param logs: - :param line: a line from the stdout as a string - """ - message_map = { - 'error': { - 'xml data file not found': 'ERROR_PARENT_XML_MISSING' - }, - 'warning': { - 'Warning:': None, - 'DEPRECATED:': None, - } - } - - # Match any known error and warning messages - for marker, message in message_map['error'].items(): - if marker in line: - if message is None: - message = line - logs.error.append(message) - - for marker, message in message_map['warning'].items(): - if marker in line: - if message is None: - message = line - logs.warning.append(message) - - stdout_lines = stdout_str.splitlines() - logs = get_logging_container() - output_dict = {} - - # Check for job completion, indicating that pp.x exited without interruption, even if there was an error. 
- for line in stdout_lines: - if 'JOB DONE' in line: - break - else: - logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE') - - # Detect any issues and detect job completion - for line in stdout_lines: - detect_important_message(logs, line) + parsed_data, logs = super().parse_stdout(stdout) # Parse useful data from stdout - for line in stdout_lines: + for line in stdout.splitlines(): if 'Check:' in line: # QE < 6.5 split_line = line.split('=') if 'negative/imaginary' in line: # QE6.1-6.3 - output_dict['negative_core_charge'] = float(split_line[-1].split()[0]) - output_dict['imaginary_core_charge'] = float(split_line[-1].split()[-1]) + parsed_data['negative_core_charge'] = float(split_line[-1].split()[0]) + parsed_data['imaginary_core_charge'] = float(split_line[-1].split()[-1]) else: # QE6.4 - output_dict['negative_core_charge'] = float(split_line[1]) + parsed_data['negative_core_charge'] = float(split_line[1]) if 'Min, Max, imaginary charge:' in line: split_line = line.split() - output_dict['charge_min'] = float(split_line[-3]) - output_dict['charge_max'] = float(split_line[-2]) - output_dict['charge_img'] = float(split_line[-1]) + parsed_data['charge_min'] = float(split_line[-3]) + parsed_data['charge_max'] = float(split_line[-2]) + parsed_data['charge_img'] = float(split_line[-1]) if 'plot_num = ' in line: - output_dict['plot_num'] = int(line.split('=')[1]) + parsed_data['plot_num'] = int(line.split('=')[1]) if 'Plot Type:' in line: - output_dict['plot_type'] = line.split('Output format')[0].split(':')[-1].strip() - output_dict['output_format'] = line.split(':')[-1].strip() + parsed_data['plot_type'] = line.split('Output format')[0].split(':')[-1].strip() + parsed_data['output_format'] = line.split(':')[-1].strip() - return logs, output_dict + return parsed_data, logs - def parse_gnuplot1D(self, data_file_str): + @staticmethod + def parse_gnuplot1D(data_file_str, data_units): """Parse 1D GNUPlot formatted output. 
:param data_file_str: the data file read in as a single string @@ -250,7 +193,7 @@ def parse_gnuplot1D(self, data_file_str): data.append(float(split_line[1])) y_data = [data] y_names = ['data'] - y_units = [self.units_dict[self.output_parameters['plot_num']]] + y_units = [data_units] # 1D case with spherical averaging if n_col == 3: @@ -264,8 +207,7 @@ def parse_gnuplot1D(self, data_file_str): data_integral.append(float(split_line[2])) y_data = [data, data_integral] y_names = ['data', 'integrated_data'] - unit = self.units_dict[self.output_parameters['plot_num']] - y_units = [unit, unit.replace('bohr^3', 'bohr')] + y_units = [data_units, data_units.replace('bohr^3', 'bohr')] x_units = 'bohr' arraydata = orm.ArrayData() @@ -277,7 +219,8 @@ def parse_gnuplot1D(self, data_file_str): return arraydata - def parse_gnuplot_polar(self, data_file_str): + @staticmethod + def parse_gnuplot_polar(data_file_str, data_units): """Parse 2D Polar GNUPlot formatted, single column output. :param data_file_str: the data file read in as a single string @@ -288,15 +231,15 @@ def parse_gnuplot_polar(self, data_file_str): data = [] for line in data_lines: data.append(float(line)) - data_units = [self.units_dict[self.output_parameters['plot_num']]] arraydata = orm.ArrayData() arraydata.set_array('data', np.array(data)) - arraydata.set_array('data_units', np.array(data_units)) + arraydata.set_array('data_units', np.array([data_units])) return arraydata - def parse_gnuplot2D(self, data_file_str): + @staticmethod + def parse_gnuplot2D(data_file_str, data_units): """Parse 2D GNUPlot formatted output. 
:param data_file_str: the data file read in as a single string @@ -316,7 +259,6 @@ def parse_gnuplot2D(self, data_file_str): data.append(float(split_line[2])) coords_units = 'bohr' - data_units = self.units_dict[self.output_parameters['plot_num']] arraydata = orm.ArrayData() arraydata.set_array('xy_coordinates', np.array(coords)) arraydata.set_array('data', np.array(data)) @@ -325,7 +267,8 @@ def parse_gnuplot2D(self, data_file_str): return arraydata - def parse_gaussian(self, data_file_str): + @staticmethod + def parse_gaussian(data_file_str, data_units): """Parse Gaussian Cube formatted output. :param data_file_str: the data file read in as a single string @@ -362,7 +305,6 @@ def parse_gaussian(self, data_file_str): data_array = data_array.reshape((xdim, ydim, zdim)) coordinates_units = 'bohr' - data_units = self.units_dict[self.output_parameters['plot_num']] arraydata = orm.ArrayData() arraydata.set_array('voxel', voxel_array) diff --git a/aiida_quantumespresso/parsers/projwfc.py b/aiida_quantumespresso/parsers/projwfc.py index 2ced391f4..622db42dd 100644 --- a/aiida_quantumespresso/parsers/projwfc.py +++ b/aiida_quantumespresso/parsers/projwfc.py @@ -10,11 +10,11 @@ from aiida_quantumespresso.parsers import QEOutputParsingError from aiida_quantumespresso.parsers.parse_raw.base import ( - parse_output_base, convert_qe_to_aiida_structure, convert_qe_to_kpoints + convert_qe_to_aiida_structure, convert_qe_to_kpoints ) from aiida_quantumespresso.utils.mapping import get_logging_container -from .base import Parser +from .base import BaseParser def find_orbitals_from_statelines(out_info_dict): @@ -271,57 +271,38 @@ def spin_dependent_pdos_subparser(out_info_dict): return out_arrays -class ProjwfcParser(Parser): - """This class is the implementation of the Parser class for projwfc.x in Quantum Espresso. +class ProjwfcParser(BaseParser): + """``Parser`` implementation for the ``ProjwfcCalculation`` calculation job class. 
Parses projection arrays that map the projection onto each point in the bands structure, as well as pdos arrays, which map the projected density of states onto an energy axis. """ def parse(self, **kwargs): - """Parses the datafolder, stores results. + """Parse the retrieved files from a ``ProjwfcCalculation`` into output nodes.""" + # we create a dictionary that progressively accumulates more info + out_info_dict = {} + + parsed_stdout, logs_stdout = self._retrieve_parse_stdout(out_info_dict=out_info_dict) + self._emit_logs(logs_stdout) + self.out('output_parameters', Dict(dict=parsed_stdout)) + + for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']: + if exit_code in logs_stdout.error: + return self._exit(self.exit_codes.get(exit_code)) - Retrieves projwfc output, and some basic information from the out_file, such as warnings and wall_time - """ - retrieved = self.retrieved - # Get the temporary retrieved folder try: retrieved_temporary_folder = kwargs['retrieved_temporary_folder'] except KeyError: - return self.exit(self.exit_codes.ERROR_NO_RETRIEVED_TEMPORARY_FOLDER) - - # Read standard out - try: - filename_stdout = self.node.get_option('output_filename') # or get_attribute(), but this is clearer - with retrieved.open(filename_stdout, 'r') as fil: - out_file = fil.readlines() - except OSError: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) - - job_done = False - for i in range(len(out_file)): - line = out_file[-i] - if 'JOB DONE' in line: - job_done = True - break - if not job_done: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE) - - # Parse basic info and warnings, and output them as output_parmeters - parsed_data, logs = parse_output_base(out_file, 'PROJWFC') - self.emit_logs(logs) - self.out('output_parameters', Dict(dict=parsed_data)) + return self._exit(self.exit_codes.ERROR_NO_RETRIEVED_TEMPORARY_FOLDER) # Parse the XML to obtain the `structure`, `kpoints` and 
spin-related settings from the parent calculation self.exit_code_xml = None parsed_xml, logs_xml = self._parse_xml(retrieved_temporary_folder) - self.emit_logs(logs_xml) + self._emit_logs(logs_xml) if self.exit_code_xml: - return self.exit(self.exit_code_xml) - - # we create a dictionary the progressively accumulates more info - out_info_dict = {} + return self._exit(self.exit_code_xml) out_info_dict['structure'] = convert_qe_to_aiida_structure(parsed_xml['structure']) out_info_dict['kpoints'] = convert_qe_to_kpoints(parsed_xml, out_info_dict['structure']) @@ -331,33 +312,32 @@ def parse(self, **kwargs): out_info_dict['spin'] = out_info_dict['nspin'] == 2 # check and read pdos_tot file - out_filenames = retrieved.list_object_names() + out_filenames = self.retrieved.list_object_names() try: pdostot_filename = fnmatch.filter(out_filenames, '*pdos_tot*')[0] - with retrieved.open(pdostot_filename, 'r') as pdostot_file: + with self.retrieved.open(pdostot_filename, 'r') as pdostot_file: # Columns: Energy(eV), Ldos, Pdos pdostot_array = np.atleast_2d(np.genfromtxt(pdostot_file)) energy = pdostot_array[:, 0] dos = pdostot_array[:, 1] except (OSError, KeyError): - return self.exit(self.exit_codes.ERROR_READING_PDOSTOT_FILE) + return self._exit(self.exit_codes.ERROR_READING_PDOSTOT_FILE) # check and read all of the individual pdos_atm files pdos_atm_filenames = fnmatch.filter(out_filenames, '*pdos_atm*') pdos_atm_array_dict = {} for name in pdos_atm_filenames: - with retrieved.open(name, 'r') as pdosatm_file: + with self.retrieved.open(name, 'r') as pdosatm_file: pdos_atm_array_dict[name] = np.atleast_2d(np.genfromtxt(pdosatm_file)) # finding the bands and projections - out_info_dict['out_file'] = out_file out_info_dict['energy'] = energy out_info_dict['pdos_atm_array_dict'] = pdos_atm_array_dict try: new_nodes_list = self._parse_bands_and_projections(out_info_dict) except QEOutputParsingError as err: self.logger.error(f'Error parsing bands and projections: {err}') - return 
self.exit(self.exit_codes.ERROR_PARSING_PROJECTIONS) + return self._exit(self.exit_codes.ERROR_PARSING_PROJECTIONS) for linkname, node in new_nodes_list: self.out(linkname, node) @@ -366,6 +346,14 @@ def parse(self, **kwargs): Dos_out.set_y(dos, 'Dos', 'states/eV') self.out('Dos', Dos_out) + def parse_stdout(self, stdout, out_info_dict): + """Parse the ``stdout`` content of a Quantum ESPRESSO ``projwfc.x`` calculation. + + :param stdout: the stdout content as a string. + """ + out_info_dict['out_file'] = stdout.split('\n') + return super().parse_stdout(stdout) + def _parse_xml(self, retrieved_temporary_folder): """Parse the XML file. diff --git a/aiida_quantumespresso/parsers/pw.py b/aiida_quantumespresso/parsers/pw.py index b9608595b..aa5fce25b 100644 --- a/aiida_quantumespresso/parsers/pw.py +++ b/aiida_quantumespresso/parsers/pw.py @@ -8,11 +8,11 @@ from aiida.common import exceptions from aiida_quantumespresso.utils.mapping import get_logging_container -from .base import Parser +from .base import BaseParser from .parse_raw.pw import reduce_symmetries -class PwParser(Parser): +class PwParser(BaseParser): """`Parser` implementation for the `PwCalculation` calculation job class.""" def parse(self, **kwargs): @@ -40,7 +40,7 @@ def parse(self, **kwargs): try: dir_with_bands = kwargs['retrieved_temporary_folder'] except KeyError: - return self.exit(self.exit_codes.ERROR_NO_RETRIEVED_TEMPORARY_FOLDER) + return self._exit(self.exit_codes.ERROR_NO_RETRIEVED_TEMPORARY_FOLDER) parameters = self.node.inputs.parameters.get_dict() parsed_xml, logs_xml = self.parse_xml(dir_with_bands, parser_options) @@ -99,30 +99,30 @@ def parse(self, **kwargs): logs_xml.pop('error') ignore = ['Error while parsing ethr.', 'DEPRECATED: symmetry with ibrav=0, use correct ibrav instead'] - self.emit_logs([logs_stdout, logs_xml], ignore=ignore) + self._emit_logs([logs_stdout, logs_xml], ignore=ignore) # First check for specific known problems that can cause a pre-mature termination of the 
calculation exit_code = self.validate_premature_exit(logs_stdout) if exit_code: - return self.exit(exit_code) + return self._exit(exit_code) # If the both stdout and xml exit codes are set, there was a basic problem with both output files and there # is no need to investigate any further. if self.exit_code_stdout and self.exit_code_xml: - return self.exit(self.exit_codes.ERROR_OUTPUT_FILES) + return self._exit(self.exit_codes.ERROR_OUTPUT_FILES) if self.exit_code_stdout: - return self.exit(self.exit_code_stdout) + return self._exit(self.exit_code_stdout) if self.exit_code_xml: - return self.exit(self.exit_code_xml) + return self._exit(self.exit_code_xml) # First determine issues that can occurr for all calculation types. Note that the generic errors, that are # common to all types are done first. If a problem is found there, we return the exit code and don't continue for validator in [self.validate_electronic, self.validate_dynamics, self.validate_ionic]: exit_code = validator(trajectory, parsed_parameters, logs_stdout) if exit_code: - return self.exit(exit_code) + return self._exit(exit_code) def get_calculation_type(self): """Return the type of the calculation.""" diff --git a/aiida_quantumespresso/parsers/pw2gw.py b/aiida_quantumespresso/parsers/pw2gw.py index 4dc55005a..ea6bc798d 100644 --- a/aiida_quantumespresso/parsers/pw2gw.py +++ b/aiida_quantumespresso/parsers/pw2gw.py @@ -3,56 +3,52 @@ import io import numpy as np -from aiida import orm +from aiida.orm import Dict, ArrayData from aiida_quantumespresso.calculations.pw2gw import Pw2gwCalculation -from .base import Parser +from .base import BaseParser -class Pw2gwParser(Parser): - """`Parser` implementation for the `Pw2gwCalculation` calculation job class.""" +class Pw2gwParser(BaseParser): + """``Parser`` implementation for the ``Pw2gwCalculation`` calculation job class.""" def parse(self, **kwargs): - """Parse the retrieved files of a completed `Pw2gwCalculation` into output nodes. 
+ """Parse the retrieved files of a completed ``Pw2gwCalculation`` into output nodes. Two nodes that are expected are the default 'retrieved' `FolderData` node which will store the retrieved files - permanently in the repository. The second required node is a filepath under the key `retrieved_temporary_files` - which should contain the temporary retrieved files. + permanently in the repository. The second required node is a filepath under the key + ``retrieved_temporary_files`` which should contain the temporary retrieved files. """ - self.exit_code_stdout = None - self.exit_code_eps = None - - # Parse the pw2gw stout file - data, logs_stdout = self.parse_stdout() - - self.emit_logs(logs_stdout) + parsed_stdout, logs_stdout = self._retrieve_parse_stdout() + self._emit_logs(logs_stdout) - if self.exit_code_stdout: - return self.exit(self.exit_code_stdout) + for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']: + if exit_code in logs_stdout.error: + return self._exit(self.exit_codes.get(exit_code)) - self.out('output_parameters', orm.Dict(dict=data)) + self.out('output_parameters', Dict(dict=parsed_stdout)) - # Parse the pw2g outputfiles + self.exit_code_eps = None eps = self.parse_eps_files() if self.exit_code_eps: - return self.exit(self.exit_code_eps) + return self._exit(self.exit_code_eps) self.out('eps', eps) def parse_eps_files(self): - """Parse the eps*.dat files produced by pw2gw.x and store them in the `eps` node.""" + """Parse the ``eps*.dat`` files produced by ``pw2gw.x``.""" retrieved = self.retrieved retrieved_names = retrieved.list_object_names() - files = Pw2gwCalculation._internal_retrieve_list + files = self.node.process_class._internal_retrieve_list if any(_ not in retrieved_names for _ in files): self.exit_code_eps = self.exit_codes.ERROR_OUTPUT_FILES return energy = None - eps = orm.ArrayData() - for name in Pw2gwCalculation._internal_retrieve_list: + eps = ArrayData() + for name in 
self.node.process_class._internal_retrieve_list: content = retrieved.get_object_content(name) base = name.split('.')[0] @@ -76,37 +72,3 @@ def parse_eps_files(self): eps.set_array(base, y) return eps - - def parse_stdout(self): - """Parse the stdout file of pw2gw to build the `output_parameters` node.""" - from aiida_quantumespresso.utils.mapping import get_logging_container - from aiida_quantumespresso.parsers.parse_raw.pw2gw import parse_stdout - - logs = get_logging_container() - parsed_data = {} - - filename_stdout = self.node.get_attribute('output_filename') - - if filename_stdout not in self.retrieved.list_object_names(): - self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_MISSING - return parsed_data, logs - - try: - stdout = self.retrieved.get_object_content(filename_stdout) - except IOError: - self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_READ - return parsed_data, logs - - try: - parsed_data, logs = parse_stdout(stdout) - except Exception: - import traceback - traceback.print_exc() - self.exit_code_stdout = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION - - # If the stdout was incomplete, most likely the job was interrupted before it could cleanly finish, so the - # output files are most likely corrupt and cannot be restarted from - if 'ERROR_OUTPUT_STDOUT_INCOMPLETE' in logs['error']: - self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE - - return parsed_data, logs diff --git a/aiida_quantumespresso/parsers/pw2wannier90.py b/aiida_quantumespresso/parsers/pw2wannier90.py index b4de0eee3..4f418e41f 100644 --- a/aiida_quantumespresso/parsers/pw2wannier90.py +++ b/aiida_quantumespresso/parsers/pw2wannier90.py @@ -1,32 +1,23 @@ # -*- coding: utf-8 -*- from aiida.orm import Dict -from aiida_quantumespresso.parsers.parse_raw.base import parse_output_base -from .base import Parser +from .base import BaseParser -class Pw2wannier90Parser(Parser): - """`Parser` implementation for the `Pw2wannierCalculation` calculation 
job class.""" +class Pw2wannier90Parser(BaseParser): + """``Parser`` implementation for the ``Pw2wannierCalculation`` calculation job class.""" def parse(self, **kwargs): - """Parse the retrieved files of a completed `Pw2wannierCalculation` into output nodes. + """Parse the retrieved files of a completed ``Pw2wannierCalculation`` into output nodes. - Two nodes that are expected are the default 'retrieved' `FolderData` node which will store the retrieved files + Two nodes that are expected are the default 'retrieved' ``FolderData`` node which will store the retrieved files permanently in the repository. """ - try: - filename_stdout = self.node.get_option('output_filename') # or get_attribute(), but this is clearer - with self.retrieved.open(filename_stdout, 'r') as fil: - out_file = fil.read() - except OSError: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) + parsed_stdout, logs_stdout = self._retrieve_parse_stdout() + self._emit_logs(logs_stdout) - parsed_data, logs = parse_output_base(out_file, codename='PW2WANNIER') - self.emit_logs(logs) + for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']: + if exit_code in logs_stdout.error: + return self._exit(self.exit_codes.get(exit_code)) - self.out('output_parameters', Dict(dict=parsed_data)) - - if 'ERROR_OUTPUT_STDOUT_INCOMPLETE' in logs.error: - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE) - elif logs.error: - return self.exit(self.exit_codes.ERROR_GENERIC_QE_ERROR) + self.out('output_parameters', Dict(dict=parsed_stdout)) diff --git a/aiida_quantumespresso/parsers/q2r.py b/aiida_quantumespresso/parsers/q2r.py index 1e483d2a5..5537e7684 100644 --- a/aiida_quantumespresso/parsers/q2r.py +++ b/aiida_quantumespresso/parsers/q2r.py @@ -1,28 +1,30 @@ # -*- coding: utf-8 -*- -from aiida_quantumespresso.calculations.q2r import Q2rCalculation from aiida_quantumespresso.data.force_constants import ForceConstantsData -from .base import 
Parser +from .base import BaseParser +from aiida.orm import Dict -class Q2rParser(Parser): - """Parser implementation for the Q2rCalculation.""" + +class Q2rParser(BaseParser): + """``Parser`` implementation for the ``Q2rCalculation`` calculation job class.""" def parse(self, **kwargs): - """Parse the retrieved files from a `Q2rCalculation`.""" - retrieved = self.retrieved - filename_stdout = self.node.get_option('output_filename') - filename_force_constants = Q2rCalculation._FORCE_CONSTANTS_NAME + """Parse the retrieved files of a ``Q2rCalculation`` into output nodes.""" + parsed_stdout, logs_stdout = self._retrieve_parse_stdout() + self._emit_logs(logs_stdout) + + for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']: + if exit_code in logs_stdout.error: + return self._exit(self.exit_codes.get(exit_code)) - if filename_stdout not in retrieved.list_object_names(): - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ) + self.out('output_parameters', Dict(dict=parsed_stdout)) - if filename_force_constants not in retrieved.list_object_names(): - return self.exit(self.exit_codes.ERROR_READING_FORCE_CONSTANTS_FILE) + filename_force_constants = self.node.process_class._FORCE_CONSTANTS_NAME - if 'JOB DONE' not in retrieved.get_object_content(filename_stdout): - return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE) + if filename_force_constants not in self.retrieved.list_object_names(): + return self._exit(self.exit_codes.ERROR_READING_FORCE_CONSTANTS_FILE) - with retrieved.open(filename_force_constants, 'rb') as handle: + with self.retrieved.open(filename_force_constants, 'rb') as handle: self.out('force_constants', ForceConstantsData(file=handle)) return diff --git a/tests/parsers/test_dos/test_dos_default.yml b/tests/parsers/test_dos/test_dos_default.yml index fa7576bec..587ce2337 100644 --- a/tests/parsers/test_dos/test_dos_default.yml +++ b/tests/parsers/test_dos/test_dos_default.yml @@ -10,6 
+10,6 @@ dos: - states/eV - states/eV parameters: - code_version: v.6.4.1 + code_version: 6.4.1 wall_time: 0.41s wall_time_seconds: 0.41 diff --git a/tests/parsers/test_matdyn/test_matdyn_default.yml b/tests/parsers/test_matdyn/test_matdyn_default.yml index 30b9f6f61..41ed8d9aa 100644 --- a/tests/parsers/test_matdyn/test_matdyn_default.yml +++ b/tests/parsers/test_matdyn/test_matdyn_default.yml @@ -1,5 +1,7 @@ output_parameters: - warnings: [] + code_version: '6.1' + wall_time: 0.00s + wall_time_seconds: 0.0 output_phonon_bands: array|bands: - 1 diff --git a/tests/parsers/test_neb.py b/tests/parsers/test_neb.py index c6d9604fe..fc41b6dfc 100644 --- a/tests/parsers/test_neb.py +++ b/tests/parsers/test_neb.py @@ -58,7 +58,6 @@ def test_neb_default(fixture_localhost, generate_calc_job_node, generate_parser, assert calcfunction.is_finished, calcfunction.exception assert calcfunction.is_finished_ok, calcfunction.exit_message - assert not orm.Log.objects.get_logs_for(node) assert 'output_parameters' in results assert 'output_mep' in results assert 'output_trajectory' in results @@ -90,7 +89,6 @@ def test_neb_all_iterations( assert calcfunction.is_finished, calcfunction.exception assert calcfunction.is_finished_ok, calcfunction.exit_message - assert not orm.Log.objects.get_logs_for(node) assert 'output_parameters' in results assert 'output_mep' in results assert 'output_trajectory' in results diff --git a/tests/parsers/test_neb/test_neb_default.yml b/tests/parsers/test_neb/test_neb_default.yml index 7d1e6897f..32a688b85 100644 --- a/tests/parsers/test_neb/test_neb_default.yml +++ b/tests/parsers/test_neb/test_neb_default.yml @@ -25,6 +25,7 @@ parameters: ci_scheme: auto climbing_image_auto: - 1 + code_version: 6.4.1 converged: - true - 13 diff --git a/tests/parsers/test_ph/test_ph_default.yml b/tests/parsers/test_ph/test_ph_default.yml index 6248928e4..3381fed55 100644 --- a/tests/parsers/test_ph/test_ph_default.yml +++ b/tests/parsers/test_ph/test_ph_default.yml @@ -1,3 
+1,4 @@ +code_version: '6.1' dielectric_constant: - - 57.36256076907993 - -2.842170943040401e-14 @@ -47,5 +48,5 @@ number_of_atoms: 2 number_of_irr_representations_for_each_q: - 2 number_of_qpoints: 1 -wall_time: ' 15.25s ' +wall_time: 15.25s wall_time_seconds: 15.25 diff --git a/tests/parsers/test_ph/test_ph_not_converged.yml b/tests/parsers/test_ph/test_ph_not_converged.yml index 7596df924..8cdff1a3e 100644 --- a/tests/parsers/test_ph/test_ph_not_converged.yml +++ b/tests/parsers/test_ph/test_ph_not_converged.yml @@ -1,7 +1,8 @@ +code_version: '6.1' number_of_atoms: 2 number_of_irr_representations_for_each_q: - 2 - 4 number_of_qpoints: 8 -wall_time: ' 3m21.58s ' +wall_time: 3m21.58s wall_time_seconds: 201.57999999999998 diff --git a/tests/parsers/test_ph/test_ph_out_of_walltime.yml b/tests/parsers/test_ph/test_ph_out_of_walltime.yml index d6bd29add..d5da67d25 100644 --- a/tests/parsers/test_ph/test_ph_out_of_walltime.yml +++ b/tests/parsers/test_ph/test_ph_out_of_walltime.yml @@ -1,7 +1,8 @@ +code_version: 6.3MaX number_of_atoms: 2 number_of_irr_representations_for_each_q: - 2 - 4 number_of_qpoints: 3 -wall_time: ' 7.13s ' +wall_time: 7.13s wall_time_seconds: 7.13 diff --git a/tests/parsers/test_pw2gw/test_pw2gw_default_data.yml b/tests/parsers/test_pw2gw/test_pw2gw_default_data.yml index 886f2e4e1..64636d951 100644 --- a/tests/parsers/test_pw2gw/test_pw2gw_default_data.yml +++ b/tests/parsers/test_pw2gw/test_pw2gw_default_data.yml @@ -1,3 +1,4 @@ output_parameters: - wall_time: ' 1m26.21s ' + code_version: '6.2' + wall_time: 1m26.21s wall_time_seconds: 86.21000000000001 diff --git a/tests/parsers/test_pw2wannier90/test_pw2wannier90_default.yml b/tests/parsers/test_pw2wannier90/test_pw2wannier90_default.yml index e4fbf1f8e..4746d9d82 100644 --- a/tests/parsers/test_pw2wannier90/test_pw2wannier90_default.yml +++ b/tests/parsers/test_pw2wannier90/test_pw2wannier90_default.yml @@ -1,4 +1,4 @@ parameters: - code_version: v.6.4.1 + code_version: 6.4.1 wall_time: 2.18s 
wall_time_seconds: 2.18