first draft of refactoring base stdout parsing

aiidateam · Oct 12, 2021 · b08b206 · b08b206
1 parent 0874d95
commit b08b206
Show file tree

Hide file tree

Showing 22 changed files with 423 additions and 359 deletions.
diff --git a/aiida_quantumespresso/calculations/dos.py b/aiida_quantumespresso/calculations/dos.py
@@ -26,10 +26,6 @@ def define(cls, spec):
         spec.output('output_parameters', valid_type=orm.Dict)
         spec.output('output_dos', valid_type=orm.XyData)
         spec.default_output_node = 'output_parameters'
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(330, 'ERROR_READING_DOS_FILE',
             message='The dos file could not be read from the retrieved folder.')
         # yapf: enable
diff --git a/aiida_quantumespresso/calculations/matdyn.py b/aiida_quantumespresso/calculations/matdyn.py
@@ -34,10 +34,6 @@ def define(cls, spec):
         spec.output('output_parameters', valid_type=orm.Dict)
         spec.output('output_phonon_bands', valid_type=orm.BandsData)
         spec.default_output_node = 'output_parameters'
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(330, 'ERROR_OUTPUT_FREQUENCIES',
             message='The output frequencies file could not be read from the retrieved folder.')
         spec.exit_code(410, 'ERROR_OUTPUT_KPOINTS_MISSING',

diff --git a/aiida_quantumespresso/calculations/namelists.py b/aiida_quantumespresso/calculations/namelists.py
@@ -59,6 +59,12 @@ def define(cls, spec):
             help='Use an additional node for special settings')
         spec.input('parent_folder', valid_type=(RemoteData, FolderData, SinglefileData), required=False,
             help='Use a local or remote folder as parent folder (for restarts and similar)')
+        spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
+            message='The retrieved folder did not contain the required stdout output file.')
+        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
+            message='The stdout output file could not be read.')
+        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
+            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         # yapf: enable
 
     def _get_following_text(self):

diff --git a/aiida_quantumespresso/calculations/projwfc.py b/aiida_quantumespresso/calculations/projwfc.py
@@ -59,12 +59,6 @@ def define(cls, spec):
         spec.default_output_node = 'output_parameters'
         spec.exit_code(301, 'ERROR_NO_RETRIEVED_TEMPORARY_FOLDER',
             message='The retrieved temporary folder could not be accessed.')
-        spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING',
-            message='The retrieved folder did not contain the required XML file.')
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(320, 'ERROR_OUTPUT_XML_READ',
             message='The XML output file could not be read.')
         spec.exit_code(321, 'ERROR_OUTPUT_XML_PARSE',

diff --git a/aiida_quantumespresso/calculations/pw2gw.py b/aiida_quantumespresso/calculations/pw2gw.py
@@ -37,16 +37,10 @@ def define(cls, spec):
         spec.output('eps', valid_type=orm.ArrayData,
             help='The `eps` output node containing 5 arrays `energy`, `epsX`, `epsY`, `epsZ`, `epsTOT`')
 
-        spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
-            message='The retrieved folder did not contain the required stdout output file.')
         spec.exit_code(305, 'ERROR_OUTPUT_FILES',
             message='The eps*.dat output files could not be read or parsed.')
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
         spec.exit_code(311, 'ERROR_OUTPUT_STDOUT_PARSE',
             message='The stdout output file could not be parsed.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(330, 'ERROR_OUTPUT_FILES_INVALID_FORMAT',
             message='The eps*.dat output files do not have the expected shape (N, 2).')
         spec.exit_code(331, 'ERROR_OUTPUT_FILES_ENERGY_MISMATCH',

diff --git a/aiida_quantumespresso/calculations/pw2wannier90.py b/aiida_quantumespresso/calculations/pw2wannier90.py
@@ -31,10 +31,6 @@ def define(cls, spec):
                    help='The output folder of a pw.x calculation')
         spec.output('output_parameters', valid_type=Dict)
         spec.default_output_node = 'output_parameters'
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(340, 'ERROR_GENERIC_QE_ERROR',
             message='Encountered a generic error message')
         spec.exit_code(350, 'ERROR_UNEXPECTED_PARSER_EXCEPTION',

diff --git a/aiida_quantumespresso/calculations/q2r.py b/aiida_quantumespresso/calculations/q2r.py
@@ -31,10 +31,6 @@ def define(cls, spec):
         super().define(spec)
         spec.input('parent_folder', valid_type=(orm.RemoteData, orm.FolderData), required=True)
         spec.output('force_constants', valid_type=ForceConstantsData)
-        spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
-            message='The stdout output file could not be read.')
-        spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
-            message='The stdout output file was incomplete probably because the calculation got interrupted.')
         spec.exit_code(330, 'ERROR_READING_FORCE_CONSTANTS_FILE',
             message='The force constants file could not be read.')
         # yapf: enable
diff --git a/aiida_quantumespresso/parsers/base.py b/aiida_quantumespresso/parsers/base.py
@@ -3,14 +3,94 @@
 
 All `Parser` implementations in `aiida-quantumespresso` must use this base class, not `aiida.parsers.Parser`.
 """
+import re
+
 from aiida.parsers import Parser as _BaseParser
+from aiida_quantumespresso.utils.mapping import get_logging_container
+from aiida_quantumespresso.parsers.parse_raw.base import convert_qe_time_to_sec
 
 __all__ = ('Parser',)
 
 
 class Parser(_BaseParser):  # pylint: disable=abstract-method
     """Custom `Parser` class for `aiida-quantumespresso` parser implementations."""
 
+    error_map = {}
+    warning_map = {
+        'Warning:': None,
+        'DEPRECATED:': None,
+    }
+
+    def parse_stdout_base(self):
+        """Parse the basic content of the ``stdout`` of a Quantum ESPRESSO calculation.
+
+        Reads the file named by the ``output_filename`` option from the retrieved folder and looks for ``JOB DONE``,
+        the code version, the wall time and errors delimited by ``%%%%`` lines. Failures are added to ``logs.error``.
+
+        :returns: tuple ``(stdout, parsed_data, logs)`` where ``stdout`` is ``None`` if the file could not be read
+        """
+        logs = get_logging_container()
+        parsed_data = {}
+
+        filename_stdout = self.node.get_option('output_filename')
+
+        if filename_stdout not in self.retrieved.list_object_names():
+            logs.error.append('ERROR_OUTPUT_STDOUT_MISSING')
+            # Keep the arity of the normal return so that callers can always unpack three values.
+            return None, parsed_data, logs
+
+        try:
+            with self.retrieved.open(filename_stdout, 'r') as handle:
+                stdout = handle.read()
+        except OSError:
+            logs.error.append('ERROR_OUTPUT_STDOUT_READ')
+            return None, parsed_data, logs
+
+        if not re.search(r'JOB DONE', stdout):
+            logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')
+
+        code_match = re.search(r'Program\s(?P<code_name>[A-Z|\_]+)\s(?P<code_version>[v\.\d\.|A-Z]+)\sstarts', stdout)
+
+        if code_match:
+            code_name = code_match.groupdict()['code_name']
+            parsed_data['code_version'] = code_match.groupdict()['code_version']
+
+            wall_match = re.search(fr'{code_name}\s+:[\s\S]+CPU\s+(?P<wall_time>[\.\d|s|m|d|h]+)\sWALL', stdout)
+
+            if wall_match:
+                parsed_data['wall_time'] = wall_match.groupdict()['wall_time']
+
+                try:
+                    parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(wall_match.groupdict()['wall_time'])
+                except ValueError:
+                    # NOTE(review): elsewhere the singular ``logs.error`` is used; confirm ``logs.warnings`` exists.
+                    logs.warnings.append('Unable to convert wall time from `stdout` to seconds.')
+
+        # Parse an error message with optional mapping of the message
+        if re.search(r'\%{10,}', stdout):
+            self.parse_stdout_errors(stdout, logs, self.error_map)
+
+        return stdout, parsed_data, logs
+
+    @staticmethod
+    def parse_stdout_errors(stdout, logs, error_map=None):
+        """Parse a Quantum ESPRESSO ``stdout`` file for errors marked with ``%%%%%%%%`` lines.
+
+        :param stdout: the stdout content as a string
+        :param logs: a logging container from `aiida_quantumespresso.utils.mapping.get_logging_container`
+        :param error_map: optional mapping of a marker substring to the log entry that replaces the raw message
+        """
+        # Odd-indexed chunks are the text enclosed between two ``%`` delimiter lines; drop duplicates in order.
+        for error_message in dict.fromkeys(re.split(r'\%{10,}', stdout)[1::2]):
+            error_log = None
+
+            if error_map is not None:
+                for error_marker, error in error_map.items():
+                    if error_marker in error_message:
+                        error_log = error
+
+            logs.error.append(error_log if error_log is not None else error_message)
+
     def emit_logs(self, logging_dictionaries, ignore=None):
         """Emit the messages in one or multiple "log dictionaries" through the logger of the parser.
 

diff --git a/aiida_quantumespresso/parsers/dos.py b/aiida_quantumespresso/parsers/dos.py
@@ -4,7 +4,6 @@
 from aiida.orm import Dict, XyData
 
 from aiida_quantumespresso.parsers import QEOutputParsingError
-from aiida_quantumespresso.parsers.parse_raw.base import parse_output_base
 from .base import Parser
 
 
@@ -16,34 +15,22 @@ def parse(self, **kwargs):
 
         Retrieves dos output, and some basic information from the out_file, such as warnings and wall_time
         """
-        retrieved = self.retrieved
+        _, parsed_stdout, logs_stdout = self.parse_stdout_base()
+        self.emit_logs(logs_stdout)
 
-        # Read standard out
-        try:
-            filename_stdout = self.node.get_option('output_filename')  # or get_attribute(), but this is clearer
-            with retrieved.open(filename_stdout, 'r') as fil:
-                out_file = fil.readlines()
-        except OSError:
-            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
-
-        job_done = False
-        for i in range(len(out_file)):
-            line = out_file[-i]
-            if 'JOB DONE' in line:
-                job_done = True
-                break
-        if not job_done:
-            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE)
-
-        # check that the dos file is present, if it is, read it
+        for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']:
+            if exit_code in logs_stdout.error:
+                return self.exit(self.exit_codes.get(exit_code))
+
+        self.out('output_parameters', Dict(dict=parsed_stdout))
+
+        # Parse the DOS
         try:
-            with retrieved.open(self.node.process_class._DOS_FILENAME, 'r') as fil:
-                dos_file = fil.readlines()
+            with self.retrieved.open(self.node.process_class._DOS_FILENAME, 'r') as handle:
+                dos_file = handle.readlines()
         except OSError:
             return self.exit(self.exit_codes.ERROR_READING_DOS_FILE)
 
-        # end of initial checks
-
         array_names = [[], []]
         array_units = [[], []]
         array_names[0] = ['dos_energy', 'dos', 'integrated_dos']  # When spin is not displayed
@@ -79,11 +66,7 @@ def parse(self, **kwargs):
             y_units += ['states/eV']
         xy_data.set_y(y_arrays, y_names, y_units)
 
-        parsed_data, logs = parse_output_base(out_file, 'DOS')
-        self.emit_logs(logs)
-
         self.out('output_dos', xy_data)
-        self.out('output_parameters', Dict(dict=parsed_data))
 
 
 def parse_raw_dos(dos_file, array_names, array_units):

diff --git a/aiida_quantumespresso/parsers/matdyn.py b/aiida_quantumespresso/parsers/matdyn.py
@@ -11,17 +11,15 @@ class MatdynParser(Parser):
 
     def parse(self, **kwargs):
         """Parse the retrieved files from a `MatdynCalculation`."""
-        retrieved = self.retrieved
-        filename_stdout = self.node.get_option('output_filename')
-        filename_frequencies = MatdynCalculation._PHONON_FREQUENCIES_NAME
+        parsed_stdout, logs_stdout = self.parse_stdout_base()
 
-        if filename_stdout not in retrieved.list_object_names():
-            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
+        for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']:
+            if exit_code in logs_stdout.error:
+                return self.exit(self.exit_codes.get(exit_code))
 
-        if 'JOB DONE' not in retrieved.get_object_content(filename_stdout):
-            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE)
+        filename_frequencies = MatdynCalculation._PHONON_FREQUENCIES_NAME
 
-        if filename_frequencies not in retrieved.list_object_names():
+        if filename_frequencies not in self.retrieved.list_object_names():
             return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
 
         # Extract the kpoints from the input data and create the `KpointsData` for the `BandsData`
@@ -33,7 +31,7 @@ def parse(self, **kwargs):
             kpoints_for_bands = orm.KpointsData()
             kpoints_for_bands.set_kpoints(kpoints)
 
-        parsed_data = parse_raw_matdyn_phonon_file(retrieved.get_object_content(filename_frequencies))
+        parsed_data = parse_raw_matdyn_phonon_file(self.retrieved.get_object_content(filename_frequencies))
 
         try:
             num_kpoints = parsed_data.pop('num_kpoints')
@@ -50,11 +48,9 @@ def parse(self, **kwargs):
         for message in parsed_data['warnings']:
             self.logger.error(message)
 
-        self.out('output_parameters', orm.Dict(dict=parsed_data))
+        self.out('output_parameters', orm.Dict(dict=parsed_stdout))
         self.out('output_phonon_bands', output_bands)
 
-        return
-
 
 def parse_raw_matdyn_phonon_file(phonon_frequencies):
     """Parses the phonon frequencies file.

diff --git a/aiida_quantumespresso/parsers/neb.py b/aiida_quantumespresso/parsers/neb.py
@@ -2,6 +2,10 @@
 from aiida.common import NotExistent
 from aiida.orm import Dict
 
+from aiida.orm import TrajectoryData, ArrayData
+import os
+import numpy
+
 from aiida_quantumespresso.parsers import QEOutputParsingError
 from aiida_quantumespresso.parsers.parse_raw import convert_qe2aiida_structure
 from aiida_quantumespresso.parsers.parse_raw.pw import reduce_symmetries
@@ -24,20 +28,13 @@ def parse(self, **kwargs):
         permanently in the repository. The second required node is a filepath under the key `retrieved_temporary_files`
         which should contain the temporary retrieved files.
         """
-        from aiida.orm import TrajectoryData, ArrayData
-        import os
-        import numpy
-
-        PREFIX = self.node.process_class._PREFIX
+        stdout, parsed_stdout, logs_stdout = self.parse_stdout_base()
 
-        retrieved = self.retrieved
-        list_of_files = retrieved.list_object_names()  # Note: this includes folders, but not the files they contain.
+        for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']:
+            if exit_code in logs_stdout.error:
+                return self.exit(self.exit_codes.get(exit_code))
 
-        # The stdout is required for parsing
-        filename_stdout = self.node.get_attribute('output_filename')
-
-        if filename_stdout not in list_of_files:
-            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
+        PREFIX = self.node.process_class._PREFIX
 
         # Look for optional settings input node and potential 'parser_options' dictionary within it
         # Note that we look for both NEB and PW parser options under "inputs.settings.parser_options";
@@ -57,7 +54,6 @@ def parse(self, **kwargs):
 
         # First parse the Neb output
         try:
-            stdout = retrieved.get_object_content(filename_stdout)
             neb_out_dict, iteration_data, raw_successful = parse_raw_output_neb(stdout, neb_input_dict)
             # TODO: why do we ignore raw_successful ?
         except (OSError, QEOutputParsingError):
@@ -94,7 +90,7 @@ def parse(self, **kwargs):
                 if xml_filename in retrieved_files:
                     xml_file_path = os.path.join(relative_output_folder, xml_filename)
                     try:
-                        with retrieved.open(xml_file_path) as xml_file:
+                        with self.retrieved.open(xml_file_path) as xml_file:
                             parsed_data_xml, logs_xml = parse_pw_xml(xml_file, None)
                     except IOError:
                         return self.exit(self.exit_codes.ERROR_OUTPUT_XML_READ)
@@ -115,7 +111,7 @@ def parse(self, **kwargs):
             # look for pw output and parse it
             pw_out_file = os.path.join(f'{PREFIX}_{i + 1}', 'PW.out')
             try:
-                with retrieved.open(pw_out_file, 'r') as f:
+                with self.retrieved.open(pw_out_file, 'r') as f:
                     pw_out_text = f.read()  # Note: read() and not readlines()
             except IOError:
                 return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
@@ -187,15 +183,15 @@ def parse(self, **kwargs):
         # Load the original and interpolated energy profile along the minimum-energy path (mep)
         try:
             filename = PREFIX + '.dat'
-            with retrieved.open(filename, 'r') as handle:
+            with self.retrieved.open(filename, 'r') as handle:
                 mep = numpy.loadtxt(handle)
         except Exception:
             self.logger.warning(f'could not open expected output file `{filename}`.')
             mep = numpy.array([[]])
 
         try:
             filename = PREFIX + '.int'
-            with retrieved.open(filename, 'r') as handle:
+            with self.retrieved.open(filename, 'r') as handle:
                 interp_mep = numpy.loadtxt(handle)
         except Exception:
             self.logger.warning(f'could not open expected output file `{filename}`.')