Skip to content

Commit

Permalink
first draft of refactoring base stdout parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
mbercx committed Oct 12, 2021
1 parent 0874d95 commit b08b206
Show file tree
Hide file tree
Showing 22 changed files with 423 additions and 359 deletions.
4 changes: 0 additions & 4 deletions aiida_quantumespresso/calculations/dos.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,6 @@ def define(cls, spec):
spec.output('output_parameters', valid_type=orm.Dict)
spec.output('output_dos', valid_type=orm.XyData)
spec.default_output_node = 'output_parameters'
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_READING_DOS_FILE',
message='The dos file could not be read from the retrieved folder.')
# yapf: enable
4 changes: 0 additions & 4 deletions aiida_quantumespresso/calculations/matdyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@ def define(cls, spec):
spec.output('output_parameters', valid_type=orm.Dict)
spec.output('output_phonon_bands', valid_type=orm.BandsData)
spec.default_output_node = 'output_parameters'
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_OUTPUT_FREQUENCIES',
message='The output frequencies file could not be read from the retrieved folder.')
spec.exit_code(410, 'ERROR_OUTPUT_KPOINTS_MISSING',
Expand Down
6 changes: 6 additions & 0 deletions aiida_quantumespresso/calculations/namelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ def define(cls, spec):
help='Use an additional node for special settings')
spec.input('parent_folder', valid_type=(RemoteData, FolderData, SinglefileData), required=False,
help='Use a local or remote folder as parent folder (for restarts and similar)')
spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
message='The retrieved folder did not contain the required stdout output file.')
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
# yapf: enable

def _get_following_text(self):
Expand Down
6 changes: 0 additions & 6 deletions aiida_quantumespresso/calculations/projwfc.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,6 @@ def define(cls, spec):
spec.default_output_node = 'output_parameters'
spec.exit_code(301, 'ERROR_NO_RETRIEVED_TEMPORARY_FOLDER',
message='The retrieved temporary folder could not be accessed.')
spec.exit_code(303, 'ERROR_OUTPUT_XML_MISSING',
message='The retrieved folder did not contain the required XML file.')
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(320, 'ERROR_OUTPUT_XML_READ',
message='The XML output file could not be read.')
spec.exit_code(321, 'ERROR_OUTPUT_XML_PARSE',
Expand Down
6 changes: 0 additions & 6 deletions aiida_quantumespresso/calculations/pw2gw.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,10 @@ def define(cls, spec):
spec.output('eps', valid_type=orm.ArrayData,
help='The `eps` output node containing 5 arrays `energy`, `epsX`, `epsY`, `epsZ`, `epsTOT`')

spec.exit_code(302, 'ERROR_OUTPUT_STDOUT_MISSING',
message='The retrieved folder did not contain the required stdout output file.')
spec.exit_code(305, 'ERROR_OUTPUT_FILES',
message='The eps*.dat output files could not be read or parsed.')
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(311, 'ERROR_OUTPUT_STDOUT_PARSE',
message='The stdout output file could not be parsed.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_OUTPUT_FILES_INVALID_FORMAT',
message='The eps*.dat output files do not have the expected shape (N, 2).')
spec.exit_code(331, 'ERROR_OUTPUT_FILES_ENERGY_MISMATCH',
Expand Down
4 changes: 0 additions & 4 deletions aiida_quantumespresso/calculations/pw2wannier90.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ def define(cls, spec):
help='The output folder of a pw.x calculation')
spec.output('output_parameters', valid_type=Dict)
spec.default_output_node = 'output_parameters'
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(340, 'ERROR_GENERIC_QE_ERROR',
message='Encountered a generic error message')
spec.exit_code(350, 'ERROR_UNEXPECTED_PARSER_EXCEPTION',
Expand Down
4 changes: 0 additions & 4 deletions aiida_quantumespresso/calculations/q2r.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ def define(cls, spec):
super().define(spec)
spec.input('parent_folder', valid_type=(orm.RemoteData, orm.FolderData), required=True)
spec.output('force_constants', valid_type=ForceConstantsData)
spec.exit_code(310, 'ERROR_OUTPUT_STDOUT_READ',
message='The stdout output file could not be read.')
spec.exit_code(312, 'ERROR_OUTPUT_STDOUT_INCOMPLETE',
message='The stdout output file was incomplete probably because the calculation got interrupted.')
spec.exit_code(330, 'ERROR_READING_FORCE_CONSTANTS_FILE',
message='The force constants file could not be read.')
# yapf: enable
80 changes: 80 additions & 0 deletions aiida_quantumespresso/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,94 @@
All `Parser` implementations in `aiida-quantumespresso` must use this base class, not `aiida.parsers.Parser`.
"""
import re

from aiida.parsers import Parser as _BaseParser
from aiida_quantumespresso.utils.mapping import get_logging_container
from aiida_quantumespresso.parsers.parse_raw.base import convert_qe_time_to_sec

__all__ = ('Parser',)


class Parser(_BaseParser): # pylint: disable=abstract-method
"""Custom `Parser` class for `aiida-quantumespresso` parser implementations."""

error_map = {}
warning_map = {
'Warning:': None,
'DEPRECATED:': None,
}

def parse_stdout_base(self):
    """Perform the basic parsing of the ``stdout`` content of a Quantum ESPRESSO calculation.

    The name of the ``stdout`` file is taken from the ``output_filename`` option of the node and its content is
    read from the ``retrieved`` folder. Only generic content is looked at: the ``JOB DONE`` marker, the code
    name/version header, the wall time and error messages delimited by ``%%%%%%%%%%`` lines.

    The following labels can be appended to ``logs.error``:

    * ``ERROR_OUTPUT_STDOUT_MISSING``: the ``stdout`` file is not in the retrieved folder.
    * ``ERROR_OUTPUT_STDOUT_READ``: opening/reading the ``stdout`` file raised an ``OSError``.
    * ``ERROR_OUTPUT_STDOUT_INCOMPLETE``: the ``stdout`` does not contain ``JOB DONE``.

    :returns: tuple ``(stdout, parsed_data, logs)`` with the ``stdout`` content as a string, the dictionary of
        parsed data and the logging container, respectively. The tuple always has three elements; when the
        ``stdout`` file is missing or cannot be read, ``stdout`` is an empty string and ``parsed_data`` is empty.
    """
    logs = get_logging_container()
    parsed_data = {}

    filename_stdout = self.node.get_option('output_filename')

    # Every return below has the same shape, so that callers can always unpack three values.
    if filename_stdout not in self.retrieved.list_object_names():
        logs.error.append('ERROR_OUTPUT_STDOUT_MISSING')
        return '', parsed_data, logs

    try:
        with self.retrieved.open(filename_stdout, 'r') as handle:
            stdout = handle.read()
    except OSError:
        logs.error.append('ERROR_OUTPUT_STDOUT_READ')
        return '', parsed_data, logs

    # An incomplete stdout is only recorded, not returned on: the rest of the content is still parsed.
    if 'JOB DONE' not in stdout:
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    code_match = re.search(r'Program\s(?P<code_name>[A-Z|\_]+)\s(?P<code_version>[v\.\d\.|A-Z]+)\sstarts', stdout)

    if code_match:

        code_name = code_match.group('code_name')
        parsed_data['code_version'] = code_match.group('code_version')

        # The code name is matched by a character class that allows ``|``, so it has to be escaped before it is
        # interpolated into another regular expression.
        wall_match = re.search(
            fr'{re.escape(code_name)}\s+:[\s\S]+CPU\s+(?P<wall_time>[\.\d|s|m|d|h]+)\sWALL', stdout
        )

        if wall_match:
            wall_time = wall_match.group('wall_time')
            parsed_data['wall_time'] = wall_time

            try:
                parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(wall_time)
            except ValueError:
                # NOTE(review): the container is addressed with singular level names elsewhere in this method
                # (``logs.error``); ``warning`` is assumed to be the matching key - confirm against
                # ``get_logging_container``.
                logs.warning.append('Unable to convert wall time from `stdout` to seconds.')

    # Parse an error message with optional mapping of the message
    if re.search(r'\%{10,}', stdout):
        self.parse_stdout_errors(stdout, logs, self.error_map)

    return stdout, parsed_data, logs

@staticmethod
def parse_stdout_errors(stdout, logs, error_map=None):
    """Parse a Quantum ESPRESSO ``stdout`` for error messages delimited by ``%%%%%%%%%%`` lines.

    The content is split on runs of at least ten ``%`` characters and every odd chunk, i.e. the text enclosed
    between an opening and a closing delimiter, is treated as an error message. Duplicate messages are reported
    only once, in the order in which they first appear.

    :param stdout: the ``stdout`` content as a single string.
    :param logs: a logging container with an ``error`` list, to which one entry per unique message is appended.
    :param error_map: optional mapping of ``marker -> label``. If a marker is a substring of an error message,
        the label is logged instead of the raw message. If several markers match, the last one in the mapping
        wins. If none match, or the map is ``None``, the raw message is logged.
    """
    # ``dict.fromkeys`` removes duplicates while keeping the order of first appearance, so the resulting log
    # order is deterministic (a ``set`` would not guarantee this).
    error_messages = dict.fromkeys(re.split(r'\%{10,}', stdout)[1::2])

    for error_message in error_messages:

        error_log = None

        if error_map is not None:
            # Iterate over the items: iterating the mapping itself only yields the markers.
            for error_marker, error in error_map.items():
                if error_marker in error_message:
                    error_log = error

        logs.error.append(error_log if error_log is not None else error_message)

def emit_logs(self, logging_dictionaries, ignore=None):
"""Emit the messages in one or multiple "log dictionaries" through the logger of the parser.
Expand Down
39 changes: 11 additions & 28 deletions aiida_quantumespresso/parsers/dos.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from aiida.orm import Dict, XyData

from aiida_quantumespresso.parsers import QEOutputParsingError
from aiida_quantumespresso.parsers.parse_raw.base import parse_output_base
from .base import Parser


Expand All @@ -16,34 +15,22 @@ def parse(self, **kwargs):
Retrieves dos output, and some basic information from the out_file, such as warnings and wall_time
"""
retrieved = self.retrieved
_, parsed_stdout, logs_stdout = self.parse_stdout_base()
self.emit_logs(logs_stdout)

# Read standard out
try:
filename_stdout = self.node.get_option('output_filename') # or get_attribute(), but this is clearer
with retrieved.open(filename_stdout, 'r') as fil:
out_file = fil.readlines()
except OSError:
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

job_done = False
for i in range(len(out_file)):
line = out_file[-i]
if 'JOB DONE' in line:
job_done = True
break
if not job_done:
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE)

# check that the dos file is present, if it is, read it
for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']:
if exit_code in logs_stdout.error:
return self.exit(self.exit_codes.get(exit_code))

self.out('output_parameters', Dict(dict=parsed_stdout))

# Parse the DOS
try:
with retrieved.open(self.node.process_class._DOS_FILENAME, 'r') as fil:
dos_file = fil.readlines()
with self.retrieved.open(self.node.process_class._DOS_FILENAME, 'r') as handle:
dos_file = handle.readlines()
except OSError:
return self.exit(self.exit_codes.ERROR_READING_DOS_FILE)

# end of initial checks

array_names = [[], []]
array_units = [[], []]
array_names[0] = ['dos_energy', 'dos', 'integrated_dos'] # When spin is not displayed
Expand Down Expand Up @@ -79,11 +66,7 @@ def parse(self, **kwargs):
y_units += ['states/eV']
xy_data.set_y(y_arrays, y_names, y_units)

parsed_data, logs = parse_output_base(out_file, 'DOS')
self.emit_logs(logs)

self.out('output_dos', xy_data)
self.out('output_parameters', Dict(dict=parsed_data))


def parse_raw_dos(dos_file, array_names, array_units):
Expand Down
20 changes: 8 additions & 12 deletions aiida_quantumespresso/parsers/matdyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,15 @@ class MatdynParser(Parser):

def parse(self, **kwargs):
"""Parse the retrieved files from a `MatdynCalculation`."""
retrieved = self.retrieved
filename_stdout = self.node.get_option('output_filename')
filename_frequencies = MatdynCalculation._PHONON_FREQUENCIES_NAME
parsed_stdout, logs_stdout = self.parse_stdout_base()

if filename_stdout not in retrieved.list_object_names():
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']:
if exit_code in logs_stdout.error:
return self.exit(self.exit_codes.get(exit_code))

if 'JOB DONE' not in retrieved.get_object_content(filename_stdout):
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE)
filename_frequencies = MatdynCalculation._PHONON_FREQUENCIES_NAME

if filename_frequencies not in retrieved.list_object_names():
if filename_frequencies not in self.retrieved.list_object_names():
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

# Extract the kpoints from the input data and create the `KpointsData` for the `BandsData`
Expand All @@ -33,7 +31,7 @@ def parse(self, **kwargs):
kpoints_for_bands = orm.KpointsData()
kpoints_for_bands.set_kpoints(kpoints)

parsed_data = parse_raw_matdyn_phonon_file(retrieved.get_object_content(filename_frequencies))
parsed_data = parse_raw_matdyn_phonon_file(self.retrieved.get_object_content(filename_frequencies))

try:
num_kpoints = parsed_data.pop('num_kpoints')
Expand All @@ -50,11 +48,9 @@ def parse(self, **kwargs):
for message in parsed_data['warnings']:
self.logger.error(message)

self.out('output_parameters', orm.Dict(dict=parsed_data))
self.out('output_parameters', orm.Dict(dict=parsed_stdout))
self.out('output_phonon_bands', output_bands)

return


def parse_raw_matdyn_phonon_file(phonon_frequencies):
"""Parses the phonon frequencies file.
Expand Down
30 changes: 13 additions & 17 deletions aiida_quantumespresso/parsers/neb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
from aiida.common import NotExistent
from aiida.orm import Dict

from aiida.orm import TrajectoryData, ArrayData
import os
import numpy

from aiida_quantumespresso.parsers import QEOutputParsingError
from aiida_quantumespresso.parsers.parse_raw import convert_qe2aiida_structure
from aiida_quantumespresso.parsers.parse_raw.pw import reduce_symmetries
Expand All @@ -24,20 +28,13 @@ def parse(self, **kwargs):
permanently in the repository. The second required node is a filepath under the key `retrieved_temporary_files`
which should contain the temporary retrieved files.
"""
from aiida.orm import TrajectoryData, ArrayData
import os
import numpy

PREFIX = self.node.process_class._PREFIX
stdout, parsed_stdout, logs_stdout = self.parse_stdout_base()

retrieved = self.retrieved
list_of_files = retrieved.list_object_names() # Note: this includes folders, but not the files they contain.
for exit_code in ['ERROR_OUTPUT_STDOUT_MISSING', 'ERROR_OUTPUT_STDOUT_READ', 'ERROR_OUTPUT_STDOUT_INCOMPLETE']:
if exit_code in logs_stdout.error:
return self.exit(self.exit_codes.get(exit_code))

# The stdout is required for parsing
filename_stdout = self.node.get_attribute('output_filename')

if filename_stdout not in list_of_files:
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
PREFIX = self.node.process_class._PREFIX

# Look for optional settings input node and potential 'parser_options' dictionary within it
# Note that we look for both NEB and PW parser options under "inputs.settings.parser_options";
Expand All @@ -57,7 +54,6 @@ def parse(self, **kwargs):

# First parse the Neb output
try:
stdout = retrieved.get_object_content(filename_stdout)
neb_out_dict, iteration_data, raw_successful = parse_raw_output_neb(stdout, neb_input_dict)
# TODO: why do we ignore raw_successful ?
except (OSError, QEOutputParsingError):
Expand Down Expand Up @@ -94,7 +90,7 @@ def parse(self, **kwargs):
if xml_filename in retrieved_files:
xml_file_path = os.path.join(relative_output_folder, xml_filename)
try:
with retrieved.open(xml_file_path) as xml_file:
with self.retrieved.open(xml_file_path) as xml_file:
parsed_data_xml, logs_xml = parse_pw_xml(xml_file, None)
except IOError:
return self.exit(self.exit_codes.ERROR_OUTPUT_XML_READ)
Expand All @@ -115,7 +111,7 @@ def parse(self, **kwargs):
# look for pw output and parse it
pw_out_file = os.path.join(f'{PREFIX}_{i + 1}', 'PW.out')
try:
with retrieved.open(pw_out_file, 'r') as f:
with self.retrieved.open(pw_out_file, 'r') as f:
pw_out_text = f.read() # Note: read() and not readlines()
except IOError:
return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)
Expand Down Expand Up @@ -187,15 +183,15 @@ def parse(self, **kwargs):
# Load the original and interpolated energy profile along the minimum-energy path (mep)
try:
filename = PREFIX + '.dat'
with retrieved.open(filename, 'r') as handle:
with self.retrieved.open(filename, 'r') as handle:
mep = numpy.loadtxt(handle)
except Exception:
self.logger.warning(f'could not open expected output file `{filename}`.')
mep = numpy.array([[]])

try:
filename = PREFIX + '.int'
with retrieved.open(filename, 'r') as handle:
with self.retrieved.open(filename, 'r') as handle:
interp_mep = numpy.loadtxt(handle)
except Exception:
self.logger.warning(f'could not open expected output file `{filename}`.')
Expand Down
Loading

0 comments on commit b08b206

Please sign in to comment.