Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parse_retrieved_files option for the PP plugin. #1029

Merged
12 commits merged
Nov 20, 2024
18 changes: 14 additions & 4 deletions src/aiida_quantumespresso/calculations/pp.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# -*- coding: utf-8 -*-
"""`CalcJob` implementation for the pp.x code of Quantum ESPRESSO."""
import os
import warnings

from aiida import orm
from aiida.common import datastructures, exceptions
from aiida.common.warnings import AiidaDeprecationWarning

from aiida_quantumespresso.calculations import _lowercase_dict, _uppercase_dict
from aiida_quantumespresso.utils.convert import convert_input_to_namelist_entry
Expand Down Expand Up @@ -82,7 +84,9 @@ def define(cls, spec):
spec.input('metadata.options.output_filename', valid_type=str, default=cls._DEFAULT_OUTPUT_FILE)
spec.input('metadata.options.parser_name', valid_type=str, default='quantumespresso.pp')
spec.input('metadata.options.withmpi', valid_type=bool, default=True)
spec.input('metadata.options.keep_plot_file', valid_type=bool, default=False)
spec.input('metadata.options.keep_plot_file', valid_type=bool, required=False)
spec.input('metadata.options.keep_data_files', valid_type=bool, default=False)
spec.input('metadata.options.parse_data_files', valid_type=bool, default=True)

spec.output('output_parameters', valid_type=orm.Dict)
spec.output('output_data', valid_type=orm.ArrayData)
Expand Down Expand Up @@ -218,10 +222,16 @@ def prepare_for_submission(self, folder): # pylint: disable=too-many-branches,t
# distinguish them from one another. The `fileout` filename will be the full data filename with the `fileout`
# value as a suffix.
retrieve_tuples = [self._FILEOUT, (f'{self._FILPLOT}_*{self._FILEOUT}', '.', 0)]

if self.inputs.metadata.options.keep_plot_file:
if 'keep_plot_file' in self.inputs.metadata.options:
self.inputs.metadata.options.keep_data_files = self.inputs.metadata.options.keep_plot_file
warnings.warn(
"The input parameter 'keep_plot_file' is deprecated and will be removed in version 5.0.0. "
"Please use 'keep_data_files' instead.", AiidaDeprecationWarning
)
if self.inputs.metadata.options.keep_data_files:
calcinfo.retrieve_list.extend(retrieve_tuples)
else:
# If we do not want to parse the retrieved files, temporary retrieval is meaningless
elif self.inputs.metadata.options.parse_data_files:
calcinfo.retrieve_temporary_list.extend(retrieve_tuples)

return calcinfo
58 changes: 29 additions & 29 deletions src/aiida_quantumespresso/parsers/pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,35 +117,35 @@ def get_key_from_filename(filename):
matches = re.search(pattern, filename)
return matches.group(1)

for filename in filenames:
# Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data
# of each file is released from memory after parsing, to improve memory usage.
if filename.endswith(filename_suffix):
# Read the file to memory
try:
with file_opener(filename) as handle:
data_raw = handle.read()
except OSError:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)
# Parse the file
try:
key = get_key_from_filename(filename)
data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']])))
del data_raw
except Exception as exception: # pylint: disable=broad-except
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception)

# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
# than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files
# should be retrieved there really is no way to check this explicitly.
if not data_parsed:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)

# Create output nodes
if len(data_parsed) == 1:
self.out('output_data', data_parsed[0][1])
else:
self.out('output_data_multiple', dict(data_parsed))
if self.node.base.attributes.get('parse_data_files'):
for filename in filenames:
# Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data
# of each file is released from memory after parsing, to improve memory usage.
if filename.endswith(filename_suffix):
# Read the file to memory
try:
with file_opener(filename) as handle:
data_raw = handle.read()
except OSError:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)
# Parse the file
try:
key = get_key_from_filename(filename)
data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']])))
del data_raw
except Exception as exception: # pylint: disable=broad-except
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception)

# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
# than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files
# should be retrieved there really is no way to check this explicitly.
if not data_parsed:
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)

if len(data_parsed) == 1:
self.out('output_data', data_parsed[0][1])
else:
self.out('output_data_multiple', dict(data_parsed))

return self.exit(logs=logs)

Expand Down
24 changes: 22 additions & 2 deletions tests/calculations/test_pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ def test_pp_default(fixture_sandbox, generate_calc_job, generate_inputs, file_re
file_regression.check(input_written, encoding='utf-8', extension='.in')


def test_pp_keep_plot_file(fixture_sandbox, generate_calc_job, generate_inputs):
def test_pp_keep_data_files(fixture_sandbox, generate_calc_job, generate_inputs):
"""Test a `PpCalculation` where we want to retrieve the plot file."""
entry_point_name = 'quantumespresso.pp'
inputs = generate_inputs()
inputs.metadata.options.keep_plot_file = True
inputs.metadata.options.keep_data_files = True

calc_info = generate_calc_job(fixture_sandbox, entry_point_name, inputs)
retrieve_list = ['aiida.out', 'aiida.fileout', ('aiida.filplot_*aiida.fileout', '.', 0)]
Expand All @@ -80,6 +80,26 @@ def test_pp_keep_plot_file(fixture_sandbox, generate_calc_job, generate_inputs):
assert element in calc_info.retrieve_list


def test_pp_parse_data_files(fixture_sandbox, generate_calc_job, generate_inputs):
    """Test a `PpCalculation` with ``parse_data_files = False``.

    With ``keep_data_files`` left at its default (``False``) and ``parse_data_files`` disabled,
    the data files should be neither retrieved permanently nor retrieved temporarily.
    """
    entry_point_name = 'quantumespresso.pp'
    inputs = generate_inputs()
    inputs.metadata.options.parse_data_files = False

    calc_info = generate_calc_job(fixture_sandbox, entry_point_name, inputs)
    retrieve_list = ['aiida.out']  # only the stdout file should remain in the retrieve list
    retrieve_temporary_list = []
    local_copy_list = []

    # When both `keep_data_files` (default) and `parse_data_files` are set to False, the data files won't be pulled.
    assert isinstance(calc_info, datastructures.CalcInfo)
    assert sorted(calc_info.local_copy_list) == sorted(local_copy_list)
    assert sorted(calc_info.retrieve_temporary_list) == sorted(retrieve_temporary_list)
    assert len(calc_info.retrieve_list) == 1
    for element in retrieve_list:
        assert element in calc_info.retrieve_list


def test_pp_cmdline_setting(fixture_sandbox, generate_calc_job, generate_inputs):
"""Test a `PpCalculation` with user-defined cmdline settings."""
entry_point_name = 'quantumespresso.pp'
Expand Down
75 changes: 63 additions & 12 deletions tests/parsers/test_pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,11 @@ def test_pp_default_1d(
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'

node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_1d', generate_inputs_1d)
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_1d', generate_inputs_1d, attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)

Expand Down Expand Up @@ -157,9 +161,13 @@ def test_pp_default_1d_spherical(
"""Test a default `pp.x` calculation producing a 1D data set with spherical averaging."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'

attributes = {'keep_data_files': False, 'parse_data_files': True}
node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_1d_spherical', generate_inputs_1d_spherical
entry_point_calc_job,
fixture_localhost,
'default_1d_spherical',
generate_inputs_1d_spherical,
attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)
Expand Down Expand Up @@ -200,8 +208,11 @@ def test_pp_default_2d(
"""Test a default `pp.x` calculation producing a 2D data set."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_2d', generate_inputs_2d)
node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_2d', generate_inputs_2d, attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)

Expand Down Expand Up @@ -237,8 +248,11 @@ def test_pp_default_polar(
"""Test a default `pp.x` calculation producing a polar coordinates data set."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_polar', generate_inputs_polar)
node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_polar', generate_inputs_polar, attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)

Expand Down Expand Up @@ -267,8 +281,11 @@ def test_pp_default_3d(
"""Test a default `pp.x` calculation producing a 3D data set."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_3d', generate_inputs_3d)
node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_3d', generate_inputs_3d, attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)

Expand Down Expand Up @@ -297,12 +314,16 @@ def test_pp_default_3d(
})


def test_pp_default_3d_keep_plot_file(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir):
"""Test a `pp.x` calculation where `keep_plot_file=False` meaning files will be parsed from temporary directory."""
def test_pp_default_3d_keep_data_files(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir):
"""Test a `pp.x` calculation where `keep_data_files=False` meaning files will be parsed from temporary directory."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'

attributes = {'options': {'keep_plot_file': False}, 'retrieve_temporary_list': ['aiida.fileout']}
attributes = {
'keep_data_files': False,
'parse_data_files': True,
'retrieve_temporary_list': ['aiida.fileout'],
}
node = generate_calc_job_node(
entry_point_calc_job,
test_name='default_3d',
Expand All @@ -320,12 +341,36 @@ def test_pp_default_3d_keep_plot_file(generate_calc_job_node, generate_parser, g
assert len(results['output_data'].get_arraynames()) == 4


def test_pp_default_3d_parse_data_files(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir):
    """Test parsing a `pp.x` calculation with ``parse_data_files=False``.

    The parser should finish successfully, producing only the output parameters and no data node.
    """
    entry_point = 'quantumespresso.pp'

    calc_node = generate_calc_job_node(
        entry_point,
        test_name='default_3d',
        inputs=generate_inputs_3d,
        attributes={'keep_data_files': False, 'parse_data_files': False},
    )
    results, calcfunction = generate_parser(entry_point).parse_from_node(
        calc_node, store_provenance=False, retrieved_temporary_folder=tmpdir
    )

    assert calcfunction.is_finished, calcfunction.exception
    assert calcfunction.is_finished_ok, calcfunction.exit_message
    assert 'output_parameters' in results
    assert 'output_data' not in results


def test_pp_default_3d_multiple(generate_calc_job_node, generate_parser, generate_inputs_3d):
"""Test a default `pp.x` calculation producing multiple files in 3D format."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(entry_point_calc_job, test_name='default_3d_multiple', inputs=generate_inputs_3d)
node = generate_calc_job_node(
entry_point_calc_job, test_name='default_3d_multiple', inputs=generate_inputs_3d, attributes=attributes
)
parser = generate_parser(entry_point_parser)
results, calcfunction = parser.parse_from_node(node, store_provenance=False)

Expand Down Expand Up @@ -364,9 +409,14 @@ def test_pp_default_3d_failed_missing_data(
"""Test a default `pp.x` calculation where the aiida.fileout file is missing."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_3d_failed_missing_data', generate_inputs_3d
entry_point_calc_job,
fixture_localhost,
'default_3d_failed_missing_data',
generate_inputs_3d,
attributes=attributes
)
parser = generate_parser(entry_point_parser)
_, calcfunction = parser.parse_from_node(node, store_provenance=False)
Expand Down Expand Up @@ -398,9 +448,10 @@ def test_pp_default_3d_failed_format(fixture_localhost, generate_calc_job_node,
"""Test a default `pp.x` calculation where an unsupported output file format is used."""
entry_point_calc_job = 'quantumespresso.pp'
entry_point_parser = 'quantumespresso.pp'
attributes = {'keep_data_files': False, 'parse_data_files': True}

node = generate_calc_job_node(
entry_point_calc_job, fixture_localhost, 'default_3d_failed_format', generate_inputs_3d
entry_point_calc_job, fixture_localhost, 'default_3d_failed_format', generate_inputs_3d, attributes=attributes
)
parser = generate_parser(entry_point_parser)
_, calcfunction = parser.parse_from_node(node, store_provenance=False)
Expand Down
Loading