Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PhParser: allow for pattern initialization #1034

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions src/aiida_quantumespresso/parsers/parse_raw/ph.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,3 +438,37 @@ def parse_ph_dynmat(data, logs, lattice_parameter=None, also_eigenvectors=False,
parsed_data['eigenvectors'] = eigenvectors

return parsed_data


def parse_initialization_qpoints(stdout: str) -> dict:
    """Parse the q-point information printed by a ``ph.x`` initialization run.

    An initialization run is one where both ``start_irr`` and ``last_irr`` are
    set to 0 in the inputs, so ``ph.x`` only prints the q-point grid and exits.

    :param stdout: content of the ``ph.x`` standard output.
    :return: dictionary possibly containing the keys ``number_of_qpoints`` and
        ``q_points``; empty when neither could be parsed from ``stdout``.
    """
    import re

    results = {}

    # `( N q-points)` gives the total number of q-points of the grid.
    count_match = re.search(r'\(\s*(\d+)\s*q-points\)', stdout)
    if count_match is not None:
        results['number_of_qpoints'] = int(count_match.group(1))

    # Locate the table listing the q-point coordinates, e.g.:
    #     (   8 q-points):
    #       N         xq(1)         xq(2)         xq(3)
    #       1   0.000000000   0.000000000   0.000000000
    table_match = re.search(
        r'\(\s*\d+\s*q-points\):\s*\n\s*N\s*xq\(1\)\s*xq\(2\)\s*xq\(3\)\s*\n((?:\s*\d+\s*[\d\.\-\s]+\n?)*)',
        stdout,
    )
    if table_match is not None:
        # Each row is an index followed by the three Cartesian components.
        rows = re.findall(r'\s*\d+\s*([\d\.\-]+)\s*([\d\.\-]+)\s*([\d\.\-]+)', table_match.group(1))
        results['q_points'] = [[float(component) for component in row] for row in rows]

    return results
23 changes: 22 additions & 1 deletion src/aiida_quantumespresso/parsers/ph.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,25 @@
from aiida import orm

from aiida_quantumespresso.calculations.ph import PhCalculation
from aiida_quantumespresso.parsers.parse_raw.ph import parse_raw_ph_output
from aiida_quantumespresso.parsers.parse_raw.ph import parse_initialization_qpoints, parse_raw_ph_output
from aiida_quantumespresso.utils.mapping import get_logging_container

from .base import BaseParser


def _is_initialization(parameters: dict) -> bool:
"""Return whether the `ph.x` was run with (patterns) initialization options.

When `ph.x` is used with `start_irr` and `last_irr` set to 0, the binary doesn't
produce the usual `JOB DONE` statement, and immediately exits the job. This is
used to quickly generate the displacement patterns needed for a correct parallelization
of the code over both q-points and irreducible representations (irreps).
"""
if 'start_irr' in parameters['INPUTPH'] and 'last_irr' in parameters['INPUTPH']:
return parameters['INPUTPH']['start_irr'] == parameters['INPUTPH']['last_irr'] == 0
return False


class PhParser(BaseParser):
"""``Parser`` implementation for the ``PhCalculation`` calculation job class."""

Expand All @@ -28,6 +41,14 @@ def parse(self, **kwargs):

stdout, parsed_data, logs = self.parse_stdout_from_retrieved(logs)

# When `start_irr` and `last_irr` are set to 0, `JOB DONE` is not in stdout (expected behaviour).
# Though, we at least expect that `stdout` is not empty, otherwise something went wrong.
if stdout and _is_initialization(self.node.inputs.parameters.get_dict()):
parameters = parse_initialization_qpoints(stdout)
if parameters:
self.out('output_parameters', orm.Dict(parameters))
return
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case that parameters is empty, wouldn't that reasonably correspond to some kind of error? Or are you intentionally letting it continue the parsing in that case to find a generic error?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah in principle ph.x can still throw some errors, say if something was wrong with some files etc.


# If the scheduler detected OOW, simply keep that exit code by not returning anything more specific.
if self.node.exit_status == PhCalculation.exit_codes.ERROR_SCHEDULER_OUT_OF_WALLTIME:
return
Expand Down
67 changes: 67 additions & 0 deletions tests/parsers/fixtures/ph/initialization/aiida.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@

Program PHONON v.7.2 starts on 18Jun2024 at 18:33:27

This program is part of the open-source Quantum ESPRESSO suite
for quantum simulation of materials; please cite
"P. Giannozzi et al., J. Phys.:Condens. Matter 21 395502 (2009);
"P. Giannozzi et al., J. Phys.:Condens. Matter 29 465901 (2017);
"P. Giannozzi et al., J. Chem. Phys. 152 154105 (2020);
URL http://www.quantum-espresso.org",
in publications or presentations arising from this work. More details at
http://www.quantum-espresso.org/quote

Parallel version (MPI), running on 8 processors

MPI processes distributed on 1 nodes
37704 MiB available memory on the printing compute node when the environment starts

Reading input from ph.init.in

Reading xml data from directory:

./tmp/graphene.save/

R & G space division: proc/nbgrp/npool/nimage = 8
Subspace diagonalization in iterative solution of the eigenvalue problem:
a serial algorithm will be used


IMPORTANT: XC functional enforced from input :
Exchange-correlation= PZ
( 1 1 0 0 0 0 0)
Any further DFT definition will be discarded
Please, verify this is what you really want


Parallelization info
--------------------
sticks: dense smooth PW G-vecs: dense smooth PW
Min 30 30 10 3040 3040 660
Max 31 31 11 3065 3065 691
Sum 241 241 85 24369 24369 5409

Using Slab Decomposition

----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D
The code is running with the 2D cutoff
Please refer to:
Sohier, T., Calandra, M., & Mauri, F. (2017),
Density functional perturbation theory for gated two-dimensional heterostructu
res:
Theoretical developments and application to flexural phonons in graphene.
Physical Review B, 96(7), 75448. https://doi.org/10.1103/PhysRevB.96.075448
----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D----2D
Reading collected, re-writing distributed wavefunctions in ./tmp/


Dynamical matrices for ( 2, 2, 2) uniform grid of q-points
( 8 q-points):
N xq(1) xq(2) xq(3)
1 0.000000000 0.000000000 0.000000000
2 0.000000000 0.000000000 -0.061660223
3 0.000000000 -0.577350269 0.000000000
4 0.000000000 -0.577350269 -0.061660223
5 -0.500000000 -0.288675135 0.000000000
6 -0.500000000 -0.288675135 -0.061660223
7 -0.500000000 -0.866025404 0.000000000
8 -0.500000000 -0.866025404 -0.061660223
20 changes: 19 additions & 1 deletion tests/parsers/test_ph.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

def generate_inputs():
    """Return only those inputs that the parser will expect to be there."""
    parameters = orm.Dict({'INPUTPH': {}})
    return {'parameters': parameters}


@pytest.mark.parametrize('test_name', ['default', 'single_qpoint', 'no_modes_printed'])
Expand Down Expand Up @@ -61,6 +61,24 @@ def test_ph_out_of_walltime(fixture_localhost, generate_calc_job_node, generate_
data_regression.check(results['output_parameters'].get_dict())


def test_ph_initialization(fixture_localhost, generate_calc_job_node, generate_parser, data_regression):
    """Test a `ph.x` calculation performed with `start_irr` and `last_irr` set to 0."""
    # The fixture name selects the `initialization` stdout file under the test fixtures.
    name = 'initialization'
    entry_point_calc_job = 'quantumespresso.ph'
    entry_point_parser = 'quantumespresso.ph'

    # `start_irr` / `last_irr` both 0 triggers the initialization-only parsing branch.
    inputs = {'parameters': orm.Dict({'INPUTPH': {'start_irr': 0, 'last_irr': 0}})}

    node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, name, inputs)
    parser = generate_parser(entry_point_parser)
    results, calcfunction = parser.parse_from_node(node, store_provenance=False)

    # The run must finish cleanly even though `JOB DONE` is absent from the stdout.
    assert calcfunction.is_finished, calcfunction.exception
    assert calcfunction.is_finished_ok, calcfunction.exit_message
    assert 'output_parameters' in results
    # Compare the parsed q-point data against the stored regression file.
    data_regression.check(results['output_parameters'].get_dict())


def test_ph_failed_computing_cholesky(fixture_localhost, generate_calc_job_node, generate_parser):
"""Test the parsing of a calculation that failed during cholesky factorization.

Expand Down
26 changes: 26 additions & 0 deletions tests/parsers/test_ph/test_ph_initialization.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
number_of_qpoints: 8
q_points:
- - 0.0
- 0.0
- 0.0
- - 0.0
- 0.0
- -0.061660223
- - 0.0
- -0.577350269
- 0.0
- - 0.0
- -0.577350269
- -0.061660223
- - -0.5
- -0.288675135
- 0.0
- - -0.5
- -0.288675135
- -0.061660223
- - -0.5
- -0.866025404
- 0.0
- - -0.5
- -0.866025404
- -0.061660223
10 changes: 8 additions & 2 deletions tests/workflows/ph/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
from aiida_quantumespresso.workflows.ph.base import PhBaseWorkChain


def generate_inputs():
    """Return only those inputs that the parser will expect to be there."""
    parameters = orm.Dict({'INPUTPH': {}})
    return {'parameters': parameters}


@pytest.fixture
def generate_ph_calc_job_node(generate_calc_job_node, fixture_localhost):
"""Generate a ``CalcJobNode`` that would have been created by a ``PhCalculation``."""
Expand Down Expand Up @@ -221,9 +226,10 @@ def test_merge_outputs(

entry_point_calc_job = 'quantumespresso.ph'
parser = generate_parser('quantumespresso.ph')
inputs = generate_inputs()

node_1 = generate_calc_job_node(
entry_point_name=entry_point_calc_job, computer=fixture_localhost, test_name=f'{name}_1'
entry_point_name=entry_point_calc_job, computer=fixture_localhost, test_name=f'{name}_1', inputs=inputs
)
results_1, calcjob_1 = parser.parse_from_node(node_1, store_provenance=False)

Expand All @@ -236,7 +242,7 @@ def test_merge_outputs(
assert calcjob_1.exit_status == PhCalculation.exit_codes.ERROR_OUT_OF_WALLTIME.status

node_2 = generate_calc_job_node(
entry_point_name=entry_point_calc_job, computer=fixture_localhost, test_name=f'{name}_2'
entry_point_name=entry_point_calc_job, computer=fixture_localhost, test_name=f'{name}_2', inputs=inputs
)
results_2, calcjob_2 = parser.parse_from_node(node_2, store_provenance=False)

Expand Down
Loading