diff --git a/docs/src/getting-started.rst b/docs/src/getting-started.rst
index 8d352a782..113c7516a 100644
--- a/docs/src/getting-started.rst
+++ b/docs/src/getting-started.rst
@@ -16,9 +16,8 @@ as the system has installed:
- Python version 3.6 or greater (starting from version 2.0, i-PI is not Python2
compatible)
-
- The Python numerical library NumPy
-src/getting-started.rst-
+
See :ref:`runningsimulations` for more details on how to launch i-PI.
Additionally, most client codes will have their own requirements. Many
diff --git a/docs/src/output-files.rst b/docs/src/output-files.rst
index eda6e1f42..e075f21ec 100644
--- a/docs/src/output-files.rst
+++ b/docs/src/output-files.rst
@@ -218,3 +218,36 @@ run
> python i-pi RESTART
+
+Reading output files
+--------------------
+
+It can be useful to parse the output files of i-PI into a format that can
+be readily manipulated in a custom Python script. To this end, i-PI provides
+a few utilities. `ipi.read_output` that can be used to parse a property output
+file, that returns data blocks as a dictionary of numpy array, and additional
+information from the header (such as units, and the description of each
+property) as a separate dictionary
+
+.. code-block::
+
+ from ipi import read_output
+ data, info = read_output("simulation.out")
+
+Trajectory files can be read with `ipi.read_trajectory`. This reads the
+trajectory output into a list of `ase.Atoms` objects (hence this functionality
+has a dependency on `ase`), converting positions and cell to angstrom, and
+moving other properties to arrays and converting
+them to ASE units (e.g. forces are in eV/Å). `extras` output files (that
+contain either numerical data, or raw strings, returned by the driver code
+in addition to energy and forces) can be processed by using `format='extras'`
+and an option: the call will then return a dictionary with an entry having the
+name of the type of extra (if present) and either a list of the raw strings,
+or a numpy array with the data. A second dictionary entry contains the list
+of step numbers.
+
+.. code-block::
+
+ from ipi import read_trajectory
+ data = read_trajectory("simulation.dipoles", format="extras")
+
diff --git a/examples/features/path_integral_molecular_dynamics/piglet_liquid_water/input.xml b/examples/features/path_integral_molecular_dynamics/piglet_liquid_water/input.xml
index 8065346ea..1db387739 100644
--- a/examples/features/path_integral_molecular_dynamics/piglet_liquid_water/input.xml
+++ b/examples/features/path_integral_molecular_dynamics/piglet_liquid_water/input.xml
@@ -2,6 +2,7 @@
100
diff --git a/ipi/__init__.py b/ipi/__init__.py
index c37bad218..54fecc771 100644
--- a/ipi/__init__.py
+++ b/ipi/__init__.py
@@ -2,6 +2,18 @@
The i-PI package.
"""
-__all__ = ["clients", "engine", "inputs", "interfaces", "utils", "ipi_global_settings"]
+# expose some utility functions in a more direct way
+from ipi.utils.parsing import read_output, read_trajectory
+
+__all__ = [
+ "clients",
+ "engine",
+ "inputs",
+ "interfaces",
+ "utils",
+ "ipi_global_settings",
+ "read_output",
+ "read_trajectory",
+]
ipi_global_settings = {"floatformat": "%16.8e"}
diff --git a/ipi/utils/__init__.py b/ipi/utils/__init__.py
index acbbb8f32..e084081d6 100644
--- a/ipi/utils/__init__.py
+++ b/ipi/utils/__init__.py
@@ -11,6 +11,7 @@
"mathtools",
"prng",
"inputvalue",
+ "parsing",
"nmtransform",
"messages",
"softexit",
diff --git a/ipi/utils/io/io_units.py b/ipi/utils/io/io_units.py
index fcd649d52..b01685575 100755
--- a/ipi/utils/io/io_units.py
+++ b/ipi/utils/io/io_units.py
@@ -125,7 +125,4 @@ def process_units(
atoms.names[:] = names
atoms.m[:] = masses
- return {
- "atoms": atoms,
- "cell": cell,
- }
+ return {"atoms": atoms, "cell": cell, "comment": comment}
diff --git a/ipi/utils/messages.py b/ipi/utils/messages.py
index 5d3cadf2f..35d122d70 100644
--- a/ipi/utils/messages.py
+++ b/ipi/utils/messages.py
@@ -107,7 +107,7 @@ def banner():
| ################################# |
\__#_/ \____/ \____/ \_#__/
# _ _______ _____ #
- # (_) |_ __ \|_ _| # -*- v 2.6.0 -*-
+ # (_) |_ __ \|_ _| # -*- v 2.6.3 -*-
# __ ______ | |__) | | | #
Y [ ||______|| ___/ | | # A Universal Force Engine
0 0 | | _| |_ _| |_ #
diff --git a/ipi/utils/parsing.py b/ipi/utils/parsing.py
new file mode 100644
index 000000000..a1612ea5b
--- /dev/null
+++ b/ipi/utils/parsing.py
@@ -0,0 +1,222 @@
+""" Utility functions to parse i-PI output files.
+
+These are meant to be used in Python post-processing pipelines, so
+trajectory files are read as ASE objects (assuming units to be
+Angstrom and eV), and output files are read in a dictionary of
+numpy arrays.
+"""
+
+import re
+import numpy as np
+from ipi.utils.units import unit_to_user
+
+try:
+ import ase
+except ImportError:
+ ase = None
+
+__all__ = ["read_output", "read_trajectory"]
+
+
+def read_output(filename):
+ """Reads an i-PI output file and returns a dictionary with the properties in a tidy order,
+ and information on units and descriptions of the content.
+
+ Usage:
+ read_output("filename")
+
+ Returns:
+ values, info
+
+ values: a dictionary with the property names as keys, and the values as numpy arrays
+ info: a dictionary with the property names as keys and as values tuples of (units, description)
+ """
+
+ # Regex pattern to match header lines and capture relevant parts
+ header_pattern = re.compile(
+ r"#\s*(column|cols\.)\s+(\d+)(?:-(\d+))?\s*-->\s*([^\s\{]+)(?:\{([^\}]+)\})?\s*:\s*(.*)"
+ )
+
+ # Reading the file
+ with open(filename, "r") as file:
+ lines = file.readlines()
+
+ header_lines = [line for line in lines if line.startswith("#")]
+ data_lines = [line for line in lines if not line.startswith("#") and line.strip()]
+
+ # Interprets properties
+ properties = {}
+ for line in header_lines:
+ match = header_pattern.match(line)
+ if match:
+ # Extracting matched groups
+ col_type, start_col, end_col, property_name, units, description = (
+ match.groups()
+ )
+ col_info = f"{start_col}-{end_col}" if end_col else start_col
+ properties[col_info] = {
+ "name": property_name,
+ "units": units,
+ "description": description,
+ }
+
+ # Parse data
+ values_dict = {}
+ info_dict = {}
+ for col_info, prop_info in properties.items():
+ # Initialize list to hold values for each property
+ values_dict[prop_info["name"]] = []
+ # Save units and description
+ info_dict[prop_info["name"]] = (prop_info["units"], prop_info["description"])
+
+ for line in data_lines:
+ values = line.split()
+ for column_info, prop_info in properties.items():
+ if "-" in column_info: # Multi-column property
+ start_col, end_col = map(
+ int, column_info.split("-")
+ ) # 1-based indexing
+ prop_values = values[
+ start_col - 1 : end_col
+ ] # Adjust to 0-based indexing
+ else: # Single column property
+ col_index = int(column_info) - 1 # Adjust to 0-based indexing
+ prop_values = [values[col_index]]
+
+ values_dict[prop_info["name"]].append([float(val) for val in prop_values])
+
+ for prop_name, prop_values in values_dict.items():
+ values_dict[prop_name] = np.array(
+ prop_values
+ ).squeeze() # make 1-col into a flat array
+
+ return values_dict, info_dict
+
+
+def read_trajectory(
+ filename,
+ format=None,
+ dimension="automatic",
+ units="automatic",
+ cell_units="automatic",
+):
+ """Reads a file in i-PI format and returns it in ASE format.
+
+ `format` can be `xyz` (i-PI formatted), `pdb`, `binary`, `json`, `ase`, and if not specified it'll
+ be inferred from the filename extension. `extras` will read a trajectory containing the additional
+ data returned from a calculator; will try to read as a float array, and if it fails it'll resort
+ to returning a list of raw strings.
+ units can be inferred from the file content, or specified with `dimension`, `units` and `cell_units`
+ """
+
+ if ase is None:
+ raise ImportError(
+ "read_trajectory requires the `ase` package to return the structure in ASE format"
+ )
+
+ from ipi.utils.io import read_file
+
+ if format is None:
+ # tries to infer the file format
+ format = filename[filename.rfind(".") + 1 :]
+ if format not in ["xyz", "pdb", "binary", "json", "ase"]:
+ raise ValueError(f"Unrecognized file format: {format}")
+
+ file_handle = open(filename, "r")
+ bohr2angstrom = unit_to_user("length", "angstrom", 1.0)
+ comment_regex = re.compile(r"(\w+)\{([^}]+)\}")
+ step_regex = re.compile(r"Step:\s+(\d+)")
+
+ frames = []
+ while True:
+ try:
+ if format == "extras":
+ file = open(filename, "r")
+ step_regex = re.compile(r"#EXTRAS\((\w+)\)# *Step:\s+(\d+)")
+ step_list = []
+ data_list = []
+ data_frame = []
+ is_array = True
+ property_name = ""
+ for line in file:
+ matches = step_regex.findall(line)
+ if len(matches) > 0:
+ if len(data_frame) > 0:
+ try:
+ data_processed = np.loadtxt(data_frame)
+ except:
+ is_array = False
+ data_processed = "\n".join(data_frame)
+ data_list.append(data_processed)
+ data_frame = []
+
+ # Found a new step, update current_step and initialize the list for data
+ step_list.append(int(matches[0][1]))
+ if property_name == "":
+ property_name = matches[0][0]
+ elif property_name != matches[0][0]:
+ raise ValueError(
+ f"Inconsistent property {matches[0][0]} found in extras containing {property_name}"
+ )
+ else:
+ data_frame.append(line)
+
+ if len(data_frame) > 0:
+ try:
+ data_processed = np.loadtxt(data_frame)
+ except:
+ is_array = False
+ data_processed = "\n".join(data_frame)
+ data_list.append(data_processed)
+ if is_array:
+ data_list = np.array(data_list).squeeze()
+ return {
+ "step": np.asarray(step_list, dtype=int),
+ property_name: data_list,
+ }
+ else:
+ ret = read_file(
+ format,
+ file_handle,
+ dimension=dimension,
+ units=units,
+ cell_units=cell_units,
+ )
+
+ frame = ase.Atoms(
+ ret["atoms"].names,
+ positions=ret["atoms"].q.reshape((-1, 3)) * bohr2angstrom,
+ cell=ret["cell"].h.T * bohr2angstrom,
+ pbc=True,
+ )
+
+ # parse comment to get the property
+ matches = comment_regex.findall(ret["comment"])
+
+ # get what we have found
+ if len(matches) >= 2:
+ what = matches[-2][0]
+ else: # defaults to reading positions
+ what = "positions"
+
+ # ... and the step
+ matches = step_regex.findall(ret["comment"])
+ if len(matches) >= 1:
+ frame.info["step"] = int(matches[-1][0])
+
+ # if we have forces, set positions to zero (that data is missing!) and set forces instead
+ if what == "forces":
+ # set forces and convert to eV/angstrom
+ frame.positions *= 0
+ frame.arrays["forces"] = ret["atoms"].q.reshape(
+ (-1, 3)
+ ) * unit_to_user("force", "ev/ang", 1.0)
+
+ frames.append(frame)
+
+ except EOFError:
+ break
+ except:
+ raise
+
+ return frames
diff --git a/setup.cfg b/setup.cfg
index a2c9dcb6a..ff9f9d8b3 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = ipi
-version = 2.6.2
+version = 2.6.3
description = A Python interface for ab initio path integral molecular dynamics simulations
long_description = file: README.md
long_description_content_type = text/x-rst
diff --git a/setup.py b/setup.py
index 69e020cf3..bf8364962 100644
--- a/setup.py
+++ b/setup.py
@@ -2,19 +2,17 @@
from setuptools import setup, find_packages
+# exclude i-pi-driver that is a compiled fortran file when installing from a local dir
+scripts = [str(p) for p in Path("bin").iterdir() if p.name != "i-pi-driver"]
setup(
packages=[
*find_packages(exclude=["ipi_tests*"]),
"ipi._driver",
"ipi._driver.pes",
- "ipi.tests",
- "ipi.tests.regression_tests",
],
package_dir={
"ipi._driver": "drivers/py",
- "ipi.tests": "ipi_tests",
},
- package_data={"ipi.tests.regression_tests": ["tests/NVE/NVE_1/harmonic_python/*"]},
- scripts=[str(p) for p in Path("bin").iterdir()],
+ scripts=scripts,
)