Skip to content

Commit

Permalink
Merge pull request #27 from matthewrmshin/feature/concat-in-filenames
Browse files Browse the repository at this point in the history
Concat multiple input filenames before processing
  • Loading branch information
matthewrmshin authored Sep 18, 2024
2 parents adb0ab4 + e245b59 commit 18e82ef
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 29 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Python usage:
from yamlprocessor.dataprocess import DataProcessor
processor = DataProcessor()
# ... Customise the `DataProcessor` instance as necessary ..., then:
processor.process_data(in_file_name, out_file_name)
processor.process_data([in_file_name], out_file_name)
```

## Documentation
Expand Down
5 changes: 3 additions & 2 deletions docs/basic-usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ Command line

.. code-block:: bash
yp-data [options] input-file-name output-file-name
yp-data [options] input-file-name ... output-file-name
yp-data [options] -o output-file-name input-file-name ...
Type ``yp-data --help`` for a list of options. See :doc:`cli` for detail.
Expand All @@ -19,6 +20,6 @@ Python
from yamlprocessor.dataprocess import DataProcessor
processor = DataProcessor()
# ... Customise the `DataProcessor` instance as necessary ..., then:
processor.process_data(in_file_name, out_file_name)
processor.process_data([in_file_name], out_file_name)
See :doc:`api` for detail.
11 changes: 6 additions & 5 deletions docs/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,20 @@ Usage:

.. code-block:: bash
yp-data [options] input-file-name output-file-name
yp-data [options] input-file-name ... output-file-name
yp-data [options] -o output-file-name input-file-name ...
See :doc:`data-process` for detail.

.. program:: yp-data

.. option:: input-file-name
.. option:: file-names

Name of an input file. Use ``-`` to read from STDIN.
Names of input or input+output files. Use ``-`` for STDIN/STDOUT.

.. option:: output-file-name
.. option:: --out-filename=FILENAME, -o FILENAME

Name of an output file. Use ``-`` to write to STDOUT.
Name of output file. Use ``-`` for STDOUT.

.. option:: --include=DIR, -I DIR

Expand Down
42 changes: 41 additions & 1 deletion docs/data-process.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ Python logic with the above files:
processor.include_dict.update({
'earth.yaml': {'location': 'earth', 'targets': ['dinosaur']},
})
processor.process_data('hello.yaml')
processor.process_data(['hello.yaml'])
We'll get:

Expand Down Expand Up @@ -253,6 +253,46 @@ and the output will look like:
- martian
Multiple Input Files Concatenation
----------------------------------

You can specify multiple input files in both command line and Python usages.
The input files will be concatenated together (as text) before being parsed as
a whole YAML document. For example, suppose we have ``part1.yaml`` with:

.. code-block:: yaml
hello:
And ``part2.yaml`` with:

- earth
- mars

And ``part3.yaml`` with:

- jupiter
- saturn

Running :program:`yp-data -o- part1.yaml part2.yaml part3.yaml` will give:

.. code-block:: yaml
hello:
- earth
- mars
- jupiter
- saturn
You can achieve the same results by running:

.. code-block:: python
from yamlprocessor.dataprocess import DataProcessor
# ...
processor = DataProcessor()
processor.process_data(['part1.yaml', 'part2.yaml', 'part3.yaml'])
String Value Variable Substitution
----------------------------------

Expand Down
81 changes: 61 additions & 20 deletions src/yamlprocessor/dataprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@
Validate against specified JSON schema if root file starts with either
``#!<SCHEMA-URI>`` or ``# yaml-language-server: $schema=<SCHEMA-URI>`` line.
CLI usage allows multiple positional arguments.
In usage 1, the final positional argument is the output file name,
and the other arguments are input file names.
In usage 2, with ``--output=FILENAME`` (``-o FILENAME``) option,
all positional arguments are input file names.
In either case, all input files will be concatenated together (as text),
before being parsed as a combined YAML file.
"""

from argparse import ArgumentParser, RawDescriptionHelpFormatter
Expand All @@ -25,6 +36,8 @@
from pathlib import Path
import re
import sys
from tempfile import SpooledTemporaryFile
from typing import IO, Iterable, Union

from dateutil.parser import parse as datetimeparse
from dateutil.relativedelta import relativedelta
Expand Down Expand Up @@ -307,16 +320,36 @@ def __init__(self):
self.variable_map = os.environ.copy()
self.unbound_placeholder = None

def process_data(self, in_filename: str, out_filename: str) -> None:
"""Process includes in input file and dump results in output file.
def process_data(
self,
in_filenames: Union[str, Iterable[str]],
out_filename: str,
) -> None:
"""Concatenate input files and load resulting data.
:param in_filename: input file name.
Dump results in output file.
:param in_filenames: input file name str or input file names list.
:param out_filename: output file name.
"""
in_filename = self.get_filename(in_filename, [])
root = self.load_file(in_filename)
schema_location = self.load_file_schema(in_filename)
stack = [[root, [in_filename], self.variable_map]]
if isinstance(in_filenames, str):
filename = self.get_filename(in_filenames, [])
root = self.load_file(filename)
schema_location = self.load_file_schema(filename)
root_filenames = [filename]
else:
root_filenames = []
with SpooledTemporaryFile(mode='w+') as concat_file:
for filename in in_filenames:
filename = self.get_filename(filename, [])
with open(filename) as file_:
concat_file.write(file_.read())
root_filenames.append(filename)
concat_file.seek(0)
root = self.load_file(concat_file)
concat_file.seek(0)
schema_location = self.load_file_schema(concat_file)
stack = [[root, root_filenames, self.variable_map]]
while stack:
data, parent_filenames, variable_map = stack.pop()
data = self.process_variable(data, variable_map)
Expand Down Expand Up @@ -477,10 +510,10 @@ def _is_include(cls, value: object) -> bool:
return False

@staticmethod
def load_file(filename: str) -> object:
def load_file(filename: Union[str, IO]) -> object:
"""Load content of (YAML) file into a data structure.
:param filename: name of file to load content.
:param filename: file (name) to load content.
:return: the loaded data structure.
"""
yaml = YAML(typ='safe', pure=True)
Expand All @@ -489,19 +522,23 @@ def load_file(filename: str) -> object:
construct_yaml_timestamp)
if filename == '-':
return yaml.load(sys.stdin)
elif hasattr(filename, 'readline'):
return yaml.load(filename)
else:
with open(filename) as file_:
return yaml.load(file_)

@staticmethod
def load_file_schema(filename: str) -> object:
def load_file_schema(filename: Union[str, IO]) -> object:
"""Load schema location from the schema association line of file.
:param filename: name of file to load schema location.
:return: a string containing the location of the schema or None.
"""
if filename == '-':
line = sys.stdin.readline()
elif hasattr(filename, 'readline'):
line = filename.readline()
else:
with open(filename) as file_:
line = file_.readline()
Expand Down Expand Up @@ -750,17 +787,17 @@ def main(argv=None):
description=__doc__,
formatter_class=RawDescriptionHelpFormatter,
)
# in_filenames or in_filename + out_filename
parser.add_argument(
'in_filename',
metavar='IN-FILE',
default='-',
nargs='?',
help='Name of input file, "-" for STDIN')
'filenames',
metavar='FILENAME',
nargs='*',
help='Names of input or input+output files, "-" for STDIN/STDOUT')
# out_filename
parser.add_argument(
'out_filename',
metavar='OUT-FILE',
default='-',
nargs='?',
'--out-filename', '-o',
metavar='FILENAME',
action="store",
help='Name of output file, "-" for STDOUT')
parser.add_argument(
'--include', '-I',
Expand Down Expand Up @@ -888,7 +925,11 @@ def main(argv=None):
if args.schema_prefix is not None:
processor.schema_prefix = args.schema_prefix

processor.process_data(args.in_filename, args.out_filename)
if args.out_filename is None and len(args.filenames) >= 2:
args.out_filename = args.filenames.pop()
elif args.out_filename is None:
args.out_filename = '-'
processor.process_data(args.filenames, args.out_filename)


if __name__ == '__main__':
Expand Down
41 changes: 41 additions & 0 deletions src/yamlprocessor/tests/test_dataprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,47 @@ def test_main_validate_1(tmp_path, capsys, yaml):
assert f'[INFO] ok {outfilename}' in captured.err.splitlines()


def test_main_concat_input_files(tmp_path, yaml):
    """Test main, concatenation of multiple input files before parsing.

    Each part file is an incomplete YAML fragment on its own; the parts
    only form a valid document when joined as text, so a successful parse
    of the output proves the concatenation happened. All three CLI forms
    are exercised: trailing output file name, ``-o FILENAME``, and
    ``--out-filename=FILENAME``.
    """
    # Three fragments of a single YAML mapping, one per input file.
    yaml_part_1 = 'hello:\n'
    yaml_part_2 = '- earth\n'
    yaml_part_3 = '- mars\n'
    infilename1 = tmp_path / 'in_1.yaml'
    with infilename1.open('w') as infile:
        infile.write(yaml_part_1)
    infilename2 = tmp_path / 'in_2.yaml'
    with infilename2.open('w') as infile:
        infile.write(yaml_part_2)
    infilename3 = tmp_path / 'in_3.yaml'
    with infilename3.open('w') as infile:
        infile.write(yaml_part_3)
    outfilename = tmp_path / 'out.yaml'
    # Arguments are input file names + output file name
    main([
        str(infilename1),
        str(infilename2),
        str(infilename3),
        str(outfilename),
    ])
    assert yaml.load(outfilename.open()) == {'hello': ['earth', 'mars']}
    # -o out-file-name, then arguments are only input file names
    main([
        '-o', str(outfilename),
        str(infilename1),
        str(infilename2),
        str(infilename3),
    ])
    assert yaml.load(outfilename.open()) == {'hello': ['earth', 'mars']}
    # --out-filename=out-file-name, then arguments are only input file names
    main([
        f'--out-filename={outfilename}',
        str(infilename1),
        str(infilename2),
        str(infilename3),
    ])
    assert yaml.load(outfilename.open()) == {'hello': ['earth', 'mars']}


def test_process_data_include_dict(tmp_path, yaml):
"""Test DataProcessor.process_data, with DataProcessor.include_dict."""
data = {'testing': ['one', 2, {3: [3.1, 3.14]}]}
Expand Down

0 comments on commit 18e82ef

Please sign in to comment.