Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
dmartmillan committed Aug 3, 2022
2 parents 778b82a + 48a1a27 commit 40b5253
Show file tree
Hide file tree
Showing 27 changed files with 183,496 additions and 183,437 deletions.
33 changes: 11 additions & 22 deletions .github/workflows/openvariant_tester.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ on:
tags-ignore:
- '*'
paths-ignore:
- './examples/**'
- './examples/*'
pull_request:
branches: [ "master" ]
paths-ignore:
- './examples/**'
- './examples/*'

permissions:
contents: read
Expand All @@ -35,30 +35,19 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
pip install tqdm
pip install click
pip install pyyaml
pip install -e .
# if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
# pip install tqdm
# pip install click
# pip install pyyaml
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest tests/test_task/test_group_by.py
pytest tests/test_task/test_count.py
pytest tests/test_task/test_cat.py
pytest tests/test_find/test_find.py
pytest tests/test_utils/test_where.py
pytest tests/test_variant/test_variant.py
pytest tests/test_variant/test_variant_read.py
pytest tests/test_variant/test_variant_save.py
pytest tests/test_command/test_count_command.py
pytest tests/test_command/test_help_command.py
pytest tests/test_command/test_group_by_command.py
pytest tests/test_command/test_cat_command.py
pytest tests/test_annotation/test_annotation.py
pytest tests/test_annotation/test_process.py
pytest tests/test_annotation/test_builder.py
# python3 -m unittest discover -s tests
pytest tests/test_*
15 changes: 7 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<br>
<br>
<a href="https://openvariant.readthedocs.io/">
<img src="logo.png" width="530" height="350">
<img src="https://github.com/bbglab/openvariant/raw/master/logo.png" width="590" height="350">
</a>
<br>
<br>
Expand Down Expand Up @@ -46,7 +46,7 @@ functionalities on our parsed result some of them are the following ones:

<div align="center">
<a href="https://openvariant.readthedocs.io/en/latest/user_guide.html">
<img src="workflow.gif" width="600" height="352">
<img src="https://github.com/bbglab/openvariant/raw/master/workflow.gif" width="600" height="352">
</a>
</div>

Expand All @@ -65,17 +65,16 @@ For more details check our [Installation](https://openvariant.readthedocs.io/en/

## Examples

We offer a bunch of [Examples](examples) to we be able to understand how OpenVariant can be applied. Also, check
[Examples](./examples) section in OpenVariant's documentation.
We offer a bunch of [Examples](https://github.com/bbglab/openvariant/tree/master/examples) to help you understand how OpenVariant can be applied. Also, check the
[Examples](https://openvariant.readthedocs.io/en/latest/examples.html) section in OpenVariant's documentation.

## Contributing

Feel free to contribute as much as you want to the code.

See [CONTRIBUTING](CONTRIBUTING.md) for guidelines on contributing and respect your behaviour specified
at [CODE OF CONDUCT](CODE_OF_CONDUCT.md).
See [CONTRIBUTING](https://github.com/bbglab/openvariant/blob/master/CONTRIBUTING.md) for guidelines on contributing, and please respect the behaviour specified
in the [CODE OF CONDUCT](https://github.com/bbglab/openvariant/blob/master/CODE_OF_CONDUCT.md).

## License

The software is licensed under [BSD-3-Clause](LICENSE), and the artworks in the images folder are licensed
under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode.txt).
The software is licensed under [BSD-3-Clause](https://github.com/bbglab/openvariant/blob/master/LICENSE).
1 change: 1 addition & 0 deletions annotation_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ annotation: # Columns to parse
# be extracted and parsed as values in the output
- type: 'internal'
field: string
value: string # Text to represent multiple fieldSource; optional
fieldSource: # Columns to look for in the input files; required
- string
- string
Expand Down
14 changes: 7 additions & 7 deletions docs/examples/find_files/find_files_with_file_path.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"outputs": [],
"source": [
"from os import getcwd\n",
Expand Down Expand Up @@ -63,14 +63,14 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 2,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path: /home/dmartinez/openvariant/examples/datasets/sample1/5a3a743.wxs.maf.gz\n",
"Annotation object: <openvariant.annotation.annotation.Annotation object at 0x7f6ae47b4160>\n",
"Annotation object: <openvariant.annotation.annotation.Annotation object at 0x7f46043b9580>\n",
"-------------------------------------\n"
]
}
Expand Down Expand Up @@ -104,14 +104,14 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path: /home/dmartinez/openvariant/examples/datasets/sample1/5a3a743.wxs.maf.gz\n",
"Annotation object: <openvariant.annotation.annotation.Annotation object at 0x7f6aec61feb0>\n",
"Annotation object: <openvariant.annotation.annotation.Annotation object at 0x7f46040c7940>\n",
"-------------------------------------\n"
]
}
Expand Down Expand Up @@ -147,14 +147,14 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File path: /home/dmartinez/openvariant/examples/datasets/sample1/5a3a743.wxs.maf.gz\n",
"Annotation object: <openvariant.annotation.annotation.Annotation object at 0x7f6ae46fceb0>\n",
"Annotation object: <openvariant.annotation.annotation.Annotation object at 0x7f46040c7c70>\n",
"-------------------------------------\n"
]
}
Expand Down
25 changes: 25 additions & 0 deletions docs/user_guide/annotation_structure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ to search and match the files with that pattern.
# Example:
pattern:
- '*.maf'
- '[a-zA-Z-]*.tsv'
- 'samples.vcf.gz'
Format (optional)
Expand Down Expand Up @@ -103,6 +104,16 @@ Fixed value that will be parsed to all the rows of the `output` file. The value
field: 'ID'
value: 'SG2F24986083'
Also, we can combine other fields in a ``static`` annotation, as the following example shows:

.. code-block:: yaml
# Example:
# Where YEAR, DATASET and PATIENT are fields from other annotations
- type: 'static'
field: 'ID'
value: '{YEAR}_{DATASET}_{PATIENT}'
Internal
#############

Expand All @@ -112,6 +123,7 @@ which is a lambda function that will take the value as an input.

* ``type``: type of annotation. (required)
* ``field``: name that will appear as a head column of this annotation. (required)
* ``value``: text to represent multiple ``fieldSource`` parameters. (optional)
* ``fieldSource``: list of column names that will be matched against the input fields and transformed into the annotation ``field`` in the output. (required)
* ``function``: lambda function that will be executed after getting the value of ``fieldSource``. If it is not specified, it will execute :python:`(lambda y: y)`, making no modification to the value. (optional)

Expand All @@ -128,6 +140,19 @@ which is a lambda function that will take the value as an input.
- '#chrom'
function: "lambda c: c.upper().replace('CHR', '').replace('23', 'X').replace('24', 'Y')"
In the following example we can see the use of multiple fields in the ``internal`` type. The ``value`` text is formatted
with the fields that appear in the ``fieldSource`` parameter. All the fields must match the placeholders in the ``value`` text.

.. code-block:: yaml
# Example:
# 'Symbol', 'Country' and 'Year' are columns from the input file.
- type: 'internal'
field: 'ID'
value: 'Identity_{Symbol}_{Country}_{Year}'
fieldSource:
- ['Symbol', 'Country', 'Year']
Filename
#############

Expand Down
15 changes: 7 additions & 8 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from os import getcwd

from openvariant import find_files, Annotation, Variant
from openvariant import findfiles, Annotation, Variant

# where = "VAR != 4 AND (VAR != 5 OR VAR != 10)"
# where_clauses = parse_where(where)
Expand All @@ -14,11 +14,10 @@
#print(res)


annotation = Annotation(f"{getcwd()}/tests/data/dataset/dataset.yaml")
#annotation = Annotation(f"{getcwd()}/tests/data/dataset/dataset.yaml")

#for file, _ in find_files(f"{getcwd()}/tests/data/dataset/"):

result = Variant(f"{getcwd()}/tests/data/dataset/sample3/", annotation)
for line in result.read():
print(f"Line in a dict: {line}")
break
for file, ann in findfiles(f"{getcwd()}/tests/data/dataset/sample3"):
result = Variant(file, ann)
for line in result.read(where="REF != 'A',REF != 'G'"):
print(f"Line in a dict: {line}")
break
3 changes: 2 additions & 1 deletion openvariant/annotation/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def _static_builder(x: dict, base_path: str = None) -> StaticBuilder:
value = x[AnnotationKeys.VALUE.value]
except KeyError:
raise KeyError('Static annotation is wrong specified.')

return AnnotationTypes.STATIC.name, value


Expand All @@ -109,7 +110,7 @@ def _internal_builder(x: dict, base_path: str = None) -> InternalBuilder:
try:
value = x[AnnotationKeys.VALUE.value]
except KeyError:
value = float('nan')
value = None

return AnnotationTypes.INTERNAL.name, x[AnnotationKeys.FIELD_SOURCE.value], Builder("(lambda y: y)") \
if AnnotationKeys.FUNCTION.value not in x or x[AnnotationKeys.FUNCTION.value] is None or \
Expand Down
30 changes: 23 additions & 7 deletions openvariant/annotation/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from openvariant.plugins.context import Context

StaticProcess = Tuple[str, float or int or str, Callable]
InternalProcess = Tuple[str, Optional[int], Callable]
InternalProcess = Tuple[str, Tuple[dict, str], str]
FilenameProcess = Tuple[str, float or int or str, Callable]
DirnameProcess = Tuple[str, float or int or str, Callable]
PluginProcess = Tuple[str, Context, Callable]
Expand Down Expand Up @@ -59,18 +59,34 @@ def _internal_process(x: InternalBuilder, original_header: List = [] or None, fi
Callable
Function to execute on the fixed value
"""
field_pos = None
field_pos = {}
try:
for i, h in enumerate(original_header):
if h in set(x[1]):
field_pos = i
break
header_dict = {field: num for num, field in list(enumerate(original_header))}
for source in x[1]:
if isinstance(source, List):
for s in source:
try:
field_pos.update({s: header_dict[s]})
except KeyError:
field_pos = {}
pass
if len(field_pos) == len(source):
break
else:
field_pos = {}
else:
try:
field_pos = {source: header_dict[source]}
break
except KeyError:
pass

except TypeError:
raise TypeError(f'Unable to parser {x[0]} annotation')
except SyntaxError:
raise SyntaxError(f'Unable to parser function lambda on {x[0]} annotation')

return AnnotationTypes.INTERNAL.name, field_pos, x[2]
return AnnotationTypes.INTERNAL.name, (field_pos, x[3]), x[2]


def _filename_process(x: FilenameBuilder, original_header: List = [] or None, file_path: str = None,
Expand Down
7 changes: 6 additions & 1 deletion openvariant/find_files/find_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ def _get_annotation(file_path, annotation):
def _scan_files(base_path: str, annotation: Annotation, fix: bool):
"""Recursive exploration from a base path"""
if isdir(base_path):
for file_name in listdir(base_path):
if not fix:
for annotation_file in glob.iglob(join(base_path, "*.{}".format(ANNOTATION_EXTENSION))):
annotation = Annotation(annotation_file)
list_files = listdir(base_path)
list_files.sort()
for file_name in list_files:
file_path = join(base_path, file_name)
try:
for f, a in _scan_files(file_path, annotation, fix):
Expand Down
8 changes: 2 additions & 6 deletions openvariant/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
import re
from fnmatch import fnmatch
from os.path import basename


def check_extension(ext: str, path: str) -> bool:
"""Check if file matches with the annotation pattern"""
if ext[0] == '*':
match = fnmatch(path, ext)
else:
reg_apply = re.compile(ext + '$')
match = len(reg_apply.findall(path)) != 0
return match
return fnmatch(basename(path), ext) if ext[0] == '*' else re.match(ext, basename(path)) is not None
12 changes: 10 additions & 2 deletions openvariant/variant/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def __init__(self, path: str, annotation: Annotation) -> None:

def _unify(self, base_path: str, annotation: Annotation, group_by: str = None, display_header: bool = True) \
-> Generator[dict, None, None]:
"""Parse all the files thought the annotation schema and generated yields to interrate"""
"""Parse all the files through the annotation schema and yield the results to iterate over"""
for x in self._parser(base_path, annotation, group_by, display_header):
yield x

Expand Down Expand Up @@ -201,7 +201,15 @@ def _parser(self, file_path: str, annotation: Annotation, group_by: str, display
elif type_ann == AnnotationTypes.MAPPING.name:
mapping_values[head] = header[head]
elif type_ann == AnnotationTypes.INTERNAL.name:
value = line[value] if value is not None else None
if len(value[0]) == 1:
pos = list(value[0].values())[0]
value = line[pos] if value is not None else None
else:
pos = {}
for val, position in value[0].items():
pos.update({val: line[position]})
value = value[1].format(**pos)

line_dict[head] = _parse_field(value, func)
else:
line_dict[head] = _parse_field(value, func)
Expand Down
14 changes: 13 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
from setuptools import setup, find_packages

from pathlib import Path
this_directory = Path(__file__).parent
long_description = (this_directory / "README.md").read_text()

setup(
name="open-variant",
version="0.7.0",
version="0.7.5",
author="BBGLab - Barcelona Biomedical Genomics Lab",
author_email='[email protected]',
description="OpenVariant provides different functionalities to read, parse and operate different multiple input "
"file formats, being able to customize the output.",
long_description=long_description,
long_description_content_type='text/markdown',
license='BSD 3-Clause License',
keywords='bioinformatics,openvariant,openvar,bbglab',
packages=find_packages(exclude=["tests.*", "tests"]),
include_package_data=True,
install_requires=['pyyaml', 'tqdm', 'click'],
Expand All @@ -12,4 +23,5 @@
'openvar = openvariant.commands.openvar:openvar',
]
},
url="https://github.com/bbglab/openvariant",
)
Loading

0 comments on commit 40b5253

Please sign in to comment.