From 7d8b30300a3cc8b3af9046691ab28bb6c973961b Mon Sep 17 00:00:00 2001 From: Remi Gau Date: Tue, 9 Jul 2024 09:40:42 +0200 Subject: [PATCH 1/3] skip .git and refactor --- reproschema/validate.py | 140 ++++++++++++++++++++++++++-------------- 1 file changed, 90 insertions(+), 50 deletions(-) diff --git a/reproschema/validate.py b/reproschema/validate.py index ae281b6..e2df11b 100644 --- a/reproschema/validate.py +++ b/reproschema/validate.py @@ -5,20 +5,41 @@ from .jsonldutils import load_file, validate_data from .utils import lgr, start_server, stop_server +DIR_TO_SKIP = [".git", "__pycache__", "env", "venv"] +FILES_TO_SKIP = [".DS_Store", ".gitignore", ".flake8", ".autorc", "LICENSE"] +SUPPORTED_EXTENSIONS = [ + ".jsonld", + "json", + "js", + "", +] -def validate_dir(directory, started=False, http_kwargs={}): + +def validate_dir( + directory: str, + started: bool = False, + http_kwargs: None | dict[str, int] = None, + stop=None, +): """Validate a directory containing JSONLD documents against the ReproSchema pydantic model. + Recursively goes through the directory tree and validates files with the allowed extensions. + Parameters ---------- directory: str Path to directory to walk for validation + started : bool Whether an http server exists or not - http_kwargs : dict + + http_kwargs : dict or None Keyword arguments for the http server. Valid keywords are: port, path and tmpdir + stop: None or function + Function to use to stop the HTTP server + Returns ------- conforms: bool @@ -26,54 +47,60 @@ def validate_dir(directory, started=False, http_kwargs={}): if any document is non-conformant. """ + if http_kwargs is None: + http_kwargs = {} + if not os.path.isdir(directory): + if stop is not None: + stop_server(stop) raise Exception(f"{directory} is not a directory") - print(f"Validating directory {directory}") - stop = None - if not started: - stop, port = start_server(**http_kwargs) - http_kwargs["port"] = port - else: - if "port" not in http_kwargs: - raise KeyError("HTTP server started, but port key is missing") - - for root, _, files in os.walk(directory): - for name in files: - full_file_name = os.path.join(root, name) - - if Path(full_file_name).suffix not in [ - ".jsonld", - "json", - "js", - "", - ]: - lgr.info(f"Skipping file {full_file_name}") - continue - - lgr.debug(f"Validating file {full_file_name}") - try: - data = load_file( - full_file_name, started=True, http_kwargs=http_kwargs - ) - if len(data) == 0: - raise ValueError("Empty data graph") - print(f"Validating {full_file_name}") - conforms, vtext = validate_data(data) - except (ValueError, json.JSONDecodeError): + + if Path(directory).name in DIR_TO_SKIP: + lgr.info(f"Skipping directory {directory}") + return True + + lgr.debug(f"Validating directory {directory}") + + files_to_validate = [ + str(x) + for x in Path(directory).iterdir() + if x.is_file() + and x.name not in FILES_TO_SKIP + and x.suffix in SUPPORTED_EXTENSIONS + ] + + for name in files_to_validate: + lgr.debug(f"Validating file {name}") + + try: + data = load_file(name, started=started, http_kwargs=http_kwargs) + if len(data) == 0: if stop is not None: stop_server(stop) - raise - else: - if not conforms: - lgr.critical( - f"File {full_file_name} has validation errors." - ) - if stop is not None: - stop_server(stop) - raise ValueError(vtext) - if not started: - stop_server(stop) - return True + raise ValueError(f"Empty data graph in file {name}") + conforms, vtext = validate_data(data) + except (ValueError, json.JSONDecodeError): + if stop is not None: + stop_server(stop) + raise + else: + if not conforms: + lgr.critical(f"File {name} has validation errors.") + stop_server(stop) + raise ValueError(vtext) + + dirs_to_validate = [ + str(x) + for x in Path(directory).iterdir() + if x.is_dir() and x.name not in DIR_TO_SKIP + ] + + for dir in dirs_to_validate: + conforms, stop = validate_dir( + dir, started=started, http_kwargs=http_kwargs, stop=stop + ) + + return True, stop def validate(path): @@ -92,16 +119,29 @@ def validate(path): """ if os.path.isdir(path): - conforms = validate_dir(path) + + stop, port = start_server() + http_kwargs = {"port": port} + started = True + + conforms, _ = validate_dir( + path, started=started, http_kwargs=http_kwargs, stop=stop + ) + + stop_server(stop) + else: - # Skip validation for .DS_Store files - if Path(path).name == ".DS_Store": - lgr.info(f"{path} is a .DS_Store file and is skipped.") + + if Path(path).name in FILES_TO_SKIP: + lgr.info(f"Skipping file {path}") return True + data = load_file(path, started=False) conforms, vtext = validate_data(data) if not conforms: lgr.critical(f"File {path} has validation errors.") raise ValueError(vtext) + lgr.info(f"{path} conforms.") + return conforms From 8ad724a23af30211d0652617c76b5f3837a18362 Mon Sep 17 00:00:00 2001 From: Remi Gau Date: Tue, 9 Jul 2024 09:58:42 +0200 Subject: [PATCH 2/3] make it slightly more verbose --- reproschema/validate.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/reproschema/validate.py b/reproschema/validate.py index e2df11b..b07db7e 100644 --- a/reproschema/validate.py +++ b/reproschema/validate.py @@ -5,8 +5,21 @@ from .jsonldutils import load_file, validate_data from .utils import lgr, start_server, stop_server -DIR_TO_SKIP = [".git", "__pycache__", "env", "venv"] -FILES_TO_SKIP = [".DS_Store", ".gitignore", ".flake8", ".autorc", "LICENSE"] +DIR_TO_SKIP = [ + ".git", + ".github", + "__pycache__", + "env", + "venv", +] +FILES_TO_SKIP = [ + ".DS_Store", + ".gitignore", + ".flake8", + ".autorc", + "LICENSE", + "Makefile", +] SUPPORTED_EXTENSIONS = [ ".jsonld", "json", @@ -59,7 +72,7 @@ def validate_dir( lgr.info(f"Skipping directory {directory}") return True - lgr.debug(f"Validating directory {directory}") + lgr.info(f"Validating directory {directory}") files_to_validate = [ str(x) @@ -120,6 +133,8 @@ def validate(path): """ if os.path.isdir(path): + lgr.info(f"Validating directory {path}") + stop, port = start_server() http_kwargs = {"port": port} started = True From b5e595f150ce05b9b3226c3fc0c8df9eec78031d Mon Sep 17 00:00:00 2001 From: Remi Gau Date: Tue, 9 Jul 2024 10:05:32 +0200 Subject: [PATCH 3/3] drop os --- reproschema/validate.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/reproschema/validate.py b/reproschema/validate.py index b07db7e..d865c87 100644 --- a/reproschema/validate.py +++ b/reproschema/validate.py @@ -1,5 +1,4 @@ import json -import os from pathlib import Path from .jsonldutils import load_file, validate_data @@ -63,12 +62,14 @@ def validate_dir( if http_kwargs is None: http_kwargs = {} - if not os.path.isdir(directory): + directory = Path(directory) + + if not directory.is_dir(): if stop is not None: stop_server(stop) - raise Exception(f"{directory} is not a directory") + raise Exception(f"{str(directory)} is not a directory") - if Path(directory).name in DIR_TO_SKIP: + if directory.name in DIR_TO_SKIP: lgr.info(f"Skipping directory {directory}") return True @@ -76,7 +77,7 @@ def validate_dir( files_to_validate = [ str(x) - for x in Path(directory).iterdir() + for x in directory.iterdir() if x.is_file() and x.name not in FILES_TO_SKIP and x.suffix in SUPPORTED_EXTENSIONS @@ -104,7 +105,7 @@ def validate_dir( dirs_to_validate = [ str(x) - for x in Path(directory).iterdir() + for x in directory.iterdir() if x.is_dir() and x.name not in DIR_TO_SKIP ] @@ -131,7 +132,7 @@ def validate(path): exception. """ - if os.path.isdir(path): + if Path(path).is_dir(): lgr.info(f"Validating directory {path}")