Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] skip .git and a few other files / directories during validation #63

Merged
merged 3 commits into from
Jul 9, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 110 additions & 54 deletions reproschema/validate.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,120 @@
import json
import os
from pathlib import Path

from .jsonldutils import load_file, validate_data
from .utils import lgr, start_server, stop_server

# Directory names that are never descended into during validation
# (version-control metadata, CI configuration, caches, virtual environments).
DIR_TO_SKIP = [
    ".git",
    ".github",
    "__pycache__",
    "env",
    "venv",
]

# File names that are never validated. Most of these have an empty
# ``Path.suffix`` (e.g. "LICENSE", ".gitignore"), so they would otherwise be
# picked up by the "" entry of SUPPORTED_EXTENSIONS.
FILES_TO_SKIP = [
    ".DS_Store",
    ".gitignore",
    ".flake8",
    ".autorc",
    "LICENSE",
    "Makefile",
]

# File suffixes eligible for validation. Entries are compared against
# ``Path.suffix``, which includes the leading dot, so every non-empty entry
# must start with "." -- a bare "json" would never match anything.
# The empty string matches extension-less files.
SUPPORTED_EXTENSIONS = [
    ".jsonld",
    ".json",
    ".js",
    "",
]

def validate_dir(
    directory: str,
    started: bool = False,
    http_kwargs: None | dict[str, int] = None,
    stop=None,
):
    """Validate a directory containing JSONLD documents against the ReproSchema pydantic model.

    Recursively goes through the directory tree and validates files with the
    allowed extensions. Directories listed in ``DIR_TO_SKIP`` and files listed
    in ``FILES_TO_SKIP`` are ignored.

    Parameters
    ----------
    directory : str
        Path to directory to walk for validation

    started : bool
        Whether an http server exists or not. The value is forwarded to
        ``load_file``; this function does not start a server itself.

    http_kwargs : dict or None
        Keyword arguments for the http server. Valid keywords are: port, path
        and tmpdir. ``None`` is treated as an empty dict.

    stop : None or function
        Function to use to stop the HTTP server. When not ``None`` it is
        passed to ``stop_server`` before any exception is raised.

    Returns
    -------
    conforms : bool
        Always ``True`` on return; a non-conformant document raises instead.
    stop : None or function
        The stop function, handed back so the caller can shut the server down.

    Raises
    ------
    Exception
        If ``directory`` is not a directory.
    ValueError
        If a file yields an empty data graph or fails validation.
    json.JSONDecodeError
        If a file cannot be decoded (re-raised from ``load_file``).

    """
    if http_kwargs is None:
        http_kwargs = {}

    directory = Path(directory)

    if not directory.is_dir():
        if stop is not None:
            stop_server(stop)
        raise Exception(f"{str(directory)} is not a directory")

    if directory.name in DIR_TO_SKIP:
        lgr.info(f"Skipping directory {directory}")
        # Same (conforms, stop) shape as the normal exit: callers unpack the
        # result, so a bare ``True`` here would raise TypeError.
        return True, stop

    lgr.info(f"Validating directory {directory}")

    # List the directory once and reuse the listing for files and subdirs.
    entries = list(directory.iterdir())

    files_to_validate = [
        str(entry)
        for entry in entries
        if entry.is_file()
        and entry.name not in FILES_TO_SKIP
        and entry.suffix in SUPPORTED_EXTENSIONS
    ]

    for name in files_to_validate:
        lgr.debug(f"Validating file {name}")

        try:
            data = load_file(name, started=started, http_kwargs=http_kwargs)
            if len(data) == 0:
                raise ValueError(f"Empty data graph in file {name}")
            conforms, vtext = validate_data(data)
        except (ValueError, json.JSONDecodeError):
            if stop is not None:
                stop_server(stop)
            raise
        else:
            if not conforms:
                lgr.critical(f"File {name} has validation errors.")
                # Guarded like the other error paths: there may be no server
                # to stop when the caller did not supply ``stop``.
                if stop is not None:
                    stop_server(stop)
                raise ValueError(vtext)

    dirs_to_validate = [
        str(entry)
        for entry in entries
        if entry.is_dir() and entry.name not in DIR_TO_SKIP
    ]

    for subdir in dirs_to_validate:
        _, stop = validate_dir(
            subdir, started=started, http_kwargs=http_kwargs, stop=stop
        )

    return True, stop


def validate(path):
Expand All @@ -91,17 +132,32 @@ def validate(path):
exception.

"""
if os.path.isdir(path):
conforms = validate_dir(path)
if Path(path).is_dir():

lgr.info(f"Validating directory {path}")

stop, port = start_server()
http_kwargs = {"port": port}
started = True

conforms, _ = validate_dir(
path, started=started, http_kwargs=http_kwargs, stop=stop
)

stop_server(stop)

else:
# Skip validation for .DS_Store files
if Path(path).name == ".DS_Store":
lgr.info(f"{path} is a .DS_Store file and is skipped.")

if Path(path).name in FILES_TO_SKIP:
lgr.info(f"Skipping file {path}")
return True

data = load_file(path, started=False)
conforms, vtext = validate_data(data)
if not conforms:
lgr.critical(f"File {path} has validation errors.")
raise ValueError(vtext)

lgr.info(f"{path} conforms.")

return conforms