From b8501b73c84678d364021ea8240e4626205f3965 Mon Sep 17 00:00:00 2001 From: Edward Hope-Morley Date: Wed, 10 Jul 2024 15:42:27 +0200 Subject: [PATCH] Update pylint, tox etc --- pylintrc | 24 +---- searchkit/constraints.py | 100 +++++++++--------- searchkit/log.py | 4 +- searchkit/search.py | 216 ++++++++++++++++++++++---------------- searchkit/utils.py | 7 +- test-requirements.txt | 1 + tests/unit/test_search.py | 126 ++++++++++++---------- tests/unit/test_utils.py | 2 +- tests/unit/utils.py | 9 +- tox.ini | 77 ++++++++++---- 10 files changed, 320 insertions(+), 246 deletions(-) diff --git a/pylintrc b/pylintrc index ce49902..1738324 100644 --- a/pylintrc +++ b/pylintrc @@ -12,6 +12,8 @@ suggestion-mode=yes [FORMAT] max-line-length=79 +# Allow docstrings containing long urls +ignore-long-lines=^\s+.+?$ [REPORTS] #reports=yes @@ -19,29 +21,7 @@ score=yes [MESSAGES CONTROL] disable= - invalid-name, - pointless-statement, missing-module-docstring, - missing-class-docstring, missing-function-docstring, too-many-arguments, - too-many-locals, - too-many-branches, - too-many-instance-attributes, - too-many-ancestors, - too-many-public-methods, too-many-lines, - too-many-nested-blocks, - too-many-statements, - protected-access, - super-init-not-called, - useless-object-inheritance, - inconsistent-return-statements, - too-few-public-methods, - abstract-method, - no-self-use, - broad-except, - unnecessary-lambda, - broad-exception-raised, - unspecified-encoding, - consider-using-f-string, diff --git a/searchkit/constraints.py b/searchkit/constraints.py index ac13564..c2bca0a 100644 --- a/searchkit/constraints.py +++ b/searchkit/constraints.py @@ -9,7 +9,7 @@ from searchkit.log import log -class TimestampMatcherBase(object): +class TimestampMatcherBase(): """ Match start of line timestamps in a standard way. @@ -74,6 +74,7 @@ def strptime(self): class ConstraintBase(abc.ABC): + """ Base class for all constraints. 
""" @cached_property def id(self): @@ -124,7 +125,7 @@ def extracted_datetime(self, line): @property @abc.abstractmethod - def _since_date(self): + def since_date(self): """ A datetime.datetime object representing the "since" date/time """ def _line_date_is_valid(self, extracted_datetime): @@ -140,12 +141,12 @@ def _line_date_is_valid(self, extracted_datetime): # unique_search_id, ', '.join(self.exprs)) return False - if ts < self._since_date: - # log.debug("%s < %s at (%s) i.e. False", ts, self._since_date, + if ts < self.since_date: + # log.debug("%s < %s at (%s) i.e. False", ts, self.since_date, # line[-3:].strip()) return False - # log.debug("%s >= %s at (%s) i.e. True", ts, self._since_date, + # log.debug("%s >= %s at (%s) i.e. True", ts, self.since_date, # line[-3:].strip()) return True @@ -178,15 +179,15 @@ class MaxSearchableLineLengthReached(Exception): class FindTokenStatus(Enum): + """ Token Search Status Representation. """ FOUND = 1 REACHED_EOF = 2 -class SearchState(object): +class SearchState(): + """ Representation of binary search state. """ def __init__(self, status: FindTokenStatus, offset=0): """ - Representation of binary search state. - @param status: current status of search @param offset: current position in file from which next search will be started. @@ -203,7 +204,7 @@ def offset(self): return self._offset -class SavedFilePosition(object): +class SavedFilePosition(): """ Context manager class that saves current position at start and restores once finished. @@ -219,7 +220,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback): self.file.seek(self.original_position) -class LogLine(object): +class LogLine(): """ Class representing a line extracted from a log file. @@ -316,7 +317,7 @@ def _read_line(self, max_len): return line_text -class LogFileDateSinceSeeker(object): +class LogFileDateSinceSeeker(): """ Performs "since" date lookups with file offsets. This is useful for performing line-based binary date searches on a log file. 
@@ -425,9 +426,9 @@ def find_token_reverse(self, start_offset): return SearchState(status=FindTokenStatus.REACHED_EOF, offset=0) - msg = ("reached max line length ({}) search without finding a line " - "feed (epicenter={})".format(self.MAX_SEARCHABLE_LINE_LENGTH, - start_offset_saved)) + msg = (f"reached max line length ({self.MAX_SEARCHABLE_LINE_LENGTH}) " + "search without finding a line " + f"feed (epicenter={start_offset_saved})") raise MaxSearchableLineLengthReached(msg) def find_token(self, start_offset): @@ -469,9 +470,9 @@ def find_token(self, start_offset): # chunk's length. current_offset = current_offset + len(chunk) - msg = ("reached max line length ({}) search without finding a line " - "feed (epicenter={})".format(self.MAX_SEARCHABLE_LINE_LENGTH, - start_offset_saved)) + msg = (f"reached max line length ({self.MAX_SEARCHABLE_LINE_LENGTH}) " + "search without finding a line feed " + f"(epicenter={start_offset_saved})") raise MaxSearchableLineLengthReached(msg) def try_find_line(self, epicenter, slf_off=None, elf_off=None): @@ -746,16 +747,16 @@ def __getitem__(self, offset): # to indicate that. self.found_any_date = True - if result.date >= self.constraint._since_date: + if result.date >= self.constraint.since_date: # Keep the matching line so we can access it # after the bisect without having to perform another # lookup. 
""" self.line_info = result - constraint_met = ((result.date >= self.constraint._since_date) + constraint_met = ((result.date >= self.constraint.since_date) if result.date else False) log.debug("extracted_date='%s' >= since_date='%s' == %s", result.date, - self.constraint._since_date, constraint_met) + self.constraint.since_date, constraint_met) return result.date def run(self): @@ -788,14 +789,14 @@ def run(self): SearchState(FindTokenStatus.FOUND, -1), SearchState(FindTokenStatus.FOUND, 100)) if result.date is not None: - if result.date >= self.constraint._since_date: + if result.date >= self.constraint.since_date: log.debug("first line has date that is valid so assuming " "rest of file is valid") return current log.debug("starting full binary search") try: - bisect.bisect_left(self, self.constraint._since_date) + bisect.bisect_left(self, self.constraint.since_date) except TooManyLinesWithoutDate as exc: if not self.found_any_date: raise NoTimestampsFoundInFile from exc @@ -813,8 +814,12 @@ def run(self): return self.line_info.start_offset -class SearchConstraintSearchSince(BinarySeekSearchBase): - +class SearchConstraintSearchSince(BinarySeekSearchBase): # noqa, pylint: disable=too-many-instance-attributes + """ + Search constraints implementation to filter lines that are after a given + date/time. The constraint can be applied to a line or an entire file and + for the latter a binary search is performed. 
+ """ def __init__(self, current_date, ts_matcher_cls, days=0, hours=24, **kwargs): """ @@ -833,13 +838,6 @@ def __init__(self, current_date, ts_matcher_cls, days=0, hours=24, """ super().__init__(**kwargs) self.ts_matcher_cls = ts_matcher_cls - if ts_matcher_cls: - self.date_format = ts_matcher_cls.DEFAULT_DATETIME_FORMAT - else: - log.warning("using patterns to identify timestamp is deprecated - " - "use ts_matcher_cls instead") - self.date_format = TimestampMatcherBase.DEFAULT_DATETIME_FORMAT - self.current_date = datetime.strptime(current_date, self.date_format) self._line_pass = 0 self._line_fail = 0 @@ -852,6 +850,15 @@ def __init__(self, current_date, ts_matcher_cls, days=0, hours=24, self._results = {} + @property + def date_format(self): + if self.ts_matcher_cls: + return self.ts_matcher_cls.DEFAULT_DATETIME_FORMAT + + log.warning("using patterns to identify timestamp is deprecated - " + "use ts_matcher_cls instead") + return TimestampMatcherBase.DEFAULT_DATETIME_FORMAT + def extracted_datetime(self, line): if isinstance(line, bytes): # need this for e.g. gzipped files @@ -861,19 +868,19 @@ def extracted_datetime(self, line): if timestamp.matched: return timestamp.strptime - return + return None @property def _is_valid(self): - return self._since_date is not None + return self.since_date is not None @cached_property - def _since_date(self): # pylint: disable=W0236 + def since_date(self): """ Reflects the date from which we will start to apply searches. """ if not self.current_date: - return + return None return self.current_date - timedelta(days=self.days, hours=self.hours or 0) @@ -882,17 +889,16 @@ def apply_to_line(self, line): if not self._is_valid: # The caller is expected to catch this and handle it appropriately, # perhaps deciding to continue. - raise CouldNotApplyConstraint("c:{} unable to apply constraint to " - "line as since_date not valid". 
- format(self.id)) + raise CouldNotApplyConstraint(f"c:{self.id} unable to apply " + "constraint to line as since_date " + "not valid") extracted_datetime = self.extracted_datetime(line) if not extracted_datetime: - raise CouldNotApplyConstraint("c:{} unable to apply constraint to " - "line since unable to extract " - "a datetime from the start of the " - "line to compare against". - format(self.id)) + raise CouldNotApplyConstraint(f"c:{self.id} unable to apply " + "constraint to line since unable to " + "extract a datetime from the start " + "of the line to compare against") if self._line_date_is_valid(extracted_datetime): self._line_pass += 1 @@ -905,14 +911,14 @@ def apply_to_file(self, fd, destructive=True): if not self._is_valid: log.warning("c:%s unable to apply constraint to %s", self.id, fd.name) - return + return None if fd.name in self._results: log.debug("using cached offset") return self._results[fd.name] log.debug("c:%s: starting binary seek search to %s in file %s " - "(destructive=True)", self.id, self._since_date, fd.name) + "(destructive=True)", self.id, self.since_date, fd.name) try: orig_offset = fd.tell() seeker = LogFileDateSinceSeeker(fd, self) @@ -933,7 +939,7 @@ def apply_to_file(self, fd, destructive=True): return fd.tell() except NoValidLinesFoundInFile: log.debug("c:%s no date after %s found in file - seeking to end", - self._since_date, self.id) + self.since_date, self.id) fd.seek(0, 2) return fd.tell() except TooManyLinesWithoutDate as exc: @@ -957,4 +963,4 @@ def stats(self): return _stats def __repr__(self): - return "id={}, since={}".format(self.id, self._since_date) + return f"id={self.id}, since={self.since_date}" diff --git a/searchkit/log.py b/searchkit/log.py index ccb0508..b1b68ca 100644 --- a/searchkit/log.py +++ b/searchkit/log.py @@ -2,13 +2,13 @@ import logging log = logging.getLogger('searchkit') -logformat = ("%(asctime)s %(process)d %(levelname)s %(name)s [-] " +LOGFORMAT = ("%(asctime)s %(process)d %(levelname)s 
%(name)s [-] " "%(message)s") def configure_handler(): handler = logging.StreamHandler() - handler.setFormatter(logging.Formatter(logformat)) + handler.setFormatter(logging.Formatter(LOGFORMAT)) log.addHandler(handler) diff --git a/searchkit/search.py b/searchkit/search.py index edc5fdf..6c4c193 100755 --- a/searchkit/search.py +++ b/searchkit/search.py @@ -32,19 +32,25 @@ def _rs_locked_inner(*args, **kwargs): return _rs_locked_inner +class SearchTaskError(Exception): + """ General search task error. """ + + class FileSearchException(Exception): + """ Exception raised when an error occurs during a file search. """ def __init__(self, msg): self.msg = msg -class SearchDefBase(object): - +class SearchDefBase(): # pylint: disable=too-few-public-methods + """ Base class for search definition implementations. """ def __init__(self, constraints=None): """ @param constraints: an optional list of constraints to apply to results. """ - self.id + # ensure uuid is generated + _ = self.id self.constraints = {c.id: c for c in constraints or {}} @cached_property @@ -54,8 +60,11 @@ def id(self): class SearchDef(SearchDefBase): + """ Simple search definition. - def __init__(self, pattern, tag=None, hint=None, + Defines a single line search. + """ + def __init__(self, pattern, tag=None, hint=None, # noqa, pylint: disable=too-many-arguments store_result_contents=True, field_info=None, **kwargs): """ Simple search definition. @@ -121,7 +130,10 @@ def __repr__(self): class SequenceSearchDef(SearchDefBase): + """ Sequence search definition. + Defines a multiline search that supports matching sequences of patterns. + """ def __init__(self, start, tag, end=None, body=None, **kwargs): """ Sequence search definition. @@ -162,17 +174,17 @@ def __init__(self, start, tag, end=None, body=None, **kwargs): @property def start_tag(self): """ Tag used to identify start of section. 
""" - return "{}-start".format(self.tag) + return f"{self.tag}-start" @property def end_tag(self): """ Tag used to identify end of section. """ - return "{}-end".format(self.tag) + return f"{self.tag}-end" @property def body_tag(self): """ Tag used to identify body of section. """ - return "{}-body".format(self.tag) + return f"{self.tag}-body" @property def current_section_id(self): @@ -212,16 +224,14 @@ def stop(self): self._section_id = str(uuid.uuid4()) def __repr__(self): - return ("{}: current_section={}, started={}, completed_sections={}". - format(self.__class__.__name__, self.current_section_id, - self.started, self.completed_sections)) + return (f"{self.__class__.__name__}: " + f"current_section={self.current_section_id}, " + f"started={self.started}, " + f"completed_sections={self.completed_sections}") class SequenceSearchResults(UserDict): - - def __init__(self): - self.data = {} - + """ Captures results from sequence searches. """ def add(self, result): sid = result.sequence_id if sid in self.data: @@ -241,6 +251,7 @@ class ResultStoreBase(UserList): """ def __init__(self): + super().__init__() self.counters = {} self.value_store = self.data = [] self.tag_store = [] @@ -261,7 +272,8 @@ def parts_deduped(self): def parts_non_deduped(self): return len(self.value_store) - def _get_store_index(self, value, store): + @staticmethod + def _get_store_index(value, store): """ Add value to the provided store and return its position. If the value is None do not save in the store and return None. @@ -313,6 +325,8 @@ class ResultStoreParallel(ResultStoreBase): """ Store for use when sharing between processes is required. 
""" def __init__(self, mgr): + super().__init__() + # Replace these with MP-safe equivalents self.counters = mgr.dict() self.value_store = self.data = mgr.list() self.tag_store = mgr.list() @@ -350,7 +364,8 @@ def unproxy_results(self): class ResultFieldInfo(UserDict): - + """ Supports defining result field names to allow results to be retrieved + by name as opposed to index. """ def __init__(self, fields): """ @param fields: list or dictionary of field names. If a dictionary is @@ -359,9 +374,11 @@ def __init__(self, fields): should typically be standard or custom types. """ if issubclass(fields.__class__, dict): - self.data = fields + data = fields else: - self.data = {f: None for f in fields} + data = {f: None for f in fields} + + super().__init__(data) def ensure_type(self, name, value): """ @@ -379,11 +396,12 @@ def index_to_name(self, index): if index == i: return _field - raise FileSearchException("field with index {} not found in mapping". - format(index)) + raise FileSearchException(f"field with index {index} not found in " + "mapping") class SearchResultBase(UserList): + """ Base class for search result implementations. """ META_OFFSET_TAG = 0 META_OFFSET_SEQ_ID = 1 @@ -393,6 +411,7 @@ class SearchResultBase(UserList): @abc.abstractmethod def __init__(self): + super().__init__() self.results_store = None self.linenumber = None self.section_id = None @@ -414,6 +433,8 @@ def _get_store_id(self, field): if store_id is not None: return store_id + return None + def get(self, field): """ Retrieve result part value by index or name. @@ -424,22 +445,27 @@ def get(self, field): if store_id is not None: return self.results_store[store_id] + return None + def __iter__(self): """ Only return part values when iterating over this object. """ for part in self.data: yield self.results_store[part[self.PART_OFFSET_VALUE]] def __repr__(self): - r_list = ["{}='{}'". + r_list = ["{}='{}'". 
# noqa, pylint: disable=consider-using-f-string format(rp[self.PART_OFFSET_IDX], self.results_store[rp[self.PART_OFFSET_VALUE]]) for rp in self.data] - return ("ln:{} {} (section={})". + return ("ln:{} {} (section={})". # noqa, pylint: disable=consider-using-f-string format(self.linenumber, ", ".join(r_list), self.section_id)) class SearchResultMinimal(SearchResultBase): - + """ + Minimal search result implementation optimised for IPC transfer between + worker tasks and the main collector process. + """ def __init__(self, data, metadata, linenumber, source_id, sequence_section_id, field_info): """ @@ -470,14 +496,14 @@ def __getattr__(self, name): if self.field_names and name in self.field_names: return self.get(name) - raise AttributeError("'{}' object has no attribute '{}'". - format(self.__class__.__name__, name)) + raise AttributeError(f"'{self.__class__.__name__}' object has " + f"no attribute '{name}'") @property def tag(self): idx = self.metadata[self.META_OFFSET_TAG] if idx is None: - return + return None return self.results_store.tag_store[idx] @@ -485,7 +511,7 @@ def tag(self): def sequence_id(self): idx = self.metadata[self.META_OFFSET_SEQ_ID] if idx is None: - return + return None return self.results_store.sequence_id_store[idx] @@ -499,8 +525,9 @@ def register_results_store(self, store): self.results_store = store -class SearchResult(SearchResultBase): - +class SearchResult(SearchResultBase): # noqa,pylint: disable=too-many-instance-attributes + """ Search result. This implementation is not intended to be transferred + over the results queue. """ def __init__(self, linenumber, source_id, result, search_def, results_store, sequence_section_id=None): """ @@ -585,14 +612,18 @@ def export(self): class SearchResultsCollection(UserDict): - + """ Store for all search results received from worker jobs. Provides + methods for searching for results. 
""" def __init__(self, search_catalog, results_store): + super().__init__() self.search_catalog = search_catalog self.results_store = results_store self.reset() - @property - def data(self): + def __getattribute__(self, name): + if name != 'data': + return super().__getattribute__(name) + results = {} for path, _results in self._results_by_path.items(): results[path] = _results @@ -715,35 +746,33 @@ def __len__(self): return _count -class LogrotateLogSort(object): - - def __call__(self, fname): - """ - Sort contents of a directory by passing the function as the key to a - list sort. Directory is expected to contain logfiles with extensions - used by logrotate e.g. .log, .log.1, .log.2.gz etc. - """ - filters = [r"\S+\.log$", - r"\S+\.log\.(\d+)$", - r"\S+\.log\.(\d+)\.gz?$"] - for f in filters: - ret = re.compile(f).match(fname) - if ret: - break - - # files that don't follow logrotate naming format go to the end. - if not ret: - # put at the end - return 100000 +def logrotate_log_sort(fname): + """ + Sort contents of a directory by passing the function as the key to a + list sort. Directory is expected to contain logfiles with extensions + used by logrotate e.g. .log, .log.1, .log.2.gz etc. + """ + filters = [r"\S+\.log$", + r"\S+\.log\.(\d+)$", + r"\S+\.log\.(\d+)\.gz?$"] + for f in filters: + ret = re.compile(f).match(fname) + if ret: + break - if len(ret.groups()) == 0: - return 0 + # files that don't follow logrotate naming format go to the end. + if not ret: + # put at the end + return 100000 - return int(ret.group(1)) + if len(ret.groups()) == 0: + return 0 + return int(ret.group(1)) -class SearchCatalog(object): +class SearchCatalog(): + """ Catalog to register all searches to be performed. 
""" def __init__(self, max_logrotate_depth=7): self.max_logrotate_depth = max_logrotate_depth self._source_ids = {} @@ -781,7 +810,7 @@ def register(self, search, user_path): entry = self._entries[path] entry['searches'].append(search) else: - self._entries[path] = {'source_id': self._get_source_id(path), + self._entries[path] = {'source_id': self.get_source_id(path), 'path': path, 'searches': [search]} @@ -803,7 +832,8 @@ def resolve_from_tag(self, tag): return searches - def _filtered_dir(self, contents, max_logrotate_depth=7): + @staticmethod + def _filtered_dir(contents, max_logrotate_depth=7): """ Filter contents of a directory. Directories are ignored and if any files look like logrotated log files they are sorted and only max_logrotate_depth are kept. @@ -831,7 +861,7 @@ def _filtered_dir(self, contents, max_logrotate_depth=7): limit = max_logrotate_depth for logrotated in logrotated.values(): capped = sorted(logrotated, - key=LogrotateLogSort())[:limit] + key=logrotate_log_sort)[:limit] new_contents += capped return new_contents @@ -853,7 +883,9 @@ def source_id_to_path(self, s_id): log.exception("ALL PATHS:") log.error('\n'.join(list(self._source_ids.keys()))) - def _get_source_id(self, path): + return None + + def get_source_id(self, path): if not self._source_ids: source_id = 0 else: @@ -876,18 +908,18 @@ def __iter__(self): def __repr__(self): info = "" for path, searches in self._entries.items(): - info += "\n{}:\n ".format(path) + info += f"\n{path}:\n " entries = [] for key, val in searches.items(): - entries.append("{}={}".format(key, val)) + entries.append(f"{key}={val}") info += '\n '.join(entries) return info -class SearchTask(object): - +class SearchTask(): # pylint: disable=too-many-instance-attributes + """ Search task implementation for all searches. 
""" def __init__(self, info, constraints_manager, results_store, results_queue=None, results_collection=None, decode_errors=None): @@ -906,8 +938,9 @@ def __init__(self, info, constraints_manager, results_store, @param decode_errors: unicode decode error handling. """ if results_queue is not None and results_collection is not None: - raise Exception("only one of results_queue and results_collection " - "can be used with a SearchTask.") + raise SearchTaskError("only one of results_queue and " + "results_collection can be used with a " + "SearchTask.") self.info = info self.stats = SearchTaskStats() @@ -1044,7 +1077,7 @@ def _sequence_search(self, seq_def, line, ln, sequence_results): self.results_store, sequence_section_id=section_id)) - def _process_sequence_results(self, sequence_results, current_ln): + def _process_sequence_results(self, sequence_results, current_ln): # noqa,pylint: disable=too-many-branches """ Perform post processing to sequence search results. @@ -1110,7 +1143,7 @@ def _run_search(self, fd): """ self.stats.reset() sequence_results = SequenceSearchResults() - search_ids = set([s.id for s in self.search_defs]) # noqa, pylint: disable=R1718 + search_ids = (s.id for s in self.search_defs) offset = self.constraints_manager.apply_global(search_ids, fd) log.debug("starting search of %s (offset=%s, pos=%s)", fd.name, offset, fd.tell()) @@ -1146,11 +1179,11 @@ def _run_search(self, fd): log.debug("completed search of %s lines", self.stats['lines_searched']) if self.search_defs_conditional: - msg = "constraints stats {}:".format(fd.name) + msg = f"constraints stats {fd.name}:" for sd in self.search_defs_conditional: if sd.constraints: for c in sd.constraints.values(): - msg += "\n id={}: {}".format(c.id, c.stats()) + msg += f"\n id={c.id}: {c.stats()}" log.debug(msg) @@ -1190,13 +1223,12 @@ def execute(self): raise except EOFError as e: log.exception("") - msg = ("an exception occurred while searching {} - {}". 
- format(path, e)) + msg = f"an exception occurred while searching {path} - {e}" raise FileSearchException(msg) from e except Exception as e: log.exception("") - msg = ("an unexpected exception occurred while searching {} - {}". - format(path, e)) + msg = (f"an unexpected exception occurred while searching {path} " + f"- {e}") raise FileSearchException(msg) from e log.debug("finished execution on path %s", path) @@ -1204,8 +1236,9 @@ def execute(self): class SearchTaskStats(UserDict): - + """ Keep stats on search tasks executed. """ def __init__(self): + super().__init__() self.reset() def reset(self): @@ -1218,7 +1251,7 @@ def reset(self): 'parts_deduped': 0, 'parts_non_deduped': 0} - def update(self, stats): # pylint: disable=W0221 + def update(self, stats): # pylint: disable=arguments-differ if not stats: return @@ -1230,7 +1263,7 @@ def __repr__(self): class SearcherBase(abc.ABC): - + """ Base class for searcher implementations. """ @property @abc.abstractmethod def files(self): @@ -1260,8 +1293,8 @@ def run(self): """ -class SearchConstraintsManager(object): - +class SearchConstraintsManager(): + """ Manager for any search constraints being applied to searches. """ def __init__(self, search_catalog): self.search_catalog = search_catalog self.global_constraints = [] @@ -1287,7 +1320,8 @@ def apply_global(self, search_ids, fd): return offset - def apply_single(self, searchdef, line): + @staticmethod + def apply_single(searchdef, line): """ Apply any constraints for this searchdef to the give line. @param searchdef: SearchDef object @@ -1318,7 +1352,7 @@ def apply_single(self, searchdef, line): class FileSearcher(SearcherBase): - + """ Searcher implementation used to search filesystem locations. 
""" def __init__(self, max_parallel_tasks=8, max_logrotate_depth=7, constraint=None, decode_errors=None): """ @@ -1350,7 +1384,7 @@ def files(self): def resolve_source_id(self, source_id): return self.catalog.source_id_to_path(source_id) - def add(self, searchdef, path, allow_global_constraints=True): # noqa, pylint: disable=W0221 + def add(self, searchdef, path, allow_global_constraints=True): # noqa, pylint: disable=arguments-differ """ Add a search definition. @@ -1383,7 +1417,8 @@ def stats(self): """ return self._stats - def _get_results(self, results, results_queue, event, stats): + @staticmethod + def _get_results(results, results_queue, event, stats): """ Collect results from all search task processes. @@ -1409,7 +1444,8 @@ def _get_results(self, results, results_queue, event, stats): log.debug("stopped fetching results (total received=%s)", len(results)) - def _purge_results(self, results, results_queue, expected): + @staticmethod + def _purge_results(results, results_queue, expected): """ Purge results from all search task processes. 
@@ -1446,13 +1482,15 @@ def _create_results_thread(self, results, results_queue, stats): args=[results, results_queue, event, stats]) return t, event - def _stop_results_thread(self, thread, event): + @staticmethod + def _stop_results_thread(thread, event): log.debug("joining/stopping queue consumer thread") event.set() thread.join() log.debug("consumer thread stopped successfully") - def _ensure_worker_processes_killed(self): + @staticmethod + def _ensure_worker_processes_killed(): """ For some reason it is sometimes possible to for pool termination to hang indefinitely because one or more worker process fails to @@ -1480,7 +1518,7 @@ def _ensure_worker_processes_killed(self): try: log.debug('sending SIGKILL to worker process %s', wpid) os.kill(wpid, signal.SIGILL) - except Exception: + except ProcessLookupError: log.debug('worker process %s already killed', wpid) def _run_single(self, results_collection, results_store): @@ -1500,7 +1538,7 @@ def _run_single(self, results_collection, results_store): self.stats['jobs_completed'] = 1 self.stats['total_jobs'] = 1 - def _run_mp(self, mgr, results, results_store): + def _run_mp(self, mgr, results, results_store): # noqa,pylint: disable=too-many-locals """ Run searches in parallel. 
@param mgr: multiprocessing.Manager object @@ -1573,7 +1611,7 @@ def run(self): log.debug("catalog is empty - nothing to run") return SearchResultsCollection(self.catalog, ResultStoreSimple()) - self.stats['searches'] = sum([len(p['searches']) # noqa, pylint: disable=R1728 + self.stats['searches'] = sum([len(p['searches']) # noqa, pylint: disable=consider-using-generator for p in self.catalog]) self.stats['searches_by_job'] = [len(p['searches']) for p in self.catalog] diff --git a/searchkit/utils.py b/searchkit/utils.py index 142dc76..35a95f3 100644 --- a/searchkit/utils.py +++ b/searchkit/utils.py @@ -29,7 +29,7 @@ def __init__(self, cache_id, cache_type, global_path): locks_path = os.path.join(self.global_path, 'locks') path = os.path.join(locks_path, 'cache_all_global.lock') self.global_lock = fasteners.InterProcessLock(path) - path = os.path.join(locks_path, 'cache_{}.lock'.format(self.cache_id)) + path = os.path.join(locks_path, f'cache_{self.cache_id}.lock') self.cache_lock = fasteners.InterProcessLock(path) def __enter__(self): @@ -75,7 +75,7 @@ def __len__(self): class MPCacheSimple(MPCacheBase): - + """ Multiprocessing safe simple key/value store used to cache values. """ def __exit__(self, *exc_info): """ noop. """ @@ -130,6 +130,5 @@ def __len__(self): return len(os.listdir(self.cache_base_path)) -# this is the default type class MPCache(MPCacheSimple): - pass + """ Denotes the default cache type. 
""" diff --git a/test-requirements.txt b/test-requirements.txt index 8dbbaa8..81f7db1 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -2,3 +2,4 @@ flake8==6.0.0 flake8-import-order==0.18.2 stestr pylint==3.1.0 +coverage diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py index 39a0cbb..40bcdc5 100644 --- a/tests/unit/test_search.py +++ b/tests/unit/test_search.py @@ -14,7 +14,7 @@ SequenceSearchDef, ) from searchkit.search import ( - LogrotateLogSort, + logrotate_log_sort, SearchResult, SearchCatalog, SearchResultsCollection, @@ -109,7 +109,12 @@ """ +class TestFailedError(Exception): + """ Raised when an error is identified in a test. """ + + class TimestampSimple(TimestampMatcherBase): + """ Test timestamp implementation. """ @property def patterns(self): @@ -118,13 +123,15 @@ def patterns(self): class TestSearchKitBase(utils.BaseTestCase): + """ Base class for tests. """ @property def datetime_expr(self): return r"^([\d-]+\s+[\d:]+)" - def get_date(self, date): - cmd = ["date", "--utc", "--date={}".format(date), + @staticmethod + def get_date(date): + cmd = ["date", "--utc", f"--date={date}", '+' + TimestampSimple.DEFAULT_DATETIME_FORMAT] out = subprocess.check_output(cmd) out = re.compile(r"\s+").sub(' ', out.decode('UTF-8')).strip() @@ -140,7 +147,8 @@ def tearDown(self): super().tearDown() -class TestSearchKit(TestSearchKitBase): +class TestSearchKit(TestSearchKitBase): # noqa,pylint: disable=too-many-public-methods + """ Unit tests for searchkit. 
""" def test_resultscollection(self): catalog = SearchCatalog() @@ -149,7 +157,7 @@ def test_resultscollection(self): rs = ResultStoreSimple() results = SearchResultsCollection(catalog, rs) self.assertEqual(len(results), 0) - results.add(SearchResult(0, catalog._get_source_id('a/path'), + results.add(SearchResult(0, catalog.get_source_id('a/path'), re.match(sd.patterns[0], '1 2 3'), search_def=sd, results_store=rs).export) self.assertEqual(len(results), 1) @@ -162,7 +170,7 @@ def test_simple_search(self): with tempfile.TemporaryDirectory() as dtmp: fpaths = [os.path.join(dtmp, fname) for fname in ['f1', 'f2']] for fpath in fpaths: - with open(fpath, 'w') as fd: + with open(fpath, 'w', encoding='utf-8') as fd: fd.write("a key: some value\n") fd.write("a key: another value\n") @@ -198,7 +206,9 @@ def test_simple_search_zero_length_files_only(self): f = FileSearcher() with tempfile.TemporaryDirectory() as dtmp: for i in range(30): - open(os.path.join(dtmp, str(i)), 'w').close() # noqa, pylint: disable=R1732 + with open(os.path.join(dtmp, str(i)), 'w', + encoding='utf-8') as fd: + fd.write('') f.add(SearchDef(r'.+:\s+(\S+) \S+', tag='simple'), dtmp + '/*') results = f.run() @@ -210,10 +220,13 @@ def test_simple_search_zero_length_files_mixed(self): f = FileSearcher() with tempfile.TemporaryDirectory() as dtmp: for i in range(30): - open(os.path.join(dtmp, str(i)), 'w').close() # noqa, pylint: disable=R1732 + with open(os.path.join(dtmp, str(i)), 'w', + encoding='utf-8') as fd: + fd.write('') for i in range(30, 60): - with open(os.path.join(dtmp, str(i)), 'w') as fd: + with open(os.path.join(dtmp, str(i)), 'w', + encoding='utf-8') as fd: fd.write("a key: foo bar\n") f.add(SearchDef(r'.+:\s+(\S+) \S+', tag='simple'), dtmp + '/*') @@ -226,7 +239,8 @@ def test_simple_search_many_files(self): f = FileSearcher() with tempfile.TemporaryDirectory() as dtmp: for i in range(1000): - with open(os.path.join(dtmp, str(i)), 'w') as fd: + with open(os.path.join(dtmp, str(i)), 'w', + 
encoding='utf-8') as fd: fd.write("a key: foo bar\n") for i in range(1000): fd.write("some extra text\n") @@ -241,7 +255,7 @@ def test_simple_search_many_files(self): def test_simple_search_named_fields_no_types(self): f = FileSearcher() with tempfile.NamedTemporaryFile() as ftmp: - with open(ftmp.name, 'w') as fd: + with open(ftmp.name, 'w', encoding='utf-8') as fd: fd.write(" PID TTY TIME CMD\n" "49606 pts/2 00:00:00 bash\n" "49613 pts/2 00:00:00 ps\n") @@ -264,7 +278,7 @@ def test_simple_search_named_fields_no_types(self): def test_simple_search_named_fields_w_types(self): f = FileSearcher() with tempfile.NamedTemporaryFile() as ftmp: - with open(ftmp.name, 'w') as fd: + with open(ftmp.name, 'w', encoding='utf-8') as fd: fd.write(" PID TTY TIME CMD\n" "49606 pts/2 00:00:00 bash\n" "49613 pts/2 00:00:00 ps\n") @@ -289,7 +303,7 @@ def test_simple_search_named_fields_w_types(self): def test_simple_search_named_fields_w_types_orderered_dict(self): f = FileSearcher() with tempfile.NamedTemporaryFile() as ftmp: - with open(ftmp.name, 'w') as fd: + with open(ftmp.name, 'w', encoding='utf-8') as fd: fd.write(" PID TTY TIME CMD\n" "49606 pts/2 00:00:00 bash\n" "49613 pts/2 00:00:00 ps\n") @@ -322,12 +336,12 @@ def test_large_sequence_search(self): with tempfile.TemporaryDirectory() as dtmp: try: for i in range(20): - fpath = os.path.join(dtmp, 'f{}'.format(i)) - with open(fpath, 'w') as fd: + fpath = os.path.join(dtmp, f'f{i}') + with open(fpath, 'w', encoding='utf-8') as fd: fd.write('HEADER\n') for _ in range(1000): # this should be almost 100% deduped - fd.write('{}\n'.format(1234)) + fd.write(f'{1234}\n') fd.write('FOOTER\n') @@ -354,7 +368,7 @@ def test_large_sequence_search(self): elif r.tag == seq.end_tag: self.assertEqual(r.get(1), 'FOOTER') elif r.tag != seq.body_tag: - raise Exception("error - tag is '{}'".format(r.tag)) + raise TestFailedError(f"error - tag is '{r.tag}'") else: self.assertEqual(r.get(1), '1234') @@ -403,16 +417,16 @@ def 
test_logrotatelogsort(self): # add in an erroneous file that does not follow logrotate format os.mknod(os.path.join(dtmp, "my-test-agent.log.tar.gz")) for i in range(2, 100): - fname = "my-test-agent.log.{}.gz".format(i) + fname = f"my-test-agent.log.{i}.gz" os.mknod(os.path.join(dtmp, fname)) ordered_contents.append(fname) - self.assertEqual(LogrotateLogSort()(fname), i) + self.assertEqual(logrotate_log_sort(fname), i) ordered_contents.append("my-test-agent.log.tar.gz") contents = os.listdir(dtmp) self.assertEqual(sorted(contents, - key=LogrotateLogSort()), + key=logrotate_log_sort), ordered_contents) def test_catalog_user_paths_overlap(self): @@ -420,7 +434,7 @@ def test_catalog_user_paths_overlap(self): logspath = os.path.join(dtmp, 'var/log') os.makedirs(logspath) logpath = os.path.join(logspath, 'foo.log') - with open(logpath, 'w') as fd: + with open(logpath, 'w', encoding='utf-8') as fd: fd.write('blah') catalog = SearchCatalog(max_logrotate_depth=1) @@ -430,7 +444,7 @@ def test_catalog_user_paths_overlap(self): catalog.register(s2, os.path.join(logspath, 'foo*.log')) self.assertEqual(len(catalog), 1) self.assertEqual(list(catalog), - [{'source_id': catalog._get_source_id(logpath), + [{'source_id': catalog.get_source_id(logpath), 'path': logpath, 'searches': [s1, s2]}]) @@ -455,22 +469,19 @@ def test_catalog_glob_filesort(self): max_logrotate_depth = 7 for i in range(2, max_logrotate_depth + 10): - fname = os.path.join(dtmp, - "my-test-agent.1.log.{}.gz".format(i)) + fname = os.path.join(dtmp, f"my-test-agent.1.log.{i}.gz") os.mknod(fname) if i <= max_logrotate_depth: dir_contents.append(fname) for i in range(2, max_logrotate_depth + 10): - fname = os.path.join(dtmp, - "my-test-agent.49.log.{}.gz".format(i)) + fname = os.path.join(dtmp, f"my-test-agent.49.log.{i}.gz") os.mknod(fname) if i <= max_logrotate_depth: dir_contents.append(fname) for i in range(2, max_logrotate_depth + 10): - fname = os.path.join(dtmp, - "my-test-agent.100.log.{}.gz".format(i)) + 
fname = os.path.join(dtmp, f"my-test-agent.100.log.{i}.gz") os.mknod(fname) if i <= max_logrotate_depth: dir_contents.append(fname) @@ -478,7 +489,7 @@ def test_catalog_glob_filesort(self): exp = sorted(dir_contents) path = os.path.join(dtmp, 'my-test-agent*.log*') depth = max_logrotate_depth - act = sorted(SearchCatalog(max_logrotate_depth=depth). + act = sorted(SearchCatalog(max_logrotate_depth=depth). # noqa,pylint: disable=protected-access _filtered_dir(glob.glob(path))) self.assertEqual(act, exp) @@ -501,7 +512,7 @@ def test_sequence_searcher(self): elif r.tag == sd.end_tag: self.assertEqual(r.get(1), "ending") elif r.tag != sd.body_tag: - raise Exception("error - tag is '{}'".format(r.tag)) + raise TestFailedError(f"error - tag is '{r.tag}'") @utils.create_files({'atestfile': SEQ_TEST_2, 'atestfile2': SEQ_TEST_2}) @@ -527,7 +538,7 @@ def test_sequence_searcher_overlapping(self): elif r.tag == sd.end_tag: self.assertEqual(r.get(1), "ending") elif r.tag != sd.body_tag: - raise Exception("error - tag is '{}'".format(r.tag)) + raise TestFailedError(f"error - tag is '{r.tag}'") @utils.create_files({'atestfile': SEQ_TEST_3, 'atestfile2': SEQ_TEST_3}) @@ -553,7 +564,7 @@ def test_sequence_searcher_overlapping_incomplete(self): elif r.tag == sd.end_tag: self.assertEqual(r.get(1), "ending") elif r.tag != sd.body_tag: - raise Exception("error - tag is '{}'".format(r.tag)) + raise TestFailedError(f"error - tag is '{r.tag}'") @utils.create_files({'atestfile': SEQ_TEST_4}) def test_sequence_searcher_incomplete_eof_match(self): @@ -576,7 +587,7 @@ def test_sequence_searcher_incomplete_eof_match(self): elif r.tag == sd.end_tag: self.assertEqual(r.get(0), "") else: - raise Exception("error - tag is '{}'".format(r.tag)) + raise TestFailedError(f"error - tag is '{r.tag}'") @utils.create_files({'atestfile': SEQ_TEST_5}) def test_sequence_searcher_multiple_sections(self): @@ -599,7 +610,7 @@ def test_sequence_searcher_multiple_sections(self): elif r.tag == sd.end_tag: 
self.assertEqual(r.get(0), "") else: - raise Exception("error - tag is '{}'".format(r.tag)) + raise TestFailedError(f"error - tag is '{r.tag}'") @utils.create_files({'atestfile': SEQ_TEST_6}) def test_sequence_searcher_eof(self): @@ -629,7 +640,7 @@ def test_sequence_searcher_eof(self): else: self.assertTrue(r.get(0) in ["2_1"]) elif r.tag != sd.end_tag: - raise Exception("error - tag is '{}'".format(r.tag)) + raise TestFailedError(f"error - tag is '{r.tag}'") @utils.create_files({'atestfile': SEQ_TEST_7}) def test_sequence_searcher_section_start_end_same(self): @@ -655,7 +666,7 @@ def test_sequence_searcher_section_start_end_same(self): elif r.tag == sd.body_tag: self.assertTrue(r.get(0) in ["2_1"]) elif r.tag != sd.end_tag: - raise Exception("error - tag is '{}'".format(r.tag)) + raise TestFailedError(f"error - tag is '{r.tag}'") @utils.create_files({'atestfile': MULTI_SEQ_TEST}) def test_sequence_searcher_multi_sequence(self): @@ -666,19 +677,19 @@ def test_sequence_searcher_multi_sequence(self): * test that single incomplete result gets removed """ s = FileSearcher() - sdA = SequenceSearchDef(start=SearchDef(r"^sectionA (\d+)"), + sda = SequenceSearchDef(start=SearchDef(r"^sectionA (\d+)"), body=SearchDef(r"\d_\d"), end=SearchDef( r"^section\S+ (\d+)"), tag="seqA-search-test") - sdB = SequenceSearchDef(start=SearchDef(r"^sectionB (\d+)"), + sdb = SequenceSearchDef(start=SearchDef(r"^sectionB (\d+)"), body=SearchDef(r"\d_\d"), end=SearchDef( r"^section\S+ (\d+)"), tag="seqB-search-test") fname = os.path.join(self.data_root, 'atestfile') - s.add(sdA, path=fname) - s.add(sdB, path=fname) + s.add(sda, path=fname) + s.add(sdb, path=fname) results = s.run() sections = results.find_sequence_by_tag('seqA-search-test') self.assertEqual(len(sections), 1) @@ -694,7 +705,7 @@ def test_logs_since_single_valid(self): c = SearchConstraintSearchSince(current_date=self.current_date, ts_matcher_cls=TimestampSimple, days=7) s = FileSearcher(constraint=c) - sd = SearchDef(r"{}\S+ 
(.+)".format(self.datetime_expr), tag='mysd', + sd = SearchDef(rf"{self.datetime_expr}\S+ (.+)", tag='mysd', constraints=[c]) fname = os.path.join(self.data_root, 'atestfile') s.add(sd, path=fname) @@ -711,14 +722,14 @@ def test_logs_since_first_valid(self): c = SearchConstraintSearchSince(current_date=self.current_date, ts_matcher_cls=TimestampSimple, days=7) s = FileSearcher(constraint=c) - sd = SearchDef(r"{}\S+ (.+)".format(self.datetime_expr), tag='mysd', + sd = SearchDef(rf"{self.datetime_expr}\S+ (.+)", tag='mysd', constraints=[c]) fname = os.path.join(self.data_root, 'atestfile') s.add(sd, path=fname) results = s.run() results = results.find_by_tag('mysd') self.assertEqual([r.get(2) for r in results], - ["L{}".format(i) for i in range(5)]) + [f"L{i}" for i in range(5)]) @utils.create_files({'atestfile': LOGS_W_TS}) def test_logs_since_multi_valid(self): @@ -729,7 +740,7 @@ def test_logs_since_multi_valid(self): c = SearchConstraintSearchSince(current_date=self.current_date, ts_matcher_cls=TimestampSimple, days=7) s = FileSearcher(constraint=c) - sd = SearchDef(r"{}\S+ (.+)".format(self.datetime_expr), tag='mysd') + sd = SearchDef(rf"{self.datetime_expr}\S+ (.+)", tag='mysd') fname = os.path.join(self.data_root, 'atestfile') s.add(sd, path=fname) results = s.run() @@ -745,13 +756,13 @@ def test_logs_since_all_valid(self): c = SearchConstraintSearchSince(current_date=self.current_date, ts_matcher_cls=TimestampSimple, days=7) s = FileSearcher(constraint=c) - sd = SearchDef(r"{}\S+ (.+)".format(self.datetime_expr), tag='mysd') + sd = SearchDef(rf"{self.datetime_expr}\S+ (.+)", tag='mysd') fname = os.path.join(self.data_root, 'atestfile') s.add(sd, path=fname) results = s.run() results = results.find_by_tag('mysd') self.assertEqual([r.get(2) for r in results], - ["L{}".format(i) for i in range(5)]) + [f"L{i}" for i in range(5)]) @utils.create_files({'atestfile': LOGS_W_TS}) def test_logs_since_all_invalid(self): @@ -762,7 +773,7 @@ def 
test_logs_since_all_invalid(self): c = SearchConstraintSearchSince(current_date=self.current_date, ts_matcher_cls=TimestampSimple, days=7) s = FileSearcher(constraint=c) - sd = SearchDef(r"{}\S+ (.+)".format(self.datetime_expr), tag='mysd') + sd = SearchDef(rf"{self.datetime_expr}\S+ (.+)", tag='mysd') fname = os.path.join(self.data_root, 'atestfile') s.add(sd, path=fname) results = s.run() @@ -778,13 +789,13 @@ def test_logs_since_junk_at_start_of_file(self): c = SearchConstraintSearchSince(current_date=self.current_date, ts_matcher_cls=TimestampSimple, days=7) s = FileSearcher(constraint=c) - sd = SearchDef(r"{}\S+ (.+)".format(self.datetime_expr), tag='mysd') + sd = SearchDef(rf"{self.datetime_expr}\S+ (.+)", tag='mysd') fname = os.path.join(self.data_root, 'atestfile') s.add(sd, path=fname) results = s.run() results = results.find_by_tag('mysd') self.assertEqual([r.get(2) for r in results], - ["L{}".format(i) for i in range(5)]) + [f"L{i}" for i in range(5)]) @utils.create_files({'atestfile': LOGS_W_TS + "\n"}) def test_logs_since_junk_at_end_of_file(self): @@ -795,13 +806,13 @@ def test_logs_since_junk_at_end_of_file(self): c = SearchConstraintSearchSince(current_date=self.current_date, ts_matcher_cls=TimestampSimple, days=7) s = FileSearcher(constraint=c) - sd = SearchDef(r"{}\S+ (.+)".format(self.datetime_expr), tag='mysd') + sd = SearchDef(rf"{self.datetime_expr}\S+ (.+)", tag='mysd') fname = os.path.join(self.data_root, 'atestfile') s.add(sd, path=fname) results = s.run() results = results.find_by_tag('mysd') self.assertEqual([r.get(2) for r in results], - ["L{}".format(i) for i in range(5)]) + [f"L{i}" for i in range(5)]) @utils.create_files({'atestfile': LOGS_W_TS + "\n"}) def test_logs_since_junk_at_end_of_file_and_start_invalid(self): @@ -812,7 +823,7 @@ def test_logs_since_junk_at_end_of_file_and_start_invalid(self): c = SearchConstraintSearchSince(current_date=self.current_date, ts_matcher_cls=TimestampSimple, days=1) s = FileSearcher(constraint=c) - 
sd = SearchDef(r"{}\S+ (.+)".format(self.datetime_expr), tag='mysd') + sd = SearchDef(rf"{self.datetime_expr}\S+ (.+)", tag='mysd') fname = os.path.join(self.data_root, 'atestfile') s.add(sd, path=fname) results = s.run() @@ -828,7 +839,7 @@ def test_logs_since_file_valid_with_unmatchable_lines(self): c = SearchConstraintSearchSince(current_date=self.current_date, ts_matcher_cls=TimestampSimple, days=7) s = FileSearcher(constraint=c) - sd = SearchDef(r"{}\S+ (.+)".format(self.datetime_expr), tag='mysd') + sd = SearchDef(rf"{self.datetime_expr}\S+ (.+)", tag='mysd') fname = os.path.join(self.data_root, 'atestfile') s.add(sd, path=fname) results = s.run() @@ -870,7 +881,7 @@ def test_logs_since_all_junk(self): results = s.run() results = results.find_by_tag('mysd') self.assertEqual([r.get(1) for r in results], - ["L{}".format(i) for i in range(9)]) + [f"L{i}" for i in range(9)]) @utils.create_files({'atestfile': LOGS_W_TS}) def test_logs_since_hours(self): @@ -882,7 +893,7 @@ def test_logs_since_hours(self): hours=24, ts_matcher_cls=TimestampSimple) s = FileSearcher(constraint=c) - sd = SearchDef(r"{}\S+ (.+)".format(self.datetime_expr), tag='mysd') + sd = SearchDef(rf"{self.datetime_expr}\S+ (.+)", tag='mysd') fname = os.path.join(self.data_root, 'atestfile') s.add(sd, path=fname) results = s.run() @@ -899,7 +910,7 @@ def test_logs_since_hours_sd(self): hours=24, ts_matcher_cls=TimestampSimple) s = FileSearcher() - sd = SearchDef(r"{}\S+ (.+)".format(self.datetime_expr), tag='mysd', + sd = SearchDef(rf"{self.datetime_expr}\S+ (.+)", tag='mysd', constraints=[c]) fname = os.path.join(self.data_root, 'atestfile') s.add(sd, path=fname) @@ -933,7 +944,8 @@ def test_search_unicode_decode_w_error(self): with self.assertRaises(UnicodeDecodeError): f.run() - def test_search_unicode_decode_no_error(self): + @staticmethod + def test_search_unicode_decode_no_error(): f = FileSearcher(decode_errors='backslashreplace') with tempfile.TemporaryDirectory() as dtmp: fpath = 
os.path.join(dtmp, 'f1') diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index ca92ff5..77c89aa 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -7,7 +7,7 @@ class TestUtils(utils.BaseTestCase): - + """ Unit tests for unit test utils. """ def test_mpcache_simple(self): with tempfile.TemporaryDirectory() as dtmp: cache = MPCache('testtype', 'testcache', dtmp) diff --git a/tests/unit/utils.py b/tests/unit/utils.py index f687f90..7a5c93d 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -24,7 +24,7 @@ def create_files_inner2(self, *args, **kwargs): os.makedirs(os.path.dirname(path)) log.debug("creating test file %s", path) - with open(path, 'w') as fd: + with open(path, 'w', encoding='utf-8') as fd: fd.write(content) ret = f(self, *args, **kwargs) @@ -36,15 +36,16 @@ def create_files_inner2(self, *args, **kwargs): class BaseTestCase(unittest.TestCase): - + """ Custom test case for all unit tests. """ def setUp(self): - self.maxDiff = None + self.maxDiff = None # pylint: disable=invalid-name if os.environ.get('TESTS_LOG_LEVEL_DEBUG', 'no') == 'yes': set_log_level(logging.DEBUG) else: set_log_level(logging.INFO) - def _addDuration(self, *args, **kwargs): # For Python >= 3.12 + # For Python >= 3.12 + def _addDuration(self, *args, **kwargs): # noqa,pylint: disable=invalid-name """ Python 3.12 needs subclasses of unittest.TestCase to implement this in order to record times and execute any cleanup actions once a test completes regardless of success. Otherwise it emits a warning. 
diff --git a/tox.ini b/tox.ini index f0b54c5..ecd9f74 100644 --- a/tox.ini +++ b/tox.ini @@ -1,24 +1,6 @@ [tox] skipsdist = True -envlist = py3,pep8,pylint -sitepackages = False -minversion = 3.18.0 - -[testenv] -basepython = {env:TOX_PYTHON:python3} -unit_tests = {toxinidir}/tests/unit/ -pyfiles = {toxinidir}/setup.py {toxinidir}/searchkit/ {[testenv]unit_tests} -passenv = - TESTS_LOG_LEVEL_DEBUG -deps = - -r{toxinidir}/requirements.txt - -r{toxinidir}/test-requirements.txt -commands = stestr run --serial --test-path {[testenv]unit_tests} {posargs} - -[testenv:pep8] -allowlist_externals = flake8 -commands = - flake8 -v {posargs:{[testenv]pyfiles}} +envlist = py3-coverage,coveragereport,pep8,pylint [flake8] # H106: Don't put vim configuration in source files @@ -31,7 +13,62 @@ show-source = true exclude = ./.*,build,dist import-order-style = pep8 +[coverage:run] +parallel = True +branch = True +source = searchkit +omit = tests/* + +[coverage:report] +ignore_errors = True +skip_empty = True +precision = 2 +fail_under = 84 + +[testenv] +basepython = {env:TOX_PYTHON:python3} +unit_tests = {toxinidir}/tests/unit/ +pyfiles = + {toxinidir}/setup.py {toxinidir}/searchkit/ {[testenv]unit_tests} +setenv = + PYTHONHASHSEED=0 + # Override the default coverage output path to tox's temp directory + # so we don't pollute the root dir. + COVERAGE_FILE={temp_dir}/.coverage + # Override the default "PYTHON" when the "-coverage" is present. + # This allows us to run additional steps on top of the default + # test run, such as changing the default interpreter to "coverage run". + coverage: PYTHON=coverage run +passenv = + TESTS_LOG_LEVEL_DEBUG +deps = + -r{toxinidir}/requirements.txt + -r{toxinidir}/test-requirements.txt +commands = + coverage: coverage erase + # This environment can "optionally" gather coverage data with "-coverage" + # suffix. 
The gathered coverage data is consumed by the "coveragereport" + # environment which merges the data and then generates coverage reports in + # HTML and XML formats. + stestr run --random --slowest --test-path {[testenv]unit_tests} {posargs} + +[testenv:pep8] +commands = flake8 -v {posargs:{[testenv]pyfiles}} + [testenv:pylint] -allowlist_externals = pylint commands = pylint -v --rcfile={toxinidir}/pylintrc {posargs:{[testenv]pyfiles}} +[testenv:coveragereport] +depends = py3-coverage +description = + 'Generate code coverage reports from collected coverage information. + This environment depends on `py3-coverage` for generating the actual + code coverage data from the test run. To collect and generate in a + single step, run `tox -e py3-coverage,coveragereport`.' +commands = + coverage combine + coverage report + coverage html -d cover + coverage xml -o cover/coverage.xml + coverage erase +