From eb726756a77eaf1a363ed4c157216b519fc4fdd6 Mon Sep 17 00:00:00 2001 From: Konstantin Slavnov Date: Mon, 20 May 2019 17:46:27 +0300 Subject: [PATCH] Address review feedback Signed-off-by: Konstantin Slavnov --- .../format/annotations/annotated_data.py | 164 +++++++++--------- .../style/format/annotations/annotations.py | 38 ++-- lookout/style/format/feature_extractor.py | 76 ++++---- .../tests/bugs/002_bad_line_positions/test.py | 4 +- .../style/format/tests/test_annotations.py | 11 +- .../tests/test_expected_vnodes_number.py | 4 +- lookout/style/format/tests/test_features.py | 27 +-- 7 files changed, 170 insertions(+), 154 deletions(-) diff --git a/lookout/style/format/annotations/annotated_data.py b/lookout/style/format/annotations/annotated_data.py index 6e190ae21..55355e688 100644 --- a/lookout/style/format/annotations/annotated_data.py +++ b/lookout/style/format/annotations/annotated_data.py @@ -1,4 +1,4 @@ -from typing import Dict, Iterator, Optional, Tuple, Union, Type # noqa F401 +from typing import Dict, Iterator, Optional, Tuple, Union, Type, Callable # noqa F401 from lookout.core.analyzer import UnicodeFile from sortedcontainers import SortedDict @@ -39,9 +39,7 @@ def __init__(self, start, stop, *args, **kwargs): self._stop = stop start = property(lambda self: self._start) - stop = property(lambda self: self._stop) - span = property(lambda self: self._span) @@ -66,14 +64,14 @@ def __init__(self, sequence: str): # Dictionary to store annotations for the whole file (aka `global` annotations) self._global_annotations = {} # type: Dict[Type[Annotation], Annotation] - # Next dictionaries are the main core of this class. The most common use-case we have in - # style-analyzer is iterating through Token annotations in the sorted order. That is why + # The following dictionaries are the main members. The most common use case we have in + # style-analyzer is iterating through Token annotations in the sorted order. This is why # ordered dict is used. # `self._type_to_annotations` is the main storage of the Annotations. It is a mapping - # from the annotation type to all annotations of this type which are are stored in the \ + # from the annotation type to all annotations of this type which are stored in the \ # dictionary that is sorted by spans. # `self._span_to_annotations` dict is an optimization to quickly lookup all - # `Annotation`-s that belongs to the same [start, stop) span. + # `Annotation`-s that belong to the same [start, stop) span. self._span_to_annotations = SortedDict() # type: SortedDict[(int, int), Dict[Type[Annotation], Annotation]] # noqa E501 self._type_to_annotations = {} # type: Dict[Type[Annotation], SortedDict[(int, int), Annotation]] # noqa E501 @@ -85,7 +83,7 @@ def __len__(self): def __getitem__(self, item: Union[int, slice, Tuple[int, int]]) -> str: """ - Get the underlying sequence item or slice for the specified range. + Get the underlying sequence item or sequence slice for the specified range. :param item: index, slice or (start, stop) tuple. :return: The corresponding part of the sequence. @@ -98,14 +96,11 @@ def __getitem__(self, item: Union[int, slice, Tuple[int, int]]) -> str: def count(self, annotation_type: Type[Annotation]): """Count the number of annotations of a specific type.""" - try: - return len(self._type_to_annotations[annotation_type]) - except KeyError: - return 0 + return len(self._type_to_annotations.get(annotation_type, [])) def add(self, *annotations: Annotation) -> None: """ - Add multiple annotations. + Add several annotations. """ for annotation in annotations: self._add(annotation) @@ -132,13 +127,13 @@ def _add(self, annotation: Annotation) -> None: self._type_to_annotations[annotation_type].peekitem(right_span_index) if self._check_spans_overlap(*right_span, *annotation.span): raise ValueError( - "Trying to add Annotation %s which is overlaps with existing %s" % ( + "Trying to add Annotation %s which overlaps with existing %s" % ( right_annotation, annotation)) left_span_index = max(0, right_span_index - 1) left_span, left_annotation = self._type_to_annotations[annotation_type].peekitem( left_span_index) if self._check_spans_overlap(*left_span, *annotation.span): - raise ValueError("Trying to add Annotation %s which is overlaps with existing" + raise ValueError("Trying to add Annotation %s which overlaps with existing" "%s" % (left_annotation, annotation)) self._span_to_annotations[annotation.span][annotation_type] = annotation self._type_to_annotations[annotation_type][annotation.span] = annotation @@ -148,7 +143,7 @@ def get(self, annotation_type: Type[Annotation], span: Optional[Tuple[int, int]] """ Return an annotation for the given span and type. - Looking for an exact match only. + Looking for an exact (type and span) match only. :param annotation_type: Annotation type to get. :param span: Annotation span (range) to get. If span is not specified it returns an \ @@ -157,32 +152,47 @@ def get(self, annotation_type: Type[Annotation], span: Optional[Tuple[int, int]] """ if span is None: return self._global_annotations[annotation_type] - else: - check_span(*span) - return self._type_to_annotations[annotation_type][span] + check_span(*span) + return self._type_to_annotations[annotation_type][span] - def iter_annotations(self, annotation_type: Type[Annotation], - *additional: Type[Annotation], - start_offset: Optional[int] = None) -> Union[Iterator[AnnotationsSpan]]: + def iter_by_type_nested(self, annotation_type: Type[Annotation], + *covered_by: Type[Annotation], + start_offset: Optional[int] = None) -> Iterator[AnnotationsSpan]: """ - Iterate through annotations with specified type. - - Iteration goes through `annotation_type`. It is additionally annotated with annotations - specified in `additional`. `additional` can't be empty. If you need to iterate through \ - one annotation only use `AnnotationManager.iter_annotation()`. + Iterate over annotations of the specified type which are covered by some other annotation \ + types. + + Iteration goes over `annotation_type` objects. Annotations which are specified in + `covered_by` are added to the resulting `AnnotationsSpan` object. Spans of the additional + annotations should fully cover the spans of `annotation_type`. For example, suppose that + you have *line* and *token* annotations. Each line contains several tokens. If you try to + iterate through *line* and set *token* as `covered_by` annotation you get only *line* + annotation inside `AnnotationsSpan`. It happens because you can annotate *token* with + *line* but not *line* with *token*: *token* is covered by only one line and not vice versa. + + So, + manager.iter_by_type_nested(line, token) # gives you only line annotation as output, + # *token* annotations not found + manager.iter_by_type_nested(token, line) # gives you *token* and *line* annotations + # because it is possible to find only one + # covering *line* annotation. + + `covered_by` can't be empty. If you need to iterate over a single annotation type, you + should call `AnnotationManager.iter_annotation()` instead. :param annotation_type: Type of annotation to iterate through. - :param additional: Additional annotations that should be added to the main one. - :param start_offset: Start to iterate from the spesific offset. \ + :param covered_by: Additional annotations that should be added to the main one if they \ + cover its span. + :param start_offset: Start to iterate from a specific offset. \ Can be used as a key argument only. - :return: Iterator through annotations of requested types. + :return: Iterator over annotations of the requested type. """ - if not additional: - raise ValueError("At least one additional annotation should be specified. " - "If you need to iterate through only one annotation use " + if not covered_by: + raise ValueError("At least one covered_by annotation must be specified. " + "If you need to iterate over a single annotation type use " "`iter_annotation()`.") - types = set(additional) | {annotation_type} - for annotation in self.iter_annotation(annotation_type, start_offset=start_offset): + types = set(covered_by) | {annotation_type} + for annotation in self.iter_by_type(annotation_type, start_offset=start_offset): # Annotations with the same span same_span_annotations = self._span_to_annotations[annotation.span] same_span_annotations_type = set(same_span_annotations.keys()) @@ -198,10 +208,10 @@ def iter_annotations(self, annotation_type: Type[Annotation], annotations.update({type: same_span_annotations[type] for type in common_types}) yield AnnotationsSpan(*annotation.span, annotations) - def iter_annotation(self, annotation_type: Type[Annotation], *, - start_offset: Optional[int] = None) -> Iterator[Annotation]: + def iter_by_type(self, annotation_type: Type[Annotation], *, + start_offset: Optional[int] = None) -> Iterator[Annotation]: """ - Iterate through a specific type of annotation. + Iterate over annotations of the specified type. If you need to iterate through several annotations use \ `AnnotationManager.iter_annotations()` instead. @@ -219,6 +229,21 @@ def iter_annotation(self, annotation_type: Type[Annotation], *, for value in self._type_to_annotations[annotation_type].values()[search_from:]: yield value + def _find_annotations(self, annotation_type: Type[Annotation], start: int, stop: int, + inspect: Callable, action: str) -> Annotation: + try: + annotation_layer = self._type_to_annotations[annotation_type] + except KeyError: + raise NoAnnotation("There is no annotation type %s" % annotation_type) + check_span(start, stop) + search_start = max(0, annotation_layer.bisect_left((start, start)) - 1) + search_stop = annotation_layer.bisect_right((stop, stop)) + for span in annotation_layer.islice(search_start, search_stop): + if inspect(*span): + return annotation_layer[span] + raise NoAnnotation("There is no annotation %s that %s [%d, %d)" % ( + annotation_type, action, start, stop)) + def find_overlapping_annotation(self, annotation_type: Type[Annotation], start: int, stop: int) -> Annotation: """ @@ -230,19 +255,10 @@ def find_overlapping_annotation(self, annotation_type: Type[Annotation], :raise NoAnnotation: There is no such annotation that overlaps with the given interval. :return: `Annotation` of the requested type. """ - try: - annotation_layer = self._type_to_annotations[annotation_type] - except KeyError: - raise NoAnnotation("There is no annotation layer %s" % annotation_type) - check_span(start, stop) - search_start = max(0, annotation_layer.bisect_left((start, start)) - 1) - search_stop = annotation_layer.bisect_right((stop, stop)) - for span in annotation_layer.islice(search_start, search_stop): - if self._check_spans_overlap(start, stop, *span): - # assuming that there is only one such annotation - return annotation_layer[span] - raise NoAnnotation("There is no annotation %s from %d to %d" % ( - annotation_type, start, stop)) + return self._find_annotations( + annotation_type, start, stop, action="overlaps", + inspect=lambda span_start, span_stop: self._check_spans_overlap( + start, stop, span_start, span_stop)) def find_covering_annotation(self, annotation_type: Type[Annotation], start: int, stop: int) -> Annotation: @@ -255,27 +271,18 @@ def find_covering_annotation(self, annotation_type: Type[Annotation], :raise NoAnnotation: There is no such annotation that overlaps with the given interval. :return: `Annotation` of the requested type. """ - try: - annotation_layer = self._type_to_annotations[annotation_type] - except KeyError: - raise NoAnnotation("There is no annotation layer %s" % annotation_type) - check_span(start, stop) - search_start = max(0, annotation_layer.bisect_left((start, start)) - 1) - search_stop = annotation_layer.bisect_right((stop, stop)) - for span_start, span_stop in annotation_layer.islice(search_start, search_stop): + def check_cover(span_start, span_stop): if start == stop: - if self._check_spans_overlap(start, stop, span_start, span_stop): - return annotation_layer[(span_start, span_stop)] - elif span_start <= start and stop <= span_stop: - # assuming that there is only one such annotation because they cannot overlap - return annotation_layer[(span_start, span_stop)] - raise NoAnnotation("There is no annotation %s that contains i%d to %d" % ( - annotation_type, start, stop)) + return self._check_spans_overlap(start, stop, span_start, span_stop) + return span_start <= start and stop <= span_stop + + return self._find_annotations(annotation_type, start, stop, action="contains", + inspect=check_cover) @classmethod def _check_spans_overlap(cls, start1: int, stop1: int, start2: int, stop2: int) -> bool: """ - Check if two spans have at least 1 common point in their overlap. + Check if two spans have at least 1 common point. Span 1 is [start1, stop1). `stop1` itself is excluded. Span 2 is [start2, stop2). `stop2` itself is excluded. @@ -289,28 +296,25 @@ def _check_spans_overlap(cls, start1: int, stop1: int, start2: int, stop2: int) 2.3. [x, y) and [y, y) have no overlap. 2.4. [x, z) and [y, y) are overlapping because [x, z) fully covers y point. - Despite the fact that overlapping rules are defined for 0-intervals it is not recommended \ - to rely on them. If you want to get an additional annotation of the 0-interval annotation \ - link one annotation to another. See `TokenAnnotation` as an example. + Despite the fact that overlapping rules are defined for 0-intervals, it is unsafe \ + to rely on them. If you want to get an additional annotation of the 0-interval \ + annotation, link one annotation to another. See `TokenAnnotation` for example. :param start1: Start offset of the first span. :param stop1: Stop offset of the first span. :param start2: Start offset of the second span. :param stop2: Stop offset of the second span. - :return: True if spans are overlapping else False. + :return: True if two spans overlap, otherwise False. """ if start1 == stop1: if start2 == stop2: return start1 == start2 - else: - return start2 < start1 < stop2 - else: - if start2 == stop2: - return start1 < start2 < stop1 - else: - return (start1 <= start2 < stop1 or - start1 < stop2 < stop1 or - start2 <= start1 < stop2) + return start2 < start1 < stop2 + if start2 == stop2: + return start1 < start2 < stop1 + return (start1 <= start2 < stop1 or + start1 < stop2 < stop1 or + start2 <= start1 < stop2) @classmethod def from_file(cls, file: UnicodeFile) -> "AnnotationManager": diff --git a/lookout/style/format/annotations/annotations.py b/lookout/style/format/annotations/annotations.py index 8dba910fb..0f1cab9c1 100644 --- a/lookout/style/format/annotations/annotations.py +++ b/lookout/style/format/annotations/annotations.py @@ -1,26 +1,28 @@ """Annotations for style-analyzer.""" -from typing import FrozenSet, Optional, Tuple +from typing import FrozenSet, Optional, Tuple, Union + +import numpy from lookout.style.format.utils import get_uast_parents -def check_offset(value: int, name: str) -> None: +def check_offset(value: Union[int, numpy.int], name: str) -> None: """ - Check offset value for unexpected values as wrong type and negative. + Validate the offset: check the type and reject negative values. :param value: Offset value to check. - :param name: Variable name for exception message. + :param name: Variable name for the exception message. :raises ValueError: if value is incorrect. """ - if not isinstance(value, int): - raise ValueError("Type of `%s` should be int. Get %s" % (name, type(value))) + if not (isinstance(value, int) or isinstance(value, numpy.int)): + raise ValueError("Type of `%s` should be int. Got %s" % (name, type(value))) if value < 0: - raise ValueError("`%s` should be non-negative. Get %d" % (name, value)) + raise ValueError("`%s` should be non-negative. Got %d" % (name, value)) def check_span(start: int, stop: int) -> None: """ - Check span value for unexpected values as wrong type and negative or wrong order. + Validate span value: check the type, reject negative values and ensure an increasing order. :param start: Start offset of the span. :param stop: Stop offset of the span. @@ -29,7 +31,7 @@ def check_span(start: int, stop: int) -> None: check_offset(start, "start") check_offset(stop, "stop") if stop < start: - raise ValueError("`stop` should be not less than `start`. Get start=%d, stop=%d" % ( + raise ValueError("`stop` should be not less than `start`. Got start=%d, stop=%d" % ( start, stop)) @@ -48,11 +50,8 @@ def __init__(self, start: int, stop: int): self._stop = stop start = property(lambda self: self._start) - stop = property(lambda self: self._stop) - span = property(lambda self: (self._start, self._stop)) - name = property(lambda self: type(self).__name__) def __repr__(self) -> str: @@ -68,16 +67,16 @@ class LineAnnotation(Annotation): """ Line number annotation. - Line number in 1-based. + Line numbers are 1-based. """ def __init__(self, start: int, stop: int, line: int): """Initialize a new instance of `LineAnnotation`.""" super().__init__(start, stop) if not isinstance(line, int): - raise ValueError("Type of `line` should be int. Get %s" % type(line)) + raise ValueError("Type of `line` should be int. Got %s" % type(line)) if line < 1: - raise ValueError("`line` value should be positive. Get %d" % line) + raise ValueError("`line` value should be positive. Got %d" % line) self._line = line line = property(lambda self: self._line) @@ -116,7 +115,6 @@ def __init__(self, start: int, stop: int, uast: "bblfsh.Node"): self._parents = get_uast_parents(uast) uast = property(lambda self: self._node) - parents = property(lambda self: self._parents) @@ -128,8 +126,8 @@ def __init__(self, start: int, stop: int, """ Initialize a new instance of `TokenAnnotation`. - `TokenAnnotation` can be 0-interval: [x, x). To avoid the complex search of related uast - annotation it is required to pass `UASTNodeAnnotation` to `__init__()`. + `TokenAnnotation` may have 0 length: [x, x). To avoid the complex search of the related + UAST annotation it is required to pass `UASTNodeAnnotation` to `__init__()`. :param start: Annotation start. :param stop: Annotation end. @@ -235,9 +233,9 @@ def __init__(self, start: int, stop: int, lines: FrozenSet[int]): super().__init__(start, stop) for line in lines: if not isinstance(line, int): - raise ValueError("Type of `line` should be int. Get %s" % type(line)) + raise ValueError("Type of `line` should be int. Got %s" % type(line)) if line < 1: - raise ValueError("`line` value should be positive. Get %d" % line) + raise ValueError("`line` value should be positive. Got %d" % line) self._lines = lines lines = property(lambda self: self._lines) diff --git a/lookout/style/format/feature_extractor.py b/lookout/style/format/feature_extractor.py index 926f75430..ec1612a3d 100644 --- a/lookout/style/format/feature_extractor.py +++ b/lookout/style/format/feature_extractor.py @@ -194,8 +194,8 @@ def extract_features(self, files: Iterable[UnicodeFile], the corresponding `VirtualNode`-s and the parents mapping \ or None in case no features were extracted. """ - files = self._parse_vnodes(files, lines) - parsed_files, node_parents, vnode_parents = to_vnodes_format(files) + files = self._annotate_files(files, lines) + parsed_files, node_parents, vnode_parents = files_to_old_parse_vnodes_format(files) xy = self._convert_files_to_xy(parsed_files) if xy is None: return None @@ -304,8 +304,8 @@ def populate_indices(feature_group: FeatureGroup, feature_names: Sequence[str], for group, counts in self._feature_node_counts.items()} self._feature_count = sum(self._feature_group_counts.values()) - def _parse_vnodes(self, files: Iterable[UnicodeFile], lines: Optional[List[List[int]]] = None, - ) -> List[AnnotationManager]: + def _annotate_files(self, files: Iterable[UnicodeFile], + lines: Optional[List[List[int]]] = None) -> List[AnnotationManager]: parsed_files = [] for i, file in enumerate(files): path = file.path @@ -450,13 +450,13 @@ def _classify_vnodes(self, file: AnnotationManager) -> None: Annotate source code with `AtomicTokenAnnotation`, `ClassAnnotation` and \ `AccumulatedIndentationAnnotation`. - `ClassAnnotation` caontains the index of the corresponding class to predict. - We detect indentation changes so several whitespace nodes are merged together. + `ClassAnnotation` contains the index of the corresponding class to predict. + We detect indentation changes, so several whitespace nodes are merged together. :param file: Source code annotated with `RawTokenAnnotation`. """ indentation = [] - for token in file.iter_annotation(RawTokenAnnotation): + for token in file.iter_by_type(RawTokenAnnotation): token_value = file[token.span] if token.has_node: file.add(token.to_atomic_token_annotation()) @@ -574,7 +574,7 @@ def _class_seq_to_annotations(start, stop, current_class_seq): start, stop, current_class_seq = None, None, [] - for annotations in file.iter_annotations( + for annotations in file.iter_by_type_nested( AtomicTokenAnnotation, ClassAnnotation, AccumulatedIndentationAnnotation): has_target = ClassAnnotation in annotations acc_indent = AccumulatedIndentationAnnotation in annotations @@ -609,7 +609,8 @@ def _add_noops(self, file: AnnotationManager) -> None: return prev_annotations = None - for i, annotations in enumerate(file.iter_annotations(TokenAnnotation, LabelAnnotation)): + for i, annotations in enumerate(file.iter_by_type_nested( + TokenAnnotation, LabelAnnotation)): if i == 0: if LabelAnnotation not in annotations: file.add(TokenAnnotation(0, 0)) @@ -630,7 +631,7 @@ def _find_parent(search_start_offset: int, file: AnnotationManager, closest_left ) -> Optional[bblfsh.Node]: """ Compute the UAST parent of the `TokenAnnotation` as the LCA of the closest left and right \ - babelfish nodes. + Babelfish nodes. :param search_start_offset: Offset of the current node. :param file: Source code annotated with `UASTAnnotation` and `TokenAnnotation`. @@ -644,8 +645,8 @@ def _find_parent(search_start_offset: int, file: AnnotationManager, closest_left left_ancestors.add(id(parents[current_left_ancestor_id])) current_left_ancestor_id = id(parents[current_left_ancestor_id]) - for future_vnode in file.iter_annotation(TokenAnnotation, - start_offset=search_start_offset): + for future_vnode in file.iter_by_type(TokenAnnotation, + start_offset=search_start_offset): if future_vnode.has_node: break else: @@ -798,7 +799,7 @@ def _compute_labels_mappings(self, vnodes: Iterable[VirtualNode]) -> None: def _fill_vnode_parents(self, file: AnnotationManager): closest_left_node_id = None uast_annotation = file.get(UASTAnnotation) - for annotation in file.iter_annotation(TokenAnnotation): + for annotation in file.iter_by_type(TokenAnnotation): if annotation.has_node: closest_left_node_id = id(annotation.node) file.add(TokenParentAnnotation(*annotation.span, @@ -821,13 +822,13 @@ def _to_position(raw_lines_data, _lines_start_offset, offset): return Position(offset, line_num + 1, col + 1) -def raw_file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"], - Dict[int, bblfsh.Node]]: +def file_to_old_parse_file_format(file: AnnotationManager) -> Tuple[List["VirtualNode"], + Dict[int, bblfsh.Node]]: """ Convert `AnnotationManager` instance to the deprecated output format of \ `FeatureExtractor._parse_file()`. - The function is created for backwards capability and should be removed after refactoring is \ + The function exists for backward compatibility and should be removed after the refactoring is \ finished. :param file: file annotated with `UASTAnnotation`, `PathAnnotation` and `RawTokenAnnotation`. \ @@ -841,7 +842,7 @@ def raw_file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["Virtu line_lens = [0] + [len(d) for d in raw_lines_data] line_lens[-1] += 1 _line_start_offsets = numpy.array(line_lens).cumsum() - for annotation in file.iter_annotation(RawTokenAnnotation): + for annotation in file.iter_by_type(RawTokenAnnotation): vnode = VirtualNode( file[annotation.span], _to_position(raw_lines_data, _line_start_offsets, annotation.start), @@ -855,13 +856,19 @@ def raw_file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["Virtu return vnodes, file.get(UASTAnnotation).parents -def file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"], - Dict[int, bblfsh.Node]]: +def _file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"], + Dict[int, bblfsh.Node]]: """ - Convert `AnnotationManager` instance to an old format, that is a sequence of vnodes and \ - vnodes parents mapping. + Convert one `AnnotationManager` instance to the deprecated format of \ + `FeatureExtractor._annotate_files()` (`_parse_vnodes()` before refactoring). - The function is created for backwards capability and should be removed after refactoring is \ + The old format is a sequence of vnodes and vnodes parents mapping. Used by + `files_to_old_parse_file_format` to generate the old `_parse_vnodes`-like output format for a + sequence of `AnnotationManager`-s. This function is different from + `file_to_old_parse_file_format()` because it is created for `_parse_vnodes()` backward + compatibility and `file_to_old_parse_file_format()` for `_parse_file()` backward compatibility. + + The function exists for backward compatibility and should be removed after the refactoring is \ finished. :param file: file annotated with `Path`-, `Token`-, `Label`-, `TokenParent`- `Annotation`. @@ -874,8 +881,8 @@ def file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNo line_lens[-1] += 1 _line_start_offsets = numpy.array(line_lens).cumsum() vnode_parents = {} - for annotations in file.iter_annotations(TokenAnnotation, - LabelAnnotation, TokenParentAnnotation): + for annotations in file.iter_by_type_nested(TokenAnnotation, + LabelAnnotation, TokenParentAnnotation): vnode = VirtualNode( file[annotations.span], _to_position(raw_lines_data, _line_start_offsets, annotations.start), @@ -891,15 +898,18 @@ def file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNo return vnodes, vnode_parents -def to_vnodes_format(files: Sequence[AnnotationManager], - ) -> Tuple[List[Tuple[List[VirtualNode], Dict[int, bblfsh.Node], Set[int]]], - Dict[int, bblfsh.Node], - Dict[int, bblfsh.Node]]: +def files_to_old_parse_vnodes_format( + files: Sequence[AnnotationManager], + ) -> Tuple[List[Tuple[List[VirtualNode], Dict[int, bblfsh.Node], Set[int]]], + Dict[int, bblfsh.Node], + Dict[int, bblfsh.Node]]: """ - Convert `AnnotationManager` instances to the deprecated output format of \ - `FeatureExtractor._parse_vnodes()`. + Convert a sequence of `AnnotationManager` instances to the deprecated output format of \ + `FeatureExtractor._annotate_files()` (`_parse_vnodes()` before refactoring). + + In addition to `_file_to_vnodes_and_parents()` it provides the `node_parents` mapping. - The function is created for backwards capability and should be removed after refactoring is \ + The function exists for backward compatibility and should be removed after the refactoring is \ finished. :param files: Sequence of fully annotated files. It is expected to be the output of \ @@ -911,11 +921,11 @@ def to_vnodes_format(files: Sequence[AnnotationManager], node_parents = {} vnodes = [] for file in files: - file_vnodes, file_vnode_parents = file_to_vnodes_and_parents(file) + file_vnodes, file_vnode_parents = _file_to_vnodes_and_parents(file) file_node_parents = file.get(UASTAnnotation).parents try: file_lines = set(file.get(LinesToCheckAnnotation).lines) - except Exception: + except KeyError: file_lines = None vnodes.append((file_vnodes, file_node_parents, file_lines)) vnode_parents.update(file_vnode_parents) diff --git a/lookout/style/format/tests/bugs/002_bad_line_positions/test.py b/lookout/style/format/tests/bugs/002_bad_line_positions/test.py index d5b4ad893..389965006 100644 --- a/lookout/style/format/tests/bugs/002_bad_line_positions/test.py +++ b/lookout/style/format/tests/bugs/002_bad_line_positions/test.py @@ -7,7 +7,7 @@ from lookout.style.format.analyzer import FormatAnalyzer from lookout.style.format.annotations.annotated_data import AnnotationManager -from lookout.style.format.feature_extractor import FeatureExtractor, raw_file_to_vnodes_and_parents +from lookout.style.format.feature_extractor import FeatureExtractor, file_to_old_parse_file_format from lookout.style.format.tests.test_analyzer import get_config @@ -29,7 +29,7 @@ def test_positions(self): file = UnicodeFile(content=code_uni, uast=uast_uni, language="javascript", path="test.js") annotated_data = AnnotationManager.from_file(file) self.extractor._parse_file(annotated_data) - nodes, _ = raw_file_to_vnodes_and_parents(annotated_data) + nodes, _ = file_to_old_parse_file_format(annotated_data) for index, (node1, node2) in enumerate(zip(nodes, nodes[1:])): self.assertLessEqual(node1.start.line, node2.start.line, "Start line position decrease for %d, %d nodes" % ( diff --git a/lookout/style/format/tests/test_annotations.py b/lookout/style/format/tests/test_annotations.py index d782a6cf4..8acc90f48 100644 --- a/lookout/style/format/tests/test_annotations.py +++ b/lookout/style/format/tests/test_annotations.py @@ -86,9 +86,10 @@ def test_iter_annotations(self): AnotherAnnotation: another_annotation}), AnnotationsSpan(3, 4, {Annotation: annotations[2], AnotherAnnotation: another_annotation})] - self.assertEqual(list(annotated_data.iter_annotations(Annotation, AnotherAnnotation)), res) + self.assertEqual(list(annotated_data.iter_by_type_nested( + Annotation, AnotherAnnotation)), res) - annotations = list(annotated_data.iter_annotations(AnotherAnnotation, Annotation)) + annotations = list(annotated_data.iter_by_type_nested(AnotherAnnotation, Annotation)) res = [AnnotationsSpan(0, len(code)-1, {AnotherAnnotation: another_annotation})] self.assertEqual(annotations, res) @@ -96,15 +97,15 @@ def test_iter_annotation(self): code = "0123456789" annotated_data = AnnotationManager(code) with self.assertRaises(KeyError): - list(annotated_data.iter_annotation(Annotation)) + list(annotated_data.iter_by_type(Annotation)) annotations = [Annotation(0, 3), Annotation(3, 3), Annotation(3, 4)] annotated_data.add(*annotations[::-1]) - self.assertEqual(list(annotated_data.iter_annotation(Annotation)), + self.assertEqual(list(annotated_data.iter_by_type(Annotation)), annotations) more_annotations = [Annotation(0, 0), Annotation(4, 4), Annotation(4, 7)] annotated_data.add(*more_annotations) - self.assertEqual(list(annotated_data.iter_annotation(Annotation)), + self.assertEqual(list(annotated_data.iter_by_type(Annotation)), sorted(annotations + more_annotations, key=lambda x: x.span)) def test_find_overlapping_annotation(self): diff --git a/lookout/style/format/tests/test_expected_vnodes_number.py b/lookout/style/format/tests/test_expected_vnodes_number.py index 0c25ea420..b7f743c56 100644 --- a/lookout/style/format/tests/test_expected_vnodes_number.py +++ b/lookout/style/format/tests/test_expected_vnodes_number.py @@ -10,10 +10,12 @@ class ExpectedVnodesTest(unittest.TestCase): + def setUp(self): + slogging.setup("INFO", False) + @unittest.skipIf(sys.version_info.minor == 6, "We test python 3.6 inside docker container, " "so there is no another docker inside.") def test_calc_expected_vnodes_number_entry(self): - slogging.setup("INFO", False) quality_report_repos_filepath = os.path.abspath( os.path.join(os.path.split(__file__)[0], "../benchmarks/data/quality_report_repos.csv")) diff --git a/lookout/style/format/tests/test_features.py b/lookout/style/format/tests/test_features.py index 94e770b93..6ae9af666 100644 --- a/lookout/style/format/tests/test_features.py +++ b/lookout/style/format/tests/test_features.py @@ -18,7 +18,7 @@ LabelAnnotation, RawTokenAnnotation, TokenAnnotation, UASTAnnotation from lookout.style.format.classes import CLASS_INDEX, CLASSES, CLS_NEWLINE, CLS_NOOP, \ CLS_SINGLE_QUOTE, CLS_SPACE, CLS_SPACE_DEC, CLS_SPACE_INC -from lookout.style.format.feature_extractor import FeatureExtractor, raw_file_to_vnodes_and_parents +from lookout.style.format.feature_extractor import FeatureExtractor, file_to_old_parse_file_format from lookout.style.format.tests.test_analyzer import get_config from lookout.style.format.virtual_node import Position, VirtualNode @@ -54,10 +54,10 @@ def test_parse_file_exact_match(self): File(uast=uast, content=code, language="javascript", path="")) annotated_file = AnnotationManager.from_file(file) self.extractor._parse_file(annotated_file) - nodes, _ = raw_file_to_vnodes_and_parents(annotated_file) + nodes, _ = file_to_old_parse_file_format(annotated_file) self.assertEqual("".join(n.value for n in nodes), code.decode()) self.assertEqual("".join(annotated_file[token.span] - for token in annotated_file.iter_annotation(RawTokenAnnotation)), + for token in annotated_file.iter_by_type(RawTokenAnnotation)), code.decode()) def test_extract_features_exact_match(self): @@ -76,14 +76,14 @@ def test_parse_file_comment_after_regexp(self): annotated_file = AnnotationManager.from_file(file) self.extractor._parse_file(annotated_file) self.assertEqual("".join(annotated_file[token.span] for token in - annotated_file.iter_annotation(RawTokenAnnotation)), + annotated_file.iter_by_type(RawTokenAnnotation)), code.decode()) def test_parse_file(self): self.extractor._parse_file(self.annotated_file) text = [] offset = line = col = 0 - nodes, _ = raw_file_to_vnodes_and_parents(self.annotated_file) + nodes, _ = file_to_old_parse_file_format(self.annotated_file) parents = self.annotated_file.get(UASTAnnotation).parents for n in nodes: if line == n.start.line - 1: @@ -105,7 +105,7 @@ def test_parse_file_with_trailing_space(self): File(content=contents.encode(), uast=self.uast, language="javascript", path="test")) annotated_data = AnnotationManager.from_file(file) self.extractor._parse_file(annotated_data) - nodes, _ = raw_file_to_vnodes_and_parents(annotated_data) + nodes, _ = file_to_old_parse_file_format(annotated_data) offset, line, col = nodes[-1].end self.assertEqual(len(contents), offset) # Space token always ends on the same line @@ -116,12 +116,12 @@ def test_classify_vnodes(self): self.extractor._parse_file(self.annotated_file) self.extractor._classify_vnodes(self.annotated_file) text = "".join(self.annotated_file[token.span] for token in - self.annotated_file.iter_annotation(AtomicTokenAnnotation)) + self.annotated_file.iter_by_type(AtomicTokenAnnotation)) self.assertEqual(text, self.contents) cls_counts = Counter() old_stop = 0 - for annotations in self.annotated_file.iter_annotations(AtomicTokenAnnotation, - ClassAnnotation): + for annotations in self.annotated_file.iter_by_type_nested(AtomicTokenAnnotation, + ClassAnnotation): self.assertEqual(old_stop, annotations.start) if ClassAnnotation in annotations: cls_counts.update(map(CLASSES.__getitem__, @@ -143,12 +143,12 @@ def test_classify_vnodes_with_trailing_space(self): self.extractor._parse_file(annotated_file) self.extractor._classify_vnodes(annotated_file) text = "".join(annotated_file[token.span] for token in - annotated_file.iter_annotation(AtomicTokenAnnotation)) + annotated_file.iter_by_type(AtomicTokenAnnotation)) self.assertEqual(text, contents) cls_counts = Counter() old_stop = 0 - for annotations in annotated_file.iter_annotations(AtomicTokenAnnotation, - ClassAnnotation): + for annotations in annotated_file.iter_by_type_nested(AtomicTokenAnnotation, + ClassAnnotation): self.assertEqual(old_stop, annotations.start) if ClassAnnotation in annotations: cls_counts.update(map(CLASSES.__getitem__, @@ -245,7 +245,8 @@ def test_noop_vnodes(self): self.extractor._classify_vnodes(self.annotated_file) self.extractor._merge_classes_to_composite_labels(self.annotated_file) self.extractor._add_noops(self.annotated_file) - annotations = list(self.annotated_file.iter_annotations(TokenAnnotation, LabelAnnotation)) + annotations = list(self.annotated_file.iter_by_type_nested( + TokenAnnotation, LabelAnnotation)) for ann1, ann2, ann3 in zip(annotations, islice(annotations, 1, None), islice(annotations, 2, None)):