From 5516ae3842dd18381bf9aee898b2e71657e51bdd Mon Sep 17 00:00:00 2001 From: Konstantin Slavnov Date: Tue, 21 May 2019 17:51:28 +0300 Subject: [PATCH] Apply suggestions from code review Signed-off-by: Konstantin Slavnov --- .../format/annotations/annotated_data.py | 141 ++++++++---------- .../style/format/annotations/annotations.py | 2 +- lookout/style/format/feature_extractor.py | 21 +-- .../style/format/tests/test_annotations.py | 11 +- lookout/style/format/tests/test_features.py | 19 +-- 5 files changed, 93 insertions(+), 101 deletions(-) diff --git a/lookout/style/format/annotations/annotated_data.py b/lookout/style/format/annotations/annotated_data.py index 26080026c..144b64c8b 100644 --- a/lookout/style/format/annotations/annotated_data.py +++ b/lookout/style/format/annotations/annotated_data.py @@ -1,4 +1,4 @@ -from typing import Dict, Iterator, Optional, Tuple, Union, Type # noqa F401 +from typing import Dict, Iterator, Optional, Tuple, Union, Type, Callable # noqa F401 from lookout.core.analyzer import UnicodeFile from sortedcontainers import SortedDict @@ -64,14 +64,14 @@ def __init__(self, sequence: str): # Dictionary to store annotations for the whole file (aka `global` annotations) self._global_annotations = {} # type: Dict[Type[Annotation], Annotation] - # Next dictionaries are the main core of this class. The most common use-case we have in - # style-analyzer is iterating through Token annotations in the sorted order. That is why + # The following dictionaries are the main members. The most common use case we have in + # style-analyzer is iterating through Token annotations in the sorted order. This is why # ordered dict is used. # `self._type_to_annotations` is the main storage of the Annotations. It is a mapping - # from the annotation type to all annotations of this type which are are stored in the \ + # from the annotation type to all annotations of this type which are stored in the \ # dictionary that is sorted by spans. # `self._span_to_annotations` dict is an optimization to quickly lookup all - # `Annotation`-s that belongs to the same [start, stop) span. + # `Annotation`-s that belong to the same [start, stop) span. self._span_to_annotations = SortedDict() # type: SortedDict[(int, int), Dict[Type[Annotation], Annotation]] # noqa E501 self._type_to_annotations = {} # type: Dict[Type[Annotation], SortedDict[(int, int), Annotation]] # noqa E501 @@ -83,7 +83,7 @@ def __len__(self): def __getitem__(self, item: Union[int, slice, Tuple[int, int]]) -> str: """ - Get the underlying sequence item or slice for the specified range. + Get the underlying sequence item or sequence slice for the specified range. :param item: index, slice or (start, stop) tuple. :return: The corresponding part of the sequence. @@ -96,10 +96,7 @@ def __getitem__(self, item: Union[int, slice, Tuple[int, int]]) -> str: def count(self, annotation_type: Type[Annotation]): """Count the number of annotations of a specific type.""" - try: - return len(self._type_to_annotations[annotation_type]) - except KeyError: - return 0 + return len(self._type_to_annotations.get(annotation_type, [])) def add(self, *annotations: Annotation) -> None: """ @@ -130,13 +127,13 @@ def _add(self, annotation: Annotation) -> None: self._type_to_annotations[annotation_type].peekitem(right_span_index) if self._check_spans_overlap(*right_span, *annotation.span): raise ValueError( - "Trying to add Annotation %s which is overlaps with existing %s" % ( + "Trying to add Annotation %s which overlaps with existing %s" % ( right_annotation, annotation)) left_span_index = max(0, right_span_index - 1) left_span, left_annotation = self._type_to_annotations[annotation_type].peekitem( left_span_index) if self._check_spans_overlap(*left_span, *annotation.span): - raise ValueError("Trying to add Annotation %s which is overlaps with existing" + raise ValueError("Trying to add Annotation %s which overlaps with existing" "%s" % (left_annotation, annotation)) self._span_to_annotations[annotation.span][annotation_type] = annotation self._type_to_annotations[annotation_type][annotation.span] = annotation @@ -146,7 +143,7 @@ def get(self, annotation_type: Type[Annotation], span: Optional[Tuple[int, int]] """ Return an annotation for the given span and type. - Looking for an exact match only. + Looking for an exact (type and span) match only. :param annotation_type: Annotation type to get. :param span: Annotation span (range) to get. If span is not specified it returns an \ @@ -155,45 +152,45 @@ def get(self, annotation_type: Type[Annotation], span: Optional[Tuple[int, int]] """ if span is None: return self._global_annotations[annotation_type] - else: - check_span(*span) - return self._type_to_annotations[annotation_type][span] + check_span(*span) + return self._type_to_annotations[annotation_type][span] - def iter_annotations(self, annotation_type: Type[Annotation], - *additional: Type[Annotation], - start_offset: Optional[int] = None) -> Iterator[AnnotationsSpan]: + def iter_by_type_nested(self, annotation_type: Type[Annotation], + *additional: Type[Annotation], + start_offset: Optional[int] = None) -> Iterator[AnnotationsSpan]: """ Iterate through annotations of the specified type. - Iteration goes through `annotation_type`. it is additionally annotated with what is - specified in `additional` list. Spans of additional annotations should fully cover spans of - `annotation_type`. For example you have annotation *line* and *token*. Each line contains - several tokens. If you try to iterate through *line* and set *token* as `additional` - annotation you get only *line* annotation inside `AnnotationsSpan`. It happens because you - can annotate *token* with *line* but cannot *line* with *token* because *token* is covered - by only one line, but not vice versa. + Iteration goes over `annotation_type` objects. Annotations which are specified in + `additional` list added to resulting `AnnotationsSpan` object. Spans of additional + annotations should fully cover spans of `annotation_type`. For example, suppose that you + have *line* and *token* annotations. Each line contains several tokens. If you try to + iterate through *line* and set *token* as `additional` annotation you get only *line* + annotation inside `AnnotationsSpan`. It happens because you can annotate *token* with + *line* but not *line* with *token*: *token* is covered by only one line and not vice versa. So, manager.iter_annotations(line, token) # gives you only line annotation as output, - # *token* annotations not found - manager.iter_annotations(token, line) # gives you *token* and *line* annotation because - # It is possible to find only one covering *line* annotation. + # *token* annotations not found + manager.iter_annotations(token, line) # gives you *token* and *line* annotations because + # It is possible to find only one covering *line* + # annotation. - `additional` can't be empty. If you need to iterate through one annotation only use - `AnnotationManager.iter_annotation()`. + `additional` can't be empty. If you need to iterate through a single annotation, you should + call `AnnotationManager.iter_annotation()` instead. :param annotation_type: Type of annotation to iterate through. :param additional: Additional annotations that should be added to the main one. - :param start_offset: Start to iterate from the spesific offset. \ + :param start_offset: Start to iterate from a specific offset. \ Can be used as a key argument only. - :return: Iterator through annotations of the requested type. + :return: Iterator over annotations of the requested type. """ if not additional: raise ValueError("At least one additional annotation must be specified. " - "If you need to iterate through only one annotation use " + "If you need to iterate through a single annotation use " "`iter_annotation()`.") types = set(additional) | {annotation_type} - for annotation in self.iter_annotation(annotation_type, start_offset=start_offset): + for annotation in self.iter_by_type(annotation_type, start_offset=start_offset): # Annotations with the same span same_span_annotations = self._span_to_annotations[annotation.span] same_span_annotations_type = set(same_span_annotations.keys()) @@ -209,8 +206,8 @@ def iter_annotations(self, annotation_type: Type[Annotation], annotations.update({type: same_span_annotations[type] for type in common_types}) yield AnnotationsSpan(*annotation.span, annotations) - def iter_annotation(self, annotation_type: Type[Annotation], *, - start_offset: Optional[int] = None) -> Iterator[Annotation]: + def iter_by_type(self, annotation_type: Type[Annotation], *, + start_offset: Optional[int] = None) -> Iterator[Annotation]: """ Iterate through a specific type of annotation. @@ -230,6 +227,21 @@ def iter_annotation(self, annotation_type: Type[Annotation], *, for value in self._type_to_annotations[annotation_type].values()[search_from:]: yield value + def _find_annotations(self, annotation_type: Type[Annotation], start: int, stop: int, + inspect: Callable, action: str) -> Annotation: + try: + annotation_layer = self._type_to_annotations[annotation_type] + except KeyError: + raise NoAnnotation("There is no annotation type %s" % annotation_type) + check_span(start, stop) + search_start = max(0, annotation_layer.bisect_left((start, start)) - 1) + search_stop = annotation_layer.bisect_right((stop, stop)) + for span in annotation_layer.islice(search_start, search_stop): + if inspect(*span): + return annotation_layer[span] + raise NoAnnotation("There is no annotation %s that %s [%d, %d)" % ( + annotation_type, action, start, stop)) + def find_overlapping_annotation(self, annotation_type: Type[Annotation], start: int, stop: int) -> Annotation: """ @@ -241,19 +253,10 @@ def find_overlapping_annotation(self, annotation_type: Type[Annotation], :raise NoAnnotation: There is no such annotation that overlaps with the given interval. :return: `Annotation` of the requested type. """ - try: - annotation_layer = self._type_to_annotations[annotation_type] - except KeyError: - raise NoAnnotation("There is no annotation type %s" % annotation_type) - check_span(start, stop) - search_start = max(0, annotation_layer.bisect_left((start, start)) - 1) - search_stop = annotation_layer.bisect_right((stop, stop)) - for span in annotation_layer.islice(search_start, search_stop): - if self._check_spans_overlap(start, stop, *span): - # assuming that there is only one such annotation - return annotation_layer[span] - raise NoAnnotation("There is no annotation %s from %d to %d" % ( - annotation_type, start, stop)) + return self._find_annotations( + annotation_type, start, stop, action="overlaps", + inspect=lambda span_start, span_stop: self._check_spans_overlap( + start, stop, span_start, span_stop)) def find_covering_annotation(self, annotation_type: Type[Annotation], start: int, stop: int) -> Annotation: @@ -266,27 +269,16 @@ def find_covering_annotation(self, annotation_type: Type[Annotation], :raise NoAnnotation: There is no such annotation that overlaps with the given interval. :return: `Annotation` of the requested type. """ - try: - annotation_layer = self._type_to_annotations[annotation_type] - except KeyError: - raise NoAnnotation("There is no annotation type %s" % annotation_type) - check_span(start, stop) - search_start = max(0, annotation_layer.bisect_left((start, start)) - 1) - search_stop = annotation_layer.bisect_right((stop, stop)) - for span_start, span_stop in annotation_layer.islice(search_start, search_stop): - if start == stop: - if self._check_spans_overlap(start, stop, span_start, span_stop): - return annotation_layer[(span_start, span_stop)] - elif span_start <= start and stop <= span_stop: - # assuming that there is only one such annotation because they cannot overlap - return annotation_layer[(span_start, span_stop)] - raise NoAnnotation("There is no annotation %s that contains [%d, %d)" % ( - annotation_type, start, stop)) + return self._find_annotations( + annotation_type, start, stop, action="contains", + inspect=lambda span_start, span_stop: self._check_spans_overlap( + start, stop, span_start, span_stop) if start == stop else + span_start <= start and stop <= span_stop) @classmethod def _check_spans_overlap(cls, start1: int, stop1: int, start2: int, stop2: int) -> bool: """ - Check if two spans have at least 1 common point in their overlap. + Check if two spans have at least 1 common point. Span 1 is [start1, stop1). `stop1` itself is excluded. Span 2 is [start2, stop2). `stop2` itself is excluded. @@ -313,15 +305,12 @@ def _check_spans_overlap(cls, start1: int, stop1: int, start2: int, stop2: int) if start1 == stop1: if start2 == stop2: return start1 == start2 - else: - return start2 < start1 < stop2 - else: - if start2 == stop2: - return start1 < start2 < stop1 - else: - return (start1 <= start2 < stop1 or - start1 < stop2 < stop1 or - start2 <= start1 < stop2) + return start2 < start1 < stop2 + if start2 == stop2: + return start1 < start2 < stop1 + return (start1 <= start2 < stop1 or + start1 < stop2 < stop1 or + start2 <= start1 < stop2) @classmethod def from_file(cls, file: UnicodeFile) -> "AnnotationManager": diff --git a/lookout/style/format/annotations/annotations.py b/lookout/style/format/annotations/annotations.py index dd70cd2d1..0f1cab9c1 100644 --- a/lookout/style/format/annotations/annotations.py +++ b/lookout/style/format/annotations/annotations.py @@ -11,7 +11,7 @@ def check_offset(value: Union[int, numpy.int], name: str) -> None: Validate the offset: check the type and reject negative values. :param value: Offset value to check. - :param name: Variable name for exception message. + :param name: Variable name for the exception message. :raises ValueError: if value is incorrect. """ if not (isinstance(value, int) or isinstance(value, numpy.int)): diff --git a/lookout/style/format/feature_extractor.py b/lookout/style/format/feature_extractor.py index bbf07144b..e474d4bb4 100644 --- a/lookout/style/format/feature_extractor.py +++ b/lookout/style/format/feature_extractor.py @@ -456,7 +456,7 @@ def _classify_vnodes(self, file: AnnotationManager) -> None: :param file: Source code annotated with `RawTokenAnnotation`. """ indentation = [] - for token in file.iter_annotation(RawTokenAnnotation): + for token in file.iter_by_type(RawTokenAnnotation): token_value = file[token.span] if token.has_node: file.add(token.to_atomic_token_annotation()) @@ -574,7 +574,7 @@ def _class_seq_to_annotations(start, stop, current_class_seq): start, stop, current_class_seq = None, None, [] - for annotations in file.iter_annotations( + for annotations in file.iter_by_type_nested( AtomicTokenAnnotation, ClassAnnotation, AccumulatedIndentationAnnotation): has_target = ClassAnnotation in annotations acc_indent = AccumulatedIndentationAnnotation in annotations @@ -609,7 +609,8 @@ def _add_noops(self, file: AnnotationManager) -> None: return prev_annotations = None - for i, annotations in enumerate(file.iter_annotations(TokenAnnotation, LabelAnnotation)): + for i, annotations in enumerate(file.iter_by_type_nested( + TokenAnnotation, LabelAnnotation)): if i == 0: if LabelAnnotation not in annotations: file.add(TokenAnnotation(0, 0)) @@ -630,7 +631,7 @@ def _find_parent(search_start_offset: int, file: AnnotationManager, closest_left ) -> Optional[bblfsh.Node]: """ Compute the UAST parent of the `TokenAnnotation` as the LCA of the closest left and right \ - babelfish nodes. + Babelfish nodes. :param search_start_offset: Offset of the current node. :param file: Source code annotated with `UASTAnnotation` and `TokenAnnotation`. @@ -644,8 +645,8 @@ def _find_parent(search_start_offset: int, file: AnnotationManager, closest_left left_ancestors.add(id(parents[current_left_ancestor_id])) current_left_ancestor_id = id(parents[current_left_ancestor_id]) - for future_vnode in file.iter_annotation(TokenAnnotation, - start_offset=search_start_offset): + for future_vnode in file.iter_by_type(TokenAnnotation, + start_offset=search_start_offset): if future_vnode.has_node: break else: @@ -798,7 +799,7 @@ def _compute_labels_mappings(self, vnodes: Iterable[VirtualNode]) -> None: def _fill_vnode_parents(self, file: AnnotationManager): closest_left_node_id = None uast_annotation = file.get(UASTAnnotation) - for annotation in file.iter_annotation(TokenAnnotation): + for annotation in file.iter_by_type(TokenAnnotation): if annotation.has_node: closest_left_node_id = id(annotation.node) file.add(TokenParentAnnotation(*annotation.span, @@ -841,7 +842,7 @@ def file_to_old_parse_file_format(file: AnnotationManager) -> Tuple[List["Virtua line_lens = [0] + [len(d) for d in raw_lines_data] line_lens[-1] += 1 _line_start_offsets = numpy.array(line_lens).cumsum() - for annotation in file.iter_annotation(RawTokenAnnotation): + for annotation in file.iter_by_type(RawTokenAnnotation): vnode = VirtualNode( file[annotation.span], _to_position(raw_lines_data, _line_start_offsets, annotation.start), @@ -877,8 +878,8 @@ def _file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualN line_lens[-1] += 1 _line_start_offsets = numpy.array(line_lens).cumsum() vnode_parents = {} - for annotations in file.iter_annotations(TokenAnnotation, - LabelAnnotation, TokenParentAnnotation): + for annotations in file.iter_by_type_nested(TokenAnnotation, + LabelAnnotation, TokenParentAnnotation): vnode = VirtualNode( file[annotations.span], _to_position(raw_lines_data, _line_start_offsets, annotations.start), diff --git a/lookout/style/format/tests/test_annotations.py b/lookout/style/format/tests/test_annotations.py index d782a6cf4..8acc90f48 100644 --- a/lookout/style/format/tests/test_annotations.py +++ b/lookout/style/format/tests/test_annotations.py @@ -86,9 +86,10 @@ def test_iter_annotations(self): AnotherAnnotation: another_annotation}), AnnotationsSpan(3, 4, {Annotation: annotations[2], AnotherAnnotation: another_annotation})] - self.assertEqual(list(annotated_data.iter_annotations(Annotation, AnotherAnnotation)), res) + self.assertEqual(list(annotated_data.iter_by_type_nested( + Annotation, AnotherAnnotation)), res) - annotations = list(annotated_data.iter_annotations(AnotherAnnotation, Annotation)) + annotations = list(annotated_data.iter_by_type_nested(AnotherAnnotation, Annotation)) res = [AnnotationsSpan(0, len(code)-1, {AnotherAnnotation: another_annotation})] self.assertEqual(annotations, res) @@ -96,15 +97,15 @@ def test_iter_annotation(self): code = "0123456789" annotated_data = AnnotationManager(code) with self.assertRaises(KeyError): - list(annotated_data.iter_annotation(Annotation)) + list(annotated_data.iter_by_type(Annotation)) annotations = [Annotation(0, 3), Annotation(3, 3), Annotation(3, 4)] annotated_data.add(*annotations[::-1]) - self.assertEqual(list(annotated_data.iter_annotation(Annotation)), + self.assertEqual(list(annotated_data.iter_by_type(Annotation)), annotations) more_annotations = [Annotation(0, 0), Annotation(4, 4), Annotation(4, 7)] annotated_data.add(*more_annotations) - self.assertEqual(list(annotated_data.iter_annotation(Annotation)), + self.assertEqual(list(annotated_data.iter_by_type(Annotation)), sorted(annotations + more_annotations, key=lambda x: x.span)) def test_find_overlapping_annotation(self): diff --git a/lookout/style/format/tests/test_features.py b/lookout/style/format/tests/test_features.py index 1229f541b..6ae9af666 100644 --- a/lookout/style/format/tests/test_features.py +++ b/lookout/style/format/tests/test_features.py @@ -57,7 +57,7 @@ def test_parse_file_exact_match(self): nodes, _ = file_to_old_parse_file_format(annotated_file) self.assertEqual("".join(n.value for n in nodes), code.decode()) self.assertEqual("".join(annotated_file[token.span] - for token in annotated_file.iter_annotation(RawTokenAnnotation)), + for token in annotated_file.iter_by_type(RawTokenAnnotation)), code.decode()) def test_extract_features_exact_match(self): @@ -76,7 +76,7 @@ def test_parse_file_comment_after_regexp(self): annotated_file = AnnotationManager.from_file(file) self.extractor._parse_file(annotated_file) self.assertEqual("".join(annotated_file[token.span] for token in - annotated_file.iter_annotation(RawTokenAnnotation)), + annotated_file.iter_by_type(RawTokenAnnotation)), code.decode()) def test_parse_file(self): @@ -116,12 +116,12 @@ def test_classify_vnodes(self): self.extractor._parse_file(self.annotated_file) self.extractor._classify_vnodes(self.annotated_file) text = "".join(self.annotated_file[token.span] for token in - self.annotated_file.iter_annotation(AtomicTokenAnnotation)) + self.annotated_file.iter_by_type(AtomicTokenAnnotation)) self.assertEqual(text, self.contents) cls_counts = Counter() old_stop = 0 - for annotations in self.annotated_file.iter_annotations(AtomicTokenAnnotation, - ClassAnnotation): + for annotations in self.annotated_file.iter_by_type_nested(AtomicTokenAnnotation, + ClassAnnotation): self.assertEqual(old_stop, annotations.start) if ClassAnnotation in annotations: cls_counts.update(map(CLASSES.__getitem__, @@ -143,12 +143,12 @@ def test_classify_vnodes_with_trailing_space(self): self.extractor._parse_file(annotated_file) self.extractor._classify_vnodes(annotated_file) text = "".join(annotated_file[token.span] for token in - annotated_file.iter_annotation(AtomicTokenAnnotation)) + annotated_file.iter_by_type(AtomicTokenAnnotation)) self.assertEqual(text, contents) cls_counts = Counter() old_stop = 0 - for annotations in annotated_file.iter_annotations(AtomicTokenAnnotation, - ClassAnnotation): + for annotations in annotated_file.iter_by_type_nested(AtomicTokenAnnotation, + ClassAnnotation): self.assertEqual(old_stop, annotations.start) if ClassAnnotation in annotations: cls_counts.update(map(CLASSES.__getitem__, @@ -245,7 +245,8 @@ def test_noop_vnodes(self): self.extractor._classify_vnodes(self.annotated_file) self.extractor._merge_classes_to_composite_labels(self.annotated_file) self.extractor._add_noops(self.annotated_file) - annotations = list(self.annotated_file.iter_annotations(TokenAnnotation, LabelAnnotation)) + annotations = list(self.annotated_file.iter_by_type_nested( + TokenAnnotation, LabelAnnotation)) for ann1, ann2, ann3 in zip(annotations, islice(annotations, 1, None), islice(annotations, 2, None)):