Address review feedback

Signed-off-by: Konstantin Slavnov <[email protected]>
src-d · May 20, 2019 · 4b557b0 · 4b557b0
1 parent 6538220
commit 4b557b0
Show file tree

Hide file tree

Showing 6 changed files with 75 additions and 58 deletions.
diff --git a/lookout/style/format/annotations/annotated_data.py b/lookout/style/format/annotations/annotated_data.py
@@ -39,9 +39,7 @@ def __init__(self, start, stop, *args, **kwargs):
         self._stop = stop
 
     start = property(lambda self: self._start)
-
     stop = property(lambda self: self._stop)
-
     span = property(lambda self: self._span)
 
 
@@ -105,7 +103,7 @@ def count(self, annotation_type: Type[Annotation]):
 
     def add(self, *annotations: Annotation) -> None:
         """
-        Add multiple annotations.
+        Add several annotations.
         """
         for annotation in annotations:
             self._add(annotation)
@@ -163,22 +161,35 @@ def get(self, annotation_type: Type[Annotation], span: Optional[Tuple[int, int]]
 
     def iter_annotations(self, annotation_type: Type[Annotation],
                          *additional: Type[Annotation],
-                         start_offset: Optional[int] = None) -> Union[Iterator[AnnotationsSpan]]:
+                         start_offset: Optional[int] = None) -> Iterator[AnnotationsSpan]:
         """
-        Iterate through annotations with specified type.
+        Iterate through annotations of the specified type.
+
+        Iteration goes through `annotation_type`. it is additionally annotated with what is
+        specified in `additional` list. Spans of additional annotations should fully cover spans of
+        `annotation_type`. For example you have annotation *line* and *token*. Each line contains
+        several tokens. If you try to iterate through *line* and set *token* as `additional`
+        annotation you get only *line* annotation inside `AnnotationsSpan`. It happens because you
+        can annotate *token* with *line* but cannot *line* with *token* because *token* is covered
+        by only one line, but not vice versa.
+
+        So,
+        manager.iter_annotations(line, token)  # gives you only line annotation as output,
+        # *token* annotations not found
+        manager.iter_annotations(token, line)  # gives you *token* and *line* annotation because
+        # It is possible to find only one covering *line* annotation.
 
-        Iteration goes through `annotation_type`. It is additionally annotated with annotations
-        specified in `additional`. `additional` can't be empty. If you need to iterate through \
-        one annotation only use `AnnotationManager.iter_annotation()`.
+        `additional` can't be empty. If you need to iterate through one annotation only use
+        `AnnotationManager.iter_annotation()`.
 
         :param annotation_type: Type of annotation to iterate through.
         :param additional: Additional annotations that should be added to the main one.
         :param start_offset: Start to iterate from the spesific offset. \
                              Can be used as a key argument only.
-        :return: Iterator through annotations of requested types.
+        :return: Iterator through annotations of the requested type.
         """
         if not additional:
-            raise ValueError("At least one additional annotation should be specified. "
+            raise ValueError("At least one additional annotation must be specified. "
                              "If you need to iterate through only one annotation use "
                              "`iter_annotation()`.")
         types = set(additional) | {annotation_type}
@@ -233,7 +244,7 @@ def find_overlapping_annotation(self, annotation_type: Type[Annotation],
         try:
             annotation_layer = self._type_to_annotations[annotation_type]
         except KeyError:
-            raise NoAnnotation("There is no annotation layer %s" % annotation_type)
+            raise NoAnnotation("There is no annotation type %s" % annotation_type)
         check_span(start, stop)
         search_start = max(0, annotation_layer.bisect_left((start, start)) - 1)
         search_stop = annotation_layer.bisect_right((stop, stop))
@@ -258,7 +269,7 @@ def find_covering_annotation(self, annotation_type: Type[Annotation],
         try:
             annotation_layer = self._type_to_annotations[annotation_type]
         except KeyError:
-            raise NoAnnotation("There is no annotation layer %s" % annotation_type)
+            raise NoAnnotation("There is no annotation type %s" % annotation_type)
         check_span(start, stop)
         search_start = max(0, annotation_layer.bisect_left((start, start)) - 1)
         search_stop = annotation_layer.bisect_right((stop, stop))
@@ -269,7 +280,7 @@ def find_covering_annotation(self, annotation_type: Type[Annotation],
             elif span_start <= start and stop <= span_stop:
                 # assuming that there is only one such annotation because they cannot overlap
                 return annotation_layer[(span_start, span_stop)]
-        raise NoAnnotation("There is no annotation %s that contains i%d to %d" % (
+        raise NoAnnotation("There is no annotation %s that contains [%d, %d)" % (
             annotation_type, start, stop))
 
     @classmethod

diff --git a/lookout/style/format/annotations/annotations.py b/lookout/style/format/annotations/annotations.py
@@ -1,26 +1,28 @@
 """Annotations for style-analyzer."""
-from typing import FrozenSet, Optional, Tuple
+from typing import FrozenSet, Optional, Tuple, Union
+
+import numpy
 
 from lookout.style.format.utils import get_uast_parents
 
 
-def check_offset(value: int, name: str) -> None:
+def check_offset(value: Union[int, numpy.int], name: str) -> None:
     """
-    Check offset value for unexpected values as wrong type and negative.
+    Validate the offset: check the type and reject negative values.
 
     :param value: Offset value to check.
     :param name: Variable name for exception message.
     :raises ValueError: if value is incorrect.
     """
-    if not isinstance(value, int):
-        raise ValueError("Type of `%s` should be int. Get %s" % (name, type(value)))
+    if not (isinstance(value, int) or isinstance(value, numpy.int)):
+        raise ValueError("Type of `%s` should be int. Got %s" % (name, type(value)))
     if value < 0:
-        raise ValueError("`%s` should be non-negative. Get %d" % (name, value))
+        raise ValueError("`%s` should be non-negative. Got %d" % (name, value))
 
 
 def check_span(start: int, stop: int) -> None:
     """
-    Check span value for unexpected values as wrong type and negative or wrong order.
+    Validate span value: check the type, reject negative values and ensure an increasing order.
 
     :param start: Start offset of the span.
     :param stop: Stop offset of the span.
@@ -29,7 +31,7 @@ def check_span(start: int, stop: int) -> None:
     check_offset(start, "start")
     check_offset(stop, "stop")
     if stop < start:
-        raise ValueError("`stop` should be not less than `start`. Get start=%d, stop=%d" % (
+        raise ValueError("`stop` should be not less than `start`. Got start=%d, stop=%d" % (
             start, stop))
 
 
@@ -48,11 +50,8 @@ def __init__(self, start: int, stop: int):
         self._stop = stop
 
     start = property(lambda self: self._start)
-
     stop = property(lambda self: self._stop)
-
     span = property(lambda self: (self._start, self._stop))
-
     name = property(lambda self: type(self).__name__)
 
     def __repr__(self) -> str:
@@ -68,16 +67,16 @@ class LineAnnotation(Annotation):
     """
     Line number annotation.
 
-    Line number in 1-based.
+    Line numbers are 1-based.
     """
 
     def __init__(self, start: int, stop: int, line: int):
         """Initialize a new instance of `LineAnnotation`."""
         super().__init__(start, stop)
         if not isinstance(line, int):
-            raise ValueError("Type of `line` should be int. Get %s" % type(line))
+            raise ValueError("Type of `line` should be int. Got %s" % type(line))
         if line < 1:
-            raise ValueError("`line` value should be positive. Get %d" % line)
+            raise ValueError("`line` value should be positive. Got %d" % line)
         self._line = line
 
     line = property(lambda self: self._line)
@@ -116,7 +115,6 @@ def __init__(self, start: int, stop: int, uast: "bblfsh.Node"):
         self._parents = get_uast_parents(uast)
 
     uast = property(lambda self: self._node)
-
     parents = property(lambda self: self._parents)
 
 
@@ -128,8 +126,8 @@ def __init__(self, start: int, stop: int,
         """
         Initialize a new instance of `TokenAnnotation`.
 
-        `TokenAnnotation` can be 0-interval: [x, x). To avoid the complex search of related uast
-        annotation it is required to pass `UASTNodeAnnotation` to `__init__()`.
+        `TokenAnnotation` may have 0 length: [x, x). To avoid the complex search of the related
+        UAST annotation it is required to pass `UASTNodeAnnotation` to `__init__()`.
 
         :param start: Annotation start.
         :param stop: Annotation end.
@@ -235,9 +233,9 @@ def __init__(self, start: int, stop: int, lines: FrozenSet[int]):
         super().__init__(start, stop)
         for line in lines:
             if not isinstance(line, int):
-                raise ValueError("Type of `line` should be int. Get %s" % type(line))
+                raise ValueError("Type of `line` should be int. Got %s" % type(line))
             if line < 1:
-                raise ValueError("`line` value should be positive. Get %d" % line)
+                raise ValueError("`line` value should be positive. Got %d" % line)
         self._lines = lines
 
     lines = property(lambda self: self._lines)

diff --git a/lookout/style/format/feature_extractor.py b/lookout/style/format/feature_extractor.py
@@ -194,8 +194,8 @@ def extract_features(self, files: Iterable[UnicodeFile],
                  the corresponding `VirtualNode`-s and the parents mapping \
                  or None in case no features were extracted.
         """
-        files = self._parse_vnodes(files, lines)
-        parsed_files, node_parents, vnode_parents = to_vnodes_format(files)
+        files = self._annotate_files(files, lines)
+        parsed_files, node_parents, vnode_parents = files_to_old_parse_vnodes_format(files)
         xy = self._convert_files_to_xy(parsed_files)
         if xy is None:
             return None
@@ -304,8 +304,8 @@ def populate_indices(feature_group: FeatureGroup, feature_names: Sequence[str],
                                       for group, counts in self._feature_node_counts.items()}
         self._feature_count = sum(self._feature_group_counts.values())
 
-    def _parse_vnodes(self, files: Iterable[UnicodeFile], lines: Optional[List[List[int]]] = None,
-                      ) -> List[AnnotationManager]:
+    def _annotate_files(self, files: Iterable[UnicodeFile],
+                        lines: Optional[List[List[int]]] = None) -> List[AnnotationManager]:
         parsed_files = []
         for i, file in enumerate(files):
             path = file.path
@@ -450,8 +450,8 @@ def _classify_vnodes(self, file: AnnotationManager) -> None:
         Annotate source code with `AtomicTokenAnnotation`, `ClassAnnotation` and \
         `AccumulatedIndentationAnnotation`.
 
-        `ClassAnnotation` caontains the index of the corresponding class to predict.
-        We detect indentation changes so several whitespace nodes are merged together.
+        `ClassAnnotation` contains the index of the corresponding class to predict.
+        We detect indentation changes, so several whitespace nodes are merged together.
 
         :param file: Source code annotated with `RawTokenAnnotation`.
         """
@@ -821,8 +821,8 @@ def _to_position(raw_lines_data, _lines_start_offset, offset):
     return Position(offset, line_num + 1, col + 1)
 
 
-def raw_file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"],
-                                                                     Dict[int, bblfsh.Node]]:
+def file_to_old_parse_file_format(file: AnnotationManager) -> Tuple[List["VirtualNode"],
+                                                                    Dict[int, bblfsh.Node]]:
     """
     Convert `AnnotationManager` instance to the deprecated output format of \
     `FeatureExtractor._parse_file()`.
@@ -855,11 +855,14 @@ def raw_file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["Virtu
     return vnodes, file.get(UASTAnnotation).parents
 
 
-def file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"],
-                                                                 Dict[int, bblfsh.Node]]:
+def _file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"],
+                                                                  Dict[int, bblfsh.Node]]:
     """
-    Convert `AnnotationManager` instance to an old format, that is a sequence of vnodes and \
-    vnodes parents mapping.
+    Convert `AnnotationManager` instance to the deprecated format, which is a sequence of vnodes \
+    and vnodes parents mapping.
+
+    Used by `files_to_old_parse_vnodes_format` to generate old `_parse_vnodes` method output
+    format.
 
     The function is created for backwards capability and should be removed after refactoring is \
     finished.
@@ -891,13 +894,16 @@ def file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNo
     return vnodes, vnode_parents
 
 
-def to_vnodes_format(files: Sequence[AnnotationManager],
-                     ) -> Tuple[List[Tuple[List[VirtualNode], Dict[int, bblfsh.Node], Set[int]]],
-                                Dict[int, bblfsh.Node],
-                                Dict[int, bblfsh.Node]]:
+def files_to_old_parse_vnodes_format(
+        files: Sequence[AnnotationManager],
+    ) -> Tuple[List[Tuple[List[VirtualNode], Dict[int, bblfsh.Node], Set[int]]],
+               Dict[int, bblfsh.Node],
+               Dict[int, bblfsh.Node]]:
     """
-    Convert `AnnotationManager` instances to the deprecated output format of \
-    `FeatureExtractor._parse_vnodes()`.
+    Convert a sequence of `AnnotationManager` instances to the deprecated output format of \
+    `FeatureExtractor._annotate_files()` (`_parse_vnodes()` before refactoring).
+
+    In addition to `_file_to_vnodes_and_parents()` it restores `node_parents` mapping.
 
     The function is created for backwards capability and should be removed after refactoring is \
     finished.
@@ -911,7 +917,7 @@ def to_vnodes_format(files: Sequence[AnnotationManager],
     node_parents = {}
     vnodes = []
     for file in files:
-        file_vnodes, file_vnode_parents = file_to_vnodes_and_parents(file)
+        file_vnodes, file_vnode_parents = _file_to_vnodes_and_parents(file)
         file_node_parents = file.get(UASTAnnotation).parents
         try:
             file_lines = set(file.get(LinesToCheckAnnotation).lines)

diff --git a/lookout/style/format/tests/bugs/002_bad_line_positions/test.py b/lookout/style/format/tests/bugs/002_bad_line_positions/test.py
@@ -7,7 +7,7 @@
 
 from lookout.style.format.analyzer import FormatAnalyzer
 from lookout.style.format.annotations.annotated_data import AnnotationManager
-from lookout.style.format.feature_extractor import FeatureExtractor, raw_file_to_vnodes_and_parents
+from lookout.style.format.feature_extractor import FeatureExtractor, file_to_old_parse_file_format
 from lookout.style.format.tests.test_analyzer import get_config
 
 
@@ -29,7 +29,7 @@ def test_positions(self):
         file = UnicodeFile(content=code_uni, uast=uast_uni, language="javascript", path="test.js")
         annotated_data = AnnotationManager.from_file(file)
         self.extractor._parse_file(annotated_data)
-        nodes, _ = raw_file_to_vnodes_and_parents(annotated_data)
+        nodes, _ = file_to_old_parse_file_format(annotated_data)
         for index, (node1, node2) in enumerate(zip(nodes, nodes[1:])):
             self.assertLessEqual(node1.start.line, node2.start.line,
                                  "Start line position decrease for %d, %d nodes" % (

diff --git a/lookout/style/format/tests/test_expected_vnodes_number.py b/lookout/style/format/tests/test_expected_vnodes_number.py
@@ -10,10 +10,12 @@
 
 
 class ExpectedVnodesTest(unittest.TestCase):
+    def setUp(self):
+        slogging.setup("INFO", False)
+
     @unittest.skipIf(sys.version_info.minor == 6, "We test python 3.6 inside docker container, "
                                                   "so there is no another docker inside.")
     def test_calc_expected_vnodes_number_entry(self):
-        slogging.setup("INFO", False)
         quality_report_repos_filepath = os.path.abspath(
             os.path.join(os.path.split(__file__)[0],
                          "../benchmarks/data/quality_report_repos.csv"))

diff --git a/lookout/style/format/tests/test_features.py b/lookout/style/format/tests/test_features.py
@@ -18,7 +18,7 @@
     LabelAnnotation, RawTokenAnnotation, TokenAnnotation, UASTAnnotation
 from lookout.style.format.classes import CLASS_INDEX, CLASSES, CLS_NEWLINE, CLS_NOOP, \
     CLS_SINGLE_QUOTE, CLS_SPACE, CLS_SPACE_DEC, CLS_SPACE_INC
-from lookout.style.format.feature_extractor import FeatureExtractor, raw_file_to_vnodes_and_parents
+from lookout.style.format.feature_extractor import FeatureExtractor, file_to_old_parse_file_format
 from lookout.style.format.tests.test_analyzer import get_config
 from lookout.style.format.virtual_node import Position, VirtualNode
 
@@ -54,7 +54,7 @@ def test_parse_file_exact_match(self):
             File(uast=uast, content=code, language="javascript", path=""))
         annotated_file = AnnotationManager.from_file(file)
         self.extractor._parse_file(annotated_file)
-        nodes, _ = raw_file_to_vnodes_and_parents(annotated_file)
+        nodes, _ = file_to_old_parse_file_format(annotated_file)
         self.assertEqual("".join(n.value for n in nodes), code.decode())
         self.assertEqual("".join(annotated_file[token.span]
                                  for token in annotated_file.iter_annotation(RawTokenAnnotation)),
@@ -83,7 +83,7 @@ def test_parse_file(self):
         self.extractor._parse_file(self.annotated_file)
         text = []
         offset = line = col = 0
-        nodes, _ = raw_file_to_vnodes_and_parents(self.annotated_file)
+        nodes, _ = file_to_old_parse_file_format(self.annotated_file)
         parents = self.annotated_file.get(UASTAnnotation).parents
         for n in nodes:
             if line == n.start.line - 1:
@@ -105,7 +105,7 @@ def test_parse_file_with_trailing_space(self):
             File(content=contents.encode(), uast=self.uast, language="javascript", path="test"))
         annotated_data = AnnotationManager.from_file(file)
         self.extractor._parse_file(annotated_data)
-        nodes, _ = raw_file_to_vnodes_and_parents(annotated_data)
+        nodes, _ = file_to_old_parse_file_format(annotated_data)
         offset, line, col = nodes[-1].end
         self.assertEqual(len(contents), offset)
         # Space token always ends on the same line