diff --git a/lookout/style/format/annotations/annotated_data.py b/lookout/style/format/annotations/annotated_data.py index 6e190ae21..26080026c 100644 --- a/lookout/style/format/annotations/annotated_data.py +++ b/lookout/style/format/annotations/annotated_data.py @@ -39,9 +39,7 @@ def __init__(self, start, stop, *args, **kwargs): self._stop = stop start = property(lambda self: self._start) - stop = property(lambda self: self._stop) - span = property(lambda self: self._span) @@ -105,7 +103,7 @@ def count(self, annotation_type: Type[Annotation]): def add(self, *annotations: Annotation) -> None: """ - Add multiple annotations. + Add several annotations. """ for annotation in annotations: self._add(annotation) @@ -163,22 +161,35 @@ def get(self, annotation_type: Type[Annotation], span: Optional[Tuple[int, int]] def iter_annotations(self, annotation_type: Type[Annotation], *additional: Type[Annotation], - start_offset: Optional[int] = None) -> Union[Iterator[AnnotationsSpan]]: + start_offset: Optional[int] = None) -> Iterator[AnnotationsSpan]: """ - Iterate through annotations with specified type. + Iterate through annotations of the specified type. + + Iteration goes through `annotation_type`. it is additionally annotated with what is + specified in `additional` list. Spans of additional annotations should fully cover spans of + `annotation_type`. For example you have annotation *line* and *token*. Each line contains + several tokens. If you try to iterate through *line* and set *token* as `additional` + annotation you get only *line* annotation inside `AnnotationsSpan`. It happens because you + can annotate *token* with *line* but cannot *line* with *token* because *token* is covered + by only one line, but not vice versa. + + So, + manager.iter_annotations(line, token) # gives you only line annotation as output, + # *token* annotations not found + manager.iter_annotations(token, line) # gives you *token* and *line* annotation because + # It is possible to find only one covering *line* annotation. - Iteration goes through `annotation_type`. It is additionally annotated with annotations - specified in `additional`. `additional` can't be empty. If you need to iterate through \ - one annotation only use `AnnotationManager.iter_annotation()`. + `additional` can't be empty. If you need to iterate through one annotation only use + `AnnotationManager.iter_annotation()`. :param annotation_type: Type of annotation to iterate through. :param additional: Additional annotations that should be added to the main one. :param start_offset: Start to iterate from the spesific offset. \ Can be used as a key argument only. - :return: Iterator through annotations of requested types. + :return: Iterator through annotations of the requested type. """ if not additional: - raise ValueError("At least one additional annotation should be specified. " + raise ValueError("At least one additional annotation must be specified. " "If you need to iterate through only one annotation use " "`iter_annotation()`.") types = set(additional) | {annotation_type} @@ -233,7 +244,7 @@ def find_overlapping_annotation(self, annotation_type: Type[Annotation], try: annotation_layer = self._type_to_annotations[annotation_type] except KeyError: - raise NoAnnotation("There is no annotation layer %s" % annotation_type) + raise NoAnnotation("There is no annotation type %s" % annotation_type) check_span(start, stop) search_start = max(0, annotation_layer.bisect_left((start, start)) - 1) search_stop = annotation_layer.bisect_right((stop, stop)) @@ -258,7 +269,7 @@ def find_covering_annotation(self, annotation_type: Type[Annotation], try: annotation_layer = self._type_to_annotations[annotation_type] except KeyError: - raise NoAnnotation("There is no annotation layer %s" % annotation_type) + raise NoAnnotation("There is no annotation type %s" % annotation_type) check_span(start, stop) search_start = max(0, annotation_layer.bisect_left((start, start)) - 1) search_stop = annotation_layer.bisect_right((stop, stop)) @@ -269,7 +280,7 @@ def find_covering_annotation(self, annotation_type: Type[Annotation], elif span_start <= start and stop <= span_stop: # assuming that there is only one such annotation because they cannot overlap return annotation_layer[(span_start, span_stop)] - raise NoAnnotation("There is no annotation %s that contains i%d to %d" % ( + raise NoAnnotation("There is no annotation %s that contains [%d, %d)" % ( annotation_type, start, stop)) @classmethod diff --git a/lookout/style/format/annotations/annotations.py b/lookout/style/format/annotations/annotations.py index 8dba910fb..dd70cd2d1 100644 --- a/lookout/style/format/annotations/annotations.py +++ b/lookout/style/format/annotations/annotations.py @@ -1,26 +1,28 @@ """Annotations for style-analyzer.""" -from typing import FrozenSet, Optional, Tuple +from typing import FrozenSet, Optional, Tuple, Union + +import numpy from lookout.style.format.utils import get_uast_parents -def check_offset(value: int, name: str) -> None: +def check_offset(value: Union[int, numpy.int], name: str) -> None: """ - Check offset value for unexpected values as wrong type and negative. + Validate the offset: check the type and reject negative values. :param value: Offset value to check. :param name: Variable name for exception message. :raises ValueError: if value is incorrect. """ - if not isinstance(value, int): - raise ValueError("Type of `%s` should be int. Get %s" % (name, type(value))) + if not (isinstance(value, int) or isinstance(value, numpy.int)): + raise ValueError("Type of `%s` should be int. Got %s" % (name, type(value))) if value < 0: - raise ValueError("`%s` should be non-negative. Get %d" % (name, value)) + raise ValueError("`%s` should be non-negative. Got %d" % (name, value)) def check_span(start: int, stop: int) -> None: """ - Check span value for unexpected values as wrong type and negative or wrong order. + Validate span value: check the type, reject negative values and ensure an increasing order. :param start: Start offset of the span. :param stop: Stop offset of the span. @@ -29,7 +31,7 @@ def check_span(start: int, stop: int) -> None: check_offset(start, "start") check_offset(stop, "stop") if stop < start: - raise ValueError("`stop` should be not less than `start`. Get start=%d, stop=%d" % ( + raise ValueError("`stop` should be not less than `start`. Got start=%d, stop=%d" % ( start, stop)) @@ -48,11 +50,8 @@ def __init__(self, start: int, stop: int): self._stop = stop start = property(lambda self: self._start) - stop = property(lambda self: self._stop) - span = property(lambda self: (self._start, self._stop)) - name = property(lambda self: type(self).__name__) def __repr__(self) -> str: @@ -68,16 +67,16 @@ class LineAnnotation(Annotation): """ Line number annotation. - Line number in 1-based. + Line numbers are 1-based. """ def __init__(self, start: int, stop: int, line: int): """Initialize a new instance of `LineAnnotation`.""" super().__init__(start, stop) if not isinstance(line, int): - raise ValueError("Type of `line` should be int. Get %s" % type(line)) + raise ValueError("Type of `line` should be int. Got %s" % type(line)) if line < 1: - raise ValueError("`line` value should be positive. Get %d" % line) + raise ValueError("`line` value should be positive. Got %d" % line) self._line = line line = property(lambda self: self._line) @@ -116,7 +115,6 @@ def __init__(self, start: int, stop: int, uast: "bblfsh.Node"): self._parents = get_uast_parents(uast) uast = property(lambda self: self._node) - parents = property(lambda self: self._parents) @@ -128,8 +126,8 @@ def __init__(self, start: int, stop: int, """ Initialize a new instance of `TokenAnnotation`. - `TokenAnnotation` can be 0-interval: [x, x). To avoid the complex search of related uast - annotation it is required to pass `UASTNodeAnnotation` to `__init__()`. + `TokenAnnotation` may have 0 length: [x, x). To avoid the complex search of the related + UAST annotation it is required to pass `UASTNodeAnnotation` to `__init__()`. :param start: Annotation start. :param stop: Annotation end. @@ -235,9 +233,9 @@ def __init__(self, start: int, stop: int, lines: FrozenSet[int]): super().__init__(start, stop) for line in lines: if not isinstance(line, int): - raise ValueError("Type of `line` should be int. Get %s" % type(line)) + raise ValueError("Type of `line` should be int. Got %s" % type(line)) if line < 1: - raise ValueError("`line` value should be positive. Get %d" % line) + raise ValueError("`line` value should be positive. Got %d" % line) self._lines = lines lines = property(lambda self: self._lines) diff --git a/lookout/style/format/feature_extractor.py b/lookout/style/format/feature_extractor.py index 926f75430..bbf07144b 100644 --- a/lookout/style/format/feature_extractor.py +++ b/lookout/style/format/feature_extractor.py @@ -194,8 +194,8 @@ def extract_features(self, files: Iterable[UnicodeFile], the corresponding `VirtualNode`-s and the parents mapping \ or None in case no features were extracted. """ - files = self._parse_vnodes(files, lines) - parsed_files, node_parents, vnode_parents = to_vnodes_format(files) + files = self._annotate_files(files, lines) + parsed_files, node_parents, vnode_parents = files_to_old_parse_vnodes_format(files) xy = self._convert_files_to_xy(parsed_files) if xy is None: return None @@ -304,8 +304,8 @@ def populate_indices(feature_group: FeatureGroup, feature_names: Sequence[str], for group, counts in self._feature_node_counts.items()} self._feature_count = sum(self._feature_group_counts.values()) - def _parse_vnodes(self, files: Iterable[UnicodeFile], lines: Optional[List[List[int]]] = None, - ) -> List[AnnotationManager]: + def _annotate_files(self, files: Iterable[UnicodeFile], + lines: Optional[List[List[int]]] = None) -> List[AnnotationManager]: parsed_files = [] for i, file in enumerate(files): path = file.path @@ -450,8 +450,8 @@ def _classify_vnodes(self, file: AnnotationManager) -> None: Annotate source code with `AtomicTokenAnnotation`, `ClassAnnotation` and \ `AccumulatedIndentationAnnotation`. - `ClassAnnotation` caontains the index of the corresponding class to predict. - We detect indentation changes so several whitespace nodes are merged together. + `ClassAnnotation` contains the index of the corresponding class to predict. + We detect indentation changes, so several whitespace nodes are merged together. :param file: Source code annotated with `RawTokenAnnotation`. """ @@ -821,8 +821,8 @@ def _to_position(raw_lines_data, _lines_start_offset, offset): return Position(offset, line_num + 1, col + 1) -def raw_file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"], - Dict[int, bblfsh.Node]]: +def file_to_old_parse_file_format(file: AnnotationManager) -> Tuple[List["VirtualNode"], + Dict[int, bblfsh.Node]]: """ Convert `AnnotationManager` instance to the deprecated output format of \ `FeatureExtractor._parse_file()`. @@ -855,11 +855,14 @@ def raw_file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["Virtu return vnodes, file.get(UASTAnnotation).parents -def file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"], - Dict[int, bblfsh.Node]]: +def _file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"], + Dict[int, bblfsh.Node]]: """ - Convert `AnnotationManager` instance to an old format, that is a sequence of vnodes and \ - vnodes parents mapping. + Convert `AnnotationManager` instance to the deprecated format, which is a sequence of vnodes \ + and vnodes parents mapping. + + Used by `files_to_old_parse_vnodes_format` to generate old `_parse_vnodes` method output + format. The function is created for backwards capability and should be removed after refactoring is \ finished. @@ -891,13 +894,16 @@ def file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNo return vnodes, vnode_parents -def to_vnodes_format(files: Sequence[AnnotationManager], - ) -> Tuple[List[Tuple[List[VirtualNode], Dict[int, bblfsh.Node], Set[int]]], - Dict[int, bblfsh.Node], - Dict[int, bblfsh.Node]]: +def files_to_old_parse_vnodes_format( + files: Sequence[AnnotationManager], + ) -> Tuple[List[Tuple[List[VirtualNode], Dict[int, bblfsh.Node], Set[int]]], + Dict[int, bblfsh.Node], + Dict[int, bblfsh.Node]]: """ - Convert `AnnotationManager` instances to the deprecated output format of \ - `FeatureExtractor._parse_vnodes()`. + Convert a sequence of `AnnotationManager` instances to the deprecated output format of \ + `FeatureExtractor._annotate_files()` (`_parse_vnodes()` before refactoring). + + In addition to `_file_to_vnodes_and_parents()` it restores `node_parents` mapping. The function is created for backwards capability and should be removed after refactoring is \ finished. @@ -911,7 +917,7 @@ def to_vnodes_format(files: Sequence[AnnotationManager], node_parents = {} vnodes = [] for file in files: - file_vnodes, file_vnode_parents = file_to_vnodes_and_parents(file) + file_vnodes, file_vnode_parents = _file_to_vnodes_and_parents(file) file_node_parents = file.get(UASTAnnotation).parents try: file_lines = set(file.get(LinesToCheckAnnotation).lines) diff --git a/lookout/style/format/tests/bugs/002_bad_line_positions/test.py b/lookout/style/format/tests/bugs/002_bad_line_positions/test.py index d5b4ad893..389965006 100644 --- a/lookout/style/format/tests/bugs/002_bad_line_positions/test.py +++ b/lookout/style/format/tests/bugs/002_bad_line_positions/test.py @@ -7,7 +7,7 @@ from lookout.style.format.analyzer import FormatAnalyzer from lookout.style.format.annotations.annotated_data import AnnotationManager -from lookout.style.format.feature_extractor import FeatureExtractor, raw_file_to_vnodes_and_parents +from lookout.style.format.feature_extractor import FeatureExtractor, file_to_old_parse_file_format from lookout.style.format.tests.test_analyzer import get_config @@ -29,7 +29,7 @@ def test_positions(self): file = UnicodeFile(content=code_uni, uast=uast_uni, language="javascript", path="test.js") annotated_data = AnnotationManager.from_file(file) self.extractor._parse_file(annotated_data) - nodes, _ = raw_file_to_vnodes_and_parents(annotated_data) + nodes, _ = file_to_old_parse_file_format(annotated_data) for index, (node1, node2) in enumerate(zip(nodes, nodes[1:])): self.assertLessEqual(node1.start.line, node2.start.line, "Start line position decrease for %d, %d nodes" % ( diff --git a/lookout/style/format/tests/test_expected_vnodes_number.py b/lookout/style/format/tests/test_expected_vnodes_number.py index 0c25ea420..b7f743c56 100644 --- a/lookout/style/format/tests/test_expected_vnodes_number.py +++ b/lookout/style/format/tests/test_expected_vnodes_number.py @@ -10,10 +10,12 @@ class ExpectedVnodesTest(unittest.TestCase): + def setUp(self): + slogging.setup("INFO", False) + @unittest.skipIf(sys.version_info.minor == 6, "We test python 3.6 inside docker container, " "so there is no another docker inside.") def test_calc_expected_vnodes_number_entry(self): - slogging.setup("INFO", False) quality_report_repos_filepath = os.path.abspath( os.path.join(os.path.split(__file__)[0], "../benchmarks/data/quality_report_repos.csv")) diff --git a/lookout/style/format/tests/test_features.py b/lookout/style/format/tests/test_features.py index 94e770b93..1229f541b 100644 --- a/lookout/style/format/tests/test_features.py +++ b/lookout/style/format/tests/test_features.py @@ -18,7 +18,7 @@ LabelAnnotation, RawTokenAnnotation, TokenAnnotation, UASTAnnotation from lookout.style.format.classes import CLASS_INDEX, CLASSES, CLS_NEWLINE, CLS_NOOP, \ CLS_SINGLE_QUOTE, CLS_SPACE, CLS_SPACE_DEC, CLS_SPACE_INC -from lookout.style.format.feature_extractor import FeatureExtractor, raw_file_to_vnodes_and_parents +from lookout.style.format.feature_extractor import FeatureExtractor, file_to_old_parse_file_format from lookout.style.format.tests.test_analyzer import get_config from lookout.style.format.virtual_node import Position, VirtualNode @@ -54,7 +54,7 @@ def test_parse_file_exact_match(self): File(uast=uast, content=code, language="javascript", path="")) annotated_file = AnnotationManager.from_file(file) self.extractor._parse_file(annotated_file) - nodes, _ = raw_file_to_vnodes_and_parents(annotated_file) + nodes, _ = file_to_old_parse_file_format(annotated_file) self.assertEqual("".join(n.value for n in nodes), code.decode()) self.assertEqual("".join(annotated_file[token.span] for token in annotated_file.iter_annotation(RawTokenAnnotation)), @@ -83,7 +83,7 @@ def test_parse_file(self): self.extractor._parse_file(self.annotated_file) text = [] offset = line = col = 0 - nodes, _ = raw_file_to_vnodes_and_parents(self.annotated_file) + nodes, _ = file_to_old_parse_file_format(self.annotated_file) parents = self.annotated_file.get(UASTAnnotation).parents for n in nodes: if line == n.start.line - 1: @@ -105,7 +105,7 @@ def test_parse_file_with_trailing_space(self): File(content=contents.encode(), uast=self.uast, language="javascript", path="test")) annotated_data = AnnotationManager.from_file(file) self.extractor._parse_file(annotated_data) - nodes, _ = raw_file_to_vnodes_and_parents(annotated_data) + nodes, _ = file_to_old_parse_file_format(annotated_data) offset, line, col = nodes[-1].end self.assertEqual(len(contents), offset) # Space token always ends on the same line