Skip to content

Commit

Permalink
Address review feedback
Browse files Browse the repository at this point in the history
Signed-off-by: Konstantin Slavnov <[email protected]>
  • Loading branch information
zurk committed May 20, 2019
1 parent 6538220 commit 4b557b0
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 58 deletions.
37 changes: 24 additions & 13 deletions lookout/style/format/annotations/annotated_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@ def __init__(self, start, stop, *args, **kwargs):
self._stop = stop

start = property(lambda self: self._start)

stop = property(lambda self: self._stop)

span = property(lambda self: self._span)


Expand Down Expand Up @@ -105,7 +103,7 @@ def count(self, annotation_type: Type[Annotation]):

def add(self, *annotations: Annotation) -> None:
"""
Add multiple annotations.
Add several annotations.
"""
for annotation in annotations:
self._add(annotation)
Expand Down Expand Up @@ -163,22 +161,35 @@ def get(self, annotation_type: Type[Annotation], span: Optional[Tuple[int, int]]

def iter_annotations(self, annotation_type: Type[Annotation],
*additional: Type[Annotation],
start_offset: Optional[int] = None) -> Union[Iterator[AnnotationsSpan]]:
start_offset: Optional[int] = None) -> Iterator[AnnotationsSpan]:
"""
Iterate through annotations with specified type.
Iterate through annotations of the specified type.
Iteration goes through `annotation_type`. it is additionally annotated with what is
specified in `additional` list. Spans of additional annotations should fully cover spans of
`annotation_type`. For example you have annotation *line* and *token*. Each line contains
several tokens. If you try to iterate through *line* and set *token* as `additional`
annotation you get only *line* annotation inside `AnnotationsSpan`. It happens because you
can annotate *token* with *line* but cannot *line* with *token* because *token* is covered
by only one line, but not vice versa.
So,
manager.iter_annotations(line, token) # gives you only line annotation as output,
# *token* annotations not found
manager.iter_annotations(token, line) # gives you *token* and *line* annotation because
# It is possible to find only one covering *line* annotation.
Iteration goes through `annotation_type`. It is additionally annotated with annotations
specified in `additional`. `additional` can't be empty. If you need to iterate through \
one annotation only use `AnnotationManager.iter_annotation()`.
`additional` can't be empty. If you need to iterate through one annotation only use
`AnnotationManager.iter_annotation()`.
:param annotation_type: Type of annotation to iterate through.
:param additional: Additional annotations that should be added to the main one.
:param start_offset: Start to iterate from the spesific offset. \
Can be used as a key argument only.
:return: Iterator through annotations of requested types.
:return: Iterator through annotations of the requested type.
"""
if not additional:
raise ValueError("At least one additional annotation should be specified. "
raise ValueError("At least one additional annotation must be specified. "
"If you need to iterate through only one annotation use "
"`iter_annotation()`.")
types = set(additional) | {annotation_type}
Expand Down Expand Up @@ -233,7 +244,7 @@ def find_overlapping_annotation(self, annotation_type: Type[Annotation],
try:
annotation_layer = self._type_to_annotations[annotation_type]
except KeyError:
raise NoAnnotation("There is no annotation layer %s" % annotation_type)
raise NoAnnotation("There is no annotation type %s" % annotation_type)
check_span(start, stop)
search_start = max(0, annotation_layer.bisect_left((start, start)) - 1)
search_stop = annotation_layer.bisect_right((stop, stop))
Expand All @@ -258,7 +269,7 @@ def find_covering_annotation(self, annotation_type: Type[Annotation],
try:
annotation_layer = self._type_to_annotations[annotation_type]
except KeyError:
raise NoAnnotation("There is no annotation layer %s" % annotation_type)
raise NoAnnotation("There is no annotation type %s" % annotation_type)
check_span(start, stop)
search_start = max(0, annotation_layer.bisect_left((start, start)) - 1)
search_stop = annotation_layer.bisect_right((stop, stop))
Expand All @@ -269,7 +280,7 @@ def find_covering_annotation(self, annotation_type: Type[Annotation],
elif span_start <= start and stop <= span_stop:
# assuming that there is only one such annotation because they cannot overlap
return annotation_layer[(span_start, span_stop)]
raise NoAnnotation("There is no annotation %s that contains i%d to %d" % (
raise NoAnnotation("There is no annotation %s that contains [%d, %d)" % (
annotation_type, start, stop))

@classmethod
Expand Down
36 changes: 17 additions & 19 deletions lookout/style/format/annotations/annotations.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
"""Annotations for style-analyzer."""
from typing import FrozenSet, Optional, Tuple
from typing import FrozenSet, Optional, Tuple, Union

import numpy

from lookout.style.format.utils import get_uast_parents


def check_offset(value: int, name: str) -> None:
def check_offset(value: Union[int, numpy.int], name: str) -> None:
"""
Check offset value for unexpected values as wrong type and negative.
Validate the offset: check the type and reject negative values.
:param value: Offset value to check.
:param name: Variable name for exception message.
:raises ValueError: if value is incorrect.
"""
if not isinstance(value, int):
raise ValueError("Type of `%s` should be int. Get %s" % (name, type(value)))
if not (isinstance(value, int) or isinstance(value, numpy.int)):
raise ValueError("Type of `%s` should be int. Got %s" % (name, type(value)))
if value < 0:
raise ValueError("`%s` should be non-negative. Get %d" % (name, value))
raise ValueError("`%s` should be non-negative. Got %d" % (name, value))


def check_span(start: int, stop: int) -> None:
"""
Check span value for unexpected values as wrong type and negative or wrong order.
Validate span value: check the type, reject negative values and ensure an increasing order.
:param start: Start offset of the span.
:param stop: Stop offset of the span.
Expand All @@ -29,7 +31,7 @@ def check_span(start: int, stop: int) -> None:
check_offset(start, "start")
check_offset(stop, "stop")
if stop < start:
raise ValueError("`stop` should be not less than `start`. Get start=%d, stop=%d" % (
raise ValueError("`stop` should be not less than `start`. Got start=%d, stop=%d" % (
start, stop))


Expand All @@ -48,11 +50,8 @@ def __init__(self, start: int, stop: int):
self._stop = stop

start = property(lambda self: self._start)

stop = property(lambda self: self._stop)

span = property(lambda self: (self._start, self._stop))

name = property(lambda self: type(self).__name__)

def __repr__(self) -> str:
Expand All @@ -68,16 +67,16 @@ class LineAnnotation(Annotation):
"""
Line number annotation.
Line number in 1-based.
Line numbers are 1-based.
"""

def __init__(self, start: int, stop: int, line: int):
"""Initialize a new instance of `LineAnnotation`."""
super().__init__(start, stop)
if not isinstance(line, int):
raise ValueError("Type of `line` should be int. Get %s" % type(line))
raise ValueError("Type of `line` should be int. Got %s" % type(line))
if line < 1:
raise ValueError("`line` value should be positive. Get %d" % line)
raise ValueError("`line` value should be positive. Got %d" % line)
self._line = line

line = property(lambda self: self._line)
Expand Down Expand Up @@ -116,7 +115,6 @@ def __init__(self, start: int, stop: int, uast: "bblfsh.Node"):
self._parents = get_uast_parents(uast)

uast = property(lambda self: self._node)

parents = property(lambda self: self._parents)


Expand All @@ -128,8 +126,8 @@ def __init__(self, start: int, stop: int,
"""
Initialize a new instance of `TokenAnnotation`.
`TokenAnnotation` can be 0-interval: [x, x). To avoid the complex search of related uast
annotation it is required to pass `UASTNodeAnnotation` to `__init__()`.
`TokenAnnotation` may have 0 length: [x, x). To avoid the complex search of the related
UAST annotation it is required to pass `UASTNodeAnnotation` to `__init__()`.
:param start: Annotation start.
:param stop: Annotation end.
Expand Down Expand Up @@ -235,9 +233,9 @@ def __init__(self, start: int, stop: int, lines: FrozenSet[int]):
super().__init__(start, stop)
for line in lines:
if not isinstance(line, int):
raise ValueError("Type of `line` should be int. Get %s" % type(line))
raise ValueError("Type of `line` should be int. Got %s" % type(line))
if line < 1:
raise ValueError("`line` value should be positive. Get %d" % line)
raise ValueError("`line` value should be positive. Got %d" % line)
self._lines = lines

lines = property(lambda self: self._lines)
Expand Down
44 changes: 25 additions & 19 deletions lookout/style/format/feature_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ def extract_features(self, files: Iterable[UnicodeFile],
the corresponding `VirtualNode`-s and the parents mapping \
or None in case no features were extracted.
"""
files = self._parse_vnodes(files, lines)
parsed_files, node_parents, vnode_parents = to_vnodes_format(files)
files = self._annotate_files(files, lines)
parsed_files, node_parents, vnode_parents = files_to_old_parse_vnodes_format(files)
xy = self._convert_files_to_xy(parsed_files)
if xy is None:
return None
Expand Down Expand Up @@ -304,8 +304,8 @@ def populate_indices(feature_group: FeatureGroup, feature_names: Sequence[str],
for group, counts in self._feature_node_counts.items()}
self._feature_count = sum(self._feature_group_counts.values())

def _parse_vnodes(self, files: Iterable[UnicodeFile], lines: Optional[List[List[int]]] = None,
) -> List[AnnotationManager]:
def _annotate_files(self, files: Iterable[UnicodeFile],
lines: Optional[List[List[int]]] = None) -> List[AnnotationManager]:
parsed_files = []
for i, file in enumerate(files):
path = file.path
Expand Down Expand Up @@ -450,8 +450,8 @@ def _classify_vnodes(self, file: AnnotationManager) -> None:
Annotate source code with `AtomicTokenAnnotation`, `ClassAnnotation` and \
`AccumulatedIndentationAnnotation`.
`ClassAnnotation` caontains the index of the corresponding class to predict.
We detect indentation changes so several whitespace nodes are merged together.
`ClassAnnotation` contains the index of the corresponding class to predict.
We detect indentation changes, so several whitespace nodes are merged together.
:param file: Source code annotated with `RawTokenAnnotation`.
"""
Expand Down Expand Up @@ -821,8 +821,8 @@ def _to_position(raw_lines_data, _lines_start_offset, offset):
return Position(offset, line_num + 1, col + 1)


def raw_file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"],
Dict[int, bblfsh.Node]]:
def file_to_old_parse_file_format(file: AnnotationManager) -> Tuple[List["VirtualNode"],
Dict[int, bblfsh.Node]]:
"""
Convert `AnnotationManager` instance to the deprecated output format of \
`FeatureExtractor._parse_file()`.
Expand Down Expand Up @@ -855,11 +855,14 @@ def raw_file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["Virtu
return vnodes, file.get(UASTAnnotation).parents


def file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"],
Dict[int, bblfsh.Node]]:
def _file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNode"],
Dict[int, bblfsh.Node]]:
"""
Convert `AnnotationManager` instance to an old format, that is a sequence of vnodes and \
vnodes parents mapping.
Convert `AnnotationManager` instance to the deprecated format, which is a sequence of vnodes \
and vnodes parents mapping.
Used by `files_to_old_parse_vnodes_format` to generate old `_parse_vnodes` method output
format.
The function is created for backwards capability and should be removed after refactoring is \
finished.
Expand Down Expand Up @@ -891,13 +894,16 @@ def file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualNo
return vnodes, vnode_parents


def to_vnodes_format(files: Sequence[AnnotationManager],
) -> Tuple[List[Tuple[List[VirtualNode], Dict[int, bblfsh.Node], Set[int]]],
Dict[int, bblfsh.Node],
Dict[int, bblfsh.Node]]:
def files_to_old_parse_vnodes_format(
files: Sequence[AnnotationManager],
) -> Tuple[List[Tuple[List[VirtualNode], Dict[int, bblfsh.Node], Set[int]]],
Dict[int, bblfsh.Node],
Dict[int, bblfsh.Node]]:
"""
Convert `AnnotationManager` instances to the deprecated output format of \
`FeatureExtractor._parse_vnodes()`.
Convert a sequence of `AnnotationManager` instances to the deprecated output format of \
`FeatureExtractor._annotate_files()` (`_parse_vnodes()` before refactoring).
In addition to `_file_to_vnodes_and_parents()` it restores `node_parents` mapping.
The function is created for backwards capability and should be removed after refactoring is \
finished.
Expand All @@ -911,7 +917,7 @@ def to_vnodes_format(files: Sequence[AnnotationManager],
node_parents = {}
vnodes = []
for file in files:
file_vnodes, file_vnode_parents = file_to_vnodes_and_parents(file)
file_vnodes, file_vnode_parents = _file_to_vnodes_and_parents(file)
file_node_parents = file.get(UASTAnnotation).parents
try:
file_lines = set(file.get(LinesToCheckAnnotation).lines)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from lookout.style.format.analyzer import FormatAnalyzer
from lookout.style.format.annotations.annotated_data import AnnotationManager
from lookout.style.format.feature_extractor import FeatureExtractor, raw_file_to_vnodes_and_parents
from lookout.style.format.feature_extractor import FeatureExtractor, file_to_old_parse_file_format
from lookout.style.format.tests.test_analyzer import get_config


Expand All @@ -29,7 +29,7 @@ def test_positions(self):
file = UnicodeFile(content=code_uni, uast=uast_uni, language="javascript", path="test.js")
annotated_data = AnnotationManager.from_file(file)
self.extractor._parse_file(annotated_data)
nodes, _ = raw_file_to_vnodes_and_parents(annotated_data)
nodes, _ = file_to_old_parse_file_format(annotated_data)
for index, (node1, node2) in enumerate(zip(nodes, nodes[1:])):
self.assertLessEqual(node1.start.line, node2.start.line,
"Start line position decrease for %d, %d nodes" % (
Expand Down
4 changes: 3 additions & 1 deletion lookout/style/format/tests/test_expected_vnodes_number.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@


class ExpectedVnodesTest(unittest.TestCase):
def setUp(self):
slogging.setup("INFO", False)

@unittest.skipIf(sys.version_info.minor == 6, "We test python 3.6 inside docker container, "
"so there is no another docker inside.")
def test_calc_expected_vnodes_number_entry(self):
slogging.setup("INFO", False)
quality_report_repos_filepath = os.path.abspath(
os.path.join(os.path.split(__file__)[0],
"../benchmarks/data/quality_report_repos.csv"))
Expand Down
8 changes: 4 additions & 4 deletions lookout/style/format/tests/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
LabelAnnotation, RawTokenAnnotation, TokenAnnotation, UASTAnnotation
from lookout.style.format.classes import CLASS_INDEX, CLASSES, CLS_NEWLINE, CLS_NOOP, \
CLS_SINGLE_QUOTE, CLS_SPACE, CLS_SPACE_DEC, CLS_SPACE_INC
from lookout.style.format.feature_extractor import FeatureExtractor, raw_file_to_vnodes_and_parents
from lookout.style.format.feature_extractor import FeatureExtractor, file_to_old_parse_file_format
from lookout.style.format.tests.test_analyzer import get_config
from lookout.style.format.virtual_node import Position, VirtualNode

Expand Down Expand Up @@ -54,7 +54,7 @@ def test_parse_file_exact_match(self):
File(uast=uast, content=code, language="javascript", path=""))
annotated_file = AnnotationManager.from_file(file)
self.extractor._parse_file(annotated_file)
nodes, _ = raw_file_to_vnodes_and_parents(annotated_file)
nodes, _ = file_to_old_parse_file_format(annotated_file)
self.assertEqual("".join(n.value for n in nodes), code.decode())
self.assertEqual("".join(annotated_file[token.span]
for token in annotated_file.iter_annotation(RawTokenAnnotation)),
Expand Down Expand Up @@ -83,7 +83,7 @@ def test_parse_file(self):
self.extractor._parse_file(self.annotated_file)
text = []
offset = line = col = 0
nodes, _ = raw_file_to_vnodes_and_parents(self.annotated_file)
nodes, _ = file_to_old_parse_file_format(self.annotated_file)
parents = self.annotated_file.get(UASTAnnotation).parents
for n in nodes:
if line == n.start.line - 1:
Expand All @@ -105,7 +105,7 @@ def test_parse_file_with_trailing_space(self):
File(content=contents.encode(), uast=self.uast, language="javascript", path="test"))
annotated_data = AnnotationManager.from_file(file)
self.extractor._parse_file(annotated_data)
nodes, _ = raw_file_to_vnodes_and_parents(annotated_data)
nodes, _ = file_to_old_parse_file_format(annotated_data)
offset, line, col = nodes[-1].end
self.assertEqual(len(contents), offset)
# Space token always ends on the same line
Expand Down

0 comments on commit 4b557b0

Please sign in to comment.