Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files Browse the repository at this point in the history
Signed-off-by: Konstantin Slavnov <[email protected]>
  • Loading branch information
zurk committed May 21, 2019
1 parent 4b557b0 commit 5516ae3
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 101 deletions.
141 changes: 65 additions & 76 deletions lookout/style/format/annotations/annotated_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, Iterator, Optional, Tuple, Union, Type # noqa F401
from typing import Dict, Iterator, Optional, Tuple, Union, Type, Callable # noqa F401

from lookout.core.analyzer import UnicodeFile
from sortedcontainers import SortedDict
Expand Down Expand Up @@ -64,14 +64,14 @@ def __init__(self, sequence: str):
# Dictionary to store annotations for the whole file (aka `global` annotations)
self._global_annotations = {} # type: Dict[Type[Annotation], Annotation]

# Next dictionaries are the main core of this class. The most common use-case we have in
# style-analyzer is iterating through Token annotations in the sorted order. That is why
# The following dictionaries are the main members. The most common use case we have in
# style-analyzer is iterating through Token annotations in the sorted order. This is why
# ordered dict is used.
# `self._type_to_annotations` is the main storage of the Annotations. It is a mapping
# from the annotation type to all annotations of this type which are are stored in the \
# from the annotation type to all annotations of this type which are stored in the \
# dictionary that is sorted by spans.
# `self._span_to_annotations` dict is an optimization to quickly lookup all
# `Annotation`-s that belongs to the same [start, stop) span.
# `Annotation`-s that belong to the same [start, stop) span.
self._span_to_annotations = SortedDict() # type: SortedDict[(int, int), Dict[Type[Annotation], Annotation]] # noqa E501
self._type_to_annotations = {} # type: Dict[Type[Annotation], SortedDict[(int, int), Annotation]] # noqa E501

Expand All @@ -83,7 +83,7 @@ def __len__(self):

def __getitem__(self, item: Union[int, slice, Tuple[int, int]]) -> str:
"""
Get the underlying sequence item or slice for the specified range.
Get the underlying sequence item or sequence slice for the specified range.
:param item: index, slice or (start, stop) tuple.
:return: The corresponding part of the sequence.
Expand All @@ -96,10 +96,7 @@ def __getitem__(self, item: Union[int, slice, Tuple[int, int]]) -> str:

def count(self, annotation_type: Type[Annotation]):
"""Count the number of annotations of a specific type."""
try:
return len(self._type_to_annotations[annotation_type])
except KeyError:
return 0
return len(self._type_to_annotations.get(annotation_type, []))

def add(self, *annotations: Annotation) -> None:
"""
Expand Down Expand Up @@ -130,13 +127,13 @@ def _add(self, annotation: Annotation) -> None:
self._type_to_annotations[annotation_type].peekitem(right_span_index)
if self._check_spans_overlap(*right_span, *annotation.span):
raise ValueError(
"Trying to add Annotation %s which is overlaps with existing %s" % (
"Trying to add Annotation %s which overlaps with existing %s" % (
right_annotation, annotation))
left_span_index = max(0, right_span_index - 1)
left_span, left_annotation = self._type_to_annotations[annotation_type].peekitem(
left_span_index)
if self._check_spans_overlap(*left_span, *annotation.span):
raise ValueError("Trying to add Annotation %s which is overlaps with existing"
raise ValueError("Trying to add Annotation %s which overlaps with existing"
"%s" % (left_annotation, annotation))
self._span_to_annotations[annotation.span][annotation_type] = annotation
self._type_to_annotations[annotation_type][annotation.span] = annotation
Expand All @@ -146,7 +143,7 @@ def get(self, annotation_type: Type[Annotation], span: Optional[Tuple[int, int]]
"""
Return an annotation for the given span and type.
Looking for an exact match only.
Looking for an exact (type and span) match only.
:param annotation_type: Annotation type to get.
:param span: Annotation span (range) to get. If span is not specified it returns an \
Expand All @@ -155,45 +152,45 @@ def get(self, annotation_type: Type[Annotation], span: Optional[Tuple[int, int]]
"""
if span is None:
return self._global_annotations[annotation_type]
else:
check_span(*span)
return self._type_to_annotations[annotation_type][span]
check_span(*span)
return self._type_to_annotations[annotation_type][span]

def iter_annotations(self, annotation_type: Type[Annotation],
*additional: Type[Annotation],
start_offset: Optional[int] = None) -> Iterator[AnnotationsSpan]:
def iter_by_type_nested(self, annotation_type: Type[Annotation],
*additional: Type[Annotation],
start_offset: Optional[int] = None) -> Iterator[AnnotationsSpan]:
"""
Iterate through annotations of the specified type.
Iteration goes through `annotation_type`. it is additionally annotated with what is
specified in `additional` list. Spans of additional annotations should fully cover spans of
`annotation_type`. For example you have annotation *line* and *token*. Each line contains
several tokens. If you try to iterate through *line* and set *token* as `additional`
annotation you get only *line* annotation inside `AnnotationsSpan`. It happens because you
can annotate *token* with *line* but cannot *line* with *token* because *token* is covered
by only one line, but not vice versa.
Iteration goes over `annotation_type` objects. Annotations which are specified in
`additional` list added to resulting `AnnotationsSpan` object. Spans of additional
annotations should fully cover spans of `annotation_type`. For example, suppose that you
have *line* and *token* annotations. Each line contains several tokens. If you try to
iterate through *line* and set *token* as `additional` annotation you get only *line*
annotation inside `AnnotationsSpan`. It happens because you can annotate *token* with
*line* but not *line* with *token*: *token* is covered by only one line and not vice versa.
So,
manager.iter_annotations(line, token) # gives you only line annotation as output,
# *token* annotations not found
manager.iter_annotations(token, line) # gives you *token* and *line* annotation because
# It is possible to find only one covering *line* annotation.
# *token* annotations not found
manager.iter_annotations(token, line) # gives you *token* and *line* annotations because
# It is possible to find only one covering *line*
# annotation.
`additional` can't be empty. If you need to iterate through one annotation only use
`AnnotationManager.iter_annotation()`.
`additional` can't be empty. If you need to iterate through a single annotation, you should
call `AnnotationManager.iter_annotation()` instead.
:param annotation_type: Type of annotation to iterate through.
:param additional: Additional annotations that should be added to the main one.
:param start_offset: Start to iterate from the spesific offset. \
:param start_offset: Start to iterate from a specific offset. \
Can be used as a key argument only.
:return: Iterator through annotations of the requested type.
:return: Iterator over annotations of the requested type.
"""
if not additional:
raise ValueError("At least one additional annotation must be specified. "
"If you need to iterate through only one annotation use "
"If you need to iterate through a single annotation use "
"`iter_annotation()`.")
types = set(additional) | {annotation_type}
for annotation in self.iter_annotation(annotation_type, start_offset=start_offset):
for annotation in self.iter_by_type(annotation_type, start_offset=start_offset):
# Annotations with the same span
same_span_annotations = self._span_to_annotations[annotation.span]
same_span_annotations_type = set(same_span_annotations.keys())
Expand All @@ -209,8 +206,8 @@ def iter_annotations(self, annotation_type: Type[Annotation],
annotations.update({type: same_span_annotations[type] for type in common_types})
yield AnnotationsSpan(*annotation.span, annotations)

def iter_annotation(self, annotation_type: Type[Annotation], *,
start_offset: Optional[int] = None) -> Iterator[Annotation]:
def iter_by_type(self, annotation_type: Type[Annotation], *,
start_offset: Optional[int] = None) -> Iterator[Annotation]:
"""
Iterate through a specific type of annotation.
Expand All @@ -230,6 +227,21 @@ def iter_annotation(self, annotation_type: Type[Annotation], *,
for value in self._type_to_annotations[annotation_type].values()[search_from:]:
yield value

def _find_annotations(self, annotation_type: Type[Annotation], start: int, stop: int,
inspect: Callable, action: str) -> Annotation:
try:
annotation_layer = self._type_to_annotations[annotation_type]
except KeyError:
raise NoAnnotation("There is no annotation type %s" % annotation_type)
check_span(start, stop)
search_start = max(0, annotation_layer.bisect_left((start, start)) - 1)
search_stop = annotation_layer.bisect_right((stop, stop))
for span in annotation_layer.islice(search_start, search_stop):
if inspect(*span):
return annotation_layer[span]
raise NoAnnotation("There is no annotation %s that %s [%d, %d)" % (
annotation_type, action, start, stop))

def find_overlapping_annotation(self, annotation_type: Type[Annotation],
start: int, stop: int) -> Annotation:
"""
Expand All @@ -241,19 +253,10 @@ def find_overlapping_annotation(self, annotation_type: Type[Annotation],
:raise NoAnnotation: There is no such annotation that overlaps with the given interval.
:return: `Annotation` of the requested type.
"""
try:
annotation_layer = self._type_to_annotations[annotation_type]
except KeyError:
raise NoAnnotation("There is no annotation type %s" % annotation_type)
check_span(start, stop)
search_start = max(0, annotation_layer.bisect_left((start, start)) - 1)
search_stop = annotation_layer.bisect_right((stop, stop))
for span in annotation_layer.islice(search_start, search_stop):
if self._check_spans_overlap(start, stop, *span):
# assuming that there is only one such annotation
return annotation_layer[span]
raise NoAnnotation("There is no annotation %s from %d to %d" % (
annotation_type, start, stop))
return self._find_annotations(
annotation_type, start, stop, action="overlaps",
inspect=lambda span_start, span_stop: self._check_spans_overlap(
start, stop, span_start, span_stop))

def find_covering_annotation(self, annotation_type: Type[Annotation],
start: int, stop: int) -> Annotation:
Expand All @@ -266,27 +269,16 @@ def find_covering_annotation(self, annotation_type: Type[Annotation],
:raise NoAnnotation: There is no such annotation that overlaps with the given interval.
:return: `Annotation` of the requested type.
"""
try:
annotation_layer = self._type_to_annotations[annotation_type]
except KeyError:
raise NoAnnotation("There is no annotation type %s" % annotation_type)
check_span(start, stop)
search_start = max(0, annotation_layer.bisect_left((start, start)) - 1)
search_stop = annotation_layer.bisect_right((stop, stop))
for span_start, span_stop in annotation_layer.islice(search_start, search_stop):
if start == stop:
if self._check_spans_overlap(start, stop, span_start, span_stop):
return annotation_layer[(span_start, span_stop)]
elif span_start <= start and stop <= span_stop:
# assuming that there is only one such annotation because they cannot overlap
return annotation_layer[(span_start, span_stop)]
raise NoAnnotation("There is no annotation %s that contains [%d, %d)" % (
annotation_type, start, stop))
return self._find_annotations(
annotation_type, start, stop, action="contains",
inspect=lambda span_start, span_stop: self._check_spans_overlap(
start, stop, span_start, span_stop) if start == stop else
span_start <= start and stop <= span_stop)

@classmethod
def _check_spans_overlap(cls, start1: int, stop1: int, start2: int, stop2: int) -> bool:
"""
Check if two spans have at least 1 common point in their overlap.
Check if two spans have at least 1 common point.
Span 1 is [start1, stop1). `stop1` itself is excluded.
Span 2 is [start2, stop2). `stop2` itself is excluded.
Expand All @@ -313,15 +305,12 @@ def _check_spans_overlap(cls, start1: int, stop1: int, start2: int, stop2: int)
if start1 == stop1:
if start2 == stop2:
return start1 == start2
else:
return start2 < start1 < stop2
else:
if start2 == stop2:
return start1 < start2 < stop1
else:
return (start1 <= start2 < stop1 or
start1 < stop2 < stop1 or
start2 <= start1 < stop2)
return start2 < start1 < stop2
if start2 == stop2:
return start1 < start2 < stop1
return (start1 <= start2 < stop1 or
start1 < stop2 < stop1 or
start2 <= start1 < stop2)

@classmethod
def from_file(cls, file: UnicodeFile) -> "AnnotationManager":
Expand Down
2 changes: 1 addition & 1 deletion lookout/style/format/annotations/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def check_offset(value: Union[int, numpy.int], name: str) -> None:
Validate the offset: check the type and reject negative values.
:param value: Offset value to check.
:param name: Variable name for exception message.
:param name: Variable name for the exception message.
:raises ValueError: if value is incorrect.
"""
if not (isinstance(value, int) or isinstance(value, numpy.int)):
Expand Down
21 changes: 11 additions & 10 deletions lookout/style/format/feature_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def _classify_vnodes(self, file: AnnotationManager) -> None:
:param file: Source code annotated with `RawTokenAnnotation`.
"""
indentation = []
for token in file.iter_annotation(RawTokenAnnotation):
for token in file.iter_by_type(RawTokenAnnotation):
token_value = file[token.span]
if token.has_node:
file.add(token.to_atomic_token_annotation())
Expand Down Expand Up @@ -574,7 +574,7 @@ def _class_seq_to_annotations(start, stop, current_class_seq):

start, stop, current_class_seq = None, None, []

for annotations in file.iter_annotations(
for annotations in file.iter_by_type_nested(
AtomicTokenAnnotation, ClassAnnotation, AccumulatedIndentationAnnotation):
has_target = ClassAnnotation in annotations
acc_indent = AccumulatedIndentationAnnotation in annotations
Expand Down Expand Up @@ -609,7 +609,8 @@ def _add_noops(self, file: AnnotationManager) -> None:
return

prev_annotations = None
for i, annotations in enumerate(file.iter_annotations(TokenAnnotation, LabelAnnotation)):
for i, annotations in enumerate(file.iter_by_type_nested(
TokenAnnotation, LabelAnnotation)):
if i == 0:
if LabelAnnotation not in annotations:
file.add(TokenAnnotation(0, 0))
Expand All @@ -630,7 +631,7 @@ def _find_parent(search_start_offset: int, file: AnnotationManager, closest_left
) -> Optional[bblfsh.Node]:
"""
Compute the UAST parent of the `TokenAnnotation` as the LCA of the closest left and right \
babelfish nodes.
Babelfish nodes.
:param search_start_offset: Offset of the current node.
:param file: Source code annotated with `UASTAnnotation` and `TokenAnnotation`.
Expand All @@ -644,8 +645,8 @@ def _find_parent(search_start_offset: int, file: AnnotationManager, closest_left
left_ancestors.add(id(parents[current_left_ancestor_id]))
current_left_ancestor_id = id(parents[current_left_ancestor_id])

for future_vnode in file.iter_annotation(TokenAnnotation,
start_offset=search_start_offset):
for future_vnode in file.iter_by_type(TokenAnnotation,
start_offset=search_start_offset):
if future_vnode.has_node:
break
else:
Expand Down Expand Up @@ -798,7 +799,7 @@ def _compute_labels_mappings(self, vnodes: Iterable[VirtualNode]) -> None:
def _fill_vnode_parents(self, file: AnnotationManager):
closest_left_node_id = None
uast_annotation = file.get(UASTAnnotation)
for annotation in file.iter_annotation(TokenAnnotation):
for annotation in file.iter_by_type(TokenAnnotation):
if annotation.has_node:
closest_left_node_id = id(annotation.node)
file.add(TokenParentAnnotation(*annotation.span,
Expand Down Expand Up @@ -841,7 +842,7 @@ def file_to_old_parse_file_format(file: AnnotationManager) -> Tuple[List["Virtua
line_lens = [0] + [len(d) for d in raw_lines_data]
line_lens[-1] += 1
_line_start_offsets = numpy.array(line_lens).cumsum()
for annotation in file.iter_annotation(RawTokenAnnotation):
for annotation in file.iter_by_type(RawTokenAnnotation):
vnode = VirtualNode(
file[annotation.span],
_to_position(raw_lines_data, _line_start_offsets, annotation.start),
Expand Down Expand Up @@ -877,8 +878,8 @@ def _file_to_vnodes_and_parents(file: AnnotationManager) -> Tuple[List["VirtualN
line_lens[-1] += 1
_line_start_offsets = numpy.array(line_lens).cumsum()
vnode_parents = {}
for annotations in file.iter_annotations(TokenAnnotation,
LabelAnnotation, TokenParentAnnotation):
for annotations in file.iter_by_type_nested(TokenAnnotation,
LabelAnnotation, TokenParentAnnotation):
vnode = VirtualNode(
file[annotations.span],
_to_position(raw_lines_data, _line_start_offsets, annotations.start),
Expand Down
Loading

0 comments on commit 5516ae3

Please sign in to comment.