Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: better handle errors in static analysis #271

Merged
merged 1 commit into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 58 additions & 20 deletions codecov_cli/services/staticanalysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,24 +36,15 @@
):
ff = select_file_finder(config)
files = list(ff.find_files(folder, pattern, folders_to_exclude))
logger.info(f"Running the analyzer on {len(files)} files")
mapped_func = partial(analyze_file, config)
all_data = {}
file_metadata = []
with click.progressbar(
length=len(files),
label="Analyzing files",
) as bar:
with get_context("fork").Pool(processes=numberprocesses) as pool:
file_results = pool.imap_unordered(mapped_func, files)
for x in file_results:
bar.update(1, x)
if x is not None:
res = x.asdict()["result"]
all_data[x.filename] = res
file_metadata.append(
{"filepath": x.filename, "file_hash": res["hash"]}
)
processing_results = await process_files(files, numberprocesses, config)
# Let users know if there were processing errors
# This is here and not in the funcition so we can add an option to ignore those (possibly)
# Also makes the function easier to test
processing_errors = processing_results["processing_errors"]
log_processing_errors(processing_errors)
# Upload results metadata to codecov to get list of files that we need to upload
file_metadata = processing_results["file_metadata"]
all_data = processing_results["all_data"]
try:
json_output = {"commit": commit, "filepaths": file_metadata}
logger.debug(
Expand Down Expand Up @@ -150,6 +141,51 @@
extra_log_attributes=dict(time_taken=response.elapsed.total_seconds())
),
)
log_processing_errors(processing_errors)


def log_processing_errors(processing_errors: typing.Dict[str, str]) -> None:
if len(processing_errors) > 0:
logger.error(

Check warning on line 149 in codecov_cli/services/staticanalysis/__init__.py

View check run for this annotation

Codecov / codecov/patch

codecov_cli/services/staticanalysis/__init__.py#L149

Added line #L149 was not covered by tests
f"{len(processing_errors)} files have processing errors and have been IGNORED."
)
for file, error in processing_errors.items():
logger.error(f"-> {file}: ERROR {error}")

Check warning on line 153 in codecov_cli/services/staticanalysis/__init__.py

View check run for this annotation

Codecov / codecov/patch

codecov_cli/services/staticanalysis/__init__.py#L152-L153

Added lines #L152 - L153 were not covered by tests


async def process_files(
files_to_analyze: typing.List[FileAnalysisRequest],
numberprocesses: int,
config: typing.Optional[typing.Dict],
):
logger.info(f"Running the analyzer on {len(files_to_analyze)} files")
mapped_func = partial(analyze_file, config)
all_data = {}
file_metadata = []
errors = {}
with click.progressbar(
length=len(files_to_analyze),
label="Analyzing files",
) as bar:
with get_context("fork").Pool(processes=numberprocesses) as pool:
file_results = pool.imap_unordered(mapped_func, files_to_analyze)
for result in file_results:
bar.update(1, result)
if result is not None:
if result.result:
all_data[result.filename] = result.result
file_metadata.append(
{
"filepath": result.filename,
"file_hash": result.result["hash"],
}
)
elif result.error:
errors[result.filename] = result.error
logger.info("All files have been processed")
return dict(
all_data=all_data, file_metadata=file_metadata, processing_errors=errors
)


async def send_single_upload_put(client, all_data, el):
Expand Down Expand Up @@ -205,7 +241,9 @@
return response


def analyze_file(config, filename: FileAnalysisRequest):
def analyze_file(
config, filename: FileAnalysisRequest
) -> typing.Optional[FileAnalysisResult]:
try:
with open(filename.actual_filepath, "rb") as file:
actual_code = file.read()
Expand All @@ -219,7 +257,7 @@
except AnalysisError as e:
error_dict = {
"filename": str(filename.result_filename),
"error_class": str(type(e)),
"error": str(e),
}
return FileAnalysisResult(
filename=str(filename.result_filename), error=error_dict
Expand Down
5 changes: 4 additions & 1 deletion codecov_cli/services/staticanalysis/analyzers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from codecov_cli.services.staticanalysis.analyzers.general import BaseAnalyzer
from codecov_cli.services.staticanalysis.analyzers.javascript_es6 import ES6Analyzer
from codecov_cli.services.staticanalysis.analyzers.python import PythonAnalyzer
from codecov_cli.services.staticanalysis.types import FileAnalysisRequest


def get_best_analyzer(filename, actual_code) -> BaseAnalyzer:
def get_best_analyzer(
filename: FileAnalysisRequest, actual_code: bytes
) -> BaseAnalyzer:
if filename.actual_filepath.suffix == ".py":
return PythonAnalyzer(filename, actual_code)
if filename.actual_filepath.suffix == ".js":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from codecov_cli.services.staticanalysis.analyzers.python.node_wrappers import (
NodeVisitor,
)
from codecov_cli.services.staticanalysis.types import FileAnalysisRequest

_function_query_str = """
(function_definition
Expand Down Expand Up @@ -52,14 +53,16 @@ class PythonAnalyzer(BaseAnalyzer):
]
wrappers = ["class_definition", "function_definition"]

def __init__(self, path, actual_code, **options):
def __init__(
self, file_analysis_request: FileAnalysisRequest, actual_code: bytes, **options
):
self.actual_code = actual_code
self.lines = self.actual_code.split(b"\n")
self.statements = []
self.import_lines = set()
self.definitions_lines = set()
self.functions = []
self.path = path.result_filename
self.path = file_analysis_request.result_filename
self.PY_LANGUAGE = Language(staticcodecov_languages.__file__, "python")
self.parser = Parser()
self.parser.set_language(self.PY_LANGUAGE)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
from tree_sitter import Node

from codecov_cli.services.staticanalysis.exceptions import AnalysisError


class NodeVisitor(object):
def __init__(self, analyzer):
self.analyzer = analyzer

def start_visit(self, node):
self.visit(node)

def visit(self, node):
def visit(self, node: Node):
self.do_visit(node)
for c in node.children:
self.visit(c)

def _is_function_docstring(self, node):
def _is_function_docstring(self, node: Node):
"""Skips docstrings for funtions, such as this one.
Pytest doesn't include them in the report, so I don't think we should either,
at least for now.
Expand Down Expand Up @@ -39,15 +44,15 @@ def _is_function_docstring(self, node):
and is_in_function_context
)

def _get_previous_sibling_that_is_not_comment_not_func_docstring(self, node):
def _get_previous_sibling_that_is_not_comment_not_func_docstring(self, node: Node):
curr = node.prev_named_sibling
while curr is not None and (
curr.type == "comment" or self._is_function_docstring(curr)
):
curr = curr.prev_named_sibling
return curr

def do_visit(self, node):
def do_visit(self, node: Node):
if node.is_named:
current_line_number = node.start_point[0] + 1
if node.type in (
Expand Down Expand Up @@ -84,17 +89,29 @@ def do_visit(self, node):
}
)
if node.type in ("if_statement", "elif_clause"):
# Some of the children of a node have a field_name associated to them
# In the case of an if and elif, "consequence" is the code that is executed in that branch of code
first_if_statement = node.child_by_field_name("consequence")
if first_if_statement.type == "block":
first_if_statement = first_if_statement.children[0]
try:
if first_if_statement.type == "block":
first_if_statement = first_if_statement.children[0] # BUG
except IndexError:
raise AnalysisError(
f"if_statement consequence is empty block @ {self.analyzer.path}:{first_if_statement.start_point[0] + 1}, column {first_if_statement.start_point[1]}"
)
self.analyzer.line_surety_ancestorship[
first_if_statement.start_point[0] + 1
] = current_line_number
if node.type in ("for_statement", "while_statement"):
first_if_statement = node.child_by_field_name("body")
if first_if_statement.type == "block":
first_if_statement = first_if_statement.children[0]
first_loop_statement = node.child_by_field_name("body")
try:
if first_loop_statement.type == "block":
first_loop_statement = first_loop_statement.children[0]
except IndexError:
raise AnalysisError(
f"loop_statement body is empty block @ {self.analyzer.path}:{first_loop_statement.start_point[0] + 1}, column {first_loop_statement.start_point[1]}"
)
self.analyzer.line_surety_ancestorship[
first_if_statement.start_point[0] + 1
first_loop_statement.start_point[0] + 1
] = current_line_number
pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import pathlib

import pytest
from tree_sitter import Node

from codecov_cli.services.staticanalysis.analyzers.python import PythonAnalyzer
from codecov_cli.services.staticanalysis.analyzers.python.node_wrappers import (
NodeVisitor,
)
from codecov_cli.services.staticanalysis.exceptions import AnalysisError
from codecov_cli.services.staticanalysis.types import FileAnalysisRequest


class TestMalformedIfStatements(object):
def test_if_empty_block_raises_analysis_error(self):
analysis_request = FileAnalysisRequest(
actual_filepath=pathlib.Path("test_file"), result_filename="test_file"
)
# Code for an empty IF. NOT valid python code
actual_code = b'x = 10\nif x == "batata":\n\n'
python_analyser = PythonAnalyzer(analysis_request, actual_code=actual_code)
# Parse the code snippet and get the if_statement node
tree = python_analyser.parser.parse(actual_code)
root = tree.root_node
assert root.type == "module"
assert root.child_count == 2
if_statement_node = root.children[1]
assert if_statement_node.type == "if_statement"
# Make sure it is indeed an empty if_statement
if_body = if_statement_node.child_by_field_name("consequence")
assert if_body.type == "block"
assert if_body.child_count == 0
visitor = NodeVisitor(python_analyser)
with pytest.raises(AnalysisError) as exp:
visitor.do_visit(if_statement_node)
assert (
str(exp.value)
== "if_statement consequence is empty block @ test_file:2, column 17"
)

def test_for_empty_block_raises_analysis_error(self):
analysis_request = FileAnalysisRequest(
actual_filepath=pathlib.Path("test_file"), result_filename="test_file"
)
# Code for an empty IF. NOT valid python code
actual_code = b"for x in range(10):\n\n"
python_analyser = PythonAnalyzer(analysis_request, actual_code=actual_code)
# Parse the code snippet and get the if_statement node
tree = python_analyser.parser.parse(actual_code)
root = tree.root_node
assert root.type == "module"
assert root.child_count == 1
for_statement_node = root.children[0]
assert for_statement_node.type == "for_statement"
# Make sure it is indeed an empty if_statement
if_body = for_statement_node.child_by_field_name("body")
assert if_body.type == "block"
assert if_body.child_count == 0
visitor = NodeVisitor(python_analyser)
with pytest.raises(AnalysisError) as exp:
visitor.do_visit(for_statement_node)
assert (
str(exp.value)
== "loop_statement body is empty block @ test_file:1, column 19"
)
58 changes: 56 additions & 2 deletions tests/services/static_analysis/test_static_analysis_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,65 @@
import responses
from responses import matchers

from codecov_cli.services.staticanalysis import run_analysis_entrypoint
from codecov_cli.services.staticanalysis.types import FileAnalysisRequest
from codecov_cli.services.staticanalysis import process_files, run_analysis_entrypoint
from codecov_cli.services.staticanalysis.types import (
FileAnalysisRequest,
FileAnalysisResult,
)


class TestStaticAnalysisService:
@pytest.mark.asyncio
async def test_process_files_with_error(self, mocker):
files_found = list(
map(
lambda filename: FileAnalysisRequest(str(filename), Path(filename)),
[
"correct_file.py",
"error_file.py",
],
)
)
mock_get_context = mocker.patch(
"codecov_cli.services.staticanalysis.get_context"
)

def side_effect(config, filename: FileAnalysisRequest):
if filename.result_filename == "correct_file.py":
return FileAnalysisResult(
filename=filename.result_filename, result={"hash": "abc123"}
)
if filename.result_filename == "error_file.py":
return FileAnalysisResult(
filename=filename.result_filename, error="some error @ line 12"
)
# Should not get here, so fail test
assert False

mock_analyze_function = mocker.patch(
"codecov_cli.services.staticanalysis.analyze_file"
)
mock_analyze_function.side_effect = side_effect

def imap_side_effect(mapped_func, files):
results = []
for file in files:
results.append(mapped_func(file))
return results

mock_get_context.return_value.Pool.return_value.__enter__.return_value.imap_unordered.side_effect = (
imap_side_effect
)

results = await process_files(files_found, 1, {})
mock_get_context.return_value.Pool.return_value.__enter__.return_value.imap_unordered.assert_called()
assert mock_analyze_function.call_count == 2
assert results == dict(
all_data={"correct_file.py": {"hash": "abc123"}},
file_metadata=[{"file_hash": "abc123", "filepath": "correct_file.py"}],
processing_errors={"error_file.py": "some error @ line 12"},
)

@pytest.mark.asyncio
async def test_static_analysis_service(self, mocker):
mock_file_finder = mocker.patch(
Expand Down