fix: better handle errors in static analysis
This was needed already, but it was ultimately inspired by a user who had the following error:

```
File "codecov_cli/services/staticanalysis/analyzers/python/node_wrappers.py", line 41, in do_visit
    first_if_statement = first_if_statement.children[0]
IndexError: list index out of range
```

Despite my best efforts, I was only able to reproduce this error with invalid Python code
(i.e. an empty `if` statement).
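
Roughly, the kind of file that triggers it (this snippet is illustrative only, not the user's code):

```python
# malformed_example.py (illustrative only, not the user's code):
# an `if` with no body. This is not valid Python, but tree-sitter still
# parses it, and the "consequence" block it produces can end up with no
# children, which is what trips the `children[0]` lookup in do_visit.
if some_condition:
```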

Regardless of what caused this issue in the first place, there may be valid reasons
for someone to have malformed code in their repo (depending on what the repo is 👀),
so we should still be able to upload _something_ and ignore the failures.

The tricky thing here is that without the static analysis data, ATS can't work well.
And we can't be sure that the analyzer doesn't have bugs. I don't think this case in
particular is one of them, but I could be wrong.

So these changes are a way to ignore the files that produced processing errors WHILE
still surfacing those errors to users (they are logged twice: once right after processing
and once again after the upload). Ideally users would look at them and let us know if
something is not right.
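
With these changes, a run over such a repo keeps going and reports something like the following (filenames are placeholders; the format is the one used by `log_processing_errors` in the diff below):

```
2 files have processing errors and have been IGNORED.
-> some_module.py: ERROR <details of the AnalysisError for that file>
-> other_module.py: ERROR <details of the AnalysisError for that file>
```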

But maybe we should still fail the static analysis step in their CI? 🧐
giovanni-guidini committed Sep 29, 2023
1 parent 01986b6 commit 22b4630
Showing 5 changed files with 150 additions and 35 deletions.
78 changes: 58 additions & 20 deletions codecov_cli/services/staticanalysis/__init__.py
@@ -36,24 +36,15 @@ async def run_analysis_entrypoint(
):
ff = select_file_finder(config)
files = list(ff.find_files(folder, pattern, folders_to_exclude))
logger.info(f"Running the analyzer on {len(files)} files")
mapped_func = partial(analyze_file, config)
all_data = {}
file_metadata = []
with click.progressbar(
length=len(files),
label="Analyzing files",
) as bar:
with get_context("fork").Pool(processes=numberprocesses) as pool:
file_results = pool.imap_unordered(mapped_func, files)
for x in file_results:
bar.update(1, x)
if x is not None:
res = x.asdict()["result"]
all_data[x.filename] = res
file_metadata.append(
{"filepath": x.filename, "file_hash": res["hash"]}
)
processing_results = await process_files(files, numberprocesses, config)
# Let users know if there were processing errors
# This is here and not in the function so we can add an option to ignore those (possibly)
# Also makes the function easier to test
processing_errors = processing_results["processing_errors"]
log_processing_errors(processing_errors)
# Upload results metadata to codecov to get list of files that we need to upload
file_metadata = processing_results["file_metadata"]
all_data = processing_results["all_data"]
try:
json_output = {"commit": commit, "filepaths": file_metadata}
logger.debug(
@@ -150,6 +141,51 @@ async def run_analysis_entrypoint(
extra_log_attributes=dict(time_taken=response.elapsed.total_seconds())
),
)
log_processing_errors(processing_errors)


def log_processing_errors(processing_errors: typing.Dict[str, str]) -> None:
if len(processing_errors) > 0:
logger.error(
f"{len(processing_errors)} files have processing errors and have been IGNORED."
)
for file, error in processing_errors.items():
logger.error(f"-> {file}: ERROR {error}")


async def process_files(
files_to_analyze: typing.List[FileAnalysisRequest],
numberprocesses: int,
config: typing.Optional[typing.Dict],
):
logger.info(f"Running the analyzer on {len(files_to_analyze)} files")
mapped_func = partial(analyze_file, config)
all_data = {}
file_metadata = []
errors = {}
with click.progressbar(
length=len(files_to_analyze),
label="Analyzing files",
) as bar:
with get_context("fork").Pool(processes=numberprocesses) as pool:
file_results = pool.imap_unordered(mapped_func, files_to_analyze)
for result in file_results:
bar.update(1, result)
if result is not None:
if result.result:
all_data[result.filename] = result.result
file_metadata.append(
{
"filepath": result.filename,
"file_hash": result.result["hash"],
}
)
elif result.error:
errors[result.filename] = result.error
logger.info("All files have been processed")
return dict(
all_data=all_data, file_metadata=file_metadata, processing_errors=errors
)


async def send_single_upload_put(client, all_data, el):
@@ -205,7 +241,9 @@ def send_finish_signal(response_json, upload_url: str, token: str):
return response


def analyze_file(config, filename: FileAnalysisRequest):
def analyze_file(
config, filename: FileAnalysisRequest
) -> typing.Optional[FileAnalysisResult]:
try:
with open(filename.actual_filepath, "rb") as file:
actual_code = file.read()
@@ -219,7 +257,7 @@ def analyze_file(config, filename: FileAnalysisRequest):
except AnalysisError as e:
error_dict = {
"filename": str(filename.result_filename),
"error_class": str(type(e)),
"error": str(e),
}
return FileAnalysisResult(
filename=str(filename.result_filename), error=error_dict
5 changes: 4 additions & 1 deletion codecov_cli/services/staticanalysis/analyzers/__init__.py
@@ -1,9 +1,12 @@
from codecov_cli.services.staticanalysis.analyzers.general import BaseAnalyzer
from codecov_cli.services.staticanalysis.analyzers.javascript_es6 import ES6Analyzer
from codecov_cli.services.staticanalysis.analyzers.python import PythonAnalyzer
from codecov_cli.services.staticanalysis.types import FileAnalysisRequest


def get_best_analyzer(filename, actual_code) -> BaseAnalyzer:
def get_best_analyzer(
filename: FileAnalysisRequest, actual_code: bytes
) -> BaseAnalyzer:
if filename.actual_filepath.suffix == ".py":
return PythonAnalyzer(filename, actual_code)
if filename.actual_filepath.suffix == ".js":
codecov_cli/services/staticanalysis/analyzers/python/__init__.py
@@ -7,6 +7,7 @@
from codecov_cli.services.staticanalysis.analyzers.python.node_wrappers import (
NodeVisitor,
)
from codecov_cli.services.staticanalysis.types import FileAnalysisRequest

_function_query_str = """
(function_definition
@@ -52,14 +53,16 @@ class PythonAnalyzer(BaseAnalyzer):
]
wrappers = ["class_definition", "function_definition"]

def __init__(self, path, actual_code, **options):
def __init__(
self, file_analysis_request: FileAnalysisRequest, actual_code: bytes, **options
):
self.actual_code = actual_code
self.lines = self.actual_code.split(b"\n")
self.statements = []
self.import_lines = set()
self.definitions_lines = set()
self.functions = []
self.path = path.result_filename
self.path = file_analysis_request.result_filename
self.PY_LANGUAGE = Language(staticcodecov_languages.__file__, "python")
self.parser = Parser()
self.parser.set_language(self.PY_LANGUAGE)
codecov_cli/services/staticanalysis/analyzers/python/node_wrappers.py
@@ -1,16 +1,21 @@
from tree_sitter import Node

from codecov_cli.services.staticanalysis.exceptions import AnalysisError


class NodeVisitor(object):
def __init__(self, analyzer):
self.analyzer = analyzer

def start_visit(self, node):
self.visit(node)

def visit(self, node):
def visit(self, node: Node):
self.do_visit(node)
for c in node.children:
self.visit(c)

def _is_function_docstring(self, node):
def _is_function_docstring(self, node: Node):
"""Skips docstrings for funtions, such as this one.
Pytest doesn't include them in the report, so I don't think we should either,
at least for now.
@@ -39,15 +44,15 @@ def _is_function_docstring(self, node):
and is_in_function_context
)

def _get_previous_sibling_that_is_not_comment_not_func_docstring(self, node):
def _get_previous_sibling_that_is_not_comment_not_func_docstring(self, node: Node):
curr = node.prev_named_sibling
while curr is not None and (
curr.type == "comment" or self._is_function_docstring(curr)
):
curr = curr.prev_named_sibling
return curr

def do_visit(self, node):
def do_visit(self, node: Node):
if node.is_named:
current_line_number = node.start_point[0] + 1
if node.type in (
@@ -84,17 +89,29 @@ def do_visit(self, node):
}
)
if node.type in ("if_statement", "elif_clause"):
# Some of the children of a node have a field_name associated with them.
# For an if_statement or elif_clause, "consequence" is the code executed in that branch.
first_if_statement = node.child_by_field_name("consequence")
if first_if_statement.type == "block":
first_if_statement = first_if_statement.children[0]
try:
if first_if_statement.type == "block":
first_if_statement = first_if_statement.children[0] # BUG
except IndexError:
raise AnalysisError(
f"if_statement consequence is empty block @ {self.analyzer.path}:{first_if_statement.start_point[0] + 1}, column {first_if_statement.start_point[1]}"
)
self.analyzer.line_surety_ancestorship[
first_if_statement.start_point[0] + 1
] = current_line_number
if node.type in ("for_statement", "while_statement"):
first_if_statement = node.child_by_field_name("body")
if first_if_statement.type == "block":
first_if_statement = first_if_statement.children[0]
first_loop_statement = node.child_by_field_name("body")
try:
if first_loop_statement.type == "block":
first_loop_statement = first_loop_statement.children[0]
except IndexError:
raise AnalysisError(
f"loop_statement body is empty block @ {self.analyzer.path}:{first_loop_statement.start_point[0] + 1}, column {first_loop_statement.start_point[1]}"
)
self.analyzer.line_surety_ancestorship[
first_if_statement.start_point[0] + 1
first_loop_statement.start_point[0] + 1
] = current_line_number
pass
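
For context on the `child_by_field_name("consequence")` / `children[0]` lookup handled above, here is a small standalone sketch. The calls mirror what `PythonAnalyzer` and `NodeVisitor` do in this diff; the grammar path and the parsed input are placeholders of mine:

```python
from tree_sitter import Language, Parser

# Placeholder path: the CLI builds its own grammar bundle (staticcodecov_languages).
PY_LANGUAGE = Language("path/to/languages.so", "python")
parser = Parser()
parser.set_language(PY_LANGUAGE)

tree = parser.parse(b"if x:\n    do_something()\n")
if_node = tree.root_node.children[0]                      # type == "if_statement"

consequence = if_node.child_by_field_name("consequence")  # body of the `if` branch
print(consequence.type)                                   # "block"
print(consequence.children[0].start_point)                # (1, 4): line 2, column 4

# With the malformed "empty if" input from the commit message, that block can
# come back with no children, so children[0] raises IndexError. do_visit now
# converts that into an AnalysisError carrying the file path and position,
# and process_files records it instead of crashing the whole analysis run.
```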
58 changes: 56 additions & 2 deletions tests/services/static_analysis/test_static_analysis_service.py
@@ -7,11 +7,65 @@
import responses
from responses import matchers

from codecov_cli.services.staticanalysis import run_analysis_entrypoint
from codecov_cli.services.staticanalysis.types import FileAnalysisRequest
from codecov_cli.services.staticanalysis import process_files, run_analysis_entrypoint
from codecov_cli.services.staticanalysis.types import (
FileAnalysisRequest,
FileAnalysisResult,
)


class TestStaticAnalysisService:
@pytest.mark.asyncio
async def test_process_files_with_error(self, mocker):
files_found = list(
map(
lambda filename: FileAnalysisRequest(str(filename), Path(filename)),
[
"correct_file.py",
"error_file.py",
],
)
)
mock_get_context = mocker.patch(
"codecov_cli.services.staticanalysis.get_context"
)

def side_effect(config, filename: FileAnalysisRequest):
if filename.result_filename == "correct_file.py":
return FileAnalysisResult(
filename=filename.result_filename, result={"hash": "abc123"}
)
if filename.result_filename == "error_file.py":
return FileAnalysisResult(
filename=filename.result_filename, error="some error @ line 12"
)
# Should not get here, so fail test
assert False

mock_analyze_function = mocker.patch(
"codecov_cli.services.staticanalysis.analyze_file"
)
mock_analyze_function.side_effect = side_effect

def imap_side_effect(mapped_func, files):
results = []
for file in files:
results.append(mapped_func(file))
return results

mock_get_context.return_value.Pool.return_value.__enter__.return_value.imap_unordered.side_effect = (
imap_side_effect
)

results = await process_files(files_found, 1, {})
mock_get_context.return_value.Pool.return_value.__enter__.return_value.imap_unordered.assert_called()
assert mock_analyze_function.call_count == 2
assert results == dict(
all_data={"correct_file.py": {"hash": "abc123"}},
file_metadata=[{"file_hash": "abc123", "filepath": "correct_file.py"}],
processing_errors={"error_file.py": "some error @ line 12"},
)

@pytest.mark.asyncio
async def test_static_analysis_service(self, mocker):
mock_file_finder = mocker.patch(
