codecov · giovanni-guidini · Sep 29, 2023 · Sep 29, 2023
diff --git a/codecov_cli/services/staticanalysis/__init__.py b/codecov_cli/services/staticanalysis/__init__.py
@@ -36,24 +36,15 @@
 ):
     ff = select_file_finder(config)
     files = list(ff.find_files(folder, pattern, folders_to_exclude))
-    logger.info(f"Running the analyzer on {len(files)} files")
-    mapped_func = partial(analyze_file, config)
-    all_data = {}
-    file_metadata = []
-    with click.progressbar(
-        length=len(files),
-        label="Analyzing files",
-    ) as bar:
-        with get_context("fork").Pool(processes=numberprocesses) as pool:
-            file_results = pool.imap_unordered(mapped_func, files)
-            for x in file_results:
-                bar.update(1, x)
-                if x is not None:
-                    res = x.asdict()["result"]
-                    all_data[x.filename] = res
-                    file_metadata.append(
-                        {"filepath": x.filename, "file_hash": res["hash"]}
-                    )
+    processing_results = await process_files(files, numberprocesses, config)
+    # Let users know if there were processing errors
+    # This is here and not in the funcition so we can add an option to ignore those (possibly)
+    # Also makes the function easier to test
+    processing_errors = processing_results["processing_errors"]
+    log_processing_errors(processing_errors)
+    # Upload results metadata to codecov to get list of files that we need to upload
+    file_metadata = processing_results["file_metadata"]
+    all_data = processing_results["all_data"]
     try:
         json_output = {"commit": commit, "filepaths": file_metadata}
         logger.debug(
@@ -150,6 +141,51 @@
             extra_log_attributes=dict(time_taken=response.elapsed.total_seconds())
         ),
     )
+    log_processing_errors(processing_errors)
+
+
+def log_processing_errors(processing_errors: typing.Dict[str, str]) -> None:
+    if len(processing_errors) > 0:
+        logger.error(
+            f"{len(processing_errors)} files have processing errors and have been IGNORED."
+        )
+        for file, error in processing_errors.items():
+            logger.error(f"-> {file}: ERROR {error}")
+
+
+async def process_files(
+    files_to_analyze: typing.List[FileAnalysisRequest],
+    numberprocesses: int,
+    config: typing.Optional[typing.Dict],
+):
+    logger.info(f"Running the analyzer on {len(files_to_analyze)} files")
+    mapped_func = partial(analyze_file, config)
+    all_data = {}
+    file_metadata = []
+    errors = {}
+    with click.progressbar(
+        length=len(files_to_analyze),
+        label="Analyzing files",
+    ) as bar:
+        with get_context("fork").Pool(processes=numberprocesses) as pool:
+            file_results = pool.imap_unordered(mapped_func, files_to_analyze)
+            for result in file_results:
+                bar.update(1, result)
+                if result is not None:
+                    if result.result:
+                        all_data[result.filename] = result.result
+                        file_metadata.append(
+                            {
+                                "filepath": result.filename,
+                                "file_hash": result.result["hash"],
+                            }
+                        )
+                    elif result.error:
+                        errors[result.filename] = result.error
+    logger.info("All files have been processed")
+    return dict(
+        all_data=all_data, file_metadata=file_metadata, processing_errors=errors
+    )
 
 
 async def send_single_upload_put(client, all_data, el):
@@ -205,7 +241,9 @@
     return response
 
 
-def analyze_file(config, filename: FileAnalysisRequest):
+def analyze_file(
+    config, filename: FileAnalysisRequest
+) -> typing.Optional[FileAnalysisResult]:
     try:
         with open(filename.actual_filepath, "rb") as file:
             actual_code = file.read()
@@ -219,7 +257,7 @@
     except AnalysisError as e:
         error_dict = {
             "filename": str(filename.result_filename),
-            "error_class": str(type(e)),
+            "error": str(e),
         }
         return FileAnalysisResult(
             filename=str(filename.result_filename), error=error_dict

diff --git a/codecov_cli/services/staticanalysis/analyzers/__init__.py b/codecov_cli/services/staticanalysis/analyzers/__init__.py
@@ -1,9 +1,12 @@
 from codecov_cli.services.staticanalysis.analyzers.general import BaseAnalyzer
 from codecov_cli.services.staticanalysis.analyzers.javascript_es6 import ES6Analyzer
 from codecov_cli.services.staticanalysis.analyzers.python import PythonAnalyzer
+from codecov_cli.services.staticanalysis.types import FileAnalysisRequest
 
 
-def get_best_analyzer(filename, actual_code) -> BaseAnalyzer:
+def get_best_analyzer(
+    filename: FileAnalysisRequest, actual_code: bytes
+) -> BaseAnalyzer:
     if filename.actual_filepath.suffix == ".py":
         return PythonAnalyzer(filename, actual_code)
     if filename.actual_filepath.suffix == ".js":

diff --git a/codecov_cli/services/staticanalysis/analyzers/python/__init__.py b/codecov_cli/services/staticanalysis/analyzers/python/__init__.py
@@ -7,6 +7,7 @@
 from codecov_cli.services.staticanalysis.analyzers.python.node_wrappers import (
     NodeVisitor,
 )
+from codecov_cli.services.staticanalysis.types import FileAnalysisRequest
 
 _function_query_str = """
 (function_definition
@@ -52,14 +53,16 @@ class PythonAnalyzer(BaseAnalyzer):
     ]
     wrappers = ["class_definition", "function_definition"]
 
-    def __init__(self, path, actual_code, **options):
+    def __init__(
+        self, file_analysis_request: FileAnalysisRequest, actual_code: bytes, **options
+    ):
         self.actual_code = actual_code
         self.lines = self.actual_code.split(b"\n")
         self.statements = []
         self.import_lines = set()
         self.definitions_lines = set()
         self.functions = []
-        self.path = path.result_filename
+        self.path = file_analysis_request.result_filename
         self.PY_LANGUAGE = Language(staticcodecov_languages.__file__, "python")
         self.parser = Parser()
         self.parser.set_language(self.PY_LANGUAGE)

diff --git a/codecov_cli/services/staticanalysis/analyzers/python/node_wrappers.py b/codecov_cli/services/staticanalysis/analyzers/python/node_wrappers.py
@@ -1,16 +1,21 @@
+from tree_sitter import Node
+
+from codecov_cli.services.staticanalysis.exceptions import AnalysisError
+
+
 class NodeVisitor(object):
     def __init__(self, analyzer):
         self.analyzer = analyzer
 
     def start_visit(self, node):
         self.visit(node)
 
-    def visit(self, node):
+    def visit(self, node: Node):
         self.do_visit(node)
         for c in node.children:
             self.visit(c)
 
-    def _is_function_docstring(self, node):
+    def _is_function_docstring(self, node: Node):
         """Skips docstrings for funtions, such as this one.
         Pytest doesn't include them in the report, so I don't think we should either,
         at least for now.
@@ -39,15 +44,15 @@ def _is_function_docstring(self, node):
             and is_in_function_context
         )
 
-    def _get_previous_sibling_that_is_not_comment_not_func_docstring(self, node):
+    def _get_previous_sibling_that_is_not_comment_not_func_docstring(self, node: Node):
         curr = node.prev_named_sibling
         while curr is not None and (
             curr.type == "comment" or self._is_function_docstring(curr)
         ):
             curr = curr.prev_named_sibling
         return curr
 
-    def do_visit(self, node):
+    def do_visit(self, node: Node):
         if node.is_named:
             current_line_number = node.start_point[0] + 1
             if node.type in (
@@ -84,17 +89,29 @@ def do_visit(self, node):
                     }
                 )
             if node.type in ("if_statement", "elif_clause"):
+                # Some of the children of a node have a field_name associated to them
+                # In the case of an if and elif, "consequence" is the code that is executed in that branch of code
                 first_if_statement = node.child_by_field_name("consequence")
-                if first_if_statement.type == "block":
-                    first_if_statement = first_if_statement.children[0]
+                try:
+                    if first_if_statement.type == "block":
+                        first_if_statement = first_if_statement.children[0]  # BUG
+                except IndexError:
+                    raise AnalysisError(
+                        f"if_statement consequence is empty block @ {self.analyzer.path}:{first_if_statement.start_point[0] + 1}, column {first_if_statement.start_point[1]}"
+                    )
                 self.analyzer.line_surety_ancestorship[
                     first_if_statement.start_point[0] + 1
                 ] = current_line_number
             if node.type in ("for_statement", "while_statement"):
-                first_if_statement = node.child_by_field_name("body")
-                if first_if_statement.type == "block":
-                    first_if_statement = first_if_statement.children[0]
+                first_loop_statement = node.child_by_field_name("body")
+                try:
+                    if first_loop_statement.type == "block":
+                        first_loop_statement = first_loop_statement.children[0]
+                except IndexError:
+                    raise AnalysisError(
+                        f"loop_statement body is empty block @ {self.analyzer.path}:{first_loop_statement.start_point[0] + 1}, column {first_loop_statement.start_point[1]}"
+                    )
                 self.analyzer.line_surety_ancestorship[
-                    first_if_statement.start_point[0] + 1
+                    first_loop_statement.start_point[0] + 1
                 ] = current_line_number
                 pass
diff --git a/tests/services/static_analysis/languages/python/test_node_wrappers_malformed_code.py b/tests/services/static_analysis/languages/python/test_node_wrappers_malformed_code.py
@@ -0,0 +1,65 @@
+import pathlib
+
+import pytest
+from tree_sitter import Node
+
+from codecov_cli.services.staticanalysis.analyzers.python import PythonAnalyzer
+from codecov_cli.services.staticanalysis.analyzers.python.node_wrappers import (
+    NodeVisitor,
+)
+from codecov_cli.services.staticanalysis.exceptions import AnalysisError
+from codecov_cli.services.staticanalysis.types import FileAnalysisRequest
+
+
+class TestMalformedIfStatements(object):
+    def test_if_empty_block_raises_analysis_error(self):
+        analysis_request = FileAnalysisRequest(
+            actual_filepath=pathlib.Path("test_file"), result_filename="test_file"
+        )
+        # Code for an empty IF. NOT valid python code
+        actual_code = b'x = 10\nif x == "batata":\n\n'
+        python_analyser = PythonAnalyzer(analysis_request, actual_code=actual_code)
+        # Parse the code snippet and get the if_statement node
+        tree = python_analyser.parser.parse(actual_code)
+        root = tree.root_node
+        assert root.type == "module"
+        assert root.child_count == 2
+        if_statement_node = root.children[1]
+        assert if_statement_node.type == "if_statement"
+        # Make sure it is indeed an empty if_statement
+        if_body = if_statement_node.child_by_field_name("consequence")
+        assert if_body.type == "block"
+        assert if_body.child_count == 0
+        visitor = NodeVisitor(python_analyser)
+        with pytest.raises(AnalysisError) as exp:
+            visitor.do_visit(if_statement_node)
+        assert (
+            str(exp.value)
+            == "if_statement consequence is empty block @ test_file:2, column 17"
+        )
+
+    def test_for_empty_block_raises_analysis_error(self):
+        analysis_request = FileAnalysisRequest(
+            actual_filepath=pathlib.Path("test_file"), result_filename="test_file"
+        )
+        # Code for an empty IF. NOT valid python code
+        actual_code = b"for x in range(10):\n\n"
+        python_analyser = PythonAnalyzer(analysis_request, actual_code=actual_code)
+        # Parse the code snippet and get the if_statement node
+        tree = python_analyser.parser.parse(actual_code)
+        root = tree.root_node
+        assert root.type == "module"
+        assert root.child_count == 1
+        for_statement_node = root.children[0]
+        assert for_statement_node.type == "for_statement"
+        # Make sure it is indeed an empty if_statement
+        if_body = for_statement_node.child_by_field_name("body")
+        assert if_body.type == "block"
+        assert if_body.child_count == 0
+        visitor = NodeVisitor(python_analyser)
+        with pytest.raises(AnalysisError) as exp:
+            visitor.do_visit(for_statement_node)
+        assert (
+            str(exp.value)
+            == "loop_statement body is empty block @ test_file:1, column 19"
+        )
diff --git a/tests/services/static_analysis/test_static_analysis_service.py b/tests/services/static_analysis/test_static_analysis_service.py
@@ -7,11 +7,65 @@
 import responses
 from responses import matchers
 
-from codecov_cli.services.staticanalysis import run_analysis_entrypoint
-from codecov_cli.services.staticanalysis.types import FileAnalysisRequest
+from codecov_cli.services.staticanalysis import process_files, run_analysis_entrypoint
+from codecov_cli.services.staticanalysis.types import (
+    FileAnalysisRequest,
+    FileAnalysisResult,
+)
 
 
 class TestStaticAnalysisService:
+    @pytest.mark.asyncio
+    async def test_process_files_with_error(self, mocker):
+        files_found = list(
+            map(
+                lambda filename: FileAnalysisRequest(str(filename), Path(filename)),
+                [
+                    "correct_file.py",
+                    "error_file.py",
+                ],
+            )
+        )
+        mock_get_context = mocker.patch(
+            "codecov_cli.services.staticanalysis.get_context"
+        )
+
+        def side_effect(config, filename: FileAnalysisRequest):
+            if filename.result_filename == "correct_file.py":
+                return FileAnalysisResult(
+                    filename=filename.result_filename, result={"hash": "abc123"}
+                )
+            if filename.result_filename == "error_file.py":
+                return FileAnalysisResult(
+                    filename=filename.result_filename, error="some error @ line 12"
+                )
+            # Should not get here, so fail test
+            assert False
+
+        mock_analyze_function = mocker.patch(
+            "codecov_cli.services.staticanalysis.analyze_file"
+        )
+        mock_analyze_function.side_effect = side_effect
+
+        def imap_side_effect(mapped_func, files):
+            results = []
+            for file in files:
+                results.append(mapped_func(file))
+            return results
+
+        mock_get_context.return_value.Pool.return_value.__enter__.return_value.imap_unordered.side_effect = (
+            imap_side_effect
+        )
+
+        results = await process_files(files_found, 1, {})
+        mock_get_context.return_value.Pool.return_value.__enter__.return_value.imap_unordered.assert_called()
+        assert mock_analyze_function.call_count == 2
+        assert results == dict(
+            all_data={"correct_file.py": {"hash": "abc123"}},
+            file_metadata=[{"file_hash": "abc123", "filepath": "correct_file.py"}],
+            processing_errors={"error_file.py": "some error @ line 12"},
+        )
+
     @pytest.mark.asyncio
     async def test_static_analysis_service(self, mocker):
         mock_file_finder = mocker.patch(