
Commit

[dvsim,report] Add repro_command links to HTML report for simulation flows

Signed-off-by: Harry Callahan <[email protected]>
hcallahan-lowrisc committed Aug 17, 2024
1 parent fe89c99 commit 0833876
Showing 5 changed files with 191 additions and 96 deletions.
18 changes: 18 additions & 0 deletions util/dvsim/Deploy.py
@@ -590,6 +590,24 @@ def extract_info_from_log(self, log_text: List):
            log.debug(f"{self.full_name}: {e}")


    def create_repro_command(self) -> str:
        """Return a single shell command that reproduces this job.

        The command checks out the git revision the results were generated
        from, then re-runs dvsim for just this item with the same tool and
        seed(s).
        """
        git_cmd = [
            f"git checkout {self.sim_cfg.results_dict['git_revision']}"
        ]

        dvsim_cmd = [
            "util/dvsim/dvsim.py",
            str(self.sim_cfg.flow_cfg_file.relative_to(self.sim_cfg.proj_root)),
            f"-i={self.name}",
            f"--tool={self.sim_cfg.tool}",
            f"--fixed-seed={self.seed}",
        ]
        if self.build_seed is not None:
            dvsim_cmd.append(f"--build-seed={self.build_seed}")

        return " ".join(git_cmd + ["&&"] + dvsim_cmd)


class CovUnr(Deploy):
    """Abstraction for coverage UNR flow."""

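For illustration (not part of this commit), the string returned by create_repro_command() has roughly the following shape; the revision, config path, and seed values here are made-up placeholders.

# Illustrative sketch only: a repro command assembled the same way as
# create_repro_command() above, using placeholder values.
repro_cmd = " ".join([
    "git checkout 0f1e2d3c",               # results_dict['git_revision']
    "&&",
    "util/dvsim/dvsim.py",
    "hw/ip/uart/dv/uart_sim_cfg.hjson",    # flow_cfg_file relative to proj_root
    "-i=uart_smoke",                       # self.name
    "--tool=vcs",                          # self.sim_cfg.tool
    "--fixed-seed=123456789",              # self.seed
    "--build-seed=987654321",              # appended only if build_seed is set
])
print(repro_cmd)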
152 changes: 57 additions & 95 deletions util/dvsim/SimCfg.py
@@ -26,11 +26,7 @@
from tabulate import tabulate
from Test import Test
from Testplan import Testplan
from utils import TS_FORMAT, rm_path

# This affects the bucketizer failure report.
_MAX_UNIQUE_TESTS = 5
_MAX_TEST_RESEEDS = 2
from utils import TS_FORMAT, rm_path, md_results_to_html


class SimCfg(FlowCfg):
@@ -133,6 +129,7 @@ def __init__(self, flow_cfg_file, hjson_data, args, mk_config):
        self.links = {}
        self.build_list = []
        self.run_list = []
        self.sim_results: dict = {}
        self.cov_merge_deploy = None
        self.cov_report_deploy = None
        self.results_summary = OrderedDict()
@@ -545,8 +542,39 @@ def cov_unr(self):
        for item in self.cfgs:
            item._cov_unr()

    def _gen_json_results(self, run_results):
        """Returns the run results as json-formatted dictionary.

    def _gen_results(self) -> None:
        # Set up the structure which summarizes all simulation results for
        # this flow.
        self.sim_results = SimResults(self.deploy, self.results)
        # Post-process the above results data to generate something more
        # useful for exporting.
        self.results_dict = self._gen_results_dict()

        # The base class flow first generates markdown, then converts it to
        # HTML using mistletoe. However, we want to do something slightly
        # different for simulation results.
        #
        # We want to generate an HTML report which contains interactive
        # click-to-copy reproduction buttons, which by definition cannot
        # exist in markdown, and hence we need a different flow here. We also
        # don't want to generate markdown with placeholders for buttons and
        # other elements, as that would look ugly.
        #
        # This approach generates the markdown report TWICE, once with
        # embedded HTML for the repro buttons, and once without.

        # Generate pure markdown for writing to disk, and also for printing
        # on the console.
        self.results_md = self._gen_md_results()

        # Generate the markdown report again, except with inline HTML buttons
        # to copy the repro commands. We then pass this through the existing
        # flow (using 'mistletoe'), which does not modify any valid inline
        # HTML.
        self.results_md_with_buttons = self._gen_md_results(
            add_repro_html_buttons=True)
        self.results_html = md_results_to_html(
            self.results_title, self.css_file, self.results_md_with_buttons)

        # Simulation flows also create a json results output file, so do this
        # now.
        self.results_json = json.dumps(self.results_dict)
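The comment above relies on mistletoe leaving valid inline HTML untouched. A minimal standalone sketch of that behaviour (not part of this commit):

# mistletoe renders markdown to HTML while passing inline HTML, such as the
# repro buttons, through unchanged.
import mistletoe

md_line = ("* uart_smoke has 2 failures. "
           "<button class='btn'>Click to copy repro</button>")
print(mistletoe.markdown(md_line))
# The <button ...> element appears verbatim inside the rendered <li>.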


    def _gen_results_dict(self) -> dict:
        """Process the run results and return a dictionary.
        """

        def _empty_str_as_none(s: str) -> Optional[str]:
@@ -623,9 +651,8 @@ def _test_result_to_dict(tr) -> dict:

        # If the testplan does not yet have test results mapped to testpoints,
        # map them now.
        sim_results = SimResults(self.deploy, run_results)
        if not self.testplan.test_results_mapped:
            self.testplan.map_test_results(test_results=sim_results.table)
            self.testplan.map_test_results(test_results=self.sim_results.table)

        # Extract results of testpoints and tests into the `testpoints` field.
        for tp in self.testplan.testpoints:
@@ -660,7 +687,7 @@ def _test_result_to_dict(tr) -> dict:
            })

        # Extract unmapped tests.
        unmapped_trs = [tr for tr in sim_results.table if not tr.mapped]
        unmapped_trs = [tr for tr in self.sim_results.table if not tr.mapped]
        for tr in unmapped_trs:
            results['results']['unmapped_tests'].append(
                _test_result_to_dict(tr))
@@ -683,8 +710,8 @@ def _test_result_to_dict(tr) -> dict:
            results['results']['coverage'][k.lower()] = _pct_str_to_float(v)

        # Extract failure buckets.
        if sim_results.buckets:
            by_tests = sorted(sim_results.buckets.items(),
        if self.sim_results.buckets:
            by_tests = sorted(self.sim_results.buckets.items(),
                              key=lambda i: len(i[1]),
                              reverse=True)
            for bucket, tests in by_tests:
@@ -715,85 +742,22 @@ def _test_result_to_dict(tr) -> dict:
                'failing_tests': fts,
            })

        # Store the `results` dictionary in this object.
        self.results_dict = results
        # Return the `results` dictionary
        return results

        # Return the `results` dictionary as json string.
        return json.dumps(self.results_dict)
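Only a few fields of the exported dictionary are visible in this diff. Under that caveat, a rough sketch of its shape, with placeholder values and an assumed per-entry schema:

# Sketch of the dictionary built by _gen_results_dict(), inferred only from
# the keys visible in this diff (git_revision, testpoints, unmapped_tests,
# coverage, failure_buckets). All values are placeholders.
example_results = {
    "git_revision": "0f1e2d3c",
    "results": {
        "testpoints": [],      # testplan testpoints with their mapped tests
        "unmapped_tests": [],  # _test_result_to_dict() entries for unmapped tests
        "coverage": {"line": 91.2, "branch": 85.0},  # via _pct_str_to_float()
        "failure_buckets": [{"failing_tests": ["uart_smoke"]}],
    },
}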

    def _gen_results(self, run_results):
        '''
        The function is called after the regression has completed. It collates
        the status of all run targets and generates a dict. It parses the
        testplan and maps the generated result to the testplan entries to
        generate a final table (list). It also prints the full list of
        failures for debug / triage. If cov is enabled, then the summary
        coverage report is also generated. The final result is in markdown
        format.
        '''

        def indent_by(level):
            return " " * (4 * level)

        def create_failure_message(test, line, context):
            message = [f"{indent_by(2)}* {test.qual_name}\\"]
            if line:
                message.append(
                    f"{indent_by(2)} Line {line}, in log " +
                    test.get_log_path())
            else:
                message.append(f"{indent_by(2)} Log {test.get_log_path()}")
            if context:
                message.append("")
                lines = [f"{indent_by(4)}{c.rstrip()}" for c in context]
                message.extend(lines)
            message.append("")
            return message

        def create_bucket_report(buckets):
            """Creates a report based on the given buckets.

            The buckets are sorted by descending number of failures. Within
            buckets this also groups tests by unqualified name, and just a
            few failures are shown per unqualified name.

            Args:
                buckets: A dictionary by bucket containing triples
                    (test, line, context).

            Returns:
                A list of text lines for the report.
            """
            by_tests = sorted(buckets.items(),
                              key=lambda i: len(i[1]),
                              reverse=True)
            fail_msgs = ["\n## Failure Buckets", ""]
            for bucket, tests in by_tests:
                fail_msgs.append(f"* `{bucket}` has {len(tests)} failures:")
                unique_tests = collections.defaultdict(list)
                for (test, line, context) in tests:
                    unique_tests[test.name].append((test, line, context))
                for name, test_reseeds in list(unique_tests.items())[
                        :_MAX_UNIQUE_TESTS]:
                    fail_msgs.append(f"{indent_by(1)}* Test {name} has "
                                     f"{len(test_reseeds)} failures.")
                    for test, line, context in test_reseeds[:_MAX_TEST_RESEEDS]:
                        fail_msgs.extend(
                            create_failure_message(test, line, context))
                    if len(test_reseeds) > _MAX_TEST_RESEEDS:
                        fail_msgs.append(
                            f"{indent_by(2)}* ... and "
                            f"{len(test_reseeds) - _MAX_TEST_RESEEDS} "
                            "more failures.")
                if len(unique_tests) > _MAX_UNIQUE_TESTS:
                    fail_msgs.append(
                        f"{indent_by(1)}* ... and "
                        f"{len(unique_tests) - _MAX_UNIQUE_TESTS} more tests.")

                fail_msgs.append("")
            return fail_msgs

        deployed_items = self.deploy
        results = SimResults(deployed_items, run_results)

    def _gen_md_results(self, add_repro_html_buttons: bool = False) -> str:
        '''
        The function is called after the regression has completed. It does the
        following:

        - collates the status of all run targets and generates a dict.
        - parses the testplan and maps the generated result to the testplan
          entries.
        - Generates a final table (list).
        - Prints the full list of failures for debug / triage.
        - If cov is enabled, then the summary coverage report is also
          generated.

        The final result in markdown format is returned as a string.
        '''

        # Generate results table for runs.
        results_str = "## " + self.results_title + "\n"
@@ -843,13 +807,13 @@ def create_bucket_report(buckets):
results_str += ("### Build randomization enabled with "
f"--build-seed {self.build_seed}\n")

if not results.table:
if not self.sim_results.table:
results_str += "No results to display.\n"

else:
# Map regr results to the testplan entries.
if not self.testplan.test_results_mapped:
self.testplan.map_test_results(test_results=results.table)
self.testplan.map_test_results(test_results=self.sim_results.table)

            results_str += self.testplan.get_test_results_table(
                map_full_testplan=self.map_full_testplan)
@@ -861,7 +825,7 @@ def create_bucket_report(buckets):

        # Append coverage results if coverage was enabled.
        if self.cov_report_deploy is not None:
            report_status = run_results[self.cov_report_deploy]
            report_status = self.results[self.cov_report_deploy]
            if report_status == "P":
                results_str += "\n## Coverage Results\n"
                # Link the dashboard page using "cov_report_page" value.
@@ -879,11 +843,10 @@ def create_bucket_report(buckets):
            else:
                self.results_summary["Coverage"] = "--"

        if results.buckets:
        if self.sim_results.buckets:
            self.errors_seen = True
            results_str += "\n".join(create_bucket_report(results.buckets))
            results_str += self.sim_results.create_md_bucket_report(
                add_repro_html_buttons)

        self.results_md = results_str
        return results_str

    def gen_results_summary(self):
@@ -908,9 +871,8 @@ def gen_results_summary(self):
            if row:
                # convert name entry to relative link
                row = cfg.results_summary
                row["Name"] = cfg._get_results_page_link(
                    self.results_dir,
                    row["Name"])
                row["Name"] = cfg._get_md_relative_link_html_report(
                    link_text=row["Name"])

            # If header is set, ensure it's the same for all cfgs.
            if header:
101 changes: 101 additions & 0 deletions util/dvsim/SimResults.py
@@ -7,6 +7,7 @@

import collections
import re
import uuid

from Testplan import Result

@@ -65,6 +66,10 @@
]


# This affects the bucketizer failure report.
_MAX_UNIQUE_TESTS = 5
_MAX_TEST_RESEEDS = 2
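
A standalone sketch (not part of this commit) of how these two caps truncate the failure report, using made-up counts:

# At most _MAX_UNIQUE_TESTS tests are listed per bucket, and at most
# _MAX_TEST_RESEEDS failures per test; the remainder collapses into
# "... and N more" lines.
reseeds_per_test = {"uart_smoke": 7, "uart_tx_rx": 3, "uart_intr": 1}
for name, n_fails in list(reseeds_per_test.items())[:_MAX_UNIQUE_TESTS]:
    shown = min(n_fails, _MAX_TEST_RESEEDS)
    print(f"Test {name}: showing {shown} of {n_fails} failures")
    if n_fails > _MAX_TEST_RESEEDS:
        print(f"    ... and {n_fails - _MAX_TEST_RESEEDS} more failures.")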

class SimResults:
    '''An object wrapping up a table of results for some tests
@@ -127,3 +132,99 @@ def _bucketize(self, fail_msg):
        for regex in _REGEX_STAR:
            bucket = regex.sub('*', bucket)
        return bucket


    def create_md_bucket_report(self, add_repro_html_buttons: bool = False) -> str:
        """Creates a markdown report based on all added failure buckets.

        The buckets are sorted by descending number of failures. Within
        buckets this also groups tests by unqualified name, and just a few
        failures are shown per unqualified name.

        Returns:
            A markdown string to be embedded into the report.
        """

        def indent_by(level):
            return " " * (4 * level)

        def create_failure_message(test, line, context) -> list[str]:
            # First print the qualified name of the test.
            message = [f"{indent_by(2)}* {test.qual_name}\\"]

            # Print the path to the logfile containing the failure, including
            # a line number if present.
            log_msg = ""
            if line:
                log_msg = f"{indent_by(2)} Line {line}, in log {test.get_log_path()}"
            else:
                log_msg = f"{indent_by(2)} Log {test.get_log_path()}"

            message.append(log_msg)

            if add_repro_html_buttons:
                # Append an HTML button to the above log_msg. This button
                # hooks some inline javascript in the report to copy the
                # reproduction command to the clipboard. See
                # utils::md_results_to_html() for the javascript.
                repro_cmd = test.create_repro_command()
                repro_button_id = uuid.uuid4()

                repro_button_html = " ".join([
                    f"<button id='{repro_button_id}'",
                    "class='btn'",
                    f"onclick='copyContent(\"{repro_button_id}\")'",
                    f"repro_cmd='{repro_cmd}'>",
                    "Click to copy repro",
                    "</button>",
                ])

                message[-1] += f" {repro_button_html}"

            # Print the logfile context around the failing line if present.
            if context:
                message.append("")
                context_lines = [f"{indent_by(4)}{c.rstrip()}" for c in context
                                 if (c != "\n")]  # Drop empty lines from the context
                message.extend(context_lines)

            return message

        fail_msgs = ["\n## Failure Buckets", ""]

        by_tests = sorted(self.buckets.items(),
                          key=lambda i: len(i[1]),
                          reverse=True)

        # Loop over all buckets.
        for bucket, tests in by_tests:
            fail_msgs.append(f"* `{bucket}` has {len(tests)} failures:")

            # Reduce all failures in the bucket to a unique entry for each
            # test.
            unique_tests = collections.defaultdict(list)
            for (test, line, context) in tests:
                unique_tests[test.name].append((test, line, context))

            # Loop over the unique tests with failures in this bucket...
            for name, test_reseeds in list(unique_tests.items())[:_MAX_UNIQUE_TESTS]:

                msg = f"{indent_by(1)}* Test {name} has {len(test_reseeds)} failures."
                fail_msgs.append(msg)

                # Up to a maximum of _MAX_TEST_RESEEDS, print a short summary
                # for each failing seed.
                for test, line, context in test_reseeds[:_MAX_TEST_RESEEDS]:
                    msg_lines = create_failure_message(test, line, context)
                    fail_msgs.extend(msg_lines)

                # If there are too many failing seeds and we have to truncate
                # the list, add a "... and more failures" message to show
                # this.
                if len(test_reseeds) > _MAX_TEST_RESEEDS:
                    msg = (f"{indent_by(2)}* ... and "
                           f"{len(test_reseeds) - _MAX_TEST_RESEEDS} "
                           "more failures.")
                    fail_msgs.append(msg)

            # If there are too many unique tests with this failure mode, also
            # truncate the list...
            if len(unique_tests) > _MAX_UNIQUE_TESTS:
                msg = (f"{indent_by(1)}* ... and "
                       f"{len(unique_tests) - _MAX_UNIQUE_TESTS} more tests.")
                fail_msgs.append(msg)

        # Return as a string.
        return "\n".join(fail_msgs)
2 changes: 1 addition & 1 deletion util/dvsim/Testplan.py
@@ -659,7 +659,7 @@ def get_test_results_table(self, map_full_testplan=True):
stage = ""
tp_name = ""

text = "\n### Test Results\n"
text = "\n## Test Results\n"
text += tabulate(table,
headers=header,
tablefmt="pipe",
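For reference, tablefmt="pipe" makes tabulate emit a GitHub-flavoured markdown table, so the heading above sits directly over a renderable table. A standalone sketch:

# tablefmt="pipe" produces a markdown table with an alignment row, which is
# why the generated report remains valid markdown.
from tabulate import tabulate

print(tabulate([["uart_smoke", 2, 2]],
               headers=["Name", "Passing", "Total"],
               tablefmt="pipe"))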