From 5df2ddf27c6145df65f69137d441f791144225ef Mon Sep 17 00:00:00 2001
From: Abishek V Ashok <abishekvashok@fb.com>
Date: Thu, 8 Sep 2022 03:19:30 -0700
Subject: [PATCH] Send lines to highlight in trace [trace viewing 1/2]

Summary:
Adds a feature to the Pysa playground for viewing traces. Once the taint-output.json file has been computed, the parsed results are sent to the Pysa frontend.

Modifies the existing cache approach to make it more modular and to support caching annotations as well.

Adds GitHub Actions CI tests (in the existing pysa action) to make sure the code for parsing the taint output doesn't go out of date, as the format of taint-output.json changes often. The existing pysa test was used because it already runs pysa from source code on `deliberately_vulnerable_flask_app`; adding a separate action that repeats the same steps with just a few more lines would not make much sense.

Signed-off-by: Abishek V Ashok <abishekvashok@fb.com>

Differential Revision: D38980480

fbshipit-source-id: bbb8b60bae73c0005625c10f7e0abd1e38227837
---
 .github/workflows/pysa.yml                    |  4 +
 .../run_integration_tests.sh                  |  3 +-
 tools/playground/application.py               | 94 ++++++++++++++++---
 .../tests/taint_output_parse_test.py          | 36 +++++++
 4 files changed, 124 insertions(+), 13 deletions(-)
 create mode 100644 tools/playground/tests/taint_output_parse_test.py

diff --git a/.github/workflows/pysa.yml b/.github/workflows/pysa.yml
index c61fbdfbc5a..905e8cfd9fc 100644
--- a/.github/workflows/pysa.yml
+++ b/.github/workflows/pysa.yml
@@ -50,3 +50,7 @@ jobs:
           cd ./documentation/deliberately_vulnerable_flask_app
           . ./setup.sh
           ./run_integration_tests.sh
+
+      - name: Test Pysa playground
+        run: |
+          python3 ./tools/playground/tests/taint_output_parse_test.py
diff --git a/documentation/deliberately_vulnerable_flask_app/run_integration_tests.sh b/documentation/deliberately_vulnerable_flask_app/run_integration_tests.sh
index 1d58a8cb537..b4ed31e86a7 100755
--- a/documentation/deliberately_vulnerable_flask_app/run_integration_tests.sh
+++ b/documentation/deliberately_vulnerable_flask_app/run_integration_tests.sh
@@ -7,7 +7,8 @@
 set +e
 python3 ../../tools/pysa_integration_tests/run.py \
     --skip-model-verification \
-    --run-from-source
+    --run-from-source \
+    --save-results-to=./
 
 exit_code=$?
 
diff --git a/tools/playground/application.py b/tools/playground/application.py
index c2d60a0a593..c25b39302df 100644
--- a/tools/playground/application.py
+++ b/tools/playground/application.py
@@ -15,7 +15,7 @@
 import tempfile
 import threading
 from pathlib import Path
-from typing import IO, List
+from typing import Dict, IO, List
 
 from flask import Flask, jsonify, request, Response
 from flask_cors import CORS
@@ -60,7 +60,7 @@ def _consume() -> None:
 @functools.lru_cache(maxsize=128)
 def _get_cache_contents(file_path: Path) -> str:
     with file_path.open() as cache_file:
-        return cache_file.read()
+        return json.loads(cache_file.read())
 
 
 @functools.lru_cache(maxsize=128)
@@ -74,6 +74,59 @@ def _get_cache_file_path(input: str, model: str) -> Path:
     return cache_file_path
 
 
+def _generate_cache_contents(
+    return_code: int, lines: List[str], annotations: List[Dict[str, str]]
+) -> str:
+    """Serialize one run's results into the JSON text stored in the cache."""
+    cached = dict(return_code=return_code, lines=lines, annotations=annotations)
+    return json.dumps(cached)
+
+
+def _parse_annotations_from_taint_output(
+    taint_output_file_path: Path,
+) -> List[Dict[str, str]]:
+    """Collect one highlight annotation per trace root of every reported issue.
+
+    The file is read as JSON Lines: each non-blank line is decoded on its own.
+    Only objects whose "kind" is "issue" are used; an issue lacking "message"
+    or "traces", or a root lacking "line", "start" or "end", is skipped.
+
+    Returns a list of {"message", "line", "start", "end"} dictionaries, or an
+    empty list when the path is not an existing regular file.
+    NOTE(review): positions are passed through as decoded and may be ints.
+    """
+    # `is_file()` is already False for a missing path, so no `exists()` check.
+    if not taint_output_file_path.is_file():
+        return []
+    annotations = []
+    with taint_output_file_path.open() as taint_output_file:
+        # Stream the file line by line rather than loading it whole.
+        for taint_output_line in taint_output_file:
+            if not taint_output_line.strip():
+                # Blank lines (e.g. a trailing newline) are not JSON documents.
+                continue
+            entry = json.loads(taint_output_line)
+            if entry.get("kind") != "issue":
+                continue
+            # `or {}` / `or []` make a missing or null level behave as empty.
+            data = entry.get("data") or {}
+            message = data.get("message")
+            if message is None:
+                continue
+            for trace in data.get("traces") or []:
+                for trace_root in trace.get("roots") or []:
+                    root = trace_root.get("root") or {}
+                    line = root.get("line")
+                    start = root.get("start")
+                    end = root.get("end")
+                    if line is None or start is None or end is None:
+                        continue
+                    annotations.append(
+                        {"message": message, "line": line, "start": start, "end": end}
+                    )
+    return annotations
+
+
 class Pyre:
     def __init__(self) -> None:
         self._directory: Path = Path(tempfile.mkdtemp())
@@ -143,6 +196,7 @@ def __init__(
         self._stubs: Path = Path(tempfile.mkdtemp())
         self.input: str = input
         self.model: str = model
+        self.taint_output_file_path = Path(self._directory / "taint_output.json")
 
         LOG.debug(f"Intializing Pysa in `{self._directory}`...")
         pyre_configuration = json.dumps(
@@ -171,14 +225,14 @@ def __init__(
     def analyze(self) -> None:
         LOG.debug("Running pysa")
         with subprocess.Popen(
-            ["pyre", "-n", "analyze"],
+            ["pyre", "-n", "analyze", "--no-verify", "--save-results-to", "./"],
             stderr=subprocess.PIPE,
             stdout=subprocess.PIPE,
             cwd=self._directory,
             text=True,
         ) as process:
             model_verification_errors = []
-            cache_lines = ""
+            cache_lines = []
             # pyre-fixme[16]: process.stderr is marked as Optional
             for line in iter(process.stderr.readline, b""):
                 line = line.rstrip()
@@ -204,19 +258,28 @@ def analyze(self) -> None:
                         model_verification_errors = []
                     emit("pysa_results_channel", {"type": "output", "line": line})
                 LOG.debug(line)
-                cache_lines += line + "\n"
+                cache_lines.append(line)
 
             return_code = process.wait()
             if return_code != 0:
                 result = {"type": "finished", "result": "error"}
             else:
                 result = {"type": "finished", "result": "ok"}
-
+            annotations = _parse_annotations_from_taint_output(
+                self.taint_output_file_path
+            )
+            if len(annotations) > 0:
+                emit(
+                    "pysa_result_channel",
+                    {"type": "annotations", "annotations": annotations},
+                )
             emit("pysa_results_channel", result)
             # write to cache now:
             with _get_cache_file_path(self.input, self.model).open("w") as cache_file:
-                cache_file.write(str(return_code) + "\n")
-                cache_file.write(cache_lines)
+                cache_contents = _generate_cache_contents(
+                    return_code, cache_lines, annotations
+                )
+                cache_file.write(cache_contents)
 
 
 def get_server():
@@ -266,16 +329,23 @@ def analyze(json) -> None:
             cache_file_path = _get_cache_file_path(input, model)
             if cache_file_path.exists():
                 LOG.info(f"Using cache `{cache_file_path}`...")
-                cache_contents = _get_cache_contents(cache_file_path).split("\n")
-                run_status = cache_contents.pop(0)
+                cache_contents = _get_cache_contents(cache_file_path)
+                run_status = cache_contents["return_code"]
+                lines = cache_contents["lines"]
+                annotations = cache_contents["annotations"]
                 emit(
                     "pysa_results_channel",
                     {
                         "type": "output",
-                        "line": "\n".join(cache_contents),
+                        "line": "\n".join(lines),
                     },
                 )
-                if run_status != "0":
+                if len(annotations) > 0:
+                    emit(
+                        "pysa_result_channel",
+                        {"type": "annotations", "annotations": annotations},
+                    )
+                if run_status != 0:
                     result = {"type": "finished", "result": "error"}
                 else:
                     result = {"type": "finished", "result": "ok"}
diff --git a/tools/playground/tests/taint_output_parse_test.py b/tools/playground/tests/taint_output_parse_test.py
new file mode 100644
index 00000000000..122230f5dee
--- /dev/null
+++ b/tools/playground/tests/taint_output_parse_test.py
@@ -0,0 +1,36 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import json
+import os
+import unittest
+from pathlib import Path
+
+from ..application import _parse_annotations_from_taint_output
+
+
+class TestTaintOutputParser(unittest.TestCase):
+    """Compares the parser's output for a saved taint output with expected JSON."""
+
+    def test_parser(self):
+        # Both files are looked up in the current working directory;
+        # `os.getcwd()` returns a str, so wrap it in Path before using `/`.
+        expected_output_file_path = (
+            Path(os.getcwd()) / "taint_output_parsed.expected.json"
+        )
+        self.assertTrue(expected_output_file_path.is_file())
+        with expected_output_file_path.open() as expected_output_file:
+            expected_output = json.loads(expected_output_file.read())
+        # Pysa's --save-results-to writes "taint-output.json" (hyphen, not underscore).
+        taint_output_file_path = Path(os.getcwd()) / "taint-output.json"
+        self.assertTrue(taint_output_file_path.is_file())
+        self.assertEqual(
+            _parse_annotations_from_taint_output(taint_output_file_path),
+            expected_output,
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()