Merge pull request #44 from tatu-aalto/log_drain3

feat: Use Drain3 to template failure messages
WithSecureOpenSource · Jun 17, 2022 · 8a47a97 · 8a47a97
2 parents 2cc10fd + efe9538
commit 8a47a97
Show file tree

Hide file tree

Showing 9 changed files with 193 additions and 68 deletions.
diff --git a/failure_analysis/drain3.ini b/failure_analysis/drain3.ini
@@ -0,0 +1,10 @@
+[MASKING]
+masking = [
+          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(([0-9a-f]{2,}:){3,}([0-9a-f]{2,}))((?=[^A-Za-z0-9])|$)", "mask_with": "ID"},
+          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP"},
+          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9a-f]{6,} ?){3,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"},
+          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9A-F]{4} ?){4,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"},
+          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(0x[a-f0-9A-F]+)((?=[^A-Za-z0-9])|$)", "mask_with": "HEX"},
+          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "mask_with": "NUM"},
+          {"regex_pattern":"(?<=executed cmd )(\".+?\")", "mask_with": "CMD"}
+          ]
diff --git a/failure_analysis/failure_analysis.py b/failure_analysis/failure_analysis.py
@@ -13,10 +13,13 @@
 import itertools
 import os
 import sys
+from os.path import dirname
 from pathlib import Path
 
 import numpy as np
 import pandas as pd  # type: ignore
+from drain3 import TemplateMiner  # type: ignore
+from drain3.template_miner_config import TemplateMinerConfig  # type: ignore
 from lxml import etree  # type: ignore
 from sklearn.feature_extraction.text import CountVectorizer  # type: ignore
 from sklearn.metrics.pairwise import cosine_similarity  # type: ignore
@@ -68,11 +71,23 @@ def score_failures(failures: list):
     return coss
 
 
-def run(path: str, min_threshold: int):
+def template_failures(failures: list, drain_config: str) -> list:
+    """Turn failure messages into Drain3 templates.
+
+    All messages are fed, in order, to a single ``TemplateMiner`` instance and
+    the ``template_mined`` entry of each result is collected.
+
+    :param failures: Failure message texts to template.
+    :param drain_config: Path to a Drain3 ini file. When empty, the
+        ``drain3.ini`` stored next to this module is used instead.
+    :return: One ``template_mined`` value per input message, in input order.
+    """
+    config = TemplateMinerConfig()
+    if not drain_config:
+        # Anchor the bundled ini to this module's absolute directory. Plain
+        # ``dirname(__file__) + "/drain3.ini"`` degrades to "/drain3.ini" (the
+        # filesystem root) whenever ``__file__`` has no directory component.
+        drain_config = os.path.join(dirname(os.path.abspath(__file__)), "drain3.ini")
+    # NOTE(review): Drain3 appears to only log a warning when the ini file is
+    # missing rather than raising - confirm before relying on an error here.
+    config.load(drain_config)
+    template_miner = TemplateMiner(config=config)
+    return [template_miner.add_log_message(failure).get("template_mined") for failure in failures]
+
+
+def run(path: str, min_threshold: int, drain_ini: str, drain_off: bool):
     xml_path = Path(path)
     if not xml_path.is_dir():
         raise IOError(f"{path} should be directory but it was not.")
     failure, testname, filename, classname = parse_xml(xml_path)
+    if not drain_off:
+        failure = template_failures(failure, drain_ini)
 
     if len(failure) == 0:
         print("NO FAILURES FOUND")
@@ -134,13 +149,31 @@ def main():
         ),
         default=0.80,
     )
+    parser.add_argument(
+        "--drain",
+        "-D",
+        type=str,
+        help=(
+            "Path to drain.ini file, which is used to configure Drain3 templating. "
+            "If not given default templating will be used, unless --drain-off argument is given."
+        ),
+        default="",
+    )
+    parser.add_argument(
+        "--drain-off",
+        help="Turns drain templating off and will use error text as is. By default drain is enabled.",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+    )
     parser.add_argument("path", type=str, help="Path to folder where xunit files are stored")
     args = parser.parse_args()
     path = args.path
     min_threshold = args.min
+    drain_ini = args.drain
+    drain_off = args.drain_off
     if not Path(path).is_dir():
         raise ValueError(f"{path} is not directory.")
-    run(path, min_threshold)
+    run(path, min_threshold, drain_ini, drain_off)
 
 
 if __name__ == "__main__":

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,60 +1,61 @@
-[tool.poetry]
-name = "failures-analysis"
-version = "1.1.0"
-description = " failures-analysis package provides fast and reliable way to find and group similar failures in test automation."
-authors = ["Tatu Aalto"]
-license = "Apache-2.0"
-readme = "README.md"
-homepage = "https://github.com/F-Secure/failures-analysis"
-classifiers = [
-    "Development Status :: 3 - Alpha",
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
-    "Programming Language :: Python :: 3.9",
-    "Programming Language :: Python :: 3.10",
-    "License :: OSI Approved :: Apache Software License",
-    "Operating System :: OS Independent",
-    "Topic :: Software Development :: Testing",
-]
-packages = [
-    { include = "failure_analysis", from="."}
-]
-
-[tool.poetry.scripts]
-failures-analysis = "failure_analysis.failure_analysis:main"
-
-[tool.poetry.urls]
-"Change log" = "https://github.com/F-Secure/failures-analysis/blob/main/CHANGELOG.md"
-
-[tool.poetry.dependencies]
-python = "^3.8"
-numpy = "^1.22.4"
-pandas = "^1.4.2"
-sklearn = "^0.0"
-lxml = "^4.9.0"
-
-[tool.poetry.dev-dependencies]
-pytest = "^7.1.2"
-black = "^22.3.0"
-isort = "^5.10.1"
-invoke = "^1.7.1"
-mypy = "^0.961"
-flake8 = "^4.0.1"
-python-semantic-release = "7.28.1"
-approvaltests = "^5.2.0"
-
-[build-system]
-requires = ["poetry-core>=1.0.0"]
-build-backend = "poetry.core.masonry.api"
-
-[tool.black]
-line-length = 120
-target-version = ['py38']
-
-[tool.semantic_release]
-version_toml  = [
-    "pyproject.toml:tool.poetry.version",
-    "failure_analysis/__init__.py:__version__"
-]
-branch = "main"
-build_command = "poetry build"
+[tool.poetry]
+name = "failures-analysis"
+version = "1.1.0"
+description = "failures-analysis package provides a fast and reliable way to find and group similar failures in test automation."
+authors = ["Tatu Aalto"]
+license = "Apache-2.0"
+readme = "README.md"
+homepage = "https://github.com/F-Secure/failures-analysis"
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+    "Topic :: Software Development :: Testing",
+]
+packages = [
+    { include = "failure_analysis", from="."}
+]
+
+[tool.poetry.scripts]
+failures-analysis = "failure_analysis.failure_analysis:main"
+
+[tool.poetry.urls]
+"Change log" = "https://github.com/F-Secure/failures-analysis/blob/main/CHANGELOG.md"
+
+[tool.poetry.dependencies]
+python = "^3.8"
+numpy = "^1.22.4"
+pandas = "^1.4.2"
+sklearn = "^0.0"
+lxml = "^4.9.0"
+drain3 = "^0.9.10"
+
+[tool.poetry.dev-dependencies]
+pytest = "^7.1.2"
+black = "^22.3.0"
+isort = "^5.10.1"
+invoke = "^1.7.1"
+mypy = "^0.961"
+flake8 = "^4.0.1"
+python-semantic-release = "7.28.1"
+approvaltests = "^5.2.0"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.black]
+line-length = 120
+target-version = ['py38']
+
+[tool.semantic_release]
+version_toml  = [
+    "pyproject.toml:tool.poetry.version",
+    "failure_analysis/__init__.py:__version__"
+]
+branch = "main"
+build_command = "poetry build"
diff --git a/utest/resources/drain3.ini b/utest/resources/drain3.ini
@@ -0,0 +1,4 @@
+[MASKING]
+masking = [
+          {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP_ADDRESS"}
+          ]
diff --git a/utest/test_drain3.py b/utest/test_drain3.py
@@ -0,0 +1,27 @@
+from pathlib import Path
+
+from approvaltests import verify  # type: ignore
+
+from failure_analysis.failure_analysis import template_failures
+
+UTEST_ROOT = Path(__file__).resolve().parent
+XUNIT_FILES_DIR = UTEST_ROOT / "resources"
+
+
+def test_template_failures():
+    """Approve the templates mined, with an empty config path, from three messages that differ only in one IP."""
+    failures = [
+        f"def test_02():\n>       assert 'connected to 192.168.0.{host}' == 'connected to 192.0.0.0'\nE       assert False\n\n\ntests\\test_me.py:6: AssertionError"
+        for host in (1, 2, 3)
+    ]
+    verify(template_failures(failures, ""))
+
+
+def test_custom_ini(tmp_path):
+    """Approve the template mined when the ini file from the resources folder is passed explicitly."""
+    # tmp_path is requested from pytest but not used by this test.
+    custom_config = (XUNIT_FILES_DIR / "drain3.ini").resolve()
+    assert custom_config.is_file()
+    messages = [
+        "def test_02():\n>       assert 'connected to 192.168.0.1' == 'connected to 192.0.0.0'\nE       assert False\n\n\ntests\\test_me.py:6: AssertionError",
+    ]
+    verify(template_failures(messages, str(custom_config)))
diff --git a/utest/test_drain3.test_custom_ini.approved.txt b/utest/test_drain3.test_custom_ini.approved.txt
@@ -0,0 +1 @@
+["def test_02(): > assert 'connected to <IP_ADDRESS>' == 'connected to <IP_ADDRESS>' E assert False tests\\test_me.py:6: AssertionError"]
diff --git a/utest/test_drain3.test_template_failures.approved.txt b/utest/test_drain3.test_template_failures.approved.txt
@@ -0,0 +1 @@
+["def test_<NUM>(): > assert 'connected to <IP>' == 'connected to <IP>' E assert False tests\\test_me.py:<NUM>: AssertionError", "def test_<NUM>(): > assert 'connected to <IP>' == 'connected to <IP>' E assert False tests\\test_me.py:<NUM>: AssertionError", "def test_<NUM>(): > assert 'connected to <IP>' == 'connected to <IP>' E assert False tests\\test_me.py:<NUM>: AssertionError"]
diff --git a/utest/test_similarity.py b/utest/test_similarity.py
@@ -36,11 +36,11 @@ def test_score_failures():
 
 def test_invalid_path():
     with pytest.raises(IOError):
-        run("not/here", MIN_THRESHOLD)
+        run("not/here", MIN_THRESHOLD, "", True)
 
 
 def test_console_output(capsys):
-    run(str(XUNIT_FILES_DIR), MIN_THRESHOLD)
+    run(str(XUNIT_FILES_DIR), MIN_THRESHOLD, "", True)
     captured = capsys.readouterr()
     verify(captured.out)
 
@@ -49,13 +49,13 @@ def test_no_failures(capsys):
     with pytest.raises(SystemExit):
         with tempfile.TemporaryDirectory() as temp_folder:
             shutil.copy(PASS_01_FILE_PATH, Path(temp_folder) / PASS_01_FILE_NAME)
-            run(temp_folder, MIN_THRESHOLD)
+            run(temp_folder, MIN_THRESHOLD, "", False)
             captured = capsys.readouterr()
             assert captured.out == "NO FAILURES FOUND"
 
     with pytest.raises(SystemExit):
         with tempfile.TemporaryDirectory() as temp_folder:
-            run(temp_folder, MIN_THRESHOLD)
+            run(temp_folder, MIN_THRESHOLD, "", True)
             captured = capsys.readouterr()
             assert captured.out == "NO FAILURES FOUND"
 
@@ -67,6 +67,6 @@ def test_finding_files(capsys):
         shutil.copy(PASS_01_FILE_PATH, folder_match_filter_patters / PASS_01_FILE_NAME)
         shutil.copy(FAIL_01_FILE_PATH, folder_match_filter_patters / FAIL_01_FILE_NAME)
         shutil.copy(FAIL_02_FILE_PATH, folder_match_filter_patters / FAIL_02_FILE_NAME)
-        run(temp_folder, MIN_THRESHOLD)
+        run(temp_folder, MIN_THRESHOLD, "", True)
         captured = capsys.readouterr()
         verify(captured.out)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		["def test_02(): > assert 'connected to <IP_ADDRESS>' == 'connected to <IP_ADDRESS>' E assert False tests\\test_me.py:6: AssertionError"]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		["def test_<NUM>(): > assert 'connected to <IP>' == 'connected to <IP>' E assert False tests\\test_me.py:<NUM>: AssertionError", "def test_<NUM>(): > assert 'connected to <IP>' == 'connected to <IP>' E assert False tests\\test_me.py:<NUM>: AssertionError", "def test_<NUM>(): > assert 'connected to <IP>' == 'connected to <IP>' E assert False tests\\test_me.py:<NUM>: AssertionError"]