diff --git a/failure_analysis/drain3.ini b/failure_analysis/drain3.ini new file mode 100644 index 0000000..eeb3a71 --- /dev/null +++ b/failure_analysis/drain3.ini @@ -0,0 +1,10 @@ +[MASKING] +masking = [ + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(([0-9a-f]{2,}:){3,}([0-9a-f]{2,}))((?=[^A-Za-z0-9])|$)", "mask_with": "ID"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9a-f]{6,} ?){3,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9A-F]{4} ?){4,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(0x[a-f0-9A-F]+)((?=[^A-Za-z0-9])|$)", "mask_with": "HEX"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "mask_with": "NUM"}, + {"regex_pattern":"(?<=executed cmd )(\".+?\")", "mask_with": "CMD"} + ] diff --git a/failure_analysis/failure_analysis.py b/failure_analysis/failure_analysis.py index 1c1e828..54c27fe 100644 --- a/failure_analysis/failure_analysis.py +++ b/failure_analysis/failure_analysis.py @@ -13,10 +13,13 @@ import itertools import os import sys +from os.path import dirname from pathlib import Path import numpy as np import pandas as pd # type: ignore +from drain3 import TemplateMiner # type: ignore +from drain3.template_miner_config import TemplateMinerConfig # type: ignore from lxml import etree # type: ignore from sklearn.feature_extraction.text import CountVectorizer # type: ignore from sklearn.metrics.pairwise import cosine_similarity # type: ignore @@ -68,11 +71,23 @@ def score_failures(failures: list): return coss -def run(path: str, min_threshold: int): +def template_failures(failures: list, drain_config: str) -> list: + config = TemplateMinerConfig() + if drain_config: + config.load(drain_config) + else: + config.load(dirname(__file__) + "/drain3.ini") + template_miner = TemplateMiner(config=config) + return [template_miner.add_log_message(failure).get("template_mined") for failure in failures] + + +def run(path: str, min_threshold: int, drain_ini: str, drain_off: bool): xml_path = Path(path) if not xml_path.is_dir(): raise IOError(f"{path} should be directory but it was not.") failure, testname, filename, classname = parse_xml(xml_path) + if not drain_off: + failure = template_failures(failure, drain_ini) if len(failure) == 0: print("NO FAILURES FOUND") @@ -134,13 +149,31 @@ def main(): ), default=0.80, ) + parser.add_argument( + "--drain", + "-D", + type=str, + help=( + "Path to drain.ini file, which is used to configure Drain3 templating. " + "If not given default templating will be used, unless --drain-off argument is given." + ), + default="", + ) + parser.add_argument( + "--drain-off", + help="Turns drain templating off and will use error text as is. By default drain is enabled.", + action=argparse.BooleanOptionalAction, + default=False, + ) parser.add_argument("path", type=str, help="Path to folder where xunit files are stored") args = parser.parse_args() path = args.path min_threshold = args.min + drain_ini = args.drain + drain_off = args.drain_off if not Path(path).is_dir(): raise ValueError(f"{path} is not directory.") - run(path, min_threshold) + run(path, min_threshold, drain_ini, drain_off) if __name__ == "__main__": diff --git a/poetry.lock b/poetry.lock index 1421fe2..536f922 100644 --- a/poetry.lock +++ b/poetry.lock @@ -104,6 +104,14 @@ webencodings = "*" css = ["tinycss2 (>=1.1.0)"] dev = ["pip-tools (==6.5.1)", "pytest (==7.1.1)", "flake8 (==4.0.1)", "tox (==3.24.5)", "sphinx (==4.3.2)", "twine (==4.0.0)", "wheel (==0.37.1)", "hashin (==0.17.0)", "black (==22.3.0)", "mypy (==0.942)"] +[[package]] +name = "cachetools" +version = "4.2.1" +description = "Extensible memoizing collections and decorators" +category = "main" +optional = false +python-versions = "~=3.5" + [[package]] name = "certifi" version = "2022.5.18.1" @@ -202,6 +210,22 @@ python-versions = "*" [package.dependencies] setuptools_scm = "*" +[[package]] +name = "drain3" +version = "0.9.10" +description = "Persistent & streaming log template miner" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +cachetools = "4.2.1" +jsonpickle = "1.5.1" + +[package.extras] +kafka = ["kafka-python (==2.0.1)"] +redis = ["redis (==3.5.3)"] + [[package]] name = "empty-files" version = "0.0.3" @@ -322,6 +346,19 @@ category = "main" optional = false python-versions = ">=3.6" +[[package]] +name = "jsonpickle" +version = "1.5.1" +description = "Python library for serializing any arbitrary object graph into JSON" +category = "main" +optional = false +python-versions = ">=2.7" + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] +testing = ["coverage (<5)", "pytest (>=3.5,!=3.7.3)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pytest-black-multipy", "pytest-cov", "ecdsa", "feedparser", "numpy", "pandas", "pymongo", "sklearn", "sqlalchemy", "enum34", "jsonlib"] +"testing.libs" = ["demjson", "simplejson", "ujson", "yajl"] + [[package]] name = "keyring" version = "23.5.1" @@ -883,7 +920,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "7b6ac9ae759a0c18e388a395495d026de08a057973a44b722928259c68388c93" +content-hash = "2a2945d5dabbc2ebdaa93a9079723d4cf8d3b9afcfc1f24f348cd16b5e111f99" [metadata.files] allpairspy = [ @@ -935,6 +972,10 @@ bleach = [ {file = "bleach-5.0.0-py3-none-any.whl", hash = "sha256:08a1fe86d253b5c88c92cc3d810fd8048a16d15762e1e5b74d502256e5926aa1"}, {file = "bleach-5.0.0.tar.gz", hash = "sha256:c6d6cc054bdc9c83b48b8083e236e5f00f238428666d2ce2e083eaa5fd568565"}, ] +cachetools = [ + {file = "cachetools-4.2.1-py3-none-any.whl", hash = "sha256:1d9d5f567be80f7c07d765e21b814326d78c61eb0c3a637dffc0e5d1796cb2e2"}, + {file = "cachetools-4.2.1.tar.gz", hash = "sha256:f469e29e7aa4cff64d8de4aad95ce76de8ea1125a16c68e0d93f65c3c3dc92e9"}, +] certifi = [ {file = "certifi-2022.5.18.1-py3-none-any.whl", hash = "sha256:f1d53542ee8cbedbe2118b5686372fb33c297fcd6379b050cca0ef13a597382a"}, {file = "certifi-2022.5.18.1.tar.gz", hash = "sha256:9c5705e395cd70084351dd8ad5c41e65655e08ce46f2ec9cf6c2c08390f71eb7"}, @@ -1038,6 +1079,9 @@ docutils = [ dotty-dict = [ {file = "dotty_dict-1.3.0.tar.gz", hash = "sha256:eb0035a3629ecd84397a68f1f42f1e94abd1c34577a19cd3eacad331ee7cbaf0"}, ] +drain3 = [ + {file = "drain3-0.9.10.tar.gz", hash = "sha256:2c9c5466f5d9af3d1f273c7ff4cc6995df363df091a538c12fd070cb035f7e4f"}, +] empty-files = [ {file = "empty-files-0.0.3.tar.gz", hash = "sha256:87277db100a3bfdafc2ba18f6094cd37090e257058fb1c0b15873a89e1003149"}, {file = "empty_files-0.0.3-py3-none-any.whl", hash = "sha256:ec464f7f88a028d4567b380d57983fc4ffb79147538626690cd94c33090cd216"}, @@ -1082,6 +1126,10 @@ joblib = [ {file = "joblib-1.1.0-py2.py3-none-any.whl", hash = "sha256:f21f109b3c7ff9d95f8387f752d0d9c34a02aa2f7060c2135f465da0e5160ff6"}, {file = "joblib-1.1.0.tar.gz", hash = "sha256:4158fcecd13733f8be669be0683b96ebdbbd38d23559f54dca7205aea1bf1e35"}, ] +jsonpickle = [ + {file = "jsonpickle-1.5.1-py2.py3-none-any.whl", hash = "sha256:8eb8323f0e12cb40687f0445e2115d8165901e20ac670add55bb53a95c68c0e5"}, + {file = "jsonpickle-1.5.1.tar.gz", hash = "sha256:060f97096559d1b86aa16cac2f4ea5f7b6da0c15d8a4de150b78013a886f9a51"}, +] keyring = [ {file = "keyring-23.5.1-py3-none-any.whl", hash = "sha256:9ef58314bcc823f426b49ec787539a2d73571b37de4cd498f839803b01acff1e"}, {file = "keyring-23.5.1.tar.gz", hash = "sha256:dee502cdf18a98211bef428eea11456a33c00718b2f08524fd5727c7f424bffd"}, diff --git a/pyproject.toml b/pyproject.toml index f828c83..188c5dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,60 +1,61 @@ -[tool.poetry] -name = "failures-analysis" -version = "1.1.0" -description = " failures-analysis package provides fast and reliable way to find and group similar failures in test automation." -authors = ["Tatu Aalto"] -license = "Apache-2.0" -readme = "README.md" -homepage = "https://github.com/F-Secure/failures-analysis" -classifiers = [ - "Development Status :: 3 - Alpha", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Topic :: Software Development :: Testing", -] -packages = [ - { include = "failure_analysis", from="."} -] - -[tool.poetry.scripts] -failures-analysis = "failure_analysis.failure_analysis:main" - -[tool.poetry.urls] -"Change log" = "https://github.com/F-Secure/failures-analysis/blob/main/CHANGELOG.md" - -[tool.poetry.dependencies] -python = "^3.8" -numpy = "^1.22.4" -pandas = "^1.4.2" -sklearn = "^0.0" -lxml = "^4.9.0" - -[tool.poetry.dev-dependencies] -pytest = "^7.1.2" -black = "^22.3.0" -isort = "^5.10.1" -invoke = "^1.7.1" -mypy = "^0.961" -flake8 = "^4.0.1" -python-semantic-release = "7.28.1" -approvaltests = "^5.2.0" - -[build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" - -[tool.black] -line-length = 120 -target-version = ['py38'] - -[tool.semantic_release] -version_toml = [ - "pyproject.toml:tool.poetry.version", - "failure_analysis/__init__.py:__version__" -] -branch = "main" -build_command = "poetry build" +[tool.poetry] +name = "failures-analysis" +version = "1.1.0" +description = " failures-analysis package provides fast and reliable way to find and group similar failures in test automation." +authors = ["Tatu Aalto"] +license = "Apache-2.0" +readme = "README.md" +homepage = "https://github.com/F-Secure/failures-analysis" +classifiers = [ + "Development Status :: 3 - Alpha", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Topic :: Software Development :: Testing", +] +packages = [ + { include = "failure_analysis", from="."} +] + +[tool.poetry.scripts] +failures-analysis = "failure_analysis.failure_analysis:main" + +[tool.poetry.urls] +"Change log" = "https://github.com/F-Secure/failures-analysis/blob/main/CHANGELOG.md" + +[tool.poetry.dependencies] +python = "^3.8" +numpy = "^1.22.4" +pandas = "^1.4.2" +sklearn = "^0.0" +lxml = "^4.9.0" +drain3 = "^0.9.10" + +[tool.poetry.dev-dependencies] +pytest = "^7.1.2" +black = "^22.3.0" +isort = "^5.10.1" +invoke = "^1.7.1" +mypy = "^0.961" +flake8 = "^4.0.1" +python-semantic-release = "7.28.1" +approvaltests = "^5.2.0" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.black] +line-length = 120 +target-version = ['py38'] + +[tool.semantic_release] +version_toml = [ + "pyproject.toml:tool.poetry.version", + "failure_analysis/__init__.py:__version__" +] +branch = "main" +build_command = "poetry build" diff --git a/utest/resources/drain3.ini b/utest/resources/drain3.ini new file mode 100644 index 0000000..fc2ca11 --- /dev/null +++ b/utest/resources/drain3.ini @@ -0,0 +1,4 @@ +[MASKING] +masking = [ + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP_ADDRESS"} + ] \ No newline at end of file diff --git a/utest/test_drain3.py b/utest/test_drain3.py new file mode 100644 index 0000000..07474d4 --- /dev/null +++ b/utest/test_drain3.py @@ -0,0 +1,27 @@ +from pathlib import Path + +from approvaltests import verify # type: ignore + +from failure_analysis.failure_analysis import template_failures + +UTEST_ROOT = Path(__file__).resolve().parent +XUNIT_FILES_DIR = UTEST_ROOT / "resources" + + +def test_template_failures(): + failures = [ + "def test_02():\n> assert 'connected to 192.168.0.1' == 'connected to 192.0.0.0'\nE assert False\n\n\ntests\\test_me.py:6: AssertionError", + "def test_02():\n> assert 'connected to 192.168.0.2' == 'connected to 192.0.0.0'\nE assert False\n\n\ntests\\test_me.py:6: AssertionError", + "def test_02():\n> assert 'connected to 192.168.0.3' == 'connected to 192.0.0.0'\nE assert False\n\n\ntests\\test_me.py:6: AssertionError", + ] + verify(template_failures(failures, "")) + + +def test_custom_ini(tmp_path): + failures = [ + "def test_02():\n> assert 'connected to 192.168.0.1' == 'connected to 192.0.0.0'\nE assert False\n\n\ntests\\test_me.py:6: AssertionError", + ] + drain3_ini = XUNIT_FILES_DIR / "drain3.ini" + drain3_ini = drain3_ini.resolve() + assert drain3_ini.is_file() + verify(template_failures(failures, str(drain3_ini))) diff --git a/utest/test_drain3.test_custom_ini.approved.txt b/utest/test_drain3.test_custom_ini.approved.txt new file mode 100644 index 0000000..b06a2d5 --- /dev/null +++ b/utest/test_drain3.test_custom_ini.approved.txt @@ -0,0 +1 @@ +["def test_02(): > assert 'connected to ' == 'connected to ' E assert False tests\\test_me.py:6: AssertionError"] diff --git a/utest/test_drain3.test_template_failures.approved.txt b/utest/test_drain3.test_template_failures.approved.txt new file mode 100644 index 0000000..b544d40 --- /dev/null +++ b/utest/test_drain3.test_template_failures.approved.txt @@ -0,0 +1 @@ +["def test_(): > assert 'connected to ' == 'connected to ' E assert False tests\\test_me.py:: AssertionError", "def test_(): > assert 'connected to ' == 'connected to ' E assert False tests\\test_me.py:: AssertionError", "def test_(): > assert 'connected to ' == 'connected to ' E assert False tests\\test_me.py:: AssertionError"] diff --git a/utest/test_similarity.py b/utest/test_similarity.py index 5b4d567..81782f2 100644 --- a/utest/test_similarity.py +++ b/utest/test_similarity.py @@ -36,11 +36,11 @@ def test_score_failures(): def test_invalid_path(): with pytest.raises(IOError): - run("not/here", MIN_THRESHOLD) + run("not/here", MIN_THRESHOLD, "", True) def test_console_output(capsys): - run(str(XUNIT_FILES_DIR), MIN_THRESHOLD) + run(str(XUNIT_FILES_DIR), MIN_THRESHOLD, "", True) captured = capsys.readouterr() verify(captured.out) @@ -49,13 +49,13 @@ def test_no_failures(capsys): with pytest.raises(SystemExit): with tempfile.TemporaryDirectory() as temp_folder: shutil.copy(PASS_01_FILE_PATH, Path(temp_folder) / PASS_01_FILE_NAME) - run(temp_folder, MIN_THRESHOLD) + run(temp_folder, MIN_THRESHOLD, "", False) captured = capsys.readouterr() assert captured.out == "NO FAILURES FOUND" with pytest.raises(SystemExit): with tempfile.TemporaryDirectory() as temp_folder: - run(temp_folder, MIN_THRESHOLD) + run(temp_folder, MIN_THRESHOLD, "", True) captured = capsys.readouterr() assert captured.out == "NO FAILURES FOUND" @@ -67,6 +67,6 @@ def test_finding_files(capsys): shutil.copy(PASS_01_FILE_PATH, folder_match_filter_patters / PASS_01_FILE_NAME) shutil.copy(FAIL_01_FILE_PATH, folder_match_filter_patters / FAIL_01_FILE_NAME) shutil.copy(FAIL_02_FILE_PATH, folder_match_filter_patters / FAIL_02_FILE_NAME) - run(temp_folder, MIN_THRESHOLD) + run(temp_folder, MIN_THRESHOLD, "", True) captured = capsys.readouterr() verify(captured.out)