From 5fc5d42c6589c778cde232baf375c2cba19a06b1 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Thu, 11 Jan 2024 12:00:00 +0100 Subject: [PATCH] TarOutput: Close collected files after reading them to free buffers With tar output, TarOutput.addRealFile() did not close file objects. So in tar mode, the file objects of all captures regular files were not closed. As the fixed line use `TarFile.addfile(tarinfo, open("fn"))` the opened file object could have never been passed anywhere else so it is safe to replace this pattern using a context manager. Summary: - The changes to xen-bugtool are 4 safe lines to use `with open` - All other changes are improvements for the unit tests to cover it. Description: This was found by PYTHONDEVMODE=yes to trace unclosed resources. I set this up in .pre-commit-config.yaml, so while developing tests, I saw the warning while testing with pre-commit run -v. Fixes in this PR: - Use context managers to fix the locations which are easy to fix. - Extend the unit tests (those in master now) to cover both locations. - While updating those tests, improve them to check the file list of the ZIP and tar archives generated in this particular test case. - Configure GitHub CI to create source annotations for PYTHONWARNINGS which are issued by such cases and are shown in code review of PRs. Notes: In the course of a year, my development machine accumulated 24000 files in /var/xapi/blobs (spread evenly over all subdirectories), so Python apparently does some garbage collection, but it's certainly not good to never close so many open file-like objects in Python. Closing them in time has the potential to reduce the memory usage of the xen-bugtool / xenserver-status-report process. There are more locations where Python 3.10 reports unclosed file objects, but it seems we may have to use enable tracemalloc as recommended by the issued PYTHONWARNINGS to find their source. Signed-off-by: Bernhard Kaindl --- .../PYTHONWARNINGS-problemMatcher.json | 17 ++++++++ .github/workflows/main.yml | 8 ++++ tests/integration/dom0-template/etc/group | 1 + tests/integration/dom0-template/etc/passwd | 1 + .../etc/xensource/bugtool/mock/stuff.xml | 2 + tests/unit/test_load_plugins.py | 12 +++++- tests/unit/test_output.py | 43 ++++++++++++++++--- xen-bugtool | 7 ++- 8 files changed, 80 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/PYTHONWARNINGS-problemMatcher.json create mode 100644 tests/integration/dom0-template/etc/group create mode 100644 tests/integration/dom0-template/etc/passwd diff --git a/.github/workflows/PYTHONWARNINGS-problemMatcher.json b/.github/workflows/PYTHONWARNINGS-problemMatcher.json new file mode 100644 index 00000000..f167ab96 --- /dev/null +++ b/.github/workflows/PYTHONWARNINGS-problemMatcher.json @@ -0,0 +1,17 @@ +{ + "problemMatcher": [ + { + "owner": "python-libs-PYTHONWARNINGS", + "severity": "warning", + "pattern": [ + { + "regexp": "^.*/python-libs/(.+):([0-9]*):(.*):(.*)$", + "file": 1, + "line": 2, + "code": 3, + "message": 4 + } + ] + } + ] +} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 39315b3a..d042a6e0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -89,9 +89,17 @@ jobs: path: ~/.cache/pre-commit key: pre-commit|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }} + # https://docs.python.org/3/library/devmode.html#resourcewarning-example + # If pytest runs with PYTHONDEVMODE=yes, it enables resource checks like + # unclosed file warnings. Configure GitHub to show all such warnings as + # annotations at the source location they occur in the PR code review: + + - run: echo "::add-matcher::.github/workflows/PYTHONWARNINGS-problemMatcher.json" + - uses: pre-commit/action@v3.0.0 name: Run pre-commit checks env: + PYTHONDEVMODE: yes # Enable Python3 checks. See the comment above. # Skip the no-commit-to-branch check inside of GitHub CI (for CI on merge to master) SKIP: no-commit-to-branch diff --git a/tests/integration/dom0-template/etc/group b/tests/integration/dom0-template/etc/group new file mode 100644 index 00000000..1dbf9013 --- /dev/null +++ b/tests/integration/dom0-template/etc/group @@ -0,0 +1 @@ +root:x:0: diff --git a/tests/integration/dom0-template/etc/passwd b/tests/integration/dom0-template/etc/passwd new file mode 100644 index 00000000..aebc4923 --- /dev/null +++ b/tests/integration/dom0-template/etc/passwd @@ -0,0 +1 @@ +root:x:0:0:root:/root:/bin/bash diff --git a/tests/integration/dom0-template/etc/xensource/bugtool/mock/stuff.xml b/tests/integration/dom0-template/etc/xensource/bugtool/mock/stuff.xml index ffcd404f..9b8a8567 100644 --- a/tests/integration/dom0-template/etc/xensource/bugtool/mock/stuff.xml +++ b/tests/integration/dom0-template/etc/xensource/bugtool/mock/stuff.xml @@ -1,5 +1,7 @@ /etc +/etc/passwd +/etc/group /proc/self/status /proc/sys/fs/epoll cat /proc/version diff --git a/tests/unit/test_load_plugins.py b/tests/unit/test_load_plugins.py index 006efd09..77989dd9 100644 --- a/tests/unit/test_load_plugins.py +++ b/tests/unit/test_load_plugins.py @@ -23,8 +23,8 @@ def test_load_plugins(bugtool, dom0_template): False, 9, ) - # Assert the size of the created mock capability: - assert bugtool.cap_sizes["mock"] == 0 + # Assert the size of the files of the created mock inventory entry: + assert bugtool.cap_sizes["mock"] > 0 # /etc/passwd should have content # Assert the entries added to the bugtool.data dict: assert bugtool.data == { "ls -l /etc": { @@ -32,6 +32,14 @@ def test_load_plugins(bugtool, dom0_template): "cmd_args": ["ls", "-l", "/etc"], "filter": None, }, + "/etc/passwd": { + "cap": "mock", + "filename": "/etc/passwd", + }, + "/etc/group": { + "cap": "mock", + "filename": "/etc/group", + }, "/proc/self/status": { "cap": "mock", "filename": "/proc/self/status", diff --git a/tests/unit/test_output.py b/tests/unit/test_output.py index a3100d54..b7cfecc8 100644 --- a/tests/unit/test_output.py +++ b/tests/unit/test_output.py @@ -13,8 +13,24 @@ def assert_valid_inventory_schema(inventory_tree): XMLSchema(parse(xml_schema)).assertValid(inventory_tree) -def assert_mock_bugtool_plugin_output(extracted): +def assert_mock_bugtool_plugin_output(temporary_directory, subdir, names): """Assertion check of the output files from the mock bugtool plugin""" + + # Assert the list of file names in the status report archive: + expected_names = [ + subdir + "/etc/group", + subdir + "/etc/passwd.tar", + subdir + "/inventory.xml", + subdir + "/ls-l-%etc.out", + subdir + "/proc/self/status", + subdir + "/proc/sys/fs/epoll/max_user_watches", + subdir + "/proc_version.out", + ] + assert sorted(names) == expected_names + + extracted = "%s/%s/" % (temporary_directory, subdir) + + # Will be refactored to be more easy in a separate commit soon: assert_valid_inventory_schema(parse(extracted + "inventory.xml")) with open(extracted + "proc_version.out") as proc_version: assert proc_version.read()[:14] == "Linux version " @@ -24,6 +40,17 @@ def assert_mock_bugtool_plugin_output(extracted): assert status.read()[:5] == "Name:" with open(extracted + "proc/sys/fs/epoll/max_user_watches") as max_user_watches: assert int(max_user_watches.read()) > 0 + with open(extracted + "etc/group") as group: + assert group.readline() == "root:x:0:\n" + + # Check the contents of the sub-archive "etc/passwd.tar": + with tarfile.TarFile(extracted + "etc/passwd.tar") as tar: + assert tar.getnames() == [subdir + "/etc/passwd"] + # TarFile.extractfile() does not support context managers on Python2: + passwd = tar.extractfile(subdir + "/etc/passwd") + assert passwd + assert passwd.readline() == b"root:x:0:0:root:/root:/bin/bash\n" + passwd.close() def minimal_bugtool(bugtool, dom0_template, archive, subdir): @@ -32,7 +59,9 @@ def minimal_bugtool(bugtool, dom0_template, archive, subdir): # Load the mock plugin from dom0_template and process the plugin's caps: bugtool.PLUGIN_DIR = dom0_template + "/etc/xensource/bugtool" bugtool.entries = ["mock"] + archive.declare_subarchive("/etc/passwd", subdir + "/etc/passwd.tar") bugtool.load_plugins(just_capabilities=False) + # Mock the 2nd argument of the ls -l /etc to collect it using dom0_template: bugtool.data["ls -l /etc"]["cmd_args"][2] = dom0_template + "/etc" bugtool.collect_data(subdir, archive) bugtool.include_inventory(archive, subdir) @@ -51,10 +80,9 @@ def test_tar_output(bugtool, tmp_path, dom0_template): # Check the TarFile contents tmp = tmp_path.as_posix() - tar_archive = tarfile.TarFile(tmp + "/tarball.tar") - tar_archive.extractall(tmp) - tar_archive.close() - assert_mock_bugtool_plugin_output(tmp + "/" + subdir + "/") + with tarfile.TarFile(tmp + "/tarball.tar") as tar: + tar.extractall(tmp) + assert_mock_bugtool_plugin_output(tmp, subdir, tar.getnames()) def test_zip_output(bugtool, tmp_path, dom0_template): @@ -69,5 +97,6 @@ def test_zip_output(bugtool, tmp_path, dom0_template): # Check the ZipFile contents tmp = tmp_path.as_posix() - zipfile.ZipFile(tmp + "/zipfile.zip").extractall(tmp) - assert_mock_bugtool_plugin_output(tmp + "/" + subdir + "/") + with zipfile.ZipFile(tmp + "/zipfile.zip") as zip: + zip.extractall(tmp) + assert_mock_bugtool_plugin_output(tmp, subdir, zip.namelist()) diff --git a/xen-bugtool b/xen-bugtool index cabb727c..dec4b410 100755 --- a/xen-bugtool +++ b/xen-bugtool @@ -1697,7 +1697,8 @@ class TarSubArchive(io.BytesIO): :param name: Recorded path of the the file in the tar archive for extraction :param filename: Real file name of the file to be added to the tar archive """ - self.file.addfile(self.file.gettarinfo(filename, name), open(filename, "rb")) + with open(filename, "rb") as buffered_reader: + self.file.addfile(self.file.gettarinfo(filename, name), buffered_reader) class ArchiveWithTarSubarchives(object): """Base class for TarOutput and ZipOutput with support to create sub-archives""" @@ -1762,7 +1763,9 @@ class TarOutput(ArchiveWithTarSubarchives): s = os.stat(filename) ti.mtime = s.st_mtime ti.size = s.st_size - self.tf.addfile(ti, open(filename, "rb")) + with open(filename, "rb") as buffered_reader: + self.tf.addfile(ti, buffered_reader) + def add_path_with_data(self, name, data): ti = self._getTi(name)