From 5fc5d42c6589c778cde232baf375c2cba19a06b1 Mon Sep 17 00:00:00 2001
From: Bernhard Kaindl <bernhard.kaindl@cloud.com>
Date: Thu, 11 Jan 2024 12:00:00 +0100
Subject: [PATCH] TarOutput: Close collected files after reading them to free
 buffers

With tar output, TarOutput.addRealFile() did not close file objects.
So in tar mode, the file objects of all captures regular files were
not closed.

As the fixed line use `TarFile.addfile(tarinfo, open("fn"))` the
opened file object could have never been passed anywhere else so
it is safe to replace this pattern using a context manager.

Summary:

- The changes to xen-bugtool are 4 safe lines to use `with open`
- All other changes are improvements for the unit tests to cover it.

Description:

This was found by PYTHONDEVMODE=yes to trace unclosed resources.
I set this up in .pre-commit-config.yaml, so while developing
tests, I saw the warning while testing with pre-commit run -v.

Fixes in this PR:

- Use context managers to fix the locations which are easy to fix.
- Extend the unit tests (those in master now) to cover both locations.
- While updating those tests, improve them to check the file list of
  the ZIP and tar archives generated in this particular test case.
- Configure GitHub CI to create source annotations for PYTHONWARNINGS
  which are issued by such cases and are shown in code review of PRs.

Notes:

In the course of a year, my development machine accumulated 24000
files in /var/xapi/blobs (spread evenly over all subdirectories),
so Python apparently does some garbage collection, but it's certainly
not good to never close so many open file-like objects in Python.

Closing them in time has the potential to reduce the memory usage
of the xen-bugtool / xenserver-status-report process.

There are more locations where Python 3.10 reports unclosed file
objects, but it seems we may have to use enable tracemalloc as
recommended by the issued PYTHONWARNINGS to find their source.

Signed-off-by: Bernhard Kaindl <bernhard.kaindl@cloud.com>
---
 .../PYTHONWARNINGS-problemMatcher.json        | 17 ++++++++
 .github/workflows/main.yml                    |  8 ++++
 tests/integration/dom0-template/etc/group     |  1 +
 tests/integration/dom0-template/etc/passwd    |  1 +
 .../etc/xensource/bugtool/mock/stuff.xml      |  2 +
 tests/unit/test_load_plugins.py               | 12 +++++-
 tests/unit/test_output.py                     | 43 ++++++++++++++++---
 xen-bugtool                                   |  7 ++-
 8 files changed, 80 insertions(+), 11 deletions(-)
 create mode 100644 .github/workflows/PYTHONWARNINGS-problemMatcher.json
 create mode 100644 tests/integration/dom0-template/etc/group
 create mode 100644 tests/integration/dom0-template/etc/passwd

diff --git a/.github/workflows/PYTHONWARNINGS-problemMatcher.json b/.github/workflows/PYTHONWARNINGS-problemMatcher.json
new file mode 100644
index 00000000..f167ab96
--- /dev/null
+++ b/.github/workflows/PYTHONWARNINGS-problemMatcher.json
@@ -0,0 +1,17 @@
+{
+  "problemMatcher": [
+        {
+            "owner": "python-libs-PYTHONWARNINGS",
+            "severity": "warning",
+            "pattern": [
+                {
+                    "regexp": "^.*/python-libs/(.+):([0-9]*):(.*):(.*)$",
+                    "file": 1,
+                    "line": 2,
+                    "code": 3,
+                    "message": 4
+                }
+            ]
+        }
+  ]
+}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 39315b3a..d042a6e0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -89,9 +89,17 @@ jobs:
           path: ~/.cache/pre-commit
           key: pre-commit|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}
 
+      # https://docs.python.org/3/library/devmode.html#resourcewarning-example
+      # If pytest runs with PYTHONDEVMODE=yes, it enables resource checks like
+      # unclosed file warnings. Configure GitHub to show all such warnings as
+      # annotations at the source location they occur in the PR code review:
+
+      - run: echo "::add-matcher::.github/workflows/PYTHONWARNINGS-problemMatcher.json"
+
       - uses: pre-commit/action@v3.0.0
         name: Run pre-commit checks
         env:
+          PYTHONDEVMODE: yes  # Enable Python3 checks. See the comment above.
           # Skip the no-commit-to-branch check inside of GitHub CI (for CI on merge to master)
           SKIP: no-commit-to-branch
 
diff --git a/tests/integration/dom0-template/etc/group b/tests/integration/dom0-template/etc/group
new file mode 100644
index 00000000..1dbf9013
--- /dev/null
+++ b/tests/integration/dom0-template/etc/group
@@ -0,0 +1 @@
+root:x:0:
diff --git a/tests/integration/dom0-template/etc/passwd b/tests/integration/dom0-template/etc/passwd
new file mode 100644
index 00000000..aebc4923
--- /dev/null
+++ b/tests/integration/dom0-template/etc/passwd
@@ -0,0 +1 @@
+root:x:0:0:root:/root:/bin/bash
diff --git a/tests/integration/dom0-template/etc/xensource/bugtool/mock/stuff.xml b/tests/integration/dom0-template/etc/xensource/bugtool/mock/stuff.xml
index ffcd404f..9b8a8567 100644
--- a/tests/integration/dom0-template/etc/xensource/bugtool/mock/stuff.xml
+++ b/tests/integration/dom0-template/etc/xensource/bugtool/mock/stuff.xml
@@ -1,5 +1,7 @@
 <collect>
 <list>/etc</list>
+<files>/etc/passwd</files>
+<files>/etc/group</files>
 <files>/proc/self/status</files>
 <directory pattern=".*ax_user_watches">/proc/sys/fs/epoll</directory>
 <command label="proc_version">cat /proc/version</command>
diff --git a/tests/unit/test_load_plugins.py b/tests/unit/test_load_plugins.py
index 006efd09..77989dd9 100644
--- a/tests/unit/test_load_plugins.py
+++ b/tests/unit/test_load_plugins.py
@@ -23,8 +23,8 @@ def test_load_plugins(bugtool, dom0_template):
         False,
         9,
     )
-    # Assert the size of the created mock capability:
-    assert bugtool.cap_sizes["mock"] == 0
+    # Assert the size of the files of the created mock inventory entry:
+    assert bugtool.cap_sizes["mock"] > 0  # /etc/passwd should have content
     # Assert the entries added to the bugtool.data dict:
     assert bugtool.data == {
         "ls -l /etc": {
@@ -32,6 +32,14 @@ def test_load_plugins(bugtool, dom0_template):
             "cmd_args": ["ls", "-l", "/etc"],
             "filter": None,
         },
+        "/etc/passwd": {
+            "cap": "mock",
+            "filename": "/etc/passwd",
+        },
+        "/etc/group": {
+            "cap": "mock",
+            "filename": "/etc/group",
+        },
         "/proc/self/status": {
             "cap": "mock",
             "filename": "/proc/self/status",
diff --git a/tests/unit/test_output.py b/tests/unit/test_output.py
index a3100d54..b7cfecc8 100644
--- a/tests/unit/test_output.py
+++ b/tests/unit/test_output.py
@@ -13,8 +13,24 @@ def assert_valid_inventory_schema(inventory_tree):
         XMLSchema(parse(xml_schema)).assertValid(inventory_tree)
 
 
-def assert_mock_bugtool_plugin_output(extracted):
+def assert_mock_bugtool_plugin_output(temporary_directory, subdir, names):
     """Assertion check of the output files from the mock bugtool plugin"""
+
+    # Assert the list of file names in the status report archive:
+    expected_names = [
+        subdir + "/etc/group",
+        subdir + "/etc/passwd.tar",
+        subdir + "/inventory.xml",
+        subdir + "/ls-l-%etc.out",
+        subdir + "/proc/self/status",
+        subdir + "/proc/sys/fs/epoll/max_user_watches",
+        subdir + "/proc_version.out",
+    ]
+    assert sorted(names) == expected_names
+
+    extracted = "%s/%s/" % (temporary_directory, subdir)
+
+    # Will be refactored to be more easy in a separate commit soon:
     assert_valid_inventory_schema(parse(extracted + "inventory.xml"))
     with open(extracted + "proc_version.out") as proc_version:
         assert proc_version.read()[:14] == "Linux version "
@@ -24,6 +40,17 @@ def assert_mock_bugtool_plugin_output(extracted):
         assert status.read()[:5] == "Name:"
     with open(extracted + "proc/sys/fs/epoll/max_user_watches") as max_user_watches:
         assert int(max_user_watches.read()) > 0
+    with open(extracted + "etc/group") as group:
+        assert group.readline() == "root:x:0:\n"
+
+    # Check the contents of the sub-archive "etc/passwd.tar":
+    with tarfile.TarFile(extracted + "etc/passwd.tar") as tar:
+        assert tar.getnames() == [subdir + "/etc/passwd"]
+        # TarFile.extractfile() does not support context managers on Python2:
+        passwd = tar.extractfile(subdir + "/etc/passwd")
+        assert passwd
+        assert passwd.readline() == b"root:x:0:0:root:/root:/bin/bash\n"
+        passwd.close()
 
 
 def minimal_bugtool(bugtool, dom0_template, archive, subdir):
@@ -32,7 +59,9 @@ def minimal_bugtool(bugtool, dom0_template, archive, subdir):
     # Load the mock plugin from dom0_template and process the plugin's caps:
     bugtool.PLUGIN_DIR = dom0_template + "/etc/xensource/bugtool"
     bugtool.entries = ["mock"]
+    archive.declare_subarchive("/etc/passwd", subdir + "/etc/passwd.tar")
     bugtool.load_plugins(just_capabilities=False)
+    # Mock the 2nd argument of the ls -l /etc to collect it using dom0_template:
     bugtool.data["ls -l /etc"]["cmd_args"][2] = dom0_template + "/etc"
     bugtool.collect_data(subdir, archive)
     bugtool.include_inventory(archive, subdir)
@@ -51,10 +80,9 @@ def test_tar_output(bugtool, tmp_path, dom0_template):
 
     # Check the TarFile contents
     tmp = tmp_path.as_posix()
-    tar_archive = tarfile.TarFile(tmp + "/tarball.tar")
-    tar_archive.extractall(tmp)
-    tar_archive.close()
-    assert_mock_bugtool_plugin_output(tmp + "/" + subdir + "/")
+    with tarfile.TarFile(tmp + "/tarball.tar") as tar:
+        tar.extractall(tmp)
+        assert_mock_bugtool_plugin_output(tmp, subdir, tar.getnames())
 
 
 def test_zip_output(bugtool, tmp_path, dom0_template):
@@ -69,5 +97,6 @@ def test_zip_output(bugtool, tmp_path, dom0_template):
 
     # Check the ZipFile contents
     tmp = tmp_path.as_posix()
-    zipfile.ZipFile(tmp + "/zipfile.zip").extractall(tmp)
-    assert_mock_bugtool_plugin_output(tmp + "/" + subdir + "/")
+    with zipfile.ZipFile(tmp + "/zipfile.zip") as zip:
+        zip.extractall(tmp)
+        assert_mock_bugtool_plugin_output(tmp, subdir, zip.namelist())
diff --git a/xen-bugtool b/xen-bugtool
index cabb727c..dec4b410 100755
--- a/xen-bugtool
+++ b/xen-bugtool
@@ -1697,7 +1697,8 @@ class TarSubArchive(io.BytesIO):
         :param name: Recorded path of the the file in the tar archive for extraction
         :param filename: Real file name of the file to be added to the tar archive
         """
-        self.file.addfile(self.file.gettarinfo(filename, name), open(filename, "rb"))
+        with open(filename, "rb") as buffered_reader:
+            self.file.addfile(self.file.gettarinfo(filename, name), buffered_reader)
 
 class ArchiveWithTarSubarchives(object):
     """Base class for TarOutput and ZipOutput with support to create sub-archives"""
@@ -1762,7 +1763,9 @@ class TarOutput(ArchiveWithTarSubarchives):
         s = os.stat(filename)
         ti.mtime = s.st_mtime
         ti.size = s.st_size
-        self.tf.addfile(ti, open(filename, "rb"))
+        with open(filename, "rb") as buffered_reader:
+            self.tf.addfile(ti, buffered_reader)
+
 
     def add_path_with_data(self, name, data):
         ti = self._getTi(name)