man-group · G-D-Petrov · Dec 18, 2024 · Dec 31, 2024 · Dec 31, 2024 · Dec 31, 2024
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -23,6 +23,10 @@ on:
         description: Tag of the ArcticCB development image to use for the Linux C++ tests build
         type: string
         default: latest
+      pytest_args:
+        description: Rewrite what tests will run
+        type: string
+        default: ""
 run-name: Building ${{github.ref_name}} on ${{github.event_name}} by ${{github.actor}}
 concurrency:
   group: ${{github.ref}}
@@ -191,6 +195,7 @@ jobs:
       python_deps_ids: ${{toJson(matrix.python_deps_ids)}}
       persistent_storage: ${{inputs.persistent_storage}}
       pytest_xdist_mode: ${{matrix.pytest_xdist_mode}}
+      pytest_args: ${{inputs.pytest_args}}
 
   cpp-test-windows:
     needs: [common_config]
@@ -219,6 +224,7 @@ jobs:
       cmake_preset_type: ${{needs.common_config.outputs.cmake_preset_type_resolved}}
       matrix: ${{needs.common_config.outputs.windows_matrix}}
       persistent_storage: ${{ inputs.persistent_storage }}
+      pytest_args: ${{inputs.pytest_args}}
 
   persistent_storage_verify_linux:
     needs: [common_config, build-python-wheels-linux, build-python-wheels-windows]

diff --git a/.github/workflows/build_steps.yml b/.github/workflows/build_steps.yml
@@ -12,6 +12,7 @@ on:
       python3:           {default: -1, type: number, description: Python 3 minor version}
       persistent_storage:      {default: "false", type: string, description: Specifies whether the python tests should tests against real storages e.g. AWS S3  }
       pytest_xdist_mode: {default: "", type: string, description: additional argument to pass for pytest-xdist}
+      pytest_args:       {default: "", type: string, description: a way to rewrite the pytest args to change what tests are being run}
 jobs:
   start_ec2_runner:
     if: inputs.compile-override == 'compile-on-ec2'
@@ -363,6 +364,7 @@ jobs:
           CI_MONGO_HOST: mongodb
           HYPOTHESIS_PROFILE: ci_${{matrix.os}}
           PYTEST_XDIST_MODE: ${{inputs.pytest_xdist_mode}}
+          ARCTICDB_PYTEST_ARGS: ${{inputs.pytest_args}}
 
       - name: Collect crash dumps (Windows)
         if: matrix.os == 'windows' && failure()

diff --git a/build_tooling/parallel_test.sh b/build_tooling/parallel_test.sh
@@ -18,7 +18,16 @@ cd $PARALLEL_TEST_ROOT
 
 export ARCTICDB_RAND_SEED=$RANDOM
 
-$catch python -m pytest --timeout=3600 $PYTEST_XDIST_MODE -v --log-file="$TEST_OUTPUT_DIR/pytest-logger.$group.log" \
-    --junitxml="$TEST_OUTPUT_DIR/pytest.$group.xml" \
-    --basetemp="$PARALLEL_TEST_ROOT/temp-pytest-output" \
-    "$@" 2>&1 | sed -ur "s#^(tests/.*/([^/]+\.py))?#\2#"
+if [ -z "$ARCTICDB_PYTEST_ARGS" ]; then
+    $catch python -m pytest --timeout=3600 $PYTEST_XDIST_MODE -v \
+        --log-file="$TEST_OUTPUT_DIR/pytest-logger.$group.log" \
+        --junitxml="$TEST_OUTPUT_DIR/pytest.$group.xml" \
+        --basetemp="$PARALLEL_TEST_ROOT/temp-pytest-output" \
+        "$@" 2>&1 | sed -r "s#^(tests/.*/([^/]+\.py))?#\2#"
+else
+    $catch python -m pytest --timeout=3600 $PYTEST_XDIST_MODE -v \
+        --log-file="$TEST_OUTPUT_DIR/pytest-logger.$group.log" \
+        --junitxml="$TEST_OUTPUT_DIR/pytest.$group.xml" \
+        --basetemp="$PARALLEL_TEST_ROOT/temp-pytest-output" \
+        "$ARCTICDB_PYTEST_ARGS" 2>&1
+fi
@@ -15,13 +15,18 @@
 
 logger = logging.getLogger("Compatibility tests")
 
+
 class ErrorInVenv(Exception):
     """To signal errors occuring from within the venv"""
 
+
 def is_running_on_windows():
-    return os.name == 'nt'
+    return os.name == "nt"
 
-def run_shell_command(command : List[Union[str, os.PathLike]], cwd : Optional[os.PathLike] = None) -> subprocess.CompletedProcess:
+
+def run_shell_command(
+    command: List[Union[str, os.PathLike]], cwd: Optional[os.PathLike] = None
+) -> subprocess.CompletedProcess:
     logger.info(f"Executing command: {command}")
     result = None
     if is_running_on_windows():
@@ -30,18 +35,28 @@ def run_shell_command(command : List[Union[str, os.PathLike]], cwd : Optional[os
     else:
         # On linux we need shell=True for conda feedstock runners (because otherwise they fail to expand path variables)
         # But to correctly work with shell=True we need a single command string.
-        command_string = ' '.join(command)
-        result = subprocess.run(command_string, cwd=cwd, capture_output=True, shell=True, stdin=subprocess.DEVNULL)
+        command_string = " ".join(command)
+        result = subprocess.run(
+            command_string,
+            cwd=cwd,
+            capture_output=True,
+            shell=True,
+            stdin=subprocess.DEVNULL,
+        )
     if result.returncode != 0:
-        logger.warning(f"Command failed, stdout: {str(result.stdout)}, stderr: {str(result.stderr)}")
+        logger.warning(
+            f"Command failed, stdout: {str(result.stdout)}, stderr: {str(result.stderr)}"
+        )
     return result
 
+
 def get_os_specific_venv_python() -> str:
     if is_running_on_windows():
         return os.path.join("Scripts", "python.exe")
     else:
         return os.path.join("bin", "python")
 
+
 class Venv:
     def __init__(self, venv_path, requirements_file, version):
         self.path = venv_path
@@ -57,17 +72,26 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 
     def init_venv(self):
         venv.create(self.path, with_pip=True, clear=True)
-        command = [get_os_specific_venv_python(), "-m", "pip", "install", "-r", self.requirements_file]
+        command = [
+            get_os_specific_venv_python(),
+            "-m",
+            "pip",
+            "install",
+            "-r",
+            self.requirements_file,
+        ]
         run_shell_command(command, self.path)
 
     def tear_down_venv(self):
-        shutil.rmtree(self.path)
+        shutil.rmtree(self.path, ignore_errors=True)
 
-    def execute_python_file(self, python_path : Union[str, os.PathLike]) -> subprocess.CompletedProcess:
+    def execute_python_file(
+        self, python_path: Union[str, os.PathLike]
+    ) -> subprocess.CompletedProcess:
         command = [get_os_specific_venv_python(), python_path]
         return run_shell_command(command, self.path)
 
-    def create_arctic(self, uri : str) -> 'VenvArctic':
+    def create_arctic(self, uri: str) -> "VenvArctic":
         return VenvArctic(self, uri)
 
 
@@ -77,42 +101,52 @@ def __init__(self, venv, uri):
         self.uri = uri
         self.init_storage()
 
-    def execute(self, python_commands : List[str], dfs : Optional[Dict] = None) -> None:
+    def execute(self, python_commands: List[str], dfs: Optional[Dict] = None) -> None:
         """
         Prepares the dataframe parquet files and the python script to be run from within the venv.
         """
         if dfs is None:
             dfs = {}
 
+        # Add a random prefix to the temporary directory to avoid conflicts
+        # rand_prefix = os.urandom(8).hex()
         with tempfile.TemporaryDirectory() as dir:
             df_load_commands = []
             for df_name, df_value in dfs.items():
                 parquet_file = os.path.join(dir, f"{df_name}.parquet")
                 df_value.to_parquet(parquet_file)
-                df_load_commands.append(f"{df_name} = pd.read_parquet({repr(parquet_file)})")
-
-            python_commands = [
-                "from arcticdb import Arctic",
-                "import pandas as pd",
-                "import numpy as np",
-                f"ac = Arctic({repr(self.uri)})"
-            ] + df_load_commands + python_commands
+                df_load_commands.append(
+                    f"{df_name} = pd.read_parquet({repr(parquet_file)})"
+                )
+
+            python_commands = (
+                [
+                    "from arcticdb import Arctic",
+                    "import pandas as pd",
+                    "import numpy as np",
+                    f"ac = Arctic({repr(self.uri)})",
+                ]
+                + df_load_commands
+                + python_commands
+            )
 
             python_path = os.path.join(dir, "run.py")
             with open(python_path, "w") as python_file:
                 python_file.write("\n".join(python_commands))
 
             result = self.venv.execute_python_file(python_path)
             if result.returncode != 0:
-                raise ErrorInVenv(f"Executing {python_commands} failed with return code {result.returncode}")
+                raise ErrorInVenv(
+                    f"Executing {python_commands} failed with return code {result.returncode}: {result}"
+                )
 
     def init_storage(self):
         self.execute([])
 
-    def create_library(self, lib_name : str) -> 'VenvLib':
+    def create_library(self, lib_name: str) -> "VenvLib":
         return VenvLib(self, lib_name, create_if_not_exists=True)
 
-    def get_library(self, lib_name : str) -> 'VenvLib':
+    def get_library(self, lib_name: str) -> "VenvLib":
         return VenvLib(self, lib_name, create_if_not_exists=False)
 
 
@@ -126,33 +160,34 @@ def __init__(self, arctic, lib_name, create_if_not_exists=True):
     def create_lib(self):
         self.arctic.execute([f"ac.create_library('{self.lib_name}')"])
 
-    def execute(self, python_commands : List[str], dfs : Optional[Dict] = None) -> None:
+    def execute(self, python_commands: List[str], dfs: Optional[Dict] = None) -> None:
         python_commands = [
             f"lib = ac.get_library('{self.lib_name}')",
         ] + python_commands
         return self.arctic.execute(python_commands, dfs)
 
-    def write(self, sym : str, df) -> None:
+    def write(self, sym: str, df) -> None:
         return self.execute([f"lib.write('{sym}', df)"], {"df": df})
 
-    def assert_read(self, sym : str, df) -> None:
+    def assert_read(self, sym: str, df) -> None:
         python_commands = [
             f"read_df = lib.read('{sym}').data",
-            "pd.testing.assert_frame_equal(read_df, expected_df)"
+            "print(read_df)",
+            "pd.testing.assert_frame_equal(read_df, expected_df)",
         ]
         return self.execute(python_commands, {"expected_df": df})
 
 
 @pytest.fixture(
-    scope="session",
+    # scope="session",
     params=[
         pytest.param("1.6.2", marks=VENV_COMPAT_TESTS_MARK),
         pytest.param("4.5.1", marks=VENV_COMPAT_TESTS_MARK),
-    ] # TODO: Extend this list with other old versions
+    ],  # TODO: Extend this list with other old versions
 )
-def old_venv(request):
+def old_venv(request, tmp_path):
     version = request.param
-    path = os.path.join("venvs", version)
+    path = os.path.join("venvs", tmp_path, version)
     compat_dir = os.path.dirname(os.path.abspath(__file__))
     requirements_file = os.path.join(compat_dir, f"requirements-{version}.txt")
     with Venv(path, requirements_file, version) as old_venv:
@@ -161,7 +196,9 @@ def old_venv(request):
 
 @pytest.fixture(
     params=[
-        "lmdb",
+        # Don't include lmdb because there is a bug in 1.6.2/4.5.1
+        # which causes the tests to segfault intermittently
+        # "lmdb",
         "s3_ssl_disabled",
         pytest.param("azurite", marks=AZURE_TESTS_MARK),
         pytest.param("mongo", marks=MONGO_TESTS_MARK),
@@ -183,11 +220,7 @@ def arctic_uri(request):
 @pytest.fixture()
 def old_venv_and_arctic_uri(old_venv, arctic_uri):
     # TODO: Once #1979 is understood and fixed reenable mongo, lmdb and azure for versions which have the fix.
-    if arctic_uri.startswith("mongo"):
-        pytest.skip("Mongo storage backend has a probable desctruction bug, which can cause flaky segfaults.")
-    if arctic_uri.startswith("lmdb"):
-        pytest.skip("LMDB storage backend has a probable desctruction bug, which can cause flaky segfaults.")
-    if arctic_uri.startswith("azure"):
-        pytest.skip("Azure storage backend has probable a desctruction bug, which can cause flaky segfaults.")
+    if arctic_uri.startswith("mongo") and "1.6.2" in old_venv.version:
+        pytest.skip("Mongo storage backend is not supported in 1.6.2")
 
     return old_venv, arctic_uri