Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compatibility test may trigger segfaults #2098

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ on:
description: Tag of the ArcticCB development image to use for the Linux C++ tests build
type: string
default: latest
pytest_args:
description: Rewrite what tests will run
type: string
default: ""
run-name: Building ${{github.ref_name}} on ${{github.event_name}} by ${{github.actor}}
concurrency:
group: ${{github.ref}}
Expand Down Expand Up @@ -191,6 +195,7 @@ jobs:
python_deps_ids: ${{toJson(matrix.python_deps_ids)}}
persistent_storage: ${{inputs.persistent_storage}}
pytest_xdist_mode: ${{matrix.pytest_xdist_mode}}
pytest_args: ${{inputs.pytest_args}}

cpp-test-windows:
needs: [common_config]
Expand Down Expand Up @@ -219,6 +224,7 @@ jobs:
cmake_preset_type: ${{needs.common_config.outputs.cmake_preset_type_resolved}}
matrix: ${{needs.common_config.outputs.windows_matrix}}
persistent_storage: ${{ inputs.persistent_storage }}
pytest_args: ${{inputs.pytest_args}}

persistent_storage_verify_linux:
needs: [common_config, build-python-wheels-linux, build-python-wheels-windows]
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/build_steps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ on:
python3: {default: -1, type: number, description: Python 3 minor version}
persistent_storage: {default: "false", type: string, description: Specifies whether the python tests should tests against real storages e.g. AWS S3 }
pytest_xdist_mode: {default: "", type: string, description: additional argument to pass for pytest-xdist}
pytest_args: {default: "", type: string, description: a way to rewrite the pytest args to change what tests are being run}
jobs:
start_ec2_runner:
if: inputs.compile-override == 'compile-on-ec2'
Expand Down Expand Up @@ -363,6 +364,7 @@ jobs:
CI_MONGO_HOST: mongodb
HYPOTHESIS_PROFILE: ci_${{matrix.os}}
PYTEST_XDIST_MODE: ${{inputs.pytest_xdist_mode}}
ARCTICDB_PYTEST_ARGS: ${{inputs.pytest_args}}

- name: Collect crash dumps (Windows)
if: matrix.os == 'windows' && failure()
Expand Down
17 changes: 13 additions & 4 deletions build_tooling/parallel_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,16 @@ cd $PARALLEL_TEST_ROOT

export ARCTICDB_RAND_SEED=$RANDOM

$catch python -m pytest --timeout=3600 $PYTEST_XDIST_MODE -v --log-file="$TEST_OUTPUT_DIR/pytest-logger.$group.log" \
--junitxml="$TEST_OUTPUT_DIR/pytest.$group.xml" \
--basetemp="$PARALLEL_TEST_ROOT/temp-pytest-output" \
"$@" 2>&1 | sed -ur "s#^(tests/.*/([^/]+\.py))?#\2#"
if [ -z "$ARCTICDB_PYTEST_ARGS" ]; then
$catch python -m pytest --timeout=3600 $PYTEST_XDIST_MODE -v \
--log-file="$TEST_OUTPUT_DIR/pytest-logger.$group.log" \
--junitxml="$TEST_OUTPUT_DIR/pytest.$group.xml" \
--basetemp="$PARALLEL_TEST_ROOT/temp-pytest-output" \
"$@" 2>&1 | sed -r "s#^(tests/.*/([^/]+\.py))?#\2#"
else
$catch python -m pytest --timeout=3600 $PYTEST_XDIST_MODE -v \
--log-file="$TEST_OUTPUT_DIR/pytest-logger.$group.log" \
--junitxml="$TEST_OUTPUT_DIR/pytest.$group.xml" \
--basetemp="$PARALLEL_TEST_ROOT/temp-pytest-output" \
"$ARCTICDB_PYTEST_ARGS" 2>&1
fi
105 changes: 69 additions & 36 deletions python/tests/compat/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,18 @@

logger = logging.getLogger("Compatibility tests")


class ErrorInVenv(Exception):
"""To signal errors occuring from within the venv"""


def is_running_on_windows():
return os.name == 'nt'
return os.name == "nt"

def run_shell_command(command : List[Union[str, os.PathLike]], cwd : Optional[os.PathLike] = None) -> subprocess.CompletedProcess:

def run_shell_command(
command: List[Union[str, os.PathLike]], cwd: Optional[os.PathLike] = None
) -> subprocess.CompletedProcess:
logger.info(f"Executing command: {command}")
result = None
if is_running_on_windows():
Expand All @@ -30,18 +35,28 @@ def run_shell_command(command : List[Union[str, os.PathLike]], cwd : Optional[os
else:
# On linux we need shell=True for conda feedstock runners (because otherwise they fail to expand path variables)
# But to correctly work with shell=True we need a single command string.
command_string = ' '.join(command)
result = subprocess.run(command_string, cwd=cwd, capture_output=True, shell=True, stdin=subprocess.DEVNULL)
command_string = " ".join(command)
result = subprocess.run(
command_string,
cwd=cwd,
capture_output=True,
shell=True,
stdin=subprocess.DEVNULL,
)
if result.returncode != 0:
logger.warning(f"Command failed, stdout: {str(result.stdout)}, stderr: {str(result.stderr)}")
logger.warning(
f"Command failed, stdout: {str(result.stdout)}, stderr: {str(result.stderr)}"
)
return result


def get_os_specific_venv_python() -> str:
if is_running_on_windows():
return os.path.join("Scripts", "python.exe")
else:
return os.path.join("bin", "python")


class Venv:
def __init__(self, venv_path, requirements_file, version):
self.path = venv_path
Expand All @@ -57,17 +72,26 @@ def __exit__(self, exc_type, exc_val, exc_tb):

def init_venv(self):
venv.create(self.path, with_pip=True, clear=True)
command = [get_os_specific_venv_python(), "-m", "pip", "install", "-r", self.requirements_file]
command = [
get_os_specific_venv_python(),
"-m",
"pip",
"install",
"-r",
self.requirements_file,
]
run_shell_command(command, self.path)

def tear_down_venv(self):
shutil.rmtree(self.path)
shutil.rmtree(self.path, ignore_errors=True)

def execute_python_file(self, python_path : Union[str, os.PathLike]) -> subprocess.CompletedProcess:
def execute_python_file(
self, python_path: Union[str, os.PathLike]
) -> subprocess.CompletedProcess:
command = [get_os_specific_venv_python(), python_path]
return run_shell_command(command, self.path)

def create_arctic(self, uri : str) -> 'VenvArctic':
def create_arctic(self, uri: str) -> "VenvArctic":
return VenvArctic(self, uri)


Expand All @@ -77,42 +101,52 @@ def __init__(self, venv, uri):
self.uri = uri
self.init_storage()

def execute(self, python_commands : List[str], dfs : Optional[Dict] = None) -> None:
def execute(self, python_commands: List[str], dfs: Optional[Dict] = None) -> None:
"""
Prepares the dataframe parquet files and the python script to be run from within the venv.
"""
if dfs is None:
dfs = {}

# Add a random prefix to the temporary directory to avoid conflicts
# rand_prefix = os.urandom(8).hex()
with tempfile.TemporaryDirectory() as dir:
df_load_commands = []
for df_name, df_value in dfs.items():
parquet_file = os.path.join(dir, f"{df_name}.parquet")
df_value.to_parquet(parquet_file)
df_load_commands.append(f"{df_name} = pd.read_parquet({repr(parquet_file)})")

python_commands = [
"from arcticdb import Arctic",
"import pandas as pd",
"import numpy as np",
f"ac = Arctic({repr(self.uri)})"
] + df_load_commands + python_commands
df_load_commands.append(
f"{df_name} = pd.read_parquet({repr(parquet_file)})"
)

python_commands = (
[
"from arcticdb import Arctic",
"import pandas as pd",
"import numpy as np",
f"ac = Arctic({repr(self.uri)})",
]
+ df_load_commands
+ python_commands
)

python_path = os.path.join(dir, "run.py")
with open(python_path, "w") as python_file:
python_file.write("\n".join(python_commands))

result = self.venv.execute_python_file(python_path)
if result.returncode != 0:
raise ErrorInVenv(f"Executing {python_commands} failed with return code {result.returncode}")
raise ErrorInVenv(
f"Executing {python_commands} failed with return code {result.returncode}: {result}"
)

def init_storage(self):
self.execute([])

def create_library(self, lib_name : str) -> 'VenvLib':
def create_library(self, lib_name: str) -> "VenvLib":
return VenvLib(self, lib_name, create_if_not_exists=True)

def get_library(self, lib_name : str) -> 'VenvLib':
def get_library(self, lib_name: str) -> "VenvLib":
return VenvLib(self, lib_name, create_if_not_exists=False)


Expand All @@ -126,33 +160,34 @@ def __init__(self, arctic, lib_name, create_if_not_exists=True):
def create_lib(self):
self.arctic.execute([f"ac.create_library('{self.lib_name}')"])

def execute(self, python_commands : List[str], dfs : Optional[Dict] = None) -> None:
def execute(self, python_commands: List[str], dfs: Optional[Dict] = None) -> None:
python_commands = [
f"lib = ac.get_library('{self.lib_name}')",
] + python_commands
return self.arctic.execute(python_commands, dfs)

def write(self, sym : str, df) -> None:
def write(self, sym: str, df) -> None:
return self.execute([f"lib.write('{sym}', df)"], {"df": df})

def assert_read(self, sym : str, df) -> None:
def assert_read(self, sym: str, df) -> None:
python_commands = [
f"read_df = lib.read('{sym}').data",
"pd.testing.assert_frame_equal(read_df, expected_df)"
"print(read_df)",
"pd.testing.assert_frame_equal(read_df, expected_df)",
]
return self.execute(python_commands, {"expected_df": df})


@pytest.fixture(
scope="session",
# scope="session",
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes here are so we can support parallel executions with pytest-xdist

params=[
pytest.param("1.6.2", marks=VENV_COMPAT_TESTS_MARK),
pytest.param("4.5.1", marks=VENV_COMPAT_TESTS_MARK),
] # TODO: Extend this list with other old versions
], # TODO: Extend this list with other old versions
)
def old_venv(request):
def old_venv(request, tmp_path):
version = request.param
path = os.path.join("venvs", version)
path = os.path.join("venvs", tmp_path, version)
compat_dir = os.path.dirname(os.path.abspath(__file__))
requirements_file = os.path.join(compat_dir, f"requirements-{version}.txt")
with Venv(path, requirements_file, version) as old_venv:
Expand All @@ -161,7 +196,9 @@ def old_venv(request):

@pytest.fixture(
params=[
"lmdb",
# Don't include lmdb because there is a bug in 1.6.2/4.5.1
# which causes the tests to segfault intermittently
# "lmdb",
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the main changes, as :

  • there is a bug on version 1.6.2 and 4.5.1 with lead to flaky seg faults with lmdb storage
  • 1.6.2 doesn't support Mongo

"s3_ssl_disabled",
pytest.param("azurite", marks=AZURE_TESTS_MARK),
pytest.param("mongo", marks=MONGO_TESTS_MARK),
Expand All @@ -183,11 +220,7 @@ def arctic_uri(request):
@pytest.fixture()
def old_venv_and_arctic_uri(old_venv, arctic_uri):
# TODO: Once #1979 is understood and fixed reenable mongo, lmdb and azure for versions which have the fix.
if arctic_uri.startswith("mongo"):
pytest.skip("Mongo storage backend has a probable desctruction bug, which can cause flaky segfaults.")
if arctic_uri.startswith("lmdb"):
pytest.skip("LMDB storage backend has a probable desctruction bug, which can cause flaky segfaults.")
if arctic_uri.startswith("azure"):
pytest.skip("Azure storage backend has probable a desctruction bug, which can cause flaky segfaults.")
if arctic_uri.startswith("mongo") and "1.6.2" in old_venv.version:
pytest.skip("Mongo storage backend is not supported in 1.6.2")

return old_venv, arctic_uri
Loading