From 86084fa3b80101ceca11a4b4083ecdceb1921ea4 Mon Sep 17 00:00:00 2001 From: Sam Greenbury Date: Wed, 19 Jun 2024 09:42:28 +0100 Subject: [PATCH] Use pytest fixtures across tests, add deprecation warning in reader --- python/tests/test_builder.py | 55 ++++++++++++++++-------------------- python/tests/test_reader.py | 34 +++++++++++++++++++--- python/tests/test_utils.py | 52 +++++++++------------------------- 3 files changed, 68 insertions(+), 73 deletions(-) diff --git a/python/tests/test_builder.py b/python/tests/test_builder.py index 94349fc..b870d17 100644 --- a/python/tests/test_builder.py +++ b/python/tests/test_builder.py @@ -1,5 +1,6 @@ import pytest -from uatk_spc.builder import unnest_pandas, unnest_polars +from test_utils import TEST_URL_PB, TEST_URL_PQ +from uatk_spc.builder import Builder TEST_PARAMS = [ ("protobuf", "pandas"), @@ -7,45 +8,37 @@ ("parquet", "pandas"), ("parquet", "polars"), ] -TEST_PARAMS_PANDAS = [ - ("protobuf", "pandas"), - ("parquet", "pandas"), -] -TEST_PARAMS_POLARS = [ - ("protobuf", "polars"), - ("parquet", "polars"), -] TEST_PARAMS_PAIRED = [ ("protobuf", "protobuf"), ("parquet", "parquet"), ] -EXPECTED_COLUMNS = [ - "id", - "msoa11cd", - "oa11cd", - "members", - "hid", - "nssec8", - "accommodation_type", - "communal_type", - "num_rooms", - "central_heat", - "tenure", - "num_cars", -] + +@pytest.fixture +def builder(request): + input_type, backend = request.param + if input_type == "parquet": + return Builder(filepath=TEST_URL_PQ, backend=backend) + else: + return Builder(filepath=TEST_URL_PB, backend=backend) -@pytest.mark.parametrize("reader", TEST_PARAMS_PANDAS, indirect=True) -def test_unnest_pandas_data(reader): - spc_unnested = unnest_pandas(reader.households, ["details"]) - assert sorted(spc_unnested.columns.to_list()) == sorted(EXPECTED_COLUMNS) +@pytest.fixture +def builder_pandas(request): + input_type = request.param + if input_type == "parquet": + return Builder(filepath=TEST_URL_PQ, backend="pandas") + else: + return Builder(filepath=TEST_URL_PB, backend="pandas") -@pytest.mark.parametrize("reader", TEST_PARAMS_POLARS, indirect=True) -def test_unnest_polars_data(reader): - spc_unnested = unnest_polars(reader.households, ["details"]) - assert sorted(spc_unnested.columns) == sorted(EXPECTED_COLUMNS) +@pytest.fixture +def builder_polars(request): + input_type = request.param + if input_type == "parquet": + return Builder(filepath=TEST_URL_PQ, backend="polars") + else: + return Builder(filepath=TEST_URL_PB, backend="polars") @pytest.mark.parametrize( diff --git a/python/tests/test_reader.py b/python/tests/test_reader.py index b0c7764..2cbaa74 100644 --- a/python/tests/test_reader.py +++ b/python/tests/test_reader.py @@ -1,8 +1,22 @@ import pytest -from test_utils import TEST_URL_PB, TEST_URL_PQ +from test_utils import EXPECTED_COLUMNS, TEST_URL_PB, TEST_URL_PQ +from uatk_spc.builder import unnest_pandas, unnest_polars from uatk_spc.reader import Reader, filepath_to_path_and_region, is_parquet, is_protobuf -TEST_READER_PARAMS = [("parquet", "polars"), ("protobuf", "polars")] +TEST_PARAMS_POLARS = [("parquet", "polars"), ("protobuf", "polars")] +TEST_PARAMS_PANDAS = [ + ("protobuf", "pandas"), + ("parquet", "pandas"), +] + + +@pytest.fixture +def reader(request): + input_type, backend = request.param + if input_type == "parquet": + return Reader(filepath=TEST_URL_PQ, backend=backend) + else: + return Reader(filepath=TEST_URL_PB, backend=backend) def test_is_parquet(): @@ -28,7 +42,7 @@ def test_reader(filepath): assert spc.people.shape[0] == 4991 -@pytest.mark.parametrize("reader", TEST_READER_PARAMS, indirect=True) +@pytest.mark.parametrize("reader", TEST_PARAMS_POLARS, indirect=True) def test_merge_people_and_time_use_diaries(reader): merged = reader.merge_people_and_time_use_diaries( {"health": ["bmi"], "demographics": ["age_years"]}, diary_type="weekday_diaries" @@ -36,7 +50,19 @@ def test_merge_people_and_time_use_diaries(reader): assert merged.shape == (197_397, 30) -@pytest.mark.parametrize("reader", TEST_READER_PARAMS, indirect=True) +@pytest.mark.parametrize("reader", TEST_PARAMS_POLARS, indirect=True) def test_merge_people_and_households(reader): merged = reader.merge_people_and_households() assert merged.shape == (4991, 17) + + +@pytest.mark.parametrize("reader", TEST_PARAMS_PANDAS, indirect=True) +def test_unnest_pandas_data(reader): + spc_unnested = unnest_pandas(reader.households, ["details"]) + assert sorted(spc_unnested.columns.to_list()) == sorted(EXPECTED_COLUMNS) + + +@pytest.mark.parametrize("reader", TEST_PARAMS_POLARS, indirect=True) +def test_unnest_polars_data(reader): + spc_unnested = unnest_polars(reader.households, ["details"]) + assert sorted(spc_unnested.columns) == sorted(EXPECTED_COLUMNS) diff --git a/python/tests/test_utils.py b/python/tests/test_utils.py index 8143312..b9a567c 100644 --- a/python/tests/test_utils.py +++ b/python/tests/test_utils.py @@ -1,7 +1,3 @@ -import pytest -from uatk_spc.builder import Builder -from uatk_spc.reader import Reader - TEST_URL_PB = ( "https://ramp0storage.blob.core.windows.net/test-spc-output/test_region.pb.gz" ) @@ -10,37 +6,17 @@ ) -@pytest.fixture -def reader(request): - input_type, backend = request.param - if input_type == "parquet": - return Reader(filepath=TEST_URL_PQ, backend=backend) - else: - return Reader(filepath=TEST_URL_PB, backend=backend) - - -@pytest.fixture -def builder(request): - input_type, backend = request.param - if input_type == "parquet": - return Builder(filepath=TEST_URL_PQ, backend=backend) - else: - return Builder(filepath=TEST_URL_PB, backend=backend) - - -@pytest.fixture -def builder_pandas(request): - input_type = request.param - if input_type == "parquet": - return Builder(filepath=TEST_URL_PQ, backend="pandas") - else: - return Builder(filepath=TEST_URL_PB, backend="pandas") - - -@pytest.fixture -def builder_polars(request): - input_type = request.param - if input_type == "parquet": - return Builder(filepath=TEST_URL_PQ, backend="polars") - else: - return Builder(filepath=TEST_URL_PB, backend="polars") +EXPECTED_COLUMNS = [ + "id", + "msoa11cd", + "oa11cd", + "members", + "hid", + "nssec8", + "accommodation_type", + "communal_type", + "num_rooms", + "central_heat", + "tenure", + "num_cars", +]