Skip to content

Commit

Permalink
Use pytest fixtures across tests, add deprecation warning in reader
Browse files Browse the repository at this point in the history
  • Loading branch information
sgreenbury committed Jun 19, 2024
1 parent 09678d0 commit 86084fa
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 73 deletions.
55 changes: 24 additions & 31 deletions python/tests/test_builder.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,44 @@
import pytest
from uatk_spc.builder import unnest_pandas, unnest_polars
from test_utils import TEST_URL_PB, TEST_URL_PQ
from uatk_spc.builder import Builder

# Every (input format, backend) combination, grouped by input format.
TEST_PARAMS = [
    (input_format, backend)
    for input_format in ("protobuf", "parquet")
    for backend in ("pandas", "polars")
]
# Backend-specific subsets; they keep the protobuf-then-parquet order.
TEST_PARAMS_PANDAS = [pair for pair in TEST_PARAMS if pair[1] == "pandas"]
TEST_PARAMS_POLARS = [pair for pair in TEST_PARAMS if pair[1] == "polars"]
# Each input format paired with itself.
TEST_PARAMS_PAIRED = [
    (input_format, input_format) for input_format in ("protobuf", "parquet")
]

# Column names the unnest tests compare (order-insensitively) against the
# households table once its "details" column has been unnested.
EXPECTED_COLUMNS = [
    "id", "msoa11cd", "oa11cd", "members",
    "hid", "nssec8", "accommodation_type", "communal_type",
    "num_rooms", "central_heat", "tenure", "num_cars",
]

@pytest.fixture
def builder(request):
    """Return a ``Builder`` for an indirect ``(input_type, backend)`` parameter.

    The parquet test file is used when the input type is ``"parquet"``; any
    other input type falls through to the protobuf test file.
    """
    source_format, engine = request.param
    test_url = TEST_URL_PQ if source_format == "parquet" else TEST_URL_PB
    return Builder(filepath=test_url, backend=engine)


@pytest.mark.parametrize("reader", TEST_PARAMS_PANDAS, indirect=True)
def test_unnest_pandas_data(reader):
    """Unnesting ``details`` with pandas yields exactly ``EXPECTED_COLUMNS``."""
    unnested = unnest_pandas(reader.households, ["details"])
    # Column order is irrelevant, so compare sorted names.
    actual_columns = sorted(unnested.columns.to_list())
    assert actual_columns == sorted(EXPECTED_COLUMNS)
@pytest.fixture
def builder_pandas(request):
    """Return a pandas-backed ``Builder`` for an indirect input-type parameter.

    ``"parquet"`` selects the parquet test file; any other value selects the
    protobuf test file.
    """
    test_url = TEST_URL_PQ if request.param == "parquet" else TEST_URL_PB
    return Builder(filepath=test_url, backend="pandas")


@pytest.mark.parametrize("reader", TEST_PARAMS_POLARS, indirect=True)
def test_unnest_polars_data(reader):
    """Unnesting ``details`` with polars yields exactly ``EXPECTED_COLUMNS``."""
    unnested = unnest_polars(reader.households, ["details"])
    # Column order is irrelevant, so compare sorted names.
    actual_columns = sorted(unnested.columns)
    assert actual_columns == sorted(EXPECTED_COLUMNS)
@pytest.fixture
def builder_polars(request):
    """Return a polars-backed ``Builder`` for an indirect input-type parameter.

    ``"parquet"`` selects the parquet test file; any other value selects the
    protobuf test file.
    """
    test_url = TEST_URL_PQ if request.param == "parquet" else TEST_URL_PB
    return Builder(filepath=test_url, backend="polars")


@pytest.mark.parametrize(
Expand Down
34 changes: 30 additions & 4 deletions python/tests/test_reader.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
import pytest
from test_utils import TEST_URL_PB, TEST_URL_PQ
from test_utils import EXPECTED_COLUMNS, TEST_URL_PB, TEST_URL_PQ
from uatk_spc.builder import unnest_pandas, unnest_polars
from uatk_spc.reader import Reader, filepath_to_path_and_region, is_parquet, is_protobuf

# (input_type, backend) pairs passed to the indirect ``reader`` fixture.
TEST_READER_PARAMS = [(input_type, "polars") for input_type in ("parquet", "protobuf")]
TEST_PARAMS_POLARS = [(input_type, "polars") for input_type in ("parquet", "protobuf")]
TEST_PARAMS_PANDAS = [(input_type, "pandas") for input_type in ("protobuf", "parquet")]


@pytest.fixture
def reader(request):
    """Return a ``Reader`` for an indirect ``(input_type, backend)`` parameter.

    The parquet test file is used when the input type is ``"parquet"``; any
    other input type falls through to the protobuf test file.
    """
    source_format, engine = request.param
    test_url = TEST_URL_PQ if source_format == "parquet" else TEST_URL_PB
    return Reader(filepath=test_url, backend=engine)


def test_is_parquet():
Expand All @@ -28,15 +42,27 @@ def test_reader(filepath):
assert spc.people.shape[0] == 4991


@pytest.mark.parametrize("reader", TEST_READER_PARAMS, indirect=True)
@pytest.mark.parametrize("reader", TEST_PARAMS_POLARS, indirect=True)
def test_merge_people_and_time_use_diaries(reader):
    """Merging people with weekday diaries produces the expected table shape."""
    requested_features = {"health": ["bmi"], "demographics": ["age_years"]}
    people_with_diaries = reader.merge_people_and_time_use_diaries(
        requested_features, diary_type="weekday_diaries"
    )
    assert people_with_diaries.shape == (197_397, 30)


@pytest.mark.parametrize("reader", TEST_READER_PARAMS, indirect=True)
@pytest.mark.parametrize("reader", TEST_PARAMS_POLARS, indirect=True)
def test_merge_people_and_households(reader):
    """Merging people with households produces the expected table shape."""
    people_with_households = reader.merge_people_and_households()
    expected_shape = (4991, 17)
    assert people_with_households.shape == expected_shape


@pytest.mark.parametrize("reader", TEST_PARAMS_PANDAS, indirect=True)
def test_unnest_pandas_data(reader):
    """Unnesting ``details`` with pandas yields exactly ``EXPECTED_COLUMNS``."""
    unnested = unnest_pandas(reader.households, ["details"])
    # Column order is irrelevant, so compare sorted names.
    actual_columns = sorted(unnested.columns.to_list())
    assert actual_columns == sorted(EXPECTED_COLUMNS)


@pytest.mark.parametrize("reader", TEST_PARAMS_POLARS, indirect=True)
def test_unnest_polars_data(reader):
    """Unnesting ``details`` with polars yields exactly ``EXPECTED_COLUMNS``."""
    unnested = unnest_polars(reader.households, ["details"])
    # Column order is irrelevant, so compare sorted names.
    actual_columns = sorted(unnested.columns)
    assert actual_columns == sorted(EXPECTED_COLUMNS)
52 changes: 14 additions & 38 deletions python/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
import pytest
from uatk_spc.builder import Builder
from uatk_spc.reader import Reader

TEST_URL_PB = (
"https://ramp0storage.blob.core.windows.net/test-spc-output/test_region.pb.gz"
)
Expand All @@ -10,37 +6,17 @@
)


@pytest.fixture
def reader(request):
    """Return a ``Reader`` for an indirect ``(input_type, backend)`` parameter.

    The parquet test file is used when the input type is ``"parquet"``; any
    other input type falls through to the protobuf test file.
    """
    source_format, engine = request.param
    test_url = TEST_URL_PQ if source_format == "parquet" else TEST_URL_PB
    return Reader(filepath=test_url, backend=engine)


@pytest.fixture
def builder(request):
    """Return a ``Builder`` for an indirect ``(input_type, backend)`` parameter.

    The parquet test file is used when the input type is ``"parquet"``; any
    other input type falls through to the protobuf test file.
    """
    source_format, engine = request.param
    test_url = TEST_URL_PQ if source_format == "parquet" else TEST_URL_PB
    return Builder(filepath=test_url, backend=engine)


@pytest.fixture
def builder_pandas(request):
    """Return a pandas-backed ``Builder`` for an indirect input-type parameter.

    ``"parquet"`` selects the parquet test file; any other value selects the
    protobuf test file.
    """
    test_url = TEST_URL_PQ if request.param == "parquet" else TEST_URL_PB
    return Builder(filepath=test_url, backend="pandas")


@pytest.fixture
def builder_polars(request):
    """Return a polars-backed ``Builder`` for an indirect input-type parameter.

    ``"parquet"`` selects the parquet test file; any other value selects the
    protobuf test file.
    """
    test_url = TEST_URL_PQ if request.param == "parquet" else TEST_URL_PB
    return Builder(filepath=test_url, backend="polars")
# Column names the unnest tests compare (order-insensitively) against the
# households table once its "details" column has been unnested.
EXPECTED_COLUMNS = [
    "id", "msoa11cd", "oa11cd", "members",
    "hid", "nssec8", "accommodation_type", "communal_type",
    "num_rooms", "central_heat", "tenure", "num_cars",
]

0 comments on commit 86084fa

Please sign in to comment.