Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: pd.concat dataframes with different datetime64 resolutions #53641

Merged
merged 18 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
34d65b6
BUG: pd.concat dataframes with different datetime64 resolutions
Charlie-XIAO Jun 13, 2023
6b14b40
resolve mypy
Charlie-XIAO Jun 13, 2023
4c9c7d1
retrigger checks
Charlie-XIAO Jun 13, 2023
fa8ef90
Merge branch 'main' into concat-dt-diff
Charlie-XIAO Jul 16, 2023
25a7316
Merge upstream main and resolve conflicts
Charlie-XIAO Aug 29, 2023
a4bf47c
Merge branch 'concat-dt-diff' of https://github.com/Charlie-XIAO/pand…
Charlie-XIAO Aug 29, 2023
5e1b524
Merge remote-tracking branch 'upstream/main' into concat-dt-diff
Charlie-XIAO Aug 29, 2023
ce26e23
Merge remote-tracking branch 'upstream/main' into concat-dt-diff
Charlie-XIAO Oct 23, 2023
82ea086
Merge remote-tracking branch 'upstream/main' into concat-dt-diff
Charlie-XIAO Oct 23, 2023
b16024d
parametrize test
Charlie-XIAO Oct 25, 2023
bf99746
Merge remote-tracking branch 'upstream/main' into concat-dt-diff
Charlie-XIAO Oct 25, 2023
9a2eda3
Merge remote-tracking branch 'upstream/main' into concat-dt-diff
Charlie-XIAO Oct 25, 2023
4323960
Merge remote-tracking branch 'upstream/main' into concat-dt-diff
Charlie-XIAO Oct 25, 2023
4ffc51b
Merge remote-tracking branch 'upstream/main' into concat-dt-diff
Charlie-XIAO Oct 27, 2023
5865480
apply suggestion of jbrockmendel
Charlie-XIAO Oct 27, 2023
555106b
add parametrization for test_concat_tz_series*
Charlie-XIAO Oct 27, 2023
a55a6db
Merge remote-tracking branch 'upstream/main' into concat-dt-diff
Charlie-XIAO Oct 28, 2023
571c23c
apply suggested changes by jbrockmendel
Charlie-XIAO Oct 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,16 @@ def concat_compat(
# i.e. isinstance(to_concat[0], ExtensionArray)
to_concat_eas = cast("Sequence[ExtensionArray]", to_concat)
cls = type(to_concat[0])
return cls._concat_same_type(to_concat_eas)
# GH#53640: e.g. for a datetime array we may need axis=1, whereas
# `_concat_same_type()` defaults to axis=0. Only pass `axis` when it is
# actually needed, because `_concat_same_type()` on some classes does not
# support the `axis` keyword.
if ea_compat_axis or axis == 0:
return cls._concat_same_type(to_concat_eas)
else:
return cls._concat_same_type(
to_concat_eas,
axis=axis, # type: ignore[call-arg]
)
else:
to_concat_arrs = cast("Sequence[np.ndarray]", to_concat)
result = np.concatenate(to_concat_arrs, axis=axis)
Expand Down
58 changes: 41 additions & 17 deletions pandas/tests/reshape/concat/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,22 @@
)
import pandas._testing as tm

# datetime64 resolutions, ordered from coarsest ("s") to finest ("ns");
# _get_finer_unit relies on this ordering when comparing list positions.
UNITS = ["s", "ms", "us", "ns"]


@pytest.fixture(params=UNITS)
def unit(request):
    """
    Fixture parametrized over every entry of ``UNITS``.

    Returns the current parameter, i.e. one resolution string per test run.
    """
    return request.param


# Expose the same fixture under a second name so that a single test can
# request both `unit` and `unit2`.
# NOTE(review): presumably pytest parametrizes the two names independently,
# yielding every pair of resolutions -- confirm against pytest fixture rules.
unit2 = unit


def _get_finer_unit(unit, unit2):
    """
    Return the finer of two datetime64 resolutions.

    Both arguments are looked up in ``UNITS``; the one at the later position
    in that list is returned, and ``unit`` is returned when the positions
    are equal.
    """
    # max() keeps the first argument on ties, matching a ``>=`` comparison.
    return max(unit, unit2, key=UNITS.index)


class TestDatetimeConcat:
def test_concat_datetime64_block(self):
Expand Down Expand Up @@ -307,50 +323,58 @@ def test_concat_tz_series2(self):
result = concat([x, y], ignore_index=True)
tm.assert_series_equal(result, expected)

def test_concat_tz_series3(self):
def test_concat_tz_series3(self, unit, unit2):
# see gh-12217 and gh-12306
# Concatenating two UTC times
first = DataFrame([[datetime(2016, 1, 1)]])
first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]")
first[0] = first[0].dt.tz_localize("UTC")

second = DataFrame([[datetime(2016, 1, 2)]])
second = DataFrame([[datetime(2016, 1, 2)]], dtype=f"M8[{unit2}]")
second[0] = second[0].dt.tz_localize("UTC")

result = concat([first, second])
assert result[0].dtype == "datetime64[ns, UTC]"
exp_unit = _get_finer_unit(unit, unit2)
assert result[0].dtype == f"datetime64[{exp_unit}, UTC]"

def test_concat_tz_series4(self):
def test_concat_tz_series4(self, unit, unit2):
# Concatenating two London times
first = DataFrame([[datetime(2016, 1, 1)]])
first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]")
first[0] = first[0].dt.tz_localize("Europe/London")

second = DataFrame([[datetime(2016, 1, 2)]])
second = DataFrame([[datetime(2016, 1, 2)]], dtype=f"M8[{unit2}]")
second[0] = second[0].dt.tz_localize("Europe/London")

result = concat([first, second])
assert result[0].dtype == "datetime64[ns, Europe/London]"
exp_unit = _get_finer_unit(unit, unit2)
assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]"

def test_concat_tz_series5(self):
def test_concat_tz_series5(self, unit, unit2):
# Concatenating 2+1 London times
first = DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]])
first = DataFrame(
[[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]], dtype=f"M8[{unit}]"
)
first[0] = first[0].dt.tz_localize("Europe/London")

second = DataFrame([[datetime(2016, 1, 3)]])
second = DataFrame([[datetime(2016, 1, 3)]], dtype=f"M8[{unit2}]")
second[0] = second[0].dt.tz_localize("Europe/London")

result = concat([first, second])
assert result[0].dtype == "datetime64[ns, Europe/London]"
exp_unit = _get_finer_unit(unit, unit2)
assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]"

def test_concat_tz_series6(self):
# Concat'ing 1+2 London times
first = DataFrame([[datetime(2016, 1, 1)]])
def test_concat_tz_series6(self, unit, unit2):
# Concatenating 1+2 London times
first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]")
first[0] = first[0].dt.tz_localize("Europe/London")

second = DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]])
second = DataFrame(
[[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]], dtype=f"M8[{unit2}]"
)
second[0] = second[0].dt.tz_localize("Europe/London")

result = concat([first, second])
assert result[0].dtype == "datetime64[ns, Europe/London]"
exp_unit = _get_finer_unit(unit, unit2)
assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]"

def test_concat_tz_series_tzlocal(self):
# see gh-13583
Expand Down
Loading