diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e5d65ad82cc95..2707adb06a1d6 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -311,6 +311,7 @@ Removal of prior version deprecations/changes - Removed the deprecated ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep=r"\s+"`` instead (:issue:`55569`) - Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`) - Stopped automatically casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`) +- Stopped performing dtype inference in :class:`Index`, :class:`Series` and :class:`DataFrame` constructors when given a pandas object (:class:`Series`, :class:`Index`, :class:`ExtensionArray`); call ``.infer_objects`` on the input to keep the previous behavior (:issue:`56012`) - Stopped performing dtype inference when setting a :class:`Index` into a :class:`DataFrame` (:issue:`56102`) - Stopped performing dtype inference with in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`) - Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 12395b42bba19..a757ef6fc1a29 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -10,7 +10,6 @@ ContextManager, cast, ) -import warnings import numpy as np @@ -290,17 +289,11 @@ def box_expected(expected, box_cls, transpose: bool = True): else: expected = pd.array(expected, copy=False) elif box_cls is Index: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning) - expected = 
Index(expected) + expected = Index(expected) elif box_cls is Series: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning) - expected = Series(expected) + expected = Series(expected) elif box_cls is DataFrame: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning) - expected = Series(expected).to_frame() + expected = Series(expected).to_frame() if transpose: # for vector operations, we need a DataFrame to be a single-row, # not a single-column, in order to operate against non-DataFrame diff --git a/pandas/core/construction.py b/pandas/core/construction.py index f01d8822241c9..360e1d5ddd3ff 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -554,7 +554,7 @@ def sanitize_array( # Avoid ending up with a NumpyExtensionArray dtype = dtype.numpy_dtype - data_was_index = isinstance(data, ABCIndex) + infer_object = not isinstance(data, (ABCIndex, ABCSeries)) # extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray data = extract_array(data, extract_numpy=True, extract_range=True) @@ -607,7 +607,7 @@ def sanitize_array( if dtype is None: subarr = data - if data.dtype == object and not data_was_index: + if data.dtype == object and infer_object: subarr = maybe_infer_to_datetimelike(data) elif data.dtype.kind == "U" and using_pyarrow_string_dtype(): from pandas.core.arrays.string_ import StringDtype diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 97a4e414608b8..703fece35b23a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -728,10 +728,6 @@ def __init__( NDFrame.__init__(self, data) return - is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) - data_dtype = getattr(data, "dtype", None) - original_dtype = dtype - # GH47215 if isinstance(index, set): raise ValueError("index cannot be a set") @@ -896,18 +892,6 @@ def __init__( NDFrame.__init__(self, mgr) - if original_dtype 
is None and is_pandas_object and data_dtype == np.object_: - if self.dtypes.iloc[0] != data_dtype: - warnings.warn( - "Dtype inference on a pandas object " - "(Series, Index, ExtensionArray) is deprecated. The DataFrame " - "constructor will keep the original dtype in the future. " - "Call `infer_objects` on the result to get the old " - "behavior.", - FutureWarning, - stacklevel=2, - ) - # ---------------------------------------------------------------------- def __dataframe__( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 56030a15dc143..15c318e5e9caf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -490,8 +490,6 @@ def __new__( if not copy and isinstance(data, (ABCSeries, Index)): refs = data._references - is_pandas_object = isinstance(data, (ABCSeries, Index, ExtensionArray)) - # range if isinstance(data, (range, RangeIndex)): result = RangeIndex(start=data, copy=copy, name=name) @@ -508,7 +506,7 @@ def __new__( elif is_ea_or_datetimelike_dtype(data_dtype): pass - elif isinstance(data, (np.ndarray, Index, ABCSeries)): + elif isinstance(data, (np.ndarray, ABCMultiIndex)): if isinstance(data, ABCMultiIndex): data = data._values @@ -518,7 +516,9 @@ def __new__( # they are actually ints, e.g. '0' and 0.0 # should not be coerced data = com.asarray_tuplesafe(data, dtype=_dtype_obj) - + elif isinstance(data, (ABCSeries, Index)): + # GH 56244: Avoid potential inference on object types + pass elif is_scalar(data): raise cls._raise_scalar_data_error(data) elif hasattr(data, "__array__"): @@ -571,19 +571,7 @@ def __new__( klass = cls._dtype_to_subclass(arr.dtype) arr = klass._ensure_array(arr, arr.dtype, copy=False) - result = klass._simple_new(arr, name, refs=refs) - if dtype is None and is_pandas_object and data_dtype == np.object_: - if result.dtype != data_dtype: - warnings.warn( - "Dtype inference on a pandas object " - "(Series, Index, ExtensionArray) is deprecated. 
The Index " - "constructor will keep the original dtype in the future. " - "Call `infer_objects` on the result to get the old " - "behavior.", - FutureWarning, - stacklevel=2, - ) - return result # type: ignore[return-value] + return klass._simple_new(arr, name, refs=refs) @classmethod def _ensure_array(cls, data, dtype, copy: bool): diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index cea52bf8c91b2..23572975a1112 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -192,6 +192,7 @@ def ndarray_to_mgr( ) -> Manager: # used in DataFrame.__init__ # input must be a ndarray, list, Series, Index, ExtensionArray + infer_object = not isinstance(values, (ABCSeries, Index, ExtensionArray)) if isinstance(values, ABCSeries): if columns is None: @@ -287,15 +288,14 @@ def ndarray_to_mgr( # if we don't have a dtype specified, then try to convert objects # on the entire block; this is to convert if we have datetimelike's # embedded in an object type - if dtype is None and is_object_dtype(values.dtype): + if dtype is None and infer_object and is_object_dtype(values.dtype): obj_columns = list(values) maybe_datetime = [maybe_infer_to_datetimelike(x) for x in obj_columns] # don't convert (and copy) the objects if no type inference occurs if any(x is not y for x, y in zip(obj_columns, maybe_datetime)): - dvals_list = [ensure_block_shape(dval, 2) for dval in maybe_datetime] block_values = [ - new_block_2d(dvals_list[n], placement=BlockPlacement(n)) - for n in range(len(dvals_list)) + new_block_2d(ensure_block_shape(dval, 2), placement=BlockPlacement(n)) + for n, dval in enumerate(maybe_datetime) ] else: bp = BlockPlacement(slice(len(columns))) diff --git a/pandas/core/series.py b/pandas/core/series.py index f67c0753fa9df..bfaba866c3dfd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -389,10 +389,6 @@ def __init__( self.name = name return - is_pandas_object = isinstance(data, 
(Series, Index, ExtensionArray)) - data_dtype = getattr(data, "dtype", None) - original_dtype = dtype - if isinstance(data, (ExtensionArray, np.ndarray)): if copy is not False: if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)): @@ -438,7 +434,6 @@ def __init__( data = data.astype(dtype) refs = data._references - data = data._values copy = False elif isinstance(data, np.ndarray): @@ -512,17 +507,6 @@ def __init__( self.name = name self._set_axis(0, index) - if original_dtype is None and is_pandas_object and data_dtype == np.object_: - if self.dtype != data_dtype: - warnings.warn( - "Dtype inference on a pandas object " - "(Series, Index, ExtensionArray) is deprecated. The Series " - "constructor will keep the original dtype in the future. " - "Call `infer_objects` on the result to get the old behavior.", - FutureWarning, - stacklevel=find_stack_level(), - ) - def _init_dict( self, data: Mapping, index: Index | None = None, dtype: DtypeObj | None = None ): diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index bc931b53b37d0..eb5177e393936 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -228,12 +228,12 @@ def test_dataframe_from_series_or_index_different_dtype(index_or_series): assert df._mgr._has_no_reference(0) -def test_dataframe_from_series_infer_datetime(): +def test_dataframe_from_series_dont_infer_datetime(): ser = Series([Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype=object) - with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): - df = DataFrame(ser) - assert not np.shares_memory(get_array(ser), get_array(df, 0)) - assert df._mgr._has_no_reference(0) + df = DataFrame(ser) + assert df.dtypes.iloc[0] == np.dtype(object) + assert np.shares_memory(get_array(ser), get_array(df, 0)) + assert not df._mgr._has_no_reference(0) @pytest.mark.parametrize("index", [None, [0, 1, 2]]) diff --git 
a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index cbd969e5d90bf..5032932256488 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2702,21 +2702,14 @@ def test_frame_string_inference_block_dim(self): df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]])) assert df._mgr.blocks[0].ndim == 2 - def test_inference_on_pandas_objects(self): + @pytest.mark.parametrize("klass", [Series, Index]) + def test_inference_on_pandas_objects(self, klass): # GH#56012 - idx = Index([Timestamp("2019-12-31")], dtype=object) - with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): - result = DataFrame(idx, columns=["a"]) - assert result.dtypes.iloc[0] != np.object_ - result = DataFrame({"a": idx}) + obj = klass([Timestamp("2019-12-31")], dtype=object) + result = DataFrame(obj, columns=["a"]) assert result.dtypes.iloc[0] == np.object_ - ser = Series([Timestamp("2019-12-31")], dtype=object) - - with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): - result = DataFrame(ser, columns=["a"]) - assert result.dtypes.iloc[0] != np.object_ - result = DataFrame({"a": ser}) + result = DataFrame({"a": obj}) assert result.dtypes.iloc[0] == np.object_ def test_dict_keys_returns_rangeindex(self): diff --git a/pandas/tests/indexes/base_class/test_constructors.py b/pandas/tests/indexes/base_class/test_constructors.py index e5956f808286d..6036eddce7a01 100644 --- a/pandas/tests/indexes/base_class/test_constructors.py +++ b/pandas/tests/indexes/base_class/test_constructors.py @@ -59,18 +59,12 @@ def test_index_string_inference(self): ser = Index(["a", 1]) tm.assert_index_equal(ser, expected) - def test_inference_on_pandas_objects(self): + @pytest.mark.parametrize("klass", [Series, Index]) + def test_inference_on_pandas_objects(self, klass): # GH#56012 - idx = Index([pd.Timestamp("2019-12-31")], dtype=object) - with tm.assert_produces_warning(FutureWarning, 
match="Dtype inference"): - result = Index(idx) - assert result.dtype != np.object_ - - ser = Series([pd.Timestamp("2019-12-31")], dtype=object) - - with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): - result = Index(ser) - assert result.dtype != np.object_ + obj = klass([pd.Timestamp("2019-12-31")], dtype=object) + result = Index(obj) + assert result.dtype == np.object_ def test_constructor_not_read_only(self): # GH#57130 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index bd38e6c2ff333..e701a49ea93ad 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -104,16 +104,9 @@ def test_constructor_copy(self, using_infer_string): ) def test_constructor_from_index_dtlike(self, cast_as_obj, index): if cast_as_obj: - with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): - result = Index(index.astype(object)) - else: - result = Index(index) - - tm.assert_index_equal(result, index) - - if isinstance(index, DatetimeIndex): - assert result.tz == index.tz - if cast_as_obj: + result = Index(index.astype(object)) + assert result.dtype == np.dtype(object) + if isinstance(index, DatetimeIndex): # GH#23524 check that Index(dti, dtype=object) does not # incorrectly raise ValueError, and that nanoseconds are not # dropped @@ -121,6 +114,10 @@ def test_constructor_from_index_dtlike(self, cast_as_obj, index): result = Index(index, dtype=object) assert result.dtype == np.object_ assert list(result) == list(index) + else: + result = Index(index) + + tm.assert_index_equal(result, index) @pytest.mark.parametrize( "index,has_tz", diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 8c60f7beb317d..49ae0a60e6608 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -256,9 +256,8 @@ def test_dt_accessor_limited_display_api(self): 
tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) # Period - idx = period_range("20130101", periods=5, freq="D", name="xxx").astype(object) - with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): - ser = Series(idx) + idx = period_range("20130101", periods=5, freq="D", name="xxx") + ser = Series(idx) results = get_dir(ser) tm.assert_almost_equal( results, sorted(set(ok_for_period + ok_for_period_methods)) diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py index 875ffdd3fe851..b94723b7cbddf 100644 --- a/pandas/tests/series/methods/test_equals.py +++ b/pandas/tests/series/methods/test_equals.py @@ -82,15 +82,13 @@ def test_equals_matching_nas(): left = Series([np.datetime64("NaT")], dtype=object) right = Series([np.datetime64("NaT")], dtype=object) assert left.equals(right) - with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): - assert Index(left).equals(Index(right)) + assert Index(left).equals(Index(right)) assert left.array.equals(right.array) left = Series([np.timedelta64("NaT")], dtype=object) right = Series([np.timedelta64("NaT")], dtype=object) assert left.equals(right) - with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): - assert Index(left).equals(Index(right)) + assert Index(left).equals(Index(right)) assert left.array.equals(right.array) left = Series([np.float64("NaN")], dtype=object) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 00c614cf72c20..44a7862c21273 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1318,9 +1318,8 @@ def test_constructor_periodindex(self): pi = period_range("20130101", periods=5, freq="D") s = Series(pi) assert s.dtype == "Period[D]" - with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): - expected = Series(pi.astype(object)) - tm.assert_series_equal(s, expected) + 
expected = Series(pi.astype(object)) + assert expected.dtype == object def test_constructor_dict(self): d = {"a": 0.0, "b": 1.0, "c": 2.0} @@ -2137,20 +2136,14 @@ def test_series_string_inference_na_first(self): result = Series([pd.NA, "b"]) tm.assert_series_equal(result, expected) - def test_inference_on_pandas_objects(self): + @pytest.mark.parametrize("klass", [Series, Index]) + def test_inference_on_pandas_objects(self, klass): # GH#56012 - ser = Series([Timestamp("2019-12-31")], dtype=object) - with tm.assert_produces_warning(None): - # This doesn't do inference - result = Series(ser) + obj = klass([Timestamp("2019-12-31")], dtype=object) + # This doesn't do inference + result = Series(obj) assert result.dtype == np.object_ - idx = Index([Timestamp("2019-12-31")], dtype=object) - - with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): - result = Series(idx) - assert result.dtype != np.object_ - class TestSeriesConstructorIndexCoercion: def test_series_constructor_datetimelike_index_coercion(self): diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index edfc1973a2bd9..dad5c73b89626 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -23,7 +23,6 @@ date_range, period_range, ) -import pandas._testing as tm from pandas.core.arrays import ( DatetimeArray, TimedeltaArray, @@ -202,17 +201,6 @@ def test_infer_freq_custom(base_delta_code_pair, constructor): assert frequencies.infer_freq(index) is None -@pytest.mark.parametrize( - "freq,expected", [("Q", "QE-DEC"), ("Q-NOV", "QE-NOV"), ("Q-OCT", "QE-OCT")] -) -def test_infer_freq_index(freq, expected): - rng = period_range("1959Q2", "2009Q3", freq=freq) - with tm.assert_produces_warning(FutureWarning, match="Dtype inference"): - rng = Index(rng.to_timestamp("D", how="e").astype(object)) - - assert rng.inferred_freq == expected - - @pytest.mark.parametrize( 
"expected,dates", list(