From 1d18637ce8dd276db73beeb5a95d3d9ef8d7ff82 Mon Sep 17 00:00:00 2001 From: Christopher Neffshade Date: Tue, 19 Sep 2023 09:32:46 +0000 Subject: [PATCH 1/2] Change handling of copy=None defaults for Pandas 2 --- sdks/python/apache_beam/dataframe/frame_base.py | 10 +++++++++- .../apache_beam/dataframe/frame_base_test.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/dataframe/frame_base.py b/sdks/python/apache_beam/dataframe/frame_base.py index 48a4c29d0589..383455accc2d 100644 --- a/sdks/python/apache_beam/dataframe/frame_base.py +++ b/sdks/python/apache_beam/dataframe/frame_base.py @@ -678,7 +678,15 @@ def wrap(func): def wrapper(**kwargs): for name in defaults_to_populate: if name not in kwargs: - kwargs[name] = arg_to_default[name] + # In pandas 2, many methods rely on the default copy=None + # to mean that copy is the value of copy_on_write. Since + # copy_on_write will always be true for Beam, just fill it + # in here. In pandas 1, the default was True anyway. + if name == 'copy' and arg_to_default[name] is None: + kwargs[name] = True + else: + kwargs[name] = arg_to_default[name] + return func(**kwargs) return wrapper diff --git a/sdks/python/apache_beam/dataframe/frame_base_test.py b/sdks/python/apache_beam/dataframe/frame_base_test.py index b3077320720f..0a73905339fd 100644 --- a/sdks/python/apache_beam/dataframe/frame_base_test.py +++ b/sdks/python/apache_beam/dataframe/frame_base_test.py @@ -174,6 +174,21 @@ def func(self, a, **kwargs): 'a': 2, 'b': 4, 'c': 6, 'kw_only': 8 }) + def test_populate_defaults_overwrites_copy(self): + class Base(object): + def func(self, a=1, b=2, c=3, *, copy=None): + pass + + class Proxy(object): + @frame_base.args_to_kwargs(Base) + @frame_base.populate_defaults(Base) + def func(self, a, copy, **kwargs): + return dict(kwargs, a=a, copy=copy) + + proxy = Proxy() + self.assertEqual(proxy.func(), {'a': 1, 'copy': True}) + self.assertEqual(proxy.func(copy=False), {'a': 1, 'copy': False}) + if __name__ == '__main__': unittest.main() From ffa45c9dbeb808cdbc6ac0fc4673484e8f43e447 Mon Sep 17 00:00:00 2001 From: Christopher Neffshade Date: Wed, 20 Sep 2023 17:40:00 +0000 Subject: [PATCH 2/2] Remove copy logic from wrapper --- sdks/python/apache_beam/dataframe/frame_base.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sdks/python/apache_beam/dataframe/frame_base.py b/sdks/python/apache_beam/dataframe/frame_base.py index 383455accc2d..4e89e473b730 100644 --- a/sdks/python/apache_beam/dataframe/frame_base.py +++ b/sdks/python/apache_beam/dataframe/frame_base.py @@ -674,18 +674,18 @@ def wrap(func): if removed_args: defaults_to_populate -= set(removed_args) + # In pandas 2, many methods rely on the default copy=None + # to mean that copy is the value of copy_on_write. Since + # copy_on_write will always be true for Beam, just fill it + # in here. In pandas 1, the default was True anyway. + if 'copy' in arg_to_default and arg_to_default['copy'] is None: + arg_to_default['copy'] = True + @functools.wraps(func) def wrapper(**kwargs): for name in defaults_to_populate: if name not in kwargs: - # In pandas 2, many methods rely on the default copy=None - # to mean that copy is the value of copy_on_write. Since - # copy_on_write will always be true for Beam, just fill it - # in here. In pandas 1, the default was True anyway. - if name == 'copy' and arg_to_default[name] is None: - kwargs[name] = True - else: - kwargs[name] = arg_to_default[name] + kwargs[name] = arg_to_default[name] return func(**kwargs)