Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support kw only arguments for frame methods for Pandas 2 #28454

Merged
merged 30 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
7efe546
Remove level and errors arguments for Pandas 2 compatibility.
caneff Sep 8, 2023
64be5a8
Fix formatting
caneff Sep 8, 2023
006c55b
More formatting
caneff Sep 8, 2023
a9bcdfd
Fix empty error case
caneff Sep 8, 2023
0efa4fa
Merge branch 'apache:master' into errors_level3
caneff Sep 11, 2023
647b006
Remove append method for Pandas >= 2.0.
caneff Sep 11, 2023
cba15dd
Add support for ignored arguments and more tests.
caneff Sep 11, 2023
de643c5
Delete removed_args arg from doc part that wasn't using it
caneff Sep 11, 2023
bfa0ae8
Fix up frame_base
caneff Sep 11, 2023
a8cf5d1
Fix with new removed_args argument
caneff Sep 11, 2023
c202057
Fix line lengths
caneff Sep 11, 2023
2a8b489
Fix formatting
caneff Sep 11, 2023
944770e
Add test for append deprecation
caneff Sep 12, 2023
ee7a858
Only populate kwargs if not there
caneff Sep 12, 2023
fcbbcbc
Merge branch 'remove_append' into errors_level3
caneff Sep 12, 2023
d0a7585
Formatting
caneff Sep 12, 2023
035fdfd
Fix frames.py to work with kw only args.
caneff Sep 12, 2023
3ed2822
Fix arg.
caneff Sep 12, 2023
1e2c62c
Merge branch 'remove_append' into errors_level3
caneff Sep 12, 2023
758c390
Merge branch 'errors_level3' into kw_only
caneff Sep 12, 2023
fb38399
Fix arg.
caneff Sep 12, 2023
9fd78c6
Merge branch 'remove_append' into errors_level3
caneff Sep 12, 2023
96607db
Merge branch 'remove_append' into kw_only
caneff Sep 12, 2023
d3d07bb
Fix comments
caneff Sep 13, 2023
5366d87
Merge branch 'remove_append' into errors_level3
caneff Sep 13, 2023
64d0c77
Merge branch 'errors_level3' into kw_only
caneff Sep 13, 2023
85bc52c
Merge branch 'kw_only' of https://github.com/caneff/beam into kw_only
caneff Sep 14, 2023
3ae3d54
Merge branch 'apache:master' into kw_only
caneff Sep 15, 2023
8808774
Address comments
caneff Sep 15, 2023
a5c3a0e
typo
caneff Sep 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions sdks/python/apache_beam/dataframe/frame_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,8 @@ def wrap(func):

removed_arg_names = removed_args if removed_args is not None else []

# TODO: Better handle position only arguments if they are ever a thing
caneff marked this conversation as resolved.
Show resolved Hide resolved
# in Pandas (as of 2.1 currently they aren't).
base_arg_spec = getfullargspec(unwrap(getattr(base_type, func.__name__)))
base_arg_names = base_arg_spec.args
# Some arguments are keyword only and we still want to check against those.
Expand All @@ -514,6 +516,9 @@ def wrap(func):

@functools.wraps(func)
def wrapper(*args, **kwargs):
if len(args) > len(base_arg_names):
raise TypeError(f"{func.__name__} got too many positioned arguments.")

for name, value in zip(base_arg_names, args):
if name in kwargs:
raise TypeError(
Expand All @@ -523,7 +528,7 @@ def wrapper(*args, **kwargs):
# Still have to populate these for the Beam function signature.
if removed_args:
for name in removed_args:
if not name in kwargs:
if name not in kwargs:
kwargs[name] = None
return func(**kwargs)

Expand Down Expand Up @@ -646,13 +651,18 @@ def wrap(func):
return func

base_argspec = getfullargspec(unwrap(getattr(base_type, func.__name__)))
if not base_argspec.defaults:
if not base_argspec.defaults and not base_argspec.kwonlydefaults:
return func

arg_to_default = dict(
zip(
base_argspec.args[-len(base_argspec.defaults):],
base_argspec.defaults))
arg_to_default = {}
if base_argspec.defaults:
arg_to_default.update(
zip(
base_argspec.args[-len(base_argspec.defaults):],
base_argspec.defaults))

if base_argspec.kwonlydefaults:
arg_to_default.update(base_argspec.kwonlydefaults)

unwrapped_func = unwrap(func)
# args that do not have defaults in func, but do have defaults in base
Expand Down
44 changes: 43 additions & 1 deletion sdks/python/apache_beam/dataframe/frame_base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def add_one(frame):

def test_args_to_kwargs(self):
class Base(object):
def func(self, a=1, b=2, c=3):
def func(self, a=1, b=2, c=3, *, kw_only=4):
caneff marked this conversation as resolved.
Show resolved Hide resolved
pass

class Proxy(object):
Expand All @@ -87,6 +87,9 @@ def func(self, **kwargs):
self.assertEqual(proxy.func(2, 4, 6), {'a': 2, 'b': 4, 'c': 6})
self.assertEqual(proxy.func(2, c=6), {'a': 2, 'c': 6})
self.assertEqual(proxy.func(c=6, a=2), {'a': 2, 'c': 6})
self.assertEqual(proxy.func(2, kw_only=20), {'a': 2, 'kw_only': 20})
with self.assertRaises(TypeError): # got too many positioned arguments
proxy.func(2, 4, 6, 8)

def test_args_to_kwargs_populates_defaults(self):
class Base(object):
Expand Down Expand Up @@ -129,6 +132,45 @@ def func_removed_args(self, a, c, **kwargs):
proxy.func_removed_args()
self.assertEqual(proxy.func_removed_args(12, d=100), {'a': 12, 'd': 100})

def test_args_to_kwargs_populates_default_handles_kw_only(self):
class Base(object):
def func(self, a=1, b=2, c=3, *, kw_only=4):
pass

class ProxyUsesKwOnly(object):
@frame_base.args_to_kwargs(Base)
@frame_base.populate_defaults(Base)
def func(self, a, kw_only, **kwargs):
return dict(kwargs, a=a, kw_only=kw_only)

proxy = ProxyUsesKwOnly()

# pylint: disable=too-many-function-args,no-value-for-parameter
self.assertEqual(proxy.func(), {'a': 1, 'kw_only': 4})
self.assertEqual(proxy.func(100), {'a': 100, 'kw_only': 4})
self.assertEqual(
proxy.func(2, 4, 6, kw_only=8), {
'a': 2, 'b': 4, 'c': 6, 'kw_only': 8
})
with self.assertRaises(TypeError):
proxy.func(2, 4, 6, 8) # got too many positioned arguments

class ProxyDoesntUseKwOnly(object):
@frame_base.args_to_kwargs(Base)
@frame_base.populate_defaults(Base)
def func(self, a, **kwargs):
return dict(kwargs, a=a)

proxy = ProxyDoesntUseKwOnly()

# pylint: disable=too-many-function-args,no-value-for-parameter
self.assertEqual(proxy.func(), {'a': 1})
caneff marked this conversation as resolved.
Show resolved Hide resolved
self.assertEqual(proxy.func(100), {'a': 100})
self.assertEqual(
proxy.func(2, 4, 6, kw_only=8), {
'a': 2, 'b': 4, 'c': 6, 'kw_only': 8
})


if __name__ == '__main__':
unittest.main()
4 changes: 2 additions & 2 deletions sdks/python/apache_beam/dataframe/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,7 +907,7 @@ def sort_index(self, axis, **kwargs):
return frame_base.DeferredFrame.wrap(
expressions.ComputedExpression(
'sort_index',
lambda df: df.sort_index(axis, **kwargs),
lambda df: df.sort_index(axis=axis, **kwargs),
[self._expr],
requires_partition_by=partitionings.Arbitrary(),
preserves_partition_by=partitionings.Arbitrary(),
Expand Down Expand Up @@ -2687,7 +2687,7 @@ def set_axis(self, labels, axis, **kwargs):
return frame_base.DeferredFrame.wrap(
expressions.ComputedExpression(
'set_axis',
lambda df: df.set_axis(labels, axis, **kwargs),
lambda df: df.set_axis(labels, axis=axis, **kwargs),
[self._expr],
requires_partition_by=partitionings.Arbitrary(),
preserves_partition_by=partitionings.Arbitrary()))
Expand Down
Loading