Skip to content

Commit

Permalink
Fix ValueError for Empty DataFrames: Ensure Process Count is at Least 1 (#245)
Browse files Browse the repository at this point in the history

* Return single slice for nb_items = 0

* Add empty dataframe and series apply test

* Adapt progress bar for 0 size data

* Adapt progress bar for 0 size data
  • Loading branch information
Mithil467 authored Feb 16, 2024
1 parent 4666931 commit 261a652
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 3 deletions.
11 changes: 8 additions & 3 deletions pandarallel/progress_bars.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,10 @@ def __remove_displayed_lines(self) -> None:
self.__lines = []

def __update_line(self, done: int, total: int) -> str:
percent = done / total
if total == 0:
percent = 0
else:
percent = done / total
bar = (":" * int(percent * 40)).ljust(40, " ")
percent = round(percent * 100, 2)
format = " {percent:6.2f}% {bar:s} | {done:8d} / {total:8d} |"
Expand Down Expand Up @@ -155,13 +158,15 @@ def update(self, values: List[int]) -> None:
for index, value in enumerate(values):
bar, label = self.__bars[index].children

label.value = "{} / {}".format(value, bar.max)

bar.value = value
bar.description = "{:.2f}%".format(value / bar.max * 100)

if value >= bar.max:
bar.bar_style = "success"

label.value = "{} / {}".format(value, bar.max)
if bar.max != 0:
bar.description = "{:.2f}%".format(bar.value / bar.max * 100)

def set_error(self, index: int) -> None:
"""Set a bar on error"""
Expand Down
3 changes: 3 additions & 0 deletions pandarallel/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def chunk(nb_item: int, nb_chunks: int, start_offset=0) -> List[slice]:
>>> chunks
[slice(0, 26, None), slice(26, 52, None), slice(52, 78, None), slice(78, 103, None)]
"""
if nb_item == 0:
return [slice(0)]

if nb_item <= nb_chunks:
return [slice(max(0, idx - start_offset), idx + 1) for idx in range(nb_item)]

Expand Down
21 changes: 21 additions & 0 deletions tests/test_pandarallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,20 @@ def test_dataframe_apply_invalid_axis(pandarallel_init):

with pytest.raises(ValueError):
df.parallel_apply(lambda x: x, axis="invalid")

def test_empty_dataframe_apply_axis_0(pandarallel_init, func_dataframe_apply_axis_0):
    """Compare ``parallel_apply`` with ``apply`` on an empty DataFrame.

    Both calls use the default axis; the parallel result must equal the
    sequential one.
    """
    empty_frame = pd.DataFrame()

    expected = empty_frame.apply(func_dataframe_apply_axis_0)
    actual = empty_frame.parallel_apply(func_dataframe_apply_axis_0)

    assert expected.equals(actual)

def test_empty_dataframe_apply_axis_1(pandarallel_init, func_dataframe_apply_axis_1):
    """Compare ``parallel_apply`` with ``apply`` on an empty DataFrame, row-wise.

    ``axis=1`` is passed explicitly to both calls: without it the default
    ``axis=0`` is used and the row-wise code path this test is named after
    would never be exercised.
    """
    df = pd.DataFrame()

    res = df.apply(func_dataframe_apply_axis_1, axis=1)
    res_parallel = df.parallel_apply(func_dataframe_apply_axis_1, axis=1)
    assert res.equals(res_parallel)


def test_dataframe_applymap(pandarallel_init, func_dataframe_applymap, df_size):
Expand Down Expand Up @@ -238,6 +252,13 @@ def test_series_apply(pandarallel_init, func_series_apply, df_size):
res_parallel = df.a.parallel_apply(func_series_apply, args=(2,), bias=3)
assert res.equals(res_parallel)

def test_empty_series_apply(pandarallel_init, func_series_apply):
    """Compare ``parallel_apply`` with ``apply`` on an empty Series.

    The Series is column ``a`` of a DataFrame built from an empty list; the
    same positional and keyword arguments are forwarded to both calls.
    """
    empty_column = pd.DataFrame(dict(a=[])).a
    call_kwargs = dict(args=(2,), bias=3)

    expected = empty_column.apply(func_series_apply, **call_kwargs)
    actual = empty_column.parallel_apply(func_series_apply, **call_kwargs)

    assert expected.equals(actual)


def test_series_rolling_apply(pandarallel_init, func_series_rolling_apply, df_size):
df = pd.DataFrame(dict(a=np.random.randint(1, 8, df_size), b=list(range(df_size))))
Expand Down

0 comments on commit 261a652

Please sign in to comment.