diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e741fa7b37f33..8ba9926c054ba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7505,8 +7505,8 @@ def nlargest( - ``first`` : prioritize the first occurrence(s) - ``last`` : prioritize the last occurrence(s) - - ``all`` : do not drop any duplicates, even it means - selecting more than `n` items. + - ``all`` : keep all the ties of the smallest item even if it means + selecting more than ``n`` items. Returns ------- @@ -7568,7 +7568,9 @@ def nlargest( Italy 59000000 1937894 IT Brunei 434000 12128 BN - When using ``keep='all'``, all duplicate items are maintained: + When using ``keep='all'``, the number of elements kept can go beyond ``n`` + if there are duplicate values for the smallest element; all the + ties are kept: >>> df.nlargest(3, 'population', keep='all') population GDP alpha-2 @@ -7578,6 +7580,16 @@ def nlargest( Maldives 434000 4520 MV Brunei 434000 12128 BN + However, ``nlargest`` does not keep ``n`` distinct largest elements: + + >>> df.nlargest(5, 'population', keep='all') + population GDP alpha-2 + France 65000000 2583560 FR + Italy 59000000 1937894 IT + Malta 434000 12011 MT + Maldives 434000 4520 MV + Brunei 434000 12128 BN + To order by the largest values in column "population" and then "GDP", we can specify multiple columns like in the next example. @@ -7614,8 +7626,8 @@ def nsmallest( - ``first`` : take the first occurrence. - ``last`` : take the last occurrence. - - ``all`` : do not drop any duplicates, even it means - selecting more than `n` items. + - ``all`` : keep all the ties of the largest item even if it means + selecting more than ``n`` items. Returns ------- @@ -7669,7 +7681,9 @@ def nsmallest( Tuvalu 11300 38 TV Nauru 337000 182 NR - When using ``keep='all'``, all duplicate items are maintained: + When using ``keep='all'``, the number of elements kept can go beyond ``n`` + if there are duplicate values for the largest element; all the + ties are kept:
>>> df.nsmallest(3, 'population', keep='all') population GDP alpha-2 @@ -7678,6 +7692,16 @@ def nsmallest( Iceland 337000 17036 IS Nauru 337000 182 NR + However, ``nsmallest`` does not keep ``n`` distinct + smallest elements: + + >>> df.nsmallest(4, 'population', keep='all') + population GDP alpha-2 + Tuvalu 11300 38 TV + Anguilla 11300 311 AI + Iceland 337000 17036 IS + Nauru 337000 182 NR + To order by the smallest values in column "population" and then "GDP", we can specify multiple columns like in the next example.