Skip to content

Commit 95f5a6e

Browse files
feat: add DataFrame.pipe() method (#421)
1 parent 456fb32 commit 95f5a6e

File tree

4 files changed

+196
-1
lines changed

4 files changed

+196
-1
lines changed

tests/system/small/test_dataframe.py

+25
Original file line numberDiff line numberDiff line change
@@ -1000,6 +1000,31 @@ def test_apply_series_scalar_callable(
10001000
pandas.testing.assert_series_equal(bf_result, pd_result)
10011001

10021002

1003+
def test_df_pipe(
1004+
scalars_df_index,
1005+
scalars_pandas_df_index,
1006+
):
1007+
columns = ["int64_too", "int64_col"]
1008+
1009+
def foo(x: int, y: int, df):
1010+
return (df + x) % y
1011+
1012+
bf_result = (
1013+
scalars_df_index[columns]
1014+
.pipe((foo, "df"), x=7, y=9)
1015+
.pipe(lambda x: x**2)
1016+
.to_pandas()
1017+
)
1018+
1019+
pd_result = (
1020+
scalars_pandas_df_index[columns]
1021+
.pipe((foo, "df"), x=7, y=9)
1022+
.pipe(lambda x: x**2)
1023+
)
1024+
1025+
pandas.testing.assert_frame_equal(bf_result, pd_result)
1026+
1027+
10031028
def test_df_keys(
10041029
scalars_df_index,
10051030
scalars_pandas_df_index,

tests/system/small/test_series.py

+25
Original file line numberDiff line numberDiff line change
@@ -3203,3 +3203,28 @@ def test_apply_not_supported(scalars_dfs, col, lambda_, exception):
32033203
bf_col = scalars_df[col]
32043204
with pytest.raises(exception):
32053205
bf_col.apply(lambda_, by_row=False)
3206+
3207+
3208+
def test_series_pipe(
3209+
scalars_df_index,
3210+
scalars_pandas_df_index,
3211+
):
3212+
column = "int64_too"
3213+
3214+
def foo(x: int, y: int, df):
3215+
return (df + x) % y
3216+
3217+
bf_result = (
3218+
scalars_df_index[column]
3219+
.pipe((foo, "df"), x=7, y=9)
3220+
.pipe(lambda x: x**2)
3221+
.to_pandas()
3222+
)
3223+
3224+
pd_result = (
3225+
scalars_pandas_df_index[column]
3226+
.pipe((foo, "df"), x=7, y=9)
3227+
.pipe(lambda x: x**2)
3228+
)
3229+
3230+
assert_series_equal(bf_result, pd_result)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/common.py
2+
from __future__ import annotations
3+
4+
from typing import Callable, TYPE_CHECKING
5+
6+
if TYPE_CHECKING:
7+
from bigframes_vendored.pandas.pandas._typing import T
8+
9+
10+
def pipe(
11+
obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
12+
) -> T:
13+
"""
14+
Apply a function ``func`` to object ``obj`` either by passing obj as the
15+
first argument to the function or, in the case that the func is a tuple,
16+
interpret the first element of the tuple as a function and pass the obj to
17+
that function as a keyword argument whose key is the value of the second
18+
element of the tuple.
19+
20+
Args:
21+
func (callable or tuple of (callable, str)):
22+
Function to apply to this object or, alternatively, a
23+
``(callable, data_keyword)`` tuple where ``data_keyword`` is a
24+
string indicating the keyword of ``callable`` that expects the
25+
object.
26+
args (iterable, optional):
27+
Positional arguments passed into ``func``.
28+
kwargs (dict, optional):
29+
A dictionary of keyword arguments passed into ``func``.
30+
31+
Returns:
32+
object: the return type of ``func``.
33+
"""
34+
if isinstance(func, tuple):
35+
func, target = func
36+
if target in kwargs:
37+
msg = f"{target} is both the pipe target and a keyword argument"
38+
raise ValueError(msg)
39+
kwargs[target] = obj
40+
return func(*args, **kwargs)
41+
else:
42+
return func(obj, *args, **kwargs)

third_party/bigframes_vendored/pandas/core/generic.py

+104-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/generic.py
22
from __future__ import annotations
33

4-
from typing import Iterator, Literal, Optional
4+
from typing import Callable, Iterator, Literal, Optional, TYPE_CHECKING
55

66
from bigframes_vendored.pandas.core import indexing
7+
import bigframes_vendored.pandas.core.common as common
78

89
from bigframes import constants
910

11+
if TYPE_CHECKING:
12+
from bigframes_vendored.pandas.pandas._typing import T
13+
1014

1115
class NDFrame(indexing.IndexingMixin):
1216
"""
@@ -963,6 +967,105 @@ def expanding(self, min_periods=1):
963967
"""
964968
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
965969

970+
def pipe(
971+
self,
972+
func: Callable[..., T] | tuple[Callable[..., T], str],
973+
*args,
974+
**kwargs,
975+
) -> T:
976+
"""
977+
Apply chainable functions that expect Series or DataFrames.
978+
979+
**Examples:**
980+
981+
Constructing a income DataFrame from a dictionary.
982+
983+
>>> import bigframes.pandas as bpd
984+
>>> import numpy as np
985+
>>> bpd.options.display.progress_bar = None
986+
987+
>>> data = [[8000, 1000], [9500, np.nan], [5000, 2000]]
988+
>>> df = bpd.DataFrame(data, columns=['Salary', 'Others'])
989+
>>> df
990+
Salary Others
991+
0 8000 1000.0
992+
1 9500 <NA>
993+
2 5000 2000.0
994+
<BLANKLINE>
995+
[3 rows x 2 columns]
996+
997+
Functions that perform tax reductions on an income DataFrame.
998+
999+
>>> def subtract_federal_tax(df):
1000+
... return df * 0.9
1001+
>>> def subtract_state_tax(df, rate):
1002+
... return df * (1 - rate)
1003+
>>> def subtract_national_insurance(df, rate, rate_increase):
1004+
... new_rate = rate + rate_increase
1005+
... return df * (1 - new_rate)
1006+
1007+
Instead of writing
1008+
1009+
>>> subtract_national_insurance(
1010+
... subtract_state_tax(subtract_federal_tax(df), rate=0.12),
1011+
... rate=0.05,
1012+
... rate_increase=0.02) # doctest: +SKIP
1013+
1014+
You can write
1015+
1016+
>>> (
1017+
... df.pipe(subtract_federal_tax)
1018+
... .pipe(subtract_state_tax, rate=0.12)
1019+
... .pipe(subtract_national_insurance, rate=0.05, rate_increase=0.02)
1020+
... )
1021+
Salary Others
1022+
0 5892.48 736.56
1023+
1 6997.32 <NA>
1024+
2 3682.8 1473.12
1025+
<BLANKLINE>
1026+
[3 rows x 2 columns]
1027+
1028+
If you have a function that takes the data as (say) the second
1029+
argument, pass a tuple indicating which keyword expects the
1030+
data. For example, suppose ``national_insurance`` takes its data as ``df``
1031+
in the second argument:
1032+
1033+
>>> def subtract_national_insurance(rate, df, rate_increase):
1034+
... new_rate = rate + rate_increase
1035+
... return df * (1 - new_rate)
1036+
>>> (
1037+
... df.pipe(subtract_federal_tax)
1038+
... .pipe(subtract_state_tax, rate=0.12)
1039+
... .pipe(
1040+
... (subtract_national_insurance, 'df'),
1041+
... rate=0.05,
1042+
... rate_increase=0.02
1043+
... )
1044+
... )
1045+
Salary Others
1046+
0 5892.48 736.56
1047+
1 6997.32 <NA>
1048+
2 3682.8 1473.12
1049+
<BLANKLINE>
1050+
[3 rows x 2 columns]
1051+
1052+
Args:
1053+
func (function):
1054+
Function to apply to this object.
1055+
``args``, and ``kwargs`` are passed into ``func``.
1056+
Alternatively a ``(callable, data_keyword)`` tuple where
1057+
``data_keyword`` is a string indicating the keyword of
1058+
``callable`` that expects this object.
1059+
args (iterable, optional):
1060+
Positional arguments passed into ``func``.
1061+
kwargs (mapping, optional):
1062+
A dictionary of keyword arguments passed into ``func``.
1063+
1064+
Returns:
1065+
same type as caller
1066+
"""
1067+
return common.pipe(self, func, *args, **kwargs)
1068+
9661069
def __nonzero__(self):
9671070
raise ValueError(
9681071
f"The truth value of a {type(self).__name__} is ambiguous. "

0 commit comments

Comments
 (0)