Skip to content

Commit

Permalink
ENH: Rolling window with step size (pandas-devGH-15354) (pandas-dev#4…
Browse files Browse the repository at this point in the history
  • Loading branch information
rtpsw authored and yehoshuadimarsky committed Jul 13, 2022
1 parent 9533479 commit 4932554
Show file tree
Hide file tree
Showing 20 changed files with 562 additions and 262 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Other enhancements
- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`)
- :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` now support the argument ``allow_duplicates`` (:issue:`44410`)
- :meth:`.GroupBy.min` and :meth:`.GroupBy.max` now support `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`45428`)
- :meth:`DataFrame.rolling` and :meth:`Series.rolling` now support a ``step`` parameter with fixed-length windows (:issue:`15354`)
- Implemented a ``bool``-dtype :class:`Index`; passing a bool-dtype array-like to ``pd.Index`` will now retain ``bool`` dtype instead of casting to ``object`` (:issue:`45061`)
- Implemented a complex-dtype :class:`Index`; passing a complex-dtype array-like to ``pd.Index`` will now retain complex dtype instead of casting to ``object`` (:issue:`45845`)
- Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`)
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/window/indexers.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ def calculate_variable_window_bounds(
min_periods,
center: bool,
closed: str | None,
step: int | None,
index: np.ndarray, # const int64_t[:]
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
9 changes: 8 additions & 1 deletion pandas/_libs/window/indexers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def calculate_variable_window_bounds(
object min_periods, # unused but here to match get_window_bounds signature
bint center,
str closed,
int64_t step,
const int64_t[:] index
):
"""
Expand All @@ -38,6 +39,9 @@ def calculate_variable_window_bounds(
closed : str
string of side of the window that should be closed
step : int64
Spacing between windows
index : ndarray[int64]
time series index to roll over
Expand All @@ -52,6 +56,9 @@ def calculate_variable_window_bounds(
int64_t start_bound, end_bound, index_growth_sign = 1
Py_ssize_t i, j

if num_values <= 0:
return np.empty(0, dtype='int64'), np.empty(0, dtype='int64')

# default is 'right'
if closed is None:
closed = 'right'
Expand Down Expand Up @@ -143,4 +150,4 @@ def calculate_variable_window_bounds(
# right endpoint is open
if not right_closed and not center:
end[i] -= 1
return start, end
return start[::step], end[::step]
3 changes: 3 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11261,6 +11261,7 @@ def rolling(
on: str | None = None,
axis: Axis = 0,
closed: str | None = None,
step: int | None = None,
method: str = "single",
):
axis = self._get_axis_number(axis)
Expand All @@ -11275,6 +11276,7 @@ def rolling(
on=on,
axis=axis,
closed=closed,
step=step,
method=method,
)

Expand All @@ -11287,6 +11289,7 @@ def rolling(
on=on,
axis=axis,
closed=closed,
step=step,
method=method,
)

Expand Down
54 changes: 45 additions & 9 deletions pandas/core/indexers/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
center passed from the top level rolling API
closed : str, default None
closed passed from the top level rolling API
step : int, default None
step passed from the top level rolling API
.. versionadded:: 1.5
win_type : str, default None
win_type passed from the top level rolling API
Expand Down Expand Up @@ -62,6 +65,7 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

raise NotImplementedError
Expand All @@ -77,14 +81,15 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

if center:
offset = (self.window_size - 1) // 2
else:
offset = 0

end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64")
end = np.arange(1 + offset, num_values + 1 + offset, step, dtype="int64")
start = end - self.window_size
if closed in ["left", "both"]:
start -= 1
Expand All @@ -107,8 +112,12 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

if step is not None:
raise NotImplementedError("step not implemented for variable window")

# error: Argument 4 to "calculate_variable_window_bounds" has incompatible
# type "Optional[bool]"; expected "bool"
# error: Argument 6 to "calculate_variable_window_bounds" has incompatible
Expand All @@ -119,6 +128,7 @@ def get_window_bounds(
min_periods,
center, # type: ignore[arg-type]
closed,
1,
self.index_array, # type: ignore[arg-type]
)

Expand All @@ -145,8 +155,14 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

if step is not None:
raise NotImplementedError("step not implemented for variable offset window")
if num_values <= 0:
return np.empty(0, dtype="int64"), np.empty(0, dtype="int64")

# if the window is variable, default is 'right', otherwise default is 'both'
if closed is None:
closed = "right" if self.index is not None else "both"
Expand Down Expand Up @@ -215,12 +231,15 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

return (
np.zeros(num_values, dtype=np.int64),
np.arange(1, num_values + 1, dtype=np.int64),
)
if step is not None:
raise NotImplementedError("step not implemented for expanding window")

end = np.arange(1, num_values + 1, dtype=np.int64)
start = np.zeros(len(end), dtype=np.int64)
return start, end


class FixedForwardWindowIndexer(BaseIndexer):
Expand Down Expand Up @@ -256,6 +275,7 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

if center:
Expand All @@ -264,11 +284,13 @@ def get_window_bounds(
raise ValueError(
"Forward-looking windows don't support setting the closed argument"
)
if step is None:
step = 1

start = np.arange(num_values, dtype="int64")
start = np.arange(0, num_values, step, dtype="int64")
end = start + self.window_size
if self.window_size:
end[-self.window_size :] = num_values
end = np.clip(end, 0, num_values)

return start, end

Expand Down Expand Up @@ -319,7 +341,11 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
if step is not None:
raise NotImplementedError("step not implemented for groupby window")

# 1) For each group, get the indices that belong to the group
# 2) Use the indices to calculate the start & end bounds of the window
# 3) Append the window bounds in group order
Expand All @@ -339,7 +365,7 @@ def get_window_bounds(
**self.indexer_kwargs,
)
start, end = indexer.get_window_bounds(
len(indices), min_periods, center, closed
len(indices), min_periods, center, closed, step
)
start = start.astype(np.int64)
end = end.astype(np.int64)
Expand All @@ -358,6 +384,8 @@ def get_window_bounds(
)
start_arrays.append(window_indices.take(ensure_platform_int(start)))
end_arrays.append(window_indices.take(ensure_platform_int(end)))
if len(start_arrays) == 0:
return np.array([], dtype=np.int64), np.array([], dtype=np.int64)
start = np.concatenate(start_arrays)
end = np.concatenate(end_arrays)
return start, end
Expand All @@ -373,6 +401,14 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:

return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64)
if step is not None:
raise NotImplementedError(
"step not implemented for exponentail moving window"
)
return (
np.array([0], dtype=np.int64),
np.array([num_values], dtype=np.int64),
)
2 changes: 2 additions & 0 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,7 @@ def cov_func(x, y):
min_periods=min_periods,
center=self.center,
closed=self.closed,
step=self.step,
)
result = window_aggregations.ewmcov(
x_array,
Expand Down Expand Up @@ -798,6 +799,7 @@ def cov_func(x, y):
min_periods=min_periods,
center=self.center,
closed=self.closed,
step=self.step,
)

def _cov(X, Y):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/window/numba_.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ def roll_table(
minimum_periods: int,
*args: Any,
):
result = np.empty(values.shape)
min_periods_mask = np.empty(values.shape)
result = np.empty((len(begin), values.shape[1]))
min_periods_mask = np.empty(result.shape)
for i in numba.prange(len(result)):
start = begin[i]
stop = end[i]
Expand Down

0 comments on commit 4932554

Please sign in to comment.