Skip to content

Commit

Permalink
Implemented DataFrame.between_time (#2111)
Browse files Browse the repository at this point in the history
ref #1929

Implement `DataFrame.between_time`

```py
>>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min')
>>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
>>> kts = ks.from_pandas(ts)
>>> kts
                     A
2018-04-09 00:00:00  1
2018-04-10 00:20:00  2
2018-04-11 00:40:00  3
2018-04-12 01:00:00  4

>>> kts.between_time('0:15', '0:45')
                     A
2018-04-10 00:20:00  2
2018-04-11 00:40:00  3

You get the times that are *not* between two times by setting
``start_time`` later than ``end_time``:

>>> kts.between_time('0:45', '0:15')
                     A
2018-04-09 00:00:00  1
2018-04-12 01:00:00  4
```
  • Loading branch information
LSturtew committed Mar 20, 2021
1 parent 48c311b commit 583e03d
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 1 deletion.
85 changes: 85 additions & 0 deletions databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
cast,
TYPE_CHECKING,
)
import datetime

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -2982,6 +2983,90 @@ class locomotion
).resolved_copy
return DataFrame(internal)

def between_time(
    self,
    start_time: Union[datetime.time, str],
    end_time: Union[datetime.time, str],
    include_start: bool = True,
    include_end: bool = True,
    axis: Union[int, str] = 0,
) -> Union["Series", "DataFrame"]:
    """
    Select values between particular times of the day (e.g., 9:00-9:30 AM).

    When ``start_time`` is later than ``end_time``, the selection is inverted:
    the result holds the times that are *not* between the two times.

    Parameters
    ----------
    start_time : datetime.time or str
        Initial time as a time filter limit.
    end_time : datetime.time or str
        End time as a time filter limit.
    include_start : bool, default True
        Whether the start time needs to be included in the result.
    include_end : bool, default True
        Whether the end time needs to be included in the result.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Determine range time on index or columns value.
        Only ``axis=0`` is currently supported.

    Returns
    -------
    Series or DataFrame
        Data from the original object filtered to the specified dates range.

    Raises
    ------
    NotImplementedError
        If ``axis`` does not resolve to 0.
    TypeError
        If the index is not a :class:`DatetimeIndex`

    See Also
    --------
    at_time : Select values at a particular time of the day.
    first : Select initial periods of time series based on a date offset.
    last : Select final periods of time series based on a date offset.
    DatetimeIndex.indexer_between_time : Get just the index locations for
        values between particular times of the day.

    Examples
    --------
    >>> idx = pd.date_range('2018-04-09', periods=4, freq='1D20min')
    >>> kdf = ks.DataFrame({'A': [1, 2, 3, 4]}, index=idx)
    >>> kdf
                         A
    2018-04-09 00:00:00  1
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3
    2018-04-12 01:00:00  4

    >>> kdf.between_time('0:15', '0:45')
                         A
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3

    You get the times that are *not* between two times by setting
    ``start_time`` later than ``end_time``:

    >>> kdf.between_time('0:45', '0:15')
                         A
    2018-04-09 00:00:00  1
    2018-04-12 01:00:00  4
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably this avoids a circular import -- confirm.
    from databricks.koalas.indexes import DatetimeIndex

    # The axis is validated first, so an unsupported axis is reported before
    # the index type is inspected.
    if validate_axis(axis) != 0:
        raise NotImplementedError("between_time currently only works for axis=0")

    index_is_datetime = isinstance(self.index, DatetimeIndex)
    if not index_is_datetime:
        raise TypeError("Index must be DatetimeIndex")

    # The four filter arguments are forwarded positionally, in this order,
    # to pandas' own ``between_time`` on each pandas DataFrame batch.
    filter_args = (start_time, end_time, include_start, include_end)

    def pandas_between_time(pdf):
        return pdf.between_time(*filter_args)

    return self.koalas.apply_batch(pandas_between_time)

def where(self, cond, other=np.nan) -> "DataFrame":
"""
Replace values where the condition is False.
Expand Down
1 change: 0 additions & 1 deletion databricks/koalas/missing/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ class _MissingPandasLikeDataFrame(object):
asfreq = _unsupported_function("asfreq")
asof = _unsupported_function("asof")
at_time = _unsupported_function("at_time")
between_time = _unsupported_function("between_time")
boxplot = _unsupported_function("boxplot")
combine = _unsupported_function("combine")
combine_first = _unsupported_function("combine_first")
Expand Down
23 changes: 23 additions & 0 deletions databricks/koalas/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5422,3 +5422,26 @@ def test_align(self):
pdf_l, pdf_r = pdf1.align(pdf2, join=join, axis=1)
self.assert_eq(kdf_l.sort_index(), pdf_l.sort_index())
self.assert_eq(kdf_r.sort_index(), pdf_r.sort_index())

def test_between_time(self):
    """Check ``between_time`` against pandas and verify its two error paths."""
    index = pd.date_range("2018-04-09", periods=4, freq="1D20min")
    pdf = pd.DataFrame({"A": [1, 2, 3, 4]}, index=index)
    kdf = ks.from_pandas(pdf)

    start, end = "0:15", "0:45"

    # The Koalas result is sorted by index before it is compared with pandas.
    expected = pdf.between_time(start, end)
    actual = kdf.between_time(start, end).sort_index()
    self.assert_eq(expected, actual, almost=True)

    # Any axis other than 0 is rejected.
    unsupported_axis = "between_time currently only works for axis=0"
    with self.assertRaisesRegex(NotImplementedError, unsupported_axis):
        kdf.between_time(start, end, axis=1)

    # A frame built without an explicit index is rejected with a TypeError.
    kdf_default_index = ks.DataFrame({"A": [1, 2, 3, 4]})
    with self.assertRaisesRegex(TypeError, "Index must be DatetimeIndex"):
        kdf_default_index.between_time(start, end)

def test_between_time_no_shortcut(self):
    """Re-run ``test_between_time`` with ``compute.shortcut_limit`` set to 0."""
    # NOTE(review): going by the test name, a limit of 0 presumably disables
    # the shortcut execution path -- confirm against the Koalas options docs.
    shortcut_disabled = ks.option_context("compute.shortcut_limit", 0)
    with shortcut_disabled:
        self.test_between_time()
1 change: 1 addition & 0 deletions docs/source/reference/frame.rst
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ Reindexing / Selection / Label manipulation
DataFrame.add_prefix
DataFrame.add_suffix
DataFrame.align
DataFrame.between_time
DataFrame.drop
DataFrame.droplevel
DataFrame.drop_duplicates
Expand Down

0 comments on commit 583e03d

Please sign in to comment.