Skip to content

Commit

Permalink
tests passing again after merging all pull requests
Browse files Browse the repository at this point in the history
  • Loading branch information
stringertheory committed Feb 5, 2024
1 parent 378a43c commit 949801a
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 119 deletions.
169 changes: 84 additions & 85 deletions tests/test_traces.py
@@ -1,12 +1,11 @@
import csv
import os
import pickle
from datetime import datetime
from datetime import datetime, timedelta

from pandas.util.testing import assert_series_equal

from traces import TimeSeries
import pandas as pd
import pytest
from pandas.testing import assert_series_equal

from traces import TimeSeries

Expand Down Expand Up @@ -79,6 +78,7 @@ def test_merge():
ts_c = TimeSeries.merge([])
assert list(ts_c.items()) == []


def test_set_interval():
ts = TimeSeries()

Expand Down Expand Up @@ -142,7 +142,6 @@ def test_set_interval_datetime():
]



def test_remove_points_from_interval():
ts = TimeSeries(default=0)
ts[0] = 0
Expand Down Expand Up @@ -171,102 +170,103 @@ def test_remove_points_from_interval():


def test_sample_interval_days():
import pandas as pd
ts = Domain([(datetime(2012, 1, 1), 400),
(datetime(2012, 3, 1), 400)])

ts[datetime(2012, 1, 4):datetime(2012, 1, 20)] = 10
ts[datetime(2012, 1, 25):datetime(2012, 2, 7)] = 50
ts[datetime(2012, 1, 19):datetime(2012, 1, 27)] = 0

sr = ts.sample_interval(sampling_period=timedelta(days=1), end=datetime(2012, 2, 1))
assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0),
(pd.Timestamp('2012-01-02 00:00:00'), 400.0),
(pd.Timestamp('2012-01-03 00:00:00'), 400.0),
(pd.Timestamp('2012-01-04 00:00:00'), 10.0),
(pd.Timestamp('2012-01-05 00:00:00'), 10.0),
(pd.Timestamp('2012-01-06 00:00:00'), 10.0),
(pd.Timestamp('2012-01-07 00:00:00'), 10.0),
(pd.Timestamp('2012-01-08 00:00:00'), 10.0),
(pd.Timestamp('2012-01-09 00:00:00'), 10.0),
(pd.Timestamp('2012-01-10 00:00:00'), 10.0),
(pd.Timestamp('2012-01-11 00:00:00'), 10.0),
(pd.Timestamp('2012-01-12 00:00:00'), 10.0),
(pd.Timestamp('2012-01-13 00:00:00'), 10.0),
(pd.Timestamp('2012-01-14 00:00:00'), 10.0),
(pd.Timestamp('2012-01-15 00:00:00'), 10.0),
(pd.Timestamp('2012-01-16 00:00:00'), 10.0),
(pd.Timestamp('2012-01-17 00:00:00'), 10.0),
(pd.Timestamp('2012-01-18 00:00:00'), 10.0),
(pd.Timestamp('2012-01-19 00:00:00'), 0.0),
(pd.Timestamp('2012-01-20 00:00:00'), 0.0),
(pd.Timestamp('2012-01-21 00:00:00'), 0.0),
(pd.Timestamp('2012-01-22 00:00:00'), 0.0),
(pd.Timestamp('2012-01-23 00:00:00'), 0.0),
(pd.Timestamp('2012-01-24 00:00:00'), 0.0),
(pd.Timestamp('2012-01-25 00:00:00'), 0.0),
(pd.Timestamp('2012-01-26 00:00:00'), 0.0),
(pd.Timestamp('2012-01-27 00:00:00'), 50.0),
(pd.Timestamp('2012-01-28 00:00:00'), 50.0),
(pd.Timestamp('2012-01-29 00:00:00'), 50.0),
(pd.Timestamp('2012-01-30 00:00:00'), 50.0),
(pd.Timestamp('2012-01-31 00:00:00'), 50.0)]
ts = TimeSeries([(datetime(2012, 1, 1), 400), (datetime(2012, 3, 1), 400)])
ts[datetime(2012, 1, 4) : datetime(2012, 1, 20)] = 10
ts[datetime(2012, 1, 25) : datetime(2012, 2, 7)] = 50
ts[datetime(2012, 1, 19) : datetime(2012, 1, 27)] = 0

sr = ts.sample_interval(
sampling_period=timedelta(days=1), end=datetime(2012, 2, 1)
)
assert list(sr.items()) == [
(pd.Timestamp("2012-01-01 00:00:00"), 400.0),
(pd.Timestamp("2012-01-02 00:00:00"), 400.0),
(pd.Timestamp("2012-01-03 00:00:00"), 400.0),
(pd.Timestamp("2012-01-04 00:00:00"), 10.0),
(pd.Timestamp("2012-01-05 00:00:00"), 10.0),
(pd.Timestamp("2012-01-06 00:00:00"), 10.0),
(pd.Timestamp("2012-01-07 00:00:00"), 10.0),
(pd.Timestamp("2012-01-08 00:00:00"), 10.0),
(pd.Timestamp("2012-01-09 00:00:00"), 10.0),
(pd.Timestamp("2012-01-10 00:00:00"), 10.0),
(pd.Timestamp("2012-01-11 00:00:00"), 10.0),
(pd.Timestamp("2012-01-12 00:00:00"), 10.0),
(pd.Timestamp("2012-01-13 00:00:00"), 10.0),
(pd.Timestamp("2012-01-14 00:00:00"), 10.0),
(pd.Timestamp("2012-01-15 00:00:00"), 10.0),
(pd.Timestamp("2012-01-16 00:00:00"), 10.0),
(pd.Timestamp("2012-01-17 00:00:00"), 10.0),
(pd.Timestamp("2012-01-18 00:00:00"), 10.0),
(pd.Timestamp("2012-01-19 00:00:00"), 0.0),
(pd.Timestamp("2012-01-20 00:00:00"), 0.0),
(pd.Timestamp("2012-01-21 00:00:00"), 0.0),
(pd.Timestamp("2012-01-22 00:00:00"), 0.0),
(pd.Timestamp("2012-01-23 00:00:00"), 0.0),
(pd.Timestamp("2012-01-24 00:00:00"), 0.0),
(pd.Timestamp("2012-01-25 00:00:00"), 0.0),
(pd.Timestamp("2012-01-26 00:00:00"), 0.0),
(pd.Timestamp("2012-01-27 00:00:00"), 50.0),
(pd.Timestamp("2012-01-28 00:00:00"), 50.0),
(pd.Timestamp("2012-01-29 00:00:00"), 50.0),
(pd.Timestamp("2012-01-30 00:00:00"), 50.0),
(pd.Timestamp("2012-01-31 00:00:00"), 50.0),
]

def test_sample_interval_hours():
import pandas as pd

ts = Domain([(datetime(2012, 1, 1), 400),
(datetime(2012, 1, 10), 400)])
def test_sample_interval_hours():
ts = TimeSeries([(datetime(2012, 1, 1), 400), (datetime(2012, 1, 10), 400)])

ts[datetime(2012, 1, 4, 12):datetime(2012, 1, 6, 20)] = 10
ts[datetime(2012, 1, 7, 9):datetime(2012, 1, 10)] = 50
ts[datetime(2012, 1, 4, 12) : datetime(2012, 1, 6, 20)] = 10
ts[datetime(2012, 1, 7, 9) : datetime(2012, 1, 10)] = 50

sr = ts.sample_interval(sampling_period=timedelta(days=1))
assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0),
(pd.Timestamp('2012-01-02 00:00:00'), 400.0),
(pd.Timestamp('2012-01-03 00:00:00'), 400.0),
(pd.Timestamp('2012-01-04 00:00:00'), 205.0),
(pd.Timestamp('2012-01-05 00:00:00'), 10.0),
(pd.Timestamp('2012-01-06 00:00:00'), 75.0),
(pd.Timestamp('2012-01-07 00:00:00'), 181.25),
(pd.Timestamp('2012-01-08 00:00:00'), 50.0),
(pd.Timestamp('2012-01-09 00:00:00'), 50.0)]
assert list(sr.items()) == [
(pd.Timestamp("2012-01-01 00:00:00"), 400.0),
(pd.Timestamp("2012-01-02 00:00:00"), 400.0),
(pd.Timestamp("2012-01-03 00:00:00"), 400.0),
(pd.Timestamp("2012-01-04 00:00:00"), 205.0),
(pd.Timestamp("2012-01-05 00:00:00"), 10.0),
(pd.Timestamp("2012-01-06 00:00:00"), 75.0),
(pd.Timestamp("2012-01-07 00:00:00"), 181.25),
(pd.Timestamp("2012-01-08 00:00:00"), 50.0),
(pd.Timestamp("2012-01-09 00:00:00"), 50.0),
]

sr = ts.sample_interval(sampling_period=timedelta(days=1), operation="max")
assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0),
(pd.Timestamp('2012-01-02 00:00:00'), 400.0),
(pd.Timestamp('2012-01-03 00:00:00'), 400.0),
(pd.Timestamp('2012-01-04 00:00:00'), 400.0),
(pd.Timestamp('2012-01-05 00:00:00'), 10.0),
(pd.Timestamp('2012-01-06 00:00:00'), 400.0),
(pd.Timestamp('2012-01-07 00:00:00'), 400.0),
(pd.Timestamp('2012-01-08 00:00:00'), 50.0),
(pd.Timestamp('2012-01-09 00:00:00'), 50.0)]
assert list(sr.items()) == [
(pd.Timestamp("2012-01-01 00:00:00"), 400.0),
(pd.Timestamp("2012-01-02 00:00:00"), 400.0),
(pd.Timestamp("2012-01-03 00:00:00"), 400.0),
(pd.Timestamp("2012-01-04 00:00:00"), 400.0),
(pd.Timestamp("2012-01-05 00:00:00"), 10.0),
(pd.Timestamp("2012-01-06 00:00:00"), 400.0),
(pd.Timestamp("2012-01-07 00:00:00"), 400.0),
(pd.Timestamp("2012-01-08 00:00:00"), 50.0),
(pd.Timestamp("2012-01-09 00:00:00"), 50.0),
]

sr = ts.sample_interval(sampling_period=timedelta(days=1), operation="min")
assert list(sr.iteritems()) == [(pd.Timestamp('2012-01-01 00:00:00'), 400.0),
(pd.Timestamp('2012-01-02 00:00:00'), 400.0),
(pd.Timestamp('2012-01-03 00:00:00'), 400.0),
(pd.Timestamp('2012-01-04 00:00:00'), 10.0),
(pd.Timestamp('2012-01-05 00:00:00'), 10.0),
(pd.Timestamp('2012-01-06 00:00:00'), 10.0),
(pd.Timestamp('2012-01-07 00:00:00'), 50.0),
(pd.Timestamp('2012-01-08 00:00:00'), 50.0),
(pd.Timestamp('2012-01-09 00:00:00'), 50.0)]
assert list(sr.items()) == [
(pd.Timestamp("2012-01-01 00:00:00"), 400.0),
(pd.Timestamp("2012-01-02 00:00:00"), 400.0),
(pd.Timestamp("2012-01-03 00:00:00"), 400.0),
(pd.Timestamp("2012-01-04 00:00:00"), 10.0),
(pd.Timestamp("2012-01-05 00:00:00"), 10.0),
(pd.Timestamp("2012-01-06 00:00:00"), 10.0),
(pd.Timestamp("2012-01-07 00:00:00"), 50.0),
(pd.Timestamp("2012-01-08 00:00:00"), 50.0),
(pd.Timestamp("2012-01-09 00:00:00"), 50.0),
]


def test_sample_interval_index():
import pandas as pd

start = datetime(2012, 1, 1)
end = datetime(2012, 1, 10)

ts = Domain([(start, 400),
(end, 400)])
ts = TimeSeries([(start, 400), (end, 400)])

ts[datetime(2012, 1, 4, 12):datetime(2012, 1, 6, 20)] = 10
ts[datetime(2012, 1, 7, 9):datetime(2012, 1, 10)] = 50
ts[datetime(2012, 1, 4, 12) : datetime(2012, 1, 6, 20)] = 10
ts[datetime(2012, 1, 7, 9) : datetime(2012, 1, 10)] = 50

idx = pd.date_range(start, end, freq="D")
sr = ts.sample_interval(sampling_period=timedelta(days=1))
Expand Down Expand Up @@ -364,4 +364,3 @@ def test_convenience_access_methods():
assert ts.last_item() == (8, 4)
assert ts.get_item_by_index(0) == (1, 2)
assert ts.get_item_by_index(-1) == (8, 4)

67 changes: 33 additions & 34 deletions traces/timeseries.py
Expand Up @@ -14,7 +14,6 @@
import sortedcontainers
from infinity import inf


from . import histogram, operations, plot, utils

NotGiven = object()
Expand Down Expand Up @@ -422,34 +421,37 @@ def _check_regularization(self, start, end, sampling_period=None):
return sampling_period

def sample(
self, sampling_period, start=None, end=None, interpolate="previous"
self,
sampling_period,
start=None,
end=None,
interpolate="previous",
mask=None,
):
"""Sampling at regular time periods."""
start, end, mask = self._check_boundaries(start, end)

sampling_period = \
self._check_regularization(start, end, sampling_period)

if isinstance(mask, TimeSeries):
mask = mask.to_domain()

distribution_mask = Domain([start, end])
if mask:
distribution_mask &= mask
sampling_period = self._check_regularization(
start, end, sampling_period
)

result = []
for start, end in distribution_mask.intervals():
for start, end, _ in mask.iterperiods(value=True):
current_time = start
while current_time <= end:
value = self.get(current_time, interpolate=interpolate)
result.append((current_time, value))
current_time += sampling_period
return result

def sample_interval(self, sampling_period=None,
start=None, end=None,
idx=None,
operation="mean"):
def sample_interval( # noqa: C901
self,
sampling_period=None,
start=None,
end=None,
idx=None,
operation="mean",
):
"""Sampling on intervals by using some operation (mean,max,min).
It can be called either with sampling_period, [start], [end]
Expand All @@ -468,16 +470,19 @@ def sample_interval(self, sampling_period=None,

try:
import pandas as pd
except ImportError:
except ImportError as error:
msg = "sample_interval need pandas to be installed"
raise ImportError(msg)
raise ImportError(msg) from error

if idx is None:
start, end, mask = self._check_boundaries(start, end)
sampling_period = self._check_regularization(start, end,
sampling_period)
sampling_period = self._check_regularization(
start, end, sampling_period
)
# create index on [start, end] (both endpoints included)
idx = pd.date_range(start, end, freq=sampling_period, closed=None)
idx = pd.date_range(
start, end, freq=sampling_period, inclusive="both"
)
else:
start, end, mask = self._check_boundaries(idx[0], idx[-1])

Expand All @@ -499,11 +504,7 @@ def items_in_horizon():
inflexion_times = pd.DatetimeIndex(inflexion_times)

# identify all inflexion intervals
# by index: point i is in interval [idx[ifl_int[i]], idx[ifl_int[i]+1]
# TODO: look to use searchsorted as it operates more
# TODO: efficiently (but offset of 1 in most cases)
inflexion_intervals = inflexion_times.map(
lambda t: idx.get_loc(t, method="ffill"))
inflexion_intervals = idx.get_indexer(inflexion_times, method="ffill")

# convert DatetimeIndex to numpy array for faster indexation
inflexion_times = inflexion_times.values
Expand All @@ -512,8 +513,8 @@ def items_in_horizon():

# convert to timestamp
# (to make interval arithmetic faster, no need for total_seconds)
inflexion_times = (inflexion_times.astype("int64"))
idx_times = (idx.astype("int64"))
inflexion_times = inflexion_times.astype("int64")
idx_times = idx.astype("int64")

# initialise init, update and finish functions depending
# on the aggregation operator
Expand Down Expand Up @@ -541,9 +542,9 @@ def items_in_horizon():
agg = init(t0, v0)

result = []
for i1, t1, v1 in zip(inflexion_intervals,
inflexion_times,
inflexion_values):
for i1, t1, v1 in zip(
inflexion_intervals, inflexion_times, inflexion_values
):
if i0 != i1:
# change of interval

Expand All @@ -561,7 +562,7 @@ def items_in_horizon():
break

# set up new interval
t_start, t_end = idx_times[i1:i1 + 2]
t_start, t_end = idx_times[i1 : i1 + 2]
i0, t0 = i1, t_start
agg = init(t0, v0)

Expand Down Expand Up @@ -993,14 +994,12 @@ def operation(self, other, function, **kwargs):
"""
result = TimeSeries(**kwargs)
if isinstance(other, TimeSeries):
result.default = function(self.default, other.default)
for time, value in self:
result[time] = function(value, other[time])
for time, value in other:
result[time] = function(self[time], value)
else:
for time, value in self:
result.default = function(self.default, other)
result[time] = function(value, other)
return result

Expand Down

0 comments on commit 949801a

Please sign in to comment.