/
baseline.py
161 lines (136 loc) · 5.31 KB
/
baseline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from builtins import range
import numpy as np
import pandas as pd
from numba import jit
from .xarray_wrapper import xarray_apply_along_time_dim
################################################
# Functions for setting the RSL baseline level #
################################################
@xarray_apply_along_time_dim()
def baseline_constant(trsl, wet, n_average_last_dry=1):
    """
    Build baseline with constant level during a `wet` period

    The inputs are converted to NumPy arrays and handed to
    `_numba_baseline_constant`, which does the actual computation.

    Parameters
    ----------
    trsl : numpy.array or pandas.Series
        Transmitted signal level minus received signal level (TRSL) or
        received signal level
    wet : numpy.array or pandas.Series
        Information if classified index of times series is wet (True)
        or dry (False). The values are cast to `bool` before processing.
        NOTE(review): a plain NumPy cast to `bool` turns `NaN` into `True`,
        so `NaN`s are presumably treated as wet here instead of producing
        `NaN`s in `baseline` -- confirm against the xarray wrapper.
    n_average_last_dry: int, default = 1
        Number of last baseline values before start of wet event that should
        be averaged to get the value of the baseline during the wet event.
        Note that this value should not be too large because the baseline
        might be at an unexpected level, e.g. if another wet event is
        ending shortly before.

    Returns
    -------
    baseline : numpy.array
        Baseline as computed by `_numba_baseline_constant`: equal to `trsl`
        in dry periods and held constant during wet periods.
    """
    return _numba_baseline_constant(
        trsl=np.asarray(trsl, dtype=np.float64),
        # The `np.bool` alias was removed in NumPy 1.24; the builtin `bool`
        # selects the same dtype on every NumPy version.
        wet=np.asarray(wet, dtype=bool),
        n_average_last_dry=n_average_last_dry,
    )
@jit(nopython=True)
def _numba_baseline_constant(trsl, wet, n_average_last_dry):
    """
    Compiled kernel that builds a baseline held constant during wet periods

    Parameters
    ----------
    trsl : numpy.array
        Signal level time series
    wet : numpy.array
        Wet (True) / dry (False) flag for each entry of `trsl`
    n_average_last_dry : int
        Number of preceding baseline values that are averaged to get the
        baseline level at the first time step of a wet period

    Returns
    -------
    baseline : numpy.array
        float64 array shaped like `trsl`. Dry time steps carry the `trsl`
        value, the first wet time step carries the mean of the preceding
        `n_average_last_dry` baseline values, and the following wet time
        steps repeat that level.
    """
    baseline = np.zeros_like(trsl, dtype=np.float64)
    # There is no history to average over for the first samples, so they are
    # copied from the signal unchanged.
    baseline[0:n_average_last_dry] = trsl[0:n_average_last_dry]
    for i in range(n_average_last_dry, len(trsl)):
        if np.isnan(wet[i]):
            baseline[i] = np.nan
        # Logical `and`/`not` instead of bitwise `&`/`~`: the bitwise form is
        # only a correct truth test for genuine boolean values.
        elif wet[i] and not wet[i - 1]:
            # start of a wet period: freeze the level of the last dry values
            baseline[i] = np.mean(baseline[(i - n_average_last_dry) : i])
        elif wet[i] and wet[i - 1]:
            # inside a wet period: keep the frozen level
            baseline[i] = baseline[i - 1]
        else:
            # dry: the baseline follows the signal
            baseline[i] = trsl[i]
    return baseline
def baseline_linear(rsl, wet, ignore_nan=False):
    """
    Build baseline with linear interpolation from start till end of `wet` period

    The inputs are converted to float64 NumPy arrays and handed to
    `_numba_baseline_linear`, which does the actual computation.

    Parameters
    ----------
    rsl : numpy.array or pandas.Series
        Received signal level or transmitted signal level minus received
        signal level
    wet : numpy.array or pandas.Series
        Information if classified index of times series is wet (True)
        or dry (False). Note that `NaN`s in `wet` will lead to `NaN`s in
        `baseline` also after the `NaN` period since it is then not clear
        whether there was a change of wet/dry within the `NaN` period.
    ignore_nan : bool
        If set to True the last wet/dry state before a NaN will be used for deriving
        the baseline. If set to False, the baseline for any wet period which contains
        a NaN will be set to NaN for the duration of the wet period. Default is False.

    Returns
    -------
    baseline : numpy.array
        Baseline as computed by `_numba_baseline_linear`: equal to `rsl` in
        dry periods and linearly interpolated across wet periods.
    """
    # `np.asarray` handles arrays, Series (including subclasses) and plain
    # sequences alike, so no explicit type check is needed. `wet` is
    # converted to float so that `NaN` entries stay representable.
    rsl = np.asarray(rsl, dtype=np.float64)
    wet = np.asarray(wet, dtype=np.float64)
    return _numba_baseline_linear(rsl, wet, ignore_nan)
@jit(nopython=True)
def _numba_baseline_linear(rsl, wet, ignore_nan=False):
    """
    Compiled kernel that interpolates the baseline linearly over wet periods

    Parameters
    ----------
    rsl : numpy.array
        Signal level time series
    wet : numpy.array
        Wet/dry flag for each entry of `rsl`, given as float so that `NaN`
        can mark an unknown state. The flag of the first entry is not
        evaluated; the series is assumed to start dry.
    ignore_nan : bool
        If True, a `NaN` in `wet` is replaced by the wet/dry state of the
        previous time step. If False, a `NaN` counts as wet and marks the
        wet period it belongs to as invalid.

    Returns
    -------
    baseline : numpy.array
        float64 array shaped like `rsl`. Dry time steps carry the `rsl`
        value. Each wet period is filled with a straight line between the
        last dry value before it and the first dry value after it, or with
        `NaN` if the period contained a `NaN` and `ignore_nan` is False.
        Note that a wet period which is still open at the end of the series
        is never interpolated and keeps the initial value 0.0.
    """
    baseline = np.zeros_like(rsl, dtype=np.float64)
    # Nothing to do for empty input; indexing element 0 below would be out
    # of bounds.
    if len(rsl) == 0:
        return baseline

    baseline[0] = rsl[0]
    last_dry_i = 0
    last_dry_rsl = rsl[0]
    last_i_is_wet = False
    found_nan = False

    for i in range(1, len(rsl)):
        rsl_i = rsl[i]
        is_wet = wet[i]

        if np.isnan(is_wet):
            if ignore_nan:
                # continue with the last known wet/dry state
                is_wet = last_i_is_wet
            else:
                # `NaN` is truthy, so this time step is handled as wet below
                found_nan = True

        if is_wet:
            # start of or within a wet period; the baseline values are
            # written once the period ends
            last_i_is_wet = True
        elif last_i_is_wet:
            # First dry time step after a wet period. The filled range
            # includes both anchor points: the last dry index before the
            # period and the current index `i`. Slices exclude their end,
            # hence `i + 1`, and the line needs `i - last_dry_i + 1` points.
            if found_nan:
                baseline[last_dry_i : i + 1] = np.nan
            else:
                baseline[last_dry_i : i + 1] = np.linspace(
                    last_dry_rsl, rsl_i, i - last_dry_i + 1
                )
            found_nan = False
            last_i_is_wet = False
            last_dry_i = i
            last_dry_rsl = rsl_i
        else:
            # within a dry period: the baseline follows the signal
            if found_nan:
                baseline[i] = np.nan
            else:
                baseline[i] = rsl_i
            found_nan = False
            last_i_is_wet = False
            last_dry_i = i
            last_dry_rsl = rsl_i
    return baseline