Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement the nearby_wetdry approach from rainlink (#113)
* implementing nearby_wetdry approach * add example notebook * add tests
- Loading branch information
1 parent
94ee2ef
commit c5e516c
Showing
7 changed files
with
968 additions
and
133 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
Binary file modified
BIN
-3.04 KB
(98%)
pycomlink/io/example_data/example_path_averaged_reference_data.nc
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
import xarray as xr | ||
import numpy as np | ||
from tqdm import tqdm | ||
import pycomlink.spatial.helper as spatial | ||
|
||
|
||
def calc_distance_between_cml_endpoints( | ||
cml_ids, | ||
site_a_latitude, | ||
site_a_longitude, | ||
site_b_latitude, | ||
site_b_longitude, | ||
): | ||
""" | ||
Calculating the distance from and to all start and endpoints of a network | ||
of CMLs using the Haversine distance formula. This includes the start and | ||
endpoint of each CML (which equals its length). The distance between start | ||
and enpoint of a CML will be set to 0 for the case when r (the radius for | ||
which CMLs are considered to be "nearby") is smaller than the actual length | ||
of the CML | ||
---------- | ||
cml_ids : list of str or int | ||
ids of CMLs | ||
site_a_latitude : list or array | ||
latitude values of site a | ||
site_a_longitude : list or array | ||
longitude values of site a | ||
site_b_latitude : list or array | ||
latitude values of site b | ||
site_b_longitude : list or array | ||
longitude values of site b | ||
Returns | ||
------- | ||
xarray.Dataset | ||
distances between all start and endpoints in four variables, a_to_all_a, | ||
a_to_all_b, b_to_all_a, b_to_all_b | ||
""" | ||
|
||
n_cmls = len(cml_ids) | ||
ds = xr.Dataset( | ||
data_vars=dict( | ||
a_to_all_a=(["cml_id1", "cml_id2"], np.full([n_cmls, n_cmls], np.nan)), | ||
a_to_all_b=(["cml_id1", "cml_id2"], np.full([n_cmls, n_cmls], np.nan)), | ||
b_to_all_a=(["cml_id1", "cml_id2"], np.full([n_cmls, n_cmls], np.nan)), | ||
b_to_all_b=(["cml_id1", "cml_id2"], np.full([n_cmls, n_cmls], np.nan)), | ||
), | ||
coords=dict(cml_id1=cml_ids, cml_id2=cml_ids), | ||
) | ||
for i, cmlid in tqdm(enumerate(cml_ids)): | ||
ds.loc[dict(cml_id1=cmlid)]["a_to_all_a"][:] = spatial.haversine( | ||
np.array(site_a_longitude[i]), | ||
np.array(site_a_latitude[i]), | ||
np.array(site_a_longitude), | ||
np.array(site_a_latitude), | ||
) | ||
ds.loc[dict(cml_id1=cmlid)]["a_to_all_b"][:] = spatial.haversine( | ||
np.array(site_a_longitude[i]), | ||
np.array(site_a_latitude[i]), | ||
np.array(site_b_longitude), | ||
np.array(site_b_latitude), | ||
) | ||
ds.loc[dict(cml_id1=cmlid)]["b_to_all_a"][:] = spatial.haversine( | ||
np.array(site_b_longitude[i]), | ||
np.array(site_b_latitude[i]), | ||
np.array(site_a_longitude), | ||
np.array(site_a_latitude), | ||
) | ||
ds.loc[dict(cml_id1=cmlid)]["b_to_all_b"][:] = spatial.haversine( | ||
np.array(site_b_longitude[i]), | ||
np.array(site_b_latitude[i]), | ||
np.array(site_b_longitude), | ||
np.array(site_b_latitude), | ||
) | ||
# set distance between start and endpoint of each CML to 0km | ||
# this has to be done so that CMLs are not rejected when the radius r | ||
# which defines nearby CMLs is larger than the distance between endpoints | ||
# of a CML (aka its length). Otherwise, the signal levels of CMLs with | ||
# length > r will not be considered in the nearby approach | ||
ds = xr.where( | ||
ds.cml_id1 == ds.cml_id2, | ||
0, | ||
ds) | ||
return ds | ||
|
||
|
||
def instanteanous_to_minmax_data(ds_cml, interval=15, timeperiod=24, min_hours=6): | ||
""" | ||
calculating pmin from instanteanousy measured rsl and tsl values | ||
---------- | ||
ds_cml : xarray.Dataset | ||
Time series of rsl and tsl | ||
interval : int | ||
Interval of pmin in minutes | ||
timeperiod : int | ||
Number of previous hours over which max(Pmin) is to be computed | ||
min_hours : int | ||
Minimum number of hours needed to compute max(Pmin) | ||
Returns | ||
------- | ||
xarray.Dataset | ||
Time series of pmin, max_pmin, deltaP and deltaPL | ||
References | ||
---------- | ||
.. [1] Overeem, A., Leijnse, H., and Uijlenhoet, R.: Retrieval algorithm | ||
for rainfall mapping from microwave links in a cellular communication network, | ||
Atmos. Meas. Tech., 9, 2425–2444, https://doi.org/10.5194/amt-9-2425-2016, 2016. | ||
""" | ||
ds_cml["pmin"] = ds_cml.rsl - ds_cml.tsl | ||
ds_cml_minmax = ds_cml.pmin.resample(time=str(interval) + "min").min().to_dataset() | ||
|
||
# rolling window * 60min/interval(in minutes) | ||
period = int(timeperiod * 60 / interval) | ||
# min hours for calculation * 60min/interval(in minutes) | ||
hours_needed = int(min_hours * 60 / interval) | ||
ds_cml_minmax["max_pmin"] = ds_cml_minmax.pmin.rolling( | ||
time=period, | ||
min_periods=hours_needed, | ||
).max(skipna=False) | ||
|
||
ds_cml_minmax["deltaP"] = ds_cml_minmax.pmin - ds_cml_minmax.max_pmin | ||
ds_cml_minmax["deltaPL"] = ds_cml_minmax["deltaP"] / ds_cml_minmax.length | ||
|
||
return ds_cml_minmax | ||
|
||
|
||
def nearby_wetdry( | ||
ds_cml, | ||
ds_dist, | ||
r=15, | ||
thresh_median_P=-1.4, | ||
thresh_median_PL=-0.7, | ||
min_links=3, | ||
|
||
): | ||
""" | ||
Classification of rainy and dry periods from diagnostic (min-max) CML signal | ||
levels following the nearby link approach. | ||
---------- | ||
ds_cml : xarray.Dataset | ||
Dataset consisting minmax values (pmin, max_pmin, deltaP and deltaPL | ||
from CML data. | ||
ds_dist : xarray.Dataset | ||
Distance matrix between all CML endpoints calculated with | ||
`calc_distance_between_cml_endpoints()` | ||
r : float | ||
Radius for which surrounding CMLs (both end points are within a chosen | ||
radius r from either end of the selected link) | ||
thresh_median_P : float | ||
Threshold for median_P. Is dependent on the spatial correlation of rainfall. | ||
thresh_median_PL : float | ||
Threshold for median_PL. Is dependent on the spatial correlation of rainfall. | ||
min_links : int | ||
minimum number of CMLs within r needed to perform wet-dry classification | ||
Returns | ||
------- | ||
xarray.Dataset | ||
Time series wet-dry classification | ||
References | ||
---------- | ||
.. [1] Overeem, A., Leijnse, H., and Uijlenhoet, R.: Retrieval algorithm | ||
for rainfall mapping from microwave links in a cellular communication network, | ||
Atmos. Meas. Tech., 9, 2425–2444, https://doi.org/10.5194/amt-9-2425-2016, 2016. | ||
""" | ||
|
||
# get number of CMLs within r for each CML | ||
ds_dist["within_r"] = ( | ||
(ds_dist.a_to_all_a < r) | ||
& (ds_dist.a_to_all_b < r) | ||
& (ds_dist.b_to_all_a < r) | ||
& (ds_dist.b_to_all_b < r) | ||
) | ||
|
||
wet = xr.full_like(ds_cml.pmin, np.nan) | ||
|
||
for cmlid in tqdm(ds_cml.cml_id): | ||
# only make wet dry detection if min_links is reached within r | ||
if sum(ds_dist.within_r.sel(cml_id1=cmlid).values) > min_links: | ||
# select all CMLs within r | ||
ds_nearby_cmls = ds_cml.isel( | ||
cml_id=ds_dist.within_r.sel(cml_id1=cmlid).values | ||
) | ||
|
||
# calculate median delta P and delta PL for each timestep | ||
# .where checks if the minimal require number of CMLs has data | ||
medianP = ( | ||
ds_nearby_cmls.deltaP.where( | ||
ds_nearby_cmls.deltaP.count("cml_id") > min_links) | ||
.median(dim="cml_id", skipna=False) | ||
.values | ||
) | ||
medianPL = ( | ||
ds_nearby_cmls.where( | ||
ds_nearby_cmls.deltaP.count("cml_id") > min_links) | ||
.deltaPL.median(dim="cml_id", skipna=False) | ||
.values | ||
) | ||
|
||
## actual wet dry classification | ||
wet.loc[dict(cml_id=cmlid)] = xr.where( | ||
cond=(np.isnan(medianP)) | (np.isnan(medianPL)), | ||
x=np.nan, | ||
y=((medianP < thresh_median_P) & (medianPL < thresh_median_PL)), | ||
) | ||
|
||
# if maxpim - pmin > 2db set two timesteps before and one aftere to wet | ||
diff2db_rule = ( | ||
ds_nearby_cmls.sel(cml_id=cmlid).max_pmin | ||
- ds_nearby_cmls.sel(cml_id=cmlid).pmin | ||
> 2 | ||
) | ||
for shift in [-2, -1, 1]: | ||
wet.loc[dict(cml_id=cmlid.values)] = xr.where( | ||
diff2db_rule.shift(time=shift) > 2, | ||
x=1, | ||
y=wet.loc[dict(cml_id=cmlid)], | ||
) | ||
return wet |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.