Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added a simple MLP neural network for wet-dry classification (#146)
* added a simple neural network trained at detecting rainfall with 10-minute resolution * upload example notebook comparing the different wet dry methods * remove np.round in assert_almost_equal * added more extensive docstring to mlp * A simple MLP neural network for wet-dry classification: mlp.py: - added docstring Wet dry example.ipynb: - Do pre-processing in one cell, refer to "Basic CML processing workflow.ipynb" for more details. - Investigate two interesting CMLs - Shorten the notebook to only compare baselines. * A simple MLP neural network for wet-dry classification: Update: The MLP was retrained using more CMLs and a larger validation dataset. Wet dry example.ipynb: - re-run notebook with retrained weights mlp.py: - updated docstring to match retrained architecture model_mlp.keras: - updated weights and architecture test_wet_dry_mlp: - updated to run with new weights * A simple MLP neural network for wet-dry classification: Rain event detection methods.ipynb: - Renamed example notebook to current name - Updated cosmetic suggestions in example notebook
- Loading branch information
Showing
4 changed files
with
548 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
import numpy as np | ||
from numpy.lib.stride_tricks import sliding_window_view | ||
import tensorflow as tf | ||
import pkg_resources | ||
import pandas as pd | ||
|
||
def get_model_file_path():
    """Return the filesystem path of the directory holding the MLP model files.

    The directory is resolved relative to the installed ``pycomlink``
    package via ``pkg_resources``.

    Returns
    -------
    str
        Path to ``processing/wet_dry/mlp_model_files`` inside the package.
    """
    # Resource names must be '/'-separated and relative to the package root;
    # a leading '/' makes the name absolute, which pkg_resources deprecates.
    return pkg_resources.resource_filename(
        "pycomlink", "processing/wet_dry/mlp_model_files"
    )
|
||
# Load the pre-trained MLP once at import time so that every call to the
# classifier reuses the same in-memory model.
model = tf.keras.models.load_model(f"{get_model_file_path()}/model_mlp.keras")
|
||
def mlp_wet_dry(
    trsl_channel_1,
    trsl_channel_2,
    threshold=None,  # 0.5 is often good, or argmax
):
    """
    Wet dry classification using a simple neural network (MLP).

    This MLP calculates wet and dry periods using a 40 minutes rolling window
    for the CML signal loss from two sublinks (trsl_channel_1 and
    trsl_channel_2) with temporal resolution equal to 1 minute. It consists of
    one fully connected hidden layer with 20 neurons using the relu
    activation function. The MLP was trained to predict rainfall recorded
    at nearby disdrometers at 10 minute resolution for one month of data with
    14 pairs of CMLs and disdrometers from different locations in Norway. The
    MLP was trained using MLPClassifier from sklearn and then transformed
    to tensorflow to be compatible with the pycomlink environment.

    If only one channel is available from the CML, use that channel for both
    trsl_channel_1 and trsl_channel_2.

    The error "WARNING:absl:Skipping variable loading for optimizer 'Adam',
    because it has 13 variables whereas the saved optimizer has 1 variables."
    can safely be ignored.

    Parameters
    ----------
    trsl_channel_1 : iterable of float
        Time series of the signal loss (TRSL) of channel 1
    trsl_channel_2 : iterable of float
        Time series of the signal loss (TRSL) of channel 2. Must have the
        same length as trsl_channel_1.
    threshold : float
        Threshold (0 - 1) for setting event as wet or dry. If set to None
        (default), returns the continuous model output from the logistic
        activation function.

    Returns
    -------
    numpy.ndarray
        If threshold is None: array of shape (n, 2) with the raw model
        output, where column 1 is the one used as wet probability.
        If threshold is given: array of shape (n,) holding 1.0 (wet) or
        0.0 (dry). In both cases, time steps for which no complete NaN-free
        window is available (including the first 20 and last 19 steps) are
        NaN. Inputs shorter than the window yield all-NaN output.

    Raises
    ------
    ValueError
        If the two channels do not have the same shape.
    """
    windowsize = 40  # samples per channel in one rolling window
    half_window = windowsize // 2

    trsl_channel_1 = np.asarray(trsl_channel_1, dtype=float)
    trsl_channel_2 = np.asarray(trsl_channel_2, dtype=float)
    if trsl_channel_1.shape != trsl_channel_2.shape:
        raise ValueError(
            "trsl_channel_1 and trsl_channel_2 must have the same shape"
        )
    n_samples = trsl_channel_1.shape[0]

    # Predictions default to NaN; only rows with a complete window are filled.
    mlp_pred = np.full((n_samples, 2), np.nan)
    indices = np.array([], dtype=int)

    # A series shorter than one window cannot produce any feature row.
    if n_samples >= windowsize:
        # Normalization (per channel, ignoring NaN)
        trsl_channel_1_norm = (
            trsl_channel_1 - np.nanmean(trsl_channel_1)
        ) / np.nanstd(trsl_channel_1)
        trsl_channel_2_norm = (
            trsl_channel_2 - np.nanmean(trsl_channel_2)
        ) / np.nanstd(trsl_channel_2)

        # One feature row per window: 40 samples of channel 1 followed by
        # 40 samples of channel 2.
        x_fts = np.hstack(
            [
                sliding_window_view(trsl_channel_1_norm, window_shape=windowsize),
                sliding_window_view(trsl_channel_2_norm, window_shape=windowsize),
            ]
        )

        # Only windows without any NaN are passed to the model.
        valid = ~np.isnan(x_fts).any(axis=1)
        # Window i is assigned to time step i + half_window, which leaves
        # half_window NaN rows at the start and half_window - 1 at the end.
        indices = np.flatnonzero(valid) + half_window

        if indices.size > 0:  # else: predictions are kept as nan
            mlp_pred[indices] = model.predict(x_fts[valid], verbose=0)

    if threshold is None:
        return mlp_pred

    # Binarise the wet column where a prediction exists; the rest stays NaN.
    wet = mlp_pred[:, 1]
    wet[indices] = wet[indices] > threshold
    return wet
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import unittest | ||
import numpy as np | ||
from pycomlink.processing.wet_dry.mlp import mlp_wet_dry | ||
|
||
class Testmlppred(unittest.TestCase):
    """
    Regression test for the MLP wet-dry classifier.

    Mirrors the checks in test_wet_dry_cnn.py, but with reference values
    (truth_raw) that belong to the MLP model.
    """

    def test_mlppred(self):
        """Compare raw and thresholded predictions with stored references."""
        n_samples = 60 * 8

        # Two identical linear ramps; a single gap is put into channel 1.
        trsl_channel_1 = np.arange(0, n_samples).astype(float)
        trsl_channel_2 = np.arange(0, n_samples).astype(float)
        trsl_channel_1[310] = np.nan  # shorter window than in cnn

        # Continuous output, column 1 only.
        raw_output = mlp_wet_dry(trsl_channel_1, trsl_channel_2, threshold=None)
        pred_raw = raw_output[:, 1]

        # Binary output; low threshold for testing.
        pred = mlp_wet_dry(trsl_channel_1, trsl_channel_2, threshold=0.197)

        # check if length of array is the same
        assert len(pred_raw) == n_samples
        assert len(pred) == n_samples

        # check if array is as expected
        truth_raw = np.array(
            [
                0.19271295,
                0.19395444,
                0.19520202,
                0.19645563,
                0.19771534,
                0.19898114,
                0.20025298,
                0.20153098,
                0.20281503,
                0.20410511,
                0.20540135,
                np.nan,
                np.nan,
            ]
        )
        # Four dry steps, seven wet steps, then two steps without prediction.
        truth = np.array([0] * 4 + [1] * 7 + [np.nan] * 2)

        checked = slice(280, 293)
        np.testing.assert_almost_equal(pred[checked], truth)
        np.testing.assert_almost_equal(pred_raw[checked], truth_raw, decimal=7)