Merge pull request #8 from openearth/update/2020-data

Update with 2019 data (2020 version)
openearth · Nov 30, 2020 · 85cc3b2 · 85cc3b2
2 parents 9a95846 + 46814bc
commit 85cc3b2
Show file tree

Hide file tree

Showing 7 changed files with 982 additions and 504 deletions.
diff --git a/README.md b/README.md
@@ -2,8 +2,7 @@
 Tools and applications used to monitor sea-level rise. The notebooks under `notebooks` contain analysis used for the Dutch sea-level monitor. The directory `data` contains scripts that are used to process common sources of sea-level data. The directory `app` contains the public sea-level rise website.
 
 # Notebooks
-
-You can also view them using the [nbviewer](https://nbviewer.ipython.org/github/openearth/sealevel/tree/master/notebooks/) website. You can try out the notebooks by going to the [binder](https://mybinder.org/v2/gh/openearth/sealevel/master?filepath=notebooks) website or using github codespaces. You need to follow a few steps to get an environment that can run the notebooks. The following steps will help you through it. 
+You can view the notebooks using the [nbviewer](https://nbviewer.ipython.org/github/openearth/sealevel/tree/master/notebooks/) website. You can try out the notebooks by going to the [binder](https://mybinder.org/v2/gh/openearth/sealevel/master?filepath=notebooks) website or using github codespaces. You need to follow a few steps to get an environment that can run the notebooks. The following steps will help you through it.
 
 # Download data
 For windows systems:
@@ -21,7 +20,18 @@ make
 Note that for the main sea-level monitor you only need to download the data from the directories: `psmsl` and `noaa`. So you can go into those directories and run make there. This will download all the tide gauge information and information needed to correct for wind effects.
 
 # Packages
-You can install the packages in the file `requirements.txt` using pip or anaconda.
+You can install the packages in the file `requirements.txt` using pip or anaconda. For pip this is done using `pip install -r requirements.txt`. Windows users might want to prefer installing these packages through anaconda.
+
+```
+conda config --add channels conda-forge
+conda config --set channel_priority strict
+conda install --file requirements.txt
+```
+
+
+
+# Running the notebook
+You can run the notebook in either jupyter notebook or jupyterlab. To start jupyter notebook. From the main level of the repository start `jupyter notebook` and browse through the notebooks folders. The main notebook is `dutch-sea-level-monitor.ipynb`.
 
 
 # Tags

diff --git a/data/deltares/results/dutch-sea-level-monitor-export-2020-11-25.csv b/data/deltares/results/dutch-sea-level-monitor-export-2020-11-25.csv
diff --git a/lib/__init__.py b/lib/__init__.py
diff --git a/lib/models.py b/lib/models.py
@@ -0,0 +1,96 @@
+import numpy as np
+import statsmodels.api as sm
+
+# define the statistical model
+def linear_acceleration_model(df):
+    """define a simple linear model, with nodal tide and without wind (no acceleration)"""
+    y = df['height']
+    X = np.c_[
+        df['year']-1970,
+        (df['year'] - 1970) * (df['year'] - 1970),
+        np.cos(2*np.pi*(df['year']-1970)/18.613),
+        np.sin(2*np.pi*(df['year']-1970)/18.613)
+    ]
+    month = np.mod(df['year'], 1) * 12.0
+    names = ['Constant', 'Trend', 'Acceleration', 'Nodal U', 'Nodal V']
+    X = sm.add_constant(X)
+    model = sm.OLS(y, X, missing='drop')
+    fit = model.fit()
+    return fit, names
+
+
+# define the statistical model
+def quadratic_model(df, with_wind=True):
+    """This model computes a parabolic linear fit. This corresponds to the hypothesis that sea-level is accelerating."""
+    y = df['height']
+    X = np.c_[
+        df['year']-1970,
+        (df['year'] - 1970) * (df['year'] - 1970),
+        np.cos(2*np.pi*(df['year']-1970)/18.613),
+        np.sin(2*np.pi*(df['year']-1970)/18.613)
+    ]
+    names = ['Constant', 'Trend', 'Acceleration', 'Nodal U', 'Nodal V']
+    if with_wind:
+        X = np.c_[
+            X,
+            df['u2'],
+            df['v2']
+        ]
+        names.extend(['Wind $u^2$', 'Wind $v^2$'])
+    X = sm.add_constant(X)
+    model_quadratic = sm.GLSAR(y, X, rho=1)
+    fit = model_quadratic.iterative_fit(cov_type='HC0')
+    return fit, names
+
+
+# define the statistical model
+def broken_linear_model(df, with_wind=True):
+    """This model fits the sea-level rise has started to rise faster in 1993."""
+    y = df['height']
+    X = np.c_[
+        df['year']-1970,
+        (df['year'] > 1993) * (df['year'] - 1993),
+        np.cos(2*np.pi*(df['year']-1970)/18.613),
+        np.sin(2*np.pi*(df['year']-1970)/18.613)
+    ]
+    names = ['Constant', 'Trend', '+trend (1993)', 'Nodal U', 'Nodal V']
+    if with_wind:
+        X = np.c_[
+            X,
+            df['u2'],
+            df['v2']
+        ]
+        names.extend(['Wind $u^2$', 'Wind $v^2$'])
+    X = sm.add_constant(X)
+    model_broken_linear = sm.GLSAR(y, X, rho=1)
+    fit = model_broken_linear.iterative_fit(cov_type='HC0')
+    return fit, names
+
+
+def linear_model(df, with_wind=True, with_ar=True):
+    """Define the linear model with optional wind and autoregression.
+    See the latest report for a detailed description.
+    """
+
+    y = df['height']
+    X = np.c_[
+        df['year']-1970,
+        np.cos(2*np.pi*(df['year']-1970)/18.613),
+        np.sin(2*np.pi*(df['year']-1970)/18.613)
+    ]
+    month = np.mod(df['year'], 1) * 12.0
+    names = ['Constant', 'Trend', 'Nodal U', 'Nodal V']
+    if with_wind:
+        X = np.c_[
+            X,
+            df['u2'],
+            df['v2']
+        ]
+        names.extend(['Wind $u^2$', 'Wind $v^2$'])
+    X = sm.add_constant(X)
+    if with_ar:
+        model = sm.GLSAR(y, X, missing='drop', rho=1)
+    else:
+        model = sm.OLS(y, X, missing='drop')
+    fit = model.fit(cov_type='HC0')
+    return fit, names
diff --git a/lib/psmsl.py b/lib/psmsl.py
@@ -0,0 +1,107 @@
+import io
+import datetime
+
+import numpy as np
+import pandas as pd
+
+
+def missing2nan(value, missing=-99999):
+    """convert the value to nan if the float of value equals the missing value"""
+    value = float(value)
+    if value == missing:
+        return np.nan
+    return value
+
+def year2date(year_fraction, dtype='datetime64[s]'):
+    """convert a fraction of a year + fraction of a year to a date, for example 1993.083 -~> 1993-02-01.
+    The dtype should be a valid numpy datetime unit, such as datetime64[s]"""
+    startpoints = np.linspace(0, 1, num=12, endpoint=False)
+    remainder = np.mod(year_fraction, 1)
+    year = np.floor_divide(year_fraction, 1).astype('int')
+    month = np.searchsorted(startpoints, remainder)
+    if (month == 0).all():
+        # if month is set to 0 (for annual data), set to january
+        month = np.ones_like(month)
+    dates = [
+        datetime.datetime(year_i, month_i, 1)
+        for year_i, month_i
+        in zip(year, month)
+    ]
+    datetime64s = np.asarray(dates, dtype=dtype)
+    return datetime64s
+
+def get_data(zf, station, dataset_name):
+    """get data for the station (pandas record) from the dataset (url)"""
+    info = dict(
+        dataset_name=dataset_name,
+        id=station.name
+    )
+    bytes = zf.read("{dataset_name}/data/{id}.rlrdata".format(**info))
+    df = pd.read_csv(
+        io.BytesIO(bytes),
+        sep=';',
+        names=('year', 'height', 'interpolated', 'flags'),
+        converters={
+            "year": lambda x: float(x),
+            "height": lambda x: missing2nan(x),
+            "interpolated": str.strip,
+        }
+    )
+    df['station'] = station.name
+    # store time as t
+    df['t'] = year2date(df.year)
+    # use time as an index
+    df = df.set_index('t')
+    return df
+
+def compute_u2v2(df, wind_df):
+    """compute the u2 and v2 based on the direction and alpha"""
+
+    # convert alpha to radians and from North 0, CW to 0 east, CW
+    # x * pi / 180
+    alpha_in_rad = np.deg2rad(90 - df['alpha'])
+    direction_in_rad = np.arctan2(df['v'], df['u'])
+    # these were used in intermediate reports
+    df['u2main'] = (wind_df['speed'] ** 2) * np.cos(direction_in_rad - alpha_in_rad)
+    df['u2perp'] = (wind_df['speed'] ** 2) * np.sin(direction_in_rad - alpha_in_rad)
+    # the squared wind speed components along and perpendicular to the coastline
+    df['u2main'].fillna(df['u2main'].mean(), inplace=True)
+    df['u2perp'].fillna(df['u2perp'].mean(), inplace=True)
+    # we now switched to the signed mean (sometimes the wind comes from the north/east)
+    df['u2'] = df['u']**2 * np.sign(df['u'])
+    df['v2'] = df['v']**2 * np.sign(df['v'])
+    df['u2'].fillna(df['u2'].mean(), inplace=True)
+    df['v2'].fillna(df['v2'].mean(), inplace=True)
+
+    return df
+
+def get_data_with_wind(station, dataset_name, wind_df, annual_wind_df, zipfiles, url_names):
+    """get data for the station (pandas record) from the dataset (url)"""
+    info = dict(
+        dataset_name=dataset_name,
+        id=station.name
+    )
+    bytes = zipfiles[dataset_name].read(url_names[dataset_name].format(**info))
+    df = pd.read_csv(
+        io.BytesIO(bytes),
+        sep=';',
+        names=('year', 'height', 'interpolated', 'flags'),
+        converters={
+            "height": lambda x: missing2nan(x) - station['nap-rlr'],
+            "interpolated": str.strip,
+        }
+    )
+    df['station'] = station.name
+    # store time as t
+    df['t'] = year2date(df.year, dtype=wind_df.index.dtype)
+    df['alpha'] = station['alpha']
+    df = df.set_index('t')
+    # merge the wind and water levels
+    if 'monthly' in dataset_name:
+        merged = pd.merge(df, wind_df, how='left', left_index=True, right_index=True)
+    else:
+        merged = pd.merge(df, annual_wind_df, how='left', left_index=True, right_index=True)
+    merged = compute_u2v2(merged, wind_df)
+
+
+    return merged