From 0347bba671e94fa7d663dd67d5dbe78f354955db Mon Sep 17 00:00:00 2001 From: Eric Firing Date: Mon, 8 Mar 2021 14:31:09 -1000 Subject: [PATCH 1/2] Solve has new "order_constit" kwarg --- tests/test_order_constit.py | 73 ++++++++++++++++++++++++ utide/_solve.py | 110 +++++++++++++++++++++--------------- utide/diagnostics.py | 68 +++++++++------------- 3 files changed, 163 insertions(+), 88 deletions(-) create mode 100644 tests/test_order_constit.py diff --git a/tests/test_order_constit.py b/tests/test_order_constit.py new file mode 100644 index 0000000..126385b --- /dev/null +++ b/tests/test_order_constit.py @@ -0,0 +1,73 @@ +import numpy as np +import pytest + +from utide import reconstruct, solve +from utide._ut_constants import constit_index_dict, ut_constants + + +ts = 735604 +duration = 35 + +time = np.linspace(ts, ts + duration, 842) +tref = (time[-1] + time[0]) / 2 + +const = ut_constants.const + +amps = [1.0, 0.5, 0.6, 0.1] +names = ["M2", "S2", "K1", "O1"] +cpds = [24 * const.freq[constit_index_dict[name]] for name in names] +sinusoids = [] +for amp, cpd in zip(amps, cpds): + arg = 2 * np.pi * (time - tref) * cpd + sinusoids.append(amp * np.cos(arg)) +tide = np.hstack(tuple(sinusoids)).sum(axis=0) + +np.random.seed(1234) +noise = 1e-3 * np.random.randn(len(time)) + +time_series = tide + noise + +opts0 = { + "constit": ["K1", "M2", "O1", "S2"], + "order_constit": "frequency", + "phase": "raw", + "nodal": False, + "trend": False, + "method": "ols", + "conf_int": "MC", + "Rayleigh_min": 0.95, +} + + +@pytest.mark.parametrize("conf_int", ["none", "linear", "MC"]) +def test_order(conf_int): + + orders = ["energy", "frequency", opts0["constit"]] + if conf_int != "none": + orders.append("snr") + elevs = [] + ts_elevs = [] + vels = [] + ts_vels = [] + for order in orders: + opts = opts0.copy() + opts["order_constit"] = order + opts["conf_int"] = conf_int + elevs.append(solve(time, time_series, lat=45, **opts)) + vels.append(solve(time, time_series, time_series, lat=45, **opts)) + ts_elevs.append(reconstruct(time, elevs[-1], min_SNR=0)) + ts_vels.append(reconstruct(time, vels[-1], min_SNR=0)) + + # Are the reconstructions all the same? + for i in range(1, len(elevs)): + assert (ts_elevs[i].h == ts_elevs[0].h).all() + assert (ts_vels[i].u == ts_vels[0].u).all() + assert (ts_vels[i].v == ts_vels[0].v).all() + + +def test_invalid_snr(): + opts = opts0.copy() + opts["conf_int"] = "none" + opts["order_constit"] = "snr" + with pytest.raises(ValueError): + solve(time, time_series, lat=45, **opts) diff --git a/utide/_solve.py b/utide/_solve.py index 675f3d5..5755c41 100644 --- a/utide/_solve.py +++ b/utide/_solve.py @@ -5,10 +5,9 @@ import numpy as np from ._time_conversion import _normalize_time -from ._ut_constants import constit_index_dict from .confidence import _confidence from .constituent_selection import ut_cnstitsel -from .diagnostics import ut_diagn +from .diagnostics import _PE, _SNR, ut_diagn from .ellipse_params import ut_cs2cep from .harmonics import ut_E from .robustfit import robustfit @@ -17,6 +16,7 @@ default_opts = { "constit": "auto", + "order_constit": None, "conf_int": "linear", "method": "ols", "trend": True, @@ -37,6 +37,8 @@ def _process_opts(opts, is_2D): newopts.update_values(strict=True, **opts) # TODO: add more validations. newopts.infer = validate_infer(newopts.infer, is_2D) + snr = newopts.conf_int != "none" + newopts.order_constit = validate_order_constit(newopts.order_constit, snr) compat_opts = _translate_opts(newopts) @@ -48,6 +50,7 @@ def _translate_opts(opts): # Here or elsewhere, proper validation remains to be added. oldopts = Bunch() oldopts.cnstit = opts.constit + oldopts.ordercnstit = opts.order_constit oldopts.infer = opts.infer # we will not use the matlab names, though oldopts.conf_int = True @@ -101,6 +104,24 @@ def validate_infer(infer, is_2D): return infer +def validate_order_constit(arg, have_snr): + available = ["energy", "frequency"] + if have_snr: + available.append("snr") + if arg is None: + return "energy" + if isinstance(arg, str): + arg = arg.lower() + if arg in available: + return arg + if not isinstance(arg, str) and np.iterable(arg): + return arg # TODO: add checking of its elements + raise ValueError( + f"order_constit must be one of {available} or" + f" a sequence of constituents, not '{arg}'", + ) + + def solve(t, u, v=None, lat=None, **opts): """ Calculate amplitude, phase, confidence intervals of tidal constituents. @@ -122,7 +143,7 @@ def solve(t, u, v=None, lat=None, **opts): standard library `datetime` proleptic Gregorian calendar, starting with 1 at 00:00 on January 1 of year 1; this is the 'datenum' used by Matplotlib. - constit : {'auto', array_like}, optional + constit : {'auto', sequence}, optional List of strings with standard letter abbreviations of tidal constituents; or 'auto' to let the list be determined based on the time span. @@ -165,6 +186,14 @@ def solve(t, u, v=None, lat=None, **opts): amp_ratios and phase_offsets have length N for a scalar time series, or 2N for a vector series. + order_constit : {'energy', 'SNR', 'frequency', sequence}, optional + The default is 'energy' order, returning results ordered from + high energy to low. + The 'SNR' order is from high signal-to-noise ratio to low, and is + available only if `conf_int` is not 'none'. The + 'frequency' order is from low to high frequency. Alternatively, a + sequence of constituent names may be supplied, typically the same list as + given in the *constit* option. MC_n : integer, optional Not yet implemented. robust_kw : dict, optional @@ -370,7 +399,7 @@ def _solv1(tin, uin, vin, lat, **opts): coef.theta = np.hstack((coef.theta, theta)) coef.g = np.hstack((coef.g, g)) - if opt["conf_int"] is True: + if opt["conf_int"]: coef = _confidence( coef, cnstit, @@ -392,63 +421,50 @@ def _solv1(tin, uin, vin, lat, **opts): # Diagnostics. if not opt["nodiagn"]: - coef, indPE = ut_diagn(coef, opt) + coef = ut_diagn(coef) + # Adds a diagn dictionary, always sorted by energy. + # This doesn't seem very useful. Let's directly add the variables + # to the base coef structure. Then they can be sorted with everything + # else. + coef["PE"] = _PE(coef) + coef["SNR"] = _SNR(coef) # Re-order constituents. - if opt["ordercnstit"] is not None: + coef = _reorder(coef, opt) + # This might have added PE if it was not already present. - if opt["ordercnstit"] == "frq": - ind = coef["aux"]["frq"].argsort() + if opt["RunTimeDisp"]: + print("done.") - elif opt["ordercnstit"] == "snr": - if not opt["nodiagn"]: - ind = coef["diagn"]["SNR"].argsort()[::-1] - else: - if opt["twodim"]: - SNR = (coef["Lsmaj"] ** 2 + coef["Lsmin"] ** 2) / ( - (coef["Lsmaj_ci"] / 1.96) ** 2 + (coef["Lsmin_ci"] / 1.96) ** 2 - ) + return coef - else: - SNR = (coef["A"] ** 2) / (coef["A_ci"] / 1.96) ** 2 - ind = SNR.argsort()[::-1] +def _reorder(coef, opt): + if opt["ordercnstit"] == "energy": + # Default: order by decreasing energy. + if "PE" not in coef: + coef["PE"] = _PE(coef) + ind = coef["PE"].argsort()[::-1] - else: - ilist = [constit_index_dict[name] for name in opt["ordercnstit"]] - ind = np.array(ilist, dtype=int) + elif opt["ordercnstit"] == "frequency": + ind = coef["aux"]["frq"].argsort() - else: # Default: order by decreasing energy. - if not opt["nodiagn"]: - ind = indPE - else: - if opt["twodim"]: - PE = np.sum(coef["Lsmaj"] ** 2 + coef["Lsmin"] ** 2) - PE = 100 * (coef["Lsmaj"] ** 2 + coef["Lsmin"] ** 2) / PE - else: - PE = 100 * coef["A"] ** 2 / np.sum(coef["A"] ** 2) - - ind = PE.argsort()[::-1] - - reorderlist = ["g", "name"] - if opt.twodim: - reorderlist.extend(["Lsmaj", "Lsmin", "theta"]) - if opt.conf_int: - reorderlist.extend(["Lsmaj_ci", "Lsmin_ci", "theta_ci", "g_ci"]) + elif opt["ordercnstit"] == "snr": + # If we are here, we should be guaranteed to have SNR already. + ind = coef["SNR"].argsort()[::-1] else: - reorderlist.append("A") - if opt.conf_int: - reorderlist.extend(["A_ci", "g_ci"]) + namelist = list(coef["name"]) + ilist = [namelist.index(name) for name in opt["ordercnstit"]] + ind = np.array(ilist, dtype=int) + + arrays = "name PE SNR A A_ci g g_ci Lsmaj Lsmaj_ci Lsmin Lsmin_ci theta theta_ci" + reorderlist = [a for a in arrays.split() if a in coef] for key in reorderlist: coef[key] = coef[key][ind] coef["aux"]["frq"] = coef["aux"]["frq"][ind] coef["aux"]["lind"] = coef["aux"]["lind"][ind] - - if opt["RunTimeDisp"]: - print("done.") - return coef @@ -532,7 +548,7 @@ def _slvinit(tin, uin, vin, lat, **opts): opt["rmin"] = 1 opt["method"] = "ols" opt["tunrdn"] = 1 - opt["linci"] = 0 + opt["linci"] = False opt["white"] = 0 opt["nrlzn"] = 200 opt["lsfrqosmp"] = 1 diff --git a/utide/diagnostics.py b/utide/diagnostics.py index bae848b..b6a250a 100644 --- a/utide/diagnostics.py +++ b/utide/diagnostics.py @@ -1,58 +1,44 @@ import numpy as np -def ut_diagn(coef, opt): - - if opt["RunTimeDisp"]: - print("diagnostics ... ", end="") - coef["diagn"] = {} +def _PE(coef): + """ + Return the energy percentage for each constituent. + """ + if "Lsmaj" in coef: + E = coef["Lsmaj"] ** 2 + coef["Lsmin"] ** 2 + PE = (100 / np.sum(E)) * E + else: + PE = 100 * coef["A"] ** 2 / np.sum(coef["A"] ** 2) + return PE - if opt["twodim"]: - PE = np.sum(coef["Lsmaj"] ** 2 + coef["Lsmin"] ** 2) - PE = 100 * (coef["Lsmaj"] ** 2 + coef["Lsmin"] ** 2) / PE +def _SNR(coef): + """ + Return the signal-to-noise ratio for each constituent. + """ + if "Lsmaj" in coef: SNR = (coef["Lsmaj"] ** 2 + coef["Lsmin"] ** 2) / ( (coef["Lsmaj_ci"] / 1.96) ** 2 + (coef["Lsmin_ci"] / 1.96) ** 2 ) - else: - PE = 100 * coef["A"] ** 2 / np.sum(coef["A"] ** 2) SNR = (coef["A"] ** 2) / (coef["A_ci"] / 1.96) ** 2 + return SNR + +def ut_diagn(coef): + """ + Add to coef the names, PE, and SNR, *always* sorted by energy. + + To be eliminated... + """ + coef["diagn"] = {} + PE = _PE(coef) + SNR = _SNR(coef) indPE = PE.argsort()[::-1] coef["diagn"]["name"] = coef["name"][indPE] coef["diagn"]["PE"] = PE[indPE] coef["diagn"]["SNR"] = SNR[indPE] - return coef, indPE - - -# [~,indPE] = sort(PE,'descend'); -# coef.diagn.name = coef.name(indPE); -# coef.diagn.PE = PE(indPE); -# coef.diagn.SNR = SNR; % used in ut_diagntable; ordered by PE there -# if opt.twodim -# [coef.diagn,usnrc,vsnrc] = ut_diagntable(coef,cnstit,... -# t,u,v,xmod,m,B,W,varMSM,Gall,Hall,elor,varcov_mCw,indPE); -# else -# [coef.diagn,usnrc,~] = ut_diagntable(coef,cnstit,... -# t,u,[],xmod,m,B,W,varMSM,Gall,Hall,elor,varcov_mCw,indPE); -# end -# if opt.diagnplots -# tmp = nan*ones(size(uin)); -# tmp(uvgd) = usnrc; -# usnrc = tmp; -# tmp = nan*ones(size(uin)); -# tmp(uvgd) = e; -# e = tmp; -# if opt.twodim -# tmp = nan*ones(size(uin)); -# tmp(uvgd) = vsnrc; -# vsnrc = tmp; -# ut_diagnfigs(coef,indPE,tin,uin,vin,usnrc,vsnrc,e); -# else -# ut_diagnfigs(coef,indPE,tin,uin,[],usnrc,[],e); -# end -# end -# end + return coef From 9dc3e0003652004c0280cb378069ee9f14b0a918 Mon Sep 17 00:00:00 2001 From: Eric Firing Date: Wed, 10 Mar 2021 13:54:02 -1000 Subject: [PATCH 2/2] Name order_constit option "PE" to match array name --- tests/test_order_constit.py | 10 +++++++--- utide/_solve.py | 20 +++++++++----------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/test_order_constit.py b/tests/test_order_constit.py index 126385b..8c87e45 100644 --- a/tests/test_order_constit.py +++ b/tests/test_order_constit.py @@ -42,9 +42,9 @@ @pytest.mark.parametrize("conf_int", ["none", "linear", "MC"]) def test_order(conf_int): - orders = ["energy", "frequency", opts0["constit"]] + orders = [None, "PE", "frequency", opts0["constit"]] if conf_int != "none": - orders.append("snr") + orders.append("SNR") elevs = [] ts_elevs = [] vels = [] @@ -64,10 +64,14 @@ def test_order(conf_int): assert (ts_vels[i].u == ts_vels[0].u).all() assert (ts_vels[i].v == ts_vels[0].v).all() + # Is None equivalent to "PE"? (Just a spot check.) + assert (elevs[0].name == elevs[1].name).all() + assert (elevs[0].A == elevs[1].A).all() + def test_invalid_snr(): opts = opts0.copy() opts["conf_int"] = "none" - opts["order_constit"] = "snr" + opts["order_constit"] = "SNR" with pytest.raises(ValueError): solve(time, time_series, lat=45, **opts) diff --git a/utide/_solve.py b/utide/_solve.py index 5755c41..20ed0e8 100644 --- a/utide/_solve.py +++ b/utide/_solve.py @@ -105,15 +105,13 @@ def validate_infer(infer, is_2D): def validate_order_constit(arg, have_snr): - available = ["energy", "frequency"] + available = ["PE", "frequency"] if have_snr: - available.append("snr") + available.append("SNR") if arg is None: - return "energy" - if isinstance(arg, str): - arg = arg.lower() - if arg in available: - return arg + return "PE" + if isinstance(arg, str) and arg in available: + return arg if not isinstance(arg, str) and np.iterable(arg): return arg # TODO: add checking of its elements raise ValueError( @@ -186,8 +184,8 @@ def solve(t, u, v=None, lat=None, **opts): amp_ratios and phase_offsets have length N for a scalar time series, or 2N for a vector series. - order_constit : {'energy', 'SNR', 'frequency', sequence}, optional - The default is 'energy' order, returning results ordered from + order_constit : {'PE', 'SNR', 'frequency', sequence}, optional + The default is 'PE' (percent energy) order, returning results ordered from high energy to low. The 'SNR' order is from high signal-to-noise ratio to low, and is available only if `conf_int` is not 'none'. The @@ -440,7 +438,7 @@ def _solv1(tin, uin, vin, lat, **opts): def _reorder(coef, opt): - if opt["ordercnstit"] == "energy": + if opt["ordercnstit"] == "PE": # Default: order by decreasing energy. if "PE" not in coef: coef["PE"] = _PE(coef) @@ -449,7 +447,7 @@ def _reorder(coef, opt): elif opt["ordercnstit"] == "frequency": ind = coef["aux"]["frq"].argsort() - elif opt["ordercnstit"] == "snr": + elif opt["ordercnstit"] == "SNR": # If we are here, we should be guaranteed to have SNR already. ind = coef["SNR"].argsort()[::-1] else: