diff --git a/pysindy/optimizers/base.py b/pysindy/optimizers/base.py index 614341b5..01526f45 100644 --- a/pysindy/optimizers/base.py +++ b/pysindy/optimizers/base.py @@ -97,17 +97,24 @@ def __init__( unbias: bool = True, ): super().__init__(fit_intercept=False, copy_X=copy_X) - - if max_iter <= 0: - raise ValueError("max_iter must be positive") - self.max_iter = max_iter self.iters = 0 - if np.ndim(initial_guess) == 1: - initial_guess = initial_guess.reshape(1, -1) self.initial_guess = initial_guess self.normalize_columns = normalize_columns self.unbias = unbias + self.__post_init_guard() + + # See name mangling rules for double underscore rationale + def __post_init_guard(self): + """Conduct initialization post-init, as required by the scikit-learn API.""" + if np.ndim(self.initial_guess) == 1: + self.initial_guess = self.initial_guess.reshape(1, -1) + if self.max_iter <= 0: + raise ValueError("max_iter must be positive") + + def set_params(self, **kwargs): + super().set_params(**kwargs) + self.__post_init_guard() # Force subclasses to implement this @abc.abstractmethod diff --git a/pysindy/optimizers/constrained_sr3.py b/pysindy/optimizers/constrained_sr3.py index 9eb2ae1f..dbc771a6 100644 --- a/pysindy/optimizers/constrained_sr3.py +++ b/pysindy/optimizers/constrained_sr3.py @@ -1,4 +1,5 @@ import warnings +from typing import Tuple try: import cvxpy as cp @@ -39,6 +40,9 @@ class ConstrainedSR3(SR3): to learn parsimonious physics-informed models from data." IEEE Access 8 (2020): 169259-169271. + Zheng, Peng, et al. "A unified framework for sparse relaxed + regularized regression: Sr3." IEEE Access 7 (2018): 1404-1423. + Parameters ---------- threshold : float, optional (default 0.1) @@ -93,9 +97,6 @@ class ConstrainedSR3(SR3): is deprecated in sklearn versions >= 1.0 and will be removed. Note that this parameter is incompatible with the constraints! - copy_X : boolean, optional (default True) - If True, X will be copied; else, it may be overwritten. - initial_guess : np.ndarray, optional (default None) Shape should be (n_features) or (n_targets, n_features). Initial guess for coefficients ``coef_``, (v in the mathematical equations) @@ -138,6 +139,15 @@ class ConstrainedSR3(SR3): Weight vector(s) that are not subjected to the regularization. This is the w in the objective function. + history_ : list + History of sparse coefficients. ``history_[k]`` contains the + sparse coefficients (v in the optimization objective function) + at iteration k. + + objective_history_ : list + History of the value of the objective at each step. Note that + the trapping SINDy problem is nonconvex, meaning that this value + may increase and decrease as the algorithm works.
""" def __init__( @@ -249,17 +259,22 @@ def _update_full_coef_constraints(self, H, x_transpose_y, coef_sparse): rhs = rhs.reshape(g.shape) return inv1.dot(rhs) - def _update_coef_cvxpy(self, x, y, coef_sparse): - xi = cp.Variable(coef_sparse.shape[0] * coef_sparse.shape[1]) - cost = cp.sum_squares(x @ xi - y.flatten()) + def _create_var_and_part_cost( + self, var_len: int, x_expanded: np.ndarray, y: np.ndarray + ) -> Tuple[cp.Variable, cp.Expression]: + xi = cp.Variable(var_len) + cost = cp.sum_squares(x_expanded @ xi - y.flatten()) if self.thresholder.lower() == "l1": cost = cost + self.threshold * cp.norm1(xi) elif self.thresholder.lower() == "weighted_l1": cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi) elif self.thresholder.lower() == "l2": - cost = cost + self.threshold * cp.norm2(xi) + cost = cost + self.threshold * cp.norm2(xi) ** 2 elif self.thresholder.lower() == "weighted_l2": - cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) + cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2 + return xi, cost + + def _update_coef_cvxpy(self, xi, cost, var_len, coef_prev, tol): if self.use_constraints: if self.inequality_constraints and self.equality_constraints: # Process inequality constraints then equality constraints @@ -285,26 +300,26 @@ def _update_coef_cvxpy(self, x, y, coef_sparse): else: prob = cp.Problem(cp.Minimize(cost)) - # default solver is OSQP here but switches to ECOS for L2 + # default solver is SCS/OSQP here but switches to ECOS for L2 try: prob.solve( max_iter=self.max_iter, - eps_abs=self.tol, - eps_rel=self.tol, + eps_abs=tol, + eps_rel=tol, verbose=self.verbose_cvxpy, ) # Annoying error coming from L2 norm switching to use the ECOS # solver, which uses "max_iters" instead of "max_iter", and # similar semantic changes for the other variables. 
- except TypeError: + except (TypeError, ValueError): try: - prob.solve(abstol=self.tol, reltol=self.tol, verbose=self.verbose_cvxpy) + prob.solve(max_iters=self.max_iter, verbose=self.verbose_cvxpy) except cp.error.SolverError: print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(coef_sparse.shape[0] * coef_sparse.shape[1]) + xi.value = np.zeros(var_len) except cp.error.SolverError: print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(coef_sparse.shape[0] * coef_sparse.shape[1]) + xi.value = np.zeros(var_len) if xi.value is None: warnings.warn( @@ -313,7 +328,7 @@ def _update_coef_cvxpy(self, x, y, coef_sparse): ConvergenceWarning, ) return None - coef_new = (xi.value).reshape(coef_sparse.shape) + coef_new = (xi.value).reshape(coef_prev.shape) return coef_new def _update_sparse_coef(self, coef_full): @@ -420,7 +435,11 @@ def _reduce(self, x, y): objective_history = [] if self.inequality_constraints: - coef_sparse = self._update_coef_cvxpy(x_expanded, y, coef_sparse) + var_len = coef_sparse.shape[0] * coef_sparse.shape[1] + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) + coef_sparse = self._update_coef_cvxpy( + xi, cost, var_len, coef_sparse, self.tol + ) objective_history.append(self._objective(x, y, 0, coef_full, coef_sparse)) else: for k in range(self.max_iter): @@ -459,9 +478,8 @@ def _reduce(self, x, y): break else: warnings.warn( - "SR3._reduce did not converge after {} iterations.".format( - self.max_iter - ), + f"ConstrainedSR3 did not converge after {self.max_iter}" + " iterations.", ConvergenceWarning, ) if self.use_constraints and self.constraint_order.lower() == "target": diff --git a/pysindy/optimizers/sr3.py b/pysindy/optimizers/sr3.py index 765d4c1f..58db5ef5 100644 --- a/pysindy/optimizers/sr3.py +++ b/pysindy/optimizers/sr3.py @@ -342,8 +342,8 @@ def _reduce(self, x, y): coef_sparse = self.coef_.T n_samples, n_features = x.shape + coef_full = coef_sparse.copy() if self.use_trimming: - coef_full = coef_sparse.copy() trimming_array = np.repeat(1.0 - self.trimming_fraction, n_samples) self.history_trimming_ = [trimming_array] else: @@ -392,9 +392,7 @@ def _reduce(self, x, y): break else: warnings.warn( - "SR3._reduce did not converge after {} iterations.".format( - self.max_iter - ), + f"SR3 did not converge after {self.max_iter} iterations.", ConvergenceWarning, ) self.coef_ = coef_sparse.T diff --git a/pysindy/optimizers/stable_linear_sr3.py b/pysindy/optimizers/stable_linear_sr3.py index 106e784b..6763ab15 100644 --- a/pysindy/optimizers/stable_linear_sr3.py +++ b/pysindy/optimizers/stable_linear_sr3.py @@ -417,9 +417,7 @@ def _reduce(self, x, y): break else: warnings.warn( - "StableLinearSR3._reduce did not converge after {} iterations.".format( - self.max_iter - ), + f"StableLinearSR3 did not converge after {self.max_iter} iterations.", ConvergenceWarning, ) diff --git a/pysindy/optimizers/stlsq.py b/pysindy/optimizers/stlsq.py index 62ebacd9..9cbddd1b 100644 --- a/pysindy/optimizers/stlsq.py +++ b/pysindy/optimizers/stlsq.py @@ -203,6 +203,7 @@ def _reduce(self, x, y): n_targets = y.shape[1] n_features_selected = np.sum(ind) sparse_sub = [np.array(self.sparse_ind)] * y.shape[1] + optvar = np.zeros((n_targets, n_features)) # Print initial values for each term in the optimization if self.verbose: @@ -266,9 +267,7 @@ def _reduce(self, x, y): break else: warnings.warn( - "STLSQ._reduce did not converge after {} iterations.".format( - self.max_iter - ), + f"STLSQ did not converge after {self.max_iter} iterations.",
ConvergenceWarning, ) if self.sparse_ind is None: diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index dad0aa8c..0de82d81 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -1,17 +1,21 @@ import warnings +from itertools import combinations_with_replacement as combo_wr +from itertools import product from typing import Tuple +from typing import Union import cvxpy as cp import numpy as np +from numpy.typing import NDArray from scipy.linalg import cho_factor from scipy.linalg import cho_solve from sklearn.exceptions import ConvergenceWarning from ..utils import reorder_constraints -from .sr3 import SR3 +from .constrained_sr3 import ConstrainedSR3 -class TrappingSR3(SR3): +class TrappingSR3(ConstrainedSR3): """ Trapping variant of sparse relaxed regularized regression. This optimizer can be used to identify systems with globally @@ -43,134 +47,67 @@ class TrappingSR3(SR3): data-driven models of quadratic nonlinear dynamics." arXiv preprint arXiv:2105.01843 (2021). - Zheng, Peng, et al. "A unified framework for sparse relaxed - regularized regression: Sr3." IEEE Access 7 (2018): 1404-1423. - - Champion, Kathleen, et al. "A unified sparse optimization framework - to learn parsimonious physics-informed models from data." - IEEE Access 8 (2020): 169259-169271. - Parameters ---------- - evolve_w : bool, optional (default True) + evolve_w : If false, don't update w and just minimize over (m, A) - threshold : float, optional (default 0.1) - Determines the strength of the regularization. When the - regularization function R is the L0 norm, the regularization - is equivalent to performing hard thresholding, and lambda - is chosen to threshold at the value given by this parameter. - This is equivalent to choosing lambda = threshold^2 / (2 * nu). - - eta : float, optional (default 1.0e20) + eta : Determines the strength of the stability term ||Pw-A||^2 in the optimization. The default value is very large so that the algorithm default is to ignore the stability term. In this limit, this should be approximately equivalent to the ConstrainedSR3 method. - alpha_m : float, optional (default eta * 0.1) - Determines the step size in the prox-gradient descent over m. - For convergence, need alpha_m <= eta / ||w^T * PQ^T * PQ * w||. - Typically 0.01 * eta <= alpha_m <= 0.1 * eta. + eps_solver : + If threshold != 0, this specifies the error tolerance in the + CVXPY (OSQP) solve. Default 1.0e-7 (Default is 1.0e-3 in OSQP.) - alpha_A : float, optional (default eta) + relax_optim : + If relax_optim = True, use the relax-and-split method. If False, + try a direct minimization on the largest eigenvalue. + + inequality_constraints : bool, optional (default False) + If True, relax_optim must be false or relax_optim = True + AND threshold != 0, so that the CVXPY methods are used. + + alpha_A : Determines the step size in the prox-gradient descent over A. - For convergence, need alpha_A <= eta, so typically + For convergence, need alpha_A <= eta, so default alpha_A = eta is used. - gamma : float, optional (default 0.1) + alpha_m : + Determines the step size in the prox-gradient descent over m. + For convergence, need alpha_m <= eta / ||w^T * PQ^T * PQ * w||. + Typically 0.01 * eta <= alpha_m <= 0.1 * eta. (default eta * 0.1) + + gamma : Determines the negative interval that matrix A is projected onto. For most applications gamma = 0.1 - 1.0 works pretty well. 
- tol : float, optional (default 1e-5) - Tolerance used for determining convergence of the optimization - algorithm over w. - - tol_m : float, optional (default 1e-5) + tol_m : Tolerance used for determining convergence of the optimization algorithm over m. - thresholder : string, optional (default 'L1') + thresholder : Regularization function to use. For current trapping SINDy, only the L1 and L2 norms are implemented. Note that other convex norms could be straightforwardly implemented, but L0 requires - reformulation because of nonconvexity. - - thresholds : np.ndarray, shape (n_targets, n_features), optional \ - (default None) - Array of thresholds for each library function coefficient. - Each row corresponds to a measurement variable and each column - to a function from the feature library. - Recall that SINDy seeks a matrix :math:`\\Xi` such that - :math:`\\dot{X} \\approx \\Theta(X)\\Xi`. - ``thresholds[i, j]`` should specify the threshold to be used for the - (j + 1, i + 1) entry of :math:`\\Xi`. That is to say it should give the - threshold to be used for the (j + 1)st library function in the equation - for the (i + 1)st measurement variable. - - eps_solver : float, optional (default 1.0e-7) - If threshold != 0, this specifies the error tolerance in the - CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP. + reformulation because of nonconvexity. (default 'L1') - relax_optim : bool, optional (default True) - If relax_optim = True, use the relax-and-split method. If False, - try a direct minimization on the largest eigenvalue. - - inequality_constraints : bool, optional (default False) - If True, relax_optim must be false or relax_optim = True - AND threshold != 0, so that the CVXPY methods are used. - - max_iter : int, optional (default 30) - Maximum iterations of the optimization algorithm. - - accel : bool, optional (default False) + accel : Whether or not to use accelerated prox-gradient descent for (m, A). + (default False) - m0 : np.ndarray, shape (n_targets), optional (default None) - Initial guess for vector m in the optimization. Otherwise - each component of m is randomly initialized in [-1, 1]. - - A0 : np.ndarray, shape (n_targets, n_targets), optional (default None) - Initial guess for vector A in the optimization. Otherwise - A is initialized as A = diag(gamma). - - copy_X : boolean, optional (default True) - If True, X will be copied; else, it may be overwritten. - - normalize_columns : boolean, optional (default False) - Normalize the columns of x (the SINDy library terms) before regression - by dividing by the L2-norm. Note that the 'normalize' option in sklearn - is deprecated in sklearn versions >= 1.0 and will be removed. + m0 : + Initial guess for trap center in the optimization. Default None + initializes vector elements randomly in [-1, 1]. shape (n_targets) - verbose : bool, optional (default False) - If True, prints out the different error terms every - max_iter / 10 iterations. - - verbose_cvxpy : bool, optional (default False) - Boolean flag which is passed to CVXPY solve function to indicate if - output should be verbose or not. Only relevant for optimizers that - use the CVXPY package in some capabity. - - unbias: bool (default False) - See base class for definition. Most options are incompatible - with unbiasing. + A0 : + Initial guess for vector A in the optimization. Shape (n_targets, n_targets) + Default None, meaning A is initialized as A = diag(gamma). 
Attributes ---------- - coef_ : array, shape (n_features,) or (n_targets, n_features) - Regularized weight vector(s). This is the v in the objective - function. - - history_ : list - History of sparse coefficients. ``history_[k]`` contains the - sparse coefficients (v in the optimization objective function) - at iteration k. - - objective_history_ : list - History of the value of the objective at each step. Note that - the trapping SINDy problem is nonconvex, meaning that this value - may increase and decrease as the algorithm works. - A_history_ : list History of the auxiliary variable A that approximates diag(PW). @@ -198,6 +135,9 @@ class TrappingSR3(SR3): n_targets, n_targets, n_features) Quadratic coefficient part of the P matrix in ||Pw - A||^2 + objective_history_: list + History of the objective value at each iteration + Examples -------- >>> import numpy as np @@ -220,221 +160,122 @@ class TrappingSR3(SR3): def __init__( self, - evolve_w=True, - threshold=0.1, - eps_solver=1e-7, - relax_optim=True, + *, + eta: Union[float, None] = None, + eps_solver: float = 1e-7, + relax_optim: bool = True, inequality_constraints=False, - eta=None, - alpha_A=None, - alpha_m=None, - gamma=-0.1, - tol=1e-5, - tol_m=1e-5, - thresholder="l1", - thresholds=None, - max_iter=30, - accel=False, - normalize_columns=False, - copy_X=True, - m0=None, - A0=None, - objective_history=None, - constraint_lhs=None, - constraint_rhs=None, - constraint_order="target", - verbose=False, - verbose_cvxpy=False, - unbias=False, + alpha_A: Union[float, None] = None, + alpha_m: Union[float, None] = None, + gamma: float = -0.1, + tol_m: float = 1e-5, + thresholder: str = "l1", + accel: bool = False, + m0: Union[NDArray, None] = None, + A0: Union[NDArray, None] = None, + **kwargs, ): - super().__init__( - threshold=threshold, - max_iter=max_iter, - normalize_columns=normalize_columns, - copy_X=copy_X, - thresholder=thresholder, - thresholds=thresholds, - verbose=verbose, - unbias=unbias, - ) - if thresholder.lower() not in ("l1", "l2", "weighted_l1", "weighted_l2"): + super().__init__(thresholder=thresholder, **kwargs) + self.eps_solver = eps_solver + self.relax_optim = relax_optim + self.inequality_constraints = inequality_constraints + self.m0 = m0 + self.A0 = A0 + self.alpha_A = alpha_A + self.alpha_m = alpha_m + self.eta = eta + self.gamma = gamma + self.tol_m = tol_m + self.accel = accel + self.__post_init_guard() + + def __post_init_guard(self): + """Conduct initialization post-init, as required by scikitlearn API""" + if self.thresholder.lower() not in ("l1", "l2", "weighted_l1", "weighted_l2"): raise ValueError("Regularizer must be (weighted) L1 or L2") - if eta is None: + if self.eta is None: warnings.warn( "eta was not set, so defaulting to eta = 1e20 " "with alpha_m = 1e-2 * eta, alpha_A = eta. Here eta is so " "large that the stability term in the optimization " "will be ignored." 
) - eta = 1e20 - alpha_m = 1e18 - alpha_A = 1e20 + self.eta = 1e20 + self.alpha_m = 1e18 + self.alpha_A = 1e20 else: - if alpha_m is None: - alpha_m = eta * 1e-2 - if alpha_A is None: - alpha_A = eta - if eta <= 0: + if self.alpha_m is None: + self.alpha_m = self.eta * 1e-2 + if self.alpha_A is None: + self.alpha_A = self.eta + if self.eta <= 0: raise ValueError("eta must be positive") - if alpha_m < 0 or alpha_m > eta: + if self.alpha_m < 0 or self.alpha_m > self.eta: raise ValueError("0 <= alpha_m <= eta") - if alpha_A < 0 or alpha_A > eta: + if self.alpha_A < 0 or self.alpha_A > self.eta: raise ValueError("0 <= alpha_A <= eta") - if gamma >= 0: + if self.gamma >= 0: raise ValueError("gamma must be negative") - if tol <= 0 or tol_m <= 0 or eps_solver <= 0: + if self.tol <= 0 or self.tol_m <= 0 or self.eps_solver <= 0: raise ValueError("tol and tol_m must be positive") - if not evolve_w and not relax_optim: - raise ValueError("If doing direct solve, must evolve w") - if inequality_constraints and relax_optim and threshold == 0.0: + if self.inequality_constraints and self.relax_optim and self.threshold == 0.0: raise ValueError( "Ineq. constr. -> threshold!=0 + relax_optim=True or relax_optim=False." ) - if inequality_constraints and not evolve_w: - raise ValueError( - "Use of inequality constraints requires solving for xi (evolve_w=True)." - ) - self.evolve_w = evolve_w - self.eps_solver = eps_solver - self.relax_optim = relax_optim - self.inequality_constraints = inequality_constraints - self.m0 = m0 - self.A0 = A0 - self.alpha_A = alpha_A - self.alpha_m = alpha_m - self.eta = eta - self.gamma = gamma - self.tol = tol - self.tol_m = tol_m - self.accel = accel - self.verbose_cvxpy = verbose_cvxpy - self.objective_history = objective_history - self.unbias = False - self.use_constraints = (constraint_lhs is not None) and ( - constraint_rhs is not None - ) - - self.constraint_lhs = constraint_lhs - self.constraint_rhs = constraint_rhs - self.constraint_order = constraint_order - if self.use_constraints: - if constraint_order not in ("feature", "target"): - raise ValueError( - "constraint_order must be either 'feature' or 'target'" - ) - if unbias: - raise ValueError( - "Constraints are incompatible with an unbiasing step. 
Set" - " unbias=False" - ) + def set_params(self, **kwargs): + super().set_params(**kwargs) + self.__post_init_guard - def _set_Ptensors(self, r): + def _set_Ptensors( + self, n_targets: int + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """Make the projection tensors used for the algorithm.""" - N = int((r**2 + 3 * r) / 2.0) + N = int((n_targets**2 + 3 * n_targets) / 2.0) # delta_{il}delta_{jk} - PL_tensor = np.zeros((r, r, r, N)) - PL_tensor_unsym = np.zeros((r, r, r, N)) - for i in range(r): - for j in range(r): - for k in range(r): - for kk in range(N): - if i == k and j == kk: - PL_tensor_unsym[i, j, k, kk] = 1.0 + PL_tensor_unsym = np.zeros((n_targets, n_targets, n_targets, N)) + for i, j in combo_wr(range(n_targets), 2): + PL_tensor_unsym[i, j, i, j] = 1.0 # Now symmetrize PL - for i in range(r): - for j in range(N): - PL_tensor[:, :, i, j] = 0.5 * ( - PL_tensor_unsym[:, :, i, j] + PL_tensor_unsym[:, :, i, j].T - ) + PL_tensor = (PL_tensor_unsym + np.transpose(PL_tensor_unsym, [1, 0, 2, 3])) / 2 # if j == k, delta_{il}delta_{N-r+j,n} # if j != k, delta_{il}delta_{r+j+k-1,n} - PQ_tensor = np.zeros((r, r, r, r, N)) - for i in range(r): - for j in range(r): - for k in range(r): - for kk in range(r): - for n in range(N): - if (j == k) and (n == N - r + j) and (i == kk): - PQ_tensor[i, j, k, kk, n] = 1.0 - if (j != k) and (n == r + j + k - 1) and (i == kk): - PQ_tensor[i, j, k, kk, n] = 1 / 2 + PQ_tensor = np.zeros((n_targets, n_targets, n_targets, n_targets, N)) + for (i, j, k, kk), n in product(combo_wr(range(n_targets), 4), range(N)): + if (j == k) and (n == N - n_targets + j) and (i == kk): + PQ_tensor[i, j, k, kk, n] = 1.0 + if (j != k) and (n == n_targets + j + k - 1) and (i == kk): + PQ_tensor[i, j, k, kk, n] = 1 / 2 return PL_tensor_unsym, PL_tensor, PQ_tensor - def _bad_PL(self, PL): - """Check if PL tensor is properly defined""" - tol = 1e-10 - return np.any((np.transpose(PL, [1, 0, 2, 3]) - PL) > tol) - - def _bad_PQ(self, PQ): - """Check if PQ tensor is properly defined""" - tol = 1e-10 - return np.any((np.transpose(PQ, [0, 2, 1, 3, 4]) - PQ) > tol) - - def _check_P_matrix(self, r, n_features, N): + @staticmethod + def _check_P_matrix( + n_tgts: int, n_feat: int, n_feat_expected: int, PL: np.ndarray, PQ: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray]: """Check if P tensor is properly defined""" - # If these tensors are not passed, or incorrect shape, assume zeros - if self.PQ_ is None: - self.PQ_ = np.zeros((r, r, r, r, n_features)) - warnings.warn( - "The PQ tensor (a requirement for the stability promotion) was" - " not set, so setting this tensor to all zeros. " - ) - elif (self.PQ_).shape != (r, r, r, r, n_features) and (self.PQ_).shape != ( - r, - r, - r, - r, - N, + if ( + PQ is None + or PL is None + or PQ.shape != (n_tgts, n_tgts, n_tgts, n_tgts, n_feat) + or PL.shape != (n_tgts, n_tgts, n_tgts, n_feat) + or n_feat != n_feat_expected # library is not quadratic/incorrect shape ): - self.PQ_ = np.zeros((r, r, r, r, n_features)) + PL = np.zeros((n_tgts, n_tgts, n_tgts, n_feat)) + PQ = np.zeros((n_tgts, n_tgts, n_tgts, n_tgts, n_feat)) warnings.warn( - "The PQ tensor (a requirement for the stability promotion) was" - " initialized with incorrect dimensions, " - "so setting this tensor to all zeros " - "(with the correct dimensions). " + "PQ and PL tensors not defined, wrong shape, or incompatible with " + "feature library shape. Ensure feature library is quadratic. 
" + "Setting tensors to zero" ) - if self.PL_ is None: - self.PL_ = np.zeros((r, r, r, n_features)) - warnings.warn( - "The PL tensor (a requirement for the stability promotion) was" - " not set, so setting this tensor to all zeros. " - ) - elif (self.PL_).shape != (r, r, r, n_features) and (self.PL_).shape != ( - r, - r, - r, - N, - ): - self.PL_ = np.zeros((r, r, r, n_features)) - warnings.warn( - "The PL tensor (a requirement for the stability promotion) was" - " initialized with incorrect dimensions, " - "so setting this tensor to all zeros " - "(with the correct dimensions). " - ) - - # Check if the tensor symmetries are properly defined - if self._bad_PL(self.PL_): - raise ValueError("PL tensor was passed but the symmetries are not correct") - if self._bad_PQ(self.PQ_): - raise ValueError("PQ tensor was passed but the symmetries are not correct") - - # If PL/PQ finite and correct, so trapping theorem is being used, - # then make sure library is quadratic and correct shape - if (np.any(self.PL_ != 0.0) or np.any(self.PQ_ != 0.0)) and n_features != N: - warnings.warn( - "The feature library is the wrong shape or not quadratic, " - "so please correct this if you are attempting to use the " - "trapping algorithm with the stability term included. Setting " - "PL and PQ tensors to zeros for now." - ) - self.PL_ = np.zeros((r, r, r, n_features)) - self.PQ_ = np.zeros((r, r, r, r, n_features)) + if not np.allclose( + np.transpose(PL, [1, 0, 2, 3]), PL, atol=1e-10 + ) or not np.allclose(np.transpose(PQ, [0, 2, 1, 3, 4]), PQ, atol=1e-10): + raise ValueError("PQ/PL tensors were passed but have the wrong symmetry") + return PL, PQ def _update_coef_constraints(self, H, x_transpose_y, P_transpose_A, coef_sparse): """Solves the coefficient update analytically if threshold = 0""" @@ -497,73 +338,7 @@ def _objective(self, x, y, coef_sparse, A, PW, q): ) return 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1 - def _create_var_and_part_cost( - self, var_len: int, x_expanded: np.ndarray, y: np.ndarray - ) -> Tuple[cp.Variable, cp.Expression]: - xi = cp.Variable(var_len) - cost = cp.sum_squares(x_expanded @ xi - y.flatten()) - if self.thresholder.lower() == "l1": - cost = cost + self.threshold * cp.norm1(xi) - elif self.thresholder.lower() == "weighted_l1": - cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi) - elif self.thresholder.lower() == "l2": - cost = cost + self.threshold * cp.norm2(xi) ** 2 - elif self.thresholder.lower() == "weighted_l2": - cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2 - return xi, cost - - def _solve_sparse_relax_and_split(self, r, N, x_expanded, y, Pmatrix, A, coef_prev): - """Solve coefficient update with CVXPY if threshold != 0""" - xi, cost = self._create_var_and_part_cost(N * r, x_expanded, y) - cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta - if self.use_constraints: - if self.inequality_constraints: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi <= self.constraint_rhs], - ) - else: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi == self.constraint_rhs], - ) - else: - prob = cp.Problem(cp.Minimize(cost)) - - # default solver is OSQP here but switches to ECOS for L2 - try: - prob.solve( - eps_abs=self.eps_solver, - eps_rel=self.eps_solver, - verbose=self.verbose_cvxpy, - ) - # Annoying error coming from L2 norm switching to use the ECOS - # solver, which uses "max_iters" instead of "max_iter", and - # similar semantic changes for the other variables. 
- except TypeError: - try: - prob.solve( - abstol=self.eps_solver, - reltol=self.eps_solver, - verbose=self.verbose_cvxpy, - ) - except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(N * r) - except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(N * r) - - if xi.value is None: - warnings.warn( - "Infeasible solve, increase/decrease eta", - ConvergenceWarning, - ) - return None - coef_sparse = (xi.value).reshape(coef_prev.shape) - return coef_sparse - - def _solve_m_relax_and_split(self, r, N, m_prev, m, A, coef_sparse, tk_previous): + def _solve_m_relax_and_split(self, m_prev, m, A, coef_sparse, tk_previous): """ If using the relaxation formulation of trapping SINDy, solves the (m, A) algorithm update. @@ -605,7 +380,7 @@ def _solve_nonsparse_relax_and_split(self, H, xTy, P_transpose_A, coef_prev): ) return coef_sparse - def _solve_direct_cvxpy(self, r, N, x_expanded, y, Pmatrix, coef_prev): + def _solve_m_direct(self, n_tgts, coef_sparse): """ If using the direct formulation of trapping SINDy, solves the entire problem in CVXPY regardless of the threshold value. @@ -613,53 +388,18 @@ def _solve_direct_cvxpy(self, r, N, x_expanded, y, Pmatrix, coef_prev): problem, solved in CVXPY, so convergence/quality guarantees are not available here! """ - xi, cost = self._create_var_and_part_cost(N * r, x_expanded, y) - cost = cost + cp.lambda_max(cp.reshape(Pmatrix @ xi, (r, r))) / self.eta - if self.use_constraints: - if self.inequality_constraints: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi <= self.constraint_rhs], - ) - else: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi == self.constraint_rhs], - ) - else: - prob = cp.Problem(cp.Minimize(cost)) - - # default solver is SCS here - try: - prob.solve(eps=self.eps_solver, verbose=self.verbose_cvxpy) - # Annoying error coming from L2 norm switching to use the ECOS - # solver, which uses "max_iters" instead of "max_iter", and - # similar semantic changes for the other variables. 
- except TypeError: - prob.solve( - abstol=self.eps_solver, - reltol=self.eps_solver, - verbose=self.verbose_cvxpy, - ) - except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(N * r) - - if xi.value is None: - print("Infeasible solve, increase/decrease eta") - return None, None - coef_sparse = (xi.value).reshape(coef_prev.shape) if np.all(self.PL_ == 0) and np.all(self.PQ_ == 0): - return np.zeros(r), coef_sparse # no optimization over m + return np.zeros(n_tgts), coef_sparse # no optimization over m else: - m_cp = cp.Variable(r) + m_cp = cp.Variable(n_tgts) L = np.tensordot(self.PL_, coef_sparse, axes=([3, 2], [0, 1])) Q = np.reshape( - np.tensordot(self.PQ_, coef_sparse, axes=([4, 3], [0, 1])), (r, r * r) + np.tensordot(self.PQ_, coef_sparse, axes=([4, 3], [0, 1])), + (n_tgts, n_tgts * n_tgts), ) Ls = 0.5 * (L + L.T).flatten() - cost_m = cp.lambda_max(cp.reshape(Ls - m_cp @ Q, (r, r))) + cost_m = cp.lambda_max(cp.reshape(Ls - m_cp @ Q, (n_tgts, n_tgts))) prob_m = cp.Problem(cp.Minimize(cost_m)) # default solver is SCS here @@ -668,8 +408,8 @@ def _solve_direct_cvxpy(self, r, N, x_expanded, y, Pmatrix, coef_prev): m = m_cp.value if m is None: print("Infeasible solve over m, increase/decrease eta") - return None, coef_sparse - return m, coef_sparse + return None + return m def _reduce(self, x, y): """ @@ -683,14 +423,16 @@ def _reduce(self, x, y): self.PWeigs_history_ = [] self.history_ = [] n_samples, n_features = x.shape - r = y.shape[1] - N = int((r**2 + 3 * r) / 2.0) + n_tgts = y.shape[1] + var_len = n_features * n_tgts + n_feat_expected = int((n_tgts**2 + 3 * n_tgts) / 2.0) - # Define PL and PQ tensors, only relevant if the stability term in - # trapping SINDy is turned on. - self.PL_unsym_, self.PL_, self.PQ_ = self._set_Ptensors(r) + # Only relevant if the stability term is turned on. 
+ self.PL_unsym_, self.PL_, self.PQ_ = self._set_Ptensors(n_tgts) # make sure dimensions/symmetries are correct - self._check_P_matrix(r, n_features, N) + self.PL_, self.PQ_ = self._check_P_matrix( + n_tgts, n_features, n_feat_expected, self.PL_, self.PQ_ + ) # Set initial coefficients if self.use_constraints and self.constraint_order.lower() == "target": @@ -716,100 +458,107 @@ def _reduce(self, x, y): if self.A0 is not None: A = self.A0 elif np.any(self.PQ_ != 0.0): - A = np.diag(self.gamma * np.ones(r)) + A = np.diag(self.gamma * np.ones(n_tgts)) else: - A = np.diag(np.zeros(r)) + A = np.diag(np.zeros(n_tgts)) self.A_history_.append(A) # initial guess for m if self.m0 is not None: - m = self.m0 + trap_ctr = self.m0 else: np.random.seed(1) - m = (np.random.rand(r) - np.ones(r)) * 2 - self.m_history_.append(m) + trap_ctr = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 + self.m_history_.append(trap_ctr) # Precompute some objects for optimization - x_expanded = np.zeros((n_samples, r, n_features, r)) - for i in range(r): + x_expanded = np.zeros((n_samples, n_tgts, n_features, n_tgts)) + for i in range(n_tgts): x_expanded[:, i, :, i] = x - x_expanded = np.reshape(x_expanded, (n_samples * r, r * n_features)) + x_expanded = np.reshape(x_expanded, (n_samples * n_tgts, n_tgts * n_features)) xTx = np.dot(x_expanded.T, x_expanded) xTy = np.dot(x_expanded.T, y.flatten()) # if using acceleration tk_prev = 1 - m_prev = m + trap_prev_ctr = trap_ctr # Begin optimization loop - objective_history = [] + self.objective_history_ = [] for k in range(self.max_iter): - # update P tensor from the newest m - mPQ = np.tensordot(m, self.PQ_, axes=([0], [0])) + # update P tensor from the newest trap center + mPQ = np.tensordot(trap_ctr, self.PQ_, axes=([0], [0])) p = self.PL_ - mPQ - Pmatrix = p.reshape(r * r, r * n_features) + Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) - # update w coef_prev = coef_sparse - if self.evolve_w: - if self.relax_optim: - if self.threshold > 0.0: - coef_sparse = self._solve_sparse_relax_and_split( - r, n_features, x_expanded, y, Pmatrix, A, coef_prev - ) - else: - pTp = np.dot(Pmatrix.T, Pmatrix) - H = xTx + pTp / self.eta - P_transpose_A = np.dot(Pmatrix.T, A.flatten()) - coef_sparse = self._solve_nonsparse_relax_and_split( - H, xTy, P_transpose_A, coef_prev - ) + if self.relax_optim: + if self.threshold > 0.0: + # sparse relax_and_split + coef_sparse = self._update_coef_sparse_rs( + var_len, x_expanded, y, Pmatrix, A, coef_prev + ) else: - m, coef_sparse = self._solve_direct_cvxpy( - r, n_features, x_expanded, y, Pmatrix, coef_prev + coef_sparse = self._update_coef_nonsparse_rs( + Pmatrix, A, coef_prev, xTx, xTy ) + trap_prev_ctr, trap_ctr, A, tk_prev = self._solve_m_relax_and_split( + trap_prev_ctr, trap_ctr, A, coef_sparse, tk_prev + ) + else: + coef_sparse = self._update_coef_direct( + var_len, x_expanded, y, Pmatrix, coef_prev, n_tgts + ) + trap_ctr = self._solve_m_direct(n_tgts, coef_sparse) # If problem over xi becomes infeasible, break out of the loop if coef_sparse is None: coef_sparse = coef_prev break - if self.relax_optim: - m_prev, m, A, tk_prev = self._solve_m_relax_and_split( - r, n_features, m_prev, m, A, coef_sparse, tk_prev - ) - # If problem over m becomes infeasible, break out of the loop - if m is None: - m = m_prev + if trap_ctr is None: + trap_ctr = trap_prev_ctr break self.history_.append(coef_sparse.T) PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) # (m,A) update finished, append the result - self.m_history_.append(m) + 
self.m_history_.append(trap_ctr) self.A_history_.append(A) eigvals, eigvecs = np.linalg.eig(PW) self.PW_history_.append(PW) self.PWeigs_history_.append(np.sort(eigvals)) # update objective - objective_history.append(self._objective(x, y, coef_sparse, A, PW, k)) + self.objective_history_.append(self._objective(x, y, coef_sparse, A, PW, k)) if ( self._m_convergence_criterion() < self.tol_m and self._convergence_criterion() < self.tol ): - # Could not (further) select important features break - if k == self.max_iter - 1: + else: warnings.warn( - "TrappingSR3._reduce did not converge after {} iters.".format( - self.max_iter - ), + f"TrappingSR3 did not converge after {self.max_iter} iters.", ConvergenceWarning, ) self.coef_ = coef_sparse.T - self.objective_history = objective_history + + def _update_coef_sparse_rs(self, var_len, x_expanded, y, Pmatrix, A, coef_prev): + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) + cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta + return self._update_coef_cvxpy(xi, cost, var_len, coef_prev, self.eps_solver) + + def _update_coef_nonsparse_rs(self, Pmatrix, A, coef_prev, xTx, xTy): + pTp = np.dot(Pmatrix.T, Pmatrix) + H = xTx + pTp / self.eta + P_transpose_A = np.dot(Pmatrix.T, A.flatten()) + return self._solve_nonsparse_relax_and_split(H, xTy, P_transpose_A, coef_prev) + + def _update_coef_direct(self, var_len, x_expanded, y, Pmatrix, coef_prev, n_tgts): + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) + cost += cp.lambda_max(cp.reshape(Pmatrix @ xi, (n_tgts, n_tgts))) / self.eta + return self._update_coef_cvxpy(xi, cost, var_len, coef_prev, self.eps_solver) diff --git a/test/test_optimizers.py b/test/test_optimizers.py index 2170fb5e..114a6582 100644 --- a/test/test_optimizers.py +++ b/test/test_optimizers.py @@ -211,13 +211,11 @@ def test_sr3_bad_parameters(optimizer, params): dict(eta=1, alpha_m=-1), dict(eta=1, alpha_A=-1), dict(gamma=1), - dict(evolve_w=False, relax_optim=False), dict(thresholder="l0"), dict(threshold=-1), dict(max_iter=0), dict(eta=10, alpha_m=20), dict(eta=10, alpha_A=20), - dict(inequality_constraints=True, evolve_w=False), dict( constraint_lhs=np.zeros((10, 10)), constraint_rhs=np.zeros(10), @@ -525,7 +523,6 @@ def test_trapping_sr3_quadratic_library(params, trapping_sr3_params, quadratic_l opt = TrappingSR3(**params) opt.fit(features, x_dot) - assert opt.PL_unsym_.shape == (1, 1, 1, 2) assert opt.PL_.shape == (1, 1, 1, 2) assert opt.PQ_.shape == (1, 1, 1, 1, 2) check_is_fitted(opt) @@ -539,7 +536,6 @@ def test_trapping_sr3_quadratic_library(params, trapping_sr3_params, quadratic_l opt = TrappingSR3(**params) opt.fit(features, x_dot) - assert opt.PL_unsym_.shape == (1, 1, 1, 2) assert opt.PL_.shape == (1, 1, 1, 2) assert opt.PQ_.shape == (1, 1, 1, 1, 2) check_is_fitted(opt) @@ -806,17 +802,6 @@ def test_sr3_enable_trimming(optimizer, data_linear_oscillator_corrupted): np.testing.assert_allclose(model_plain.coef_, model_trimming.coef_) -@pytest.mark.parametrize( - "optimizer", [SR3, ConstrainedSR3, StableLinearSR3, TrappingSR3] -) -def test_sr3_warn(optimizer, data_linear_oscillator_corrupted): - x, x_dot, _ = data_linear_oscillator_corrupted - model = optimizer(max_iter=1, tol=1e-10) - - with pytest.warns(ConvergenceWarning): - model.fit(x, x_dot) - - @pytest.mark.parametrize( "optimizer", [ @@ -830,12 +815,16 @@ def test_sr3_warn(optimizer, data_linear_oscillator_corrupted): def test_fit_warn(data_derivative_1d, optimizer): x, x_dot = data_derivative_1d x = x.reshape(-1, 1) + 
optimizer.max_iter = 0 # normally prohibited in constructor with pytest.warns(ConvergenceWarning): optimizer.fit(x, x_dot) -@pytest.mark.parametrize("optimizer", [ConstrainedSR3, TrappingSR3, MIOSR]) +@pytest.mark.parametrize( + "optimizer", + [(ConstrainedSR3, {"max_iter": 80}), (TrappingSR3, {"max_iter": 100}), (MIOSR, {})], +) @pytest.mark.parametrize("target_value", [0, -1, 3]) def test_row_format_constraints(data_linear_combination, optimizer, target_value): # Solution is x_dot = x.dot(np.array([[1, 1, 0], [0, 1, 1]])) @@ -848,10 +837,11 @@ def test_row_format_constraints(data_linear_combination, optimizer, target_value constraint_lhs[0, 0] = 1 constraint_lhs[1, 3] = 1 - model = optimizer( + model = optimizer[0]( constraint_lhs=constraint_lhs, constraint_rhs=constraint_rhs, constraint_order="feature", + **optimizer[1], ) model.fit(x, x_dot) @@ -861,7 +851,13 @@ def test_row_format_constraints(data_linear_combination, optimizer, target_value @pytest.mark.parametrize( - "optimizer", [ConstrainedSR3, StableLinearSR3, TrappingSR3, MIOSR] + "optimizer", + [ + (ConstrainedSR3, {"max_iter": 80}), + (StableLinearSR3, {}), + (TrappingSR3, {"max_iter": 100}), + (MIOSR, {}), + ], ) @pytest.mark.parametrize("target_value", [0, -1, 3]) def test_target_format_constraints(data_linear_combination, optimizer, target_value): @@ -874,7 +870,9 @@ def test_target_format_constraints(data_linear_combination, optimizer, target_va constraint_lhs[0, 1] = 1 constraint_lhs[1, 4] = 1 - model = optimizer(constraint_lhs=constraint_lhs, constraint_rhs=constraint_rhs) + model = optimizer[0]( + constraint_lhs=constraint_lhs, constraint_rhs=constraint_rhs, **optimizer[1] + ) model.fit(x, x_dot) np.testing.assert_allclose(model.coef_[:, 1], target_value, atol=1e-8) diff --git a/test/test_optimizers_complexity.py b/test/test_optimizers_complexity.py index bcdbe522..b338fc03 100644 --- a/test/test_optimizers_complexity.py +++ b/test/test_optimizers_complexity.py @@ -46,7 +46,7 @@ def test_complexity_parameter( optimizers = [ WrappedOptimizer(opt_cls(**{reg_name: reg_value}), normalize_columns=True) - for reg_value in [10, 1, 0.1, 0.01] + for reg_value in [10, 1, 0.1, 0.001] ] for opt in optimizers:
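Editor's note: the ConstrainedSR3 hunks above split the old single-shot CVXPY solve into two steps, _create_var_and_part_cost (variable plus data-fit and regularization cost) and _update_coef_cvxpy (constraints, solve, reshape), so that TrappingSR3 can reuse both with its own stability term. The sketch below is illustrative only; it mirrors the two-step flow as free functions on synthetic data, assumes cvxpy with its bundled OSQP solver, and is not part of the pysindy API.

# Illustrative sketch of the two-step CVXPY flow (not pysindy code).
import cvxpy as cp
import numpy as np


def create_var_and_part_cost(var_len, x_expanded, y, threshold):
    # Step 1 (mirrors _create_var_and_part_cost): variable + data fit + L1 penalty.
    xi = cp.Variable(var_len)
    cost = cp.sum_squares(x_expanded @ xi - y.flatten())
    return xi, cost + threshold * cp.norm1(xi)


def update_coef_cvxpy(xi, cost, coef_prev, tol):
    # Step 2 (mirrors _update_coef_cvxpy): attach constraints (omitted here),
    # solve, and reshape the flat solution to the previous coefficient shape.
    prob = cp.Problem(cp.Minimize(cost))
    prob.solve(solver=cp.OSQP, max_iter=10000, eps_abs=tol, eps_rel=tol)
    if xi.value is None:
        return None
    return xi.value.reshape(coef_prev.shape)


rng = np.random.default_rng(0)
theta = rng.standard_normal((200, 4))            # synthetic library matrix
y = theta @ np.array([1.5, 0.0, 0.0, -0.7])      # one target variable
coef_prev = np.zeros((1, 4))
xi, cost = create_var_and_part_cost(4, theta, y, threshold=0.1)
print(update_coef_cvxpy(xi, cost, coef_prev, tol=1e-8))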
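Editor's note: the __post_init_guard/set_params pair introduced in base.py exists because scikit-learn's set_params writes attributes directly, so validation and reshaping done at construction must be re-run whenever parameters change. A minimal toy estimator showing the pattern follows; the class and method names are illustrative, not pysindy's.

# Illustrative sketch of validate-after-set_params (not pysindy code).
from sklearn.base import BaseEstimator


class ToyOptimizer(BaseEstimator):
    def __init__(self, max_iter=20):
        self.max_iter = max_iter
        self._post_init_guard()

    def _post_init_guard(self):
        # Shared validation used both at construction and after set_params.
        if self.max_iter <= 0:
            raise ValueError("max_iter must be positive")

    def set_params(self, **kwargs):
        super().set_params(**kwargs)
        self._post_init_guard()  # re-validate after sklearn updates attributes
        return self


opt = ToyOptimizer(max_iter=5)
try:
    opt.set_params(max_iter=0)
except ValueError as err:
    print(err)  # prints "max_iter must be positive"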