From e5d635c6fc129eb9a61c08ecbbebe3058fd36986 Mon Sep 17 00:00:00 2001 From: Jake <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Tue, 7 Nov 2023 14:08:45 +0000 Subject: [PATCH 01/14] CLN: Defer to super() as much as possible in Trapping Change base class from SR3 to ConstrainedSR3 --- pysindy/optimizers/constrained_sr3.py | 34 +++++-- pysindy/optimizers/trapping_sr3.py | 135 ++------------------------ 2 files changed, 33 insertions(+), 136 deletions(-) diff --git a/pysindy/optimizers/constrained_sr3.py b/pysindy/optimizers/constrained_sr3.py index 9eb2ae1f..eeef760d 100644 --- a/pysindy/optimizers/constrained_sr3.py +++ b/pysindy/optimizers/constrained_sr3.py @@ -1,4 +1,5 @@ import warnings +from typing import Tuple try: import cvxpy as cp @@ -39,6 +40,9 @@ class ConstrainedSR3(SR3): to learn parsimonious physics-informed models from data." IEEE Access 8 (2020): 169259-169271. + Zheng, Peng, et al. "A unified framework for sparse relaxed + regularized regression: Sr3." IEEE Access 7 (2018): 1404-1423. + Parameters ---------- threshold : float, optional (default 0.1) @@ -93,9 +97,6 @@ class ConstrainedSR3(SR3): is deprecated in sklearn versions >= 1.0 and will be removed. Note that this parameter is incompatible with the constraints! - copy_X : boolean, optional (default True) - If True, X will be copied; else, it may be overwritten. - initial_guess : np.ndarray, optional (default None) Shape should be (n_features) or (n_targets, n_features). Initial guess for coefficients ``coef_``, (v in the mathematical equations) @@ -138,6 +139,15 @@ class ConstrainedSR3(SR3): Weight vector(s) that are not subjected to the regularization. This is the w in the objective function. + history_ : list + History of sparse coefficients. ``history_[k]`` contains the + sparse coefficients (v in the optimization objective function) + at iteration k. + + objective_history_ : list + History of the value of the objective at each step. Note that + the trapping SINDy problem is nonconvex, meaning that this value + may increase and decrease as the algorithm works. 
""" def __init__( @@ -249,17 +259,25 @@ def _update_full_coef_constraints(self, H, x_transpose_y, coef_sparse): rhs = rhs.reshape(g.shape) return inv1.dot(rhs) - def _update_coef_cvxpy(self, x, y, coef_sparse): - xi = cp.Variable(coef_sparse.shape[0] * coef_sparse.shape[1]) - cost = cp.sum_squares(x @ xi - y.flatten()) + def _create_var_and_part_cost( + self, var_len: int, x_expanded: np.ndarray, y: np.ndarray + ) -> Tuple[cp.Variable, cp.Expression]: + xi = cp.Variable(var_len) + cost = cp.sum_squares(x_expanded @ xi - y.flatten()) if self.thresholder.lower() == "l1": cost = cost + self.threshold * cp.norm1(xi) elif self.thresholder.lower() == "weighted_l1": cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi) elif self.thresholder.lower() == "l2": - cost = cost + self.threshold * cp.norm2(xi) + cost = cost + self.threshold * cp.norm2(xi) ** 2 elif self.thresholder.lower() == "weighted_l2": - cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) + cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2 + return xi, cost + + def _update_coef_cvxpy(self, x, y, coef_sparse): + xi, cost = self._create_var_and_part_cost( + coef_sparse.shape[0] * coef_sparse.shape[1], x, y + ) if self.use_constraints: if self.inequality_constraints and self.equality_constraints: # Process inequality constraints then equality constraints diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index dad0aa8c..d83e3633 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -1,5 +1,4 @@ import warnings -from typing import Tuple import cvxpy as cp import numpy as np @@ -8,10 +7,10 @@ from sklearn.exceptions import ConvergenceWarning from ..utils import reorder_constraints -from .sr3 import SR3 +from .constrained_sr3 import ConstrainedSR3 -class TrappingSR3(SR3): +class TrappingSR3(ConstrainedSR3): """ Trapping variant of sparse relaxed regularized regression. This optimizer can be used to identify systems with globally @@ -43,25 +42,11 @@ class TrappingSR3(SR3): data-driven models of quadratic nonlinear dynamics." arXiv preprint arXiv:2105.01843 (2021). - Zheng, Peng, et al. "A unified framework for sparse relaxed - regularized regression: Sr3." IEEE Access 7 (2018): 1404-1423. - - Champion, Kathleen, et al. "A unified sparse optimization framework - to learn parsimonious physics-informed models from data." - IEEE Access 8 (2020): 169259-169271. - Parameters ---------- evolve_w : bool, optional (default True) If false, don't update w and just minimize over (m, A) - threshold : float, optional (default 0.1) - Determines the strength of the regularization. When the - regularization function R is the L0 norm, the regularization - is equivalent to performing hard thresholding, and lambda - is chosen to threshold at the value given by this parameter. - This is equivalent to choosing lambda = threshold^2 / (2 * nu). - eta : float, optional (default 1.0e20) Determines the strength of the stability term ||Pw-A||^2 in the optimization. The default value is very large so that the @@ -82,10 +67,6 @@ class TrappingSR3(SR3): Determines the negative interval that matrix A is projected onto. For most applications gamma = 0.1 - 1.0 works pretty well. - tol : float, optional (default 1e-5) - Tolerance used for determining convergence of the optimization - algorithm over w. - tol_m : float, optional (default 1e-5) Tolerance used for determining convergence of the optimization algorithm over m. 
@@ -96,18 +77,6 @@ class TrappingSR3(SR3): could be straightforwardly implemented, but L0 requires reformulation because of nonconvexity. - thresholds : np.ndarray, shape (n_targets, n_features), optional \ - (default None) - Array of thresholds for each library function coefficient. - Each row corresponds to a measurement variable and each column - to a function from the feature library. - Recall that SINDy seeks a matrix :math:`\\Xi` such that - :math:`\\dot{X} \\approx \\Theta(X)\\Xi`. - ``thresholds[i, j]`` should specify the threshold to be used for the - (j + 1, i + 1) entry of :math:`\\Xi`. That is to say it should give the - threshold to be used for the (j + 1)st library function in the equation - for the (i + 1)st measurement variable. - eps_solver : float, optional (default 1.0e-7) If threshold != 0, this specifies the error tolerance in the CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP. @@ -120,9 +89,6 @@ class TrappingSR3(SR3): If True, relax_optim must be false or relax_optim = True AND threshold != 0, so that the CVXPY methods are used. - max_iter : int, optional (default 30) - Maximum iterations of the optimization algorithm. - accel : bool, optional (default False) Whether or not to use accelerated prox-gradient descent for (m, A). @@ -134,43 +100,8 @@ class TrappingSR3(SR3): Initial guess for vector A in the optimization. Otherwise A is initialized as A = diag(gamma). - copy_X : boolean, optional (default True) - If True, X will be copied; else, it may be overwritten. - - normalize_columns : boolean, optional (default False) - Normalize the columns of x (the SINDy library terms) before regression - by dividing by the L2-norm. Note that the 'normalize' option in sklearn - is deprecated in sklearn versions >= 1.0 and will be removed. - - verbose : bool, optional (default False) - If True, prints out the different error terms every - max_iter / 10 iterations. - - verbose_cvxpy : bool, optional (default False) - Boolean flag which is passed to CVXPY solve function to indicate if - output should be verbose or not. Only relevant for optimizers that - use the CVXPY package in some capabity. - - unbias: bool (default False) - See base class for definition. Most options are incompatible - with unbiasing. - Attributes ---------- - coef_ : array, shape (n_features,) or (n_targets, n_features) - Regularized weight vector(s). This is the v in the objective - function. - - history_ : list - History of sparse coefficients. ``history_[k]`` contains the - sparse coefficients (v in the optimization objective function) - at iteration k. - - objective_history_ : list - History of the value of the objective at each step. Note that - the trapping SINDy problem is nonconvex, meaning that this value - may increase and decrease as the algorithm works. - A_history_ : list History of the auxiliary variable A that approximates diag(PW). 
@@ -221,7 +152,6 @@ class TrappingSR3(SR3): def __init__( self, evolve_w=True, - threshold=0.1, eps_solver=1e-7, relax_optim=True, inequality_constraints=False, @@ -229,35 +159,19 @@ def __init__( alpha_A=None, alpha_m=None, gamma=-0.1, - tol=1e-5, tol_m=1e-5, thresholder="l1", - thresholds=None, - max_iter=30, accel=False, - normalize_columns=False, - copy_X=True, m0=None, A0=None, objective_history=None, - constraint_lhs=None, - constraint_rhs=None, - constraint_order="target", - verbose=False, - verbose_cvxpy=False, - unbias=False, + **kwargs, ): super().__init__( - threshold=threshold, - max_iter=max_iter, - normalize_columns=normalize_columns, - copy_X=copy_X, thresholder=thresholder, - thresholds=thresholds, - verbose=verbose, - unbias=unbias, + **kwargs, ) - if thresholder.lower() not in ("l1", "l2", "weighted_l1", "weighted_l2"): + if self.thresholder.lower() not in ("l1", "l2", "weighted_l1", "weighted_l2"): raise ValueError("Regularizer must be (weighted) L1 or L2") if eta is None: warnings.warn( @@ -282,11 +196,11 @@ def __init__( raise ValueError("0 <= alpha_A <= eta") if gamma >= 0: raise ValueError("gamma must be negative") - if tol <= 0 or tol_m <= 0 or eps_solver <= 0: + if self.tol <= 0 or tol_m <= 0 or eps_solver <= 0: raise ValueError("tol and tol_m must be positive") if not evolve_w and not relax_optim: raise ValueError("If doing direct solve, must evolve w") - if inequality_constraints and relax_optim and threshold == 0.0: + if inequality_constraints and relax_optim and self.threshold == 0.0: raise ValueError( "Ineq. constr. -> threshold!=0 + relax_optim=True or relax_optim=False." ) @@ -305,29 +219,9 @@ def __init__( self.alpha_m = alpha_m self.eta = eta self.gamma = gamma - self.tol = tol self.tol_m = tol_m self.accel = accel - self.verbose_cvxpy = verbose_cvxpy self.objective_history = objective_history - self.unbias = False - self.use_constraints = (constraint_lhs is not None) and ( - constraint_rhs is not None - ) - - self.constraint_lhs = constraint_lhs - self.constraint_rhs = constraint_rhs - self.constraint_order = constraint_order - if self.use_constraints: - if constraint_order not in ("feature", "target"): - raise ValueError( - "constraint_order must be either 'feature' or 'target'" - ) - if unbias: - raise ValueError( - "Constraints are incompatible with an unbiasing step. 
Set" - " unbias=False" - ) def _set_Ptensors(self, r): """Make the projection tensors used for the algorithm.""" @@ -497,21 +391,6 @@ def _objective(self, x, y, coef_sparse, A, PW, q): ) return 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1 - def _create_var_and_part_cost( - self, var_len: int, x_expanded: np.ndarray, y: np.ndarray - ) -> Tuple[cp.Variable, cp.Expression]: - xi = cp.Variable(var_len) - cost = cp.sum_squares(x_expanded @ xi - y.flatten()) - if self.thresholder.lower() == "l1": - cost = cost + self.threshold * cp.norm1(xi) - elif self.thresholder.lower() == "weighted_l1": - cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi) - elif self.thresholder.lower() == "l2": - cost = cost + self.threshold * cp.norm2(xi) ** 2 - elif self.thresholder.lower() == "weighted_l2": - cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2 - return xi, cost - def _solve_sparse_relax_and_split(self, r, N, x_expanded, y, Pmatrix, A, coef_prev): """Solve coefficient update with CVXPY if threshold != 0""" xi, cost = self._create_var_and_part_cost(N * r, x_expanded, y) From a6d33bcbf72cd8dc822aa588874d6bb434572b30 Mon Sep 17 00:00:00 2001 From: Jake <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:07:57 +0000 Subject: [PATCH 02/14] CLN: Move cvxpy problem formation to ConstrainedSR3 --- pysindy/optimizers/constrained_sr3.py | 11 ++++++----- pysindy/optimizers/trapping_sr3.py | 12 +++++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/pysindy/optimizers/constrained_sr3.py b/pysindy/optimizers/constrained_sr3.py index eeef760d..4631240b 100644 --- a/pysindy/optimizers/constrained_sr3.py +++ b/pysindy/optimizers/constrained_sr3.py @@ -274,10 +274,7 @@ def _create_var_and_part_cost( cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2 return xi, cost - def _update_coef_cvxpy(self, x, y, coef_sparse): - xi, cost = self._create_var_and_part_cost( - coef_sparse.shape[0] * coef_sparse.shape[1], x, y - ) + def _update_coef_cvxpy(self, xi, cost, var_len, x, y, coef_sparse): if self.use_constraints: if self.inequality_constraints and self.equality_constraints: # Process inequality constraints then equality constraints @@ -438,7 +435,11 @@ def _reduce(self, x, y): objective_history = [] if self.inequality_constraints: - coef_sparse = self._update_coef_cvxpy(x_expanded, y, coef_sparse) + var_len = coef_sparse.shape[0] * coef_sparse.shape[1] + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) + coef_sparse = self._update_coef_cvxpy( + xi, cost, var_len, x_expanded, y, coef_sparse + ) objective_history.append(self._objective(x, y, 0, coef_full, coef_sparse)) else: for k in range(self.max_iter): diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index d83e3633..0a1b52aa 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -391,9 +391,8 @@ def _objective(self, x, y, coef_sparse, A, PW, q): ) return 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1 - def _solve_sparse_relax_and_split(self, r, N, x_expanded, y, Pmatrix, A, coef_prev): + def _solve_sparse_relax_and_split(self, xi, cost, var_len, Pmatrix, A, coef_prev): """Solve coefficient update with CVXPY if threshold != 0""" - xi, cost = self._create_var_and_part_cost(N * r, x_expanded, y) cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta if self.use_constraints: if self.inequality_constraints: @@ -428,10 +427,10 @@ def _solve_sparse_relax_and_split(self, r, N, x_expanded, y, Pmatrix, A, 
coef_pr ) except cp.error.SolverError: print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(N * r) + xi.value = np.zeros(var_len) except cp.error.SolverError: print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(N * r) + xi.value = np.zeros(var_len) if xi.value is None: warnings.warn( @@ -634,8 +633,11 @@ def _reduce(self, x, y): if self.evolve_w: if self.relax_optim: if self.threshold > 0.0: + xi, cost = self._create_var_and_part_cost( + n_features * r, x_expanded, y + ) coef_sparse = self._solve_sparse_relax_and_split( - r, n_features, x_expanded, y, Pmatrix, A, coef_prev + xi, cost, r * n_features, Pmatrix, A, coef_prev ) else: pTp = np.dot(Pmatrix.T, Pmatrix) From a16027b64990e321971f32c99020e3b87247048a Mon Sep 17 00:00:00 2001 From: Jake <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:20:09 +0000 Subject: [PATCH 03/14] CLN: Make TrappingSR3 use superclass cvxpy interface --- pysindy/optimizers/constrained_sr3.py | 16 ++++---- pysindy/optimizers/trapping_sr3.py | 58 +++------------------------ test/test_optimizers.py | 20 +++++++-- 3 files changed, 30 insertions(+), 64 deletions(-) diff --git a/pysindy/optimizers/constrained_sr3.py b/pysindy/optimizers/constrained_sr3.py index 4631240b..7e9fa036 100644 --- a/pysindy/optimizers/constrained_sr3.py +++ b/pysindy/optimizers/constrained_sr3.py @@ -274,7 +274,7 @@ def _create_var_and_part_cost( cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2 return xi, cost - def _update_coef_cvxpy(self, xi, cost, var_len, x, y, coef_sparse): + def _update_coef_cvxpy(self, xi, cost, var_len, coef_prev, tol): if self.use_constraints: if self.inequality_constraints and self.equality_constraints: # Process inequality constraints then equality constraints @@ -304,8 +304,8 @@ def _update_coef_cvxpy(self, xi, cost, var_len, x, y, coef_sparse): try: prob.solve( max_iter=self.max_iter, - eps_abs=self.tol, - eps_rel=self.tol, + eps_abs=tol, + eps_rel=tol, verbose=self.verbose_cvxpy, ) # Annoying error coming from L2 norm switching to use the ECOS @@ -313,13 +313,13 @@ def _update_coef_cvxpy(self, xi, cost, var_len, x, y, coef_sparse): # similar semantic changes for the other variables. 
except TypeError: try: - prob.solve(abstol=self.tol, reltol=self.tol, verbose=self.verbose_cvxpy) + prob.solve(abstol=tol, reltol=tol, verbose=self.verbose_cvxpy) except cp.error.SolverError: print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(coef_sparse.shape[0] * coef_sparse.shape[1]) + xi.value = np.zeros(var_len) except cp.error.SolverError: print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(coef_sparse.shape[0] * coef_sparse.shape[1]) + xi.value = np.zeros(var_len) if xi.value is None: warnings.warn( @@ -328,7 +328,7 @@ def _update_coef_cvxpy(self, xi, cost, var_len, x, y, coef_sparse): ConvergenceWarning, ) return None - coef_new = (xi.value).reshape(coef_sparse.shape) + coef_new = (xi.value).reshape(coef_prev.shape) return coef_new def _update_sparse_coef(self, coef_full): @@ -438,7 +438,7 @@ def _reduce(self, x, y): var_len = coef_sparse.shape[0] * coef_sparse.shape[1] xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) coef_sparse = self._update_coef_cvxpy( - xi, cost, var_len, x_expanded, y, coef_sparse + xi, cost, var_len, coef_sparse, self.tol ) objective_history.append(self._objective(x, y, 0, coef_full, coef_sparse)) else: diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 0a1b52aa..c25803c7 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -391,56 +391,6 @@ def _objective(self, x, y, coef_sparse, A, PW, q): ) return 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1 - def _solve_sparse_relax_and_split(self, xi, cost, var_len, Pmatrix, A, coef_prev): - """Solve coefficient update with CVXPY if threshold != 0""" - cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta - if self.use_constraints: - if self.inequality_constraints: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi <= self.constraint_rhs], - ) - else: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi == self.constraint_rhs], - ) - else: - prob = cp.Problem(cp.Minimize(cost)) - - # default solver is OSQP here but switches to ECOS for L2 - try: - prob.solve( - eps_abs=self.eps_solver, - eps_rel=self.eps_solver, - verbose=self.verbose_cvxpy, - ) - # Annoying error coming from L2 norm switching to use the ECOS - # solver, which uses "max_iters" instead of "max_iter", and - # similar semantic changes for the other variables. 
- except TypeError: - try: - prob.solve( - abstol=self.eps_solver, - reltol=self.eps_solver, - verbose=self.verbose_cvxpy, - ) - except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(var_len) - except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(var_len) - - if xi.value is None: - warnings.warn( - "Infeasible solve, increase/decrease eta", - ConvergenceWarning, - ) - return None - coef_sparse = (xi.value).reshape(coef_prev.shape) - return coef_sparse - def _solve_m_relax_and_split(self, r, N, m_prev, m, A, coef_sparse, tk_previous): """ If using the relaxation formulation of trapping SINDy, solves the @@ -636,8 +586,12 @@ def _reduce(self, x, y): xi, cost = self._create_var_and_part_cost( n_features * r, x_expanded, y ) - coef_sparse = self._solve_sparse_relax_and_split( - xi, cost, r * n_features, Pmatrix, A, coef_prev + cost = ( + cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta + ) + # sparse relax_and_split + coef_sparse = self._update_coef_cvxpy( + xi, cost, r * n_features, coef_prev, self.eps_solver ) else: pTp = np.dot(Pmatrix.T, Pmatrix) diff --git a/test/test_optimizers.py b/test/test_optimizers.py index c69ce982..e4d13c4f 100644 --- a/test/test_optimizers.py +++ b/test/test_optimizers.py @@ -792,7 +792,10 @@ def test_fit_warn(data_derivative_1d, optimizer): optimizer.fit(x, x_dot) -@pytest.mark.parametrize("optimizer", [ConstrainedSR3, TrappingSR3, MIOSR]) +@pytest.mark.parametrize( + "optimizer", + [(ConstrainedSR3, {"max_iter": 80}), (TrappingSR3, {"max_iter": 100}), (MIOSR, {})], +) @pytest.mark.parametrize("target_value", [0, -1, 3]) def test_row_format_constraints(data_linear_combination, optimizer, target_value): # Solution is x_dot = x.dot(np.array([[1, 1, 0], [0, 1, 1]])) @@ -805,10 +808,11 @@ def test_row_format_constraints(data_linear_combination, optimizer, target_value constraint_lhs[0, 0] = 1 constraint_lhs[1, 3] = 1 - model = optimizer( + model = optimizer[0]( constraint_lhs=constraint_lhs, constraint_rhs=constraint_rhs, constraint_order="feature", + **optimizer[1], ) model.fit(x, x_dot) @@ -818,7 +822,13 @@ def test_row_format_constraints(data_linear_combination, optimizer, target_value @pytest.mark.parametrize( - "optimizer", [ConstrainedSR3, StableLinearSR3, TrappingSR3, MIOSR] + "optimizer", + [ + (ConstrainedSR3, {"max_iter": 80}), + (StableLinearSR3, {}), + (TrappingSR3, {"max_iter": 100}), + (MIOSR, {}), + ], ) @pytest.mark.parametrize("target_value", [0, -1, 3]) def test_target_format_constraints(data_linear_combination, optimizer, target_value): @@ -831,7 +841,9 @@ def test_target_format_constraints(data_linear_combination, optimizer, target_va constraint_lhs[0, 1] = 1 constraint_lhs[1, 4] = 1 - model = optimizer(constraint_lhs=constraint_lhs, constraint_rhs=constraint_rhs) + model = optimizer[0]( + constraint_lhs=constraint_lhs, constraint_rhs=constraint_rhs, **optimizer[1] + ) model.fit(x, x_dot) np.testing.assert_allclose(model.coef_[:, 1], target_value, atol=1e-8) From bca287f008b491abae9a1e1bf1f41ae3a17dbf74 Mon Sep 17 00:00:00 2001 From: Jake <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Thu, 9 Nov 2023 11:23:15 +0000 Subject: [PATCH 04/14] CLN: Annotate and flatten function creating projection tensors Also: Rename single-letter variable to meaningful --- pysindy/optimizers/trapping_sr3.py | 72 +++++++++++++----------------- test/test_optimizers.py | 2 - test/test_optimizers_complexity.py | 2 +- 3 files changed, 33 insertions(+), 43 
deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index c25803c7..6024ba3d 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -1,4 +1,7 @@ import warnings +from itertools import combinations_with_replacement as combo_wr +from itertools import product +from typing import Tuple import cvxpy as cp import numpy as np @@ -223,39 +226,28 @@ def __init__( self.accel = accel self.objective_history = objective_history - def _set_Ptensors(self, r): + def _set_Ptensors( + self, n_targets: int + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """Make the projection tensors used for the algorithm.""" - N = int((r**2 + 3 * r) / 2.0) + N = int((n_targets**2 + 3 * n_targets) / 2.0) # delta_{il}delta_{jk} - PL_tensor = np.zeros((r, r, r, N)) - PL_tensor_unsym = np.zeros((r, r, r, N)) - for i in range(r): - for j in range(r): - for k in range(r): - for kk in range(N): - if i == k and j == kk: - PL_tensor_unsym[i, j, k, kk] = 1.0 + PL_tensor_unsym = np.zeros((n_targets, n_targets, n_targets, N)) + for i, j in combo_wr(range(n_targets), 2): + PL_tensor_unsym[i, j, i, j] = 1.0 # Now symmetrize PL - for i in range(r): - for j in range(N): - PL_tensor[:, :, i, j] = 0.5 * ( - PL_tensor_unsym[:, :, i, j] + PL_tensor_unsym[:, :, i, j].T - ) + PL_tensor = (PL_tensor_unsym + np.transpose(PL_tensor_unsym, [1, 0, 2, 3])) / 2 # if j == k, delta_{il}delta_{N-r+j,n} # if j != k, delta_{il}delta_{r+j+k-1,n} - PQ_tensor = np.zeros((r, r, r, r, N)) - for i in range(r): - for j in range(r): - for k in range(r): - for kk in range(r): - for n in range(N): - if (j == k) and (n == N - r + j) and (i == kk): - PQ_tensor[i, j, k, kk, n] = 1.0 - if (j != k) and (n == r + j + k - 1) and (i == kk): - PQ_tensor[i, j, k, kk, n] = 1 / 2 + PQ_tensor = np.zeros((n_targets, n_targets, n_targets, n_targets, N)) + for (i, j, k, kk), n in product(combo_wr(range(n_targets), 4), range(N)): + if (j == k) and (n == N - n_targets + j) and (i == kk): + PQ_tensor[i, j, k, kk, n] = 1.0 + if (j != k) and (n == n_targets + j + k - 1) and (i == kk): + PQ_tensor[i, j, k, kk, n] = 1 / 2 return PL_tensor_unsym, PL_tensor, PQ_tensor @@ -511,14 +503,14 @@ def _reduce(self, x, y): self.PWeigs_history_ = [] self.history_ = [] n_samples, n_features = x.shape - r = y.shape[1] - N = int((r**2 + 3 * r) / 2.0) + n_tgts = y.shape[1] + N = int((n_tgts**2 + 3 * n_tgts) / 2.0) # Define PL and PQ tensors, only relevant if the stability term in # trapping SINDy is turned on. 
- self.PL_unsym_, self.PL_, self.PQ_ = self._set_Ptensors(r) + self.PL_unsym_, self.PL_, self.PQ_ = self._set_Ptensors(n_tgts) # make sure dimensions/symmetries are correct - self._check_P_matrix(r, n_features, N) + self._check_P_matrix(n_tgts, n_features, N) # Set initial coefficients if self.use_constraints and self.constraint_order.lower() == "target": @@ -544,9 +536,9 @@ def _reduce(self, x, y): if self.A0 is not None: A = self.A0 elif np.any(self.PQ_ != 0.0): - A = np.diag(self.gamma * np.ones(r)) + A = np.diag(self.gamma * np.ones(n_tgts)) else: - A = np.diag(np.zeros(r)) + A = np.diag(np.zeros(n_tgts)) self.A_history_.append(A) # initial guess for m @@ -554,14 +546,14 @@ def _reduce(self, x, y): m = self.m0 else: np.random.seed(1) - m = (np.random.rand(r) - np.ones(r)) * 2 + m = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 self.m_history_.append(m) # Precompute some objects for optimization - x_expanded = np.zeros((n_samples, r, n_features, r)) - for i in range(r): + x_expanded = np.zeros((n_samples, n_tgts, n_features, n_tgts)) + for i in range(n_tgts): x_expanded[:, i, :, i] = x - x_expanded = np.reshape(x_expanded, (n_samples * r, r * n_features)) + x_expanded = np.reshape(x_expanded, (n_samples * n_tgts, n_tgts * n_features)) xTx = np.dot(x_expanded.T, x_expanded) xTy = np.dot(x_expanded.T, y.flatten()) @@ -576,7 +568,7 @@ def _reduce(self, x, y): # update P tensor from the newest m mPQ = np.tensordot(m, self.PQ_, axes=([0], [0])) p = self.PL_ - mPQ - Pmatrix = p.reshape(r * r, r * n_features) + Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) # update w coef_prev = coef_sparse @@ -584,14 +576,14 @@ def _reduce(self, x, y): if self.relax_optim: if self.threshold > 0.0: xi, cost = self._create_var_and_part_cost( - n_features * r, x_expanded, y + n_features * n_tgts, x_expanded, y ) cost = ( cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta ) # sparse relax_and_split coef_sparse = self._update_coef_cvxpy( - xi, cost, r * n_features, coef_prev, self.eps_solver + xi, cost, n_tgts * n_features, coef_prev, self.eps_solver ) else: pTp = np.dot(Pmatrix.T, Pmatrix) @@ -602,7 +594,7 @@ def _reduce(self, x, y): ) else: m, coef_sparse = self._solve_direct_cvxpy( - r, n_features, x_expanded, y, Pmatrix, coef_prev + n_tgts, n_features, x_expanded, y, Pmatrix, coef_prev ) # If problem over xi becomes infeasible, break out of the loop @@ -612,7 +604,7 @@ def _reduce(self, x, y): if self.relax_optim: m_prev, m, A, tk_prev = self._solve_m_relax_and_split( - r, n_features, m_prev, m, A, coef_sparse, tk_prev + n_tgts, n_features, m_prev, m, A, coef_sparse, tk_prev ) # If problem over m becomes infeasible, break out of the loop diff --git a/test/test_optimizers.py b/test/test_optimizers.py index e4d13c4f..f208c7a6 100644 --- a/test/test_optimizers.py +++ b/test/test_optimizers.py @@ -483,7 +483,6 @@ def test_trapping_sr3_quadratic_library(params, trapping_sr3_params, quadratic_l opt = TrappingSR3(**params) opt.fit(features, x_dot) - assert opt.PL_unsym_.shape == (1, 1, 1, 2) assert opt.PL_.shape == (1, 1, 1, 2) assert opt.PQ_.shape == (1, 1, 1, 1, 2) check_is_fitted(opt) @@ -497,7 +496,6 @@ def test_trapping_sr3_quadratic_library(params, trapping_sr3_params, quadratic_l opt = TrappingSR3(**params) opt.fit(features, x_dot) - assert opt.PL_unsym_.shape == (1, 1, 1, 2) assert opt.PL_.shape == (1, 1, 1, 2) assert opt.PQ_.shape == (1, 1, 1, 1, 2) check_is_fitted(opt) diff --git a/test/test_optimizers_complexity.py b/test/test_optimizers_complexity.py index 8a6486d8..f027f12f 100644 
--- a/test/test_optimizers_complexity.py +++ b/test/test_optimizers_complexity.py @@ -45,7 +45,7 @@ def test_complexity_parameter( optimizers = [ WrappedOptimizer(opt_cls(**{reg_name: reg_value}), normalize_columns=True) - for reg_value in [10, 1, 0.1, 0.01] + for reg_value in [10, 1, 0.1, 0.001] ] for opt in optimizers: From 87180b710d2c6ba1eac4e2928314917c2a9757b7 Mon Sep 17 00:00:00 2001 From: Jake <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Thu, 9 Nov 2023 12:58:19 +0000 Subject: [PATCH 05/14] CLN: Simplify checking PQ/PL tensors --- pysindy/optimizers/trapping_sr3.py | 95 ++++++++---------------------- 1 file changed, 25 insertions(+), 70 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 6024ba3d..33eed646 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -251,76 +251,30 @@ def _set_Ptensors( return PL_tensor_unsym, PL_tensor, PQ_tensor - def _bad_PL(self, PL): - """Check if PL tensor is properly defined""" - tol = 1e-10 - return np.any((np.transpose(PL, [1, 0, 2, 3]) - PL) > tol) - - def _bad_PQ(self, PQ): - """Check if PQ tensor is properly defined""" - tol = 1e-10 - return np.any((np.transpose(PQ, [0, 2, 1, 3, 4]) - PQ) > tol) - - def _check_P_matrix(self, r, n_features, N): + @staticmethod + def _check_P_matrix( + n_tgts: int, n_feat: int, n_feat_expected: int, PL: np.ndarray, PQ: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray]: """Check if P tensor is properly defined""" - # If these tensors are not passed, or incorrect shape, assume zeros - if self.PQ_ is None: - self.PQ_ = np.zeros((r, r, r, r, n_features)) - warnings.warn( - "The PQ tensor (a requirement for the stability promotion) was" - " not set, so setting this tensor to all zeros. " - ) - elif (self.PQ_).shape != (r, r, r, r, n_features) and (self.PQ_).shape != ( - r, - r, - r, - r, - N, - ): - self.PQ_ = np.zeros((r, r, r, r, n_features)) - warnings.warn( - "The PQ tensor (a requirement for the stability promotion) was" - " initialized with incorrect dimensions, " - "so setting this tensor to all zeros " - "(with the correct dimensions). " - ) - if self.PL_ is None: - self.PL_ = np.zeros((r, r, r, n_features)) - warnings.warn( - "The PL tensor (a requirement for the stability promotion) was" - " not set, so setting this tensor to all zeros. " - ) - elif (self.PL_).shape != (r, r, r, n_features) and (self.PL_).shape != ( - r, - r, - r, - N, + if ( + PQ is None + or PL is None + or PQ.shape != (n_tgts, n_tgts, n_tgts, n_tgts, n_feat) + or PL.shape != (n_tgts, n_tgts, n_tgts, n_feat) + or n_feat != n_feat_expected # library is not quadratic/incorrect shape ): - self.PL_ = np.zeros((r, r, r, n_features)) + PL = np.zeros((n_tgts, n_tgts, n_tgts, n_feat)) + PQ = np.zeros((n_tgts, n_tgts, n_tgts, n_tgts, n_feat)) warnings.warn( - "The PL tensor (a requirement for the stability promotion) was" - " initialized with incorrect dimensions, " - "so setting this tensor to all zeros " - "(with the correct dimensions). " + "PQ and PL tensors not defined, wrong shape, or incompatible with " + "feature library shape. Ensure feature library is quadratic. 
" + "Setting tensors to zero" ) - - # Check if the tensor symmetries are properly defined - if self._bad_PL(self.PL_): - raise ValueError("PL tensor was passed but the symmetries are not correct") - if self._bad_PQ(self.PQ_): - raise ValueError("PQ tensor was passed but the symmetries are not correct") - - # If PL/PQ finite and correct, so trapping theorem is being used, - # then make sure library is quadratic and correct shape - if (np.any(self.PL_ != 0.0) or np.any(self.PQ_ != 0.0)) and n_features != N: - warnings.warn( - "The feature library is the wrong shape or not quadratic, " - "so please correct this if you are attempting to use the " - "trapping algorithm with the stability term included. Setting " - "PL and PQ tensors to zeros for now." - ) - self.PL_ = np.zeros((r, r, r, n_features)) - self.PQ_ = np.zeros((r, r, r, r, n_features)) + if not np.allclose( + np.transpose(PL, [1, 0, 2, 3]), PL, atol=1e-10 + ) or not np.allclose(np.transpose(PQ, [0, 2, 1, 3, 4]), PQ, atol=1e-10): + raise ValueError("PQ/PL tensors were passed but have the wrong symmetry") + return PL, PQ def _update_coef_constraints(self, H, x_transpose_y, P_transpose_A, coef_sparse): """Solves the coefficient update analytically if threshold = 0""" @@ -504,13 +458,14 @@ def _reduce(self, x, y): self.history_ = [] n_samples, n_features = x.shape n_tgts = y.shape[1] - N = int((n_tgts**2 + 3 * n_tgts) / 2.0) + n_feat_expected = int((n_tgts**2 + 3 * n_tgts) / 2.0) - # Define PL and PQ tensors, only relevant if the stability term in - # trapping SINDy is turned on. + # Only relevant if the stability term is turned on. self.PL_unsym_, self.PL_, self.PQ_ = self._set_Ptensors(n_tgts) # make sure dimensions/symmetries are correct - self._check_P_matrix(n_tgts, n_features, N) + self.PL_, self.PQ_ = self._check_P_matrix( + n_tgts, n_features, n_feat_expected, self.PL_, self.PQ_ + ) # Set initial coefficients if self.use_constraints and self.constraint_order.lower() == "target": From 263094c1b17cc7b655475a27ff7723be488cb635 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Mon, 27 Nov 2023 21:16:20 +0000 Subject: [PATCH 06/14] CLN: Remove nonfunctional objective_history argument in TrappingSR3 Document it as an attribute, however. 
--- pysindy/optimizers/trapping_sr3.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 33eed646..e86eba0e 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -132,6 +132,9 @@ class TrappingSR3(ConstrainedSR3): n_targets, n_targets, n_features) Quadratic coefficient part of the P matrix in ||Pw - A||^2 + objective_history_: list + History of the objective value at each iteration + Examples -------- >>> import numpy as np @@ -167,7 +170,6 @@ def __init__( accel=False, m0=None, A0=None, - objective_history=None, **kwargs, ): super().__init__( @@ -224,7 +226,6 @@ def __init__( self.gamma = gamma self.tol_m = tol_m self.accel = accel - self.objective_history = objective_history def _set_Ptensors( self, n_targets: int @@ -594,4 +595,4 @@ def _reduce(self, x, y): ) self.coef_ = coef_sparse.T - self.objective_history = objective_history + self.objective_history_ = objective_history From 09e8e55820c5b6d5542fa9c3388fbc131b7552e5 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Mon, 27 Nov 2023 21:41:29 +0000 Subject: [PATCH 07/14] CLN: Annotate types in TrappingSR3, clean/organize docstring --- pysindy/optimizers/trapping_sr3.py | 90 ++++++++++++++++-------------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index e86eba0e..34e49d9a 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -5,6 +5,7 @@ import cvxpy as cp import numpy as np +from numpy.typing import NDArray from scipy.linalg import cho_factor from scipy.linalg import cho_solve from sklearn.exceptions import ConvergenceWarning @@ -47,61 +48,63 @@ class TrappingSR3(ConstrainedSR3): Parameters ---------- - evolve_w : bool, optional (default True) + evolve_w : If false, don't update w and just minimize over (m, A) - eta : float, optional (default 1.0e20) + eta : Determines the strength of the stability term ||Pw-A||^2 in the optimization. The default value is very large so that the algorithm default is to ignore the stability term. In this limit, this should be approximately equivalent to the ConstrainedSR3 method. - alpha_m : float, optional (default eta * 0.1) - Determines the step size in the prox-gradient descent over m. - For convergence, need alpha_m <= eta / ||w^T * PQ^T * PQ * w||. - Typically 0.01 * eta <= alpha_m <= 0.1 * eta. + eps_solver : + If threshold != 0, this specifies the error tolerance in the + CVXPY (OSQP) solve. Default 1.0e-7 (Default is 1.0e-3 in OSQP.) + + relax_optim : + If relax_optim = True, use the relax-and-split method. If False, + try a direct minimization on the largest eigenvalue. - alpha_A : float, optional (default eta) + inequality_constraints : bool, optional (default False) + If True, relax_optim must be false or relax_optim = True + AND threshold != 0, so that the CVXPY methods are used. + + alpha_A : Determines the step size in the prox-gradient descent over A. - For convergence, need alpha_A <= eta, so typically + For convergence, need alpha_A <= eta, so default alpha_A = eta is used. - gamma : float, optional (default 0.1) + alpha_m : + Determines the step size in the prox-gradient descent over m. + For convergence, need alpha_m <= eta / ||w^T * PQ^T * PQ * w||. + Typically 0.01 * eta <= alpha_m <= 0.1 * eta. 
(default eta * 0.1) + + gamma : Determines the negative interval that matrix A is projected onto. For most applications gamma = 0.1 - 1.0 works pretty well. - tol_m : float, optional (default 1e-5) + tol_m : Tolerance used for determining convergence of the optimization algorithm over m. - thresholder : string, optional (default 'L1') + thresholder : Regularization function to use. For current trapping SINDy, only the L1 and L2 norms are implemented. Note that other convex norms could be straightforwardly implemented, but L0 requires - reformulation because of nonconvexity. - - eps_solver : float, optional (default 1.0e-7) - If threshold != 0, this specifies the error tolerance in the - CVXPY (OSQP) solve. Default is 1.0e-3 in OSQP. - - relax_optim : bool, optional (default True) - If relax_optim = True, use the relax-and-split method. If False, - try a direct minimization on the largest eigenvalue. - - inequality_constraints : bool, optional (default False) - If True, relax_optim must be false or relax_optim = True - AND threshold != 0, so that the CVXPY methods are used. + reformulation because of nonconvexity. (default 'L1') - accel : bool, optional (default False) + accel : Whether or not to use accelerated prox-gradient descent for (m, A). + (default False) - m0 : np.ndarray, shape (n_targets), optional (default None) - Initial guess for vector m in the optimization. Otherwise - each component of m is randomly initialized in [-1, 1]. + m0 : + Initial guess for vector m in the optimization. Otherwise each + component of m is randomly initialized in [-1, 1]. shape (n_targets), + default None. - A0 : np.ndarray, shape (n_targets, n_targets), optional (default None) - Initial guess for vector A in the optimization. Otherwise - A is initialized as A = diag(gamma). + A0 : + Initial guess for vector A in the optimization. Shape (n_targets, n_targets) + Default None, meaning A is initialized as A = diag(gamma). Attributes ---------- @@ -157,19 +160,20 @@ class TrappingSR3(ConstrainedSR3): def __init__( self, - evolve_w=True, - eps_solver=1e-7, - relax_optim=True, + *, + evolve_w: bool = True, + eta: float | None = None, + eps_solver: float = 1e-7, + relax_optim: bool = True, inequality_constraints=False, - eta=None, - alpha_A=None, - alpha_m=None, - gamma=-0.1, - tol_m=1e-5, - thresholder="l1", - accel=False, - m0=None, - A0=None, + alpha_A: float | None = None, + alpha_m: float | None = None, + gamma: float = -0.1, + tol_m: float = 1e-5, + thresholder: str = "l1", + accel: bool = False, + m0: NDArray | None = None, + A0: NDArray | None = None, **kwargs, ): super().__init__( From 35ae2ed099c91f7c647a465bd28d8be353988247 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Mon, 27 Nov 2023 22:23:11 +0000 Subject: [PATCH 08/14] CLN: Remove evolve_w arg in TrappingSR3. If False, code skips all optimization methods. It isn't used in any example, it's just a vestige of earlier work. 
The section commented "update w" doesn't modify any variables called "w" --- pysindy/optimizers/trapping_sr3.py | 55 ++++++++++++------------------ test/test_optimizers.py | 2 -- 2 files changed, 21 insertions(+), 36 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 34e49d9a..763f3579 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -161,7 +161,6 @@ class TrappingSR3(ConstrainedSR3): def __init__( self, *, - evolve_w: bool = True, eta: float | None = None, eps_solver: float = 1e-7, relax_optim: bool = True, @@ -207,18 +206,11 @@ def __init__( raise ValueError("gamma must be negative") if self.tol <= 0 or tol_m <= 0 or eps_solver <= 0: raise ValueError("tol and tol_m must be positive") - if not evolve_w and not relax_optim: - raise ValueError("If doing direct solve, must evolve w") if inequality_constraints and relax_optim and self.threshold == 0.0: raise ValueError( "Ineq. constr. -> threshold!=0 + relax_optim=True or relax_optim=False." ) - if inequality_constraints and not evolve_w: - raise ValueError( - "Use of inequality constraints requires solving for xi (evolve_w=True)." - ) - self.evolve_w = evolve_w self.eps_solver = eps_solver self.relax_optim = relax_optim self.inequality_constraints = inequality_constraints @@ -522,7 +514,7 @@ def _reduce(self, x, y): m_prev = m # Begin optimization loop - objective_history = [] + self.objective_history_ = [] for k in range(self.max_iter): # update P tensor from the newest m @@ -530,32 +522,28 @@ def _reduce(self, x, y): p = self.PL_ - mPQ Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) - # update w coef_prev = coef_sparse - if self.evolve_w: - if self.relax_optim: - if self.threshold > 0.0: - xi, cost = self._create_var_and_part_cost( - n_features * n_tgts, x_expanded, y - ) - cost = ( - cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta - ) - # sparse relax_and_split - coef_sparse = self._update_coef_cvxpy( - xi, cost, n_tgts * n_features, coef_prev, self.eps_solver - ) - else: - pTp = np.dot(Pmatrix.T, Pmatrix) - H = xTx + pTp / self.eta - P_transpose_A = np.dot(Pmatrix.T, A.flatten()) - coef_sparse = self._solve_nonsparse_relax_and_split( - H, xTy, P_transpose_A, coef_prev - ) + if self.relax_optim: + if self.threshold > 0.0: + xi, cost = self._create_var_and_part_cost( + n_features * n_tgts, x_expanded, y + ) + cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta + # sparse relax_and_split + coef_sparse = self._update_coef_cvxpy( + xi, cost, n_tgts * n_features, coef_prev, self.eps_solver + ) else: - m, coef_sparse = self._solve_direct_cvxpy( - n_tgts, n_features, x_expanded, y, Pmatrix, coef_prev + pTp = np.dot(Pmatrix.T, Pmatrix) + H = xTx + pTp / self.eta + P_transpose_A = np.dot(Pmatrix.T, A.flatten()) + coef_sparse = self._solve_nonsparse_relax_and_split( + H, xTy, P_transpose_A, coef_prev ) + else: + m, coef_sparse = self._solve_direct_cvxpy( + n_tgts, n_features, x_expanded, y, Pmatrix, coef_prev + ) # If problem over xi becomes infeasible, break out of the loop if coef_sparse is None: @@ -582,7 +570,7 @@ def _reduce(self, x, y): self.PWeigs_history_.append(np.sort(eigvals)) # update objective - objective_history.append(self._objective(x, y, coef_sparse, A, PW, k)) + self.objective_history_.append(self._objective(x, y, coef_sparse, A, PW, k)) if ( self._m_convergence_criterion() < self.tol_m @@ -599,4 +587,3 @@ def _reduce(self, x, y): ) self.coef_ = coef_sparse.T - self.objective_history_ = 
objective_history diff --git a/test/test_optimizers.py b/test/test_optimizers.py index f208c7a6..80850c76 100644 --- a/test/test_optimizers.py +++ b/test/test_optimizers.py @@ -206,13 +206,11 @@ def test_sr3_bad_parameters(optimizer, params): dict(eta=1, alpha_m=-1), dict(eta=1, alpha_A=-1), dict(gamma=1), - dict(evolve_w=False, relax_optim=False), dict(thresholder="l0"), dict(threshold=-1), dict(max_iter=0), dict(eta=10, alpha_m=20), dict(eta=10, alpha_A=20), - dict(inequality_constraints=True, evolve_w=False), dict( constraint_lhs=np.zeros((10, 10)), constraint_rhs=np.zeros(10), From 21e13aafa7b2d8d2247d4ba74bd4a70bbeb8d900 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Mon, 27 Nov 2023 22:58:06 +0000 Subject: [PATCH 09/14] BUG: Expand try-except in CVXPY solve to ValueErrors Also removes tolerance specifications from backup optimization due to name changes In some versions of SCS, (e.g. 3.2.3), the change from max_iter to max_iters raises a ValueError instead of a TypeError --- pysindy/optimizers/constrained_sr3.py | 6 +-- pysindy/optimizers/trapping_sr3.py | 56 ++++++++------------------- 2 files changed, 19 insertions(+), 43 deletions(-) diff --git a/pysindy/optimizers/constrained_sr3.py b/pysindy/optimizers/constrained_sr3.py index 7e9fa036..9d24a541 100644 --- a/pysindy/optimizers/constrained_sr3.py +++ b/pysindy/optimizers/constrained_sr3.py @@ -300,7 +300,7 @@ def _update_coef_cvxpy(self, xi, cost, var_len, coef_prev, tol): else: prob = cp.Problem(cp.Minimize(cost)) - # default solver is OSQP here but switches to ECOS for L2 + # default solver is SCS/OSQP here but switches to ECOS for L2 try: prob.solve( max_iter=self.max_iter, @@ -311,9 +311,9 @@ def _update_coef_cvxpy(self, xi, cost, var_len, coef_prev, tol): # Annoying error coming from L2 norm switching to use the ECOS # solver, which uses "max_iters" instead of "max_iter", and # similar semantic changes for the other variables. - except TypeError: + except (TypeError, ValueError): try: - prob.solve(abstol=tol, reltol=tol, verbose=self.verbose_cvxpy) + prob.solve(max_iters=self.max_iter, verbose=self.verbose_cvxpy) except cp.error.SolverError: print("Solver failed, setting coefs to zeros") xi.value = np.zeros(var_len) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 763f3579..ef13f525 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -376,7 +376,9 @@ def _solve_nonsparse_relax_and_split(self, H, xTy, P_transpose_A, coef_prev): ) return coef_sparse - def _solve_direct_cvxpy(self, r, N, x_expanded, y, Pmatrix, coef_prev): + def _solve_direct_cvxpy( + self, n_tgts, n_features, x_expanded, y, Pmatrix, coef_prev + ): """ If using the direct formulation of trapping SINDy, solves the entire problem in CVXPY regardless of the threshold value. @@ -384,53 +386,27 @@ def _solve_direct_cvxpy(self, r, N, x_expanded, y, Pmatrix, coef_prev): problem, solved in CVXPY, so convergence/quality guarantees are not available here! 
""" - xi, cost = self._create_var_and_part_cost(N * r, x_expanded, y) - cost = cost + cp.lambda_max(cp.reshape(Pmatrix @ xi, (r, r))) / self.eta - if self.use_constraints: - if self.inequality_constraints: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi <= self.constraint_rhs], - ) - else: - prob = cp.Problem( - cp.Minimize(cost), - [self.constraint_lhs @ xi == self.constraint_rhs], - ) - else: - prob = cp.Problem(cp.Minimize(cost)) - - # default solver is SCS here - try: - prob.solve(eps=self.eps_solver, verbose=self.verbose_cvxpy) - # Annoying error coming from L2 norm switching to use the ECOS - # solver, which uses "max_iters" instead of "max_iter", and - # similar semantic changes for the other variables. - except TypeError: - prob.solve( - abstol=self.eps_solver, - reltol=self.eps_solver, - verbose=self.verbose_cvxpy, - ) - except cp.error.SolverError: - print("Solver failed, setting coefs to zeros") - xi.value = np.zeros(N * r) + var_len = n_tgts * n_features + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) + cost = ( + cost + cp.lambda_max(cp.reshape(Pmatrix @ xi, (n_tgts, n_tgts))) / self.eta + ) - if xi.value is None: - print("Infeasible solve, increase/decrease eta") - return None, None - coef_sparse = (xi.value).reshape(coef_prev.shape) + coef_sparse = self._update_coef_cvxpy( + xi, cost, var_len, coef_prev, self.eps_solver + ) if np.all(self.PL_ == 0) and np.all(self.PQ_ == 0): - return np.zeros(r), coef_sparse # no optimization over m + return np.zeros(n_tgts), coef_sparse # no optimization over m else: - m_cp = cp.Variable(r) + m_cp = cp.Variable(n_tgts) L = np.tensordot(self.PL_, coef_sparse, axes=([3, 2], [0, 1])) Q = np.reshape( - np.tensordot(self.PQ_, coef_sparse, axes=([4, 3], [0, 1])), (r, r * r) + np.tensordot(self.PQ_, coef_sparse, axes=([4, 3], [0, 1])), + (n_tgts, n_tgts * n_tgts), ) Ls = 0.5 * (L + L.T).flatten() - cost_m = cp.lambda_max(cp.reshape(Ls - m_cp @ Q, (r, r))) + cost_m = cp.lambda_max(cp.reshape(Ls - m_cp @ Q, (n_tgts, n_tgts))) prob_m = cp.Problem(cp.Minimize(cost_m)) # default solver is SCS here From 4bd7e92364c3c2ab6ec2f6ada04d35c5822cf316 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Mon, 27 Nov 2023 23:43:53 +0000 Subject: [PATCH 10/14] CLN: Clarify when updating m and updating coefficients in trapping Goal is to make it clear when trapping is solving for coefficients and when it's solving for the updated trapping center. * Renamed "m" "trap_ctr". * Extracts solving for coef_prev from solve_cvxpy_direct, renaming latter to make its reduced role in solving for just the trapping center * Removes unused args --- pysindy/optimizers/trapping_sr3.py | 62 +++++++++++++----------------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index ef13f525..dc428d26 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -98,9 +98,8 @@ class TrappingSR3(ConstrainedSR3): (default False) m0 : - Initial guess for vector m in the optimization. Otherwise each - component of m is randomly initialized in [-1, 1]. shape (n_targets), - default None. + Initial guess for trap center in the optimization. Default None + initializes vector elements randomly in [-1, 1]. shape (n_targets) A0 : Initial guess for vector A in the optimization. 
Shape (n_targets, n_targets) @@ -334,7 +333,7 @@ def _objective(self, x, y, coef_sparse, A, PW, q): ) return 0.5 * np.sum(R2) + 0.5 * np.sum(A2) / self.eta + L1 - def _solve_m_relax_and_split(self, r, N, m_prev, m, A, coef_sparse, tk_previous): + def _solve_m_relax_and_split(self, m_prev, m, A, coef_sparse, tk_previous): """ If using the relaxation formulation of trapping SINDy, solves the (m, A) algorithm update. @@ -376,9 +375,7 @@ def _solve_nonsparse_relax_and_split(self, H, xTy, P_transpose_A, coef_prev): ) return coef_sparse - def _solve_direct_cvxpy( - self, n_tgts, n_features, x_expanded, y, Pmatrix, coef_prev - ): + def _solve_m_direct(self, n_tgts, coef_sparse): """ If using the direct formulation of trapping SINDy, solves the entire problem in CVXPY regardless of the threshold value. @@ -386,15 +383,6 @@ def _solve_direct_cvxpy( problem, solved in CVXPY, so convergence/quality guarantees are not available here! """ - var_len = n_tgts * n_features - xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) - cost = ( - cost + cp.lambda_max(cp.reshape(Pmatrix @ xi, (n_tgts, n_tgts))) / self.eta - ) - - coef_sparse = self._update_coef_cvxpy( - xi, cost, var_len, coef_prev, self.eps_solver - ) if np.all(self.PL_ == 0) and np.all(self.PQ_ == 0): return np.zeros(n_tgts), coef_sparse # no optimization over m @@ -415,8 +403,8 @@ def _solve_direct_cvxpy( m = m_cp.value if m is None: print("Infeasible solve over m, increase/decrease eta") - return None, coef_sparse - return m, coef_sparse + return None + return m def _reduce(self, x, y): """ @@ -471,11 +459,11 @@ def _reduce(self, x, y): # initial guess for m if self.m0 is not None: - m = self.m0 + trap_ctr = self.m0 else: np.random.seed(1) - m = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 - self.m_history_.append(m) + trap_ctr = (np.random.rand(n_tgts) - np.ones(n_tgts)) * 2 + self.m_history_.append(trap_ctr) # Precompute some objects for optimization x_expanded = np.zeros((n_samples, n_tgts, n_features, n_tgts)) @@ -487,25 +475,25 @@ def _reduce(self, x, y): # if using acceleration tk_prev = 1 - m_prev = m + trap_prev_ctr = trap_ctr # Begin optimization loop self.objective_history_ = [] for k in range(self.max_iter): - # update P tensor from the newest m - mPQ = np.tensordot(m, self.PQ_, axes=([0], [0])) + # update P tensor from the newest trap center + mPQ = np.tensordot(trap_ctr, self.PQ_, axes=([0], [0])) p = self.PL_ - mPQ Pmatrix = p.reshape(n_tgts * n_tgts, n_tgts * n_features) coef_prev = coef_sparse if self.relax_optim: if self.threshold > 0.0: + # sparse relax_and_split xi, cost = self._create_var_and_part_cost( n_features * n_tgts, x_expanded, y ) cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta - # sparse relax_and_split coef_sparse = self._update_coef_cvxpy( xi, cost, n_tgts * n_features, coef_prev, self.eps_solver ) @@ -516,30 +504,34 @@ def _reduce(self, x, y): coef_sparse = self._solve_nonsparse_relax_and_split( H, xTy, P_transpose_A, coef_prev ) + trap_prev_ctr, trap_ctr, A, tk_prev = self._solve_m_relax_and_split( + trap_prev_ctr, trap_ctr, A, coef_sparse, tk_prev + ) else: - m, coef_sparse = self._solve_direct_cvxpy( - n_tgts, n_features, x_expanded, y, Pmatrix, coef_prev + var_len = n_tgts * n_features + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) + cost += ( + cp.lambda_max(cp.reshape(Pmatrix @ xi, (n_tgts, n_tgts))) / self.eta + ) + coef_sparse = self._update_coef_cvxpy( + xi, cost, var_len, coef_prev, self.eps_solver ) + trap_ctr = self._solve_m_direct(n_tgts, 
coef_sparse) # If problem over xi becomes infeasible, break out of the loop if coef_sparse is None: coef_sparse = coef_prev break - if self.relax_optim: - m_prev, m, A, tk_prev = self._solve_m_relax_and_split( - n_tgts, n_features, m_prev, m, A, coef_sparse, tk_prev - ) - # If problem over m becomes infeasible, break out of the loop - if m is None: - m = m_prev + if trap_ctr is None: + trap_ctr = trap_prev_ctr break self.history_.append(coef_sparse.T) PW = np.tensordot(p, coef_sparse, axes=([3, 2], [0, 1])) # (m,A) update finished, append the result - self.m_history_.append(m) + self.m_history_.append(trap_ctr) self.A_history_.append(A) eigvals, eigvecs = np.linalg.eig(PW) self.PW_history_.append(PW) From 53bf180e77c8894212db9813b32d7322ff529ac9 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Tue, 28 Nov 2023 00:24:53 +0000 Subject: [PATCH 11/14] CLN: Corrected "failed to converge" SR3 warning The "failed to converge warning" was emitted whenever max iteration was reached, regardless of whether convergence was achieved. The previous test only was able to assert convergence warning because it was written to successfully converge after 1 iteration, with a max iteration of 1. Also, removed test_sr3_warn and incorporated into test_fit_warn, with which it was 95% redundant already. This also required setting some variables outside a loop in order to access them later - a required practice for mypy and other type checkers. --- pysindy/optimizers/constrained_sr3.py | 5 ++--- pysindy/optimizers/sr3.py | 6 ++---- pysindy/optimizers/stable_linear_sr3.py | 4 +--- pysindy/optimizers/stlsq.py | 5 ++--- pysindy/optimizers/trapping_sr3.py | 7 ++----- test/test_optimizers.py | 12 +----------- 6 files changed, 10 insertions(+), 29 deletions(-) diff --git a/pysindy/optimizers/constrained_sr3.py b/pysindy/optimizers/constrained_sr3.py index 9d24a541..dbc771a6 100644 --- a/pysindy/optimizers/constrained_sr3.py +++ b/pysindy/optimizers/constrained_sr3.py @@ -478,9 +478,8 @@ def _reduce(self, x, y): break else: warnings.warn( - "SR3._reduce did not converge after {} iterations.".format( - self.max_iter - ), + f"ConstrainedSR3 did not converge after {self.max_iter}" + " iterations.", ConvergenceWarning, ) if self.use_constraints and self.constraint_order.lower() == "target": diff --git a/pysindy/optimizers/sr3.py b/pysindy/optimizers/sr3.py index 765d4c1f..58db5ef5 100644 --- a/pysindy/optimizers/sr3.py +++ b/pysindy/optimizers/sr3.py @@ -342,8 +342,8 @@ def _reduce(self, x, y): coef_sparse = self.coef_.T n_samples, n_features = x.shape + coef_full = coef_sparse.copy() if self.use_trimming: - coef_full = coef_sparse.copy() trimming_array = np.repeat(1.0 - self.trimming_fraction, n_samples) self.history_trimming_ = [trimming_array] else: @@ -392,9 +392,7 @@ def _reduce(self, x, y): break else: warnings.warn( - "SR3._reduce did not converge after {} iterations.".format( - self.max_iter - ), + f"SR3 did not converge after {self.max_iter} iterations.", ConvergenceWarning, ) self.coef_ = coef_sparse.T diff --git a/pysindy/optimizers/stable_linear_sr3.py b/pysindy/optimizers/stable_linear_sr3.py index fd1cdd70..efeb01e1 100644 --- a/pysindy/optimizers/stable_linear_sr3.py +++ b/pysindy/optimizers/stable_linear_sr3.py @@ -415,9 +415,7 @@ def _reduce(self, x, y): break else: warnings.warn( - "StableLinearSR3._reduce did not converge after {} iterations.".format( - self.max_iter - ), + f"StableLinearSR3 did not converge after {self.max_iter} iterations.", 
                 ConvergenceWarning,
             )

diff --git a/pysindy/optimizers/stlsq.py b/pysindy/optimizers/stlsq.py
index 62ebacd9..9cbddd1b 100644
--- a/pysindy/optimizers/stlsq.py
+++ b/pysindy/optimizers/stlsq.py
@@ -203,6 +203,7 @@ def _reduce(self, x, y):
         n_targets = y.shape[1]
         n_features_selected = np.sum(ind)
         sparse_sub = [np.array(self.sparse_ind)] * y.shape[1]
+        optvar = np.zeros((n_targets, n_features))

         # Print initial values for each term in the optimization
         if self.verbose:
@@ -266,9 +267,7 @@ def _reduce(self, x, y):
                 break
         else:
             warnings.warn(
-                "STLSQ._reduce did not converge after {} iterations.".format(
-                    self.max_iter
-                ),
+                f"STLSQ did not converge after {self.max_iter} iterations.",
                 ConvergenceWarning,
             )
         if self.sparse_ind is None:
diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py
index dc428d26..3d2a2944 100644
--- a/pysindy/optimizers/trapping_sr3.py
+++ b/pysindy/optimizers/trapping_sr3.py
@@ -544,13 +544,10 @@ def _reduce(self, x, y):
                 self._m_convergence_criterion() < self.tol_m
                 and self._convergence_criterion() < self.tol
             ):
-                # Could not (further) select important features
                 break

-        if k == self.max_iter - 1:
+        else:
             warnings.warn(
-                "TrappingSR3._reduce did not converge after {} iters.".format(
-                    self.max_iter
-                ),
+                f"TrappingSR3 did not converge after {self.max_iter} iters.",
                 ConvergenceWarning,
             )
diff --git a/test/test_optimizers.py b/test/test_optimizers.py
index 80850c76..9c401998 100644
--- a/test/test_optimizers.py
+++ b/test/test_optimizers.py
@@ -759,17 +759,6 @@ def test_sr3_enable_trimming(optimizer, data_linear_oscillator_corrupted):
     np.testing.assert_allclose(model_plain.coef_, model_trimming.coef_)


-@pytest.mark.parametrize(
-    "optimizer", [SR3, ConstrainedSR3, StableLinearSR3, TrappingSR3]
-)
-def test_sr3_warn(optimizer, data_linear_oscillator_corrupted):
-    x, x_dot, _ = data_linear_oscillator_corrupted
-    model = optimizer(max_iter=1, tol=1e-10)
-
-    with pytest.warns(ConvergenceWarning):
-        model.fit(x, x_dot)
-
-
 @pytest.mark.parametrize(
     "optimizer",
     [
@@ -783,6 +772,7 @@ def test_sr3_warn(optimizer, data_linear_oscillator_corrupted):
 def test_fit_warn(data_derivative_1d, optimizer):
     x, x_dot = data_derivative_1d
     x = x.reshape(-1, 1)
+    optimizer.max_iter = 0  # normally prohibited in constructor

     with pytest.warns(ConvergenceWarning):
         optimizer.fit(x, x_dot)

From 9610260f7422f610a2f42f396ab1f17d5fd3ca3f Mon Sep 17 00:00:00 2001
From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com>
Date: Thu, 30 Nov 2023 01:01:08 +0000
Subject: [PATCH 12/14] CLN: Move Trapping guards from __init__ to helper.

This allows them to be used by set_params, part of the scikit-learn API.
---
 pysindy/optimizers/base.py         | 19 ++++++---
 pysindy/optimizers/trapping_sr3.py | 62 ++++++++++++++++--------------
 2 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/pysindy/optimizers/base.py b/pysindy/optimizers/base.py
index 45d4842b..e44a2302 100644
--- a/pysindy/optimizers/base.py
+++ b/pysindy/optimizers/base.py
@@ -97,17 +97,24 @@ def __init__(
         unbias: bool = True,
     ):
         super().__init__(fit_intercept=False, copy_X=copy_X)
-
-        if max_iter <= 0:
-            raise ValueError("max_iter must be positive")
-
         self.max_iter = max_iter
         self.iters = 0
-        if np.ndim(initial_guess) == 1:
-            initial_guess = initial_guess.reshape(1, -1)
         self.initial_guess = initial_guess
         self.normalize_columns = normalize_columns
         self.unbias = unbias
+        self.__post_init_guard()
+
+    # See name mangling rules for double underscore rationale
+    def __post_init_guard(self):
+        """Conduct initialization post-init, as required by scikit-learn API."""
+        if np.ndim(self.initial_guess) == 1:
+            self.initial_guess = self.initial_guess.reshape(1, -1)
+        if self.max_iter <= 0:
+            raise ValueError("max_iter must be positive")
+
+    def set_params(self, **kwargs):
+        super().set_params(**kwargs)
+        self.__post_init_guard()

     # Force subclasses to implement this
     @abc.abstractmethod
diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py
index 3d2a2944..eea4aef3 100644
--- a/pysindy/optimizers/trapping_sr3.py
+++ b/pysindy/optimizers/trapping_sr3.py
@@ -174,53 +174,57 @@ def __init__(
         A0: NDArray | None = None,
         **kwargs,
     ):
-        super().__init__(
-            thresholder=thresholder,
-            **kwargs,
-        )
+        super().__init__(thresholder=thresholder, **kwargs)
+        self.eps_solver = eps_solver
+        self.relax_optim = relax_optim
+        self.inequality_constraints = inequality_constraints
+        self.m0 = m0
+        self.A0 = A0
+        self.alpha_A = alpha_A
+        self.alpha_m = alpha_m
+        self.eta = eta
+        self.gamma = gamma
+        self.tol_m = tol_m
+        self.accel = accel
+        self.__post_init_guard()
+
+    def __post_init_guard(self):
+        """Conduct initialization post-init, as required by scikit-learn API"""
         if self.thresholder.lower() not in ("l1", "l2", "weighted_l1", "weighted_l2"):
             raise ValueError("Regularizer must be (weighted) L1 or L2")
-        if eta is None:
+        if self.eta is None:
             warnings.warn(
                 "eta was not set, so defaulting to eta = 1e20 "
                 "with alpha_m = 1e-2 * eta, alpha_A = eta. Here eta is so "
                 "large that the stability term in the optimization "
                 "will be ignored."
             )
-            eta = 1e20
-            alpha_m = 1e18
-            alpha_A = 1e20
+            self.eta = 1e20
+            self.alpha_m = 1e18
+            self.alpha_A = 1e20
         else:
-            if alpha_m is None:
-                alpha_m = eta * 1e-2
-            if alpha_A is None:
-                alpha_A = eta
-        if eta <= 0:
+            if self.alpha_m is None:
+                self.alpha_m = self.eta * 1e-2
+            if self.alpha_A is None:
+                self.alpha_A = self.eta
+        if self.eta <= 0:
             raise ValueError("eta must be positive")
-        if alpha_m < 0 or alpha_m > eta:
+        if self.alpha_m < 0 or self.alpha_m > self.eta:
             raise ValueError("0 <= alpha_m <= eta")
-        if alpha_A < 0 or alpha_A > eta:
+        if self.alpha_A < 0 or self.alpha_A > self.eta:
             raise ValueError("0 <= alpha_A <= eta")
-        if gamma >= 0:
+        if self.gamma >= 0:
             raise ValueError("gamma must be negative")
-        if self.tol <= 0 or tol_m <= 0 or eps_solver <= 0:
+        if self.tol <= 0 or self.tol_m <= 0 or self.eps_solver <= 0:
             raise ValueError("tol and tol_m must be positive")
-        if inequality_constraints and relax_optim and self.threshold == 0.0:
+        if self.inequality_constraints and self.relax_optim and self.threshold == 0.0:
             raise ValueError(
                 "Ineq. constr. -> threshold!=0 + relax_optim=True or relax_optim=False."
             )
-        self.eps_solver = eps_solver
-        self.relax_optim = relax_optim
-        self.inequality_constraints = inequality_constraints
-        self.m0 = m0
-        self.A0 = A0
-        self.alpha_A = alpha_A
-        self.alpha_m = alpha_m
-        self.eta = eta
-        self.gamma = gamma
-        self.tol_m = tol_m
-        self.accel = accel
+
+    def set_params(self, **kwargs):
+        super().set_params(**kwargs)
+        self.__post_init_guard()

     def _set_Ptensors(
         self, n_targets: int

From 2662af36f26864811a198849178211f7b0ffc33d Mon Sep 17 00:00:00 2001
From: Jake Stevens-Haas <37048747+Jacob-Stevens-Haas@users.noreply.github.com>
Date: Thu, 30 Nov 2023 01:29:15 +0000
Subject: [PATCH 13/14] CLN: Extract function to encapsulate coefficient-solving step of Trapping

This makes the algorithm in the code look like the algorithm in the paper.
---
 pysindy/optimizers/trapping_sr3.py | 41 +++++++++++++++++-------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py
index eea4aef3..51aa17fd 100644
--- a/pysindy/optimizers/trapping_sr3.py
+++ b/pysindy/optimizers/trapping_sr3.py
@@ -423,6 +423,7 @@ def _reduce(self, x, y):
         self.history_ = []
         n_samples, n_features = x.shape
         n_tgts = y.shape[1]
+        var_len = n_features * n_tgts
         n_feat_expected = int((n_tgts**2 + 3 * n_tgts) / 2.0)

         # Only relevant if the stability term is turned on.
@@ -494,31 +495,19 @@ def _reduce(self, x, y): if self.relax_optim: if self.threshold > 0.0: # sparse relax_and_split - xi, cost = self._create_var_and_part_cost( - n_features * n_tgts, x_expanded, y - ) - cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta - coef_sparse = self._update_coef_cvxpy( - xi, cost, n_tgts * n_features, coef_prev, self.eps_solver + coef_sparse = self._update_coef_sparse_rs( + var_len, x_expanded, y, Pmatrix, A, coef_prev ) else: - pTp = np.dot(Pmatrix.T, Pmatrix) - H = xTx + pTp / self.eta - P_transpose_A = np.dot(Pmatrix.T, A.flatten()) - coef_sparse = self._solve_nonsparse_relax_and_split( - H, xTy, P_transpose_A, coef_prev + coef_sparse = self._update_coef_nonsparse_rs( + Pmatrix, A, coef_prev, xTx, xTy ) trap_prev_ctr, trap_ctr, A, tk_prev = self._solve_m_relax_and_split( trap_prev_ctr, trap_ctr, A, coef_sparse, tk_prev ) else: - var_len = n_tgts * n_features - xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) - cost += ( - cp.lambda_max(cp.reshape(Pmatrix @ xi, (n_tgts, n_tgts))) / self.eta - ) - coef_sparse = self._update_coef_cvxpy( - xi, cost, var_len, coef_prev, self.eps_solver + coef_sparse = self._update_coef_direct( + var_len, x_expanded, y, Pmatrix, coef_prev, n_tgts ) trap_ctr = self._solve_m_direct(n_tgts, coef_sparse) @@ -556,3 +545,19 @@ def _reduce(self, x, y): ) self.coef_ = coef_sparse.T + + def _update_coef_sparse_rs(self, var_len, x_expanded, y, Pmatrix, A, coef_prev): + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) + cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta + return self._update_coef_cvxpy(xi, cost, var_len, coef_prev, self.eps_solver) + + def _update_coef_nonsparse_rs(self, Pmatrix, A, coef_prev, xTx, xTy): + pTp = np.dot(Pmatrix.T, Pmatrix) + H = xTx + pTp / self.eta + P_transpose_A = np.dot(Pmatrix.T, A.flatten()) + return self._solve_nonsparse_relax_and_split(H, xTy, P_transpose_A, coef_prev) + + def _update_coef_direct(self, var_len, x_expanded, y, Pmatrix, coef_prev, n_tgts): + xi, cost = self._create_var_and_part_cost(var_len, x_expanded, y) + cost += cp.lambda_max(cp.reshape(Pmatrix @ xi, (n_tgts, n_tgts))) / self.eta + return self._update_coef_cvxpy(xi, cost, var_len, coef_prev, self.eps_solver) From 639ea9d4ebf300656339414f23f6daf71ae9f463 Mon Sep 17 00:00:00 2001 From: Jake <37048747+Jacob-Stevens-Haas@users.noreply.github.com> Date: Wed, 6 Dec 2023 15:04:02 +0000 Subject: [PATCH 14/14] BLD: Revert python3.10 syntax (type union pipe) to 3.8 --- pysindy/optimizers/trapping_sr3.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pysindy/optimizers/trapping_sr3.py b/pysindy/optimizers/trapping_sr3.py index 51aa17fd..0de82d81 100644 --- a/pysindy/optimizers/trapping_sr3.py +++ b/pysindy/optimizers/trapping_sr3.py @@ -2,6 +2,7 @@ from itertools import combinations_with_replacement as combo_wr from itertools import product from typing import Tuple +from typing import Union import cvxpy as cp import numpy as np @@ -160,18 +161,18 @@ class TrappingSR3(ConstrainedSR3): def __init__( self, *, - eta: float | None = None, + eta: Union[float, None] = None, eps_solver: float = 1e-7, relax_optim: bool = True, inequality_constraints=False, - alpha_A: float | None = None, - alpha_m: float | None = None, + alpha_A: Union[float, None] = None, + alpha_m: Union[float, None] = None, gamma: float = -0.1, tol_m: float = 1e-5, thresholder: str = "l1", accel: bool = False, - m0: NDArray | None = None, - A0: NDArray | None = None, + m0: 
Union[NDArray, None] = None, + A0: Union[NDArray, None] = None, **kwargs, ): super().__init__(thresholder=thresholder, **kwargs)
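
For readers checking the typing change in this last patch, a minimal sketch of the equivalence (an illustration only, not part of the patch series): the pipe syntax of PEP 604 requires Python 3.10+, while the typing.Union spelling works on Python 3.8.

    from typing import Optional, Union

    from numpy.typing import NDArray

    # Python 3.10+ only (PEP 604 union syntax):
    #     eta: float | None = None
    #     m0: NDArray | None = None
    # Python 3.8-compatible spelling used in the patch:
    eta: Union[float, None] = None
    m0: Union[NDArray, None] = None  # same as Optional[NDArray]

    # Both spellings denote the same type:
    assert Union[float, None] == Optional[float]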