regress now returns standard error from betas. stored as sigma in res…

…ults dicts
cosanlab · Aug 22, 2022 · 6e34f5d · 6e34f5d
1 parent d9f65ba
commit 6e34f5d
Show file tree

Hide file tree

Showing 4 changed files with 52 additions and 21 deletions.
diff --git a/nltools/data/adjacency.py b/nltools/data/adjacency.py
@@ -1331,24 +1331,30 @@ def regress(self, X, mode="ols", **kwargs):
         if isinstance(X, Adjacency):
             if X.square_shape()[0] != self.square_shape()[0]:
                 raise ValueError("Adjacency instances must be the same size.")
-            b, t, p, _, res = regression(X.data.T, self.data, mode=mode, **kwargs)
-            stats["beta"], stats["t"], stats["p"], stats["residual"] = (b, t, p, res)
+            (
+                stats["beta"],
+                stats["sigma"],
+                stats["t"],
+                stats["p"],
+                stats["df"],
+                stats["residual"],
+            ) = regression(X.data.T, self.data, mode=mode, **kwargs)
         elif isinstance(X, Design_Matrix):
             if X.shape[0] != len(self):
                 raise ValueError(
                     "Design matrix must have same number of observations as Adjacency"
                 )
-            b, t, p, df, res = regression(X, self.data, mode=mode, **kwargs)
-            mode = "ols"
-            stats["beta"], stats["t"], stats["p"] = [x for x in self[:3]]
-            stats["beta"].data, stats["t"].data, stats["p"].data = (
-                b.squeeze(),
-                t.squeeze(),
-                p.squeeze(),
-            )
-            stats["residual"] = self.copy()
-            stats["residual"].data = res
-            stats["df"] = df
+            (b, se, t, p, df, res) = regression(X, self.data, mode=mode, **kwargs)
+
+            stats["beta"], stats["sigma"], stats["t"] = [x for x in self[:3]]
+            stats["p"], stats["df"], stats["residual"] = [x for x in self[:3]]
+
+            stats["beta"].data = b.squeeze()
+            stats["sigma"].data = se.squeeze()
+            stats["t"].data = t.squeeze()
+            stats["p"].data = p.squeeze()
+            stats["df"].data = df.squeeze()
+            stats["residual"].data = res.squeeze()
         else:
             raise ValueError("X must be a Design_Matrix or Adjacency Instance.")
 

diff --git a/nltools/data/brain_data.py b/nltools/data/brain_data.py
@@ -693,31 +693,41 @@ def regress(self, mode="ols", **kwargs):
         if self.data.shape[0] != self.X.shape[0]:
             raise ValueError("self.X does not match the correct size of " "self.data")
 
-        b, t, p, _, res = regression(self.X, self.data, mode=mode, **kwargs)
+        b, se, t, p, df, res = regression(self.X, self.data, mode=mode, **kwargs)
 
         # Prevent copy of all data in self multiple times; instead start with an empty instance and copy only needed attributes from self, and use this as a template for other outputs
         b_out = self.__class__()
         b_out.mask = deepcopy(self.mask)
         b_out.nifti_masker = deepcopy(self.nifti_masker)
 
         # Use this as template for other outputs before setting data
+        se_out = b_out.copy()
         t_out = b_out.copy()
         p_out = b_out.copy()
-        sigma_out = b_out.copy()
+        df_out = b_out.copy()
         res_out = b_out.copy()
-        b_out.data, t_out.data, p_out.data, sigma_out.data, res_out.data = (
+        (
+            b_out.data,
+            se_out.data,
+            t_out.data,
+            p_out.data,
+            df_out.data,
+            res_out.data,
+        ) = (
             b,
+            se,
             t,
             p,
-            sigma_out,
+            df,
             res,
         )
 
         return {
             "beta": b_out,
             "t": t_out,
             "p": p_out,
-            "sigma": sigma_out,
+            "df": df_out,
+            "sigma": se_out,
             "residual": res_out,
         }
 

diff --git a/nltools/stats.py b/nltools/stats.py
@@ -1012,14 +1012,18 @@ def regress(X, Y, mode="ols", stats="full", **kwargs):
     Args:
         X (ndarray): design matrix; assumes intercept is included
         Y (ndarray): dependent variable array; if 2d, a model is fit to each column of Y separately
-        mode (str): kind of model to fit; must be one of 'ols' (default), 'robust', or 'arma'
+        mode (str): kind of model to fit; must be one of 'ols' (default), 'robust', or
+        'arma'
+        stats (str): one of 'full', 'betas', 'tstats'. Useful to speed up calculation if
+        you know you only need some statistics and not others. Defaults to 'full'.
         robust_estimator (str,optional): kind of robust estimator to use if mode = 'robust'; default 'hc0'
         nlags (int,optional): auto-correlation lag correction if mode = 'robust' and robust_estimator = 'hac'; default 1
         order (tuple,optional): auto-regressive and moving-average orders for mode = 'arma'; default (1,1)
         kwargs (dict): additional keyword arguments to statsmodels.tsa.arima_model.ARMA and joblib.Parallel
 
     Returns:
         b: coefficients
+        se: standard error of coefficients
         t: t-statistics (coef/sterr)
         p : p-values
         df: degrees of freedom
@@ -1130,7 +1134,18 @@ def regress(X, Y, mode="ols", stats="full", **kwargs):
         else:
             b, t, p, df, res = _arma_func(X, Y, **kwargs)
 
-    return b.squeeze(), t.squeeze(), p.squeeze(), df.squeeze(), res.squeeze()
+        # Arma models don't return stderr, so make a variable for consistent function
+        # return values
+        stderr = np.empty_like(b)
+
+    return (
+        b.squeeze(),
+        stderr.squeeze(),
+        t.squeeze(),
+        p.squeeze(),
+        df.squeeze(),
+        res.squeeze(),
+    )
 
 
 def regress_permutation(

diff --git a/nltools/version.py b/nltools/version.py
@@ -1,4 +1,4 @@
 """Specifies current version of nltools to be used by setup.py and __init__.py
 """
 
-__version__ = "0.4.6"
+__version__ = "0.4.7"