Add notes for preprints in docs #338

Open: wants to merge 3 commits into base: dev

4 changes: 3 additions & 1 deletion docs/conf.py
@@ -54,6 +54,8 @@
"myst_parser",
]

mathjax_path = "https://cdn.jsdelivr.net/npm/mathjax@2/MathJax.js?config=TeX-AMS-MML_HTMLorMML"

bibtex_bibfiles = ["refs.bib"]
bibtex_reference_style = "super"
bibtex_default_style = "unsrt"
@@ -82,7 +84,7 @@
numpydoc_show_class_members = False

# -- sphinx.ext.autosummary
autosummary_generate = []
autosummary_generate = True

# Otherwise, the Return parameter list looks different from the Parameters list
napoleon_use_rtype = False
64 changes: 40 additions & 24 deletions docs/refs.bib
@@ -458,30 +458,23 @@ @article{shenChiSquareTestDistance2021
}

@article{chwialkowski2015fast,
title={Fast Two-Sample Testing with Analytic Representations of Probability Measures},
author={Kacper Chwialkowski and Aaditya Ramdas and Dino Sejdinovic and Arthur Gretton},
year={2015},
journal={arXiv:1506.04725 [math, stat]},
print={1506.04725},
eprinttype={arxiv},
abstract={We propose a class of nonparametric two-sample tests with a cost linear in the sample size. Two tests are given, both based on an ensemble of distances between analytic functions representing each of the distributions. The first test uses smoothed empirical characteristic functions to represent the distributions, the second uses distribution embeddings in a reproducing kernel Hilbert space. Analyticity implies that differences in the distributions may be detected almost surely at a finite number of randomly chosen locations/frequencies. The new tests are consistent against a larger class of alternatives than the previous linear-time tests based on the (non-smoothed) empirical characteristic functions, while being much faster than the current state-of-the-art quadratic-time kernel-based or energy distance-based tests. Experiments on artificial benchmarks and on challenging real-world testing problems demonstrate that our tests give a better power/time tradeoff than competing approaches, and in some cases, better outright power than even the most expensive quadratic-time tests. This performance advantage is retained even in high dimensions, and in cases where the difference in distributions is not observable with low order statistics.},
archivePrefix={arXiv},
primaryClass={stat.ML}
}

@article{grettonKernelJointIndependence2016,
title = {{Kernel-based Tests} for {Joint Independence}},
author = {Pfister, Nikolas and Buhlmann, Peter and Scholkopf, Bernhard and Peters, Jonas},
year = {2016},
month = nov,
journal = {arXiv:1603.00285 [math, stat]},
eprint = {1603.00285},
eprinttype = {arxiv},
primaryclass = {math, stat},
abstract = {We investigate the problem of testing whether d random variables, which may or may not be continuous, are jointly (or mutually) independent. Our method builds on ideas of the two variable Hilbert-Schmidt independence criterion (HSIC) but allows for an arbitrary number of variables. We embed the d-dimensional joint distribution and the product of the marginals into a reproducing kernel Hilbert space and define the d-variable Hilbert-Schmidt independence criterion (dHSIC) as the squared distance between the embeddings. In the population case, the value of dHSIC is zero if and only if the d variables are jointly independent, as long as the kernel is characteristic. Based on an empirical estimate of dHSIC, we define three different non-parametric hypothesis tests: a permutation test, a bootstrap test and a test based on a Gamma approximation. We prove that the permutation test achieves the significance level and that the bootstrap test achieves pointwise asymptotic significance level as well as pointwise asymptotic consistency (i.e., it is able to detect any type of fixed dependence in the large sample limit). The Gamma approximation does not come with these guarantees; however, it is computationally very fast and for small d, it performs well in practice. Finally, we apply the test to a problem in causal discovery.},
archiveprefix = {arXiv},
keywords = {Math - Statistics Theory, Statistics - Machine Learning},
}
title={Fast two-sample testing with analytic representations of probability measures},
author={Chwialkowski, Kacper P and Ramdas, Aaditya and Sejdinovic, Dino and Gretton, Arthur},
journal={Advances in Neural Information Processing Systems},
volume={28},
year={2015}
}

@article{pfister2018kernel,
title={Kernel-based tests for joint independence},
author={Pfister, Niklas and B{\"u}hlmann, Peter and Sch{\"o}lkopf, Bernhard and Peters, Jonas},
journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
volume={80},
number={1},
pages={5--31},
year={2018},
publisher={Wiley Online Library}
}

@article{friedmanMultivariateGeneralizationsoftheWaldWolfowitzandSmirnovTwoSampleTests1979,
title = {Multivariate Generalizations of the Wald-Wolfowitz and Smirnov Two-Sample Tests},
@@ -544,4 +537,27 @@ @Inbook{hotellingRelationsTwoSets1992
isbn="978-1-4612-4380-9",
doi="10.1007/978-1-4612-4380-9_14",
url="https://doi.org/10.1007/978-1-4612-4380-9_14"
}

@article{jitkrittum2017linear,
title={A linear-time kernel goodness-of-fit test},
author={Jitkrittum, Wittawat and Xu, Wenkai and Szab{\'o}, Zolt{\'a}n and Fukumizu, Kenji and Gretton, Arthur},
journal={Advances in Neural Information Processing Systems},
volume={30},
year={2017}
}

@inproceedings{10.5555/3020548.3020641,
author = {Zhang, Kun and Peters, Jonas and Janzing, Dominik and Sch\"{o}lkopf, Bernhard},
title = {Kernel-Based Conditional Independence Test and Application in Causal Discovery},
year = {2011},
isbn = {9780974903972},
publisher = {AUAI Press},
address = {Arlington, Virginia, USA},
abstract = {Conditional independence testing is an important problem, especially in Bayesian network learning and causal discovery. Due to the curse of dimensionality, testing for conditional independence of continuous variables is particularly challenging. We propose a Kernel-based Conditional Independence test (KCI-test), by constructing an appropriate test statistic and deriving its asymptotic distribution under the null hypothesis of conditional independence. The proposed method is computationally efficient and easy to implement. Experimental results show that it outperforms other methods, especially when the conditioning set is large or the sample size is not very large, in which case other methods encounter difficulties.},
booktitle = {Proceedings of the Twenty-Seventh Conference on Uncertainty in Artificial Intelligence},
pages = {804–813},
numpages = {10},
location = {Barcelona, Spain},
series = {UAI'11}
}
4 changes: 4 additions & 0 deletions hyppo/conditional/FCIT.py
@@ -32,8 +32,12 @@ class FCIT(ConditionalIndependenceTest):
Proportion of data to evaluate test stat on.
discrete: tuple of string
Whether :math:`X` or :math:`Y` are discrete

Notes
-----
.. note::
This algorithm is currently a preprint on arXiv.

The motivation for the test rests on the assumption that if :math:`X \not\!\perp\!\!\!\perp Y \mid Z`,
then :math:`Y` should be more accurately predicted by using both
:math:`X` and :math:`Z` as covariates as opposed to only using
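
The regression comparison motivating FCIT can be sketched in a few lines. This is an illustrative sketch only, assuming decision-tree regressors and a paired one-sided t-test on held-out squared errors; the function name and details are hypothetical and differ from hyppo's actual implementation.

import numpy as np
from scipy.stats import ttest_rel
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

def fcit_sketch(x, y, z):
    # Compare held-out errors when predicting y from (x, z) versus z alone.
    xz = np.hstack([x, z])
    errs = []
    for feats in (xz, z):
        f_tr, f_te, y_tr, y_te = train_test_split(feats, y, random_state=0)
        model = DecisionTreeRegressor(random_state=0).fit(f_tr, y_tr)
        errs.append((model.predict(f_te) - y_te.ravel()) ** 2)
    # One-sided test of whether adding x reduces the prediction error.
    tstat, p_two = ttest_rel(errs[1], errs[0])
    return tstat, p_two / 2 if tstat > 0 else 1 - p_two / 2

A small p-value suggests x carries predictive information about y beyond z, i.e. evidence against conditional independence.
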
16 changes: 10 additions & 6 deletions hyppo/conditional/kci.py
@@ -10,24 +10,30 @@ class KCI(ConditionalIndependenceTest):
r"""
Kernel Conditional Independence Test Statistic and P-Value.

This is a conditional independence test utilizing a radial basis
function to calculate the kernels of two datasets. The trace
of the normalized matrix product is then calculated to extract the test
statistic. A Gaussian distribution is then utilized to calculate
the p-value given the statistic and approximate mean and variance
of the trace values of the independent kernel matrices.
Like similar kernel-based tests, this test is consistent.

Notes
-----
The statistic is computed as follows :footcite:p:`10.5555/3020548.3020641`:

Let :math:`x` be an :math:`(n, p)` sample
of random variables :math:`X` and let :math:`y` be an :math:`(n, 1)`
vector of labels of sample classes :math:`Y`. We can then generate
:math:`Kx` and :math:`Ky` kernel matrices for each of the respective
:math:`K^x` and :math:`K^y` kernel matrices for each of the respective
samples. Normalizing, multiplying, and taking the trace of these
kernel matrices gives the resulting test statistic.
The p-value and null distribution for the corrected statistic are calculated using a
gamma distribution approximation.

References
----------
.. footbibliography::
"""

def __init__(self, **kwargs):
@@ -110,9 +116,7 @@ def test(self, x, y):
stat = self.statistic(x, y)

mean_appr = (np.trace(Kx) * np.trace(Ky)) / T
var_appr = (
2 * np.trace(Kx @ Kx) * np.trace(Ky @ Ky) / T**2
)
var_appr = 2 * np.trace(Kx @ Kx) * np.trace(Ky @ Ky) / T**2
k_appr = mean_appr**2 / var_appr
theta_appr = var_appr / mean_appr
pvalue = 1 - np.mean(gamma.cdf(stat, k_appr, theta_appr))
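
For readers of the hunk above, the gamma approximation can be written out as a compact, self-contained sketch. The RBF kernel from scikit-learn, the explicit centering, and the name kci_gamma_pvalue are illustrative assumptions, not hyppo's exact internals.

import numpy as np
from scipy.stats import gamma
from sklearn.metrics.pairwise import rbf_kernel

def kci_gamma_pvalue(x, y):
    n = x.shape[0]
    H = np.eye(n) - np.ones((n, n)) / n  # centering matrix
    Kx = H @ rbf_kernel(x) @ H           # centered kernel matrix of x
    Ky = H @ rbf_kernel(y) @ H           # centered kernel matrix of y
    stat = np.trace(Kx @ Ky)             # trace of the matrix product
    mean_appr = np.trace(Kx) * np.trace(Ky) / n
    var_appr = 2 * np.trace(Kx @ Kx) * np.trace(Ky @ Ky) / n**2
    k_appr = mean_appr**2 / var_appr     # gamma shape
    theta_appr = var_appr / mean_appr    # gamma scale (passed as scale=)
    return stat, 1 - gamma.cdf(stat, k_appr, scale=theta_appr)
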
6 changes: 3 additions & 3 deletions hyppo/d_variate/dhsic.py
@@ -13,11 +13,11 @@ class dHsic(DVariateTest):
dHsic is a non-parametric kernel-based independence test between an
arbitrary number of variables. The dHsic statistic is 0 if the variables
are jointly independent and positive if the variables are dependent
:footcite:p:`grettonKernelJointIndependence2016`.
:footcite:p:`pfister2018kernel`.
The default choice is the Gaussian kernel with the median distance
as its bandwidth, a characteristic kernel that guarantees that
dHsic is a consistent test
:footcite:p:`grettonKernelJointIndependence2016`
:footcite:p:`pfister2018kernel`
:footcite:p:`grettonKernelStatisticalTest2007`
:footcite:p:`grettonConsistentNonparametricTests2010`.

@@ -47,7 +47,7 @@ class dHsic(DVariateTest):
Notes
-----
The statistic can be derived as follows
:footcite:p:`grettonKernelJointIndependence2016`:
:footcite:p:`pfister2018kernel`:

dHsic builds on the two-variable Hilbert Schmidt Independence Criterion (Hsic),
implemented in :class:`hyppo.independence.Hsic`, but allows for an arbitrary
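
The statistic described above is short enough to sketch from scratch. The following illustrative implementation uses the median-distance Gaussian kernel; the function names are hypothetical, and the resampled null used for the p-value is omitted.

import numpy as np
from scipy.spatial.distance import pdist, squareform

def gaussian_kernel(x):
    # Gaussian kernel with the median pairwise distance as bandwidth.
    dists = squareform(pdist(x))
    med = np.median(dists[dists > 0])
    return np.exp(-dists**2 / (2 * med**2))

def dhsic_stat(*samples):
    # Empirical dHsic over d samples, each of shape (n, p_k).
    kernels = [gaussian_kernel(s) for s in samples]
    term1 = np.mean(np.prod(kernels, axis=0))     # mean of entrywise products
    term2 = np.prod([K.mean() for K in kernels])  # product of grand means
    term3 = 2 * np.mean(np.prod([K.mean(axis=1) for K in kernels], axis=0))
    return term1 + term2 - term3

The population value is zero under joint independence and positive otherwise, matching the description above.
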
1 change: 1 addition & 0 deletions hyppo/independence/friedman_rafsky.py
@@ -17,6 +17,7 @@ class FRTestOutput(NamedTuple):
class FriedmanRafsky(IndependenceTest):
r"""
Friedman-Rafsky (FR) test statistic and p-value.

This is a multivariate extension of the Wald-Wolfowitz
runs test for randomness. The normal concept of a 'run'
is replaced by a minimum spanning tree (MST) calculated between
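
Concretely, the MST-based run count can be computed with scipy. This is an illustrative sketch, not hyppo's implementation: removing every MST edge that joins points from different samples leaves cross + 1 subtrees, the multivariate analogue of the number of runs.

import numpy as np
from scipy.sparse.csgraph import minimum_spanning_tree
from scipy.spatial.distance import cdist

def fr_runs(x, y):
    # MST over the pooled sample; count edges that join the two samples.
    pooled = np.vstack([x, y])
    labels = np.r_[np.zeros(len(x)), np.ones(len(y))]
    mst = minimum_spanning_tree(cdist(pooled, pooled)).tocoo()
    cross = sum(labels[i] != labels[j] for i, j in zip(mst.row, mst.col))
    return cross + 1  # removing the cross edges leaves this many subtrees

Few runs (few cross edges) indicate the samples are well separated, i.e. evidence against equal distributions.
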
3 changes: 3 additions & 0 deletions hyppo/independence/kmerf.py
@@ -43,6 +43,9 @@ class KMERF(IndependenceTest):

Notes
-----
.. note::
This algorithm is currently under review at a peer-reviewed journal.

A description of KMERF in greater detail can be found in
:footcite:p:`shenLearningInterpretableCharacteristic2020`. It is computed
using the following steps:
5 changes: 5 additions & 0 deletions hyppo/independence/max_margin.py
@@ -72,6 +72,11 @@ class MaxMargin(IndependenceTest):
**kwargs
Arbitrary keyword arguments for ``compute_distkern``.

Notes
-----
.. note::
This algorithm is currently under review at a peer-reviewed journal.

References
----------
.. footbibliography::
20 changes: 11 additions & 9 deletions hyppo/kgof/fssd.py
@@ -135,8 +135,10 @@ class FSSD(GofTest):
and a set of paired test locations. The statistic is n*FSSD^2.
The statistic can be negative because of the unbiased estimator.

:math:`H0` : the sample follows :math:`p`
:math:`H1` : the sample does not follow :math:`p`
.. math::

H_0 &: \text{ the sample follows } p \\
H_A &: \text{ the sample does not follow } p

:math:`p` is specified to the constructor in the form of an UnnormalizedDensity.

@@ -147,17 +149,17 @@
density, the GoF test tests whether or not the sample :math:`\{ \mathbf{x}_i \}_{i=1}^n`
is distributed according to a known :math:`p`.

The implemented test relies on a new test statistic called The Finite-Set Stein Discrepancy (FSSD)
The implemented test relies on a new test statistic called the Finite-Set Stein Discrepancy (FSSD) :footcite:p:`jitkrittum2017linear`,
which is a discrepancy measure between a density and a sample. Unique features of the new goodness-of-fit test are:

It makes only a few mild assumptions on the distributions :math:`p` and :math:`q`. The model :math:`p`
can take almost any form. The normalizer of :math:`p` is not assumed known. The test only assesses the goodness of
:math:`p` through :math:`\nabla_{\mathbf{x}} \log p(\mathbf{x})` i.e., the first derivative of the log density.
* It makes only a few mild assumptions on the distributions :math:`p` and :math:`q`. The model :math:`p`
can take almost any form. The normalizer of :math:`p` is not assumed known. The test only assesses the goodness of
:math:`p` through :math:`\nabla_{\mathbf{x}} \log p(\mathbf{x})` i.e., the first derivative of the log density.

The runtime complexity of the full test (both parameter tuning and the actual test) is
:math:`\mathcal{O}(n)` i.e., linear in the sample size.
* The runtime complexity of the full test (both parameter tuning and the actual test) is
:math:`\mathcal{O}(n)` i.e., linear in the sample size.

It returns a set of points (features) which indicate where :math:`p` fails to fit the data.
* It returns a set of points (features) which indicate where :math:`p` fails to fit the data.

The FSSD test requires that the derivative of :math:`\log p` exists.
The test requires a technical condition called the "vanishing boundary" condition for it to be consistent.
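
To make the role of the score function and test locations concrete, here is an illustrative (biased) estimator of FSSD^2 for a model given through its score function; hyppo uses the unbiased estimator and reports n*FSSD^2 as the statistic, and every name below is hypothetical.

import numpy as np

def fssd_squared(x, score_fn, V, sigma=1.0):
    # x: (n, d) sample; score_fn(x): (n, d) values of grad_x log p(x);
    # V: (J, d) test locations; Gaussian kernel with bandwidth sigma.
    n, d = x.shape
    J = V.shape[0]
    S = score_fn(x)
    xi = np.zeros((n, d, J))
    for j in range(J):
        diff = V[j] - x                                        # (n, d)
        k = np.exp(-np.sum(diff**2, axis=1) / (2 * sigma**2))  # k(x_i, v_j)
        grad_k = diff / sigma**2 * k[:, None]                  # grad_x k(x, v_j)
        xi[:, :, j] = S * k[:, None] + grad_k                  # Stein witness features
    tau = xi.reshape(n, d * J)
    return np.sum(tau.mean(axis=0) ** 2) / (d * J)

For a standard normal model, score_fn is simply lambda x: -x.
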
3 changes: 3 additions & 0 deletions hyppo/ksample/ksamp.py
@@ -56,6 +56,9 @@ class KSample(KSampleTest):

Notes
-----
.. note::
This algorithm is currently under review at a peer-reviewed journal.

The formulation for this implementation is as follows
:footcite:p:`pandaNonparMANOVAIndependence2021`:

3 changes: 3 additions & 0 deletions hyppo/time_series/dcorrx.py
@@ -55,6 +55,9 @@ class DcorrX(TimeSeriesTest):

Notes
-----
.. note::
This algorithm is currently a preprint on arXiv.

The statistic can be derived as follows
:footcite:p:`mehtaIndependenceTestingMultivariate2020`:

3 changes: 3 additions & 0 deletions hyppo/time_series/mgcx.py
@@ -55,6 +55,9 @@ class MGCX(TimeSeriesTest):

Notes
-----
.. note::
This algorithm is currently a preprint on arXiv.

The statistic can be derived as follows
:footcite:p:`mehtaIndependenceTestingMultivariate2020`:

8 changes: 8 additions & 0 deletions tutorials/independence.py
@@ -262,6 +262,10 @@
#
# .. note::
#
# This algorithm is currently under review at a peer-reviewed journal.
#
# .. note::
#
# :Pros: - Highly accurate, powerful independence test for multivariate and nonlinear
# data
# - Gives information about relative dimension (or feature) importance
@@ -325,6 +329,10 @@
#
# .. note::
#
# This algorithm is currently under review at a peer-reviewed journal.
#
# .. note::
#
# :Pros: - As powerful as some of the tests within this module
# - Minimal decrease in testing power as dimension increases
# :Cons: - Adds computational complexity, so can be slow
4 changes: 4 additions & 0 deletions tutorials/ksample.py
@@ -76,6 +76,10 @@
#
# .. note::
#
# This algorithm is currently under review at a peer-reviewed journal.
#
# .. note::
#
# If you want use 2-sample MGC, we have added that functionality to SciPy!
# Please see :func:`scipy.stats.multiscale_graphcorr`.
#
4 changes: 4 additions & 0 deletions tutorials/time_series.py
@@ -63,6 +63,10 @@
#
# .. note::
#
# This algorithm is currently a preprint on arXiv.
#
# .. note::
#
# :Pros: - Very accurate
# - Operates on multivariate data
# :Cons: - Slower than pairwise Pearson's correlation