[ENH] CMI estimators using forests #272

Open
wants to merge 18 commits into main
1 change: 1 addition & 0 deletions .spin/cmds.py
@@ -105,6 +105,7 @@ def setup_submodule(forcesubmodule=False):
    print(commit_fpath)
    with open(commit_fpath, "w") as f:
        f.write(current_hash)
    print(commit, current_hash)

    util.run(
        [
1 change: 1 addition & 0 deletions README.md
@@ -7,6 +7,7 @@
[![Latest PyPI release](https://img.shields.io/pypi/v/scikit-tree.svg)](https://pypi.org/project/scikit-tree/)
[![DOI](https://zenodo.org/badge/491260497.svg)](https://zenodo.org/doi/10.5281/zenodo.8412279)


scikit-tree
===========

73 changes: 73 additions & 0 deletions benchmarks_nonasv/bench_est_mi.py
@@ -0,0 +1,73 @@
# Reimplementation of Figure 4 from Uncertainty Forests

import copy
import pickle

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from joblib import Parallel, delayed
from scipy.integrate import nquad
from scipy.stats import entropy, multivariate_normal
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier

from sktree import HonestForestClassifier
from sktree.experimental.simulate import simulate_separate_gaussians
from sktree.tree import ObliqueDecisionTreeClassifier


def plot_setting(X, y, name, ax):
    # Scatter the first two dimensions, colored by class label.
    colors = ["#c51b7d", "#2166ac", "#d95f02"]
    ax.scatter(X[:, 0], X[:, 1], color=np.array(colors)[y], marker=".")

    ax.set_xlim(left=-5.05, right=5.05)
    ax.set_ylabel(name)


def plot_example_2D_gaussians(n_samples=2000, seed=None):
    # Plot the three 2D example settings side by side.
    names = ["Spherical Gaussians", "Elliptical Gaussians", "Three Class Gaussians"]
    fig, axes = plt.subplots(1, len(names), figsize=(18, 4))

    for name, ax in zip(names, axes):
        # The three-class setting uses three Gaussians and a larger mean shift.
        n_classes = 3 if name == "Three Class Gaussians" else 2
        mu1 = 3 if name == "Three Class Gaussians" else 1
        X, y, means, sigmas, pi = simulate_separate_gaussians(
            n_dims=2, n_samples=n_samples, n_classes=n_classes, mu1=mu1, seed=seed
        )
        plot_setting(X, y, name, ax)
    return fig, axes

if __name__ == "__main__":
    n_jobs = -1
    n_estimators = 100
    feature_combinations = 2.0
    n_nbrs = 5
    seed = 12345

    # hyperparameters of the simulation
    n_samples = 1000
    n_noise_dims = 20
    alpha = 0.001
    n_classes = 2

    # dimensionality of the multivariate Gaussian
    n_dims = 3

    # simulate separated multivariate Gaussians
    X, y, means, sigmas, pi = simulate_separate_gaussians(
        n_dims=n_dims, n_samples=n_samples, n_classes=n_classes, seed=seed
    )

    print(X.shape, y.shape)

    # Plot the first two dimensions of the simulated data.
    fig, ax = plt.subplots(figsize=(6, 4))
    plot_setting(X, y, "Separated Gaussians", ax)

    # Plot the three 2D example settings.
    plot_example_2D_gaussians(n_samples=2000, seed=seed)

    plt.show()
    plt.clf()
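
The benchmark file above only simulates and plots the data so far. For orientation, here is a minimal sketch of the posterior-based plug-in MI estimate that the Uncertainty Forests figure builds on, reusing simulate_separate_gaussians as called in the script. It assumes HonestForestClassifier follows the usual scikit-learn fit/predict_proba API; in practice, held-out or honest posteriors should be used to reduce the bias of fitting and evaluating on the same samples.

# Sketch (not part of the PR): plug-in MI estimate from forest posteriors,
# assuming HonestForestClassifier exposes the standard fit/predict_proba API.
import numpy as np
from scipy.stats import entropy

from sktree import HonestForestClassifier
from sktree.experimental.simulate import simulate_separate_gaussians

X, y, means, sigmas, pi = simulate_separate_gaussians(
    n_dims=3, n_samples=1000, n_classes=2, seed=12345
)

forest = HonestForestClassifier(n_estimators=100, random_state=12345)
forest.fit(X, y)
posteriors = forest.predict_proba(X)

# H(Y): entropy of the empirical class prior (in nats).
_, counts = np.unique(y, return_counts=True)
H_y = entropy(counts / counts.sum())

# H(Y|X): average entropy of the per-sample posterior estimates.
H_y_given_x = np.mean([entropy(p) for p in posteriors])

# Plug-in estimate of I(X; Y) = H(Y) - H(Y|X).
print("Estimated I(X; Y):", H_y - H_y_given_x)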



7 changes: 7 additions & 0 deletions doc/api.rst
@@ -181,3 +181,10 @@ for the entropy, MI and CMI of the Gaussian distributions.
   mi_gaussian
   cmi_gaussian
   entropy_gaussian


.. currentmodule:: sktree.experimental.monte_carlo
.. autosummary::
   :toctree: generated/

   conditional_resample
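
conditional_resample is only listed in the autosummary here, so its exact signature lives in the generated API docs. As a rough, hypothetical illustration of the idea rather than the sktree implementation: conditional resampling for CMI-style permutation tests typically permutes one variable only among samples whose conditioning features are close, which approximately preserves p(y | Z) while breaking any remaining dependence.

# Hypothetical illustration of nearest-neighbor conditional permutation; this is
# NOT the signature of sktree.experimental.monte_carlo.conditional_resample.
import numpy as np
from sklearn.neighbors import NearestNeighbors


def conditional_permute(y, Z, n_neighbors=5, seed=None):
    # Permute y only within local neighborhoods of the conditioning set Z.
    rng = np.random.default_rng(seed)
    nbrs = NearestNeighbors(n_neighbors=n_neighbors).fit(Z)
    _, neighbor_idx = nbrs.kneighbors(Z)
    return np.array([y[rng.choice(idx)] for idx in neighbor_idx])

A forest-based CMI test can then compare the statistic on the original data against its distribution over such conditionally permuted copies.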
9 changes: 9 additions & 0 deletions doc/references.bib
@@ -11,6 +11,15 @@ @article{breiman2001random
publisher = {Springer}
}

@inproceedings{marx2022estimating,
title = {Estimating Mutual Information via Geodesic kNN},
author = {Marx, Alexander and Fischer, Jonas},
booktitle = {Proceedings of the 2022 SIAM International Conference on Data Mining (SDM)},
pages = {415--423},
year = {2022},
organization = {SIAM}
}

@article{coleman2022scalable,
title = {Scalable and efficient hypothesis testing with random forests},
author = {Coleman, Tim and Peng, Wei and Mentch, Lucas},
2 changes: 1 addition & 1 deletion doc/whats_new/v0.4.rst
@@ -12,7 +12,7 @@ Version 0.4

Changelog
---------
-
- |Feature| Implement forest-based MI and CMI estimators, by `Adam Li`_ (:pr:`110`)

Code and Documentation Contributors
-----------------------------------
1,194 changes: 1,194 additions & 0 deletions experiments/plotting_cmi_analysis_unsupervised.ipynb

Large diffs are not rendered by default.