Skip to content

Commit

Permalink
ENH: add functionality symlog to numpy.histogram, see #24368
Browse files Browse the repository at this point in the history
  • Loading branch information
MycrofD committed Apr 16, 2024
1 parent 75b5bf1 commit 73a04bb
Show file tree
Hide file tree
Showing 3 changed files with 194 additions and 5 deletions.
85 changes: 80 additions & 5 deletions numpy/lib/_histograms_impl.py
Expand Up @@ -359,15 +359,15 @@ def _unsigned_subtract(a, b):
casting='unsafe', dtype=unsigned_dt)


def _get_bin_edges(a, bins, range, weights):
def _get_bin_edges(a, bins, range, weights, symlog=None):
"""
Computes the bins used internally by `histogram`.
Parameters
==========
a : ndarray
Ravelled data array
bins, range
bins, range, symlog
Forwarded arguments from `histogram`.
weights : ndarray, optional
Ravelled weights array, or None
Expand All @@ -383,6 +383,17 @@ def _get_bin_edges(a, bins, range, weights):
# parse the overloaded bins argument
n_equal_bins = None
bin_edges = None
if symlog:
if np.ndim(bins) != 0:
warnings.warn(
"symlog option is only valid when bins is an integer. "
"Attempting without symlog.",
stacklevel=2
)
return _get_bin_edges(a, bins, range, weights, symlog=None)
n_unequal_bins = bins
bin_edges = _get_geomspace_edges(n_unequal_bins, a)
return bin_edges, None

if isinstance(bins, str):
bin_name = bins
Expand Down Expand Up @@ -453,6 +464,37 @@ def _get_bin_edges(a, bins, range, weights):
return bin_edges, None


def _get_geomspace_edges(n_unequal_bins, a):
"""
Compute the bin edges for a histogram with geometrically spaced bins.
The bins are spaced such that the width of each bin is constant in
log-space.
Reference issue: https://github.com/numpy/numpy/issues/24368
Returns
-------
bin_edges : ndarray
The edges of the bins.
"""
# The idea is to use the absolute min and max of the data to compute the
# range, and then the pseudo-first and last edge.
pseudo_first_edge, pseudo_last_edge = abs(a).min(), abs(a).max()
# Compute the edges of the bins.
num_bins = int(n_unequal_bins)
if n_unequal_bins % 2 == 0:
num_bins = int(n_unequal_bins / 2)
bin_edges_geomspaced = np.geomspace(
pseudo_first_edge, pseudo_last_edge, num=num_bins
)
bin_edges_concatenated = np.concatenate(
(-bin_edges_geomspaced, [0], bin_edges_geomspaced)
)
bin_edges_sorted = np.sort(bin_edges_concatenated)
bin_edges = bin_edges_sorted
if n_unequal_bins % 2 == 1:
bin_edges = bin_edges_sorted[::2]
return bin_edges


def _search_sorted_inclusive(a, v):
"""
Like `searchsorted`, but where the last item in `v` is placed on the right.
Expand Down Expand Up @@ -673,12 +715,13 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):


def _histogram_dispatcher(
a, bins=None, range=None, density=None, weights=None):
a, bins=None, range=None, density=None, weights=None, symlog=None
):
return (a, bins, weights)


@array_function_dispatch(_histogram_dispatcher)
def histogram(a, bins=10, range=None, density=None, weights=None):
def histogram(a, bins=10, range=None, density=None, weights=None, symlog=None):
r"""
Compute the histogram of a dataset.
Expand Down Expand Up @@ -721,6 +764,10 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
the *integral* over the range is 1. Note that the sum of the
histogram values will not be equal to 1 unless bins of unity
width are chosen; it is not a probability *mass* function.
symlog : bool, optional
Allow numpy.histogram to give geometrically spaced bin edges.
Data can include both negative and positive numbers.
Based on https://github.com/numpy/numpy/issues/24368.
Returns
-------
Expand Down Expand Up @@ -768,6 +815,34 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
.. versionadded:: 1.11.0
>>> # Example for symlog
>>> data = np.array([
... -2.40000e-03, -3.50000e-01, -4.60000e-02, -2.00000e-01,
... -3.60000e-04, -4.00000e+00, -2.60000e+01, -3.64000e+02,
... -2.43000e+02, -1.53240e+04, -1.35525e+05, 1.20000e-02,
... 3.00000e-01, 1.40000e-03, 7.00000e-01, 7.00000e+00,
... 3.60000e+01, 9.46000e+02, 2.54520e+04, -4.80000e-03,
... -7.00000e-01, -9.20000e-02, -4.00000e-01, -7.20000e-04,
... -8.00000e+00, -5.20000e+01, -7.28000e+02, -4.86000e+02,
... -3.06480e+04, -2.71050e+05, 2.40000e-02, 6.00000e-01,
... 2.80000e-03, 1.40000e+00, 1.40000e+01, 7.20000e+01,
... 1.89200e+03, 5.09040e+04, -1.20000e-03, -1.75000e-01,
... -2.30000e-02, -1.00000e-01, -1.80000e-04, -2.00000e+00,
... -1.30000e+01, -1.82000e+02, -1.21500e+02, -7.66200e+03,
... -6.77625e+04, 6.00000e-03, 1.50000e-01, 7.00000e-04,
... 3.50000e-01, 3.50000e+00, 1.80000e+01, 4.73000e+02,
... 1.27260e+04])
>>> np.histogram(data, bins=5, symlog=True)
(array([16, 16, 1, 13, 11]),
array([-2.71050000e+05, -6.98491231e+00, -1.80000000e-04, 1.80000000e-04,
6.98491231e+00, 2.71050000e+05]))
>>> np.histogram(data, bins=10, symlog=True)
(array([ 6, 10, 10, 6, 1, 0, 6, 7, 7, 4]),
array([-2.71050000e+05, -1.37595802e+03, -6.98491231e+00, -3.54582038e-02,
-1.80000000e-04, 0.00000000e+00, 1.80000000e-04, 3.54582038e-02,
6.98491231e+00, 1.37595802e+03, 2.71050000e+05]))
Automated Bin Selection Methods example, using 2 peak random data
with 2000 points.
Expand All @@ -787,7 +862,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
"""
a, weights = _ravel_and_check_weights(a, weights)

bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)
bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights, symlog)

# Histogram is an integer or a float array depending on the weights.
if weights is None:
Expand Down
1 change: 1 addition & 0 deletions numpy/lib/_histograms_impl.pyi
Expand Up @@ -36,6 +36,7 @@ def histogram(
range: None | tuple[float, float] = ...,
density: bool = ...,
weights: None | ArrayLike = ...,
symlog: None | bool = ...,
) -> tuple[NDArray[Any], NDArray[Any]]: ...

def histogramdd(
Expand Down
113 changes: 113 additions & 0 deletions numpy/lib/tests/test_histograms.py
Expand Up @@ -413,6 +413,119 @@ def test_gh_23110(self):
expected_hist = np.array([1, 0])
assert_array_equal(hist, expected_hist)

def test_histogram_positive_data(self):
    """
    With symlog enabled, strictly positive input still yields the
    requested number of bins and one more edge than bins.
    """
    n_bins = 5
    samples = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    counts, edges = np.histogram(samples, bins=n_bins, symlog=True)
    assert len(counts) == n_bins
    assert len(edges) == n_bins + 1

def test_symlog(self):
    """
    Exercise `histogram` with the symlog parameter.

    Covers an odd and an even integer bin count, and the fallback path in
    which explicit bin edges are given together with ``symlog=True``: a
    UserWarning is expected and the counts must equal those of a plain
    call with the same edges.
    """
    # Mixed-sign samples spanning many orders of magnitude.
    data = np.array([
        -2.40000e-03, -3.50000e-01, -4.60000e-02, -2.00000e-01,
        -3.60000e-04, -4.00000e00, -2.60000e01, -3.64000e02,
        -2.43000e02, -1.53240e04, -1.35525e05, 1.20000e-02,
        3.00000e-01, 1.40000e-03, 7.00000e-01, 7.00000e00,
        3.60000e01, 9.46000e02, 2.54520e04, -4.80000e-03,
        -7.00000e-01, -9.20000e-02, -4.00000e-01, -7.20000e-04,
        -8.00000e00, -5.20000e01, -7.28000e02, -4.86000e02,
        -3.06480e04, -2.71050e05, 2.40000e-02, 6.00000e-01,
        2.80000e-03, 1.40000e00, 1.40000e01, 7.20000e01,
        1.89200e03, 5.09040e04, -1.20000e-03, -1.75000e-01,
        -2.30000e-02, -1.00000e-01, -1.80000e-04, -2.00000e00,
        -1.30000e01, -1.82000e02, -1.21500e02, -7.66200e03,
        -6.77625e04, 6.00000e-03, 1.50000e-01, 7.00000e-04,
        3.50000e-01, 3.50000e00, 1.80000e01, 4.73000e02,
        1.27260e04,
    ])

    # Integer bin counts: one odd, one even.
    integer_cases = (
        (5, [16, 16, 1, 13, 11]),
        (10, [6, 10, 10, 6, 1, 0, 6, 7, 7, 4]),
    )
    for n_bins, expected in integer_cases:
        counts, edges = np.histogram(data, bins=n_bins, symlog=True)
        assert len(counts) == n_bins
        assert len(edges) == n_bins + 1
        assert np.array_equal(counts, np.array(expected))

    # Explicit edges: symlog is ignored, with a warning.
    explicit_edges = [
        -2.71050000e05,
        -6.98491231e00,
        -1.80000000e-04,
        1.80000000e-04,
        6.98491231e00,
        2.71050000e05,
    ]
    n_edges = len(explicit_edges)

    with pytest.warns(
        UserWarning,
        match=(
            "symlog option is only valid when bins is an integer. "
            "Attempting without symlog."
        ),
    ):
        warned_counts, warned_edges = np.histogram(
            data, bins=explicit_edges, symlog=True
        )
    assert len(warned_counts) == n_edges - 1
    assert len(warned_edges) == n_edges
    assert np.array_equal(warned_counts, np.array([16, 16, 1, 13, 11]))

    # The same edges without symlog give the same result.
    plain_counts, plain_edges = np.histogram(data, bins=explicit_edges)
    assert len(plain_counts) == n_edges - 1
    assert len(plain_edges) == n_edges
    assert np.array_equal(plain_counts, np.array([16, 16, 1, 13, 11]))

class TestHistogramOptimBinNums:
"""
Expand Down

0 comments on commit 73a04bb

Please sign in to comment.