WIP: handle NaN in np.unique (fixes numpy#2111)

Works for real floating-point dtypes, but not yet for complex or structured dtypes.
jaimefrio · Jan 23, 2015 · fd2cc66 · fd2cc66
1 parent 5b714c7
commit fd2cc66
Show file tree

Hide file tree

Showing 2 changed files with 18 additions and 0 deletions.
diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py
@@ -196,6 +196,12 @@ def unique(ar, return_index=False, return_inverse=False, return_counts=False):
         aux = ar
     flag = np.concatenate(([True], aux[1:] != aux[:-1]))
 
+    # gh-2111: NaNs never compare equal, so they need special handling,
+    # but they always sort to the end
+    if issubclass(ar.dtype.type, np.inexact) and np.isnan(aux[-1]):
+        nanidx = np.searchsorted(aux, np.nan)
+        flag[nanidx+1:] = False
+
     if not optional_returns:
         ret = aux[flag]
     else:

diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
@@ -115,6 +115,18 @@ def check_all(a, b, i1, i2, c, dt):
         a2, a2_inv = np.unique(a, return_inverse=True)
         assert_array_equal(a2_inv, np.zeros(5))
 
+        # test for arrays with nans, gh-2111
+        a = [5, np.nan, 1, 2, 1, 5, np.nan]*10
+        b = [1, 2, 5, np.nan]
+        i1 = [2, 3, 0, 1]
+        i2 = [2, 3, 0, 1, 0, 2, 3]*10
+        c = np.multiply([2, 1, 2, 2], 10)
+        for dt in np.typecodes['Float']:
+            aa = np.array(a, dt)
+            bb = np.array(b, dt)
+            check_all(aa, bb, i1, i2, c, dt)
+
+
     def test_intersect1d(self):
         # unique inputs
         a = np.array([5, 7, 1, 2])