databricks · itholic · Sep 23, 2020 · Sep 23, 2020 · Sep 24, 2020 · Sep 24, 2020
diff --git a/databricks/koalas/indexes.py b/databricks/koalas/indexes.py
@@ -52,6 +52,7 @@
 from databricks.koalas.missing.indexes import MissingPandasLikeIndex, MissingPandasLikeMultiIndex
 from databricks.koalas.series import Series, first_series
 from databricks.koalas.utils import (
+    combine_frames,
     compare_allow_null,
     compare_disallow_null,
     compare_null_first,
@@ -2981,6 +2982,60 @@ def item(self):
         """
         return self._kdf.head(2)._to_internal_pandas().index.item()
 
+    def equal_levels(self, other):
+        """
+        Return True if the levels of both MultiIndex objects are the same.
+
+        .. note:: `other` is combined with this index through `combine_frames`, so
+            the option "compute.ops_on_diff_frames" has to be enabled, as in the
+            example below.
+
+        Parameters
+        ----------
+        other : MultiIndex
+            The MultiIndex to compare against.
+
+        Returns
+        -------
+        bool
+            False when the number of levels or the number of entries differs, or
+            when the combined frame holds a row whose first `other` index column
+            is null; True otherwise.
+
+        Examples
+        --------
+        >>> from databricks.koalas.config import set_option, reset_option
+        >>> set_option("compute.ops_on_diff_frames", True)
+
+        >>> kmidx1 = ks.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
+        >>> kmidx2 = ks.MultiIndex.from_tuples([("b", "y"), ("a", "x"), ("c", "z")])
+        >>> kmidx1.equal_levels(kmidx2)
+        True
+
+        >>> kmidx2 = ks.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "j")])
+        >>> kmidx1.equal_levels(kmidx2)
+        False
+
+        >>> reset_option("compute.ops_on_diff_frames")
+        """
+        # A structural mismatch settles the answer before any frame is combined.
+        if (self.nlevels != other.nlevels) or (len(self) != len(other)):
+            return False
+
+        # No pre-sorting: the check below only asks whether some combined row has a
+        # null, and that does not depend on the order of the rows being combined.
+        combined = combine_frames(self.to_frame(), other.to_frame())
+        sdf = combined._internal.spark_frame
+
+        # NOTE(review): only the first index column of `other` is inspected, and only
+        # for nulls on the `other` side; rows are presumably aligned as whole index
+        # tuples by combine_frames, while pandas compares each level's values
+        # independently -- confirm this matches the intended semantics.
+        that_index_name = "__that_{}".format(other._internal.index_spark_column_names[0])
+        that_index_scol = scol_for(sdf, that_index_name)
+
+        return len(sdf.filter(that_index_scol.isNull()).head(1)) == 0
+
     @property
     def inferred_type(self):
         """

diff --git a/databricks/koalas/missing/indexes.py b/databricks/koalas/missing/indexes.py
@@ -105,7 +105,6 @@ class MissingPandasLikeMultiIndex(object):
     # Functions
     argsort = _unsupported_function("argsort")
     asof_locs = _unsupported_function("asof_locs")
-    equal_levels = _unsupported_function("equal_levels")
     factorize = _unsupported_function("factorize")
     format = _unsupported_function("format")
     get_indexer = _unsupported_function("get_indexer")

diff --git a/databricks/koalas/tests/test_indexes.py b/databricks/koalas/tests/test_indexes.py
@@ -1586,6 +1586,18 @@ def test_multiindex_is_unique(self):
 
             self.assertEqual(kdf.index.is_unique, expected)
 
+    def test_multiindex_equal_levels(self):
+        # One candidate has fewer entries than the base index, the other has an extra level.
+        base = pd.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
+        kbase = ks.from_pandas(base)
+        for tuples in (
+            [("a", "x"), ("b", "y")],
+            [("a", "x", "q"), ("b", "y", "w"), ("c", "z", "e")],
+        ):
+            pmidx = pd.MultiIndex.from_tuples(tuples)
+            kmidx = ks.from_pandas(pmidx)
+            self.assert_eq(base.equal_levels(pmidx), kbase.equal_levels(kmidx))
+
     def test_view(self):
         pidx = pd.Index([1, 2, 3, 4], name="Koalas")
         kidx = ks.from_pandas(pidx)

diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py
@@ -911,6 +911,17 @@ def test_series_repeat(self):
         else:
             self.assert_eq(kser1.repeat(kser2).sort_index(), pser1.repeat(pser2).sort_index())
 
+    def test_multiindex_equal_levels(self):
+        # Same tuples in a different order, then one changed tuple; pandas is the oracle.
+        base = pd.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
+        kbase = ks.from_pandas(base)
+        for tuples in (
+            [("b", "y"), ("a", "x"), ("c", "z")],
+            [("a", "x"), ("b", "y"), ("c", "j")],
+        ):
+            pmidx = pd.MultiIndex.from_tuples(tuples)
+            self.assert_eq(base.equal_levels(pmidx), kbase.equal_levels(ks.from_pandas(pmidx)))
+
 
 class OpsOnDiffFramesDisabledTest(ReusedSQLTestCase, SQLTestUtils):
     @classmethod
@@ -1052,3 +1063,16 @@ def test_mask(self):
 
         with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
             kdf1.mask(kdf2 > -250)
+
+    def test_multiindex_equal_levels(self):
+        # Both comparisons are expected to fail with the cannot-combine ValueError here.
+        base = pd.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
+        kbase = ks.from_pandas(base)
+        for tuples in (
+            [("b", "y"), ("a", "x"), ("c", "z")],
+            [("a", "x"), ("b", "y"), ("c", "j")],
+        ):
+            pmidx = pd.MultiIndex.from_tuples(tuples)
+            kmidx = ks.from_pandas(pmidx)
+            with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
+                self.assert_eq(base.equal_levels(pmidx), kbase.equal_levels(kmidx))
diff --git a/docs/source/reference/indexing.rst b/docs/source/reference/indexing.rst
@@ -216,6 +216,7 @@ MultiIndex Modifying and computations
    :toctree: api/
 
    MultiIndex.equals
+   MultiIndex.equal_levels
    MultiIndex.identical
    MultiIndex.drop
    MultiIndex.copy