simplify hdf5 loading for adjacency as well

cosanlab · Apr 18, 2024 · 68446c7 · 68446c7
1 parent b6c4b02
commit 68446c7
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 27 deletions.
diff --git a/nltools/data/adjacency.py b/nltools/data/adjacency.py
@@ -53,7 +53,6 @@
 
 
 class Adjacency(object):
-
     """
     Adjacency is a class to represent Adjacency matrices as a vector rather
     than a 2-dimensional matrix. This makes it easier to perform data
@@ -84,8 +83,7 @@ def __init__(self, data=None, Y=None, matrix_type=None, labels=None, **kwargs):
                 "'similarity_flat','directed_flat']"
             )
 
-        # Flag to support hdf5 files saved using nltools <= 0.4.8
-        legacy_h5 = kwargs.pop("legacy_h5", False)
+        verbose = kwargs.pop("verbose", False)
 
         # Setup data
         if data is None:
@@ -137,7 +135,33 @@ def __init__(self, data=None, Y=None, matrix_type=None, labels=None, **kwargs):
 
             # HDF5
             if (".h5" in to_load) or (".hdf5" in to_load):
-                if legacy_h5:
+                try:
+                    # Load the Y attribute via pandas' HDFStore
+                    with pd.HDFStore(to_load, "r") as f:
+                        self.Y = f["Y"]
+
+                    # Load other attributes
+                    with h5File(to_load, "r") as f:
+                        self.data = np.array(f["data"])
+                        self.matrix_type = f["matrix_type"][()].decode()
+                        self.is_single_matrix = f["is_single_matrix"][()]
+                        self.issymmetric = f["issymmetric"][()]
+                        # Deepdish saved empty label lists as np arrays of length 1
+                        if len(f["labels"]) == 1:
+                            self.labels = list(f["labels"])
+                        elif len(f["labels"]) > 1:
+                            self.labels = list(f["labels"].asstr())
+                        else:
+                            self.labels = []
+
+                    # Done initializing
+                    return
+                except Exception as e:
+                    if verbose:
+                        warnings.warn(
+                            f"Falling back to legacy h5 loading due to error: {e}"
+                        )
+
                     with tables.open_file(to_load, mode="r") as f:
                         # Setup data
                         self.data = np.array(f.root["data"])
@@ -184,28 +208,6 @@ def __init__(self, data=None, Y=None, matrix_type=None, labels=None, **kwargs):
 
                         return
 
-                else:
-                    # Load X and Y attributes
-                    with pd.HDFStore(to_load, "r") as f:
-                        self.Y = f["Y"]
-
-                    # Load other attributes
-                    with h5File(to_load, "r") as f:
-                        self.data = np.array(f["data"])
-                        self.matrix_type = f["matrix_type"][()].decode()
-                        self.is_single_matrix = f["is_single_matrix"][()]
-                        self.issymmetric = f["issymmetric"][()]
-                        # Deepdish saved empty label lists as np arrays of length 1
-                        if len(f["labels"]) == 1:
-                            self.labels = list(f["labels"])
-                        elif len(f["labels"]) > 1:
-                            self.labels = list(f["labels"].asstr())
-                        else:
-                            self.labels = []
-
-                    # Done initializing
-                    return
-
             # CSV or array/dataframe
             else:
                 (

diff --git a/nltools/tests/test_adjacency.py b/nltools/tests/test_adjacency.py
@@ -1,4 +1,5 @@
 import os
+import pytest
 import numpy as np
 import pandas as pd
 from nltools.data import Adjacency, Design_Matrix
@@ -448,7 +449,9 @@ def test_cluster_summary():
 def test_load_legacy_h5(
     old_h5_adj_single, new_h5_adj_single, old_h5_adj_double, new_h5_adj_double, tmpdir
 ):
-    b_old = Adjacency(old_h5_adj_single, legacy_h5=True)
+    with pytest.warns(UserWarning):
+        # With verbosity on we should see a warning about the old h5 file format
+        b_old = Adjacency(old_h5_adj_single, verbose=True)
     b_new = Adjacency(new_h5_adj_single)
     assert b_old.shape() == b_new.shape()
     assert np.allclose(b_old.data, b_new.data)