Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OSError when downloading example data: Unable to synchronously open file #1188

Open
LiuCanidk opened this issue Apr 9, 2024 · 1 comment
Assignees
Labels
bug Something isn't working

Comments

@LiuCanidk
Copy link

description of the bug

I encountered this bug when trying to download the bone_marrow example data, following the "Get Started with CellRank" tutorial section, in a Jupyter notebook. The error occurred after roughly 20-30% of the download had finished, seemingly at random. I'm not sure whether it is due to network instability.

reproducible example

import cellrank as cr
######################download the data
adata = cr.datasets.bone_marrow()
#take time: 21:11---
#often error, seems like the problem of network

error output

 35%|███████████████████████████▌                                                   | 129M/370M [23:55<44:41, 94.3kB/s]
---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
Cell In[27], line 2
      1 ######################load the data
----> 2 adata = cr.datasets.bone_marrow()
      3 #take time: 21:11---

File E:\python\Lib\site-packages\cellrank\datasets.py:292, in bone_marrow(path, **kwargs)
    271 @d.dedent
    272 def bone_marrow(
    273     path: Union[str, pathlib.Path] = "datasets/bone_marrow.h5ad",
    274     **kwargs: Any,
    275 ) -> AnnData:  # pragma: no cover
    276     """sc-RNA-seq dataset early human hematopoiesis (CD34+ bone marrow cells) assayed using 10X Chromium.
    277 
    278     This dataset contains raw spliced and unspliced counts estimated using *velocyto* :cite:`manno:18`.
   (...)
    290     Annotated data object.
    291     """
--> 292     return _load_dataset_from_url(path, *_datasets["bone_marrow"], **kwargs)

File E:\python\Lib\site-packages\cellrank\datasets.py:67, in _load_dataset_from_url(fpath, url, expected_shape, **kwargs)
     64 kwargs.setdefault("sparse", True)
     65 kwargs.setdefault("cache", True)
---> 67 adata = read(fpath, backup_url=url, **kwargs)
     69 if adata.shape != expected_shape:
     70     raise ValueError(f"Expected `anndata.AnnData` object to have shape `{expected_shape}`, found `{adata.shape}`.")

File E:\python\Lib\site-packages\legacy_api_wrap\__init__.py:80, in legacy_api.<locals>.wrapper.<locals>.fn_compatible(*args_all, **kw)
     77 @wraps(fn)
     78 def fn_compatible(*args_all: P.args, **kw: P.kwargs) -> R:
     79     if len(args_all) <= n_positional:
---> 80         return fn(*args_all, **kw)
     82     args_pos: P.args
     83     args_pos, args_rest = args_all[:n_positional], args_all[n_positional:]

File E:\python\Lib\site-packages\scanpy\readwrite.py:124, in read(filename, backed, sheet, ext, delimiter, first_column_names, backup_url, cache, cache_compression, **kwargs)
    122 filename = Path(filename)  # allow passing strings
    123 if is_valid_filename(filename):
--> 124     return _read(
    125         filename,
    126         backed=backed,
    127         sheet=sheet,
    128         ext=ext,
    129         delimiter=delimiter,
    130         first_column_names=first_column_names,
    131         backup_url=backup_url,
    132         cache=cache,
    133         cache_compression=cache_compression,
    134         **kwargs,
    135     )
    136 # generate filename and read to dict
    137 filekey = str(filename)

File E:\python\Lib\site-packages\scanpy\readwrite.py:759, in _read(filename, backed, sheet, ext, delimiter, first_column_names, backup_url, cache, cache_compression, suppress_cache_warning, **kwargs)
    757 if ext in {"h5", "h5ad"}:
    758     if sheet is None:
--> 759         return read_h5ad(filename, backed=backed)
    760     else:
    761         logg.debug(f"reading sheet {sheet} from file {filename}")

File E:\python\Lib\site-packages\anndata\_io\h5ad.py:237, in read_h5ad(filename, backed, as_sparse, as_sparse_fmt, chunk_size)
    229         raise NotImplementedError(
    230             "Currently only `X` and `raw/X` can be read as sparse."
    231         )
    233 rdasp = partial(
    234     read_dense_as_sparse, sparse_format=as_sparse_fmt, axis_chunk=chunk_size
    235 )
--> 237 with h5py.File(filename, "r") as f:
    239     def callback(func, elem_name: str, elem, iospec):
    240         if iospec.encoding_type == "anndata" or elem_name.endswith("/"):

File E:\python\Lib\site-packages\h5py\_hl\files.py:562, in File.__init__(self, name, mode, driver, libver, userblock_size, swmr, rdcc_nslots, rdcc_nbytes, rdcc_w0, track_order, fs_strategy, fs_persist, fs_threshold, fs_page_size, page_buf_size, min_meta_keep, min_raw_keep, locking, alignment_threshold, alignment_interval, meta_block_size, **kwds)
    553     fapl = make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0,
    554                      locking, page_buf_size, min_meta_keep, min_raw_keep,
    555                      alignment_threshold=alignment_threshold,
    556                      alignment_interval=alignment_interval,
    557                      meta_block_size=meta_block_size,
    558                      **kwds)
    559     fcpl = make_fcpl(track_order=track_order, fs_strategy=fs_strategy,
    560                      fs_persist=fs_persist, fs_threshold=fs_threshold,
    561                      fs_page_size=fs_page_size)
--> 562     fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr)
    564 if isinstance(libver, tuple):
    565     self._libver = libver

File E:\python\Lib\site-packages\h5py\_hl\files.py:235, in make_fid(name, mode, userblock_size, fapl, fcpl, swmr)
    233     if swmr and swmr_support:
    234         flags |= h5f.ACC_SWMR_READ
--> 235     fid = h5f.open(name, flags, fapl=fapl)
    236 elif mode == 'r+':
    237     fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)

File h5py\_objects.pyx:54, in h5py._objects.with_phil.wrapper()

File h5py\_objects.pyx:55, in h5py._objects.with_phil.wrapper()

File h5py\h5f.pyx:102, in h5py.h5f.open()

OSError: Unable to synchronously open file (truncated file: eof = 135389460, sblock->base_addr = 0, stored_eof = 388391976)

Versions:

cellrank==2.0.4 scanpy==1.10.1 anndata==0.10.6 numpy==1.26.4 numba==0.59.1 scipy==1.11.4 pandas==2.2.1 pygpcca==1.0.4 scikit-learn==1.4.1.post1 statsmodels==0.14.1 scvelo==0.3.2 pygam==0.9.1 matplotlib==3.8.4 seaborn==0.13.2

@LiuCanidk LiuCanidk added the bug Something isn't working label Apr 9, 2024
@Marius1311
Copy link
Collaborator

Hi @LiuCanidk, did you figure out whether that's just a connection issue, or something on our side?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

3 participants