Skip to content

Commit

Permalink
BUG: fix read s3 parquet (#764)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Hank0626 committed Jan 5, 2024
1 parent 8cdaa8f commit b1f1107
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions python/xorbits/_mars/dataframe/datasource/read_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ def _tile_no_partitioned(cls, op: "DataFrameReadParquet"):
if path.endswith(".parquet") and not path.startswith("__MACOSX")
]
else:
- paths = fs.glob(op.path, storage_options=op.storage_options)
+ paths = fs.glob(op.path)
if not isinstance(fs, fsspec.implementations.local.LocalFileSystem):
parsed_path = urlparse(op.path)
paths = [f"{parsed_path.scheme}://{path}" for path in paths]
Expand All @@ -414,9 +414,8 @@ def _tile_no_partitioned(cls, op: "DataFrameReadParquet"):
first_chunk_row_num = get_engine(op.engine).get_row_num(f)
first_chunk_raw_bytes = sys.getsizeof(f)
else:
- of = fsspec.open(pth)
- with of as f:
- first_chunk_row_num = get_engine(op.engine).get_row_num(f)
+ of = fs.open(pth)
+ first_chunk_row_num = get_engine(op.engine).get_row_num(of)
first_chunk_raw_bytes = fsspec.get_fs_token_paths(
pth, storage_options=op.storage_options
)[0].size(pth)
Expand Down Expand Up @@ -854,10 +853,10 @@ def read_parquet(
dtypes = engine.read_dtypes(f, types_mapper=types_mapper)
else:
if not isinstance(path, list):
- file_path = fs.glob(path, storage_options=storage_options)[0]
+ file_path = fs.glob(path)[0]
else:
file_path = path[0]
- with fs.open(file_path, storage_options=storage_options) as f:
+ with fs.open(file_path) as f:
dtypes = engine.read_dtypes(f, types_mapper=types_mapper)
if columns:
dtypes = dtypes[columns]
Expand Down

0 comments on commit b1f1107

Please sign in to comment.