diff --git a/apis/python/src/tiledbvcf/dask_functions.py b/apis/python/src/tiledbvcf/dask_functions.py
index 9e0cd86be..71952b15e 100644
--- a/apis/python/src/tiledbvcf/dask_functions.py
+++ b/apis/python/src/tiledbvcf/dask_functions.py
@@ -1,4 +1,6 @@
 import dask
+
+dask.config.set({"dataframe.query-planning": False})
 import dask.dataframe
 import pyarrow as pa
 
diff --git a/apis/python/tests/test_dask.py b/apis/python/tests/test_dask.py
index 28259fa73..a1fd35a67 100644
--- a/apis/python/tests/test_dask.py
+++ b/apis/python/tests/test_dask.py
@@ -5,6 +5,8 @@
 import tiledbvcf
 
 import dask
+
+dask.config.set({"dataframe.query-planning": False})
 import dask.distributed
 
 # Directory containing this file
diff --git a/documentation/how-to/perform-distributed-queries-with-dask.md b/documentation/how-to/perform-distributed-queries-with-dask.md
index 673835824..1054892d5 100644
--- a/documentation/how-to/perform-distributed-queries-with-dask.md
+++ b/documentation/how-to/perform-distributed-queries-with-dask.md
@@ -10,6 +10,8 @@ You can use the `tiledbvcf` package's Dask integration to partition read operat
 import tiledbvcf
 import dask
 
+dask.config.set({'dataframe.query-planning': False})
+
 ds = tiledbvcf.Dataset('my-large-dataset', mode='r')
 dask_df = ds.read_dask(attrs=['sample_name', 'pos_start', 'pos_end'],
                        bed_file='very-large-bedfile.bed',
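
Context on placement (explanatory note, not part of the diff above): Dask reads the dataframe.query-planning setting when dask.dataframe is first imported, which is why each hunk inserts the dask.config.set call after `import dask` and before the first import that pulls in dask.dataframe. A minimal standalone sketch of the pattern, assuming a Dask release where query planning is enabled by default (2024.1+):

    import dask

    # Assumption: this must run before dask.dataframe is imported, because
    # the DataFrame backend (dask-expr vs. legacy) is selected at import time;
    # setting it afterwards has no effect.
    dask.config.set({"dataframe.query-planning": False})

    import dask.dataframe as dd

    # With query planning disabled, this returns a legacy
    # dask.dataframe.DataFrame rather than a dask_expr collection.
    df = dd.from_dict({"x": [1, 2, 3]}, npartitions=1)
    print(type(df))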