remove custom arrow code path in favor of proper dask-cudf support
rjzamora committed Aug 28, 2024
1 parent 4aa53be commit ad8df90
Showing 3 changed files with 15 additions and 192 deletions.
1 change: 0 additions & 1 deletion dask_cuda/benchmarks/custom/__init__.py

This file was deleted.

169 changes: 0 additions & 169 deletions dask_cuda/benchmarks/custom/parquet.py

This file was deleted.

37 changes: 15 additions & 22 deletions dask_cuda/benchmarks/remote_parquet.py
@@ -32,29 +32,22 @@ def read_data(
     path = DEFAULT_DATASET_PATH
     columns = DEFAULT_COLUMNS
     with dask.config.set({"dataframe.backend": backend}):
-        if filesystem == "arrow" and backend == "cudf":
-            df = custom_read_parquet(
-                path,
-                columns=columns,
-                blocksize=blocksize,
-            )
+        if filesystem == "arrow":
+            # TODO: Warn user that blocksize and aggregate_files
+            # are ignored when `filesystem == "arrow"`
+            _blocksize = {}
+            _aggregate_files = {}
         else:
-            if filesystem == "arrow":
-                # TODO: Warn user that blocksize and aggregate_files
-                # are ignored when `filesystem == "arrow"`
-                _blocksize = {}
-                _aggregate_files = {}
-            else:
-                _blocksize = {"blocksize": blocksize}
-                _aggregate_files = {"aggregate_files": aggregate_files}
-
-            df = dd.read_parquet(
-                path,
-                columns=columns,
-                filesystem=filesystem,
-                **_blocksize,
-                **_aggregate_files,
-            )
+            _blocksize = {"blocksize": blocksize}
+            _aggregate_files = {"aggregate_files": aggregate_files}
+
+        df = dd.read_parquet(
+            path,
+            columns=columns,
+            filesystem=filesystem,
+            **_blocksize,
+            **_aggregate_files,
+        )
     return df.memory_usage().compute().sum()


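For context, here is a minimal, self-contained sketch of the read path the benchmark now relies on: dask-cudf's native support for filesystem="arrow" in dd.read_parquet, as exercised in the diff above. It assumes dask-cudf is installed; the dataset path and column list are placeholders, not values from this benchmark.

import dask
import dask.dataframe as dd

path = "s3://my-bucket/dataset/"  # hypothetical dataset location
columns = ["a", "b"]              # hypothetical column selection

# Route DataFrame creation through dask-cudf (GPU-backed cudf DataFrames).
with dask.config.set({"dataframe.backend": "cudf"}):
    # The pyarrow-based filesystem path. blocksize/aggregate_files are
    # not passed because they are ignored on this path (see the TODO
    # in the diff above).
    df = dd.read_parquet(path, columns=columns, filesystem="arrow")

# Same summary statistic the benchmark computes.
print(df.memory_usage().compute().sum())

Note that the TODO in the diff still applies: blocksize and aggregate_files are only forwarded on the fsspec path, so passing them alongside filesystem="arrow" has no effect.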
