From 1fd0aefa5b38224e3d17c47bb2d8003ac86ce027 Mon Sep 17 00:00:00 2001 From: Alex Toker Date: Thu, 3 Aug 2023 11:32:39 +0300 Subject: [PATCH 1/3] Add support for datastore profiles --- storey/utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/storey/utils.py b/storey/utils.py index b1336bac..b06061af 100644 --- a/storey/utils.py +++ b/storey/utils.py @@ -127,7 +127,7 @@ def get_remaining_path(url): if "://" in url: parsed_url = urlparse(url) scheme = parsed_url.scheme.lower() - if scheme in ("v3io", "dbfs"): + if scheme in ("ds", "v3io", "dbfs"): remaining_path = parsed_url.path elif scheme in ["wasb", "wasbs"]: remaining_path = f"{parsed_url.username}{parsed_url.path}" @@ -138,6 +138,12 @@ def get_remaining_path(url): def url_to_file_system(url, storage_options): scheme, remaining_path = get_remaining_path(url) + if url.startswith("ds://"): + parsed_url = urlparse(url) + if parsed_url.password: + scheme = parsed_url.password + else: + raise ValueError("Datastore profile URL expecting to have underlying scheme embedded as a password") if scheme: load_fs_dependencies(scheme) From 820b022ae7f61a767890946208eab51881e9fbdf Mon Sep 17 00:00:00 2001 From: Alex Toker Date: Thu, 3 Aug 2023 13:46:26 +0300 Subject: [PATCH 2/3] Addressing Gal's comments --- integration/test_filesystems_integration.py | 10 +++++++++- storey/utils.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/integration/test_filesystems_integration.py b/integration/test_filesystems_integration.py index 7ce8daa9..5fe8eda8 100644 --- a/integration/test_filesystems_integration.py +++ b/integration/test_filesystems_integration.py @@ -22,6 +22,7 @@ import pandas as pd import pytest import v3io +from fsspec.implementations.local import LocalFileSystem from integration.integration_test_utils import V3ioHeaders, _generate_table_name from storey import ( @@ -38,7 +39,7 @@ build_flow, ) from storey.dtypes import V3ioError -from storey.utils import get_remaining_path +from storey.utils import get_remaining_path, url_to_file_system @pytest.fixture() @@ -672,3 +673,10 @@ def test_get_path_utils(): schema, path = get_remaining_path(url) assert path == "mycontainer/path/to/object.csv" assert schema == "wasbs" + + +def test_ds_get_path_utils(): + url = "ds://:file@profile/path/to/object.csv" + fs, path = url_to_file_system(url, "") + assert path == "/path/to/object.csv" + assert isinstance(fs, LocalFileSystem) diff --git a/storey/utils.py b/storey/utils.py index b06061af..d2731043 100644 --- a/storey/utils.py +++ b/storey/utils.py @@ -143,7 +143,7 @@ def url_to_file_system(url, storage_options): if parsed_url.password: scheme = parsed_url.password else: - raise ValueError("Datastore profile URL expecting to have underlying scheme embedded as a password") + raise ValueError("Datastore profile URL is expected to have underlying scheme embedded as password") if scheme: load_fs_dependencies(scheme) From ee293504addd709ccbf6ac79c2ee25a5c1b0c68f Mon Sep 17 00:00:00 2001 From: Alex Toker Date: Mon, 7 Aug 2023 09:25:43 +0300 Subject: [PATCH 3/3] Move utils unit tests to separate file --- integration/test_filesystems_integration.py | 16 ----------- tests/test_utils.py | 31 +++++++++++++++++++++ 2 files changed, 31 insertions(+), 16 deletions(-) create mode 100644 tests/test_utils.py diff --git a/integration/test_filesystems_integration.py b/integration/test_filesystems_integration.py index 5fe8eda8..648f1ef4 100644 --- a/integration/test_filesystems_integration.py +++ b/integration/test_filesystems_integration.py @@ -22,7 +22,6 @@ import pandas as pd import pytest import v3io -from fsspec.implementations.local import LocalFileSystem from integration.integration_test_utils import V3ioHeaders, _generate_table_name from storey import ( @@ -39,7 +38,6 @@ build_flow, ) from storey.dtypes import V3ioError -from storey.utils import get_remaining_path, url_to_file_system @pytest.fixture() @@ -666,17 +664,3 @@ def test_filter_before_after_partitioned_outer_other_partition(setup_teardown_te ] assert read_back_result == expected, f"{read_back_result}\n!=\n{expected}" - - -def test_get_path_utils(): - url = "wasbs://mycontainer@myaccount.blob.core.windows.net/path/to/object.csv" - schema, path = get_remaining_path(url) - assert path == "mycontainer/path/to/object.csv" - assert schema == "wasbs" - - -def test_ds_get_path_utils(): - url = "ds://:file@profile/path/to/object.csv" - fs, path = url_to_file_system(url, "") - assert path == "/path/to/object.csv" - assert isinstance(fs, LocalFileSystem) diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..33c8695b --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,31 @@ +# Copyright 2023 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from fsspec.implementations.local import LocalFileSystem + +from storey.utils import get_remaining_path, url_to_file_system + + +def test_get_path_utils(): + url = "wasbs://mycontainer@myaccount.blob.core.windows.net/path/to/object.csv" + schema, path = get_remaining_path(url) + assert path == "mycontainer/path/to/object.csv" + assert schema == "wasbs" + + +def test_ds_get_path_utils(): + url = "ds://:file@profile/path/to/object.csv" + fs, path = url_to_file_system(url, "") + assert path == "/path/to/object.csv" + assert isinstance(fs, LocalFileSystem)