From 699a44b5c2f52d6b16dd0ebce3ca3a4890c5b935 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 14 Jun 2023 21:39:35 -0400 Subject: [PATCH 1/5] "Hardcode" the knowledge that location= option in URL can only point to the folder Per @AlmightyYakob observation in https://github.com/dandi/dandi-archive/issues/1546#issuecomment-1478119271 --- dandi/dandiarchive.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dandi/dandiarchive.py b/dandi/dandiarchive.py index 68cea6d05..b68964882 100644 --- a/dandi/dandiarchive.py +++ b/dandi/dandiarchive.py @@ -571,7 +571,7 @@ class _dandi_url_parser: re.compile( rf"{server_grp}(#/)?(?Pdandiset)/{dandiset_id_grp}" rf"(/(?P{VERSION_REGEX}))?" - r"(/(files(\?location=(?P.*)?)?)?)?" + r"(/(files(\?location=(?P.*)?)?)?)?" ), {}, "https://[/api]/[#/]dandiset/[/]" @@ -754,7 +754,13 @@ def parse( # asset_type = groups.get("asset_type") dandiset_id = groups.get("dandiset_id") version_id = groups.get("version") - location = groups.get("location") + if "location-folder" in groups: + assert "location" not in groups + location = groups.get("location-folder") + if not location.endswith("/"): + location += "/" + else: + location = groups.get("location") asset_id = groups.get("asset_id") path = groups.get("path") glob_param = groups.get("glob") From de4a7f422e814d686a056baa15a484a238c5c0f8 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Thu, 15 Jun 2023 09:36:29 -0400 Subject: [PATCH 2/5] Fix --- dandi/dandiarchive.py | 12 +++++++----- dandi/tests/test_dandiarchive.py | 12 ++++++------ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/dandi/dandiarchive.py b/dandi/dandiarchive.py index b68964882..1a78f5f0e 100644 --- a/dandi/dandiarchive.py +++ b/dandi/dandiarchive.py @@ -571,7 +571,7 @@ class _dandi_url_parser: re.compile( rf"{server_grp}(#/)?(?Pdandiset)/{dandiset_id_grp}" rf"(/(?P{VERSION_REGEX}))?" - r"(/(files(\?location=(?P.*)?)?)?)?" + r"(/(files(\?location=(?P.*)?)?)?)?" ), {}, "https://[/api]/[#/]dandiset/[/]" @@ -754,18 +754,20 @@ def parse( # asset_type = groups.get("asset_type") dandiset_id = groups.get("dandiset_id") version_id = groups.get("version") - if "location-folder" in groups: + location: Optional[str] + if groups.get("location_folder") is not None: assert "location" not in groups - location = groups.get("location-folder") - if not location.endswith("/"): + location = urlunquote(groups["location_folder"]) + if not location.endswith("/") and not glob: location += "/" else: location = groups.get("location") + if location: + location = urlunquote(location) asset_id = groups.get("asset_id") path = groups.get("path") glob_param = groups.get("glob") if location: - location = urlunquote(location) # ATM carries leading '/' which IMHO is not needed/misguiding # somewhat, so I will just strip it location = location.lstrip("/") diff --git a/dandi/tests/test_dandiarchive.py b/dandi/tests/test_dandiarchive.py index 5bb72c69e..fbe7d9e5b 100644 --- a/dandi/tests/test_dandiarchive.py +++ b/dandi/tests/test_dandiarchive.py @@ -193,22 +193,22 @@ pytest.param( "https://gui.dandiarchive.org/#/dandiset/000001/files" "?location=%2Fsub-anm369962", - AssetItemURL( + AssetFolderURL( instance=known_instances["dandi"], dandiset_id="000001", version_id=None, - path="sub-anm369962", + path="sub-anm369962/", ), marks=mark.skipif_no_network, ), pytest.param( "https://gui.dandiarchive.org/#/dandiset/000006/0.200714.1807/files" "?location=%2Fsub-anm369962", - AssetItemURL( + AssetFolderURL( instance=known_instances["dandi"], dandiset_id="000006", version_id="0.200714.1807", - path="sub-anm369962", + path="sub-anm369962/", ), marks=mark.skipif_no_network, ), @@ -325,11 +325,11 @@ pytest.param( "https://gui.dandiarchive.org/#/dandiset/001001/draft/files" "?location=sub-RAT123/*.nwb", - AssetItemURL( + AssetFolderURL( instance=known_instances["dandi"], dandiset_id="001001", version_id="draft", - path="sub-RAT123/*.nwb", + path="sub-RAT123/*.nwb/", ), marks=mark.skipif_no_network, ), From a1b23f8082e433a1029567a950fe2a9c44f48d6e Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Thu, 15 Jun 2023 09:37:04 -0400 Subject: [PATCH 3/5] Update docs --- docs/source/ref/urls.rst | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/docs/source/ref/urls.rst b/docs/source/ref/urls.rst index 7e1d75ddd..bd3f7ca7f 100644 --- a/docs/source/ref/urls.rst +++ b/docs/source/ref/urls.rst @@ -35,13 +35,9 @@ has one, and its draft version will be used otherwise. a collection of assets whose paths match the glob pattern ``path``, and `parse_dandi_url()` will convert the URL to an `AssetGlobURL`. - - If the ``glob``/``--path-type glob`` option is not in effect and ``path`` - ends with a trailing slash, the URL refers to an asset folder by path, and - `parse_dandi_url()` will convert the URL to an `AssetFolderURL`. - - - If the ``glob``/``--path-type glob`` option is not in effect and ``path`` - does not end with a trailing slash, the URL refers to a single asset by - path, and `parse_dandi_url()` will convert the URL to an `AssetItemURL`. + - If the ``glob``/``--path-type glob`` option is not in effect, the URL + refers to an asset folder by path, and `parse_dandi_url()` will convert the + URL to an `AssetFolderURL`. - :samp:`https://{server}[/api]/dandisets/{dandiset-id}[/versions[/{version}]]` — Refers to a Dandiset. `parse_dandi_url()` converts this format to a From 00f41bae7964916a6b74f7638bc36a8d2986ed96 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Thu, 15 Jun 2023 10:00:02 -0400 Subject: [PATCH 4/5] Preserve URL parse parameters when recursing --- dandi/dandiarchive.py | 6 +++--- dandi/tests/test_dandiarchive.py | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/dandi/dandiarchive.py b/dandi/dandiarchive.py index 1a78f5f0e..be32d0bcb 100644 --- a/dandi/dandiarchive.py +++ b/dandi/dandiarchive.py @@ -694,12 +694,12 @@ def parse( assert not handle_redirect assert not settings.get("map_instance") new_url = rewrite(url) - return cls.parse(new_url) + return cls.parse(new_url, map_instance=map_instance, glob=glob) elif handle_redirect: assert handle_redirect in ("pass", "only") new_url = cls.follow_redirect(url) if new_url != url: - return cls.parse(new_url) + return cls.parse(new_url, map_instance=map_instance, glob=glob) if handle_redirect == "pass": # We used to issue warning in such cases, but may be it got implemented # now via reverse proxy and we had added a new regex? let's just @@ -712,7 +712,7 @@ def parse( ) elif settings.get("map_instance"): if map_instance: - parsed_url = cls.parse(url, map_instance=False) + parsed_url = cls.parse(url, map_instance=False, glob=glob) if settings["map_instance"] not in known_instances: raise ValueError( "Unknown instance {}. Known are: {}".format( diff --git a/dandi/tests/test_dandiarchive.py b/dandi/tests/test_dandiarchive.py index fbe7d9e5b..690fda63f 100644 --- a/dandi/tests/test_dandiarchive.py +++ b/dandi/tests/test_dandiarchive.py @@ -361,6 +361,16 @@ def test_parse_api_url(url: str, parsed_url: ParsedDandiURL) -> None: @pytest.mark.parametrize( "url,parsed_url", [ + ( + "https://dandiarchive.org/dandiset/001001/draft/files" + "?location=sub-RAT123/*.nwb", + AssetGlobURL( + instance=known_instances["dandi"], + dandiset_id="001001", + version_id="draft", + path="sub-RAT123/*.nwb", + ), + ), pytest.param( "https://gui.dandiarchive.org/#/dandiset/001001/draft/files" "?location=sub-RAT123/*.nwb", From 24901a9c785fe427309904b55db8c242ad25801b Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 20 Oct 2023 11:57:22 -0400 Subject: [PATCH 5/5] RF: condense the code and thus make (internal var) location to be a non-Optional str --- dandi/dandiarchive.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dandi/dandiarchive.py b/dandi/dandiarchive.py index be32d0bcb..97c86ce5b 100644 --- a/dandi/dandiarchive.py +++ b/dandi/dandiarchive.py @@ -754,16 +754,14 @@ def parse( # asset_type = groups.get("asset_type") dandiset_id = groups.get("dandiset_id") version_id = groups.get("version") - location: Optional[str] + location: str if groups.get("location_folder") is not None: assert "location" not in groups location = urlunquote(groups["location_folder"]) if not location.endswith("/") and not glob: location += "/" else: - location = groups.get("location") - if location: - location = urlunquote(location) + location = urlunquote(groups.get("location") or "") asset_id = groups.get("asset_id") path = groups.get("path") glob_param = groups.get("glob")