Skip to content

Commit

Permalink
handle dandi auth with LindiRemfile
Browse files Browse the repository at this point in the history
  • Loading branch information
magland committed Apr 20, 2024
1 parent 1288875 commit 2be1d25
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 139 deletions.
83 changes: 0 additions & 83 deletions lindi/LindiH5pyFile/FileSegmentReader/DandiFileSegmentReader.py

This file was deleted.

35 changes: 0 additions & 35 deletions lindi/LindiH5pyFile/FileSegmentReader/FileSegmentReader.py

This file was deleted.

Empty file.
39 changes: 20 additions & 19 deletions lindi/LindiH5pyFile/LindiReferenceFileSystemStore.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from typing import Literal, Dict, Union
import json
import base64
import requests
from zarr.storage import Store as ZarrStore

from ..LocalCache.LocalCache import LocalCache
from .FileSegmentReader.FileSegmentReader import FileSegmentReader
from .FileSegmentReader.DandiFileSegmentReader import DandiFileSegmentReader


class LindiReferenceFileSystemStore(ZarrStore):
Expand Down Expand Up @@ -138,7 +137,7 @@ def __getitem__(self, key: str):
x = self.local_cache.get_chunk(url=url, offset=offset, size=length)
if x is not None:
return x
val = _read_bytes_from_url(url, offset, length)
val = _read_bytes_from_url_or_path(url, offset, length)
if self.local_cache is not None:
self.local_cache.put_chunk(url=url, offset=offset, size=length, data=val)
return val
Expand Down Expand Up @@ -229,22 +228,24 @@ def use_templates_in_rfs(rfs: dict) -> None:
v[0] = '{{' + template_names_for_urls[url] + '}}'


# Keep a global cache of file segment readers that apply to all instances of
# LindiReferenceFileSystemStore. The key is the URL of the file.
_file_segment_readers: Dict[str, FileSegmentReader] = {}


def _read_bytes_from_url_or_path(url_or_path: str, offset: int, length: int) -> bytes:
    """
    Read a range of bytes from a remote URL or from a local file.

    Parameters
    ----------
    url_or_path : str
        Anything starting with ``http://`` or ``https://`` is fetched with an
        HTTP range request; every other value is opened as a local file path.
    offset : int
        Position of the first byte to read.
    length : int
        Number of bytes to read.

    Returns
    -------
    bytes
        The requested bytes. A local read may return fewer bytes than
        requested if the file ends first.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status.
    OSError
        If the local file cannot be opened or read.
    """
    if not url_or_path.startswith(('http://', 'https://')):
        with open(url_or_path, 'rb') as f:
            f.seek(offset)
            return f.read(length)

    if length == 0:
        # "bytes=N-(N-1)" is not a valid byte range, so a server would be free
        # to ignore it and send the whole resource. Nothing was asked for, so
        # answer without touching the network.
        return b''

    # Imported only on the remote branch so that local reads do not depend on
    # the LindiRemfile module.
    from ..LindiRemfile.LindiRemfile import _resolve_url

    url_resolved = _resolve_url(url_or_path)  # handle DANDI auth
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        # HTTP byte ranges are inclusive on both ends
        "Range": f"bytes={offset}-{offset + length - 1}",
    }
    # Without a timeout, requests waits forever on a stalled connection.
    response = requests.get(url_resolved, headers=headers, timeout=60)
    response.raise_for_status()
    if response.status_code == 200:
        # The server ignored the Range header and sent the full resource;
        # cut out the part that was actually requested.
        return response.content[offset:offset + length]
    return response.content


# Backward-compatible alias for the name this function had before it also
# accepted local file paths.
_read_bytes_from_url = _read_bytes_from_url_or_path
43 changes: 41 additions & 2 deletions lindi/LindiRemfile/LindiRemfile.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Union
import time
import os
import requests
from ..LocalCache.LocalCache import LocalCache

Expand Down Expand Up @@ -39,6 +40,7 @@ def __init__(
Requires that url is a string (does not accept object with .get_url() function)
Does not support using multiple threads
Does not use memory cache if LocalCache is specified
Handles DANDI authentication
A note:
In the context of LINDI, this LindiRemfile is going to be used for loading
Expand Down Expand Up @@ -328,6 +330,43 @@ def fetch_bytes(range_start: int, range_end: int, num_retries: int, verbose: boo
return fetch_bytes(start_byte, end_byte, _num_request_retries, verbose)


# Module-level cache used by _resolve_url.
# Maps the original url to a dict with two keys:
#   "timestamp": the time.time() value recorded when the url was resolved
#   "url": the resolved url
_global_resolved_urls = {}  # url -> {timestamp, url}


def _is_dandi_url(url: str) -> bool:
    """
    Return True if the URL points at the DANDI API (production or staging).

    Production URLs must start with ``https://api.dandiarchive.org/api/``;
    staging URLs must start with ``https://api-staging.dandiarchive.org/``.
    """
    return url.startswith((
        'https://api.dandiarchive.org/api/',
        'https://api-staging.dandiarchive.org/',
    ))


def _resolve_dandi_url(url: str) -> str:
    """
    Follow the redirects of a DANDI API URL and return the final URL.

    If an API key is available in the environment it is sent as an
    ``Authorization: token <key>`` header. The key is read from
    ``DANDI_API_KEY`` for production URLs and from ``DANDI_STAGING_API_KEY``
    for staging URLs. An unset or empty variable means no header is sent.

    The HTTP status of the response is not checked here: whatever URL the
    HEAD request ends up at is returned, and any access error will surface
    when that URL is actually read.

    Parameters
    ----------
    url : str
        The DANDI API URL to resolve.

    Returns
    -------
    str
        The URL reached after following redirects.
    """
    if url.startswith('https://api.dandiarchive.org/api/'):
        api_key = os.environ.get('DANDI_API_KEY')
    elif url.startswith('https://api-staging.dandiarchive.org/'):
        api_key = os.environ.get('DANDI_STAGING_API_KEY')
    else:
        api_key = None

    headers = {}
    if api_key:
        # An empty key would produce a malformed "token " header, so it is
        # treated the same as no key at all.
        headers['Authorization'] = f'token {api_key}'

    # Done synchronously. Without a timeout, requests waits forever on a
    # stalled connection.
    resp = requests.head(url, allow_redirects=True, headers=headers, timeout=60)
    return str(resp.url)


def _resolve_url(url: str) -> str:
    """
    Return the URL that should actually be used to download from `url`.

    DANDI URLs are resolved through _resolve_dandi_url (which applies DANDI
    authentication); every other URL is returned unchanged. Results are kept
    in the module-level _global_resolved_urls cache and reused for ten
    minutes, after which the URL is resolved again.

    Parameters
    ----------
    url : str
        The URL to resolve.

    Returns
    -------
    str
        The resolved URL.
    """
    cached = _global_resolved_urls.get(url)
    if cached is not None:
        elapsed = time.time() - cached["timestamp"]
        if elapsed < 60 * 10:  # cache entries are valid for ten minutes
            return cached["url"]

    if _is_dandi_url(url):
        resolved_url = _resolve_dandi_url(url)
    else:
        resolved_url = url

    _global_resolved_urls[url] = {"timestamp": time.time(), "url": resolved_url}
    return resolved_url

0 comments on commit 2be1d25

Please sign in to comment.