Skip to content

Commit

Permalink
add related datasets getter
Browse files Browse the repository at this point in the history
  • Loading branch information
jokasimr committed Apr 11, 2024
1 parent 29aa5f1 commit 5f23eac
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 37 deletions.
31 changes: 17 additions & 14 deletions src/ess/reduce/scicat.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,37 @@
from pathlib import Path
from threading import Lock
from typing import NewType, Optional
from typing import Optional

from scitacean import Client
from scitacean import Client, Dataset

from .nexus import FilePath

ScitaceanToken = NewType('ScitaceanToken', str)
'''Token used to authenticate the scitacean client'''
ScitaceanVersion = NewType('ScitaceanVersion', str)
'''Version of the scitacean api'''
ScitaceanClient = NewType('ScitaceanClient', Client)
'''An instance of scitacean.Client that is used to fetch data from Scicat'''


# Registries of per-download locks. download_scicat_file looks entries up with
# ``setdefault`` using the tuple (dataset_id, filename, target) as key and
# removes the entry again once the download has finished.
_locks = {}
_file_download_locks = {}


def download_scicat_file(
    client: Client,
    dataset_id: str,
    filename: str,
    *,
    target: Optional[Path] = None,
) -> FilePath:
    '''Download one file of a dataset and return its local path.

    The dataset is fetched with ``client.get_dataset`` and the file is
    retrieved with ``client.download_files(..., select=filename)``.
    Calls that share the same ``(dataset_id, filename, target)`` key are
    guarded by a common lock taken from ``_file_download_locks``.

    Parameters
    ----------
    client:
        Client used to query the dataset and download the file.
    dataset_id:
        Identifier passed to ``client.get_dataset``.
    filename:
        Passed as ``select`` to ``client.download_files``.
    target:
        Directory to download into. Defaults to
        ``~/.cache/essreduce/<dataset_id>`` in the user's home directory.

    Returns
    -------
    :
        ``local_path`` of the first file of the downloaded dataset.
    '''
    if target is None:
        # ``Path`` keeps '~' as a literal directory name unless it is
        # expanded explicitly.
        target = Path(f'~/.cache/essreduce/{dataset_id}').expanduser()
    key = (dataset_id, filename, target)
    try:
        with _file_download_locks.setdefault(key, Lock()):
            dset = client.get_dataset(dataset_id)
            dset = client.download_files(dset, target=target, select=filename)
    finally:
        # A concurrent call with the same key may already have removed the
        # entry, so a missing key must not raise. Running this in ``finally``
        # also cleans up after a failed download.
        _file_download_locks.pop(key, None)
    return dset.files[0].local_path


def get_related_dataset(client: Client, ds: Dataset, relationship: str) -> Dataset:
    '''Fetch the dataset linked to ``ds`` by the given relationship.

    The entries of ``ds.relationships`` are scanned in order. For the first
    entry whose ``relationship`` equals the requested name, the dataset with
    that entry's ``pid`` is fetched through ``client.get_dataset``.

    Parameters
    ----------
    client:
        Client used to fetch the related dataset.
    ds:
        Dataset whose ``relationships`` are searched.
    relationship:
        Name of the relationship to look for.

    Returns
    -------
    :
        The result of ``client.get_dataset`` for the first matching entry.

    Raises
    ------
    ValueError
        If ``ds`` has no relationship with the requested name. This includes
        the case where ``relationships`` is missing, ``None`` or empty.
    '''
    # The attribute can exist but be ``None``; the ``getattr`` default alone
    # would not cover that, so fall back to an empty tuple explicitly.
    related = getattr(ds, 'relationships', None) or ()
    for d in related:
        if d.relationship == relationship:
            return client.get_dataset(d.pid)
    raise ValueError(
        f'The requested relation "{relationship}" was not found in dataset {ds}'
    )
36 changes: 13 additions & 23 deletions tests/scicat_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
import pytest
from dateutil.parser import parse as parse_date
from scitacean import Dataset, DatasetType, RemotePath
from scitacean.testing.client import FakeClient
from scitacean.model import Relationship
from scitacean.testing.client import FakeClient, ScicatCommError
from scitacean.testing.transfer import FakeFileTransfer

from ess.reduce.scicat import download_scicat_file
from ess.reduce.scicat import download_scicat_file, get_related_dataset


def _checksum(data: bytes) -> str:
Expand Down Expand Up @@ -41,6 +42,7 @@ def local_dataset(fs, files):
"height": {"value": 0.3, "unit": "m"},
"mass": "hefty",
},
relationships=[Relationship(pid='123', relationship='background')],
)
for name, content in files.items():
path = Path('tmp') / name
Expand All @@ -50,36 +52,24 @@ def local_dataset(fs, files):


def test_download_scicat_file(fs, local_dataset):
    '''A file uploaded via the fake client is downloaded into ``target``.'''
    local_dataset.make_upload_model()
    client = FakeClient.without_login(
        url="https://fake.scicat", file_transfer=FakeFileTransfer(fs=fs)
    )
    uploaded = client.upload_new_dataset_now(local_dataset)
    remote_name = uploaded.files[0].remote_path.posix
    with TemporaryDirectory() as dname:
        path = download_scicat_file(client, uploaded.pid, remote_name, target=dname)
        # The returned path is the remote file name placed below the target.
        assert path == Path(dname) / remote_name


def test_local_scicat_file_no_download(fs, local_dataset):
    '''Requesting the same file twice yields the same path, even when the
    client has no file transfer for the second request.'''
    client = FakeClient.without_login(
        url="https://fake.scicat", file_transfer=FakeFileTransfer(fs=fs)
    )
    dataset = client.upload_new_dataset_now(local_dataset)

    def fetch(directory):
        # Both requests use exactly the same arguments.
        return download_scicat_file(
            dataset.pid,
            dataset.files[0].remote_path.posix,
            client=client,
            target=directory,
        )

    with TemporaryDirectory() as dname:
        path = fetch(dname)
        # Remove the transfer before the second request.
        client._file_transfer = None
        no_download_path = fetch(dname)
    assert path == no_download_path
def test_get_related_dataset(local_dataset):
    '''A known relation triggers a dataset query; an unknown one raises
    ``ValueError``.'''
    fake_client = FakeClient.without_login(url="https://fake.scicat")

    # A communication error is expected here: it indicates that the client
    # was asked for the related dataset.
    with pytest.raises(ScicatCommError):
        get_related_dataset(fake_client, local_dataset, 'background')

    with pytest.raises(ValueError):
        get_related_dataset(fake_client, local_dataset, 'reference')

0 comments on commit 5f23eac

Please sign in to comment.