diff --git a/one/alf/io.py b/one/alf/io.py
index 9be8a67f..a3be627e 100644
--- a/one/alf/io.py
+++ b/one/alf/io.py
@@ -687,7 +687,7 @@ def remove_uuid_recursive(folder, dry=False) -> None:
 
 
 def next_num_folder(session_date_folder: Union[str, Path]) -> str:
-    """Return the next number for a session given a session_date_folder"""
+    """Return the next number for a session given a session_date_folder."""
     session_date_folder = Path(session_date_folder)
     if not session_date_folder.exists():
         return '001'
@@ -701,7 +701,7 @@ def next_num_folder(session_date_folder: Union[str, Path]) -> str:
 
 
 def remove_empty_folders(folder: Union[str, Path]) -> None:
-    """Will iteratively remove any children empty folders"""
+    """Iteratively remove any empty child folders."""
     all_folders = sorted(x for x in Path(folder).rglob('*') if x.is_dir())
     for f in reversed(all_folders):  # Reversed sorted ensures we remove deepest first
         try:
@@ -712,8 +712,9 @@ def remove_empty_folders(folder: Union[str, Path]) -> None:
 
 def filter_by(alf_path, wildcards=True, **kwargs):
     """
-    Given a path and optional filters, returns all ALF files and their associated parts. The
-    filters constitute a logical AND. For all but `extra`, if a list is provided, one or more
+    Given a path and optional filters, returns all ALF files and their associated parts.
+
+    The filters constitute a logical AND. For all but `extra`, if a list is provided, one or more
     elements must match (a logical OR).
 
     Parameters
diff --git a/one/api.py b/one/api.py
index 616ca999..ab624f7c 100644
--- a/one/api.py
+++ b/one/api.py
@@ -12,6 +12,7 @@
 from urllib.error import URLError
 import time
 import threading
+import os
 
 import pandas as pd
 import numpy as np
@@ -34,18 +35,18 @@
 _logger = logging.getLogger(__name__)
 
 __all__ = ['ONE', 'One', 'OneAlyx']
-"""int: The number of download threads"""
-N_THREADS = 4
+N_THREADS = int(os.environ.get('ONE_HTTP_DL_THREADS', 4))
+"""int: The number of download threads."""
 
 
 class One(ConversionMixin):
-    """An API for searching and loading data on a local filesystem"""
+    """An API for searching and loading data on a local filesystem."""
     _search_terms = (
         'dataset', 'date_range', 'laboratory', 'number', 'projects', 'subject', 'task_protocol'
     )
 
     uuid_filenames = None
-    """bool: whether datasets on disk have a UUID in their filename"""
+    """bool: whether datasets on disk have a UUID in their filename."""
 
     def __init__(self, cache_dir=None, mode='auto', wildcards=True, tables_dir=None):
         """An API for searching and loading data on a local filesystem
@@ -86,16 +87,16 @@ def __repr__(self):
 
     @property
     def offline(self):
-        """bool: True if mode is local or no Web client set"""
+        """bool: True if mode is local or no Web client set."""
         return self.mode == 'local' or not getattr(self, '_web_client', False)
 
     @util.refresh
     def search_terms(self, query_type=None) -> tuple:
-        """List the search term keyword args for use in the search method"""
+        """List the search term keyword args for use in the search method."""
         return self._search_terms
 
     def _reset_cache(self):
-        """Replace the cache object with a Bunch that contains the right fields"""
+        """Replace the cache object with a Bunch that contains the right fields."""
        self._cache = Bunch({'_meta': {
             'expired': False,
             'created_time': None,
@@ -214,18 +215,18 @@ def _save_cache(self, save_dir=None, force=False):
                 lock_file.unlink()
 
     def refresh_cache(self, mode='auto'):
-        """Check and reload cache tables
+        """Check and reload cache tables.
 
         Parameters
         ----------
         mode : {'local', 'refresh', 'auto', 'remote'}
             Options are 'local' (don't reload); 'refresh' (reload); 'auto' (reload if expired);
-            'remote' (don't reload)
+            'remote' (don't reload).
 
         Returns
         -------
         datetime.datetime
-            Loaded timestamp
+            Loaded timestamp.
         """
         # NB: Currently modified table will be lost if called with 'refresh';
         # May be instances where modified cache is saved then immediately replaced with a new
@@ -253,13 +254,13 @@ def _update_cache_from_records(self, strict=False, **kwargs):
         strict : bool
             If not True, the columns don't need to match. Extra columns in input tables are
             dropped and missing columns are added and filled with np.nan.
-        **kwargs
-            pandas.DataFrame or pandas.Series to insert/update for each table
+        kwargs
+            pandas.DataFrame or pandas.Series to insert/update for each table.
 
         Returns
         -------
         datetime.datetime:
-            A timestamp of when the cache was updated
+            A timestamp of when the cache was updated.
 
         Example
         -------
@@ -272,7 +273,7 @@ def _update_cache_from_records(self, strict=False, **kwargs):
             When strict is True the input columns must exactly match those of the cache table,
             including the order.
         KeyError
-            One or more of the keyword arguments does not match a table in One._cache
+            One or more of the keyword arguments does not match a table in One._cache.
         """
         updated = None
         for table, records in kwargs.items():
@@ -389,7 +390,7 @@ def search(self, details=False, query_type=None, **kwargs):
 
             one.search_terms()
 
-        For all of the search parameters, a single value or list may be provided. For `dataset`,
+        For all search parameters, a single value or list may be provided. For `dataset`,
         the sessions returned will contain all listed datasets. For the other parameters, the
         session must contain at least one of the entries.
 
@@ -521,7 +522,7 @@ def sort_fcn(itm):
         eids = sessions.index.to_list()
 
         if details:
-            return eids, sessions.reset_index(drop=True).to_dict('records', Bunch)
+            return eids, sessions.reset_index(drop=True).to_dict('records', into=Bunch)
         else:
             return eids
 
@@ -1237,7 +1238,7 @@ def _verify_specifiers(specifiers):
         # Make list of metadata Bunches out of the table
         records = (present_datasets
                    .reset_index(names='id')
-                   .to_dict('records', Bunch))
+                   .to_dict('records', into=Bunch))
 
         # Ensure result same length as input datasets list
         files = [None if not here else files.pop(0) for here in present]
@@ -2025,7 +2026,7 @@ def search(self, details=False, query_type=None, **kwargs):
 
             one.search_terms(query_type='remote')
 
-        For all of the search parameters, a single value or list may be provided. For `dataset`,
+        For all search parameters, a single value or list may be provided. For `dataset`,
         the sessions returned will contain all listed datasets. For the other parameters, the
         session must contain at least one of the entries.
diff --git a/one/registration.py b/one/registration.py
index 8cb0f170..3a2f4897 100644
--- a/one/registration.py
+++ b/one/registration.py
@@ -190,17 +190,17 @@ def create_new_session(self, subject, session_root=None, date=None, register=Tru
 
     def find_files(self, session_path):
         """
-        Returns an generator of file names that match one of the dataset type patterns in Alyx
+        Returns a generator of file names that match one of the dataset type patterns in Alyx.
 
         Parameters
         ----------
         session_path : str, pathlib.Path
-            The session path to search
+            The session path to search.
 
         Yields
         -------
         pathlib.Path
-            File paths that match the dataset type patterns in Alyx
+            File paths that match the dataset type patterns in Alyx.
""" session_path = Path(session_path) for p in session_path.rglob('*.*.*'): diff --git a/one/webclient.py b/one/webclient.py index 3e57f1ad..ff66b1f1 100644 --- a/one/webclient.py +++ b/one/webclient.py @@ -980,7 +980,7 @@ def rest(self, url=None, action=None, id=None, data=None, files=None, Option file(s) to upload. no_cache : bool If true the `list` and `read` actions are performed without returning the cache. - **kwargs + kwargs Filters as per the Alyx REST documentation cf. https://openalyx.internationalbrainlab.org/docs/