Resolves #114; minor punctuation; control thread count w/ env var
k1o0 committed Feb 6, 2024
1 parent 22f2972 commit a17cc18
Showing 4 changed files with 28 additions and 26 deletions.
9 changes: 5 additions & 4 deletions one/alf/io.py
@@ -687,7 +687,7 @@ def remove_uuid_recursive(folder, dry=False) -> None:


def next_num_folder(session_date_folder: Union[str, Path]) -> str:
"""Return the next number for a session given a session_date_folder"""
"""Return the next number for a session given a session_date_folder."""
session_date_folder = Path(session_date_folder)
if not session_date_folder.exists():
return '001'
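
A quick illustration of the behaviour described in the docstring (hypothetical paths; assumes the usual zero-padded session folders):

```python
from pathlib import Path
from one.alf.io import next_num_folder

# If /data/KS023/2024-02-06 contains session folders '001' and '002',
# the next number is returned as a zero-padded string.
date_folder = Path('/data/KS023/2024-02-06')
print(next_num_folder(date_folder))  # -> '003', or '001' if the folder doesn't exist
```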
@@ -701,7 +701,7 @@ def next_num_folder(session_date_folder: Union[str, Path]) -> str:


def remove_empty_folders(folder: Union[str, Path]) -> None:
"""Will iteratively remove any children empty folders"""
"""Iteratively remove any empty child folders."""
all_folders = sorted(x for x in Path(folder).rglob('*') if x.is_dir())
for f in reversed(all_folders): # Reversed sorted ensures we remove deepest first
try:
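
The reversed sort matters because a parent only becomes empty once its children are removed; a minimal standalone sketch of the same idea:

```python
import tempfile
from pathlib import Path

root = Path(tempfile.mkdtemp())
(root / 'a' / 'b').mkdir(parents=True)  # 'a' is non-empty until 'b' is gone

# Deepest-first traversal, so each rmdir call sees an already-empty folder
for d in reversed(sorted(p for p in root.rglob('*') if p.is_dir())):
    d.rmdir()
```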
@@ -712,8 +712,9 @@ def remove_empty_folders(folder: Union[str, Path]) -> None:

def filter_by(alf_path, wildcards=True, **kwargs):
"""
-Given a path and optional filters, returns all ALF files and their associated parts. The
-filters constitute a logical AND. For all but `extra`, if a list is provided, one or more
+Given a path and optional filters, returns all ALF files and their associated parts.
+The filters constitute a logical AND. For all but `extra`, if a list is provided, one or more
elements must match (a logical OR).
Parameters
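
The AND-between-filters, OR-within-a-list semantics may be clearer with a concrete call (hypothetical session path; the keyword names follow the ALF dataset parts used by this module):

```python
from one.alf.io import filter_by

# Both filters must hold (logical AND); within the attribute list,
# matching either entry suffices (logical OR).
files, parts = filter_by('/data/KS023/2024-02-06/001/alf',
                         object='trials',
                         attribute=['intervals', 'choice'])
```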
37 changes: 19 additions & 18 deletions one/api.py
@@ -12,6 +12,7 @@
from urllib.error import URLError
import time
import threading
+import os

import pandas as pd
import numpy as np
@@ -34,18 +35,18 @@

_logger = logging.getLogger(__name__)
__all__ = ['ONE', 'One', 'OneAlyx']
"""int: The number of download threads"""
N_THREADS = 4
N_THREADS = os.environ.get('ONE_HTTP_DL_THREADS', 4)
"""int: The number of download threads."""


class One(ConversionMixin):
"""An API for searching and loading data on a local filesystem"""
"""An API for searching and loading data on a local filesystem."""
_search_terms = (
'dataset', 'date_range', 'laboratory', 'number', 'projects', 'subject', 'task_protocol'
)

uuid_filenames = None
"""bool: whether datasets on disk have a UUID in their filename"""
"""bool: whether datasets on disk have a UUID in their filename."""

def __init__(self, cache_dir=None, mode='auto', wildcards=True, tables_dir=None):
"""An API for searching and loading data on a local filesystem
@@ -86,16 +87,16 @@ def __repr__(self):

@property
def offline(self):
"""bool: True if mode is local or no Web client set"""
"""bool: True if mode is local or no Web client set."""
return self.mode == 'local' or not getattr(self, '_web_client', False)

@util.refresh
def search_terms(self, query_type=None) -> tuple:
"""List the search term keyword args for use in the search method"""
"""List the search term keyword args for use in the search method."""
return self._search_terms

def _reset_cache(self):
"""Replace the cache object with a Bunch that contains the right fields"""
"""Replace the cache object with a Bunch that contains the right fields."""
self._cache = Bunch({'_meta': {
'expired': False,
'created_time': None,
@@ -214,18 +215,18 @@ def _save_cache(self, save_dir=None, force=False):
lock_file.unlink()

def refresh_cache(self, mode='auto'):
"""Check and reload cache tables
"""Check and reload cache tables.
Parameters
----------
mode : {'local', 'refresh', 'auto', 'remote'}
Options are 'local' (don't reload); 'refresh' (reload); 'auto' (reload if expired);
-'remote' (don't reload)
+'remote' (don't reload).
Returns
-------
datetime.datetime
-Loaded timestamp
+Loaded timestamp.
"""
# NB: Currently modified table will be lost if called with 'refresh';
# May be instances where modified cache is saved then immediately replaced with a new
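
Hypothetical calls illustrating the four modes (assumes a local cache directory exists):

```python
from one.api import One

one = One(cache_dir='/data/one')  # hypothetical cache directory
one.refresh_cache('auto')         # reload only if the tables have expired
one.refresh_cache('refresh')      # always reload from disk
one.refresh_cache('local')        # don't reload
one.refresh_cache('remote')       # don't reload either
```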
@@ -253,13 +254,13 @@ def _update_cache_from_records(self, strict=False, **kwargs):
strict : bool
If not True, the columns don't need to match. Extra columns in input tables are
dropped and missing columns are added and filled with np.nan.
-**kwargs
-pandas.DataFrame or pandas.Series to insert/update for each table
+kwargs
+pandas.DataFrame or pandas.Series to insert/update for each table.
Returns
-------
datetime.datetime:
-A timestamp of when the cache was updated
+A timestamp of when the cache was updated.
Example
-------
@@ -272,7 +273,7 @@ def _update_cache_from_records(self, strict=False, **kwargs):
When strict is True the input columns must exactly match those of the cache table,
including the order.
KeyError
-One or more of the keyword arguments does not match a table in One._cache
+One or more of the keyword arguments does not match a table in One._cache.
"""
updated = None
for table, records in kwargs.items():
@@ -389,7 +390,7 @@ def search(self, details=False, query_type=None, **kwargs):
one.search_terms()
-For all of the search parameters, a single value or list may be provided. For `dataset`,
+For all search parameters, a single value or list may be provided. For `dataset`,
the sessions returned will contain all listed datasets. For the other parameters,
the session must contain at least one of the entries.
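
For example (hypothetical dataset and subject names), the following returns only sessions containing both datasets, while either subject may match:

```python
from one.api import ONE

one = ONE()  # assumes a configured instance
eids = one.search(dataset=['trials.table', 'wheel.position'],  # all must be present
                  subject=['KS023', 'KS024'])                  # any may match
```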
@@ -521,7 +522,7 @@ def sort_fcn(itm):
eids = sessions.index.to_list()

if details:
-return eids, sessions.reset_index(drop=True).to_dict('records', Bunch)
+return eids, sessions.reset_index(drop=True).to_dict('records', into=Bunch)
else:
return eids
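
The `into=` keyword makes the target mapping type explicit, presumably ahead of pandas moving the extra `to_dict` arguments to keyword-only; a standalone sketch of the pattern (assuming `Bunch` is the attribute-access dict from iblutil that this module imports):

```python
import pandas as pd
from iblutil.util import Bunch  # assumption: the Bunch used by one.api

df = pd.DataFrame({'eid': ['abc123', 'def456'], 'n_trials': [402, 517]})

# Each record becomes a Bunch (a dict subclass with attribute access)
# rather than a plain dict
records = df.to_dict('records', into=Bunch)
print(records[0].n_trials)  # -> 402
```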

@@ -1237,7 +1238,7 @@ def _verify_specifiers(specifiers):
# Make list of metadata Bunches out of the table
records = (present_datasets
.reset_index(names='id')
-.to_dict('records', Bunch))
+.to_dict('records', into=Bunch))

# Ensure result same length as input datasets list
files = [None if not here else files.pop(0) for here in present]
@@ -2025,7 +2026,7 @@ def search(self, details=False, query_type=None, **kwargs):
one.search_terms(query_type='remote')
-For all of the search parameters, a single value or list may be provided. For `dataset`,
+For all search parameters, a single value or list may be provided. For `dataset`,
the sessions returned will contain all listed datasets. For the other parameters,
the session must contain at least one of the entries.
6 changes: 3 additions & 3 deletions one/registration.py
@@ -190,17 +190,17 @@ def create_new_session(self, subject, session_root=None, date=None, register=Tru

def find_files(self, session_path):
"""
-Returns an generator of file names that match one of the dataset type patterns in Alyx
+Returns a generator of file names that match one of the dataset type patterns in Alyx.
Parameters
----------
session_path : str, pathlib.Path
-The session path to search
+The session path to search.
Yields
-------
pathlib.Path
-File paths that match the dataset type patterns in Alyx
+File paths that match the dataset type patterns in Alyx.
"""
session_path = Path(session_path)
for p in session_path.rglob('*.*.*'):
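
A hedged sketch of the generator pattern (the `*.*.*` glob comes from the hunk above; the fnmatch check is a stand-in for the real Alyx dataset-type matching):

```python
from fnmatch import fnmatch
from pathlib import Path

def find_matching_files(session_path, patterns=('*.times.npy', '*.intervals.npy')):
    """Yield files under session_path matching any pattern (patterns are hypothetical)."""
    for p in Path(session_path).rglob('*.*.*'):
        if any(fnmatch(p.name, pat) for pat in patterns):
            yield p
```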
2 changes: 1 addition & 1 deletion one/webclient.py
@@ -980,7 +980,7 @@ def rest(self, url=None, action=None, id=None, data=None, files=None,
Optional file(s) to upload.
no_cache : bool
If true the `list` and `read` actions are performed without returning the cache.
-**kwargs
+kwargs
Filters as per the Alyx REST documentation
cf. https://openalyx.internationalbrainlab.org/docs/
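
Hypothetical use of keyword filters with the `rest` method (endpoint and filter names as documented by Alyx at the URL above):

```python
from one.webclient import AlyxClient

alyx = AlyxClient(base_url='https://openalyx.internationalbrainlab.org')
# Extra keyword arguments are forwarded to the endpoint as REST query filters
sessions = alyx.rest('sessions', 'list', subject='KS023', no_cache=True)
```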
