Resolves #114; minor punctuation; control thread count w/ env var
k1o0 committed Feb 6, 2024
1 parent 22f2972 commit a17cc18
Showing 4 changed files with 28 additions and 26 deletions.
9 changes: 5 additions & 4 deletions one/alf/io.py
@@ -687,7 +687,7 @@ def remove_uuid_recursive(folder, dry=False) -> None:


def next_num_folder(session_date_folder: Union[str, Path]) -> str:
"""Return the next number for a session given a session_date_folder"""
"""Return the next number for a session given a session_date_folder."""
session_date_folder = Path(session_date_folder)
if not session_date_folder.exists():
return '001'
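
A quick illustration of the behaviour described in the docstring (hypothetical paths; assumes the usual zero-padded session folders):

```python
from pathlib import Path
from one.alf.io import next_num_folder

# If /data/KS023/2024-02-06 contains session folders '001' and '002',
# the next number is returned as a zero-padded string.
date_folder = Path('/data/KS023/2024-02-06')
print(next_num_folder(date_folder))  # -> '003', or '001' if the folder doesn't exist
```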
@@ -701,7 +701,7 @@ def next_num_folder(session_date_folder: Union[str, Path]) -> str:


def remove_empty_folders(folder: Union[str, Path]) -> None:
"""Will iteratively remove any children empty folders"""
"""Iteratively remove any empty child folders."""
all_folders = sorted(x for x in Path(folder).rglob('*') if x.is_dir())
for f in reversed(all_folders): # Reversed sorted ensures we remove deepest first
try:
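
The reversed sort matters because a parent only becomes empty once its children are removed; a minimal standalone sketch of the same idea:

```python
import tempfile
from pathlib import Path

root = Path(tempfile.mkdtemp())
(root / 'a' / 'b').mkdir(parents=True)  # 'a' is non-empty until 'b' is gone

# Deepest-first traversal, so each rmdir call sees an already-empty folder
for d in reversed(sorted(p for p in root.rglob('*') if p.is_dir())):
    d.rmdir()
```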
@@ -712,8 +712,9 @@ def remove_empty_folders(folder: Union[str, Path]) -> None:

def filter_by(alf_path, wildcards=True, **kwargs):
"""
-Given a path and optional filters, returns all ALF files and their associated parts. The
-filters constitute a logical AND. For all but `extra`, if a list is provided, one or more
+Given a path and optional filters, returns all ALF files and their associated parts.
+The filters constitute a logical AND. For all but `extra`, if a list is provided, one or more
elements must match (a logical OR).
Parameters
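
The AND-between-filters, OR-within-a-list semantics may be clearer with a concrete call (hypothetical session path; the keyword names follow the ALF dataset parts used by this module):

```python
from one.alf.io import filter_by

# Both filters must hold (logical AND); within the attribute list,
# matching either entry suffices (logical OR).
files, parts = filter_by('/data/KS023/2024-02-06/001/alf',
                         object='trials',
                         attribute=['intervals', 'choice'])
```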
37 changes: 19 additions & 18 deletions one/api.py
@@ -12,6 +12,7 @@
from urllib.error import URLError
import time
import threading
+import os

import pandas as pd
import numpy as np
@@ -34,18 +35,18 @@

_logger = logging.getLogger(__name__)
__all__ = ['ONE', 'One', 'OneAlyx']
"""int: The number of download threads"""
N_THREADS = 4
N_THREADS = os.environ.get('ONE_HTTP_DL_THREADS', 4)
"""int: The number of download threads."""


class One(ConversionMixin):
"""An API for searching and loading data on a local filesystem"""
"""An API for searching and loading data on a local filesystem."""
_search_terms = (
'dataset', 'date_range', 'laboratory', 'number', 'projects', 'subject', 'task_protocol'
)

uuid_filenames = None
"""bool: whether datasets on disk have a UUID in their filename"""
"""bool: whether datasets on disk have a UUID in their filename."""

def __init__(self, cache_dir=None, mode='auto', wildcards=True, tables_dir=None):
"""An API for searching and loading data on a local filesystem
@@ -86,16 +87,16 @@ def __repr__(self):

@property
def offline(self):
"""bool: True if mode is local or no Web client set"""
"""bool: True if mode is local or no Web client set."""
return self.mode == 'local' or not getattr(self, '_web_client', False)

@util.refresh
def search_terms(self, query_type=None) -> tuple:
"""List the search term keyword args for use in the search method"""
"""List the search term keyword args for use in the search method."""
return self._search_terms

def _reset_cache(self):
"""Replace the cache object with a Bunch that contains the right fields"""
"""Replace the cache object with a Bunch that contains the right fields."""
self._cache = Bunch({'_meta': {
'expired': False,
'created_time': None,
@@ -214,18 +215,18 @@ def _save_cache(self, save_dir=None, force=False):
lock_file.unlink()

def refresh_cache(self, mode='auto'):
"""Check and reload cache tables
"""Check and reload cache tables.
Parameters
----------
mode : {'local', 'refresh', 'auto', 'remote'}
Options are 'local' (don't reload); 'refresh' (reload); 'auto' (reload if expired);
-'remote' (don't reload)
+'remote' (don't reload).
Returns
-------
datetime.datetime
-Loaded timestamp
+Loaded timestamp.
"""
# NB: Currently modified table will be lost if called with 'refresh';
# May be instances where modified cache is saved then immediately replaced with a new
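
Hypothetical calls illustrating the four modes (assumes a local cache directory exists):

```python
from one.api import One

one = One(cache_dir='/data/one')  # hypothetical cache directory
one.refresh_cache('auto')         # reload only if the tables have expired
one.refresh_cache('refresh')      # always reload from disk
one.refresh_cache('local')        # don't reload
one.refresh_cache('remote')       # don't reload either
```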
@@ -253,13 +254,13 @@ def _update_cache_from_records(self, strict=False, **kwargs):
strict : bool
If not True, the columns don't need to match. Extra columns in input tables are
dropped and missing columns are added and filled with np.nan.
-**kwargs
-pandas.DataFrame or pandas.Series to insert/update for each table
+kwargs
+pandas.DataFrame or pandas.Series to insert/update for each table.
Returns
-------
datetime.datetime:
-A timestamp of when the cache was updated
+A timestamp of when the cache was updated.
Example
-------
@@ -272,7 +273,7 @@ def _update_cache_from_records(self, strict=False, **kwargs):
When strict is True the input columns must exactly match those of the cache table,
including the order.
KeyError
-One or more of the keyword arguments does not match a table in One._cache
+One or more of the keyword arguments does not match a table in One._cache.
"""
updated = None
for table, records in kwargs.items():
@@ -389,7 +390,7 @@ def search(self, details=False, query_type=None, **kwargs):
one.search_terms()
-For all of the search parameters, a single value or list may be provided. For `dataset`,
+For all search parameters, a single value or list may be provided. For `dataset`,
the sessions returned will contain all listed datasets. For the other parameters,
the session must contain at least one of the entries.
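
For example (hypothetical dataset and subject names), the following returns only sessions containing both datasets, while either subject may match:

```python
from one.api import ONE

one = ONE()  # assumes a configured instance
eids = one.search(dataset=['trials.table', 'wheel.position'],  # all must be present
                  subject=['KS023', 'KS024'])                  # any may match
```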
@@ -521,7 +522,7 @@ def sort_fcn(itm):
eids = sessions.index.to_list()

if details:
-return eids, sessions.reset_index(drop=True).to_dict('records', Bunch)
+return eids, sessions.reset_index(drop=True).to_dict('records', into=Bunch)
else:
return eids
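
The `into=` keyword makes the target mapping type explicit, presumably ahead of pandas moving the extra `to_dict` arguments to keyword-only; a standalone sketch of the pattern (assuming `Bunch` is the attribute-access dict from iblutil that this module imports):

```python
import pandas as pd
from iblutil.util import Bunch  # assumption: the Bunch used by one.api

df = pd.DataFrame({'eid': ['abc123', 'def456'], 'n_trials': [402, 517]})

# Each record becomes a Bunch (a dict subclass with attribute access)
# rather than a plain dict
records = df.to_dict('records', into=Bunch)
print(records[0].n_trials)  # -> 402
```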

@@ -1237,7 +1238,7 @@ def _verify_specifiers(specifiers):
# Make list of metadata Bunches out of the table
records = (present_datasets
.reset_index(names='id')
-.to_dict('records', Bunch))
+.to_dict('records', into=Bunch))

# Ensure result same length as input datasets list
files = [None if not here else files.pop(0) for here in present]
@@ -2025,7 +2026,7 @@ def search(self, details=False, query_type=None, **kwargs):
one.search_terms(query_type='remote')
-For all of the search parameters, a single value or list may be provided. For `dataset`,
+For all search parameters, a single value or list may be provided. For `dataset`,
the sessions returned will contain all listed datasets. For the other parameters,
the session must contain at least one of the entries.
6 changes: 3 additions & 3 deletions one/registration.py
@@ -190,17 +190,17 @@ def create_new_session(self, subject, session_root=None, date=None, register=Tru

def find_files(self, session_path):
"""
-Returns an generator of file names that match one of the dataset type patterns in Alyx
+Returns a generator of file names that match one of the dataset type patterns in Alyx.
Parameters
----------
session_path : str, pathlib.Path
-The session path to search
+The session path to search.
Yields
-------
pathlib.Path
-File paths that match the dataset type patterns in Alyx
+File paths that match the dataset type patterns in Alyx.
"""
session_path = Path(session_path)
for p in session_path.rglob('*.*.*'):
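
A hedged sketch of the generator pattern (the `*.*.*` glob comes from the hunk above; the fnmatch check is a stand-in for the real Alyx dataset-type matching):

```python
from fnmatch import fnmatch
from pathlib import Path

def find_matching_files(session_path, patterns=('*.times.npy', '*.intervals.npy')):
    """Yield files under session_path matching any pattern (patterns are hypothetical)."""
    for p in Path(session_path).rglob('*.*.*'):
        if any(fnmatch(p.name, pat) for pat in patterns):
            yield p
```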
2 changes: 1 addition & 1 deletion one/webclient.py
@@ -980,7 +980,7 @@ def rest(self, url=None, action=None, id=None, data=None, files=None,
Optional file(s) to upload.
no_cache : bool
If true the `list` and `read` actions are performed without returning the cache.
-**kwargs
+kwargs
Filters as per the Alyx REST documentation
cf. https://openalyx.internationalbrainlab.org/docs/
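
Hypothetical use of keyword filters with the `rest` method (endpoint and filter names as documented by Alyx at the URL above):

```python
from one.webclient import AlyxClient

alyx = AlyxClient(base_url='https://openalyx.internationalbrainlab.org')
# Extra keyword arguments are forwarded to the endpoint as REST query filters
sessions = alyx.rest('sessions', 'list', subject='KS023', no_cache=True)
```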
