Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use dataclass instead of auto_repr #1349

Merged
merged 1 commit into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions dandi/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from threading import Lock
import time
from types import TracebackType
from typing import IO, Any, Literal, Protocol
from typing import IO, Any, Literal

from dandischema.models import DigestType
from fasteners import InterProcessLock
Expand All @@ -36,6 +36,7 @@
from .support.iterators import IteratorWithAggregation
from .support.pyout import naturalsize
from .utils import (
Hasher,
abbrev_prompt,
ensure_datetime,
exclude_from_zarr,
Expand Down Expand Up @@ -488,14 +489,6 @@ def _populate_dandiset_yaml(
}


class Hasher(Protocol):
    """Structural type for hashlib-style incremental hash objects.

    Any object offering ``update``/``hexdigest`` (e.g. the result of
    ``hashlib.sha256()``) satisfies this protocol.
    """

    def update(self, data: bytes) -> None:
        """Feed more bytes into the running digest."""
        ...

    def hexdigest(self) -> str:
        """Return the digest of all data fed so far, as a hex string."""
        ...


def _download_file(
downloader: Callable[[int], Iterator[bytes]],
path: Path,
Expand Down
42 changes: 17 additions & 25 deletions dandi/support/digests.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

from __future__ import annotations

from collections.abc import Callable
from dataclasses import dataclass, field
import hashlib
import logging
import os.path
Expand All @@ -21,12 +23,12 @@
from zarr_checksum import ZarrChecksumTree

from .threaded_walk import threaded_walk
from ..utils import auto_repr, exclude_from_zarr
from ..utils import Hasher, exclude_from_zarr

lgr = logging.getLogger("dandi.support.digests")


@auto_repr
@dataclass
class Digester:
"""Helper to compute multiple digests in one pass for a file"""

Expand All @@ -36,28 +38,18 @@ class Digester:
# Ideally we should find an efficient way to parallelize this but
# atm this one is sufficiently speedy

DEFAULT_DIGESTS = ["md5", "sha1", "sha256", "sha512"]
#: List of any supported algorithm labels, such as md5, sha1, etc.
digests: list[str] = field(
default_factory=lambda: ["md5", "sha1", "sha256", "sha512"]
)

def __init__(
self, digests: list[str] | None = None, blocksize: int = 1 << 16
) -> None:
"""
Parameters
----------
digests : list or None
List of any supported algorithm labels, such as md5, sha1, etc.
If None, a default set of hashes will be computed (md5, sha1,
sha256, sha512).
blocksize : int
Chunk size (in bytes) by which to consume a file.
"""
self._digests = digests or self.DEFAULT_DIGESTS
self._digest_funcs = [getattr(hashlib, digest) for digest in self._digests]
self.blocksize = blocksize
#: Chunk size (in bytes) by which to consume a file.
blocksize: int = 1 << 16

@property
def digests(self) -> list[str]:
return self._digests
digest_funcs: list[Callable[[], Hasher]] = field(init=False, repr=False)

def __post_init__(self) -> None:
self.digest_funcs = [getattr(hashlib, digest) for digest in self.digests]

def __call__(self, fpath: str | Path) -> dict[str, str]:
"""
Expand All @@ -70,14 +62,14 @@ def __call__(self, fpath: str | Path) -> dict[str, str]:
Keys are algorithm labels, and values are checksum strings
"""
lgr.debug("Estimating digests for %s" % fpath)
digests = [x() for x in self._digest_funcs]
digests = [x() for x in self.digest_funcs]
with open(fpath, "rb") as f:
while True:
block = f.read(self.blocksize)
if not block:
break
[d.update(block) for d in digests]

for d in digests:
d.update(block)
return {n: d.hexdigest() for n, d in zip(self.digests, digests)}


Expand Down
46 changes: 9 additions & 37 deletions dandi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import subprocess
import sys
import types
from typing import IO, Any, List, Optional, TypeVar, Union
from typing import IO, Any, List, Optional, Protocol, TypeVar, Union
from urllib.parse import parse_qs, urlparse, urlunparse

import dateutil.parser
Expand Down Expand Up @@ -60,6 +60,14 @@
)


class Hasher(Protocol):
    """Structural type for hashlib-style incremental hash objects.

    Any object offering ``update``/``hexdigest`` (e.g. the result of
    ``hashlib.sha256()``) satisfies this protocol.
    """

    def update(self, data: bytes) -> None:
        """Feed more bytes into the running digest."""
        ...

    def hexdigest(self) -> str:
        """Return the digest of all data fed so far, as a hex string."""
        ...


def is_interactive() -> bool:
"""Return True if all in/outs are tty"""
# TODO: check on windows if hasattr check would work correctly and add value:
Expand Down Expand Up @@ -511,42 +519,6 @@ def shortened_repr(value: Any, length: int = 30) -> str:
return value_repr


def __auto_repr__(obj: Any) -> str:
    """Render ``obj`` as ``ClassName(attr=value, ...)`` from its public attributes."""
    names: set[str] = set()
    if hasattr(obj, "__dict__"):
        names.update(obj.__dict__)
    if hasattr(obj, "__slots__"):
        names.update(obj.__slots__)
    # Public attributes only, in sorted order, each value abbreviated for brevity.
    # TODO: should we skip None values to quiet down talkative reprs such as URL?
    parts = [
        f"{name}={shortened_repr(getattr(obj, name))}"
        for name in sorted(names)
        if not name.startswith("_")
    ]
    return f"{obj.__class__.__name__}({', '.join(parts)})"


TT = TypeVar("TT", bound=type)


def auto_repr(cls: TT) -> TT:
    """Class decorator installing a quick-and-dirty ``__repr__``.

    The generated repr is built from the instance's public attributes.

    Original idea: http://stackoverflow.com/a/27799004/1265472
    """
    # setattr sidesteps the "incompatible assignment" complaint that a direct
    # ``cls.__repr__ = ...`` triggers in mypy.
    setattr(cls, "__repr__", __auto_repr__)
    return cls


def Parallel(**kwargs: Any) -> Any: # TODO: disable lint complaint
"""Adapter for joblib.Parallel so we could if desired, centralize control"""
# ATM just a straight invocation
Expand Down