Skip to content

Commit

Permalink
RF+OPT: centralize logic on creation of record on zarr download progress
Browse files Browse the repository at this point in the history
OPT: this avoids 2nd loop (after Counter) over the .files in .messge property
which was used only to report stats anyways.  This could be notable while
reporting on a zarr with 100_000 or more files
  • Loading branch information
yarikoptic committed Dec 16, 2024
1 parent 21dcfbc commit fe30c7b
Showing 1 changed file with 27 additions and 40 deletions.
67 changes: 27 additions & 40 deletions dandi/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -1193,27 +1193,6 @@ class ProgressCombiner:
prev_status: str = ""
yielded_size: bool = False

@property
def message(self) -> str:
done = 0
errored = 0
skipped = 0
for s in self.files.values():
if s.state is DLState.DONE:
done += 1
elif s.state in (DLState.ERROR, DLState.CHECKSUM_ERROR):
errored += 1
elif s.state is DLState.SKIPPED:
skipped += 1
parts = []
if done:
parts.append(f"{done} done")
if errored:
parts.append(f"{errored} errored")
if skipped:
parts.append(f"{skipped} skipped")
return ", ".join(parts)

def get_done(self) -> dict:
total_downloaded = sum(
s.downloaded
Expand All @@ -1231,7 +1210,8 @@ def get_done(self) -> dict:
"done%": total_downloaded / self.zarr_size * 100 if self.zarr_size else 0,
}

def set_status(self, statusdict: dict) -> None:
def get_status(self, report_done: bool = True) -> dict:

state_qtys = Counter(s.state for s in self.files.values())
total = len(self.files)
if (
Expand All @@ -1250,9 +1230,28 @@ def set_status(self, statusdict: dict) -> None:
new_status = "downloading"
else:
new_status = ""

statusdict = {}

if report_done:
msg_comps = []
for msg_label, states in {
"done": (DLState.DONE,),
"errored": (DLState.ERROR, DLState.CHECKSUM_ERROR),
"skipped": (DLState.SKIPPED,),
}.items():
if count := sum(state_qtys.get(state, 0) for state in states):
msg_comps.append(f"{count} {msg_label}")
if msg_comps:
statusdict["message"] = ", ".join(msg_comps)

if new_status != self.prev_status:
statusdict["status"] = new_status
self.prev_status = new_status
self.prev_status = statusdict["status"] = new_status

if report_done and self.zarr_size:
statusdict.update(self.get_done())

return statusdict

def feed(self, path: str, status: dict) -> Iterator[dict]:
keys = list(status.keys())
Expand All @@ -1265,25 +1264,19 @@ def feed(self, path: str, status: dict) -> Iterator[dict]:
yield {"size": self.zarr_size}
if status.get("status") == "skipped":
self.files[path].state = DLState.SKIPPED
out = {"message": self.message}
# Treat skipped as "downloaded" for the matter of accounting
if size is not None:
self.files[path].downloaded = size
self.maxsize += size
self.set_status(out)
if self.zarr_size:
out.update(self.get_done())
yield out
yield self.get_status()
elif keys == ["size"]:
self.files[path].size = size
self.maxsize += status["size"]
if any(s.state is DLState.DOWNLOADING for s in self.files.values()):
yield self.get_done()
elif status == {"status": "downloading"}:
self.files[path].state = DLState.DOWNLOADING
out = {}
self.set_status(out)
if out:
if out := self.get_status(report_done=False):
yield out
elif "done" in status:
self.files[path].downloaded = status["done"]
Expand All @@ -1296,20 +1289,14 @@ def feed(self, path: str, status: dict) -> Iterator[dict]:
sz = self.files[path].size
if sz is not None:
self.maxsize -= sz
out = {"message": self.message}
self.set_status(out)
out.update(self.get_done())
yield out
yield self.get_status()
elif keys == ["checksum"]:
pass
elif status == {"status": "setting mtime"}:
pass
elif status == {"status": "done"}:
self.files[path].state = DLState.DONE
out = {"message": self.message}
self.set_status(out)
out.update(self.get_done())
yield out
yield self.get_status()
else:
lgr.warning(
"Unexpected download status dict for %r received: %r", path, status
Expand Down

0 comments on commit fe30c7b

Please sign in to comment.