Skip to content

Commit

Permalink
remove object type
Browse files Browse the repository at this point in the history
  • Loading branch information
CodyCBakerPhD committed Aug 26, 2024
1 parent b6ed033 commit 46ea0c8
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 27 deletions.
11 changes: 1 addition & 10 deletions src/dandi_s3_log_parser/_command_line_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import collections
import pathlib
from typing import Literal

import click

Expand Down Expand Up @@ -134,12 +133,6 @@ def _bin_all_reduced_s3_logs_by_object_key_cli(
required=True,
type=click.Path(writable=False),
)
@click.option(
"--object_type",
help="The type of objects to map the logs to, as determined by the parents of the object keys.",
required=True,
type=click.Choice(["blobs", "zarr"]),
)
@click.option(
"--excluded_dandisets",
help="A comma-separated list of Dandiset IDs to exclude from processing.",
Expand All @@ -156,15 +149,14 @@ def _bin_all_reduced_s3_logs_by_object_key_cli(
)
@click.option(
"--dandiset_limit",
help="The maximum number of Dandisets to process per call.",
help="The maximum number of Dandisets to process per call. Useful for quick testing.",
required=False,
type=int,
default=None,
)
def _map_binned_s3_logs_to_dandisets_cli(
binned_s3_logs_folder_path: pathlib.Path,
mapped_s3_logs_folder_path: pathlib.Path,
object_type: Literal["blobs", "zarr"],
excluded_dandisets: str | None,
restrict_to_dandisets: str | None,
dandiset_limit: int | None,
Expand All @@ -175,7 +167,6 @@ def _map_binned_s3_logs_to_dandisets_cli(
map_binned_s3_logs_to_dandisets(
binned_s3_logs_folder_path=binned_s3_logs_folder_path,
mapped_s3_logs_folder_path=mapped_s3_logs_folder_path,
object_type=object_type,
excluded_dandisets=split_excluded_dandisets,
restrict_to_dandisets=split_restrict_to_dandisets,
dandiset_limit=dandiset_limit,
Expand Down
12 changes: 1 addition & 11 deletions src/dandi_s3_log_parser/_map_binned_s3_logs_to_dandisets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import pathlib
from typing import Literal

import dandi.dandiapi
import natsort
Expand All @@ -15,7 +14,6 @@
def map_binned_s3_logs_to_dandisets(
binned_s3_logs_folder_path: DirectoryPath,
mapped_s3_logs_folder_path: DirectoryPath,
object_type: Literal["blobs", "zarr"],
excluded_dandisets: list[str] | None = None,
restrict_to_dandisets: list[str] | None = None,
dandiset_limit: int | None = None,
Expand All @@ -33,14 +31,13 @@ def map_binned_s3_logs_to_dandisets(
The path to the folder containing the reduced S3 log files.
mapped_s3_logs_folder_path : DirectoryPath
The path to the folder where the mapped logs will be saved.
object_type : one of "blobs" or "zarr"
The type of objects to map the logs to, as determined by the parents of the object keys.
excluded_dandisets : list of str, optional
A list of Dandiset IDs to exclude from processing.
restrict_to_dandisets : list of str, optional
A list of Dandiset IDs to exclusively process.
dandiset_limit : int, optional
The maximum number of Dandisets to process per call.
Useful for quick testing.
"""
if "IPINFO_CREDENTIALS" not in os.environ:
message = "The environment variable 'IPINFO_CREDENTIALS' must be set to import `dandi_s3_log_parser`!"
Expand Down Expand Up @@ -89,7 +86,6 @@ def map_binned_s3_logs_to_dandisets(
dandiset=dandiset,
binned_s3_logs_folder_path=binned_s3_logs_folder_path,
dandiset_logs_folder_path=mapped_s3_logs_folder_path,
object_type=object_type,
client=client,
ip_hash_to_region=ip_hash_to_region,
ip_hash_not_in_services=ip_hash_not_in_services,
Expand All @@ -105,7 +101,6 @@ def _map_binned_logs_to_dandiset(
dandiset: dandi.dandiapi.RemoteDandiset,
binned_s3_logs_folder_path: pathlib.Path,
dandiset_logs_folder_path: pathlib.Path,
object_type: Literal["blobs", "zarr"],
client: dandi.dandiapi.DandiAPIClient,
ip_hash_to_region: dict[str, str],
ip_hash_not_in_services: dict[str, bool],
Expand Down Expand Up @@ -148,11 +143,6 @@ def _map_binned_logs_to_dandiset(
dandi_filename = asset_as_path.name.replace(".", "_")

is_asset_zarr = ".zarr" in asset_suffixes
if is_asset_zarr and object_type == "blobs":
continue
if not is_asset_zarr and object_type == "zarr":
continue

if is_asset_zarr:
blob_id = asset.zarr
binned_s3_log_file_path = binned_s3_logs_folder_path / "zarr" / f"{blob_id}.tsv"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,6 @@ def test_map_all_reduced_s3_logs_to_dandisets(tmpdir: py.path.local):
dandi_s3_log_parser.map_binned_s3_logs_to_dandisets(
binned_s3_logs_folder_path=example_binned_s3_logs_folder_path,
mapped_s3_logs_folder_path=test_mapped_s3_logs_folder_path,
object_type="blobs",
)
dandi_s3_log_parser.map_binned_s3_logs_to_dandisets(
binned_s3_logs_folder_path=example_binned_s3_logs_folder_path,
mapped_s3_logs_folder_path=test_mapped_s3_logs_folder_path,
object_type="zarr",
)

test_file_paths = {
Expand Down

0 comments on commit 46ea0c8

Please sign in to comment.