Skip to content

Commit

Permalink
Add option to preserve trailing slash on deletes
Browse files Browse the repository at this point in the history
Most object storages support having a slash in the object
name and rohmu should be able to delete such objects.

Let the default behaviour the same but introduce an option
`preserve_trailing_slash` which allows to control whether the trailing
slash in the key are stripped or not.

Added test coverage for `delete_key` and `delete_keys` methods for all
transfer types (several had no coverage of it).
  • Loading branch information
giacomo-alzetta-aiven committed Jan 16, 2025
1 parent 7ed9186 commit afed708
Show file tree
Hide file tree
Showing 13 changed files with 439 additions and 31 deletions.
6 changes: 4 additions & 2 deletions rohmu/object_storage/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,10 @@ def _iter_key(self, *, path: str, with_metadata: bool, deep: bool) -> Iterator[I
},
)

def delete_key(self, key: str) -> None:
path = self.format_key_for_backend(key, remove_slash_prefix=True)
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
path = self.format_key_for_backend(
key, remove_slash_prefix=True, trailing_slash=preserve_trailing_slash and key.endswith("/")
)
self.log.debug("Deleting key: %r", path)
try:
blob_client = self.get_blob_service_client().get_blob_client(container=self.container_name, blob=path)
Expand Down
10 changes: 5 additions & 5 deletions rohmu/object_storage/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,20 +255,20 @@ def format_key_from_backend(self, key: str) -> str:
raise StorageError(f"Key {repr(key)} does not start with expected prefix {repr(self.prefix)}")
return key[len(self.prefix) :]

def delete_key(self, key: str) -> None:
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
raise NotImplementedError

def delete_keys(self, keys: Collection[str]) -> None:
def delete_keys(self, keys: Collection[str], preserve_trailing_slash: bool = False) -> None:
"""Delete specified keys"""
for key in keys:
self.delete_key(key)
self.delete_key(key, preserve_trailing_slash=preserve_trailing_slash)

def delete_tree(self, key: str) -> None:
def delete_tree(self, key: str, preserve_trailing_slash: bool = False) -> None:
"""Delete all keys under given root key. Basic implementation works by just listing all available
keys and deleting them individually but storage providers can implement more efficient logic."""
self.log.debug("Deleting tree: %r", key)
names = [item["name"] for item in self.list_path(key, with_metadata=False, deep=True)]
self.delete_keys(names)
self.delete_keys(names, preserve_trailing_slash=preserve_trailing_slash)

def get_contents_to_file(
self, key: str, filepath_to_store_to: AnyPath, *, progress_callback: ProgressProportionCallbackType = None
Expand Down
4 changes: 2 additions & 2 deletions rohmu/object_storage/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,8 +452,8 @@ def initial_op(domain: Any) -> HttpRequest:
else:
raise NotImplementedError(property_name)

def delete_key(self, key: str) -> None:
path = self.format_key_for_backend(key)
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
path = self.format_key_for_backend(key, trailing_slash=preserve_trailing_slash and key.endswith("/"))
self.log.debug("Deleting key: %r", path)
with self._object_client(not_found=path) as clob:
# https://googleapis.github.io/google-api-python-client/docs/dyn/storage_v1.objects.html#delete
Expand Down
10 changes: 7 additions & 3 deletions rohmu/object_storage/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pathlib import Path
from rohmu.common.models import StorageOperation
from rohmu.common.statsd import StatsdConfig
from rohmu.errors import ConcurrentUploadError, FileNotFoundFromStorageError
from rohmu.errors import ConcurrentUploadError, Error, FileNotFoundFromStorageError
from rohmu.notifier.interface import Notifier
from rohmu.object_storage.base import (
BaseTransfer,
Expand Down Expand Up @@ -123,8 +123,10 @@ def _filter_metadata(self, metadata: Metadata) -> Metadata:
def get_metadata_for_key(self, key: str) -> Metadata:
return self._filter_metadata(self._get_metadata_for_key(key))

def delete_key(self, key: str) -> None:
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
self.log.debug("Deleting key: %r", key)
if preserve_trailing_slash:
raise Error("LocalTransfer does not support preserving trailing slashes")
target_path = self.format_key_for_backend(key.strip("/"))
if not os.path.exists(target_path):
raise FileNotFoundFromStorageError(key)
Expand All @@ -137,8 +139,10 @@ def delete_key(self, key: str) -> None:
os.unlink(metadata_path)
self.notifier.object_deleted(key=key)

def delete_tree(self, key: str) -> None:
def delete_tree(self, key: str, preserve_trailing_slash: bool = False) -> None:
self.log.debug("Deleting tree: %r", key)
if preserve_trailing_slash:
raise Error("LocalTransfer does not support preserving trailing slashes")
target_path = self.format_key_for_backend(key.strip("/"))
if not os.path.isdir(target_path):
raise FileNotFoundFromStorageError(key)
Expand Down
18 changes: 14 additions & 4 deletions rohmu/object_storage/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,20 +331,30 @@ def _metadata_for_key(self, key: str) -> Metadata:

return response["Metadata"]

def delete_key(self, key: str) -> None:
path = self.format_key_for_backend(key, remove_slash_prefix=True)
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
path = self.format_key_for_backend(
key, remove_slash_prefix=True, trailing_slash=preserve_trailing_slash and key.endswith("/")
)
self.log.debug("Deleting key: %r", path)
self._metadata_for_key(path) # check that key exists
self.stats.operation(StorageOperation.delete_key)
self.get_client().delete_object(Bucket=self.bucket_name, Key=path)
self.notifier.object_deleted(key=key)

def delete_keys(self, keys: Collection[str]) -> None:
def delete_keys(self, keys: Collection[str], preserve_trailing_slash: bool = False) -> None:
self.stats.operation(StorageOperation.delete_key, count=len(keys))
for batch in batched(keys, 1000): # Cannot delete more than 1000 objects at a time
formatted_keys = [
self.format_key_for_backend(
k,
remove_slash_prefix=True,
trailing_slash=preserve_trailing_slash and k.endswith("/"),
)
for k in batch
]
self.get_client().delete_objects(
Bucket=self.bucket_name,
Delete={"Objects": [{"Key": self.format_key_for_backend(key, remove_slash_prefix=True)} for key in batch]},
Delete={"Objects": [{"Key": key} for key in formatted_keys]},
)
# Note: `tree_deleted` is not used here because the operation on S3 is not atomic, i.e.
# it is possible for a new object to be created after `list_objects` above
Expand Down
6 changes: 4 additions & 2 deletions rohmu/object_storage/sftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from io import BytesIO
from rohmu.common.statsd import StatsdConfig
from rohmu.errors import FileNotFoundFromStorageError, InvalidConfigurationError
from rohmu.errors import Error, FileNotFoundFromStorageError, InvalidConfigurationError
from rohmu.notifier.interface import Notifier
from rohmu.object_storage.base import (
BaseTransfer,
Expand Down Expand Up @@ -210,7 +210,9 @@ def copy_file(
) -> None:
raise NotImplementedError

def delete_key(self, key: str) -> None:
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
if preserve_trailing_slash:
raise Error("SftpTransfer does not support preserving trailing slashes")
target_path = self.format_key_for_backend(key.strip("/"))
self.log.info("Removing path: %r", target_path)

Expand Down
4 changes: 2 additions & 2 deletions rohmu/object_storage/swift.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ def _delete_object_segments(self, key: str, manifest: str) -> None:
with suppress(FileNotFoundFromStorageError):
self._delete_object_plain(item["name"])

def delete_key(self, key: str) -> None:
path = self.format_key_for_backend(key)
def delete_key(self, key: str, preserve_trailing_slash: bool = False) -> None:
path = self.format_key_for_backend(key, trailing_slash=preserve_trailing_slash and key.endswith("/"))
self.log.debug("Deleting key: %r", path)
try:
headers = self.conn.head_object(self.container_name, path)
Expand Down
71 changes: 69 additions & 2 deletions test/object_storage/test_azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from rohmu.object_storage.azure import AzureTransfer
from rohmu.object_storage.config import AzureObjectStorageConfig
from tempfile import NamedTemporaryFile
from typing import Any, Optional
from unittest.mock import MagicMock, patch
from typing import Any, Optional, Union
from unittest.mock import call, MagicMock, patch

import azure.storage.blob
import pytest
Expand Down Expand Up @@ -241,3 +241,70 @@ def test_create_container_str(mocker: MockerFixture) -> None:
)
container_name = container_client_mock.call_args.kwargs["container_name"]
assert container_name == "bucket_name"


@pytest.mark.parametrize(
("key", "preserve_trailing_slash", "expected_key"),
[
("1", True, "test-prefix/1"),
("2/", True, "test-prefix/2/"),
("1", False, "test-prefix/1"),
("2/", False, "test-prefix/2"),
("1", None, "test-prefix/1"),
("2/", None, "test-prefix/2"),
],
)
def test_delete_key(
mock_get_blob_client: MagicMock,
key: str,
preserve_trailing_slash: Union[bool, None],
expected_key: str,
) -> None:
notifier = MagicMock()
transfer = AzureTransfer(
bucket_name="test_bucket",
account_name="test_account",
account_key="test_key2",
prefix="test-prefix/",
notifier=notifier,
)

if preserve_trailing_slash is None:
transfer.delete_key(key)
else:
transfer.delete_key(key, preserve_trailing_slash=preserve_trailing_slash)

mock_get_blob_client.assert_has_calls(
[
call(container="test_bucket", blob=expected_key),
call().delete_blob(),
]
)


@pytest.mark.parametrize("preserve_trailing_slash", [True, False, None])
def test_delete_keys(mock_get_blob_client: MagicMock, preserve_trailing_slash: Union[bool, None]) -> None:
notifier = MagicMock()
transfer = AzureTransfer(
bucket_name="test_bucket",
account_name="test_account",
account_key="test_key2",
prefix="test-prefix/",
notifier=notifier,
)
if preserve_trailing_slash is None:
transfer.delete_keys(["2", "3", "4/"])
else:
transfer.delete_keys(["2", "3", "4/"], preserve_trailing_slash=preserve_trailing_slash)

expected_keys = ["2", "3", "4/" if preserve_trailing_slash else "4"]
expected_calls = []
for expected_key in expected_keys:
expected_calls.extend(
[
call(container="test_bucket", blob=f"test-prefix/{expected_key}"),
call().delete_blob(),
]
)

mock_get_blob_client.assert_has_calls(expected_calls)
77 changes: 77 additions & 0 deletions test/object_storage/test_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from rohmu.object_storage.base import IterKeyItem
from rohmu.object_storage.google import GoogleTransfer, MediaIoBaseDownloadWithByteRange, Reporter
from tempfile import NamedTemporaryFile
from typing import Union
from unittest.mock import ANY, call, MagicMock, Mock, patch

import base64
Expand Down Expand Up @@ -452,3 +453,79 @@ def test_error_handling() -> None:
# ... and the legacy behaviour of bubbling up should not regress
with pytest.raises(HttpError, match="403"):
transfer._create_object_store_if_needed_unwrapped()


@pytest.mark.parametrize(
("key", "preserve_trailing_slash", "expected_key"),
[
("1", True, "test-prefix/1"),
("2/", True, "test-prefix/2/"),
("1", False, "test-prefix/1"),
("2/", False, "test-prefix/2"),
("1", None, "test-prefix/1"),
("2/", None, "test-prefix/2"),
],
)
def test_delete_key(key: str, preserve_trailing_slash: Union[bool, None], expected_key: str) -> None:
notifier = MagicMock()
with ExitStack() as stack:
stack.enter_context(patch("rohmu.object_storage.google.get_credentials"))
stack.enter_context(patch("rohmu.object_storage.google.GoogleTransfer._create_object_store_if_needed_unwrapped"))
_init_google_client_mock = stack.enter_context(
patch("rohmu.object_storage.google.GoogleTransfer._init_google_client")
)

transfer = GoogleTransfer(
project_id="test-project-id",
bucket_name="test-bucket",
prefix="test-prefix/",
notifier=notifier,
)
if preserve_trailing_slash is None:
transfer.delete_key(key)
else:
transfer.delete_key(key, preserve_trailing_slash=preserve_trailing_slash)

mock_client_delete = _init_google_client_mock.return_value.objects().delete
mock_client_delete.assert_has_calls(
[
call(bucket="test-bucket", object=expected_key),
call().execute(),
]
)


@pytest.mark.parametrize("preserve_trailing_slash", [True, False, None])
def test_delete_keys(preserve_trailing_slash: Union[bool, None]) -> None:
notifier = MagicMock()
with ExitStack() as stack:
stack.enter_context(patch("rohmu.object_storage.google.get_credentials"))
stack.enter_context(patch("rohmu.object_storage.google.GoogleTransfer._create_object_store_if_needed_unwrapped"))
_init_google_client_mock = stack.enter_context(
patch("rohmu.object_storage.google.GoogleTransfer._init_google_client")
)

transfer = GoogleTransfer(
project_id="test-project-id",
bucket_name="test-bucket",
prefix="test-prefix/",
notifier=notifier,
)
if preserve_trailing_slash is None:
transfer.delete_keys(["2", "3", "4/"])
else:
transfer.delete_keys(["2", "3", "4/"], preserve_trailing_slash=preserve_trailing_slash)

mock_client_delete = _init_google_client_mock.return_value.objects().delete

mock_client_delete = _init_google_client_mock.return_value.objects().delete
expected_keys = ["2", "3", "4"] if not preserve_trailing_slash else ["2", "3", "4/"]
expected_calls = []
for key in expected_keys:
expected_calls.extend(
[
call(bucket="test-bucket", object=f"test-prefix/{key}"),
call().execute(),
]
)
mock_client_delete.assert_has_calls(expected_calls)
Loading

0 comments on commit afed708

Please sign in to comment.