Skip to content

Commit

Permalink
feat: add ManabiS3Provider
Browse files Browse the repository at this point in the history
  • Loading branch information
open-dynaMIX committed Feb 1, 2024
1 parent 96eb1ab commit 9ebe611
Show file tree
Hide file tree
Showing 10 changed files with 1,333 additions and 732 deletions.
32 changes: 32 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
version: "3.8"

services:

db:
image: postgres:alpine
restart: "unless-stopped"
Expand All @@ -9,3 +11,33 @@ services:
POSTGRES_DB: manabi
ports:
- "5432:5432"

minio:
image: minio/minio:RELEASE.2023-09-04T19-57-37Z
volumes:
- minio_data:/data
ports:
- "9000:9000"
- "9090:9090"
environment:
MINIO_ROOT_USER: veryvery
MINIO_ROOT_PASSWORD: secretsecret
MINIO_BROWSER: on
command: server data --console-address ":9090"

mc:
image: minio/mc:latest
environment:
MINIO_ROOT_USER: veryvery
MINIO_ROOT_PASSWORD: secretsecret
entrypoint: >
/bin/sh -c "
mc config host add dc-minio http://minio:9000 $${MINIO_ROOT_USER} $${MINIO_ROOT_PASSWORD} --api S3v4;
mc mb dc-minio/manabi-media;
true"
depends_on:
- minio

volumes:
minio_data:
driver: local
13 changes: 13 additions & 0 deletions manabi/__snapshots__/filesystem_test.ambr
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# serializer version: 1
# name: test_collection_get[False-False-False-asdf.docx]
b'<?xml version="1.0" encoding="utf-8" ?>\n<ns0:multistatus xmlns:ns0="DAV:"><ns0:response><ns0:href>/dav/./</ns0:href><ns0:propstat><ns0:prop><ns0:resourcetype><ns0:collection /></ns0:resourcetype></ns0:prop><ns0:status>HTTP/1.1 200 OK</ns0:status></ns0:propstat></ns0:response><ns0:response><ns0:href>/dav/asdf.docx</ns0:href><ns0:propstat><ns0:prop><ns0:resourcetype /></ns0:prop><ns0:status>HTTP/1.1 200 OK</ns0:status></ns0:propstat></ns0:response></ns0:multistatus>'
# ---
# name: test_collection_get[False-True-True-asdf-s3.docx]
b'<?xml version="1.0" encoding="utf-8" ?>\n<ns0:multistatus xmlns:ns0="DAV:"><ns0:response><ns0:href>/dav/./</ns0:href><ns0:propstat><ns0:prop><ns0:resourcetype><ns0:collection /></ns0:resourcetype></ns0:prop><ns0:status>HTTP/1.1 200 OK</ns0:status></ns0:propstat></ns0:response><ns0:response><ns0:href>/dav/asdf-s3.docx</ns0:href><ns0:propstat><ns0:prop><ns0:resourcetype /></ns0:prop><ns0:status>HTTP/1.1 200 OK</ns0:status></ns0:propstat></ns0:response></ns0:multistatus>'
# ---
# name: test_collection_get[True-False-False-asdf.docx]
b'<?xml version="1.0" encoding="utf-8" ?>\n<ns0:multistatus xmlns:ns0="DAV:"><ns0:response><ns0:href>/dav/./</ns0:href><ns0:propstat><ns0:prop><ns0:resourcetype><ns0:collection /></ns0:resourcetype></ns0:prop><ns0:status>HTTP/1.1 200 OK</ns0:status></ns0:propstat></ns0:response><ns0:response><ns0:href>/dav/asdf.docx</ns0:href><ns0:propstat><ns0:prop><ns0:resourcetype /></ns0:prop><ns0:status>HTTP/1.1 200 OK</ns0:status></ns0:propstat></ns0:response></ns0:multistatus>'
# ---
# name: test_collection_get[True-True-True-asdf-s3.docx]
b'<?xml version="1.0" encoding="utf-8" ?>\n<ns0:multistatus xmlns:ns0="DAV:"><ns0:response><ns0:href>/dav/./</ns0:href><ns0:propstat><ns0:prop><ns0:resourcetype><ns0:collection /></ns0:resourcetype></ns0:prop><ns0:status>HTTP/1.1 200 OK</ns0:status></ns0:propstat></ns0:response><ns0:response><ns0:href>/dav/asdf-s3.docx</ns0:href><ns0:propstat><ns0:prop><ns0:resourcetype /></ns0:prop><ns0:status>HTTP/1.1 200 OK</ns0:status></ns0:propstat></ns0:response></ns0:multistatus>'
# ---
35 changes: 30 additions & 5 deletions manabi/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
from typing import Any, Dict, Generator
from unittest import mock as unitmock

import boto3
import pytest
from hypothesis import settings
from moto import mock_aws
from psycopg2 import connect

from . import mock
from .log import verbose_logging
from .mock import MockManabiDbLockStorage
from .mock import MockManabiDbLockStorage, upload_file_to_s3

TEST_FILES_DIR = Path(__file__).parent.resolve() / "data"


def configure_hypothesis():
Expand Down Expand Up @@ -88,13 +92,13 @@ def write_callback():


@pytest.fixture()
def server_dir() -> Path:
def server_dir(s3_file) -> Path:
return mock.get_server_dir()


@pytest.fixture()
def config(server_dir, lock_storage) -> Dict[str, Any]:
return mock.get_config(server_dir, lock_storage)
@pytest.fixture(params=[False])
def config(server_dir, lock_storage, request) -> Dict[str, Any]:
return mock.get_config(server_dir, lock_storage, request.param)


@pytest.fixture()
Expand All @@ -120,3 +124,24 @@ def cargo():
if shutil.which("cargo"):
with mock.branca_impl():
run(["cargo", "run", "x", "y"])


@pytest.fixture
def s3():
    """Yield a boto3 S3 client created inside moto's ``mock_aws`` context.

    Connection settings are read from the ``S3_*`` environment variables and
    fall back to the defaults below. The bucket named by ``S3_BUCKET_NAME``
    (default ``manabi-media``) is created before the client is yielded.
    """
    with mock_aws():
        env = os.environ.get
        client = boto3.client(
            "s3",
            endpoint_url=env("S3_ENDPOINT", "http://127.0.0.1:9000"),
            aws_access_key_id=env("S3_ACCESS_KEY_ID", "veryvery"),
            aws_secret_access_key=env("S3_SECRET_ACCESS_KEY", "secretsecret"),
            region_name=env("S3_REGION", "us-east-1"),
        )
        bucket = env("S3_BUCKET_NAME", "manabi-media")
        client.create_bucket(Bucket=bucket)
        yield client


@pytest.fixture
def s3_file(s3):
    """Call ``upload_file_to_s3`` with the ``s3`` fixture's client and return its result."""
    uploaded = upload_file_to_s3(s3)
    return uploaded
183 changes: 164 additions & 19 deletions manabi/filesystem.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,38 @@
import time
from pathlib import Path
from typing import Any, Dict, Optional

import boto3
from attr import dataclass
from botocore.exceptions import ClientError
from smart_open import open as s3_open
from wsgidav.dav_error import HTTP_FORBIDDEN, DAVError
from wsgidav.fs_dav_provider import FileResource, FilesystemProvider, FolderResource
from wsgidav.dav_provider import DAVCollection
from wsgidav.fs_dav_provider import FileResource, FilesystemProvider
from wsgidav.util import join_uri

from .token import Token
from .type_alias import WriteType
from .util import cattrib, requests_session


class ManabiFolderResource(FolderResource):
class ManabiFolderResource(DAVCollection):
def get_member_names(self):
token: Token = self.environ["manabi.token"]
# type manually checked
token_path: Path = token.path # type: ignore
if not token_path:
return []
path = Path(self._file_path, token_path)
if path.exists():
return [str(token.path)]
else:
return []
return [str(token.path)] if token.path else []

def get_member(self, name):
token: Token = self.environ["manabi.token"]
path = token.path
if Path(name) != path:
raise DAVError(HTTP_FORBIDDEN)
return super().get_member(name)
# This is hacky: `super().get_member(name)` will call `get_resource_inst()` for
# every member (always just one in our case). This member must be a
# `FileResource`, so we set `dir_access` to `False`.
# TODO: rethink our approach for distinguishing dir_access
self.environ["manabi.dir_access"] = False
return self.provider.get_resource_inst(join_uri(self.path, name), self.environ)

def create_empty_resource(self, name):
raise DAVError(HTTP_FORBIDDEN)
Expand Down Expand Up @@ -142,24 +146,165 @@ def __init__(
self._cb_hook_config = cb_hook_config
super().__init__(root_folder, readonly=readonly, fs_opts=fs_opts)

def get_file_resource(self, path, environ, fp):
    """Build the ``ManabiFileResource`` for ``path``.

    ``fp`` is passed through as the resource's file path, and the provider's
    callback-hook configuration is forwarded as ``cb_hook_config``.
    """
    hook_config = self._cb_hook_config
    resource = ManabiFileResource(path, environ, fp, cb_hook_config=hook_config)
    return resource

def file_exists(self, path: Path):
    """Return whether ``path`` exists on the local filesystem."""
    candidate = Path(path)
    return candidate.exists()

def get_resource_inst(self, path: str, environ: Dict[str, Any]):
token: Token = environ["manabi.token"]
dir_access = environ["manabi.dir_access"]

if dir_access:
assert token.path
path = f"/{str(token.path.parent)}"
fp = self._loc_to_file_path(path, environ)
if dir_access or Path(fp).is_dir():
return ManabiFolderResource(path, environ, fp)
return ManabiFolderResource(path, environ)
else:
path = token.path_as_url()
fp = self._loc_to_file_path(path, environ)
if Path(fp).exists():
return ManabiFileResource(
path,
environ,
fp,
cb_hook_config=self._cb_hook_config,
)
if self.file_exists(fp):
return self.get_file_resource(path, environ, fp)
else:
return None


class ManabiS3FileResource(ManabiFileResource):
    """File resource whose metadata and content come from an S3 bucket.

    The object is fetched once in ``__init__`` via ``get_object``; the
    metadata accessors read from that response. Content is streamed through
    ``smart_open`` using the same S3 client.
    """

    def __init__(
        self,
        s3,
        bucket_name,
        path,
        environ,
        file_path,
        *,
        cb_hook_config: Optional[CallbackHookConfig] = None,
    ):
        """Look up ``file_path`` in ``bucket_name`` and keep its metadata.

        Args:
            s3: S3 client used for ``get_object`` and handed to ``smart_open``.
            bucket_name: Name of the bucket holding the object.
            path: DAV path of the resource.
            environ: WSGI environ; must contain ``"manabi.token"``.
            file_path: Key of the object inside the bucket.
            cb_hook_config: Optional callback-hook configuration.

        Raises:
            botocore.exceptions.ClientError: Propagated from ``get_object``,
                e.g. with error code ``NoSuchKey`` when the object is missing.
        """
        self.s3 = s3
        self.bucket_name = bucket_name
        self._cb_config = cb_hook_config
        self._token = environ["manabi.token"]
        self.file_path = file_path
        self._file_path = file_path
        # Start the MRO lookup *after* FileResource, i.e. bypass
        # FileResource.__init__ (presumably because it inspects a local file
        # that does not exist for S3-backed resources -- confirm).
        super(FileResource, self).__init__(path, environ)
        self.file_obj = self.s3.get_object(Bucket=self.bucket_name, Key=file_path)
        # Only the metadata of the response is used by this class; content is
        # read through smart_open. Close the streaming body right away so the
        # underlying HTTP connection goes back to the pool instead of staying
        # checked out until garbage collection.
        body = self.file_obj.get("Body")
        if body is not None:
            body.close()
        self.name = Path(self.path).name
        self.provider: ManabiS3Provider

    def get_content_length(self):
        """Return the ``ContentLength`` reported by S3 for the object."""
        return self.file_obj["ContentLength"]

    def get_content_type(self):
        """Return the ``ContentType`` reported by S3 for the object."""
        return self.file_obj["ContentType"]

    def get_creation_date(self):
        """Return the last-modified timestamp as the creation date."""
        # Amazon S3 maintains only the last modified date for each object.
        return self.get_last_modified()

    def get_etag(self):
        """Return the object's ETag without the surrounding double quotes."""
        return self.file_obj["ETag"].strip('"')

    def get_last_modified(self):
        """Return the object's ``LastModified`` as a POSIX timestamp.

        ``datetime.timestamp()`` honours the datetime's tzinfo. The previous
        ``time.mktime(dt.timetuple())`` dropped the timezone and interpreted
        the (UTC) wall-clock fields as local time, shifting the result by the
        host's UTC offset.
        """
        return self.file_obj["LastModified"].timestamp()

    def get_content(self):
        """Open content as a stream for reading.

        We can't call `super()` here, because we need to use `open` from `smart_open`.
        """
        assert not self.is_collection
        return s3_open(
            f"s3://{self.bucket_name}/{self._file_path}",
            "rb",
            transport_params={"client": self.s3},
        )

    def begin_write(self, *, content_type):
        """Open content as a stream for writing.

        We can't call `super()` here, because we need to use `open` from `smart_open`.

        Args:
            content_type: Accepted for interface compatibility; not used here.

        Raises:
            DAVError: ``HTTP_FORBIDDEN`` when the provider is read-only.
        """
        # Pre-write hooks run first, before the read-only check.
        self.process_pre_write_hooks()
        assert not self.is_collection
        if self.provider.readonly:
            raise DAVError(HTTP_FORBIDDEN)
        return s3_open(
            f"s3://{self.bucket_name}/{self._file_path}",
            "wb",
            transport_params={"client": self.s3},
        )


class ManabiS3Provider(ManabiProvider):
    """Manabi DAV provider that serves files from an S3 bucket.

    An S3 client is created once in ``__init__`` and shared by every
    ``ManabiS3FileResource`` this provider hands out.
    """

    def __init__(
        self,
        root_folder,
        endpoint_url,
        aws_access_key_id,
        aws_secret_access_key,
        region_name,
        bucket_name,
        readonly=False,
        shadow=None,
        cb_hook_config: Optional[CallbackHookConfig] = None,
    ):
        """Store the configuration and create the S3 client.

        Args:
            root_folder: Root path; stored as a string in ``root_folder_path``.
            endpoint_url: URL of the S3 endpoint.
            aws_access_key_id: Access key id for the S3 client.
            aws_secret_access_key: Secret access key for the S3 client.
            region_name: Region name for the S3 client.
            bucket_name: Bucket that holds the served objects.
            readonly: When true, resources refuse to open for writing.
            shadow: Optional mapping; stored in ``self.shadow`` with
                lower-cased keys.
            cb_hook_config: Optional callback-hook configuration forwarded to
                the file resources.

        Raises:
            ValueError: If ``root_folder`` is falsy.
        """
        # Start the MRO lookup *after* FilesystemProvider, i.e. bypass
        # FilesystemProvider.__init__ (and ManabiProvider.__init__); the
        # attributes they would normally set are assigned by hand below.
        super(FilesystemProvider, self).__init__()

        if not root_folder:
            raise ValueError(f"Invalid root path: {root_folder}")

        self.root_folder_path = str(root_folder)
        self.readonly = readonly
        if shadow:
            self.shadow = {k.lower(): v for k, v in shadow.items()}
        else:
            self.shadow = {}

        # No filesystem options are supported here, so the shadow map that
        # would be derived from them is always empty.
        self.fs_opts = {}
        self.shadow_map = {}

        self._cb_hook_config = cb_hook_config

        self.endpoint_url = endpoint_url
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.region_name = region_name
        self.bucket_name = bucket_name
        self.s3 = boto3.client(
            "s3",
            endpoint_url=self.endpoint_url,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            region_name=self.region_name,
        )
        self._file_resource = None

    def get_file_resource(self, path, environ, fp):
        """Return the S3 file resource for ``fp``, or ``None`` if it is missing.

        Raises:
            botocore.exceptions.ClientError: For any S3 error other than
                ``NoSuchKey`` (e.g. access denied). These used to be swallowed
                and reported as "no such file".
        """
        try:
            return ManabiS3FileResource(
                self.s3,
                self.bucket_name,
                path,
                environ,
                fp,
                cb_hook_config=self._cb_hook_config,
            )
        except ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchKey":
                # File does not exist
                return None
            # Anything else is a real failure; do not disguise it as a
            # missing file.
            raise

    def file_exists(self, path: Path):
        """Always return ``True``; existence is checked in ``get_file_resource()``."""
        # Is handled implicitly in `get_file_resource()`
        return True
Loading

0 comments on commit 9ebe611

Please sign in to comment.