From 500f1ddd9ad80cc27c0a233b8ee1ae4d1f4dec5e Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 5 Sep 2023 10:07:49 -0400 Subject: [PATCH] refact!: start working on moving DRS to s3-only backend --- chord_drs/backend.py | 37 +++++++++++++++++++++------- chord_drs/backends/__init__.py | 0 chord_drs/backends/base.py | 18 -------------- chord_drs/backends/local.py | 25 ------------------- chord_drs/backends/minio.py | 35 --------------------------- chord_drs/config.py | 44 +++++++++++++--------------------- chord_drs/data_sources.py | 21 ---------------- chord_drs/models.py | 10 +------- chord_drs/routes.py | 1 - 9 files changed, 46 insertions(+), 145 deletions(-) delete mode 100644 chord_drs/backends/__init__.py delete mode 100644 chord_drs/backends/base.py delete mode 100644 chord_drs/backends/local.py delete mode 100644 chord_drs/backends/minio.py delete mode 100644 chord_drs/data_sources.py diff --git a/chord_drs/backend.py b/chord_drs/backend.py index 809fcca..10ac099 100644 --- a/chord_drs/backend.py +++ b/chord_drs/backend.py @@ -1,7 +1,6 @@ +import boto3 from flask import current_app, g - -from chord_drs.backends.base import Backend -from chord_drs.data_sources import DATA_SOURCE_BACKENDS +from urllib.parse import urlparse __all__ = [ @@ -10,15 +9,35 @@ ] -def _get_backend() -> Backend | None: - # Instantiate backend if needed - backend_class = DATA_SOURCE_BACKENDS.get(current_app.config["SERVICE_DATA_SOURCE"]) - return backend_class() if backend_class else None +class Backend: + def __init__(self, resource=None): + self._client = resource or boto3.resource( + "s3", + endpoint_url=current_app.config["DRS_S3_API_URL"], + aws_access_key_id=current_app.config["DRS_S3_ACCESS_KEY"], + aws_secret_access_key=current_app.config["DRS_S3_SECRET_KEY"] + ) + + self.bucket = self._client.Bucket(current_app.config["DRS_S3_BUCKET"]) + + @staticmethod + def build_minio_location(obj): + host = urlparse(current_app.config["MINIO_URL"]).netloc + return f"s3://{host}/{obj.bucket_name}/{obj.key}" + + def get_minio_object(self, location: str): + obj = self.bucket.Object(location.split("/")[-1]) + return obj.get() + + def save(self, current_location: str, filename: str) -> str: + with open(current_location, "rb") as f: + obj = self.bucket.put_object(Key=filename, Body=f) + return self.build_minio_location(obj) -def get_backend() -> Backend | None: +def get_backend() -> Backend: if "backend" not in g: - g.backend = _get_backend() + g.backend = Backend() return g.backend diff --git a/chord_drs/backends/__init__.py b/chord_drs/backends/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/chord_drs/backends/base.py b/chord_drs/backends/base.py deleted file mode 100644 index b810f63..0000000 --- a/chord_drs/backends/base.py +++ /dev/null @@ -1,18 +0,0 @@ -from abc import ABC, abstractmethod - - -__all__ = ["Backend", "FakeBackend"] - - -class Backend(ABC): - @abstractmethod - def save(self, current_location: str, filename: str) -> str: # pragma: no cover - pass - - -class FakeBackend(Backend): - """ - For the tests - """ - def save(self, current_location: str, filename: str) -> str: - return current_location diff --git a/chord_drs/backends/local.py b/chord_drs/backends/local.py deleted file mode 100644 index a714902..0000000 --- a/chord_drs/backends/local.py +++ /dev/null @@ -1,25 +0,0 @@ -from shutil import copy -from flask import current_app -from pathlib import Path - -from .base import Backend - - -__all__ = ["LocalBackend"] - - -class LocalBackend(Backend): - """ - Default backend class for the location of the objects served - by this service. Lives on the current filesystem, in a directory - specified by the DATA var env, the default being in ~/chord_drs_data - """ - def __init__(self): - self.base_location = Path(current_app.config["SERVICE_DATA"]) - # We can use mkdir, since resolve has been called in config.py - self.base_location.mkdir(exist_ok=True) - - def save(self, current_location: str | Path, filename: str) -> str: - new_location = self.base_location / filename - copy(current_location, new_location) - return str(new_location.resolve()) diff --git a/chord_drs/backends/minio.py b/chord_drs/backends/minio.py deleted file mode 100644 index e35bd7b..0000000 --- a/chord_drs/backends/minio.py +++ /dev/null @@ -1,35 +0,0 @@ -import boto3 - -from flask import current_app -from urllib.parse import urlparse - -from .base import Backend - - -__all__ = ["MinioBackend"] - - -class MinioBackend(Backend): - def __init__(self, resource=None): - self.minio = resource or boto3.resource( - 's3', - endpoint_url=current_app.config["MINIO_URL"], - aws_access_key_id=current_app.config["MINIO_USERNAME"], - aws_secret_access_key=current_app.config["MINIO_PASSWORD"] - ) - - self.bucket = self.minio.Bucket(current_app.config["MINIO_BUCKET"]) - - @staticmethod - def build_minio_location(obj): - host = urlparse(current_app.config["MINIO_URL"]).netloc - return f"s3://{host}/{obj.bucket_name}/{obj.key}" - - def get_minio_object(self, location: str): - obj = self.bucket.Object(location.split("/")[-1]) - return obj.get() - - def save(self, current_location: str, filename: str) -> str: - with open(current_location, "rb") as f: - obj = self.bucket.put_object(Key=filename, Body=f) - return MinioBackend.build_minio_location(obj) diff --git a/chord_drs/config.py b/chord_drs/config.py index dd26eab..9847ac8 100644 --- a/chord_drs/config.py +++ b/chord_drs/config.py @@ -4,7 +4,6 @@ from dotenv import load_dotenv from .constants import SERVICE_NAME, SERVICE_TYPE -from .data_sources import DATA_SOURCE_LOCAL, DATA_SOURCE_MINIO from .logger import logger @@ -41,26 +40,21 @@ def _get_from_environ_or_fail(var: str) -> str: AUTHZ_ENABLED = os.environ.get("AUTHZ_ENABLED", "true").strip().lower() in TRUTH_VALUES AUTHZ_URL: str = _get_from_environ_or_fail("BENTO_AUTHZ_SERVICE_URL").strip().rstrip("/") if AUTHZ_ENABLED else "" -# MinIO-related, check if the credentials have been provided in a file -MINIO_URL = os.environ.get("MINIO_URL") -MINIO_ACCESS_KEY_FILE = os.environ.get("MINIO_ACCESS_KEY_FILE") -MINIO_SECRET_KEY_FILE = os.environ.get("MINIO_ACCESS_KEY_FILE") +# S3 backend-related, check if the credentials have been provided in a file +DRS_S3_API_URL = os.environ.get("DRS_S3_API_URL") -MINIO_USERNAME = os.environ.get("MINIO_USERNAME") -MINIO_PASSWORD = os.environ.get("MINIO_PASSWORD") +DRS_S3_ACCESS_KEY = os.environ.get("DRS_S3_ACCESS_KEY") +DRS_S3_SECRET_KEY = os.environ.get("DRS_S3_SECRET_KEY") -if MINIO_SECRET_KEY_FILE: - MINIO_ACCESS_KEY_PATH = Path(MINIO_ACCESS_KEY_FILE).resolve() +if DRS_S3_ACCESS_KEY_FILE := os.environ.get("DRS_S3_ACCESS_KEY_FILE"): + if (kp := Path(DRS_S3_ACCESS_KEY_FILE).resolve()).exists(): + with open(kp, "r") as f: + DRS_S3_ACCESS_KEY = f.read().strip() - if MINIO_ACCESS_KEY_PATH.exists(): - with open(MINIO_ACCESS_KEY_PATH, "r") as f: - MINIO_USERNAME = f.read().strip() - -if MINIO_SECRET_KEY_FILE: - MINIO_SECRET_KEY_PATH = Path(MINIO_SECRET_KEY_FILE).resolve() - if MINIO_SECRET_KEY_PATH.exists(): - with open(MINIO_SECRET_KEY_PATH, "r") as f: - MINIO_PASSWORD = f.read().strip() +if DRS_S3_SECRET_KEY_FILE := os.environ.get("DRS_S3_SECRET_KEY_FILE"): + if (kp := Path(DRS_S3_SECRET_KEY_FILE).resolve()).exists(): + with open(kp, "r") as f: + DRS_S3_SECRET_KEY = f.read().strip() class Config: @@ -70,14 +64,12 @@ class Config: PROMETHEUS_ENABLED: bool = os.environ.get("PROMETHEUS_ENABLED", "false").strip().lower() in TRUTH_VALUES SERVICE_ID: str = os.environ.get("SERVICE_ID", ":".join(list(SERVICE_TYPE.values())[:2])) - SERVICE_DATA_SOURCE: str = DATA_SOURCE_MINIO if MINIO_URL else DATA_SOURCE_LOCAL - SERVICE_DATA: str | None = None if MINIO_URL else SERVICE_DATA SERVICE_BASE_URL: str = os.environ.get("SERVICE_BASE_URL", "http://127.0.0.1").strip().rstrip("/") - MINIO_URL: str | None = MINIO_URL - MINIO_USERNAME: str | None = MINIO_USERNAME - MINIO_PASSWORD: str | None = MINIO_PASSWORD - MINIO_BUCKET: str | None = os.environ.get("MINIO_BUCKET") if MINIO_URL else None + DRS_S3_API_URL: str | None = DRS_S3_API_URL + DRS_S3_ACCESS_KEY: str | None = DRS_S3_ACCESS_KEY + DRS_S3_SECRET_KEY: str | None = DRS_S3_SECRET_KEY + DRS_S3_BUCKET: str | None = os.environ.get("DRS_S3_BUCKET") BENTO_DEBUG = os.environ.get("BENTO_DEBUG", os.environ.get("FLASK_DEBUG", "false")).strip().lower() in TRUTH_VALUES # CORS @@ -89,6 +81,4 @@ class Config: print(f"[{SERVICE_NAME}] Using: database URI {Config.SQLALCHEMY_DATABASE_URI}") -print(f"[{SERVICE_NAME}] data source {Config.SERVICE_DATA_SOURCE}") -print(f"[{SERVICE_NAME}] data path {Config.SERVICE_DATA}") -print(f"[{SERVICE_NAME}] minio URL {Config.MINIO_URL}", flush=True) +print(f"[{SERVICE_NAME}] s3 URL {Config.DRS_S3_API_URL}", flush=True) diff --git a/chord_drs/data_sources.py b/chord_drs/data_sources.py deleted file mode 100644 index 22117cf..0000000 --- a/chord_drs/data_sources.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Type - -from .backends.base import Backend -from .backends.local import LocalBackend -from .backends.minio import MinioBackend - - -__all__ = [ - "DATA_SOURCE_LOCAL", - "DATA_SOURCE_MINIO", - "DATA_SOURCE_BACKENDS", -] - - -DATA_SOURCE_LOCAL = "local" -DATA_SOURCE_MINIO = "minio" - -DATA_SOURCE_BACKENDS: dict[str, Type[Backend]] = { - DATA_SOURCE_LOCAL: LocalBackend, - DATA_SOURCE_MINIO: MinioBackend, -} diff --git a/chord_drs/models.py b/chord_drs/models.py index 906f6cc..c382b48 100644 --- a/chord_drs/models.py +++ b/chord_drs/models.py @@ -8,7 +8,6 @@ from uuid import uuid4 from .backend import get_backend -from .backends.minio import MinioBackend from .db import db from .utils import drs_file_checksum @@ -93,8 +92,6 @@ def __init__(self, *args, **kwargs): backend = get_backend() - if not backend: - raise Exception("The backend for this instance is not properly configured.") try: self.location = backend.save(location, new_filename) self.size = os.path.getsize(p) @@ -115,9 +112,4 @@ def return_minio_object(self): if parsed_url.scheme != "s3": return None - backend = get_backend() - - if not backend or not isinstance(backend, MinioBackend): - raise Exception("The backend for this instance is not properly configured.") - - return backend.get_minio_object(self.location) + return get_backend().get_minio_object(self.location) diff --git a/chord_drs/routes.py b/chord_drs/routes.py index 97a9283..0949054 100644 --- a/chord_drs/routes.py +++ b/chord_drs/routes.py @@ -20,7 +20,6 @@ from . import __version__ from .authz import authz_middleware, PERMISSION_INGEST_DATA, PERMISSION_QUERY_DATA, PERMISSION_DOWNLOAD_DATA from .constants import BENTO_SERVICE_KIND, SERVICE_NAME, SERVICE_TYPE -from .data_sources import DATA_SOURCE_LOCAL, DATA_SOURCE_MINIO from .db import db from .models import DrsBlob, DrsBundle from .types import DRSAccessMethodDict, DRSContentsDict, DRSObjectDict