diff --git a/packages/backend/apps/users/services/export/__init__.py b/packages/backend/apps/users/services/export/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/packages/backend/apps/users/services/export/constants.py b/packages/backend/apps/users/services/export/constants.py
new file mode 100644
index 000000000..9b1c8463e
--- /dev/null
+++ b/packages/backend/apps/users/services/export/constants.py
@@ -0,0 +1,11 @@
+from enum import Enum
+
+
+class ExportUserArchiveRootPaths(str, Enum):
+    LOCAL_ROOT = "tmp"
+    S3_ROOT = "exports"
+
+
+class UserEmails(str, Enum):
+    USER_EXPORT = "USER_EXPORT"
+    USER_EXPORT_ADMIN = "USER_EXPORT_ADMIN"
diff --git a/packages/backend/apps/users/services/export/emails.py b/packages/backend/apps/users/services/export/emails.py
new file mode 100644
index 000000000..44a35daa2
--- /dev/null
+++ b/packages/backend/apps/users/services/export/emails.py
@@ -0,0 +1,15 @@
+from common.emails import get_send_email_event
+
+# NOTE: these names live in this package, not in `userauth` (fixed import paths).
+from .constants import UserEmails
+from .types import ExportedUserData
+
+
+def get_user_export_email_event(to: str, data: ExportedUserData):
+    """Build the send-email event telling a user their export is ready."""
+    return get_send_email_event(detail_type=UserEmails.USER_EXPORT.value, data={"to": to, "data": data})
+
+
+def get_admin_export_email_event(to: str, data: list[ExportedUserData]):
+    """Build the send-email event summarizing all exports for the admin."""
+    return get_send_email_event(detail_type=UserEmails.USER_EXPORT_ADMIN.value, data={"to": to, "data": data})
diff --git a/packages/backend/apps/users/services/export/handlers.py b/packages/backend/apps/users/services/export/handlers.py
new file mode 100644
index 000000000..c27d433c1
--- /dev/null
+++ b/packages/backend/apps/users/services/export/handlers.py
@@ -0,0 +1,20 @@
+import logging
+
+from dao.db.session import db_session
+
+from .services import user as user_services
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+
+def user_data_export(event, context):
+    """Lambda entry point: export data for the users listed in the event detail."""
+    logger.info(event)
+
+    event_detail = event.get("detail", {})
+    user_ids = event_detail.get("user_ids", [])
+    admin_email = event_detail.get("admin_email")
+
+    with db_session() as session:
+        user_services.process_user_data_export(session=session, user_ids=user_ids, admin_email=admin_email)
diff --git a/packages/backend/apps/users/services/export/protocols.py b/packages/backend/apps/users/services/export/protocols.py
new file mode 100644
index 000000000..9364c13a4
--- /dev/null
+++ b/packages/backend/apps/users/services/export/protocols.py
@@ -0,0 +1,24 @@
+from typing import Protocol, Type, Union
+
+from pydantic import BaseModel
+
+from ...models import User
+
+
+class UserDataExportable(Protocol):
+    """Serializes a slice of a user's data, stored under `export_key` in the archive."""
+
+    export_key: str
+    schema_class: Type[BaseModel]
+
+    @classmethod
+    def export(cls, user: User) -> Union[str, list[str]]:
+        ...
+
+
+class UserFilesExportable(Protocol):
+    """Returns storage keys of files owned by the user to copy into the archive."""
+
+    @classmethod
+    def export(cls, user: User) -> list[str]:
+        ...
diff --git a/packages/backend/apps/users/services/export/services/__init__.py b/packages/backend/apps/users/services/export/services/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/packages/backend/apps/users/services/export/services/export.py b/packages/backend/apps/users/services/export/services/export.py
new file mode 100644
index 000000000..3009a14f0
--- /dev/null
+++ b/packages/backend/apps/users/services/export/services/export.py
@@ -0,0 +1,117 @@
+import datetime
+import io
+import json
+import zipfile
+from typing import Union
+
+import boto3
+from django.conf import settings
+
+from common.utils import hashid  # TODO(review): confirm the hashid helper's import path
+
+from ..constants import ExportUserArchiveRootPaths
+from ..protocols import UserDataExportable, UserFilesExportable
+from ..types import UserType
+from ....models import User
+
+
+class CrudDemoItemDataExport(UserDataExportable):
+    """Exports the user's CRUD demo items as a list of JSON strings."""
+
+    export_key = "crud_demo_items"
+    # NOTE(review): `schema_class` is required by UserDataExportable but was
+    # never set here — `export()` raises AttributeError as written. Define the
+    # pydantic schema for CrudDemoItem and assign it before shipping.
+
+    @classmethod
+    def export(cls, user: User) -> list[str]:
+        return [cls.schema_class.from_orm(item).json() for item in user.cruddemoitem_set]
+
+
+class DocumentDemoItemFileExport(UserFilesExportable):
+    """Exports the storage keys of the user's demo documents."""
+
+    @classmethod
+    def export(cls, user: User) -> list[str]:
+        return [document.file for document in user.documents]
+
+
+class UserDataExport(UserDataExportable):
+    """Exports the user's core record as a single JSON string."""
+
+    export_key = "user"
+    # Serialize through the pydantic schema, not the ORM model: the protocol
+    # requires a BaseModel subclass and `from_orm` is a pydantic API.
+    schema_class = UserType
+
+    @classmethod
+    def export(cls, user: User) -> Union[str, list[str]]:
+        return cls.schema_class.from_orm(user).json()
+
+
+class ExportUserArchive:
+    """Builds a zip archive of a user's data and files and uploads it to S3.
+
+    `run()` returns a presigned URL pointing at the uploaded archive.
+    """
+
+    _DATA_EXPORTS: list[UserDataExportable] = [UserDataExport, CrudDemoItemDataExport]
+    _FILES_EXPORTS: list[UserFilesExportable] = [DocumentDemoItemFileExport]
+
+    def __init__(self, user: User):
+        self._user = user
+
+    @property
+    def _user_id(self) -> str:
+        return hashid.encode(self._user.id)
+
+    def run(self) -> str:
+        user_data = self._export_user_data()
+        user_files = self._export_user_files()
+
+        archive_filename = self._export_user_archive_to_zip(user_data, user_files)
+        return self._export_zip_archive_to_s3(archive_filename)
+
+    def _export_user_data(self) -> dict:
+        """Collect each data exporter's output keyed by its `export_key`."""
+        return {data_export.export_key: data_export.export(self._user) for data_export in self._DATA_EXPORTS}
+
+    def _export_user_files(self) -> list:
+        """Collect the storage keys of every exported file owned by the user."""
+        export_files_paths = []
+        for user_file_export in self._FILES_EXPORTS:
+            export_files_paths.extend(user_file_export.export(self._user))
+        return export_files_paths
+
+    def _export_user_archive_to_zip(self, user_data: dict, user_files: list[str]) -> str:
+        """Write the data JSON plus each downloaded file into a local zip; return its path."""
+        s3 = boto3.client("s3", endpoint_url=settings.AWS_S3_ENDPOINT_URL)
+        archive_filename = f"/{ExportUserArchiveRootPaths.LOCAL_ROOT.value}/{self._user_id}.zip"
+
+        with zipfile.ZipFile(archive_filename, "w", zipfile.ZIP_DEFLATED) as zf:
+            json_data_filename = f"{self._user_id}/{self._user_id}.json"
+            zf.writestr(json_data_filename, json.dumps(user_data).encode('utf-8'))
+
+            for file_path in user_files:
+                with io.BytesIO() as buffer:
+                    s3.download_fileobj(settings.AWS_STORAGE_BUCKET_NAME, file_path, buffer)
+                    zf.writestr(f"{self._user_id}/{file_path}", buffer.getvalue())
+
+        return archive_filename
+
+    def _export_zip_archive_to_s3(self, user_archive_filename: str) -> str:
+        """Upload the zip to the exports bucket and return a time-limited presigned URL."""
+        s3 = boto3.client("s3", endpoint_url=settings.AWS_S3_ENDPOINT_URL)
+        user_archive_obj_key = self._get_user_archive_obj_key()
+
+        s3.upload_file(user_archive_filename, settings.AWS_EXPORTS_STORAGE_BUCKET_NAME, user_archive_obj_key)
+        return s3.generate_presigned_url(
+            'get_object',
+            Params={'Bucket': settings.AWS_EXPORTS_STORAGE_BUCKET_NAME, 'Key': user_archive_obj_key},
+            ExpiresIn=settings.USER_DATA_EXPORT_EXPIRY_SECONDS,
+        )
+
+    def _get_user_archive_obj_key(self) -> str:
+        # Timestamped key so successive exports for the same user don't collide.
+        timestamp = datetime.datetime.now().strftime("%d-%m-%y_%H-%M-%S")
+        return f"{ExportUserArchiveRootPaths.S3_ROOT.value}/{self._user_id}_{timestamp}.zip"
diff --git a/packages/backend/apps/users/services/export/services/user.py b/packages/backend/apps/users/services/export/services/user.py
new file mode 100644
index 000000000..b86abc5a0
--- /dev/null
+++ b/packages/backend/apps/users/services/export/services/user.py
@@ -0,0 +1,50 @@
+from typing import Optional
+
+import boto3
+from django.conf import settings
+
+from . import export
+from .. import emails
+from ..types import ExportedUserData
+from ....models import User
+
+
+class _ProcessUserDataExport:
+    """Exports data for a set of users, then emails each user and the admin."""
+
+    def __call__(self, user_ids: list[str], admin_email: str, session=None):
+        # `session` is accepted so callers that manage a DB session (the lambda
+        # handler passes `session=...`) keep working; it is not used directly here.
+        entries = []
+        email_events = []
+
+        for user_id in user_ids:
+            if user := self._get_user(user_id):
+                entry = self._get_user_export_entry(user)
+                email_events.append(emails.get_user_export_email_event(to=user.email, data=entry))
+                entries.append(entry)
+
+        # No matching users means nothing to report — skip all emails.
+        if entries:
+            email_events.append(emails.get_admin_export_email_event(to=admin_email, data=entries))
+            self._send_export_emails(email_events)
+
+    @staticmethod
+    def _get_user(user_id: str) -> Optional[User]:
+        try:
+            return User.objects.prefetch_related('profile', 'cruddemoitem_set', 'documents').get(id=user_id)
+        except User.DoesNotExist:
+            return None
+
+    @staticmethod
+    def _get_user_export_entry(user: User) -> ExportedUserData:
+        export_user_archive = export.ExportUserArchive(user)
+        return {"email": user.email, "export_url": export_user_archive.run()}
+
+    @staticmethod
+    def _send_export_emails(email_events: list):
+        events = boto3.client('events', endpoint_url=settings.AWS_ENDPOINT_URL)
+        events.put_events(Entries=email_events)
+
+
+process_user_data_export = _ProcessUserDataExport()
diff --git a/packages/backend/apps/users/services/export/types.py b/packages/backend/apps/users/services/export/types.py
new file mode 100644
index 000000000..a2fb5ce09
--- /dev/null
+++ b/packages/backend/apps/users/services/export/types.py
@@ -0,0 +1,40 @@
+import datetime
+from typing import TypedDict
+
+from pydantic import BaseModel, validator
+from sqlalchemy.orm import Query  # NOTE(review): lazy relationships arrive as Query — confirm ORM
+
+
+class OrmBase(BaseModel):
+    """Base pydantic schema that reads from ORM objects (orm_mode)."""
+
+    id: int
+
+    @validator("*", pre=True)
+    def evaluate_lazy_columns(cls, v):
+        # Materialize lazy relationship queries so pydantic can serialize them.
+        if isinstance(v, Query):
+            return v.all()
+        return v
+
+    class Config:
+        orm_mode = True
+
+
+class UserProfile(OrmBase):
+    first_name: str
+    last_name: str
+
+
+class UserType(OrmBase):
+    profile: UserProfile
+    email: str
+    is_superuser: bool
+    is_active: bool
+    is_confirmed: bool
+    created: datetime.datetime
+
+
+class ExportedUserData(TypedDict):
+    email: str
+    export_url: str
diff --git a/packages/backend/apps/users/tasks.py b/packages/backend/apps/users/tasks.py
index 263d8f2ea..00ed1e786 100644
--- a/packages/backend/apps/users/tasks.py
+++ b/packages/backend/apps/users/tasks.py
@@ -1,6 +1,8 @@
 import importlib
 
 from django.conf import settings
+from celery import shared_task
+from .services.export.services import user as user_services
 
 module_name, package = settings.LAMBDA_TASKS_BASE_HANDLER.rsplit(".", maxsplit=1)
 LambdaTask = getattr(importlib.import_module(module_name), package)
@@ -9,3 +11,9 @@ class ExportUserData(LambdaTask):
     def __init__(self):
         super().__init__(name="EXPORT_USER_DATA", source='backend.export_user')
+
+
+@shared_task
+def export_user_data(user_ids, admin_email):
+    """Celery entry point for the export; no `bind=True` — `self` is not used."""
+    user_services.process_user_data_export(user_ids=user_ids, admin_email=admin_email)