Skip to content

Commit

Permalink
feat: Move user export task on celery
Browse files Browse the repository at this point in the history
  • Loading branch information
wojcikmat authored and mkleszcz committed Jul 3, 2024
1 parent c017ee5 commit 3d91c35
Show file tree
Hide file tree
Showing 10 changed files with 253 additions and 0 deletions.
Empty file.
11 changes: 11 additions & 0 deletions packages/backend/apps/users/services/export/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from enum import Enum


class ExportUserArchiveRootPaths(str, Enum):
LOCAL_ROOT = "tmp"
S3_ROOT = "exports"


class UserEmails(str, Enum):
USER_EXPORT = "USER_EXPORT"
USER_EXPORT_ADMIN = "USER_EXPORT_ADMIN"
11 changes: 11 additions & 0 deletions packages/backend/apps/users/services/export/emails.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from common.emails import get_send_email_event
from userauth.constants import UserEmails
from userauth.types import ExportedUserData


def get_user_export_email_event(to: str, data: ExportedUserData):
return get_send_email_event(detail_type=UserEmails.USER_EXPORT.value, data={"to": to, "data": data})


def get_admin_export_email_event(to: str, data: list[ExportedUserData]):
return get_send_email_event(detail_type=UserEmails.USER_EXPORT_ADMIN.value, data={"to": to, "data": data})
18 changes: 18 additions & 0 deletions packages/backend/apps/users/services/export/handlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import logging

from dao.db.session import db_session
from .services import user as user_services

logger = logging.getLogger()
logger.setLevel(logging.INFO)


def user_data_export(event, context):
logger.info(event)

event_detail = event.get("detail", {})
user_ids = event_detail.get("user_ids", [])
admin_email = event_detail.get("admin_email")

with db_session() as session:
user_services.process_user_data_export(session=session, user_ids=user_ids, admin_email=admin_email)
18 changes: 18 additions & 0 deletions packages/backend/apps/users/services/export/protocols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from typing import Protocol, Type, Union
from pydantic import BaseModel
from ...models import User


class UserDataExportable(Protocol):
export_key: str
schema_class: Type[BaseModel]

@classmethod
def export(cls, user: User) -> Union[str, list[str]]:
...


class UserFilesExportable(Protocol):
@classmethod
def export(cls, user: User) -> list[str]:
...
Empty file.
103 changes: 103 additions & 0 deletions packages/backend/apps/users/services/export/services/export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import datetime
import io
import json
import zipfile
from typing import Union

import boto3
from ..protocols import UserDataExportable, UserFilesExportable
from ..constants import ExportUserArchiveRootPaths
from ....models import User



class CrudDemoItemDataExport(UserDataExportable):
export_key = "crud_demo_items"

@classmethod
def export(cls, user: User) -> list[str]:
return [cls.schema_class.from_orm(item).json() for item in user.cruddemoitem_set]


class DocumentDemoItemFileExport(UserFilesExportable):
@classmethod
def export(cls, user: User) -> list[str]:
return [document.file for document in user.documents]


class UserDataExport(UserDataExportable):
export_key = "user"
schema_class = User

@classmethod
def export(cls, user: User) -> Union[str, list[str]]:
return cls.schema_class.from_orm(user).json()


class ExportUserArchive:
_DATA_EXPORTS: list[UserDataExportable] = [UserDataExport, CrudDemoItemDataExport]
_FILES_EXPORTS: list[UserFilesExportable] = [DocumentDemoItemFileExport]

def __init__(self, user: User):
self._user = user

@property
def _user_id(self) -> str:
return hashid.encode(self._user.id)

def run(self) -> str:
user_data = self._export_user_data()
user_files = self._export_user_files()

archive_filename = self._export_user_archive_to_zip(user_data, user_files)
export_url = self._export_zip_archive_to_s3(archive_filename)

return export_url

def _export_user_data(self) -> dict:
export_data = {}

for user_data_export in self._DATA_EXPORTS:
export_data[user_data_export.export_key] = user_data_export.export(self._user)

return export_data

def _export_user_files(self) -> list:
export_files_paths = []

for user_file_export in self._FILES_EXPORTS:
export_files_paths.extend(user_file_export.export(self._user))

return export_files_paths

def _export_user_archive_to_zip(self, user_data: dict, user_files: list[str]) -> str:
s3 = boto3.client("s3", endpoint_url=settings.AWS_S3_ENDPOINT_URL)
archive_filename = f"/{ExportUserArchiveRootPaths.LOCAL_ROOT.value}/{self._user_id}.zip"

with zipfile.ZipFile(archive_filename, "w", zipfile.ZIP_DEFLATED) as zf:
json_data_filename = f"{self._user_id}/{self._user_id}.json"
zf.writestr(json_data_filename, json.dumps(user_data).encode('utf-8'))

for file_path in user_files:
with io.BytesIO() as buffer:
s3.download_fileobj(settings.AWS_STORAGE_BUCKET_NAME, file_path, buffer)
zf.writestr(f"{self._user_id}/{file_path}", buffer.getvalue())

return archive_filename

def _export_zip_archive_to_s3(self, user_archive_filename: str) -> str:
s3 = boto3.client("s3", endpoint_url=settings.AWS_S3_ENDPOINT_URL)
user_archive_obj_key = self._get_user_archive_obj_key()

s3.upload_file(user_archive_filename, settings.AWS_EXPORTS_STORAGE_BUCKET_NAME, user_archive_obj_key)
export_url = s3.generate_presigned_url(
'get_object',
Params={'Bucket': settings.AWS_EXPORTS_STORAGE_BUCKET_NAME, 'Key': user_archive_obj_key},
ExpiresIn=settings.USER_DATA_EXPORT_EXPIRY_SECONDS,
)

return export_url

def _get_user_archive_obj_key(self) -> str:
timestamp = datetime.datetime.now().strftime("%d-%m-%y_%H-%M-%S")
return f"{ExportUserArchiveRootPaths.S3_ROOT.value}/{self._user_id}_{timestamp}.zip"
49 changes: 49 additions & 0 deletions packages/backend/apps/users/services/export/services/user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from typing import Optional


import boto3
from typing import TypedDict
from . import export
from ....models import User


class ExportedUserData(TypedDict):
email: str
export_url: str


class _ProcessUserDataExport:
def __call__(self, user_ids: list[str], admin_email: str):
entries = []
email_events = []

for user_id in user_ids:
if user := self._get_user(user_id):
entry = self._get_user_export_entry(user)
email_events.append(emails.get_user_export_email_event(to=user.email, data=entry))
entries.append(entry)

if entries:
email_events.append(emails.get_admin_export_email_event(to=admin_email, data=entries))
self._send_export_emails(email_events)

@staticmethod
def _get_user(user_id: str) -> Optional[User]:
try:
user = User.objects.prefetch_related('profile','cruddemoitem_set','documents').get(id=user_id)
return user
except User.DoesNotExist:
return None

@staticmethod
def _get_user_export_entry(user: User) -> ExportedUserData:
export_user_archive = export.ExportUserArchive(user)
return {"email": user.email, "export_url": export_user_archive.run()}

@staticmethod
def _send_export_emails(email_events: list):
events = boto3.client('events', endpoint_url=settings.AWS_ENDPOINT_URL)
events.put_events(Entries=email_events)


process_user_data_export = _ProcessUserDataExport()
36 changes: 36 additions & 0 deletions packages/backend/apps/users/services/export/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import datetime
from typing import TypedDict

from pydantic import validator, BaseModel


class OrmBase(BaseModel):
id: int

@validator("*", pre=True)
def evaluate_lazy_columns(cls, v):
if isinstance(v, Query):
return v.all()
return v

class Config:
orm_mode = True


class UserProfile(OrmBase):
first_name: str
last_name: str


class UserType(OrmBase):
profile: UserProfile
email: str
is_superuser: bool
is_active: bool
is_confirmed: bool
created: datetime.datetime


class ExportedUserData(TypedDict):
email: str
export_url: str
7 changes: 7 additions & 0 deletions packages/backend/apps/users/tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import importlib

from django.conf import settings
from celery import shared_task, states
from .services.export.services import user as user_services

module_name, package = settings.LAMBDA_TASKS_BASE_HANDLER.rsplit(".", maxsplit=1)
LambdaTask = getattr(importlib.import_module(module_name), package)
Expand All @@ -9,3 +11,8 @@
class ExportUserData(LambdaTask):
def __init__(self):
super().__init__(name="EXPORT_USER_DATA", source='backend.export_user')


@shared_task(bind=True)
def export_user_data(user_ids, admin_email):
user_services.process_user_data_export(user_ids=user_ids, admin_email=admin_email)

0 comments on commit 3d91c35

Please sign in to comment.