Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: get analysis datasets #30

Merged
merged 4 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added common/constants/__init__.py
Empty file.
8 changes: 8 additions & 0 deletions common/constants/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Contains constants"""
# Binary (1024-based) size units, each expressed as a number of bytes
KB = 1 << 10
MB = KB << 10
GB = MB << 10
TB = GB << 10

# Size limit for a dataset, in bytes (10 MB)
DATASET_MAX_SIZE = MB * 10
7 changes: 7 additions & 0 deletions common/helpers/get_mime_type_from_extension.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Contains a method for getting a mimetype from a file extension"""
import mimetypes

def get_mimetype_from_extension(
    filename: str, default: str = "application/octet-stream"
) -> str:
    """Guess the MIME type of a file from the extension in its name

    Keyword arguments:
    filename -- the file name whose extension is inspected
    default -- value returned when no MIME type can be guessed
    Return: the guessed MIME type, or ``default`` when the guess fails
    """
    mimetype, _ = mimetypes.guess_type(filename)
    # guess_type() yields None for a missing or unknown extension; fall back
    # so callers always receive a string, as the return annotation promises.
    return mimetype or default
14 changes: 10 additions & 4 deletions file_management/contract/dto/dataset_to.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
class DatasetTO(BaseTO):
"""Contains the fields for a dataset"""
id: str | None
uploadedBy: UserTO | None
createdOn: datetime | None
uploadedBy: str | None
sizeBytes: int | None
createdAt: datetime | None
updatedAt: datetime | None
mimeType: str | None
url: str | None
filename: str | None

Expand All @@ -24,9 +27,12 @@ def from_model(cls, instance: Dataset) -> 'DatasetTO | None':
if not instance:
return None
return cls(
uploadedBy=instance.uploaded_by,
createdOn=instance.created_on,
uploadedBy=instance.uploaded_by.id,
createdAt=instance.created_at,
id=instance.id,
sizeBytes=instance.size_bytes,
mimeType=instance.mime_type,
updatedAt=instance.updated_at,
filename=instance.filename,
url=instance.url
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ class CreatePresignedUrlUploadFileIn(serializers.Serializer):
"""Request input for an analysis creation"""
filename = serializers.CharField()
analysis_id = serializers.IntegerField()
size_bytes = serializers.IntegerField()
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""This module contains the File Management repository"""

from abc import abstractmethod
from typing import List

from file_management.contract.dto.dataset_to import DatasetTO
from file_management.contract.dto.s3_presigned_url_to import S3PresignedUrlTO
Expand All @@ -11,7 +12,7 @@ class FileManagementRepository:

@abstractmethod
def create_presigned_url_upload_file(
self, filename: str, user_id: str
self, filename: str, user_id: str, size_bytes: int
) -> tuple[S3PresignedUrlTO, DatasetTO]:
"""
Create a presigned URL to allow the frontend to upload a file
Expand All @@ -38,3 +39,12 @@ def get_dataset_by_id(self, dataset_id) -> DatasetTO:
"""
Retrieve a dataset by id
"""

@abstractmethod
def get_analysis_datasets(self, analysis_id: int) -> List[DatasetTO]:
    """Retrieve the datasets of the analysis

    Abstract declaration: the body is only this docstring, so the actual
    lookup is expected to come from an implementing class.

    Keyword arguments:
    analysis_id -- the id of the analysis
    Return: A list of DatasetTO
    """
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ def create_presigned_url_upload_file_controller(request):
raise BadRequestException("All fields are required")
filename = data.validated_data["filename"]
analysis_id = data.validated_data["analysis_id"]
url = service.create_presigned_url_upload_file(request.user, filename, analysis_id)
size_bytes = data.validated_data["size_bytes"]
url = service.create_presigned_url_upload_file(request.user, filename, analysis_id, size_bytes)
return api_response_success(data=url)
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Contains the controller for getting the analysis datasets"""
from rest_framework.decorators import api_view

from common.exceptions.exceptions import BadRequestException
from common.helpers.api_responses import api_response_success
from file_management.service.impl.file_management_service_impl import FileManagementServiceImpl

@api_view(["GET"])
def get_analysis_datasets_controller(request):
    """Retrieve the analysis datasets

    Reads the ``analysis_id`` query parameter and returns whatever the file
    management service resolves for the requesting user.

    Keyword arguments:
    request -- the incoming request; ``analysis_id`` is read from its query params
    Return: a success API response whose data is the service result
    Raises: BadRequestException when ``analysis_id`` is missing or not an integer
    """
    raw_analysis_id = request.query_params.get("analysis_id", None)
    if not raw_analysis_id:
        raise BadRequestException("The analysis id is required")
    # Query parameters arrive as strings; convert here so a malformed id is
    # rejected as a bad request instead of failing further down the stack.
    try:
        analysis_id = int(raw_analysis_id)
    except ValueError as error:
        raise BadRequestException("The analysis id must be an integer") from error
    service = FileManagementServiceImpl()
    datasets = service.get_analysis_datasets(request.user, analysis_id)
    return api_response_success(data=datasets)
39 changes: 39 additions & 0 deletions file_management/migrations/0002_alter_dataset_options_and_more.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Generated by Django 5.1.4 on 2024-12-30 15:59

import django.utils.timezone
from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema changes for the 'dataset' model: renames the creation timestamp
    # and adds mime type, size and last-update columns.

    # Must run after the app's initial migration.
    dependencies = [
        ('file_management', '0001_initial'),
    ]

    operations = [
        # Default ordering now refers to the renamed created_at field.
        migrations.AlterModelOptions(
            name='dataset',
            options={'ordering': ['created_at']},
        ),
        # created_on -> created_at (a rename, so existing values are kept).
        migrations.RenameField(
            model_name='dataset',
            old_name='created_on',
            new_name='created_at',
        ),
        # New mime_type column; the field default is 'text/csv'.
        migrations.AddField(
            model_name='dataset',
            name='mime_type',
            field=models.CharField(default='text/csv'),
        ),
        # New size_bytes column. With preserve_default=False the default of 0
        # is used for this migration only and is not kept in the field's
        # migration state.
        migrations.AddField(
            model_name='dataset',
            name='size_bytes',
            field=models.IntegerField(default=0),
            preserve_default=False,
        ),
        # New updated_at column, declared with auto_now=True.
        migrations.AddField(
            model_name='dataset',
            name='updated_at',
            field=models.DateTimeField(auto_now=True),
        ),
    ]
7 changes: 5 additions & 2 deletions file_management/models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@ class Dataset(models.Model):
id = models.UUIDField(primary_key=True, default=uuid.uuid4)
filename = models.CharField(max_length=255)
url = models.URLField(null=True)
created_on = models.DateTimeField(auto_now_add=True)
created_at = models.DateTimeField(auto_now_add=True)
updated_at = models.DateTimeField(auto_now=True)
uploaded_by = models.ForeignKey('user_management.User', on_delete=models.CASCADE)
mime_type = models.CharField(null=False, default="text/csv")
size_bytes = models.IntegerField(default=0)

class Meta:
"""Table's metadata"""
db_table = 'dataset'
ordering = ['created_on']
ordering = ['created_at']
16 changes: 13 additions & 3 deletions file_management/repository/file_management_repository_impl.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""This module contains the analysis repository"""

import logging
from typing import List
import urllib
from datetime import timedelta
from os import getenv
Expand All @@ -9,6 +10,7 @@
from django.db import transaction

from analysis.models.analysis import Analysis
from common.helpers.get_mime_type_from_extension import get_mimetype_from_extension
from file_management.contract.dto.dataset_to import DatasetTO
from file_management.contract.dto.s3_presigned_url_to import S3PresignedUrlTO
from file_management.contract.repository.file_management_repository import (
Expand All @@ -23,17 +25,20 @@
class FileManagementRepositoryImpl(FileManagementRepository):
"""Analysis repository"""

def create_presigned_url_upload_file(self, filename: str, user_id: str):
def create_presigned_url_upload_file(self, filename: str, user_id: str, size_bytes: int):
s3_client = boto3.client("s3")
expires_in = timedelta(hours=1).seconds
user = User.objects.filter(id=user_id).first()

try:
with transaction.atomic():
new_dataset = Dataset.objects.create(
filename=filename, uploaded_by=user
filename=filename,
uploaded_by=user,
size_bytes=size_bytes,
mime_type=get_mimetype_from_extension(filename)
)
object_name = f"datasets/{str(new_dataset.id)}"
object_name = f"datasets/{filename}"
response = s3_client.generate_presigned_post(
bucket_name,
object_name,
Expand Down Expand Up @@ -70,3 +75,8 @@ def attach_file_to_analysis(self, dataset_id: str, analysis_id: int) -> DatasetT
def get_dataset_by_id(self, dataset_id: str) -> DatasetTO:
    """Look up the first dataset matching the id and wrap it via DatasetTO.from_model"""
    return DatasetTO.from_model(Dataset.objects.filter(id=dataset_id).first())

def get_analysis_datasets(self, analysis_id: int) -> List[DatasetTO]:
    """Retrieve the datasets of the analysis

    Keyword arguments:
    analysis_id -- the id of the analysis
    Return: A list of DatasetTO; empty when no analysis has that id
    """
    analysis = Analysis.objects.filter(id=analysis_id).first()
    # .first() gives None when nothing matches; without this guard the
    # attribute access below would raise AttributeError.
    if analysis is None:
        return []
    return DatasetTO.from_models(analysis.datasets.all())
8 changes: 8 additions & 0 deletions file_management/service/file_management_service.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Contains the abstract class of file management service"""

from abc import abstractmethod
from typing import List

from common.service.base_service import BaseService
from file_management.contract.dto.dataset_to import DatasetTO
from file_management.contract.dto.s3_presigned_url_to import S3PresignedUrlTO


Expand All @@ -16,3 +18,9 @@ def create_presigned_url_upload_file(self, user, filename: str) -> S3PresignedUr
@abstractmethod
def create_presigned_url_download_file(self, user, dataset_id: str) -> S3PresignedUrlTO:
"""Generate a presigned URL for downloading files"""

@abstractmethod
def get_analysis_datasets(self, user, analysis_id: int) -> List[DatasetTO]:
    """
    Retrieve all the datasets from the given analysis if the user has the required permissions

    Keyword arguments:
    user -- the user requesting the datasets
    analysis_id -- the id of the analysis
    Return: A list of DatasetTO
    """
25 changes: 20 additions & 5 deletions file_management/service/impl/file_management_service_impl.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
"""Contains the implementation of AnalysisService"""

import urllib
from analysis.service.impl.analysis_service_impl import AnalysisServiceImpl
from common.exceptions.exceptions import BadRequestException, NotFoundException
from file_management.contract.dto.s3_presigned_url_to import S3PresignedUrlTO
from common.constants.constants import DATASET_MAX_SIZE, MB
from common.exceptions.exceptions import BadRequestException, NotFoundException, ForbiddenException
from file_management.repository.file_management_repository_impl import (
FileManagementRepositoryImpl,
)
Expand All @@ -14,7 +13,9 @@
from file_management.use_cases.create_presigned_url_upload_file_uc import (
CreatePresignedUrlUploadFileUC,
)
from file_management.use_cases.get_analysis_datasets_uc import GetAnalysisDatasetsUC
from user_management.repository.role_repository_impl import RoleRepositoryImpl
from user_management.service.impl.users_service_impl import UsersServiceImpl
from user_management.usecases.attach_file_to_analysis_uc import AttachFileToAnalysisUC
from user_management.usecases.get_user_role_in_analysis_uc import (
GetUserRoleInAnalysisUC,
Expand All @@ -30,23 +31,28 @@ def __init__(self):
)
self.get_user_role_in_analysis_uc = GetUserRoleInAnalysisUC.get_instance()
self.attach_file_to_analysis_uc = AttachFileToAnalysisUC.get_instance()
self.get_analysis_datasets_uc = GetAnalysisDatasetsUC.get_instance()
self.create_presigned_url_download_file_uc = (
CreatePresignedUrlDownloadFileUC.get_instance()
)
self.repository = FileManagementRepositoryImpl()
self.role_repository = RoleRepositoryImpl()
self.analysis_service = AnalysisServiceImpl()
self.user_service = UsersServiceImpl()

def create_presigned_url_upload_file(
self, user, filename: str, analysis_id: int
self, user, filename: str, analysis_id: int, size_bytes: int
) -> str:
# TODO: validate if the user is in ['FACILITATOR', 'DATA MANAGER']
if size_bytes > DATASET_MAX_SIZE:
raise BadRequestException(f"The file must be smaller than {DATASET_MAX_SIZE / MB}MB")

self.analysis_service.get_analysis_by_id(
analysis_id
) # Raise 404 if analysis doesn't exist

presigned_url, dataset = self.create_presigned_url_upload_file_uc.exec(
self.repository, filename, user.id
self.repository, filename, user.id, size_bytes
)

self.attach_file_to_analysis_uc.exec(self.repository, dataset.id, analysis_id)
Expand All @@ -69,3 +75,12 @@ def create_presigned_url_download_file(
if not response:
raise BadRequestException()
return response

def get_analysis_datasets(self, user, analysis_id: int):
    """Return the datasets of an analysis, each converted with ``to_dict()``

    Keyword arguments:
    user -- the requesting user; its ``id`` is used for the membership check
    analysis_id -- the id of the analysis
    Return: a list with one ``to_dict()`` result per dataset
    Raises: ForbiddenException when the user is not in the analysis
    """
    # Existence check first: raises 404 if not found
    self.analysis_service.get_analysis_by_id(analysis_id)

    user_belongs = self.user_service.is_user_in_analysis(user.id, analysis_id)
    if not user_belongs:
        raise ForbiddenException("You can't see that analysis")

    serialized = []
    for dataset in self.get_analysis_datasets_uc.exec(self.repository, analysis_id):
        serialized.append(dataset.to_dict())
    return serialized
Loading
Loading