Skip to content

Commit

Permalink
feat(file): move thumbnail generation to celery
Browse files Browse the repository at this point in the history
  • Loading branch information
Yelinz committed Sep 24, 2024
1 parent 088a897 commit 6d30dcd
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 61 deletions.
4 changes: 4 additions & 0 deletions alexandria/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ def mock_celery(mocker):
"alexandria.core.tasks.set_content_vector.delay",
side_effect=lambda id: tasks.set_content_vector(id),
)
mocker.patch(
"alexandria.core.tasks.create_thumbnail.delay",
side_effect=lambda id: tasks.create_thumbnail(id),
)


@pytest.fixture
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from tqdm import tqdm

from alexandria.core.models import File
from alexandria.core.tasks import create_thumbnail


class Command(BaseCommand):
Expand All @@ -12,7 +13,7 @@ def handle(self, *args, **options):
for file in tqdm(
File.objects.filter(variant="original", renderings__isnull=True)
):
file.create_thumbnail()
create_thumbnail(file.pk)
if virtual_memory().available < 300_000_000:
print("about to run out of memory, stopping")
break
56 changes: 6 additions & 50 deletions alexandria/core/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import logging
import re
import uuid
from mimetypes import guess_extension
from pathlib import Path
from tempfile import NamedTemporaryFile

Expand All @@ -13,18 +11,14 @@
from django.core.files import File as DjangoFile
from django.core.validators import RegexValidator
from django.db import models, transaction
from django.db.models.fields.files import ImageFile
from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _
from localized_fields.fields import LocalizedCharField, LocalizedTextField
from manabi.token import Key, Token
from preview_generator.manager import PreviewManager

from alexandria.core.presign_urls import make_signature_components
from alexandria.storages.fields import DynamicStorageFileField

log = logging.getLogger(__name__)


def upload_file_content_to(instance, _):
    """Return the storage name for a file's content: ``<pk>_<name>``.

    The second positional argument is accepted but ignored.
    """
    return "{}_{}".format(instance.pk, instance.name)
Expand Down Expand Up @@ -255,49 +249,6 @@ def get_webdav_url(self, username, group, host="http://localhost:8000"):
f"{handler}{host}{settings.ALEXANDRIA_MANABI_DAV_URL_PATH}/{token.as_url()}"
)

def create_thumbnail(self):
# Generate a JPEG preview of this file's content and store it as a new
# File with variant THUMBNAIL that points back to this file as `original`.
# Returns the created thumbnail File, or None when generation is skipped
# or the preview generator raises.
#
# Guard: only originals without any renderings get a thumbnail, and only
# when thumbnail generation is enabled in settings.
if (
self.variant != File.Variant.ORIGINAL
or self.renderings.count() > 0
or not settings.ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION
):
return

# Copy the stored content into a named temporary file so the preview
# generator can be handed a filesystem path; the temp file's directory is
# passed to PreviewManager.
with NamedTemporaryFile() as tmp:
temp_file = Path(tmp.name)
manager = PreviewManager(str(temp_file.parent))
with temp_file.open("wb") as f:
f.write(self.content.file.file.read())
# The temp file name carries no extension, so it is derived from the
# recorded MIME type and passed explicitly.
extension = guess_extension(self.mime_type)
preview_kwargs = {"file_ext": extension}
# Optional size overrides, only applied when configured.
if settings.ALEXANDRIA_THUMBNAIL_WIDTH: # pragma: no cover
preview_kwargs["width"] = settings.ALEXANDRIA_THUMBNAIL_WIDTH
if settings.ALEXANDRIA_THUMBNAIL_HEIGHT: # pragma: no cover
preview_kwargs["height"] = settings.ALEXANDRIA_THUMBNAIL_HEIGHT
try:
path_to_preview_image = Path(
manager.get_jpeg_preview(str(temp_file), **preview_kwargs)
)
# thumbnail generation can throw many different exceptions, catch all
except Exception: # noqa: B902
log.exception("Thumbnail generation failed")
return None

# Wrap the generated JPEG and persist it; note that `file` here is the
# ImageFile of the preview, so `file.size` is the thumbnail's own size.
with path_to_preview_image.open("rb") as thumb:
file = ImageFile(thumb)
thumb_file = File.objects.create(
name=f"{self.name}_preview.jpg",
document=self.document,
variant=File.Variant.THUMBNAIL.value,
original=self,
encryption_status=self.encryption_status,
content=file,
mime_type="image/jpeg",
size=file.size,
)

return thumb_file

def get_download_url(self, request):
if not request:
return None
Expand Down Expand Up @@ -336,4 +287,9 @@ def set_file_attributes(sender, instance, **kwargs):
):
tasks.set_content_vector.delay_on_commit(instance.pk)

instance.create_thumbnail()
if (
instance.variant == File.Variant.ORIGINAL
and instance.renderings.count() < 1
and settings.ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION
):
tasks.create_thumbnail.delay_on_commit(instance.pk)
47 changes: 47 additions & 0 deletions alexandria/core/tasks.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
import hashlib
from mimetypes import guess_extension
from pathlib import Path
from tempfile import NamedTemporaryFile

import tika.language
import tika.parser
from django.conf import settings
from django.contrib.postgres.search import SearchVector
from django.db.models import Value
from django.db.models.fields.files import ImageFile
from preview_generator.manager import PreviewManager

from alexandria.core.models import File
from celery import shared_task
from celery.utils.log import get_task_logger

logger = get_task_logger(__name__)


@shared_task(soft_time_limit=301)
Expand Down Expand Up @@ -56,3 +63,43 @@ def set_checksum(file_pk: str):

def make_checksum(bytes_: bytes) -> str:
    """Return the SHA-256 hex digest of ``bytes_`` prefixed with ``sha256:``."""
    digest = hashlib.sha256(bytes_).hexdigest()
    return "sha256:" + digest


@shared_task
def create_thumbnail(file_pk: str):
    """Generate a JPEG thumbnail for a file and store it as a new ``File``.

    The content of the file identified by ``file_pk`` is copied to a
    temporary file, rendered to a JPEG preview, and saved as a ``File`` with
    variant ``THUMBNAIL`` whose ``original`` is the source file.

    This task does not check the file's variant, existing renderings, or
    whether thumbnail generation is enabled; callers decide whether a
    thumbnail is wanted before invoking it.

    Args:
        file_pk: Primary key of the file to create a thumbnail for.

    Returns:
        The created thumbnail ``File``, or ``None`` if the preview generator
        raised.

    Raises:
        File.DoesNotExist: If no file with ``file_pk`` exists.
    """
    file = File.objects.get(pk=file_pk)

    with NamedTemporaryFile() as tmp:
        # The preview generator works on filesystem paths, so the stored
        # content is copied into a temporary file first.
        temp_file = Path(tmp.name)
        manager = PreviewManager(str(temp_file.parent))
        with temp_file.open("wb") as f:
            f.write(file.content.file.file.read())
        # The temp file name has no extension; derive it from the MIME type.
        extension = guess_extension(file.mime_type)
        preview_kwargs = {"file_ext": extension}
        if settings.ALEXANDRIA_THUMBNAIL_WIDTH:  # pragma: no cover
            preview_kwargs["width"] = settings.ALEXANDRIA_THUMBNAIL_WIDTH
        if settings.ALEXANDRIA_THUMBNAIL_HEIGHT:  # pragma: no cover
            preview_kwargs["height"] = settings.ALEXANDRIA_THUMBNAIL_HEIGHT
        try:
            path_to_preview_image = Path(
                manager.get_jpeg_preview(str(temp_file), **preview_kwargs)
            )
        # thumbnail generation can throw many different exceptions, catch all
        except Exception:  # noqa: B902
            logger.exception("Thumbnail generation failed")
            return None

    with path_to_preview_image.open("rb") as thumb:
        image = ImageFile(thumb)
        thumb_file = File.objects.create(
            name=f"{file.name}_preview.jpg",
            document=file.document,
            variant=File.Variant.THUMBNAIL.value,
            original=file,
            encryption_status=file.encryption_status,
            content=image,
            mime_type="image/jpeg",
            # Size of the generated JPEG, not of the original file.
            size=image.size,
        )

    return thumb_file
12 changes: 7 additions & 5 deletions alexandria/core/tests/__snapshots__/test_viewsets.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@
'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid LIMIT 21',
'UPDATE "alexandria_core_file" SET "content_vector" = (setweight(to_tsvector(COALESCE(\'content\', \'\')), \'A\') || setweight(to_tsvector(\'english\'::regconfig, COALESCE(\'Important text\', \'\')), \'B\')), "language" = \'en\' WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid',
'SELECT COUNT(*) AS "__count" FROM "alexandria_core_file" WHERE "alexandria_core_file"."original_id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid',
'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid LIMIT 21',
'COMMIT',
'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."document_id" = \'f561aaf6ef0bf14d4208bb46a4ccb3ad\'::uuid ORDER BY "alexandria_core_file"."created_at" DESC',
'SELECT "alexandria_core_tag"."created_at", "alexandria_core_tag"."created_by_user", "alexandria_core_tag"."created_by_group", "alexandria_core_tag"."modified_at", "alexandria_core_tag"."modified_by_user", "alexandria_core_tag"."modified_by_group", "alexandria_core_tag"."metainfo", "alexandria_core_tag"."id", "alexandria_core_tag"."name", "alexandria_core_tag"."description", "alexandria_core_tag"."tag_synonym_group_id" FROM "alexandria_core_tag" INNER JOIN "alexandria_core_document_tags" ON ("alexandria_core_tag"."id" = "alexandria_core_document_tags"."tag_id") WHERE "alexandria_core_document_tags"."document_id" = \'f561aaf6ef0bf14d4208bb46a4ccb3ad\'::uuid',
'SELECT "alexandria_core_mark"."created_at", "alexandria_core_mark"."created_by_user", "alexandria_core_mark"."created_by_group", "alexandria_core_mark"."modified_at", "alexandria_core_mark"."modified_by_user", "alexandria_core_mark"."modified_by_group", "alexandria_core_mark"."metainfo", "alexandria_core_mark"."slug", "alexandria_core_mark"."name", "alexandria_core_mark"."description" FROM "alexandria_core_mark" INNER JOIN "alexandria_core_document_marks" ON ("alexandria_core_mark"."slug" = "alexandria_core_document_marks"."mark_id") WHERE "alexandria_core_document_marks"."document_id" = \'f561aaf6ef0bf14d4208bb46a4ccb3ad\'::uuid',
]),
'query_count': 15,
'query_count': 16,
'request': dict({
'CONTENT_LENGTH': '397',
'CONTENT_TYPE': 'multipart/form-data; boundary=BoUnDaRyStRiNg; charset=utf-8',
Expand Down Expand Up @@ -172,10 +173,11 @@
'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid LIMIT 21',
'UPDATE "alexandria_core_file" SET "content_vector" = (setweight(to_tsvector(COALESCE(\'father\', \'\')), \'A\') || setweight(to_tsvector(\'english\'::regconfig, COALESCE(\'Important text\', \'\')), \'B\')), "language" = \'en\' WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid',
'SELECT COUNT(*) AS "__count" FROM "alexandria_core_file" WHERE "alexandria_core_file"."original_id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid',
'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid LIMIT 21',
'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."original_id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid ORDER BY "alexandria_core_file"."created_at" DESC',
'SELECT 1 AS "a" FROM "alexandria_core_document" WHERE ("alexandria_core_document"."id" = \'9dd4e461268c8034f5c8564e155c67a6\'::uuid AND "alexandria_core_document"."id" = \'9dd4e461268c8034f5c8564e155c67a6\'::uuid) LIMIT 1',
]),
'query_count': 10,
'query_count': 11,
'request': dict({
'CONTENT_LENGTH': '345',
'CONTENT_TYPE': 'multipart/form-data; boundary=BoUnDaRyStRiNg; charset=utf-8',
Expand Down Expand Up @@ -870,7 +872,7 @@
'modified-by-group': None,
'modified-by-user': None,
'name': 'father.png_preview.jpg',
'size': 2257,
'size': 3030,
'variant': 'thumbnail',
}),
'id': 'f561aaf6-ef0b-f14d-4208-bb46a4ccb3ad',
Expand Down Expand Up @@ -1563,7 +1565,7 @@
'modified-by-group': None,
'modified-by-user': None,
'name': 'father.png_preview.jpg',
'size': 2257,
'size': 3030,
'variant': 'thumbnail',
}),
'id': 'f561aaf6-ef0b-f14d-4208-bb46a4ccb3ad',
Expand Down Expand Up @@ -1646,7 +1648,7 @@
'modified-by-group': None,
'modified-by-user': None,
'name': 'run.png_preview.jpg',
'size': 2257,
'size': 7738,
'variant': 'thumbnail',
}),
'id': 'dad3a37a-a9d5-0688-b515-7698acfd7aee',
Expand Down
2 changes: 1 addition & 1 deletion alexandria/storages/tests/test_dynamic_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_dynamic_storage_select_global_ssec(
# Patch away file opens
mocker.patch("alexandria.core.tasks.set_checksum.delay", side_effect=None)
mocker.patch("alexandria.core.tasks.set_content_vector.delay", side_effect=None)
mocker.patch("alexandria.core.models.File.create_thumbnail")
mocker.patch("alexandria.core.tasks.create_thumbnail.delay", side_effect=None)
if raises is not None:
with pytest.raises(raises):
file_factory()
Expand Down
4 changes: 0 additions & 4 deletions compose.override.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ services:
- CONCURRENT_DATABASE_RELOAD=false
- ENABLE_DEFINITIONS_UPDATE=false

redis:
environment:
- REDIS_PASSWORD=redis

celery:
<<: *alexandria
ports:
Expand Down

0 comments on commit 6d30dcd

Please sign in to comment.