Skip to content

Commit

Permalink
Add LeadPreviewAttachment Model
Browse files Browse the repository at this point in the history
  • Loading branch information
sudan45 committed Jun 7, 2024
1 parent 61da4fa commit 8a58a5c
Show file tree
Hide file tree
Showing 18 changed files with 198 additions and 57 deletions.
4 changes: 2 additions & 2 deletions apps/bulk_data_migration/entry_images_v2/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from utils.common import parse_number

from lead.models import LeadPreviewImage
from lead.models import LeadPreviewAttachment
from entry.models import Entry
from gallery.models import File

Expand Down Expand Up @@ -34,7 +34,7 @@ def _get_file_from_s3_url(entry, string):
return
# NOTE: For lead-preview generate gallery files
if file_path.startswith('lead-preview/'):
lead_preview = LeadPreviewImage.objects.filter(file=file_path).first()
lead_preview = LeadPreviewAttachment.objects.filter(file=file_path).first()
if lead_preview and lead_preview.file and lead_preview.file.storage.exists(lead_preview.file.name):
return lead_preview.clone_as_deep_file(entry.created_by)
return
Expand Down
4 changes: 2 additions & 2 deletions apps/commons/receivers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

from lead.models import (
LeadPreview,
LeadPreviewImage,
LeadPreviewAttachment,
)
from unified_connector.models import ConnectorLeadPreviewImage


# Lead
@receiver(models.signals.post_delete, sender=LeadPreview)
@receiver(models.signals.post_delete, sender=LeadPreviewImage)
@receiver(models.signals.post_delete, sender=LeadPreviewAttachment)
# Unified Connector
@receiver(models.signals.post_delete, sender=ConnectorLeadPreviewImage)
def cleanup_file_on_instance_delete(sender, instance, **kwargs):
Expand Down
59 changes: 43 additions & 16 deletions apps/deepl_integration/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from lead.models import (
Lead,
LeadPreview,
LeadPreviewImage,
LeadPreviewAttachment,
)
from lead.typings import NlpExtractorDocument
from entry.models import Entry
Expand Down Expand Up @@ -636,13 +636,14 @@ def trigger_lead_extract(cls, lead, task_instance=None):
def save_data(
lead: Lead,
text_source_uri: str,
images_uri: List[str],
images_uri: List[dict],
table_uri: List[dict],
word_count: int,
page_count: int,
text_extraction_id: str,
):
LeadPreview.objects.filter(lead=lead).delete()
LeadPreviewImage.objects.filter(lead=lead).delete()
LeadPreviewAttachment.objects.filter(lead=lead).delete()
# and create new one
LeadPreview.objects.create(
lead=lead,
Expand All @@ -651,18 +652,44 @@ def save_data(
page_count=page_count,
text_extraction_id=text_extraction_id,
)
# Save extracted images as LeadPreviewImage instances
# Save extracted images as LeadPreviewAttachment instances
# TODO: The logic is same for unified_connector leads as well. Maybe have a single func?
image_base_path = f'{lead.pk}'
for image_uri in images_uri:
lead_image = LeadPreviewImage(lead=lead)
image_obj = RequestHelper(url=image_uri, ignore_error=True).get_file()
if image_obj:
lead_image.file.save(
os.path.join(image_base_path, os.path.basename(urlparse(image_uri).path)),
image_obj

attachement_base_path = f'{lead.pk}'
images = [dict(item) for item in images_uri]
for image_uri in images:
for image in image_uri['images']:
lead_attachement = LeadPreviewAttachment(lead=lead)
image_obj = RequestHelper(url=image, ignore_error=True).get_file()
if image_obj:
lead_attachement.file.save(
os.path.join(attachement_base_path, os.path.basename(urlparse(image).path)),
image_obj
)
lead_attachement.page_number = image_uri['page_number']
lead_attachement.type = LeadPreviewAttachment.AttachementFileType.IMAGE
lead_attachement.file_preview = lead_attachement.file

lead_attachement.save()

table_path = [dict(item) for item in table_uri]
for table in table_path:
lead_attachement = LeadPreviewAttachment(lead=lead)
table_img = RequestHelper(url=table['image_link'], ignore_error=True).get_file()
table_attahcment = RequestHelper(url=table['content_link'], ignore_error=True).get_file()
if table_img:
lead_attachement.file_preview.save(
os.path.join(attachement_base_path, os.path.basename(urlparse(table['image_link']).path)),
table_img
)
lead_image.save()
lead_attachement.page_number = table['page_number']
lead_attachement.type = LeadPreviewAttachment.AttachementFileType.XLSX
lead_attachement.file.save(
os.path.join(attachement_base_path, os.path.basename(urlparse(table['content_link']).path)),
table_attahcment
)
lead_attachement.save()

lead.update_extraction_status(Lead.ExtractionStatus.SUCCESS)
return lead

Expand All @@ -674,7 +701,7 @@ def save_lead_data_using_connector_lead(
if connector_lead.extraction_status != ConnectorLead.ExtractionStatus.SUCCESS:
return False
LeadPreview.objects.filter(lead=lead).delete()
LeadPreviewImage.objects.filter(lead=lead).delete()
LeadPreviewAttachment.objects.filter(lead=lead).delete()
# and create new one
LeadPreview.objects.create(
lead=lead,
Expand All @@ -683,10 +710,10 @@ def save_lead_data_using_connector_lead(
page_count=connector_lead.page_count,
text_extraction_id=connector_lead.text_extraction_id,
)
# Save extracted images as LeadPreviewImage instances
# Save extracted images as LeadPreviewAttachment instances
# TODO: The logic is same for unified_connector leads as well. Maybe have a single func?
for connector_lead_preview_image in connector_lead.preview_images.all():
lead_image = LeadPreviewImage(lead=lead)
lead_image = LeadPreviewAttachment(lead=lead)
lead_image.file.save(
connector_lead_preview_image.image.name,
connector_lead_preview_image.image,
Expand Down
22 changes: 18 additions & 4 deletions apps/deepl_integration/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,30 @@ class Status(models.IntegerChoices):
status = serializers.ChoiceField(choices=Status.choices)


class ImagePathSerializer(serializers.Serializer):
    """
    Validate one entry of the extractor callback's `images_path` list.

    Each entry carries a page number together with the list of image
    paths reported for that page.
    """
    page_number = serializers.IntegerField(required=True)
    images = serializers.ListField(
        child=serializers.CharField(allow_blank=True),
        # Use a callable so every use gets a fresh list; a literal `[]` would be
        # one mutable object shared by every validation that falls back to the default.
        default=list,
    )


class TablePathSerializer(serializers.Serializer):
    """
    Validate one entry of the extractor callback's `tables_path` list.

    All four fields are mandatory; both links must be well-formed URLs.
    """
    # Position of the table within the document.
    page_number = serializers.IntegerField(required=True)
    order = serializers.IntegerField(required=True)
    # Links supplied by the extractor for this table
    # (presumably a rendered image and the table content file — confirm against the extractor API).
    image_link = serializers.URLField(required=True)
    content_link = serializers.URLField(required=True)


# -- Lead
class LeadExtractCallbackSerializer(DeeplServerBaseCallbackSerializer):
"""
Serialize deepl extractor
"""
url = serializers.CharField(required=False)
# Data fields
images_path = serializers.ListField(
child=serializers.CharField(allow_blank=True),
required=False, default=[],
)
images_path = serializers.ListSerializer(child=ImagePathSerializer(required=False))
tables_path = serializers.ListSerializer(child=TablePathSerializer(required=False))
text_path = serializers.CharField(required=False, allow_null=True)
total_words_count = serializers.IntegerField(required=False, default=0, allow_null=True)
total_pages = serializers.IntegerField(required=False, default=0, allow_null=True)
Expand Down Expand Up @@ -106,6 +119,7 @@ def create(self, data):
lead,
data['text_path'],
data.get('images_path', [])[:10], # TODO: Support more images; too many images will cause errors.
data.get('tables_path', []),
data.get('total_words_count'),
data.get('total_pages'),
data.get('text_extraction_id'),
Expand Down
6 changes: 3 additions & 3 deletions apps/entry/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from gallery.serializers import FileSerializer, SimpleFileSerializer
from project.models import Project
from lead.serializers import LeadSerializer
from lead.models import Lead, LeadPreviewImage
from lead.models import Lead, LeadPreviewAttachment
from analysis_framework.serializers import AnalysisFrameworkSerializer
from geo.models import GeoArea, Region
from geo.serializers import SimpleRegionSerializer
Expand Down Expand Up @@ -211,7 +211,7 @@ class EntrySerializer(RemoveNullFieldsMixin,
lead_image = serializers.PrimaryKeyRelatedField(
required=False,
write_only=True,
queryset=LeadPreviewImage.objects.all()
queryset=LeadPreviewAttachment.objects.all()
)
# NOTE: Provided by annotate `annotate_comment_count`
verified_by_count = serializers.IntegerField(read_only=True)
Expand Down Expand Up @@ -594,7 +594,7 @@ class EntryGqSerializer(ProjectPropertySerializerMixin, TempClientIdMixin, UserR
lead_image = serializers.PrimaryKeyRelatedField(
required=False,
write_only=True,
queryset=LeadPreviewImage.objects.all(),
queryset=LeadPreviewAttachment.objects.all(),
help_text=(
'This is used to add images from Lead Preview Images.'
' This will be changed into gallery image and supplied back in image field.'
Expand Down
6 changes: 3 additions & 3 deletions apps/entry/tests/test_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from deep.tests import TestCase
from project.models import Project
from user.models import User
from lead.models import Lead, LeadPreviewImage
from lead.models import Lead, LeadPreviewAttachment
from organization.models import Organization, OrganizationType
from analysis_framework.models import (
AnalysisFramework, Widget, Filter
Expand Down Expand Up @@ -724,15 +724,15 @@ def test_entry_image_validation(self):

self.authenticate()
# Using lead image (same lead)
data['lead_image'] = self.create(LeadPreviewImage, lead=lead, file=image.file).pk
data['lead_image'] = self.create(LeadPreviewAttachment, lead=lead, file=image.file).pk
response = self.client.post(url, data)
self.assert_201(response)
assert 'image' in response.data
assert 'image_details' in response.data
data.pop('lead_image')

# Using lead image (different lead)
data['lead_image'] = self.create(LeadPreviewImage, lead=self.create_lead(), file=image.file).pk
data['lead_image'] = self.create(LeadPreviewAttachment, lead=self.create_lead(), file=image.file).pk
response = self.client.post(url, data)
self.assert_400(response)
data.pop('lead_image')
Expand Down
8 changes: 4 additions & 4 deletions apps/lead/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .tasks import extract_from_lead
from .models import (
Lead, LeadGroup,
LeadPreview, LeadPreviewImage,
LeadPreview, LeadPreviewAttachment,
EMMEntity,
)

Expand All @@ -16,8 +16,8 @@ class LeadPreviewInline(admin.StackedInline):
model = LeadPreview


class LeadPreviewImageInline(admin.TabularInline):
model = LeadPreviewImage
class LeadPreviewAttachmentInline(admin.TabularInline):
model = LeadPreviewAttachment
extra = 0


Expand All @@ -42,7 +42,7 @@ def trigger_lead_extract(modeladmin, request, queryset):

@admin.register(Lead)
class LeadAdmin(VersionAdmin):
inlines = [LeadPreviewInline, LeadPreviewImageInline]
inlines = [LeadPreviewInline, LeadPreviewAttachmentInline]
search_fields = ['title']
list_filter = (
AutocompleteFilterFactory('Project', 'project'),
Expand Down
15 changes: 14 additions & 1 deletion apps/lead/dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from organization.dataloaders import OrganizationLoader

from .models import Lead, LeadPreview, LeadGroup
from .models import Lead, LeadPreview, LeadGroup, LeadPreviewAttachment
from assisted_tagging.models import DraftEntry
from assessment_registry.models import AssessmentRegistry

Expand All @@ -26,6 +26,15 @@ def batch_load_fn(self, keys):
return Promise.resolve([_map.get(key) for key in keys])


class LeadPreviewAttachmentLoader(DataLoaderWithContext):
    """
    Batch-load LeadPreviewAttachment rows grouped by lead.

    `keys` are lead ids; the resolved list is aligned with `keys`, one
    entry per requested lead.
    """

    def batch_load_fn(self, keys):
        attachments_by_lead = defaultdict(list)
        # Single query for the whole batch, then bucket rows by their lead id.
        for attachment in LeadPreviewAttachment.objects.filter(lead__in=keys):
            attachments_by_lead[attachment.lead_id].append(attachment)
        # NOTE: `.get()` is used instead of indexing, so a lead without any
        # attachment resolves to None rather than an empty list.
        return Promise.resolve([attachments_by_lead.get(lead_id) for lead_id in keys])


class EntriesCountLoader(DataLoaderWithContext):
def batch_load_fn(self, keys):
active_af = self.context.active_project.analysis_framework
Expand Down Expand Up @@ -137,6 +146,10 @@ class DataLoaders(WithContextMixin):
def lead_preview(self):
return LeadPreviewLoader(context=self.context)

@cached_property
def lead_preview_attachment(self):
return LeadPreviewAttachmentLoader(context=self.context)

@cached_property
def entries_count(self):
return EntriesCountLoader(context=self.context)
Expand Down
1 change: 1 addition & 0 deletions apps/lead/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
Lead.AutoExtractionStatus, name='LeadAutoEntryExtractionTypeEnum'
)


enum_map = {
get_enum_name_from_django_field(field): enum
for field, enum in (
Expand Down
6 changes: 3 additions & 3 deletions apps/lead/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
LeadGroup,
LeadEMMTrigger,
LeadPreview,
LeadPreviewImage,
LeadPreviewAttachment,
UserSavedLeadFilter,
)

Expand Down Expand Up @@ -84,9 +84,9 @@ class Meta:
model = LeadPreview


class LeadPreviewImageFactory(DjangoModelFactory):
class LeadPreviewAttachmentFactory(DjangoModelFactory):
class Meta:
model = LeadPreviewImage
model = LeadPreviewAttachment


class UserSavedLeadFilterFactory(DjangoModelFactory):
Expand Down
29 changes: 29 additions & 0 deletions apps/lead/migrations/0050_auto_20240606_0608.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 3.2.25 on 2024-06-06 06:08

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Replaces the LeadPreviewImage model with LeadPreviewAttachment.
    #
    # NOTE(review): the new table is created from scratch and the old model is
    # deleted in the same migration, with no step in between that copies rows.
    # Existing LeadPreviewImage records are therefore not carried over — confirm
    # this is intended, otherwise a RenameModel/AddField sequence or a data
    # migration is needed.

    dependencies = [
        ('lead', '0049_auto_20231121_0926_squashed_0054_auto_20231218_0552'),
    ]

    operations = [
        migrations.CreateModel(
            name='LeadPreviewAttachment',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('order', models.IntegerField(default=0)),
                ('page_number', models.IntegerField(default=0)),
                ('type', models.CharField(choices=[('XLSX', 'XLSX'), ('image', 'Image')], max_length=20)),
                ('file', models.FileField(upload_to='lead-preview/attachments/')),
                ('file_preview', models.FileField(upload_to='lead-preview/attachments-preview/')),
                # The reverse accessor on Lead stays `images`, although rows may now be of type XLSX as well.
                ('lead', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='images', to='lead.lead')),
            ],
        ),
        migrations.DeleteModel(
            name='LeadPreviewImage',
        ),
    ]
15 changes: 13 additions & 2 deletions apps/lead/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,14 +372,25 @@ def __str__(self):
return 'Text extracted for {}'.format(self.lead)


class LeadPreviewImage(models.Model):
class LeadPreviewAttachment(models.Model):
"""
NOTE: File can only be used by gallery (when attached to an entry)
"""
class AttachementFileType(models.TextChoices):
XLSX = 'XLSX', 'XLSX'
IMAGE = 'image', 'Image'

lead = models.ForeignKey(
Lead, related_name='images', on_delete=models.CASCADE,
)
file = models.FileField(upload_to='lead-preview/')
order = models.IntegerField(default=0)
page_number = models.IntegerField(default=0)
type = models.CharField(
max_length=20,
choices=AttachementFileType.choices,
)
file = models.FileField(upload_to='lead-preview/attachments/')
file_preview = models.FileField(upload_to='lead-preview/attachments-preview/')

def __str__(self):
return 'Image extracted for {}'.format(self.lead)
Expand Down
Loading

0 comments on commit 8a58a5c

Please sign in to comment.