diff --git a/apps/commons/receivers.py b/apps/commons/receivers.py index 088ce696d3..9b420e29f9 100644 --- a/apps/commons/receivers.py +++ b/apps/commons/receivers.py @@ -7,14 +7,14 @@ LeadPreview, LeadPreviewAttachment, ) -from unified_connector.models import ConnectorLeadPreviewImage +from unified_connector.models import ConnectorLeadPreviewAttachment # Lead @receiver(models.signals.post_delete, sender=LeadPreview) @receiver(models.signals.post_delete, sender=LeadPreviewAttachment) # Unified Connector -@receiver(models.signals.post_delete, sender=ConnectorLeadPreviewImage) +@receiver(models.signals.post_delete, sender=ConnectorLeadPreviewAttachment) def cleanup_file_on_instance_delete(sender, instance, **kwargs): files = [] for field in instance._meta.get_fields(): diff --git a/apps/deepl_integration/handlers.py b/apps/deepl_integration/handlers.py index 4858875eaa..c64a37c2d5 100644 --- a/apps/deepl_integration/handlers.py +++ b/apps/deepl_integration/handlers.py @@ -3,7 +3,7 @@ import copy import requests import logging -from typing import List, Type +from typing import Dict, List, Type from functools import reduce from urllib.parse import urlparse @@ -30,7 +30,7 @@ ) from unified_connector.models import ( ConnectorLead, - ConnectorLeadPreviewImage, + ConnectorLeadPreviewAttachment, ConnectorSource, UnifiedConnector, ) @@ -706,6 +706,7 @@ def save_data( return lead @staticmethod + @transaction.atomic def save_lead_data_using_connector_lead( lead: Lead, connector_lead: ConnectorLead, @@ -724,11 +725,16 @@ def save_lead_data_using_connector_lead( ) # Save extracted images as LeadPreviewAttachment instances # TODO: The logic is same for unified_connector leads as well. Maybe have a single func? - for connector_lead_preview_image in connector_lead.preview_images.all(): - lead_image = LeadPreviewAttachment(lead=lead) - lead_image.file.save( - connector_lead_preview_image.image.name, - connector_lead_preview_image.image, + for connector_lead_attachment in connector_lead.preview_images.all(): + lead_attachment = LeadPreviewAttachment(lead=lead) + lead_attachment.order = connector_lead_attachment.order + lead_attachment.file.save( + connector_lead_attachment.file.name, + connector_lead_attachment.file, + ) + lead_attachment.file_preview.save( + connector_lead_attachment.file_preview.name, + connector_lead_attachment.file_preview ) lead.update_extraction_status(Lead.ExtractionStatus.SUCCESS) return True @@ -742,7 +748,8 @@ class UnifiedConnectorLeadHandler(BaseHandler): def save_data( connector_lead: ConnectorLead, text_source_uri: str, - images_uri: List[str], + images_uri: List[Dict], + table_uri: List[Dict], word_count: int, page_count: int, text_extraction_id: str, @@ -751,16 +758,54 @@ def save_data( connector_lead.word_count = word_count connector_lead.page_count = page_count connector_lead.text_extraction_id = text_extraction_id - image_base_path = f'{connector_lead.pk}' + + attachment_base_path = f'{connector_lead.pk}' for image_uri in images_uri: - lead_image = ConnectorLeadPreviewImage(connector_lead=connector_lead) - image_obj = RequestHelper(url=image_uri, ignore_error=True).get_file() - if image_obj: - lead_image.image.save( - os.path.join(image_base_path, os.path.basename(urlparse(image_uri).path)), - image_obj, + for image in image_uri['images']: + image_obj = RequestHelper(url=image, ignore_error=True).get_file() + if image_obj: + connector_lead_attachment = ConnectorLeadPreviewAttachment(connector_lead=connector_lead) + connector_lead_attachment.file.save( + os.path.join( + attachment_base_path, + os.path.basename( + urlparse(image).path + ) + ), + image_obj, + ) + connector_lead_attachment.page_number = image_uri['page_number'] + connector_lead_attachment.type = ConnectorLeadPreviewAttachment.ConnectorAttachmentFileType.IMAGE + connector_lead_attachment.file_preview = connector_lead_attachment.file + connector_lead_attachment.save() + + for table in table_uri: + table_img = RequestHelper(url=table['image_link'], ignore_error=True).get_file() + table_attachment = RequestHelper(url=table['content_link'], ignore_error=True).get_file() + if table_img: + connector_lead_attachment = ConnectorLeadPreviewAttachment(connector_lead=connector_lead) + connector_lead_attachment.file_preview.save( + os.path.join( + attachment_base_path, + os.path.basename( + urlparse(table['image_link']).path + ) + ), + table_img, ) - lead_image.save() + connector_lead_attachment.page_number = table['page_number'] + connector_lead_attachment.type = ConnectorLeadPreviewAttachment.ConnectorAttachmentFileType.XLSX + connector_lead_attachment.file.save( + os.path.join( + attachment_base_path, + os.path.basename( + urlparse(table['content_link']).path + ) + ), + table_attachment, + ) + connector_lead_attachment.save() + connector_lead.update_extraction_status(ConnectorLead.ExtractionStatus.SUCCESS, commit=False) connector_lead.save() return connector_lead diff --git a/apps/deepl_integration/serializers.py b/apps/deepl_integration/serializers.py index ffe69995fe..3915bea38f 100644 --- a/apps/deepl_integration/serializers.py +++ b/apps/deepl_integration/serializers.py @@ -143,9 +143,13 @@ class UnifiedConnectorLeadExtractCallbackSerializer(DeeplServerBaseCallbackSeria Serialize deepl extractor """ # Data fields - images_path = serializers.ListField( - child=serializers.CharField(allow_blank=True), - required=False, default=[], + images_path = serializers.ListSerializer( + child=ImagePathSerializer(required=True), + required=False + ) + tables_path = serializers.ListSerializer( + child=TablePathSerializer(required=True), + required=False ) text_path = serializers.CharField(required=False, allow_null=True) total_words_count = serializers.IntegerField(required=False, default=0, allow_null=True) @@ -175,6 +179,7 @@ def create(self, data): connector_lead, data['text_path'], data.get('images_path', [])[:10], # TODO: Support for more images, to much image will error. + data['tables_path'], data['total_words_count'], data['total_pages'], data['text_extraction_id'], diff --git a/apps/entry/schema.py b/apps/entry/schema.py index 4944d3354e..34f43282ca 100644 --- a/apps/entry/schema.py +++ b/apps/entry/schema.py @@ -10,7 +10,6 @@ from utils.graphene.fields import DjangoPaginatedListObjectField, DjangoListField from user_resource.schema import UserResourceMixin from deep.permissions import ProjectPermissions as PP -from deep.serializers import URLCachedFileField from lead.models import Lead from user.schema import UserType @@ -155,6 +154,7 @@ def resolve_verified_by_count(root, info, **_): return info.context.dl.entry.verified_by_count.load(root.pk) @staticmethod + # NOTE: Client might not need this field so we have not refactor the dataloader def resolve_canonical_preview_image(root, info, **_): return info.context.dl.entry.entry_image_preview_url.load(root.pk) diff --git a/apps/lead/views.py b/apps/lead/views.py index d56e8428b8..339563cb9d 100644 --- a/apps/lead/views.py +++ b/apps/lead/views.py @@ -433,8 +433,10 @@ def post(self, request, version=None): # Dynamic Options 'lead_groups': LeadGroup.objects.filter(project_filter, id__in=lead_groups_id).distinct(), - 'members': _filter_users_by_projects_memberships(members_qs, projects)\ - .prefetch_related('profile').distinct(), + 'members': _filter_users_by_projects_memberships( + members_qs, + projects, + ).prefetch_related('profile').distinct(), 'organizations': Organization.objects.filter(id__in=organizations_id).distinct(), # EMM specific options diff --git a/apps/unified_connector/migrations/0009_auto_20240618_0924.py b/apps/unified_connector/migrations/0009_auto_20240618_0924.py new file mode 100644 index 0000000000..87e58dc174 --- /dev/null +++ b/apps/unified_connector/migrations/0009_auto_20240618_0924.py @@ -0,0 +1,29 @@ +# Generated by Django 3.2.25 on 2024-06-18 09:24 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('unified_connector', '0008_connectorlead_text_extraction_id'), + ] + + operations = [ + migrations.CreateModel( + name='ConnectorLeadPreviewAttachment', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('order', models.IntegerField(default=0)), + ('page_number', models.IntegerField(default=0)), + ('type', models.PositiveSmallIntegerField(choices=[(1, 'XLSX'), (2, 'Image')], default=1)), + ('file', models.FileField(upload_to='connector-lead/attachments/')), + ('file_preview', models.FileField(upload_to='connector-lead/attachments-preview/')), + ('connector_lead', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='preview_images', to='unified_connector.connectorlead')), + ], + ), + migrations.DeleteModel( + name='ConnectorLeadPreviewImage', + ), + ] diff --git a/apps/unified_connector/models.py b/apps/unified_connector/models.py index efb04a7293..0f0485d6c1 100644 --- a/apps/unified_connector/models.py +++ b/apps/unified_connector/models.py @@ -49,7 +49,7 @@ class ExtractionStatus(models.IntegerChoices): ) def __init__(self, *args, **kwargs): - self.preview_images: models.QuerySet[ConnectorLeadPreviewImage] + self.preview_images: models.QuerySet[ConnectorLeadPreviewAttachment] super().__init__(*args, **kwargs) @classmethod @@ -78,9 +78,20 @@ def update_extraction_status(self, new_status, commit=True): self.save(update_fields=('extraction_status',)) -class ConnectorLeadPreviewImage(models.Model): +class ConnectorLeadPreviewAttachment(models.Model): + class ConnectorAttachmentFileType(models.IntegerChoices): + XLSX = 1, 'XLSX' + IMAGE = 2, 'Image' + connector_lead = models.ForeignKey(ConnectorLead, on_delete=models.CASCADE, related_name='preview_images') - image = models.FileField(upload_to='connector-lead/preview-images/', max_length=255) + order = models.IntegerField(default=0) + page_number = models.IntegerField(default=0) + type = models.PositiveSmallIntegerField( + choices=ConnectorAttachmentFileType.choices, + default=ConnectorAttachmentFileType.XLSX + ) + file = models.FileField(upload_to='connector-lead/attachments/') + file_preview = models.FileField(upload_to='connector-lead/attachments-preview/') class UnifiedConnector(UserResource): diff --git a/apps/unified_connector/tests/test_mutation.py b/apps/unified_connector/tests/test_mutation.py index 8d36c0ae54..6ce6f1edfd 100644 --- a/apps/unified_connector/tests/test_mutation.py +++ b/apps/unified_connector/tests/test_mutation.py @@ -12,7 +12,7 @@ from unified_connector.models import ( ConnectorLead, ConnectorSource, - ConnectorLeadPreviewImage, + ConnectorLeadPreviewAttachment ) from deepl_integration.handlers import UnifiedConnectorLeadHandler from deepl_integration.serializers import DeeplServerBaseCallbackSerializer @@ -492,7 +492,23 @@ def _check_connector_lead_status(connector_lead, status): # ------ Extraction FAILED data = dict( client_id='some-random-client-id', - images_path=['https://example.com/sample-file-1.jpg'], + images_path=[ + { + 'page_number': 1, + 'images': [ + 'http://random.com/image1.jpeg', + 'http://random.com/image2.jpeg' + ], + } + ], + tables_path=[ + { + "page_number": 1, + "order": 0, + "image_link": "http://random.com/timetable.png", + "content_link": "http://random.com/table_timetable.xlsx" + } + ], text_path='https://example.com/url-where-data-is-fetched-from-mock-response', total_words_count=100, total_pages=10, @@ -520,7 +536,16 @@ def _check_connector_lead_status(connector_lead, status): # ------ Extraction SUCCESS data = dict( client_id='some-random-client-id', - images_path=['https://example.com/sample-file-1.jpg', 'https://example.com/sample-file-2.jpg'], + images_path=[ + { + 'page_number': 1, + 'images': [ + 'http://random.com/image1.jpeg', + 'http://random.com/image2.jpeg' + ], + } + ], + tables_path=[], text_path='https://example.com/url-where-data-is-fetched-from-mock-response', total_words_count=100, total_pages=10, @@ -542,8 +567,8 @@ def _check_connector_lead_status(connector_lead, status): assert connector_lead2.page_count == 10 _check_connector_lead_status(connector_lead2, ConnectorLead.ExtractionStatus.SUCCESS) - preview_image_qs = ConnectorLeadPreviewImage.objects.filter(connector_lead=connector_lead2) - preview_image = preview_image_qs.first() + preview_attachment_qs = ConnectorLeadPreviewAttachment.objects.filter(connector_lead=connector_lead2) + preview_attachment = preview_attachment_qs.first() self.assertEqual(connector_lead2.simplified_text, SAMPLE_SIMPLIFIED_TEXT) - self.assertEqual(preview_image_qs.count(), 2) - self.assertIsNotNone(preview_image and preview_image.image.name) + self.assertEqual(preview_attachment_qs.count(), 2) + self.assertIsNotNone(preview_attachment and preview_attachment.file) diff --git a/utils/graphene/mutation.py b/utils/graphene/mutation.py index aa5f1bb99c..a90b7162b5 100644 --- a/utils/graphene/mutation.py +++ b/utils/graphene/mutation.py @@ -176,10 +176,10 @@ def fields_for_serializer( is_excluded = any( [ name in exclude_fields, - field.write_only and - not is_input, # don't show write_only fields in Query - field.read_only and is_input \ - and lookup_field != name, # don't show read_only fields in Input + # don't show write_only fields in Query + field.write_only and not is_input, + # don't show read_only fields in Input + field.read_only and is_input and lookup_field != name, ] )