Skip to content

Commit

Permalink
Add connector lead attachment and save on lead
Browse files Browse the repository at this point in the history
  • Loading branch information
sudan45 committed Jun 19, 2024
1 parent adcd665 commit bdc0401
Show file tree
Hide file tree
Showing 9 changed files with 156 additions and 38 deletions.
4 changes: 2 additions & 2 deletions apps/commons/receivers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
LeadPreview,
LeadPreviewAttachment,
)
from unified_connector.models import ConnectorLeadPreviewImage
from unified_connector.models import ConnectorLeadPreviewAttachment


# Lead
@receiver(models.signals.post_delete, sender=LeadPreview)
@receiver(models.signals.post_delete, sender=LeadPreviewAttachment)
# Unified Connector
@receiver(models.signals.post_delete, sender=ConnectorLeadPreviewImage)
@receiver(models.signals.post_delete, sender=ConnectorLeadPreviewAttachment)
def cleanup_file_on_instance_delete(sender, instance, **kwargs):
files = []
for field in instance._meta.get_fields():
Expand Down
77 changes: 61 additions & 16 deletions apps/deepl_integration/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import copy
import requests
import logging
from typing import List, Type
from typing import Dict, List, Type
from functools import reduce
from urllib.parse import urlparse

Expand All @@ -30,7 +30,7 @@
)
from unified_connector.models import (
ConnectorLead,
ConnectorLeadPreviewImage,
ConnectorLeadPreviewAttachment,
ConnectorSource,
UnifiedConnector,
)
Expand Down Expand Up @@ -706,6 +706,7 @@ def save_data(
return lead

@staticmethod
@transaction.atomic
def save_lead_data_using_connector_lead(
lead: Lead,
connector_lead: ConnectorLead,
Expand All @@ -724,11 +725,16 @@ def save_lead_data_using_connector_lead(
)
# Save extracted images as LeadPreviewAttachment instances
# TODO: The logic is the same for unified_connector leads as well. Maybe use a single function?
for connector_lead_preview_image in connector_lead.preview_images.all():
lead_image = LeadPreviewAttachment(lead=lead)
lead_image.file.save(
connector_lead_preview_image.image.name,
connector_lead_preview_image.image,
for connector_lead_attachment in connector_lead.preview_images.all():
lead_attachment = LeadPreviewAttachment(lead=lead)
lead_attachment.order = connector_lead_attachment.order
lead_attachment.file.save(
connector_lead_attachment.file.name,
connector_lead_attachment.file,
)
lead_attachment.file_preview.save(
connector_lead_attachment.file_preview.name,
connector_lead_attachment.file_preview
)
lead.update_extraction_status(Lead.ExtractionStatus.SUCCESS)
return True
Expand All @@ -742,7 +748,8 @@ class UnifiedConnectorLeadHandler(BaseHandler):
def save_data(
connector_lead: ConnectorLead,
text_source_uri: str,
images_uri: List[str],
images_uri: List[Dict],
table_uri: List[Dict],
word_count: int,
page_count: int,
text_extraction_id: str,
Expand All @@ -751,16 +758,54 @@ def save_data(
connector_lead.word_count = word_count
connector_lead.page_count = page_count
connector_lead.text_extraction_id = text_extraction_id
image_base_path = f'{connector_lead.pk}'

attachment_base_path = f'{connector_lead.pk}'
for image_uri in images_uri:
lead_image = ConnectorLeadPreviewImage(connector_lead=connector_lead)
image_obj = RequestHelper(url=image_uri, ignore_error=True).get_file()
if image_obj:
lead_image.image.save(
os.path.join(image_base_path, os.path.basename(urlparse(image_uri).path)),
image_obj,
for image in image_uri['images']:
image_obj = RequestHelper(url=image, ignore_error=True).get_file()
if image_obj:
connector_lead_attachment = ConnectorLeadPreviewAttachment(connector_lead=connector_lead)
connector_lead_attachment.file.save(
os.path.join(
attachment_base_path,
os.path.basename(
urlparse(image).path
)
),
image_obj,
)
connector_lead_attachment.page_number = image_uri['page_number']
connector_lead_attachment.type = ConnectorLeadPreviewAttachment.ConnectorAttachmentFileType.IMAGE
connector_lead_attachment.file_preview = connector_lead_attachment.file
connector_lead_attachment.save()

for table in table_uri:
table_img = RequestHelper(url=table['image_link'], ignore_error=True).get_file()
table_attachment = RequestHelper(url=table['content_link'], ignore_error=True).get_file()
if table_img:
connector_lead_attachment = ConnectorLeadPreviewAttachment(connector_lead=connector_lead)
connector_lead_attachment.file_preview.save(
os.path.join(
attachment_base_path,
os.path.basename(
urlparse(table['image_link']).path
)
),
table_img,
)
lead_image.save()
connector_lead_attachment.page_number = table['page_number']
connector_lead_attachment.type = ConnectorLeadPreviewAttachment.ConnectorAttachmentFileType.XLSX
connector_lead_attachment.file.save(
os.path.join(
attachment_base_path,
os.path.basename(
urlparse(table['content_link']).path
)
),
table_attachment,
)
connector_lead_attachment.save()

connector_lead.update_extraction_status(ConnectorLead.ExtractionStatus.SUCCESS, commit=False)
connector_lead.save()
return connector_lead
Expand Down
11 changes: 8 additions & 3 deletions apps/deepl_integration/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,13 @@ class UnifiedConnectorLeadExtractCallbackSerializer(DeeplServerBaseCallbackSeria
Serialize deepl extractor
"""
# Data fields
images_path = serializers.ListField(
child=serializers.CharField(allow_blank=True),
required=False, default=[],
images_path = serializers.ListSerializer(
child=ImagePathSerializer(required=True),
required=False
)
tables_path = serializers.ListSerializer(
child=TablePathSerializer(required=True),
required=False
)
text_path = serializers.CharField(required=False, allow_null=True)
total_words_count = serializers.IntegerField(required=False, default=0, allow_null=True)
Expand Down Expand Up @@ -175,6 +179,7 @@ def create(self, data):
connector_lead,
data['text_path'],
data.get('images_path', [])[:10], # TODO: Support more images; too many images will cause an error.
data['tables_path'],
data['total_words_count'],
data['total_pages'],
data['text_extraction_id'],
Expand Down
2 changes: 1 addition & 1 deletion apps/entry/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from utils.graphene.fields import DjangoPaginatedListObjectField, DjangoListField
from user_resource.schema import UserResourceMixin
from deep.permissions import ProjectPermissions as PP
from deep.serializers import URLCachedFileField
from lead.models import Lead
from user.schema import UserType

Expand Down Expand Up @@ -155,6 +154,7 @@ def resolve_verified_by_count(root, info, **_):
return info.context.dl.entry.verified_by_count.load(root.pk)

@staticmethod
# NOTE: The client might not need this field, so we have not refactored the dataloader.
def resolve_canonical_preview_image(root, info, **_):
return info.context.dl.entry.entry_image_preview_url.load(root.pk)

Expand Down
6 changes: 4 additions & 2 deletions apps/lead/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,10 @@ def post(self, request, version=None):
# Dynamic Options

'lead_groups': LeadGroup.objects.filter(project_filter, id__in=lead_groups_id).distinct(),
'members': _filter_users_by_projects_memberships(members_qs, projects)\
.prefetch_related('profile').distinct(),
'members': _filter_users_by_projects_memberships(
members_qs,
projects,
).prefetch_related('profile').distinct(),
'organizations': Organization.objects.filter(id__in=organizations_id).distinct(),

# EMM specific options
Expand Down
29 changes: 29 additions & 0 deletions apps/unified_connector/migrations/0009_auto_20240618_0924.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 3.2.25 on 2024-06-18 09:24

from django.db import migrations, models
import django.db.models.deletion


# Schema migration for the unified_connector app: introduces the
# ConnectorLeadPreviewAttachment model and removes ConnectorLeadPreviewImage.
class Migration(migrations.Migration):

# Must be applied after migration 0008 of the same app.
dependencies = [
('unified_connector', '0008_connectorlead_text_extraction_id'),
]

# NOTE(review): the operations below only create the new table and delete the
# old one; there is no data-copy step (e.g. RunPython) in between, so any
# existing ConnectorLeadPreviewImage rows are dropped — confirm this is intended.
operations = [
migrations.CreateModel(
name='ConnectorLeadPreviewAttachment',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('order', models.IntegerField(default=0)),
('page_number', models.IntegerField(default=0)),
# Attachment kind: 1 = XLSX, 2 = Image; defaults to 1 (XLSX).
('type', models.PositiveSmallIntegerField(choices=[(1, 'XLSX'), (2, 'Image')], default=1)),
# NOTE(review): neither FileField sets max_length, so Django's FileField
# default applies (100 per the Django docs) — confirm that the stored
# paths (upload_to prefix + file name) always fit.
('file', models.FileField(upload_to='connector-lead/attachments/')),
('file_preview', models.FileField(upload_to='connector-lead/attachments-preview/')),
# Reverse accessor on ConnectorLead is 'preview_images'; rows are removed
# together with their connector lead (CASCADE).
('connector_lead', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='preview_images', to='unified_connector.connectorlead')),
],
),
migrations.DeleteModel(
name='ConnectorLeadPreviewImage',
),
]
17 changes: 14 additions & 3 deletions apps/unified_connector/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class ExtractionStatus(models.IntegerChoices):
)

def __init__(self, *args, **kwargs):
self.preview_images: models.QuerySet[ConnectorLeadPreviewImage]
self.preview_images: models.QuerySet[ConnectorLeadPreviewAttachment]
super().__init__(*args, **kwargs)

@classmethod
Expand Down Expand Up @@ -78,9 +78,20 @@ def update_extraction_status(self, new_status, commit=True):
self.save(update_fields=('extraction_status',))


class ConnectorLeadPreviewImage(models.Model):
class ConnectorLeadPreviewAttachment(models.Model):
class ConnectorAttachmentFileType(models.IntegerChoices):
XLSX = 1, 'XLSX'
IMAGE = 2, 'Image'

connector_lead = models.ForeignKey(ConnectorLead, on_delete=models.CASCADE, related_name='preview_images')
image = models.FileField(upload_to='connector-lead/preview-images/', max_length=255)
order = models.IntegerField(default=0)
page_number = models.IntegerField(default=0)
type = models.PositiveSmallIntegerField(
choices=ConnectorAttachmentFileType.choices,
default=ConnectorAttachmentFileType.XLSX
)
file = models.FileField(upload_to='connector-lead/attachments/')
file_preview = models.FileField(upload_to='connector-lead/attachments-preview/')


class UnifiedConnector(UserResource):
Expand Down
40 changes: 33 additions & 7 deletions apps/unified_connector/tests/test_mutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from unified_connector.models import (
ConnectorLead,
ConnectorSource,
ConnectorLeadPreviewImage,
ConnectorLeadPreviewAttachment
)
from deepl_integration.handlers import UnifiedConnectorLeadHandler
from deepl_integration.serializers import DeeplServerBaseCallbackSerializer
Expand Down Expand Up @@ -492,7 +492,23 @@ def _check_connector_lead_status(connector_lead, status):
# ------ Extraction FAILED
data = dict(
client_id='some-random-client-id',
images_path=['https://example.com/sample-file-1.jpg'],
images_path=[
{
'page_number': 1,
'images': [
'http://random.com/image1.jpeg',
'http://random.com/image2.jpeg'
],
}
],
tables_path=[
{
"page_number": 1,
"order": 0,
"image_link": "http://random.com/timetable.png",
"content_link": "http://random.com/table_timetable.xlsx"
}
],
text_path='https://example.com/url-where-data-is-fetched-from-mock-response',
total_words_count=100,
total_pages=10,
Expand Down Expand Up @@ -520,7 +536,16 @@ def _check_connector_lead_status(connector_lead, status):
# ------ Extraction SUCCESS
data = dict(
client_id='some-random-client-id',
images_path=['https://example.com/sample-file-1.jpg', 'https://example.com/sample-file-2.jpg'],
images_path=[
{
'page_number': 1,
'images': [
'http://random.com/image1.jpeg',
'http://random.com/image2.jpeg'
],
}
],
tables_path=[],
text_path='https://example.com/url-where-data-is-fetched-from-mock-response',
total_words_count=100,
total_pages=10,
Expand All @@ -536,14 +561,15 @@ def _check_connector_lead_status(connector_lead, status):
self.assert_200(response)
connector_lead2.refresh_from_db()
_check_connector_lead_status(connector_lead2, ConnectorLead.ExtractionStatus.SUCCESS)
print(connector_lead2.text_extraction_id)
assert str(connector_lead2.text_extraction_id) == data['text_extraction_id']
assert connector_lead2.simplified_text is not None
assert connector_lead2.word_count == 100
assert connector_lead2.page_count == 10

_check_connector_lead_status(connector_lead2, ConnectorLead.ExtractionStatus.SUCCESS)
preview_image_qs = ConnectorLeadPreviewImage.objects.filter(connector_lead=connector_lead2)
preview_image = preview_image_qs.first()
preview_attachment_qs = ConnectorLeadPreviewAttachment.objects.filter(connector_lead=connector_lead2)
preview_attachment = preview_attachment_qs.first()
self.assertEqual(connector_lead2.simplified_text, SAMPLE_SIMPLIFIED_TEXT)
self.assertEqual(preview_image_qs.count(), 2)
self.assertIsNotNone(preview_image and preview_image.image.name)
self.assertEqual(preview_attachment_qs.count(), 2)
self.assertIsNotNone(preview_attachment and preview_attachment.file)
8 changes: 4 additions & 4 deletions utils/graphene/mutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,10 @@ def fields_for_serializer(
is_excluded = any(
[
name in exclude_fields,
field.write_only and
not is_input, # don't show write_only fields in Query
field.read_only and is_input \
and lookup_field != name, # don't show read_only fields in Input
# don't show write_only fields in Query
field.write_only and not is_input,
# don't show read_only fields in Input
field.read_only and is_input and lookup_field != name,
]
)

Expand Down

0 comments on commit bdc0401

Please sign in to comment.