diff --git a/apps/assisted_tagging/schema.py b/apps/assisted_tagging/schema.py index fd926dcb4c..1338b5d37b 100644 --- a/apps/assisted_tagging/schema.py +++ b/apps/assisted_tagging/schema.py @@ -282,10 +282,10 @@ def custom_queryset(root, info, lead_id): class AssistedTaggingQueryType(graphene.ObjectType): draft_entry = DjangoObjectField(DraftEntryType) draft_entry_by_leads = DjangoListField(DraftEntryByLeadType, filter=DraftEntryFilterDataInputType()) - extraction_status_by_lead = graphene.Field(AutoextractionStatusType,lead_id=graphene.Int(required=True)) + extraction_status_by_lead = graphene.Field(AutoextractionStatusType, lead_id=graphene.ID(required=True)) def resolve_draft_entry_by_leads(root, info, filter): return DraftEntryByLeadType.custom_queryset(root, info, filter) def resolve_extraction_status_by_lead(root, info, lead_id): - return AutoextractionStatusType.custom_queryset(root, info,lead_id) + return AutoextractionStatusType.custom_queryset(root, info, lead_id) diff --git a/apps/assisted_tagging/tasks.py b/apps/assisted_tagging/tasks.py index ed9afed588..a0b4fb4f56 100644 --- a/apps/assisted_tagging/tasks.py +++ b/apps/assisted_tagging/tasks.py @@ -1,5 +1,6 @@ import logging import requests +import json from celery import shared_task from lead.models import Lead @@ -25,7 +26,9 @@ def _get_existing_tags_by_tagid(): tag.tag_id: tag # tag_id is from deepl for tag in AssistedTaggingModelPredictionTag.objects.all() } - response = requests.get(DeeplServiceEndpoint.ASSISTED_TAGGING_TAGS_ENDPOINT).json() + headers = {"Authorization": "TOKEN c2e6c102ad3b4e1097242d0730a091c54f112c66"} + response = requests.get(DeeplServiceEndpoint.ASSISTED_TAGGING_TAGS_ENDPOINT, headers=headers) + response = json.loads(response.text) existing_tags_by_tagid = _get_existing_tags_by_tagid() new_tags = [] diff --git a/apps/deepl_integration/handlers.py b/apps/deepl_integration/handlers.py index e4b8b92608..6da6fd45cd 100644 --- a/apps/deepl_integration/handlers.py +++ b/apps/deepl_integration/handlers.py @@ -399,7 +399,7 @@ def get_versions_map(): lambda acc, item: acc | item, [ models.Q( - model__model_id=model_data['id'], + model__model_id=model_data['name'], version=model_data['version'], ) for model_data in models_data @@ -412,16 +412,16 @@ def get_versions_map(): new_model_versions = [ model_data for model_data in models_data - if (model_data['id'], model_data['version']) not in existing_model_versions + if (model_data['name'], model_data['version']) not in existing_model_versions ] if new_model_versions: AssistedTaggingModelVersion.objects.bulk_create([ AssistedTaggingModelVersion( model=AssistedTaggingModel.objects.get_or_create( - model_id=model_data['id'], + model_id=model_data['name'], defaults=dict( - name=model_data['id'], + name=model_data['name'], ), )[0], version=model_data['version'], @@ -464,12 +464,11 @@ def get_tags_map(): @classmethod def _process_model_preds(cls, model_version, current_tags_map, draft_entry, model_prediction): prediction_status = model_prediction['prediction_status'] - if prediction_status == 0: # If 0 no tags are provided + if not prediction_status: # If False no tags are provided return - tags = model_prediction.get('tags', {}) # NLP TagId + tags = model_prediction.get('classification', {}) # NLP TagId values = model_prediction.get('values', []) # Raw value - common_attrs = dict( model_version=model_version, draft_entry_id=draft_entry.id, @@ -505,35 +504,37 @@ def _process_model_preds(cls, model_version, current_tags_map, draft_entry, mode @classmethod def save_data(cls, lead, data): + # print("handler data......",data) for model_preds in data['blocks']: classification = model_preds['classification'] current_tags_map = cls._get_or_create_tags_map([ tag - for prediction in classification['model_preds'] - for category_tag, tags in prediction.get('tags', {}).items() + for category_tag, tags in classification.items() for tag in [ category_tag, *tags.keys(), ] ]) models_version_map = cls._get_or_create_models_version([ - prediction['model_info'] - for prediction in classification['model_preds'] + data['classification_model_info'] ]) with transaction.atomic(): - draft = DraftEntry.objects.create( - project=lead.project, - lead=lead, - excerpt=model_preds['text'], - draft_entry_type=0 - ) - lead.auto_entry_extraction_status = Lead.AutoExtractionStatus.SUCCESS - lead.save() - draft.save() - for prediction in classification['model_preds']: - model_version = models_version_map[(prediction['model_info']['id'], prediction['model_info']['version'])] - cls._process_model_preds(model_version, current_tags_map, draft, prediction) + if model_preds['relevant']: + draft = DraftEntry.objects.create( + project=lead.project, + lead=lead, + excerpt=model_preds['text'], + draft_entry_type=0 + ) + lead.auto_entry_extraction_status = Lead.AutoExtractionStatus.SUCCESS + lead.save() + draft.save() + model_version = models_version_map[ + (data['classification_model_info']['name'], data['classification_model_info']['version']) + ] + for prediction in data['blocks']: + cls._process_model_preds(model_version, current_tags_map, draft, prediction) return lead diff --git a/apps/deepl_integration/serializers.py b/apps/deepl_integration/serializers.py index 94499ad618..7fd433a915 100644 --- a/apps/deepl_integration/serializers.py +++ b/apps/deepl_integration/serializers.py @@ -31,6 +31,8 @@ from .models import DeeplTrackBaseModel +from utils.request import RequestHelper + class BaseCallbackSerializer(serializers.Serializer): nlp_handler: Type[BaseHandler] @@ -192,6 +194,32 @@ class ModelPredictionCallbackSerializerTagValue(serializers.Serializer): prediction_status = serializers.IntegerField() # 0 -> Failure, 1 -> Success +class AutoAssistedTaggingModelPredicationCallBackSerializer(serializers.Serializer): + class ModelPredictionCallbackSerializerTagValue(serializers.Serializer): + predication = serializers.DecimalField( + max_digits=AssistedTaggingPrediction.prediction.field.max_digits, + decimal_places=AssistedTaggingPrediction.prediction.field.decimal_places, + required=False, + ) + threshold = serializers.DecimalField( + # From apps/assisted_tagging/models.py::AssistedTaggingPrediction::threshold + max_digits=AssistedTaggingPrediction.threshold.field.max_digits, + decimal_places=AssistedTaggingPrediction.threshold.field.decimal_places, + required=False, + ) + is_selected = serializers.BooleanField() + values = serializers.ListSerializer( + child=serializers.CharField(), + required=False, + ) + tags = serializers.DictField( + child=serializers.DictField( + child=ModelPredictionCallbackSerializerTagValue(), + ), + required=False, + ) + + class AssistedTaggingDraftEntryPredictionCallbackSerializer(BaseCallbackSerializer): model_preds = AssistedTaggingModelPredictionCallbackSerializer(many=True) @@ -208,22 +236,38 @@ def create(self, validated_data): class AutoAssistedBlockPredicationCallbackSerializer(serializers.Serializer): - class ClassificationInfoCallBackSerializer(serializers.Serializer): - model_preds = AssistedTaggingModelPredictionCallbackSerializer(many=True) text = serializers.CharField() - classification = ClassificationInfoCallBackSerializer() + relevant = serializers.BooleanField() + prediction_status = serializers.BooleanField() + # classification = AutoAssistedTaggingModelPredicationCallBackSerializer() + classification = serializers.DictField(child=serializers.DictField()) class AutoAssistedTaggingDraftEntryCallbackSerializer(BaseCallbackSerializer): - blocks = AutoAssistedBlockPredicationCallbackSerializer(many=True) + entry_extraction_classification_path = serializers.URLField(required=True) + text_extraction_id = serializers.CharField(required=True) + status = serializers.IntegerField() nlp_handler = AutoAssistedTaggingDraftEntryHandler def create(self, validated_data): + obj = validated_data['object'] + validated_data = RequestHelper(url=validated_data['entry_extraction_classification_path'], ignore_error=True).json() return self.nlp_handler.save_data( - validated_data['object'], + obj, validated_data ) +# class AutoAssistedTaggingDraftEntryCallbackSerializer(BaseCallbackSerializer): +# blocks = AutoAssistedBlockPredicationCallbackSerializer(many=True) +# classification_model_info = serializers.DictField() +# nlp_handler = AutoAssistedTaggingDraftEntryHandler + +# def create(self, validated_data): +# return self.nlp_handler.save_data( +# validated_data['object'], +# validated_data +# ) + class EntriesCollectionBaseCallbackSerializer(DeeplServerBaseCallbackSerializer): model: Type[DeeplTrackBaseModel] diff --git a/apps/deepl_integration/views.py b/apps/deepl_integration/views.py index a33112d37b..264d0ce6ec 100644 --- a/apps/deepl_integration/views.py +++ b/apps/deepl_integration/views.py @@ -18,8 +18,6 @@ AutoAssistedTaggingDraftEntryCallbackSerializer ) -from utils.request import RequestHelper - class BaseCallbackView(views.APIView): serializer: Type[serializers.Serializer] @@ -36,16 +34,8 @@ class AssistedTaggingDraftEntryPredictionCallbackView(BaseCallbackView): serializer = AssistedTaggingDraftEntryPredictionCallbackSerializer -class AutoTaggingDraftEntryPredictionCallbackView(views.APIView): +class AutoTaggingDraftEntryPredictionCallbackView(BaseCallbackView): serializer = AutoAssistedTaggingDraftEntryCallbackSerializer - permission_classes = [permissions.AllowAny] - - def post(self, request, **_): - data = RequestHelper(url=request.data['entry_extraction_classification_path'], ignore_error=True).json() - serializer = self.serializer(data=data) - serializer.is_valid(raise_exception=True) - serializer.save() - return response.Response("Request successfully completed", status=status.HTTP_200_OK) class LeadExtractCallbackView(BaseCallbackView): diff --git a/apps/lead/migrations/0051_auto_20231128_0958.py b/apps/lead/migrations/0051_auto_20231128_0958.py new file mode 100644 index 0000000000..8f8bb7fb6b --- /dev/null +++ b/apps/lead/migrations/0051_auto_20231128_0958.py @@ -0,0 +1,28 @@ +# Generated by Django 3.2.17 on 2023-11-28 09:58 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('lead', '0050_delete_extractedlead'), + ] + + operations = [ + migrations.AddField( + model_name='leadpreview', + name='entry_extraction_id', + field=models.CharField(blank=True, max_length=30, null=True), + ), + migrations.AddField( + model_name='leadpreview', + name='text_extraction_id', + field=models.CharField(blank=True, max_length=30, null=True), + ), + migrations.AlterField( + model_name='lead', + name='auto_entry_extraction_status', + field=models.SmallIntegerField(choices=[(0, 'None'), (1, 'Pending'), (2, 'Success'), (3, 'Failed')], default=0), + ), + ] diff --git a/apps/lead/models.py b/apps/lead/models.py index c1ce191965..3de8b98a49 100644 --- a/apps/lead/models.py +++ b/apps/lead/models.py @@ -362,6 +362,8 @@ class ClassificationStatus(models.TextChoices): choices=ClassificationStatus.choices, default=ClassificationStatus.NONE, ) + entry_extraction_id = models.CharField(max_length=30, null=True, blank=True) + text_extraction_id = models.CharField(max_length=30, null=True, blank=True) def __str__(self): return 'Text extracted for {}'.format(self.lead) diff --git a/apps/lead/tests/test_apis.py b/apps/lead/tests/test_apis.py index 13d149c2ac..2d16fe9136 100644 --- a/apps/lead/tests/test_apis.py +++ b/apps/lead/tests/test_apis.py @@ -25,7 +25,7 @@ from organization.serializers import SimpleOrganizationSerializer from lead.filter_set import LeadFilterSet from lead.serializers import SimpleLeadGroupSerializer -from deepl_integration.handlers import LeadExtractionHandler +from deepl_integration.handlers import AutoAssistedTaggingDraftEntryHandler, LeadExtractionHandler from deepl_integration.serializers import DeeplServerBaseCallbackSerializer from entry.models import ( Entry, @@ -1852,3 +1852,23 @@ def test_client_id_generator(self): LeadExtractionHandler.get_object_using_client_id(client_id) else: assert LeadExtractionHandler.get_object_using_client_id(client_id) == lead + + +class AutoEntryExtractionCallback(TestCase): + def setUp(self): + super().setUp() + self.lead = LeadFactory.create() + + @mock.patch('deepl_integration.handlers.RequestHelper.json') + def test_entry_extraction_callback(self, get_json_mock): + url = '/api/v1/callback/auto-assisted-tagging-draft-entry-prediction/' + self.authenticate() + get_json_mock.return_value = "" + data = { + "client_id": AutoAssistedTaggingDraftEntryHandler.get_client_id(self.lead), + 'entry_extraction_classification_path': 'https://server-deepl.dev.datafriendlyspace.org/media/', + 'text_extraction_id': '43545', + 'status': 1 + } + response = self.client.post(url, data) + self.assert_200(response) diff --git a/deep/deepl.py b/deep/deepl.py index 7064fb6676..20fe9b1be8 100644 --- a/deep/deepl.py +++ b/deep/deepl.py @@ -8,7 +8,7 @@ class DeeplServiceEndpoint(): # DEEPL Service Endpoints (Existing/Legacy) # NOTE: This will be moved to server endpoints in near future - ASSISTED_TAGGING_TAGS_ENDPOINT = f'{DEEPL_SERVICE_DOMAIN}/vf_tags' + ASSISTED_TAGGING_TAGS_ENDPOINT = f'{DEEPL_SERVICE_DOMAIN}/api/v1/nlp-tags/' ASSISTED_TAGGING_MODELS_ENDPOINT = f'{DEEPL_SERVICE_DOMAIN}/model_info' ASSISTED_TAGGING_ENTRY_PREDICT_ENDPOINT = f'{DEEPL_SERVICE_DOMAIN}/entry_predict' diff --git a/schema.graphql b/schema.graphql index bd75cb723a..07bc981596 100644 --- a/schema.graphql +++ b/schema.graphql @@ -1043,7 +1043,7 @@ type AssistedTaggingPredictionType { type AssistedTaggingQueryType { draftEntry(id: ID!): DraftEntryType draftEntryByLeads(filter: DraftEntryFilterDataInputType): [DraftEntryByLeadType!] - extractionStatusByLead: AutoextractionStatusType + extractionStatusByLead(leadId: ID!): AutoextractionStatusType } type AssistedTaggingRootQueryType { @@ -1082,6 +1082,7 @@ input AutoDraftEntryInputType { } enum AutoEntryExtractionTypeEnum { + NONE PENDING SUCCESS FAILED