Skip to content

Commit

Permalink
Merge pull request #45 from the-deep-nlp/fix/geolocation_mockdata
Browse files Browse the repository at this point in the history
Fix/geolocation mockdata
  • Loading branch information
sudan45 authored Feb 5, 2024
2 parents 98e8f37 + f7722d7 commit 47c7eac
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 930 deletions.
61 changes: 58 additions & 3 deletions analysis_module/mock_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,24 @@
}
},
"geolocations": [
"New York"
{
"entity": "Somalia",
"meta": {
"offset_start": 88,
"offset_end": 94,
"latitude": -10,
"longitude": -55
}
},
{
"entity": "Portugal",
"meta": {
"offset_start": 183,
"offset_end": 191,
"latitude": 39.6945,
"longitude": -8.13057
}
}
]
}

Expand Down Expand Up @@ -616,7 +633,26 @@
"textOrder": 1,
"relevant": True,
"prediction_status": True,
"geolocations": ["Somalia"],
"geolocations": [
{
"entity": "Somalia",
"meta": {
"offset_start": 88,
"offset_end": 94,
"latitude": -10,
"longitude": -55
}
},
{
"entity": "Portugal",
"meta": {
"offset_start": 183,
"offset_end": 191,
"latitude": 39.6945,
"longitude": -8.13057
}
}
],
"classification": {
"1": {
"101": {
Expand Down Expand Up @@ -1135,7 +1171,26 @@
"textOrder": 2,
"relevant": True,
"prediction_status": True,
"geolocations": ["Niger", "Nigeria"],
"geolocations": [
{
"entity": "Niger",
"meta": {
"offset_start": 88,
"offset_end": 94,
"latitude": -10,
"longitude": -55
}
},
{
"entity": "Nigeria",
"meta": {
"offset_start": 183,
"offset_end": 191,
"latitude": None,
"longitude": None
}
}
],
"classification": {
"1": {
"101": {
Expand Down
35 changes: 22 additions & 13 deletions analysis_module/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import requests
import logging
import random
import numpy as np

from random import shuffle
from math import ceil
Expand Down Expand Up @@ -213,9 +212,23 @@ def process_geolocation(body) -> Any:
"""geolocation extraction"""

def shape_geo_entities(entity: dict, excerpt: str) -> dict:
    """Build one mock geolocation record for ``entity``.

    Args:
        entity: Mock entry; ``entity["ent"]`` is the place name and
            ``entity["geoids"]`` is an iterable of dicts carrying
            ``"match"``, ``"latitude"`` and ``"longitude"`` keys.
        excerpt: Text the entity is pretended to occur in. Only its length
            is used, to pick a random start offset.

    Returns:
        ``{"entity": <name>, "meta": {...}}`` where ``meta`` holds
        ``offset_start``, ``offset_end``, ``latitude`` and ``longitude``.
        The coordinates stay ``None`` when no geoid's ``"match"`` equals the
        entity name. ``entity`` itself is not modified.
    """
    name = entity["ent"]

    # The offsets are random placeholders, not the real position of the name
    # in the excerpt. Clamp the upper bound so an excerpt shorter than the
    # name yields start=0 instead of randint raising ValueError.
    start = random.randint(0, max(0, len(excerpt) - len(name)))

    meta = {
        "offset_start": start,
        "offset_end": start + len(name),
        "latitude": None,
        "longitude": None,
    }

    # Take the coordinates of the first geoid that matches the name exactly.
    for geoid in entity["geoids"]:
        if geoid["match"] == name:
            meta["latitude"] = geoid["latitude"]
            meta["longitude"] = geoid["longitude"]
            break

    return {"entity": name, "meta": meta}

request_body = body if isinstance(body, dict) else json.loads(body)
Expand All @@ -238,14 +251,10 @@ def shape_geo_entities(entity: dict, excerpt: str):
return

data = []
for entry_id, excerpt in excerpts:
entities = list(
np.random.choice(
MOCK_GEOLOCATION, size=random.randint(0, len(MOCK_GEOLOCATION))
)
)
entities = [shape_geo_entities(x, excerpt) for x in entities]
data.append({"entry_id": int(entry_id), "entities": entities})
for idx, (entry_id, excerpt) in enumerate(excerpts):
entities = MOCK_GEOLOCATION[idx]
entities = [shape_geo_entities(entities, excerpt)]
data.append({"entry_id": int(entry_id), "locations": entities})

filepath = save_data_local_and_get_url(
dir_name="geolocation",
Expand All @@ -262,7 +271,7 @@ def shape_geo_entities(entity: dict, excerpt: str):


def geolocation_mock_model(body) -> Any:
    """Queue the mock geolocation job and acknowledge the request.

    Args:
        body: Request payload, handed unchanged to ``process_geolocation``.

    Returns:
        A ``(json_string, 200)`` tuple acknowledging receipt.
    """
    # Enqueue exactly once. NOTE(review): process_geolocation is presumably a
    # Celery task; ``delay`` forwards all keyword arguments to the task
    # itself, so a scheduling option such as ``countdown`` would have to go
    # through ``apply_async`` rather than ``delay``.
    process_geolocation.delay(body)
    return json.dumps({"status": "Successfully received the request."}), 200


Expand Down
27 changes: 23 additions & 4 deletions analysis_module/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
import requests
import boto3
import uuid
import logging
from celery import shared_task
from urllib.parse import urljoin
from typing import Dict, Literal, List, Any
from nlp_scripts.model_prediction.model_prediction import ModelTagsPrediction
from nlp_scripts.model_prediction.geolocation import get_geolocations

from django.utils import timezone

Expand All @@ -19,7 +19,6 @@
ENTRYEXTRACTION_ECS_ENDPOINT,
GEOLOCATION_ECS_ENDPOINT
)
import logging

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -162,6 +161,26 @@ def send_callback_url_request(callback_url: str, client_id: str, filepath: str,
return json.dumps({"status": "No callback url found."}), 400


def get_geolocations(excerpts: List[str], req_timeout: int = 60):
    """Get geolocations for excerpts by requesting them from the geolocation module.

    Args:
        excerpts: Texts sent to the service as the ``entries_list`` field of
            the JSON payload.
        req_timeout: Seconds to wait for the HTTP response.

    Returns:
        The decoded JSON body of the response, or ``None`` when the endpoint
        is not configured, the request fails, or the body is not valid JSON.
        The HTTP status code is not inspected.
    """
    if not GEOLOCATION_ECS_ENDPOINT:
        logger.error("The geolocation module endpoint not found.")
        return None

    data = {"entries_list": excerpts}
    try:
        response = requests.post(
            GEOLOCATION_ECS_ENDPOINT + "/get_geolocations",
            json=data,
            timeout=req_timeout,
        )
    except requests.exceptions.Timeout as terr:
        logger.error("Request timeout to the geolocation endpoint. %s", str(terr))
        return None
    except requests.exceptions.ConnectionError as cerr:
        logger.error("Request connection error occurred. %s", str(cerr))
        return None
    except requests.exceptions.RequestException as rerr:
        # Any other requests failure (bad URL, too many redirects, ...) is
        # reported the same way so callers only have to handle ``None``.
        logger.error("Request to the geolocation endpoint failed. %s", str(rerr))
        return None

    # Decode separately from the request so that a non-JSON body is reported
    # as such and not mistaken for a transport error.
    try:
        return response.json()
    except ValueError as verr:
        logger.error("Invalid JSON received from the geolocation endpoint. %s", str(verr))
        return None


@shared_task
def send_classification_tags(nlp_request_id: int):
nlp_request = NLPRequest.objects.get(pk=nlp_request_id)
Expand All @@ -170,12 +189,12 @@ def send_classification_tags(nlp_request_id: int):
pred_data = predictor(entries_dict)

entries_only = [item["entry"] for item in entries_dict]
geolocations = get_geolocations(entries_only, "nlpthedeep") # GEONAME_API_USER)
geolocations = get_geolocations(entries_only)

output_data = {
"client_id": entries_dict[0]["client_id"],
"model_tags": pred_data,
"geolocations": geolocations[0]["locations"],
"geolocations": geolocations[0]["locations"] if geolocations else [],
"model_info": {
"id": "all_tags_model",
"version": "1.0.0"
Expand Down
53 changes: 0 additions & 53 deletions nlp_scripts/model_prediction/geolocation.py

This file was deleted.

Loading

0 comments on commit 47c7eac

Please sign in to comment.