Skip to content

Commit

Permalink
use the geolocation endpoint in tags classification;
Browse files Browse the repository at this point in the history
  • Loading branch information
ranjan-stha committed Jan 31, 2024
1 parent 300f7cb commit 51f4f19
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 915 deletions.
19 changes: 18 additions & 1 deletion analysis_module/mock_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,24 @@
}
},
"geolocations": [
"New York"
{
"entity": "Somalia",
"meta": {
"offset_start": 88,
"offset_end": 94,
"latitude": -10,
"longitude": -55
}
},
{
"entity": "Portugal",
"meta": {
"offset_start": 183,
"offset_end": 191,
"latitude": 39.6945,
"longitude": -8.13057
}
}
]
}

Expand Down
26 changes: 22 additions & 4 deletions analysis_module/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
import requests
import boto3
import uuid
import logging
from celery import shared_task
from urllib.parse import urljoin
from typing import Dict, Literal, List, Any
from nlp_scripts.model_prediction.model_prediction import ModelTagsPrediction
from nlp_scripts.model_prediction.geolocation import get_geolocations

from django.utils import timezone

Expand All @@ -19,7 +19,6 @@
ENTRYEXTRACTION_ECS_ENDPOINT,
GEOLOCATION_ECS_ENDPOINT
)
import logging

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -161,6 +160,25 @@ def send_callback_url_request(callback_url: str, client_id: str, filepath: str,
logging.error("No callback url found.")
return json.dumps({"status": "No callback url found."}), 400

def get_geolocations(excerpts: List[str], req_timeout: int = 60) -> Any:
    """Get geolocations for excerpts by requesting them from the geolocation module.

    Sends ``{"entries_list": excerpts}`` as a JSON POST request to the
    ``/get_geolocations`` route of ``GEOLOCATION_ECS_ENDPOINT``.

    Args:
        excerpts: Texts to send to the geolocation module.
        req_timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The decoded JSON body of the response on success, or ``None`` when
        the endpoint is not configured, the request fails, the service
        answers with an error status, or the body is not valid JSON.
    """
    if not GEOLOCATION_ECS_ENDPOINT:
        logging.error("The geolocation module endpoint not found.")
        return None

    # Tolerate a trailing slash in the configured endpoint so the final URL
    # never contains a double slash.
    url = GEOLOCATION_ECS_ENDPOINT.rstrip("/") + "/get_geolocations"
    data = {"entries_list": excerpts}

    try:
        response = requests.post(url, json=data, timeout=req_timeout)
        # Treat 4xx/5xx answers as failures instead of parsing an error
        # payload as if it were geolocation data.
        response.raise_for_status()
    except requests.exceptions.Timeout as terr:
        logging.error("Request timeout to the geolocation endpoint. %s", str(terr))
        return None
    except requests.exceptions.ConnectionError as cerr:
        logging.error("Request connection error occurred. %s", str(cerr))
        return None
    except requests.exceptions.RequestException as rerr:
        # Covers HTTPError raised by raise_for_status() and any other
        # request-level failure not handled above.
        logging.error("Request to the geolocation endpoint failed. %s", str(rerr))
        return None

    try:
        return response.json()
    except ValueError as jerr:
        # The JSON decode errors raised by requests derive from ValueError.
        logging.error("Invalid JSON received from the geolocation endpoint. %s", str(jerr))
        return None


@shared_task
def send_classification_tags(nlp_request_id: int):
Expand All @@ -170,12 +188,12 @@ def send_classification_tags(nlp_request_id: int):
pred_data = predictor(entries_dict)

entries_only = [item["entry"] for item in entries_dict]
geolocations = get_geolocations(entries_only, "nlpthedeep") # GEONAME_API_USER)
geolocations = get_geolocations(entries_only)

output_data = {
"client_id": entries_dict[0]["client_id"],
"model_tags": pred_data,
"geolocations": geolocations[0]["locations"],
"geolocations": geolocations[0]["locations"] if geolocations else [],
"model_info": {
"id": "all_tags_model",
"version": "1.0.0"
Expand Down
53 changes: 0 additions & 53 deletions nlp_scripts/model_prediction/geolocation.py

This file was deleted.

Loading

0 comments on commit 51f4f19

Please sign in to comment.