Skip to content

Commit

Permalink
Fix: Geographic Name Maximum Query Length #361 (#372)
Browse files Browse the repository at this point in the history
* Adding batch system to API call to avoid max query length error

* Accidentally removed a couple of relevant comments and code

* Adding unique_values_list to loop in case of duplicates
  • Loading branch information
JulianForeman authored Aug 7, 2024
1 parent 8913558 commit dd93fb9
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 6 deletions.
5 changes: 2 additions & 3 deletions django/api/services/bcngws.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from django.conf import settings
from api.constants.misc import RELEVANT_FEATURES


# names should be a list of location names, page_size should be an integer >=1, <=200
# start_index should be an integer, result should be a set
def get_placename_matches(names, page_size, start_index, result):
Expand All @@ -11,13 +10,13 @@ def get_placename_matches(names, page_size, start_index, result):
query = {
"outputFormat": "json",
"name": names_string,
"itemsPerPage": 200,
"itemsPerPage": page_size,
"startIndex": start_index,
}

try:
response = requests.get(settings.PLACENAMES_ENDPOINT, params=query)
response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx)
response.raise_for_status()
response = response.json()

for feature in response["features"]:
Expand Down
12 changes: 9 additions & 3 deletions django/api/services/spreadsheet_uploader_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,23 +250,29 @@ def validate_phone_numbers(df, *columns, **kwargs):
return result


def location_checker(df, *columns, **kwargs):
def location_checker(df, *columns, batch_size=50, **kwargs):
result = {}
for column in columns:
indices = []
series = df[column]
map_of_values_to_indices = get_map_of_values_to_indices(series, kwargs.get("indices_offset", 0))
values = series.to_list()
unique_values = set(series)
unique_values_list = list(values)

communities = set()
# populate communities by calling the bcngws API with the values:
get_placename_matches(values, 200, 1, communities)
for i in range(0, len(unique_values_list), batch_size):
batch_values = unique_values_list[i:i + batch_size]
# Send request to API with list of names, returns all the communities that somewhat matched
get_placename_matches(batch_values, 200, 1, communities)

# Find names that don't have a match in the communities set
names_without_match = unique_values.difference(communities)
for name in names_without_match:
indices_to_add = map_of_values_to_indices[name]
indices.extend(indices_to_add)
if indices:
indices.sort()
result[column] = {
"Unrecognized City Names": {
"Expected Type": "The following city names are not in the list of geographic names. Please double check that these places exist or have correct spelling and adjust your dataset accordingly.",
Expand Down

0 comments on commit dd93fb9

Please sign in to comment.